google-cloud-bigquery 0.28.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,6 +14,7 @@
14
14
 
15
15
 
16
16
  require "google/cloud/bigquery/service"
17
+ require "google/cloud/bigquery/data"
17
18
 
18
19
  module Google
19
20
  module Cloud
@@ -29,9 +30,29 @@ module Google
29
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
30
31
  # reference
31
32
  #
33
+ # @example
34
+ # require "google/cloud/bigquery"
35
+ #
36
+ # bigquery = Google::Cloud::Bigquery.new
37
+ #
38
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
39
+ # "publicdata.samples.shakespeare"
40
+ #
41
+ # job.wait_until_done!
42
+ #
43
+ # if job.failed?
44
+ # puts job.error
45
+ # else
46
+ # puts job.data.first
47
+ # end
48
+ #
32
49
  class QueryJob < Job
33
50
  ##
34
51
  # Checks if the priority for the query is `BATCH`.
52
+ #
53
+ # @return [Boolean] `true` when the priority is `BATCH`, `false`
54
+ # otherwise.
55
+ #
35
56
  def batch?
36
57
  val = @gapi.configuration.query.priority
37
58
  val == "BATCH"
@@ -39,6 +60,10 @@ module Google
39
60
 
40
61
  ##
41
62
  # Checks if the priority for the query is `INTERACTIVE`.
63
+ #
64
+ # @return [Boolean] `true` when the priority is `INTERACTIVE`, `false`
65
+ # otherwise.
66
+ #
42
67
  def interactive?
43
68
  val = @gapi.configuration.query.priority
44
69
  return true if val.nil?
@@ -48,6 +73,10 @@ module Google
48
73
  ##
49
74
  # Checks if the query job allows arbitrarily large results at a
50
75
  # slight cost to performance.
76
+ #
77
+ # @return [Boolean] `true` when large results are allowed, `false`
78
+ # otherwise.
79
+ #
51
80
  def large_results?
52
81
  val = @gapi.configuration.query.allow_large_results
53
82
  return false if val.nil?
@@ -58,6 +87,10 @@ module Google
58
87
  # Checks if the query job looks for an existing result in the query
59
88
  # cache. For more information, see [Query
60
89
  # Caching](https://cloud.google.com/bigquery/querying-data#querycaching).
90
+ #
91
+ # @return [Boolean] `true` when the query cache will be used, `false`
92
+ # otherwise.
93
+ #
61
94
  def cache?
62
95
  val = @gapi.configuration.query.use_query_cache
63
96
  return false if val.nil?
@@ -68,6 +101,10 @@ module Google
68
101
  # Checks if the query job flattens nested and repeated fields in the
69
102
  # query results. The default is `true`. If the value is `false`,
70
103
  # #large_results? should return `true`.
104
+ #
105
+ # @return [Boolean] `true` when the job flattens results, `false`
106
+ # otherwise.
107
+ #
71
108
  def flatten?
72
109
  val = @gapi.configuration.query.flatten_results
73
110
  return true if val.nil?
@@ -75,15 +112,27 @@ module Google
75
112
  end
76
113
 
77
114
  ##
78
- # Limits the billing tier for this job.
79
- # For more information, see [High-Compute
115
+ # Limits the billing tier for this job. Queries that have resource usage
116
+ # beyond this tier will fail (without incurring a charge). If
117
+ # unspecified, this will be set to your project default. For more
118
+ # information, see [High-Compute
80
119
  # queries](https://cloud.google.com/bigquery/pricing#high-compute).
120
+ #
121
+ # @return [Integer, nil] The tier number, or `nil` for the project
122
+ # default.
123
+ #
81
124
  def maximum_billing_tier
82
125
  @gapi.configuration.query.maximum_billing_tier
83
126
  end
84
127
 
85
128
  ##
86
- # Limits the bytes billed for this job.
129
+ # Limits the bytes billed for this job. Queries that will have bytes
130
+ # billed beyond this limit will fail (without incurring a charge). If
131
+ # `nil`, this will be set to your project default.
132
+ #
133
+ # @return [Integer, nil] The number of bytes, or `nil` for the project
134
+ # default.
135
+ #
87
136
  def maximum_bytes_billed
88
137
  Integer @gapi.configuration.query.maximum_bytes_billed
89
138
  rescue
@@ -92,20 +141,62 @@ module Google
92
141
 
93
142
  ##
94
143
  # Checks if the query results are from the query cache.
144
+ #
145
+ # @return [Boolean] `true` when the job statistics indicate a cache hit,
146
+ # `false` otherwise.
147
+ #
95
148
  def cache_hit?
96
149
  @gapi.statistics.query.cache_hit
97
150
  end
98
151
 
99
152
  ##
100
153
  # The number of bytes processed by the query.
154
+ #
155
+ # @return [Integer] Total bytes processed for the job.
156
+ #
101
157
  def bytes_processed
102
158
  Integer @gapi.statistics.query.total_bytes_processed
103
159
  rescue
104
160
  nil
105
161
  end
106
162
 
163
##
# Describes the execution plan for the query.
#
# The statistics are read defensively: when the job resource carries no
# statistics, no query statistics, or no query plan, `nil` is returned
# instead of raising `NoMethodError`.
#
# @return [Array<Google::Cloud::Bigquery::QueryJob::Stage>, nil] An array
#   containing the stages of the execution plan, or `nil` when no query
#   plan is available.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sql = "SELECT word FROM publicdata.samples.shakespeare"
#   job = bigquery.query_job sql
#
#   job.wait_until_done!
#
#   stages = job.query_plan
#   stages.each do |stage|
#     puts stage.name
#     stage.steps.each do |step|
#       puts step.kind
#       puts step.substeps.inspect
#     end
#   end
#
def query_plan
  stats = @gapi.statistics
  query_stats = stats && stats.query
  stages_gapi = query_stats && query_stats.query_plan
  return nil unless stages_gapi
  # Array() keeps a single (non-array) plan element iterable.
  Array(stages_gapi).map { |stage_gapi| Stage.from_gapi stage_gapi }
end
194
+
107
195
  ##
108
196
  # The table in which the query results are stored.
197
+ #
198
+ # @return [Table, nil] A table instance, or `nil` if none is set.
199
+ #
109
200
  def destination
110
201
  table = @gapi.configuration.query.destination_table
111
202
  return nil unless table
@@ -116,6 +207,9 @@ module Google
116
207
 
117
208
  ##
118
209
  # Checks if the query job is using legacy sql.
210
+ #
211
+ # @return [Boolean] `true` when legacy sql is used, `false` otherwise.
212
+ #
119
213
  def legacy_sql?
120
214
  val = @gapi.configuration.query.use_legacy_sql
121
215
  return true if val.nil?
@@ -124,10 +218,61 @@ module Google
124
218
 
125
219
  ##
126
220
  # Checks if the query job is using standard sql.
221
+ #
222
+ # @return [Boolean] `true` when standard sql is used, `false` otherwise.
223
+ #
127
224
  def standard_sql?
128
225
  !legacy_sql?
129
226
  end
130
227
 
228
##
# The user-defined function resources used in the query. May be either a
# code resource to load from a Google Cloud Storage URI
# (`gs://bucket/path`), or an inline resource that contains code for a
# user-defined function (UDF). Providing an inline code resource is
# equivalent to providing a URI for a file containing the same code. See
# [User-Defined Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
#
# @return [Array<String>, nil] An array containing Google Cloud Storage
#   URIs and/or inline source code, or `nil` when the query configuration
#   has no user-defined function resources. For each resource the inline
#   code is preferred; the resource URI is used when there is none.
#
def udfs
  resources = @gapi.configuration.query.user_defined_function_resources
  return nil unless resources
  Array(resources).map do |resource|
    inline = resource.inline_code
    inline ? inline : resource.resource_uri
  end
end
246
+
247
##
# Blocks until the job is `DONE`, then reloads the job.
#
# Returns immediately when the job is already done. Otherwise the job's
# query results are requested (with `max: 0`) repeatedly until the
# response reports the job as complete; the schema from that response is
# cached for later reads of the result data. No client-side delay is
# inserted between requests here.
# NOTE(review): presumably each request blocks on the service side up to
# its default timeout, which is what paces this loop - confirm.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sql = "SELECT word FROM publicdata.samples.shakespeare"
#   job = bigquery.query_job sql
#
#   job.wait_until_done!
#   job.done? #=> true
#
def wait_until_done!
  return if done?

  ensure_service!
  results_gapi = nil
  loop do
    results_gapi = service.job_query_results job_id, max: 0
    break if results_gapi.job_complete
  end
  # Keep the schema of the completed query for building result data.
  @destination_schema_gapi = results_gapi.schema
  reload!
end
275
+
131
276
  ##
132
277
  # Retrieves the query results for the job.
133
278
  #
@@ -135,11 +280,9 @@ module Google
135
280
  # identifying the result set.
136
281
  # @param [Integer] max Maximum number of results to return.
137
282
  # @param [Integer] start Zero-based index of the starting row to read.
138
- # @param [Integer] timeout How long to wait for the query to complete,
139
- # in milliseconds, before returning. Default is 10,000 milliseconds
140
- # (10 seconds).
141
283
  #
142
- # @return [Google::Cloud::Bigquery::QueryData]
284
+ # @return [Google::Cloud::Bigquery::Data] An object providing access to
285
+ # data read from the destination table for the job.
143
286
  #
144
287
  # @example
145
288
  # require "google/cloud/bigquery"
@@ -150,17 +293,184 @@ module Google
150
293
  # job = bigquery.query_job sql
151
294
  #
152
295
  # job.wait_until_done!
153
- # data = job.query_results
296
+ # data = job.data
154
297
  # data.each do |row|
155
298
  # puts row[:word]
156
299
  # end
157
300
  # data = data.next if data.next?
158
301
  #
159
- def query_results token: nil, max: nil, start: nil, timeout: nil
160
- ensure_service!
161
- options = { token: token, max: max, start: start, timeout: timeout }
162
- gapi = service.job_query_results job_id, options
163
- QueryData.from_gapi gapi, service
302
def data token: nil, max: nil, start: nil
  # Results can only be read once the job has finished; callers get nil
  # (not a Data object) for a job that is not done yet.
  return nil unless done?

  # Check the service connection before issuing any request, as
  # wait_until_done! does before it uses the service.
  ensure_service!
  # Make sure the destination schema is cached; it is needed to build the
  # table description handed to Data.from_gapi below.
  ensure_schema!

  paging = { token: token, max: max, start: start }
  rows_gapi = service.list_tabledata destination_table_dataset_id,
                                     destination_table_table_id, paging
  Data.from_gapi rows_gapi, destination_table_gapi, service
end
312
+ alias_method :query_results, :data
313
+
314
##
# Represents a stage in the execution plan for the query.
#
# @attr_reader [Float] compute_ratio_avg Relative amount of time the
#   average shard spent on CPU-bound tasks.
# @attr_reader [Float] compute_ratio_max Relative amount of time the
#   slowest shard spent on CPU-bound tasks.
# @attr_reader [Integer] id Unique ID for the stage within the query
#   plan.
# @attr_reader [String] name Human-readable name for the stage.
# @attr_reader [Float] read_ratio_avg Relative amount of time the
#   average shard spent reading input.
# @attr_reader [Float] read_ratio_max Relative amount of time the
#   slowest shard spent reading input.
# @attr_reader [Integer] records_read Number of records read into the
#   stage.
# @attr_reader [Integer] records_written Number of records written by
#   the stage.
# @attr_reader [String] status Status of the stage as reported by the
#   service. NOTE(review): type assumed from the API resource - confirm.
# @attr_reader [Array<Step>] steps List of operations within the stage
#   in dependency order (approximately chronological).
# @attr_reader [Float] wait_ratio_avg Relative amount of time the
#   average shard spent waiting to be scheduled.
# @attr_reader [Float] wait_ratio_max Relative amount of time the
#   slowest shard spent waiting to be scheduled.
# @attr_reader [Float] write_ratio_avg Relative amount of time the
#   average shard spent on writing output.
# @attr_reader [Float] write_ratio_max Relative amount of time the
#   slowest shard spent on writing output.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sql = "SELECT word FROM publicdata.samples.shakespeare"
#   job = bigquery.query_job sql
#
#   job.wait_until_done!
#
#   stages = job.query_plan
#   stages.each do |stage|
#     puts stage.name
#     stage.steps.each do |step|
#       puts step.kind
#       puts step.substeps.inspect
#     end
#   end
#
class Stage
  attr_reader :compute_ratio_avg, :compute_ratio_max, :id, :name,
              :read_ratio_avg, :read_ratio_max, :records_read,
              :records_written, :status, :steps, :wait_ratio_avg,
              :wait_ratio_max, :write_ratio_avg, :write_ratio_max

  ##
  # @private Creates a new Stage instance. Arguments are positional and
  # are stored unchanged in the readers of the same name.
  def initialize compute_ratio_avg, compute_ratio_max, id, name,
                 read_ratio_avg, read_ratio_max, records_read,
                 records_written, status, steps, wait_ratio_avg,
                 wait_ratio_max, write_ratio_avg, write_ratio_max
    # Identity and state of the stage.
    @id, @name, @status = id, name, status
    @steps = steps
    # Record counters.
    @records_read, @records_written = records_read, records_written
    # Timing ratios for the average shard.
    @compute_ratio_avg, @read_ratio_avg = compute_ratio_avg, read_ratio_avg
    @wait_ratio_avg, @write_ratio_avg = wait_ratio_avg, write_ratio_avg
    # Timing ratios for the slowest shard.
    @compute_ratio_max, @read_ratio_max = compute_ratio_max, read_ratio_max
    @wait_ratio_max, @write_ratio_max = wait_ratio_max, write_ratio_max
  end

  ##
  # @private New Stage from a statistics.query.queryPlan element. Each
  # element of the stage's steps is wrapped with Step.from_gapi; a missing
  # steps list becomes an empty array.
  def self.from_gapi gapi
    stage_steps = Array(gapi.steps).map do |step_gapi|
      Step.from_gapi step_gapi
    end
    new(
      gapi.compute_ratio_avg, gapi.compute_ratio_max,
      gapi.id, gapi.name,
      gapi.read_ratio_avg, gapi.read_ratio_max,
      gapi.records_read, gapi.records_written,
      gapi.status, stage_steps,
      gapi.wait_ratio_avg, gapi.wait_ratio_max,
      gapi.write_ratio_avg, gapi.write_ratio_max
    )
  end
end
401
+
402
##
# Represents an operation in a stage in the execution plan for the
# query.
#
# @attr_reader [String] kind Machine-readable operation type. For a full
#   list of operation types, see [Steps
#   metadata](https://cloud.google.com/bigquery/query-plan-explanation#steps_metadata).
# @attr_reader [Array<String>] substeps Human-readable stage
#   descriptions.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sql = "SELECT word FROM publicdata.samples.shakespeare"
#   job = bigquery.query_job sql
#
#   job.wait_until_done!
#
#   stages = job.query_plan
#   stages.each do |stage|
#     puts stage.name
#     stage.steps.each do |step|
#       puts step.kind
#       puts step.substeps.inspect
#     end
#   end
#
class Step
  attr_reader :kind, :substeps

  ##
  # @private Creates a new Step instance.
  def initialize kind, substeps
    @kind, @substeps = kind, substeps
  end

  ##
  # @private New Step from a statistics.query.queryPlan[].steps element.
  # A missing substeps list becomes an empty array.
  def self.from_gapi gapi
    step_kind = gapi.kind
    new step_kind, Array(gapi.substeps)
  end
end
447
+
448
+ protected
449
+
450
# Caches the schema of the query results unless one is already cached.
# The schema is taken from a query-results request made with `max: 0`.
def ensure_schema!
  schema_cached = !destination_schema.nil?
  return if schema_cached

  # NOTE(review): a nil schema in the response is stored as-is; no error
  # is raised when the schema cannot be retrieved.
  @destination_schema_gapi =
    service.job_query_results(job_id, max: 0).schema
end
457
+
458
# The cached schema of the query results, or nil when none has been
# cached yet.
def destination_schema
  @destination_schema_gapi
end
461
+
462
# The dataset ID of the destination table in the job's query
# configuration.
def destination_table_dataset_id
  table_ref = @gapi.configuration.query.destination_table
  table_ref.dataset_id
end
465
+
466
# The table ID of the destination table in the job's query configuration.
def destination_table_table_id
  table_ref = @gapi.configuration.query.destination_table
  table_ref.table_id
end
469
+
470
# Builds a table resource describing the destination table: the table
# reference from the job's query configuration plus the cached schema.
def destination_table_gapi
  table_ref = @gapi.configuration.query.destination_table
  Google::Apis::BigqueryV2::Table.new(
    table_reference: table_ref,
    schema: destination_schema
  )
end
165
475
  end
166
476
  end
@@ -46,6 +46,22 @@ module Google
46
46
  class Schema
47
47
  ##
48
48
  # The fields of the table schema.
49
+ #
50
+ # @return [Array<Field>] An array of field objects.
51
+ #
52
+ # @example
53
+ # require "google/cloud/bigquery"
54
+ #
55
+ # bigquery = Google::Cloud::Bigquery.new
56
+ # dataset = bigquery.dataset "my_dataset"
57
+ # table = dataset.table "my_table"
58
+ #
59
+ # schema = table.schema
60
+ #
61
+ # schema.fields.each do |field|
62
+ # puts field.name
63
+ # end
64
+ #
49
65
  def fields
50
66
  if frozen?
51
67
  Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
@@ -56,12 +72,41 @@ module Google
56
72
 
57
73
  ##
58
74
  # The names of the fields as symbols.
75
+ #
76
+ # @return [Array<Symbol>] An array of column names.
77
+ #
78
+ # @example
79
+ # require "google/cloud/bigquery"
80
+ #
81
+ # bigquery = Google::Cloud::Bigquery.new
82
+ # dataset = bigquery.dataset "my_dataset"
83
+ # table = dataset.create_table "my_table"
84
+ #
85
+ # schema = table.schema
86
+ #
87
+ # schema.headers.each do |header|
88
+ # puts header
89
+ # end
90
+ #
59
91
  def headers
60
92
  fields.map(&:name).map(&:to_sym)
61
93
  end
62
94
 
63
95
  ##
64
- # Retreive a fields by name.
96
+ # Retrieve a field by name.
97
+ #
98
+ # @return [Field, nil] A field object, or `nil` if no field matches.
99
+ #
100
+ # @example
101
+ # require "google/cloud/bigquery"
102
+ #
103
+ # bigquery = Google::Cloud::Bigquery.new
104
+ # dataset = bigquery.dataset "my_dataset"
105
+ # table = dataset.table "my_table"
106
+ #
107
+ # field = table.schema.field "name"
108
+ # field.required? #=> true
109
+ #
65
110
  def field name
66
111
  f = fields.find { |fld| fld.name == name.to_s }
67
112
  return nil if f.nil?
@@ -71,6 +116,9 @@ module Google
71
116
 
72
117
  ##
73
118
  # Whether the schema has no fields defined.
119
+ #
120
+ # @return [Boolean] `true` when there are no fields, `false` otherwise.
121
+ #
74
122
  def empty?
75
123
  fields.empty?
76
124
  end
@@ -86,6 +134,7 @@ module Google
86
134
  # @param [Symbol] mode The field's mode. The possible values are
87
135
  # `:nullable`, `:required`, and `:repeated`. The default value is
88
136
  # `:nullable`.
137
+ #
89
138
  def string name, description: nil, mode: :nullable
90
139
  add_field name, :string, description: description, mode: mode
91
140
  end
@@ -101,6 +150,7 @@ module Google
101
150
  # @param [Symbol] mode The field's mode. The possible values are
102
151
  # `:nullable`, `:required`, and `:repeated`. The default value is
103
152
  # `:nullable`.
153
+ #
104
154
  def integer name, description: nil, mode: :nullable
105
155
  add_field name, :integer, description: description, mode: mode
106
156
  end
@@ -116,6 +166,7 @@ module Google
116
166
  # @param [Symbol] mode The field's mode. The possible values are
117
167
  # `:nullable`, `:required`, and `:repeated`. The default value is
118
168
  # `:nullable`.
169
+ #
119
170
  def float name, description: nil, mode: :nullable
120
171
  add_field name, :float, description: description, mode: mode
121
172
  end
@@ -131,6 +182,7 @@ module Google
131
182
  # @param [Symbol] mode The field's mode. The possible values are
132
183
  # `:nullable`, `:required`, and `:repeated`. The default value is
133
184
  # `:nullable`.
185
+ #
134
186
  def boolean name, description: nil, mode: :nullable
135
187
  add_field name, :boolean, description: description, mode: mode
136
188
  end
@@ -146,6 +198,7 @@ module Google
146
198
  # @param [Symbol] mode The field's mode. The possible values are
147
199
  # `:nullable`, `:required`, and `:repeated`. The default value is
148
200
  # `:nullable`.
201
+ #
149
202
  def bytes name, description: nil, mode: :nullable
150
203
  add_field name, :bytes, description: description, mode: mode
151
204
  end
@@ -176,6 +229,7 @@ module Google
176
229
  # @param [Symbol] mode The field's mode. The possible values are
177
230
  # `:nullable`, `:required`, and `:repeated`. The default value is
178
231
  # `:nullable`.
232
+ #
179
233
  def time name, description: nil, mode: :nullable
180
234
  add_field name, :time, description: description, mode: mode
181
235
  end
@@ -191,6 +245,7 @@ module Google
191
245
  # @param [Symbol] mode The field's mode. The possible values are
192
246
  # `:nullable`, `:required`, and `:repeated`. The default value is
193
247
  # `:nullable`.
248
+ #
194
249
  def datetime name, description: nil, mode: :nullable
195
250
  add_field name, :datetime, description: description, mode: mode
196
251
  end
@@ -206,6 +261,7 @@ module Google
206
261
  # @param [Symbol] mode The field's mode. The possible values are
207
262
  # `:nullable`, `:required`, and `:repeated`. The default value is
208
263
  # `:nullable`.
264
+ #
209
265
  def date name, description: nil, mode: :nullable
210
266
  add_field name, :date, description: description, mode: mode
211
267
  end