google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@
14
14
 
15
15
 
16
16
  require "google/cloud/bigquery/service"
17
+ require "google/cloud/bigquery/data"
17
18
 
18
19
  module Google
19
20
  module Cloud
@@ -29,9 +30,29 @@ module Google
29
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
30
31
  # reference
31
32
  #
33
+ # @example
34
+ # require "google/cloud/bigquery"
35
+ #
36
+ # bigquery = Google::Cloud::Bigquery.new
37
+ #
38
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
39
+ # "publicdata.samples.shakespeare"
40
+ #
41
+ # job.wait_until_done!
42
+ #
43
+ # if job.failed?
44
+ # puts job.error
45
+ # else
46
+ # puts job.data.first
47
+ # end
48
+ #
32
49
  class QueryJob < Job
33
50
  ##
34
51
  # Checks if the priority for the query is `BATCH`.
52
+ #
53
+ # @return [Boolean] `true` when the priority is `BATCH`, `false`
54
+ # otherwise.
55
+ #
35
56
  def batch?
36
57
  val = @gapi.configuration.query.priority
37
58
  val == "BATCH"
@@ -39,6 +60,10 @@ module Google
39
60
 
40
61
  ##
41
62
  # Checks if the priority for the query is `INTERACTIVE`.
63
+ #
64
+ # @return [Boolean] `true` when the priority is `INTERACTIVE`, `false`
65
+ # otherwise.
66
+ #
42
67
  def interactive?
43
68
  val = @gapi.configuration.query.priority
44
69
  return true if val.nil?
@@ -48,6 +73,10 @@ module Google
48
73
  ##
49
74
  # Checks if the query job allows arbitrarily large results at a
50
75
  # slight cost to performance.
76
+ #
77
+ # @return [Boolean] `true` when large results are allowed, `false`
78
+ # otherwise.
79
+ #
51
80
  def large_results?
52
81
  val = @gapi.configuration.query.allow_large_results
53
82
  return false if val.nil?
@@ -58,6 +87,10 @@ module Google
58
87
  # Checks if the query job looks for an existing result in the query
59
88
  # cache. For more information, see [Query
60
89
  # Caching](https://cloud.google.com/bigquery/querying-data#querycaching).
90
+ #
91
+ # @return [Boolean] `true` when the query cache will be used, `false`
92
+ # otherwise.
93
+ #
61
94
  def cache?
62
95
  val = @gapi.configuration.query.use_query_cache
63
96
  return false if val.nil?
@@ -68,6 +101,10 @@ module Google
68
101
  # Checks if the query job flattens nested and repeated fields in the
69
102
  # query results. The default is `true`. If the value is `false`,
70
103
  # #large_results? should return `true`.
104
+ #
105
+ # @return [Boolean] `true` when the job flattens results, `false`
106
+ # otherwise.
107
+ #
71
108
  def flatten?
72
109
  val = @gapi.configuration.query.flatten_results
73
110
  return true if val.nil?
@@ -75,15 +112,27 @@ module Google
75
112
  end
76
113
 
77
114
  ##
78
- # Limits the billing tier for this job.
79
- # For more information, see [High-Compute
115
+ # Limits the billing tier for this job. Queries that have resource usage
116
+ # beyond this tier will fail (without incurring a charge). If
117
+ # unspecified, this will be set to your project default. For more
118
+ # information, see [High-Compute
80
119
  # queries](https://cloud.google.com/bigquery/pricing#high-compute).
120
+ #
121
+ # @return [Integer, nil] The tier number, or `nil` for the project
122
+ # default.
123
+ #
81
124
  def maximum_billing_tier
82
125
  @gapi.configuration.query.maximum_billing_tier
83
126
  end
84
127
 
85
128
  ##
86
- # Limits the bytes billed for this job.
129
+ # Limits the bytes billed for this job. Queries that will have bytes
130
+ # billed beyond this limit will fail (without incurring a charge). If
131
+ # `nil`, this will be set to your project default.
132
+ #
133
+ # @return [Integer, nil] The number of bytes, or `nil` for the project
134
+ # default.
135
+ #
87
136
  def maximum_bytes_billed
88
137
  Integer @gapi.configuration.query.maximum_bytes_billed
89
138
  rescue
@@ -92,20 +141,62 @@ module Google
92
141
 
93
142
  ##
94
143
  # Checks if the query results are from the query cache.
144
+ #
145
+ # @return [Boolean] `true` when the job statistics indicate a cache hit,
146
+ # `false` otherwise.
147
+ #
95
148
  def cache_hit?
96
149
  @gapi.statistics.query.cache_hit
97
150
  end
98
151
 
99
152
  ##
100
153
  # The number of bytes processed by the query.
154
+ #
155
+ # @return [Integer, nil] Total bytes processed for the job, or `nil` if the value is unavailable.
156
+ #
101
157
  def bytes_processed
102
158
  Integer @gapi.statistics.query.total_bytes_processed
103
159
  rescue
104
160
  nil
105
161
  end
106
162
 
163
+ ##
164
+ # Describes the execution plan for the query.
165
+ #
166
+ # @return [Array<Google::Cloud::Bigquery::QueryJob::Stage>, nil] An array
167
+ # containing the stages of the execution plan, or `nil` if none.
168
+ #
169
+ # @example
170
+ # require "google/cloud/bigquery"
171
+ #
172
+ # bigquery = Google::Cloud::Bigquery.new
173
+ #
174
+ # sql = "SELECT word FROM publicdata.samples.shakespeare"
175
+ # job = bigquery.query_job sql
176
+ #
177
+ # job.wait_until_done!
178
+ #
179
+ # stages = job.query_plan
180
+ # stages.each do |stage|
181
+ # puts stage.name
182
+ # stage.steps.each do |step|
183
+ # puts step.kind
184
+ # puts step.substeps.inspect
185
+ # end
186
+ # end
187
+ #
188
+ def query_plan
189
+ return nil unless @gapi.statistics.query.query_plan
190
+ Array(@gapi.statistics.query.query_plan).map do |stage|
191
+ Stage.from_gapi stage
192
+ end
193
+ end
194
+
107
195
  ##
108
196
  # The table in which the query results are stored.
197
+ #
198
+ # @return [Table, nil] A table instance, or `nil` if the job has no destination table.
199
+ #
109
200
  def destination
110
201
  table = @gapi.configuration.query.destination_table
111
202
  return nil unless table
@@ -116,6 +207,9 @@ module Google
116
207
 
117
208
  ##
118
209
  # Checks if the query job is using legacy sql.
210
+ #
211
+ # @return [Boolean] `true` when legacy sql is used, `false` otherwise.
212
+ #
119
213
  def legacy_sql?
120
214
  val = @gapi.configuration.query.use_legacy_sql
121
215
  return true if val.nil?
@@ -124,10 +218,61 @@ module Google
124
218
 
125
219
  ##
126
220
  # Checks if the query job is using standard sql.
221
+ #
222
+ # @return [Boolean] `true` when standard sql is used, `false` otherwise.
223
+ #
127
224
  def standard_sql?
128
225
  !legacy_sql?
129
226
  end
130
227
 
228
+ ##
229
+ # The user-defined function resources used in the query. May be either a
230
+ # code resource to load from a Google Cloud Storage URI
231
+ # (`gs://bucket/path`), or an inline resource that contains code for a
232
+ # user-defined function (UDF). Providing an inline code resource is
233
+ # equivalent to providing a URI for a file containing the same code. See
234
+ # [User-Defined Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
235
+ #
236
+ # @return [Array<String>, nil] An array containing Google Cloud Storage URIs
237
+ # and/or inline source code, or `nil` if no UDF resources are set.
238
+ #
239
+ def udfs
240
+ udfs_gapi = @gapi.configuration.query.user_defined_function_resources
241
+ return nil unless udfs_gapi
242
+ Array(udfs_gapi).map do |udf|
243
+ udf.inline_code || udf.resource_uri
244
+ end
245
+ end
246
+
247
+ ##
248
+ # Refreshes the job until the job is `DONE`.
249
+ # The delay between refreshes will incrementally increase.
250
+ #
251
+ # @example
252
+ # require "google/cloud/bigquery"
253
+ #
254
+ # bigquery = Google::Cloud::Bigquery.new
255
+ #
256
+ # sql = "SELECT word FROM publicdata.samples.shakespeare"
257
+ # job = bigquery.query_job sql
258
+ #
259
+ # job.wait_until_done!
260
+ # job.done? #=> true
261
+ #
262
+ def wait_until_done!
263
+ return if done?
264
+
265
+ ensure_service!
266
+ loop do
267
+ query_results_gapi = service.job_query_results job_id, max: 0
268
+ if query_results_gapi.job_complete
269
+ @destination_schema_gapi = query_results_gapi.schema
270
+ break
271
+ end
272
+ end
273
+ reload!
274
+ end
275
+
131
276
  ##
132
277
  # Retrieves the query results for the job.
133
278
  #
@@ -135,11 +280,9 @@ module Google
135
280
  # identifying the result set.
136
281
  # @param [Integer] max Maximum number of results to return.
137
282
  # @param [Integer] start Zero-based index of the starting row to read.
138
- # @param [Integer] timeout How long to wait for the query to complete,
139
- # in milliseconds, before returning. Default is 10,000 milliseconds
140
- # (10 seconds).
141
283
  #
142
- # @return [Google::Cloud::Bigquery::QueryData]
284
+ # @return [Google::Cloud::Bigquery::Data, nil] An object providing access to
285
+ # data read from the destination table for the job, or `nil` if the job is not done.
143
286
  #
144
287
  # @example
145
288
  # require "google/cloud/bigquery"
@@ -150,17 +293,184 @@ module Google
150
293
  # job = bigquery.query_job sql
151
294
  #
152
295
  # job.wait_until_done!
153
- # data = job.query_results
296
+ # data = job.data
154
297
  # data.each do |row|
155
298
  # puts row[:word]
156
299
  # end
157
300
  # data = data.next if data.next?
158
301
  #
159
- def query_results token: nil, max: nil, start: nil, timeout: nil
160
- ensure_service!
161
- options = { token: token, max: max, start: start, timeout: timeout }
162
- gapi = service.job_query_results job_id, options
163
- QueryData.from_gapi gapi, service
302
+ def data token: nil, max: nil, start: nil
303
+ return nil unless done?
304
+
305
+ ensure_schema!
306
+
307
+ options = { token: token, max: max, start: start }
308
+ data_gapi = service.list_tabledata destination_table_dataset_id,
309
+ destination_table_table_id, options
310
+ Data.from_gapi data_gapi, destination_table_gapi, service
311
+ end
312
+ alias_method :query_results, :data
313
+
314
+ ##
315
+ # Represents a stage in the execution plan for the query.
316
+ #
317
+ # @attr_reader [Float] compute_ratio_avg Relative amount of time the
318
+ # average shard spent on CPU-bound tasks.
319
+ # @attr_reader [Float] compute_ratio_max Relative amount of time the
320
+ # slowest shard spent on CPU-bound tasks.
321
+ # @attr_reader [Integer] id Unique ID for the stage within the query
322
+ # plan.
323
+ # @attr_reader [String] name Human-readable name for the stage.
324
+ # @attr_reader [Float] read_ratio_avg Relative amount of time the
325
+ # average shard spent reading input.
326
+ # @attr_reader [Float] read_ratio_max Relative amount of time the
327
+ # slowest shard spent reading input.
328
+ # @attr_reader [Integer] records_read Number of records read into the
329
+ # stage.
330
+ # @attr_reader [Integer] records_written Number of records written by
331
+ # the stage.
332
+ # @attr_reader [Array<Step>] steps List of operations within the stage
333
+ # in dependency order (approximately chronological).
334
+ # @attr_reader [Float] wait_ratio_avg Relative amount of time the
335
+ # average shard spent waiting to be scheduled.
336
+ # @attr_reader [Float] wait_ratio_max Relative amount of time the
337
+ # slowest shard spent waiting to be scheduled.
338
+ # @attr_reader [Float] write_ratio_avg Relative amount of time the
339
+ # average shard spent on writing output.
340
+ # @attr_reader [Float] write_ratio_max Relative amount of time the
341
+ # slowest shard spent on writing output.
342
+ #
343
+ # @example
344
+ # require "google/cloud/bigquery"
345
+ #
346
+ # bigquery = Google::Cloud::Bigquery.new
347
+ #
348
+ # sql = "SELECT word FROM publicdata.samples.shakespeare"
349
+ # job = bigquery.query_job sql
350
+ #
351
+ # job.wait_until_done!
352
+ #
353
+ # stages = job.query_plan
354
+ # stages.each do |stage|
355
+ # puts stage.name
356
+ # stage.steps.each do |step|
357
+ # puts step.kind
358
+ # puts step.substeps.inspect
359
+ # end
360
+ # end
361
+ #
362
+ class Stage
363
+ attr_reader :compute_ratio_avg, :compute_ratio_max, :id, :name,
364
+ :read_ratio_avg, :read_ratio_max, :records_read,
365
+ :records_written, :status, :steps, :wait_ratio_avg,
366
+ :wait_ratio_max, :write_ratio_avg, :write_ratio_max
367
+
368
+ ##
369
+ # @private Creates a new Stage instance.
370
+ def initialize compute_ratio_avg, compute_ratio_max, id, name,
371
+ read_ratio_avg, read_ratio_max, records_read,
372
+ records_written, status, steps, wait_ratio_avg,
373
+ wait_ratio_max, write_ratio_avg, write_ratio_max
374
+ @compute_ratio_avg = compute_ratio_avg
375
+ @compute_ratio_max = compute_ratio_max
376
+ @id = id
377
+ @name = name
378
+ @read_ratio_avg = read_ratio_avg
379
+ @read_ratio_max = read_ratio_max
380
+ @records_read = records_read
381
+ @records_written = records_written
382
+ @status = status
383
+ @steps = steps
384
+ @wait_ratio_avg = wait_ratio_avg
385
+ @wait_ratio_max = wait_ratio_max
386
+ @write_ratio_avg = write_ratio_avg
387
+ @write_ratio_max = write_ratio_max
388
+ end
389
+
390
+ ##
391
+ # @private New Stage from a statistics.query.queryPlan element.
392
+ def self.from_gapi gapi
393
+ steps = Array(gapi.steps).map { |g| Step.from_gapi g }
394
+ new gapi.compute_ratio_avg, gapi.compute_ratio_max, gapi.id,
395
+ gapi.name, gapi.read_ratio_avg, gapi.read_ratio_max,
396
+ gapi.records_read, gapi.records_written, gapi.status, steps,
397
+ gapi.wait_ratio_avg, gapi.wait_ratio_max, gapi.write_ratio_avg,
398
+ gapi.write_ratio_max
399
+ end
400
+ end
401
+
402
+ ##
403
+ # Represents an operation in a stage in the execution plan for the
404
+ # query.
405
+ #
406
+ # @attr_reader [String] kind Machine-readable operation type. For a full
407
+ # list of operation types, see [Steps
408
+ # metadata](https://cloud.google.com/bigquery/query-plan-explanation#steps_metadata).
409
+ # @attr_reader [Array<String>] substeps Human-readable stage
410
+ # descriptions.
411
+ #
412
+ # @example
413
+ # require "google/cloud/bigquery"
414
+ #
415
+ # bigquery = Google::Cloud::Bigquery.new
416
+ #
417
+ # sql = "SELECT word FROM publicdata.samples.shakespeare"
418
+ # job = bigquery.query_job sql
419
+ #
420
+ # job.wait_until_done!
421
+ #
422
+ # stages = job.query_plan
423
+ # stages.each do |stage|
424
+ # puts stage.name
425
+ # stage.steps.each do |step|
426
+ # puts step.kind
427
+ # puts step.substeps.inspect
428
+ # end
429
+ # end
430
+ #
431
+ class Step
432
+ attr_reader :kind, :substeps
433
+
434
+ ##
435
+ # @private Creates a new Step instance.
436
+ def initialize kind, substeps
437
+ @kind = kind
438
+ @substeps = substeps
439
+ end
440
+
441
+ ##
442
+ # @private New Step from a statistics.query.queryPlan[].steps element.
443
+ def self.from_gapi gapi
444
+ new gapi.kind, Array(gapi.substeps)
445
+ end
446
+ end
447
+
448
+ protected
449
+
450
+ def ensure_schema!
451
+ return unless destination_schema.nil?
452
+
453
+ query_results_gapi = service.job_query_results job_id, max: 0
454
+ # fail "unable to retrieve schema" if query_results_gapi.schema.nil?
455
+ @destination_schema_gapi = query_results_gapi.schema
456
+ end
457
+
458
+ def destination_schema
459
+ @destination_schema_gapi
460
+ end
461
+
462
+ def destination_table_dataset_id
463
+ @gapi.configuration.query.destination_table.dataset_id
464
+ end
465
+
466
+ def destination_table_table_id
467
+ @gapi.configuration.query.destination_table.table_id
468
+ end
469
+
470
+ def destination_table_gapi
471
+ Google::Apis::BigqueryV2::Table.new \
472
+ table_reference: @gapi.configuration.query.destination_table,
473
+ schema: destination_schema
164
474
  end
165
475
  end
166
476
  end
@@ -46,6 +46,22 @@ module Google
46
46
  class Schema
47
47
  ##
48
48
  # The fields of the table schema.
49
+ #
50
+ # @return [Array<Field>] An array of field objects.
51
+ #
52
+ # @example
53
+ # require "google/cloud/bigquery"
54
+ #
55
+ # bigquery = Google::Cloud::Bigquery.new
56
+ # dataset = bigquery.dataset "my_dataset"
57
+ # table = dataset.table "my_table"
58
+ #
59
+ # schema = table.schema
60
+ #
61
+ # schema.fields.each do |field|
62
+ # puts field.name
63
+ # end
64
+ #
49
65
  def fields
50
66
  if frozen?
51
67
  Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
@@ -56,12 +72,41 @@ module Google
56
72
 
57
73
  ##
58
74
  # The names of the fields as symbols.
75
+ #
76
+ # @return [Array<Symbol>] An array of column names.
77
+ #
78
+ # @example
79
+ # require "google/cloud/bigquery"
80
+ #
81
+ # bigquery = Google::Cloud::Bigquery.new
82
+ # dataset = bigquery.dataset "my_dataset"
83
+ # table = dataset.create_table "my_table"
84
+ #
85
+ # schema = table.schema
86
+ #
87
+ # schema.headers.each do |header|
88
+ # puts header
89
+ # end
90
+ #
59
91
  def headers
60
92
  fields.map(&:name).map(&:to_sym)
61
93
  end
62
94
 
63
95
  ##
64
- # Retreive a fields by name.
96
+ # Retrieve a field by name.
97
+ #
98
+ # @return [Field, nil] A field object, or `nil` if no field has the given name.
99
+ #
100
+ # @example
101
+ # require "google/cloud/bigquery"
102
+ #
103
+ # bigquery = Google::Cloud::Bigquery.new
104
+ # dataset = bigquery.dataset "my_dataset"
105
+ # table = dataset.table "my_table"
106
+ #
107
+ # field = table.schema.field "name"
108
+ # field.required? #=> true
109
+ #
65
110
  def field name
66
111
  f = fields.find { |fld| fld.name == name.to_s }
67
112
  return nil if f.nil?
@@ -71,6 +116,9 @@ module Google
71
116
 
72
117
  ##
73
118
  # Whether the schema has no fields defined.
119
+ #
120
+ # @return [Boolean] `true` when there are no fields, `false` otherwise.
121
+ #
74
122
  def empty?
75
123
  fields.empty?
76
124
  end
@@ -86,6 +134,7 @@ module Google
86
134
  # @param [Symbol] mode The field's mode. The possible values are
87
135
  # `:nullable`, `:required`, and `:repeated`. The default value is
88
136
  # `:nullable`.
137
+ #
89
138
  def string name, description: nil, mode: :nullable
90
139
  add_field name, :string, description: description, mode: mode
91
140
  end
@@ -101,6 +150,7 @@ module Google
101
150
  # @param [Symbol] mode The field's mode. The possible values are
102
151
  # `:nullable`, `:required`, and `:repeated`. The default value is
103
152
  # `:nullable`.
153
+ #
104
154
  def integer name, description: nil, mode: :nullable
105
155
  add_field name, :integer, description: description, mode: mode
106
156
  end
@@ -116,6 +166,7 @@ module Google
116
166
  # @param [Symbol] mode The field's mode. The possible values are
117
167
  # `:nullable`, `:required`, and `:repeated`. The default value is
118
168
  # `:nullable`.
169
+ #
119
170
  def float name, description: nil, mode: :nullable
120
171
  add_field name, :float, description: description, mode: mode
121
172
  end
@@ -131,6 +182,7 @@ module Google
131
182
  # @param [Symbol] mode The field's mode. The possible values are
132
183
  # `:nullable`, `:required`, and `:repeated`. The default value is
133
184
  # `:nullable`.
185
+ #
134
186
  def boolean name, description: nil, mode: :nullable
135
187
  add_field name, :boolean, description: description, mode: mode
136
188
  end
@@ -146,6 +198,7 @@ module Google
146
198
  # @param [Symbol] mode The field's mode. The possible values are
147
199
  # `:nullable`, `:required`, and `:repeated`. The default value is
148
200
  # `:nullable`.
201
+ #
149
202
  def bytes name, description: nil, mode: :nullable
150
203
  add_field name, :bytes, description: description, mode: mode
151
204
  end
@@ -176,6 +229,7 @@ module Google
176
229
  # @param [Symbol] mode The field's mode. The possible values are
177
230
  # `:nullable`, `:required`, and `:repeated`. The default value is
178
231
  # `:nullable`.
232
+ #
179
233
  def time name, description: nil, mode: :nullable
180
234
  add_field name, :time, description: description, mode: mode
181
235
  end
@@ -191,6 +245,7 @@ module Google
191
245
  # @param [Symbol] mode The field's mode. The possible values are
192
246
  # `:nullable`, `:required`, and `:repeated`. The default value is
193
247
  # `:nullable`.
248
+ #
194
249
  def datetime name, description: nil, mode: :nullable
195
250
  add_field name, :datetime, description: description, mode: mode
196
251
  end
@@ -206,6 +261,7 @@ module Google
206
261
  # @param [Symbol] mode The field's mode. The possible values are
207
262
  # `:nullable`, `:required`, and `:repeated`. The default value is
208
263
  # `:nullable`.
264
+ #
209
265
  def date name, description: nil, mode: :nullable
210
266
  add_field name, :date, description: description, mode: mode
211
267
  end