google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
@@ -19,7 +19,7 @@ require "google/cloud/errors"
  require "google/apis/bigquery_v2"
  require "pathname"
  require "securerandom"
- require "mime/types"
+ require "mini_mime"
  require "date"

  module Google
@@ -39,15 +39,17 @@ module Google
  attr_accessor :credentials

  # @private
- attr_reader :retries, :timeout
+ attr_reader :retries, :timeout, :host

  ##
  # Creates a new Service instance.
- def initialize project, credentials, retries: nil, timeout: nil
+ def initialize project, credentials, retries: nil, timeout: nil, host: nil, quota_project: nil
  @project = project
  @credentials = credentials
  @retries = retries
  @timeout = timeout
+ @host = host
+ @quota_project = quota_project
  end

  def service
@@ -55,8 +57,7 @@ module Google
  @service ||= begin
  service = API::BigqueryService.new
  service.client_options.application_name = "gcloud-ruby"
- service.client_options.application_version = \
- Google::Cloud::Bigquery::VERSION
+ service.client_options.application_version = Google::Cloud::Bigquery::VERSION
  service.client_options.open_timeout_sec = timeout
  service.client_options.read_timeout_sec = timeout
  service.client_options.send_timeout_sec = timeout
@@ -64,7 +65,11 @@ module Google
  service.request_options.header ||= {}
  service.request_options.header["x-goog-api-client"] = \
  "gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
+ service.request_options.query ||= {}
+ service.request_options.query["prettyPrint"] = false
+ service.request_options.quota_project = @quota_project if @quota_project
  service.authorization = @credentials.client
+ service.root_url = host if host
  service
  end
  end
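
Note: a minimal construction sketch for the new keywords, assuming a service-account keyfile path is accepted by Credentials.new; the project ID, keyfile path, host URL, and quota project below are placeholders, not values from this diff:

    require "google/cloud/bigquery"

    credentials = Google::Cloud::Bigquery::Credentials.new "/path/to/keyfile.json"
    service = Google::Cloud::Bigquery::Service.new(
      "my-project", credentials,
      retries: 3, timeout: 120,
      host: "https://bigquery.googleapis.com/", # applied as the client's root_url
      quota_project: "my-billing-project"       # sent as the request's quota project
    )
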
@@ -77,12 +82,10 @@ module Google
  ##
  # Lists all datasets in the specified project to which you have
  # been granted the READER dataset role.
- def list_datasets options = {}
+ def list_datasets all: nil, filter: nil, max: nil, token: nil
  # The list operation is considered idempotent
  execute backoff: true do
- service.list_datasets \
- @project, all: options[:all], filter: options[:filter],
- max_results: options[:max], page_token: options[:token]
+ service.list_datasets @project, all: all, filter: filter, max_results: max, page_token: token
  end
  end

@@ -113,8 +116,7 @@ module Google
  patch_with_backoff = true
  end
  execute backoff: patch_with_backoff do
- service.patch_dataset @project, dataset_id, patched_dataset_gapi,
- options: options
+ service.patch_dataset @project, dataset_id, patched_dataset_gapi, options: options
  end
  end

@@ -133,15 +135,15 @@ module Google
  ##
  # Lists all tables in the specified dataset.
  # Requires the READER dataset role.
- def list_tables dataset_id, options = {}
+ def list_tables dataset_id, max: nil, token: nil
  # The list operation is considered idempotent
  execute backoff: true do
- service.list_tables @project, dataset_id,
- max_results: options[:max],
- page_token: options[:token]
+ service.list_tables @project, dataset_id, max_results: max, page_token: token
  end
  end

+ ##
+ # Gets the specified table resource by full table reference.
  def get_project_table project_id, dataset_id, table_id
  # The get operation is considered idempotent
  execute backoff: true do
@@ -155,10 +157,7 @@ module Google
  # it only returns the table resource,
  # which describes the structure of this table.
  def get_table dataset_id, table_id
- # The get operation is considered idempotent
- execute backoff: true do
- get_project_table @project, dataset_id, table_id
- end
+ get_project_table @project, dataset_id, table_id
  end

  ##
@@ -179,8 +178,35 @@ module Google
  patch_with_backoff = true
  end
  execute backoff: patch_with_backoff do
- service.patch_table @project, dataset_id, table_id,
- patched_table_gapi, options: options
+ service.patch_table @project, dataset_id, table_id, patched_table_gapi, options: options
+ end
+ end
+
+ ##
+ # Returns Google::Apis::BigqueryV2::Policy
+ def get_table_policy dataset_id, table_id
+ policy_options = API::GetPolicyOptions.new requested_policy_version: 1
+ execute do
+ service.get_table_iam_policy table_path(dataset_id, table_id),
+ API::GetIamPolicyRequest.new(options: policy_options)
+ end
+ end
+
+ ##
+ # @param [Google::Apis::BigqueryV2::Policy] new_policy
+ def set_table_policy dataset_id, table_id, new_policy
+ execute do
+ service.set_table_iam_policy table_path(dataset_id, table_id),
+ API::SetIamPolicyRequest.new(policy: new_policy)
+ end
+ end
+
+ ##
+ # Returns Google::Apis::BigqueryV2::TestIamPermissionsResponse
+ def test_table_permissions dataset_id, table_id, permissions
+ execute do
+ service.test_table_iam_permissions table_path(dataset_id, table_id),
+ API::TestIamPermissionsRequest.new(permissions: permissions)
  end
  end

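Usage sketch for the new IAM helpers, following the usual read-modify-write cycle; the dataset, table, and member below are placeholders, and Google::Apis::BigqueryV2::Binding is assumed as the binding class:

    policy = service.get_table_policy "my_dataset", "my_table"
    policy.bindings ||= []
    policy.bindings << Google::Apis::BigqueryV2::Binding.new(
      role:    "roles/bigquery.dataViewer",
      members: ["user:reader@example.com"]
    )
    service.set_table_policy "my_dataset", "my_table", policy

    # Check what the caller may do:
    service.test_table_permissions "my_dataset", "my_table", ["bigquery.tables.getData"]
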
@@ -193,40 +219,45 @@ module Google

  ##
  # Retrieves data from the table.
- def list_tabledata dataset_id, table_id, options = {}
+ def list_tabledata dataset_id, table_id, max: nil, token: nil, start: nil
  # The list operation is considered idempotent
  execute backoff: true do
  json_txt = service.list_table_data \
  @project, dataset_id, table_id,
- max_results: options.delete(:max),
- page_token: options.delete(:token),
- start_index: options.delete(:start),
+ max_results: max,
+ page_token: token,
+ start_index: start,
  options: { skip_deserialization: true }
  JSON.parse json_txt, symbolize_names: true
  end
  end

- def insert_tabledata dataset_id, table_id, rows, options = {}
+ def insert_tabledata dataset_id, table_id, rows, insert_ids: nil, ignore_unknown: nil, skip_invalid: nil
  json_rows = Array(rows).map { |row| Convert.to_json_row row }
- insert_tabledata_json_rows dataset_id, table_id, json_rows, options
+ insert_tabledata_json_rows dataset_id, table_id, json_rows, insert_ids: insert_ids,
+ ignore_unknown: ignore_unknown,
+ skip_invalid: skip_invalid
  end

- def insert_tabledata_json_rows dataset_id, table_id, json_rows,
- options = {}
-
- rows_and_ids = Array(json_rows).zip Array(options[:insert_ids])
+ def insert_tabledata_json_rows dataset_id, table_id, json_rows, insert_ids: nil, ignore_unknown: nil,
+ skip_invalid: nil
+ rows_and_ids = Array(json_rows).zip Array(insert_ids)
  insert_rows = rows_and_ids.map do |json_row, insert_id|
- insert_id ||= SecureRandom.uuid
- {
- insertId: insert_id,
- json: json_row
- }
+ if insert_id == :skip
+ { json: json_row }
+ else
+ insert_id ||= SecureRandom.uuid
+ {
+ insertId: insert_id,
+ json: json_row
+ }
+ end
  end

  insert_req = {
  rows: insert_rows,
- ignoreUnknownValues: options[:ignore_unknown],
- skipInvalidRows: options[:skip_invalid]
+ ignoreUnknownValues: ignore_unknown,
+ skipInvalidRows: skip_invalid
  }.to_json

  # The insertAll with insertId operation is considered idempotent
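Sketch of the new insert_ids handling: a nil entry still receives a random UUID insertId, while the :skip sentinel omits insertId for that row (no best-effort de-duplication); the dataset, table, and rows below are placeholders:

    rows = [{ name: "Alice", score: 9 }, { name: "Bob", score: 7 }]
    service.insert_tabledata "my_dataset", "my_table", rows,
                             insert_ids: ["row-1", :skip],
                             skip_invalid: true,
                             ignore_unknown: true
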
@@ -245,27 +276,26 @@ module Google
  options = { skip_deserialization: true }
  # The list operation is considered idempotent
  execute backoff: true do
- json_txt = service.list_models @project, dataset_id,
- max_results: max,
- page_token: token,
- options: options
+ json_txt = service.list_models @project, dataset_id, max_results: max, page_token: token, options: options
  JSON.parse json_txt, symbolize_names: true
  end
  end

- # Gets the specified model resource by model ID.
- # This method does not return the data in the model,
- # it only returns the model resource,
- # which describes the structure of this model.
- def get_model dataset_id, model_id
+ # Gets the specified model resource by full model reference.
+ def get_project_model project_id, dataset_id, model_id
  # The get operation is considered idempotent
  execute backoff: true do
- json_txt = service.get_model @project, dataset_id, model_id,
- options: { skip_deserialization: true }
+ json_txt = service.get_model project_id, dataset_id, model_id, options: { skip_deserialization: true }
  JSON.parse json_txt, symbolize_names: true
  end
  end

+ # Gets the specified model resource by model ID. This method does not return the data in the model, it only
+ # returns the model resource, which describes the structure of this model.
+ def get_model dataset_id, model_id
+ get_project_model @project, dataset_id, model_id
+ end
+
  ##
  # Updates information in an existing model, replacing fields that
  # are provided in the submitted model resource.
@@ -278,9 +308,7 @@ module Google
  patch_with_backoff = true
  end
  execute backoff: patch_with_backoff do
- json_txt = service.patch_model @project, dataset_id, model_id,
- patched_model_gapi,
- options: options
+ json_txt = service.patch_model @project, dataset_id, model_id, patched_model_gapi, options: options
  JSON.parse json_txt, symbolize_names: true
  end
  end
@@ -292,16 +320,69 @@ module Google
  execute { service.delete_model @project, dataset_id, model_id }
  end

+ ##
+ # Creates a new routine in the dataset.
+ def insert_routine dataset_id, new_routine_gapi
+ execute { service.insert_routine @project, dataset_id, new_routine_gapi }
+ end
+
+ ##
+ # Lists all routines in the specified dataset.
+ # Requires the READER dataset role.
+ # Unless readMask is set in the request, only the following fields are populated:
+ # etag, projectId, datasetId, routineId, routineType, creationTime, lastModifiedTime, and language.
+ def list_routines dataset_id, max: nil, token: nil, filter: nil
+ # The list operation is considered idempotent
+ execute backoff: true do
+ service.list_routines @project, dataset_id, max_results: max,
+ page_token: token,
+ filter: filter
+ end
+ end
+
+ ##
+ # Gets the specified routine resource by routine ID.
+ def get_routine dataset_id, routine_id
+ # The get operation is considered idempotent
+ execute backoff: true do
+ service.get_routine @project, dataset_id, routine_id
+ end
+ end
+
+ ##
+ # Updates information in an existing routine, replacing the entire routine resource.
+ def update_routine dataset_id, routine_id, new_routine_gapi
+ update_with_backoff = false
+ options = {}
+ if new_routine_gapi.etag
+ options[:header] = { "If-Match" => new_routine_gapi.etag }
+ # The update with etag operation is considered idempotent
+ update_with_backoff = true
+ end
+ execute backoff: update_with_backoff do
+ service.update_routine @project, dataset_id, routine_id, new_routine_gapi, options: options
+ end
+ end
+
+ ##
+ # Deletes the routine specified by routine_id from the dataset.
+ def delete_routine dataset_id, routine_id
+ execute { service.delete_routine @project, dataset_id, routine_id }
+ end
+
  ##
  # Lists all jobs in the specified project to which you have
  # been granted the READER job role.
- def list_jobs options = {}
+ def list_jobs all: nil, token: nil, max: nil, filter: nil, min_created_at: nil, max_created_at: nil,
+ parent_job_id: nil
  # The list operation is considered idempotent
+ min_creation_time = Convert.time_to_millis min_created_at
+ max_creation_time = Convert.time_to_millis max_created_at
  execute backoff: true do
- service.list_jobs \
- @project, all_users: options[:all], max_results: options[:max],
- page_token: options[:token], projection: "full",
- state_filter: options[:filter]
+ service.list_jobs @project, all_users: all, max_results: max,
+ page_token: token, projection: "full", state_filter: filter,
+ min_creation_time: min_creation_time, max_creation_time: max_creation_time,
+ parent_job_id: parent_job_id
  end
  end

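Sketch of the expanded job listing and the dataset-scoped routine helpers; the Time filters are converted to epoch milliseconds via Convert.time_to_millis, and all names below are placeholders:

    # Completed jobs created in the last hour.
    service.list_jobs filter: "done", max: 100,
                      min_created_at: Time.now - 3600,
                      max_created_at: Time.now

    routine  = service.get_routine "my_dataset", "my_routine"
    routines = service.list_routines "my_dataset", max: 50
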
@@ -324,10 +405,7 @@ module Google
  end

  def insert_job config, location: nil
- job_object = API::Job.new(
- job_reference: job_ref_from(nil, nil, location: location),
- configuration: config
- )
+ job_object = API::Job.new job_reference: job_ref_from(nil, nil, location: location), configuration: config
  # Jobs have generated id, so this operation is considered idempotent
  execute backoff: true do
  service.insert_job @project, job_object
@@ -340,18 +418,25 @@ module Google
  end
  end

+ ##
+ # Deletes the job specified by jobId and location (required).
+ def delete_job job_id, location: nil
+ execute do
+ service.delete_job @project, job_id, location: location
+ end
+ end
+
  ##
  # Returns the query data for the job
- def job_query_results job_id, options = {}
+ def job_query_results job_id, location: nil, max: nil, token: nil, start: nil, timeout: nil
  # The get operation is considered idempotent
  execute backoff: true do
- service.get_job_query_results \
- @project, job_id,
- location: options.delete(:location),
- max_results: options.delete(:max),
- page_token: options.delete(:token),
- start_index: options.delete(:start),
- timeout_ms: options.delete(:timeout)
+ service.get_job_query_results @project, job_id,
+ location: location,
+ max_results: max,
+ page_token: token,
+ start_index: start,
+ timeout_ms: timeout
  end
  end

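Sketch of the new delete_job helper next to the reworked query-results call; the job ID and location below are placeholders, and timeout is in milliseconds:

    results = service.job_query_results "my_job_id", location: "US", max: 1_000, timeout: 10_000
    service.delete_job "my_job_id", location: "US"
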
@@ -375,10 +460,7 @@ module Google

  def load_table_file file, load_job_gapi
  execute backoff: true do
- service.insert_job \
- @project,
- load_job_gapi,
- upload_source: file, content_type: mime_type_for(file)
+ service.insert_job @project, load_job_gapi, upload_source: file, content_type: mime_type_for(file)
  end
  end

@@ -402,34 +484,29 @@ module Google
  def self.table_ref_from_s str, default_ref: {}
  str = str.to_s
  m = /\A(((?<prj>\S*)(:|\.))?(?<dts>\S*)\.)?(?<tbl>\S*)\z/.match str
- unless m
- raise ArgumentError, "unable to identify table from #{str.inspect}"
- end
+ raise ArgumentError, "unable to identify table from #{str.inspect}" unless m
  str_table_ref_hash = {
  project_id: m["prj"],
  dataset_id: m["dts"],
  table_id: m["tbl"]
  }.delete_if { |_, v| v.nil? }
  str_table_ref_hash = default_ref.to_h.merge str_table_ref_hash
- ref = Google::Apis::BigqueryV2::TableReference.new str_table_ref_hash
+ ref = Google::Apis::BigqueryV2::TableReference.new(**str_table_ref_hash)
  validate_table_ref ref
  ref
  end

  def self.validate_table_ref table_ref
- %i[project_id dataset_id table_id].each do |f|
- if table_ref.send(f).nil?
- raise ArgumentError, "TableReference is missing #{f}"
- end
+ [:project_id, :dataset_id, :table_id].each do |f|
+ raise ArgumentError, "TableReference is missing #{f}" if table_ref.send(f).nil?
  end
  end

  ##
  # Lists all projects to which you have been granted any project role.
- def list_projects options = {}
+ def list_projects max: nil, token: nil
  execute backoff: true do
- service.list_projects max_results: options[:max],
- page_token: options[:token]
+ service.list_projects max_results: max, page_token: token
  end
  end

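The parser above accepts both the legacy "project:dataset.table" and standard "project.dataset.table" separators; a sketch with placeholder names:

    ref = Google::Cloud::Bigquery::Service.table_ref_from_s "my-project:my_dataset.my_table"
    ref.project_id # => "my-project"
    ref.dataset_id # => "my_dataset"
    ref.table_id   # => "my_table"

    # Missing parts fall back to default_ref; anything still missing raises ArgumentError.
    Google::Cloud::Bigquery::Service.table_ref_from_s "my_table",
                                                      default_ref: { project_id: "my-project",
                                                                     dataset_id: "my_dataset" }
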
@@ -439,10 +516,7 @@ module Google
  def job_ref_from job_id, prefix, location: nil
  prefix ||= "job_"
  job_id ||= "#{prefix}#{generate_id}"
- job_ref = API::JobReference.new(
- project_id: @project,
- job_id: job_id
- )
+ job_ref = API::JobReference.new project_id: @project, job_id: job_id
  # BigQuery does not allow nil location, but missing is ok.
  job_ref.location = location if location
  job_ref
@@ -470,22 +544,27 @@ module Google

  protected

+ # Creates a formatted table path.
+ def table_path dataset_id, table_id
+ "projects/#{@project}/datasets/#{dataset_id}/tables/#{table_id}"
+ end
+
  # Generate a random string similar to the BigQuery service job IDs.
  def generate_id
  SecureRandom.urlsafe_base64 21
  end

  def mime_type_for file
- mime_type = MIME::Types.of(Pathname(file).to_path).first.to_s
- return nil if mime_type.empty?
- mime_type
+ mime_type = MiniMime.lookup_by_filename Pathname(file).to_path
+ return nil if mime_type.nil?
+ mime_type.content_type
  rescue StandardError
  nil
  end

- def execute backoff: nil
+ def execute backoff: nil, &block
  if backoff
- Backoff.new(retries: retries).execute { yield }
+ Backoff.new(retries: retries).execute(&block)
  else
  yield
  end
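
mini_mime now backs the content-type detection used by load_table_file; a sketch of the lookup that mime_type_for wraps (filenames are placeholders):

    require "mini_mime"

    MiniMime.lookup_by_filename("rows.csv")&.content_type # => "text/csv"
    MiniMime.lookup_by_filename("rows.unknownext")         # => nil, so mime_type_for returns nil
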
@@ -500,7 +579,7 @@ module Google
  attr_accessor :backoff
  end
  self.retries = 5
- self.reasons = %w[rateLimitExceeded backendError]
+ self.reasons = ["rateLimitExceeded", "backendError"]
  self.backoff = lambda do |retries|
  # Max delay is 32 seconds
  # See "Back-off Requirements" here:
@@ -510,31 +589,29 @@ module Google
  sleep delay
  end

- def initialize options = {}
- @retries = (options[:retries] || Backoff.retries).to_i
- @reasons = (options[:reasons] || Backoff.reasons).to_a
- @backoff = options[:backoff] || Backoff.backoff
+ def initialize retries: nil, reasons: nil, backoff: nil
+ @retries = (retries || Backoff.retries).to_i
+ @reasons = (reasons || Backoff.reasons).to_a
+ @backoff = backoff || Backoff.backoff
  end

  def execute
  current_retries = 0
  loop do
- begin
- return yield
- rescue Google::Apis::Error => e
- raise e unless retry? e.body, current_retries
-
- @backoff.call current_retries
- current_retries += 1
- end
+ return yield
+ rescue Google::Apis::Error => e
+ raise e unless retry? e.body, current_retries
+
+ @backoff.call current_retries
+ current_retries += 1
  end
  end

  protected

- def retry? result, current_retries #:nodoc:
- if current_retries < @retries
- return true if retry_error_reason? result
+ def retry? result, current_retries
+ if current_retries < @retries && retry_error_reason?(result)
+ return true
  end
  false
  end
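
Sketch of the retry helper with its keyword-based constructor, assuming the class nests under Service as in this file; the block is retried, with the configured sleep between attempts, whenever a Google::Apis::Error body names a retryable reason ("rateLimitExceeded", "backendError"):

    backoff = Google::Cloud::Bigquery::Service::Backoff.new retries: 3
    backoff.execute do
      service.get_table "my_dataset", "my_table"
    end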