google-cloud-bigquery 1.1.0 → 1.2.0

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -40,8 +40,8 @@ module Google
   MODES = %w[NULLABLE REQUIRED REPEATED].freeze

   # @private
-  TYPES = %w[STRING INTEGER FLOAT BOOLEAN BYTES TIMESTAMP TIME DATETIME
-             DATE RECORD].freeze
+  TYPES = %w[STRING INTEGER INT64 FLOAT FLOAT64 BOOLEAN BOOL BYTES
+             TIMESTAMP TIME DATETIME DATE RECORD STRUCT].freeze

   ##
   # The name of the field.
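The schema field whitelist now includes the standard SQL type names alongside the legacy ones. A minimal sketch of the effect, using the constant exactly as defined above:

    TYPES = %w[STRING INTEGER INT64 FLOAT FLOAT64 BOOLEAN BOOL BYTES
               TIMESTAMP TIME DATETIME DATE RECORD STRUCT].freeze

    TYPES.include? "INT64"   #=> true (not present in 1.1.0)
    TYPES.include? "STRUCT"  #=> true (not present in 1.1.0)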
@@ -164,93 +164,93 @@ module Google
   end

   ##
-  # Checks if the mode of the field is `STRING`.
+  # Checks if the type of the field is `STRING`.
   #
   # @return [Boolean] `true` when `STRING`, `false` otherwise.
   #
   def string?
-    mode == "STRING"
+    type == "STRING"
   end

   ##
-  # Checks if the mode of the field is `INTEGER`.
+  # Checks if the type of the field is `INTEGER`.
   #
   # @return [Boolean] `true` when `INTEGER`, `false` otherwise.
   #
   def integer?
-    mode == "INTEGER"
+    type == "INTEGER" || type == "INT64"
   end

   ##
-  # Checks if the mode of the field is `FLOAT`.
+  # Checks if the type of the field is `FLOAT`.
   #
   # @return [Boolean] `true` when `FLOAT`, `false` otherwise.
   #
   def float?
-    mode == "FLOAT"
+    type == "FLOAT" || type == "FLOAT64"
   end

   ##
-  # Checks if the mode of the field is `BOOLEAN`.
+  # Checks if the type of the field is `BOOLEAN`.
   #
   # @return [Boolean] `true` when `BOOLEAN`, `false` otherwise.
   #
   def boolean?
-    mode == "BOOLEAN"
+    type == "BOOLEAN" || type == "BOOL"
   end

   ##
-  # Checks if the mode of the field is `BYTES`.
+  # Checks if the type of the field is `BYTES`.
   #
   # @return [Boolean] `true` when `BYTES`, `false` otherwise.
   #
   def bytes?
-    mode == "BYTES"
+    type == "BYTES"
   end

   ##
-  # Checks if the mode of the field is `TIMESTAMP`.
+  # Checks if the type of the field is `TIMESTAMP`.
   #
   # @return [Boolean] `true` when `TIMESTAMP`, `false` otherwise.
   #
   def timestamp?
-    mode == "TIMESTAMP"
+    type == "TIMESTAMP"
   end

   ##
-  # Checks if the mode of the field is `TIME`.
+  # Checks if the type of the field is `TIME`.
   #
   # @return [Boolean] `true` when `TIME`, `false` otherwise.
   #
   def time?
-    mode == "TIME"
+    type == "TIME"
   end

   ##
-  # Checks if the mode of the field is `DATETIME`.
+  # Checks if the type of the field is `DATETIME`.
   #
   # @return [Boolean] `true` when `DATETIME`, `false` otherwise.
   #
   def datetime?
-    mode == "DATETIME"
+    type == "DATETIME"
   end

   ##
-  # Checks if the mode of the field is `DATE`.
+  # Checks if the type of the field is `DATE`.
   #
   # @return [Boolean] `true` when `DATE`, `false` otherwise.
   #
   def date?
-    mode == "DATE"
+    type == "DATE"
   end

   ##
-  # Checks if the mode of the field is `RECORD`.
+  # Checks if the type of the field is `RECORD`.
   #
   # @return [Boolean] `true` when `RECORD`, `false` otherwise.
   #
   def record?
-    mode == "RECORD"
+    type == "RECORD" || type == "STRUCT"
   end

   ##
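This hunk fixes a real bug: `mode` only ever holds NULLABLE, REQUIRED, or REPEATED, so in 1.1.0 every one of these predicates returned false. They now compare against `type`, and the integer/float/boolean/record checks also accept the standard SQL aliases. A minimal usage sketch, assuming a table whose schema has an INT64 column named age (all identifiers hypothetical):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table = bigquery.dataset("my_dataset").table("my_table")

    # Look up the (hypothetical) "age" field and test its type.
    age = table.schema.fields.find { |f| f.name == "age" }
    age.type      #=> "INT64"
    age.integer?  #=> true, matches "INTEGER" or "INT64"
    age.string?   #=> false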
@@ -245,31 +245,35 @@ module Google

   ##
   # Cancel the job specified by jobId.
-  def cancel_job job_id
+  def cancel_job job_id, location: nil
     # The BigQuery team has told us cancelling is considered idempotent
-    execute(backoff: true) { service.cancel_job @project, job_id }
+    execute(backoff: true) do
+      service.cancel_job @project, job_id, location: location
+    end
   end

   ##
   # Returns the job specified by jobID.
-  def get_job job_id
+  def get_job job_id, location: nil
     # The get operation is considered idempotent
-    execute(backoff: true) { service.get_job @project, job_id }
+    execute(backoff: true) do
+      service.get_job @project, job_id, location: location
+    end
   end

-  def insert_job config
+  def insert_job config, location: nil
     job_object = API::Job.new(
-      job_reference: job_ref_from(nil, nil),
+      job_reference: job_ref_from(nil, nil, location: location),
       configuration: config
     )
     # Jobs have generated id, so this operation is considered idempotent
     execute(backoff: true) { service.insert_job @project, job_object }
   end

-  def query_job query, options = {}
-    config = query_table_config(query, options)
-    # Jobs have generated id, so this operation is considered idempotent
-    execute(backoff: true) { service.insert_job @project, config }
+  def query_job query_job_gapi
+    execute backoff: true do
+      service.insert_job @project, query_job_gapi
+    end
   end

   ##
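These Service methods now thread an optional BigQuery location through to the underlying google-api-client calls, which is required for jobs that run outside the US and EU multi-regions. A minimal sketch against the internal service object (job ID and region are hypothetical):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    service  = bigquery.service  # internal accessor to this Service class

    service.get_job "my_job_id", location: "asia-northeast1"
    service.cancel_job "my_job_id", location: "asia-northeast1"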
@@ -279,6 +283,7 @@ module Google
     execute backoff: true do
       service.get_job_query_results @project,
                                     job_id,
+                                    location: options.delete(:location),
                                     max_results: options.delete(:max),
                                     page_token: options.delete(:token),
                                     start_index: options.delete(:start),
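The query-results call gets the same plumbing: :location is deleted from the options hash and passed as its own keyword, so the remaining entries keep being forwarded one by one. Assuming the enclosing method keeps its options-hash signature, the call shape would be (values hypothetical):

    service.job_query_results "my_job_id", location: "EU", max: 100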
@@ -286,39 +291,29 @@ module Google
     end
   end

-  def copy_table source, target, options = {}
-    # Jobs have generated id, so this operation is considered idempotent
+  def copy_table copy_job_gapi
     execute backoff: true do
-      service.insert_job @project, copy_table_config(
-        source, target, options
-      )
+      service.insert_job @project, copy_job_gapi
     end
   end

-  def extract_table table, storage_files, options = {}
-    # Jobs have generated id, so this operation is considered idempotent
+  def extract_table extract_job_gapi
     execute backoff: true do
-      service.insert_job \
-        @project, extract_table_config(table, storage_files, options)
+      service.insert_job @project, extract_job_gapi
     end
   end

-  def load_table_gs_url dataset_id, table_id, url, options = {}
-    # Jobs have generated id, so this operation is considered idempotent
+  def load_table_gs_url load_job_gapi
     execute backoff: true do
-      service.insert_job \
-        @project, load_table_url_config(dataset_id, table_id,
-                                        url, options)
+      service.insert_job @project, load_job_gapi
     end
   end

-  def load_table_file dataset_id, table_id, file, options = {}
-    # Jobs have generated id, so this operation is considered idempotent
+  def load_table_file file, load_job_gapi
     execute backoff: true do
       service.insert_job \
-        @project, load_table_file_config(
-          dataset_id, table_id, file, options
-        ),
+        @project,
+        load_job_gapi,
         upload_source: file, content_type: mime_type_for(file)
     end
   end
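The copy, extract, and load methods no longer assemble job configurations from option hashes; callers now hand in a fully built Google::Apis::BigqueryV2::Job ("gapi") object, moving the option-to-config translation up into the higher-level job classes. A minimal sketch of the new convention for a copy job, where source_ref and dest_ref stand in for TableReference objects:

    copy_job_gapi = Google::Apis::BigqueryV2::Job.new(
      job_reference: service.job_ref_from(nil, nil, location: "US"),
      configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
        copy: Google::Apis::BigqueryV2::JobConfigurationTableCopy.new(
          source_table: source_ref,    # hypothetical TableReference
          destination_table: dest_ref  # hypothetical TableReference
        )
      )
    )
    service.copy_table copy_job_gapi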
@@ -352,312 +347,46 @@ module Google
     end
   end

-  def inspect
-    "#{self.class}(#{@project})"
-  end
-
-  protected
-
-  def table_ref_from tbl
-    return nil if tbl.nil?
-    API::TableReference.new(
-      project_id: tbl.project_id,
-      dataset_id: tbl.dataset_id,
-      table_id: tbl.table_id
+  # If no job_id or prefix is given, always generate a client-side job ID
+  # anyway, for idempotent retry in the google-api-client layer.
+  # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
+  def job_ref_from job_id, prefix, location: nil
+    prefix ||= "job_"
+    job_id ||= "#{prefix}#{generate_id}"
+    job_ref = API::JobReference.new(
+      project_id: @project,
+      job_id: job_id
     )
+    # BigQuery does not allow nil location, but missing is ok.
+    job_ref.location = location if location
+    job_ref
   end

+  # API object for dataset.
   def dataset_ref_from dts, pjt = nil
     return nil if dts.nil?
     if dts.respond_to? :dataset_id
-      API::DatasetReference.new(
+      Google::Apis::BigqueryV2::DatasetReference.new(
         project_id: (pjt || dts.project_id || @project),
         dataset_id: dts.dataset_id
       )
     else
-      API::DatasetReference.new(
+      Google::Apis::BigqueryV2::DatasetReference.new(
         project_id: (pjt || @project),
         dataset_id: dts
       )
     end
   end

-  # Generate a random string similar to the BigQuery service job IDs.
-  def generate_id
-    SecureRandom.urlsafe_base64(21)
-  end
-
-  # If no job_id or prefix is given, always generate a client-side job ID
-  # anyway, for idempotent retry in the google-api-client layer.
-  # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
-  def job_ref_from job_id, prefix
-    prefix ||= "job_"
-    job_id ||= "#{prefix}#{generate_id}"
-    API::JobReference.new(
-      project_id: @project,
-      job_id: job_id
-    )
-  end
-
-  def load_table_file_opts dataset_id, table_id, file, options = {}
-    path = Pathname(file).to_path
-    {
-      destination_table: Google::Apis::BigqueryV2::TableReference.new(
-        project_id: @project, dataset_id: dataset_id, table_id: table_id
-      ),
-      create_disposition: create_disposition(options[:create]),
-      write_disposition: write_disposition(options[:write]),
-      source_format: source_format(path, options[:format]),
-      projection_fields: projection_fields(options[:projection_fields]),
-      allow_jagged_rows: options[:jagged_rows],
-      allow_quoted_newlines: options[:quoted_newlines],
-      autodetect: options[:autodetect],
-      encoding: options[:encoding], field_delimiter: options[:delimiter],
-      ignore_unknown_values: options[:ignore_unknown],
-      max_bad_records: options[:max_bad_records],
-      null_marker: options[:null_marker], quote: options[:quote],
-      schema: options[:schema], skip_leading_rows: options[:skip_leading]
-    }.delete_if { |_, v| v.nil? }
-  end
-
-  def load_table_file_config dataset_id, table_id, file, options = {}
-    load_opts = load_table_file_opts dataset_id, table_id, file, options
-    req = API::Job.new(
-      job_reference: job_ref_from(options[:job_id], options[:prefix]),
-      configuration: API::JobConfiguration.new(
-        load: API::JobConfigurationLoad.new(load_opts),
-        dry_run: options[:dryrun]
-      )
-    )
-    req.configuration.labels = options[:labels] if options[:labels]
-    req
-  end
-
-  def load_table_url_opts dataset_id, table_id, url, options = {}
-    {
-      destination_table: Google::Apis::BigqueryV2::TableReference.new(
-        project_id: @project, dataset_id: dataset_id, table_id: table_id
-      ),
-      source_uris: Array(url),
-      create_disposition: create_disposition(options[:create]),
-      write_disposition: write_disposition(options[:write]),
-      source_format: source_format(url, options[:format]),
-      projection_fields: projection_fields(options[:projection_fields]),
-      allow_jagged_rows: options[:jagged_rows],
-      allow_quoted_newlines: options[:quoted_newlines],
-      autodetect: options[:autodetect],
-      encoding: options[:encoding], field_delimiter: options[:delimiter],
-      ignore_unknown_values: options[:ignore_unknown],
-      max_bad_records: options[:max_bad_records],
-      null_marker: options[:null_marker], quote: options[:quote],
-      schema: options[:schema], skip_leading_rows: options[:skip_leading]
-    }.delete_if { |_, v| v.nil? }
-  end
-
-  def load_table_url_config dataset_id, table_id, url, options = {}
-    load_opts = load_table_url_opts dataset_id, table_id, url, options
-    req = API::Job.new(
-      job_reference: job_ref_from(options[:job_id], options[:prefix]),
-      configuration: API::JobConfiguration.new(
-        load: API::JobConfigurationLoad.new(load_opts),
-        dry_run: options[:dryrun]
-      )
-    )
-    req.configuration.labels = options[:labels] if options[:labels]
-    req
-  end
-
-  # rubocop:disable all
-
-  ##
-  # Job description for query job
-  def query_table_config query, options
-    dest_table = table_ref_from options[:table]
-    dataset_config = dataset_ref_from options[:dataset], options[:project]
-    req = API::Job.new(
-      job_reference: job_ref_from(options[:job_id], options[:prefix]),
-      configuration: API::JobConfiguration.new(
-        query: API::JobConfigurationQuery.new(
-          query: query,
-          # tableDefinitions: { ... },
-          priority: priority_value(options[:priority]),
-          use_query_cache: options[:cache],
-          destination_table: dest_table,
-          create_disposition: create_disposition(options[:create]),
-          write_disposition: write_disposition(options[:write]),
-          allow_large_results: options[:large_results],
-          flatten_results: options[:flatten],
-          default_dataset: dataset_config,
-          use_legacy_sql: Convert.resolve_legacy_sql(
-            options[:standard_sql], options[:legacy_sql]),
-          maximum_billing_tier: options[:maximum_billing_tier],
-          maximum_bytes_billed: options[:maximum_bytes_billed],
-          user_defined_function_resources: udfs(options[:udfs])
-        )
-      )
-    )
-    req.configuration.labels = options[:labels] if options[:labels]
-
-    if options[:params]
-      if Array === options[:params]
-        req.configuration.query.use_legacy_sql = false
-        req.configuration.query.parameter_mode = "POSITIONAL"
-        req.configuration.query.query_parameters = options[:params].map do |param|
-          Convert.to_query_param param
-        end
-      elsif Hash === options[:params]
-        req.configuration.query.use_legacy_sql = false
-        req.configuration.query.parameter_mode = "NAMED"
-        req.configuration.query.query_parameters = options[:params].map do |name, param|
-          Convert.to_query_param(param).tap do |named_param|
-            named_param.name = String name
-          end
-        end
-      else
-        raise "Query parameters must be an Array or a Hash."
-      end
-    end
-
-    if options[:external]
-      external_table_pairs = options[:external].map do |name, obj|
-        [String(name), obj.to_gapi]
-      end
-      external_table_hash = Hash[external_table_pairs]
-      req.configuration.query.table_definitions = external_table_hash
-    end
-
-    req
-  end
-
-  def query_config query, options = {}
-    dataset_config = dataset_ref_from options[:dataset], options[:project]
-
-    req = API::QueryRequest.new(
-      query: query,
-      max_results: options[:max],
-      default_dataset: dataset_config,
-      timeout_ms: options[:timeout],
-      dry_run: options[:dryrun],
-      use_query_cache: options[:cache],
-      use_legacy_sql: Convert.resolve_legacy_sql(
-        options[:standard_sql], options[:legacy_sql])
-    )
-
-    if options[:params]
-      if Array === options[:params]
-        req.use_legacy_sql = false
-        req.parameter_mode = "POSITIONAL"
-        req.query_parameters = options[:params].map do |param|
-          Convert.to_query_param param
-        end
-      elsif Hash === options[:params]
-        req.use_legacy_sql = false
-        req.parameter_mode = "NAMED"
-        req.query_parameters = options[:params].map do |name, param|
-          Convert.to_query_param(param).tap do |named_param|
-            named_param.name = String name
-          end
-        end
-      else
-        raise "Query parameters must be an Array or a Hash."
-      end
-    end
-
-    req
+  def inspect
+    "#{self.class}(#{@project})"
   end

-  # rubocop:enable all
-
-  ##
-  # Job description for copy job
-  def copy_table_config source, target, options = {}
-    req = API::Job.new(
-      job_reference: job_ref_from(options[:job_id], options[:prefix]),
-      configuration: API::JobConfiguration.new(
-        copy: API::JobConfigurationTableCopy.new(
-          source_table: source,
-          destination_table: target,
-          create_disposition: create_disposition(options[:create]),
-          write_disposition: write_disposition(options[:write])
-        ),
-        dry_run: options[:dryrun]
-      )
-    )
-    req.configuration.labels = options[:labels] if options[:labels]
-    req
-  end
-
-  def extract_table_config table, storage_files, options = {}
-    storage_urls = Array(storage_files).map do |url|
-      url.respond_to?(:to_gs_url) ? url.to_gs_url : url
-    end
-    dest_format = source_format storage_urls.first, options[:format]
-    req = API::Job.new(
-      job_reference: job_ref_from(options[:job_id], options[:prefix]),
-      configuration: API::JobConfiguration.new(
-        extract: API::JobConfigurationExtract.new(
-          destination_uris: Array(storage_urls),
-          source_table: table,
-          destination_format: dest_format,
-          compression: options[:compression],
-          field_delimiter: options[:delimiter],
-          print_header: options[:header]
-        ),
-        dry_run: options[:dryrun]
-      )
-    )
-    req.configuration.labels = options[:labels] if options[:labels]
-    req
-  end
-
-  def create_disposition str
-    { "create_if_needed" => "CREATE_IF_NEEDED",
-      "createifneeded" => "CREATE_IF_NEEDED",
-      "if_needed" => "CREATE_IF_NEEDED",
-      "needed" => "CREATE_IF_NEEDED",
-      "create_never" => "CREATE_NEVER",
-      "createnever" => "CREATE_NEVER",
-      "never" => "CREATE_NEVER" }[str.to_s.downcase]
-  end
-
-  def write_disposition str
-    { "write_truncate" => "WRITE_TRUNCATE",
-      "writetruncate" => "WRITE_TRUNCATE",
-      "truncate" => "WRITE_TRUNCATE",
-      "write_append" => "WRITE_APPEND",
-      "writeappend" => "WRITE_APPEND",
-      "append" => "WRITE_APPEND",
-      "write_empty" => "WRITE_EMPTY",
-      "writeempty" => "WRITE_EMPTY",
-      "empty" => "WRITE_EMPTY" }[str.to_s.downcase]
-  end
-
-  def priority_value str
-    { "batch" => "BATCH",
-      "interactive" => "INTERACTIVE" }[str.to_s.downcase]
-  end
-
-  def source_format path, format
-    val = {
-      "csv" => "CSV",
-      "json" => "NEWLINE_DELIMITED_JSON",
-      "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
-      "avro" => "AVRO",
-      "datastore" => "DATASTORE_BACKUP",
-      "backup" => "DATASTORE_BACKUP",
-      "datastore_backup" => "DATASTORE_BACKUP"
-    }[format.to_s.downcase]
-    return val unless val.nil?
-    return nil if path.nil?
-    return "CSV" if path.end_with? ".csv"
-    return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
-    return "AVRO" if path.end_with? ".avro"
-    return "DATASTORE_BACKUP" if path.end_with? ".backup_info"
-    nil
-  end
+  protected

-  def projection_fields array_or_str
-    Array(array_or_str) unless array_or_str.nil?
+  # Generate a random string similar to the BigQuery service job IDs.
+  def generate_id
+    SecureRandom.urlsafe_base64(21)
   end

   def mime_type_for file
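Note the nil-guard in the relocated job_ref_from: BigQuery rejects an explicit nil location, so the attribute is only assigned when a location is actually given. A quick sketch (the generated job ID shown is illustrative):

    ref = service.job_ref_from nil, nil, location: "europe-west2"
    ref.job_id    #=> "job_9Fo3xY..." (client-generated)
    ref.location  #=> "europe-west2"

    ref = service.job_ref_from "my_job_id", nil
    ref.location  #=> nil, attribute left unset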
@@ -668,18 +397,6 @@ module Google
     nil
   end

-  def udfs array_or_str
-    Array(array_or_str).map do |uri_or_code|
-      resource = API::UserDefinedFunctionResource.new
-      if uri_or_code.start_with?("gs://")
-        resource.resource_uri = uri_or_code
-      else
-        resource.inline_code = uri_or_code
-      end
-      resource
-    end
-  end
-
   def execute backoff: nil
     if backoff
       Backoff.new(retries: retries).execute { yield }