google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -124,17 +124,15 @@ module Google
124
124
  # puts model.model_id
125
125
  # end
126
126
  #
127
- def all request_limit: nil
127
+ def all request_limit: nil, &block
128
128
  request_limit = request_limit.to_i if request_limit
129
- unless block_given?
130
- return enum_for :all, request_limit: request_limit
131
- end
129
+ return enum_for :all, request_limit: request_limit unless block_given?
132
130
  results = self
133
131
  loop do
134
- results.each { |r| yield r }
132
+ results.each(&block)
135
133
  if request_limit
136
134
  request_limit -= 1
137
- break if request_limit < 0
135
+ break if request_limit.negative?
138
136
  end
139
137
  break unless results.next?
140
138
  results = results.next
@@ -144,9 +142,7 @@ module Google
144
142
  ##
145
143
  # @private New Model::List from a response object.
146
144
  def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
147
- models = List.new(Array(gapi_list[:models]).map do |gapi_json|
148
- Model.from_gapi_json gapi_json, service
149
- end)
145
+ models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
150
146
  models.instance_variable_set :@token, gapi_list[:nextPageToken]
151
147
  models.instance_variable_set :@service, service
152
148
  models.instance_variable_set :@dataset_id, dataset_id
@@ -87,8 +87,8 @@ module Google
87
87
  ##
88
88
  # A unique ID for this model.
89
89
  #
90
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
91
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
90
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
91
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
92
92
  #
93
93
  # @!group Attributes
94
94
  #
@@ -100,8 +100,8 @@ module Google
100
100
  ##
101
101
  # The ID of the `Dataset` containing this model.
102
102
  #
103
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
104
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
103
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
104
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
105
105
  #
106
106
  # @!group Attributes
107
107
  #
@@ -341,14 +341,19 @@ module Google
341
341
  # the update to comply with ETag-based optimistic concurrency control.
342
342
  #
343
343
  # @param [Hash<String, String>] new_labels A hash containing key/value
344
- # pairs.
345
- #
346
- # * Label keys and values can be no longer than 63 characters.
347
- # * Label keys and values can contain only lowercase letters, numbers,
348
- # underscores, hyphens, and international characters.
349
- # * Label keys and values cannot exceed 128 bytes in size.
350
- # * Label keys must begin with a letter.
351
- # * Label keys must be unique within a model.
344
+ # pairs. The labels applied to a resource must meet the following requirements:
345
+ #
346
+ # * Each resource can have multiple labels, up to a maximum of 64.
347
+ # * Each label must be a key-value pair.
348
+ # * Keys have a minimum length of 1 character and a maximum length of
349
+ # 63 characters, and cannot be empty. Values can be empty, and have
350
+ # a maximum length of 63 characters.
351
+ # * Keys and values can contain only lowercase letters, numeric characters,
352
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
353
+ # international characters are allowed.
354
+ # * The key portion of a label must be unique. However, you can use the
355
+ # same key with multiple resources.
356
+ # * Keys must start with a lowercase letter or international character.
352
357
  #
353
358
  # @example
354
359
  # require "google/cloud/bigquery"
@@ -366,6 +371,79 @@ module Google
366
371
  patch_gapi! labels: new_labels
367
372
  end
368
373
 
374
+ ##
375
+ # The {EncryptionConfiguration} object that represents the custom
376
+ # encryption method used to protect this model. If not set,
377
+ # {Dataset#default_encryption} is used.
378
+ #
379
+ # Present only if this model is using custom encryption.
380
+ #
381
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
382
+ # Protecting Data with Cloud KMS Keys
383
+ #
384
+ # @return [EncryptionConfiguration, nil] The encryption configuration.
385
+ #
386
+ # @!group Attributes
387
+ #
388
+ # @example
389
+ # require "google/cloud/bigquery"
390
+ #
391
+ # bigquery = Google::Cloud::Bigquery.new
392
+ # dataset = bigquery.dataset "my_dataset"
393
+ # model = dataset.model "my_model"
394
+ #
395
+ # encrypt_config = model.encryption
396
+ #
397
+ # @!group Attributes
398
+ #
399
+ def encryption
400
+ return nil if reference?
401
+ return nil if @gapi_json[:encryptionConfiguration].nil?
402
+ # We have to create a gapic object from the hash because that is what
403
+ # EncryptionConfiguration is expecing.
404
+ json_cmek = @gapi_json[:encryptionConfiguration].to_json
405
+ gapi_cmek = Google::Apis::BigqueryV2::EncryptionConfiguration.from_json json_cmek
406
+ EncryptionConfiguration.from_gapi(gapi_cmek).freeze
407
+ end
408
+
409
+ ##
410
+ # Set the {EncryptionConfiguration} object that represents the custom
411
+ # encryption method used to protect this model. If not set,
412
+ # {Dataset#default_encryption} is used.
413
+ #
414
+ # Present only if this model is using custom encryption.
415
+ #
416
+ # If the model is not a full resource representation (see
417
+ # {#resource_full?}), the full representation will be retrieved before
418
+ # the update to comply with ETag-based optimistic concurrency control.
419
+ #
420
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
421
+ # Protecting Data with Cloud KMS Keys
422
+ #
423
+ # @param [EncryptionConfiguration] value The new encryption config.
424
+ #
425
+ # @example
426
+ # require "google/cloud/bigquery"
427
+ #
428
+ # bigquery = Google::Cloud::Bigquery.new
429
+ # dataset = bigquery.dataset "my_dataset"
430
+ # model = dataset.model "my_model"
431
+ #
432
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
433
+ # encrypt_config = bigquery.encryption kms_key: key_name
434
+ #
435
+ # model.encryption = encrypt_config
436
+ #
437
+ # @!group Attributes
438
+ #
439
+ def encryption= value
440
+ ensure_full_data!
441
+ # We have to create a hash from the gapic object's JSON because that
442
+ # is what Model is expecing.
443
+ json_cmek = JSON.parse value.to_gapi.to_json, symbolize_names: true
444
+ patch_gapi! encryptionConfiguration: json_cmek
445
+ end
446
+
369
447
  ##
370
448
  # The input feature columns that were used to train this model.
371
449
  #
@@ -376,7 +454,8 @@ module Google
376
454
  def feature_columns
377
455
  ensure_full_data!
378
456
  Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
379
- StandardSql::Field.from_gapi_json field_gapi_json
457
+ field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
458
+ StandardSql::Field.from_gapi field_gapi
380
459
  end
381
460
  end
382
461
 
@@ -391,7 +470,8 @@ module Google
391
470
  def label_columns
392
471
  ensure_full_data!
393
472
  Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
394
- StandardSql::Field.from_gapi_json field_gapi_json
473
+ field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
474
+ StandardSql::Field.from_gapi field_gapi
395
475
  end
396
476
  end
397
477
 
@@ -407,6 +487,146 @@ module Google
407
487
  Array @gapi_json[:trainingRuns]
408
488
  end
409
489
 
490
+ ##
491
+ # Exports the model to Google Cloud Storage asynchronously, immediately
492
+ # returning an {ExtractJob} that can be used to track the progress of the
493
+ # export job. The caller may poll the service by repeatedly calling
494
+ # {Job#reload!} and {Job#done?} to detect when the job is done, or
495
+ # simply block until the job is done by calling {Job#wait_until_done!}.
496
+ # See also {#extract}.
497
+ #
498
+ # The geographic location for the job ("US", "EU", etc.) can be set via
499
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
500
+ # the model is a full resource representation (see {#resource_full?}),
501
+ # the location of the job will automatically be set to the location of
502
+ # the model.
503
+ #
504
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
505
+ # Exporting models
506
+ #
507
+ # @param [String] extract_url The Google Storage URI to which BigQuery
508
+ # should extract the model. This value should end in an object name
509
+ # prefix, since multiple objects will be exported.
510
+ # @param [String] format The exported file format. The default value is
511
+ # `ml_tf_saved_model`.
512
+ #
513
+ # The following values are supported:
514
+ #
515
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
516
+ # * `ml_xgboost_booster` - XGBoost Booster
517
+ # @param [String] job_id A user-defined ID for the extract job. The ID
518
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
519
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
520
+ # `job_id` is provided, then `prefix` will not be used.
521
+ #
522
+ # See [Generating a job
523
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
524
+ # @param [String] prefix A string, usually human-readable, that will be
525
+ # prepended to a generated value to produce a unique job ID. For
526
+ # example, the prefix `daily_import_job_` can be given to generate a
527
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
528
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
529
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
530
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
531
+ # be used.
532
+ # @param [Hash] labels A hash of user-provided labels associated with
533
+ # the job. You can use these to organize and group your jobs.
534
+ #
535
+ # The labels applied to a resource must meet the following requirements:
536
+ #
537
+ # * Each resource can have multiple labels, up to a maximum of 64.
538
+ # * Each label must be a key-value pair.
539
+ # * Keys have a minimum length of 1 character and a maximum length of
540
+ # 63 characters, and cannot be empty. Values can be empty, and have
541
+ # a maximum length of 63 characters.
542
+ # * Keys and values can contain only lowercase letters, numeric characters,
543
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
544
+ # international characters are allowed.
545
+ # * The key portion of a label must be unique. However, you can use the
546
+ # same key with multiple resources.
547
+ # * Keys must start with a lowercase letter or international character.
548
+ #
549
+ # @yield [job] a job configuration object
550
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
551
+ # configuration object for setting additional options.
552
+ #
553
+ # @return [Google::Cloud::Bigquery::ExtractJob]
554
+ #
555
+ # @example
556
+ # require "google/cloud/bigquery"
557
+ #
558
+ # bigquery = Google::Cloud::Bigquery.new
559
+ # dataset = bigquery.dataset "my_dataset"
560
+ # model = dataset.model "my_model"
561
+ #
562
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
563
+ #
564
+ # extract_job.wait_until_done!
565
+ # extract_job.done? #=> true
566
+ #
567
+ # @!group Data
568
+ #
569
+ def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
570
+ ensure_service!
571
+ options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
572
+ updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
573
+ updater.location = location if location # may be model reference
574
+
575
+ yield updater if block_given?
576
+
577
+ job_gapi = updater.to_gapi
578
+ gapi = service.extract_table job_gapi
579
+ Job.from_gapi gapi, service
580
+ end
581
+
582
+ ##
583
+ # Exports the model to Google Cloud Storage using a synchronous method
584
+ # that blocks for a response. Timeouts and transient errors are generally
585
+ # handled as needed to complete the job. See also {#extract_job}.
586
+ #
587
+ # The geographic location for the job ("US", "EU", etc.) can be set via
588
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
589
+ # the model is a full resource representation (see {#resource_full?}),
590
+ # the location of the job will automatically be set to the location of
591
+ # the model.
592
+ #
593
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
594
+ # Exporting models
595
+ #
596
+ # @param [String] extract_url The Google Storage URI to which BigQuery
597
+ # should extract the model. This value should end in an object name
598
+ # prefix, since multiple objects will be exported.
599
+ # @param [String] format The exported file format. The default value is
600
+ # `ml_tf_saved_model`.
601
+ #
602
+ # The following values are supported:
603
+ #
604
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
605
+ # * `ml_xgboost_booster` - XGBoost Booster
606
+ # @yield [job] a job configuration object
607
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
608
+ # configuration object for setting additional options.
609
+ #
610
+ # @return [Boolean] Returns `true` if the extract operation succeeded.
611
+ #
612
+ # @example
613
+ # require "google/cloud/bigquery"
614
+ #
615
+ # bigquery = Google::Cloud::Bigquery.new
616
+ # dataset = bigquery.dataset "my_dataset"
617
+ # model = dataset.model "my_model"
618
+ #
619
+ # model.extract "gs://my-bucket/#{model.model_id}"
620
+ #
621
+ # @!group Data
622
+ #
623
+ def extract extract_url, format: nil, &block
624
+ job = extract_job extract_url, format: format, &block
625
+ job.wait_until_done!
626
+ ensure_job_succeeded! job
627
+ true
628
+ end
629
+
410
630
  ##
411
631
  # Permanently deletes the model.
412
632
  #
@@ -481,7 +701,7 @@ module Google
481
701
  # model = dataset.model "my_model", skip_lookup: true
482
702
  # model.exists? #=> true
483
703
  #
484
- def exists? force: nil
704
+ def exists? force: false
485
705
  return resource_exists? if force
486
706
  # If we have a value, return it
487
707
  return @exists unless @exists.nil?
@@ -595,7 +815,7 @@ module Google
595
815
  end
596
816
 
597
817
  ##
598
- # @private New lazy Model object without making an HTTP request.
818
+ # @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
599
819
  def self.new_reference project_id, dataset_id, model_id, service
600
820
  raise ArgumentError, "project_id is required" unless project_id
601
821
  raise ArgumentError, "dataset_id is required" unless dataset_id
@@ -659,6 +879,17 @@ module Google
659
879
  def ensure_full_data!
660
880
  reload! unless resource_full?
661
881
  end
882
+
883
+ def ensure_job_succeeded! job
884
+ return unless job.failed?
885
+ begin
886
+ # raise to activate ruby exception cause handling
887
+ raise job.gapi_error
888
+ rescue StandardError => e
889
+ # wrap Google::Apis::Error with Google::Cloud::Error
890
+ raise Google::Cloud::Error.from_error(e)
891
+ end
892
+ end
662
893
  end
663
894
  end
664
895
  end