google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
data/lib/google/cloud/bigquery/model/list.rb
@@ -124,17 +124,15 @@ module Google
  # puts model.model_id
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
    request_limit = request_limit.to_i if request_limit
-   unless block_given?
-     return enum_for :all, request_limit: request_limit
-   end
+   return enum_for :all, request_limit: request_limit unless block_given?
    results = self
    loop do
-     results.each { |r| yield r }
+     results.each(&block)
      if request_limit
        request_limit -= 1
-       break if request_limit < 0
+       break if request_limit.negative?
      end
      break unless results.next?
      results = results.next
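
The refactored `Model::List#all` keeps its public behavior: it yields every model when given a block and returns an `Enumerator` otherwise, with `request_limit` capping the number of additional page requests. A minimal usage sketch (not part of the diff), assuming a dataset named `my_dataset` that already contains models:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  dataset  = bigquery.dataset "my_dataset"

  # Block form: extra result pages are fetched as needed, at most 10 more requests.
  dataset.models.all(request_limit: 10) { |model| puts model.model_id }

  # Enumerator form, enabled by the `enum_for` early return shown above.
  model_ids = dataset.models.all.map(&:model_id)
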
@@ -144,9 +142,7 @@ module Google
  ##
  # @private New Model::List from a response object.
  def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
-   models = List.new(Array(gapi_list[:models]).map do |gapi_json|
-     Model.from_gapi_json gapi_json, service
-   end)
+   models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
    models.instance_variable_set :@token, gapi_list[:nextPageToken]
    models.instance_variable_set :@service, service
    models.instance_variable_set :@dataset_id, dataset_id
data/lib/google/cloud/bigquery/model.rb
@@ -87,8 +87,8 @@ module Google
  ##
  # A unique ID for this model.
  #
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -100,8 +100,8 @@ module Google
  ##
  # The ID of the `Dataset` containing this model.
  #
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -341,14 +341,19 @@ module Google
  # the update to comply with ETag-based optimistic concurrency control.
  #
  # @param [Hash<String, String>] new_labels A hash containing key/value
- # pairs.
- #
- # * Label keys and values can be no longer than 63 characters.
- # * Label keys and values can contain only lowercase letters, numbers,
- # underscores, hyphens, and international characters.
- # * Label keys and values cannot exceed 128 bytes in size.
- # * Label keys must begin with a letter.
- # * Label keys must be unique within a model.
+ # pairs. The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -366,6 +371,79 @@ module Google
    patch_gapi! labels: new_labels
  end

+ ##
+ # The {EncryptionConfiguration} object that represents the custom
+ # encryption method used to protect this model. If not set,
+ # {Dataset#default_encryption} is used.
+ #
+ # Present only if this model is using custom encryption.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ # Protecting Data with Cloud KMS Keys
+ #
+ # @return [EncryptionConfiguration, nil] The encryption configuration.
+ #
+ # @!group Attributes
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # encrypt_config = model.encryption
+ #
+ # @!group Attributes
+ #
+ def encryption
+   return nil if reference?
+   return nil if @gapi_json[:encryptionConfiguration].nil?
+   # We have to create a gapic object from the hash because that is what
+   # EncryptionConfiguration is expecting.
+   json_cmek = @gapi_json[:encryptionConfiguration].to_json
+   gapi_cmek = Google::Apis::BigqueryV2::EncryptionConfiguration.from_json json_cmek
+   EncryptionConfiguration.from_gapi(gapi_cmek).freeze
+ end
+
+ ##
+ # Set the {EncryptionConfiguration} object that represents the custom
+ # encryption method used to protect this model. If not set,
+ # {Dataset#default_encryption} is used.
+ #
+ # Present only if this model is using custom encryption.
+ #
+ # If the model is not a full resource representation (see
+ # {#resource_full?}), the full representation will be retrieved before
+ # the update to comply with ETag-based optimistic concurrency control.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ # Protecting Data with Cloud KMS Keys
+ #
+ # @param [EncryptionConfiguration] value The new encryption config.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
+ # encrypt_config = bigquery.encryption kms_key: key_name
+ #
+ # model.encryption = encrypt_config
+ #
+ # @!group Attributes
+ #
+ def encryption= value
+   ensure_full_data!
+   # We have to create a hash from the gapic object's JSON because that
+   # is what Model is expecting.
+   json_cmek = JSON.parse value.to_gapi.to_json, symbolize_names: true
+   patch_gapi! encryptionConfiguration: json_cmek
+ end
+
  ##
  # The input feature columns that were used to train this model.
  #
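
A short sketch of the new CMEK accessors added above, not part of the diff. The key path is the same placeholder used in the YARD example, and `bigquery.encryption` / `EncryptionConfiguration#kms_key` are the existing project-level helpers:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  if model.encryption.nil?
    # No custom key yet; the dataset's default encryption (if any) applies.
    key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
    model.encryption = bigquery.encryption(kms_key: key_name)
  else
    puts model.encryption.kms_key
  end
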
@@ -376,7 +454,8 @@ module Google
  def feature_columns
    ensure_full_data!
    Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
-     StandardSql::Field.from_gapi_json field_gapi_json
+     field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
+     StandardSql::Field.from_gapi field_gapi
    end
  end

@@ -391,7 +470,8 @@ module Google
  def label_columns
    ensure_full_data!
    Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
-     field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
-     StandardSql::Field.from_gapi field_gapi
+     field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
+     StandardSql::Field.from_gapi field_gapi
    end
  end

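
Both methods now round-trip through the gapic `StandardSqlField` class before building `StandardSql::Field` values. A usage sketch (not part of the diff) that prints the training schema, assuming `StandardSql::Field#name` and `#type.type_kind` behave as in the released API:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  model.feature_columns.each { |f| puts "feature #{f.name}: #{f.type.type_kind}" }
  model.label_columns.each   { |f| puts "label   #{f.name}: #{f.type.type_kind}" }
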
@@ -407,6 +487,146 @@ module Google
    Array @gapi_json[:trainingRuns]
  end

+ ##
+ # Exports the model to Google Cloud Storage asynchronously, immediately
+ # returning an {ExtractJob} that can be used to track the progress of the
+ # export job. The caller may poll the service by repeatedly calling
+ # {Job#reload!} and {Job#done?} to detect when the job is done, or
+ # simply block until the job is done by calling #{Job#wait_until_done!}.
+ # See also {#extract}.
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
+ # the model is a full resource representation (see {#resource_full?}),
+ # the location of the job will automatically be set to the location of
+ # the model.
+ #
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
+ #
+ # @param [String] extract_url The Google Storage URI to which BigQuery
+ # should extract the model. This value should end in an object name
+ # prefix, since multiple objects will be exported.
+ # @param [String] format The exported file format. The default value is
+ # `ml_tf_saved_model`.
+ #
+ # The following values are supported:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ # @param [String] job_id A user-defined ID for the extract job. The ID
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+ # `job_id` is provided, then `prefix` will not be used.
+ #
+ # See [Generating a job
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ # prepended to a generated value to produce a unique job ID. For
+ # example, the prefix `daily_import_job_` can be given to generate a
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ # be used.
+ # @param [Hash] labels A hash of user-provided labels associated with
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
+ #
+ # @yield [job] a job configuration object
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+ # configuration object for setting additional options.
+ #
+ # @return [Google::Cloud::Bigquery::ExtractJob]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+ #
+ # extract_job.wait_until_done!
+ # extract_job.done? #=> true
+ #
+ # @!group Data
+ #
+ def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
+   ensure_service!
+   options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
+   updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
+   updater.location = location if location # may be model reference
+
+   yield updater if block_given?
+
+   job_gapi = updater.to_gapi
+   gapi = service.extract_table job_gapi
+   Job.from_gapi gapi, service
+ end
+
+ ##
+ # Exports the model to Google Cloud Storage using a synchronous method
+ # that blocks for a response. Timeouts and transient errors are generally
+ # handled as needed to complete the job. See also {#extract_job}.
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
+ # the model is a full resource representation (see {#resource_full?}),
+ # the location of the job will automatically be set to the location of
+ # the model.
+ #
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
+ #
+ # @param [String] extract_url The Google Storage URI to which BigQuery
+ # should extract the model. This value should end in an object name
+ # prefix, since multiple objects will be exported.
+ # @param [String] format The exported file format. The default value is
+ # `ml_tf_saved_model`.
+ #
+ # The following values are supported:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ # @yield [job] a job configuration object
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+ # configuration object for setting additional options.
+ #
+ # @return [Boolean] Returns `true` if the extract operation succeeded.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # model.extract "gs://my-bucket/#{model.model_id}"
+ #
+ # @!group Data
+ #
+ def extract extract_url, format: nil, &block
+   job = extract_job extract_url, format: format, &block
+   job.wait_until_done!
+   ensure_job_succeeded! job
+   true
+ end
+
  ##
  # Permanently deletes the model.
  #
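
The docs above describe two ways to wait on the export: polling with `Job#reload!`/`Job#done?`, or blocking with `Job#wait_until_done!`. A polling sketch (not part of the diff), with the bucket name as a placeholder and the explicit `location=` only relevant when the model is a bare reference:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  extract_job = model.extract_job "gs://my-bucket/#{model.model_id}" do |job|
    job.location = "EU" # only needed when the model's location is unknown
  end

  # Manual polling; model.extract or extract_job.wait_until_done! blocks instead.
  until extract_job.done?
    sleep 5
    extract_job.reload!
  end
  warn extract_job.error["message"] if extract_job.failed?
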
@@ -481,7 +701,7 @@ module Google
  # model = dataset.model "my_model", skip_lookup: true
  # model.exists? #=> true
  #
- def exists? force: nil
+ def exists? force: false
    return resource_exists? if force
    # If we have a value, return it
    return @exists unless @exists.nil?
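
The new `force: false` default makes the lazy behavior explicit. A brief sketch (not part of the diff) contrasting the two modes for a `skip_lookup` reference:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model", skip_lookup: true

  model.exists?              # lazy: answers from cached state when possible
  model.exists? force: true  # always verifies against the service
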
@@ -595,7 +815,7 @@ module Google
  end

  ##
- # @private New lazy Model object without making an HTTP request.
+ # @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
  def self.new_reference project_id, dataset_id, model_id, service
    raise ArgumentError, "project_id is required" unless project_id
    raise ArgumentError, "dataset_id is required" unless dataset_id
@@ -659,6 +879,17 @@ module Google
  def ensure_full_data!
    reload! unless resource_full?
  end
+
+ def ensure_job_succeeded! job
+   return unless job.failed?
+   begin
+     # raise to activate ruby exception cause handling
+     raise job.gapi_error
+   rescue StandardError => e
+     # wrap Google::Apis::Error with Google::Cloud::Error
+     raise Google::Cloud::Error.from_error(e)
+   end
+ end
  end
  end
  end
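
The raise/rescue pattern in `ensure_job_succeeded!` exists so that the wrapping `Google::Cloud::Error` keeps the underlying `Google::Apis::Error` as its Ruby exception cause. A hedged sketch of what a caller of the new `Model#extract` might see on failure:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  begin
    model.extract "gs://my-bucket/#{model.model_id}"
  rescue Google::Cloud::Error => e
    # e.cause is the original error raised inside ensure_job_succeeded!
    warn "export failed: #{e.message} (#{e.cause.class})"
  end
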