google-cloud-bigquery 1.14.0 → 1.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
@@ -124,17 +124,15 @@ module Google
|
|
124
124
|
# puts model.model_id
|
125
125
|
# end
|
126
126
|
#
|
127
|
-
def all request_limit: nil
|
127
|
+
def all request_limit: nil, &block
|
128
128
|
request_limit = request_limit.to_i if request_limit
|
129
|
-
unless block_given?
|
130
|
-
return enum_for :all, request_limit: request_limit
|
131
|
-
end
|
129
|
+
return enum_for :all, request_limit: request_limit unless block_given?
|
132
130
|
results = self
|
133
131
|
loop do
|
134
|
-
results.each
|
132
|
+
results.each(&block)
|
135
133
|
if request_limit
|
136
134
|
request_limit -= 1
|
137
|
-
break if request_limit
|
135
|
+
break if request_limit.negative?
|
138
136
|
end
|
139
137
|
break unless results.next?
|
140
138
|
results = results.next
|
@@ -144,9 +142,7 @@ module Google
|
|
144
142
|
##
|
145
143
|
# @private New Model::List from a response object.
|
146
144
|
def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
|
147
|
-
models = List.new(Array(gapi_list[:models]).map
|
148
|
-
Model.from_gapi_json gapi_json, service
|
149
|
-
end)
|
145
|
+
models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
|
150
146
|
models.instance_variable_set :@token, gapi_list[:nextPageToken]
|
151
147
|
models.instance_variable_set :@service, service
|
152
148
|
models.instance_variable_set :@dataset_id, dataset_id
|
@@ -87,8 +87,8 @@ module Google
|
|
87
87
|
##
|
88
88
|
# A unique ID for this model.
|
89
89
|
#
|
90
|
-
# @return [String] The ID must contain only letters (
|
91
|
-
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
90
|
+
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
|
91
|
+
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
92
92
|
#
|
93
93
|
# @!group Attributes
|
94
94
|
#
|
@@ -100,8 +100,8 @@ module Google
|
|
100
100
|
##
|
101
101
|
# The ID of the `Dataset` containing this model.
|
102
102
|
#
|
103
|
-
# @return [String] The ID must contain only letters (
|
104
|
-
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
103
|
+
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
|
104
|
+
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
105
105
|
#
|
106
106
|
# @!group Attributes
|
107
107
|
#
|
@@ -341,14 +341,19 @@ module Google
|
|
341
341
|
# the update to comply with ETag-based optimistic concurrency control.
|
342
342
|
#
|
343
343
|
# @param [Hash<String, String>] new_labels A hash containing key/value
|
344
|
-
# pairs.
|
345
|
-
#
|
346
|
-
# *
|
347
|
-
# *
|
348
|
-
#
|
349
|
-
#
|
350
|
-
#
|
351
|
-
# *
|
344
|
+
# pairs. The labels applied to a resource must meet the following requirements:
|
345
|
+
#
|
346
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
347
|
+
# * Each label must be a key-value pair.
|
348
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
349
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
350
|
+
# a maximum length of 63 characters.
|
351
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
352
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
353
|
+
# international characters are allowed.
|
354
|
+
# * The key portion of a label must be unique. However, you can use the
|
355
|
+
# same key with multiple resources.
|
356
|
+
# * Keys must start with a lowercase letter or international character.
|
352
357
|
#
|
353
358
|
# @example
|
354
359
|
# require "google/cloud/bigquery"
|
@@ -366,6 +371,79 @@ module Google
|
|
366
371
|
patch_gapi! labels: new_labels
|
367
372
|
end
|
368
373
|
|
374
|
+
##
|
375
|
+
# The {EncryptionConfiguration} object that represents the custom
|
376
|
+
# encryption method used to protect this model. If not set,
|
377
|
+
# {Dataset#default_encryption} is used.
|
378
|
+
#
|
379
|
+
# Present only if this model is using custom encryption.
|
380
|
+
#
|
381
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
382
|
+
# Protecting Data with Cloud KMS Keys
|
383
|
+
#
|
384
|
+
# @return [EncryptionConfiguration, nil] The encryption configuration.
|
385
|
+
#
|
386
|
+
# @!group Attributes
|
387
|
+
#
|
388
|
+
# @example
|
389
|
+
# require "google/cloud/bigquery"
|
390
|
+
#
|
391
|
+
# bigquery = Google::Cloud::Bigquery.new
|
392
|
+
# dataset = bigquery.dataset "my_dataset"
|
393
|
+
# model = dataset.model "my_model"
|
394
|
+
#
|
395
|
+
# encrypt_config = model.encryption
|
396
|
+
#
|
397
|
+
# @!group Attributes
|
398
|
+
#
|
399
|
+
def encryption
|
400
|
+
return nil if reference?
|
401
|
+
return nil if @gapi_json[:encryptionConfiguration].nil?
|
402
|
+
# We have to create a gapic object from the hash because that is what
|
403
|
+
# EncryptionConfiguration is expecting.
|
404
|
+
json_cmek = @gapi_json[:encryptionConfiguration].to_json
|
405
|
+
gapi_cmek = Google::Apis::BigqueryV2::EncryptionConfiguration.from_json json_cmek
|
406
|
+
EncryptionConfiguration.from_gapi(gapi_cmek).freeze
|
407
|
+
end
|
408
|
+
|
409
|
+
##
|
410
|
+
# Set the {EncryptionConfiguration} object that represents the custom
|
411
|
+
# encryption method used to protect this model. If not set,
|
412
|
+
# {Dataset#default_encryption} is used.
|
413
|
+
#
|
414
|
+
# Present only if this model is using custom encryption.
|
415
|
+
#
|
416
|
+
# If the model is not a full resource representation (see
|
417
|
+
# {#resource_full?}), the full representation will be retrieved before
|
418
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
419
|
+
#
|
420
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
421
|
+
# Protecting Data with Cloud KMS Keys
|
422
|
+
#
|
423
|
+
# @param [EncryptionConfiguration] value The new encryption config.
|
424
|
+
#
|
425
|
+
# @example
|
426
|
+
# require "google/cloud/bigquery"
|
427
|
+
#
|
428
|
+
# bigquery = Google::Cloud::Bigquery.new
|
429
|
+
# dataset = bigquery.dataset "my_dataset"
|
430
|
+
# model = dataset.model "my_model"
|
431
|
+
#
|
432
|
+
# key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
|
433
|
+
# encrypt_config = bigquery.encryption kms_key: key_name
|
434
|
+
#
|
435
|
+
# model.encryption = encrypt_config
|
436
|
+
#
|
437
|
+
# @!group Attributes
|
438
|
+
#
|
439
|
+
def encryption= value
|
440
|
+
ensure_full_data!
|
441
|
+
# We have to create a hash from the gapic object's JSON because that
|
442
|
+
# is what Model is expecting.
|
443
|
+
json_cmek = JSON.parse value.to_gapi.to_json, symbolize_names: true
|
444
|
+
patch_gapi! encryptionConfiguration: json_cmek
|
445
|
+
end
|
446
|
+
|
369
447
|
##
|
370
448
|
# The input feature columns that were used to train this model.
|
371
449
|
#
|
@@ -376,7 +454,8 @@ module Google
|
|
376
454
|
def feature_columns
|
377
455
|
ensure_full_data!
|
378
456
|
Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
|
379
|
-
|
457
|
+
field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
|
458
|
+
StandardSql::Field.from_gapi field_gapi
|
380
459
|
end
|
381
460
|
end
|
382
461
|
|
@@ -391,7 +470,8 @@ module Google
|
|
391
470
|
def label_columns
|
392
471
|
ensure_full_data!
|
393
472
|
Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
|
394
|
-
|
473
|
+
field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
|
474
|
+
StandardSql::Field.from_gapi field_gapi
|
395
475
|
end
|
396
476
|
end
|
397
477
|
|
@@ -407,6 +487,146 @@ module Google
|
|
407
487
|
Array @gapi_json[:trainingRuns]
|
408
488
|
end
|
409
489
|
|
490
|
+
##
|
491
|
+
# Exports the model to Google Cloud Storage asynchronously, immediately
|
492
|
+
# returning an {ExtractJob} that can be used to track the progress of the
|
493
|
+
# export job. The caller may poll the service by repeatedly calling
|
494
|
+
# {Job#reload!} and {Job#done?} to detect when the job is done, or
|
495
|
+
# simply block until the job is done by calling {Job#wait_until_done!}.
|
496
|
+
# See also {#extract}.
|
497
|
+
#
|
498
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
499
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
500
|
+
# the model is a full resource representation (see {#resource_full?}),
|
501
|
+
# the location of the job will automatically be set to the location of
|
502
|
+
# the model.
|
503
|
+
#
|
504
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
505
|
+
# Exporting models
|
506
|
+
#
|
507
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
508
|
+
# should extract the model. This value should end in an object name
|
509
|
+
# prefix, since multiple objects will be exported.
|
510
|
+
# @param [String] format The exported file format. The default value is
|
511
|
+
# `ml_tf_saved_model`.
|
512
|
+
#
|
513
|
+
# The following values are supported:
|
514
|
+
#
|
515
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
516
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
517
|
+
# @param [String] job_id A user-defined ID for the extract job. The ID
|
518
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
519
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
520
|
+
# `job_id` is provided, then `prefix` will not be used.
|
521
|
+
#
|
522
|
+
# See [Generating a job
|
523
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
524
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
525
|
+
# prepended to a generated value to produce a unique job ID. For
|
526
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
527
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
528
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
529
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
530
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
531
|
+
# be used.
|
532
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
533
|
+
# the job. You can use these to organize and group your jobs.
|
534
|
+
#
|
535
|
+
# The labels applied to a resource must meet the following requirements:
|
536
|
+
#
|
537
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
538
|
+
# * Each label must be a key-value pair.
|
539
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
540
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
541
|
+
# a maximum length of 63 characters.
|
542
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
543
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
544
|
+
# international characters are allowed.
|
545
|
+
# * The key portion of a label must be unique. However, you can use the
|
546
|
+
# same key with multiple resources.
|
547
|
+
# * Keys must start with a lowercase letter or international character.
|
548
|
+
#
|
549
|
+
# @yield [job] a job configuration object
|
550
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
551
|
+
# configuration object for setting additional options.
|
552
|
+
#
|
553
|
+
# @return [Google::Cloud::Bigquery::ExtractJob]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# require "google/cloud/bigquery"
|
557
|
+
#
|
558
|
+
# bigquery = Google::Cloud::Bigquery.new
|
559
|
+
# dataset = bigquery.dataset "my_dataset"
|
560
|
+
# model = dataset.model "my_model"
|
561
|
+
#
|
562
|
+
# extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
|
563
|
+
#
|
564
|
+
# extract_job.wait_until_done!
|
565
|
+
# extract_job.done? #=> true
|
566
|
+
#
|
567
|
+
# @!group Data
|
568
|
+
#
|
569
|
+
def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
|
570
|
+
ensure_service!
|
571
|
+
options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
|
572
|
+
updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
|
573
|
+
updater.location = location if location # may be model reference
|
574
|
+
|
575
|
+
yield updater if block_given?
|
576
|
+
|
577
|
+
job_gapi = updater.to_gapi
|
578
|
+
gapi = service.extract_table job_gapi
|
579
|
+
Job.from_gapi gapi, service
|
580
|
+
end
|
581
|
+
|
582
|
+
##
|
583
|
+
# Exports the model to Google Cloud Storage using a synchronous method
|
584
|
+
# that blocks for a response. Timeouts and transient errors are generally
|
585
|
+
# handled as needed to complete the job. See also {#extract_job}.
|
586
|
+
#
|
587
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
588
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
589
|
+
# the model is a full resource representation (see {#resource_full?}),
|
590
|
+
# the location of the job will automatically be set to the location of
|
591
|
+
# the model.
|
592
|
+
#
|
593
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
594
|
+
# Exporting models
|
595
|
+
#
|
596
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
597
|
+
# should extract the model. This value should end in an object name
|
598
|
+
# prefix, since multiple objects will be exported.
|
599
|
+
# @param [String] format The exported file format. The default value is
|
600
|
+
# `ml_tf_saved_model`.
|
601
|
+
#
|
602
|
+
# The following values are supported:
|
603
|
+
#
|
604
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
605
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
606
|
+
# @yield [job] a job configuration object
|
607
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
608
|
+
# configuration object for setting additional options.
|
609
|
+
#
|
610
|
+
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
611
|
+
#
|
612
|
+
# @example
|
613
|
+
# require "google/cloud/bigquery"
|
614
|
+
#
|
615
|
+
# bigquery = Google::Cloud::Bigquery.new
|
616
|
+
# dataset = bigquery.dataset "my_dataset"
|
617
|
+
# model = dataset.model "my_model"
|
618
|
+
#
|
619
|
+
# model.extract "gs://my-bucket/#{model.model_id}"
|
620
|
+
#
|
621
|
+
# @!group Data
|
622
|
+
#
|
623
|
+
def extract extract_url, format: nil, &block
|
624
|
+
job = extract_job extract_url, format: format, &block
|
625
|
+
job.wait_until_done!
|
626
|
+
ensure_job_succeeded! job
|
627
|
+
true
|
628
|
+
end
|
629
|
+
|
410
630
|
##
|
411
631
|
# Permanently deletes the model.
|
412
632
|
#
|
@@ -481,7 +701,7 @@ module Google
|
|
481
701
|
# model = dataset.model "my_model", skip_lookup: true
|
482
702
|
# model.exists? #=> true
|
483
703
|
#
|
484
|
-
def exists? force:
|
704
|
+
def exists? force: false
|
485
705
|
return resource_exists? if force
|
486
706
|
# If we have a value, return it
|
487
707
|
return @exists unless @exists.nil?
|
@@ -595,7 +815,7 @@ module Google
|
|
595
815
|
end
|
596
816
|
|
597
817
|
##
|
598
|
-
# @private New lazy Model object without making an HTTP request.
|
818
|
+
# @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
|
599
819
|
def self.new_reference project_id, dataset_id, model_id, service
|
600
820
|
raise ArgumentError, "project_id is required" unless project_id
|
601
821
|
raise ArgumentError, "dataset_id is required" unless dataset_id
|
@@ -659,6 +879,17 @@ module Google
|
|
659
879
|
def ensure_full_data!
|
660
880
|
reload! unless resource_full?
|
661
881
|
end
|
882
|
+
|
883
|
+
def ensure_job_succeeded! job
|
884
|
+
return unless job.failed?
|
885
|
+
begin
|
886
|
+
# raise to activate ruby exception cause handling
|
887
|
+
raise job.gapi_error
|
888
|
+
rescue StandardError => e
|
889
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
890
|
+
raise Google::Cloud::Error.from_error(e)
|
891
|
+
end
|
892
|
+
end
|
662
893
|
end
|
663
894
|
end
|
664
895
|
end
|