google-cloud-bigquery 1.14.0 → 1.42.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
@@ -124,17 +124,15 @@ module Google
|
|
124
124
|
# puts model.model_id
|
125
125
|
# end
|
126
126
|
#
|
127
|
-
def all request_limit: nil
|
127
|
+
def all request_limit: nil, &block
|
128
128
|
request_limit = request_limit.to_i if request_limit
|
129
|
-
unless block_given?
|
130
|
-
return enum_for :all, request_limit: request_limit
|
131
|
-
end
|
129
|
+
return enum_for :all, request_limit: request_limit unless block_given?
|
132
130
|
results = self
|
133
131
|
loop do
|
134
|
-
results.each
|
132
|
+
results.each(&block)
|
135
133
|
if request_limit
|
136
134
|
request_limit -= 1
|
137
|
-
break if request_limit
|
135
|
+
break if request_limit.negative?
|
138
136
|
end
|
139
137
|
break unless results.next?
|
140
138
|
results = results.next
|
@@ -144,9 +142,7 @@ module Google
|
|
144
142
|
##
|
145
143
|
# @private New Model::List from a response object.
|
146
144
|
def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
|
147
|
-
models = List.new(Array(gapi_list[:models]).map
|
148
|
-
Model.from_gapi_json gapi_json, service
|
149
|
-
end)
|
145
|
+
models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
|
150
146
|
models.instance_variable_set :@token, gapi_list[:nextPageToken]
|
151
147
|
models.instance_variable_set :@service, service
|
152
148
|
models.instance_variable_set :@dataset_id, dataset_id
|
@@ -87,8 +87,8 @@ module Google
|
|
87
87
|
##
|
88
88
|
# A unique ID for this model.
|
89
89
|
#
|
90
|
-
# @return [String] The ID must contain only letters (
|
91
|
-
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
90
|
+
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
|
91
|
+
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
92
92
|
#
|
93
93
|
# @!group Attributes
|
94
94
|
#
|
@@ -100,8 +100,8 @@ module Google
|
|
100
100
|
##
|
101
101
|
# The ID of the `Dataset` containing this model.
|
102
102
|
#
|
103
|
-
# @return [String] The ID must contain only letters (
|
104
|
-
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
103
|
+
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
|
104
|
+
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
105
105
|
#
|
106
106
|
# @!group Attributes
|
107
107
|
#
|
@@ -341,14 +341,19 @@ module Google
|
|
341
341
|
# the update to comply with ETag-based optimistic concurrency control.
|
342
342
|
#
|
343
343
|
# @param [Hash<String, String>] new_labels A hash containing key/value
|
344
|
-
# pairs.
|
345
|
-
#
|
346
|
-
# *
|
347
|
-
# *
|
348
|
-
#
|
349
|
-
#
|
350
|
-
#
|
351
|
-
# *
|
344
|
+
# pairs. The labels applied to a resource must meet the following requirements:
|
345
|
+
#
|
346
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
347
|
+
# * Each label must be a key-value pair.
|
348
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
349
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
350
|
+
# a maximum length of 63 characters.
|
351
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
352
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
353
|
+
# international characters are allowed.
|
354
|
+
# * The key portion of a label must be unique. However, you can use the
|
355
|
+
# same key with multiple resources.
|
356
|
+
# * Keys must start with a lowercase letter or international character.
|
352
357
|
#
|
353
358
|
# @example
|
354
359
|
# require "google/cloud/bigquery"
|
@@ -366,6 +371,79 @@ module Google
|
|
366
371
|
patch_gapi! labels: new_labels
|
367
372
|
end
|
368
373
|
|
374
|
+
##
|
375
|
+
# The {EncryptionConfiguration} object that represents the custom
|
376
|
+
# encryption method used to protect this model. If not set,
|
377
|
+
# {Dataset#default_encryption} is used.
|
378
|
+
#
|
379
|
+
# Present only if this model is using custom encryption.
|
380
|
+
#
|
381
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
382
|
+
# Protecting Data with Cloud KMS Keys
|
383
|
+
#
|
384
|
+
# @return [EncryptionConfiguration, nil] The encryption configuration.
|
385
|
+
#
|
386
|
+
# @!group Attributes
|
387
|
+
#
|
388
|
+
# @example
|
389
|
+
# require "google/cloud/bigquery"
|
390
|
+
#
|
391
|
+
# bigquery = Google::Cloud::Bigquery.new
|
392
|
+
# dataset = bigquery.dataset "my_dataset"
|
393
|
+
# model = dataset.model "my_model"
|
394
|
+
#
|
395
|
+
# encrypt_config = model.encryption
|
396
|
+
#
|
397
|
+
# @!group Attributes
|
398
|
+
#
|
399
|
+
def encryption
|
400
|
+
return nil if reference?
|
401
|
+
return nil if @gapi_json[:encryptionConfiguration].nil?
|
402
|
+
# We have to create a gapic object from the hash because that is what
|
403
|
+
# EncryptionConfiguration is expecting.
|
404
|
+
json_cmek = @gapi_json[:encryptionConfiguration].to_json
|
405
|
+
gapi_cmek = Google::Apis::BigqueryV2::EncryptionConfiguration.from_json json_cmek
|
406
|
+
EncryptionConfiguration.from_gapi(gapi_cmek).freeze
|
407
|
+
end
|
408
|
+
|
409
|
+
##
|
410
|
+
# Set the {EncryptionConfiguration} object that represents the custom
|
411
|
+
# encryption method used to protect this model. If not set,
|
412
|
+
# {Dataset#default_encryption} is used.
|
413
|
+
#
|
414
|
+
# Present only if this model is using custom encryption.
|
415
|
+
#
|
416
|
+
# If the model is not a full resource representation (see
|
417
|
+
# {#resource_full?}), the full representation will be retrieved before
|
418
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
419
|
+
#
|
420
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
421
|
+
# Protecting Data with Cloud KMS Keys
|
422
|
+
#
|
423
|
+
# @param [EncryptionConfiguration] value The new encryption config.
|
424
|
+
#
|
425
|
+
# @example
|
426
|
+
# require "google/cloud/bigquery"
|
427
|
+
#
|
428
|
+
# bigquery = Google::Cloud::Bigquery.new
|
429
|
+
# dataset = bigquery.dataset "my_dataset"
|
430
|
+
# model = dataset.model "my_model"
|
431
|
+
#
|
432
|
+
# key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
|
433
|
+
# encrypt_config = bigquery.encryption kms_key: key_name
|
434
|
+
#
|
435
|
+
# model.encryption = encrypt_config
|
436
|
+
#
|
437
|
+
# @!group Attributes
|
438
|
+
#
|
439
|
+
def encryption= value
|
440
|
+
ensure_full_data!
|
441
|
+
# We have to create a hash from the gapic object's JSON because that
|
442
|
+
# is what Model is expecting.
|
443
|
+
json_cmek = JSON.parse value.to_gapi.to_json, symbolize_names: true
|
444
|
+
patch_gapi! encryptionConfiguration: json_cmek
|
445
|
+
end
|
446
|
+
|
369
447
|
##
|
370
448
|
# The input feature columns that were used to train this model.
|
371
449
|
#
|
@@ -376,7 +454,8 @@ module Google
|
|
376
454
|
def feature_columns
|
377
455
|
ensure_full_data!
|
378
456
|
Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
|
379
|
-
|
457
|
+
field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
|
458
|
+
StandardSql::Field.from_gapi field_gapi
|
380
459
|
end
|
381
460
|
end
|
382
461
|
|
@@ -391,7 +470,8 @@ module Google
|
|
391
470
|
def label_columns
|
392
471
|
ensure_full_data!
|
393
472
|
Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
|
394
|
-
|
473
|
+
field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
|
474
|
+
StandardSql::Field.from_gapi field_gapi
|
395
475
|
end
|
396
476
|
end
|
397
477
|
|
@@ -407,6 +487,146 @@ module Google
|
|
407
487
|
Array @gapi_json[:trainingRuns]
|
408
488
|
end
|
409
489
|
|
490
|
+
##
|
491
|
+
# Exports the model to Google Cloud Storage asynchronously, immediately
|
492
|
+
# returning an {ExtractJob} that can be used to track the progress of the
|
493
|
+
# export job. The caller may poll the service by repeatedly calling
|
494
|
+
# {Job#reload!} and {Job#done?} to detect when the job is done, or
|
495
|
+
# simply block until the job is done by calling {Job#wait_until_done!}.
|
496
|
+
# See also {#extract}.
|
497
|
+
#
|
498
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
499
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
500
|
+
# the model is a full resource representation (see {#resource_full?}),
|
501
|
+
# the location of the job will automatically be set to the location of
|
502
|
+
# the model.
|
503
|
+
#
|
504
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
505
|
+
# Exporting models
|
506
|
+
#
|
507
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
508
|
+
# should extract the model. This value should end in an object name
|
509
|
+
# prefix, since multiple objects will be exported.
|
510
|
+
# @param [String] format The exported file format. The default value is
|
511
|
+
# `ml_tf_saved_model`.
|
512
|
+
#
|
513
|
+
# The following values are supported:
|
514
|
+
#
|
515
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
516
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
517
|
+
# @param [String] job_id A user-defined ID for the extract job. The ID
|
518
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
519
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
520
|
+
# `job_id` is provided, then `prefix` will not be used.
|
521
|
+
#
|
522
|
+
# See [Generating a job
|
523
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
524
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
525
|
+
# prepended to a generated value to produce a unique job ID. For
|
526
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
527
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
528
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
529
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
530
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
531
|
+
# be used.
|
532
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
533
|
+
# the job. You can use these to organize and group your jobs.
|
534
|
+
#
|
535
|
+
# The labels applied to a resource must meet the following requirements:
|
536
|
+
#
|
537
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
538
|
+
# * Each label must be a key-value pair.
|
539
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
540
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
541
|
+
# a maximum length of 63 characters.
|
542
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
543
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
544
|
+
# international characters are allowed.
|
545
|
+
# * The key portion of a label must be unique. However, you can use the
|
546
|
+
# same key with multiple resources.
|
547
|
+
# * Keys must start with a lowercase letter or international character.
|
548
|
+
#
|
549
|
+
# @yield [job] a job configuration object
|
550
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
551
|
+
# configuration object for setting additional options.
|
552
|
+
#
|
553
|
+
# @return [Google::Cloud::Bigquery::ExtractJob]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# require "google/cloud/bigquery"
|
557
|
+
#
|
558
|
+
# bigquery = Google::Cloud::Bigquery.new
|
559
|
+
# dataset = bigquery.dataset "my_dataset"
|
560
|
+
# model = dataset.model "my_model"
|
561
|
+
#
|
562
|
+
# extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
|
563
|
+
#
|
564
|
+
# extract_job.wait_until_done!
|
565
|
+
# extract_job.done? #=> true
|
566
|
+
#
|
567
|
+
# @!group Data
|
568
|
+
#
|
569
|
+
def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
|
570
|
+
ensure_service!
|
571
|
+
options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
|
572
|
+
updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
|
573
|
+
updater.location = location if location # may be model reference
|
574
|
+
|
575
|
+
yield updater if block_given?
|
576
|
+
|
577
|
+
job_gapi = updater.to_gapi
|
578
|
+
gapi = service.extract_table job_gapi
|
579
|
+
Job.from_gapi gapi, service
|
580
|
+
end
|
581
|
+
|
582
|
+
##
|
583
|
+
# Exports the model to Google Cloud Storage using a synchronous method
|
584
|
+
# that blocks for a response. Timeouts and transient errors are generally
|
585
|
+
# handled as needed to complete the job. See also {#extract_job}.
|
586
|
+
#
|
587
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
588
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
589
|
+
# the model is a full resource representation (see {#resource_full?}),
|
590
|
+
# the location of the job will automatically be set to the location of
|
591
|
+
# the model.
|
592
|
+
#
|
593
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
594
|
+
# Exporting models
|
595
|
+
#
|
596
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
597
|
+
# should extract the model. This value should end in an object name
|
598
|
+
# prefix, since multiple objects will be exported.
|
599
|
+
# @param [String] format The exported file format. The default value is
|
600
|
+
# `ml_tf_saved_model`.
|
601
|
+
#
|
602
|
+
# The following values are supported:
|
603
|
+
#
|
604
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
605
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
606
|
+
# @yield [job] a job configuration object
|
607
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
608
|
+
# configuration object for setting additional options.
|
609
|
+
#
|
610
|
+
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
611
|
+
#
|
612
|
+
# @example
|
613
|
+
# require "google/cloud/bigquery"
|
614
|
+
#
|
615
|
+
# bigquery = Google::Cloud::Bigquery.new
|
616
|
+
# dataset = bigquery.dataset "my_dataset"
|
617
|
+
# model = dataset.model "my_model"
|
618
|
+
#
|
619
|
+
# model.extract "gs://my-bucket/#{model.model_id}"
|
620
|
+
#
|
621
|
+
# @!group Data
|
622
|
+
#
|
623
|
+
def extract extract_url, format: nil, &block
|
624
|
+
job = extract_job extract_url, format: format, &block
|
625
|
+
job.wait_until_done!
|
626
|
+
ensure_job_succeeded! job
|
627
|
+
true
|
628
|
+
end
|
629
|
+
|
410
630
|
##
|
411
631
|
# Permanently deletes the model.
|
412
632
|
#
|
@@ -481,7 +701,7 @@ module Google
|
|
481
701
|
# model = dataset.model "my_model", skip_lookup: true
|
482
702
|
# model.exists? #=> true
|
483
703
|
#
|
484
|
-
def exists? force:
|
704
|
+
def exists? force: false
|
485
705
|
return resource_exists? if force
|
486
706
|
# If we have a value, return it
|
487
707
|
return @exists unless @exists.nil?
|
@@ -595,7 +815,7 @@ module Google
|
|
595
815
|
end
|
596
816
|
|
597
817
|
##
|
598
|
-
# @private New lazy Model object without making an HTTP request.
|
818
|
+
# @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
|
599
819
|
def self.new_reference project_id, dataset_id, model_id, service
|
600
820
|
raise ArgumentError, "project_id is required" unless project_id
|
601
821
|
raise ArgumentError, "dataset_id is required" unless dataset_id
|
@@ -659,6 +879,17 @@ module Google
|
|
659
879
|
def ensure_full_data!
|
660
880
|
reload! unless resource_full?
|
661
881
|
end
|
882
|
+
|
883
|
+
def ensure_job_succeeded! job
|
884
|
+
return unless job.failed?
|
885
|
+
begin
|
886
|
+
# raise to activate ruby exception cause handling
|
887
|
+
raise job.gapi_error
|
888
|
+
rescue StandardError => e
|
889
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
890
|
+
raise Google::Cloud::Error.from_error(e)
|
891
|
+
end
|
892
|
+
end
|
662
893
|
end
|
663
894
|
end
|
664
895
|
end
|