google-cloud-bigquery 1.21.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -0
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google-cloud-bigquery.rb +9 -2
- data/lib/google/cloud/bigquery.rb +1 -1
- data/lib/google/cloud/bigquery/convert.rb +3 -1
- data/lib/google/cloud/bigquery/copy_job.rb +15 -6
- data/lib/google/cloud/bigquery/data.rb +12 -0
- data/lib/google/cloud/bigquery/dataset.rb +85 -28
- data/lib/google/cloud/bigquery/dataset/access.rb +183 -4
- data/lib/google/cloud/bigquery/external.rb +24 -0
- data/lib/google/cloud/bigquery/extract_job.rb +153 -45
- data/lib/google/cloud/bigquery/load_job.rb +21 -10
- data/lib/google/cloud/bigquery/model.rb +164 -8
- data/lib/google/cloud/bigquery/project.rb +176 -72
- data/lib/google/cloud/bigquery/query_job.rb +26 -10
- data/lib/google/cloud/bigquery/service.rb +17 -11
- data/lib/google/cloud/bigquery/table.rb +91 -46
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +7 -7
@@ -341,14 +341,19 @@ module Google
|
|
341
341
|
# the update to comply with ETag-based optimistic concurrency control.
|
342
342
|
#
|
343
343
|
# @param [Hash<String, String>] new_labels A hash containing key/value
|
344
|
-
# pairs.
|
345
|
-
#
|
346
|
-
# *
|
347
|
-
# *
|
348
|
-
#
|
349
|
-
#
|
350
|
-
#
|
351
|
-
# *
|
344
|
+
# pairs. The labels applied to a resource must meet the following requirements:
|
345
|
+
#
|
346
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
347
|
+
# * Each label must be a key-value pair.
|
348
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
349
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
350
|
+
# a maximum length of 63 characters.
|
351
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
352
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
353
|
+
# international characters are allowed.
|
354
|
+
# * The key portion of a label must be unique. However, you can use the
|
355
|
+
# same key with multiple resources.
|
356
|
+
# * Keys must start with a lowercase letter or international character.
|
352
357
|
#
|
353
358
|
# @example
|
354
359
|
# require "google/cloud/bigquery"
|
@@ -482,6 +487,146 @@ module Google
|
|
482
487
|
Array @gapi_json[:trainingRuns]
|
483
488
|
end
|
484
489
|
|
490
|
+
##
|
491
|
+
# Exports the model to Google Cloud Storage asynchronously, immediately
|
492
|
+
# returning an {ExtractJob} that can be used to track the progress of the
|
493
|
+
# export job. The caller may poll the service by repeatedly calling
|
494
|
+
# {Job#reload!} and {Job#done?} to detect when the job is done, or
|
495
|
+
# simply block until the job is done by calling {Job#wait_until_done!}.
|
496
|
+
# See also {#extract}.
|
497
|
+
#
|
498
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
499
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
500
|
+
# the model is a full resource representation (see {#resource_full?}),
|
501
|
+
# the location of the job will automatically be set to the location of
|
502
|
+
# the model.
|
503
|
+
#
|
504
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
505
|
+
# Exporting models
|
506
|
+
#
|
507
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
508
|
+
# should extract the model. This value should end in an object name
|
509
|
+
# prefix, since multiple objects will be exported.
|
510
|
+
# @param [String] format The exported file format. The default value is
|
511
|
+
# `ml_tf_saved_model`.
|
512
|
+
#
|
513
|
+
# The following values are supported:
|
514
|
+
#
|
515
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
516
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
517
|
+
# @param [String] job_id A user-defined ID for the extract job. The ID
|
518
|
+
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
519
|
+
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
520
|
+
# `job_id` is provided, then `prefix` will not be used.
|
521
|
+
#
|
522
|
+
# See [Generating a job
|
523
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
524
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
525
|
+
# prepended to a generated value to produce a unique job ID. For
|
526
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
527
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
528
|
+
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
|
529
|
+
# underscores (_), or dashes (-). The maximum length of the entire ID
|
530
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
531
|
+
# be used.
|
532
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
533
|
+
# the job. You can use these to organize and group your jobs.
|
534
|
+
#
|
535
|
+
# The labels applied to a resource must meet the following requirements:
|
536
|
+
#
|
537
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
538
|
+
# * Each label must be a key-value pair.
|
539
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
540
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
541
|
+
# a maximum length of 63 characters.
|
542
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
543
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
544
|
+
# international characters are allowed.
|
545
|
+
# * The key portion of a label must be unique. However, you can use the
|
546
|
+
# same key with multiple resources.
|
547
|
+
# * Keys must start with a lowercase letter or international character.
|
548
|
+
#
|
549
|
+
# @yield [job] a job configuration object
|
550
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
551
|
+
# configuration object for setting additional options.
|
552
|
+
#
|
553
|
+
# @return [Google::Cloud::Bigquery::ExtractJob]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# require "google/cloud/bigquery"
|
557
|
+
#
|
558
|
+
# bigquery = Google::Cloud::Bigquery.new
|
559
|
+
# dataset = bigquery.dataset "my_dataset"
|
560
|
+
# model = dataset.model "my_model"
|
561
|
+
#
|
562
|
+
# extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
|
563
|
+
#
|
564
|
+
# extract_job.wait_until_done!
|
565
|
+
# extract_job.done? #=> true
|
566
|
+
#
|
567
|
+
# @!group Data
|
568
|
+
#
|
569
|
+
def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
|
570
|
+
ensure_service!
|
571
|
+
options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
|
572
|
+
updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
|
573
|
+
updater.location = location if location # may be model reference
|
574
|
+
|
575
|
+
yield updater if block_given?
|
576
|
+
|
577
|
+
job_gapi = updater.to_gapi
|
578
|
+
gapi = service.extract_table job_gapi
|
579
|
+
Job.from_gapi gapi, service
|
580
|
+
end
|
581
|
+
|
582
|
+
##
|
583
|
+
# Exports the model to Google Cloud Storage using a synchronous method
|
584
|
+
# that blocks for a response. Timeouts and transient errors are generally
|
585
|
+
# handled as needed to complete the job. See also {#extract_job}.
|
586
|
+
#
|
587
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
588
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
589
|
+
# the model is a full resource representation (see {#resource_full?}),
|
590
|
+
# the location of the job will automatically be set to the location of
|
591
|
+
# the model.
|
592
|
+
#
|
593
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
594
|
+
# Exporting models
|
595
|
+
#
|
596
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
597
|
+
# should extract the model. This value should end in an object name
|
598
|
+
# prefix, since multiple objects will be exported.
|
599
|
+
# @param [String] format The exported file format. The default value is
|
600
|
+
# `ml_tf_saved_model`.
|
601
|
+
#
|
602
|
+
# The following values are supported:
|
603
|
+
#
|
604
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
605
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
606
|
+
# @yield [job] a job configuration object
|
607
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
608
|
+
# configuration object for setting additional options.
|
609
|
+
#
|
610
|
+
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
611
|
+
#
|
612
|
+
# @example
|
613
|
+
# require "google/cloud/bigquery"
|
614
|
+
#
|
615
|
+
# bigquery = Google::Cloud::Bigquery.new
|
616
|
+
# dataset = bigquery.dataset "my_dataset"
|
617
|
+
# model = dataset.model "my_model"
|
618
|
+
#
|
619
|
+
# model.extract "gs://my-bucket/#{model.model_id}"
|
620
|
+
#
|
621
|
+
# @!group Data
|
622
|
+
#
|
623
|
+
def extract extract_url, format: nil, &block
|
624
|
+
job = extract_job extract_url, format: format, &block
|
625
|
+
job.wait_until_done!
|
626
|
+
ensure_job_succeeded! job
|
627
|
+
true
|
628
|
+
end
|
629
|
+
|
485
630
|
##
|
486
631
|
# Permanently deletes the model.
|
487
632
|
#
|
@@ -734,6 +879,17 @@ module Google
|
|
734
879
|
def ensure_full_data!
|
735
880
|
reload! unless resource_full?
|
736
881
|
end
|
882
|
+
|
883
|
+
def ensure_job_succeeded! job
|
884
|
+
return unless job.failed?
|
885
|
+
begin
|
886
|
+
# raise to activate ruby exception cause handling
|
887
|
+
raise job.gapi_error
|
888
|
+
rescue StandardError => e
|
889
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
890
|
+
raise Google::Cloud::Error.from_error(e)
|
891
|
+
end
|
892
|
+
end
|
737
893
|
end
|
738
894
|
end
|
739
895
|
end
|
@@ -153,13 +153,21 @@ module Google
|
|
153
153
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
154
154
|
# be used.
|
155
155
|
# @param [Hash] labels A hash of user-provided labels associated with
|
156
|
-
# the job. You can use these to organize and group your jobs.
|
157
|
-
#
|
158
|
-
#
|
159
|
-
#
|
160
|
-
#
|
161
|
-
#
|
162
|
-
#
|
156
|
+
# the job. You can use these to organize and group your jobs.
|
157
|
+
#
|
158
|
+
# The labels applied to a resource must meet the following requirements:
|
159
|
+
#
|
160
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
161
|
+
# * Each label must be a key-value pair.
|
162
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
163
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
164
|
+
# a maximum length of 63 characters.
|
165
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
166
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
167
|
+
# international characters are allowed.
|
168
|
+
# * The key portion of a label must be unique. However, you can use the
|
169
|
+
# same key with multiple resources.
|
170
|
+
# * Keys must start with a lowercase letter or international character.
|
163
171
|
# @yield [job] a job configuration object
|
164
172
|
# @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
|
165
173
|
# configuration object for setting additional options.
|
@@ -411,20 +419,36 @@ module Google
|
|
411
419
|
# See [Generating a job
|
412
420
|
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
413
421
|
# @param [Hash] labels A hash of user-provided labels associated with
|
414
|
-
# the job. You can use these to organize and group your jobs.
|
415
|
-
#
|
416
|
-
#
|
417
|
-
#
|
418
|
-
#
|
419
|
-
#
|
420
|
-
#
|
422
|
+
# the job. You can use these to organize and group your jobs.
|
423
|
+
#
|
424
|
+
# The labels applied to a resource must meet the following requirements:
|
425
|
+
#
|
426
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
427
|
+
# * Each label must be a key-value pair.
|
428
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
429
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
430
|
+
# a maximum length of 63 characters.
|
431
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
432
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
433
|
+
# international characters are allowed.
|
434
|
+
# * The key portion of a label must be unique. However, you can use the
|
435
|
+
# same key with multiple resources.
|
436
|
+
# * Keys must start with a lowercase letter or international character.
|
421
437
|
# @param [Array<String>, String] udfs User-defined function resources
|
422
|
-
# used in
|
423
|
-
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
438
|
+
# used in a legacy SQL query. May be either a code resource to load from
|
439
|
+
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
424
440
|
# that contains code for a user-defined function (UDF). Providing an
|
425
441
|
# inline code resource is equivalent to providing a URI for a file
|
426
|
-
# containing the same code.
|
427
|
-
#
|
442
|
+
# containing the same code.
|
443
|
+
#
|
444
|
+
# This parameter is used for defining User Defined Function (UDF)
|
445
|
+
# resources only when using legacy SQL. Users of standard SQL should
|
446
|
+
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
|
447
|
+
# Routines API to define UDF resources.
|
448
|
+
#
|
449
|
+
# For additional information on migrating, see: [Migrating to
|
450
|
+
# standard SQL - Differences in user-defined JavaScript
|
451
|
+
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
|
428
452
|
# @param [Integer] maximum_billing_tier Deprecated: Change the billing
|
429
453
|
# tier to allow high-compute queries.
|
430
454
|
# @yield [job] a job configuration object
|
@@ -709,9 +733,12 @@ module Google
|
|
709
733
|
# sql = "SELECT name FROM `my_project.my_dataset.my_table`"
|
710
734
|
# data = bigquery.query sql
|
711
735
|
#
|
736
|
+
# # Iterate over the first page of results
|
712
737
|
# data.each do |row|
|
713
738
|
# puts row[:name]
|
714
739
|
# end
|
740
|
+
# # Retrieve the next page of results
|
741
|
+
# data = data.next if data.next?
|
715
742
|
#
|
716
743
|
# @example Query using legacy SQL:
|
717
744
|
# require "google/cloud/bigquery"
|
@@ -721,9 +748,12 @@ module Google
|
|
721
748
|
# sql = "SELECT name FROM [my_project:my_dataset.my_table]"
|
722
749
|
# data = bigquery.query sql, legacy_sql: true
|
723
750
|
#
|
751
|
+
# # Iterate over the first page of results
|
724
752
|
# data.each do |row|
|
725
753
|
# puts row[:name]
|
726
754
|
# end
|
755
|
+
# # Retrieve the next page of results
|
756
|
+
# data = data.next if data.next?
|
727
757
|
#
|
728
758
|
# @example Retrieve all rows: (See {Data#all})
|
729
759
|
# require "google/cloud/bigquery"
|
@@ -746,9 +776,12 @@ module Google
|
|
746
776
|
# "WHERE id = ?",
|
747
777
|
# params: [1]
|
748
778
|
#
|
779
|
+
# # Iterate over the first page of results
|
749
780
|
# data.each do |row|
|
750
781
|
# puts row[:name]
|
751
782
|
# end
|
783
|
+
# # Retrieve the next page of results
|
784
|
+
# data = data.next if data.next?
|
752
785
|
#
|
753
786
|
# @example Query using named query parameters:
|
754
787
|
# require "google/cloud/bigquery"
|
@@ -760,9 +793,12 @@ module Google
|
|
760
793
|
# "WHERE id = @id",
|
761
794
|
# params: { id: 1 }
|
762
795
|
#
|
796
|
+
# # Iterate over the first page of results
|
763
797
|
# data.each do |row|
|
764
798
|
# puts row[:name]
|
765
799
|
# end
|
800
|
+
# # Retrieve the next page of results
|
801
|
+
# data = data.next if data.next?
|
766
802
|
#
|
767
803
|
# @example Query using named query parameters with types:
|
768
804
|
# require "google/cloud/bigquery"
|
@@ -775,9 +811,12 @@ module Google
|
|
775
811
|
# params: { ids: [] },
|
776
812
|
# types: { ids: [:INT64] }
|
777
813
|
#
|
814
|
+
# # Iterate over the first page of results
|
778
815
|
# data.each do |row|
|
779
816
|
# puts row[:name]
|
780
817
|
# end
|
818
|
+
# # Retrieve the next page of results
|
819
|
+
# data = data.next if data.next?
|
781
820
|
#
|
782
821
|
# @example Execute a DDL statement:
|
783
822
|
# require "google/cloud/bigquery"
|
@@ -816,9 +855,12 @@ module Google
|
|
816
855
|
# query.table = dataset.table "my_table", skip_lookup: true
|
817
856
|
# end
|
818
857
|
#
|
858
|
+
# # Iterate over the first page of results
|
819
859
|
# data.each do |row|
|
820
860
|
# puts row[:name]
|
821
861
|
# end
|
862
|
+
# # Retrieve the next page of results
|
863
|
+
# data = data.next if data.next?
|
822
864
|
#
|
823
865
|
def query query, params: nil, types: nil, external: nil, max: nil, cache: true, dataset: nil, project: nil,
|
824
866
|
standard_sql: nil, legacy_sql: nil, &block
|
@@ -880,9 +922,12 @@ module Google
|
|
880
922
|
# data = bigquery.query "SELECT * FROM my_ext_table",
|
881
923
|
# external: { my_ext_table: csv_table }
|
882
924
|
#
|
925
|
+
# # Iterate over the first page of results
|
883
926
|
# data.each do |row|
|
884
927
|
# puts row[:name]
|
885
928
|
# end
|
929
|
+
# # Retrieve the next page of results
|
930
|
+
# data = data.next if data.next?
|
886
931
|
#
|
887
932
|
def external url, format: nil
|
888
933
|
ext = External.from_urls url, format
|
@@ -1276,9 +1321,12 @@ module Google
|
|
1276
1321
|
# "WHERE time_of_date = @time",
|
1277
1322
|
# params: { time: fourpm }
|
1278
1323
|
#
|
1324
|
+
# # Iterate over the first page of results
|
1279
1325
|
# data.each do |row|
|
1280
1326
|
# puts row[:name]
|
1281
1327
|
# end
|
1328
|
+
# # Retrieve the next page of results
|
1329
|
+
# data = data.next if data.next?
|
1282
1330
|
#
|
1283
1331
|
# @example Create Time with fractional seconds:
|
1284
1332
|
# require "google/cloud/bigquery"
|
@@ -1291,9 +1339,12 @@ module Google
|
|
1291
1339
|
# "WHERE time_of_date >= @time",
|
1292
1340
|
# params: { time: precise_time }
|
1293
1341
|
#
|
1342
|
+
# # Iterate over the first page of results
|
1294
1343
|
# data.each do |row|
|
1295
1344
|
# puts row[:name]
|
1296
1345
|
# end
|
1346
|
+
# # Retrieve the next page of results
|
1347
|
+
# data = data.next if data.next?
|
1297
1348
|
#
|
1298
1349
|
def time hour, minute, second
|
1299
1350
|
Bigquery::Time.new "#{hour}:#{minute}:#{second}"
|
@@ -1410,46 +1461,58 @@ module Google
|
|
1410
1461
|
end
|
1411
1462
|
|
1412
1463
|
##
|
1413
|
-
# Extracts the data from
|
1414
|
-
#
|
1415
|
-
#
|
1416
|
-
# calling {Job#reload!} and {Job#done?} to detect when the job
|
1417
|
-
# or simply block until the job is done by calling
|
1464
|
+
# Extracts the data from a table or exports a model to Google Cloud Storage
|
1465
|
+
# asynchronously, immediately returning an {ExtractJob} that can be used to
|
1466
|
+
# track the progress of the export job. The caller may poll the service by
|
1467
|
+
# repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
|
1468
|
+
# is done, or simply block until the job is done by calling
|
1418
1469
|
# {Job#wait_until_done!}. See {#extract} for the synchronous version.
|
1419
|
-
#
|
1420
|
-
#
|
1470
|
+
#
|
1471
|
+
# Use this method instead of {Table#extract_job} or {Model#extract_job} to
|
1472
|
+
# extract data from source tables or models in other projects.
|
1421
1473
|
#
|
1422
1474
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1423
1475
|
# {ExtractJob::Updater#location=} in a block passed to this method.
|
1424
1476
|
#
|
1425
|
-
# @see https://cloud.google.com/bigquery/exporting-data
|
1426
|
-
# Exporting
|
1477
|
+
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
1478
|
+
# Exporting table data
|
1479
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
1480
|
+
# Exporting models
|
1427
1481
|
#
|
1428
|
-
# @param [
|
1429
|
-
#
|
1430
|
-
# [Standard SQL Query
|
1482
|
+
# @param [Table, Model, String] source The source table or model for
|
1483
|
+
# the extract operation. This can be a table or model object; or a
|
1484
|
+
# table ID string as specified by the [Standard SQL Query
|
1431
1485
|
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
|
1432
1486
|
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
|
1433
1487
|
# Reference](https://cloud.google.com/bigquery/query-reference#from)
|
1434
1488
|
# (`project-name:dataset_id.table_id`).
|
1435
1489
|
# @param [Google::Cloud::Storage::File, String, Array<String>]
|
1436
1490
|
# extract_url The Google Storage file or file URI pattern(s) to which
|
1437
|
-
# BigQuery should extract
|
1438
|
-
#
|
1439
|
-
#
|
1491
|
+
# BigQuery should extract. For a model export this value should be a
|
1492
|
+
# string ending in an object name prefix, since multiple objects will
|
1493
|
+
# be exported.
|
1494
|
+
# @param [String] format The exported file format. The default value for
|
1495
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
1496
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
1440
1497
|
#
|
1441
|
-
#
|
1498
|
+
# Supported values for tables:
|
1442
1499
|
#
|
1443
1500
|
# * `csv` - CSV
|
1444
1501
|
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1445
1502
|
# * `avro` - [Avro](http://avro.apache.org/)
|
1503
|
+
#
|
1504
|
+
# Supported values for models:
|
1505
|
+
#
|
1506
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
1507
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
1446
1508
|
# @param [String] compression The compression type to use for exported
|
1447
1509
|
# files. Possible values include `GZIP` and `NONE`. The default value
|
1448
|
-
# is `NONE`.
|
1510
|
+
# is `NONE`. Not applicable when extracting models.
|
1449
1511
|
# @param [String] delimiter Delimiter to use between fields in the
|
1450
|
-
# exported data. Default is
|
1451
|
-
#
|
1452
|
-
#
|
1512
|
+
# exported table data. Default is `,`. Not applicable when extracting
|
1513
|
+
# models.
|
1514
|
+
# @param [Boolean] header Whether to print out a header row in table
|
1515
|
+
# exports. Default is `true`. Not applicable when extracting models.
|
1453
1516
|
# @param [String] job_id A user-defined ID for the extract job. The ID
|
1454
1517
|
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
1455
1518
|
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
@@ -1466,40 +1529,60 @@ module Google
|
|
1466
1529
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1467
1530
|
# be used.
|
1468
1531
|
# @param [Hash] labels A hash of user-provided labels associated with
|
1469
|
-
# the job. You can use these to organize and group your jobs.
|
1470
|
-
#
|
1471
|
-
#
|
1472
|
-
#
|
1473
|
-
#
|
1474
|
-
#
|
1475
|
-
#
|
1532
|
+
# the job. You can use these to organize and group your jobs.
|
1533
|
+
#
|
1534
|
+
# The labels applied to a resource must meet the following requirements:
|
1535
|
+
#
|
1536
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1537
|
+
# * Each label must be a key-value pair.
|
1538
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1539
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1540
|
+
# a maximum length of 63 characters.
|
1541
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1542
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1543
|
+
# international characters are allowed.
|
1544
|
+
# * The key portion of a label must be unique. However, you can use the
|
1545
|
+
# same key with multiple resources.
|
1546
|
+
# * Keys must start with a lowercase letter or international character.
|
1476
1547
|
# @yield [job] a job configuration object
|
1477
1548
|
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
1478
1549
|
# configuration object for setting additional options.
|
1479
1550
|
#
|
1480
1551
|
# @return [Google::Cloud::Bigquery::ExtractJob]
|
1481
1552
|
#
|
1482
|
-
# @example
|
1553
|
+
# @example Export table data
|
1483
1554
|
# require "google/cloud/bigquery"
|
1484
1555
|
#
|
1485
1556
|
# bigquery = Google::Cloud::Bigquery.new
|
1486
1557
|
#
|
1487
1558
|
# table_id = "bigquery-public-data.samples.shakespeare"
|
1488
|
-
# extract_job = bigquery.extract_job table_id,
|
1489
|
-
# "gs://my-bucket/shakespeare.csv"
|
1559
|
+
# extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
|
1490
1560
|
# extract_job.wait_until_done!
|
1491
1561
|
# extract_job.done? #=> true
|
1492
1562
|
#
|
1563
|
+
# @example Export a model
|
1564
|
+
# require "google/cloud/bigquery"
|
1565
|
+
#
|
1566
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1567
|
+
# dataset = bigquery.dataset "my_dataset"
|
1568
|
+
# model = dataset.model "my_model"
|
1569
|
+
#
|
1570
|
+
# extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
|
1571
|
+
#
|
1493
1572
|
# @!group Data
|
1494
1573
|
#
|
1495
|
-
def extract_job
|
1574
|
+
def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
|
1496
1575
|
prefix: nil, labels: nil
|
1497
1576
|
ensure_service!
|
1498
1577
|
options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
|
1499
1578
|
prefix: prefix, labels: labels }
|
1579
|
+
source_ref = if source.respond_to? :model_ref
|
1580
|
+
source.model_ref
|
1581
|
+
else
|
1582
|
+
Service.get_table_ref source, default_ref: project_ref
|
1583
|
+
end
|
1500
1584
|
|
1501
|
-
|
1502
|
-
updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
|
1585
|
+
updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
|
1503
1586
|
|
1504
1587
|
yield updater if block_given?
|
1505
1588
|
|
@@ -1509,51 +1592,63 @@ module Google
|
|
1509
1592
|
end
|
1510
1593
|
|
1511
1594
|
##
|
1512
|
-
# Extracts the data from
|
1513
|
-
#
|
1595
|
+
# Extracts the data from a table or exports a model to Google Cloud Storage
|
1596
|
+
# using a synchronous method that blocks for a response. Timeouts
|
1514
1597
|
# and transient errors are generally handled as needed to complete the
|
1515
|
-
# job. See {#extract_job} for the asynchronous version.
|
1516
|
-
#
|
1517
|
-
#
|
1598
|
+
# job. See {#extract_job} for the asynchronous version.
|
1599
|
+
#
|
1600
|
+
# Use this method instead of {Table#extract} or {Model#extract} to
|
1601
|
+
# extract data from source tables or models in other projects.
|
1518
1602
|
#
|
1519
1603
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1520
1604
|
# {ExtractJob::Updater#location=} in a block passed to this method.
|
1521
1605
|
#
|
1522
|
-
# @see https://cloud.google.com/bigquery/exporting-data
|
1523
|
-
# Exporting
|
1606
|
+
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
1607
|
+
# Exporting table data
|
1608
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
1609
|
+
# Exporting models
|
1524
1610
|
#
|
1525
|
-
# @param [
|
1526
|
-
#
|
1527
|
-
# [Standard SQL Query
|
1611
|
+
# @param [Table, Model, String] source The source table or model for
|
1612
|
+
# the extract operation. This can be a table or model object; or a
|
1613
|
+
# table ID string as specified by the [Standard SQL Query
|
1528
1614
|
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
|
1529
1615
|
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
|
1530
1616
|
# Reference](https://cloud.google.com/bigquery/query-reference#from)
|
1531
1617
|
# (`project-name:dataset_id.table_id`).
|
1532
1618
|
# @param [Google::Cloud::Storage::File, String, Array<String>]
|
1533
1619
|
# extract_url The Google Storage file or file URI pattern(s) to which
|
1534
|
-
# BigQuery should extract
|
1535
|
-
#
|
1536
|
-
#
|
1620
|
+
# BigQuery should extract. For a model export this value should be a
|
1621
|
+
# string ending in an object name prefix, since multiple objects will
|
1622
|
+
# be exported.
|
1623
|
+
# @param [String] format The exported file format. The default value for
|
1624
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
1625
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
1537
1626
|
#
|
1538
|
-
#
|
1627
|
+
# Supported values for tables:
|
1539
1628
|
#
|
1540
1629
|
# * `csv` - CSV
|
1541
1630
|
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1542
1631
|
# * `avro` - [Avro](http://avro.apache.org/)
|
1632
|
+
#
|
1633
|
+
# Supported values for models:
|
1634
|
+
#
|
1635
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
1636
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
1543
1637
|
# @param [String] compression The compression type to use for exported
|
1544
1638
|
# files. Possible values include `GZIP` and `NONE`. The default value
|
1545
|
-
# is `NONE`.
|
1639
|
+
# is `NONE`. Not applicable when extracting models.
|
1546
1640
|
# @param [String] delimiter Delimiter to use between fields in the
|
1547
|
-
# exported data. Default is
|
1548
|
-
#
|
1549
|
-
#
|
1641
|
+
# exported table data. Default is `,`. Not applicable when extracting
|
1642
|
+
# models.
|
1643
|
+
# @param [Boolean] header Whether to print out a header row in table
|
1644
|
+
# exports. Default is `true`. Not applicable when extracting models.
|
1550
1645
|
# @yield [job] a job configuration object
|
1551
1646
|
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
1552
1647
|
# configuration object for setting additional options.
|
1553
1648
|
#
|
1554
1649
|
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
1555
1650
|
#
|
1556
|
-
# @example
|
1651
|
+
# @example Export table data
|
1557
1652
|
# require "google/cloud/bigquery"
|
1558
1653
|
#
|
1559
1654
|
# bigquery = Google::Cloud::Bigquery.new
|
@@ -1561,10 +1656,19 @@ module Google
|
|
1561
1656
|
# bigquery.extract "bigquery-public-data.samples.shakespeare",
|
1562
1657
|
# "gs://my-bucket/shakespeare.csv"
|
1563
1658
|
#
|
1659
|
+
# @example Export a model
|
1660
|
+
# require "google/cloud/bigquery"
|
1661
|
+
#
|
1662
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1663
|
+
# dataset = bigquery.dataset "my_dataset"
|
1664
|
+
# model = dataset.model "my_model"
|
1665
|
+
#
|
1666
|
+
# bigquery.extract model, "gs://my-bucket/#{model.model_id}"
|
1667
|
+
#
|
1564
1668
|
# @!group Data
|
1565
1669
|
#
|
1566
|
-
def extract
|
1567
|
-
job = extract_job
|
1670
|
+
def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
|
1671
|
+
job = extract_job source, extract_url,
|
1568
1672
|
format: format,
|
1569
1673
|
compression: compression,
|
1570
1674
|
delimiter: delimiter,
|