google-cloud-bigquery 1.20.0 → 1.23.0
This diff compares the content of publicly available package versions as released to their public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -0
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google-cloud-bigquery.rb +9 -2
- data/lib/google/cloud/bigquery.rb +1 -1
- data/lib/google/cloud/bigquery/convert.rb +3 -1
- data/lib/google/cloud/bigquery/copy_job.rb +15 -6
- data/lib/google/cloud/bigquery/data.rb +12 -0
- data/lib/google/cloud/bigquery/dataset.rb +85 -28
- data/lib/google/cloud/bigquery/external.rb +24 -0
- data/lib/google/cloud/bigquery/extract_job.rb +153 -45
- data/lib/google/cloud/bigquery/job.rb +198 -0
- data/lib/google/cloud/bigquery/load_job.rb +15 -6
- data/lib/google/cloud/bigquery/model.rb +164 -8
- data/lib/google/cloud/bigquery/project.rb +242 -84
- data/lib/google/cloud/bigquery/query_job.rb +56 -6
- data/lib/google/cloud/bigquery/service.rb +19 -13
- data/lib/google/cloud/bigquery/table.rb +82 -41
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +5 -5
@@ -1303,12 +1303,21 @@ module Google
 # Sets the labels to use for the load job.
 #
 # @param [Hash] val A hash of user-provided labels associated with
-#   the job. You can use these to organize and group your jobs.
-#
-#
-#
-#
-#
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 #
 # @!group Attributes
 #
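The requirements above document the new `labels=` setter on the load job updater. A minimal sketch of where that setter comes into play (the dataset, table, and bucket names are hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# The updater yielded by load_job is where labels= applies; keys must be
# lowercase and the hash may hold at most 64 entries.
load_job = dataset.load_job "my_table", "gs://my-bucket/file.csv" do |job|
  job.labels = { "env" => "prod", "team" => "analytics" }
end
load_job.wait_until_done!
```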
@@ -341,14 +341,19 @@ module Google
 # the update to comply with ETag-based optimistic concurrency control.
 #
 # @param [Hash<String, String>] new_labels A hash containing key/value
-#   pairs.
-#
-#   *
-#   *
-#
-#
-#
-#   *
+#   pairs. The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -482,6 +487,146 @@ module Google
 Array @gapi_json[:trainingRuns]
 end
 
+##
+# Exports the model to Google Cloud Storage asynchronously, immediately
+# returning an {ExtractJob} that can be used to track the progress of the
+# export job. The caller may poll the service by repeatedly calling
+# {Job#reload!} and {Job#done?} to detect when the job is done, or
+# simply block until the job is done by calling #{Job#wait_until_done!}.
+# See also {#extract}.
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {ExtractJob::Updater#location=} in a block passed to this method. If
+# the model is a full resource representation (see {#resource_full?}),
+# the location of the job will automatically be set to the location of
+# the model.
+#
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
+#
+# @param [String] extract_url The Google Storage URI to which BigQuery
+#   should extract the model. This value should end in an object name
+#   prefix, since multiple objects will be exported.
+# @param [String] format The exported file format. The default value is
+#   `ml_tf_saved_model`.
+#
+#   The following values are supported:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
+# @param [String] job_id A user-defined ID for the extract job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
+#
+# @yield [job] a job configuration object
+# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+#   configuration object for setting additional options.
+#
+# @return [Google::Cloud::Bigquery::ExtractJob]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+#
+#   extract_job.wait_until_done!
+#   extract_job.done? #=> true
+#
+# @!group Data
+#
+def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
+  ensure_service!
+  options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
+  updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
+  updater.location = location if location # may be model reference
+
+  yield updater if block_given?
+
+  job_gapi = updater.to_gapi
+  gapi = service.extract_table job_gapi
+  Job.from_gapi gapi, service
+end
+
+##
+# Exports the model to Google Cloud Storage using a synchronous method
+# that blocks for a response. Timeouts and transient errors are generally
+# handled as needed to complete the job. See also {#extract_job}.
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {ExtractJob::Updater#location=} in a block passed to this method. If
+# the model is a full resource representation (see {#resource_full?}),
+# the location of the job will automatically be set to the location of
+# the model.
+#
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
+#
+# @param [String] extract_url The Google Storage URI to which BigQuery
+#   should extract the model. This value should end in an object name
+#   prefix, since multiple objects will be exported.
+# @param [String] format The exported file format. The default value is
+#   `ml_tf_saved_model`.
+#
+#   The following values are supported:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
+# @yield [job] a job configuration object
+# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+#   configuration object for setting additional options.
+#
+# @return [Boolean] Returns `true` if the extract operation succeeded.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   model.extract "gs://my-bucket/#{model.model_id}"
+#
+# @!group Data
+#
+def extract extract_url, format: nil, &block
+  job = extract_job extract_url, format: format, &block
+  job.wait_until_done!
+  ensure_job_succeeded! job
+  true
+end
+
 ##
 # Permanently deletes the model.
 #
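A sketch of the two new model export methods side by side, reusing the `my_dataset`/`my_model` fixtures from the examples above; the rescue clause relies on the `Google::Cloud::Error` wrapping added by `ensure_job_succeeded!` in the next hunk:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
model = bigquery.dataset("my_dataset").model "my_model"

# Asynchronous: returns an ExtractJob immediately.
job = model.extract_job "gs://my-bucket/#{model.model_id}", format: "ml_xgboost_booster"
job.wait_until_done!

# Synchronous: blocks, returns true, or raises on job failure.
begin
  model.extract "gs://my-bucket/#{model.model_id}"
rescue Google::Cloud::Error => e
  warn "export failed: #{e.message}"
end
```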
@@ -734,6 +879,17 @@ module Google
 def ensure_full_data!
   reload! unless resource_full?
 end
+
+def ensure_job_succeeded! job
+  return unless job.failed?
+  begin
+    # raise to activate ruby exception cause handling
+    raise job.gapi_error
+  rescue StandardError => e
+    # wrap Google::Apis::Error with Google::Cloud::Error
+    raise Google::Cloud::Error.from_error(e)
+  end
+end
 end
 end
 end
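The `raise` inside `begin`/`rescue` in `ensure_job_succeeded!` works because Ruby records the exception active in a rescue block as the `cause` of any exception raised there. A standalone illustration with stand-in error classes (not the gem's own):

```ruby
begin
  begin
    raise ArgumentError, "low-level API failure" # stands in for job.gapi_error
  rescue StandardError
    raise RuntimeError, "wrapped failure"        # stands in for Google::Cloud::Error.from_error(e)
  end
rescue RuntimeError => e
  puts e.cause.class   #=> ArgumentError
  puts e.cause.message #=> "low-level API failure"
end
```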
@@ -153,13 +153,21 @@ module Google
 #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
 #   be used.
 # @param [Hash] labels A hash of user-provided labels associated with
-#   the job. You can use these to organize and group your jobs.
-#
-#
-#
-#
-#
-#
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 # @yield [job] a job configuration object
 # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
 #   configuration object for setting additional options.
@@ -411,20 +419,36 @@ module Google
 #   See [Generating a job
 #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
 # @param [Hash] labels A hash of user-provided labels associated with
-#   the job. You can use these to organize and group your jobs.
-#
-#
-#
-#
-#
-#
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 # @param [Array<String>, String] udfs User-defined function resources
-#   used in
-#   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+#   used in a legacy SQL query. May be either a code resource to load from
+#   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
 #   that contains code for a user-defined function (UDF). Providing an
 #   inline code resource is equivalent to providing a URI for a file
-#   containing the same code.
-#
+#   containing the same code.
+#
+#   This parameter is used for defining User Defined Function (UDF)
+#   resources only when using legacy SQL. Users of standard SQL should
+#   leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+#   Routines API to define UDF resources.
+#
+#   For additional information on migrating, see: [Migrating to
+#   standard SQL - Differences in user-defined JavaScript
+#   functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
 # @param [Integer] maximum_billing_tier Deprecated: Change the billing
 #   tier to allow high-compute queries.
 # @yield [job] a job configuration object
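For the standard SQL path the rewritten `udfs` documentation recommends, a UDF can be declared inline with DDL rather than passed as a resource; a hedged sketch (the function and query are illustrative only):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# CREATE TEMP FUNCTION replaces legacy udfs: resources in standard SQL.
sql = <<~SQL
  CREATE TEMP FUNCTION WordLength(word STRING) AS (LENGTH(word));
  SELECT word, WordLength(word) AS len
  FROM `bigquery-public-data.samples.shakespeare`
  LIMIT 10;
SQL

job = bigquery.query_job sql
job.wait_until_done!
```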
@@ -709,9 +733,12 @@ module Google
 #   sql = "SELECT name FROM `my_project.my_dataset.my_table`"
 #   data = bigquery.query sql
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 # @example Query using legacy SQL:
 #   require "google/cloud/bigquery"
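The `data.next if data.next?` pattern added to each example advances one page per call; `Data#all` (referenced in the "Retrieve all rows" example in the next hunk) folds the same page-token handling into a lazy enumerator:

```ruby
data = bigquery.query "SELECT name FROM `my_project.my_dataset.my_table`"

# Manual paging, as in the new examples:
data = data.next if data.next?

# Enumerating across every page instead:
data.all { |row| puts row[:name] }
```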
@@ -721,9 +748,12 @@ module Google
 #   sql = "SELECT name FROM [my_project:my_dataset.my_table]"
 #   data = bigquery.query sql, legacy_sql: true
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 # @example Retrieve all rows: (See {Data#all})
 #   require "google/cloud/bigquery"
@@ -746,9 +776,12 @@ module Google
 #     "WHERE id = ?",
 #     params: [1]
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 # @example Query using named query parameters:
 #   require "google/cloud/bigquery"
@@ -760,9 +793,12 @@ module Google
 #     "WHERE id = @id",
 #     params: { id: 1 }
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 # @example Query using named query parameters with types:
 #   require "google/cloud/bigquery"
@@ -775,9 +811,12 @@ module Google
 #     params: { ids: [] },
 #     types: { ids: [:INT64] }
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 # @example Execute a DDL statement:
 #   require "google/cloud/bigquery"
@@ -816,9 +855,12 @@ module Google
 #     query.table = dataset.table "my_table", skip_lookup: true
 #   end
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 def query query, params: nil, types: nil, external: nil, max: nil, cache: true, dataset: nil, project: nil,
           standard_sql: nil, legacy_sql: nil, &block
@@ -880,9 +922,12 @@ module Google
 #   data = bigquery.query "SELECT * FROM my_ext_table",
 #     external: { my_ext_table: csv_table }
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 def external url, format: nil
   ext = External.from_urls url, format
@@ -1084,18 +1129,22 @@ module Google
 #   part of the larger set of results to view. Optional.
 # @param [Integer] max Maximum number of jobs to return. Optional.
 # @param [String] filter A filter for job state. Optional.
-# @param [Time] min_created_at Min value for {Job#created_at}. When
-#   provided, only jobs created after or at this time are returned.
-#   Optional.
-# @param [Time] max_created_at Max value for {Job#created_at}. When
-#   provided, only jobs created before or at this time are returned.
-#   Optional.
 #
 #   Acceptable values are:
 #
 #   * `done` - Finished jobs
 #   * `pending` - Pending jobs
 #   * `running` - Running jobs
+# @param [Time] min_created_at Min value for {Job#created_at}. When
+#   provided, only jobs created after or at this time are returned.
+#   Optional.
+# @param [Time] max_created_at Max value for {Job#created_at}. When
+#   provided, only jobs created before or at this time are returned.
+#   Optional.
+# @param [Google::Cloud::Bigquery::Job, String] parent_job A job
+#   object or a job ID. If set, retrieve only child jobs of the
+#   specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
+#   and {Job#parent_job_id}.
 #
 # @return [Array<Google::Cloud::Bigquery::Job>] (See
 #   {Google::Cloud::Bigquery::Job::List})
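A small sketch of the relocated `min_created_at`/`max_created_at` options, restricting the listing to jobs created in the last hour (the window itself is arbitrary):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

one_hour_ago = Time.now - 3600
recent_jobs  = bigquery.jobs min_created_at: one_hour_ago,
                             max_created_at: Time.now
recent_jobs.each { |job| puts job.job_id }
```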
@@ -1144,13 +1193,63 @@ module Google
 #     # process job
 #   end
 #
-
-
+# @example Retrieve child jobs by setting `parent_job`:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   multi_statement_sql = <<~SQL
+#     -- Declare a variable to hold names as an array.
+#     DECLARE top_names ARRAY<STRING>;
+#     -- Build an array of the top 100 names from the year 2017.
+#     SET top_names = (
+#       SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+#       FROM `bigquery-public-data.usa_names.usa_1910_current`
+#       WHERE year = 2017
+#     );
+#     -- Which names appear as words in Shakespeare's plays?
+#     SELECT
+#       name AS shakespeare_name
+#     FROM UNNEST(top_names) AS name
+#     WHERE name IN (
+#       SELECT word
+#       FROM `bigquery-public-data.samples.shakespeare`
+#     );
+#   SQL
+#
+#   job = bigquery.query_job multi_statement_sql
+#
+#   job.wait_until_done!
+#
+#   child_jobs = bigquery.jobs parent_job: job
+#
+#   child_jobs.each do |child_job|
+#     script_statistics = child_job.script_statistics
+#     puts script_statistics.evaluation_kind
+#     script_statistics.stack_frames.each do |stack_frame|
+#       puts stack_frame.text
+#     end
+#   end
+#
+def jobs all: nil,
+         token: nil,
+         max: nil,
+         filter: nil,
+         min_created_at: nil,
+         max_created_at: nil,
+         parent_job: nil
   ensure_service!
-
-
-
-
+  parent_job = parent_job.job_id if parent_job.is_a? Job
+  options = {
+    parent_job_id: parent_job,
+    all: all,
+    token: token,
+    max: max, filter: filter,
+    min_created_at: min_created_at,
+    max_created_at: max_created_at
+  }
+  gapi = service.list_jobs(**options)
+  Job::List.from_gapi gapi, service, **options
 end
 
 ##
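Because the new body normalizes the argument with `parent_job.job_id if parent_job.is_a? Job`, both forms below are equivalent (the literal ID is hypothetical):

```ruby
child_jobs = bigquery.jobs parent_job: job            # a Job object
child_jobs = bigquery.jobs parent_job: "job_12vEDtMQ" # or its job ID string
```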
@@ -1222,9 +1321,12 @@ module Google
 #     "WHERE time_of_date = @time",
 #     params: { time: fourpm }
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 # @example Create Time with fractional seconds:
 #   require "google/cloud/bigquery"
@@ -1237,9 +1339,12 @@ module Google
 #     "WHERE time_of_date >= @time",
 #     params: { time: precise_time }
 #
+#   # Iterate over the first page of results
 #   data.each do |row|
 #     puts row[:name]
 #   end
+#   # Retrieve the next page of results
+#   data = data.next if data.next?
 #
 def time hour, minute, second
   Bigquery::Time.new "#{hour}:#{minute}:#{second}"
@@ -1356,46 +1461,58 @@ module Google
 end
 
 ##
-# Extracts the data from
-#
-#
-# calling {Job#reload!} and {Job#done?} to detect when the job
-# or simply block until the job is done by calling
+# Extracts the data from a table or exports a model to Google Cloud Storage
+# asynchronously, immediately returning an {ExtractJob} that can be used to
+# track the progress of the export job. The caller may poll the service by
+# repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
+# is done, or simply block until the job is done by calling
 # #{Job#wait_until_done!}. See {#extract} for the synchronous version.
-#
-#
+#
+# Use this method instead of {Table#extract_job} or {Model#extract_job} to
+# extract data from source tables or models in other projects.
 #
 # The geographic location for the job ("US", "EU", etc.) can be set via
 # {ExtractJob::Updater#location=} in a block passed to this method.
 #
-# @see https://cloud.google.com/bigquery/exporting-data
-#   Exporting
+# @see https://cloud.google.com/bigquery/docs/exporting-data
+#   Exporting table data
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
 #
-# @param [
-#
-#   [Standard SQL Query
+# @param [Table, Model, String] source The source table or model for
+#   the extract operation. This can be a table or model object; or a
+#   table ID string as specified by the [Standard SQL Query
 #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
 #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
 #   Reference](https://cloud.google.com/bigquery/query-reference#from)
 #   (`project-name:dataset_id.table_id`).
 # @param [Google::Cloud::Storage::File, String, Array<String>]
 #   extract_url The Google Storage file or file URI pattern(s) to which
-#   BigQuery should extract
-#
-#
+#   BigQuery should extract. For a model export this value should be a
+#   string ending in an object name prefix, since multiple objects will
+#   be exported.
+# @param [String] format The exported file format. The default value for
+#   tables is `csv`. Tables with nested or repeated fields cannot be
+#   exported as CSV. The default value for models is `ml_tf_saved_model`.
 #
-#
+#   Supported values for tables:
 #
 #   * `csv` - CSV
 #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
 #   * `avro` - [Avro](http://avro.apache.org/)
+#
+#   Supported values for models:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
 # @param [String] compression The compression type to use for exported
 #   files. Possible values include `GZIP` and `NONE`. The default value
-#   is `NONE`.
+#   is `NONE`. Not applicable when extracting models.
 # @param [String] delimiter Delimiter to use between fields in the
-#   exported data. Default is
-#
-#
+#   exported table data. Default is `,`. Not applicable when extracting
+#   models.
+# @param [Boolean] header Whether to print out a header row in table
+#   exports. Default is `true`. Not applicable when extracting models.
 # @param [String] job_id A user-defined ID for the extract job. The ID
 #   must contain only letters (a-z, A-Z), numbers (0-9), underscores
 #   (_), or dashes (-). The maximum length is 1,024 characters. If
@@ -1412,40 +1529,60 @@ module Google
 #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
 #   be used.
 # @param [Hash] labels A hash of user-provided labels associated with
-#   the job. You can use these to organize and group your jobs.
-#
-#
-#
-#
-#
-#
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 # @yield [job] a job configuration object
 # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
 #   configuration object for setting additional options.
 #
 # @return [Google::Cloud::Bigquery::ExtractJob]
 #
-# @example
+# @example Export table data
 #   require "google/cloud/bigquery"
 #
 #   bigquery = Google::Cloud::Bigquery.new
 #
 #   table_id = "bigquery-public-data.samples.shakespeare"
-#   extract_job = bigquery.extract_job table_id,
-#     "gs://my-bucket/shakespeare.csv"
+#   extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
 #   extract_job.wait_until_done!
 #   extract_job.done? #=> true
 #
+# @example Export a model
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
+#
 # @!group Data
 #
-def extract_job
+def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
                 prefix: nil, labels: nil
   ensure_service!
   options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
               prefix: prefix, labels: labels }
+  source_ref = if source.respond_to? :model_ref
+                 source.model_ref
+               else
+                 Service.get_table_ref source, default_ref: project_ref
+               end
 
-
-  updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
+  updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
 
   yield updater if block_given?
 
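The `respond_to? :model_ref` check above is duck typing: a `Model` supplies `model_ref`, while any other source is resolved as a table reference. A usage sketch combining this hunk's own examples:

```ruby
# String source: resolved through the table-reference branch.
bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                     "gs://my-bucket/shakespeare.csv"

# Model source: takes the model_ref branch.
model = bigquery.dataset("my_dataset").model "my_model"
bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
```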
@@ -1455,51 +1592,63 @@ module Google
 end
 
 ##
-# Extracts the data from
-#
+# Extracts the data from a table or exports a model to Google Cloud Storage
+# using a synchronous method that blocks for a response. Timeouts
 # and transient errors are generally handled as needed to complete the
-# job. See {#extract_job} for the asynchronous version.
-#
-#
+# job. See {#extract_job} for the asynchronous version.
+#
+# Use this method instead of {Table#extract} or {Model#extract} to
+# extract data from source tables or models in other projects.
 #
 # The geographic location for the job ("US", "EU", etc.) can be set via
 # {ExtractJob::Updater#location=} in a block passed to this method.
 #
-# @see https://cloud.google.com/bigquery/exporting-data
-#   Exporting
+# @see https://cloud.google.com/bigquery/docs/exporting-data
+#   Exporting table data
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
 #
-# @param [
-#
-#   [Standard SQL Query
+# @param [Table, Model, String] source The source table or model for
+#   the extract operation. This can be a table or model object; or a
+#   table ID string as specified by the [Standard SQL Query
 #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
 #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
 #   Reference](https://cloud.google.com/bigquery/query-reference#from)
 #   (`project-name:dataset_id.table_id`).
 # @param [Google::Cloud::Storage::File, String, Array<String>]
 #   extract_url The Google Storage file or file URI pattern(s) to which
-#   BigQuery should extract
-#
-#
+#   BigQuery should extract. For a model export this value should be a
+#   string ending in an object name prefix, since multiple objects will
+#   be exported.
+# @param [String] format The exported file format. The default value for
+#   tables is `csv`. Tables with nested or repeated fields cannot be
+#   exported as CSV. The default value for models is `ml_tf_saved_model`.
 #
-#
+#   Supported values for tables:
 #
 #   * `csv` - CSV
 #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
 #   * `avro` - [Avro](http://avro.apache.org/)
+#
+#   Supported values for models:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
 # @param [String] compression The compression type to use for exported
 #   files. Possible values include `GZIP` and `NONE`. The default value
-#   is `NONE`.
+#   is `NONE`. Not applicable when extracting models.
 # @param [String] delimiter Delimiter to use between fields in the
-#   exported data. Default is
-#
-#
+#   exported table data. Default is `,`. Not applicable when extracting
+#   models.
+# @param [Boolean] header Whether to print out a header row in table
+#   exports. Default is `true`. Not applicable when extracting models.
 # @yield [job] a job configuration object
 # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
 #   configuration object for setting additional options.
 #
 # @return [Boolean] Returns `true` if the extract operation succeeded.
 #
-# @example
+# @example Export table data
 #   require "google/cloud/bigquery"
 #
 #   bigquery = Google::Cloud::Bigquery.new
@@ -1507,10 +1656,19 @@ module Google
 #   bigquery.extract "bigquery-public-data.samples.shakespeare",
 #     "gs://my-bucket/shakespeare.csv"
 #
+# @example Export a model
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+#
 # @!group Data
 #
-def extract
-  job = extract_job
+def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
+  job = extract_job source, extract_url,
                     format: format,
                     compression: compression,
                     delimiter: delimiter,