google-cloud-bigquery 1.20.0 → 1.23.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1303,12 +1303,21 @@ module Google
1303
1303
  # Sets the labels to use for the load job.
1304
1304
  #
1305
1305
  # @param [Hash] val A hash of user-provided labels associated with
1306
- # the job. You can use these to organize and group your jobs. Label
1307
- # keys and values can be no longer than 63 characters, can only
1308
- # contain lowercase letters, numeric characters, underscores and
1309
- # dashes. International characters are allowed. Label values are
1310
- # optional. Label keys must start with a letter and each label in
1311
- # the list must have a different key.
1306
+ # the job. You can use these to organize and group your jobs.
1307
+ #
1308
+ # The labels applied to a resource must meet the following requirements:
1309
+ #
1310
+ # * Each resource can have multiple labels, up to a maximum of 64.
1311
+ # * Each label must be a key-value pair.
1312
+ # * Keys have a minimum length of 1 character and a maximum length of
1313
+ # 63 characters, and cannot be empty. Values can be empty, and have
1314
+ # a maximum length of 63 characters.
1315
+ # * Keys and values can contain only lowercase letters, numeric characters,
1316
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1317
+ # international characters are allowed.
1318
+ # * The key portion of a label must be unique. However, you can use the
1319
+ # same key with multiple resources.
1320
+ # * Keys must start with a lowercase letter or international character.
1312
1321
  #
1313
1322
  # @!group Attributes
1314
1323
  #
@@ -341,14 +341,19 @@ module Google
341
341
  # the update to comply with ETag-based optimistic concurrency control.
342
342
  #
343
343
  # @param [Hash<String, String>] new_labels A hash containing key/value
344
- # pairs.
345
- #
346
- # * Label keys and values can be no longer than 63 characters.
347
- # * Label keys and values can contain only lowercase letters, numbers,
348
- # underscores, hyphens, and international characters.
349
- # * Label keys and values cannot exceed 128 bytes in size.
350
- # * Label keys must begin with a letter.
351
- # * Label keys must be unique within a model.
344
+ # pairs. The labels applied to a resource must meet the following requirements:
345
+ #
346
+ # * Each resource can have multiple labels, up to a maximum of 64.
347
+ # * Each label must be a key-value pair.
348
+ # * Keys have a minimum length of 1 character and a maximum length of
349
+ # 63 characters, and cannot be empty. Values can be empty, and have
350
+ # a maximum length of 63 characters.
351
+ # * Keys and values can contain only lowercase letters, numeric characters,
352
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
353
+ # international characters are allowed.
354
+ # * The key portion of a label must be unique. However, you can use the
355
+ # same key with multiple resources.
356
+ # * Keys must start with a lowercase letter or international character.
352
357
  #
353
358
  # @example
354
359
  # require "google/cloud/bigquery"
@@ -482,6 +487,146 @@ module Google
482
487
  Array @gapi_json[:trainingRuns]
483
488
  end
484
489
 
490
+ ##
491
+ # Exports the model to Google Cloud Storage asynchronously, immediately
492
+ # returning an {ExtractJob} that can be used to track the progress of the
493
+ # export job. The caller may poll the service by repeatedly calling
494
+ # {Job#reload!} and {Job#done?} to detect when the job is done, or
495
+ # simply block until the job is done by calling {Job#wait_until_done!}.
496
+ # See also {#extract}.
497
+ #
498
+ # The geographic location for the job ("US", "EU", etc.) can be set via
499
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
500
+ # the model is a full resource representation (see {#resource_full?}),
501
+ # the location of the job will automatically be set to the location of
502
+ # the model.
503
+ #
504
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
505
+ # Exporting models
506
+ #
507
+ # @param [String] extract_url The Google Storage URI to which BigQuery
508
+ # should extract the model. This value should end in an object name
509
+ # prefix, since multiple objects will be exported.
510
+ # @param [String] format The exported file format. The default value is
511
+ # `ml_tf_saved_model`.
512
+ #
513
+ # The following values are supported:
514
+ #
515
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
516
+ # * `ml_xgboost_booster` - XGBoost Booster
517
+ # @param [String] job_id A user-defined ID for the extract job. The ID
518
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
519
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
520
+ # `job_id` is provided, then `prefix` will not be used.
521
+ #
522
+ # See [Generating a job
523
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
524
+ # @param [String] prefix A string, usually human-readable, that will be
525
+ # prepended to a generated value to produce a unique job ID. For
526
+ # example, the prefix `daily_import_job_` can be given to generate a
527
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
528
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
529
+ # underscores (_), or dashes (-). The maximum length of the entire ID
530
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
531
+ # be used.
532
+ # @param [Hash] labels A hash of user-provided labels associated with
533
+ # the job. You can use these to organize and group your jobs.
534
+ #
535
+ # The labels applied to a resource must meet the following requirements:
536
+ #
537
+ # * Each resource can have multiple labels, up to a maximum of 64.
538
+ # * Each label must be a key-value pair.
539
+ # * Keys have a minimum length of 1 character and a maximum length of
540
+ # 63 characters, and cannot be empty. Values can be empty, and have
541
+ # a maximum length of 63 characters.
542
+ # * Keys and values can contain only lowercase letters, numeric characters,
543
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
544
+ # international characters are allowed.
545
+ # * The key portion of a label must be unique. However, you can use the
546
+ # same key with multiple resources.
547
+ # * Keys must start with a lowercase letter or international character.
548
+ #
549
+ # @yield [job] a job configuration object
550
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
551
+ # configuration object for setting additional options.
552
+ #
553
+ # @return [Google::Cloud::Bigquery::ExtractJob]
554
+ #
555
+ # @example
556
+ # require "google/cloud/bigquery"
557
+ #
558
+ # bigquery = Google::Cloud::Bigquery.new
559
+ # dataset = bigquery.dataset "my_dataset"
560
+ # model = dataset.model "my_model"
561
+ #
562
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
563
+ #
564
+ # extract_job.wait_until_done!
565
+ # extract_job.done? #=> true
566
+ #
567
+ # @!group Data
568
+ #
569
+ def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
570
+ ensure_service!
571
+ options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
572
+ updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
573
+ updater.location = location if location # may be model reference
574
+
575
+ yield updater if block_given?
576
+
577
+ job_gapi = updater.to_gapi
578
+ gapi = service.extract_table job_gapi
579
+ Job.from_gapi gapi, service
580
+ end
581
+
582
+ ##
583
+ # Exports the model to Google Cloud Storage using a synchronous method
584
+ # that blocks for a response. Timeouts and transient errors are generally
585
+ # handled as needed to complete the job. See also {#extract_job}.
586
+ #
587
+ # The geographic location for the job ("US", "EU", etc.) can be set via
588
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
589
+ # the model is a full resource representation (see {#resource_full?}),
590
+ # the location of the job will automatically be set to the location of
591
+ # the model.
592
+ #
593
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
594
+ # Exporting models
595
+ #
596
+ # @param [String] extract_url The Google Storage URI to which BigQuery
597
+ # should extract the model. This value should end in an object name
598
+ # prefix, since multiple objects will be exported.
599
+ # @param [String] format The exported file format. The default value is
600
+ # `ml_tf_saved_model`.
601
+ #
602
+ # The following values are supported:
603
+ #
604
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
605
+ # * `ml_xgboost_booster` - XGBoost Booster
606
+ # @yield [job] a job configuration object
607
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
608
+ # configuration object for setting additional options.
609
+ #
610
+ # @return [Boolean] Returns `true` if the extract operation succeeded.
611
+ #
612
+ # @example
613
+ # require "google/cloud/bigquery"
614
+ #
615
+ # bigquery = Google::Cloud::Bigquery.new
616
+ # dataset = bigquery.dataset "my_dataset"
617
+ # model = dataset.model "my_model"
618
+ #
619
+ # model.extract "gs://my-bucket/#{model.model_id}"
620
+ #
621
+ # @!group Data
622
+ #
623
+ def extract extract_url, format: nil, &block
624
+ job = extract_job extract_url, format: format, &block
625
+ job.wait_until_done!
626
+ ensure_job_succeeded! job
627
+ true
628
+ end
629
+
485
630
  ##
486
631
  # Permanently deletes the model.
487
632
  #
@@ -734,6 +879,17 @@ module Google
734
879
  def ensure_full_data!
735
880
  reload! unless resource_full?
736
881
  end
882
+
883
+ def ensure_job_succeeded! job
884
+ return unless job.failed?
885
+ begin
886
+ # raise to activate ruby exception cause handling
887
+ raise job.gapi_error
888
+ rescue StandardError => e
889
+ # wrap Google::Apis::Error with Google::Cloud::Error
890
+ raise Google::Cloud::Error.from_error(e)
891
+ end
892
+ end
737
893
  end
738
894
  end
739
895
  end
@@ -153,13 +153,21 @@ module Google
153
153
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
154
154
  # be used.
155
155
  # @param [Hash] labels A hash of user-provided labels associated with
156
- # the job. You can use these to organize and group your jobs. Label
157
- # keys and values can be no longer than 63 characters, can only
158
- # contain lowercase letters, numeric characters, underscores and
159
- # dashes. International characters are allowed. Label values are
160
- # optional. Label keys must start with a letter and each label in the
161
- # list must have a different key. See [Requirements for
162
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
156
+ # the job. You can use these to organize and group your jobs.
157
+ #
158
+ # The labels applied to a resource must meet the following requirements:
159
+ #
160
+ # * Each resource can have multiple labels, up to a maximum of 64.
161
+ # * Each label must be a key-value pair.
162
+ # * Keys have a minimum length of 1 character and a maximum length of
163
+ # 63 characters, and cannot be empty. Values can be empty, and have
164
+ # a maximum length of 63 characters.
165
+ # * Keys and values can contain only lowercase letters, numeric characters,
166
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
167
+ # international characters are allowed.
168
+ # * The key portion of a label must be unique. However, you can use the
169
+ # same key with multiple resources.
170
+ # * Keys must start with a lowercase letter or international character.
163
171
  # @yield [job] a job configuration object
164
172
  # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
165
173
  # configuration object for setting additional options.
@@ -411,20 +419,36 @@ module Google
411
419
  # See [Generating a job
412
420
  # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
413
421
  # @param [Hash] labels A hash of user-provided labels associated with
414
- # the job. You can use these to organize and group your jobs. Label
415
- # keys and values can be no longer than 63 characters, can only
416
- # contain lowercase letters, numeric characters, underscores and
417
- # dashes. International characters are allowed. Label values are
418
- # optional. Label keys must start with a letter and each label in the
419
- # list must have a different key. See [Requirements for
420
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
422
+ # the job. You can use these to organize and group your jobs.
423
+ #
424
+ # The labels applied to a resource must meet the following requirements:
425
+ #
426
+ # * Each resource can have multiple labels, up to a maximum of 64.
427
+ # * Each label must be a key-value pair.
428
+ # * Keys have a minimum length of 1 character and a maximum length of
429
+ # 63 characters, and cannot be empty. Values can be empty, and have
430
+ # a maximum length of 63 characters.
431
+ # * Keys and values can contain only lowercase letters, numeric characters,
432
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
433
+ # international characters are allowed.
434
+ # * The key portion of a label must be unique. However, you can use the
435
+ # same key with multiple resources.
436
+ # * Keys must start with a lowercase letter or international character.
421
437
  # @param [Array<String>, String] udfs User-defined function resources
422
- # used in the query. May be either a code resource to load from a
423
- # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
438
+ # used in a legacy SQL query. May be either a code resource to load from
439
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
424
440
  # that contains code for a user-defined function (UDF). Providing an
425
441
  # inline code resource is equivalent to providing a URI for a file
426
- # containing the same code. See [User-Defined
427
- # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
442
+ # containing the same code.
443
+ #
444
+ # This parameter is used for defining User Defined Function (UDF)
445
+ # resources only when using legacy SQL. Users of standard SQL should
446
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
447
+ # Routines API to define UDF resources.
448
+ #
449
+ # For additional information on migrating, see: [Migrating to
450
+ # standard SQL - Differences in user-defined JavaScript
451
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
428
452
  # @param [Integer] maximum_billing_tier Deprecated: Change the billing
429
453
  # tier to allow high-compute queries.
430
454
  # @yield [job] a job configuration object
@@ -709,9 +733,12 @@ module Google
709
733
  # sql = "SELECT name FROM `my_project.my_dataset.my_table`"
710
734
  # data = bigquery.query sql
711
735
  #
736
+ # # Iterate over the first page of results
712
737
  # data.each do |row|
713
738
  # puts row[:name]
714
739
  # end
740
+ # # Retrieve the next page of results
741
+ # data = data.next if data.next?
715
742
  #
716
743
  # @example Query using legacy SQL:
717
744
  # require "google/cloud/bigquery"
@@ -721,9 +748,12 @@ module Google
721
748
  # sql = "SELECT name FROM [my_project:my_dataset.my_table]"
722
749
  # data = bigquery.query sql, legacy_sql: true
723
750
  #
751
+ # # Iterate over the first page of results
724
752
  # data.each do |row|
725
753
  # puts row[:name]
726
754
  # end
755
+ # # Retrieve the next page of results
756
+ # data = data.next if data.next?
727
757
  #
728
758
  # @example Retrieve all rows: (See {Data#all})
729
759
  # require "google/cloud/bigquery"
@@ -746,9 +776,12 @@ module Google
746
776
  # "WHERE id = ?",
747
777
  # params: [1]
748
778
  #
779
+ # # Iterate over the first page of results
749
780
  # data.each do |row|
750
781
  # puts row[:name]
751
782
  # end
783
+ # # Retrieve the next page of results
784
+ # data = data.next if data.next?
752
785
  #
753
786
  # @example Query using named query parameters:
754
787
  # require "google/cloud/bigquery"
@@ -760,9 +793,12 @@ module Google
760
793
  # "WHERE id = @id",
761
794
  # params: { id: 1 }
762
795
  #
796
+ # # Iterate over the first page of results
763
797
  # data.each do |row|
764
798
  # puts row[:name]
765
799
  # end
800
+ # # Retrieve the next page of results
801
+ # data = data.next if data.next?
766
802
  #
767
803
  # @example Query using named query parameters with types:
768
804
  # require "google/cloud/bigquery"
@@ -775,9 +811,12 @@ module Google
775
811
  # params: { ids: [] },
776
812
  # types: { ids: [:INT64] }
777
813
  #
814
+ # # Iterate over the first page of results
778
815
  # data.each do |row|
779
816
  # puts row[:name]
780
817
  # end
818
+ # # Retrieve the next page of results
819
+ # data = data.next if data.next?
781
820
  #
782
821
  # @example Execute a DDL statement:
783
822
  # require "google/cloud/bigquery"
@@ -816,9 +855,12 @@ module Google
816
855
  # query.table = dataset.table "my_table", skip_lookup: true
817
856
  # end
818
857
  #
858
+ # # Iterate over the first page of results
819
859
  # data.each do |row|
820
860
  # puts row[:name]
821
861
  # end
862
+ # # Retrieve the next page of results
863
+ # data = data.next if data.next?
822
864
  #
823
865
  def query query, params: nil, types: nil, external: nil, max: nil, cache: true, dataset: nil, project: nil,
824
866
  standard_sql: nil, legacy_sql: nil, &block
@@ -880,9 +922,12 @@ module Google
880
922
  # data = bigquery.query "SELECT * FROM my_ext_table",
881
923
  # external: { my_ext_table: csv_table }
882
924
  #
925
+ # # Iterate over the first page of results
883
926
  # data.each do |row|
884
927
  # puts row[:name]
885
928
  # end
929
+ # # Retrieve the next page of results
930
+ # data = data.next if data.next?
886
931
  #
887
932
  def external url, format: nil
888
933
  ext = External.from_urls url, format
@@ -1084,18 +1129,22 @@ module Google
1084
1129
  # part of the larger set of results to view. Optional.
1085
1130
  # @param [Integer] max Maximum number of jobs to return. Optional.
1086
1131
  # @param [String] filter A filter for job state. Optional.
1087
- # @param [Time] min_created_at Min value for {Job#created_at}. When
1088
- # provided, only jobs created after or at this time are returned.
1089
- # Optional.
1090
- # @param [Time] max_created_at Max value for {Job#created_at}. When
1091
- # provided, only jobs created before or at this time are returned.
1092
- # Optional.
1093
1132
  #
1094
1133
  # Acceptable values are:
1095
1134
  #
1096
1135
  # * `done` - Finished jobs
1097
1136
  # * `pending` - Pending jobs
1098
1137
  # * `running` - Running jobs
1138
+ # @param [Time] min_created_at Min value for {Job#created_at}. When
1139
+ # provided, only jobs created after or at this time are returned.
1140
+ # Optional.
1141
+ # @param [Time] max_created_at Max value for {Job#created_at}. When
1142
+ # provided, only jobs created before or at this time are returned.
1143
+ # Optional.
1144
+ # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
1145
+ # object or a job ID. If set, retrieve only child jobs of the
1146
+ # specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
1147
+ # and {Job#parent_job_id}.
1099
1148
  #
1100
1149
  # @return [Array<Google::Cloud::Bigquery::Job>] (See
1101
1150
  # {Google::Cloud::Bigquery::Job::List})
@@ -1144,13 +1193,63 @@ module Google
1144
1193
  # # process job
1145
1194
  # end
1146
1195
  #
1147
- def jobs all: nil, token: nil, max: nil, filter: nil,
1148
- min_created_at: nil, max_created_at: nil
1196
+ # @example Retrieve child jobs by setting `parent_job`:
1197
+ # require "google/cloud/bigquery"
1198
+ #
1199
+ # bigquery = Google::Cloud::Bigquery.new
1200
+ #
1201
+ # multi_statement_sql = <<~SQL
1202
+ # -- Declare a variable to hold names as an array.
1203
+ # DECLARE top_names ARRAY<STRING>;
1204
+ # -- Build an array of the top 100 names from the year 2017.
1205
+ # SET top_names = (
1206
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
1207
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
1208
+ # WHERE year = 2017
1209
+ # );
1210
+ # -- Which names appear as words in Shakespeare's plays?
1211
+ # SELECT
1212
+ # name AS shakespeare_name
1213
+ # FROM UNNEST(top_names) AS name
1214
+ # WHERE name IN (
1215
+ # SELECT word
1216
+ # FROM `bigquery-public-data.samples.shakespeare`
1217
+ # );
1218
+ # SQL
1219
+ #
1220
+ # job = bigquery.query_job multi_statement_sql
1221
+ #
1222
+ # job.wait_until_done!
1223
+ #
1224
+ # child_jobs = bigquery.jobs parent_job: job
1225
+ #
1226
+ # child_jobs.each do |child_job|
1227
+ # script_statistics = child_job.script_statistics
1228
+ # puts script_statistics.evaluation_kind
1229
+ # script_statistics.stack_frames.each do |stack_frame|
1230
+ # puts stack_frame.text
1231
+ # end
1232
+ # end
1233
+ #
1234
+ def jobs all: nil,
1235
+ token: nil,
1236
+ max: nil,
1237
+ filter: nil,
1238
+ min_created_at: nil,
1239
+ max_created_at: nil,
1240
+ parent_job: nil
1149
1241
  ensure_service!
1150
- options = { all: all, token: token, max: max, filter: filter, min_created_at: min_created_at,
1151
- max_created_at: max_created_at }
1152
- gapi = service.list_jobs options
1153
- Job::List.from_gapi gapi, service, options
1242
+ parent_job = parent_job.job_id if parent_job.is_a? Job
1243
+ options = {
1244
+ parent_job_id: parent_job,
1245
+ all: all,
1246
+ token: token,
1247
+ max: max, filter: filter,
1248
+ min_created_at: min_created_at,
1249
+ max_created_at: max_created_at
1250
+ }
1251
+ gapi = service.list_jobs(**options)
1252
+ Job::List.from_gapi gapi, service, **options
1154
1253
  end
1155
1254
 
1156
1255
  ##
@@ -1222,9 +1321,12 @@ module Google
1222
1321
  # "WHERE time_of_date = @time",
1223
1322
  # params: { time: fourpm }
1224
1323
  #
1324
+ # # Iterate over the first page of results
1225
1325
  # data.each do |row|
1226
1326
  # puts row[:name]
1227
1327
  # end
1328
+ # # Retrieve the next page of results
1329
+ # data = data.next if data.next?
1228
1330
  #
1229
1331
  # @example Create Time with fractional seconds:
1230
1332
  # require "google/cloud/bigquery"
@@ -1237,9 +1339,12 @@ module Google
1237
1339
  # "WHERE time_of_date >= @time",
1238
1340
  # params: { time: precise_time }
1239
1341
  #
1342
+ # # Iterate over the first page of results
1240
1343
  # data.each do |row|
1241
1344
  # puts row[:name]
1242
1345
  # end
1346
+ # # Retrieve the next page of results
1347
+ # data = data.next if data.next?
1243
1348
  #
1244
1349
  def time hour, minute, second
1245
1350
  Bigquery::Time.new "#{hour}:#{minute}:#{second}"
@@ -1356,46 +1461,58 @@ module Google
1356
1461
  end
1357
1462
 
1358
1463
  ##
1359
- # Extracts the data from the provided table to a Google Cloud Storage
1360
- # file using an asynchronous method. In this method, an {ExtractJob} is
1361
- # immediately returned. The caller may poll the service by repeatedly
1362
- # calling {Job#reload!} and {Job#done?} to detect when the job is done,
1363
- # or simply block until the job is done by calling
1464
+ # Extracts the data from a table or exports a model to Google Cloud Storage
1465
+ # asynchronously, immediately returning an {ExtractJob} that can be used to
1466
+ # track the progress of the export job. The caller may poll the service by
1467
+ # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
1468
+ # is done, or simply block until the job is done by calling
1364
1469
  # {Job#wait_until_done!}. See {#extract} for the synchronous version.
1365
- # Use this method instead of {Table#extract_job} to extract data from
1366
- # source tables in other projects.
1470
+ #
1471
+ # Use this method instead of {Table#extract_job} or {Model#extract_job} to
1472
+ # extract data from source tables or models in other projects.
1367
1473
  #
1368
1474
  # The geographic location for the job ("US", "EU", etc.) can be set via
1369
1475
  # {ExtractJob::Updater#location=} in a block passed to this method.
1370
1476
  #
1371
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
1372
- # Exporting Data From BigQuery
1477
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
1478
+ # Exporting table data
1479
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
1480
+ # Exporting models
1373
1481
  #
1374
- # @param [String, Table] table The source table from which to extract
1375
- # data. This can be a table object; or a string ID as specified by the
1376
- # [Standard SQL Query
1482
+ # @param [Table, Model, String] source The source table or model for
1483
+ # the extract operation. This can be a table or model object; or a
1484
+ # table ID string as specified by the [Standard SQL Query
1377
1485
  # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
1378
1486
  # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
1379
1487
  # Reference](https://cloud.google.com/bigquery/query-reference#from)
1380
1488
  # (`project-name:dataset_id.table_id`).
1381
1489
  # @param [Google::Cloud::Storage::File, String, Array<String>]
1382
1490
  # extract_url The Google Storage file or file URI pattern(s) to which
1383
- # BigQuery should extract the table data.
1384
- # @param [String] format The exported file format. The default value is
1385
- # `csv`.
1491
+ # BigQuery should extract. For a model export this value should be a
1492
+ # string ending in an object name prefix, since multiple objects will
1493
+ # be exported.
1494
+ # @param [String] format The exported file format. The default value for
1495
+ # tables is `csv`. Tables with nested or repeated fields cannot be
1496
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
1386
1497
  #
1387
- # The following values are supported:
1498
+ # Supported values for tables:
1388
1499
  #
1389
1500
  # * `csv` - CSV
1390
1501
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1391
1502
  # * `avro` - [Avro](http://avro.apache.org/)
1503
+ #
1504
+ # Supported values for models:
1505
+ #
1506
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
1507
+ # * `ml_xgboost_booster` - XGBoost Booster
1392
1508
  # @param [String] compression The compression type to use for exported
1393
1509
  # files. Possible values include `GZIP` and `NONE`. The default value
1394
- # is `NONE`.
1510
+ # is `NONE`. Not applicable when extracting models.
1395
1511
  # @param [String] delimiter Delimiter to use between fields in the
1396
- # exported data. Default is <code>,</code>.
1397
- # @param [Boolean] header Whether to print out a header row in the
1398
- # results. Default is `true`.
1512
+ # exported table data. Default is `,`. Not applicable when extracting
1513
+ # models.
1514
+ # @param [Boolean] header Whether to print out a header row in table
1515
+ # exports. Default is `true`. Not applicable when extracting models.
1399
1516
  # @param [String] job_id A user-defined ID for the extract job. The ID
1400
1517
  # must contain only letters (a-z, A-Z), numbers (0-9), underscores
1401
1518
  # (_), or dashes (-). The maximum length is 1,024 characters. If
@@ -1412,40 +1529,60 @@ module Google
1412
1529
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1413
1530
  # be used.
1414
1531
  # @param [Hash] labels A hash of user-provided labels associated with
1415
- # the job. You can use these to organize and group your jobs. Label
1416
- # keys and values can be no longer than 63 characters, can only
1417
- # contain lowercase letters, numeric characters, underscores and
1418
- # dashes. International characters are allowed. Label values are
1419
- # optional. Label keys must start with a letter and each label in the
1420
- # list must have a different key. See [Requirements for
1421
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1532
+ # the job. You can use these to organize and group your jobs.
1533
+ #
1534
+ # The labels applied to a resource must meet the following requirements:
1535
+ #
1536
+ # * Each resource can have multiple labels, up to a maximum of 64.
1537
+ # * Each label must be a key-value pair.
1538
+ # * Keys have a minimum length of 1 character and a maximum length of
1539
+ # 63 characters, and cannot be empty. Values can be empty, and have
1540
+ # a maximum length of 63 characters.
1541
+ # * Keys and values can contain only lowercase letters, numeric characters,
1542
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1543
+ # international characters are allowed.
1544
+ # * The key portion of a label must be unique. However, you can use the
1545
+ # same key with multiple resources.
1546
+ # * Keys must start with a lowercase letter or international character.
1422
1547
  # @yield [job] a job configuration object
1423
1548
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
1424
1549
  # configuration object for setting additional options.
1425
1550
  #
1426
1551
  # @return [Google::Cloud::Bigquery::ExtractJob]
1427
1552
  #
1428
- # @example
1553
+ # @example Export table data
1429
1554
  # require "google/cloud/bigquery"
1430
1555
  #
1431
1556
  # bigquery = Google::Cloud::Bigquery.new
1432
1557
  #
1433
1558
  # table_id = "bigquery-public-data.samples.shakespeare"
1434
- # extract_job = bigquery.extract_job table_id,
1435
- # "gs://my-bucket/shakespeare.csv"
1559
+ # extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
1436
1560
  # extract_job.wait_until_done!
1437
1561
  # extract_job.done? #=> true
1438
1562
  #
1563
+ # @example Export a model
1564
+ # require "google/cloud/bigquery"
1565
+ #
1566
+ # bigquery = Google::Cloud::Bigquery.new
1567
+ # dataset = bigquery.dataset "my_dataset"
1568
+ # model = dataset.model "my_model"
1569
+ #
1570
+ # extract_job = bigquery.extract model, "gs://my-bucket/#{model.model_id}"
1571
+ #
1439
1572
  # @!group Data
1440
1573
  #
1441
- def extract_job table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
1574
+ def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
1442
1575
  prefix: nil, labels: nil
1443
1576
  ensure_service!
1444
1577
  options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
1445
1578
  prefix: prefix, labels: labels }
1579
+ source_ref = if source.respond_to? :model_ref
1580
+ source.model_ref
1581
+ else
1582
+ Service.get_table_ref source, default_ref: project_ref
1583
+ end
1446
1584
 
1447
- table_ref = Service.get_table_ref table, default_ref: project_ref
1448
- updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
1585
+ updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
1449
1586
 
1450
1587
  yield updater if block_given?
1451
1588
 
@@ -1455,51 +1592,63 @@ module Google
1455
1592
  end
1456
1593
 
1457
1594
  ##
1458
- # Extracts the data from the provided table to a Google Cloud Storage
1459
- # file using a synchronous method that blocks for a response. Timeouts
1595
+ # Extracts the data from a table or exports a model to Google Cloud Storage
1596
+ # using a synchronous method that blocks for a response. Timeouts
1460
1597
  # and transient errors are generally handled as needed to complete the
1461
- # job. See {#extract_job} for the asynchronous version. Use this method
1462
- # instead of {Table#extract} to extract data from source tables in other
1463
- # projects.
1598
+ # job. See {#extract_job} for the asynchronous version.
1599
+ #
1600
+ # Use this method instead of {Table#extract} or {Model#extract} to
1601
+ # extract data from source tables or models in other projects.
1464
1602
  #
1465
1603
  # The geographic location for the job ("US", "EU", etc.) can be set via
1466
1604
  # {ExtractJob::Updater#location=} in a block passed to this method.
1467
1605
  #
1468
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
1469
- # Exporting Data From BigQuery
1606
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
1607
+ # Exporting table data
1608
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
1609
+ # Exporting models
1470
1610
  #
1471
- # @param [String, Table] table The source table from which to extract
1472
- # data. This can be a table object; or a string ID as specified by the
1473
- # [Standard SQL Query
1611
+ # @param [Table, Model, String] source The source table or model for
1612
+ # the extract operation. This can be a table or model object; or a
1613
+ # table ID string as specified by the [Standard SQL Query
1474
1614
  # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
1475
1615
  # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
1476
1616
  # Reference](https://cloud.google.com/bigquery/query-reference#from)
1477
1617
  # (`project-name:dataset_id.table_id`).
1478
1618
  # @param [Google::Cloud::Storage::File, String, Array<String>]
1479
1619
  # extract_url The Google Storage file or file URI pattern(s) to which
1480
- # BigQuery should extract the table data.
1481
- # @param [String] format The exported file format. The default value is
1482
- # `csv`.
1620
+ # BigQuery should extract. For a model export this value should be a
1621
+ # string ending in an object name prefix, since multiple objects will
1622
+ # be exported.
1623
+ # @param [String] format The exported file format. The default value for
1624
+ # tables is `csv`. Tables with nested or repeated fields cannot be
1625
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
1483
1626
  #
1484
- # The following values are supported:
1627
+ # Supported values for tables:
1485
1628
  #
1486
1629
  # * `csv` - CSV
1487
1630
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1488
1631
  # * `avro` - [Avro](http://avro.apache.org/)
1632
+ #
1633
+ # Supported values for models:
1634
+ #
1635
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
1636
+ # * `ml_xgboost_booster` - XGBoost Booster
1489
1637
  # @param [String] compression The compression type to use for exported
1490
1638
  # files. Possible values include `GZIP` and `NONE`. The default value
1491
- # is `NONE`.
1639
+ # is `NONE`. Not applicable when extracting models.
1492
1640
  # @param [String] delimiter Delimiter to use between fields in the
1493
- # exported data. Default is <code>,</code>.
1494
- # @param [Boolean] header Whether to print out a header row in the
1495
- # results. Default is `true`.
1641
+ # exported table data. Default is `,`. Not applicable when extracting
1642
+ # models.
1643
+ # @param [Boolean] header Whether to print out a header row in table
1644
+ # exports. Default is `true`. Not applicable when extracting models.
1496
1645
  # @yield [job] a job configuration object
1497
1646
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
1498
1647
  # configuration object for setting additional options.
1499
1648
  #
1500
1649
  # @return [Boolean] Returns `true` if the extract operation succeeded.
1501
1650
  #
1502
- # @example
1651
+ # @example Export table data
1503
1652
  # require "google/cloud/bigquery"
1504
1653
  #
1505
1654
  # bigquery = Google::Cloud::Bigquery.new
@@ -1507,10 +1656,19 @@ module Google
1507
1656
  # bigquery.extract "bigquery-public-data.samples.shakespeare",
1508
1657
  # "gs://my-bucket/shakespeare.csv"
1509
1658
  #
1659
+ # @example Export a model
1660
+ # require "google/cloud/bigquery"
1661
+ #
1662
+ # bigquery = Google::Cloud::Bigquery.new
1663
+ # dataset = bigquery.dataset "my_dataset"
1664
+ # model = dataset.model "my_model"
1665
+ #
1666
+ # bigquery.extract model, "gs://my-bucket/#{model.model_id}"
1667
+ #
1510
1668
  # @!group Data
1511
1669
  #
1512
- def extract table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
1513
- job = extract_job table, extract_url,
1670
+ def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
1671
+ job = extract_job source, extract_url,
1514
1672
  format: format,
1515
1673
  compression: compression,
1516
1674
  delimiter: delimiter,