google-cloud-bigquery 1.21.2 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/lib/google/cloud/bigquery/convert.rb +3 -1
- data/lib/google/cloud/bigquery/copy_job.rb +15 -6
- data/lib/google/cloud/bigquery/dataset.rb +43 -20
- data/lib/google/cloud/bigquery/extract_job.rb +153 -45
- data/lib/google/cloud/bigquery/load_job.rb +15 -6
- data/lib/google/cloud/bigquery/model.rb +164 -8
- data/lib/google/cloud/bigquery/project.rb +137 -68
- data/lib/google/cloud/bigquery/query_job.rb +15 -6
- data/lib/google/cloud/bigquery/service.rb +12 -10
- data/lib/google/cloud/bigquery/table.rb +63 -32
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +2 -2
@@ -1303,12 +1303,21 @@ module Google
|
|
1303
1303
|
# Sets the labels to use for the load job.
|
1304
1304
|
#
|
1305
1305
|
# @param [Hash] val A hash of user-provided labels associated with
|
1306
|
-
# the job. You can use these to organize and group your jobs.
|
1307
|
-
#
|
1308
|
-
#
|
1309
|
-
#
|
1310
|
-
#
|
1311
|
-
#
|
1306
|
+
# the job. You can use these to organize and group your jobs.
|
1307
|
+
#
|
1308
|
+
# The labels applied to a resource must meet the following requirements:
|
1309
|
+
#
|
1310
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1311
|
+
# * Each label must be a key-value pair.
|
1312
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1313
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1314
|
+
# a maximum length of 63 characters.
|
1315
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1316
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1317
|
+
# international characters are allowed.
|
1318
|
+
# * The key portion of a label must be unique. However, you can use the
|
1319
|
+
# same key with multiple resources.
|
1320
|
+
# * Keys must start with a lowercase letter or international character.
|
1312
1321
|
#
|
1313
1322
|
# @!group Attributes
|
1314
1323
|
#
|
@@ -341,14 +341,19 @@ module Google
|
|
341
341
|
# the update to comply with ETag-based optimistic concurrency control.
|
342
342
|
#
|
343
343
|
# @param [Hash<String, String>] new_labels A hash containing key/value
|
344
|
-
# pairs.
|
345
|
-
#
|
346
|
-
# *
|
347
|
-
# *
|
348
|
-
#
|
349
|
-
#
|
350
|
-
#
|
351
|
-
# *
|
344
|
+
# pairs. The labels applied to a resource must meet the following requirements:
|
345
|
+
#
|
346
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
347
|
+
# * Each label must be a key-value pair.
|
348
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
349
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
350
|
+
# a maximum length of 63 characters.
|
351
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
352
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
353
|
+
# international characters are allowed.
|
354
|
+
# * The key portion of a label must be unique. However, you can use the
|
355
|
+
# same key with multiple resources.
|
356
|
+
# * Keys must start with a lowercase letter or international character.
|
352
357
|
#
|
353
358
|
# @example
|
354
359
|
# require "google/cloud/bigquery"
|
@@ -482,6 +487,146 @@ module Google
|
|
482
487
|
Array @gapi_json[:trainingRuns]
|
483
488
|
end
|
484
489
|
|
490
|
+
##
|
491
|
+
# Exports the model to Google Cloud Storage asynchronously, immediately
|
492
|
+
# returning an {ExtractJob} that can be used to track the progress of the
|
493
|
+
# export job. The caller may poll the service by repeatedly calling
|
494
|
+
# {Job#reload!} and {Job#done?} to detect when the job is done, or
|
495
|
+
# simply block until the job is done by calling {Job#wait_until_done!}.
|
496
|
+
# See also {#extract}.
|
497
|
+
#
|
498
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
499
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
500
|
+
# the model is a full resource representation (see {#resource_full?}),
|
501
|
+
# the location of the job will automatically be set to the location of
|
502
|
+
# the model.
|
503
|
+
#
|
504
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
505
|
+
# Exporting models
|
506
|
+
#
|
507
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
508
|
+
# should extract the model. This value should end in an object name
|
509
|
+
# prefix, since multiple objects will be exported.
|
510
|
+
# @param [String] format The exported file format. The default value is
|
511
|
+
# `ml_tf_saved_model`.
|
512
|
+
#
|
513
|
+
# The following values are supported:
|
514
|
+
#
|
515
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
516
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
517
|
+
# @param [String] job_id A user-defined ID for the extract job. The ID
|
518
|
+
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
519
|
+
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
520
|
+
# `job_id` is provided, then `prefix` will not be used.
|
521
|
+
#
|
522
|
+
# See [Generating a job
|
523
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
524
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
525
|
+
# prepended to a generated value to produce a unique job ID. For
|
526
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
527
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
528
|
+
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
|
529
|
+
# underscores (_), or dashes (-). The maximum length of the entire ID
|
530
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
531
|
+
# be used.
|
532
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
533
|
+
# the job. You can use these to organize and group your jobs.
|
534
|
+
#
|
535
|
+
# The labels applied to a resource must meet the following requirements:
|
536
|
+
#
|
537
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
538
|
+
# * Each label must be a key-value pair.
|
539
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
540
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
541
|
+
# a maximum length of 63 characters.
|
542
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
543
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
544
|
+
# international characters are allowed.
|
545
|
+
# * The key portion of a label must be unique. However, you can use the
|
546
|
+
# same key with multiple resources.
|
547
|
+
# * Keys must start with a lowercase letter or international character.
|
548
|
+
#
|
549
|
+
# @yield [job] a job configuration object
|
550
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
551
|
+
# configuration object for setting additional options.
|
552
|
+
#
|
553
|
+
# @return [Google::Cloud::Bigquery::ExtractJob]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# require "google/cloud/bigquery"
|
557
|
+
#
|
558
|
+
# bigquery = Google::Cloud::Bigquery.new
|
559
|
+
# dataset = bigquery.dataset "my_dataset"
|
560
|
+
# model = dataset.model "my_model"
|
561
|
+
#
|
562
|
+
# extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
|
563
|
+
#
|
564
|
+
# extract_job.wait_until_done!
|
565
|
+
# extract_job.done? #=> true
|
566
|
+
#
|
567
|
+
# @!group Data
|
568
|
+
#
|
569
|
+
def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
|
570
|
+
ensure_service!
|
571
|
+
options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
|
572
|
+
updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
|
573
|
+
updater.location = location if location # may be model reference
|
574
|
+
|
575
|
+
yield updater if block_given?
|
576
|
+
|
577
|
+
job_gapi = updater.to_gapi
|
578
|
+
gapi = service.extract_table job_gapi
|
579
|
+
Job.from_gapi gapi, service
|
580
|
+
end
|
581
|
+
|
582
|
+
##
|
583
|
+
# Exports the model to Google Cloud Storage using a synchronous method
|
584
|
+
# that blocks for a response. Timeouts and transient errors are generally
|
585
|
+
# handled as needed to complete the job. See also {#extract_job}.
|
586
|
+
#
|
587
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
588
|
+
# {ExtractJob::Updater#location=} in a block passed to this method. If
|
589
|
+
# the model is a full resource representation (see {#resource_full?}),
|
590
|
+
# the location of the job will automatically be set to the location of
|
591
|
+
# the model.
|
592
|
+
#
|
593
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
594
|
+
# Exporting models
|
595
|
+
#
|
596
|
+
# @param [String] extract_url The Google Storage URI to which BigQuery
|
597
|
+
# should extract the model. This value should end in an object name
|
598
|
+
# prefix, since multiple objects will be exported.
|
599
|
+
# @param [String] format The exported file format. The default value is
|
600
|
+
# `ml_tf_saved_model`.
|
601
|
+
#
|
602
|
+
# The following values are supported:
|
603
|
+
#
|
604
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
605
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
606
|
+
# @yield [job] a job configuration object
|
607
|
+
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
608
|
+
# configuration object for setting additional options.
|
609
|
+
#
|
610
|
+
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
611
|
+
#
|
612
|
+
# @example
|
613
|
+
# require "google/cloud/bigquery"
|
614
|
+
#
|
615
|
+
# bigquery = Google::Cloud::Bigquery.new
|
616
|
+
# dataset = bigquery.dataset "my_dataset"
|
617
|
+
# model = dataset.model "my_model"
|
618
|
+
#
|
619
|
+
# model.extract "gs://my-bucket/#{model.model_id}"
|
620
|
+
#
|
621
|
+
# @!group Data
|
622
|
+
#
|
623
|
+
def extract extract_url, format: nil, &block
|
624
|
+
job = extract_job extract_url, format: format, &block
|
625
|
+
job.wait_until_done!
|
626
|
+
ensure_job_succeeded! job
|
627
|
+
true
|
628
|
+
end
|
629
|
+
|
485
630
|
##
|
486
631
|
# Permanently deletes the model.
|
487
632
|
#
|
@@ -734,6 +879,17 @@ module Google
|
|
734
879
|
def ensure_full_data!
|
735
880
|
reload! unless resource_full?
|
736
881
|
end
|
882
|
+
|
883
|
+
def ensure_job_succeeded! job
|
884
|
+
return unless job.failed?
|
885
|
+
begin
|
886
|
+
# raise to activate ruby exception cause handling
|
887
|
+
raise job.gapi_error
|
888
|
+
rescue StandardError => e
|
889
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
890
|
+
raise Google::Cloud::Error.from_error(e)
|
891
|
+
end
|
892
|
+
end
|
737
893
|
end
|
738
894
|
end
|
739
895
|
end
|
@@ -153,13 +153,21 @@ module Google
|
|
153
153
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
154
154
|
# be used.
|
155
155
|
# @param [Hash] labels A hash of user-provided labels associated with
|
156
|
-
# the job. You can use these to organize and group your jobs.
|
157
|
-
#
|
158
|
-
#
|
159
|
-
#
|
160
|
-
#
|
161
|
-
#
|
162
|
-
#
|
156
|
+
# the job. You can use these to organize and group your jobs.
|
157
|
+
#
|
158
|
+
# The labels applied to a resource must meet the following requirements:
|
159
|
+
#
|
160
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
161
|
+
# * Each label must be a key-value pair.
|
162
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
163
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
164
|
+
# a maximum length of 63 characters.
|
165
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
166
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
167
|
+
# international characters are allowed.
|
168
|
+
# * The key portion of a label must be unique. However, you can use the
|
169
|
+
# same key with multiple resources.
|
170
|
+
# * Keys must start with a lowercase letter or international character.
|
163
171
|
# @yield [job] a job configuration object
|
164
172
|
# @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
|
165
173
|
# configuration object for setting additional options.
|
@@ -411,13 +419,21 @@ module Google
|
|
411
419
|
# See [Generating a job
|
412
420
|
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
413
421
|
# @param [Hash] labels A hash of user-provided labels associated with
|
414
|
-
# the job. You can use these to organize and group your jobs.
|
415
|
-
#
|
416
|
-
#
|
417
|
-
#
|
418
|
-
#
|
419
|
-
#
|
420
|
-
#
|
422
|
+
# the job. You can use these to organize and group your jobs.
|
423
|
+
#
|
424
|
+
# The labels applied to a resource must meet the following requirements:
|
425
|
+
#
|
426
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
427
|
+
# * Each label must be a key-value pair.
|
428
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
429
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
430
|
+
# a maximum length of 63 characters.
|
431
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
432
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
433
|
+
# international characters are allowed.
|
434
|
+
# * The key portion of a label must be unique. However, you can use the
|
435
|
+
# same key with multiple resources.
|
436
|
+
# * Keys must start with a lowercase letter or international character.
|
421
437
|
# @param [Array<String>, String] udfs User-defined function resources
|
422
438
|
# used in a legacy SQL query. May be either a code resource to load from
|
423
439
|
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
@@ -1445,46 +1461,58 @@ module Google
|
|
1445
1461
|
end
|
1446
1462
|
|
1447
1463
|
##
|
1448
|
-
# Extracts the data from
|
1449
|
-
#
|
1450
|
-
#
|
1451
|
-
# calling {Job#reload!} and {Job#done?} to detect when the job
|
1452
|
-
# or simply block until the job is done by calling
|
1464
|
+
# Extracts the data from a table or exports a model to Google Cloud Storage
|
1465
|
+
# asynchronously, immediately returning an {ExtractJob} that can be used to
|
1466
|
+
# track the progress of the export job. The caller may poll the service by
|
1467
|
+
# repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
|
1468
|
+
# is done, or simply block until the job is done by calling
|
1453
1469
|
# {Job#wait_until_done!}. See {#extract} for the synchronous version.
|
1454
|
-
#
|
1455
|
-
#
|
1470
|
+
#
|
1471
|
+
# Use this method instead of {Table#extract_job} or {Model#extract_job} to
|
1472
|
+
# extract data from source tables or models in other projects.
|
1456
1473
|
#
|
1457
1474
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1458
1475
|
# {ExtractJob::Updater#location=} in a block passed to this method.
|
1459
1476
|
#
|
1460
|
-
# @see https://cloud.google.com/bigquery/exporting-data
|
1461
|
-
# Exporting
|
1477
|
+
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
1478
|
+
# Exporting table data
|
1479
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
1480
|
+
# Exporting models
|
1462
1481
|
#
|
1463
|
-
# @param [
|
1464
|
-
#
|
1465
|
-
# [Standard SQL Query
|
1482
|
+
# @param [Table, Model, String] source The source table or model for
|
1483
|
+
# the extract operation. This can be a table or model object; or a
|
1484
|
+
# table ID string as specified by the [Standard SQL Query
|
1466
1485
|
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
|
1467
1486
|
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
|
1468
1487
|
# Reference](https://cloud.google.com/bigquery/query-reference#from)
|
1469
1488
|
# (`project-name:dataset_id.table_id`).
|
1470
1489
|
# @param [Google::Cloud::Storage::File, String, Array<String>]
|
1471
1490
|
# extract_url The Google Storage file or file URI pattern(s) to which
|
1472
|
-
# BigQuery should extract
|
1473
|
-
#
|
1474
|
-
#
|
1491
|
+
# BigQuery should extract. For a model export this value should be a
|
1492
|
+
# string ending in an object name prefix, since multiple objects will
|
1493
|
+
# be exported.
|
1494
|
+
# @param [String] format The exported file format. The default value for
|
1495
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
1496
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
1475
1497
|
#
|
1476
|
-
#
|
1498
|
+
# Supported values for tables:
|
1477
1499
|
#
|
1478
1500
|
# * `csv` - CSV
|
1479
1501
|
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1480
1502
|
# * `avro` - [Avro](http://avro.apache.org/)
|
1503
|
+
#
|
1504
|
+
# Supported values for models:
|
1505
|
+
#
|
1506
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
1507
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
1481
1508
|
# @param [String] compression The compression type to use for exported
|
1482
1509
|
# files. Possible values include `GZIP` and `NONE`. The default value
|
1483
|
-
# is `NONE`.
|
1510
|
+
# is `NONE`. Not applicable when extracting models.
|
1484
1511
|
# @param [String] delimiter Delimiter to use between fields in the
|
1485
|
-
# exported data. Default is
|
1486
|
-
#
|
1487
|
-
#
|
1512
|
+
# exported table data. Default is `,`. Not applicable when extracting
|
1513
|
+
# models.
|
1514
|
+
# @param [Boolean] header Whether to print out a header row in table
|
1515
|
+
# exports. Default is `true`. Not applicable when extracting models.
|
1488
1516
|
# @param [String] job_id A user-defined ID for the extract job. The ID
|
1489
1517
|
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
1490
1518
|
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
@@ -1501,40 +1529,60 @@ module Google
|
|
1501
1529
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1502
1530
|
# be used.
|
1503
1531
|
# @param [Hash] labels A hash of user-provided labels associated with
|
1504
|
-
# the job. You can use these to organize and group your jobs.
|
1505
|
-
#
|
1506
|
-
#
|
1507
|
-
#
|
1508
|
-
#
|
1509
|
-
#
|
1510
|
-
#
|
1532
|
+
# the job. You can use these to organize and group your jobs.
|
1533
|
+
#
|
1534
|
+
# The labels applied to a resource must meet the following requirements:
|
1535
|
+
#
|
1536
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1537
|
+
# * Each label must be a key-value pair.
|
1538
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1539
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1540
|
+
# a maximum length of 63 characters.
|
1541
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1542
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1543
|
+
# international characters are allowed.
|
1544
|
+
# * The key portion of a label must be unique. However, you can use the
|
1545
|
+
# same key with multiple resources.
|
1546
|
+
# * Keys must start with a lowercase letter or international character.
|
1511
1547
|
# @yield [job] a job configuration object
|
1512
1548
|
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
1513
1549
|
# configuration object for setting additional options.
|
1514
1550
|
#
|
1515
1551
|
# @return [Google::Cloud::Bigquery::ExtractJob]
|
1516
1552
|
#
|
1517
|
-
# @example
|
1553
|
+
# @example Export table data
|
1518
1554
|
# require "google/cloud/bigquery"
|
1519
1555
|
#
|
1520
1556
|
# bigquery = Google::Cloud::Bigquery.new
|
1521
1557
|
#
|
1522
1558
|
# table_id = "bigquery-public-data.samples.shakespeare"
|
1523
|
-
# extract_job = bigquery.extract_job table_id,
|
1524
|
-
# "gs://my-bucket/shakespeare.csv"
|
1559
|
+
# extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
|
1525
1560
|
# extract_job.wait_until_done!
|
1526
1561
|
# extract_job.done? #=> true
|
1527
1562
|
#
|
1563
|
+
# @example Export a model
|
1564
|
+
# require "google/cloud/bigquery"
|
1565
|
+
#
|
1566
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1567
|
+
# dataset = bigquery.dataset "my_dataset"
|
1568
|
+
# model = dataset.model "my_model"
|
1569
|
+
#
|
1570
|
+
# extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
|
1571
|
+
#
|
1528
1572
|
# @!group Data
|
1529
1573
|
#
|
1530
|
-
def extract_job
|
1574
|
+
def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
|
1531
1575
|
prefix: nil, labels: nil
|
1532
1576
|
ensure_service!
|
1533
1577
|
options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
|
1534
1578
|
prefix: prefix, labels: labels }
|
1579
|
+
source_ref = if source.respond_to? :model_ref
|
1580
|
+
source.model_ref
|
1581
|
+
else
|
1582
|
+
Service.get_table_ref source, default_ref: project_ref
|
1583
|
+
end
|
1535
1584
|
|
1536
|
-
|
1537
|
-
updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
|
1585
|
+
updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
|
1538
1586
|
|
1539
1587
|
yield updater if block_given?
|
1540
1588
|
|
@@ -1544,51 +1592,63 @@ module Google
|
|
1544
1592
|
end
|
1545
1593
|
|
1546
1594
|
##
|
1547
|
-
# Extracts the data from
|
1548
|
-
#
|
1595
|
+
# Extracts the data from a table or exports a model to Google Cloud Storage
|
1596
|
+
# using a synchronous method that blocks for a response. Timeouts
|
1549
1597
|
# and transient errors are generally handled as needed to complete the
|
1550
|
-
# job. See {#extract_job} for the asynchronous version.
|
1551
|
-
#
|
1552
|
-
#
|
1598
|
+
# job. See {#extract_job} for the asynchronous version.
|
1599
|
+
#
|
1600
|
+
# Use this method instead of {Table#extract} or {Model#extract} to
|
1601
|
+
# extract data from source tables or models in other projects.
|
1553
1602
|
#
|
1554
1603
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1555
1604
|
# {ExtractJob::Updater#location=} in a block passed to this method.
|
1556
1605
|
#
|
1557
|
-
# @see https://cloud.google.com/bigquery/exporting-data
|
1558
|
-
# Exporting
|
1606
|
+
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
1607
|
+
# Exporting table data
|
1608
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
1609
|
+
# Exporting models
|
1559
1610
|
#
|
1560
|
-
# @param [
|
1561
|
-
#
|
1562
|
-
# [Standard SQL Query
|
1611
|
+
# @param [Table, Model, String] source The source table or model for
|
1612
|
+
# the extract operation. This can be a table or model object; or a
|
1613
|
+
# table ID string as specified by the [Standard SQL Query
|
1563
1614
|
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
|
1564
1615
|
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
|
1565
1616
|
# Reference](https://cloud.google.com/bigquery/query-reference#from)
|
1566
1617
|
# (`project-name:dataset_id.table_id`).
|
1567
1618
|
# @param [Google::Cloud::Storage::File, String, Array<String>]
|
1568
1619
|
# extract_url The Google Storage file or file URI pattern(s) to which
|
1569
|
-
# BigQuery should extract
|
1570
|
-
#
|
1571
|
-
#
|
1620
|
+
# BigQuery should extract. For a model export this value should be a
|
1621
|
+
# string ending in an object name prefix, since multiple objects will
|
1622
|
+
# be exported.
|
1623
|
+
# @param [String] format The exported file format. The default value for
|
1624
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
1625
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
1572
1626
|
#
|
1573
|
-
#
|
1627
|
+
# Supported values for tables:
|
1574
1628
|
#
|
1575
1629
|
# * `csv` - CSV
|
1576
1630
|
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1577
1631
|
# * `avro` - [Avro](http://avro.apache.org/)
|
1632
|
+
#
|
1633
|
+
# Supported values for models:
|
1634
|
+
#
|
1635
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
1636
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
1578
1637
|
# @param [String] compression The compression type to use for exported
|
1579
1638
|
# files. Possible values include `GZIP` and `NONE`. The default value
|
1580
|
-
# is `NONE`.
|
1639
|
+
# is `NONE`. Not applicable when extracting models.
|
1581
1640
|
# @param [String] delimiter Delimiter to use between fields in the
|
1582
|
-
# exported data. Default is
|
1583
|
-
#
|
1584
|
-
#
|
1641
|
+
# exported table data. Default is `,`. Not applicable when extracting
|
1642
|
+
# models.
|
1643
|
+
# @param [Boolean] header Whether to print out a header row in table
|
1644
|
+
# exports. Default is `true`. Not applicable when extracting models.
|
1585
1645
|
# @yield [job] a job configuration object
|
1586
1646
|
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
1587
1647
|
# configuration object for setting additional options.
|
1588
1648
|
#
|
1589
1649
|
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
1590
1650
|
#
|
1591
|
-
# @example
|
1651
|
+
# @example Export table data
|
1592
1652
|
# require "google/cloud/bigquery"
|
1593
1653
|
#
|
1594
1654
|
# bigquery = Google::Cloud::Bigquery.new
|
@@ -1596,10 +1656,19 @@ module Google
|
|
1596
1656
|
# bigquery.extract "bigquery-public-data.samples.shakespeare",
|
1597
1657
|
# "gs://my-bucket/shakespeare.csv"
|
1598
1658
|
#
|
1659
|
+
# @example Export a model
|
1660
|
+
# require "google/cloud/bigquery"
|
1661
|
+
#
|
1662
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1663
|
+
# dataset = bigquery.dataset "my_dataset"
|
1664
|
+
# model = dataset.model "my_model"
|
1665
|
+
#
|
1666
|
+
# bigquery.extract model, "gs://my-bucket/#{model.model_id}"
|
1667
|
+
#
|
1599
1668
|
# @!group Data
|
1600
1669
|
#
|
1601
|
-
def extract
|
1602
|
-
job = extract_job
|
1670
|
+
def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
|
1671
|
+
job = extract_job source, extract_url,
|
1603
1672
|
format: format,
|
1604
1673
|
compression: compression,
|
1605
1674
|
delimiter: delimiter,
|