google-cloud-bigquery 1.21.2 → 1.26.0
This diff compares publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/CONTRIBUTING.md +1 -1
- data/lib/google-cloud-bigquery.rb +1 -0
- data/lib/google/cloud/bigquery.rb +1 -1
- data/lib/google/cloud/bigquery/convert.rb +3 -1
- data/lib/google/cloud/bigquery/copy_job.rb +15 -6
- data/lib/google/cloud/bigquery/dataset.rb +43 -20
- data/lib/google/cloud/bigquery/dataset/access.rb +293 -16
- data/lib/google/cloud/bigquery/external.rb +328 -3
- data/lib/google/cloud/bigquery/extract_job.rb +154 -50
- data/lib/google/cloud/bigquery/load_job.rb +197 -34
- data/lib/google/cloud/bigquery/model.rb +164 -8
- data/lib/google/cloud/bigquery/policy.rb +431 -0
- data/lib/google/cloud/bigquery/project.rb +137 -68
- data/lib/google/cloud/bigquery/query_job.rb +24 -12
- data/lib/google/cloud/bigquery/service.rb +50 -11
- data/lib/google/cloud/bigquery/table.rb +174 -37
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +7 -6
@@ -37,8 +37,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
 #     schema.string "first_name", mode: :required
 #     schema.record "cities_lived", mode: :repeated do |nested_schema|
 #       nested_schema.string "place", mode: :required
@@ -112,8 +112,7 @@ module Google
 #   `false` otherwise.
 #
 def iso8859_1?
-  val = @gapi.configuration.load.encoding
-  val == "ISO-8859-1"
+  @gapi.configuration.load.encoding == "ISO-8859-1"
 end
 
 ##
@@ -195,8 +194,7 @@ module Google
 #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
 #
 def json?
-  val = @gapi.configuration.load.source_format
-  val == "NEWLINE_DELIMITED_JSON"
+  @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
 end
 
 ##
@@ -218,8 +216,27 @@ module Google
 #   `false` otherwise.
 #
 def backup?
-  val = @gapi.configuration.load.source_format
-  val == "DATASTORE_BACKUP"
+  @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+end
+
+##
+# Checks if the source format is ORC.
+#
+# @return [Boolean] `true` when the source format is `ORC`,
+#   `false` otherwise.
+#
+def orc?
+  @gapi.configuration.load.source_format == "ORC"
+end
+
+##
+# Checks if the source format is Parquet.
+#
+# @return [Boolean] `true` when the source format is `PARQUET`,
+#   `false` otherwise.
+#
+def parquet?
+  @gapi.configuration.load.source_format == "PARQUET"
 end
 
 ##
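These source-format hunks match the `data/lib/google/cloud/bigquery/load_job.rb +197 -34` entry in the file list: the existing `iso8859_1?`, `json?`, and `backup?` readers now compare `@gapi.configuration.load` values directly, and new `orc?` and `parquet?` predicates are added. A minimal usage sketch, assuming a dataset named `my_dataset`, a Parquet file at a placeholder GCS URI, and the gem's existing `format:` option on `Dataset#load_job`:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # placeholder dataset

# Load a Parquet file, then inspect the job's source-format predicates.
load_job = dataset.load_job "my_new_table",
                            "gs://my-bucket/file-name.parquet",
                            format: :parquet
load_job.wait_until_done!

load_job.parquet? #=> true
load_job.orc?     #=> false
load_job.backup?  #=> false
```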
@@ -347,6 +364,58 @@ module Google
   nil
 end
 
+###
+# Checks if hive partitioning options are set.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+#
+# @!group Attributes
+#
+def hive_partitioning?
+  !@gapi.configuration.load.hive_partitioning_options.nil?
+end
+
+###
+# The mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `AUTO`: automatically infer partition key name(s) and type(s).
+# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+#
+# @!group Attributes
+#
+def hive_partitioning_mode
+  @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+end
+
+###
+# The common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+#
+# @!group Attributes
+#
+def hive_partitioning_source_uri_prefix
+  @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+end
+
 ###
 # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
 # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
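The three readers added above expose a load job's hive partitioning options. A short sketch of reading them back from a completed job, assuming a placeholder job ID and a job created with `AUTO` mode:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
load_job = bigquery.job "my_load_job_id" # placeholder ID of a completed load job

if load_job.hive_partitioning?
  load_job.hive_partitioning_mode              #=> "AUTO"
  load_job.hive_partitioning_source_uri_prefix #=> "gs://bucket/path_to_table/"
end
```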
@@ -428,8 +497,9 @@ module Google
 # The period for which the destination table will be time partitioned, if
 # any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
 #
-# @return [String, nil] The time partition type.
-#
+# @return [String, nil] The time partition type. The supported types are `DAY`,
+#   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+#   hour, month, and year, respectively; or `nil` if not present.
 #
 # @!group Attributes
 #
@@ -1303,12 +1373,21 @@ module Google
 # Sets the labels to use for the load job.
 #
 # @param [Hash] val A hash of user-provided labels associated with
-#   the job. You can use these to organize and group your jobs.
-#
-#
-#
-#
-#
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 #
 # @!group Attributes
 #
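The expanded requirements above document the `labels=` setter on the load job updater (its body, `@gapi.configuration.update! labels: val`, appears as context in the next hunk). A sketch of attaching labels that satisfy those rules; the key/value pairs are illustrative:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # placeholder dataset

load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv" do |job|
  # keys and values: lowercase letters, digits, underscores, dashes; max 63 characters
  job.labels = { "env" => "staging", "team" => "data-eng" }
end
```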
@@ -1316,6 +1395,89 @@ module Google
   @gapi.configuration.update! labels: val
 end
 
+##
+# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `auto`: automatically infer partition key name(s) and type(s).
+# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `custom`: partition key schema is encoded in the source URI prefix.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+#
+# See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+#     job.format = :parquet
+#     job.hive_partitioning_mode = :auto
+#     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+# @!group Attributes
+#
+def hive_partitioning_mode= mode
+  @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+end
+
+##
+# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# See {#hive_partitioning_mode=}.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @param [String] source_uri_prefix The common prefix for all source uris.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+#     job.format = :parquet
+#     job.hive_partitioning_mode = :auto
+#     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+# @!group Attributes
+#
+def hive_partitioning_source_uri_prefix= source_uri_prefix
+  @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+end
+
 ##
 # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
 # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@@ -1335,8 +1497,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1376,8 +1538,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1417,8 +1579,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1458,8 +1620,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1490,8 +1652,9 @@ module Google
 # BigQuery does not allow you to change partitioning on an existing
 # table.
 #
-# @param [String] type The time partition type.
-#
+# @param [String] type The time partition type. The supported types are `DAY`,
+#   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+#   hour, month, and year, respectively.
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -1499,8 +1662,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #   end
 #
@@ -1538,8 +1701,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_field = "dob"
 #     job.schema do |schema|
@@ -1574,8 +1737,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_expiration = 86_400
 #   end
@@ -1634,8 +1797,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_field = "dob"
 #     job.schema do |schema|
@@ -341,14 +341,19 @@ module Google
 #   the update to comply with ETag-based optimistic concurrency control.
 #
 # @param [Hash<String, String>] new_labels A hash containing key/value
-#   pairs.
-#
-#   *
-#   *
-#
-#
-#
-#   *
+#   pairs. The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 #
 # @example
 #   require "google/cloud/bigquery"
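Judging by the surrounding hunks and the `model.rb +164 -8` totals in the file list, this `new_labels` parameter appears to belong to the model's `labels=` setter. Under that assumption, a brief sketch with placeholder names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
model    = bigquery.dataset("my_dataset").model("my_model") # placeholders

# Replaces the model's labels; keys and values must satisfy the rules listed above.
model.labels = { "env" => "production" }
```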
@@ -482,6 +487,146 @@ module Google
   Array @gapi_json[:trainingRuns]
 end
 
+##
+# Exports the model to Google Cloud Storage asynchronously, immediately
+# returning an {ExtractJob} that can be used to track the progress of the
+# export job. The caller may poll the service by repeatedly calling
+# {Job#reload!} and {Job#done?} to detect when the job is done, or
+# simply block until the job is done by calling #{Job#wait_until_done!}.
+# See also {#extract}.
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {ExtractJob::Updater#location=} in a block passed to this method. If
+# the model is a full resource representation (see {#resource_full?}),
+# the location of the job will automatically be set to the location of
+# the model.
+#
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
+#
+# @param [String] extract_url The Google Storage URI to which BigQuery
+#   should extract the model. This value should be end in an object name
+#   prefix, since multiple objects will be exported.
+# @param [String] format The exported file format. The default value is
+#   `ml_tf_saved_model`.
+#
+#   The following values are supported:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
+# @param [String] job_id A user-defined ID for the extract job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
+#
+# @yield [job] a job configuration object
+# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+#   configuration object for setting additional options.
+#
+# @return [Google::Cloud::Bigquery::ExtractJob]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+#
+#   extract_job.wait_until_done!
+#   extract_job.done? #=> true
+#
+# @!group Data
+#
+def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
+  ensure_service!
+  options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
+  updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
+  updater.location = location if location # may be model reference
+
+  yield updater if block_given?
+
+  job_gapi = updater.to_gapi
+  gapi = service.extract_table job_gapi
+  Job.from_gapi gapi, service
+end
+
+##
+# Exports the model to Google Cloud Storage using a synchronous method
+# that blocks for a response. Timeouts and transient errors are generally
+# handled as needed to complete the job. See also {#extract_job}.
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {ExtractJob::Updater#location=} in a block passed to this method. If
+# the model is a full resource representation (see {#resource_full?}),
+# the location of the job will automatically be set to the location of
+# the model.
+#
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
+#
+# @param [String] extract_url The Google Storage URI to which BigQuery
+#   should extract the model. This value should be end in an object name
+#   prefix, since multiple objects will be exported.
+# @param [String] format The exported file format. The default value is
+#   `ml_tf_saved_model`.
+#
+#   The following values are supported:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
+# @yield [job] a job configuration object
+# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+#   configuration object for setting additional options.
+#
+# @return [Boolean] Returns `true` if the extract operation succeeded.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   model.extract "gs://my-bucket/#{model.model_id}"
+#
+# @!group Data
+#
+def extract extract_url, format: nil, &block
+  job = extract_job extract_url, format: format, &block
+  job.wait_until_done!
+  ensure_job_succeeded! job
+  true
+end
+
 ##
 # Permanently deletes the model.
 #
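Beyond the `@example` blocks shown in the hunk, the documented `format` parameter and updater block suggest usage like the following sketch, which exports in XGBoost Booster format and sets the job location via `ExtractJob::Updater#location=` (bucket and model names are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
model    = bigquery.dataset("my_dataset").model("my_model") # placeholders

model.extract "gs://my-bucket/exported_model",
              format: "ml_xgboost_booster" do |job|
  job.location = "EU" # optional; only needed to pin the job to a region
end
```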
@@ -734,6 +879,17 @@ module Google
 def ensure_full_data!
   reload! unless resource_full?
 end
+
+def ensure_job_succeeded! job
+  return unless job.failed?
+  begin
+    # raise to activate ruby exception cause handling
+    raise job.gapi_error
+  rescue StandardError => e
+    # wrap Google::Apis::Error with Google::Cloud::Error
+    raise Google::Cloud::Error.from_error(e)
+  end
+end
 end
 end
 end
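The new `ensure_job_succeeded!` helper re-raises a failed extract job's error wrapped in `Google::Cloud::Error`, so callers of the synchronous `Model#extract` can rescue a single error class. A sketch of handling that failure, with placeholder names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
model    = bigquery.dataset("my_dataset").model("my_model") # placeholders

begin
  model.extract "gs://my-bucket/exported_model"
rescue Google::Cloud::Error => e
  # the failed job's underlying Google::Apis::Error is wrapped and re-raised
  puts "Model export failed: #{e.message}"
end
```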