google-cloud-bigquery 1.21.2 → 1.26.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/CONTRIBUTING.md +1 -1
- data/lib/google-cloud-bigquery.rb +1 -0
- data/lib/google/cloud/bigquery.rb +1 -1
- data/lib/google/cloud/bigquery/convert.rb +3 -1
- data/lib/google/cloud/bigquery/copy_job.rb +15 -6
- data/lib/google/cloud/bigquery/dataset.rb +43 -20
- data/lib/google/cloud/bigquery/dataset/access.rb +293 -16
- data/lib/google/cloud/bigquery/external.rb +328 -3
- data/lib/google/cloud/bigquery/extract_job.rb +154 -50
- data/lib/google/cloud/bigquery/load_job.rb +197 -34
- data/lib/google/cloud/bigquery/model.rb +164 -8
- data/lib/google/cloud/bigquery/policy.rb +431 -0
- data/lib/google/cloud/bigquery/project.rb +137 -68
- data/lib/google/cloud/bigquery/query_job.rb +24 -12
- data/lib/google/cloud/bigquery/service.rb +50 -11
- data/lib/google/cloud/bigquery/table.rb +174 -37
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +7 -6
data/lib/google/cloud/bigquery/load_job.rb:

```diff
@@ -37,8 +37,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
 #     schema.string "first_name", mode: :required
 #     schema.record "cities_lived", mode: :repeated do |nested_schema|
 #       nested_schema.string "place", mode: :required
```
```diff
@@ -112,8 +112,7 @@ module Google
 #   `false` otherwise.
 #
 def iso8859_1?
-  val = @gapi.configuration.load.encoding
-  val == "ISO-8859-1"
+  @gapi.configuration.load.encoding == "ISO-8859-1"
 end

 ##
```
```diff
@@ -195,8 +194,7 @@ module Google
 #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
 #
 def json?
-  val = @gapi.configuration.load.source_format
-  val == "NEWLINE_DELIMITED_JSON"
+  @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
 end

 ##
```
```diff
@@ -218,8 +216,27 @@ module Google
 #   `false` otherwise.
 #
 def backup?
-  val = @gapi.configuration.load.source_format
-  val == "DATASTORE_BACKUP"
+  @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+end
+
+##
+# Checks if the source format is ORC.
+#
+# @return [Boolean] `true` when the source format is `ORC`,
+#   `false` otherwise.
+#
+def orc?
+  @gapi.configuration.load.source_format == "ORC"
+end
+
+##
+# Checks if the source format is Parquet.
+#
+# @return [Boolean] `true` when the source format is `PARQUET`,
+#   `false` otherwise.
+#
+def parquet?
+  @gapi.configuration.load.source_format == "PARQUET"
 end

 ##
```
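The predicate refactor reads the configured `source_format` inline, and the new `orc?` and `parquet?` join `json?` and `backup?`. A minimal sketch of checking a job's format from the caller's side (bucket, dataset, and table names are placeholders, and `job.format = :orc` assumes the symbol-to-`"ORC"` mapping added to `convert.rb` in this release):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Configure an ORC load, then inspect it through the new predicates.
load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.orc" do |job|
  job.format = :orc
end

load_job.orc?     #=> true
load_job.parquet? #=> false
load_job.json?    #=> false
```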
```diff
@@ -347,6 +364,58 @@ module Google
   nil
 end

+###
+# Checks if hive partitioning options are set.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+#
+# @!group Attributes
+#
+def hive_partitioning?
+  !@gapi.configuration.load.hive_partitioning_options.nil?
+end
+
+###
+# The mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `AUTO`: automatically infer partition key name(s) and type(s).
+# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+#
+# @!group Attributes
+#
+def hive_partitioning_mode
+  @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+end
+
+###
+# The common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+#
+# @!group Attributes
+#
+def hive_partitioning_source_uri_prefix
+  @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+end
+
 ###
 # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
 # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
```
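These readers are nil-safe: each checks `hive_partitioning?` before touching the options object. A sketch of reading the attributes back from a previously created job (the job ID is hypothetical, and the values assume a load job configured as in the setter examples further down):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
job = bigquery.job "my_load_job_id" # hypothetical ID of an existing load job

if job.hive_partitioning?
  job.hive_partitioning_mode              #=> "AUTO"
  job.hive_partitioning_source_uri_prefix #=> "gs://bucket/path_to_table"
end
```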
```diff
@@ -428,8 +497,9 @@ module Google
 # The period for which the destination table will be time partitioned, if
 # any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
 #
-# @return [String, nil] The time partition type.
-#
+# @return [String, nil] The time partition type. The supported types are `DAY`,
+#   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+#   hour, month, and year, respectively; or `nil` if not present.
 #
 # @!group Attributes
 #
```
```diff
@@ -1303,12 +1373,21 @@ module Google
 # Sets the labels to use for the load job.
 #
 # @param [Hash] val A hash of user-provided labels associated with
-#   the job. You can use these to organize and group your jobs.
-#
-#
-#
-#
-#
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 #
 # @!group Attributes
 #
```
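The empty placeholder comment lines are replaced with the standard label requirements. A sketch of a conforming hash passed to the updater's `labels=` (names are illustrative; `bigquery` and `dataset` as in the earlier sketches):

```ruby
gcs_uri  = "gs://my-bucket/file-name.csv"
load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  # Keys: at most 63 chars, lowercase letters, digits, underscores, and
  # dashes, starting with a lowercase letter; values may be empty.
  job.labels = { "team" => "analytics", "cost-center" => "cc_42" }
end
```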
```diff
@@ -1316,6 +1395,89 @@ module Google
   @gapi.configuration.update! labels: val
 end

+##
+# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `auto`: automatically infer partition key name(s) and type(s).
+# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `custom`: partition key schema is encoded in the source URI prefix.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+#
+# See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+#     job.format = :parquet
+#     job.hive_partitioning_mode = :auto
+#     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+# @!group Attributes
+#
+def hive_partitioning_mode= mode
+  @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+end
+
+##
+# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# See {#hive_partitioning_mode=}.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @param [String] source_uri_prefix The common prefix for all source uris.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+#     job.format = :parquet
+#     job.hive_partitioning_mode = :auto
+#     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+# @!group Attributes
+#
+def hive_partitioning_source_uri_prefix= source_uri_prefix
+  @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+end
+
 ##
 # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
 # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
```
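Both setters lazily build the shared `HivePartitioningOptions` object, so they may be called in either order. The in-doc examples cover `AUTO` mode; for `CUSTOM` mode the partition key schema is declared inside the prefix itself. A hedged sketch, assuming the `{name:TYPE}` prefix encoding described in the linked BigQuery docs (all URIs are placeholders; `dataset` as in the earlier sketches):

```ruby
load_job = dataset.load_job "my_new_table", "gs://bucket/path_to_table/*" do |job|
  job.format = :avro
  job.hive_partitioning_mode = :custom
  # CUSTOM mode: each partition key and its type is encoded in the prefix.
  job.hive_partitioning_source_uri_prefix =
    "gs://bucket/path_to_table/{dt:DATE}/{country:STRING}/{id:INTEGER}"
end
```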
```diff
@@ -1335,8 +1497,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
```
```diff
@@ -1376,8 +1538,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
```
```diff
@@ -1417,8 +1579,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
```
```diff
@@ -1458,8 +1620,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
```
```diff
@@ -1490,8 +1652,9 @@ module Google
 # BigQuery does not allow you to change partitioning on an existing
 # table.
 #
-# @param [String] type The time partition type.
-#
+# @param [String] type The time partition type. The supported types are `DAY`,
+#   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+#   hour, month, and year, respectively.
 #
 # @example
 #   require "google/cloud/bigquery"
```
```diff
@@ -1499,8 +1662,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #   end
 #
```
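Per the expanded docs, the same setter accepts the finer- and coarser-grained types; a sketch with hourly partitions (URI is a placeholder, `dataset` as above):

```ruby
load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv" do |job|
  job.time_partitioning_type = "HOUR" # one partition per hour
end
```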
```diff
@@ -1538,8 +1701,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_field = "dob"
 #     job.schema do |schema|
```
```diff
@@ -1574,8 +1737,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_expiration = 86_400
 #   end
```
```diff
@@ -1634,8 +1797,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_field = "dob"
 #     job.schema do |schema|
```
data/lib/google/cloud/bigquery/model.rb:

```diff
@@ -341,14 +341,19 @@ module Google
 # the update to comply with ETag-based optimistic concurrency control.
 #
 # @param [Hash<String, String>] new_labels A hash containing key/value
-#   pairs.
-#
-#   *
-#   *
-#
-#
-#
-#   *
+#   pairs. The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
 #
 # @example
 #   require "google/cloud/bigquery"
```
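`Model#labels=` is subject to the same rules; a minimal sketch (`dataset` as in the earlier sketches):

```ruby
model = dataset.model "my_model"
# Keys and values must satisfy the requirements listed above.
model.labels = { "env" => "prod", "owner" => "data_team" }
```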
```diff
@@ -482,6 +487,146 @@ module Google
   Array @gapi_json[:trainingRuns]
 end

+##
+# Exports the model to Google Cloud Storage asynchronously, immediately
+# returning an {ExtractJob} that can be used to track the progress of the
+# export job. The caller may poll the service by repeatedly calling
+# {Job#reload!} and {Job#done?} to detect when the job is done, or
+# simply block until the job is done by calling #{Job#wait_until_done!}.
+# See also {#extract}.
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {ExtractJob::Updater#location=} in a block passed to this method. If
+# the model is a full resource representation (see {#resource_full?}),
+# the location of the job will automatically be set to the location of
+# the model.
+#
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
+#
+# @param [String] extract_url The Google Storage URI to which BigQuery
+#   should extract the model. This value should be end in an object name
+#   prefix, since multiple objects will be exported.
+# @param [String] format The exported file format. The default value is
+#   `ml_tf_saved_model`.
+#
+#   The following values are supported:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
+# @param [String] job_id A user-defined ID for the extract job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
+#
+# @yield [job] a job configuration object
+# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+#   configuration object for setting additional options.
+#
+# @return [Google::Cloud::Bigquery::ExtractJob]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+#
+#   extract_job.wait_until_done!
+#   extract_job.done? #=> true
+#
+# @!group Data
+#
+def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
+  ensure_service!
+  options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
+  updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
+  updater.location = location if location # may be model reference
+
+  yield updater if block_given?
+
+  job_gapi = updater.to_gapi
+  gapi = service.extract_table job_gapi
+  Job.from_gapi gapi, service
+end
+
+##
+# Exports the model to Google Cloud Storage using a synchronous method
+# that blocks for a response. Timeouts and transient errors are generally
+# handled as needed to complete the job. See also {#extract_job}.
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {ExtractJob::Updater#location=} in a block passed to this method. If
+# the model is a full resource representation (see {#resource_full?}),
+# the location of the job will automatically be set to the location of
+# the model.
+#
+# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+#   Exporting models
+#
+# @param [String] extract_url The Google Storage URI to which BigQuery
+#   should extract the model. This value should be end in an object name
+#   prefix, since multiple objects will be exported.
+# @param [String] format The exported file format. The default value is
+#   `ml_tf_saved_model`.
+#
+#   The following values are supported:
+#
+#   * `ml_tf_saved_model` - TensorFlow SavedModel
+#   * `ml_xgboost_booster` - XGBoost Booster
+# @yield [job] a job configuration object
+# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+#   configuration object for setting additional options.
+#
+# @return [Boolean] Returns `true` if the extract operation succeeded.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   model = dataset.model "my_model"
+#
+#   model.extract "gs://my-bucket/#{model.model_id}"
+#
+# @!group Data
+#
+def extract extract_url, format: nil, &block
+  job = extract_job extract_url, format: format, &block
+  job.wait_until_done!
+  ensure_job_succeeded! job
+  true
+end
+
 ##
 # Permanently deletes the model.
 #
```
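Besides the in-doc examples, the `format` option selects the export type. A sketch exporting an XGBoost model (the bucket name is a placeholder):

```ruby
model = dataset.model "my_model"
model.extract "gs://my-bucket/#{model.model_id}", format: "ml_xgboost_booster"
```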
#
|
@@ -734,6 +879,17 @@ module Google
|
|
734
879
|
def ensure_full_data!
|
735
880
|
reload! unless resource_full?
|
736
881
|
end
|
882
|
+
|
883
|
+
def ensure_job_succeeded! job
|
884
|
+
return unless job.failed?
|
885
|
+
begin
|
886
|
+
# raise to activate ruby exception cause handling
|
887
|
+
raise job.gapi_error
|
888
|
+
rescue StandardError => e
|
889
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
890
|
+
raise Google::Cloud::Error.from_error(e)
|
891
|
+
end
|
892
|
+
end
|
737
893
|
end
|
738
894
|
end
|
739
895
|
end
|
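The `begin`/`raise`/`rescue` dance exists so that the original `Google::Apis::Error` survives as the wrapped error's `cause`. A sketch of what that means for callers of the synchronous `extract`:

```ruby
begin
  model.extract "gs://my-bucket/#{model.model_id}"
rescue Google::Cloud::Error => e
  # The raised error wraps the failed job's gapi_error; the raw API
  # error remains available via Ruby's exception cause chain.
  puts e.message
  puts e.cause.class
end
```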