google-cloud-bigquery 1.14.0 → 1.42.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
@@ -56,7 +56,8 @@ module Google
|
|
56
56
|
# @private The Service object.
|
57
57
|
attr_accessor :service
|
58
58
|
|
59
|
-
attr_reader :name
|
59
|
+
attr_reader :name
|
60
|
+
attr_reader :numeric_id
|
60
61
|
|
61
62
|
##
|
62
63
|
# Creates a new Service instance.
|
@@ -91,8 +92,7 @@ module Google
|
|
91
92
|
# @return [String] The service account email address.
|
92
93
|
#
|
93
94
|
def service_account_email
|
94
|
-
@service_account_email ||=
|
95
|
-
service.project_service_account.email
|
95
|
+
@service_account_email ||= service.project_service_account.email
|
96
96
|
end
|
97
97
|
|
98
98
|
##
|
@@ -139,8 +139,8 @@ module Google
|
|
139
139
|
# * `empty` - An error will be returned if the destination table
|
140
140
|
# already contains data.
|
141
141
|
# @param [String] job_id A user-defined ID for the copy job. The ID
|
142
|
-
# must contain only letters (
|
143
|
-
# (_), or dashes (
|
142
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
143
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
144
144
|
# `job_id` is provided, then `prefix` will not be used.
|
145
145
|
#
|
146
146
|
# See [Generating a job
|
@@ -149,18 +149,26 @@ module Google
|
|
149
149
|
# prepended to a generated value to produce a unique job ID. For
|
150
150
|
# example, the prefix `daily_import_job_` can be given to generate a
|
151
151
|
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
152
|
-
# prefix must contain only letters (
|
153
|
-
# underscores (_), or dashes (
|
152
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
153
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
154
154
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
155
155
|
# be used.
|
156
156
|
# @param [Hash] labels A hash of user-provided labels associated with
|
157
|
-
# the job. You can use these to organize and group your jobs.
|
158
|
-
#
|
159
|
-
#
|
160
|
-
#
|
161
|
-
#
|
162
|
-
#
|
163
|
-
#
|
157
|
+
# the job. You can use these to organize and group your jobs.
|
158
|
+
#
|
159
|
+
# The labels applied to a resource must meet the following requirements:
|
160
|
+
#
|
161
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
162
|
+
# * Each label must be a key-value pair.
|
163
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
164
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
165
|
+
# a maximum length of 63 characters.
|
166
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
167
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
168
|
+
# international characters are allowed.
|
169
|
+
# * The key portion of a label must be unique. However, you can use the
|
170
|
+
# same key with multiple resources.
|
171
|
+
# * Keys must start with a lowercase letter or international character.
|
164
172
|
# @yield [job] a job configuration object
|
165
173
|
# @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
|
166
174
|
# configuration object for setting additional options.
|
@@ -182,11 +190,9 @@ module Google
|
|
182
190
|
#
|
183
191
|
# @!group Data
|
184
192
|
#
|
185
|
-
def copy_job source_table, destination_table, create: nil, write: nil,
|
186
|
-
job_id: nil, prefix: nil, labels: nil
|
193
|
+
def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
|
187
194
|
ensure_service!
|
188
|
-
options = { create: create, write: write, labels: labels,
|
189
|
-
job_id: job_id, prefix: prefix }
|
195
|
+
options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }
|
190
196
|
|
191
197
|
updater = CopyJob::Updater.from_options(
|
192
198
|
service,
|
@@ -261,13 +267,8 @@ module Google
|
|
261
267
|
#
|
262
268
|
# @!group Data
|
263
269
|
#
|
264
|
-
def copy source_table, destination_table, create: nil, write: nil,
|
265
|
-
|
266
|
-
job = copy_job source_table,
|
267
|
-
destination_table,
|
268
|
-
create: create,
|
269
|
-
write: write,
|
270
|
-
&block
|
270
|
+
def copy source_table, destination_table, create: nil, write: nil, &block
|
271
|
+
job = copy_job source_table, destination_table, create: create, write: write, &block
|
271
272
|
job.wait_until_done!
|
272
273
|
ensure_job_succeeded! job
|
273
274
|
true
|
@@ -277,27 +278,6 @@ module Google
|
|
277
278
|
# Queries data by creating a [query
|
278
279
|
# job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
|
279
280
|
#
|
280
|
-
# When using standard SQL and passing arguments using `params`, Ruby
|
281
|
-
# types are mapped to BigQuery types as follows:
|
282
|
-
#
|
283
|
-
# | BigQuery | Ruby | Notes |
|
284
|
-
# |-------------|----------------|---|
|
285
|
-
# | `BOOL` | `true`/`false` | |
|
286
|
-
# | `INT64` | `Integer` | |
|
287
|
-
# | `FLOAT64` | `Float` | |
|
288
|
-
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
289
|
-
# | `STRING` | `String` | |
|
290
|
-
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
291
|
-
# | `DATE` | `Date` | |
|
292
|
-
# | `TIMESTAMP` | `Time` | |
|
293
|
-
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
294
|
-
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
295
|
-
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
296
|
-
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
297
|
-
#
|
298
|
-
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
|
299
|
-
# for an overview of each BigQuery data type, including allowed values.
|
300
|
-
#
|
301
281
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
302
282
|
# {QueryJob::Updater#location=} in a block passed to this method.
|
303
283
|
#
|
@@ -305,13 +285,60 @@ module Google
|
|
305
285
|
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
306
286
|
# query to execute. Example: "SELECT count(f1) FROM
|
307
287
|
# [myProjectId:myDatasetId.myTableId]".
|
308
|
-
# @param [Array, Hash] params Standard SQL only. Used to pass query
|
309
|
-
#
|
310
|
-
#
|
311
|
-
#
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
288
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
289
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
290
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
291
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
|
292
|
+
# true.
|
293
|
+
#
|
294
|
+
# BigQuery types are converted from Ruby types as follows:
|
295
|
+
#
|
296
|
+
# | BigQuery | Ruby | Notes |
|
297
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
298
|
+
# | `BOOL` | `true`/`false` | |
|
299
|
+
# | `INT64` | `Integer` | |
|
300
|
+
# | `FLOAT64` | `Float` | |
|
301
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
302
|
+
# | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
303
|
+
# | `STRING` | `String` | |
|
304
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
305
|
+
# | `DATE` | `Date` | |
|
306
|
+
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
307
|
+
# | `TIMESTAMP` | `Time` | |
|
308
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
309
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
310
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
311
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
312
|
+
#
|
313
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
314
|
+
# of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
|
315
|
+
# GIS data](https://cloud.google.com/bigquery/docs/gis-data).
|
316
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
317
|
+
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
318
|
+
# specify the SQL type for these values.
|
319
|
+
#
|
320
|
+
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
|
321
|
+
# positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
|
322
|
+
# should be BigQuery type codes from the following list:
|
323
|
+
#
|
324
|
+
# * `:BOOL`
|
325
|
+
# * `:INT64`
|
326
|
+
# * `:FLOAT64`
|
327
|
+
# * `:NUMERIC`
|
328
|
+
# * `:BIGNUMERIC`
|
329
|
+
# * `:STRING`
|
330
|
+
# * `:DATETIME`
|
331
|
+
# * `:DATE`
|
332
|
+
# * `:GEOGRAPHY`
|
333
|
+
# * `:TIMESTAMP`
|
334
|
+
# * `:TIME`
|
335
|
+
# * `:BYTES`
|
336
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
337
|
+
# is specified as `[:INT64]`.
|
338
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
339
|
+
# match the `params` hash, and the values are the type codes that match the data.
|
340
|
+
#
|
341
|
+
# Types are optional.
|
315
342
|
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
316
343
|
# that represents the mapping of the external tables to the table
|
317
344
|
# names used in the SQL query. The hash keys are the table names, and
|
@@ -375,13 +402,19 @@ module Google
|
|
375
402
|
# Flattens all nested and repeated fields in the query results. The
|
376
403
|
# default value is `true`. `large_results` parameter must be `true` if
|
377
404
|
# this is set to `false`.
|
405
|
+
# @param [Integer] maximum_billing_tier Limits the billing tier for this
|
406
|
+
# job. Queries that have resource usage beyond this tier will fail
|
407
|
+
# (without incurring a charge). WARNING: The billed byte amount can be
|
408
|
+
# multiplied by an amount up to this number! Most users should not need
|
409
|
+
# to alter this setting, and we recommend that you avoid introducing new
|
410
|
+
# uses of it. Deprecated.
|
378
411
|
# @param [Integer] maximum_bytes_billed Limits the bytes billed for this
|
379
412
|
# job. Queries that will have bytes billed beyond this limit will fail
|
380
413
|
# (without incurring a charge). Optional. If unspecified, this will be
|
381
414
|
# set to your project default.
|
382
415
|
# @param [String] job_id A user-defined ID for the query job. The ID
|
383
|
-
# must contain only letters (
|
384
|
-
# (_), or dashes (
|
416
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
417
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
385
418
|
# `job_id` is provided, then `prefix` will not be used.
|
386
419
|
#
|
387
420
|
# See [Generating a job
|
@@ -390,30 +423,51 @@ module Google
|
|
390
423
|
# prepended to a generated value to produce a unique job ID. For
|
391
424
|
# example, the prefix `daily_import_job_` can be given to generate a
|
392
425
|
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
393
|
-
# prefix must contain only letters (
|
394
|
-
# underscores (_), or dashes (
|
426
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
427
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
395
428
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
396
429
|
# be used.
|
397
430
|
#
|
398
431
|
# See [Generating a job
|
399
432
|
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
400
433
|
# @param [Hash] labels A hash of user-provided labels associated with
|
401
|
-
# the job. You can use these to organize and group your jobs.
|
402
|
-
#
|
403
|
-
#
|
404
|
-
#
|
405
|
-
#
|
406
|
-
#
|
407
|
-
#
|
434
|
+
# the job. You can use these to organize and group your jobs.
|
435
|
+
#
|
436
|
+
# The labels applied to a resource must meet the following requirements:
|
437
|
+
#
|
438
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
439
|
+
# * Each label must be a key-value pair.
|
440
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
441
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
442
|
+
# a maximum length of 63 characters.
|
443
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
444
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
445
|
+
# international characters are allowed.
|
446
|
+
# * The key portion of a label must be unique. However, you can use the
|
447
|
+
# same key with multiple resources.
|
448
|
+
# * Keys must start with a lowercase letter or international character.
|
408
449
|
# @param [Array<String>, String] udfs User-defined function resources
|
409
|
-
# used in
|
410
|
-
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
450
|
+
# used in a legacy SQL query. May be either a code resource to load from
|
451
|
+
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
411
452
|
# that contains code for a user-defined function (UDF). Providing an
|
412
453
|
# inline code resource is equivalent to providing a URI for a file
|
413
|
-
# containing the same code.
|
414
|
-
#
|
415
|
-
#
|
416
|
-
#
|
454
|
+
# containing the same code.
|
455
|
+
#
|
456
|
+
# This parameter is used for defining User Defined Function (UDF)
|
457
|
+
# resources only when using legacy SQL. Users of standard SQL should
|
458
|
+
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
|
459
|
+
# Routines API to define UDF resources.
|
460
|
+
#
|
461
|
+
# For additional information on migrating, see: [Migrating to
|
462
|
+
# standard SQL - Differences in user-defined JavaScript
|
463
|
+
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
|
464
|
+
# @param [Boolean] create_session If true, creates a new session, where the
|
465
|
+
# session ID will be a server generated random id. If false, runs query
|
466
|
+
# with an existing session ID when one is provided in the `session_id`
|
467
|
+
# param, otherwise runs query in non-session mode. See {Job#session_id}.
|
468
|
+
# The default value is false.
|
469
|
+
# @param [String] session_id The ID of an existing session. See also the
|
470
|
+
# `create_session` param and {Job#session_id}.
|
417
471
|
# @yield [job] a job configuration object
|
418
472
|
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
|
419
473
|
# configuration object for setting query options.
|
@@ -425,8 +479,7 @@ module Google
|
|
425
479
|
#
|
426
480
|
# bigquery = Google::Cloud::Bigquery.new
|
427
481
|
#
|
428
|
-
# job = bigquery.query_job "SELECT name FROM "
|
429
|
-
# "`my_project.my_dataset.my_table`"
|
482
|
+
# job = bigquery.query_job "SELECT name FROM `my_project.my_dataset.my_table`"
|
430
483
|
#
|
431
484
|
# job.wait_until_done!
|
432
485
|
# if !job.failed?
|
@@ -440,8 +493,7 @@ module Google
|
|
440
493
|
#
|
441
494
|
# bigquery = Google::Cloud::Bigquery.new
|
442
495
|
#
|
443
|
-
# job = bigquery.query_job "SELECT name FROM "
|
444
|
-
# " [my_project:my_dataset.my_table]",
|
496
|
+
# job = bigquery.query_job "SELECT name FROM [my_project:my_dataset.my_table]",
|
445
497
|
# legacy_sql: true
|
446
498
|
#
|
447
499
|
# job.wait_until_done!
|
@@ -456,9 +508,7 @@ module Google
|
|
456
508
|
#
|
457
509
|
# bigquery = Google::Cloud::Bigquery.new
|
458
510
|
#
|
459
|
-
# job = bigquery.query_job "SELECT name FROM "
|
460
|
-
# "`my_dataset.my_table`" \
|
461
|
-
# " WHERE id = ?",
|
511
|
+
# job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
|
462
512
|
# params: [1]
|
463
513
|
#
|
464
514
|
# job.wait_until_done!
|
@@ -473,9 +523,7 @@ module Google
|
|
473
523
|
#
|
474
524
|
# bigquery = Google::Cloud::Bigquery.new
|
475
525
|
#
|
476
|
-
# job = bigquery.query_job "SELECT name FROM "
|
477
|
-
# "`my_dataset.my_table`" \
|
478
|
-
# " WHERE id = @id",
|
526
|
+
# job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
|
479
527
|
# params: { id: 1 }
|
480
528
|
#
|
481
529
|
# job.wait_until_done!
|
@@ -485,18 +533,32 @@ module Google
|
|
485
533
|
# end
|
486
534
|
# end
|
487
535
|
#
|
536
|
+
# @example Query using named query parameters with types:
|
537
|
+
# require "google/cloud/bigquery"
|
538
|
+
#
|
539
|
+
# bigquery = Google::Cloud::Bigquery.new
|
540
|
+
#
|
541
|
+
# job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
|
542
|
+
# params: { ids: [] },
|
543
|
+
# types: { ids: [:INT64] }
|
544
|
+
#
|
545
|
+
# job.wait_until_done!
|
546
|
+
# if !job.failed?
|
547
|
+
# job.data.each do |row|
|
548
|
+
# puts row[:name]
|
549
|
+
# end
|
550
|
+
# end
|
551
|
+
#
|
488
552
|
# @example Execute a DDL statement:
|
489
553
|
# require "google/cloud/bigquery"
|
490
554
|
#
|
491
555
|
# bigquery = Google::Cloud::Bigquery.new
|
492
556
|
#
|
493
|
-
# job = bigquery.query_job "CREATE TABLE "
|
494
|
-
# "`my_dataset.my_table` " \
|
495
|
-
# "(x INT64)"
|
557
|
+
# job = bigquery.query_job "CREATE TABLE `my_dataset.my_table` (x INT64)"
|
496
558
|
#
|
497
559
|
# job.wait_until_done!
|
498
560
|
# if !job.failed?
|
499
|
-
# table_ref = job.ddl_target_table
|
561
|
+
# table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
|
500
562
|
# end
|
501
563
|
#
|
502
564
|
# @example Execute a DML statement:
|
@@ -504,10 +566,7 @@ module Google
|
|
504
566
|
#
|
505
567
|
# bigquery = Google::Cloud::Bigquery.new
|
506
568
|
#
|
507
|
-
# job = bigquery.query_job "UPDATE "
|
508
|
-
# "`my_dataset.my_table` " \
|
509
|
-
# "SET x = x + 1 " \
|
510
|
-
# "WHERE x IS NOT NULL"
|
569
|
+
# job = bigquery.query_job "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
|
511
570
|
#
|
512
571
|
# job.wait_until_done!
|
513
572
|
# if !job.failed?
|
@@ -538,23 +597,56 @@ module Google
|
|
538
597
|
# end
|
539
598
|
# end
|
540
599
|
#
|
541
|
-
def query_job query,
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
600
|
+
def query_job query,
|
601
|
+
params: nil,
|
602
|
+
types: nil,
|
603
|
+
external: nil,
|
604
|
+
priority: "INTERACTIVE",
|
605
|
+
cache: true,
|
606
|
+
table: nil,
|
607
|
+
create: nil,
|
608
|
+
write: nil,
|
609
|
+
dryrun: nil,
|
610
|
+
dataset: nil,
|
611
|
+
project: nil,
|
612
|
+
standard_sql: nil,
|
613
|
+
legacy_sql: nil,
|
614
|
+
large_results: nil,
|
615
|
+
flatten: nil,
|
616
|
+
maximum_billing_tier: nil,
|
617
|
+
maximum_bytes_billed: nil,
|
618
|
+
job_id: nil,
|
619
|
+
prefix: nil,
|
620
|
+
labels: nil,
|
621
|
+
udfs: nil,
|
622
|
+
create_session: nil,
|
623
|
+
session_id: nil
|
548
624
|
ensure_service!
|
549
|
-
options = {
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
625
|
+
options = {
|
626
|
+
params: params,
|
627
|
+
types: types,
|
628
|
+
external: external,
|
629
|
+
priority: priority,
|
630
|
+
cache: cache,
|
631
|
+
table: table,
|
632
|
+
create: create,
|
633
|
+
write: write,
|
634
|
+
dryrun: dryrun,
|
635
|
+
dataset: dataset,
|
636
|
+
project: (project || self.project),
|
637
|
+
standard_sql: standard_sql,
|
638
|
+
legacy_sql: legacy_sql,
|
639
|
+
large_results: large_results,
|
640
|
+
flatten: flatten,
|
641
|
+
maximum_billing_tier: maximum_billing_tier,
|
642
|
+
maximum_bytes_billed: maximum_bytes_billed,
|
643
|
+
job_id: job_id,
|
644
|
+
prefix: prefix,
|
645
|
+
labels: labels,
|
646
|
+
udfs: udfs,
|
647
|
+
create_session: create_session,
|
648
|
+
session_id: session_id
|
649
|
+
}
|
558
650
|
|
559
651
|
updater = QueryJob::Updater.from_options service, query, options
|
560
652
|
|
@@ -571,27 +663,6 @@ module Google
|
|
571
663
|
# as needed to complete the query. When used for executing DDL/DML
|
572
664
|
# statements, this method does not return row data.
|
573
665
|
#
|
574
|
-
# When using standard SQL and passing arguments using `params`, Ruby
|
575
|
-
# types are mapped to BigQuery types as follows:
|
576
|
-
#
|
577
|
-
# | BigQuery | Ruby | Notes |
|
578
|
-
# |-------------|----------------|---|
|
579
|
-
# | `BOOL` | `true`/`false` | |
|
580
|
-
# | `INT64` | `Integer` | |
|
581
|
-
# | `FLOAT64` | `Float` | |
|
582
|
-
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
583
|
-
# | `STRING` | `String` | |
|
584
|
-
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
585
|
-
# | `DATE` | `Date` | |
|
586
|
-
# | `TIMESTAMP` | `Time` | |
|
587
|
-
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
588
|
-
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
589
|
-
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
590
|
-
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
591
|
-
#
|
592
|
-
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
|
593
|
-
# for an overview of each BigQuery data type, including allowed values.
|
594
|
-
#
|
595
666
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
596
667
|
# {QueryJob::Updater#location=} in a block passed to this method.
|
597
668
|
#
|
@@ -601,13 +672,60 @@ module Google
|
|
601
672
|
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
602
673
|
# query to execute. Example: "SELECT count(f1) FROM
|
603
674
|
# [myProjectId:myDatasetId.myTableId]".
|
604
|
-
# @param [Array, Hash] params Standard SQL only. Used to pass query
|
605
|
-
#
|
606
|
-
#
|
607
|
-
#
|
608
|
-
#
|
609
|
-
#
|
610
|
-
#
|
675
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
676
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
677
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
678
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
|
679
|
+
# true.
|
680
|
+
#
|
681
|
+
# BigQuery types are converted from Ruby types as follows:
|
682
|
+
#
|
683
|
+
# | BigQuery | Ruby | Notes |
|
684
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
685
|
+
# | `BOOL` | `true`/`false` | |
|
686
|
+
# | `INT64` | `Integer` | |
|
687
|
+
# | `FLOAT64` | `Float` | |
|
688
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
689
|
+
# | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
690
|
+
# | `STRING` | `String` | |
|
691
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
692
|
+
# | `DATE` | `Date` | |
|
693
|
+
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
694
|
+
# | `TIMESTAMP` | `Time` | |
|
695
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
696
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
697
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
698
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
699
|
+
#
|
700
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
701
|
+
# of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
|
702
|
+
# GIS data](https://cloud.google.com/bigquery/docs/gis-data).
|
703
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
704
|
+
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
705
|
+
# specify the SQL type for these values.
|
706
|
+
#
|
707
|
+
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
|
708
|
+
# positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
|
709
|
+
# should be BigQuery type codes from the following list:
|
710
|
+
#
|
711
|
+
# * `:BOOL`
|
712
|
+
# * `:INT64`
|
713
|
+
# * `:FLOAT64`
|
714
|
+
# * `:NUMERIC`
|
715
|
+
# * `:BIGNUMERIC`
|
716
|
+
# * `:STRING`
|
717
|
+
# * `:DATETIME`
|
718
|
+
# * `:DATE`
|
719
|
+
# * `:GEOGRAPHY`
|
720
|
+
# * `:TIMESTAMP`
|
721
|
+
# * `:TIME`
|
722
|
+
# * `:BYTES`
|
723
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
724
|
+
# is specified as `[:INT64]`.
|
725
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
726
|
+
# match the `params` hash, and the values are the type codes that match the data.
|
727
|
+
#
|
728
|
+
# Types are optional.
|
611
729
|
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
612
730
|
# that represents the mapping of the external tables to the table
|
613
731
|
# names used in the SQL query. The hash keys are the table names, and
|
@@ -649,6 +767,8 @@ module Google
|
|
649
767
|
# When set to false, the values of `large_results` and `flatten` are
|
650
768
|
# ignored; the query will be run as if `large_results` is true and
|
651
769
|
# `flatten` is false. Optional. The default value is false.
|
770
|
+
# @param [String] session_id The ID of an existing session. See the
|
771
|
+
# `create_session` param in {#query_job} and {Job#session_id}.
|
652
772
|
# @yield [job] a job configuration object
|
653
773
|
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
|
654
774
|
# configuration object for setting additional options for the query.
|
@@ -663,9 +783,12 @@ module Google
|
|
663
783
|
# sql = "SELECT name FROM `my_project.my_dataset.my_table`"
|
664
784
|
# data = bigquery.query sql
|
665
785
|
#
|
786
|
+
# # Iterate over the first page of results
|
666
787
|
# data.each do |row|
|
667
788
|
# puts row[:name]
|
668
789
|
# end
|
790
|
+
# # Retrieve the next page of results
|
791
|
+
# data = data.next if data.next?
|
669
792
|
#
|
670
793
|
# @example Query using legacy SQL:
|
671
794
|
# require "google/cloud/bigquery"
|
@@ -675,9 +798,12 @@ module Google
|
|
675
798
|
# sql = "SELECT name FROM [my_project:my_dataset.my_table]"
|
676
799
|
# data = bigquery.query sql, legacy_sql: true
|
677
800
|
#
|
801
|
+
# # Iterate over the first page of results
|
678
802
|
# data.each do |row|
|
679
803
|
# puts row[:name]
|
680
804
|
# end
|
805
|
+
# # Retrieve the next page of results
|
806
|
+
# data = data.next if data.next?
|
681
807
|
#
|
682
808
|
# @example Retrieve all rows: (See {Data#all})
|
683
809
|
# require "google/cloud/bigquery"
|
@@ -695,28 +821,46 @@ module Google
|
|
695
821
|
#
|
696
822
|
# bigquery = Google::Cloud::Bigquery.new
|
697
823
|
#
|
698
|
-
# data = bigquery.query "SELECT name "
|
699
|
-
# "FROM `my_dataset.my_table`" \
|
700
|
-
# "WHERE id = ?",
|
824
|
+
# data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
|
701
825
|
# params: [1]
|
702
826
|
#
|
827
|
+
# # Iterate over the first page of results
|
703
828
|
# data.each do |row|
|
704
829
|
# puts row[:name]
|
705
830
|
# end
|
831
|
+
# # Retrieve the next page of results
|
832
|
+
# data = data.next if data.next?
|
706
833
|
#
|
707
834
|
# @example Query using named query parameters:
|
708
835
|
# require "google/cloud/bigquery"
|
709
836
|
#
|
710
837
|
# bigquery = Google::Cloud::Bigquery.new
|
711
838
|
#
|
712
|
-
# data = bigquery.query "SELECT name "
|
713
|
-
# "FROM `my_dataset.my_table`" \
|
714
|
-
# "WHERE id = @id",
|
839
|
+
# data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
|
715
840
|
# params: { id: 1 }
|
716
841
|
#
|
842
|
+
# # Iterate over the first page of results
|
717
843
|
# data.each do |row|
|
718
844
|
# puts row[:name]
|
719
845
|
# end
|
846
|
+
# # Retrieve the next page of results
|
847
|
+
# data = data.next if data.next?
|
848
|
+
#
|
849
|
+
# @example Query using named query parameters with types:
|
850
|
+
# require "google/cloud/bigquery"
|
851
|
+
#
|
852
|
+
# bigquery = Google::Cloud::Bigquery.new
|
853
|
+
#
|
854
|
+
# data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
|
855
|
+
# params: { ids: [] },
|
856
|
+
# types: { ids: [:INT64] }
|
857
|
+
#
|
858
|
+
# # Iterate over the first page of results
|
859
|
+
# data.each do |row|
|
860
|
+
# puts row[:name]
|
861
|
+
# end
|
862
|
+
# # Retrieve the next page of results
|
863
|
+
# data = data.next if data.next?
|
720
864
|
#
|
721
865
|
# @example Execute a DDL statement:
|
722
866
|
# require "google/cloud/bigquery"
|
@@ -725,16 +869,14 @@ module Google
|
|
725
869
|
#
|
726
870
|
# data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
|
727
871
|
#
|
728
|
-
# table_ref = data.ddl_target_table
|
872
|
+
# table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
|
729
873
|
#
|
730
874
|
# @example Execute a DML statement:
|
731
875
|
# require "google/cloud/bigquery"
|
732
876
|
#
|
733
877
|
# bigquery = Google::Cloud::Bigquery.new
|
734
878
|
#
|
735
|
-
# data = bigquery.query "UPDATE `my_dataset.my_table` "
|
736
|
-
# "SET x = x + 1 " \
|
737
|
-
# "WHERE x IS NOT NULL"
|
879
|
+
# data = bigquery.query "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
|
738
880
|
#
|
739
881
|
# puts data.num_dml_affected_rows
|
740
882
|
#
|
@@ -755,17 +897,36 @@ module Google
|
|
755
897
|
# query.table = dataset.table "my_table", skip_lookup: true
|
756
898
|
# end
|
757
899
|
#
|
900
|
+
# # Iterate over the first page of results
|
758
901
|
# data.each do |row|
|
759
902
|
# puts row[:name]
|
760
903
|
# end
|
761
|
-
#
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
904
|
+
# # Retrieve the next page of results
|
905
|
+
# data = data.next if data.next?
|
906
|
+
#
|
907
|
+
def query query,
|
908
|
+
params: nil,
|
909
|
+
types: nil,
|
910
|
+
external: nil,
|
911
|
+
max: nil,
|
912
|
+
cache: true,
|
913
|
+
dataset: nil,
|
914
|
+
project: nil,
|
915
|
+
standard_sql: nil,
|
916
|
+
legacy_sql: nil,
|
917
|
+
session_id: nil,
|
918
|
+
&block
|
919
|
+
job = query_job query,
|
920
|
+
params: params,
|
921
|
+
types: types,
|
922
|
+
external: external,
|
923
|
+
cache: cache,
|
924
|
+
dataset: dataset,
|
925
|
+
project: project,
|
926
|
+
standard_sql: standard_sql,
|
927
|
+
legacy_sql: legacy_sql,
|
928
|
+
session_id: session_id,
|
929
|
+
&block
|
769
930
|
job.wait_until_done!
|
770
931
|
|
771
932
|
if job.failed?
|
@@ -800,7 +961,7 @@ module Google
|
|
800
961
|
# The following values are supported:
|
801
962
|
#
|
802
963
|
# * `csv` - CSV
|
803
|
-
# * `json` - [Newline-delimited JSON](
|
964
|
+
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
|
804
965
|
# * `avro` - [Avro](http://avro.apache.org/)
|
805
966
|
# * `sheets` - Google Sheets
|
806
967
|
# * `datastore_backup` - Cloud Datastore backup
|
@@ -822,9 +983,12 @@ module Google
|
|
822
983
|
# data = bigquery.query "SELECT * FROM my_ext_table",
|
823
984
|
# external: { my_ext_table: csv_table }
|
824
985
|
#
|
986
|
+
# # Iterate over the first page of results
|
825
987
|
# data.each do |row|
|
826
988
|
# puts row[:name]
|
827
989
|
# end
|
990
|
+
# # Retrieve the next page of results
|
991
|
+
# data = data.next if data.next?
|
828
992
|
#
|
829
993
|
def external url, format: nil
|
830
994
|
ext = External.from_urls url, format
|
@@ -861,9 +1025,7 @@ module Google
|
|
861
1025
|
#
|
862
1026
|
def dataset dataset_id, skip_lookup: nil
|
863
1027
|
ensure_service!
|
864
|
-
if skip_lookup
|
865
|
-
return Dataset.new_reference project, dataset_id, service
|
866
|
-
end
|
1028
|
+
return Dataset.new_reference project, dataset_id, service if skip_lookup
|
867
1029
|
gapi = service.get_dataset dataset_id
|
868
1030
|
Dataset.from_gapi gapi, service
|
869
1031
|
rescue Google::Cloud::NotFoundError
|
@@ -874,14 +1036,13 @@ module Google
|
|
874
1036
|
# Creates a new dataset.
|
875
1037
|
#
|
876
1038
|
# @param [String] dataset_id A unique ID for this dataset, without the
|
877
|
-
# project name. The ID must contain only letters (
|
878
|
-
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
1039
|
+
# project name. The ID must contain only letters (`[A-Za-z]`), numbers
|
1040
|
+
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
879
1041
|
# @param [String] name A descriptive name for the dataset.
|
880
1042
|
# @param [String] description A user-friendly description of the
|
881
1043
|
# dataset.
|
882
1044
|
# @param [Integer] expiration The default lifetime of all tables in the
|
883
|
-
# dataset, in milliseconds. The minimum value is
|
884
|
-
# (one hour).
|
1045
|
+
# dataset, in milliseconds. The minimum value is `3_600_000` (one hour).
|
885
1046
|
# @param [String] location The geographic location where the dataset
|
886
1047
|
# should reside. Possible values include `EU` and `US`. The default
|
887
1048
|
# value is `US`.
|
@@ -990,8 +1151,7 @@ module Google
|
|
990
1151
|
#
|
991
1152
|
def datasets all: nil, filter: nil, token: nil, max: nil
|
992
1153
|
ensure_service!
|
993
|
-
|
994
|
-
gapi = service.list_datasets options
|
1154
|
+
gapi = service.list_datasets all: all, filter: filter, token: token, max: max
|
995
1155
|
Dataset::List.from_gapi gapi, service, all, filter, max
|
996
1156
|
end
|
997
1157
|
|
@@ -1029,18 +1189,22 @@ module Google
|
|
1029
1189
|
# part of the larger set of results to view. Optional.
|
1030
1190
|
# @param [Integer] max Maximum number of jobs to return. Optional.
|
1031
1191
|
# @param [String] filter A filter for job state. Optional.
|
1032
|
-
# @param [Time] min_created_at Min value for {Job#created_at}. When
|
1033
|
-
# provided, only jobs created after or at this time are returned.
|
1034
|
-
# Optional.
|
1035
|
-
# @param [Time] max_created_at Max value for {Job#created_at}. When
|
1036
|
-
# provided, only jobs created before or at this time are returned.
|
1037
|
-
# Optional.
|
1038
1192
|
#
|
1039
1193
|
# Acceptable values are:
|
1040
1194
|
#
|
1041
1195
|
# * `done` - Finished jobs
|
1042
1196
|
# * `pending` - Pending jobs
|
1043
1197
|
# * `running` - Running jobs
|
1198
|
+
# @param [Time] min_created_at Min value for {Job#created_at}. When
|
1199
|
+
# provided, only jobs created after or at this time are returned.
|
1200
|
+
# Optional.
|
1201
|
+
# @param [Time] max_created_at Max value for {Job#created_at}. When
|
1202
|
+
# provided, only jobs created before or at this time are returned.
|
1203
|
+
# Optional.
|
1204
|
+
# @param [Google::Cloud::Bigquery::Job, String] parent_job A job
|
1205
|
+
# object or a job ID. If set, retrieve only child jobs of the
|
1206
|
+
# specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
|
1207
|
+
# and {Job#parent_job_id}.
|
1044
1208
|
#
|
1045
1209
|
# @return [Array<Google::Cloud::Bigquery::Job>] (See
|
1046
1210
|
# {Google::Cloud::Bigquery::Job::List})
|
@@ -1089,15 +1253,63 @@ module Google
|
|
1089
1253
|
# # process job
|
1090
1254
|
# end
|
1091
1255
|
#
|
1092
|
-
|
1093
|
-
|
1256
|
+
# @example Retrieve child jobs by setting `parent_job`:
|
1257
|
+
# require "google/cloud/bigquery"
|
1258
|
+
#
|
1259
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1260
|
+
#
|
1261
|
+
# multi_statement_sql = <<~SQL
|
1262
|
+
# -- Declare a variable to hold names as an array.
|
1263
|
+
# DECLARE top_names ARRAY<STRING>;
|
1264
|
+
# -- Build an array of the top 100 names from the year 2017.
|
1265
|
+
# SET top_names = (
|
1266
|
+
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
|
1267
|
+
# FROM `bigquery-public-data.usa_names.usa_1910_current`
|
1268
|
+
# WHERE year = 2017
|
1269
|
+
# );
|
1270
|
+
# -- Which names appear as words in Shakespeare's plays?
|
1271
|
+
# SELECT
|
1272
|
+
# name AS shakespeare_name
|
1273
|
+
# FROM UNNEST(top_names) AS name
|
1274
|
+
# WHERE name IN (
|
1275
|
+
# SELECT word
|
1276
|
+
# FROM `bigquery-public-data.samples.shakespeare`
|
1277
|
+
# );
|
1278
|
+
# SQL
|
1279
|
+
#
|
1280
|
+
# job = bigquery.query_job multi_statement_sql
|
1281
|
+
#
|
1282
|
+
# job.wait_until_done!
|
1283
|
+
#
|
1284
|
+
# child_jobs = bigquery.jobs parent_job: job
|
1285
|
+
#
|
1286
|
+
# child_jobs.each do |child_job|
|
1287
|
+
# script_statistics = child_job.script_statistics
|
1288
|
+
# puts script_statistics.evaluation_kind
|
1289
|
+
# script_statistics.stack_frames.each do |stack_frame|
|
1290
|
+
# puts stack_frame.text
|
1291
|
+
# end
|
1292
|
+
# end
|
1293
|
+
#
|
1294
|
+
def jobs all: nil,
|
1295
|
+
token: nil,
|
1296
|
+
max: nil,
|
1297
|
+
filter: nil,
|
1298
|
+
min_created_at: nil,
|
1299
|
+
max_created_at: nil,
|
1300
|
+
parent_job: nil
|
1094
1301
|
ensure_service!
|
1302
|
+
parent_job = parent_job.job_id if parent_job.is_a? Job
|
1095
1303
|
options = {
|
1096
|
-
|
1097
|
-
|
1304
|
+
parent_job_id: parent_job,
|
1305
|
+
all: all,
|
1306
|
+
token: token,
|
1307
|
+
max: max, filter: filter,
|
1308
|
+
min_created_at: min_created_at,
|
1309
|
+
max_created_at: max_created_at
|
1098
1310
|
}
|
1099
|
-
gapi = service.list_jobs
|
1100
|
-
Job::List.from_gapi gapi, service, options
|
1311
|
+
gapi = service.list_jobs(**options)
|
1312
|
+
Job::List.from_gapi gapi, service, **options
|
1101
1313
|
end
|
1102
1314
|
|
1103
1315
|
##
|
@@ -1143,8 +1355,7 @@ module Google
|
|
1143
1355
|
#
|
1144
1356
|
def projects token: nil, max: nil
|
1145
1357
|
ensure_service!
|
1146
|
-
|
1147
|
-
gapi = service.list_projects options
|
1358
|
+
gapi = service.list_projects token: token, max: max
|
1148
1359
|
Project::List.from_gapi gapi, service, max
|
1149
1360
|
end
|
1150
1361
|
|
@@ -1165,14 +1376,15 @@ module Google
|
|
1165
1376
|
# bigquery = Google::Cloud::Bigquery.new
|
1166
1377
|
#
|
1167
1378
|
# fourpm = bigquery.time 16, 0, 0
|
1168
|
-
# data = bigquery.query "SELECT name "
|
1169
|
-
# "FROM `my_dataset.my_table`" \
|
1170
|
-
# "WHERE time_of_date = @time",
|
1379
|
+
# data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date = @time",
|
1171
1380
|
# params: { time: fourpm }
|
1172
1381
|
#
|
1382
|
+
# # Iterate over the first page of results
|
1173
1383
|
# data.each do |row|
|
1174
1384
|
# puts row[:name]
|
1175
1385
|
# end
|
1386
|
+
# # Retrieve the next page of results
|
1387
|
+
# data = data.next if data.next?
|
1176
1388
|
#
|
1177
1389
|
# @example Create Time with fractional seconds:
|
1178
1390
|
# require "google/cloud/bigquery"
|
@@ -1180,14 +1392,15 @@ module Google
|
|
1180
1392
|
# bigquery = Google::Cloud::Bigquery.new
|
1181
1393
|
#
|
1182
1394
|
# precise_time = bigquery.time 16, 35, 15.376541
|
1183
|
-
# data = bigquery.query "SELECT name "
|
1184
|
-
# "FROM `my_dataset.my_table`" \
|
1185
|
-
# "WHERE time_of_date >= @time",
|
1395
|
+
# data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date >= @time",
|
1186
1396
|
# params: { time: precise_time }
|
1187
1397
|
#
|
1398
|
+
# # Iterate over the first page of results
|
1188
1399
|
# data.each do |row|
|
1189
1400
|
# puts row[:name]
|
1190
1401
|
# end
|
1402
|
+
# # Retrieve the next page of results
|
1403
|
+
# data = data.next if data.next?
|
1191
1404
|
#
|
1192
1405
|
def time hour, minute, second
|
1193
1406
|
Bigquery::Time.new "#{hour}:#{minute}:#{second}"
|
@@ -1304,49 +1517,61 @@ module Google
|
|
1304
1517
|
end
|
1305
1518
|
|
1306
1519
|
##
|
1307
|
-
# Extracts the data from
|
1308
|
-
#
|
1309
|
-
#
|
1310
|
-
# calling {Job#reload!} and {Job#done?} to detect when the job
|
1311
|
-
# or simply block until the job is done by calling
|
1520
|
+
# Extracts the data from a table or exports a model to Google Cloud Storage
|
1521
|
+
# asynchronously, immediately returning an {ExtractJob} that can be used to
|
1522
|
+
# track the progress of the export job. The caller may poll the service by
|
1523
|
+
# repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
|
1524
|
+
# is done, or simply block until the job is done by calling
|
1312
1525
|
# {Job#wait_until_done!}. See {#extract} for the synchronous version.
|
1313
|
-
#
|
1314
|
-
#
|
1526
|
+
#
|
1527
|
+
# Use this method instead of {Table#extract_job} or {Model#extract_job} to
|
1528
|
+
# extract data from source tables or models in other projects.
|
1315
1529
|
#
|
1316
1530
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1317
1531
|
# {ExtractJob::Updater#location=} in a block passed to this method.
|
1318
1532
|
#
|
1319
|
-
# @see https://cloud.google.com/bigquery/exporting-data
|
1320
|
-
# Exporting
|
1533
|
+
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
1534
|
+
# Exporting table data
|
1535
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
1536
|
+
# Exporting models
|
1321
1537
|
#
|
1322
|
-
# @param [
|
1323
|
-
#
|
1324
|
-
# [Standard SQL Query
|
1538
|
+
# @param [Table, Model, String] source The source table or model for
|
1539
|
+
# the extract operation. This can be a table or model object; or a
|
1540
|
+
# table ID string as specified by the [Standard SQL Query
|
1325
1541
|
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
|
1326
1542
|
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
|
1327
1543
|
# Reference](https://cloud.google.com/bigquery/query-reference#from)
|
1328
1544
|
# (`project-name:dataset_id.table_id`).
|
1329
1545
|
# @param [Google::Cloud::Storage::File, String, Array<String>]
|
1330
1546
|
# extract_url The Google Storage file or file URI pattern(s) to which
|
1331
|
-
# BigQuery should extract
|
1332
|
-
#
|
1333
|
-
#
|
1547
|
+
# BigQuery should extract. For a model export this value should be a
|
1548
|
+
# string ending in an object name prefix, since multiple objects will
|
1549
|
+
# be exported.
|
1550
|
+
# @param [String] format The exported file format. The default value for
|
1551
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
1552
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
1334
1553
|
#
|
1335
|
-
#
|
1554
|
+
# Supported values for tables:
|
1336
1555
|
#
|
1337
1556
|
# * `csv` - CSV
|
1338
|
-
# * `json` - [Newline-delimited JSON](
|
1557
|
+
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
|
1339
1558
|
# * `avro` - [Avro](http://avro.apache.org/)
|
1559
|
+
#
|
1560
|
+
# Supported values for models:
|
1561
|
+
#
|
1562
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
1563
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
1340
1564
|
# @param [String] compression The compression type to use for exported
|
1341
1565
|
# files. Possible values include `GZIP` and `NONE`. The default value
|
1342
|
-
# is `NONE`.
|
1566
|
+
# is `NONE`. Not applicable when extracting models.
|
1343
1567
|
# @param [String] delimiter Delimiter to use between fields in the
|
1344
|
-
# exported data. Default is
|
1345
|
-
#
|
1346
|
-
#
|
1568
|
+
# exported table data. Default is `,`. Not applicable when extracting
|
1569
|
+
# models.
|
1570
|
+
# @param [Boolean] header Whether to print out a header row in table
|
1571
|
+
# exports. Default is `true`. Not applicable when extracting models.
|
1347
1572
|
# @param [String] job_id A user-defined ID for the extract job. The ID
|
1348
|
-
# must contain only letters (
|
1349
|
-
# (_), or dashes (
|
1573
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
1574
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
1350
1575
|
# `job_id` is provided, then `prefix` will not be used.
|
1351
1576
|
#
|
1352
1577
|
# See [Generating a job
|
@@ -1355,48 +1580,65 @@ module Google
|
|
1355
1580
|
# prepended to a generated value to produce a unique job ID. For
|
1356
1581
|
# example, the prefix `daily_import_job_` can be given to generate a
|
1357
1582
|
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
1358
|
-
# prefix must contain only letters (
|
1359
|
-
# underscores (_), or dashes (
|
1583
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
1584
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
1360
1585
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1361
1586
|
# be used.
|
1362
1587
|
# @param [Hash] labels A hash of user-provided labels associated with
|
1363
|
-
# the job. You can use these to organize and group your jobs.
|
1364
|
-
#
|
1365
|
-
#
|
1366
|
-
#
|
1367
|
-
#
|
1368
|
-
#
|
1369
|
-
#
|
1588
|
+
# the job. You can use these to organize and group your jobs.
|
1589
|
+
#
|
1590
|
+
# The labels applied to a resource must meet the following requirements:
|
1591
|
+
#
|
1592
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1593
|
+
# * Each label must be a key-value pair.
|
1594
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1595
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1596
|
+
# a maximum length of 63 characters.
|
1597
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1598
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1599
|
+
# international characters are allowed.
|
1600
|
+
# * The key portion of a label must be unique. However, you can use the
|
1601
|
+
# same key with multiple resources.
|
1602
|
+
# * Keys must start with a lowercase letter or international character.
|
1370
1603
|
# @yield [job] a job configuration object
|
1371
1604
|
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
1372
1605
|
# configuration object for setting additional options.
|
1373
1606
|
#
|
1374
1607
|
# @return [Google::Cloud::Bigquery::ExtractJob]
|
1375
1608
|
#
|
1376
|
-
# @example
|
1609
|
+
# @example Export table data
|
1377
1610
|
# require "google/cloud/bigquery"
|
1378
1611
|
#
|
1379
1612
|
# bigquery = Google::Cloud::Bigquery.new
|
1380
1613
|
#
|
1381
1614
|
# table_id = "bigquery-public-data.samples.shakespeare"
|
1382
|
-
# extract_job = bigquery.extract_job table_id,
|
1383
|
-
# "gs://my-bucket/shakespeare.csv"
|
1615
|
+
# extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
|
1384
1616
|
# extract_job.wait_until_done!
|
1385
1617
|
# extract_job.done? #=> true
|
1386
1618
|
#
|
1619
|
+
# @example Export a model
|
1620
|
+
# require "google/cloud/bigquery"
|
1621
|
+
#
|
1622
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1623
|
+
# dataset = bigquery.dataset "my_dataset"
|
1624
|
+
# model = dataset.model "my_model"
|
1625
|
+
#
|
1626
|
+
# extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
|
1627
|
+
#
|
1387
1628
|
# @!group Data
|
1388
1629
|
#
|
1389
|
-
def extract_job
|
1390
|
-
|
1391
|
-
labels: nil
|
1630
|
+
def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
|
1631
|
+
prefix: nil, labels: nil
|
1392
1632
|
ensure_service!
|
1393
|
-
options = { format: format, compression: compression,
|
1394
|
-
delimiter: delimiter, header: header, job_id: job_id,
|
1633
|
+
options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
|
1395
1634
|
prefix: prefix, labels: labels }
|
1635
|
+
source_ref = if source.respond_to? :model_ref
|
1636
|
+
source.model_ref
|
1637
|
+
else
|
1638
|
+
Service.get_table_ref source, default_ref: project_ref
|
1639
|
+
end
|
1396
1640
|
|
1397
|
-
|
1398
|
-
updater = ExtractJob::Updater.from_options service, table_ref,
|
1399
|
-
extract_url, options
|
1641
|
+
updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
|
1400
1642
|
|
1401
1643
|
yield updater if block_given?
|
1402
1644
|
|
@@ -1406,51 +1648,63 @@ module Google
|
|
1406
1648
|
end
|
1407
1649
|
|
1408
1650
|
##
|
1409
|
-
# Extracts the data from
|
1410
|
-
#
|
1651
|
+
# Extracts the data from a table or exports a model to Google Cloud Storage
|
1652
|
+
# using a synchronous method that blocks for a response. Timeouts
|
1411
1653
|
# and transient errors are generally handled as needed to complete the
|
1412
|
-
# job. See {#extract_job} for the asynchronous version.
|
1413
|
-
#
|
1414
|
-
#
|
1654
|
+
# job. See {#extract_job} for the asynchronous version.
|
1655
|
+
#
|
1656
|
+
# Use this method instead of {Table#extract} or {Model#extract} to
|
1657
|
+
# extract data from source tables or models in other projects.
|
1415
1658
|
#
|
1416
1659
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1417
1660
|
# {ExtractJob::Updater#location=} in a block passed to this method.
|
1418
1661
|
#
|
1419
|
-
# @see https://cloud.google.com/bigquery/exporting-data
|
1420
|
-
# Exporting
|
1662
|
+
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
1663
|
+
# Exporting table data
|
1664
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
1665
|
+
# Exporting models
|
1421
1666
|
#
|
1422
|
-
# @param [
|
1423
|
-
#
|
1424
|
-
# [Standard SQL Query
|
1667
|
+
# @param [Table, Model, String] source The source table or model for
|
1668
|
+
# the extract operation. This can be a table or model object; or a
|
1669
|
+
# table ID string as specified by the [Standard SQL Query
|
1425
1670
|
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
|
1426
1671
|
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
|
1427
1672
|
# Reference](https://cloud.google.com/bigquery/query-reference#from)
|
1428
1673
|
# (`project-name:dataset_id.table_id`).
|
1429
1674
|
# @param [Google::Cloud::Storage::File, String, Array<String>]
|
1430
1675
|
# extract_url The Google Storage file or file URI pattern(s) to which
|
1431
|
-
# BigQuery should extract
|
1432
|
-
#
|
1433
|
-
#
|
1676
|
+
# BigQuery should extract. For a model export this value should be a
|
1677
|
+
# string ending in an object name prefix, since multiple objects will
|
1678
|
+
# be exported.
|
1679
|
+
# @param [String] format The exported file format. The default value for
|
1680
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
1681
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
1434
1682
|
#
|
1435
|
-
#
|
1683
|
+
# Supported values for tables:
|
1436
1684
|
#
|
1437
1685
|
# * `csv` - CSV
|
1438
|
-
# * `json` - [Newline-delimited JSON](
|
1686
|
+
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
|
1439
1687
|
# * `avro` - [Avro](http://avro.apache.org/)
|
1688
|
+
#
|
1689
|
+
# Supported values for models:
|
1690
|
+
#
|
1691
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
1692
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
1440
1693
|
# @param [String] compression The compression type to use for exported
|
1441
1694
|
# files. Possible values include `GZIP` and `NONE`. The default value
|
1442
|
-
# is `NONE`.
|
1695
|
+
# is `NONE`. Not applicable when extracting models.
|
1443
1696
|
# @param [String] delimiter Delimiter to use between fields in the
|
1444
|
-
# exported data. Default is
|
1445
|
-
#
|
1446
|
-
#
|
1697
|
+
# exported table data. Default is `,`. Not applicable when extracting
|
1698
|
+
# models.
|
1699
|
+
# @param [Boolean] header Whether to print out a header row in table
|
1700
|
+
# exports. Default is `true`. Not applicable when extracting models.
|
1447
1701
|
# @yield [job] a job configuration object
|
1448
1702
|
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
|
1449
1703
|
# configuration object for setting additional options.
|
1450
1704
|
#
|
1451
1705
|
# @return [Boolean] Returns `true` if the extract operation succeeded.
|
1452
1706
|
#
|
1453
|
-
# @example
|
1707
|
+
# @example Export table data
|
1454
1708
|
# require "google/cloud/bigquery"
|
1455
1709
|
#
|
1456
1710
|
# bigquery = Google::Cloud::Bigquery.new
|
@@ -1458,12 +1712,19 @@ module Google
|
|
1458
1712
|
# bigquery.extract "bigquery-public-data.samples.shakespeare",
|
1459
1713
|
# "gs://my-bucket/shakespeare.csv"
|
1460
1714
|
#
|
1715
|
+
# @example Export a model
|
1716
|
+
# require "google/cloud/bigquery"
|
1717
|
+
#
|
1718
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1719
|
+
# dataset = bigquery.dataset "my_dataset"
|
1720
|
+
# model = dataset.model "my_model"
|
1721
|
+
#
|
1722
|
+
# bigquery.extract model, "gs://my-bucket/#{model.model_id}"
|
1723
|
+
#
|
1461
1724
|
# @!group Data
|
1462
1725
|
#
|
1463
|
-
def extract
|
1464
|
-
|
1465
|
-
job = extract_job table,
|
1466
|
-
extract_url,
|
1726
|
+
def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
|
1727
|
+
job = extract_job source, extract_url,
|
1467
1728
|
format: format,
|
1468
1729
|
compression: compression,
|
1469
1730
|
delimiter: delimiter,
|
@@ -1487,9 +1748,7 @@ module Google
|
|
1487
1748
|
|
1488
1749
|
# TODO: remove `Integer` and set normally after migrating to Gax or
|
1489
1750
|
# to google-api-client 0.10 (See google/google-api-ruby-client#439)
|
1490
|
-
if gapi.numeric_id
|
1491
|
-
p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id)
|
1492
|
-
end
|
1751
|
+
p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
|
1493
1752
|
end
|
1494
1753
|
end
|
1495
1754
|
|