google-cloud-bigquery 1.14.0 → 1.42.0
This diff shows the changes between these two publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
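The hunks that follow are all from `data/lib/google/cloud/bigquery/project.rb`. For consumers tracking this upgrade, a minimal Gemfile sketch (the only assumption is the `~> 1.42` pin):

```ruby
# Gemfile — pin the gem to the new release line covered by this diff.
source "https://rubygems.org"

gem "google-cloud-bigquery", "~> 1.42"
```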
@@ -56,7 +56,8 @@ module Google
         # @private The Service object.
         attr_accessor :service
 
-        attr_reader :name
+        attr_reader :name
+        attr_reader :numeric_id
 
         ##
         # Creates a new Service instance.
@@ -91,8 +92,7 @@ module Google
         # @return [String] The service account email address.
         #
         def service_account_email
-          @service_account_email ||=
-            service.project_service_account.email
+          @service_account_email ||= service.project_service_account.email
         end
 
         ##
@@ -139,8 +139,8 @@ module Google
         # * `empty` - An error will be returned if the destination table
         #   already contains data.
         # @param [String] job_id A user-defined ID for the copy job. The ID
-        #   must contain only letters ([A-Za-z]), numbers ([0-9]), underscores
-        #   (_), or dashes (-). The maximum length is 1,024 characters. If
+        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
         #   `job_id` is provided, then `prefix` will not be used.
         #
         # See [Generating a job
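A minimal sketch of the `job_id` rule documented above, applied to `copy_job`; the dataset and table names are hypothetical:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Letters, numbers, underscores, or dashes only; at most 1,024 characters.
# When job_id is given, any `prefix` option is ignored.
job = bigquery.copy_job dataset.table("my_source"), dataset.table("my_target"),
                        job_id: "daily_copy_2021-06-01"
job.wait_until_done!
```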
@@ -149,18 +149,26 @@ module Google
         #   prepended to a generated value to produce a unique job ID. For
         #   example, the prefix `daily_import_job_` can be given to generate a
         #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
-        #   prefix must contain only letters ([A-Za-z]), numbers ([0-9]),
-        #   underscores (_), or dashes (-). The maximum length of the entire ID
+        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
         #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
         #   be used.
         # @param [Hash] labels A hash of user-provided labels associated with
-        #   the job. You can use these to organize and group your jobs.
-        #   Label keys and values can be no longer than 63 characters,
-        #   can only contain lowercase letters, numeric characters,
-        #   underscores and dashes. International characters are
-        #   allowed. Label values are optional. Label keys must start
-        #   with a letter and each label in the list must have a
-        #   different key.
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
         #   configuration object for setting additional options.
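A sketch of the label requirements listed above, again with hypothetical dataset and table names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Keys and values: lowercase letters, digits, underscores, and dashes;
# 63 characters max each, at most 64 labels, keys unique per resource.
job = bigquery.copy_job dataset.table("my_source"), dataset.table("my_target"),
                        labels: { "team" => "analytics", "env" => "prod" }
job.wait_until_done!
```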
@@ -182,11 +190,9 @@ module Google
         #
         # @!group Data
         #
-        def copy_job source_table, destination_table, create: nil, write: nil,
-                     job_id: nil, prefix: nil, labels: nil
+        def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
           ensure_service!
-          options = { create: create, write: write, labels: labels,
-                      job_id: job_id, prefix: prefix }
+          options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }
 
           updater = CopyJob::Updater.from_options(
             service,
@@ -261,13 +267,8 @@ module Google
         #
         # @!group Data
         #
-        def copy source_table, destination_table, create: nil, write: nil,
-                 &block
-          job = copy_job source_table,
-                         destination_table,
-                         create: create,
-                         write: write,
-                         &block
+        def copy source_table, destination_table, create: nil, write: nil, &block
+          job = copy_job source_table, destination_table, create: create, write: write, &block
           job.wait_until_done!
           ensure_job_succeeded! job
           true
@@ -277,27 +278,6 @@ module Google
         # Queries data by creating a [query
         # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
         #
-        # When using standard SQL and passing arguments using `params`, Ruby
-        # types are mapped to BigQuery types as follows:
-        #
-        # | BigQuery    | Ruby           | Notes |
-        # |-------------|----------------|---|
-        # | `BOOL`      | `true`/`false` | |
-        # | `INT64`     | `Integer`      | |
-        # | `FLOAT64`   | `Float`        | |
-        # | `NUMERIC`   | `BigDecimal`   | Will be rounded to 9 decimal places |
-        # | `STRING`    | `String`       | |
-        # | `DATETIME`  | `DateTime`     | `DATETIME` does not support time zone. |
-        # | `DATE`      | `Date`         | |
-        # | `TIMESTAMP` | `Time`         | |
-        # | `TIME`      | `Google::Cloud::BigQuery::Time` | |
-        # | `BYTES`     | `File`, `IO`, `StringIO`, or similar | |
-        # | `ARRAY`     | `Array`        | Nested arrays, `nil` values are not supported. |
-        # | `STRUCT`    | `Hash`         | Hash keys may be strings or symbols. |
-        #
-        # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
-        # for an overview of each BigQuery data type, including allowed values.
-        #
         # The geographic location for the job ("US", "EU", etc.) can be set via
         # {QueryJob::Updater#location=} in a block passed to this method.
         #
@@ -305,13 +285,60 @@ module Google
         #   syntax](https://cloud.google.com/bigquery/query-reference), of the
         #   query to execute. Example: "SELECT count(f1) FROM
         #   [myProjectId:myDatasetId.myTableId]".
-        # @param [Array, Hash] params Standard SQL only. Used to pass query
-        #   arguments when the `query` string contains either positional (`?`)
-        #   or named (`@myparam`) query parameters. If value passed is an array
-        #   `["foo"]`, the query must use positional query parameters. If
-        #   value passed is a hash `{ myparam: "foo" }`, the query must use
-        #   named query parameters. When set, `legacy_sql` will automatically
-        #   be set to false and `standard_sql` to true.
+        # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+        #   either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
+        #   query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
+        #   use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+        #   true.
+        #
+        #   BigQuery types are converted from Ruby types as follows:
+        #
+        #   | BigQuery     | Ruby                                 | Notes                                              |
+        #   |--------------|--------------------------------------|----------------------------------------------------|
+        #   | `BOOL`       | `true`/`false`                       |                                                    |
+        #   | `INT64`      | `Integer`                            |                                                    |
+        #   | `FLOAT64`    | `Float`                              |                                                    |
+        #   | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+        #   | `BIGNUMERIC` | `BigDecimal`                         | NOT AUTOMATIC: Must be mapped using `types`, below.|
+        #   | `STRING`     | `String`                             |                                                    |
+        #   | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+        #   | `DATE`       | `Date`                               |                                                    |
+        #   | `GEOGRAPHY`  | `String` (WKT or GeoJSON)            | NOT AUTOMATIC: Must be mapped using `types`, below.|
+        #   | `TIMESTAMP`  | `Time`                               |                                                    |
+        #   | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+        #   | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+        #   | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+        #   | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+        #
+        #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+        #   of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+        #   GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+        # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+        #   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+        #   specify the SQL type for these values.
+        #
+        #   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+        #   positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+        #   should be BigQuery type codes from the following list:
+        #
+        #   * `:BOOL`
+        #   * `:INT64`
+        #   * `:FLOAT64`
+        #   * `:NUMERIC`
+        #   * `:BIGNUMERIC`
+        #   * `:STRING`
+        #   * `:DATETIME`
+        #   * `:DATE`
+        #   * `:GEOGRAPHY`
+        #   * `:TIMESTAMP`
+        #   * `:TIME`
+        #   * `:BYTES`
+        #   * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+        #     are specified as `[:INT64]`.
+        #   * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+        #     match the `params` hash, and the values are the types value that matches the data.
+        #
+        #   Types are optional.
         # @param [Hash<String|Symbol, External::DataSource>] external A Hash
         #   that represents the mapping of the external tables to the table
         #   names used in the SQL query. The hash keys are the table names, and
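Per the table above, `BIGNUMERIC` (like `GEOGRAPHY`) is not inferred from a Ruby value, so a sketch of the new `types` mapping; the column alias is an assumption:

```ruby
require "bigdecimal"
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# A bare BigDecimal would be sent as NUMERIC (scale 9); declaring the
# type via `types` sends it as BIGNUMERIC instead.
job = bigquery.query_job "SELECT @value AS big_value",
                         params: { value: BigDecimal("123456789.123456789123") },
                         types:  { value: :BIGNUMERIC }
job.wait_until_done!
job.data.each { |row| puts row[:big_value] }
```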
@@ -375,13 +402,19 @@ module Google
         #   Flattens all nested and repeated fields in the query results. The
         #   default value is `true`. `large_results` parameter must be `true` if
         #   this is set to `false`.
+        # @param [Integer] maximum_billing_tier Limits the billing tier for this
+        #   job. Queries that have resource usage beyond this tier will fail
+        #   (without incurring a charge). WARNING: The billed byte amount can be
+        #   multiplied by an amount up to this number! Most users should not need
+        #   to alter this setting, and we recommend that you avoid introducing new
+        #   uses of it. Deprecated.
         # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
         #   job. Queries that will have bytes billed beyond this limit will fail
         #   (without incurring a charge). Optional. If unspecified, this will be
         #   set to your project default.
         # @param [String] job_id A user-defined ID for the query job. The ID
-        #   must contain only letters ([A-Za-z]), numbers ([0-9]), underscores
-        #   (_), or dashes (-). The maximum length is 1,024 characters. If
+        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
         #   `job_id` is provided, then `prefix` will not be used.
         #
         # See [Generating a job
@@ -390,30 +423,51 @@ module Google
         #   prepended to a generated value to produce a unique job ID. For
         #   example, the prefix `daily_import_job_` can be given to generate a
         #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
-        #   prefix must contain only letters ([A-Za-z]), numbers ([0-9]),
-        #   underscores (_), or dashes (-). The maximum length of the entire ID
+        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
         #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
         #   be used.
         #
         # See [Generating a job
         # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
         # @param [Hash] labels A hash of user-provided labels associated with
-        #   the job. You can use these to organize and group your jobs.
-        #   Label keys and values can be no longer than 63 characters,
-        #   can only contain lowercase letters, numeric characters,
-        #   underscores and dashes. International characters are
-        #   allowed. Label values are optional. Label keys must start
-        #   with a letter and each label in the list must have a
-        #   different key.
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         # @param [Array<String>, String] udfs User-defined function resources
-        #   used in the query. May be either a code resource to load from a
-        #   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+        #   used in a legacy SQL query. May be either a code resource to load from
+        #   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
         #   that contains code for a user-defined function (UDF). Providing an
         #   inline code resource is equivalent to providing a URI for a file
-        #   containing the same code.
-        #
-        #
-        #
+        #   containing the same code.
+        #
+        #   This parameter is used for defining User Defined Function (UDF)
+        #   resources only when using legacy SQL. Users of standard SQL should
+        #   leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+        #   Routines API to define UDF resources.
+        #
+        #   For additional information on migrating, see: [Migrating to
+        #   standard SQL - Differences in user-defined JavaScript
+        #   functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
+        # @param [Boolean] create_session If true, creates a new session, where the
+        #   session ID will be a server generated random id. If false, runs query
+        #   with an existing session ID when one is provided in the `session_id`
+        #   param, otherwise runs query in non-session mode. See {Job#session_id}.
+        #   The default value is false.
+        # @param [String] session_id The ID of an existing session. See also the
+        #   `create_session` param and {Job#session_id}.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
         #   configuration object for setting query options.
@@ -425,8 +479,7 @@ module Google
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   job = bigquery.query_job "SELECT name FROM " \
-        #                            "`my_project.my_dataset.my_table`"
+        #   job = bigquery.query_job "SELECT name FROM `my_project.my_dataset.my_table`"
         #
         #   job.wait_until_done!
         #   if !job.failed?
@@ -440,8 +493,7 @@ module Google
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   job = bigquery.query_job "SELECT name FROM " \
-        #                            "   [my_project:my_dataset.my_table]",
+        #   job = bigquery.query_job "SELECT name FROM [my_project:my_dataset.my_table]",
         #                            legacy_sql: true
         #
         #   job.wait_until_done!
@@ -456,9 +508,7 @@ module Google
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   job = bigquery.query_job "SELECT name FROM " \
-        #                            "`my_dataset.my_table`" \
-        #                            " WHERE id = ?",
+        #   job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
         #                            params: [1]
         #
         #   job.wait_until_done!
@@ -473,9 +523,7 @@ module Google
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   job = bigquery.query_job "SELECT name FROM " \
-        #                            "`my_dataset.my_table`" \
-        #                            " WHERE id = @id",
+        #   job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
         #                            params: { id: 1 }
         #
         #   job.wait_until_done!
@@ -485,18 +533,32 @@ module Google
         #     end
         #   end
         #
+        # @example Query using named query parameters with types:
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
+        #                            params: { ids: [] },
+        #                            types: { ids: [:INT64] }
+        #
+        #   job.wait_until_done!
+        #   if !job.failed?
+        #     job.data.each do |row|
+        #       puts row[:name]
+        #     end
+        #   end
+        #
         # @example Execute a DDL statement:
         #   require "google/cloud/bigquery"
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   job = bigquery.query_job "CREATE TABLE " \
-        #                            "`my_dataset.my_table` " \
-        #                            "(x INT64)"
+        #   job = bigquery.query_job "CREATE TABLE `my_dataset.my_table` (x INT64)"
         #
         #   job.wait_until_done!
         #   if !job.failed?
-        #     table_ref = job.ddl_target_table
+        #     table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
         #   end
         #
         # @example Execute a DML statement:
@@ -504,10 +566,7 @@ module Google
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   job = bigquery.query_job "UPDATE " \
-        #                            "`my_dataset.my_table` " \
-        #                            "SET x = x + 1 " \
-        #                            "WHERE x IS NOT NULL"
+        #   job = bigquery.query_job "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
         #
         #   job.wait_until_done!
         #   if !job.failed?
@@ -538,23 +597,56 @@ module Google
         #     end
         #   end
         #
-        def query_job query, params: nil, external: nil,
-                      priority: "INTERACTIVE", cache: true, table: nil,
-                      create: nil, write: nil, dryrun: nil, dataset: nil,
-                      project: nil, standard_sql: nil, legacy_sql: nil,
-                      large_results: nil, flatten: nil,
-                      maximum_billing_tier: nil, maximum_bytes_billed: nil,
-                      job_id: nil, prefix: nil, labels: nil, udfs: nil
+        def query_job query,
+                      params: nil,
+                      types: nil,
+                      external: nil,
+                      priority: "INTERACTIVE",
+                      cache: true,
+                      table: nil,
+                      create: nil,
+                      write: nil,
+                      dryrun: nil,
+                      dataset: nil,
+                      project: nil,
+                      standard_sql: nil,
+                      legacy_sql: nil,
+                      large_results: nil,
+                      flatten: nil,
+                      maximum_billing_tier: nil,
+                      maximum_bytes_billed: nil,
+                      job_id: nil,
+                      prefix: nil,
+                      labels: nil,
+                      udfs: nil,
+                      create_session: nil,
+                      session_id: nil
           ensure_service!
-          options = {
-            params: params, external: external, priority: priority,
-            cache: cache, table: table, create: create, write: write,
-            dryrun: dryrun, dataset: dataset, project: project,
-            standard_sql: standard_sql, legacy_sql: legacy_sql,
-            large_results: large_results, flatten: flatten,
-            maximum_billing_tier: maximum_billing_tier, maximum_bytes_billed: maximum_bytes_billed,
-            job_id: job_id, prefix: prefix, labels: labels, udfs: udfs
-          }
+          options = {
+            params: params,
+            types: types,
+            external: external,
+            priority: priority,
+            cache: cache,
+            table: table,
+            create: create,
+            write: write,
+            dryrun: dryrun,
+            dataset: dataset,
+            project: (project || self.project),
+            standard_sql: standard_sql,
+            legacy_sql: legacy_sql,
+            large_results: large_results,
+            flatten: flatten,
+            maximum_billing_tier: maximum_billing_tier,
+            maximum_bytes_billed: maximum_bytes_billed,
+            job_id: job_id,
+            prefix: prefix,
+            labels: labels,
+            udfs: udfs,
+            create_session: create_session,
+            session_id: session_id
+          }
 
          updater = QueryJob::Updater.from_options service, query, options
 
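A sketch of the new `create_session`/`session_id` pair added to the signature above; the temporary-table SQL is an assumption:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# The first job opens a session; temp state created in it is visible
# to later jobs that pass the same session ID.
setup = bigquery.query_job "CREATE TEMP TABLE t1 AS SELECT 17 AS x",
                           create_session: true
setup.wait_until_done!

followup = bigquery.query_job "SELECT x FROM t1",
                              session_id: setup.session_id
followup.wait_until_done!
followup.data.each { |row| puts row[:x] }
```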
@@ -571,27 +663,6 @@ module Google
         # as needed to complete the query. When used for executing DDL/DML
         # statements, this method does not return row data.
         #
-        # When using standard SQL and passing arguments using `params`, Ruby
-        # types are mapped to BigQuery types as follows:
-        #
-        # | BigQuery    | Ruby           | Notes |
-        # |-------------|----------------|---|
-        # | `BOOL`      | `true`/`false` | |
-        # | `INT64`     | `Integer`      | |
-        # | `FLOAT64`   | `Float`        | |
-        # | `NUMERIC`   | `BigDecimal`   | Will be rounded to 9 decimal places |
-        # | `STRING`    | `String`       | |
-        # | `DATETIME`  | `DateTime`     | `DATETIME` does not support time zone. |
-        # | `DATE`      | `Date`         | |
-        # | `TIMESTAMP` | `Time`         | |
-        # | `TIME`      | `Google::Cloud::BigQuery::Time` | |
-        # | `BYTES`     | `File`, `IO`, `StringIO`, or similar | |
-        # | `ARRAY`     | `Array`        | Nested arrays, `nil` values are not supported. |
-        # | `STRUCT`    | `Hash`         | Hash keys may be strings or symbols. |
-        #
-        # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
-        # for an overview of each BigQuery data type, including allowed values.
-        #
         # The geographic location for the job ("US", "EU", etc.) can be set via
         # {QueryJob::Updater#location=} in a block passed to this method.
         #
@@ -601,13 +672,60 @@ module Google
         #   syntax](https://cloud.google.com/bigquery/query-reference), of the
         #   query to execute. Example: "SELECT count(f1) FROM
         #   [myProjectId:myDatasetId.myTableId]".
-        # @param [Array, Hash] params Standard SQL only. Used to pass query
-        #   arguments when the `query` string contains either positional (`?`)
-        #   or named (`@myparam`) query parameters. If value passed is an array
-        #   `["foo"]`, the query must use positional query parameters. If
-        #   value passed is a hash `{ myparam: "foo" }`, the query must use
-        #   named query parameters. When set, `legacy_sql` will automatically
-        #   be set to false and `standard_sql` to true.
+        # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+        #   either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
+        #   query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
+        #   use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+        #   true.
+        #
+        #   BigQuery types are converted from Ruby types as follows:
+        #
+        #   | BigQuery     | Ruby                                 | Notes                                              |
+        #   |--------------|--------------------------------------|----------------------------------------------------|
+        #   | `BOOL`       | `true`/`false`                       |                                                    |
+        #   | `INT64`      | `Integer`                            |                                                    |
+        #   | `FLOAT64`    | `Float`                              |                                                    |
+        #   | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+        #   | `BIGNUMERIC` | `BigDecimal`                         | NOT AUTOMATIC: Must be mapped using `types`, below.|
+        #   | `STRING`     | `String`                             |                                                    |
+        #   | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+        #   | `DATE`       | `Date`                               |                                                    |
+        #   | `GEOGRAPHY`  | `String` (WKT or GeoJSON)            | NOT AUTOMATIC: Must be mapped using `types`, below.|
+        #   | `TIMESTAMP`  | `Time`                               |                                                    |
+        #   | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+        #   | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+        #   | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+        #   | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+        #
+        #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+        #   of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+        #   GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+        # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+        #   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+        #   specify the SQL type for these values.
+        #
+        #   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+        #   positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+        #   should be BigQuery type codes from the following list:
+        #
+        #   * `:BOOL`
+        #   * `:INT64`
+        #   * `:FLOAT64`
+        #   * `:NUMERIC`
+        #   * `:BIGNUMERIC`
+        #   * `:STRING`
+        #   * `:DATETIME`
+        #   * `:DATE`
+        #   * `:GEOGRAPHY`
+        #   * `:TIMESTAMP`
+        #   * `:TIME`
+        #   * `:BYTES`
+        #   * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+        #     are specified as `[:INT64]`.
+        #   * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+        #     match the `params` hash, and the values are the types value that matches the data.
+        #
+        #   Types are optional.
         # @param [Hash<String|Symbol, External::DataSource>] external A Hash
         #   that represents the mapping of the external tables to the table
         #   names used in the SQL query. The hash keys are the table names, and
@@ -649,6 +767,8 @@ module Google
         #   When set to false, the values of `large_results` and `flatten` are
         #   ignored; the query will be run as if `large_results` is true and
         #   `flatten` is false. Optional. The default value is false.
+        # @param [String] session_id The ID of an existing session. See the
+        #   `create_session` param in {#query_job} and {Job#session_id}.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
         #   configuration object for setting additional options for the query.
@@ -663,9 +783,12 @@ module Google
         #   sql = "SELECT name FROM `my_project.my_dataset.my_table`"
         #   data = bigquery.query sql
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         # @example Query using legacy SQL:
         #   require "google/cloud/bigquery"
@@ -675,9 +798,12 @@ module Google
         #   sql = "SELECT name FROM [my_project:my_dataset.my_table]"
         #   data = bigquery.query sql, legacy_sql: true
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         # @example Retrieve all rows: (See {Data#all})
         #   require "google/cloud/bigquery"
@@ -695,28 +821,46 @@ module Google
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   data = bigquery.query "SELECT name " \
-        #                         "FROM `my_dataset.my_table` " \
-        #                         "WHERE id = ?",
+        #   data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
         #                         params: [1]
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         # @example Query using named query parameters:
         #   require "google/cloud/bigquery"
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   data = bigquery.query "SELECT name " \
-        #                         "FROM `my_dataset.my_table` " \
-        #                         "WHERE id = @id",
+        #   data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
         #                         params: { id: 1 }
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
+        #
+        # @example Query using named query parameters with types:
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
+        #                         params: { ids: [] },
+        #                         types: { ids: [:INT64] }
+        #
+        #   # Iterate over the first page of results
+        #   data.each do |row|
+        #     puts row[:name]
+        #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         # @example Execute a DDL statement:
         #   require "google/cloud/bigquery"
@@ -725,16 +869,14 @@ module Google
         #
         #   data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
         #
-        #   table_ref = data.ddl_target_table
+        #   table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
         #
         # @example Execute a DML statement:
         #   require "google/cloud/bigquery"
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
-        #   data = bigquery.query "UPDATE `my_dataset.my_table` " \
-        #                         "SET x = x + 1 " \
-        #                         "WHERE x IS NOT NULL"
+        #   data = bigquery.query "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
         #
         #   puts data.num_dml_affected_rows
         #
@@ -755,17 +897,36 @@ module Google
         #     query.table = dataset.table "my_table", skip_lookup: true
         #   end
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
-        #
-        def query query, params: nil, external: nil, max: nil, cache: true,
-                  dataset: nil, project: nil, standard_sql: nil,
-                  legacy_sql: nil, &block
-          job = query_job query, params: params, external: external,
-                          cache: cache, dataset: dataset, project: project,
-                          standard_sql: standard_sql, legacy_sql: legacy_sql,
-                          &block
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
+        #
+        def query query,
+                  params: nil,
+                  types: nil,
+                  external: nil,
+                  max: nil,
+                  cache: true,
+                  dataset: nil,
+                  project: nil,
+                  standard_sql: nil,
+                  legacy_sql: nil,
+                  session_id: nil,
+                  &block
+          job = query_job query,
+                          params: params,
+                          types: types,
+                          external: external,
+                          cache: cache,
+                          dataset: dataset,
+                          project: project,
+                          standard_sql: standard_sql,
+                          legacy_sql: legacy_sql,
+                          session_id: session_id,
+                          &block
           job.wait_until_done!
 
           if job.failed?
@@ -800,7 +961,7 @@ module Google
         #   The following values are supported:
         #
         #   * `csv` - CSV
-        #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+        #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
         #   * `avro` - [Avro](http://avro.apache.org/)
         #   * `sheets` - Google Sheets
         #   * `datastore_backup` - Cloud Datastore backup
@@ -822,9 +983,12 @@ module Google
         #   data = bigquery.query "SELECT * FROM my_ext_table",
         #                         external: { my_ext_table: csv_table }
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         def external url, format: nil
           ext = External.from_urls url, format
@@ -861,9 +1025,7 @@ module Google
         #
         def dataset dataset_id, skip_lookup: nil
           ensure_service!
-          if skip_lookup
-            return Dataset.new_reference project, dataset_id, service
-          end
+          return Dataset.new_reference project, dataset_id, service if skip_lookup
           gapi = service.get_dataset dataset_id
           Dataset.from_gapi gapi, service
         rescue Google::Cloud::NotFoundError
@@ -874,14 +1036,13 @@ module Google
         # Creates a new dataset.
         #
         # @param [String] dataset_id A unique ID for this dataset, without the
-        #   project name. The ID must contain only letters (a-z, A-Z), numbers
-        #   (0-9), or underscores (_). The maximum length is 1,024 characters.
+        #   project name. The ID must contain only letters (`[A-Za-z]`), numbers
+        #   (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
         # @param [String] name A descriptive name for the dataset.
         # @param [String] description A user-friendly description of the
         #   dataset.
         # @param [Integer] expiration The default lifetime of all tables in the
-        #   dataset, in milliseconds. The minimum value is 3600000 milliseconds
-        #   (one hour).
+        #   dataset, in milliseconds. The minimum value is `3_600_000` (one hour).
         # @param [String] location The geographic location where the dataset
         #   should reside. Possible values include `EU` and `US`. The default
         #   value is `US`.
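A sketch of `create_dataset` with the documented minimum expiration; the dataset ID and location are assumptions:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# 3_600_000 ms = one hour, the documented minimum default table lifetime.
dataset = bigquery.create_dataset "my_new_dataset",
                                  expiration: 3_600_000,
                                  location:   "EU"
```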
@@ -990,8 +1151,7 @@ module Google
         #
         def datasets all: nil, filter: nil, token: nil, max: nil
           ensure_service!
-          options = { all: all, filter: filter, token: token, max: max }
-          gapi = service.list_datasets options
+          gapi = service.list_datasets all: all, filter: filter, token: token, max: max
           Dataset::List.from_gapi gapi, service, all, filter, max
         end
 
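Usage sketch for the keyword-argument form of `datasets` shown above; the label filter value is an assumption:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Keyword arguments now pass straight through to the list call.
bigquery.datasets(filter: "labels.env:prod", max: 10).each do |dataset|
  puts dataset.dataset_id
end
```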
@@ -1029,18 +1189,22 @@ module Google
         #   part of the larger set of results to view. Optional.
         # @param [Integer] max Maximum number of jobs to return. Optional.
         # @param [String] filter A filter for job state. Optional.
-        # @param [Time] min_created_at Min value for {Job#created_at}. When
-        #   provided, only jobs created after or at this time are returned.
-        #   Optional.
-        # @param [Time] max_created_at Max value for {Job#created_at}. When
-        #   provided, only jobs created before or at this time are returned.
-        #   Optional.
         #
         #   Acceptable values are:
         #
         #   * `done` - Finished jobs
         #   * `pending` - Pending jobs
         #   * `running` - Running jobs
+        # @param [Time] min_created_at Min value for {Job#created_at}. When
+        #   provided, only jobs created after or at this time are returned.
+        #   Optional.
+        # @param [Time] max_created_at Max value for {Job#created_at}. When
+        #   provided, only jobs created before or at this time are returned.
+        #   Optional.
+        # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
+        #   object or a job ID. If set, retrieve only child jobs of the
+        #   specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
+        #   and {Job#parent_job_id}.
         #
         # @return [Array<Google::Cloud::Bigquery::Job>] (See
         #   {Google::Cloud::Bigquery::Job::List})
@@ -1089,15 +1253,63 @@ module Google
         #     # process job
         #   end
         #
-        def jobs all: nil, token: nil, max: nil, filter: nil,
-                 min_created_at: nil, max_created_at: nil
+        # @example Retrieve child jobs by setting `parent_job`:
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   multi_statement_sql = <<~SQL
+        #     -- Declare a variable to hold names as an array.
+        #     DECLARE top_names ARRAY<STRING>;
+        #     -- Build an array of the top 100 names from the year 2017.
+        #     SET top_names = (
+        #       SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+        #       FROM `bigquery-public-data.usa_names.usa_1910_current`
+        #       WHERE year = 2017
+        #     );
+        #     -- Which names appear as words in Shakespeare's plays?
+        #     SELECT
+        #       name AS shakespeare_name
+        #     FROM UNNEST(top_names) AS name
+        #     WHERE name IN (
+        #       SELECT word
+        #       FROM `bigquery-public-data.samples.shakespeare`
+        #     );
+        #   SQL
+        #
+        #   job = bigquery.query_job multi_statement_sql
+        #
+        #   job.wait_until_done!
+        #
+        #   child_jobs = bigquery.jobs parent_job: job
+        #
+        #   child_jobs.each do |child_job|
+        #     script_statistics = child_job.script_statistics
+        #     puts script_statistics.evaluation_kind
+        #     script_statistics.stack_frames.each do |stack_frame|
+        #       puts stack_frame.text
+        #     end
+        #   end
+        #
+        def jobs all: nil,
+                 token: nil,
+                 max: nil,
+                 filter: nil,
+                 min_created_at: nil,
+                 max_created_at: nil,
+                 parent_job: nil
           ensure_service!
+          parent_job = parent_job.job_id if parent_job.is_a? Job
           options = {
-            all: all, token: token, max: max, filter: filter,
-            min_created_at: min_created_at, max_created_at: max_created_at
+            parent_job_id: parent_job,
+            all: all,
+            token: token,
+            max: max, filter: filter,
+            min_created_at: min_created_at,
+            max_created_at: max_created_at
           }
-          gapi = service.list_jobs options
-          Job::List.from_gapi gapi, service, options
+          gapi = service.list_jobs(**options)
+          Job::List.from_gapi gapi, service, **options
         end
 
         ##
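A sketch combining the `filter` and created-at bounds documented above (the one-hour window is an assumption):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Finished jobs created within the past hour.
jobs = bigquery.jobs filter: "done", min_created_at: Time.now - 3600
jobs.each { |job| puts job.job_id }
```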
@@ -1143,8 +1355,7 @@ module Google
         #
         def projects token: nil, max: nil
           ensure_service!
-          options = { token: token, max: max }
-          gapi = service.list_projects options
+          gapi = service.list_projects token: token, max: max
           Project::List.from_gapi gapi, service, max
         end
 
@@ -1165,14 +1376,15 @@ module Google
         #   bigquery = Google::Cloud::Bigquery.new
         #
         #   fourpm = bigquery.time 16, 0, 0
-        #   data = bigquery.query "SELECT name " \
-        #                         "FROM `my_dataset.my_table` " \
-        #                         "WHERE time_of_date = @time",
+        #   data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date = @time",
         #                         params: { time: fourpm }
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         # @example Create Time with fractional seconds:
         #   require "google/cloud/bigquery"
@@ -1180,14 +1392,15 @@ module Google
         #   bigquery = Google::Cloud::Bigquery.new
         #
         #   precise_time = bigquery.time 16, 35, 15.376541
-        #   data = bigquery.query "SELECT name " \
-        #                         "FROM `my_dataset.my_table` " \
-        #                         "WHERE time_of_date >= @time",
+        #   data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date >= @time",
         #                         params: { time: precise_time }
         #
+        #   # Iterate over the first page of results
         #   data.each do |row|
         #     puts row[:name]
         #   end
+        #   # Retrieve the next page of results
+        #   data = data.next if data.next?
         #
         def time hour, minute, second
           Bigquery::Time.new "#{hour}:#{minute}:#{second}"
@@ -1304,49 +1517,61 @@ module Google
         end
 
         ##
-        # Extracts the data from the provided table to a Google Cloud Storage
-        # file using an asynchronous method. In this method, an {ExtractJob}
-        # is immediately returned. The caller may poll the service by
-        # calling {Job#reload!} and {Job#done?} to detect when the job is
-        # done, or simply block until the job is done by calling
+        # Extracts the data from a table or exports a model to Google Cloud Storage
+        # asynchronously, immediately returning an {ExtractJob} that can be used to
+        # track the progress of the export job. The caller may poll the service by
+        # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
+        # is done, or simply block until the job is done by calling
         # #{Job#wait_until_done!}. See {#extract} for the synchronous version.
-        #
-        #
+        #
+        # Use this method instead of {Table#extract_job} or {Model#extract_job} to
+        # extract data from source tables or models in other projects.
         #
         # The geographic location for the job ("US", "EU", etc.) can be set via
         # {ExtractJob::Updater#location=} in a block passed to this method.
         #
-        # @see https://cloud.google.com/bigquery/exporting-data
-        #   Exporting Data From BigQuery
+        # @see https://cloud.google.com/bigquery/docs/exporting-data
+        #   Exporting table data
+        # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+        #   Exporting models
         #
-        # @param [Table, String] table The source table for the extract
-        #   operation. This can be a table object; or a string ID as specified by the
-        #   [Standard SQL Query
+        # @param [Table, Model, String] source The source table or model for
+        #   the extract operation. This can be a table or model object; or a
+        #   table ID string as specified by the [Standard SQL Query
         #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
         #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
         #   Reference](https://cloud.google.com/bigquery/query-reference#from)
         #   (`project-name:dataset_id.table_id`).
         # @param [Google::Cloud::Storage::File, String, Array<String>]
         #   extract_url The Google Storage file or file URI pattern(s) to which
-        #   BigQuery should extract the table data.
-        # @param [String] format The exported file format. The default value
-        #   is `csv`.
+        #   BigQuery should extract. For a model export this value should be a
+        #   string ending in an object name prefix, since multiple objects will
+        #   be exported.
+        # @param [String] format The exported file format. The default value for
+        #   tables is `csv`. Tables with nested or repeated fields cannot be
+        #   exported as CSV. The default value for models is `ml_tf_saved_model`.
         #
-        #   The following values are supported:
+        #   Supported values for tables:
         #
         #   * `csv` - CSV
-        #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+        #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
         #   * `avro` - [Avro](http://avro.apache.org/)
+        #
+        #   Supported values for models:
+        #
+        #   * `ml_tf_saved_model` - TensorFlow SavedModel
+        #   * `ml_xgboost_booster` - XGBoost Booster
         # @param [String] compression The compression type to use for exported
         #   files. Possible values include `GZIP` and `NONE`. The default value
-        #   is `NONE`.
+        #   is `NONE`. Not applicable when extracting models.
         # @param [String] delimiter Delimiter to use between fields in the
-        #   exported data. Default is <code>,</code>.
-        # @param [Boolean] header Whether to print out a header row in the
-        #   results. Default is `true`.
+        #   exported table data. Default is `,`. Not applicable when extracting
+        #   models.
+        # @param [Boolean] header Whether to print out a header row in table
+        #   exports. Default is `true`. Not applicable when extracting models.
         # @param [String] job_id A user-defined ID for the extract job. The ID
-        #   must contain only letters (a-z, A-Z), numbers (0-9), underscores
-        #   (_), or dashes (-). The maximum length is 1,024 characters. If
+        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
         #   `job_id` is provided, then `prefix` will not be used.
         #
         # See [Generating a job
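The rewritten doc text above describes polling with {Job#reload!}/{Job#done?}; a minimal sketch of that loop (bucket name hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

job = bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                           "gs://my-bucket/shakespeare-*.csv"

# Poll until the service reports completion instead of blocking
# in Job#wait_until_done!.
until job.done?
  sleep 1
  job.reload!
end
raise "extract failed" if job.failed?
```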
@@ -1355,48 +1580,65 @@ module Google
         #   prepended to a generated value to produce a unique job ID. For
         #   example, the prefix `daily_import_job_` can be given to generate a
         #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
-        #   prefix must contain only letters ([A-Za-z]), numbers ([0-9]),
-        #   underscores (_), or dashes (-). The maximum length of the entire ID
+        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
         #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
         #   be used.
         # @param [Hash] labels A hash of user-provided labels associated with
-        #   the job. You can use these to organize and group your jobs.
-        #   Label keys and values can be no longer than 63 characters,
-        #   can only contain lowercase letters, numeric characters,
-        #   underscores and dashes. International characters are
-        #   allowed. Label values are optional. Label keys must start
-        #   with a letter and each label in the list must have a
-        #   different key.
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
         #   configuration object for setting additional options.
         #
         # @return [Google::Cloud::Bigquery::ExtractJob]
         #
-        # @example
+        # @example Export table data
         #   require "google/cloud/bigquery"
         #
         #   bigquery = Google::Cloud::Bigquery.new
         #
         #   table_id = "bigquery-public-data.samples.shakespeare"
-        #   extract_job = bigquery.extract_job table_id,
-        #                                      "gs://my-bucket/shakespeare.csv"
+        #   extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
         #   extract_job.wait_until_done!
         #   extract_job.done? #=> true
         #
+        # @example Export a model
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   model = dataset.model "my_model"
+        #
+        #   extract_job = bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+        #
         # @!group Data
         #
-        def extract_job table, extract_url, format: nil, compression: nil,
-                        delimiter: nil, header: nil, job_id: nil, prefix: nil,
-                        labels: nil
+        def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
+                        prefix: nil, labels: nil
           ensure_service!
-          options = { format: format, compression: compression,
-                      delimiter: delimiter, header: header, job_id: job_id,
+          options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
                       prefix: prefix, labels: labels }
+          source_ref = if source.respond_to? :model_ref
+                         source.model_ref
+                       else
+                         Service.get_table_ref source, default_ref: project_ref
+                       end
 
-          table_ref = Service.get_table_ref table, default_ref: project_ref
-          updater = ExtractJob::Updater.from_options service, table_ref,
-                                                     extract_url, options
+          updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
 
          yield updater if block_given?
 
@@ -1406,51 +1648,63 @@ module Google
         end
 
         ##
-        # Extracts the data from the provided table to a Google Cloud Storage
-        # file using a synchronous method that blocks for a response. Timeouts
+        # Extracts the data from a table or exports a model to Google Cloud Storage
+        # using a synchronous method that blocks for a response. Timeouts
         # and transient errors are generally handled as needed to complete the
-        # job. See {#extract_job} for the asynchronous version.
-        #
-        #
+        # job. See {#extract_job} for the asynchronous version.
+        #
+        # Use this method instead of {Table#extract} or {Model#extract} to
+        # extract data from source tables or models in other projects.
         #
         # The geographic location for the job ("US", "EU", etc.) can be set via
         # {ExtractJob::Updater#location=} in a block passed to this method.
         #
-        # @see https://cloud.google.com/bigquery/exporting-data
-        #   Exporting Data From BigQuery
+        # @see https://cloud.google.com/bigquery/docs/exporting-data
+        #   Exporting table data
+        # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+        #   Exporting models
        #
-        # @param [Table, String] table The source table for the extract
-        #   operation. This can be a table object; or a string ID as specified by the
-        #   [Standard SQL Query
+        # @param [Table, Model, String] source The source table or model for
+        #   the extract operation. This can be a table or model object; or a
+        #   table ID string as specified by the [Standard SQL Query
         #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
         #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
         #   Reference](https://cloud.google.com/bigquery/query-reference#from)
         #   (`project-name:dataset_id.table_id`).
         # @param [Google::Cloud::Storage::File, String, Array<String>]
         #   extract_url The Google Storage file or file URI pattern(s) to which
-        #   BigQuery should extract the table data.
-        # @param [String] format The exported file format. The default value
-        #   is `csv`.
+        #   BigQuery should extract. For a model export this value should be a
+        #   string ending in an object name prefix, since multiple objects will
+        #   be exported.
+        # @param [String] format The exported file format. The default value for
+        #   tables is `csv`. Tables with nested or repeated fields cannot be
+        #   exported as CSV. The default value for models is `ml_tf_saved_model`.
         #
-        #   The following values are supported:
+        #   Supported values for tables:
         #
         #   * `csv` - CSV
-        #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+        #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
         #   * `avro` - [Avro](http://avro.apache.org/)
+        #
+        #   Supported values for models:
+        #
+        #   * `ml_tf_saved_model` - TensorFlow SavedModel
+        #   * `ml_xgboost_booster` - XGBoost Booster
         # @param [String] compression The compression type to use for exported
         #   files. Possible values include `GZIP` and `NONE`. The default value
-        #   is `NONE`.
+        #   is `NONE`. Not applicable when extracting models.
         # @param [String] delimiter Delimiter to use between fields in the
-        #   exported data. Default is <code>,</code>.
-        # @param [Boolean] header Whether to print out a header row in the
-        #   results. Default is `true`.
+        #   exported table data. Default is `,`. Not applicable when extracting
+        #   models.
+        # @param [Boolean] header Whether to print out a header row in table
+        #   exports. Default is `true`. Not applicable when extracting models.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
         #   configuration object for setting additional options.
         #
         # @return [Boolean] Returns `true` if the extract operation succeeded.
         #
-        # @example
+        # @example Export table data
         #   require "google/cloud/bigquery"
         #
         #   bigquery = Google::Cloud::Bigquery.new
@@ -1458,12 +1712,19 @@ module Google
         #   bigquery.extract "bigquery-public-data.samples.shakespeare",
         #                    "gs://my-bucket/shakespeare.csv"
         #
+        # @example Export a model
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   model = dataset.model "my_model"
+        #
+        #   bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+        #
         # @!group Data
         #
-        def extract table, extract_url, format: nil, compression: nil,
-                    delimiter: nil, header: nil, &block
-          job = extract_job table,
-                            extract_url,
+        def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
+          job = extract_job source, extract_url,
                             format: format,
                             compression: compression,
                             delimiter: delimiter,
|
1462
1725
|
#
|
1463
|
-
def extract
|
1464
|
-
|
1465
|
-
job = extract_job table,
|
1466
|
-
extract_url,
|
1726
|
+
def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
|
1727
|
+
job = extract_job source, extract_url,
|
1467
1728
|
format: format,
|
1468
1729
|
compression: compression,
|
1469
1730
|
delimiter: delimiter,
|
@@ -1487,9 +1748,7 @@ module Google
|
|
1487
1748
|
|
1488
1749
|
# TODO: remove `Integer` and set normally after migrating to Gax or
|
1489
1750
|
# to google-api-client 0.10 (See google/google-api-ruby-client#439)
|
1490
|
-
if gapi.numeric_id
|
1491
|
-
p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id)
|
1492
|
-
end
|
1751
|
+
p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
|
1493
1752
|
end
|
1494
1753
|
end
|
1495
1754
|
|