google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
data/lib/google/cloud/bigquery/project.rb
@@ -56,7 +56,8 @@ module Google
  # @private The Service object.
  attr_accessor :service

- attr_reader :name, :numeric_id
+ attr_reader :name
+ attr_reader :numeric_id

  ##
  # Creates a new Service instance.
@@ -91,8 +92,7 @@ module Google
  # @return [String] The service account email address.
  #
  def service_account_email
- @service_account_email ||= \
- service.project_service_account.email
+ @service_account_email ||= service.project_service_account.email
  end

  ##
@@ -139,8 +139,8 @@ module Google
  # * `empty` - An error will be returned if the destination table
  # already contains data.
  # @param [String] job_id A user-defined ID for the copy job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -149,18 +149,26 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
  # configuration object for setting additional options.
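
A compliant labels hash is easy to get wrong, so here is a minimal sketch of passing one to the reworked `copy_job` (the table IDs are hypothetical):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Keys start with a lowercase letter; values may be empty.
    job = bigquery.copy_job "my_project.my_dataset.source_table",
                            "my_project.my_dataset.dest_table",
                            labels: { "cost-center" => "analytics", "env" => "" }
    job.wait_until_done!
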
@@ -182,11 +190,9 @@ module Google
  #
  # @!group Data
  #
- def copy_job source_table, destination_table, create: nil, write: nil,
- job_id: nil, prefix: nil, labels: nil
+ def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
  ensure_service!
- options = { create: create, write: write, labels: labels,
- job_id: job_id, prefix: prefix }
+ options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }

  updater = CopyJob::Updater.from_options(
  service,
@@ -261,13 +267,8 @@ module Google
  #
  # @!group Data
  #
- def copy source_table, destination_table, create: nil, write: nil,
- &block
- job = copy_job source_table,
- destination_table,
- create: create,
- write: write,
- &block
+ def copy source_table, destination_table, create: nil, write: nil, &block
+ job = copy_job source_table, destination_table, create: create, write: write, &block
  job.wait_until_done!
  ensure_job_succeeded! job
  true
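
For reference, a minimal sketch of the simplified synchronous `copy` in use; it blocks until the job finishes and returns `true` on success (project and table IDs are hypothetical):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Overwrite the destination table if it already contains data.
    bigquery.copy "source_project.my_dataset.my_table",
                  "dest_project.my_dataset.my_table",
                  write: "truncate"
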
@@ -277,27 +278,6 @@ module Google
  # Queries data by creating a [query
  # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery | Ruby | Notes |
- # |-------------|----------------|---|
- # | `BOOL` | `true`/`false` | |
- # | `INT64` | `Integer` | |
- # | `FLOAT64` | `Float` | |
- # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
- # | `STRING` | `String` | |
- # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
- # | `DATE` | `Date` | |
- # | `TIMESTAMP` | `Time` | |
- # | `TIME` | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
- # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method.
  #
@@ -305,13 +285,60 @@ module Google
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
  # [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- # arguments when the `query` string contains either positional (`?`)
- # or named (`@myparam`) query parameters. If value passed is an array
- # `["foo"]`, the query must use positional query parameters. If value
- # passed is a hash `{ myparam: "foo" }`, the query must use named
- # query parameters. When set, `legacy_sql` will automatically be set
- # to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ # either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
+ # query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+ # true.
+ #
+ # BigQuery types are converted from Ruby types as follows:
+ #
+ # | BigQuery | Ruby | Notes |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL` | `true`/`false` | |
+ # | `INT64` | `Integer` | |
+ # | `FLOAT64` | `Float` | |
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
+ # | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `STRING` | `String` | |
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
+ # | `DATE` | `Date` | |
+ # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `TIMESTAMP` | `Time` | |
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
+ #
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ # of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ # GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
+ #
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
+ #
+ # * `:BOOL`
+ # * `:INT64`
+ # * `:FLOAT64`
+ # * `:NUMERIC`
+ # * `:BIGNUMERIC`
+ # * `:STRING`
+ # * `:DATETIME`
+ # * `:DATE`
+ # * `:GEOGRAPHY`
+ # * `:TIMESTAMP`
+ # * `:TIME`
+ # * `:BYTES`
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ # is specified as `[:INT64]`.
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ # match the `params` hash, and the values are the type codes that match the data.
+ #
+ # Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  # that represents the mapping of the external tables to the table
  # names used in the SQL query. The hash keys are the table names, and
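
Because `BIGNUMERIC` and `GEOGRAPHY` are flagged NOT AUTOMATIC in the table above, they must be mapped explicitly; a minimal sketch of forcing a `BigDecimal` parameter to `BIGNUMERIC` with `types` (the query is illustrative):

    require "google/cloud/bigquery"
    require "bigdecimal"

    bigquery = Google::Cloud::Bigquery.new

    # Without `types`, a BigDecimal param is sent as NUMERIC (rounded to scale 9).
    data = bigquery.query "SELECT @value AS big_value",
                          params: { value: BigDecimal("1.000000000123456789") },
                          types: { value: :BIGNUMERIC }

    data.each { |row| puts row[:big_value] }
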
@@ -375,13 +402,19 @@ module Google
  # Flattens all nested and repeated fields in the query results. The
  # default value is `true`. `large_results` parameter must be `true` if
  # this is set to `false`.
+ # @param [Integer] maximum_billing_tier Limits the billing tier for this
+ # job. Queries that have resource usage beyond this tier will fail
+ # (without incurring a charge). WARNING: The billed byte amount can be
+ # multiplied by an amount up to this number! Most users should not need
+ # to alter this setting, and we recommend that you avoid introducing new
+ # uses of it. Deprecated.
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
  # job. Queries that will have bytes billed beyond this limit will fail
  # (without incurring a charge). Optional. If unspecified, this will be
  # set to your project default.
  # @param [String] job_id A user-defined ID for the query job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -390,30 +423,51 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  #
  # See [Generating a job
  # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @param [Array<String>, String] udfs User-defined function resources
- # used in the query. May be either a code resource to load from a
- # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ # used in a legacy SQL query. May be either a code resource to load from
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  # that contains code for a user-defined function (UDF). Providing an
  # inline code resource is equivalent to providing a URI for a file
- # containing the same code. See [User-Defined
- # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
- # @param [Integer] maximum_billing_tier Deprecated: Change the billing
- # tier to allow high-compute queries.
+ # containing the same code.
+ #
+ # This parameter is used for defining User Defined Function (UDF)
+ # resources only when using legacy SQL. Users of standard SQL should
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+ # Routines API to define UDF resources.
+ #
+ # For additional information on migrating, see: [Migrating to
+ # standard SQL - Differences in user-defined JavaScript
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
+ # @param [Boolean] create_session If true, creates a new session, where the
+ # session ID will be a server generated random id. If false, runs query
+ # with an existing session ID when one is provided in the `session_id`
+ # param, otherwise runs query in non-session mode. See {Job#session_id}.
+ # The default value is false.
+ # @param [String] session_id The ID of an existing session. See also the
+ # `create_session` param and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  # configuration object for setting query options.
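
A minimal sketch of the session flow introduced by `create_session`/`session_id`: the first job creates the session, and later queries reuse its ID so session state such as temp tables stays visible (the temp table is illustrative):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Start a session; the server generates the session ID.
    setup = bigquery.query_job "CREATE TEMP TABLE scratch (x INT64)", create_session: true
    setup.wait_until_done!

    # Reuse the session so the temp table is still visible.
    data = bigquery.query "SELECT COUNT(*) AS n FROM scratch", session_id: setup.session_id
    data.each { |row| puts row[:n] }
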
@@ -425,8 +479,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # "`my_project.my_dataset.my_table`"
+ # job = bigquery.query_job "SELECT name FROM `my_project.my_dataset.my_table`"
  #
  # job.wait_until_done!
  # if !job.failed?
@@ -440,8 +493,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # " [my_project:my_dataset.my_table]",
+ # job = bigquery.query_job "SELECT name FROM [my_project:my_dataset.my_table]",
  # legacy_sql: true
  #
  # job.wait_until_done!
@@ -456,9 +508,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # "`my_dataset.my_table`" \
- # " WHERE id = ?",
+ # job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
  # params: [1]
  #
  # job.wait_until_done!
@@ -473,9 +523,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # "`my_dataset.my_table`" \
- # " WHERE id = @id",
+ # job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
  # params: { id: 1 }
  #
  # job.wait_until_done!
@@ -485,18 +533,32 @@ module Google
  # end
  # end
  #
+ # @example Query using named query parameters with types:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
+ # params: { ids: [] },
+ # types: { ids: [:INT64] }
+ #
+ # job.wait_until_done!
+ # if !job.failed?
+ # job.data.each do |row|
+ # puts row[:name]
+ # end
+ # end
+ #
  # @example Execute a DDL statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "CREATE TABLE " \
- # "`my_dataset.my_table` " \
- # "(x INT64)"
+ # job = bigquery.query_job "CREATE TABLE `my_dataset.my_table` (x INT64)"
  #
  # job.wait_until_done!
  # if !job.failed?
- # table_ref = job.ddl_target_table
+ # table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  # end
  #
  # @example Execute a DML statement:
@@ -504,10 +566,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "UPDATE " \
- # "`my_dataset.my_table` " \
- # "SET x = x + 1 " \
- # "WHERE x IS NOT NULL"
+ # job = bigquery.query_job "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
  #
  # job.wait_until_done!
  # if !job.failed?
@@ -538,23 +597,56 @@ module Google
  # end
  # end
  #
- def query_job query, params: nil, external: nil,
- priority: "INTERACTIVE", cache: true, table: nil,
- create: nil, write: nil, dryrun: nil, dataset: nil,
- project: nil, standard_sql: nil, legacy_sql: nil,
- large_results: nil, flatten: nil,
- maximum_billing_tier: nil, maximum_bytes_billed: nil,
- job_id: nil, prefix: nil, labels: nil, udfs: nil
+ def query_job query,
+ params: nil,
+ types: nil,
+ external: nil,
+ priority: "INTERACTIVE",
+ cache: true,
+ table: nil,
+ create: nil,
+ write: nil,
+ dryrun: nil,
+ dataset: nil,
+ project: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ large_results: nil,
+ flatten: nil,
+ maximum_billing_tier: nil,
+ maximum_bytes_billed: nil,
+ job_id: nil,
+ prefix: nil,
+ labels: nil,
+ udfs: nil,
+ create_session: nil,
+ session_id: nil
  ensure_service!
- options = { priority: priority, cache: cache, table: table,
- create: create, write: write, dryrun: dryrun,
- large_results: large_results, flatten: flatten,
- dataset: dataset, project: (project || self.project),
- legacy_sql: legacy_sql, standard_sql: standard_sql,
- maximum_billing_tier: maximum_billing_tier,
- maximum_bytes_billed: maximum_bytes_billed,
- external: external, job_id: job_id, prefix: prefix,
- labels: labels, udfs: udfs, params: params }
+ options = {
+ params: params,
+ types: types,
+ external: external,
+ priority: priority,
+ cache: cache,
+ table: table,
+ create: create,
+ write: write,
+ dryrun: dryrun,
+ dataset: dataset,
+ project: (project || self.project),
+ standard_sql: standard_sql,
+ legacy_sql: legacy_sql,
+ large_results: large_results,
+ flatten: flatten,
+ maximum_billing_tier: maximum_billing_tier,
+ maximum_bytes_billed: maximum_bytes_billed,
+ job_id: job_id,
+ prefix: prefix,
+ labels: labels,
+ udfs: udfs,
+ create_session: create_session,
+ session_id: session_id
+ }

  updater = QueryJob::Updater.from_options service, query, options

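
Every keyword in the expanded signature above maps straight onto the options hash; options can equally be set on the yielded updater. A minimal sketch, assuming the `location=` and `labels=` setters on `QueryJob::Updater`:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Configure the job in the block instead of via keyword arguments.
    job = bigquery.query_job "SELECT 1 AS one" do |config|
      config.location = "EU"
      config.labels = { "env" => "dev" }
    end
    job.wait_until_done!
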
@@ -571,27 +663,6 @@ module Google
  # as needed to complete the query. When used for executing DDL/DML
  # statements, this method does not return row data.
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery | Ruby | Notes |
- # |-------------|----------------|---|
- # | `BOOL` | `true`/`false` | |
- # | `INT64` | `Integer` | |
- # | `FLOAT64` | `Float` | |
- # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
- # | `STRING` | `String` | |
- # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
- # | `DATE` | `Date` | |
- # | `TIMESTAMP` | `Time` | |
- # | `TIME` | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
- # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method.
  #
@@ -601,13 +672,60 @@ module Google
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
  # [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- # arguments when the `query` string contains either positional (`?`)
- # or named (`@myparam`) query parameters. If value passed is an array
- # `["foo"]`, the query must use positional query parameters. If value
- # passed is a hash `{ myparam: "foo" }`, the query must use named
- # query parameters. When set, `legacy_sql` will automatically be set
- # to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ # either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
+ # query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+ # true.
+ #
+ # BigQuery types are converted from Ruby types as follows:
+ #
+ # | BigQuery | Ruby | Notes |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL` | `true`/`false` | |
+ # | `INT64` | `Integer` | |
+ # | `FLOAT64` | `Float` | |
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
+ # | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `STRING` | `String` | |
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
+ # | `DATE` | `Date` | |
+ # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `TIMESTAMP` | `Time` | |
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
+ #
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ # of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ # GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
+ #
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
+ #
+ # * `:BOOL`
+ # * `:INT64`
+ # * `:FLOAT64`
+ # * `:NUMERIC`
+ # * `:BIGNUMERIC`
+ # * `:STRING`
+ # * `:DATETIME`
+ # * `:DATE`
+ # * `:GEOGRAPHY`
+ # * `:TIMESTAMP`
+ # * `:TIME`
+ # * `:BYTES`
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ # is specified as `[:INT64]`.
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ # match the `params` hash, and the values are the type codes that match the data.
+ #
+ # Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  # that represents the mapping of the external tables to the table
  # names used in the SQL query. The hash keys are the table names, and
@@ -649,6 +767,8 @@ module Google
  # When set to false, the values of `large_results` and `flatten` are
  # ignored; the query will be run as if `large_results` is true and
  # `flatten` is false. Optional. The default value is false.
+ # @param [String] session_id The ID of an existing session. See the
+ # `create_session` param in {#query_job} and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  # configuration object for setting additional options for the query.
@@ -663,9 +783,12 @@ module Google
  # sql = "SELECT name FROM `my_project.my_dataset.my_table`"
  # data = bigquery.query sql
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using legacy SQL:
  # require "google/cloud/bigquery"
@@ -675,9 +798,12 @@ module Google
  # sql = "SELECT name FROM [my_project:my_dataset.my_table]"
  # data = bigquery.query sql, legacy_sql: true
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Retrieve all rows: (See {Data#all})
  # require "google/cloud/bigquery"
@@ -695,28 +821,46 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE id = ?",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
  # params: [1]
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using named query parameters:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE id = @id",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
  # params: { id: 1 }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
+ #
+ # @example Query using named query parameters with types:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
+ # params: { ids: [] },
+ # types: { ids: [:INT64] }
+ #
+ # # Iterate over the first page of results
+ # data.each do |row|
+ # puts row[:name]
+ # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Execute a DDL statement:
  # require "google/cloud/bigquery"
@@ -725,16 +869,14 @@ module Google
  #
  # data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
  #
- # table_ref = data.ddl_target_table
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  #
  # @example Execute a DML statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # data = bigquery.query "UPDATE `my_dataset.my_table` " \
- # "SET x = x + 1 " \
- # "WHERE x IS NOT NULL"
+ # data = bigquery.query "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
  #
  # puts data.num_dml_affected_rows
  #
@@ -755,17 +897,36 @@ module Google
  # query.table = dataset.table "my_table", skip_lookup: true
  # end
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
- #
- def query query, params: nil, external: nil, max: nil, cache: true,
- dataset: nil, project: nil, standard_sql: nil,
- legacy_sql: nil, &block
- job = query_job query, params: params, external: external,
- cache: cache, dataset: dataset,
- project: project, standard_sql: standard_sql,
- legacy_sql: legacy_sql, &block
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
+ #
+ def query query,
+ params: nil,
+ types: nil,
+ external: nil,
+ max: nil,
+ cache: true,
+ dataset: nil,
+ project: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ session_id: nil,
+ &block
+ job = query_job query,
+ params: params,
+ types: types,
+ external: external,
+ cache: cache,
+ dataset: dataset,
+ project: project,
+ standard_sql: standard_sql,
+ legacy_sql: legacy_sql,
+ session_id: session_id,
+ &block
  job.wait_until_done!

  if job.failed?
@@ -800,7 +961,7 @@ module Google
  # The following values are supported:
  #
  # * `csv` - CSV
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
  # * `sheets` - Google Sheets
  # * `datastore_backup` - Cloud Datastore backup
@@ -822,9 +983,12 @@ module Google
  # data = bigquery.query "SELECT * FROM my_ext_table",
  # external: { my_ext_table: csv_table }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  def external url, format: nil
  ext = External.from_urls url, format
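
A minimal sketch of building the `csv_table` used in the example above via this `external` method; the CSV options shown (`autodetect=`, `skip_leading_rows=`) are assumptions about the gem's CSV source API, and the bucket path is hypothetical:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Describe an external CSV file in Cloud Storage without loading it.
    csv_table = bigquery.external "gs://my-bucket/path/to/data.csv" do |csv|
      csv.autodetect = true
      csv.skip_leading_rows = 1
    end

    data = bigquery.query "SELECT * FROM my_ext_table",
                          external: { my_ext_table: csv_table }
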
@@ -861,9 +1025,7 @@ module Google
  #
  def dataset dataset_id, skip_lookup: nil
  ensure_service!
- if skip_lookup
- return Dataset.new_reference project, dataset_id, service
- end
+ return Dataset.new_reference project, dataset_id, service if skip_lookup
  gapi = service.get_dataset dataset_id
  Dataset.from_gapi gapi, service
  rescue Google::Cloud::NotFoundError
@@ -874,14 +1036,13 @@ module Google
  # Creates a new dataset.
  #
  # @param [String] dataset_id A unique ID for this dataset, without the
- # project name. The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # project name. The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  # @param [String] name A descriptive name for the dataset.
  # @param [String] description A user-friendly description of the
  # dataset.
  # @param [Integer] expiration The default lifetime of all tables in the
- # dataset, in milliseconds. The minimum value is 3600000 milliseconds
- # (one hour).
+ # dataset, in milliseconds. The minimum value is `3_600_000` (one hour).
  # @param [String] location The geographic location where the dataset
  # should reside. Possible values include `EU` and `US`. The default
  # value is `US`.
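
A minimal sketch of creating a dataset with these parameters, using the documented one-hour minimum for `expiration` (the dataset ID is hypothetical):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Tables created in this dataset default to a one-hour lifetime.
    dataset = bigquery.create_dataset "my_scratch_dataset",
                                      name: "Scratch",
                                      expiration: 3_600_000,
                                      location: "US"
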
@@ -990,8 +1151,7 @@ module Google
  #
  def datasets all: nil, filter: nil, token: nil, max: nil
  ensure_service!
- options = { all: all, filter: filter, token: token, max: max }
- gapi = service.list_datasets options
+ gapi = service.list_datasets all: all, filter: filter, token: token, max: max
  Dataset::List.from_gapi gapi, service, all, filter, max
  end

@@ -1029,18 +1189,22 @@ module Google
  # part of the larger set of results to view. Optional.
  # @param [Integer] max Maximum number of jobs to return. Optional.
  # @param [String] filter A filter for job state. Optional.
- # @param [Time] min_created_at Min value for {Job#created_at}. When
- # provided, only jobs created after or at this time are returned.
- # Optional.
- # @param [Time] max_created_at Max value for {Job#created_at}. When
- # provided, only jobs created before or at this time are returned.
- # Optional.
  #
  # Acceptable values are:
  #
  # * `done` - Finished jobs
  # * `pending` - Pending jobs
  # * `running` - Running jobs
+ # @param [Time] min_created_at Min value for {Job#created_at}. When
+ # provided, only jobs created after or at this time are returned.
+ # Optional.
+ # @param [Time] max_created_at Max value for {Job#created_at}. When
+ # provided, only jobs created before or at this time are returned.
+ # Optional.
+ # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
+ # object or a job ID. If set, retrieve only child jobs of the
+ # specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
+ # and {Job#parent_job_id}.
  #
  # @return [Array<Google::Cloud::Bigquery::Job>] (See
  # {Google::Cloud::Bigquery::Job::List})
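
A minimal sketch combining the state filter with the new creation-time window (the 24-hour cutoff is an arbitrary choice):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Finished jobs created within the last 24 hours.
    jobs = bigquery.jobs filter: "done", min_created_at: Time.now - (24 * 60 * 60)

    jobs.all do |job|
      puts "#{job.job_id} created at #{job.created_at}"
    end
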
@@ -1089,15 +1253,63 @@ module Google
  # # process job
  # end
  #
- def jobs all: nil, token: nil, max: nil, filter: nil,
- min_created_at: nil, max_created_at: nil
+ # @example Retrieve child jobs by setting `parent_job`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # multi_statement_sql = <<~SQL
+ # -- Declare a variable to hold names as an array.
+ # DECLARE top_names ARRAY<STRING>;
+ # -- Build an array of the top 100 names from the year 2017.
+ # SET top_names = (
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
+ # WHERE year = 2017
+ # );
+ # -- Which names appear as words in Shakespeare's plays?
+ # SELECT
+ # name AS shakespeare_name
+ # FROM UNNEST(top_names) AS name
+ # WHERE name IN (
+ # SELECT word
+ # FROM `bigquery-public-data.samples.shakespeare`
+ # );
+ # SQL
+ #
+ # job = bigquery.query_job multi_statement_sql
+ #
+ # job.wait_until_done!
+ #
+ # child_jobs = bigquery.jobs parent_job: job
+ #
+ # child_jobs.each do |child_job|
+ # script_statistics = child_job.script_statistics
+ # puts script_statistics.evaluation_kind
+ # script_statistics.stack_frames.each do |stack_frame|
+ # puts stack_frame.text
+ # end
+ # end
+ #
+ def jobs all: nil,
+ token: nil,
+ max: nil,
+ filter: nil,
+ min_created_at: nil,
+ max_created_at: nil,
+ parent_job: nil
  ensure_service!
+ parent_job = parent_job.job_id if parent_job.is_a? Job
  options = {
- all: all, token: token, max: max, filter: filter,
- min_created_at: min_created_at, max_created_at: max_created_at
+ parent_job_id: parent_job,
+ all: all,
+ token: token,
+ max: max, filter: filter,
+ min_created_at: min_created_at,
+ max_created_at: max_created_at
  }
- gapi = service.list_jobs options
- Job::List.from_gapi gapi, service, options
+ gapi = service.list_jobs(**options)
+ Job::List.from_gapi gapi, service, **options
  end

1103
1315
  ##
@@ -1143,8 +1355,7 @@ module Google
1143
1355
  #
1144
1356
  def projects token: nil, max: nil
1145
1357
  ensure_service!
1146
- options = { token: token, max: max }
1147
- gapi = service.list_projects options
1358
+ gapi = service.list_projects token: token, max: max
1148
1359
  Project::List.from_gapi gapi, service, max
1149
1360
  end
1150
1361
 
@@ -1165,14 +1376,15 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  #
  # fourpm = bigquery.time 16, 0, 0
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE time_of_date = @time",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date = @time",
  # params: { time: fourpm }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Create Time with fractional seconds:
  # require "google/cloud/bigquery"
@@ -1180,14 +1392,15 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  #
  # precise_time = bigquery.time 16, 35, 15.376541
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE time_of_date >= @time",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date >= @time",
  # params: { time: precise_time }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  def time hour, minute, second
  Bigquery::Time.new "#{hour}:#{minute}:#{second}"
@@ -1304,49 +1517,61 @@ module Google
  end

  ##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using an asynchronous method. In this method, an {ExtractJob} is
- # immediately returned. The caller may poll the service by repeatedly
- # calling {Job#reload!} and {Job#done?} to detect when the job is done,
- # or simply block until the job is done by calling
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # asynchronously, immediately returning an {ExtractJob} that can be used to
+ # track the progress of the export job. The caller may poll the service by
+ # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
+ # is done, or simply block until the job is done by calling
  # #{Job#wait_until_done!}. See {#extract} for the synchronous version.
- # Use this method instead of {Table#extract_job} to extract data from
- # source tables in other projects.
+ #
+ # Use this method instead of {Table#extract_job} or {Model#extract_job} to
+ # extract data from source tables or models in other projects.
  #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {ExtractJob::Updater#location=} in a block passed to this method.
  #
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- # Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
  #
- # @param [String, Table] table The source table from which to extract
- # data. This can be a table object; or a string ID as specified by the
- # [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ # the extract operation. This can be a table or model object; or a
+ # table ID string as specified by the [Standard SQL Query
  # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
  # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
  # Reference](https://cloud.google.com/bigquery/query-reference#from)
  # (`project-name:dataset_id.table_id`).
  # @param [Google::Cloud::Storage::File, String, Array<String>]
  # extract_url The Google Storage file or file URI pattern(s) to which
- # BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- # `csv`.
+ # BigQuery should extract. For a model export this value should be a
+ # string ending in an object name prefix, since multiple objects will
+ # be exported.
+ # @param [String] format The exported file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
+ #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
  # @param [String] compression The compression type to use for exported
  # files. Possible values include `GZIP` and `NONE`. The default value
- # is `NONE`.
+ # is `NONE`. Not applicable when extracting models.
  # @param [String] delimiter Delimiter to use between fields in the
- # exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- # results. Default is `true`.
+ # exported table data. Default is `,`. Not applicable when extracting
+ # models.
+ # @param [Boolean] header Whether to print out a header row in table
+ # exports. Default is `true`. Not applicable when extracting models.
  # @param [String] job_id A user-defined ID for the extract job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -1355,48 +1580,65 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
  # configuration object for setting additional options.
  #
  # @return [Google::Cloud::Bigquery::ExtractJob]
  #
- # @example
+ # @example Export table data
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
  # table_id = "bigquery-public-data.samples.shakespeare"
- # extract_job = bigquery.extract_job table_id,
- # "gs://my-bucket/shakespeare.csv"
+ # extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
  # extract_job.wait_until_done!
  # extract_job.done? #=> true
  #
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
  # @!group Data
  #
- def extract_job table, extract_url, format: nil, compression: nil,
- delimiter: nil, header: nil, job_id: nil, prefix: nil,
- labels: nil
+ def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
+ prefix: nil, labels: nil
  ensure_service!
- options = { format: format, compression: compression,
- delimiter: delimiter, header: header, job_id: job_id,
+ options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
  prefix: prefix, labels: labels }
+ source_ref = if source.respond_to? :model_ref
+ source.model_ref
+ else
+ Service.get_table_ref source, default_ref: project_ref
+ end

- table_ref = Service.get_table_ref table, default_ref: project_ref
- updater = ExtractJob::Updater.from_options service, table_ref,
- extract_url, options
+ updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options

  yield updater if block_given?
@@ -1406,51 +1648,63 @@ module Google
  end

  ##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using a synchronous method that blocks for a response. Timeouts
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # using a synchronous method that blocks for a response. Timeouts
  # and transient errors are generally handled as needed to complete the
- # job. See {#extract_job} for the asynchronous version. Use this method
- # instead of {Table#extract} to extract data from source tables in other
- # projects.
+ # job. See {#extract_job} for the asynchronous version.
+ #
+ # Use this method instead of {Table#extract} or {Model#extract} to
+ # extract data from source tables or models in other projects.
  #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {ExtractJob::Updater#location=} in a block passed to this method.
  #
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- # Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
  #
- # @param [String, Table] table The source table from which to extract
- # data. This can be a table object; or a string ID as specified by the
- # [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ # the extract operation. This can be a table or model object; or a
+ # table ID string as specified by the [Standard SQL Query
  # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
  # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
  # Reference](https://cloud.google.com/bigquery/query-reference#from)
  # (`project-name:dataset_id.table_id`).
  # @param [Google::Cloud::Storage::File, String, Array<String>]
  # extract_url The Google Storage file or file URI pattern(s) to which
- # BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- # `csv`.
+ # BigQuery should extract. For a model export this value should be a
+ # string ending in an object name prefix, since multiple objects will
+ # be exported.
+ # @param [String] format The exported file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
+ #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
  # @param [String] compression The compression type to use for exported
  # files. Possible values include `GZIP` and `NONE`. The default value
- # is `NONE`.
+ # is `NONE`. Not applicable when extracting models.
  # @param [String] delimiter Delimiter to use between fields in the
- # exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- # results. Default is `true`.
+ # exported table data. Default is `,`. Not applicable when extracting
+ # models.
+ # @param [Boolean] header Whether to print out a header row in table
+ # exports. Default is `true`. Not applicable when extracting models.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
  # configuration object for setting additional options.
  #
  # @return [Boolean] Returns `true` if the extract operation succeeded.
  #
- # @example
+ # @example Export table data
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
@@ -1458,12 +1712,19 @@ module Google
  # bigquery.extract "bigquery-public-data.samples.shakespeare",
  # "gs://my-bucket/shakespeare.csv"
  #
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+ #
  # @!group Data
  #
- def extract table, extract_url, format: nil, compression: nil,
- delimiter: nil, header: nil, &block
- job = extract_job table,
- extract_url,
+ def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
+ job = extract_job source, extract_url,
  format: format,
  compression: compression,
  delimiter: delimiter,
@@ -1487,9 +1748,7 @@ module Google

  # TODO: remove `Integer` and set normally after migrating to Gax or
  # to google-api-client 0.10 (See google/google-api-ruby-client#439)
- if gapi.numeric_id
- p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id)
- end
+ p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
  end
  end