google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
@@ -56,7 +56,8 @@ module Google
  # @private The Service object.
  attr_accessor :service

- attr_reader :name, :numeric_id
+ attr_reader :name
+ attr_reader :numeric_id

  ##
  # Creates a new Service instance.
@@ -91,8 +92,7 @@ module Google
  # @return [String] The service account email address.
  #
  def service_account_email
- @service_account_email ||= \
- service.project_service_account.email
+ @service_account_email ||= service.project_service_account.email
  end

  ##
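As a quick, hedged sketch of the accessor refactored above (assuming default application credentials are available in the environment):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    # Memoized after the first call, per the one-liner above.
    puts bigquery.service_account_email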
@@ -139,8 +139,8 @@ module Google
  # * `empty` - An error will be returned if the destination table
  # already contains data.
  # @param [String] job_id A user-defined ID for the copy job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -149,18 +149,26 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
  # configuration object for setting additional options.
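A minimal sketch of passing `labels` that satisfy the requirements listed in the hunk above to `copy_job` (the dataset and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"  # placeholder dataset
    source  = dataset.table "source_table"   # placeholder tables
    target  = dataset.table "target_table"

    # Keys and values are lowercase; keys start with a lowercase letter.
    job = bigquery.copy_job source, target, labels: { "cost-center" => "analytics" }
    job.wait_until_done!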
@@ -182,11 +190,9 @@ module Google
  #
  # @!group Data
  #
- def copy_job source_table, destination_table, create: nil, write: nil,
- job_id: nil, prefix: nil, labels: nil
+ def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
  ensure_service!
- options = { create: create, write: write, labels: labels,
- job_id: job_id, prefix: prefix }
+ options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }

  updater = CopyJob::Updater.from_options(
  service,
@@ -261,13 +267,8 @@ module Google
  #
  # @!group Data
  #
- def copy source_table, destination_table, create: nil, write: nil,
- &block
- job = copy_job source_table,
- destination_table,
- create: create,
- write: write,
- &block
+ def copy source_table, destination_table, create: nil, write: nil, &block
+ job = copy_job source_table, destination_table, create: create, write: write, &block
  job.wait_until_done!
  ensure_job_succeeded! job
  true
@@ -277,27 +278,6 @@ module Google
  # Queries data by creating a [query
  # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery | Ruby | Notes |
- # |-------------|----------------|---|
- # | `BOOL` | `true`/`false` | |
- # | `INT64` | `Integer` | |
- # | `FLOAT64` | `Float` | |
- # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
- # | `STRING` | `String` | |
- # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
- # | `DATE` | `Date` | |
- # | `TIMESTAMP` | `Time` | |
- # | `TIME` | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
- # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method.
  #
@@ -305,13 +285,60 @@ module Google
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
  # [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- # arguments when the `query` string contains either positional (`?`)
- # or named (`@myparam`) query parameters. If value passed is an array
- # `["foo"]`, the query must use positional query parameters. If value
- # passed is a hash `{ myparam: "foo" }`, the query must use named
- # query parameters. When set, `legacy_sql` will automatically be set
- # to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ # either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
+ # query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+ # true.
+ #
+ # BigQuery types are converted from Ruby types as follows:
+ #
+ # | BigQuery | Ruby | Notes |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL` | `true`/`false` | |
+ # | `INT64` | `Integer` | |
+ # | `FLOAT64` | `Float` | |
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
+ # | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `STRING` | `String` | |
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
+ # | `DATE` | `Date` | |
+ # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `TIMESTAMP` | `Time` | |
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
+ #
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ # of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ # GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
+ #
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
+ #
+ # * `:BOOL`
+ # * `:INT64`
+ # * `:FLOAT64`
+ # * `:NUMERIC`
+ # * `:BIGNUMERIC`
+ # * `:STRING`
+ # * `:DATETIME`
+ # * `:DATE`
+ # * `:GEOGRAPHY`
+ # * `:TIMESTAMP`
+ # * `:TIME`
+ # * `:BYTES`
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ # is specified as `[:INT64]`.
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ # match the `params` hash, and the values are the type codes that match the data.
+ #
+ # Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  # that represents the mapping of the external tables to the table
  # names used in the SQL query. The hash keys are the table names, and
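An aside on the `types` parameter documented in the hunk above, as a minimal hedged sketch (the query and values are illustrative): `BIGNUMERIC` requires an explicit mapping, because a bare `BigDecimal` is inferred as `NUMERIC`.

    require "google/cloud/bigquery"
    require "bigdecimal"

    bigquery = Google::Cloud::Bigquery.new

    # The `types` hash forces BIGNUMERIC instead of the inferred NUMERIC.
    job = bigquery.query_job "SELECT @amount AS amount",
                             params: { amount: BigDecimal("0.000000000001") },
                             types:  { amount: :BIGNUMERIC }
    job.wait_until_done!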
@@ -375,13 +402,19 @@ module Google
  # Flattens all nested and repeated fields in the query results. The
  # default value is `true`. `large_results` parameter must be `true` if
  # this is set to `false`.
+ # @param [Integer] maximum_billing_tier Limits the billing tier for this
+ # job. Queries that have resource usage beyond this tier will fail
+ # (without incurring a charge). WARNING: The billed byte amount can be
+ # multiplied by an amount up to this number! Most users should not need
+ # to alter this setting, and we recommend that you avoid introducing new
+ # uses of it. Deprecated.
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
  # job. Queries that will have bytes billed beyond this limit will fail
  # (without incurring a charge). Optional. If unspecified, this will be
  # set to your project default.
  # @param [String] job_id A user-defined ID for the query job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -390,30 +423,51 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  #
  # See [Generating a job
  # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @param [Array<String>, String] udfs User-defined function resources
- # used in the query. May be either a code resource to load from a
- # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ # used in a legacy SQL query. May be either a code resource to load from
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  # that contains code for a user-defined function (UDF). Providing an
  # inline code resource is equivalent to providing a URI for a file
- # containing the same code. See [User-Defined
- # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
- # @param [Integer] maximum_billing_tier Deprecated: Change the billing
- # tier to allow high-compute queries.
+ # containing the same code.
+ #
+ # This parameter is used for defining User Defined Function (UDF)
+ # resources only when using legacy SQL. Users of standard SQL should
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+ # Routines API to define UDF resources.
+ #
+ # For additional information on migrating, see: [Migrating to
+ # standard SQL - Differences in user-defined JavaScript
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
+ # @param [Boolean] create_session If true, creates a new session, where the
+ # session ID will be a server generated random id. If false, runs query
+ # with an existing session ID when one is provided in the `session_id`
+ # param, otherwise runs query in non-session mode. See {Job#session_id}.
+ # The default value is false.
+ # @param [String] session_id The ID of an existing session. See also the
+ # `create_session` param and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  # configuration object for setting query options.
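A minimal sketch of the new `create_session`/`session_id` pair documented above (the temp table and statements are illustrative):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # The first job opens a session; the server assigns the session ID.
    setup = bigquery.query_job "CREATE TEMP TABLE t1 (x INT64)", create_session: true
    setup.wait_until_done!

    # A follow-up job joins the same session and can see the temp table.
    followup = bigquery.query_job "INSERT INTO t1 VALUES (1)", session_id: setup.session_id
    followup.wait_until_done!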
@@ -425,8 +479,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # "`my_project.my_dataset.my_table`"
+ # job = bigquery.query_job "SELECT name FROM `my_project.my_dataset.my_table`"
  #
  # job.wait_until_done!
  # if !job.failed?
@@ -440,8 +493,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # " [my_project:my_dataset.my_table]",
+ # job = bigquery.query_job "SELECT name FROM [my_project:my_dataset.my_table]",
  # legacy_sql: true
  #
  # job.wait_until_done!
@@ -456,9 +508,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # "`my_dataset.my_table`" \
- # " WHERE id = ?",
+ # job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
  # params: [1]
  #
  # job.wait_until_done!
@@ -473,9 +523,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "SELECT name FROM " \
- # "`my_dataset.my_table`" \
- # " WHERE id = @id",
+ # job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
  # params: { id: 1 }
  #
  # job.wait_until_done!
@@ -485,18 +533,32 @@ module Google
  # end
  # end
  #
+ # @example Query using named query parameters with types:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # job = bigquery.query_job "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
+ # params: { ids: [] },
+ # types: { ids: [:INT64] }
+ #
+ # job.wait_until_done!
+ # if !job.failed?
+ # job.data.each do |row|
+ # puts row[:name]
+ # end
+ # end
+ #
  # @example Execute a DDL statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "CREATE TABLE " \
- # "`my_dataset.my_table` " \
- # "(x INT64)"
+ # job = bigquery.query_job "CREATE TABLE `my_dataset.my_table` (x INT64)"
  #
  # job.wait_until_done!
  # if !job.failed?
- # table_ref = job.ddl_target_table
+ # table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  # end
  #
  # @example Execute a DML statement:
@@ -504,10 +566,7 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # job = bigquery.query_job "UPDATE " \
- # "`my_dataset.my_table` " \
- # "SET x = x + 1 " \
- # "WHERE x IS NOT NULL"
+ # job = bigquery.query_job "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
  #
  # job.wait_until_done!
  # if !job.failed?
@@ -538,23 +597,56 @@ module Google
  # end
  # end
  #
- def query_job query, params: nil, external: nil,
- priority: "INTERACTIVE", cache: true, table: nil,
- create: nil, write: nil, dryrun: nil, dataset: nil,
- project: nil, standard_sql: nil, legacy_sql: nil,
- large_results: nil, flatten: nil,
- maximum_billing_tier: nil, maximum_bytes_billed: nil,
- job_id: nil, prefix: nil, labels: nil, udfs: nil
+ def query_job query,
+ params: nil,
+ types: nil,
+ external: nil,
+ priority: "INTERACTIVE",
+ cache: true,
+ table: nil,
+ create: nil,
+ write: nil,
+ dryrun: nil,
+ dataset: nil,
+ project: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ large_results: nil,
+ flatten: nil,
+ maximum_billing_tier: nil,
+ maximum_bytes_billed: nil,
+ job_id: nil,
+ prefix: nil,
+ labels: nil,
+ udfs: nil,
+ create_session: nil,
+ session_id: nil
  ensure_service!
- options = { priority: priority, cache: cache, table: table,
- create: create, write: write, dryrun: dryrun,
- large_results: large_results, flatten: flatten,
- dataset: dataset, project: (project || self.project),
- legacy_sql: legacy_sql, standard_sql: standard_sql,
- maximum_billing_tier: maximum_billing_tier,
- maximum_bytes_billed: maximum_bytes_billed,
- external: external, job_id: job_id, prefix: prefix,
- labels: labels, udfs: udfs, params: params }
+ options = {
+ params: params,
+ types: types,
+ external: external,
+ priority: priority,
+ cache: cache,
+ table: table,
+ create: create,
+ write: write,
+ dryrun: dryrun,
+ dataset: dataset,
+ project: (project || self.project),
+ standard_sql: standard_sql,
+ legacy_sql: legacy_sql,
+ large_results: large_results,
+ flatten: flatten,
+ maximum_billing_tier: maximum_billing_tier,
+ maximum_bytes_billed: maximum_bytes_billed,
+ job_id: job_id,
+ prefix: prefix,
+ labels: labels,
+ udfs: udfs,
+ create_session: create_session,
+ session_id: session_id
+ }

  updater = QueryJob::Updater.from_options service, query, options

@@ -571,27 +663,6 @@ module Google
  # as needed to complete the query. When used for executing DDL/DML
  # statements, this method does not return row data.
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery | Ruby | Notes |
- # |-------------|----------------|---|
- # | `BOOL` | `true`/`false` | |
- # | `INT64` | `Integer` | |
- # | `FLOAT64` | `Float` | |
- # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
- # | `STRING` | `String` | |
- # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
- # | `DATE` | `Date` | |
- # | `TIMESTAMP` | `Time` | |
- # | `TIME` | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
- # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method.
  #
@@ -601,13 +672,60 @@ module Google
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
  # [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- # arguments when the `query` string contains either positional (`?`)
- # or named (`@myparam`) query parameters. If value passed is an array
- # `["foo"]`, the query must use positional query parameters. If value
- # passed is a hash `{ myparam: "foo" }`, the query must use named
- # query parameters. When set, `legacy_sql` will automatically be set
- # to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ # either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
+ # query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+ # true.
+ #
+ # BigQuery types are converted from Ruby types as follows:
+ #
+ # | BigQuery | Ruby | Notes |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL` | `true`/`false` | |
+ # | `INT64` | `Integer` | |
+ # | `FLOAT64` | `Float` | |
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
+ # | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `STRING` | `String` | |
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
+ # | `DATE` | `Date` | |
+ # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `TIMESTAMP` | `Time` | |
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
+ #
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ # of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ # GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
+ #
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
+ #
+ # * `:BOOL`
+ # * `:INT64`
+ # * `:FLOAT64`
+ # * `:NUMERIC`
+ # * `:BIGNUMERIC`
+ # * `:STRING`
+ # * `:DATETIME`
+ # * `:DATE`
+ # * `:GEOGRAPHY`
+ # * `:TIMESTAMP`
+ # * `:TIME`
+ # * `:BYTES`
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ # is specified as `[:INT64]`.
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ # match the `params` hash, and the values are the type codes that match the data.
+ #
+ # Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  # that represents the mapping of the external tables to the table
  # names used in the SQL query. The hash keys are the table names, and
@@ -649,6 +767,8 @@ module Google
  # When set to false, the values of `large_results` and `flatten` are
  # ignored; the query will be run as if `large_results` is true and
  # `flatten` is false. Optional. The default value is false.
+ # @param [String] session_id The ID of an existing session. See the
+ # `create_session` param in {#query_job} and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  # configuration object for setting additional options for the query.
@@ -663,9 +783,12 @@ module Google
  #
  # sql = "SELECT name FROM `my_project.my_dataset.my_table`"
  # data = bigquery.query sql
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using legacy SQL:
  # require "google/cloud/bigquery"
@@ -675,9 +798,12 @@ module Google
  #
  # sql = "SELECT name FROM [my_project:my_dataset.my_table]"
  # data = bigquery.query sql, legacy_sql: true
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Retrieve all rows: (See {Data#all})
  # require "google/cloud/bigquery"
@@ -695,28 +821,46 @@ module Google
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE id = ?",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = ?",
  # params: [1]
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using named query parameters:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE id = @id",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id = @id",
  # params: { id: 1 }
  #
+ # # Iterate over the first page of results
+ # data.each do |row|
+ # puts row[:name]
+ # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
+ #
+ # @example Query using named query parameters with types:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE id IN UNNEST(@ids)",
+ # params: { ids: [] },
+ # types: { ids: [:INT64] }
+ #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Execute a DDL statement:
  # require "google/cloud/bigquery"
@@ -725,16 +869,14 @@ module Google
  #
  # data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
  #
- # table_ref = data.ddl_target_table
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  #
  # @example Execute a DML statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
- # data = bigquery.query "UPDATE `my_dataset.my_table` " \
- # "SET x = x + 1 " \
- # "WHERE x IS NOT NULL"
+ # data = bigquery.query "UPDATE `my_dataset.my_table` SET x = x + 1 WHERE x IS NOT NULL"
  #
  # puts data.num_dml_affected_rows
  #
@@ -755,17 +897,36 @@ module Google
  # query.table = dataset.table "my_table", skip_lookup: true
  # end
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
- #
- def query query, params: nil, external: nil, max: nil, cache: true,
- dataset: nil, project: nil, standard_sql: nil,
- legacy_sql: nil, &block
- job = query_job query, params: params, external: external,
- cache: cache, dataset: dataset,
- project: project, standard_sql: standard_sql,
- legacy_sql: legacy_sql, &block
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
+ #
+ def query query,
+ params: nil,
+ types: nil,
+ external: nil,
+ max: nil,
+ cache: true,
+ dataset: nil,
+ project: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ session_id: nil,
+ &block
+ job = query_job query,
+ params: params,
+ types: types,
+ external: external,
+ cache: cache,
+ dataset: dataset,
+ project: project,
+ standard_sql: standard_sql,
+ legacy_sql: legacy_sql,
+ session_id: session_id,
+ &block
  job.wait_until_done!

  if job.failed?
@@ -822,9 +983,12 @@ module Google
  # data = bigquery.query "SELECT * FROM my_ext_table",
  # external: { my_ext_table: csv_table }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  def external url, format: nil
  ext = External.from_urls url, format
@@ -861,9 +1025,7 @@ module Google
  #
  def dataset dataset_id, skip_lookup: nil
  ensure_service!
- if skip_lookup
- return Dataset.new_reference project, dataset_id, service
- end
+ return Dataset.new_reference project, dataset_id, service if skip_lookup
  gapi = service.get_dataset dataset_id
  Dataset.from_gapi gapi, service
  rescue Google::Cloud::NotFoundError
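The refactored guard clause above keeps the `skip_lookup` fast path: it builds a local reference without calling the API. A brief sketch ("my_dataset" is a placeholder):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # No network round trip; no existence check is performed.
    dataset = bigquery.dataset "my_dataset", skip_lookup: true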
@@ -874,14 +1036,13 @@ module Google
  # Creates a new dataset.
  #
  # @param [String] dataset_id A unique ID for this dataset, without the
- # project name. The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # project name. The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  # @param [String] name A descriptive name for the dataset.
  # @param [String] description A user-friendly description of the
  # dataset.
  # @param [Integer] expiration The default lifetime of all tables in the
- # dataset, in milliseconds. The minimum value is 3600000 milliseconds
- # (one hour).
+ # dataset, in milliseconds. The minimum value is `3_600_000` (one hour).
  # @param [String] location The geographic location where the dataset
  # should reside. Possible values include `EU` and `US`. The default
  # value is `US`.
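A short sketch of `create_dataset` using the parameters documented in the hunk above (names and values are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    dataset = bigquery.create_dataset "my_dataset",
                                      name: "My Dataset",
                                      description: "Daily reporting tables",
                                      expiration: 3_600_000, # one hour, the documented minimum
                                      location: "EU"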
@@ -990,8 +1151,7 @@ module Google
  #
  def datasets all: nil, filter: nil, token: nil, max: nil
  ensure_service!
- options = { all: all, filter: filter, token: token, max: max }
- gapi = service.list_datasets options
+ gapi = service.list_datasets all: all, filter: filter, token: token, max: max
  Dataset::List.from_gapi gapi, service, all, filter, max
  end

@@ -1024,17 +1184,27 @@ module Google
  # Retrieves the list of jobs belonging to the project.
  #
  # @param [Boolean] all Whether to display jobs owned by all users in the
- # project. The default is `false`.
+ # project. The default is `false`. Optional.
  # @param [String] token A previously-returned page token representing
- # part of the larger set of results to view.
- # @param [Integer] max Maximum number of jobs to return.
- # @param [String] filter A filter for job state.
+ # part of the larger set of results to view. Optional.
+ # @param [Integer] max Maximum number of jobs to return. Optional.
+ # @param [String] filter A filter for job state. Optional.
  #
  # Acceptable values are:
  #
  # * `done` - Finished jobs
  # * `pending` - Pending jobs
  # * `running` - Running jobs
+ # @param [Time] min_created_at Min value for {Job#created_at}. When
+ # provided, only jobs created after or at this time are returned.
+ # Optional.
+ # @param [Time] max_created_at Max value for {Job#created_at}. When
+ # provided, only jobs created before or at this time are returned.
+ # Optional.
+ # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
+ # object or a job ID. If set, retrieve only child jobs of the
+ # specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
+ # and {Job#parent_job_id}.
  #
  # @return [Array<Google::Cloud::Bigquery::Job>] (See
  # {Google::Cloud::Bigquery::Job::List})
@@ -1059,6 +1229,20 @@ module Google
  # # process job
  # end
  #
+ # @example Retrieve only jobs created within provided times:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # two_days_ago = Time.now - 60*60*24*2
+ # three_days_ago = Time.now - 60*60*24*3
+ #
+ # jobs = bigquery.jobs min_created_at: three_days_ago,
+ # max_created_at: two_days_ago
+ # jobs.each do |job|
+ # # process job
+ # end
+ #
  # @example Retrieve all jobs: (See {Job::List#all})
  # require "google/cloud/bigquery"
  #
@@ -1069,11 +1253,63 @@ module Google
  # # process job
  # end
  #
- def jobs all: nil, token: nil, max: nil, filter: nil
+ # @example Retrieve child jobs by setting `parent_job`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # multi_statement_sql = <<~SQL
+ # -- Declare a variable to hold names as an array.
+ # DECLARE top_names ARRAY<STRING>;
+ # -- Build an array of the top 100 names from the year 2017.
+ # SET top_names = (
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
+ # WHERE year = 2017
+ # );
+ # -- Which names appear as words in Shakespeare's plays?
+ # SELECT
+ # name AS shakespeare_name
+ # FROM UNNEST(top_names) AS name
+ # WHERE name IN (
+ # SELECT word
+ # FROM `bigquery-public-data.samples.shakespeare`
+ # );
+ # SQL
+ #
+ # job = bigquery.query_job multi_statement_sql
+ #
+ # job.wait_until_done!
+ #
+ # child_jobs = bigquery.jobs parent_job: job
+ #
+ # child_jobs.each do |child_job|
+ # script_statistics = child_job.script_statistics
+ # puts script_statistics.evaluation_kind
+ # script_statistics.stack_frames.each do |stack_frame|
+ # puts stack_frame.text
+ # end
+ # end
+ #
+ def jobs all: nil,
+ token: nil,
+ max: nil,
+ filter: nil,
+ min_created_at: nil,
+ max_created_at: nil,
+ parent_job: nil
  ensure_service!
- options = { all: all, token: token, max: max, filter: filter }
- gapi = service.list_jobs options
- Job::List.from_gapi gapi, service, all, max, filter
+ parent_job = parent_job.job_id if parent_job.is_a? Job
+ options = {
+ parent_job_id: parent_job,
+ all: all,
+ token: token,
+ max: max, filter: filter,
+ min_created_at: min_created_at,
+ max_created_at: max_created_at
+ }
+ gapi = service.list_jobs(**options)
+ Job::List.from_gapi gapi, service, **options
  end

  ##
@@ -1119,8 +1355,7 @@ module Google
  #
  def projects token: nil, max: nil
  ensure_service!
- options = { token: token, max: max }
- gapi = service.list_projects options
+ gapi = service.list_projects token: token, max: max
  Project::List.from_gapi gapi, service, max
  end

@@ -1141,14 +1376,15 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  #
  # fourpm = bigquery.time 16, 0, 0
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE time_of_date = @time",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date = @time",
  # params: { time: fourpm }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Create Time with fractional seconds:
  # require "google/cloud/bigquery"
@@ -1156,14 +1392,15 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  #
  # precise_time = bigquery.time 16, 35, 15.376541
- # data = bigquery.query "SELECT name " \
- # "FROM `my_dataset.my_table`" \
- # "WHERE time_of_date >= @time",
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table` WHERE time_of_date >= @time",
  # params: { time: precise_time }
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  # puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  def time hour, minute, second
  Bigquery::Time.new "#{hour}:#{minute}:#{second}"
@@ -1280,49 +1517,61 @@ module Google
  end

  ##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using an asynchronous method. In this method, an {ExtractJob} is
- # immediately returned. The caller may poll the service by repeatedly
- # calling {Job#reload!} and {Job#done?} to detect when the job is done,
- # or simply block until the job is done by calling
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # asynchronously, immediately returning an {ExtractJob} that can be used to
+ # track the progress of the export job. The caller may poll the service by
+ # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
+ # is done, or simply block until the job is done by calling
  # {Job#wait_until_done!}. See {#extract} for the synchronous version.
- # Use this method instead of {Table#extract_job} to extract data from
- # source tables in other projects.
+ #
+ # Use this method instead of {Table#extract_job} or {Model#extract_job} to
+ # extract data from source tables or models in other projects.
  #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {ExtractJob::Updater#location=} in a block passed to this method.
  #
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- # Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
  #
- # @param [String, Table] table The source table from which to extract
- # data. This can be a table object; or a string ID as specified by the
- # [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ # the extract operation. This can be a table or model object; or a
+ # table ID string as specified by the [Standard SQL Query
  # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
  # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
  # Reference](https://cloud.google.com/bigquery/query-reference#from)
  # (`project-name:dataset_id.table_id`).
  # @param [Google::Cloud::Storage::File, String, Array<String>]
  # extract_url The Google Storage file or file URI pattern(s) to which
- # BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- # `csv`.
+ # BigQuery should extract. For a model export this value should be a
+ # string ending in an object name prefix, since multiple objects will
+ # be exported.
+ # @param [String] format The exported file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
+ #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
  # @param [String] compression The compression type to use for exported
  # files. Possible values include `GZIP` and `NONE`. The default value
- # is `NONE`.
+ # is `NONE`. Not applicable when extracting models.
  # @param [String] delimiter Delimiter to use between fields in the
- # exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- # results. Default is `true`.
+ # exported table data. Default is `,`. Not applicable when extracting
+ # models.
+ # @param [Boolean] header Whether to print out a header row in table
+ # exports. Default is `true`. Not applicable when extracting models.
  # @param [String] job_id A user-defined ID for the extract job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -1331,48 +1580,65 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
  # configuration object for setting additional options.
  #
  # @return [Google::Cloud::Bigquery::ExtractJob]
  #
- # @example
+ # @example Export table data
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
  #
  # table_id = "bigquery-public-data.samples.shakespeare"
- # extract_job = bigquery.extract_job table_id,
- # "gs://my-bucket/shakespeare.csv"
+ # extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
  # extract_job.wait_until_done!
  # extract_job.done? #=> true
  #
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
+ #
  # @!group Data
  #
- def extract_job table, extract_url, format: nil, compression: nil,
- delimiter: nil, header: nil, job_id: nil, prefix: nil,
- labels: nil
+ def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
+ prefix: nil, labels: nil
  ensure_service!
- options = { format: format, compression: compression,
- delimiter: delimiter, header: header, job_id: job_id,
+ options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
  prefix: prefix, labels: labels }
+ source_ref = if source.respond_to? :model_ref
+ source.model_ref
+ else
+ Service.get_table_ref source, default_ref: project_ref
+ end

- table_ref = Service.get_table_ref table, default_ref: project_ref
- updater = ExtractJob::Updater.from_options service, table_ref,
- extract_url, options
+ updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options

  yield updater if block_given?

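A hedged sketch of a table export using the non-default `format` and `compression` options documented above (the bucket name is a placeholder):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    table_id = "bigquery-public-data.samples.shakespeare"
    # Newline-delimited JSON, gzip-compressed in Cloud Storage.
    job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.json.gz",
                               format: "json",
                               compression: "GZIP"
    job.wait_until_done!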
@@ -1382,51 +1648,63 @@ module Google
  end

  ##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using a synchronous method that blocks for a response. Timeouts
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # using a synchronous method that blocks for a response. Timeouts
  # and transient errors are generally handled as needed to complete the
- # job. See {#extract_job} for the asynchronous version. Use this method
- # instead of {Table#extract} to extract data from source tables in other
- # projects.
+ # job. See {#extract_job} for the asynchronous version.
+ #
+ # Use this method instead of {Table#extract} or {Model#extract} to
+ # extract data from source tables or models in other projects.
  #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {ExtractJob::Updater#location=} in a block passed to this method.
  #
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- # Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
  #
- # @param [String, Table] table The source table from which to extract
- # data. This can be a table object; or a string ID as specified by the
- # [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ # the extract operation. This can be a table or model object; or a
+ # table ID string as specified by the [Standard SQL Query
  # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
  # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
  # Reference](https://cloud.google.com/bigquery/query-reference#from)
  # (`project-name:dataset_id.table_id`).
  # @param [Google::Cloud::Storage::File, String, Array<String>]
  # extract_url The Google Storage file or file URI pattern(s) to which
- # BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- # `csv`.
+ # BigQuery should extract. For a model export this value should be a
+ # string ending in an object name prefix, since multiple objects will
+ # be exported.
+ # @param [String] format The exported file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
+ #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
  # @param [String] compression The compression type to use for exported
  # files. Possible values include `GZIP` and `NONE`. The default value
- # is `NONE`.
+ # is `NONE`. Not applicable when extracting models.
  # @param [String] delimiter Delimiter to use between fields in the
- # exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- # results. Default is `true`.
+ # exported table data. Default is `,`. Not applicable when extracting
+ # models.
+ # @param [Boolean] header Whether to print out a header row in table
+ # exports. Default is `true`. Not applicable when extracting models.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
  # configuration object for setting additional options.
  #
  # @return [Boolean] Returns `true` if the extract operation succeeded.
  #
- # @example
+ # @example Export table data
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
@@ -1434,12 +1712,19 @@ module Google
  # bigquery.extract "bigquery-public-data.samples.shakespeare",
  # "gs://my-bucket/shakespeare.csv"
  #
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+ #
  # @!group Data
  #
- def extract table, extract_url, format: nil, compression: nil,
- delimiter: nil, header: nil, &block
- job = extract_job table,
- extract_url,
+ def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
+ job = extract_job source, extract_url,
  format: format,
  compression: compression,
  delimiter: delimiter,
@@ -1463,9 +1748,7 @@ module Google

  # TODO: remove `Integer` and set normally after migrating to Gax or
  # to google-api-client 0.10 (See google/google-api-ruby-client#439)
- if gapi.numeric_id
- p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id)
- end
+ p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
  end
  end