google-cloud-bigquery 1.21.1 → 1.27.0

@@ -20,15 +20,17 @@ module Google
  # # ExtractJob
  #
  # A {Job} subclass representing an export operation that may be performed
- # on a {Table}. A ExtractJob instance is created when you call
- # {Table#extract_job}.
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
+ # {Project#extract_job}, {Table#extract_job}, or {Model#extract_job}.
  #
  # @see https://cloud.google.com/bigquery/docs/exporting-data
- #   Exporting Data From BigQuery
+ #   Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ #   Exporting models
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
  #   reference
  #
- # @example
+ # @example Export table data
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
  #   extract_job.wait_until_done!
  #   extract_job.done? #=> true
  #
+ # @example Export a model
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #   model = dataset.model "my_model"
+ #
+ #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+ #
+ #   extract_job.wait_until_done!
+ #   extract_job.done? #=> true
+ #
  class ExtractJob < Job
  ##
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,126 @@ module Google
  end

  ##
- # The table from which the data is exported. This is the table upon
- # which {Table#extract_job} was called.
+ # The table or model that is exported.
  #
- # @return [Table] A table instance.
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
  #
  def source
-   table = @gapi.configuration.extract.source_table
-   return nil unless table
-   retrieve_table table.project_id, table.dataset_id, table.table_id
+   if (table = @gapi.configuration.extract.source_table)
+     retrieve_table table.project_id, table.dataset_id, table.table_id
+   elsif (model = @gapi.configuration.extract.source_model)
+     retrieve_model model.project_id, model.dataset_id, model.model_id
+   end
  end

  ##
- # Checks if the export operation compresses the data using gzip. The
- # default is `false`.
+ # Whether the source of the export job is a table. See {#source}.
  #
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+ # @return [Boolean] `true` when the source is a table, `false`
+ #   otherwise.
  #
- def compression?
-   val = @gapi.configuration.extract.compression
-   val == "GZIP"
+ def table?
+   !@gapi.configuration.extract.source_table.nil?
  end

  ##
- # Checks if the destination format for the data is [newline-delimited
- # JSON](http://jsonlines.org/). The default is `false`.
+ # Whether the source of the export job is a model. See {#source}.
  #
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
+ # @return [Boolean] `true` when the source is a model, `false`
  #   otherwise.
  #
+ def model?
+   !@gapi.configuration.extract.source_model.nil?
+ end
+
+ ##
+ # Checks if the export operation compresses the data using gzip. The
+ # default is `false`. Not applicable when extracting models.
+ #
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+ #   table extraction.
+ def compression?
+   return false unless table?
+   @gapi.configuration.extract.compression == "GZIP"
+ end
+
+ ##
+ # Checks if the destination format for the table data is [newline-delimited
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+ # extracting models.
+ #
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+ #   `NEWLINE_DELIMITED_JSON` or not a table extraction.
+ #
  def json?
-   val = @gapi.configuration.extract.destination_format
-   val == "NEWLINE_DELIMITED_JSON"
+   return false unless table?
+   @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
- # Checks if the destination format for the data is CSV. Tables with
+ # Checks if the destination format for the table data is CSV. Tables with
  # nested or repeated fields cannot be exported as CSV. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+ #   table extraction.
  #
  def csv?
+   return false unless table?
    val = @gapi.configuration.extract.destination_format
    return true if val.nil?
    val == "CSV"
  end

  ##
- # Checks if the destination format for the data is
- # [Avro](http://avro.apache.org/). The default is `false`.
+ # Checks if the destination format for the table data is
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+ # when extracting models.
  #
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+ #   table extraction.
  #
  def avro?
+   return false unless table?
+   @gapi.configuration.extract.destination_format == "AVRO"
+ end
+
+ ##
+ # Checks if the destination format for the model is TensorFlow SavedModel.
+ # The default is `true` for models. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+ #   `ML_TF_SAVED_MODEL` or not a model extraction.
+ #
+ def ml_tf_saved_model?
+   return false unless model?
    val = @gapi.configuration.extract.destination_format
-   val == "AVRO"
+   return true if val.nil?
+   val == "ML_TF_SAVED_MODEL"
+ end
+
+ ##
+ # Checks if the destination format for the model is XGBoost. The default
+ # is `false`. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+ #   `ML_XGBOOST_BOOSTER` or not a model extraction.
+ #
+ def ml_xgboost_booster?
+   return false unless model?
+   @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
  end

  ##
  # The character or symbol the operation uses to delimit fields in the
- # exported data. The default is a comma (,).
+ # exported data. The default is a comma (,) for tables. Not applicable
+ # when extracting models.
  #
- # @return [String] A string containing the character, such as `","`.
+ # @return [String, nil] A string containing the character, such as `","`,
+ #   or `nil` if not a table extraction.
  #
  def delimiter
+   return unless table?
    val = @gapi.configuration.extract.field_delimiter
    val = "," if val.nil?
    val
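The new `#table?` and `#model?` predicates make it possible to tell what kind of extract a job performed before touching any format-specific readers. A minimal sketch of how they might be used, assuming a dataset `my_dataset` containing a model `my_model` (placeholder names):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
model = bigquery.dataset("my_dataset").model "my_model"

extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
extract_job.wait_until_done!

if extract_job.model?
  puts "exported model #{extract_job.source.model_id}"  # source is a Model
elsif extract_job.table?
  puts "exported table #{extract_job.source.table_id}"  # source is a Table
end
```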
@@ -121,12 +190,13 @@ module Google

  ##
  # Checks if the exported data contains a header row. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
  # @return [Boolean] `true` when the print header configuration is
- #   present or `nil`, `false` otherwise.
+ #   present or `nil`, `false` if disabled or not a table extraction.
  #
  def print_header?
+   return false unless table?
    val = @gapi.configuration.extract.print_header
    val = true if val.nil?
    val
@@ -159,12 +229,14 @@ module Google
  # whether to enable extracting applicable column types (such as
  # `TIMESTAMP`) to their corresponding AVRO logical types
  # (`timestamp-micros`), instead of only using their raw types
- # (`avro-long`).
+ # (`avro-long`). Not applicable when extracting models.
  #
  # @return [Boolean] `true` when applicable column types will use their
- #   corresponding AVRO logical types, `false` otherwise.
+ #   corresponding AVRO logical types, `false` if not enabled or not a
+ #   table extraction.
  #
  def use_avro_logical_types?
+   return false unless table?
    @gapi.configuration.extract.use_avro_logical_types
  end

@@ -182,19 +254,24 @@ module Google
  #
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
  #   configuration object for setting query options.
- def self.from_options service, table, storage_files, options
+ def self.from_options service, source, storage_files, options
    job_ref = service.job_ref_from options[:job_id], options[:prefix]
    storage_urls = Array(storage_files).map do |url|
      url.respond_to?(:to_gs_url) ? url.to_gs_url : url
    end
    options[:format] ||= Convert.derive_source_format storage_urls.first
+   extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+     destination_uris: Array(storage_urls)
+   )
+   if source.is_a? Google::Apis::BigqueryV2::TableReference
+     extract_config.source_table = source
+   elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+     extract_config.source_model = source
+   end
    job = Google::Apis::BigqueryV2::Job.new(
      job_reference: job_ref,
      configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
-       extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
-         destination_uris: Array(storage_urls),
-         source_table: table
-       ),
+       extract: extract_config,
        dry_run: options[:dryrun]
      )
    )
@@ -253,7 +330,7 @@ module Google
  end

  ##
- # Sets the compression type.
+ # Sets the compression type. Not applicable when extracting models.
  #
  # @param [String] value The compression type to use for exported
  #   files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +342,7 @@ module Google
  end

  ##
- # Sets the field delimiter.
+ # Sets the field delimiter. Not applicable when extracting models.
  #
  # @param [String] value Delimiter to use between fields in the
  #   exported data. Default is <code>,</code>.
@@ -276,14 +353,21 @@ module Google
  end

  ##
- # Sets the destination file format. The default value is `csv`.
+ # Sets the destination file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
  #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ #
  # @param [String] new_format The new source format.
  #
  # @!group Attributes
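Since model extracts go through the same Updater, the model formats can be selected with `#format=` just like the table formats. A hedged sketch, assuming the block form of `Model#extract_job` yields this Updater (bucket and object names are placeholders):

```ruby
# Export a model as an XGBoost booster instead of the default
# TensorFlow SavedModel. `model` is assumed to be a
# Google::Cloud::Bigquery::Model fetched as in the earlier examples.
extract_job = model.extract_job "gs://my-bucket/my_model_export" do |extract|
  extract.format = :ml_xgboost_booster
end

extract_job.wait_until_done!
extract_job.ml_xgboost_booster? #=> true
```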
@@ -293,7 +377,8 @@ module Google
  end

  ##
- # Print a header row in the exported file.
+ # Print a header row in the exported file. Not applicable when
+ # extracting models.
  #
  # @param [Boolean] value Whether to print out a header row in the
  #   results. Default is `true`.
@@ -307,12 +392,21 @@ module Google
  # Sets the labels to use for the job.
  #
  # @param [Hash] value A hash of user-provided labels associated with
- #   the job. You can use these to organize and group your jobs. Label
- #   keys and values can be no longer than 63 characters, can only
- #   contain lowercase letters, numeric characters, underscores and
- #   dashes. International characters are allowed. Label values are
- #   optional. Label keys must start with a letter and each label in
- #   the list must have a different key.
+ #   the job. You can use these to organize and group your jobs.
+ #
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  #
  # @!group Attributes
  #
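For illustration, a label hash satisfying the documented constraints might be applied through the Updater like this; the keys and values are arbitrary placeholders, and `table` is assumed to be a `Google::Cloud::Bigquery::Table`:

```ruby
# Attach organizing labels to the extract job via the block form.
extract_job = table.extract_job "gs://my-bucket/my-file.csv" do |extract|
  extract.labels = { "team" => "analytics", "env" => "prod" }
end
```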
@@ -362,6 +456,16 @@ module Google
  @gapi
  end
  end
+
+ protected
+
+ def retrieve_model project_id, dataset_id, model_id
+   ensure_service!
+   gapi = service.get_project_model project_id, dataset_id, model_id
+   Model.from_gapi_json gapi, service
+ rescue Google::Cloud::NotFoundError
+   nil
+ end
  end
  end
  end
@@ -215,6 +215,17 @@ module Google
  @gapi.statistics.parent_job_id
  end

+ ##
+ # An array containing the job resource usage breakdown by reservation, if present. Reservation usage statistics
+ # are only reported for jobs that are executed within reservations. On-demand jobs do not report this data.
+ #
+ # @return [Array<Google::Cloud::Bigquery::Job::ReservationUsage>, nil] The reservation usage, if present.
+ #
+ def reservation_usage
+   return nil unless @gapi.statistics.reservation_usage
+   Array(@gapi.statistics.reservation_usage).map { |g| ReservationUsage.from_gapi g }
+ end
+
  ##
  # The statistics including stack frames for a child job of a script.
  #
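A short sketch of reading the new statistic; `my_job_id` is a placeholder, and on-demand jobs simply return `nil` here:

```ruby
# Print the per-reservation slot consumption of a finished job.
job = bigquery.job "my_job_id"

Array(job.reservation_usage).each do |usage|
  puts "#{usage.name}: #{usage.slot_ms} slot-milliseconds"
end
```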
@@ -489,6 +500,29 @@ module Google
  end
  end

+ ##
+ # Represents Job resource usage breakdown by reservation.
+ #
+ # @attr_reader [String] name The reservation name, or "unreserved" for on-demand resource usage.
+ # @attr_reader [Fixnum] slot_ms The slot-milliseconds the job spent in the given reservation.
+ #
+ class ReservationUsage
+   attr_reader :name, :slot_ms
+
+   ##
+   # @private Creates a new ReservationUsage instance.
+   def initialize name, slot_ms
+     @name = name
+     @slot_ms = slot_ms
+   end
+
+   ##
+   # @private New ReservationUsage from a statistics.reservation_usage value.
+   def self.from_gapi gapi
+     new gapi.name, gapi.slot_ms
+   end
+ end
+
  ##
  # Represents statistics for a child job of a script.
  #
@@ -547,7 +581,7 @@ module Google
  end

  ##
- # @private New ScriptStatistics from a statistics.script_statistics object.
+ # @private New ScriptStatistics from a statistics.script_statistics value.
  def self.from_gapi gapi
    frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
    new gapi.evaluation_kind, frames
@@ -37,8 +37,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
  #     schema.string "first_name", mode: :required
  #     schema.record "cities_lived", mode: :repeated do |nested_schema|
  #       nested_schema.string "place", mode: :required
@@ -112,8 +112,7 @@ module Google
  #   `false` otherwise.
  #
  def iso8859_1?
-   val = @gapi.configuration.load.encoding
-   val == "ISO-8859-1"
+   @gapi.configuration.load.encoding == "ISO-8859-1"
  end

  ##
@@ -195,8 +194,7 @@ module Google
  #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
  #
  def json?
-   val = @gapi.configuration.load.source_format
-   val == "NEWLINE_DELIMITED_JSON"
+   @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
@@ -218,8 +216,27 @@ module Google
  #   `false` otherwise.
  #
  def backup?
-   val = @gapi.configuration.load.source_format
-   val == "DATASTORE_BACKUP"
+   @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+ end
+
+ ##
+ # Checks if the source format is ORC.
+ #
+ # @return [Boolean] `true` when the source format is `ORC`,
+ #   `false` otherwise.
+ #
+ def orc?
+   @gapi.configuration.load.source_format == "ORC"
+ end
+
+ ##
+ # Checks if the source format is Parquet.
+ #
+ # @return [Boolean] `true` when the source format is `PARQUET`,
+ #   `false` otherwise.
+ #
+ def parquet?
+   @gapi.configuration.load.source_format == "PARQUET"
  end

  ##
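A hedged sketch of the new format predicates on a load job; the URI is a placeholder, and `format: :parquet` makes the format explicit rather than relying on detection from the file extension:

```ruby
load_job = dataset.load_job "my_new_table",
                            "gs://my-bucket/data.parquet",
                            format: :parquet
load_job.wait_until_done!

load_job.parquet? #=> true
load_job.orc?     #=> false
```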
@@ -347,6 +364,58 @@ module Google
    nil
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning?
+   !@gapi.configuration.load.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_mode
+   @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_source_uri_prefix
+   @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
  ###
  # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
  # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
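These readers mirror what the setters (shown further down in this diff) write into the load configuration. A sketch using the sample URIs from the diff's own examples:

```ruby
gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  job.format = :parquet
  job.hive_partitioning_mode = :auto  # stored upcased, read back as "AUTO"
  job.hive_partitioning_source_uri_prefix = source_uri_prefix
end

load_job.hive_partitioning?     #=> true
load_job.hive_partitioning_mode #=> "AUTO"
```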
@@ -428,8 +497,9 @@ module Google
  # The period for which the destination table will be time partitioned, if
  # any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
  #
- # @return [String, nil] The time partition type. Currently the only supported
- #   value is "DAY", or `nil` if not present.
+ # @return [String, nil] The time partition type. The supported types are `DAY`,
+ #   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+ #   hour, month, and year, respectively; or `nil` if not present.
  #
  # @!group Attributes
  #
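A sketch using one of the newly documented partition periods; the URI is a placeholder, and whether a given period is accepted is decided by the BigQuery service, not by the gem:

```ruby
load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv" do |job|
  job.time_partitioning_type = "MONTH"
end

load_job.time_partitioning_type #=> "MONTH"
```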
@@ -1303,12 +1373,21 @@ module Google
  # Sets the labels to use for the load job.
  #
  # @param [Hash] val A hash of user-provided labels associated with
- #   the job. You can use these to organize and group your jobs. Label
- #   keys and values can be no longer than 63 characters, can only
- #   contain lowercase letters, numeric characters, underscores and
- #   dashes. International characters are allowed. Label values are
- #   optional. Label keys must start with a letter and each label in
- #   the list must have a different key.
+ #   the job. You can use these to organize and group your jobs.
+ #
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  #
  # @!group Attributes
  #
@@ -1316,6 +1395,89 @@ module Google
  @gapi.configuration.update! labels: val
  end

+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported formats are `avro`, `csv`, `json`, `orc`, and `parquet`.
+ #
+ # See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+ #     job.format = :parquet
+ #     job.hive_partitioning_mode = :auto
+ #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_mode= mode
+   @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#hive_partitioning_mode=}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+ #     job.format = :parquet
+ #     job.hive_partitioning_mode = :auto
+ #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+   @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
@@ -1335,8 +1497,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1376,8 +1538,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1417,8 +1579,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1458,8 +1620,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1490,8 +1652,9 @@ module Google
  # BigQuery does not allow you to change partitioning on an existing
  # table.
  #
- # @param [String] type The time partition type. Currently the only
- #   supported value is "DAY".
+ # @param [String] type The time partition type. The supported types are `DAY`,
+ #   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+ #   hour, month, and year, respectively.
  #
  # @example
  #   require "google/cloud/bigquery"
@@ -1499,8 +1662,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #   end
  #
@@ -1538,8 +1701,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #     job.time_partitioning_field = "dob"
  #     job.schema do |schema|
@@ -1574,8 +1737,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #     job.time_partitioning_expiration = 86_400
  #   end
@@ -1634,8 +1797,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #     job.time_partitioning_field = "dob"
  #     job.schema do |schema|