google-cloud-bigquery 1.21.1 → 1.27.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/CONTRIBUTING.md +1 -1
- data/lib/google-cloud-bigquery.rb +9 -2
- data/lib/google/cloud/bigquery.rb +1 -1
- data/lib/google/cloud/bigquery/convert.rb +3 -1
- data/lib/google/cloud/bigquery/copy_job.rb +15 -6
- data/lib/google/cloud/bigquery/data.rb +12 -0
- data/lib/google/cloud/bigquery/dataset.rb +61 -20
- data/lib/google/cloud/bigquery/dataset/access.rb +293 -16
- data/lib/google/cloud/bigquery/external.rb +352 -3
- data/lib/google/cloud/bigquery/extract_job.rb +154 -50
- data/lib/google/cloud/bigquery/job.rb +35 -1
- data/lib/google/cloud/bigquery/load_job.rb +197 -34
- data/lib/google/cloud/bigquery/model.rb +164 -8
- data/lib/google/cloud/bigquery/policy.rb +431 -0
- data/lib/google/cloud/bigquery/project.rb +164 -68
- data/lib/google/cloud/bigquery/query_job.rb +27 -12
- data/lib/google/cloud/bigquery/routine.rb +127 -5
- data/lib/google/cloud/bigquery/service.rb +50 -11
- data/lib/google/cloud/bigquery/table.rb +181 -42
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +7 -6
@@ -20,15 +20,17 @@ module Google
|
|
20
20
|
# # ExtractJob
|
21
21
|
#
|
22
22
|
# A {Job} subclass representing an export operation that may be performed
|
23
|
-
# on a {Table}. A ExtractJob instance is
|
24
|
-
# {Table#extract_job}.
|
23
|
+
# on a {Table} or {Model}. An ExtractJob instance is returned when you call
|
24
|
+
# {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
|
25
25
|
#
|
26
26
|
# @see https://cloud.google.com/bigquery/docs/exporting-data
|
27
|
-
# Exporting
|
27
|
+
# Exporting table data
|
28
|
+
# @see https://cloud.google.com/bigquery-ml/docs/exporting-models
|
29
|
+
# Exporting models
|
28
30
|
# @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
|
29
31
|
# reference
|
30
32
|
#
|
31
|
-
# @example
|
33
|
+
# @example Export table data
|
32
34
|
# require "google/cloud/bigquery"
|
33
35
|
#
|
34
36
|
# bigquery = Google::Cloud::Bigquery.new
|
@@ -40,6 +42,18 @@ module Google
|
|
40
42
|
# extract_job.wait_until_done!
|
41
43
|
# extract_job.done? #=> true
|
42
44
|
#
|
45
|
+
# @example Export a model
|
46
|
+
# require "google/cloud/bigquery"
|
47
|
+
#
|
48
|
+
# bigquery = Google::Cloud::Bigquery.new
|
49
|
+
# dataset = bigquery.dataset "my_dataset"
|
50
|
+
# model = dataset.model "my_model"
|
51
|
+
#
|
52
|
+
# extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
|
53
|
+
#
|
54
|
+
# extract_job.wait_until_done!
|
55
|
+
# extract_job.done? #=> true
|
56
|
+
#
|
43
57
|
class ExtractJob < Job
|
44
58
|
##
|
45
59
|
# The URI or URIs representing the Google Cloud Storage files to which
|
@@ -49,71 +63,126 @@ module Google
|
|
49
63
|
end
|
50
64
|
|
51
65
|
##
|
52
|
-
# The table
|
53
|
-
# which {Table#extract_job} was called.
|
66
|
+
# The table or model which is exported.
|
54
67
|
#
|
55
|
-
# @return [Table] A table instance
|
68
|
+
# @return [Table, Model, nil] A table or model instance, or `nil`.
|
56
69
|
#
|
57
70
|
def source
|
58
|
-
table = @gapi.configuration.extract.source_table
|
59
|
-
|
60
|
-
|
71
|
+
if (table = @gapi.configuration.extract.source_table)
|
72
|
+
retrieve_table table.project_id, table.dataset_id, table.table_id
|
73
|
+
elsif (model = @gapi.configuration.extract.source_model)
|
74
|
+
retrieve_model model.project_id, model.dataset_id, model.model_id
|
75
|
+
end
|
61
76
|
end
|
62
77
|
|
63
78
|
##
|
64
|
-
#
|
65
|
-
# default is `false`.
|
79
|
+
# Whether the source of the export job is a table. See {#source}.
|
66
80
|
#
|
67
|
-
# @return [Boolean] `true` when
|
81
|
+
# @return [Boolean] `true` when the source is a table, `false`
|
82
|
+
# otherwise.
|
68
83
|
#
|
69
|
-
def
|
70
|
-
|
71
|
-
val == "GZIP"
|
84
|
+
def table?
|
85
|
+
!@gapi.configuration.extract.source_table.nil?
|
72
86
|
end
|
73
87
|
|
74
88
|
##
|
75
|
-
#
|
76
|
-
# JSON](http://jsonlines.org/). The default is `false`.
|
89
|
+
# Whether the source of the export job is a model. See {#source}.
|
77
90
|
#
|
78
|
-
# @return [Boolean] `true` when
|
91
|
+
# @return [Boolean] `true` when the source is a model, `false`
|
79
92
|
# otherwise.
|
80
93
|
#
|
94
|
+
def model?
|
95
|
+
!@gapi.configuration.extract.source_model.nil?
|
96
|
+
end
|
97
|
+
|
98
|
+
##
|
99
|
+
# Checks if the export operation compresses the data using gzip. The
|
100
|
+
# default is `false`. Not applicable when extracting models.
|
101
|
+
#
|
102
|
+
# @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
|
103
|
+
# table extraction.
|
104
|
+
def compression?
|
105
|
+
return false unless table?
|
106
|
+
@gapi.configuration.extract.compression == "GZIP"
|
107
|
+
end
|
108
|
+
|
109
|
+
##
|
110
|
+
# Checks if the destination format for the table data is [newline-delimited
|
111
|
+
# JSON](http://jsonlines.org/). The default is `false`. Not applicable when
|
112
|
+
# extracting models.
|
113
|
+
#
|
114
|
+
# @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
|
115
|
+
# `NEWLINE_DELIMITED_JSON` or not a table extraction.
|
116
|
+
#
|
81
117
|
def json?
|
82
|
-
|
83
|
-
|
118
|
+
return false unless table?
|
119
|
+
@gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
|
84
120
|
end
|
85
121
|
|
86
122
|
##
|
87
|
-
# Checks if the destination format for the data is CSV. Tables with
|
123
|
+
# Checks if the destination format for the table data is CSV. Tables with
|
88
124
|
# nested or repeated fields cannot be exported as CSV. The default is
|
89
|
-
# `true
|
125
|
+
# `true` for tables. Not applicable when extracting models.
|
90
126
|
#
|
91
|
-
# @return [Boolean] `true` when `CSV`, `false`
|
127
|
+
# @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
|
128
|
+
# table extraction.
|
92
129
|
#
|
93
130
|
def csv?
|
131
|
+
return false unless table?
|
94
132
|
val = @gapi.configuration.extract.destination_format
|
95
133
|
return true if val.nil?
|
96
134
|
val == "CSV"
|
97
135
|
end
|
98
136
|
|
99
137
|
##
|
100
|
-
# Checks if the destination format for the data is
|
101
|
-
# [Avro](http://avro.apache.org/). The default is `false`.
|
138
|
+
# Checks if the destination format for the table data is
|
139
|
+
# [Avro](http://avro.apache.org/). The default is `false`. Not applicable
|
140
|
+
# when extracting models.
|
102
141
|
#
|
103
|
-
# @return [Boolean] `true` when `AVRO`, `false`
|
142
|
+
# @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
|
143
|
+
# table extraction.
|
104
144
|
#
|
105
145
|
def avro?
|
146
|
+
return false unless table?
|
147
|
+
@gapi.configuration.extract.destination_format == "AVRO"
|
148
|
+
end
|
149
|
+
|
150
|
+
##
|
151
|
+
# Checks if the destination format for the model is TensorFlow SavedModel.
|
152
|
+
# The default is `true` for models. Not applicable when extracting tables.
|
153
|
+
#
|
154
|
+
# @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
|
155
|
+
# `ML_TF_SAVED_MODEL` or not a model extraction.
|
156
|
+
#
|
157
|
+
def ml_tf_saved_model?
|
158
|
+
return false unless model?
|
106
159
|
val = @gapi.configuration.extract.destination_format
|
107
|
-
|
160
|
+
return true if val.nil?
|
161
|
+
val == "ML_TF_SAVED_MODEL"
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Checks if the destination format for the model is XGBoost. The default
|
166
|
+
# is `false`. Not applicable when extracting tables.
|
167
|
+
#
|
168
|
+
# @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
|
169
|
+
# `ML_XGBOOST_BOOSTER` or not a model extraction.
|
170
|
+
#
|
171
|
+
def ml_xgboost_booster?
|
172
|
+
return false unless model?
|
173
|
+
@gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
|
108
174
|
end
|
109
175
|
|
110
176
|
##
|
111
177
|
# The character or symbol the operation uses to delimit fields in the
|
112
|
-
# exported data. The default is a comma (,).
|
178
|
+
# exported data. The default is a comma (,) for tables. Not applicable
|
179
|
+
# when extracting models.
|
113
180
|
#
|
114
|
-
# @return [String] A string containing the character, such as `","
|
181
|
+
# @return [String, nil] A string containing the character, such as `","`,
|
182
|
+
# `nil` if not a table extraction.
|
115
183
|
#
|
116
184
|
def delimiter
|
185
|
+
return unless table?
|
117
186
|
val = @gapi.configuration.extract.field_delimiter
|
118
187
|
val = "," if val.nil?
|
119
188
|
val
|
@@ -121,12 +190,13 @@ module Google
|
|
121
190
|
|
122
191
|
##
|
123
192
|
# Checks if the exported data contains a header row. The default is
|
124
|
-
# `true
|
193
|
+
# `true` for tables. Not applicable when extracting models.
|
125
194
|
#
|
126
195
|
# @return [Boolean] `true` when the print header configuration is
|
127
|
-
# present or `nil`, `false`
|
196
|
+
# present or `nil`, `false` if disabled or not a table extraction.
|
128
197
|
#
|
129
198
|
def print_header?
|
199
|
+
return false unless table?
|
130
200
|
val = @gapi.configuration.extract.print_header
|
131
201
|
val = true if val.nil?
|
132
202
|
val
|
@@ -159,12 +229,14 @@ module Google
|
|
159
229
|
# whether to enable extracting applicable column types (such as
|
160
230
|
# `TIMESTAMP`) to their corresponding AVRO logical types
|
161
231
|
# (`timestamp-micros`), instead of only using their raw types
|
162
|
-
# (`avro-long`).
|
232
|
+
# (`avro-long`). Not applicable when extracting models.
|
163
233
|
#
|
164
234
|
# @return [Boolean] `true` when applicable column types will use their
|
165
|
-
# corresponding AVRO logical types, `false`
|
235
|
+
# corresponding AVRO logical types, `false` if not enabled or not a
|
236
|
+
# table extraction.
|
166
237
|
#
|
167
238
|
def use_avro_logical_types?
|
239
|
+
return false unless table?
|
168
240
|
@gapi.configuration.extract.use_avro_logical_types
|
169
241
|
end
|
170
242
|
|
@@ -182,19 +254,24 @@ module Google
|
|
182
254
|
#
|
183
255
|
# @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
|
184
256
|
# configuration object for setting extract options.
|
185
|
-
def self.from_options service,
|
257
|
+
def self.from_options service, source, storage_files, options
|
186
258
|
job_ref = service.job_ref_from options[:job_id], options[:prefix]
|
187
259
|
storage_urls = Array(storage_files).map do |url|
|
188
260
|
url.respond_to?(:to_gs_url) ? url.to_gs_url : url
|
189
261
|
end
|
190
262
|
options[:format] ||= Convert.derive_source_format storage_urls.first
|
263
|
+
extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
|
264
|
+
destination_uris: Array(storage_urls)
|
265
|
+
)
|
266
|
+
if source.is_a? Google::Apis::BigqueryV2::TableReference
|
267
|
+
extract_config.source_table = source
|
268
|
+
elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
|
269
|
+
extract_config.source_model = source
|
270
|
+
end
|
191
271
|
job = Google::Apis::BigqueryV2::Job.new(
|
192
272
|
job_reference: job_ref,
|
193
273
|
configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
|
194
|
-
extract:
|
195
|
-
destination_uris: Array(storage_urls),
|
196
|
-
source_table: table
|
197
|
-
),
|
274
|
+
extract: extract_config,
|
198
275
|
dry_run: options[:dryrun]
|
199
276
|
)
|
200
277
|
)
|
@@ -253,7 +330,7 @@ module Google
|
|
253
330
|
end
|
254
331
|
|
255
332
|
##
|
256
|
-
# Sets the compression type.
|
333
|
+
# Sets the compression type. Not applicable when extracting models.
|
257
334
|
#
|
258
335
|
# @param [String] value The compression type to use for exported
|
259
336
|
# files. Possible values include `GZIP` and `NONE`. The default
|
@@ -265,7 +342,7 @@ module Google
|
|
265
342
|
end
|
266
343
|
|
267
344
|
##
|
268
|
-
# Sets the field delimiter.
|
345
|
+
# Sets the field delimiter. Not applicable when extracting models.
|
269
346
|
#
|
270
347
|
# @param [String] value Delimiter to use between fields in the
|
271
348
|
# exported data. Default is <code>,</code>.
|
@@ -276,14 +353,21 @@ module Google
|
|
276
353
|
end
|
277
354
|
|
278
355
|
##
|
279
|
-
# Sets the destination file format. The default value
|
356
|
+
# Sets the destination file format. The default value for
|
357
|
+
# tables is `csv`. Tables with nested or repeated fields cannot be
|
358
|
+
# exported as CSV. The default value for models is `ml_tf_saved_model`.
|
280
359
|
#
|
281
|
-
#
|
360
|
+
# Supported values for tables:
|
282
361
|
#
|
283
362
|
# * `csv` - CSV
|
284
363
|
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
285
364
|
# * `avro` - [Avro](http://avro.apache.org/)
|
286
365
|
#
|
366
|
+
# Supported values for models:
|
367
|
+
#
|
368
|
+
# * `ml_tf_saved_model` - TensorFlow SavedModel
|
369
|
+
# * `ml_xgboost_booster` - XGBoost Booster
|
370
|
+
#
|
287
371
|
# @param [String] new_format The new destination format.
|
288
372
|
#
|
289
373
|
# @!group Attributes
|
@@ -293,7 +377,8 @@ module Google
|
|
293
377
|
end
|
294
378
|
|
295
379
|
##
|
296
|
-
# Print a header row in the exported file.
|
380
|
+
# Print a header row in the exported file. Not applicable when
|
381
|
+
# extracting models.
|
297
382
|
#
|
298
383
|
# @param [Boolean] value Whether to print out a header row in the
|
299
384
|
# results. Default is `true`.
|
@@ -307,12 +392,21 @@ module Google
|
|
307
392
|
# Sets the labels to use for the job.
|
308
393
|
#
|
309
394
|
# @param [Hash] value A hash of user-provided labels associated with
|
310
|
-
# the job. You can use these to organize and group your jobs.
|
311
|
-
#
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
#
|
395
|
+
# the job. You can use these to organize and group your jobs.
|
396
|
+
#
|
397
|
+
# The labels applied to a resource must meet the following requirements:
|
398
|
+
#
|
399
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
400
|
+
# * Each label must be a key-value pair.
|
401
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
402
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
403
|
+
# a maximum length of 63 characters.
|
404
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
405
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
406
|
+
# international characters are allowed.
|
407
|
+
# * The key portion of a label must be unique. However, you can use the
|
408
|
+
# same key with multiple resources.
|
409
|
+
# * Keys must start with a lowercase letter or international character.
|
316
410
|
#
|
317
411
|
# @!group Attributes
|
318
412
|
#
|
@@ -362,6 +456,16 @@ module Google
|
|
362
456
|
@gapi
|
363
457
|
end
|
364
458
|
end
|
459
|
+
|
460
|
+
protected
|
461
|
+
|
462
|
+
def retrieve_model project_id, dataset_id, model_id
|
463
|
+
ensure_service!
|
464
|
+
gapi = service.get_project_model project_id, dataset_id, model_id
|
465
|
+
Model.from_gapi_json gapi, service
|
466
|
+
rescue Google::Cloud::NotFoundError
|
467
|
+
nil
|
468
|
+
end
|
365
469
|
end
|
366
470
|
end
|
367
471
|
end
|
@@ -215,6 +215,17 @@ module Google
|
|
215
215
|
@gapi.statistics.parent_job_id
|
216
216
|
end
|
217
217
|
|
218
|
+
##
|
219
|
+
# An array containing the job resource usage breakdown by reservation, if present. Reservation usage statistics
|
220
|
+
# are only reported for jobs that are executed within reservations. On-demand jobs do not report this data.
|
221
|
+
#
|
222
|
+
# @return [Array<Google::Cloud::Bigquery::Job::ReservationUsage>, nil] The reservation usage, if present.
|
223
|
+
#
|
224
|
+
def reservation_usage
|
225
|
+
return nil unless @gapi.statistics.reservation_usage
|
226
|
+
Array(@gapi.statistics.reservation_usage).map { |g| ReservationUsage.from_gapi g }
|
227
|
+
end
|
228
|
+
|
218
229
|
##
|
219
230
|
# The statistics including stack frames for a child job of a script.
|
220
231
|
#
|
@@ -489,6 +500,29 @@ module Google
|
|
489
500
|
end
|
490
501
|
end
|
491
502
|
|
503
|
+
##
|
504
|
+
# Represents Job resource usage breakdown by reservation.
|
505
|
+
#
|
506
|
+
# @attr_reader [String] name The reservation name or "unreserved" for on-demand resource usage.
|
507
|
+
# @attr_reader [Fixnum] slot_ms The slot-milliseconds the job spent in the given reservation.
|
508
|
+
#
|
509
|
+
class ReservationUsage
|
510
|
+
attr_reader :name, :slot_ms
|
511
|
+
|
512
|
+
##
|
513
|
+
# @private Creates a new ReservationUsage instance.
|
514
|
+
def initialize name, slot_ms
|
515
|
+
@name = name
|
516
|
+
@slot_ms = slot_ms
|
517
|
+
end
|
518
|
+
|
519
|
+
##
|
520
|
+
# @private New ReservationUsage from a statistics.reservation_usage value.
|
521
|
+
def self.from_gapi gapi
|
522
|
+
new gapi.name, gapi.slot_ms
|
523
|
+
end
|
524
|
+
end
|
525
|
+
|
492
526
|
##
|
493
527
|
# Represents statistics for a child job of a script.
|
494
528
|
#
|
@@ -547,7 +581,7 @@ module Google
|
|
547
581
|
end
|
548
582
|
|
549
583
|
##
|
550
|
-
# @private New ScriptStatistics from a statistics.script_statistics
|
584
|
+
# @private New ScriptStatistics from a statistics.script_statistics value.
|
551
585
|
def self.from_gapi gapi
|
552
586
|
frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
|
553
587
|
new gapi.evaluation_kind, frames
|
@@ -37,8 +37,8 @@ module Google
|
|
37
37
|
# bigquery = Google::Cloud::Bigquery.new
|
38
38
|
# dataset = bigquery.dataset "my_dataset"
|
39
39
|
#
|
40
|
-
#
|
41
|
-
# load_job = dataset.load_job "my_new_table",
|
40
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
41
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
|
42
42
|
# schema.string "first_name", mode: :required
|
43
43
|
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
44
44
|
# nested_schema.string "place", mode: :required
|
@@ -112,8 +112,7 @@ module Google
|
|
112
112
|
# `false` otherwise.
|
113
113
|
#
|
114
114
|
def iso8859_1?
|
115
|
-
|
116
|
-
val == "ISO-8859-1"
|
115
|
+
@gapi.configuration.load.encoding == "ISO-8859-1"
|
117
116
|
end
|
118
117
|
|
119
118
|
##
|
@@ -195,8 +194,7 @@ module Google
|
|
195
194
|
# `NEWLINE_DELIMITED_JSON`, `false` otherwise.
|
196
195
|
#
|
197
196
|
def json?
|
198
|
-
|
199
|
-
val == "NEWLINE_DELIMITED_JSON"
|
197
|
+
@gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
|
200
198
|
end
|
201
199
|
|
202
200
|
##
|
@@ -218,8 +216,27 @@ module Google
|
|
218
216
|
# `false` otherwise.
|
219
217
|
#
|
220
218
|
def backup?
|
221
|
-
|
222
|
-
|
219
|
+
@gapi.configuration.load.source_format == "DATASTORE_BACKUP"
|
220
|
+
end
|
221
|
+
|
222
|
+
##
|
223
|
+
# Checks if the source format is ORC.
|
224
|
+
#
|
225
|
+
# @return [Boolean] `true` when the source format is `ORC`,
|
226
|
+
# `false` otherwise.
|
227
|
+
#
|
228
|
+
def orc?
|
229
|
+
@gapi.configuration.load.source_format == "ORC"
|
230
|
+
end
|
231
|
+
|
232
|
+
##
|
233
|
+
# Checks if the source format is Parquet.
|
234
|
+
#
|
235
|
+
# @return [Boolean] `true` when the source format is `PARQUET`,
|
236
|
+
# `false` otherwise.
|
237
|
+
#
|
238
|
+
def parquet?
|
239
|
+
@gapi.configuration.load.source_format == "PARQUET"
|
223
240
|
end
|
224
241
|
|
225
242
|
##
|
@@ -347,6 +364,58 @@ module Google
|
|
347
364
|
nil
|
348
365
|
end
|
349
366
|
|
367
|
+
###
|
368
|
+
# Checks if hive partitioning options are set.
|
369
|
+
#
|
370
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
371
|
+
#
|
372
|
+
# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
|
373
|
+
#
|
374
|
+
# @!group Attributes
|
375
|
+
#
|
376
|
+
def hive_partitioning?
|
377
|
+
!@gapi.configuration.load.hive_partitioning_options.nil?
|
378
|
+
end
|
379
|
+
|
380
|
+
###
|
381
|
+
# The mode of hive partitioning to use when reading data. The following modes are supported:
|
382
|
+
#
|
383
|
+
# 1. `AUTO`: automatically infer partition key name(s) and type(s).
|
384
|
+
# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
|
385
|
+
# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
|
386
|
+
#
|
387
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
388
|
+
#
|
389
|
+
# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
|
390
|
+
#
|
391
|
+
# @!group Attributes
|
392
|
+
#
|
393
|
+
def hive_partitioning_mode
|
394
|
+
@gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
|
395
|
+
end
|
396
|
+
|
397
|
+
###
|
398
|
+
# The common prefix for all source uris when hive partition detection is requested. The prefix must end
|
399
|
+
# immediately before the partition key encoding begins. For example, consider files following this data layout:
|
400
|
+
#
|
401
|
+
# ```
|
402
|
+
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
403
|
+
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
404
|
+
# ```
|
405
|
+
#
|
406
|
+
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
407
|
+
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
408
|
+
#
|
409
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
410
|
+
#
|
411
|
+
# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
|
412
|
+
#
|
413
|
+
# @!group Attributes
|
414
|
+
#
|
415
|
+
def hive_partitioning_source_uri_prefix
|
416
|
+
@gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
|
417
|
+
end
|
418
|
+
|
350
419
|
###
|
351
420
|
# Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
|
352
421
|
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
@@ -428,8 +497,9 @@ module Google
|
|
428
497
|
# The period for which the destination table will be time partitioned, if
|
429
498
|
# any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
430
499
|
#
|
431
|
-
# @return [String, nil] The time partition type.
|
432
|
-
#
|
500
|
+
# @return [String, nil] The time partition type. The supported types are `DAY`,
|
501
|
+
# `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
|
502
|
+
# hour, month, and year, respectively; or `nil` if not present.
|
433
503
|
#
|
434
504
|
# @!group Attributes
|
435
505
|
#
|
@@ -1303,12 +1373,21 @@ module Google
|
|
1303
1373
|
# Sets the labels to use for the load job.
|
1304
1374
|
#
|
1305
1375
|
# @param [Hash] val A hash of user-provided labels associated with
|
1306
|
-
# the job. You can use these to organize and group your jobs.
|
1307
|
-
#
|
1308
|
-
#
|
1309
|
-
#
|
1310
|
-
#
|
1311
|
-
#
|
1376
|
+
# the job. You can use these to organize and group your jobs.
|
1377
|
+
#
|
1378
|
+
# The labels applied to a resource must meet the following requirements:
|
1379
|
+
#
|
1380
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1381
|
+
# * Each label must be a key-value pair.
|
1382
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1383
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1384
|
+
# a maximum length of 63 characters.
|
1385
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1386
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1387
|
+
# international characters are allowed.
|
1388
|
+
# * The key portion of a label must be unique. However, you can use the
|
1389
|
+
# same key with multiple resources.
|
1390
|
+
# * Keys must start with a lowercase letter or international character.
|
1312
1391
|
#
|
1313
1392
|
# @!group Attributes
|
1314
1393
|
#
|
@@ -1316,6 +1395,89 @@ module Google
|
|
1316
1395
|
@gapi.configuration.update! labels: val
|
1317
1396
|
end
|
1318
1397
|
|
1398
|
+
##
|
1399
|
+
# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
|
1400
|
+
#
|
1401
|
+
# 1. `auto`: automatically infer partition key name(s) and type(s).
|
1402
|
+
# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
|
1403
|
+
# 3. `custom`: partition key schema is encoded in the source URI prefix.
|
1404
|
+
#
|
1405
|
+
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
1406
|
+
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
1407
|
+
#
|
1408
|
+
# See {#format=} and {#hive_partitioning_source_uri_prefix=}.
|
1409
|
+
#
|
1410
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
1411
|
+
#
|
1412
|
+
# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
|
1413
|
+
#
|
1414
|
+
# @example
|
1415
|
+
# require "google/cloud/bigquery"
|
1416
|
+
#
|
1417
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1418
|
+
# dataset = bigquery.dataset "my_dataset"
|
1419
|
+
#
|
1420
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
1421
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
1422
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1423
|
+
# job.format = :parquet
|
1424
|
+
# job.hive_partitioning_mode = :auto
|
1425
|
+
# job.hive_partitioning_source_uri_prefix = source_uri_prefix
|
1426
|
+
# end
|
1427
|
+
#
|
1428
|
+
# load_job.wait_until_done!
|
1429
|
+
# load_job.done? #=> true
|
1430
|
+
#
|
1431
|
+
# @!group Attributes
|
1432
|
+
#
|
1433
|
+
def hive_partitioning_mode= mode
|
1434
|
+
@gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
1435
|
+
@gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
|
1436
|
+
end
|
1437
|
+
|
1438
|
+
##
|
1439
|
+
# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
|
1440
|
+
# immediately before the partition key encoding begins. For example, consider files following this data
|
1441
|
+
# layout:
|
1442
|
+
#
|
1443
|
+
# ```
|
1444
|
+
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
1445
|
+
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
1446
|
+
# ```
|
1447
|
+
#
|
1448
|
+
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
1449
|
+
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
1450
|
+
#
|
1451
|
+
# See {#hive_partitioning_mode=}.
|
1452
|
+
#
|
1453
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
1454
|
+
#
|
1455
|
+
# @param [String] source_uri_prefix The common prefix for all source uris.
|
1456
|
+
#
|
1457
|
+
# @example
|
1458
|
+
# require "google/cloud/bigquery"
|
1459
|
+
#
|
1460
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1461
|
+
# dataset = bigquery.dataset "my_dataset"
|
1462
|
+
#
|
1463
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
1464
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
1465
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1466
|
+
# job.format = :parquet
|
1467
|
+
# job.hive_partitioning_mode = :auto
|
1468
|
+
# job.hive_partitioning_source_uri_prefix = source_uri_prefix
|
1469
|
+
# end
|
1470
|
+
#
|
1471
|
+
# load_job.wait_until_done!
|
1472
|
+
# load_job.done? #=> true
|
1473
|
+
#
|
1474
|
+
# @!group Attributes
|
1475
|
+
#
|
1476
|
+
def hive_partitioning_source_uri_prefix= source_uri_prefix
|
1477
|
+
@gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
1478
|
+
@gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
|
1479
|
+
end
|
1480
|
+
|
1319
1481
|
##
|
1320
1482
|
# Sets the field on which to range partition the table. See [Creating and using integer range partitioned
|
1321
1483
|
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
@@ -1335,8 +1497,8 @@ module Google
|
|
1335
1497
|
# bigquery = Google::Cloud::Bigquery.new
|
1336
1498
|
# dataset = bigquery.dataset "my_dataset"
|
1337
1499
|
#
|
1338
|
-
#
|
1339
|
-
# load_job = dataset.load_job "my_new_table",
|
1500
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1501
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1340
1502
|
# job.schema do |schema|
|
1341
1503
|
# schema.integer "my_table_id", mode: :required
|
1342
1504
|
# schema.string "my_table_data", mode: :required
|
@@ -1376,8 +1538,8 @@ module Google
|
|
1376
1538
|
# bigquery = Google::Cloud::Bigquery.new
|
1377
1539
|
# dataset = bigquery.dataset "my_dataset"
|
1378
1540
|
#
|
1379
|
-
#
|
1380
|
-
# load_job = dataset.load_job "my_new_table",
|
1541
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1542
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1381
1543
|
# job.schema do |schema|
|
1382
1544
|
# schema.integer "my_table_id", mode: :required
|
1383
1545
|
# schema.string "my_table_data", mode: :required
|
@@ -1417,8 +1579,8 @@ module Google
|
|
1417
1579
|
# bigquery = Google::Cloud::Bigquery.new
|
1418
1580
|
# dataset = bigquery.dataset "my_dataset"
|
1419
1581
|
#
|
1420
|
-
#
|
1421
|
-
# load_job = dataset.load_job "my_new_table",
|
1582
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1583
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1422
1584
|
# job.schema do |schema|
|
1423
1585
|
# schema.integer "my_table_id", mode: :required
|
1424
1586
|
# schema.string "my_table_data", mode: :required
|
@@ -1458,8 +1620,8 @@ module Google
|
|
1458
1620
|
# bigquery = Google::Cloud::Bigquery.new
|
1459
1621
|
# dataset = bigquery.dataset "my_dataset"
|
1460
1622
|
#
|
1461
|
-
#
|
1462
|
-
# load_job = dataset.load_job "my_new_table",
|
1623
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1624
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1463
1625
|
# job.schema do |schema|
|
1464
1626
|
# schema.integer "my_table_id", mode: :required
|
1465
1627
|
# schema.string "my_table_data", mode: :required
|
@@ -1490,8 +1652,9 @@ module Google
|
|
1490
1652
|
# BigQuery does not allow you to change partitioning on an existing
|
1491
1653
|
# table.
|
1492
1654
|
#
|
1493
|
-
# @param [String] type The time partition type.
|
1494
|
-
#
|
1655
|
+
# @param [String] type The time partition type. The supported types are `DAY`,
|
1656
|
+
# `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
|
1657
|
+
# hour, month, and year, respectively.
|
1495
1658
|
#
|
1496
1659
|
# @example
|
1497
1660
|
# require "google/cloud/bigquery"
|
@@ -1499,8 +1662,8 @@ module Google
|
|
1499
1662
|
# bigquery = Google::Cloud::Bigquery.new
|
1500
1663
|
# dataset = bigquery.dataset "my_dataset"
|
1501
1664
|
#
|
1502
|
-
#
|
1503
|
-
# load_job = dataset.load_job "my_new_table",
|
1665
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1666
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1504
1667
|
# job.time_partitioning_type = "DAY"
|
1505
1668
|
# end
|
1506
1669
|
#
|
@@ -1538,8 +1701,8 @@ module Google
|
|
1538
1701
|
# bigquery = Google::Cloud::Bigquery.new
|
1539
1702
|
# dataset = bigquery.dataset "my_dataset"
|
1540
1703
|
#
|
1541
|
-
#
|
1542
|
-
# load_job = dataset.load_job "my_new_table",
|
1704
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1705
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1543
1706
|
# job.time_partitioning_type = "DAY"
|
1544
1707
|
# job.time_partitioning_field = "dob"
|
1545
1708
|
# job.schema do |schema|
|
@@ -1574,8 +1737,8 @@ module Google
|
|
1574
1737
|
# bigquery = Google::Cloud::Bigquery.new
|
1575
1738
|
# dataset = bigquery.dataset "my_dataset"
|
1576
1739
|
#
|
1577
|
-
#
|
1578
|
-
# load_job = dataset.load_job "my_new_table",
|
1740
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1741
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1579
1742
|
# job.time_partitioning_type = "DAY"
|
1580
1743
|
# job.time_partitioning_expiration = 86_400
|
1581
1744
|
# end
|
@@ -1634,8 +1797,8 @@ module Google
|
|
1634
1797
|
# bigquery = Google::Cloud::Bigquery.new
|
1635
1798
|
# dataset = bigquery.dataset "my_dataset"
|
1636
1799
|
#
|
1637
|
-
#
|
1638
|
-
# load_job = dataset.load_job "my_new_table",
|
1800
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1801
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1639
1802
|
# job.time_partitioning_type = "DAY"
|
1640
1803
|
# job.time_partitioning_field = "dob"
|
1641
1804
|
# job.schema do |schema|
|