google-cloud-bigquery 1.14.0 → 1.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
--- /dev/null
+++ b/data/lib/google/cloud/bigquery/external/data_source.rb
@@ -0,0 +1,771 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


require "google/apis/bigquery_v2"

module Google
  module Cloud
    module Bigquery
      module External
        ##
        # # DataSource
        #
        # External::DataSource and its subclasses represent an external data
        # source that can be queried directly, even though the data is not
        # stored in BigQuery. Instead of loading or streaming the data, this
        # object references the external data source.
        #
        # The AVRO and Datastore Backup formats use {External::DataSource}. See
        # {External::CsvSource}, {External::JsonSource},
        # {External::SheetsSource}, {External::BigtableSource} for the other
        # formats.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   avro_url = "gs://bucket/path/to/*.avro"
        #   avro_table = bigquery.external avro_url do |avro|
        #     avro.autodetect = true
        #   end
        #
        #   data = bigquery.query "SELECT * FROM my_ext_table",
        #                         external: { my_ext_table: avro_table }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Hive partitioning options:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
        #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
        #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
        #     ext.hive_partitioning_mode = :auto
        #     ext.hive_partitioning_require_partition_filter = true
        #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
        #   end
        #
        #   external_data.hive_partitioning? #=> true
        #   external_data.hive_partitioning_mode #=> "AUTO"
        #   external_data.hive_partitioning_require_partition_filter? #=> true
        #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
        #
        class DataSource
          ##
          # @private The Google API Client object.
          attr_accessor :gapi

          ##
          # @private Create an empty DataSource object.
          def initialize
            @gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
          end

          ##
          # The data format. For CSV files, specify "CSV". For Google Sheets,
          # specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
          # "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
          # Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
          # Google Cloud Bigtable, specify "BIGTABLE".
          #
          # @return [String]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url
          #
          #   csv_table.format #=> "CSV"
          #
          def format
            @gapi.source_format
          end

          ##
          # Whether the data format is "CSV".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url
          #
          #   csv_table.format #=> "CSV"
          #   csv_table.csv? #=> true
          #
          def csv?
            @gapi.source_format == "CSV"
          end

          ##
          # Whether the data format is "NEWLINE_DELIMITED_JSON".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   json_url = "gs://bucket/path/to/data.json"
          #   json_table = bigquery.external json_url
          #
          #   json_table.format #=> "NEWLINE_DELIMITED_JSON"
          #   json_table.json? #=> true
          #
          def json?
            @gapi.source_format == "NEWLINE_DELIMITED_JSON"
          end

          ##
          # Whether the data format is "GOOGLE_SHEETS".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
          #   sheets_table = bigquery.external sheets_url
          #
          #   sheets_table.format #=> "GOOGLE_SHEETS"
          #   sheets_table.sheets? #=> true
          #
          def sheets?
            @gapi.source_format == "GOOGLE_SHEETS"
          end

          ##
          # Whether the data format is "AVRO".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   avro_url = "gs://bucket/path/to/*.avro"
          #   avro_table = bigquery.external avro_url
          #
          #   avro_table.format #=> "AVRO"
          #   avro_table.avro? #=> true
          #
          def avro?
            @gapi.source_format == "AVRO"
          end

          ##
          # Whether the data format is "DATASTORE_BACKUP".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   backup_url = "gs://bucket/path/to/data.backup_info"
          #   backup_table = bigquery.external backup_url
          #
          #   backup_table.format #=> "DATASTORE_BACKUP"
          #   backup_table.backup? #=> true
          #
          def backup?
            @gapi.source_format == "DATASTORE_BACKUP"
          end

          ##
          # Whether the data format is "BIGTABLE".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   bigtable_url = "https://googleapis.com/bigtable/projects/..."
          #   bigtable_table = bigquery.external bigtable_url
          #
          #   bigtable_table.format #=> "BIGTABLE"
          #   bigtable_table.bigtable? #=> true
          #
          def bigtable?
            @gapi.source_format == "BIGTABLE"
          end

          ##
          # Whether the data format is "ORC".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #   external_data.format #=> "ORC"
          #   external_data.orc? #=> true
          #
          def orc?
            @gapi.source_format == "ORC"
          end

          ##
          # Whether the data format is "PARQUET".
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #   external_data.format #=> "PARQUET"
          #   external_data.parquet? #=> true
          #
          def parquet?
            @gapi.source_format == "PARQUET"
          end

          ##
          # The fully-qualified URIs that point to your data in Google Cloud.
          # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
          # character and it must come after the 'bucket' name. Size limits
          # related to load jobs apply to external data sources. For Google
          # Cloud Bigtable URIs: Exactly one URI can be specified and it has to
          # be a fully specified and valid HTTPS URL for a Google Cloud Bigtable
          # table. For Google Cloud Datastore backups, exactly one URI can be
          # specified, and it must end with '.backup_info'. Also, the '*'
          # wildcard character is not allowed.
          #
          # @return [Array<String>]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url
          #
          #   csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
          #
          def urls
            @gapi.source_uris
          end

          ##
          # Indicates if the schema and format options are detected
          # automatically.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.autodetect = true
          #   end
          #
          #   csv_table.autodetect #=> true
          #
          def autodetect
            @gapi.autodetect
          end

          ##
          # Set whether to detect schema and format options automatically. Any
          # option specified explicitly will be honored.
          #
          # @param [Boolean] new_autodetect New autodetect value
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.autodetect = true
          #   end
          #
          #   csv_table.autodetect #=> true
          #
          def autodetect= new_autodetect
            frozen_check!
            @gapi.autodetect = new_autodetect
          end

          ##
          # The compression type of the data source. Possible values include
          # `"GZIP"` and `nil`. The default value is `nil`. This setting is
          # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
          # and Avro formats. Optional.
          #
          # @return [String]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.compression = "GZIP"
          #   end
          #
          #   csv_table.compression #=> "GZIP"
          def compression
            @gapi.compression
          end

          ##
          # Set the compression type of the data source. Possible values include
          # `"GZIP"` and `nil`. The default value is `nil`. This setting is
          # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
          # and Avro formats. Optional.
          #
          # @param [String] new_compression New compression value
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.compression = "GZIP"
          #   end
          #
          #   csv_table.compression #=> "GZIP"
          #
          def compression= new_compression
            frozen_check!
            @gapi.compression = new_compression
          end

          ##
          # Indicates if BigQuery should allow extra values that are not
          # represented in the table schema. If `true`, the extra values are
          # ignored. If `false`, records with extra columns are treated as bad
          # records, and if there are too many bad records, an invalid error is
          # returned in the job result. The default value is `false`.
          #
          # In `CSV`, BigQuery treats trailing columns as extra values; in
          # `JSON`, named values that don't match any column names. This
          # setting is ignored for Google Cloud Bigtable, Google Cloud
          # Datastore backups and Avro formats. Optional.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.ignore_unknown = true
          #   end
          #
          #   csv_table.ignore_unknown #=> true
          #
          def ignore_unknown
            @gapi.ignore_unknown_values
          end

          ##
          # Set whether BigQuery should allow extra values that are not
          # represented in the table schema. If `true`, the extra values are
          # ignored. If `false`, records with extra columns are treated as bad
          # records, and if there are too many bad records, an invalid error is
          # returned in the job result. The default value is `false`.
          #
          # In `CSV`, BigQuery treats trailing columns as extra values; in
          # `JSON`, named values that don't match any column names. This
          # setting is ignored for Google Cloud Bigtable, Google Cloud
          # Datastore backups and Avro formats. Optional.
          #
          # @param [Boolean] new_ignore_unknown New ignore_unknown value
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.ignore_unknown = true
          #   end
          #
          #   csv_table.ignore_unknown #=> true
          #
          def ignore_unknown= new_ignore_unknown
            frozen_check!
            @gapi.ignore_unknown_values = new_ignore_unknown
          end

          ##
          # The maximum number of bad records that BigQuery can ignore when
          # reading data. If the number of bad records exceeds this value, an
          # invalid error is returned in the job result. The default value is 0,
          # which requires that all records are valid. This setting is ignored
          # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
          # formats.
          #
          # @return [Integer]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.max_bad_records = 10
          #   end
          #
          #   csv_table.max_bad_records #=> 10
          #
          def max_bad_records
            @gapi.max_bad_records
          end

          ##
          # Set the maximum number of bad records that BigQuery can ignore when
          # reading data. If the number of bad records exceeds this value, an
          # invalid error is returned in the job result. The default value is 0,
          # which requires that all records are valid. This setting is ignored
          # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
          # formats.
          #
          # @param [Integer] new_max_bad_records New max_bad_records value
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   csv_url = "gs://bucket/path/to/data.csv"
          #   csv_table = bigquery.external csv_url do |csv|
          #     csv.max_bad_records = 10
          #   end
          #
          #   csv_table.max_bad_records #=> 10
          #
          def max_bad_records= new_max_bad_records
            frozen_check!
            @gapi.max_bad_records = new_max_bad_records
          end

          ##
          # Checks if hive partitioning options are set.
          #
          # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
          # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
          # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
          # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
          #
          # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning?
            !@gapi.hive_partitioning_options.nil?
          end

          ##
          # The mode of hive partitioning to use when reading data. The following modes are supported:
          #
          # 1. `AUTO`: automatically infer partition key name(s) and type(s).
          # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
          # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
          #
          # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning_mode
            @gapi.hive_partitioning_options.mode if hive_partitioning?
          end

          ##
          # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
          #
          # 1. `auto`: automatically infer partition key name(s) and type(s).
          # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
          # 3. `custom`: partition key schema is encoded in the source URI prefix.
          #
          # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
          # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
          # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
          # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
          #
          # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
          #
          # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning_mode= mode
            @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
            @gapi.hive_partitioning_options.mode = mode.to_s.upcase
          end

          ##
          # Whether queries over the table using this external data source require a partition filter that can be used
          # for partition elimination to be specified. Note that this field should only be true when creating a
          # permanent external table or querying a temporary external table.
          #
          # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning_require_partition_filter?
            return false unless hive_partitioning?
            !@gapi.hive_partitioning_options.require_partition_filter.nil?
          end

          ##
          # Sets whether queries over the table using this external data source require a partition filter
          # that can be used for partition elimination to be specified.
          #
          # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
          #
          # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning_require_partition_filter= require_partition_filter
            @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
            @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
          end

          ##
          # The common prefix for all source uris when hive partition detection is requested. The prefix must end
          # immediately before the partition key encoding begins. For example, consider files following this data
          # layout:
          #
          # ```
          # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
          # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
          # ```
          #
          # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
          # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
          #
          # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning_source_uri_prefix
            @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
          end

          ##
          # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
          # immediately before the partition key encoding begins. For example, consider files following this data
          # layout:
          #
          # ```
          # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
          # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
          # ```
          #
          # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
          # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
          #
          # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
          #
          # @param [String] source_uri_prefix The common prefix for all source uris.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
          #     ext.hive_partitioning_mode = :auto
          #     ext.hive_partitioning_require_partition_filter = true
          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
          #   end
          #
          #   external_data.hive_partitioning? #=> true
          #   external_data.hive_partitioning_mode #=> "AUTO"
          #   external_data.hive_partitioning_require_partition_filter? #=> true
          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
          #
          def hive_partitioning_source_uri_prefix= source_uri_prefix
            @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
            @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
          end

          ##
          # @private Google API Client object.
          def to_gapi
            @gapi
          end

          ##
          # @private Google API Client object.
          def self.from_gapi gapi
            new_table = new
            new_table.instance_variable_set :@gapi, gapi
            new_table
          end

          protected

          def frozen_check!
            return unless frozen?
            raise ArgumentError, "Cannot modify external data source when frozen"
          end
        end
      end
    end
  end
end
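The class documentation above queries the data source through a temporary external table passed via the `external:` option of `Project#query`. The same `DataSource` object can also back a permanent external table. The following is a minimal sketch based on the patterns used elsewhere in this gem's documentation; the dataset name `my_dataset` and the bucket path are placeholders, and it assumes `Table::Updater#external=` as exposed by `Dataset#create_table`:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset" # assumed to already exist

    csv_url = "gs://bucket/path/to/data.csv" # placeholder path
    csv_table = bigquery.external csv_url do |csv|
      csv.autodetect = true # infer schema and format options from the data
    end

    # Persist the configuration as a permanent external table. Queries
    # against it read directly from Cloud Storage, not BigQuery storage.
    dataset.create_table "my_ext_table" do |table|
      table.external = csv_table
    end

    data = dataset.query "SELECT * FROM my_ext_table"
    data.each { |row| puts row }

Note the `frozen_check!` guard at the end of the file: once a configuration object has been frozen (for example, one read back from an existing table rather than built fresh with `bigquery.external`), the setters above raise an `ArgumentError` instead of silently mutating shared state.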