google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
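The hunk below is the new file data/lib/google/cloud/bigquery/external/data_source.rb (entry 22 above, +771 -0), which introduces the External::DataSource base class shared by the format-specific external source classes.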
@@ -0,0 +1,771 @@
+ # Copyright 2021 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/apis/bigquery_v2"
+
+ module Google
+   module Cloud
+     module Bigquery
+       module External
+         ##
+         # # DataSource
+         #
+         # External::DataSource and its subclasses represents an external data
+         # source that can be queried from directly, even though the data is not
+         # stored in BigQuery. Instead of loading or streaming the data, this
+         # object references the external data source.
+         #
+         # The AVRO and Datastore Backup formats use {External::DataSource}. See
+         # {External::CsvSource}, {External::JsonSource},
+         # {External::SheetsSource}, {External::BigtableSource} for the other
+         # formats.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   avro_url = "gs://bucket/path/to/*.avro"
+         #   avro_table = bigquery.external avro_url do |avro|
+         #     avro.autodetect = true
+         #   end
+         #
+         #   data = bigquery.query "SELECT * FROM my_ext_table",
+         #                         external: { my_ext_table: avro_table }
+         #
+         #   # Iterate over the first page of results
+         #   data.each do |row|
+         #     puts row[:name]
+         #   end
+         #   # Retrieve the next page of results
+         #   data = data.next if data.next?
+         #
+         # @example Hive partitioning options:
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+         #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+         #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+         #     ext.hive_partitioning_mode = :auto
+         #     ext.hive_partitioning_require_partition_filter = true
+         #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+         #   end
+         #
+         #   external_data.hive_partitioning? #=> true
+         #   external_data.hive_partitioning_mode #=> "AUTO"
+         #   external_data.hive_partitioning_require_partition_filter? #=> true
+         #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+         #
+         class DataSource
+           ##
+           # @private The Google API Client object.
+           attr_accessor :gapi
+
+           ##
+           # @private Create an empty Table object.
+           def initialize
+             @gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
+           end
+
+           ##
+           # The data format. For CSV files, specify "CSV". For Google sheets,
+           # specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
+           # "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
+           # Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
+           # Google Cloud Bigtable, specify "BIGTABLE".
+           #
+           # @return [String]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url
+           #
+           #   csv_table.format #=> "CSV"
+           #
+           def format
+             @gapi.source_format
+           end
+
+           ##
+           # Whether the data format is "CSV".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url
+           #
+           #   csv_table.format #=> "CSV"
+           #   csv_table.csv? #=> true
+           #
+           def csv?
+             @gapi.source_format == "CSV"
+           end
+
+           ##
+           # Whether the data format is "NEWLINE_DELIMITED_JSON".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   json_url = "gs://bucket/path/to/data.json"
+           #   json_table = bigquery.external json_url
+           #
+           #   json_table.format #=> "NEWLINE_DELIMITED_JSON"
+           #   json_table.json? #=> true
+           #
+           def json?
+             @gapi.source_format == "NEWLINE_DELIMITED_JSON"
+           end
+
+           ##
+           # Whether the data format is "GOOGLE_SHEETS".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
+           #   sheets_table = bigquery.external sheets_url
+           #
+           #   sheets_table.format #=> "GOOGLE_SHEETS"
+           #   sheets_table.sheets? #=> true
+           #
+           def sheets?
+             @gapi.source_format == "GOOGLE_SHEETS"
+           end
+
+           ##
+           # Whether the data format is "AVRO".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   avro_url = "gs://bucket/path/to/*.avro"
+           #   avro_table = bigquery.external avro_url
+           #
+           #   avro_table.format #=> "AVRO"
+           #   avro_table.avro? #=> true
+           #
+           def avro?
+             @gapi.source_format == "AVRO"
+           end
+
+           ##
+           # Whether the data format is "DATASTORE_BACKUP".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   backup_url = "gs://bucket/path/to/data.backup_info"
+           #   backup_table = bigquery.external backup_url
+           #
+           #   backup_table.format #=> "DATASTORE_BACKUP"
+           #   backup_table.backup? #=> true
+           #
+           def backup?
+             @gapi.source_format == "DATASTORE_BACKUP"
+           end
+
+           ##
+           # Whether the data format is "BIGTABLE".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   bigtable_url = "https://googleapis.com/bigtable/projects/..."
+           #   bigtable_table = bigquery.external bigtable_url
+           #
+           #   bigtable_table.format #=> "BIGTABLE"
+           #   bigtable_table.bigtable? #=> true
+           #
+           def bigtable?
+             @gapi.source_format == "BIGTABLE"
+           end
+
+           ##
+           # Whether the data format is "ORC".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #   external_data.format #=> "ORC"
+           #   external_data.orc? #=> true
+           #
+           def orc?
+             @gapi.source_format == "ORC"
+           end
+
+           ##
+           # Whether the data format is "PARQUET".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #   external_data.format #=> "PARQUET"
+           #   external_data.parquet? #=> true
+           #
+           def parquet?
+             @gapi.source_format == "PARQUET"
+           end
+
+           ##
+           # The fully-qualified URIs that point to your data in Google Cloud.
+           # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
+           # character and it must come after the 'bucket' name. Size limits
+           # related to load jobs apply to external data sources. For Google
+           # Cloud Bigtable URIs: Exactly one URI can be specified and it has be
+           # a fully specified and valid HTTPS URL for a Google Cloud Bigtable
+           # table. For Google Cloud Datastore backups, exactly one URI can be
+           # specified, and it must end with '.backup_info'. Also, the '*'
+           # wildcard character is not allowed.
+           #
+           # @return [Array<String>]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url
+           #
+           #   csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
+           #
+           def urls
+             @gapi.source_uris
+           end
+
+           ##
+           # Indicates if the schema and format options are detected
+           # automatically.
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.autodetect = true
+           #   end
+           #
+           #   csv_table.autodetect #=> true
+           #
+           def autodetect
+             @gapi.autodetect
+           end
+
+           ##
+           # Set whether to detect schema and format options automatically. Any
+           # option specified explicitly will be honored.
+           #
+           # @param [Boolean] new_autodetect New autodetect value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.autodetect = true
+           #   end
+           #
+           #   csv_table.autodetect #=> true
+           #
+           def autodetect= new_autodetect
+             frozen_check!
+             @gapi.autodetect = new_autodetect
+           end
+
+           ##
+           # The compression type of the data source. Possible values include
+           # `"GZIP"` and `nil`. The default value is `nil`. This setting is
+           # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
+           # and Avro formats. Optional.
+           #
+           # @return [String]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.compression = "GZIP"
+           #   end
+           #
+           #   csv_table.compression #=> "GZIP"
+           def compression
+             @gapi.compression
+           end
+
+           ##
+           # Set the compression type of the data source. Possible values include
+           # `"GZIP"` and `nil`. The default value is `nil`. This setting is
+           # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
+           # and Avro formats. Optional.
+           #
+           # @param [String] new_compression New compression value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.compression = "GZIP"
+           #   end
+           #
+           #   csv_table.compression #=> "GZIP"
+           #
+           def compression= new_compression
+             frozen_check!
+             @gapi.compression = new_compression
+           end
+
+           ##
+           # Indicates if BigQuery should allow extra values that are not
+           # represented in the table schema. If `true`, the extra values are
+           # ignored. If `false`, records with extra columns are treated as bad
+           # records, and if there are too many bad records, an invalid error is
+           # returned in the job result. The default value is `false`.
+           #
+           # BigQuery treats trailing columns as an extra in `CSV`, named values
+           # that don't match any column names in `JSON`. This setting is ignored
+           # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
+           # formats. Optional.
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.ignore_unknown = true
+           #   end
+           #
+           #   csv_table.ignore_unknown #=> true
+           #
+           def ignore_unknown
+             @gapi.ignore_unknown_values
+           end
+
+           ##
+           # Set whether BigQuery should allow extra values that are not
+           # represented in the table schema. If `true`, the extra values are
+           # ignored. If `false`, records with extra columns are treated as bad
+           # records, and if there are too many bad records, an invalid error is
+           # returned in the job result. The default value is `false`.
+           #
+           # BigQuery treats trailing columns as an extra in `CSV`, named values
+           # that don't match any column names in `JSON`. This setting is ignored
+           # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
+           # formats. Optional.
+           #
+           # @param [Boolean] new_ignore_unknown New ignore_unknown value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.ignore_unknown = true
+           #   end
+           #
+           #   csv_table.ignore_unknown #=> true
+           #
+           def ignore_unknown= new_ignore_unknown
+             frozen_check!
+             @gapi.ignore_unknown_values = new_ignore_unknown
+           end
+
+           ##
+           # The maximum number of bad records that BigQuery can ignore when
+           # reading data. If the number of bad records exceeds this value, an
+           # invalid error is returned in the job result. The default value is 0,
+           # which requires that all records are valid. This setting is ignored
+           # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
+           # formats.
+           #
+           # @return [Integer]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.max_bad_records = 10
+           #   end
+           #
+           #   csv_table.max_bad_records #=> 10
+           #
+           def max_bad_records
+             @gapi.max_bad_records
+           end
+
+           ##
+           # Set the maximum number of bad records that BigQuery can ignore when
+           # reading data. If the number of bad records exceeds this value, an
+           # invalid error is returned in the job result. The default value is 0,
+           # which requires that all records are valid. This setting is ignored
+           # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
+           # formats.
+           #
+           # @param [Integer] new_max_bad_records New max_bad_records value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.max_bad_records = 10
+           #   end
+           #
+           #   csv_table.max_bad_records #=> 10
+           #
+           def max_bad_records= new_max_bad_records
+             frozen_check!
+             @gapi.max_bad_records = new_max_bad_records
+           end
+
+           ###
+           # Checks if hive partitioning options are set.
+           #
+           # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+           # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+           # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+           # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+           #
+           # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning?
+             !@gapi.hive_partitioning_options.nil?
+           end
+
+           ###
+           # The mode of hive partitioning to use when reading data. The following modes are supported:
+           #
+           # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+           # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+           # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+           #
+           # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_mode
+             @gapi.hive_partitioning_options.mode if hive_partitioning?
+           end
+
+           ##
+           # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+           #
+           # 1. `auto`: automatically infer partition key name(s) and type(s).
+           # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+           # 3. `custom`: partition key schema is encoded in the source URI prefix.
+           #
+           # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+           # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+           # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+           # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+           #
+           # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+           #
+           # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_mode= mode
+             @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+             @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+           end
+
+           ###
+           # Whether queries over the table using this external data source require a partition filter that can be used
+           # for partition elimination to be specified. Note that this field should only be true when creating a
+           # permanent external table or querying a temporary external table.
+           #
+           # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_require_partition_filter?
+             return false unless hive_partitioning?
+             !@gapi.hive_partitioning_options.require_partition_filter.nil?
+           end
+
+           ##
+           # Sets whether queries over the table using this external data source require a partition filter
+           # that can be used for partition elimination to be specified.
+           #
+           # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+           #
+           # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_require_partition_filter= require_partition_filter
+             @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+             @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+           end
+
+           ###
+           # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+           # immediately before the partition key encoding begins. For example, consider files following this data
+           # layout:
+           #
+           # ```
+           # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+           # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+           # ```
+           #
+           # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+           # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+           #
+           # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_source_uri_prefix
+             @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+           end
+
+           ##
+           # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+           # immediately before the partition key encoding begins. For example, consider files following this data
+           # layout:
+           #
+           # ```
+           # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+           # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+           # ```
+           #
+           # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+           # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+           #
+           # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+           #
+           # @param [String] source_uri_prefix The common prefix for all source uris.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_source_uri_prefix= source_uri_prefix
+             @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+             @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+           end
+
+           ##
+           # @private Google API Client object.
+           def to_gapi
+             @gapi
+           end
+
+           ##
+           # @private Google API Client object.
+           def self.from_gapi gapi
+             new_table = new
+             new_table.instance_variable_set :@gapi, gapi
+             new_table
+           end
+
+           protected
+
+           def frozen_check!
+             return unless frozen?
+             raise ArgumentError, "Cannot modify external data source when frozen"
+           end
+         end
+       end
+     end
+   end
+ end
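For orientation, here is a minimal usage sketch assembled from the examples embedded in the file above: it defines a hive-partitioned Parquet external data source and queries it through the `external` option of `bigquery.query`. The Cloud Storage URIs are the public sample paths already used in the documentation; the `my_ext_table` alias and the `LIMIT 10` query are only illustrative assumptions, not part of this release.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

# Describe the external Parquet data, letting BigQuery infer the partition keys.
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

# Query the external source directly; no load or streaming job is required.
data = bigquery.query "SELECT * FROM my_ext_table LIMIT 10",
                      external: { my_ext_table: external_data }

data.each { |row| puts row }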