google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
@@ -0,0 +1,771 @@
+ # Copyright 2021 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/apis/bigquery_v2"
+
+ module Google
+   module Cloud
+     module Bigquery
+       module External
+         ##
+         # # DataSource
+         #
+         # External::DataSource and its subclasses represent an external data
+         # source that can be queried directly, even though the data is not
+         # stored in BigQuery. Instead of loading or streaming the data, this
+         # object references the external data source.
+         #
+         # The AVRO and Datastore Backup formats use {External::DataSource}. See
+         # {External::CsvSource}, {External::JsonSource},
+         # {External::SheetsSource}, {External::BigtableSource} for the other
+         # formats.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   avro_url = "gs://bucket/path/to/*.avro"
+         #   avro_table = bigquery.external avro_url do |avro|
+         #     avro.autodetect = true
+         #   end
+         #
+         #   data = bigquery.query "SELECT * FROM my_ext_table",
+         #                         external: { my_ext_table: avro_table }
+         #
+         #   # Iterate over the first page of results
+         #   data.each do |row|
+         #     puts row[:name]
+         #   end
+         #   # Retrieve the next page of results
+         #   data = data.next if data.next?
+         #
+         # @example Hive partitioning options:
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+         #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+         #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+         #     ext.hive_partitioning_mode = :auto
+         #     ext.hive_partitioning_require_partition_filter = true
+         #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+         #   end
+         #
+         #   external_data.hive_partitioning? #=> true
+         #   external_data.hive_partitioning_mode #=> "AUTO"
+         #   external_data.hive_partitioning_require_partition_filter? #=> true
+         #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+         #
+         class DataSource
+           ##
+           # @private The Google API Client object.
+           attr_accessor :gapi
+
+           ##
+           # @private Create an empty DataSource object.
+           def initialize
+             @gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
+           end
+
+           ##
+           # The data format. For CSV files, specify "CSV". For Google Sheets,
+           # specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
+           # "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
+           # Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
+           # Google Cloud Bigtable, specify "BIGTABLE".
+           #
+           # @return [String]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url
+           #
+           #   csv_table.format #=> "CSV"
+           #
+           def format
+             @gapi.source_format
+           end
+
+           ##
+           # Whether the data format is "CSV".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url
+           #
+           #   csv_table.format #=> "CSV"
+           #   csv_table.csv? #=> true
+           #
+           def csv?
+             @gapi.source_format == "CSV"
+           end
+
+           ##
+           # Whether the data format is "NEWLINE_DELIMITED_JSON".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   json_url = "gs://bucket/path/to/data.json"
+           #   json_table = bigquery.external json_url
+           #
+           #   json_table.format #=> "NEWLINE_DELIMITED_JSON"
+           #   json_table.json? #=> true
+           #
+           def json?
+             @gapi.source_format == "NEWLINE_DELIMITED_JSON"
+           end
+
+           ##
+           # Whether the data format is "GOOGLE_SHEETS".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
+           #   sheets_table = bigquery.external sheets_url
+           #
+           #   sheets_table.format #=> "GOOGLE_SHEETS"
+           #   sheets_table.sheets? #=> true
+           #
+           def sheets?
+             @gapi.source_format == "GOOGLE_SHEETS"
+           end
+
+           ##
+           # Whether the data format is "AVRO".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   avro_url = "gs://bucket/path/to/*.avro"
+           #   avro_table = bigquery.external avro_url
+           #
+           #   avro_table.format #=> "AVRO"
+           #   avro_table.avro? #=> true
+           #
+           def avro?
+             @gapi.source_format == "AVRO"
+           end
+
+           ##
+           # Whether the data format is "DATASTORE_BACKUP".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   backup_url = "gs://bucket/path/to/data.backup_info"
+           #   backup_table = bigquery.external backup_url
+           #
+           #   backup_table.format #=> "DATASTORE_BACKUP"
+           #   backup_table.backup? #=> true
+           #
+           def backup?
+             @gapi.source_format == "DATASTORE_BACKUP"
+           end
+
+           ##
+           # Whether the data format is "BIGTABLE".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   bigtable_url = "https://googleapis.com/bigtable/projects/..."
+           #   bigtable_table = bigquery.external bigtable_url
+           #
+           #   bigtable_table.format #=> "BIGTABLE"
+           #   bigtable_table.bigtable? #=> true
+           #
+           def bigtable?
+             @gapi.source_format == "BIGTABLE"
+           end
+
+           ##
+           # Whether the data format is "ORC".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #   external_data.format #=> "ORC"
+           #   external_data.orc? #=> true
+           #
+           def orc?
+             @gapi.source_format == "ORC"
+           end
+
+           ##
+           # Whether the data format is "PARQUET".
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #   external_data.format #=> "PARQUET"
+           #   external_data.parquet? #=> true
+           #
+           def parquet?
+             @gapi.source_format == "PARQUET"
+           end
+
+           ##
+           # The fully-qualified URIs that point to your data in Google Cloud.
+           # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
+           # character and it must come after the 'bucket' name. Size limits
+           # related to load jobs apply to external data sources. For Google
+           # Cloud Bigtable URIs: Exactly one URI can be specified and it has to
+           # be a fully specified and valid HTTPS URL for a Google Cloud Bigtable
+           # table. For Google Cloud Datastore backups, exactly one URI can be
+           # specified, and it must end with '.backup_info'. Also, the '*'
+           # wildcard character is not allowed.
+           #
+           # @return [Array<String>]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url
+           #
+           #   csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
+           #
+           def urls
+             @gapi.source_uris
+           end
+
+           ##
+           # Indicates if the schema and format options are detected
+           # automatically.
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.autodetect = true
+           #   end
+           #
+           #   csv_table.autodetect #=> true
+           #
+           def autodetect
+             @gapi.autodetect
+           end
+
+           ##
+           # Set whether to detect schema and format options automatically. Any
+           # option specified explicitly will be honored.
+           #
+           # @param [Boolean] new_autodetect New autodetect value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.autodetect = true
+           #   end
+           #
+           #   csv_table.autodetect #=> true
+           #
+           def autodetect= new_autodetect
+             frozen_check!
+             @gapi.autodetect = new_autodetect
+           end
+
+           ##
+           # The compression type of the data source. Possible values include
+           # `"GZIP"` and `nil`. The default value is `nil`. This setting is
+           # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
+           # and Avro formats. Optional.
+           #
+           # @return [String]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.compression = "GZIP"
+           #   end
+           #
+           #   csv_table.compression #=> "GZIP"
+           def compression
+             @gapi.compression
+           end
+
+           ##
+           # Set the compression type of the data source. Possible values include
+           # `"GZIP"` and `nil`. The default value is `nil`. This setting is
+           # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
+           # and Avro formats. Optional.
+           #
+           # @param [String] new_compression New compression value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.compression = "GZIP"
+           #   end
+           #
+           #   csv_table.compression #=> "GZIP"
+           #
+           def compression= new_compression
+             frozen_check!
+             @gapi.compression = new_compression
+           end
+
+           ##
+           # Indicates if BigQuery should allow extra values that are not
+           # represented in the table schema. If `true`, the extra values are
+           # ignored. If `false`, records with extra columns are treated as bad
+           # records, and if there are too many bad records, an invalid error is
+           # returned in the job result. The default value is `false`.
+           #
+           # BigQuery treats trailing columns in `CSV` files, and named values
+           # that don't match any column names in `JSON` files, as extra values.
+           # This setting is ignored for Google Cloud Bigtable, Google Cloud
+           # Datastore backups and Avro formats. Optional.
+           #
+           # @return [Boolean]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.ignore_unknown = true
+           #   end
+           #
+           #   csv_table.ignore_unknown #=> true
+           #
+           def ignore_unknown
+             @gapi.ignore_unknown_values
+           end
+
+           ##
+           # Set whether BigQuery should allow extra values that are not
+           # represented in the table schema. If `true`, the extra values are
+           # ignored. If `false`, records with extra columns are treated as bad
+           # records, and if there are too many bad records, an invalid error is
+           # returned in the job result. The default value is `false`.
+           #
+           # BigQuery treats trailing columns in `CSV` files, and named values
+           # that don't match any column names in `JSON` files, as extra values.
+           # This setting is ignored for Google Cloud Bigtable, Google Cloud
+           # Datastore backups and Avro formats. Optional.
+           #
+           # @param [Boolean] new_ignore_unknown New ignore_unknown value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.ignore_unknown = true
+           #   end
+           #
+           #   csv_table.ignore_unknown #=> true
+           #
+           def ignore_unknown= new_ignore_unknown
+             frozen_check!
+             @gapi.ignore_unknown_values = new_ignore_unknown
+           end
+
+           ##
+           # The maximum number of bad records that BigQuery can ignore when
+           # reading data. If the number of bad records exceeds this value, an
+           # invalid error is returned in the job result. The default value is 0,
+           # which requires that all records are valid. This setting is ignored
+           # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
+           # formats.
+           #
+           # @return [Integer]
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.max_bad_records = 10
+           #   end
+           #
+           #   csv_table.max_bad_records #=> 10
+           #
+           def max_bad_records
+             @gapi.max_bad_records
+           end
+
+           ##
+           # Set the maximum number of bad records that BigQuery can ignore when
+           # reading data. If the number of bad records exceeds this value, an
+           # invalid error is returned in the job result. The default value is 0,
+           # which requires that all records are valid. This setting is ignored
+           # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
+           # formats.
+           #
+           # @param [Integer] new_max_bad_records New max_bad_records value
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   csv_url = "gs://bucket/path/to/data.csv"
+           #   csv_table = bigquery.external csv_url do |csv|
+           #     csv.max_bad_records = 10
+           #   end
+           #
+           #   csv_table.max_bad_records #=> 10
+           #
+           def max_bad_records= new_max_bad_records
+             frozen_check!
+             @gapi.max_bad_records = new_max_bad_records
+           end
+
+           ###
+           # Checks if hive partitioning options are set.
+           #
+           # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+           # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+           # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+           # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+           #
+           # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning?
+             !@gapi.hive_partitioning_options.nil?
+           end
+
+           ###
+           # The mode of hive partitioning to use when reading data. The following modes are supported:
+           #
+           # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+           # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+           # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+           #
+           # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_mode
+             @gapi.hive_partitioning_options.mode if hive_partitioning?
+           end
+
+           ##
+           # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+           #
+           # 1. `auto`: automatically infer partition key name(s) and type(s).
+           # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+           # 3. `custom`: partition key schema is encoded in the source URI prefix.
+           #
+           # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+           # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+           # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+           # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+           #
+           # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+           #
+           # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_mode= mode
+             @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+             @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+           end
+
+           ###
+           # Whether queries over the table using this external data source require a partition filter that can be used
+           # for partition elimination to be specified. Note that this field should only be true when creating a
+           # permanent external table or querying a temporary external table.
+           #
+           # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_require_partition_filter?
+             return false unless hive_partitioning?
+             !@gapi.hive_partitioning_options.require_partition_filter.nil?
+           end
+
+           ##
+           # Sets whether queries over the table using this external data source require a partition filter
+           # that can be used for partition elimination to be specified.
+           #
+           # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+           #
+           # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_require_partition_filter= require_partition_filter
+             @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+             @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+           end
+
+           ###
+           # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+           # immediately before the partition key encoding begins. For example, consider files following this data
+           # layout:
+           #
+           # ```
+           # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+           # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+           # ```
+           #
+           # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+           # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+           #
+           # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_source_uri_prefix
+             @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+           end
+
+           ##
+           # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+           # immediately before the partition key encoding begins. For example, consider files following this data
+           # layout:
+           #
+           # ```
+           # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+           # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+           # ```
+           #
+           # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+           # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+           #
+           # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+           #
+           # @param [String] source_uri_prefix The common prefix for all source uris.
+           #
+           # @example
+           #   require "google/cloud/bigquery"
+           #
+           #   bigquery = Google::Cloud::Bigquery.new
+           #
+           #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+           #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+           #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+           #     ext.hive_partitioning_mode = :auto
+           #     ext.hive_partitioning_require_partition_filter = true
+           #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+           #   end
+           #
+           #   external_data.hive_partitioning? #=> true
+           #   external_data.hive_partitioning_mode #=> "AUTO"
+           #   external_data.hive_partitioning_require_partition_filter? #=> true
+           #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+           #
+           def hive_partitioning_source_uri_prefix= source_uri_prefix
+             @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+             @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+           end
+
+           ##
+           # @private Google API Client object.
+           def to_gapi
+             @gapi
+           end
+
+           ##
+           # @private Google API Client object.
+           def self.from_gapi gapi
+             new_table = new
+             new_table.instance_variable_set :@gapi, gapi
+             new_table
+           end
+
+           protected
+
+           def frozen_check!
+             return unless frozen?
+             raise ArgumentError, "Cannot modify external data source when frozen"
+           end
+         end
+       end
+     end
+   end
+ end
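
For reference, the accessors and setters added in this file compose on a single data source. Below is a minimal usage sketch, not part of the diff: it assumes default application credentials and a hypothetical gs://my-bucket/my-data.csv object, and combines several of the options documented above on one external CSV source.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Configure an external CSV data source with the DataSource setters above.
# The bucket path is hypothetical; any readable Cloud Storage object works.
csv_table = bigquery.external "gs://my-bucket/my-data.csv" do |csv|
  csv.autodetect      = true   # infer schema and format options
  csv.compression     = "GZIP" # the source files are gzip-compressed
  csv.ignore_unknown  = true   # skip extra columns instead of failing
  csv.max_bad_records = 10     # tolerate up to 10 bad records
end

csv_table.csv?  #=> true
csv_table.urls  #=> ["gs://my-bucket/my-data.csv"]

# Query the external source directly; no load or streaming job is needed.
data = bigquery.query "SELECT * FROM my_ext_table",
                      external: { my_ext_table: csv_table }
data.each { |row| puts row }

Note that every setter calls frozen_check!, so each raises ArgumentError on a frozen instance; options should be set inside the configuration block, before the object is frozen.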