google-cloud-bigquery 1.12.0 → 1.38.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
@@ -13,8 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- require "google/apis/bigquery_v2"
17
- require "base64"
16
+ require "google/cloud/bigquery/external/data_source"
17
+ require "google/cloud/bigquery/external/avro_source"
18
+ require "google/cloud/bigquery/external/bigtable_source"
19
+ require "google/cloud/bigquery/external/csv_source"
20
+ require "google/cloud/bigquery/external/json_source"
21
+ require "google/cloud/bigquery/external/parquet_source"
22
+ require "google/cloud/bigquery/external/sheets_source"
18
23
 
19
24
  module Google
20
25
  module Cloud
@@ -45,18 +50,37 @@ module Google
45
50
  # data = bigquery.query "SELECT * FROM my_ext_table",
46
51
  # external: { my_ext_table: csv_table }
47
52
  #
53
+ # # Iterate over the first page of results
48
54
  # data.each do |row|
49
55
  # puts row[:name]
50
56
  # end
57
+ # # Retrieve the next page of results
58
+ # data = data.next if data.next?
59
+ #
60
+ # @example Hive partitioning options:
61
+ # require "google/cloud/bigquery"
62
+ #
63
+ # bigquery = Google::Cloud::Bigquery.new
64
+ #
65
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
66
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
67
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
68
+ # ext.hive_partitioning_mode = :auto
69
+ # ext.hive_partitioning_require_partition_filter = true
70
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
71
+ # end
72
+ #
73
+ # external_data.hive_partitioning? #=> true
74
+ # external_data.hive_partitioning_mode #=> "AUTO"
75
+ # external_data.hive_partitioning_require_partition_filter? #=> true
76
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
51
77
  #
52
78
  module External
53
79
  ##
54
80
  # @private New External from URLs and format
55
81
  def self.from_urls urls, format = nil
56
82
  external_format = source_format_for urls, format
57
- if external_format.nil?
58
- raise ArgumentError, "Unable to determine external table format"
59
- end
83
+ raise ArgumentError, "Unable to determine external table format" if external_format.nil?
60
84
  external_class = table_class_for external_format
61
85
  external_class.new.tap do |e|
62
86
  e.gapi.source_uris = Array(urls)
@@ -69,9 +93,7 @@ module Google
69
93
  def self.from_gapi gapi
70
94
  external_format = source_format_for gapi.source_uris,
71
95
  gapi.source_format
72
- if external_format.nil?
73
- raise ArgumentError, "Unable to determine external table format"
74
- end
96
+ raise ArgumentError, "Unable to determine external table format" if external_format.nil?
75
97
  external_class = table_class_for external_format
76
98
  external_class.from_gapi gapi
77
99
  end
@@ -80,28 +102,28 @@ module Google
80
102
  # @private Determine source_format from inputs
81
103
  def self.source_format_for urls, format
82
104
  val = {
83
- "csv" => "CSV", "avro" => "AVRO",
84
- "json" => "NEWLINE_DELIMITED_JSON",
85
- "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
86
- "sheets" => "GOOGLE_SHEETS",
87
- "google_sheets" => "GOOGLE_SHEETS",
88
- "datastore" => "DATASTORE_BACKUP",
105
+ "avro" => "AVRO",
106
+ "bigtable" => "BIGTABLE",
107
+ "csv" => "CSV",
89
108
  "backup" => "DATASTORE_BACKUP",
109
+ "datastore" => "DATASTORE_BACKUP",
90
110
  "datastore_backup" => "DATASTORE_BACKUP",
91
- "bigtable" => "BIGTABLE"
111
+ "sheets" => "GOOGLE_SHEETS",
112
+ "google_sheets" => "GOOGLE_SHEETS",
113
+ "json" => "NEWLINE_DELIMITED_JSON",
114
+ "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
115
+ "orc" => "ORC",
116
+ "parquet" => "PARQUET"
92
117
  }[format.to_s.downcase]
93
118
  return val unless val.nil?
94
119
  Array(urls).each do |url|
95
- return "CSV" if url.end_with? ".csv"
96
- return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
97
120
  return "AVRO" if url.end_with? ".avro"
121
+ return "BIGTABLE" if url.start_with? "https://googleapis.com/bigtable/projects/"
122
+ return "CSV" if url.end_with? ".csv"
98
123
  return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
99
- if url.start_with? "https://docs.google.com/spreadsheets/"
100
- return "GOOGLE_SHEETS"
101
- end
102
- if url.start_with? "https://googleapis.com/bigtable/projects/"
103
- return "BIGTABLE"
104
- end
124
+ return "GOOGLE_SHEETS" if url.start_with? "https://docs.google.com/spreadsheets/"
125
+ return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
126
+ return "PARQUET" if url.end_with? ".parquet"
105
127
  end
106
128
  nil
107
129
  end
@@ -110,2245 +132,17 @@ module Google
110
132
  # @private Determine table class from source_format
111
133
  def self.table_class_for format
112
134
  case format
135
+ when "AVRO" then External::AvroSource
136
+ when "BIGTABLE" then External::BigtableSource
113
137
  when "CSV" then External::CsvSource
114
- when "NEWLINE_DELIMITED_JSON" then External::JsonSource
115
138
  when "GOOGLE_SHEETS" then External::SheetsSource
116
- when "BIGTABLE" then External::BigtableSource
139
+ when "NEWLINE_DELIMITED_JSON" then External::JsonSource
140
+ when "PARQUET" then External::ParquetSource
117
141
  else
118
- # AVRO and DATASTORE_BACKUP
142
+ # DATASTORE_BACKUP, ORC
119
143
  External::DataSource
120
144
  end
121
145
  end
122
-
123
- ##
124
- # # DataSource
125
- #
126
- # External::DataSource and its subclasses represents an external data
127
- # source that can be queried from directly, even though the data is not
128
- # stored in BigQuery. Instead of loading or streaming the data, this
129
- # object references the external data source.
130
- #
131
- # The AVRO and Datastore Backup formats use {External::DataSource}. See
132
- # {External::CsvSource}, {External::JsonSource},
133
- # {External::SheetsSource}, {External::BigtableSource} for the other
134
- # formats.
135
- #
136
- # @example
137
- # require "google/cloud/bigquery"
138
- #
139
- # bigquery = Google::Cloud::Bigquery.new
140
- #
141
- # avro_url = "gs://bucket/path/to/data.avro"
142
- # avro_table = bigquery.external avro_url do |avro|
143
- # avro.autodetect = true
144
- # end
145
- #
146
- # data = bigquery.query "SELECT * FROM my_ext_table",
147
- # external: { my_ext_table: avro_table }
148
- #
149
- # data.each do |row|
150
- # puts row[:name]
151
- # end
152
- #
153
- class DataSource
154
- ##
155
- # @private The Google API Client object.
156
- attr_accessor :gapi
157
-
158
- ##
159
- # @private Create an empty Table object.
160
- def initialize
161
- @gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
162
- end
163
-
164
- ##
165
- # The data format. For CSV files, specify "CSV". For Google sheets,
166
- # specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
167
- # "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
168
- # Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
169
- # Google Cloud Bigtable, specify "BIGTABLE".
170
- #
171
- # @return [String]
172
- #
173
- # @example
174
- # require "google/cloud/bigquery"
175
- #
176
- # bigquery = Google::Cloud::Bigquery.new
177
- #
178
- # csv_url = "gs://bucket/path/to/data.csv"
179
- # csv_table = bigquery.external csv_url
180
- #
181
- # csv_table.format #=> "CSV"
182
- #
183
- def format
184
- @gapi.source_format
185
- end
186
-
187
- ##
188
- # Whether the data format is "CSV".
189
- #
190
- # @return [Boolean]
191
- #
192
- # @example
193
- # require "google/cloud/bigquery"
194
- #
195
- # bigquery = Google::Cloud::Bigquery.new
196
- #
197
- # csv_url = "gs://bucket/path/to/data.csv"
198
- # csv_table = bigquery.external csv_url
199
- #
200
- # csv_table.format #=> "CSV"
201
- # csv_table.csv? #=> true
202
- #
203
- def csv?
204
- @gapi.source_format == "CSV"
205
- end
206
-
207
- ##
208
- # Whether the data format is "NEWLINE_DELIMITED_JSON".
209
- #
210
- # @return [Boolean]
211
- #
212
- # @example
213
- # require "google/cloud/bigquery"
214
- #
215
- # bigquery = Google::Cloud::Bigquery.new
216
- #
217
- # json_url = "gs://bucket/path/to/data.json"
218
- # json_table = bigquery.external json_url
219
- #
220
- # json_table.format #=> "NEWLINE_DELIMITED_JSON"
221
- # json_table.json? #=> true
222
- #
223
- def json?
224
- @gapi.source_format == "NEWLINE_DELIMITED_JSON"
225
- end
226
-
227
- ##
228
- # Whether the data format is "GOOGLE_SHEETS".
229
- #
230
- # @return [Boolean]
231
- #
232
- # @example
233
- # require "google/cloud/bigquery"
234
- #
235
- # bigquery = Google::Cloud::Bigquery.new
236
- #
237
- # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
238
- # sheets_table = bigquery.external sheets_url
239
- #
240
- # sheets_table.format #=> "GOOGLE_SHEETS"
241
- # sheets_table.sheets? #=> true
242
- #
243
- def sheets?
244
- @gapi.source_format == "GOOGLE_SHEETS"
245
- end
246
-
247
- ##
248
- # Whether the data format is "AVRO".
249
- #
250
- # @return [Boolean]
251
- #
252
- # @example
253
- # require "google/cloud/bigquery"
254
- #
255
- # bigquery = Google::Cloud::Bigquery.new
256
- #
257
- # avro_url = "gs://bucket/path/to/data.avro"
258
- # avro_table = bigquery.external avro_url
259
- #
260
- # avro_table.format #=> "AVRO"
261
- # avro_table.avro? #=> true
262
- #
263
- def avro?
264
- @gapi.source_format == "AVRO"
265
- end
266
-
267
- ##
268
- # Whether the data format is "DATASTORE_BACKUP".
269
- #
270
- # @return [Boolean]
271
- #
272
- # @example
273
- # require "google/cloud/bigquery"
274
- #
275
- # bigquery = Google::Cloud::Bigquery.new
276
- #
277
- # backup_url = "gs://bucket/path/to/data.backup_info"
278
- # backup_table = bigquery.external backup_url
279
- #
280
- # backup_table.format #=> "DATASTORE_BACKUP"
281
- # backup_table.backup? #=> true
282
- #
283
- def backup?
284
- @gapi.source_format == "DATASTORE_BACKUP"
285
- end
286
-
287
- ##
288
- # Whether the data format is "BIGTABLE".
289
- #
290
- # @return [Boolean]
291
- #
292
- # @example
293
- # require "google/cloud/bigquery"
294
- #
295
- # bigquery = Google::Cloud::Bigquery.new
296
- #
297
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
298
- # bigtable_table = bigquery.external bigtable_url
299
- #
300
- # bigtable_table.format #=> "BIGTABLE"
301
- # bigtable_table.bigtable? #=> true
302
- #
303
- def bigtable?
304
- @gapi.source_format == "BIGTABLE"
305
- end
306
-
307
- ##
308
- # The fully-qualified URIs that point to your data in Google Cloud.
309
- # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
310
- # character and it must come after the 'bucket' name. Size limits
311
- # related to load jobs apply to external data sources. For Google
312
- # Cloud Bigtable URIs: Exactly one URI can be specified and it has be
313
- # a fully specified and valid HTTPS URL for a Google Cloud Bigtable
314
- # table. For Google Cloud Datastore backups, exactly one URI can be
315
- # specified, and it must end with '.backup_info'. Also, the '*'
316
- # wildcard character is not allowed.
317
- #
318
- # @return [Array<String>]
319
- #
320
- # @example
321
- # require "google/cloud/bigquery"
322
- #
323
- # bigquery = Google::Cloud::Bigquery.new
324
- #
325
- # csv_url = "gs://bucket/path/to/data.csv"
326
- # csv_table = bigquery.external csv_url
327
- #
328
- # csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
329
- #
330
- def urls
331
- @gapi.source_uris
332
- end
333
-
334
- ##
335
- # Indicates if the schema and format options are detected
336
- # automatically.
337
- #
338
- # @return [Boolean]
339
- #
340
- # @example
341
- # require "google/cloud/bigquery"
342
- #
343
- # bigquery = Google::Cloud::Bigquery.new
344
- #
345
- # csv_url = "gs://bucket/path/to/data.csv"
346
- # csv_table = bigquery.external csv_url do |csv|
347
- # csv.autodetect = true
348
- # end
349
- #
350
- # csv_table.autodetect #=> true
351
- #
352
- def autodetect
353
- @gapi.autodetect
354
- end
355
-
356
- ##
357
- # Set whether to detect schema and format options automatically. Any
358
- # option specified explicitly will be honored.
359
- #
360
- # @param [Boolean] new_autodetect New autodetect value
361
- #
362
- # @example
363
- # require "google/cloud/bigquery"
364
- #
365
- # bigquery = Google::Cloud::Bigquery.new
366
- #
367
- # csv_url = "gs://bucket/path/to/data.csv"
368
- # csv_table = bigquery.external csv_url do |csv|
369
- # csv.autodetect = true
370
- # end
371
- #
372
- # csv_table.autodetect #=> true
373
- #
374
- def autodetect= new_autodetect
375
- frozen_check!
376
- @gapi.autodetect = new_autodetect
377
- end
378
-
379
- ##
380
- # The compression type of the data source. Possible values include
381
- # `"GZIP"` and `nil`. The default value is `nil`. This setting is
382
- # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
383
- # and Avro formats. Optional.
384
- #
385
- # @return [String]
386
- #
387
- # @example
388
- # require "google/cloud/bigquery"
389
- #
390
- # bigquery = Google::Cloud::Bigquery.new
391
- #
392
- # csv_url = "gs://bucket/path/to/data.csv"
393
- # csv_table = bigquery.external csv_url do |csv|
394
- # csv.compression = "GZIP"
395
- # end
396
- #
397
- # csv_table.compression #=> "GZIP"
398
- def compression
399
- @gapi.compression
400
- end
401
-
402
- ##
403
- # Set the compression type of the data source. Possible values include
404
- # `"GZIP"` and `nil`. The default value is `nil`. This setting is
405
- # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
406
- # and Avro formats. Optional.
407
- #
408
- # @param [String] new_compression New compression value
409
- #
410
- # @example
411
- # require "google/cloud/bigquery"
412
- #
413
- # bigquery = Google::Cloud::Bigquery.new
414
- #
415
- # csv_url = "gs://bucket/path/to/data.csv"
416
- # csv_table = bigquery.external csv_url do |csv|
417
- # csv.compression = "GZIP"
418
- # end
419
- #
420
- # csv_table.compression #=> "GZIP"
421
- #
422
- def compression= new_compression
423
- frozen_check!
424
- @gapi.compression = new_compression
425
- end
426
-
427
- ##
428
- # Indicates if BigQuery should allow extra values that are not
429
- # represented in the table schema. If `true`, the extra values are
430
- # ignored. If `false`, records with extra columns are treated as bad
431
- # records, and if there are too many bad records, an invalid error is
432
- # returned in the job result. The default value is `false`.
433
- #
434
- # BigQuery treats trailing columns as an extra in `CSV`, named values
435
- # that don't match any column names in `JSON`. This setting is ignored
436
- # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
437
- # formats. Optional.
438
- #
439
- # @return [Boolean]
440
- #
441
- # @example
442
- # require "google/cloud/bigquery"
443
- #
444
- # bigquery = Google::Cloud::Bigquery.new
445
- #
446
- # csv_url = "gs://bucket/path/to/data.csv"
447
- # csv_table = bigquery.external csv_url do |csv|
448
- # csv.ignore_unknown = true
449
- # end
450
- #
451
- # csv_table.ignore_unknown #=> true
452
- #
453
- def ignore_unknown
454
- @gapi.ignore_unknown_values
455
- end
456
-
457
- ##
458
- # Set whether BigQuery should allow extra values that are not
459
- # represented in the table schema. If `true`, the extra values are
460
- # ignored. If `false`, records with extra columns are treated as bad
461
- # records, and if there are too many bad records, an invalid error is
462
- # returned in the job result. The default value is `false`.
463
- #
464
- # BigQuery treats trailing columns as an extra in `CSV`, named values
465
- # that don't match any column names in `JSON`. This setting is ignored
466
- # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
467
- # formats. Optional.
468
- #
469
- # @param [Boolean] new_ignore_unknown New ignore_unknown value
470
- #
471
- # @example
472
- # require "google/cloud/bigquery"
473
- #
474
- # bigquery = Google::Cloud::Bigquery.new
475
- #
476
- # csv_url = "gs://bucket/path/to/data.csv"
477
- # csv_table = bigquery.external csv_url do |csv|
478
- # csv.ignore_unknown = true
479
- # end
480
- #
481
- # csv_table.ignore_unknown #=> true
482
- #
483
- def ignore_unknown= new_ignore_unknown
484
- frozen_check!
485
- @gapi.ignore_unknown_values = new_ignore_unknown
486
- end
487
-
488
- ##
489
- # The maximum number of bad records that BigQuery can ignore when
490
- # reading data. If the number of bad records exceeds this value, an
491
- # invalid error is returned in the job result. The default value is 0,
492
- # which requires that all records are valid. This setting is ignored
493
- # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
494
- # formats.
495
- #
496
- # @return [Integer]
497
- #
498
- # @example
499
- # require "google/cloud/bigquery"
500
- #
501
- # bigquery = Google::Cloud::Bigquery.new
502
- #
503
- # csv_url = "gs://bucket/path/to/data.csv"
504
- # csv_table = bigquery.external csv_url do |csv|
505
- # csv.max_bad_records = 10
506
- # end
507
- #
508
- # csv_table.max_bad_records #=> 10
509
- #
510
- def max_bad_records
511
- @gapi.max_bad_records
512
- end
513
-
514
- ##
515
- # Set the maximum number of bad records that BigQuery can ignore when
516
- # reading data. If the number of bad records exceeds this value, an
517
- # invalid error is returned in the job result. The default value is 0,
518
- # which requires that all records are valid. This setting is ignored
519
- # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
520
- # formats.
521
- #
522
- # @param [Integer] new_max_bad_records New max_bad_records value
523
- #
524
- # @example
525
- # require "google/cloud/bigquery"
526
- #
527
- # bigquery = Google::Cloud::Bigquery.new
528
- #
529
- # csv_url = "gs://bucket/path/to/data.csv"
530
- # csv_table = bigquery.external csv_url do |csv|
531
- # csv.max_bad_records = 10
532
- # end
533
- #
534
- # csv_table.max_bad_records #=> 10
535
- #
536
- def max_bad_records= new_max_bad_records
537
- frozen_check!
538
- @gapi.max_bad_records = new_max_bad_records
539
- end
540
-
541
- ##
542
- # @private Google API Client object.
543
- def to_gapi
544
- @gapi
545
- end
546
-
547
- ##
548
- # @private Google API Client object.
549
- def self.from_gapi gapi
550
- new_table = new
551
- new_table.instance_variable_set :@gapi, gapi
552
- new_table
553
- end
554
-
555
- protected
556
-
557
- def frozen_check!
558
- return unless frozen?
559
- raise ArgumentError,
560
- "Cannot modify external data source when frozen"
561
- end
562
- end
563
-
564
- ##
565
- # # CsvSource
566
- #
567
- # {External::CsvSource} is a subclass of {External::DataSource} and
568
- # represents a CSV external data source that can be queried from
569
- # directly, such as Google Cloud Storage or Google Drive, even though
570
- # the data is not stored in BigQuery. Instead of loading or streaming
571
- # the data, this object references the external data source.
572
- #
573
- # @example
574
- # require "google/cloud/bigquery"
575
- #
576
- # bigquery = Google::Cloud::Bigquery.new
577
- #
578
- # csv_url = "gs://bucket/path/to/data.csv"
579
- # csv_table = bigquery.external csv_url do |csv|
580
- # csv.autodetect = true
581
- # csv.skip_leading_rows = 1
582
- # end
583
- #
584
- # data = bigquery.query "SELECT * FROM my_ext_table",
585
- # external: { my_ext_table: csv_table }
586
- #
587
- # data.each do |row|
588
- # puts row[:name]
589
- # end
590
- #
591
- class CsvSource < External::DataSource
592
- ##
593
- # @private Create an empty CsvSource object.
594
- def initialize
595
- super
596
- @gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
597
- end
598
-
599
- ##
600
- # Indicates if BigQuery should accept rows that are missing trailing
601
- # optional columns.
602
- #
603
- # @return [Boolean]
604
- #
605
- # @example
606
- # require "google/cloud/bigquery"
607
- #
608
- # bigquery = Google::Cloud::Bigquery.new
609
- #
610
- # csv_url = "gs://bucket/path/to/data.csv"
611
- # csv_table = bigquery.external csv_url do |csv|
612
- # csv.jagged_rows = true
613
- # end
614
- #
615
- # csv_table.jagged_rows #=> true
616
- #
617
- def jagged_rows
618
- @gapi.csv_options.allow_jagged_rows
619
- end
620
-
621
- ##
622
- # Set whether BigQuery should accept rows that are missing trailing
623
- # optional columns.
624
- #
625
- # @param [Boolean] new_jagged_rows New jagged_rows value
626
- #
627
- # @example
628
- # require "google/cloud/bigquery"
629
- #
630
- # bigquery = Google::Cloud::Bigquery.new
631
- #
632
- # csv_url = "gs://bucket/path/to/data.csv"
633
- # csv_table = bigquery.external csv_url do |csv|
634
- # csv.jagged_rows = true
635
- # end
636
- #
637
- # csv_table.jagged_rows #=> true
638
- #
639
- def jagged_rows= new_jagged_rows
640
- frozen_check!
641
- @gapi.csv_options.allow_jagged_rows = new_jagged_rows
642
- end
643
-
644
- ##
645
- # Indicates if BigQuery should allow quoted data sections that contain
646
- # newline characters in a CSV file.
647
- #
648
- # @return [Boolean]
649
- #
650
- # @example
651
- # require "google/cloud/bigquery"
652
- #
653
- # bigquery = Google::Cloud::Bigquery.new
654
- #
655
- # csv_url = "gs://bucket/path/to/data.csv"
656
- # csv_table = bigquery.external csv_url do |csv|
657
- # csv.quoted_newlines = true
658
- # end
659
- #
660
- # csv_table.quoted_newlines #=> true
661
- #
662
- def quoted_newlines
663
- @gapi.csv_options.allow_quoted_newlines
664
- end
665
-
666
- ##
667
- # Set whether BigQuery should allow quoted data sections that contain
668
- # newline characters in a CSV file.
669
- #
670
- # @param [Boolean] new_quoted_newlines New quoted_newlines value
671
- #
672
- # @example
673
- # require "google/cloud/bigquery"
674
- #
675
- # bigquery = Google::Cloud::Bigquery.new
676
- #
677
- # csv_url = "gs://bucket/path/to/data.csv"
678
- # csv_table = bigquery.external csv_url do |csv|
679
- # csv.quoted_newlines = true
680
- # end
681
- #
682
- # csv_table.quoted_newlines #=> true
683
- #
684
- def quoted_newlines= new_quoted_newlines
685
- frozen_check!
686
- @gapi.csv_options.allow_quoted_newlines = new_quoted_newlines
687
- end
688
-
689
- ##
690
- # The character encoding of the data.
691
- #
692
- # @return [String]
693
- #
694
- # @example
695
- # require "google/cloud/bigquery"
696
- #
697
- # bigquery = Google::Cloud::Bigquery.new
698
- #
699
- # csv_url = "gs://bucket/path/to/data.csv"
700
- # csv_table = bigquery.external csv_url do |csv|
701
- # csv.encoding = "UTF-8"
702
- # end
703
- #
704
- # csv_table.encoding #=> "UTF-8"
705
- #
706
- def encoding
707
- @gapi.csv_options.encoding
708
- end
709
-
710
- ##
711
- # Set the character encoding of the data.
712
- #
713
- # @param [String] new_encoding New encoding value
714
- #
715
- # @example
716
- # require "google/cloud/bigquery"
717
- #
718
- # bigquery = Google::Cloud::Bigquery.new
719
- #
720
- # csv_url = "gs://bucket/path/to/data.csv"
721
- # csv_table = bigquery.external csv_url do |csv|
722
- # csv.encoding = "UTF-8"
723
- # end
724
- #
725
- # csv_table.encoding #=> "UTF-8"
726
- #
727
- def encoding= new_encoding
728
- frozen_check!
729
- @gapi.csv_options.encoding = new_encoding
730
- end
731
-
732
- ##
733
- # Checks if the character encoding of the data is "UTF-8". This is the
734
- # default.
735
- #
736
- # @return [Boolean]
737
- #
738
- # @example
739
- # require "google/cloud/bigquery"
740
- #
741
- # bigquery = Google::Cloud::Bigquery.new
742
- #
743
- # csv_url = "gs://bucket/path/to/data.csv"
744
- # csv_table = bigquery.external csv_url do |csv|
745
- # csv.encoding = "UTF-8"
746
- # end
747
- #
748
- # csv_table.encoding #=> "UTF-8"
749
- # csv_table.utf8? #=> true
750
- #
751
- def utf8?
752
- return true if encoding.nil?
753
- encoding == "UTF-8"
754
- end
755
-
756
- ##
757
- # Checks if the character encoding of the data is "ISO-8859-1".
758
- #
759
- # @return [Boolean]
760
- #
761
- # @example
762
- # require "google/cloud/bigquery"
763
- #
764
- # bigquery = Google::Cloud::Bigquery.new
765
- #
766
- # csv_url = "gs://bucket/path/to/data.csv"
767
- # csv_table = bigquery.external csv_url do |csv|
768
- # csv.encoding = "ISO-8859-1"
769
- # end
770
- #
771
- # csv_table.encoding #=> "ISO-8859-1"
772
- # csv_table.iso8859_1? #=> true
773
- #
774
- def iso8859_1?
775
- encoding == "ISO-8859-1"
776
- end
777
-
778
- ##
779
- # The separator for fields in a CSV file.
780
- #
781
- # @return [String]
782
- #
783
- # @example
784
- # require "google/cloud/bigquery"
785
- #
786
- # bigquery = Google::Cloud::Bigquery.new
787
- #
788
- # csv_url = "gs://bucket/path/to/data.csv"
789
- # csv_table = bigquery.external csv_url do |csv|
790
- # csv.delimiter = "|"
791
- # end
792
- #
793
- # csv_table.delimiter #=> "|"
794
- #
795
- def delimiter
796
- @gapi.csv_options.field_delimiter
797
- end
798
-
799
- ##
800
- # Set the separator for fields in a CSV file.
801
- #
802
- # @param [String] new_delimiter New delimiter value
803
- #
804
- # @example
805
- # require "google/cloud/bigquery"
806
- #
807
- # bigquery = Google::Cloud::Bigquery.new
808
- #
809
- # csv_url = "gs://bucket/path/to/data.csv"
810
- # csv_table = bigquery.external csv_url do |csv|
811
- # csv.delimiter = "|"
812
- # end
813
- #
814
- # csv_table.delimiter #=> "|"
815
- #
816
- def delimiter= new_delimiter
817
- frozen_check!
818
- @gapi.csv_options.field_delimiter = new_delimiter
819
- end
820
-
821
- ##
822
- # The value that is used to quote data sections in a CSV file.
823
- #
824
- # @return [String]
825
- #
826
- # @example
827
- # require "google/cloud/bigquery"
828
- #
829
- # bigquery = Google::Cloud::Bigquery.new
830
- #
831
- # csv_url = "gs://bucket/path/to/data.csv"
832
- # csv_table = bigquery.external csv_url do |csv|
833
- # csv.quote = "'"
834
- # end
835
- #
836
- # csv_table.quote #=> "'"
837
- #
838
- def quote
839
- @gapi.csv_options.quote
840
- end
841
-
842
- ##
843
- # Set the value that is used to quote data sections in a CSV file.
844
- #
845
- # @param [String] new_quote New quote value
846
- #
847
- # @example
848
- # require "google/cloud/bigquery"
849
- #
850
- # bigquery = Google::Cloud::Bigquery.new
851
- #
852
- # csv_url = "gs://bucket/path/to/data.csv"
853
- # csv_table = bigquery.external csv_url do |csv|
854
- # csv.quote = "'"
855
- # end
856
- #
857
- # csv_table.quote #=> "'"
858
- #
859
- def quote= new_quote
860
- frozen_check!
861
- @gapi.csv_options.quote = new_quote
862
- end
863
-
864
- ##
865
- # The number of rows at the top of a CSV file that BigQuery will skip
866
- # when reading the data.
867
- #
868
- # @return [Integer]
869
- #
870
- # @example
871
- # require "google/cloud/bigquery"
872
- #
873
- # bigquery = Google::Cloud::Bigquery.new
874
- #
875
- # csv_url = "gs://bucket/path/to/data.csv"
876
- # csv_table = bigquery.external csv_url do |csv|
877
- # csv.skip_leading_rows = 1
878
- # end
879
- #
880
- # csv_table.skip_leading_rows #=> 1
881
- #
882
- def skip_leading_rows
883
- @gapi.csv_options.skip_leading_rows
884
- end
885
-
886
- ##
887
- # Set the number of rows at the top of a CSV file that BigQuery will
888
- # skip when reading the data.
889
- #
890
- # @param [Integer] row_count New skip_leading_rows value
891
- #
892
- # @example
893
- # require "google/cloud/bigquery"
894
- #
895
- # bigquery = Google::Cloud::Bigquery.new
896
- #
897
- # csv_url = "gs://bucket/path/to/data.csv"
898
- # csv_table = bigquery.external csv_url do |csv|
899
- # csv.skip_leading_rows = 1
900
- # end
901
- #
902
- # csv_table.skip_leading_rows #=> 1
903
- #
904
- def skip_leading_rows= row_count
905
- frozen_check!
906
- @gapi.csv_options.skip_leading_rows = row_count
907
- end
908
-
909
- ##
910
- # The schema for the data.
911
- #
912
- # @param [Boolean] replace Whether to replace the existing schema with
913
- # the new schema. If `true`, the fields will replace the existing
914
- # schema. If `false`, the fields will be added to the existing
915
- # schema. The default value is `false`.
916
- # @yield [schema] a block for setting the schema
917
- # @yieldparam [Schema] schema the object accepting the schema
918
- #
919
- # @return [Google::Cloud::Bigquery::Schema]
920
- #
921
- # @example
922
- # require "google/cloud/bigquery"
923
- #
924
- # bigquery = Google::Cloud::Bigquery.new
925
- #
926
- # csv_url = "gs://bucket/path/to/data.csv"
927
- # csv_table = bigquery.external csv_url do |csv|
928
- # csv.schema do |schema|
929
- # schema.string "name", mode: :required
930
- # schema.string "email", mode: :required
931
- # schema.integer "age", mode: :required
932
- # schema.boolean "active", mode: :required
933
- # end
934
- # end
935
- #
936
- def schema replace: false
937
- @schema ||= Schema.from_gapi @gapi.schema
938
- if replace
939
- frozen_check!
940
- @schema = Schema.from_gapi
941
- end
942
- @schema.freeze if frozen?
943
- yield @schema if block_given?
944
- @schema
945
- end
946
-
947
##
# Set the schema for the data, replacing any memoized schema.
#
# @param [Schema] new_schema The schema object.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_schema = bigquery.schema do |schema|
#     schema.string "name", mode: :required
#     schema.string "email", mode: :required
#     schema.integer "age", mode: :required
#     schema.boolean "active", mode: :required
#   end
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url
#   csv_table.schema = csv_schema
#
def schema= new_schema
  frozen_check!
  @schema = new_schema
end
972
-
973
##
# The fields of the schema.
#
# @return [Array<Schema::Field>] fields of the lazily-built schema
#
def fields
  schema.fields
end
979
-
980
##
# The names of the columns in the schema.
#
# @return [Array<Symbol>] column names from the lazily-built schema
#
def headers
  schema.headers
end
986
-
987
##
# @private Google API Client object. Flushes any locally modified
# schema back onto the underlying gapi object before returning it.
def to_gapi
  @gapi.schema = @schema.to_gapi if @schema
  @gapi
end
993
-
994
##
# @private Build a source from a Google API Client object, eagerly
# converting its schema representation.
def self.from_gapi gapi
  source = super
  source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
  source
end
1002
- end
1003
-
1004
##
# # JsonSource
#
# {External::JsonSource} is a subclass of {External::DataSource} and
# represents a JSON external data source that can be queried from
# directly, such as Google Cloud Storage or Google Drive, even though
# the data is not stored in BigQuery. Instead of loading or streaming
# the data, this object references the external data source.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   json_url = "gs://bucket/path/to/data.json"
#   json_table = bigquery.external json_url do |json|
#     json.schema do |schema|
#       schema.string "name", mode: :required
#       schema.string "email", mode: :required
#       schema.integer "age", mode: :required
#       schema.boolean "active", mode: :required
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: json_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class JsonSource < External::DataSource
  ##
  # The schema for the data. Lazily materialized from the underlying
  # gapi representation on first access and memoized.
  #
  # @param [Boolean] replace Whether to replace the existing schema with
  #   the new schema. If `true`, the fields will replace the existing
  #   schema. If `false`, the fields will be added to the existing
  #   schema. The default value is `false`.
  # @yield [schema] a block for setting the schema
  # @yieldparam [Schema] schema the object accepting the schema
  #
  # @return [Google::Cloud::Bigquery::Schema]
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   json_url = "gs://bucket/path/to/data.json"
  #   json_table = bigquery.external json_url do |json|
  #     json.schema do |schema|
  #       schema.string "name", mode: :required
  #       schema.string "email", mode: :required
  #       schema.integer "age", mode: :required
  #       schema.boolean "active", mode: :required
  #     end
  #   end
  #
  def schema replace: false
    if replace
      frozen_check!
      @schema = Schema.from_gapi
    else
      @schema ||= Schema.from_gapi @gapi.schema
    end
    @schema.freeze if frozen?
    yield @schema if block_given?
    @schema
  end

  ##
  # Set the schema for the data, replacing any memoized schema.
  #
  # @param [Schema] new_schema The schema object.
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   json_schema = bigquery.schema do |schema|
  #     schema.string "name", mode: :required
  #     schema.string "email", mode: :required
  #     schema.integer "age", mode: :required
  #     schema.boolean "active", mode: :required
  #   end
  #
  #   json_url = "gs://bucket/path/to/data.json"
  #   json_table = bigquery.external json_url
  #   json_table.schema = json_schema
  #
  def schema= new_schema
    frozen_check!
    @schema = new_schema
  end

  ##
  # The fields of the schema.
  #
  # @return [Array<Schema::Field>]
  def fields
    schema.fields
  end

  ##
  # The names of the columns in the schema.
  #
  # @return [Array<Symbol>]
  def headers
    schema.headers
  end

  ##
  # @private Google API Client object. Flushes any locally modified
  # schema back onto the underlying gapi object before returning it.
  def to_gapi
    @gapi.schema = @schema.to_gapi if @schema
    @gapi
  end

  ##
  # @private Build a source from a Google API Client object, eagerly
  # converting its schema representation.
  def self.from_gapi gapi
    source = super
    source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
    source
  end
end
1134
-
1135
##
# # SheetsSource
#
# {External::SheetsSource} is a subclass of {External::DataSource} and
# represents a Google Sheets external data source that can be queried
# from directly, even though the data is not stored in BigQuery.
# Instead of loading or streaming the data, this object references the
# external data source.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
#   sheets_table = bigquery.external sheets_url do |sheets|
#     sheets.skip_leading_rows = 1
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: sheets_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class SheetsSource < External::DataSource
  ##
  # @private Create an empty SheetsSource object with a fresh (empty)
  # GoogleSheetsOptions gapi representation.
  def initialize
    super
    @gapi.google_sheets_options = Google::Apis::BigqueryV2::GoogleSheetsOptions.new
  end

  ##
  # The number of rows at the top of a sheet that BigQuery will skip
  # when reading the data. The default value is `0`.
  #
  # This property is useful if you have header rows that should be
  # skipped. When `autodetect` is on, behavior is the following:
  #
  # * `nil` - Autodetect tries to detect headers in the first row. If
  #   they are not detected, the row is read as data. Otherwise data is
  #   read starting from the second row.
  # * `0` - Instructs autodetect that there are no headers and data
  #   should be read starting from the first row.
  # * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
  #   in row `N`. If headers are not detected, row `N` is just skipped.
  #   Otherwise row `N` is used to extract column names for the
  #   detected schema.
  #
  # @return [Integer]
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.skip_leading_rows = 1
  #   end
  #
  #   sheets_table.skip_leading_rows #=> 1
  #
  def skip_leading_rows
    @gapi.google_sheets_options.skip_leading_rows
  end

  ##
  # Set the number of rows at the top of a sheet that BigQuery will
  # skip when reading the data. Raises if this source is frozen.
  #
  # @param [Integer] row_count New skip_leading_rows value
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.skip_leading_rows = 1
  #   end
  #
  #   sheets_table.skip_leading_rows #=> 1
  #
  def skip_leading_rows= row_count
    frozen_check!
    @gapi.google_sheets_options.skip_leading_rows = row_count
  end
end
1228
-
1229
- ##
1230
- # # BigtableSource
1231
- #
1232
- # {External::BigtableSource} is a subclass of {External::DataSource} and
1233
- # represents a Bigtable external data source that can be queried from
1234
- # directly, even though the data is not stored in BigQuery. Instead of
1235
- # loading or streaming the data, this object references the external
1236
- # data source.
1237
- #
1238
- # @example
1239
- # require "google/cloud/bigquery"
1240
- #
1241
- # bigquery = Google::Cloud::Bigquery.new
1242
- #
1243
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1244
- # bigtable_table = bigquery.external bigtable_url do |bt|
1245
- # bt.rowkey_as_string = true
1246
- # bt.add_family "user" do |u|
1247
- # u.add_string "name"
1248
- # u.add_string "email"
1249
- # u.add_integer "age"
1250
- # u.add_boolean "active"
1251
- # end
1252
- # end
1253
- #
1254
- # data = bigquery.query "SELECT * FROM my_ext_table",
1255
- # external: { my_ext_table: bigtable_table }
1256
- #
1257
- # data.each do |row|
1258
- # puts row[:name]
1259
- # end
1260
- #
1261
- class BigtableSource < External::DataSource
1262
##
# @private Create an empty BigtableSource object with a fresh (empty)
# BigtableOptions gapi representation and no column families
# configured yet.
def initialize
  super
  @gapi.bigtable_options = Google::Apis::BigqueryV2::BigtableOptions.new
  @families = []
end
1270
-
1271
##
# List of column families to expose in the table schema along with
# their types. This list restricts the column families that can be
# referenced in queries and specifies their value types. You can use
# this list to do type conversions - see
# {BigtableSource::ColumnFamily#type} for more details. If you leave
# this list empty, all column families are present in the table schema
# and their values are read as `BYTES`. During a query only the column
# families referenced in that query are read from Bigtable.
#
# @return [Array<BigtableSource::ColumnFamily>]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
#   bigtable_table.families.count #=> 1
#
def families
  @families
end
1304
-
1305
##
# Add a column family to expose in the table schema along with its
# types. Columns belonging to the column family may also be exposed.
#
# @param [String] family_id Identifier of the column family. See
#   {BigtableSource::ColumnFamily#family_id}.
# @param [String] encoding The encoding of the values when the type is
#   not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
# @param [Boolean] latest Whether only the latest version of value are
#   exposed for all columns in this column family. See
#   {BigtableSource::ColumnFamily#latest}.
# @param [String] type The type to convert the value in cells of this
#   column. See {BigtableSource::ColumnFamily#type}.
#
# @yield [family] a block for setting the family
# @yieldparam [BigtableSource::ColumnFamily] family the family object
#
# @return [BigtableSource::ColumnFamily]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
def add_family family_id, encoding: nil, latest: nil, type: nil
  frozen_check!
  # Truthiness guards (not nil checks) are deliberate: passing e.g.
  # `latest: false` leaves the gapi attribute unset, as before.
  family = BigtableSource::ColumnFamily.new.tap do |f|
    f.family_id = family_id
    f.encoding = encoding if encoding
    f.latest = latest if latest
    f.type = type if type
  end
  yield family if block_given?
  @families << family
  family
end
1351
-
1352
##
# Whether the rowkey column families will be read and converted to
# string. Otherwise they are read with `BYTES` type values and users
# need to manually cast them with `CAST` if necessary. The default
# value is `false`.
#
# @return [Boolean]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#   end
#
#   bigtable_table.rowkey_as_string #=> true
#
def rowkey_as_string
  @gapi.bigtable_options.read_rowkey_as_string
end
1375
-
1376
##
# Set whether the rowkey column families will be read and converted
# to string. (The previous description, copy-pasted from the Sheets
# `skip_leading_rows` setter, described the wrong property.)
#
# @param [Boolean] row_rowkey New rowkey_as_string value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#   end
#
#   bigtable_table.rowkey_as_string #=> true
#
def rowkey_as_string= row_rowkey
  frozen_check!
  @gapi.bigtable_options.read_rowkey_as_string = row_rowkey
end
1398
-
1399
##
# @private Google API Client object. Flushes the locally configured
# column families back onto the underlying gapi object before
# returning it.
def to_gapi
  @gapi.bigtable_options.column_families = @families.map(&:to_gapi)
  @gapi
end
1405
-
1406
##
# @private Build a source from a Google API Client object, eagerly
# converting its column family representations.
def self.from_gapi gapi
  source = super
  fams = Array(gapi.bigtable_options.column_families).map do |fam_gapi|
    BigtableSource::ColumnFamily.from_gapi fam_gapi
  end
  source.instance_variable_set :@families, fams
  source
end
1417
-
1418
##
# @private
# Deep-freeze the column families and their container before freezing
# the data source itself. Fixed to call `freeze`: the previous
# `map(&:freeze!)` / `@families.freeze!` calls raised NoMethodError,
# because neither `Array` nor {BigtableSource::ColumnFamily} defines
# a `freeze!` method (ColumnFamily defines `freeze`).
def freeze
  @families.each(&:freeze)
  @families.freeze
  super
end
1425
-
1426
- protected
1427
-
1428
# Guard used by all mutators: disallow modification once this data
# source has been frozen.
#
# @raise [ArgumentError] if the receiver is frozen
def frozen_check!
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
1433
-
1434
##
# # BigtableSource::ColumnFamily
#
# A Bigtable column family used to expose in the table schema along
# with its types and columns.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: bigtable_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class ColumnFamily
  ##
  # @private Create an empty BigtableSource::ColumnFamily object.
  def initialize
    @gapi = Google::Apis::BigqueryV2::BigtableColumnFamily.new
    @columns = []
  end

  ##
  # The encoding of the values when the type is not `STRING`.
  #
  # @return [String]
  def encoding
    @gapi.encoding
  end

  ##
  # Set the encoding of the values when the type is not `STRING`.
  # Acceptable encoding values are:
  #
  # * `TEXT` - indicates values are alphanumeric text strings.
  # * `BINARY` - indicates values are encoded using HBase
  #   `Bytes.toBytes` family of functions. This can be overridden on a
  #   column.
  #
  # @param [String] new_encoding New encoding value
  def encoding= new_encoding
    frozen_check!
    @gapi.encoding = new_encoding
  end

  ##
  # Identifier of the column family.
  #
  # @return [String]
  def family_id
    @gapi.family_id
  end

  ##
  # Set the identifier of the column family.
  #
  # @param [String] new_family_id New family_id value
  def family_id= new_family_id
    frozen_check!
    @gapi.family_id = new_family_id
  end

  ##
  # Whether only the latest version of value are exposed for all
  # columns in this column family.
  #
  # @return [Boolean]
  def latest
    @gapi.only_read_latest
  end

  ##
  # Set whether only the latest version of value are exposed for all
  # columns in this column family.
  #
  # @param [Boolean] new_latest New latest value
  def latest= new_latest
    frozen_check!
    @gapi.only_read_latest = new_latest
  end

  ##
  # The type to convert the value in cells of this column family. The
  # values are expected to be encoded using HBase `Bytes.toBytes`
  # function when using the `BINARY` encoding value. Allowed BigQuery
  # types: `BYTES`, `STRING`, `INTEGER`, `FLOAT`, `BOOLEAN`. Default
  # type is `BYTES`. This can be overridden on a column.
  #
  # @return [String]
  def type
    @gapi.type
  end

  ##
  # Set the type to convert the value in cells of this column family.
  # The values are expected to be encoded using HBase `Bytes.toBytes`
  # function when using the `BINARY` encoding value. Allowed BigQuery
  # types: `BYTES`, `STRING`, `INTEGER`, `FLOAT`, `BOOLEAN`. Default
  # type is `BYTES`. This can be overridden on a column.
  #
  # @param [String] new_type New type value
  def type= new_type
    frozen_check!
    @gapi.type = new_type
  end

  ##
  # Lists of columns that should be exposed as individual fields.
  #
  # @return [Array<BigtableSource::Column>]
  def columns
    @columns
  end

  ##
  # Add a column to the column family to expose in the table schema
  # along with its types.
  #
  # @param [String] qualifier Qualifier of the column. See
  #   {BigtableSource::Column#qualifier}.
  # @param [String] as A valid identifier to be used as the column
  #   field name if the qualifier is not a valid BigQuery field
  #   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
  #   {BigtableSource::Column#field_name}.
  # @param [String] type The type to convert the value in cells of
  #   this column. See {BigtableSource::Column#type}. Allowed BigQuery
  #   types: `BYTES`, `STRING`, `INTEGER`, `FLOAT`, `BOOLEAN`.
  #
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  #
  # @return [BigtableSource::Column] the newly added column
  #   (the previous doc incorrectly declared an Array return)
  def add_column qualifier, as: nil, type: nil
    frozen_check!
    col = BigtableSource::Column.new
    col.qualifier = qualifier
    col.field_name = as if as
    col.type = type if type
    yield col if block_given?
    @columns << col
    col
  end

  ##
  # Add a column specified as the `BYTES` type. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional valid identifier for the field name.
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  # @return [BigtableSource::Column]
  def add_bytes qualifier, as: nil
    col = add_column qualifier, as: as, type: "BYTES"
    yield col if block_given?
    col
  end

  ##
  # Add a column specified as the `STRING` type. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional valid identifier for the field name.
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  # @return [BigtableSource::Column]
  def add_string qualifier, as: nil
    col = add_column qualifier, as: as, type: "STRING"
    yield col if block_given?
    col
  end

  ##
  # Add a column specified as the `INTEGER` type. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional valid identifier for the field name.
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  # @return [BigtableSource::Column]
  def add_integer qualifier, as: nil
    col = add_column qualifier, as: as, type: "INTEGER"
    yield col if block_given?
    col
  end

  ##
  # Add a column specified as the `FLOAT` type. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional valid identifier for the field name.
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  # @return [BigtableSource::Column]
  def add_float qualifier, as: nil
    col = add_column qualifier, as: as, type: "FLOAT"
    yield col if block_given?
    col
  end

  ##
  # Add a column specified as the `BOOLEAN` type. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional valid identifier for the field name.
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  # @return [BigtableSource::Column]
  def add_boolean qualifier, as: nil
    col = add_column qualifier, as: as, type: "BOOLEAN"
    yield col if block_given?
    col
  end

  ##
  # @private Google API Client object. Flushes the locally configured
  # columns back onto the underlying gapi object before returning it.
  def to_gapi
    @gapi.columns = @columns.map(&:to_gapi)
    @gapi
  end

  ##
  # @private Build a column family from a Google API Client object,
  # eagerly converting its column representations.
  def self.from_gapi gapi
    new_fam = new
    new_fam.instance_variable_set :@gapi, gapi
    columns = Array(gapi.columns).map do |col_gapi|
      BigtableSource::Column.from_gapi col_gapi
    end
    new_fam.instance_variable_set :@columns, columns
    new_fam
  end

  ##
  # @private
  # Deep-freeze the columns and their container before freezing this
  # object. Fixed to call `freeze`: the previous `map(&:freeze!)` /
  # `@columns.freeze!` calls raised NoMethodError, because `Array`
  # does not define `freeze!` (and Column is not shown to either).
  def freeze
    @columns.each(&:freeze)
    @columns.freeze
    super
  end

  protected

  # Guard used by all mutators: disallow modification once frozen.
  #
  # @raise [ArgumentError] if the receiver is frozen
  def frozen_check!
    return unless frozen?
    raise ArgumentError,
          "Cannot modify external data source when frozen"
  end
end
1973
-
1974
- ##
1975
- # # BigtableSource::Column
1976
- #
1977
- # A Bigtable column to expose in the table schema along with its
1978
- # types.
1979
- #
1980
- # @example
1981
- # require "google/cloud/bigquery"
1982
- #
1983
- # bigquery = Google::Cloud::Bigquery.new
1984
- #
1985
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1986
- # bigtable_table = bigquery.external bigtable_url do |bt|
1987
- # bt.rowkey_as_string = true
1988
- # bt.add_family "user" do |u|
1989
- # u.add_string "name"
1990
- # u.add_string "email"
1991
- # u.add_integer "age"
1992
- # u.add_boolean "active"
1993
- # end
1994
- # end
1995
- #
1996
- # data = bigquery.query "SELECT * FROM my_ext_table",
1997
- # external: { my_ext_table: bigtable_table }
1998
- #
1999
- # data.each do |row|
2000
- # puts row[:name]
2001
- # end
2002
- #
2003
- class Column
2004
- ##
2005
- # @private Create an empty BigtableSource::Column object.
2006
- def initialize
2007
- @gapi = Google::Apis::BigqueryV2::BigtableColumn.new
2008
- end
2009
-
2010
- ##
2011
- # Qualifier of the column. Columns in the parent column family that
2012
- # has this exact qualifier are exposed as `.` field. If the
2013
- # qualifier is valid UTF-8 string, it will be represented as a UTF-8
2014
- # string. Otherwise, it will represented as a ASCII-8BIT string.
2015
- #
2016
- # If the qualifier is not a valid BigQuery field identifier (does
2017
- # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
2018
- # provided as `field_name`.
2019
- #
2020
- # @return [String]
2021
- #
2022
- # @example
2023
- # require "google/cloud/bigquery"
2024
- #
2025
- # bigquery = Google::Cloud::Bigquery.new
2026
- #
2027
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2028
- # bigtable_table = bigquery.external bigtable_url do |bt|
2029
- # bt.add_family "user" do |u|
2030
- # u.add_string "name" do |col|
2031
- # col.qualifier # "user"
2032
- # col.qualifier = "User"
2033
- # col.qualifier # "User"
2034
- # end
2035
- # end
2036
- # end
2037
- #
2038
- def qualifier
2039
- @gapi.qualifier_string || \
2040
- Base64.strict_decode64(@gapi.qualifier_encoded.to_s)
2041
- end
2042
-
2043
- ##
2044
- # Set the qualifier of the column. Columns in the parent column
2045
- # family that has this exact qualifier are exposed as `.` field.
2046
- # Values that are valid UTF-8 strings will be treated as such. All
2047
- # other values will be treated as `BINARY`.
2048
- #
2049
- # @param [String] new_qualifier New qualifier value
2050
- #
2051
- # @example
2052
- # require "google/cloud/bigquery"
2053
- #
2054
- # bigquery = Google::Cloud::Bigquery.new
2055
- #
2056
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2057
- # bigtable_table = bigquery.external bigtable_url do |bt|
2058
- # bt.add_family "user" do |u|
2059
- # u.add_string "name" do |col|
2060
- # col.qualifier # "user"
2061
- # col.qualifier = "User"
2062
- # col.qualifier # "User"
2063
- # end
2064
- # end
2065
- # end
2066
- #
2067
- def qualifier= new_qualifier
2068
- frozen_check!
2069
- raise ArgumentError if new_qualifier.nil?
2070
-
2071
- utf8_qualifier = new_qualifier.encode Encoding::UTF_8
2072
- if utf8_qualifier.valid_encoding?
2073
- @gapi.qualifier_string = utf8_qualifier
2074
- if @gapi.instance_variables.include? :@qualifier_encoded
2075
- @gapi.remove_instance_variable :@qualifier_encoded
2076
- end
2077
- else
2078
- @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
2079
- if @gapi.instance_variables.include? :@qualifier_string
2080
- @gapi.remove_instance_variable :@qualifier_string
2081
- end
2082
- end
2083
- rescue EncodingError
2084
- @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
2085
- if @gapi.instance_variables.include? :@qualifier_string
2086
- @gapi.remove_instance_variable :@qualifier_string
2087
- end
2088
- end
2089
-
2090
- ##
2091
- # The encoding of the values when the type is not `STRING`.
2092
- #
2093
- # @return [String]
2094
- #
2095
- # @example
2096
- # require "google/cloud/bigquery"
2097
- #
2098
- # bigquery = Google::Cloud::Bigquery.new
2099
- #
2100
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2101
- # bigtable_table = bigquery.external bigtable_url do |bt|
2102
- # bt.add_family "user" do |u|
2103
- # u.add_bytes "name" do |col|
2104
- # col.encoding = "TEXT"
2105
- # col.encoding # "TEXT"
2106
- # end
2107
- # end
2108
- # end
2109
- #
2110
- def encoding
2111
- @gapi.encoding
2112
- end
2113
-
2114
- ##
2115
- # Set the encoding of the values when the type is not `STRING`.
2116
- # Acceptable encoding values are:
2117
- #
2118
- # * `TEXT` - indicates values are alphanumeric text strings.
2119
- # * `BINARY` - indicates values are encoded using HBase
2120
- # `Bytes.toBytes` family of functions. This can be overridden on a
2121
- # column.
2122
- #
2123
- # @param [String] new_encoding New encoding value
2124
- #
2125
- # @example
2126
- # require "google/cloud/bigquery"
2127
- #
2128
- # bigquery = Google::Cloud::Bigquery.new
2129
- #
2130
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2131
- # bigtable_table = bigquery.external bigtable_url do |bt|
2132
- # bt.add_family "user" do |u|
2133
- # u.add_bytes "name" do |col|
2134
- # col.encoding = "TEXT"
2135
- # col.encoding # "TEXT"
2136
- # end
2137
- # end
2138
- # end
2139
- #
2140
- def encoding= new_encoding
2141
- frozen_check!
2142
- @gapi.encoding = new_encoding
2143
- end
2144
-
2145
- ##
2146
- # If the qualifier is not a valid BigQuery field identifier (does
2147
- # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
2148
- # provided as the column field name and is used as field name in
2149
- # queries.
2150
- #
2151
- # @return [String]
2152
- #
2153
- # @example
2154
- # require "google/cloud/bigquery"
2155
- #
2156
- # bigquery = Google::Cloud::Bigquery.new
2157
- #
2158
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2159
- # bigtable_table = bigquery.external bigtable_url do |bt|
2160
- # bt.add_family "user" do |u|
2161
- # u.add_string "001_name", as: "user" do |col|
2162
- # col.field_name # "user"
2163
- # col.field_name = "User"
2164
- # col.field_name # "User"
2165
- # end
2166
- # end
2167
- # end
2168
- #
2169
- def field_name
2170
- @gapi.field_name
2171
- end
2172
-
2173
- ##
2174
- # Sets the identifier to be used as the column field name in queries
2175
- # when the qualifier is not a valid BigQuery field identifier (does
2176
- # not match `[a-zA-Z][a-zA-Z0-9_]*`).
2177
- #
2178
- # @param [String] new_field_name New field_name value
2179
- #
2180
- # @example
2181
- # require "google/cloud/bigquery"
2182
- #
2183
- # bigquery = Google::Cloud::Bigquery.new
2184
- #
2185
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2186
- # bigtable_table = bigquery.external bigtable_url do |bt|
2187
- # bt.add_family "user" do |u|
2188
- # u.add_string "001_name", as: "user" do |col|
2189
- # col.field_name # "user"
2190
- # col.field_name = "User"
2191
- # col.field_name # "User"
2192
- # end
2193
- # end
2194
- # end
2195
- #
2196
- def field_name= new_field_name
2197
- frozen_check!
2198
- @gapi.field_name = new_field_name
2199
- end
2200
-
2201
- ##
2202
- # Whether only the latest version of value in this column are
2203
- # exposed. Can also be set at the column family level. However, this
2204
- # value takes precedence when set at both levels.
2205
- #
2206
- # @return [Boolean]
2207
- #
2208
- # @example
2209
- # require "google/cloud/bigquery"
2210
- #
2211
- # bigquery = Google::Cloud::Bigquery.new
2212
- #
2213
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2214
- # bigtable_table = bigquery.external bigtable_url do |bt|
2215
- # bt.add_family "user" do |u|
2216
- # u.add_string "name" do |col|
2217
- # col.latest = true
2218
- # col.latest # true
2219
- # end
2220
- # end
2221
- # end
2222
- #
2223
- def latest
2224
- @gapi.only_read_latest
2225
- end
2226
-
2227
- ##
2228
- # Set whether only the latest version of value in this column are
2229
- # exposed. Can also be set at the column family level. However, this
2230
- # value takes precedence when set at both levels.
2231
- #
2232
- # @param [Boolean] new_latest New latest value
2233
- #
2234
- # @example
2235
- # require "google/cloud/bigquery"
2236
- #
2237
- # bigquery = Google::Cloud::Bigquery.new
2238
- #
2239
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2240
- # bigtable_table = bigquery.external bigtable_url do |bt|
2241
- # bt.add_family "user" do |u|
2242
- # u.add_string "name" do |col|
2243
- # col.latest = true
2244
- # col.latest # true
2245
- # end
2246
- # end
2247
- # end
2248
- #
2249
- def latest= new_latest
2250
- frozen_check!
2251
- @gapi.only_read_latest = new_latest
2252
- end
2253
-
2254
- ##
2255
- # The type to convert the value in cells of this column. The values
2256
- # are expected to be encoded using HBase `Bytes.toBytes` function
2257
- # when using the `BINARY` encoding value. The following BigQuery
2258
- # types are allowed:
2259
- #
2260
- # * `BYTES`
2261
- # * `STRING`
2262
- # * `INTEGER`
2263
- # * `FLOAT`
2264
- # * `BOOLEAN`
2265
- #
2266
- # Default type is `BYTES`. Can also be set at the column family
2267
- # level. However, this value takes precedence when set at both
2268
- # levels.
2269
- #
2270
- # @return [String]
2271
- #
2272
- # @example
2273
- # require "google/cloud/bigquery"
2274
- #
2275
- # bigquery = Google::Cloud::Bigquery.new
2276
- #
2277
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2278
- # bigtable_table = bigquery.external bigtable_url do |bt|
2279
- # bt.add_family "user" do |u|
2280
- # u.add_string "name" do |col|
2281
- # col.type # "STRING"
2282
- # end
2283
- # end
2284
- # end
2285
- #
2286
- def type
2287
- @gapi.type
2288
- end
2289
-
2290
- ##
2291
- # Set the type to convert the value in cells of this column. The
2292
- # values are expected to be encoded using HBase `Bytes.toBytes`
2293
- # function when using the `BINARY` encoding value. The following
2294
- # BigQuery types are allowed:
2295
- #
2296
- # * `BYTES`
2297
- # * `STRING`
2298
- # * `INTEGER`
2299
- # * `FLOAT`
2300
- # * `BOOLEAN`
2301
- #
2302
- # Default type is `BYTES`. Can also be set at the column family
2303
- # level. However, this value takes precedence when set at both
2304
- # levels.
2305
- #
2306
- # @param [String] new_type New type value
2307
- #
2308
- # @example
2309
- # require "google/cloud/bigquery"
2310
- #
2311
- # bigquery = Google::Cloud::Bigquery.new
2312
- #
2313
- # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2314
- # bigtable_table = bigquery.external bigtable_url do |bt|
2315
- # bt.add_family "user" do |u|
2316
- # u.add_string "name" do |col|
2317
- # col.type # "STRING"
2318
- # col.type = "BYTES"
2319
- # col.type # "BYTES"
2320
- # end
2321
- # end
2322
- # end
2323
- #
2324
- def type= new_type
2325
- frozen_check!
2326
- @gapi.type = new_type
2327
- end
2328
-
2329
- ##
2330
- # @private Google API Client object.
2331
- def to_gapi
2332
- @gapi
2333
- end
2334
-
2335
- ##
2336
- # @private Google API Client object.
2337
- def self.from_gapi gapi
2338
- new_col = new
2339
- new_col.instance_variable_set :@gapi, gapi
2340
- new_col
2341
- end
2342
-
2343
- protected
2344
-
2345
- def frozen_check!
2346
- return unless frozen?
2347
- raise ArgumentError,
2348
- "Cannot modify external data source when frozen"
2349
- end
2350
- end
2351
- end
2352
146
  end
2353
147
  end
2354
148
  end