google-cloud-bigquery 0.28.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -71,9 +71,9 @@ module Google
71
71
##
# Retrieves the next page of datasets, or returns `nil` when there is
# no further page. The listing options (@hidden, @filter, @max) are
# threaded through to the service call and to the next List so every
# page is fetched with the same criteria.
def next
  return nil unless next?
  ensure_service!
  list_options = { all: @hidden, filter: @filter, token: token, max: @max }
  resp = @service.list_datasets list_options
  self.class.from_gapi resp, @service, @hidden, @filter, @max
end
78
78
 
79
79
  ##
@@ -140,7 +140,8 @@ module Google
140
140
 
141
141
  ##
142
142
  # @private New Dataset::List from a response object.
143
- def self.from_gapi gapi_list, service, hidden = nil, max = nil
143
+ def self.from_gapi gapi_list, service, hidden = nil, filter = nil,
144
+ max = nil
144
145
  datasets = List.new(Array(gapi_list.datasets).map do |gapi_object|
145
146
  Dataset.from_gapi gapi_object, service
146
147
  end)
@@ -148,6 +149,7 @@ module Google
148
149
  datasets.instance_variable_set :@etag, gapi_list.etag
149
150
  datasets.instance_variable_set :@service, service
150
151
  datasets.instance_variable_set :@hidden, hidden
152
+ datasets.instance_variable_set :@filter, filter
151
153
  datasets.instance_variable_set :@max, max
152
154
  datasets
153
155
  end
@@ -0,0 +1,2353 @@
1
+ # Copyright 2017 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "base64"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ ##
23
+ # # External
24
+ #
25
+ # Creates a new {External::DataSource} (or subclass) object that
26
+ # represents the external data source that can be queried from directly,
27
+ # even though the data is not stored in BigQuery. Instead of loading or
28
+ # streaming the data, this object references the external data source.
29
+ #
30
+ # See {External::DataSource}, {External::CsvSource},
31
+ # {External::JsonSource}, {External::SheetsSource},
32
+ # {External::BigtableSource}
33
+ #
34
+ # @example
35
+ # require "google/cloud/bigquery"
36
+ #
37
+ # bigquery = Google::Cloud::Bigquery.new
38
+ #
39
+ # csv_url = "gs://bucket/path/to/data.csv"
40
+ # csv_table = bigquery.external csv_url do |csv|
41
+ # csv.autodetect = true
42
+ # csv.skip_leading_rows = 1
43
+ # end
44
+ #
45
+ # data = bigquery.query "SELECT * FROM my_ext_table",
46
+ # external: { my_ext_table: csv_table }
47
+ #
48
+ # data.each do |row|
49
+ # puts row[:name]
50
+ # end
51
+ #
52
+ module External
53
##
# @private New External from URLs and format
def self.from_urls urls, format = nil
  fmt = source_format_for urls, format
  raise ArgumentError, "Unable to determine external table format" if fmt.nil?
  table_class_for(fmt).new.tap do |table|
    table.gapi.source_uris = Array(urls)
    table.gapi.source_format = fmt
  end
end
66
+
67
##
# @private New External (or subclass) from a Google API Client object.
def self.from_gapi gapi
  fmt = source_format_for gapi.source_uris, gapi.source_format
  raise ArgumentError, "Unable to determine external table format" if fmt.nil?
  table_class_for(fmt).from_gapi gapi
end
78
+
79
##
# @private Determine source_format from inputs. An explicitly given
# format (string or symbol, any case) wins; otherwise the format is
# sniffed from the URL extension or service prefix. Returns nil when
# neither identifies a format.
def self.source_format_for urls, format
  explicit =
    case format.to_s.downcase
    when "csv" then "CSV"
    when "json", "newline_delimited_json" then "NEWLINE_DELIMITED_JSON"
    when "sheets", "google_sheets" then "GOOGLE_SHEETS"
    when "avro" then "AVRO"
    when "datastore", "backup", "datastore_backup" then "DATASTORE_BACKUP"
    when "bigtable" then "BIGTABLE"
    end
  return explicit unless explicit.nil?
  Array(urls).each do |url|
    return "CSV" if url.end_with? ".csv"
    return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
    return "AVRO" if url.end_with? ".avro"
    return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
    if url.start_with? "https://docs.google.com/spreadsheets/"
      return "GOOGLE_SHEETS"
    end
    if url.start_with? "https://googleapis.com/bigtable/projects/"
      return "BIGTABLE"
    end
  end
  nil
end
108
+
109
##
# @private Determine table class from source_format
def self.table_class_for format
  return External::CsvSource if format == "CSV"
  return External::JsonSource if format == "NEWLINE_DELIMITED_JSON"
  return External::SheetsSource if format == "GOOGLE_SHEETS"
  return External::BigtableSource if format == "BIGTABLE"
  # AVRO and DATASTORE_BACKUP have no dedicated subclass
  External::DataSource
end
122
+
123
##
# # DataSource
#
# Represents an external data source that can be queried from
# directly, even though the data is not stored in BigQuery. Instead
# of loading or streaming the data, this object references the
# external data source.
#
# The AVRO and Datastore Backup formats use {External::DataSource}
# directly. See {External::CsvSource}, {External::JsonSource},
# {External::SheetsSource}, {External::BigtableSource} for the other
# formats.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   avro_url = "gs://bucket/path/to/data.avro"
#   avro_table = bigquery.external avro_url do |avro|
#     avro.autodetect = true
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: avro_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class DataSource
  ##
  # @private The Google API Client object.
  attr_accessor :gapi

  ##
  # @private Create an empty DataSource object.
  def initialize
    @gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
  end

  ##
  # The data format: "CSV", "GOOGLE_SHEETS",
  # "NEWLINE_DELIMITED_JSON", "AVRO", "DATASTORE_BACKUP", or
  # "BIGTABLE".
  #
  # @return [String]
  def format
    @gapi.source_format
  end

  ##
  # Whether the data format is "CSV".
  #
  # @return [Boolean]
  def csv?
    format == "CSV"
  end

  ##
  # Whether the data format is "NEWLINE_DELIMITED_JSON".
  #
  # @return [Boolean]
  def json?
    format == "NEWLINE_DELIMITED_JSON"
  end

  ##
  # Whether the data format is "GOOGLE_SHEETS".
  #
  # @return [Boolean]
  def sheets?
    format == "GOOGLE_SHEETS"
  end

  ##
  # Whether the data format is "AVRO".
  #
  # @return [Boolean]
  def avro?
    format == "AVRO"
  end

  ##
  # Whether the data format is "DATASTORE_BACKUP".
  #
  # @return [Boolean]
  def backup?
    format == "DATASTORE_BACKUP"
  end

  ##
  # Whether the data format is "BIGTABLE".
  #
  # @return [Boolean]
  def bigtable?
    format == "BIGTABLE"
  end

  ##
  # The fully-qualified URIs that point to the data in Google Cloud.
  # Cloud Storage URIs may contain one `*` wildcard after the bucket
  # name; Bigtable and Datastore Backup sources take exactly one URI
  # (the latter must end with `.backup_info`).
  #
  # @return [Array<String>]
  def urls
    @gapi.source_uris
  end

  ##
  # Indicates if the schema and format options are detected
  # automatically.
  #
  # @return [Boolean]
  def autodetect
    @gapi.autodetect
  end

  ##
  # Set whether to detect schema and format options automatically.
  # Any option specified explicitly will be honored.
  #
  # @param [Boolean] value New autodetect value
  def autodetect= value
    frozen_check!
    @gapi.autodetect = value
  end

  ##
  # The compression type of the data source. Possible values include
  # `"GZIP"` and `nil` (the default). Ignored for Bigtable, Datastore
  # Backup and Avro formats. Optional.
  #
  # @return [String]
  def compression
    @gapi.compression
  end

  ##
  # Set the compression type of the data source.
  #
  # @param [String] value New compression value
  def compression= value
    frozen_check!
    @gapi.compression = value
  end

  ##
  # Indicates if BigQuery should allow extra values that are not
  # represented in the table schema. When `false`, records with
  # extra columns are treated as bad records. Defaults to `false`.
  # Ignored for Bigtable, Datastore Backup and Avro formats.
  #
  # @return [Boolean]
  def ignore_unknown
    @gapi.ignore_unknown_values
  end

  ##
  # Set whether BigQuery should allow extra values that are not
  # represented in the table schema.
  #
  # @param [Boolean] value New ignore_unknown value
  def ignore_unknown= value
    frozen_check!
    @gapi.ignore_unknown_values = value
  end

  ##
  # The maximum number of bad records that BigQuery can ignore when
  # reading data. The default value is 0, which requires that all
  # records are valid. Ignored for Bigtable, Datastore Backup and
  # Avro formats.
  #
  # @return [Integer]
  def max_bad_records
    @gapi.max_bad_records
  end

  ##
  # Set the maximum number of bad records that BigQuery can ignore
  # when reading data.
  #
  # @param [Integer] value New max_bad_records value
  def max_bad_records= value
    frozen_check!
    @gapi.max_bad_records = value
  end

  ##
  # @private Google API Client object.
  def to_gapi
    @gapi
  end

  ##
  # @private New DataSource (or subclass) wrapping the given API
  # representation.
  def self.from_gapi gapi
    new.tap { |source| source.instance_variable_set :@gapi, gapi }
  end

  protected

  # Setters must not run on a frozen instance.
  def frozen_check!
    return unless frozen?
    raise ArgumentError, "Cannot modify external data source when frozen"
  end
end
562
+
563
##
# # CsvSource
#
# {External::CsvSource} is a subclass of {External::DataSource} and
# represents a CSV external data source that can be queried from
# directly, such as Google Cloud Storage or Google Drive, even
# though the data is not stored in BigQuery.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.autodetect = true
#     csv.skip_leading_rows = 1
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: csv_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class CsvSource < External::DataSource
  ##
  # @private Create an empty CsvSource object.
  def initialize
    super
    @gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
  end

  ##
  # Indicates if BigQuery should accept rows that are missing
  # trailing optional columns.
  #
  # @return [Boolean]
  def jagged_rows
    csv_options.allow_jagged_rows
  end

  ##
  # Set whether BigQuery should accept rows that are missing
  # trailing optional columns.
  #
  # @param [Boolean] value New jagged_rows value
  def jagged_rows= value
    frozen_check!
    csv_options.allow_jagged_rows = value
  end

  ##
  # Indicates if BigQuery should allow quoted data sections that
  # contain newline characters in a CSV file.
  #
  # @return [Boolean]
  def quoted_newlines
    csv_options.allow_quoted_newlines
  end

  ##
  # Set whether BigQuery should allow quoted data sections that
  # contain newline characters in a CSV file.
  #
  # @param [Boolean] value New quoted_newlines value
  def quoted_newlines= value
    frozen_check!
    csv_options.allow_quoted_newlines = value
  end

  ##
  # The character encoding of the data.
  #
  # @return [String]
  def encoding
    csv_options.encoding
  end

  ##
  # Set the character encoding of the data.
  #
  # @param [String] value New encoding value
  def encoding= value
    frozen_check!
    csv_options.encoding = value
  end

  ##
  # Checks if the character encoding of the data is "UTF-8". An
  # unset encoding counts as UTF-8, since that is the default.
  #
  # @return [Boolean]
  def utf8?
    encoding.nil? || encoding == "UTF-8"
  end

  ##
  # Checks if the character encoding of the data is "ISO-8859-1".
  #
  # @return [Boolean]
  def iso8859_1?
    encoding == "ISO-8859-1"
  end

  ##
  # The separator for fields in a CSV file.
  #
  # @return [String]
  def delimiter
    csv_options.field_delimiter
  end

  ##
  # Set the separator for fields in a CSV file.
  #
  # @param [String] value New delimiter value
  def delimiter= value
    frozen_check!
    csv_options.field_delimiter = value
  end

  ##
  # The value that is used to quote data sections in a CSV file.
  #
  # @return [String]
  def quote
    csv_options.quote
  end

  ##
  # Set the value that is used to quote data sections in a CSV file.
  #
  # @param [String] value New quote value
  def quote= value
    frozen_check!
    csv_options.quote = value
  end

  ##
  # The number of rows at the top of a CSV file that BigQuery will
  # skip when reading the data.
  #
  # @return [Integer]
  def skip_leading_rows
    csv_options.skip_leading_rows
  end

  ##
  # Set the number of rows at the top of a CSV file that BigQuery
  # will skip when reading the data.
  #
  # @param [Integer] row_count New skip_leading_rows value
  def skip_leading_rows= row_count
    frozen_check!
    csv_options.skip_leading_rows = row_count
  end

  ##
  # The schema for the data.
  #
  # @param [Boolean] replace Whether to replace the existing schema
  #   with the new schema. The default value is `false`.
  # @yield [schema] a block for setting the schema
  # @yieldparam [Schema] schema the object accepting the schema
  #
  # @return [Google::Cloud::Bigquery::Schema]
  def schema replace: false
    @schema ||= Schema.from_gapi @gapi.schema
    if replace
      frozen_check!
      @schema = Schema.from_gapi
    end
    @schema.freeze if frozen?
    yield @schema if block_given?
    @schema
  end

  ##
  # Set the schema for the data.
  #
  # @param [Schema] new_schema The schema object.
  def schema= new_schema
    frozen_check!
    @schema = new_schema
  end

  ##
  # The fields of the schema.
  def fields
    schema.fields
  end

  ##
  # The names of the columns in the schema.
  def headers
    schema.headers
  end

  ##
  # @private Google API Client object.
  def to_gapi
    @gapi.schema = @schema.to_gapi if @schema
    @gapi
  end

  ##
  # @private New CsvSource wrapping the given API representation.
  def self.from_gapi gapi
    super.tap do |source|
      source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
    end
  end

  private

  # The CsvOptions object nested inside the API representation.
  def csv_options
    @gapi.csv_options
  end
end
1002
+
1003
+ ##
1004
+ # # JsonSource
1005
+ #
1006
+ # {External::JsonSource} is a subclass of {External::DataSource} and
1007
+ # represents a JSON external data source that can be queried from
1008
+ # directly, such as Google Cloud Storage or Google Drive, even though
1009
+ # the data is not stored in BigQuery. Instead of loading or streaming
1010
+ # the data, this object references the external data source.
1011
+ #
1012
+ # @example
1013
+ # require "google/cloud/bigquery"
1014
+ #
1015
+ # bigquery = Google::Cloud::Bigquery.new
1016
+ #
1020
+ #
1021
+ # json_url = "gs://bucket/path/to/data.json"
1022
+ # json_table = bigquery.external json_url do |json|
1023
+ # json.schema do |schema|
1024
+ # schema.string "name", mode: :required
1025
+ # schema.string "email", mode: :required
1026
+ # schema.integer "age", mode: :required
1027
+ # schema.boolean "active", mode: :required
1028
+ # end
1029
+ # end
1030
+ #
1031
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1032
+ # external: { my_ext_table: json_table }
1033
+ #
1034
+ # data.each do |row|
1035
+ # puts row[:name]
1036
+ # end
1037
+ #
1038
+ class JsonSource < External::DataSource
1039
##
# The schema for the data. Memoized from the API representation on
# first access; a frozen source yields a frozen schema.
#
# @param [Boolean] replace Whether to replace the existing schema
#   with a new, empty schema. The default value is `false`.
# @yield [schema] a block for setting the schema
# @yieldparam [Schema] schema the object accepting the schema
#
# @return [Google::Cloud::Bigquery::Schema]
def schema replace: false
  @schema ||= Schema.from_gapi(@gapi.schema)
  if replace
    frozen_check!
    @schema = Schema.from_gapi
  end
  @schema.freeze if frozen?
  yield @schema if block_given?
  @schema
end
1076
+
1077
+ ##
1078
+ # Set the schema for the data.
1079
+ #
1080
+ # @param [Schema] new_schema The schema object.
1081
+ #
1082
+ # @example
1083
+ # require "google/cloud/bigquery"
1084
+ #
1085
+ # bigquery = Google::Cloud::Bigquery.new
1086
+ #
1087
+ # json_shema = bigquery.schema do |schema|
1088
+ # schema.string "name", mode: :required
1089
+ # schema.string "email", mode: :required
1090
+ # schema.integer "age", mode: :required
1091
+ # schema.boolean "active", mode: :required
1092
+ # end
1093
+ #
1094
+ # json_url = "gs://bucket/path/to/data.json"
1095
+ # json_table = bigquery.external json_url
1096
+ # json_table.schema = json_shema
1097
+ #
1098
+ def schema= new_schema
1099
+ frozen_check!
1100
+ @schema = new_schema
1101
+ end
1102
+
1103
+ ##
1104
+ # The fields of the schema.
1105
+ #
1106
+ def fields
1107
+ schema.fields
1108
+ end
1109
+
1110
+ ##
1111
+ # The names of the columns in the schema.
1112
+ #
1113
+ def headers
1114
+ schema.headers
1115
+ end
1116
+
1117
+ ##
1118
+ # @private Google API Client object.
1119
+ def to_gapi
1120
+ @gapi.schema = @schema.to_gapi if @schema
1121
+ @gapi
1122
+ end
1123
+
1124
+ ##
1125
+ # @private Google API Client object.
1126
+ def self.from_gapi gapi
1127
+ new_table = super
1128
+ schema = Schema.from_gapi gapi.schema
1129
+ new_table.instance_variable_set :@schema, schema
1130
+ new_table
1131
+ end
1132
+ end
1133
+
1134
+ ##
1135
+ # # SheetsSource
1136
+ #
1137
+ # {External::SheetsSource} is a subclass of {External::DataSource} and
1138
+ # represents a Google Sheets external data source that can be queried
1139
+ # from directly, even though the data is not stored in BigQuery. Instead
1140
+ # of loading or streaming the data, this object references the external
1141
+ # data source.
1142
+ #
1143
+ # @example
1144
+ # require "google/cloud/bigquery"
1145
+ #
1146
+ # bigquery = Google::Cloud::Bigquery.new
1147
+ #
1148
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
1149
+ # sheets_table = bigquery.external sheets_url do |sheets|
1150
+ # sheets.skip_leading_rows = 1
1151
+ # end
1152
+ #
1153
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1154
+ # external: { my_ext_table: sheets_table }
1155
+ #
1156
+ # data.each do |row|
1157
+ # puts row[:name]
1158
+ # end
1159
+ #
1160
+ class SheetsSource < External::DataSource
1161
+ ##
1162
+ # @private Create an empty SheetsSource object.
1163
+ def initialize
1164
+ super
1165
+ @gapi.google_sheets_options = \
1166
+ Google::Apis::BigqueryV2::GoogleSheetsOptions.new
1167
+ end
1168
+
1169
+ ##
1170
+ # The number of rows at the top of a sheet that BigQuery will skip
1171
+ # when reading the data. The default value is `0`.
1172
+ #
1173
+ # This property is useful if you have header rows that should be
1174
+ # skipped. When `autodetect` is on, behavior is the following:
1175
+ #
1176
+ # * `nil` - Autodetect tries to detect headers in the first row. If
1177
+ # they are not detected, the row is read as data. Otherwise data is
1178
+ # read starting from the second row.
1179
+ # * `0` - Instructs autodetect that there are no headers and data
1180
+ # should be read starting from the first row.
1181
+ # * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
1182
+ # in row `N`. If headers are not detected, row `N` is just skipped.
1183
+ # Otherwise row `N` is used to extract column names for the detected
1184
+ # schema.
1185
+ #
1186
+ # @return [Integer]
1187
+ #
1188
+ # @example
1189
+ # require "google/cloud/bigquery"
1190
+ #
1191
+ # bigquery = Google::Cloud::Bigquery.new
1192
+ #
1193
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
1194
+ # sheets_table = bigquery.external sheets_url do |sheets|
1195
+ # sheets.skip_leading_rows = 1
1196
+ # end
1197
+ #
1198
+ # sheets_table.skip_leading_rows #=> 1
1199
+ #
1200
+ def skip_leading_rows
1201
+ @gapi.google_sheets_options.skip_leading_rows
1202
+ end
1203
+
1204
+ ##
1205
+ # Set the number of rows at the top of a sheet that BigQuery will skip
1206
+ # when reading the data.
1207
+ #
1208
+ # @param [Integer] row_count New skip_leading_rows value
1209
+ #
1210
+ # @example
1211
+ # require "google/cloud/bigquery"
1212
+ #
1213
+ # bigquery = Google::Cloud::Bigquery.new
1214
+ #
1215
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
1216
+ # sheets_table = bigquery.external sheets_url do |sheets|
1217
+ # sheets.skip_leading_rows = 1
1218
+ # end
1219
+ #
1220
+ # sheets_table.skip_leading_rows #=> 1
1221
+ #
1222
+ def skip_leading_rows= row_count
1223
+ frozen_check!
1224
+ @gapi.google_sheets_options.skip_leading_rows = row_count
1225
+ end
1226
+ end
1227
+
1228
+ ##
1229
+ # # BigtableSource
1230
+ #
1231
+ # {External::BigtableSource} is a subclass of {External::DataSource} and
1232
+ # represents a Bigtable external data source that can be queried from
1233
+ # directly, even though the data is not stored in BigQuery. Instead of
1234
+ # loading or streaming the data, this object references the external
1235
+ # data source.
1236
+ #
1237
+ # @example
1238
+ # require "google/cloud/bigquery"
1239
+ #
1240
+ # bigquery = Google::Cloud::Bigquery.new
1241
+ #
1242
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1243
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1244
+ # bt.rowkey_as_string = true
1245
+ # bt.add_family "user" do |u|
1246
+ # u.add_string "name"
1247
+ # u.add_string "email"
1248
+ # u.add_integer "age"
1249
+ # u.add_boolean "active"
1250
+ # end
1251
+ # end
1252
+ #
1253
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1254
+ # external: { my_ext_table: bigtable_table }
1255
+ #
1256
+ # data.each do |row|
1257
+ # puts row[:name]
1258
+ # end
1259
+ #
1260
+ class BigtableSource < External::DataSource
1261
+ ##
1262
+ # @private Create an empty BigtableSource object.
1263
+ def initialize
1264
+ super
1265
+ @gapi.bigtable_options = \
1266
+ Google::Apis::BigqueryV2::BigtableOptions.new
1267
+ @families = []
1268
+ end
1269
+
1270
+ ##
1271
+ # List of column families to expose in the table schema along with
1272
+ # their types. This list restricts the column families that can be
1273
+ # referenced in queries and specifies their value types. You can use
1274
+ # this list to do type conversions - see
1275
+ # {BigtableSource::ColumnFamily#type} for more details. If you leave
1276
+ # this list empty, all column families are present in the table schema
1277
+ # and their values are read as `BYTES`. During a query only the column
1278
+ # families referenced in that query are read from Bigtable.
1279
+ #
1280
+ # @return [Array<BigtableSource::ColumnFamily>]
1281
+ #
1282
+ # @example
1283
+ # require "google/cloud/bigquery"
1284
+ #
1285
+ # bigquery = Google::Cloud::Bigquery.new
1286
+ #
1287
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1288
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1289
+ # bt.rowkey_as_string = true
1290
+ # bt.add_family "user" do |u|
1291
+ # u.add_string "name"
1292
+ # u.add_string "email"
1293
+ # u.add_integer "age"
1294
+ # u.add_boolean "active"
1295
+ # end
1296
+ # end
1297
+ #
1298
+ # bigtable_table.families.count #=> 1
1299
+ #
1300
+ def families
1301
+ @families
1302
+ end
1303
+
1304
+ ##
1305
+ # Add a column family to expose in the table schema along with its
1306
+ # types. Columns belonging to the column family may also be exposed.
1307
+ #
1308
+ # @param [String] family_id Identifier of the column family. See
1309
+ # {BigtableSource::ColumnFamily#family_id}.
1310
+ # @param [String] encoding The encoding of the values when the type is
1311
+ # not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
1312
+ # @param [Boolean] latest Whether only the latest version of value are
1313
+ # exposed for all columns in this column family. See
1314
+ # {BigtableSource::ColumnFamily#latest}.
1315
+ # @param [String] type The type to convert the value in cells of this
1316
+ # column. See {BigtableSource::ColumnFamily#type}.
1317
+ #
1318
+ # @yield [family] a block for setting the family
1319
+ # @yieldparam [BigtableSource::ColumnFamily] family the family object
1320
+ #
1321
+ # @return [BigtableSource::ColumnFamily]
1322
+ #
1323
+ # @example
1324
+ # require "google/cloud/bigquery"
1325
+ #
1326
+ # bigquery = Google::Cloud::Bigquery.new
1327
+ #
1328
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1329
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1330
+ # bt.rowkey_as_string = true
1331
+ # bt.add_family "user" do |u|
1332
+ # u.add_string "name"
1333
+ # u.add_string "email"
1334
+ # u.add_integer "age"
1335
+ # u.add_boolean "active"
1336
+ # end
1337
+ # end
1338
+ #
1339
+ def add_family family_id, encoding: nil, latest: nil, type: nil
1340
+ frozen_check!
1341
+ fam = BigtableSource::ColumnFamily.new
1342
+ fam.family_id = family_id
1343
+ fam.encoding = encoding if encoding
1344
+ fam.latest = latest if latest
1345
+ fam.type = type if type
1346
+ yield fam if block_given?
1347
+ @families << fam
1348
+ fam
1349
+ end
1350
+
1351
+ ##
1352
+ # Whether the rowkey column families will be read and converted to
1353
+ # string. Otherwise they are read with `BYTES` type values and users
1354
+ # need to manually cast them with `CAST` if necessary. The default
1355
+ # value is `false`.
1356
+ #
1357
+ # @return [Boolean]
1358
+ #
1359
+ # @example
1360
+ # require "google/cloud/bigquery"
1361
+ #
1362
+ # bigquery = Google::Cloud::Bigquery.new
1363
+ #
1364
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1365
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1366
+ # bt.rowkey_as_string = true
1367
+ # end
1368
+ #
1369
+ # bigtable_table.rowkey_as_string #=> true
1370
+ #
1371
+ def rowkey_as_string
1372
+ @gapi.bigtable_options.read_rowkey_as_string
1373
+ end
1374
+
1375
+ ##
1376
+ # Set the number of rows at the top of a sheet that BigQuery will skip
1377
+ # when reading the data.
1378
+ #
1379
+ # @param [Boolean] row_rowkey New rowkey_as_string value
1380
+ #
1381
+ # @example
1382
+ # require "google/cloud/bigquery"
1383
+ #
1384
+ # bigquery = Google::Cloud::Bigquery.new
1385
+ #
1386
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1387
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1388
+ # bt.rowkey_as_string = true
1389
+ # end
1390
+ #
1391
+ # bigtable_table.rowkey_as_string #=> true
1392
+ #
1393
+ def rowkey_as_string= row_rowkey
1394
+ frozen_check!
1395
+ @gapi.bigtable_options.read_rowkey_as_string = row_rowkey
1396
+ end
1397
+
1398
+ ##
1399
+ # @private Google API Client object.
1400
+ def to_gapi
1401
+ @gapi.bigtable_options.column_families = @families.map(&:to_gapi)
1402
+ @gapi
1403
+ end
1404
+
1405
+ ##
1406
+ # @private Google API Client object.
1407
+ def self.from_gapi gapi
1408
+ new_table = super
1409
+ families = Array gapi.bigtable_options.column_families
1410
+ families = families.map do |fam_gapi|
1411
+ BigtableSource::ColumnFamily.from_gapi fam_gapi
1412
+ end
1413
+ new_table.instance_variable_set :@families, families
1414
+ new_table
1415
+ end
1416
+
1417
+ ##
1418
+ # @private
1419
+ def freeze
1420
+ @families.map(&:freeze!)
1421
+ @families.freeze!
1422
+ super
1423
+ end
1424
+
1425
+ protected
1426
+
1427
+ def frozen_check!
1428
+ return unless frozen?
1429
+ fail ArgumentError, "Cannot modify external data source when frozen"
1430
+ end
1431
+
1432
+ ##
1433
+ # # BigtableSource::ColumnFamily
1434
+ #
1435
+ # A Bigtable column family used to expose in the table schema along
1436
+ # with its types and columns.
1437
+ #
1438
+ # @example
1439
+ # require "google/cloud/bigquery"
1440
+ #
1441
+ # bigquery = Google::Cloud::Bigquery.new
1442
+ #
1443
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1444
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1445
+ # bt.rowkey_as_string = true
1446
+ # bt.add_family "user" do |u|
1447
+ # u.add_string "name"
1448
+ # u.add_string "email"
1449
+ # u.add_integer "age"
1450
+ # u.add_boolean "active"
1451
+ # end
1452
+ # end
1453
+ #
1454
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1455
+ # external: { my_ext_table: bigtable_table }
1456
+ #
1457
+ # data.each do |row|
1458
+ # puts row[:name]
1459
+ # end
1460
+ #
1461
+ class ColumnFamily
1462
+ ##
1463
+ # @private Create an empty BigtableSource::ColumnFamily object.
1464
+ def initialize
1465
+ @gapi = Google::Apis::BigqueryV2::BigtableColumnFamily.new
1466
+ @columns = []
1467
+ end
1468
+
1469
+ ##
1470
+ # The encoding of the values when the type is not `STRING`.
1471
+ #
1472
+ # @return [String]
1473
+ #
1474
+ # @example
1475
+ # require "google/cloud/bigquery"
1476
+ #
1477
+ # bigquery = Google::Cloud::Bigquery.new
1478
+ #
1479
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1480
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1481
+ # bt.add_family "user" do |u|
1482
+ # u.encoding = "UTF-8"
1483
+ # end
1484
+ # end
1485
+ #
1486
+ # bigtable_table.families[0].encoding #=> "UTF-8"
1487
+ #
1488
+ def encoding
1489
+ @gapi.encoding
1490
+ end
1491
+
1492
+ ##
1493
+ # Set the encoding of the values when the type is not `STRING`.
1494
+ # Acceptable encoding values are:
1495
+ #
1496
+ # * `TEXT` - indicates values are alphanumeric text strings.
1497
+ # * `BINARY` - indicates values are encoded using HBase
1498
+ # `Bytes.toBytes` family of functions. This can be overridden on a
1499
+ # column.
1500
+ #
1501
+ # @param [String] new_encoding New encoding value
1502
+ #
1503
+ # @example
1504
+ # require "google/cloud/bigquery"
1505
+ #
1506
+ # bigquery = Google::Cloud::Bigquery.new
1507
+ #
1508
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1509
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1510
+ # bt.add_family "user" do |u|
1511
+ # u.encoding = "UTF-8"
1512
+ # end
1513
+ # end
1514
+ #
1515
+ # bigtable_table.families[0].encoding #=> "UTF-8"
1516
+ #
1517
+ def encoding= new_encoding
1518
+ frozen_check!
1519
+ @gapi.encoding = new_encoding
1520
+ end
1521
+
1522
+ ##
1523
+ # Identifier of the column family.
1524
+ #
1525
+ # @return [String]
1526
+ #
1527
+ # @example
1528
+ # require "google/cloud/bigquery"
1529
+ #
1530
+ # bigquery = Google::Cloud::Bigquery.new
1531
+ #
1532
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1533
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1534
+ # bt.add_family "user"
1535
+ # end
1536
+ #
1537
+ # bigtable_table.families[0].family_id #=> "user"
1538
+ #
1539
+ def family_id
1540
+ @gapi.family_id
1541
+ end
1542
+
1543
+ ##
1544
+ # Set the identifier of the column family.
1545
+ #
1546
+ # @param [String] new_family_id New family_id value
1547
+ #
1548
+ # @example
1549
+ # require "google/cloud/bigquery"
1550
+ #
1551
+ # bigquery = Google::Cloud::Bigquery.new
1552
+ #
1553
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1554
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1555
+ # bt.add_family "user"
1556
+ # end
1557
+ #
1558
+ # bigtable_table.families[0].family_id #=> "user"
1559
+ # bigtable_table.families[0].family_id = "User"
1560
+ # bigtable_table.families[0].family_id #=> "User"
1561
+ #
1562
+ def family_id= new_family_id
1563
+ frozen_check!
1564
+ @gapi.family_id = new_family_id
1565
+ end
1566
+
1567
+ ##
1568
+ # Whether only the latest version of value are exposed for all
1569
+ # columns in this column family.
1570
+ #
1571
+ # @return [Boolean]
1572
+ #
1573
+ # @example
1574
+ # require "google/cloud/bigquery"
1575
+ #
1576
+ # bigquery = Google::Cloud::Bigquery.new
1577
+ #
1578
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1579
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1580
+ # bt.add_family "user" do |u|
1581
+ # u.latest = true
1582
+ # end
1583
+ # end
1584
+ #
1585
+ # bigtable_table.families[0].latest #=> true
1586
+ #
1587
+ def latest
1588
+ @gapi.only_read_latest
1589
+ end
1590
+
1591
+ ##
1592
+ # Set whether only the latest version of value are exposed for all
1593
+ # columns in this column family.
1594
+ #
1595
+ # @param [Boolean] new_latest New latest value
1596
+ #
1597
+ # @example
1598
+ # require "google/cloud/bigquery"
1599
+ #
1600
+ # bigquery = Google::Cloud::Bigquery.new
1601
+ #
1602
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1603
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1604
+ # bt.add_family "user" do |u|
1605
+ # u.latest = true
1606
+ # end
1607
+ # end
1608
+ #
1609
+ # bigtable_table.families[0].latest #=> true
1610
+ #
1611
+ def latest= new_latest
1612
+ frozen_check!
1613
+ @gapi.only_read_latest = new_latest
1614
+ end
1615
+
1616
+ ##
1617
+ # The type to convert the value in cells of this column family. The
1618
+ # values are expected to be encoded using HBase `Bytes.toBytes`
1619
+ # function when using the `BINARY` encoding value. The following
1620
+ # BigQuery types are allowed:
1621
+ #
1622
+ # * `BYTES`
1623
+ # * `STRING`
1624
+ # * `INTEGER`
1625
+ # * `FLOAT`
1626
+ # * `BOOLEAN`
1627
+ #
1628
+ # Default type is `BYTES`. This can be overridden on a column.
1629
+ #
1630
+ # @return [String]
1631
+ #
1632
+ # @example
1633
+ # require "google/cloud/bigquery"
1634
+ #
1635
+ # bigquery = Google::Cloud::Bigquery.new
1636
+ #
1637
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1638
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1639
+ # bt.add_family "user" do |u|
1640
+ # u.type = "STRING"
1641
+ # end
1642
+ # end
1643
+ #
1644
+ # bigtable_table.families[0].type #=> "STRING"
1645
+ #
1646
+ def type
1647
+ @gapi.type
1648
+ end
1649
+
1650
+ ##
1651
+ # Set the type to convert the value in cells of this column family.
1652
+ # The values are expected to be encoded using HBase `Bytes.toBytes`
1653
+ # function when using the `BINARY` encoding value. The following
1654
+ # BigQuery types are allowed:
1655
+ #
1656
+ # * `BYTES`
1657
+ # * `STRING`
1658
+ # * `INTEGER`
1659
+ # * `FLOAT`
1660
+ # * `BOOLEAN`
1661
+ #
1662
+ # Default type is `BYTES`. This can be overridden on a column.
1663
+ #
1664
+ # @param [String] new_type New type value
1665
+ #
1666
+ # @example
1667
+ # require "google/cloud/bigquery"
1668
+ #
1669
+ # bigquery = Google::Cloud::Bigquery.new
1670
+ #
1671
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1672
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1673
+ # bt.add_family "user" do |u|
1674
+ # u.type = "STRING"
1675
+ # end
1676
+ # end
1677
+ #
1678
+ # bigtable_table.families[0].type #=> "STRING"
1679
+ #
1680
+ def type= new_type
1681
+ frozen_check!
1682
+ @gapi.type = new_type
1683
+ end
1684
+
1685
+ ##
1686
+ # Lists of columns that should be exposed as individual fields.
1687
+ #
1688
+ # @return [Array<BigtableSource::Column>]
1689
+ #
1690
+ # @example
1691
+ # require "google/cloud/bigquery"
1692
+ #
1693
+ # bigquery = Google::Cloud::Bigquery.new
1694
+ #
1695
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1696
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1697
+ # bt.rowkey_as_string = true
1698
+ # bt.add_family "user" do |u|
1699
+ # u.add_string "name"
1700
+ # u.add_string "email"
1701
+ # u.add_integer "age"
1702
+ # u.add_boolean "active"
1703
+ # end
1704
+ # end
1705
+ #
1706
+ # bigtable_table.families[0].columns.count #=> 4
1707
+ #
1708
+ def columns
1709
+ @columns
1710
+ end
1711
+
1712
+ ##
1713
+ # Add a column to the column family to expose in the table schema
1714
+ # along with its types.
1715
+ #
1716
+ # @param [String] qualifier Qualifier of the column. See
1717
+ # {BigtableSource::Column#qualifier}.
1718
+ # @param [String] as A valid identifier to be used as the column
1719
+ # field name if the qualifier is not a valid BigQuery field
1720
+ # identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
1721
+ # {BigtableSource::Column#field_name}.
1722
+ # @param [String] type The type to convert the value in cells of
1723
+ # this column. See {BigtableSource::Column#type}. The following
1724
+ # BigQuery types are allowed:
1725
+ #
1726
+ # * `BYTES`
1727
+ # * `STRING`
1728
+ # * `INTEGER`
1729
+ # * `FLOAT`
1730
+ # * `BOOLEAN`
1731
+ #
1732
+ # @yield [column] a block for setting the column
1733
+ # @yieldparam [BigtableSource::Column] column the column object
1734
+ #
1735
+ # @return [Array<BigtableSource::Column>]
1736
+ #
1737
+ # @example
1738
+ # require "google/cloud/bigquery"
1739
+ #
1740
+ # bigquery = Google::Cloud::Bigquery.new
1741
+ #
1742
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1743
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1744
+ # bt.rowkey_as_string = true
1745
+ # bt.add_family "user" do |u|
1746
+ # u.add_column "name", type: "STRING"
1747
+ # end
1748
+ # end
1749
+ #
1750
+ def add_column qualifier, as: nil, type: nil
1751
+ frozen_check!
1752
+ col = BigtableSource::Column.new
1753
+ col.qualifier = qualifier
1754
+ col.field_name = as if as
1755
+ col.type = type if type
1756
+ yield col if block_given?
1757
+ @columns << col
1758
+ col
1759
+ end
1760
+
1761
+ ##
1762
+ # Add a column to the column family to expose in the table schema
1763
+ # that is specified as the `BYTES` type.
1764
+ #
1765
+ # @param [String] qualifier Qualifier of the column. See
1766
+ # {BigtableSource::Column#qualifier}.
1767
+ # @param [String] as A valid identifier to be used as the column
1768
+ # field name if the qualifier is not a valid BigQuery field
1769
+ # identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
1770
+ # {BigtableSource::Column#field_name}.
1771
+ #
1772
+ # @yield [column] a block for setting the column
1773
+ # @yieldparam [BigtableSource::Column] column the column object
1774
+ #
1775
+ # @return [Array<BigtableSource::Column>]
1776
+ #
1777
+ # @example
1778
+ # require "google/cloud/bigquery"
1779
+ #
1780
+ # bigquery = Google::Cloud::Bigquery.new
1781
+ #
1782
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1783
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1784
+ # bt.rowkey_as_string = true
1785
+ # bt.add_family "user" do |u|
1786
+ # u.add_bytes "avatar"
1787
+ # end
1788
+ # end
1789
+ #
1790
+ def add_bytes qualifier, as: nil
1791
+ col = add_column qualifier, as: as, type: "BYTES"
1792
+ yield col if block_given?
1793
+ col
1794
+ end
1795
+
1796
+ ##
1797
+ # Add a column to the column family to expose in the table schema
1798
+ # that is specified as the `STRING` type.
1799
+ #
1800
+ # @param [String] qualifier Qualifier of the column. See
1801
+ # {BigtableSource::Column#qualifier}.
1802
+ # @param [String] as A valid identifier to be used as the column
1803
+ # field name if the qualifier is not a valid BigQuery field
1804
+ # identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
1805
+ # {BigtableSource::Column#field_name}.
1806
+ #
1807
+ # @yield [column] a block for setting the column
1808
+ # @yieldparam [BigtableSource::Column] column the column object
1809
+ #
1810
+ # @return [Array<BigtableSource::Column>]
1811
+ #
1812
+ # @example
1813
+ # require "google/cloud/bigquery"
1814
+ #
1815
+ # bigquery = Google::Cloud::Bigquery.new
1816
+ #
1817
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1818
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1819
+ # bt.rowkey_as_string = true
1820
+ # bt.add_family "user" do |u|
1821
+ # u.add_string "name"
1822
+ # end
1823
+ # end
1824
+ #
1825
+ def add_string qualifier, as: nil
1826
+ col = add_column qualifier, as: as, type: "STRING"
1827
+ yield col if block_given?
1828
+ col
1829
+ end
1830
+
1831
+ ##
1832
+ # Add a column to the column family to expose in the table schema
1833
+ # that is specified as the `INTEGER` type.
1834
+ #
1835
+ # @param [String] qualifier Qualifier of the column. See
1836
+ # {BigtableSource::Column#qualifier}.
1837
+ # @param [String] as A valid identifier to be used as the column
1838
+ # field name if the qualifier is not a valid BigQuery field
1839
+ # identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
1840
+ # {BigtableSource::Column#field_name}.
1841
+ #
1842
+ # @yield [column] a block for setting the column
1843
+ # @yieldparam [BigtableSource::Column] column the column object
1844
+ #
1845
+ # @return [Array<BigtableSource::Column>]
1846
+ #
1847
+ # @example
1848
+ # require "google/cloud/bigquery"
1849
+ #
1850
+ # bigquery = Google::Cloud::Bigquery.new
1851
+ #
1852
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1853
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1854
+ # bt.rowkey_as_string = true
1855
+ # bt.add_family "user" do |u|
1856
+ # u.add_integer "age"
1857
+ # end
1858
+ # end
1859
+ #
1860
+ def add_integer qualifier, as: nil
1861
+ col = add_column qualifier, as: as, type: "INTEGER"
1862
+ yield col if block_given?
1863
+ col
1864
+ end
1865
+
1866
+ ##
1867
+ # Add a column to the column family to expose in the table schema
1868
+ # that is specified as the `FLOAT` type.
1869
+ #
1870
+ # @param [String] qualifier Qualifier of the column. See
1871
+ # {BigtableSource::Column#qualifier}.
1872
+ # @param [String] as A valid identifier to be used as the column
1873
+ # field name if the qualifier is not a valid BigQuery field
1874
+ # identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
1875
+ # {BigtableSource::Column#field_name}.
1876
+ #
1877
+ # @yield [column] a block for setting the column
1878
+ # @yieldparam [BigtableSource::Column] column the column object
1879
+ #
1880
+ # @return [Array<BigtableSource::Column>]
1881
+ #
1882
+ # @example
1883
+ # require "google/cloud/bigquery"
1884
+ #
1885
+ # bigquery = Google::Cloud::Bigquery.new
1886
+ #
1887
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1888
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1889
+ # bt.rowkey_as_string = true
1890
+ # bt.add_family "user" do |u|
1891
+ # u.add_float "score"
1892
+ # end
1893
+ # end
1894
+ #
1895
+ def add_float qualifier, as: nil
1896
+ col = add_column qualifier, as: as, type: "FLOAT"
1897
+ yield col if block_given?
1898
+ col
1899
+ end
1900
+
1901
+ ##
1902
+ # Add a column to the column family to expose in the table schema
1903
+ # that is specified as the `BOOLEAN` type.
1904
+ #
1905
+ # @param [String] qualifier Qualifier of the column. See
1906
+ # {BigtableSource::Column#qualifier}.
1907
+ # @param [String] as A valid identifier to be used as the column
1908
+ # field name if the qualifier is not a valid BigQuery field
1909
+ # identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
1910
+ # {BigtableSource::Column#field_name}.
1911
+ #
1912
+ # @yield [column] a block for setting the column
1913
+ # @yieldparam [BigtableSource::Column] column the column object
1914
+ #
1915
+ # @return [Array<BigtableSource::Column>]
1916
+ #
1917
+ # @example
1918
+ # require "google/cloud/bigquery"
1919
+ #
1920
+ # bigquery = Google::Cloud::Bigquery.new
1921
+ #
1922
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1923
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1924
+ # bt.rowkey_as_string = true
1925
+ # bt.add_family "user" do |u|
1926
+ # u.add_boolean "active"
1927
+ # end
1928
+ # end
1929
+ #
1930
+ def add_boolean qualifier, as: nil
1931
+ col = add_column qualifier, as: as, type: "BOOLEAN"
1932
+ yield col if block_given?
1933
+ col
1934
+ end
1935
+
1936
+ ##
1937
+ # @private Google API Client object.
1938
+ def to_gapi
1939
+ @gapi.columns = @columns.map(&:to_gapi)
1940
+ @gapi
1941
+ end
1942
+
1943
+ ##
1944
+ # @private Google API Client object.
1945
+ def self.from_gapi gapi
1946
+ new_fam = new
1947
+ new_fam.instance_variable_set :@gapi, gapi
1948
+ columns = Array(gapi.columns).map do |col_gapi|
1949
+ BigtableSource::Column.from_gapi col_gapi
1950
+ end
1951
+ new_fam.instance_variable_set :@columns, columns
1952
+ new_fam
1953
+ end
1954
+
1955
+ ##
1956
+ # @private
1957
+ def freeze
1958
+ @columns.map(&:freeze!)
1959
+ @columns.freeze!
1960
+ super
1961
+ end
1962
+
1963
+ protected
1964
+
1965
+ def frozen_check!
1966
+ return unless frozen?
1967
+ fail ArgumentError,
1968
+ "Cannot modify external data source when frozen"
1969
+ end
1970
+ end
1971
+
1972
+ ##
1973
+ # # BigtableSource::Column
1974
+ #
1975
+ # A Bigtable column to expose in the table schema along with its
1976
+ # types.
1977
+ #
1978
+ # @example
1979
+ # require "google/cloud/bigquery"
1980
+ #
1981
+ # bigquery = Google::Cloud::Bigquery.new
1982
+ #
1983
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1984
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1985
+ # bt.rowkey_as_string = true
1986
+ # bt.add_family "user" do |u|
1987
+ # u.add_string "name"
1988
+ # u.add_string "email"
1989
+ # u.add_integer "age"
1990
+ # u.add_boolean "active"
1991
+ # end
1992
+ # end
1993
+ #
1994
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1995
+ # external: { my_ext_table: bigtable_table }
1996
+ #
1997
+ # data.each do |row|
1998
+ # puts row[:name]
1999
+ # end
2000
+ #
2001
+ class Column
2002
+ ##
2003
+ # @private Create an empty BigtableSource::Column object.
2004
+ def initialize
2005
+ @gapi = Google::Apis::BigqueryV2::BigtableColumn.new
2006
+ end
2007
+
2008
+ ##
2009
+ # Qualifier of the column. Columns in the parent column family that
2010
+ # has this exact qualifier are exposed as `.` field. If the
2011
+ # qualifier is valid UTF-8 string, it will be represented as a UTF-8
2012
+ # string. Otherwise, it will represented as a ASCII-8BIT string.
2013
+ #
2014
+ # If the qualifier is not a valid BigQuery field identifier (does
2015
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
2016
+ # provided as `field_name`.
2017
+ #
2018
+ # @return [String]
2019
+ #
2020
+ # @example
2021
+ # require "google/cloud/bigquery"
2022
+ #
2023
+ # bigquery = Google::Cloud::Bigquery.new
2024
+ #
2025
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2026
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2027
+ # bt.add_family "user" do |u|
2028
+ # u.add_string "name" do |col|
2029
+ # col.qualifier # "user"
2030
+ # col.qualifier = "User"
2031
+ # col.qualifier # "User"
2032
+ # end
2033
+ # end
2034
+ # end
2035
+ #
2036
+ def qualifier
2037
+ @gapi.qualifier_string || \
2038
+ Base64.strict_decode64(@gapi.qualifier_encoded.to_s)
2039
+ end
2040
+
2041
+ ##
2042
+ # Set the qualifier of the column. Columns in the parent column
2043
+ # family that has this exact qualifier are exposed as `.` field.
2044
+ # Values that are valid UTF-8 strings will be treated as such. All
2045
+ # other values will be treated as `BINARY`.
2046
+ #
2047
+ # @param [String] new_qualifier New qualifier value
2048
+ #
2049
+ # @example
2050
+ # require "google/cloud/bigquery"
2051
+ #
2052
+ # bigquery = Google::Cloud::Bigquery.new
2053
+ #
2054
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2055
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2056
+ # bt.add_family "user" do |u|
2057
+ # u.add_string "name" do |col|
2058
+ # col.qualifier # "user"
2059
+ # col.qualifier = "User"
2060
+ # col.qualifier # "User"
2061
+ # end
2062
+ # end
2063
+ # end
2064
+ #
2065
+ def qualifier= new_qualifier
2066
+ frozen_check!
2067
+ fail ArgumentError if new_qualifier.nil?
2068
+
2069
+ utf8_qualifier = new_qualifier.encode Encoding::UTF_8
2070
+ if utf8_qualifier.valid_encoding?
2071
+ @gapi.qualifier_string = utf8_qualifier
2072
+ if @gapi.instance_variables.include? :@qualifier_encoded
2073
+ @gapi.remove_instance_variable :@qualifier_encoded
2074
+ end
2075
+ else
2076
+ @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
2077
+ if @gapi.instance_variables.include? :@qualifier_string
2078
+ @gapi.remove_instance_variable :@qualifier_string
2079
+ end
2080
+ end
2081
+ rescue EncodingError
2082
+ @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
2083
+ if @gapi.instance_variables.include? :@qualifier_string
2084
+ @gapi.remove_instance_variable :@qualifier_string
2085
+ end
2086
+ end
2087
+
2088
+ ##
2089
+ # The encoding of the values when the type is not `STRING`.
2090
+ #
2091
+ # @return [String]
2092
+ #
2093
+ # @example
2094
+ # require "google/cloud/bigquery"
2095
+ #
2096
+ # bigquery = Google::Cloud::Bigquery.new
2097
+ #
2098
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2099
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2100
+ # bt.add_family "user" do |u|
2101
+ # u.add_bytes "name" do |col|
2102
+ # col.encoding = "TEXT"
2103
+ # col.encoding # "TEXT"
2104
+ # end
2105
+ # end
2106
+ # end
2107
+ #
2108
+ def encoding
2109
+ @gapi.encoding
2110
+ end
2111
+
2112
+ ##
2113
+ # Set the encoding of the values when the type is not `STRING`.
2114
+ # Acceptable encoding values are:
2115
+ #
2116
+ # * `TEXT` - indicates values are alphanumeric text strings.
2117
+ # * `BINARY` - indicates values are encoded using HBase
2118
+ # `Bytes.toBytes` family of functions. This can be overridden on a
2119
+ # column.
2120
+ #
2121
+ # @param [String] new_encoding New encoding value
2122
+ #
2123
+ # @example
2124
+ # require "google/cloud/bigquery"
2125
+ #
2126
+ # bigquery = Google::Cloud::Bigquery.new
2127
+ #
2128
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2129
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2130
+ # bt.add_family "user" do |u|
2131
+ # u.add_bytes "name" do |col|
2132
+ # col.encoding = "TEXT"
2133
+ # col.encoding # "TEXT"
2134
+ # end
2135
+ # end
2136
+ # end
2137
+ #
2138
+ def encoding= new_encoding
2139
+ frozen_check!
2140
+ @gapi.encoding = new_encoding
2141
+ end
2142
+
2143
+ ##
2144
+ # If the qualifier is not a valid BigQuery field identifier (does
2145
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
2146
+ # provided as the column field name and is used as field name in
2147
+ # queries.
2148
+ #
2149
+ # @return [String]
2150
+ #
2151
+ # @example
2152
+ # require "google/cloud/bigquery"
2153
+ #
2154
+ # bigquery = Google::Cloud::Bigquery.new
2155
+ #
2156
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2157
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2158
+ # bt.add_family "user" do |u|
2159
+ # u.add_string "001_name", as: "user" do |col|
2160
+ # col.field_name # "user"
2161
+ # col.field_name = "User"
2162
+ # col.field_name # "User"
2163
+ # end
2164
+ # end
2165
+ # end
2166
+ #
2167
+ def field_name
2168
+ @gapi.field_name
2169
+ end
2170
+
2171
+ ##
2172
+ # Sets the identifier to be used as the column field name in queries
2173
+ # when the qualifier is not a valid BigQuery field identifier (does
2174
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`).
2175
+ #
2176
+ # @param [String] new_field_name New field_name value
2177
+ #
2178
+ # @example
2179
+ # require "google/cloud/bigquery"
2180
+ #
2181
+ # bigquery = Google::Cloud::Bigquery.new
2182
+ #
2183
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2184
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2185
+ # bt.add_family "user" do |u|
2186
+ # u.add_string "001_name", as: "user" do |col|
2187
+ # col.field_name # "user"
2188
+ # col.field_name = "User"
2189
+ # col.field_name # "User"
2190
+ # end
2191
+ # end
2192
+ # end
2193
+ #
2194
+ def field_name= new_field_name
2195
+ frozen_check!
2196
+ @gapi.field_name = new_field_name
2197
+ end
2198
+
2199
+ ##
2200
+ # Whether only the latest version of value in this column are
2201
+ # exposed. Can also be set at the column family level. However, this
2202
+ # value takes precedence when set at both levels.
2203
+ #
2204
+ # @return [Boolean]
2205
+ #
2206
+ # @example
2207
+ # require "google/cloud/bigquery"
2208
+ #
2209
+ # bigquery = Google::Cloud::Bigquery.new
2210
+ #
2211
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2212
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2213
+ # bt.add_family "user" do |u|
2214
+ # u.add_string "name" do |col|
2215
+ # col.latest = true
2216
+ # col.latest # true
2217
+ # end
2218
+ # end
2219
+ # end
2220
+ #
2221
+ def latest
2222
+ @gapi.only_read_latest
2223
+ end
2224
+
2225
+ ##
2226
+ # Set whether only the latest version of value in this column are
2227
+ # exposed. Can also be set at the column family level. However, this
2228
+ # value takes precedence when set at both levels.
2229
+ #
2230
+ # @param [Boolean] new_latest New latest value
2231
+ #
2232
+ # @example
2233
+ # require "google/cloud/bigquery"
2234
+ #
2235
+ # bigquery = Google::Cloud::Bigquery.new
2236
+ #
2237
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2238
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2239
+ # bt.add_family "user" do |u|
2240
+ # u.add_string "name" do |col|
2241
+ # col.latest = true
2242
+ # col.latest # true
2243
+ # end
2244
+ # end
2245
+ # end
2246
+ #
2247
+ def latest= new_latest
2248
+ frozen_check!
2249
+ @gapi.only_read_latest = new_latest
2250
+ end
2251
+
2252
+ ##
2253
+ # The type to convert the value in cells of this column. The values
2254
+ # are expected to be encoded using HBase `Bytes.toBytes` function
2255
+ # when using the `BINARY` encoding value. The following BigQuery
2256
+ # types are allowed:
2257
+ #
2258
+ # * `BYTES`
2259
+ # * `STRING`
2260
+ # * `INTEGER`
2261
+ # * `FLOAT`
2262
+ # * `BOOLEAN`
2263
+ #
2264
+ # Default type is `BYTES`. Can also be set at the column family
2265
+ # level. However, this value takes precedence when set at both
2266
+ # levels.
2267
+ #
2268
+ # @return [String]
2269
+ #
2270
+ # @example
2271
+ # require "google/cloud/bigquery"
2272
+ #
2273
+ # bigquery = Google::Cloud::Bigquery.new
2274
+ #
2275
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2276
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2277
+ # bt.add_family "user" do |u|
2278
+ # u.add_string "name" do |col|
2279
+ # col.type # "STRING"
2280
+ # end
2281
+ # end
2282
+ # end
2283
+ #
2284
+ def type
2285
+ @gapi.type
2286
+ end
2287
+
2288
+ ##
2289
+ # Set the type to convert the value in cells of this column. The
2290
+ # values are expected to be encoded using HBase `Bytes.toBytes`
2291
+ # function when using the `BINARY` encoding value. The following
2292
+ # BigQuery types are allowed:
2293
+ #
2294
+ # * `BYTES`
2295
+ # * `STRING`
2296
+ # * `INTEGER`
2297
+ # * `FLOAT`
2298
+ # * `BOOLEAN`
2299
+ #
2300
+ # Default type is `BYTES`. Can also be set at the column family
2301
+ # level. However, this value takes precedence when set at both
2302
+ # levels.
2303
+ #
2304
+ # @param [String] new_type New type value
2305
+ #
2306
+ # @example
2307
+ # require "google/cloud/bigquery"
2308
+ #
2309
+ # bigquery = Google::Cloud::Bigquery.new
2310
+ #
2311
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2312
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2313
+ # bt.add_family "user" do |u|
2314
+ # u.add_string "name" do |col|
2315
+ # col.type # "STRING"
2316
+ # col.type = "BYTES"
2317
+ # col.type # "BYTES"
2318
+ # end
2319
+ # end
2320
+ # end
2321
+ #
2322
+ def type= new_type
2323
+ frozen_check!
2324
+ @gapi.type = new_type
2325
+ end
2326
+
2327
+ ##
2328
+ # @private Google API Client object.
2329
+ def to_gapi
2330
+ @gapi
2331
+ end
2332
+
2333
+ ##
2334
+ # @private Google API Client object.
2335
+ def self.from_gapi gapi
2336
+ new_col = new
2337
+ new_col.instance_variable_set :@gapi, gapi
2338
+ new_col
2339
+ end
2340
+
2341
+ protected
2342
+
2343
+ def frozen_check!
2344
+ return unless frozen?
2345
+ fail ArgumentError,
2346
+ "Cannot modify external data source when frozen"
2347
+ end
2348
+ end
2349
+ end
2350
+ end
2351
+ end
2352
+ end
2353
+ end