google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,9 +71,9 @@ module Google
71
71
  def next
72
72
  return nil unless next?
73
73
  ensure_service!
74
- options = { all: @hidden, token: token, max: @max }
74
+ options = { all: @hidden, filter: @filter, token: token, max: @max }
75
75
  gapi = @service.list_datasets options
76
- self.class.from_gapi gapi, @service, @hidden, @max
76
+ self.class.from_gapi gapi, @service, @hidden, @filter, @max
77
77
  end
78
78
 
79
79
  ##
@@ -140,7 +140,8 @@ module Google
140
140
 
141
141
  ##
142
142
  # @private New Dataset::List from a response object.
143
- def self.from_gapi gapi_list, service, hidden = nil, max = nil
143
+ def self.from_gapi gapi_list, service, hidden = nil, filter = nil,
144
+ max = nil
144
145
  datasets = List.new(Array(gapi_list.datasets).map do |gapi_object|
145
146
  Dataset.from_gapi gapi_object, service
146
147
  end)
@@ -148,6 +149,7 @@ module Google
148
149
  datasets.instance_variable_set :@etag, gapi_list.etag
149
150
  datasets.instance_variable_set :@service, service
150
151
  datasets.instance_variable_set :@hidden, hidden
152
+ datasets.instance_variable_set :@filter, filter
151
153
  datasets.instance_variable_set :@max, max
152
154
  datasets
153
155
  end
@@ -0,0 +1,2353 @@
1
+ # Copyright 2017 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "base64"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ ##
23
+ # # External
24
+ #
25
+ # Creates a new {External::DataSource} (or subclass) object that
26
+ # represents the external data source that can be queried from directly,
27
+ # even though the data is not stored in BigQuery. Instead of loading or
28
+ # streaming the data, this object references the external data source.
29
+ #
30
+ # See {External::DataSource}, {External::CsvSource},
31
+ # {External::JsonSource}, {External::SheetsSource},
32
+ # {External::BigtableSource}
33
+ #
34
+ # @example
35
+ # require "google/cloud/bigquery"
36
+ #
37
+ # bigquery = Google::Cloud::Bigquery.new
38
+ #
39
+ # csv_url = "gs://bucket/path/to/data.csv"
40
+ # csv_table = bigquery.external csv_url do |csv|
41
+ # csv.autodetect = true
42
+ # csv.skip_leading_rows = 1
43
+ # end
44
+ #
45
+ # data = bigquery.query "SELECT * FROM my_ext_table",
46
+ # external: { my_ext_table: csv_table }
47
+ #
48
+ # data.each do |row|
49
+ # puts row[:name]
50
+ # end
51
+ #
52
+ module External
53
+ ##
54
+ # @private New External from URLs and format
55
+ def self.from_urls urls, format = nil
56
+ external_format = source_format_for urls, format
57
+ if external_format.nil?
58
+ fail ArgumentError, "Unable to determine external table format"
59
+ end
60
+ external_class = table_class_for external_format
61
+ external_class.new.tap do |e|
62
+ e.gapi.source_uris = Array(urls)
63
+ e.gapi.source_format = external_format
64
+ end
65
+ end
66
+
67
+ ##
68
+ # @private Google API Client object.
69
+ def self.from_gapi gapi
70
+ external_format = source_format_for gapi.source_uris,
71
+ gapi.source_format
72
+ if external_format.nil?
73
+ fail ArgumentError, "Unable to determine external table format"
74
+ end
75
+ external_class = table_class_for external_format
76
+ external_class.from_gapi gapi
77
+ end
78
+
79
+ ##
80
+ # @private Determine source_format from inputs
81
+ def self.source_format_for urls, format
82
+ val = { "csv" => "CSV",
83
+ "json" => "NEWLINE_DELIMITED_JSON",
84
+ "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
85
+ "sheets" => "GOOGLE_SHEETS",
86
+ "google_sheets" => "GOOGLE_SHEETS",
87
+ "avro" => "AVRO",
88
+ "datastore" => "DATASTORE_BACKUP",
89
+ "backup" => "DATASTORE_BACKUP",
90
+ "datastore_backup" => "DATASTORE_BACKUP",
91
+ "bigtable" => "BIGTABLE"
92
+ }[format.to_s.downcase]
93
+ return val unless val.nil?
94
+ Array(urls).each do |url|
95
+ return "CSV" if url.end_with? ".csv"
96
+ return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
97
+ return "AVRO" if url.end_with? ".avro"
98
+ return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
99
+ if url.start_with? "https://docs.google.com/spreadsheets/"
100
+ return "GOOGLE_SHEETS"
101
+ end
102
+ if url.start_with? "https://googleapis.com/bigtable/projects/"
103
+ return "BIGTABLE"
104
+ end
105
+ end
106
+ nil
107
+ end
108
+
109
+ ##
110
+ # @private Determine table class from source_format
111
+ def self.table_class_for format
112
+ case format
113
+ when "CSV" then External::CsvSource
114
+ when "NEWLINE_DELIMITED_JSON" then External::JsonSource
115
+ when "GOOGLE_SHEETS" then External::SheetsSource
116
+ when "BIGTABLE" then External::BigtableSource
117
+ else
118
+ # AVRO and DATASTORE_BACKUP
119
+ External::DataSource
120
+ end
121
+ end
122
+
123
+ ##
124
+ # # DataSource
125
+ #
126
+ # External::DataSource and its subclasses represents an external data
127
+ # source that can be queried from directly, even though the data is not
128
+ # stored in BigQuery. Instead of loading or streaming the data, this
129
+ # object references the external data source.
130
+ #
131
+ # The AVRO and Datastore Backup formats use {External::DataSource}. See
132
+ # {External::CsvSource}, {External::JsonSource},
133
+ # {External::SheetsSource}, {External::BigtableSource} for the other
134
+ # formats.
135
+ #
136
+ # @example
137
+ # require "google/cloud/bigquery"
138
+ #
139
+ # bigquery = Google::Cloud::Bigquery.new
140
+ #
141
+ # avro_url = "gs://bucket/path/to/data.avro"
142
+ # avro_table = bigquery.external avro_url do |avro|
143
+ # avro.autodetect = true
144
+ # end
145
+ #
146
+ # data = bigquery.query "SELECT * FROM my_ext_table",
147
+ # external: { my_ext_table: avro_table }
148
+ #
149
+ # data.each do |row|
150
+ # puts row[:name]
151
+ # end
152
+ #
153
+ class DataSource
154
+ ##
155
+ # @private The Google API Client object.
156
+ attr_accessor :gapi
157
+
158
+ ##
159
+ # @private Create an empty Table object.
160
+ def initialize
161
+ @gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
162
+ end
163
+
164
+ ##
165
+ # The data format. For CSV files, specify "CSV". For Google sheets,
166
+ # specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
167
+ # "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
168
+ # Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
169
+ # Google Cloud Bigtable, specify "BIGTABLE".
170
+ #
171
+ # @return [String]
172
+ #
173
+ # @example
174
+ # require "google/cloud/bigquery"
175
+ #
176
+ # bigquery = Google::Cloud::Bigquery.new
177
+ #
178
+ # csv_url = "gs://bucket/path/to/data.csv"
179
+ # csv_table = bigquery.external csv_url
180
+ #
181
+ # csv_table.format #=> "CSV"
182
+ #
183
+ def format
184
+ @gapi.source_format
185
+ end
186
+
187
+ ##
188
+ # Whether the data format is "CSV".
189
+ #
190
+ # @return [Boolean]
191
+ #
192
+ # @example
193
+ # require "google/cloud/bigquery"
194
+ #
195
+ # bigquery = Google::Cloud::Bigquery.new
196
+ #
197
+ # csv_url = "gs://bucket/path/to/data.csv"
198
+ # csv_table = bigquery.external csv_url
199
+ #
200
+ # csv_table.format #=> "CSV"
201
+ # csv_table.csv? #=> true
202
+ #
203
+ def csv?
204
+ @gapi.source_format == "CSV"
205
+ end
206
+
207
+ ##
208
+ # Whether the data format is "NEWLINE_DELIMITED_JSON".
209
+ #
210
+ # @return [Boolean]
211
+ #
212
+ # @example
213
+ # require "google/cloud/bigquery"
214
+ #
215
+ # bigquery = Google::Cloud::Bigquery.new
216
+ #
217
+ # json_url = "gs://bucket/path/to/data.json"
218
+ # json_table = bigquery.external json_url
219
+ #
220
+ # json_table.format #=> "NEWLINE_DELIMITED_JSON"
221
+ # json_table.json? #=> true
222
+ #
223
+ def json?
224
+ @gapi.source_format == "NEWLINE_DELIMITED_JSON"
225
+ end
226
+
227
+ ##
228
+ # Whether the data format is "GOOGLE_SHEETS".
229
+ #
230
+ # @return [Boolean]
231
+ #
232
+ # @example
233
+ # require "google/cloud/bigquery"
234
+ #
235
+ # bigquery = Google::Cloud::Bigquery.new
236
+ #
237
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
238
+ # sheets_table = bigquery.external sheets_url
239
+ #
240
+ # sheets_table.format #=> "GOOGLE_SHEETS"
241
+ # sheets_table.sheets? #=> true
242
+ #
243
+ def sheets?
244
+ @gapi.source_format == "GOOGLE_SHEETS"
245
+ end
246
+
247
+ ##
248
+ # Whether the data format is "AVRO".
249
+ #
250
+ # @return [Boolean]
251
+ #
252
+ # @example
253
+ # require "google/cloud/bigquery"
254
+ #
255
+ # bigquery = Google::Cloud::Bigquery.new
256
+ #
257
+ # avro_url = "gs://bucket/path/to/data.avro"
258
+ # avro_table = bigquery.external avro_url
259
+ #
260
+ # avro_table.format #=> "AVRO"
261
+ # avro_table.avro? #=> true
262
+ #
263
+ def avro?
264
+ @gapi.source_format == "AVRO"
265
+ end
266
+
267
+ ##
268
+ # Whether the data format is "DATASTORE_BACKUP".
269
+ #
270
+ # @return [Boolean]
271
+ #
272
+ # @example
273
+ # require "google/cloud/bigquery"
274
+ #
275
+ # bigquery = Google::Cloud::Bigquery.new
276
+ #
277
+ # backup_url = "gs://bucket/path/to/data.backup_info"
278
+ # backup_table = bigquery.external backup_url
279
+ #
280
+ # backup_table.format #=> "DATASTORE_BACKUP"
281
+ # backup_table.backup? #=> true
282
+ #
283
+ def backup?
284
+ @gapi.source_format == "DATASTORE_BACKUP"
285
+ end
286
+
287
+ ##
288
+ # Whether the data format is "BIGTABLE".
289
+ #
290
+ # @return [Boolean]
291
+ #
292
+ # @example
293
+ # require "google/cloud/bigquery"
294
+ #
295
+ # bigquery = Google::Cloud::Bigquery.new
296
+ #
297
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
298
+ # bigtable_table = bigquery.external bigtable_url
299
+ #
300
+ # bigtable_table.format #=> "BIGTABLE"
301
+ # bigtable_table.bigtable? #=> true
302
+ #
303
+ def bigtable?
304
+ @gapi.source_format == "BIGTABLE"
305
+ end
306
+
307
+ ##
308
+ # The fully-qualified URIs that point to your data in Google Cloud.
309
+ # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
310
+ # character and it must come after the 'bucket' name. Size limits
311
+ # related to load jobs apply to external data sources. For Google
312
+ # Cloud Bigtable URIs: Exactly one URI can be specified and it has be
313
+ # a fully specified and valid HTTPS URL for a Google Cloud Bigtable
314
+ # table. For Google Cloud Datastore backups, exactly one URI can be
315
+ # specified, and it must end with '.backup_info'. Also, the '*'
316
+ # wildcard character is not allowed.
317
+ #
318
+ # @return [Array<String>]
319
+ #
320
+ # @example
321
+ # require "google/cloud/bigquery"
322
+ #
323
+ # bigquery = Google::Cloud::Bigquery.new
324
+ #
325
+ # csv_url = "gs://bucket/path/to/data.csv"
326
+ # csv_table = bigquery.external csv_url
327
+ #
328
+ # csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
329
+ #
330
+ def urls
331
+ @gapi.source_uris
332
+ end
333
+
334
+ ##
335
+ # Indicates if the schema and format options are detected
336
+ # automatically.
337
+ #
338
+ # @return [Boolean]
339
+ #
340
+ # @example
341
+ # require "google/cloud/bigquery"
342
+ #
343
+ # bigquery = Google::Cloud::Bigquery.new
344
+ #
345
+ # csv_url = "gs://bucket/path/to/data.csv"
346
+ # csv_table = bigquery.external csv_url do |csv|
347
+ # csv.autodetect = true
348
+ # end
349
+ #
350
+ # csv_table.autodetect #=> true
351
+ #
352
+ def autodetect
353
+ @gapi.autodetect
354
+ end
355
+
356
+ ##
357
+ # Set whether to detect schema and format options automatically. Any
358
+ # option specified explicitly will be honored.
359
+ #
360
+ # @param [Boolean] new_autodetect New autodetect value
361
+ #
362
+ # @example
363
+ # require "google/cloud/bigquery"
364
+ #
365
+ # bigquery = Google::Cloud::Bigquery.new
366
+ #
367
+ # csv_url = "gs://bucket/path/to/data.csv"
368
+ # csv_table = bigquery.external csv_url do |csv|
369
+ # csv.autodetect = true
370
+ # end
371
+ #
372
+ # csv_table.autodetect #=> true
373
+ #
374
+ def autodetect= new_autodetect
375
+ frozen_check!
376
+ @gapi.autodetect = new_autodetect
377
+ end
378
+
379
+ ##
380
+ # The compression type of the data source. Possible values include
381
+ # `"GZIP"` and `nil`. The default value is `nil`. This setting is
382
+ # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
383
+ # and Avro formats. Optional.
384
+ #
385
+ # @return [String]
386
+ #
387
+ # @example
388
+ # require "google/cloud/bigquery"
389
+ #
390
+ # bigquery = Google::Cloud::Bigquery.new
391
+ #
392
+ # csv_url = "gs://bucket/path/to/data.csv"
393
+ # csv_table = bigquery.external csv_url do |csv|
394
+ # csv.compression = "GZIP"
395
+ # end
396
+ #
397
+ # csv_table.compression #=> "GZIP"
398
+ def compression
399
+ @gapi.compression
400
+ end
401
+
402
+ ##
403
+ # Set the compression type of the data source. Possible values include
404
+ # `"GZIP"` and `nil`. The default value is `nil`. This setting is
405
+ # ignored for Google Cloud Bigtable, Google Cloud Datastore backups
406
+ # and Avro formats. Optional.
407
+ #
408
+ # @param [String] new_compression New compression value
409
+ #
410
+ # @example
411
+ # require "google/cloud/bigquery"
412
+ #
413
+ # bigquery = Google::Cloud::Bigquery.new
414
+ #
415
+ # csv_url = "gs://bucket/path/to/data.csv"
416
+ # csv_table = bigquery.external csv_url do |csv|
417
+ # csv.compression = "GZIP"
418
+ # end
419
+ #
420
+ # csv_table.compression #=> "GZIP"
421
+ #
422
+ def compression= new_compression
423
+ frozen_check!
424
+ @gapi.compression = new_compression
425
+ end
426
+
427
+ ##
428
+ # Indicates if BigQuery should allow extra values that are not
429
+ # represented in the table schema. If `true`, the extra values are
430
+ # ignored. If `false`, records with extra columns are treated as bad
431
+ # records, and if there are too many bad records, an invalid error is
432
+ # returned in the job result. The default value is `false`.
433
+ #
434
+ # BigQuery treats trailing columns as an extra in `CSV`, named values
435
+ # that don't match any column names in `JSON`. This setting is ignored
436
+ # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
437
+ # formats. Optional.
438
+ #
439
+ # @return [Boolean]
440
+ #
441
+ # @example
442
+ # require "google/cloud/bigquery"
443
+ #
444
+ # bigquery = Google::Cloud::Bigquery.new
445
+ #
446
+ # csv_url = "gs://bucket/path/to/data.csv"
447
+ # csv_table = bigquery.external csv_url do |csv|
448
+ # csv.ignore_unknown = true
449
+ # end
450
+ #
451
+ # csv_table.ignore_unknown #=> true
452
+ #
453
+ def ignore_unknown
454
+ @gapi.ignore_unknown_values
455
+ end
456
+
457
+ ##
458
+ # Set whether BigQuery should allow extra values that are not
459
+ # represented in the table schema. If `true`, the extra values are
460
+ # ignored. If `false`, records with extra columns are treated as bad
461
+ # records, and if there are too many bad records, an invalid error is
462
+ # returned in the job result. The default value is `false`.
463
+ #
464
+ # BigQuery treats trailing columns as an extra in `CSV`, named values
465
+ # that don't match any column names in `JSON`. This setting is ignored
466
+ # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
467
+ # formats. Optional.
468
+ #
469
+ # @param [Boolean] new_ignore_unknown New ignore_unknown value
470
+ #
471
+ # @example
472
+ # require "google/cloud/bigquery"
473
+ #
474
+ # bigquery = Google::Cloud::Bigquery.new
475
+ #
476
+ # csv_url = "gs://bucket/path/to/data.csv"
477
+ # csv_table = bigquery.external csv_url do |csv|
478
+ # csv.ignore_unknown = true
479
+ # end
480
+ #
481
+ # csv_table.ignore_unknown #=> true
482
+ #
483
+ def ignore_unknown= new_ignore_unknown
484
+ frozen_check!
485
+ @gapi.ignore_unknown_values = new_ignore_unknown
486
+ end
487
+
488
+ ##
489
+ # The maximum number of bad records that BigQuery can ignore when
490
+ # reading data. If the number of bad records exceeds this value, an
491
+ # invalid error is returned in the job result. The default value is 0,
492
+ # which requires that all records are valid. This setting is ignored
493
+ # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
494
+ # formats.
495
+ #
496
+ # @return [Integer]
497
+ #
498
+ # @example
499
+ # require "google/cloud/bigquery"
500
+ #
501
+ # bigquery = Google::Cloud::Bigquery.new
502
+ #
503
+ # csv_url = "gs://bucket/path/to/data.csv"
504
+ # csv_table = bigquery.external csv_url do |csv|
505
+ # csv.max_bad_records = 10
506
+ # end
507
+ #
508
+ # csv_table.max_bad_records #=> 10
509
+ #
510
+ def max_bad_records
511
+ @gapi.max_bad_records
512
+ end
513
+
514
+ ##
515
+ # Set the maximum number of bad records that BigQuery can ignore when
516
+ # reading data. If the number of bad records exceeds this value, an
517
+ # invalid error is returned in the job result. The default value is 0,
518
+ # which requires that all records are valid. This setting is ignored
519
+ # for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
520
+ # formats.
521
+ #
522
+ # @param [Integer] new_max_bad_records New max_bad_records value
523
+ #
524
+ # @example
525
+ # require "google/cloud/bigquery"
526
+ #
527
+ # bigquery = Google::Cloud::Bigquery.new
528
+ #
529
+ # csv_url = "gs://bucket/path/to/data.csv"
530
+ # csv_table = bigquery.external csv_url do |csv|
531
+ # csv.max_bad_records = 10
532
+ # end
533
+ #
534
+ # csv_table.max_bad_records #=> 10
535
+ #
536
+ def max_bad_records= new_max_bad_records
537
+ frozen_check!
538
+ @gapi.max_bad_records = new_max_bad_records
539
+ end
540
+
541
+ ##
542
+ # @private Google API Client object.
543
+ def to_gapi
544
+ @gapi
545
+ end
546
+
547
+ ##
548
+ # @private Google API Client object.
549
+ def self.from_gapi gapi
550
+ new_table = new
551
+ new_table.instance_variable_set :@gapi, gapi
552
+ new_table
553
+ end
554
+
555
+ protected
556
+
557
+ def frozen_check!
558
+ return unless frozen?
559
+ fail ArgumentError, "Cannot modify external data source when frozen"
560
+ end
561
+ end
562
+
563
+ ##
564
+ # # CsvSource
565
+ #
566
+ # {External::CsvSource} is a subclass of {External::DataSource} and
567
+ # represents a CSV external data source that can be queried from
568
+ # directly, such as Google Cloud Storage or Google Drive, even though
569
+ # the data is not stored in BigQuery. Instead of loading or streaming
570
+ # the data, this object references the external data source.
571
+ #
572
+ # @example
573
+ # require "google/cloud/bigquery"
574
+ #
575
+ # bigquery = Google::Cloud::Bigquery.new
576
+ #
577
+ # csv_url = "gs://bucket/path/to/data.csv"
578
+ # csv_table = bigquery.external csv_url do |csv|
579
+ # csv.autodetect = true
580
+ # csv.skip_leading_rows = 1
581
+ # end
582
+ #
583
+ # data = bigquery.query "SELECT * FROM my_ext_table",
584
+ # external: { my_ext_table: csv_table }
585
+ #
586
+ # data.each do |row|
587
+ # puts row[:name]
588
+ # end
589
+ #
590
+ class CsvSource < External::DataSource
591
+ ##
592
+ # @private Create an empty CsvSource object.
593
+ def initialize
594
+ super
595
+ @gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
596
+ end
597
+
598
+ ##
599
+ # Indicates if BigQuery should accept rows that are missing trailing
600
+ # optional columns.
601
+ #
602
+ # @return [Boolean]
603
+ #
604
+ # @example
605
+ # require "google/cloud/bigquery"
606
+ #
607
+ # bigquery = Google::Cloud::Bigquery.new
608
+ #
609
+ # csv_url = "gs://bucket/path/to/data.csv"
610
+ # csv_table = bigquery.external csv_url do |csv|
611
+ # csv.jagged_rows = true
612
+ # end
613
+ #
614
+ # csv_table.jagged_rows #=> true
615
+ #
616
+ def jagged_rows
617
+ @gapi.csv_options.allow_jagged_rows
618
+ end
619
+
620
+ ##
621
+ # Set whether BigQuery should accept rows that are missing trailing
622
+ # optional columns.
623
+ #
624
+ # @param [Boolean] new_jagged_rows New jagged_rows value
625
+ #
626
+ # @example
627
+ # require "google/cloud/bigquery"
628
+ #
629
+ # bigquery = Google::Cloud::Bigquery.new
630
+ #
631
+ # csv_url = "gs://bucket/path/to/data.csv"
632
+ # csv_table = bigquery.external csv_url do |csv|
633
+ # csv.jagged_rows = true
634
+ # end
635
+ #
636
+ # csv_table.jagged_rows #=> true
637
+ #
638
+ def jagged_rows= new_jagged_rows
639
+ frozen_check!
640
+ @gapi.csv_options.allow_jagged_rows = new_jagged_rows
641
+ end
642
+
643
+ ##
644
+ # Indicates if BigQuery should allow quoted data sections that contain
645
+ # newline characters in a CSV file.
646
+ #
647
+ # @return [Boolean]
648
+ #
649
+ # @example
650
+ # require "google/cloud/bigquery"
651
+ #
652
+ # bigquery = Google::Cloud::Bigquery.new
653
+ #
654
+ # csv_url = "gs://bucket/path/to/data.csv"
655
+ # csv_table = bigquery.external csv_url do |csv|
656
+ # csv.quoted_newlines = true
657
+ # end
658
+ #
659
+ # csv_table.quoted_newlines #=> true
660
+ #
661
+ def quoted_newlines
662
+ @gapi.csv_options.allow_quoted_newlines
663
+ end
664
+
665
+ ##
666
+ # Set whether BigQuery should allow quoted data sections that contain
667
+ # newline characters in a CSV file.
668
+ #
669
+ # @param [Boolean] new_quoted_newlines New quoted_newlines value
670
+ #
671
+ # @example
672
+ # require "google/cloud/bigquery"
673
+ #
674
+ # bigquery = Google::Cloud::Bigquery.new
675
+ #
676
+ # csv_url = "gs://bucket/path/to/data.csv"
677
+ # csv_table = bigquery.external csv_url do |csv|
678
+ # csv.quoted_newlines = true
679
+ # end
680
+ #
681
+ # csv_table.quoted_newlines #=> true
682
+ #
683
+ def quoted_newlines= new_quoted_newlines
684
+ frozen_check!
685
+ @gapi.csv_options.allow_quoted_newlines = new_quoted_newlines
686
+ end
687
+
688
+ ##
689
+ # The character encoding of the data.
690
+ #
691
+ # @return [String]
692
+ #
693
+ # @example
694
+ # require "google/cloud/bigquery"
695
+ #
696
+ # bigquery = Google::Cloud::Bigquery.new
697
+ #
698
+ # csv_url = "gs://bucket/path/to/data.csv"
699
+ # csv_table = bigquery.external csv_url do |csv|
700
+ # csv.encoding = "UTF-8"
701
+ # end
702
+ #
703
+ # csv_table.encoding #=> "UTF-8"
704
+ #
705
+ def encoding
706
+ @gapi.csv_options.encoding
707
+ end
708
+
709
+ ##
710
+ # Set the character encoding of the data.
711
+ #
712
+ # @param [String] new_encoding New encoding value
713
+ #
714
+ # @example
715
+ # require "google/cloud/bigquery"
716
+ #
717
+ # bigquery = Google::Cloud::Bigquery.new
718
+ #
719
+ # csv_url = "gs://bucket/path/to/data.csv"
720
+ # csv_table = bigquery.external csv_url do |csv|
721
+ # csv.encoding = "UTF-8"
722
+ # end
723
+ #
724
+ # csv_table.encoding #=> "UTF-8"
725
+ #
726
+ def encoding= new_encoding
727
+ frozen_check!
728
+ @gapi.csv_options.encoding = new_encoding
729
+ end
730
+
731
+ ##
732
+ # Checks if the character encoding of the data is "UTF-8". This is the
733
+ # default.
734
+ #
735
+ # @return [Boolean]
736
+ #
737
+ # @example
738
+ # require "google/cloud/bigquery"
739
+ #
740
+ # bigquery = Google::Cloud::Bigquery.new
741
+ #
742
+ # csv_url = "gs://bucket/path/to/data.csv"
743
+ # csv_table = bigquery.external csv_url do |csv|
744
+ # csv.encoding = "UTF-8"
745
+ # end
746
+ #
747
+ # csv_table.encoding #=> "UTF-8"
748
+ # csv_table.utf8? #=> true
749
+ #
750
+ def utf8?
751
+ return true if encoding.nil?
752
+ encoding == "UTF-8"
753
+ end
754
+
755
+ ##
756
+ # Checks if the character encoding of the data is "ISO-8859-1".
757
+ #
758
+ # @return [Boolean]
759
+ #
760
+ # @example
761
+ # require "google/cloud/bigquery"
762
+ #
763
+ # bigquery = Google::Cloud::Bigquery.new
764
+ #
765
+ # csv_url = "gs://bucket/path/to/data.csv"
766
+ # csv_table = bigquery.external csv_url do |csv|
767
+ # csv.encoding = "ISO-8859-1"
768
+ # end
769
+ #
770
+ # csv_table.encoding #=> "ISO-8859-1"
771
+ # csv_table.iso8859_1? #=> true
772
+ #
773
+ def iso8859_1?
774
+ encoding == "ISO-8859-1"
775
+ end
776
+
777
+ ##
778
+ # The separator for fields in a CSV file.
779
+ #
780
+ # @return [String]
781
+ #
782
+ # @example
783
+ # require "google/cloud/bigquery"
784
+ #
785
+ # bigquery = Google::Cloud::Bigquery.new
786
+ #
787
+ # csv_url = "gs://bucket/path/to/data.csv"
788
+ # csv_table = bigquery.external csv_url do |csv|
789
+ # csv.delimiter = "|"
790
+ # end
791
+ #
792
+ # csv_table.delimiter #=> "|"
793
+ #
794
+ def delimiter
795
+ @gapi.csv_options.field_delimiter
796
+ end
797
+
798
+ ##
799
+ # Set the separator for fields in a CSV file.
800
+ #
801
+ # @param [String] new_delimiter New delimiter value
802
+ #
803
+ # @example
804
+ # require "google/cloud/bigquery"
805
+ #
806
+ # bigquery = Google::Cloud::Bigquery.new
807
+ #
808
+ # csv_url = "gs://bucket/path/to/data.csv"
809
+ # csv_table = bigquery.external csv_url do |csv|
810
+ # csv.delimiter = "|"
811
+ # end
812
+ #
813
+ # csv_table.delimiter #=> "|"
814
+ #
815
+ def delimiter= new_delimiter
816
+ frozen_check!
817
+ @gapi.csv_options.field_delimiter = new_delimiter
818
+ end
819
+
820
+ ##
821
+ # The value that is used to quote data sections in a CSV file.
822
+ #
823
+ # @return [String]
824
+ #
825
+ # @example
826
+ # require "google/cloud/bigquery"
827
+ #
828
+ # bigquery = Google::Cloud::Bigquery.new
829
+ #
830
+ # csv_url = "gs://bucket/path/to/data.csv"
831
+ # csv_table = bigquery.external csv_url do |csv|
832
+ # csv.quote = "'"
833
+ # end
834
+ #
835
+ # csv_table.quote #=> "'"
836
+ #
837
+ def quote
838
+ @gapi.csv_options.quote
839
+ end
840
+
841
+ ##
842
+ # Set the value that is used to quote data sections in a CSV file.
843
+ #
844
+ # @param [String] new_quote New quote value
845
+ #
846
+ # @example
847
+ # require "google/cloud/bigquery"
848
+ #
849
+ # bigquery = Google::Cloud::Bigquery.new
850
+ #
851
+ # csv_url = "gs://bucket/path/to/data.csv"
852
+ # csv_table = bigquery.external csv_url do |csv|
853
+ # csv.quote = "'"
854
+ # end
855
+ #
856
+ # csv_table.quote #=> "'"
857
+ #
858
+ def quote= new_quote
859
+ frozen_check!
860
+ @gapi.csv_options.quote = new_quote
861
+ end
862
+
863
+ ##
864
+ # The number of rows at the top of a CSV file that BigQuery will skip
865
+ # when reading the data.
866
+ #
867
+ # @return [Integer]
868
+ #
869
+ # @example
870
+ # require "google/cloud/bigquery"
871
+ #
872
+ # bigquery = Google::Cloud::Bigquery.new
873
+ #
874
+ # csv_url = "gs://bucket/path/to/data.csv"
875
+ # csv_table = bigquery.external csv_url do |csv|
876
+ # csv.skip_leading_rows = 1
877
+ # end
878
+ #
879
+ # csv_table.skip_leading_rows #=> 1
880
+ #
881
+ def skip_leading_rows
882
+ @gapi.csv_options.skip_leading_rows
883
+ end
884
+
885
+ ##
886
+ # Set the number of rows at the top of a CSV file that BigQuery will
887
+ # skip when reading the data.
888
+ #
889
+ # @param [Integer] row_count New skip_leading_rows value
890
+ #
891
+ # @example
892
+ # require "google/cloud/bigquery"
893
+ #
894
+ # bigquery = Google::Cloud::Bigquery.new
895
+ #
896
+ # csv_url = "gs://bucket/path/to/data.csv"
897
+ # csv_table = bigquery.external csv_url do |csv|
898
+ # csv.skip_leading_rows = 1
899
+ # end
900
+ #
901
+ # csv_table.skip_leading_rows #=> 1
902
+ #
903
+ def skip_leading_rows= row_count
904
+ frozen_check!
905
+ @gapi.csv_options.skip_leading_rows = row_count
906
+ end
907
+
908
+ ##
909
+ # The schema for the data.
910
+ #
911
+ # @param [Boolean] replace Whether to replace the existing schema with
912
+ # the new schema. If `true`, the fields will replace the existing
913
+ # schema. If `false`, the fields will be added to the existing
914
+ # schema. The default value is `false`.
915
+ # @yield [schema] a block for setting the schema
916
+ # @yieldparam [Schema] schema the object accepting the schema
917
+ #
918
+ # @return [Google::Cloud::Bigquery::Schema]
919
+ #
920
+ # @example
921
+ # require "google/cloud/bigquery"
922
+ #
923
+ # bigquery = Google::Cloud::Bigquery.new
924
+ #
925
+ # csv_url = "gs://bucket/path/to/data.csv"
926
+ # csv_table = bigquery.external csv_url do |csv|
927
+ # csv.schema do |schema|
928
+ # schema.string "name", mode: :required
929
+ # schema.string "email", mode: :required
930
+ # schema.integer "age", mode: :required
931
+ # schema.boolean "active", mode: :required
932
+ # end
933
+ # end
934
+ #
935
+ def schema replace: false
936
+ @schema ||= Schema.from_gapi @gapi.schema
937
+ if replace
938
+ frozen_check!
939
+ @schema = Schema.from_gapi
940
+ end
941
+ @schema.freeze if frozen?
942
+ yield @schema if block_given?
943
+ @schema
944
+ end
945
+
946
+ ##
947
+ # Set the schema for the data.
948
+ #
949
+ # @param [Schema] new_schema The schema object.
950
+ #
951
+ # @example
952
+ # require "google/cloud/bigquery"
953
+ #
954
+ # bigquery = Google::Cloud::Bigquery.new
955
+ #
956
+ # csv_shema = bigquery.schema do |schema|
957
+ # schema.string "name", mode: :required
958
+ # schema.string "email", mode: :required
959
+ # schema.integer "age", mode: :required
960
+ # schema.boolean "active", mode: :required
961
+ # end
962
+ #
963
+ # csv_url = "gs://bucket/path/to/data.csv"
964
+ # csv_table = bigquery.external csv_url
965
+ # csv_table.schema = csv_shema
966
+ #
967
+ def schema= new_schema
968
+ frozen_check!
969
+ @schema = new_schema
970
+ end
971
+
972
+ ##
973
+ # The fields of the schema.
974
+ #
975
+ def fields
976
+ schema.fields
977
+ end
978
+
979
+ ##
980
+ # The names of the columns in the schema.
981
+ #
982
+ def headers
983
+ schema.headers
984
+ end
985
+
986
+ ##
987
+ # @private Google API Client object.
988
+ def to_gapi
989
+ @gapi.schema = @schema.to_gapi if @schema
990
+ @gapi
991
+ end
992
+
993
+ ##
994
+ # @private Google API Client object.
995
+ def self.from_gapi gapi
996
+ new_table = super
997
+ schema = Schema.from_gapi gapi.schema
998
+ new_table.instance_variable_set :@schema, schema
999
+ new_table
1000
+ end
1001
+ end
1002
+
1003
##
# # JsonSource
#
# {External::JsonSource} is a subclass of {External::DataSource} and
# represents a JSON external data source that can be queried from
# directly, such as Google Cloud Storage or Google Drive, even though
# the data is not stored in BigQuery. Instead of loading or streaming
# the data, this object references the external data source.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   json_url = "gs://bucket/path/to/data.json"
#   json_table = bigquery.external json_url do |json|
#     json.schema do |schema|
#       schema.string "name", mode: :required
#       schema.string "email", mode: :required
#       schema.integer "age", mode: :required
#       schema.boolean "active", mode: :required
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: json_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class JsonSource < External::DataSource
  ##
  # The schema for the data.
  #
  # @param [Boolean] replace Whether to replace the existing schema with
  #   the new schema. If `true`, the fields will replace the existing
  #   schema. If `false`, the fields will be added to the existing
  #   schema. The default value is `false`.
  # @yield [schema] a block for setting the schema
  # @yieldparam [Schema] schema the object accepting the schema
  #
  # @return [Google::Cloud::Bigquery::Schema]
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   json_url = "gs://bucket/path/to/data.json"
  #   json_table = bigquery.external json_url do |json|
  #     json.schema do |schema|
  #       schema.string "name", mode: :required
  #       schema.string "email", mode: :required
  #       schema.integer "age", mode: :required
  #       schema.boolean "active", mode: :required
  #     end
  #   end
  #
  def schema replace: false
    # Lazily materialize the schema from the gapi representation.
    @schema ||= Schema.from_gapi @gapi.schema
    if replace
      frozen_check!
      # NOTE(review): Schema.from_gapi is invoked with no argument here
      # to produce an empty schema — confirm its signature permits this.
      @schema = Schema.from_gapi
    end
    # A frozen data source must hand out a frozen schema too.
    @schema.freeze if frozen?
    yield @schema if block_given?
    @schema
  end

  ##
  # Set the schema for the data.
  #
  # @param [Schema] new_schema The schema object.
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   json_schema = bigquery.schema do |schema|
  #     schema.string "name", mode: :required
  #     schema.string "email", mode: :required
  #     schema.integer "age", mode: :required
  #     schema.boolean "active", mode: :required
  #   end
  #
  #   json_url = "gs://bucket/path/to/data.json"
  #   json_table = bigquery.external json_url
  #   json_table.schema = json_schema
  #
  def schema= new_schema
    frozen_check!
    @schema = new_schema
  end

  ##
  # The fields of the schema.
  #
  def fields
    schema.fields
  end

  ##
  # The names of the columns in the schema.
  #
  def headers
    schema.headers
  end

  ##
  # @private Google API Client object. Serializes any locally
  # materialized schema back onto the gapi object before returning it.
  def to_gapi
    @gapi.schema = @schema.to_gapi if @schema
    @gapi
  end

  ##
  # @private New JsonSource from a Google API Client object.
  def self.from_gapi gapi
    new_table = super
    schema = Schema.from_gapi gapi.schema
    new_table.instance_variable_set :@schema, schema
    new_table
  end
end
1133
+
1134
##
# # SheetsSource
#
# {External::SheetsSource} is a subclass of {External::DataSource} and
# represents a Google Sheets external data source that can be queried
# from directly, even though the data is not stored in BigQuery.
# Instead of loading or streaming the data, this object references the
# external data source.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
#   sheets_table = bigquery.external sheets_url do |sheets|
#     sheets.skip_leading_rows = 1
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: sheets_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class SheetsSource < External::DataSource
  ##
  # @private Create an empty SheetsSource object.
  def initialize
    super
    @gapi.google_sheets_options =
      Google::Apis::BigqueryV2::GoogleSheetsOptions.new
  end

  ##
  # The number of rows at the top of a sheet that BigQuery will skip
  # when reading the data. The default value is `0`.
  #
  # This property is useful if you have header rows that should be
  # skipped. When `autodetect` is on, behavior is the following:
  #
  # * `nil` - Autodetect tries to detect headers in the first row. If
  #   they are not detected, the row is read as data. Otherwise data is
  #   read starting from the second row.
  # * `0` - Instructs autodetect that there are no headers and data
  #   should be read starting from the first row.
  # * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
  #   in row `N`. If headers are not detected, row `N` is just skipped.
  #   Otherwise row `N` is used to extract column names for the
  #   detected schema.
  #
  # @return [Integer]
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.skip_leading_rows = 1
  #   end
  #
  #   sheets_table.skip_leading_rows #=> 1
  #
  def skip_leading_rows
    sheets_options = @gapi.google_sheets_options
    sheets_options.skip_leading_rows
  end

  ##
  # Set the number of rows at the top of a sheet that BigQuery will
  # skip when reading the data.
  #
  # @param [Integer] row_count New skip_leading_rows value
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.skip_leading_rows = 1
  #   end
  #
  #   sheets_table.skip_leading_rows #=> 1
  #
  def skip_leading_rows= row_count
    frozen_check!
    sheets_options = @gapi.google_sheets_options
    sheets_options.skip_leading_rows = row_count
  end
end
1227
+
1228
+ ##
1229
+ # # BigtableSource
1230
+ #
1231
+ # {External::BigtableSource} is a subclass of {External::DataSource} and
1232
+ # represents a Bigtable external data source that can be queried from
1233
+ # directly, even though the data is not stored in BigQuery. Instead of
1234
+ # loading or streaming the data, this object references the external
1235
+ # data source.
1236
+ #
1237
+ # @example
1238
+ # require "google/cloud/bigquery"
1239
+ #
1240
+ # bigquery = Google::Cloud::Bigquery.new
1241
+ #
1242
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1243
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1244
+ # bt.rowkey_as_string = true
1245
+ # bt.add_family "user" do |u|
1246
+ # u.add_string "name"
1247
+ # u.add_string "email"
1248
+ # u.add_integer "age"
1249
+ # u.add_boolean "active"
1250
+ # end
1251
+ # end
1252
+ #
1253
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1254
+ # external: { my_ext_table: bigtable_table }
1255
+ #
1256
+ # data.each do |row|
1257
+ # puts row[:name]
1258
+ # end
1259
+ #
1260
+ class BigtableSource < External::DataSource
1261
##
# @private Create an empty BigtableSource object with no configured
# column families.
def initialize
  super
  @families = []
  @gapi.bigtable_options =
    Google::Apis::BigqueryV2::BigtableOptions.new
end
1269
+
1270
+ ##
1271
+ # List of column families to expose in the table schema along with
1272
+ # their types. This list restricts the column families that can be
1273
+ # referenced in queries and specifies their value types. You can use
1274
+ # this list to do type conversions - see
1275
+ # {BigtableSource::ColumnFamily#type} for more details. If you leave
1276
+ # this list empty, all column families are present in the table schema
1277
+ # and their values are read as `BYTES`. During a query only the column
1278
+ # families referenced in that query are read from Bigtable.
1279
+ #
1280
+ # @return [Array<BigtableSource::ColumnFamily>]
1281
+ #
1282
+ # @example
1283
+ # require "google/cloud/bigquery"
1284
+ #
1285
+ # bigquery = Google::Cloud::Bigquery.new
1286
+ #
1287
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1288
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1289
+ # bt.rowkey_as_string = true
1290
+ # bt.add_family "user" do |u|
1291
+ # u.add_string "name"
1292
+ # u.add_string "email"
1293
+ # u.add_integer "age"
1294
+ # u.add_boolean "active"
1295
+ # end
1296
+ # end
1297
+ #
1298
+ # bigtable_table.families.count #=> 1
1299
+ #
1300
+ def families
1301
+ @families
1302
+ end
1303
+
1304
##
# Add a column family to expose in the table schema along with its
# types. Columns belonging to the column family may also be exposed.
#
# @param [String] family_id Identifier of the column family. See
#   {BigtableSource::ColumnFamily#family_id}.
# @param [String] encoding The encoding of the values when the type is
#   not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
# @param [Boolean] latest Whether only the latest version of value are
#   exposed for all columns in this column family. See
#   {BigtableSource::ColumnFamily#latest}.
# @param [String] type The type to convert the value in cells of this
#   column. See {BigtableSource::ColumnFamily#type}.
#
# @yield [family] a block for setting the family
# @yieldparam [BigtableSource::ColumnFamily] family the family object
#
# @return [BigtableSource::ColumnFamily]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
def add_family family_id, encoding: nil, latest: nil, type: nil
  frozen_check!
  family = BigtableSource::ColumnFamily.new
  family.family_id = family_id
  # Only apply options that were actually given (truthy), so the
  # underlying gapi defaults remain untouched otherwise.
  family.encoding = encoding if encoding
  family.latest = latest if latest
  family.type = type if type
  yield family if block_given?
  @families.push family
  family
end
1350
+
1351
##
# Whether the rowkey column families will be read and converted to
# string. Otherwise they are read with `BYTES` type values and users
# need to manually cast them with `CAST` if necessary. The default
# value is `false`.
#
# @return [Boolean]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#   end
#
#   bigtable_table.rowkey_as_string #=> true
#
def rowkey_as_string
  options = @gapi.bigtable_options
  options.read_rowkey_as_string
end
1374
+
1375
##
# Set whether the rowkey column families will be read and converted
# to string. (The previous comment was copied from SheetsSource and
# incorrectly described skipping sheet rows.)
#
# @param [Boolean] row_rowkey New rowkey_as_string value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#   end
#
#   bigtable_table.rowkey_as_string #=> true
#
def rowkey_as_string= row_rowkey
  frozen_check!
  @gapi.bigtable_options.read_rowkey_as_string = row_rowkey
end
1397
+
1398
##
# @private Google API Client object. Serializes the configured column
# families onto the gapi object before returning it.
def to_gapi
  serialized_families = @families.map(&:to_gapi)
  @gapi.bigtable_options.column_families = serialized_families
  @gapi
end
1404
+
1405
##
# @private New BigtableSource from a Google API Client object; builds
# the base object via super, then wraps each column family.
def self.from_gapi gapi
  source = super
  wrapped_families = Array(gapi.bigtable_options.column_families)
                     .map { |fam_gapi| BigtableSource::ColumnFamily.from_gapi fam_gapi }
  source.instance_variable_set :@families, wrapped_families
  source
end
1416
+
1417
##
# @private Deep-freezes the data source: freezes each column family,
# then the families collection, then (via super) the object itself.
#
# Fixed: the previous implementation called `freeze!`, which is not
# defined on Array or on BigtableSource::ColumnFamily, so calling
# freeze raised NoMethodError.
def freeze
  @families.each(&:freeze)
  @families.freeze
  super
end
1424
+
1425
+ protected
1426
+
1427
##
# Raises when this data source has been frozen and may no longer be
# modified; a no-op otherwise.
def frozen_check!
  return unless frozen?
  raise ArgumentError, "Cannot modify external data source when frozen"
end
1431
+
1432
##
# # BigtableSource::ColumnFamily
#
# A Bigtable column family used to expose in the table schema along
# with its types and columns.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: bigtable_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class ColumnFamily
  ##
  # @private Create an empty BigtableSource::ColumnFamily object.
  # State lives in the wrapped gapi object plus a local column list.
  def initialize
    @gapi = Google::Apis::BigqueryV2::BigtableColumnFamily.new
    @columns = []
  end

  ##
  # The encoding of the values when the type is not `STRING`.
  #
  # @return [String]
  #
  def encoding
    @gapi.encoding
  end

  ##
  # Set the encoding of the values when the type is not `STRING`.
  # Acceptable encoding values are:
  #
  # * `TEXT` - indicates values are alphanumeric text strings.
  # * `BINARY` - indicates values are encoded using HBase
  #   `Bytes.toBytes` family of functions. This can be overridden on a
  #   column.
  #
  # @param [String] new_encoding New encoding value
  #
  def encoding= new_encoding
    frozen_check!
    @gapi.encoding = new_encoding
  end

  ##
  # Identifier of the column family.
  #
  # @return [String]
  #
  def family_id
    @gapi.family_id
  end

  ##
  # Set the identifier of the column family.
  #
  # @param [String] new_family_id New family_id value
  #
  def family_id= new_family_id
    frozen_check!
    @gapi.family_id = new_family_id
  end

  ##
  # Whether only the latest version of value are exposed for all
  # columns in this column family.
  #
  # @return [Boolean]
  #
  def latest
    @gapi.only_read_latest
  end

  ##
  # Set whether only the latest version of value are exposed for all
  # columns in this column family.
  #
  # @param [Boolean] new_latest New latest value
  #
  def latest= new_latest
    frozen_check!
    @gapi.only_read_latest = new_latest
  end

  ##
  # The type to convert the value in cells of this column family. The
  # values are expected to be encoded using HBase `Bytes.toBytes`
  # function when using the `BINARY` encoding value. The following
  # BigQuery types are allowed:
  #
  # * `BYTES`
  # * `STRING`
  # * `INTEGER`
  # * `FLOAT`
  # * `BOOLEAN`
  #
  # Default type is `BYTES`. This can be overridden on a column.
  #
  # @return [String]
  #
  def type
    @gapi.type
  end

  ##
  # Set the type to convert the value in cells of this column family.
  # The values are expected to be encoded using HBase `Bytes.toBytes`
  # function when using the `BINARY` encoding value. The following
  # BigQuery types are allowed:
  #
  # * `BYTES`
  # * `STRING`
  # * `INTEGER`
  # * `FLOAT`
  # * `BOOLEAN`
  #
  # Default type is `BYTES`. This can be overridden on a column.
  #
  # @param [String] new_type New type value
  #
  def type= new_type
    frozen_check!
    @gapi.type = new_type
  end

  ##
  # Lists of columns that should be exposed as individual fields.
  #
  # @return [Array<BigtableSource::Column>]
  #
  attr_reader :columns

  ##
  # Add a column to the column family to expose in the table schema
  # along with its types.
  #
  # @param [String] qualifier Qualifier of the column. See
  #   {BigtableSource::Column#qualifier}.
  # @param [String] as A valid identifier to be used as the column
  #   field name if the qualifier is not a valid BigQuery field
  #   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
  #   {BigtableSource::Column#field_name}.
  # @param [String] type The type to convert the value in cells of
  #   this column. See {BigtableSource::Column#type}. Allowed values:
  #   `BYTES`, `STRING`, `INTEGER`, `FLOAT`, `BOOLEAN`.
  #
  # @yield [column] a block for setting the column
  # @yieldparam [BigtableSource::Column] column the column object
  #
  # @return [BigtableSource::Column]
  #
  def add_column qualifier, as: nil, type: nil
    frozen_check!
    col = BigtableSource::Column.new
    col.qualifier = qualifier
    col.field_name = as if as
    col.type = type if type
    yield col if block_given?
    @columns << col
    col
  end

  ##
  # Add a column of type `BYTES`. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional field name override.
  # @yield [column] a block for setting the column
  # @return [BigtableSource::Column]
  #
  def add_bytes qualifier, as: nil
    col = add_column qualifier, as: as, type: "BYTES"
    yield col if block_given?
    col
  end

  ##
  # Add a column of type `STRING`. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional field name override.
  # @yield [column] a block for setting the column
  # @return [BigtableSource::Column]
  #
  def add_string qualifier, as: nil
    col = add_column qualifier, as: as, type: "STRING"
    yield col if block_given?
    col
  end

  ##
  # Add a column of type `INTEGER`. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional field name override.
  # @yield [column] a block for setting the column
  # @return [BigtableSource::Column]
  #
  def add_integer qualifier, as: nil
    col = add_column qualifier, as: as, type: "INTEGER"
    yield col if block_given?
    col
  end

  ##
  # Add a column of type `FLOAT`. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional field name override.
  # @yield [column] a block for setting the column
  # @return [BigtableSource::Column]
  #
  def add_float qualifier, as: nil
    col = add_column qualifier, as: as, type: "FLOAT"
    yield col if block_given?
    col
  end

  ##
  # Add a column of type `BOOLEAN`. See {#add_column}.
  #
  # @param [String] qualifier Qualifier of the column.
  # @param [String] as Optional field name override.
  # @yield [column] a block for setting the column
  # @return [BigtableSource::Column]
  #
  def add_boolean qualifier, as: nil
    col = add_column qualifier, as: as, type: "BOOLEAN"
    yield col if block_given?
    col
  end

  ##
  # @private Google API Client object. Serializes the configured
  # columns onto the gapi object before returning it.
  def to_gapi
    @gapi.columns = @columns.map(&:to_gapi)
    @gapi
  end

  ##
  # @private New ColumnFamily from a Google API Client object.
  def self.from_gapi gapi
    new_fam = new
    new_fam.instance_variable_set :@gapi, gapi
    columns = Array(gapi.columns).map do |col_gapi|
      BigtableSource::Column.from_gapi col_gapi
    end
    new_fam.instance_variable_set :@columns, columns
    new_fam
  end

  ##
  # @private Deep-freezes the family: freezes each column, then the
  # column list, then (via super) the family itself.
  #
  # Fixed: the previous implementation called `freeze!`, which is not
  # defined on Array or on BigtableSource::Column, so calling freeze
  # raised NoMethodError.
  def freeze
    @columns.each(&:freeze)
    @columns.freeze
    super
  end

  protected

  ##
  # Raises when this family has been frozen and may no longer be
  # modified; a no-op otherwise.
  def frozen_check!
    return unless frozen?
    raise ArgumentError,
          "Cannot modify external data source when frozen"
  end
end
1971
+
1972
+ ##
1973
+ # # BigtableSource::Column
1974
+ #
1975
+ # A Bigtable column to expose in the table schema along with its
1976
+ # types.
1977
+ #
1978
+ # @example
1979
+ # require "google/cloud/bigquery"
1980
+ #
1981
+ # bigquery = Google::Cloud::Bigquery.new
1982
+ #
1983
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
1984
+ # bigtable_table = bigquery.external bigtable_url do |bt|
1985
+ # bt.rowkey_as_string = true
1986
+ # bt.add_family "user" do |u|
1987
+ # u.add_string "name"
1988
+ # u.add_string "email"
1989
+ # u.add_integer "age"
1990
+ # u.add_boolean "active"
1991
+ # end
1992
+ # end
1993
+ #
1994
+ # data = bigquery.query "SELECT * FROM my_ext_table",
1995
+ # external: { my_ext_table: bigtable_table }
1996
+ #
1997
+ # data.each do |row|
1998
+ # puts row[:name]
1999
+ # end
2000
+ #
2001
+ class Column
2002
##
# @private Create an empty BigtableSource::Column object.
#
# The column's state lives entirely in the wrapped Google API Client
# object; the accessors read from and write to it directly.
def initialize
  @gapi = Google::Apis::BigqueryV2::BigtableColumn.new
end
2007
+
2008
##
# Qualifier of the column. Columns in the parent column family that
# has this exact qualifier are exposed as `.` field. If the qualifier
# is valid UTF-8 string, it will be represented as a UTF-8 string.
# Otherwise, it will represented as a ASCII-8BIT string.
#
# If the qualifier is not a valid BigQuery field identifier (does not
# match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be provided
# as `field_name`.
#
# @return [String]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.add_string "name" do |col|
#         col.qualifier # "user"
#         col.qualifier = "User"
#         col.qualifier # "User"
#       end
#     end
#   end
#
def qualifier
  # Prefer the plain-string form; fall back to decoding the Base64
  # representation when only the encoded form is present.
  string_form = @gapi.qualifier_string
  return string_form if string_form
  Base64.strict_decode64 @gapi.qualifier_encoded.to_s
end
2040
+
2041
##
# Set the qualifier of the column. Columns in the parent column
# family that has this exact qualifier are exposed as `.` field.
# Values that are valid UTF-8 strings will be treated as such. All
# other values will be treated as `BINARY`.
#
# @param [String] new_qualifier New qualifier value
#
# @raise [ArgumentError] if `new_qualifier` is `nil`, or if this
#   object is frozen.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.add_string "name" do |col|
#         col.qualifier # "user"
#         col.qualifier = "User"
#         col.qualifier # "User"
#       end
#     end
#   end
#
def qualifier= new_qualifier
  frozen_check!
  fail ArgumentError if new_qualifier.nil?

  # String#encode raises EncodingError for values that cannot be
  # represented in UTF-8; the rescue below handles those by storing
  # the raw bytes Base64-encoded instead.
  utf8_qualifier = new_qualifier.encode Encoding::UTF_8
  if utf8_qualifier.valid_encoding?
    # Only one of qualifier_string/qualifier_encoded should be set at
    # a time, so the previously-set counterpart is removed to keep the
    # API payload unambiguous.
    @gapi.qualifier_string = utf8_qualifier
    if @gapi.instance_variables.include? :@qualifier_encoded
      @gapi.remove_instance_variable :@qualifier_encoded
    end
  else
    # NOTE(review): since #encode raises rather than producing an
    # invalid string, this branch appears rarely reachable — the
    # rescue below handles the common non-UTF-8 case. Confirm.
    @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
    if @gapi.instance_variables.include? :@qualifier_string
      @gapi.remove_instance_variable :@qualifier_string
    end
  end
rescue EncodingError
  # Binary (non-UTF-8-convertible) qualifier: store Base64-encoded and
  # clear any stale plain-string form.
  @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
  if @gapi.instance_variables.include? :@qualifier_string
    @gapi.remove_instance_variable :@qualifier_string
  end
end
2087
+
2088
+ ##
2089
+ # The encoding of the values when the type is not `STRING`.
2090
+ #
2091
+ # @return [String]
2092
+ #
2093
+ # @example
2094
+ # require "google/cloud/bigquery"
2095
+ #
2096
+ # bigquery = Google::Cloud::Bigquery.new
2097
+ #
2098
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2099
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2100
+ # bt.add_family "user" do |u|
2101
+ # u.add_bytes "name" do |col|
2102
+ # col.encoding = "TEXT"
2103
+ # col.encoding # "TEXT"
2104
+ # end
2105
+ # end
2106
+ # end
2107
+ #
2108
+ def encoding
2109
+ @gapi.encoding
2110
+ end
2111
+
2112
+ ##
2113
+ # Set the encoding of the values when the type is not `STRING`.
2114
+ # Acceptable encoding values are:
2115
+ #
2116
+ # * `TEXT` - indicates values are alphanumeric text strings.
2117
+ # * `BINARY` - indicates values are encoded using HBase
2118
+ # `Bytes.toBytes` family of functions. This can be overridden on a
2119
+ # column.
2120
+ #
2121
+ # @param [String] new_encoding New encoding value
2122
+ #
2123
+ # @example
2124
+ # require "google/cloud/bigquery"
2125
+ #
2126
+ # bigquery = Google::Cloud::Bigquery.new
2127
+ #
2128
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2129
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2130
+ # bt.add_family "user" do |u|
2131
+ # u.add_bytes "name" do |col|
2132
+ # col.encoding = "TEXT"
2133
+ # col.encoding # "TEXT"
2134
+ # end
2135
+ # end
2136
+ # end
2137
+ #
2138
+ def encoding= new_encoding
2139
+ frozen_check!
2140
+ @gapi.encoding = new_encoding
2141
+ end
2142
+
2143
+ ##
2144
+ # If the qualifier is not a valid BigQuery field identifier (does
2145
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
2146
+ # provided as the column field name and is used as field name in
2147
+ # queries.
2148
+ #
2149
+ # @return [String]
2150
+ #
2151
+ # @example
2152
+ # require "google/cloud/bigquery"
2153
+ #
2154
+ # bigquery = Google::Cloud::Bigquery.new
2155
+ #
2156
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2157
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2158
+ # bt.add_family "user" do |u|
2159
+ # u.add_string "001_name", as: "user" do |col|
2160
+ # col.field_name # "user"
2161
+ # col.field_name = "User"
2162
+ # col.field_name # "User"
2163
+ # end
2164
+ # end
2165
+ # end
2166
+ #
2167
+ def field_name
2168
+ @gapi.field_name
2169
+ end
2170
+
2171
+ ##
2172
+ # Sets the identifier to be used as the column field name in queries
2173
+ # when the qualifier is not a valid BigQuery field identifier (does
2174
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`).
2175
+ #
2176
+ # @param [String] new_field_name New field_name value
2177
+ #
2178
+ # @example
2179
+ # require "google/cloud/bigquery"
2180
+ #
2181
+ # bigquery = Google::Cloud::Bigquery.new
2182
+ #
2183
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2184
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2185
+ # bt.add_family "user" do |u|
2186
+ # u.add_string "001_name", as: "user" do |col|
2187
+ # col.field_name # "user"
2188
+ # col.field_name = "User"
2189
+ # col.field_name # "User"
2190
+ # end
2191
+ # end
2192
+ # end
2193
+ #
2194
+ def field_name= new_field_name
2195
+ frozen_check!
2196
+ @gapi.field_name = new_field_name
2197
+ end
2198
+
2199
+ ##
2200
+ # Whether only the latest version of value in this column are
2201
+ # exposed. Can also be set at the column family level. However, this
2202
+ # value takes precedence when set at both levels.
2203
+ #
2204
+ # @return [Boolean]
2205
+ #
2206
+ # @example
2207
+ # require "google/cloud/bigquery"
2208
+ #
2209
+ # bigquery = Google::Cloud::Bigquery.new
2210
+ #
2211
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2212
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2213
+ # bt.add_family "user" do |u|
2214
+ # u.add_string "name" do |col|
2215
+ # col.latest = true
2216
+ # col.latest # true
2217
+ # end
2218
+ # end
2219
+ # end
2220
+ #
2221
+ def latest
2222
+ @gapi.only_read_latest
2223
+ end
2224
+
2225
+ ##
2226
+ # Set whether only the latest version of value in this column are
2227
+ # exposed. Can also be set at the column family level. However, this
2228
+ # value takes precedence when set at both levels.
2229
+ #
2230
+ # @param [Boolean] new_latest New latest value
2231
+ #
2232
+ # @example
2233
+ # require "google/cloud/bigquery"
2234
+ #
2235
+ # bigquery = Google::Cloud::Bigquery.new
2236
+ #
2237
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2238
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2239
+ # bt.add_family "user" do |u|
2240
+ # u.add_string "name" do |col|
2241
+ # col.latest = true
2242
+ # col.latest # true
2243
+ # end
2244
+ # end
2245
+ # end
2246
+ #
2247
+ def latest= new_latest
2248
+ frozen_check!
2249
+ @gapi.only_read_latest = new_latest
2250
+ end
2251
+
2252
+ ##
2253
+ # The type to convert the value in cells of this column. The values
2254
+ # are expected to be encoded using HBase `Bytes.toBytes` function
2255
+ # when using the `BINARY` encoding value. The following BigQuery
2256
+ # types are allowed:
2257
+ #
2258
+ # * `BYTES`
2259
+ # * `STRING`
2260
+ # * `INTEGER`
2261
+ # * `FLOAT`
2262
+ # * `BOOLEAN`
2263
+ #
2264
+ # Default type is `BYTES`. Can also be set at the column family
2265
+ # level. However, this value takes precedence when set at both
2266
+ # levels.
2267
+ #
2268
+ # @return [String]
2269
+ #
2270
+ # @example
2271
+ # require "google/cloud/bigquery"
2272
+ #
2273
+ # bigquery = Google::Cloud::Bigquery.new
2274
+ #
2275
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2276
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2277
+ # bt.add_family "user" do |u|
2278
+ # u.add_string "name" do |col|
2279
+ # col.type # "STRING"
2280
+ # end
2281
+ # end
2282
+ # end
2283
+ #
2284
+ def type
2285
+ @gapi.type
2286
+ end
2287
+
2288
+ ##
2289
+ # Set the type to convert the value in cells of this column. The
2290
+ # values are expected to be encoded using HBase `Bytes.toBytes`
2291
+ # function when using the `BINARY` encoding value. The following
2292
+ # BigQuery types are allowed:
2293
+ #
2294
+ # * `BYTES`
2295
+ # * `STRING`
2296
+ # * `INTEGER`
2297
+ # * `FLOAT`
2298
+ # * `BOOLEAN`
2299
+ #
2300
+ # Default type is `BYTES`. Can also be set at the column family
2301
+ # level. However, this value takes precedence when set at both
2302
+ # levels.
2303
+ #
2304
+ # @param [String] new_type New type value
2305
+ #
2306
+ # @example
2307
+ # require "google/cloud/bigquery"
2308
+ #
2309
+ # bigquery = Google::Cloud::Bigquery.new
2310
+ #
2311
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
2312
+ # bigtable_table = bigquery.external bigtable_url do |bt|
2313
+ # bt.add_family "user" do |u|
2314
+ # u.add_string "name" do |col|
2315
+ # col.type # "STRING"
2316
+ # col.type = "BYTES"
2317
+ # col.type # "BYTES"
2318
+ # end
2319
+ # end
2320
+ # end
2321
+ #
2322
+ def type= new_type
2323
+ frozen_check!
2324
+ @gapi.type = new_type
2325
+ end
2326
+
2327
+ ##
2328
+ # @private Google API Client object.
2329
+ def to_gapi
2330
+ @gapi
2331
+ end
2332
+
2333
+ ##
2334
+ # @private Google API Client object.
2335
+ def self.from_gapi gapi
2336
+ new_col = new
2337
+ new_col.instance_variable_set :@gapi, gapi
2338
+ new_col
2339
+ end
2340
+
2341
+ protected
2342
+
2343
+ def frozen_check!
2344
+ return unless frozen?
2345
+ fail ArgumentError,
2346
+ "Cannot modify external data source when frozen"
2347
+ end
2348
+ end
2349
+ end
2350
+ end
2351
+ end
2352
+ end
2353
+ end