google-cloud-bigquery 1.31.0 → 1.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/google/cloud/bigquery/external.rb +9 -2619
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/load_job.rb +103 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +23 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 36bd68946313d76f828e1fdbfb66606e7495ff876e25da7925772ad324ea8da3
|
|
4
|
+
data.tar.gz: b75c3a1b4d585c64c048bb073d0e3c0ab0231a666e8df6d350b98a0b282150d4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 823a1a0dd34a02c1278df3620270225fd9d592e0577f6812ad9df48722a2132c05e4799e5acd7aafc1973874ab60cd6f6c1a90b48382e1a55a52e8a0a38a8cb4
|
|
7
|
+
data.tar.gz: 9f3ab6425237255aa466637fe62911ed6253a13033205929e3e43d5948a0ad30943dc5deffcae74329dc94c782358305eb545767a9a5352e135ea2b38d3705a5
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
### 1.32.0 / 2021-06-21
|
|
4
|
+
|
|
5
|
+
#### Features
|
|
6
|
+
|
|
7
|
+
* Add support for Parquet options
|
|
8
|
+
* feat(bigquery): Add Bigquery::External::ParquetSource
|
|
9
|
+
* Add Parquet options to LoadJob
|
|
10
|
+
* Add LoadJob#parquet_options?
|
|
11
|
+
* Add LoadJob#parquet_enable_list_inference?
|
|
12
|
+
* Add LoadJob#parquet_enum_as_string?
|
|
13
|
+
* Add LoadJob::Updater#parquet_enable_list_inference=
|
|
14
|
+
* Add LoadJob::Updater#parquet_enum_as_string=
|
|
15
|
+
|
|
16
|
+
#### Bug Fixes
|
|
17
|
+
|
|
18
|
+
* Expand googleauth dependency to support future 1.x versions
|
|
19
|
+
|
|
3
20
|
### 1.31.0 / 2021-04-28
|
|
4
21
|
|
|
5
22
|
#### Features
|
|
@@ -13,8 +13,12 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
require "google/apis/bigquery_v2"
|
|
17
|
-
require "base64"
|
|
16
|
+
require "google/cloud/bigquery/external/data_source"
|
|
17
|
+
require "google/cloud/bigquery/external/bigtable_source"
|
|
18
|
+
require "google/cloud/bigquery/external/csv_source"
|
|
19
|
+
require "google/cloud/bigquery/external/json_source"
|
|
20
|
+
require "google/cloud/bigquery/external/parquet_source"
|
|
21
|
+
require "google/cloud/bigquery/external/sheets_source"
|
|
18
22
|
|
|
19
23
|
module Google
|
|
20
24
|
module Cloud
|
|
@@ -114,6 +118,7 @@ module Google
|
|
|
114
118
|
Array(urls).each do |url|
|
|
115
119
|
return "CSV" if url.end_with? ".csv"
|
|
116
120
|
return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
|
|
121
|
+
return "PARQUET" if url.end_with? ".parquet"
|
|
117
122
|
return "AVRO" if url.end_with? ".avro"
|
|
118
123
|
return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
|
|
119
124
|
return "GOOGLE_SHEETS" if url.start_with? "https://docs.google.com/spreadsheets/"
|
|
@@ -128,2629 +133,14 @@ module Google
|
|
|
128
133
|
case format
|
|
129
134
|
when "CSV" then External::CsvSource
|
|
130
135
|
when "NEWLINE_DELIMITED_JSON" then External::JsonSource
|
|
136
|
+
when "PARQUET" then External::ParquetSource
|
|
131
137
|
when "GOOGLE_SHEETS" then External::SheetsSource
|
|
132
138
|
when "BIGTABLE" then External::BigtableSource
|
|
133
139
|
else
|
|
134
|
-
# AVRO, DATASTORE_BACKUP
|
|
140
|
+
# AVRO, DATASTORE_BACKUP
|
|
135
141
|
External::DataSource
|
|
136
142
|
end
|
|
137
143
|
end
|
|
138
|
-
|
|
139
|
-
##
|
|
140
|
-
# # DataSource
|
|
141
|
-
#
|
|
142
|
-
# External::DataSource and its subclasses represent an external data
|
|
143
|
-
# source that can be queried from directly, even though the data is not
|
|
144
|
-
# stored in BigQuery. Instead of loading or streaming the data, this
|
|
145
|
-
# object references the external data source.
|
|
146
|
-
#
|
|
147
|
-
# The AVRO and Datastore Backup formats use {External::DataSource}. See
|
|
148
|
-
# {External::CsvSource}, {External::JsonSource},
|
|
149
|
-
# {External::SheetsSource}, {External::BigtableSource} for the other
|
|
150
|
-
# formats.
|
|
151
|
-
#
|
|
152
|
-
# @example
|
|
153
|
-
# require "google/cloud/bigquery"
|
|
154
|
-
#
|
|
155
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
156
|
-
#
|
|
157
|
-
# avro_url = "gs://bucket/path/to/data.avro"
|
|
158
|
-
# avro_table = bigquery.external avro_url do |avro|
|
|
159
|
-
# avro.autodetect = true
|
|
160
|
-
# end
|
|
161
|
-
#
|
|
162
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
163
|
-
# external: { my_ext_table: avro_table }
|
|
164
|
-
#
|
|
165
|
-
# # Iterate over the first page of results
|
|
166
|
-
# data.each do |row|
|
|
167
|
-
# puts row[:name]
|
|
168
|
-
# end
|
|
169
|
-
# # Retrieve the next page of results
|
|
170
|
-
# data = data.next if data.next?
|
|
171
|
-
#
|
|
172
|
-
# @example Hive partitioning options:
|
|
173
|
-
# require "google/cloud/bigquery"
|
|
174
|
-
#
|
|
175
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
176
|
-
#
|
|
177
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
178
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
179
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
180
|
-
# ext.hive_partitioning_mode = :auto
|
|
181
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
182
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
183
|
-
# end
|
|
184
|
-
#
|
|
185
|
-
# external_data.hive_partitioning? #=> true
|
|
186
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
187
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
188
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
189
|
-
#
|
|
190
|
-
class DataSource
|
|
191
|
-
##
|
|
192
|
-
# @private The Google API Client object.
|
|
193
|
-
attr_accessor :gapi
|
|
194
|
-
|
|
195
|
-
##
|
|
196
|
-
# @private Create an empty Table object.
|
|
197
|
-
def initialize
|
|
198
|
-
@gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
##
|
|
202
|
-
# The data format. For CSV files, specify "CSV". For Google sheets,
|
|
203
|
-
# specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
|
|
204
|
-
# "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
|
|
205
|
-
# Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
|
|
206
|
-
# Google Cloud Bigtable, specify "BIGTABLE".
|
|
207
|
-
#
|
|
208
|
-
# @return [String]
|
|
209
|
-
#
|
|
210
|
-
# @example
|
|
211
|
-
# require "google/cloud/bigquery"
|
|
212
|
-
#
|
|
213
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
214
|
-
#
|
|
215
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
216
|
-
# csv_table = bigquery.external csv_url
|
|
217
|
-
#
|
|
218
|
-
# csv_table.format #=> "CSV"
|
|
219
|
-
#
|
|
220
|
-
def format
|
|
221
|
-
@gapi.source_format
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
##
|
|
225
|
-
# Whether the data format is "CSV".
|
|
226
|
-
#
|
|
227
|
-
# @return [Boolean]
|
|
228
|
-
#
|
|
229
|
-
# @example
|
|
230
|
-
# require "google/cloud/bigquery"
|
|
231
|
-
#
|
|
232
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
233
|
-
#
|
|
234
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
235
|
-
# csv_table = bigquery.external csv_url
|
|
236
|
-
#
|
|
237
|
-
# csv_table.format #=> "CSV"
|
|
238
|
-
# csv_table.csv? #=> true
|
|
239
|
-
#
|
|
240
|
-
def csv?
|
|
241
|
-
@gapi.source_format == "CSV"
|
|
242
|
-
end
|
|
243
|
-
|
|
244
|
-
##
|
|
245
|
-
# Whether the data format is "NEWLINE_DELIMITED_JSON".
|
|
246
|
-
#
|
|
247
|
-
# @return [Boolean]
|
|
248
|
-
#
|
|
249
|
-
# @example
|
|
250
|
-
# require "google/cloud/bigquery"
|
|
251
|
-
#
|
|
252
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
253
|
-
#
|
|
254
|
-
# json_url = "gs://bucket/path/to/data.json"
|
|
255
|
-
# json_table = bigquery.external json_url
|
|
256
|
-
#
|
|
257
|
-
# json_table.format #=> "NEWLINE_DELIMITED_JSON"
|
|
258
|
-
# json_table.json? #=> true
|
|
259
|
-
#
|
|
260
|
-
def json?
|
|
261
|
-
@gapi.source_format == "NEWLINE_DELIMITED_JSON"
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
##
|
|
265
|
-
# Whether the data format is "GOOGLE_SHEETS".
|
|
266
|
-
#
|
|
267
|
-
# @return [Boolean]
|
|
268
|
-
#
|
|
269
|
-
# @example
|
|
270
|
-
# require "google/cloud/bigquery"
|
|
271
|
-
#
|
|
272
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
273
|
-
#
|
|
274
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
275
|
-
# sheets_table = bigquery.external sheets_url
|
|
276
|
-
#
|
|
277
|
-
# sheets_table.format #=> "GOOGLE_SHEETS"
|
|
278
|
-
# sheets_table.sheets? #=> true
|
|
279
|
-
#
|
|
280
|
-
def sheets?
|
|
281
|
-
@gapi.source_format == "GOOGLE_SHEETS"
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
##
|
|
285
|
-
# Whether the data format is "AVRO".
|
|
286
|
-
#
|
|
287
|
-
# @return [Boolean]
|
|
288
|
-
#
|
|
289
|
-
# @example
|
|
290
|
-
# require "google/cloud/bigquery"
|
|
291
|
-
#
|
|
292
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
293
|
-
#
|
|
294
|
-
# avro_url = "gs://bucket/path/to/data.avro"
|
|
295
|
-
# avro_table = bigquery.external avro_url
|
|
296
|
-
#
|
|
297
|
-
# avro_table.format #=> "AVRO"
|
|
298
|
-
# avro_table.avro? #=> true
|
|
299
|
-
#
|
|
300
|
-
def avro?
|
|
301
|
-
@gapi.source_format == "AVRO"
|
|
302
|
-
end
|
|
303
|
-
|
|
304
|
-
##
|
|
305
|
-
# Whether the data format is "DATASTORE_BACKUP".
|
|
306
|
-
#
|
|
307
|
-
# @return [Boolean]
|
|
308
|
-
#
|
|
309
|
-
# @example
|
|
310
|
-
# require "google/cloud/bigquery"
|
|
311
|
-
#
|
|
312
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
313
|
-
#
|
|
314
|
-
# backup_url = "gs://bucket/path/to/data.backup_info"
|
|
315
|
-
# backup_table = bigquery.external backup_url
|
|
316
|
-
#
|
|
317
|
-
# backup_table.format #=> "DATASTORE_BACKUP"
|
|
318
|
-
# backup_table.backup? #=> true
|
|
319
|
-
#
|
|
320
|
-
def backup?
|
|
321
|
-
@gapi.source_format == "DATASTORE_BACKUP"
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
##
|
|
325
|
-
# Whether the data format is "BIGTABLE".
|
|
326
|
-
#
|
|
327
|
-
# @return [Boolean]
|
|
328
|
-
#
|
|
329
|
-
# @example
|
|
330
|
-
# require "google/cloud/bigquery"
|
|
331
|
-
#
|
|
332
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
333
|
-
#
|
|
334
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
335
|
-
# bigtable_table = bigquery.external bigtable_url
|
|
336
|
-
#
|
|
337
|
-
# bigtable_table.format #=> "BIGTABLE"
|
|
338
|
-
# bigtable_table.bigtable? #=> true
|
|
339
|
-
#
|
|
340
|
-
def bigtable?
|
|
341
|
-
@gapi.source_format == "BIGTABLE"
|
|
342
|
-
end
|
|
343
|
-
|
|
344
|
-
##
|
|
345
|
-
# Whether the data format is "ORC".
|
|
346
|
-
#
|
|
347
|
-
# @return [Boolean]
|
|
348
|
-
#
|
|
349
|
-
# @example
|
|
350
|
-
# require "google/cloud/bigquery"
|
|
351
|
-
#
|
|
352
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
353
|
-
#
|
|
354
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
355
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
356
|
-
# external_data = bigquery.external gcs_uri, format: :orc do |ext|
|
|
357
|
-
# ext.hive_partitioning_mode = :auto
|
|
358
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
359
|
-
# end
|
|
360
|
-
# external_data.format #=> "ORC"
|
|
361
|
-
# external_data.orc? #=> true
|
|
362
|
-
#
|
|
363
|
-
def orc?
|
|
364
|
-
@gapi.source_format == "ORC"
|
|
365
|
-
end
|
|
366
|
-
|
|
367
|
-
##
|
|
368
|
-
# Whether the data format is "PARQUET".
|
|
369
|
-
#
|
|
370
|
-
# @return [Boolean]
|
|
371
|
-
#
|
|
372
|
-
# @example
|
|
373
|
-
# require "google/cloud/bigquery"
|
|
374
|
-
#
|
|
375
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
376
|
-
#
|
|
377
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
378
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
379
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
380
|
-
# ext.hive_partitioning_mode = :auto
|
|
381
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
382
|
-
# end
|
|
383
|
-
# external_data.format #=> "PARQUET"
|
|
384
|
-
# external_data.parquet? #=> true
|
|
385
|
-
#
|
|
386
|
-
def parquet?
|
|
387
|
-
@gapi.source_format == "PARQUET"
|
|
388
|
-
end
|
|
389
|
-
|
|
390
|
-
##
|
|
391
|
-
# The fully-qualified URIs that point to your data in Google Cloud.
|
|
392
|
-
# For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
|
|
393
|
-
# character and it must come after the 'bucket' name. Size limits
|
|
394
|
-
# related to load jobs apply to external data sources. For Google
|
|
395
|
-
# Cloud Bigtable URIs: Exactly one URI can be specified and it has to be
|
|
396
|
-
# a fully specified and valid HTTPS URL for a Google Cloud Bigtable
|
|
397
|
-
# table. For Google Cloud Datastore backups, exactly one URI can be
|
|
398
|
-
# specified, and it must end with '.backup_info'. Also, the '*'
|
|
399
|
-
# wildcard character is not allowed.
|
|
400
|
-
#
|
|
401
|
-
# @return [Array<String>]
|
|
402
|
-
#
|
|
403
|
-
# @example
|
|
404
|
-
# require "google/cloud/bigquery"
|
|
405
|
-
#
|
|
406
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
407
|
-
#
|
|
408
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
409
|
-
# csv_table = bigquery.external csv_url
|
|
410
|
-
#
|
|
411
|
-
# csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
|
|
412
|
-
#
|
|
413
|
-
def urls
|
|
414
|
-
@gapi.source_uris
|
|
415
|
-
end
|
|
416
|
-
|
|
417
|
-
##
|
|
418
|
-
# Indicates if the schema and format options are detected
|
|
419
|
-
# automatically.
|
|
420
|
-
#
|
|
421
|
-
# @return [Boolean]
|
|
422
|
-
#
|
|
423
|
-
# @example
|
|
424
|
-
# require "google/cloud/bigquery"
|
|
425
|
-
#
|
|
426
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
427
|
-
#
|
|
428
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
429
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
430
|
-
# csv.autodetect = true
|
|
431
|
-
# end
|
|
432
|
-
#
|
|
433
|
-
# csv_table.autodetect #=> true
|
|
434
|
-
#
|
|
435
|
-
def autodetect
|
|
436
|
-
@gapi.autodetect
|
|
437
|
-
end
|
|
438
|
-
|
|
439
|
-
##
|
|
440
|
-
# Set whether to detect schema and format options automatically. Any
|
|
441
|
-
# option specified explicitly will be honored.
|
|
442
|
-
#
|
|
443
|
-
# @param [Boolean] new_autodetect New autodetect value
|
|
444
|
-
#
|
|
445
|
-
# @example
|
|
446
|
-
# require "google/cloud/bigquery"
|
|
447
|
-
#
|
|
448
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
449
|
-
#
|
|
450
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
451
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
452
|
-
# csv.autodetect = true
|
|
453
|
-
# end
|
|
454
|
-
#
|
|
455
|
-
# csv_table.autodetect #=> true
|
|
456
|
-
#
|
|
457
|
-
def autodetect= new_autodetect
|
|
458
|
-
frozen_check!
|
|
459
|
-
@gapi.autodetect = new_autodetect
|
|
460
|
-
end
|
|
461
|
-
|
|
462
|
-
##
|
|
463
|
-
# The compression type of the data source. Possible values include
|
|
464
|
-
# `"GZIP"` and `nil`. The default value is `nil`. This setting is
|
|
465
|
-
# ignored for Google Cloud Bigtable, Google Cloud Datastore backups
|
|
466
|
-
# and Avro formats. Optional.
|
|
467
|
-
#
|
|
468
|
-
# @return [String]
|
|
469
|
-
#
|
|
470
|
-
# @example
|
|
471
|
-
# require "google/cloud/bigquery"
|
|
472
|
-
#
|
|
473
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
474
|
-
#
|
|
475
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
476
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
477
|
-
# csv.compression = "GZIP"
|
|
478
|
-
# end
|
|
479
|
-
#
|
|
480
|
-
# csv_table.compression #=> "GZIP"
|
|
481
|
-
def compression
|
|
482
|
-
@gapi.compression
|
|
483
|
-
end
|
|
484
|
-
|
|
485
|
-
##
|
|
486
|
-
# Set the compression type of the data source. Possible values include
|
|
487
|
-
# `"GZIP"` and `nil`. The default value is `nil`. This setting is
|
|
488
|
-
# ignored for Google Cloud Bigtable, Google Cloud Datastore backups
|
|
489
|
-
# and Avro formats. Optional.
|
|
490
|
-
#
|
|
491
|
-
# @param [String] new_compression New compression value
|
|
492
|
-
#
|
|
493
|
-
# @example
|
|
494
|
-
# require "google/cloud/bigquery"
|
|
495
|
-
#
|
|
496
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
497
|
-
#
|
|
498
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
499
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
500
|
-
# csv.compression = "GZIP"
|
|
501
|
-
# end
|
|
502
|
-
#
|
|
503
|
-
# csv_table.compression #=> "GZIP"
|
|
504
|
-
#
|
|
505
|
-
def compression= new_compression
|
|
506
|
-
frozen_check!
|
|
507
|
-
@gapi.compression = new_compression
|
|
508
|
-
end
|
|
509
|
-
|
|
510
|
-
##
|
|
511
|
-
# Indicates if BigQuery should allow extra values that are not
|
|
512
|
-
# represented in the table schema. If `true`, the extra values are
|
|
513
|
-
# ignored. If `false`, records with extra columns are treated as bad
|
|
514
|
-
# records, and if there are too many bad records, an invalid error is
|
|
515
|
-
# returned in the job result. The default value is `false`.
|
|
516
|
-
#
|
|
517
|
-
# BigQuery treats trailing columns as an extra in `CSV`, named values
|
|
518
|
-
# that don't match any column names in `JSON`. This setting is ignored
|
|
519
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
520
|
-
# formats. Optional.
|
|
521
|
-
#
|
|
522
|
-
# @return [Boolean]
|
|
523
|
-
#
|
|
524
|
-
# @example
|
|
525
|
-
# require "google/cloud/bigquery"
|
|
526
|
-
#
|
|
527
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
528
|
-
#
|
|
529
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
530
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
531
|
-
# csv.ignore_unknown = true
|
|
532
|
-
# end
|
|
533
|
-
#
|
|
534
|
-
# csv_table.ignore_unknown #=> true
|
|
535
|
-
#
|
|
536
|
-
def ignore_unknown
|
|
537
|
-
@gapi.ignore_unknown_values
|
|
538
|
-
end
|
|
539
|
-
|
|
540
|
-
##
|
|
541
|
-
# Set whether BigQuery should allow extra values that are not
|
|
542
|
-
# represented in the table schema. If `true`, the extra values are
|
|
543
|
-
# ignored. If `false`, records with extra columns are treated as bad
|
|
544
|
-
# records, and if there are too many bad records, an invalid error is
|
|
545
|
-
# returned in the job result. The default value is `false`.
|
|
546
|
-
#
|
|
547
|
-
# BigQuery treats trailing columns as an extra in `CSV`, named values
|
|
548
|
-
# that don't match any column names in `JSON`. This setting is ignored
|
|
549
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
550
|
-
# formats. Optional.
|
|
551
|
-
#
|
|
552
|
-
# @param [Boolean] new_ignore_unknown New ignore_unknown value
|
|
553
|
-
#
|
|
554
|
-
# @example
|
|
555
|
-
# require "google/cloud/bigquery"
|
|
556
|
-
#
|
|
557
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
558
|
-
#
|
|
559
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
560
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
561
|
-
# csv.ignore_unknown = true
|
|
562
|
-
# end
|
|
563
|
-
#
|
|
564
|
-
# csv_table.ignore_unknown #=> true
|
|
565
|
-
#
|
|
566
|
-
def ignore_unknown= new_ignore_unknown
|
|
567
|
-
frozen_check!
|
|
568
|
-
@gapi.ignore_unknown_values = new_ignore_unknown
|
|
569
|
-
end
|
|
570
|
-
|
|
571
|
-
##
|
|
572
|
-
# The maximum number of bad records that BigQuery can ignore when
|
|
573
|
-
# reading data. If the number of bad records exceeds this value, an
|
|
574
|
-
# invalid error is returned in the job result. The default value is 0,
|
|
575
|
-
# which requires that all records are valid. This setting is ignored
|
|
576
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
577
|
-
# formats.
|
|
578
|
-
#
|
|
579
|
-
# @return [Integer]
|
|
580
|
-
#
|
|
581
|
-
# @example
|
|
582
|
-
# require "google/cloud/bigquery"
|
|
583
|
-
#
|
|
584
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
585
|
-
#
|
|
586
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
587
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
588
|
-
# csv.max_bad_records = 10
|
|
589
|
-
# end
|
|
590
|
-
#
|
|
591
|
-
# csv_table.max_bad_records #=> 10
|
|
592
|
-
#
|
|
593
|
-
def max_bad_records
|
|
594
|
-
@gapi.max_bad_records
|
|
595
|
-
end
|
|
596
|
-
|
|
597
|
-
##
|
|
598
|
-
# Set the maximum number of bad records that BigQuery can ignore when
|
|
599
|
-
# reading data. If the number of bad records exceeds this value, an
|
|
600
|
-
# invalid error is returned in the job result. The default value is 0,
|
|
601
|
-
# which requires that all records are valid. This setting is ignored
|
|
602
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
603
|
-
# formats.
|
|
604
|
-
#
|
|
605
|
-
# @param [Integer] new_max_bad_records New max_bad_records value
|
|
606
|
-
#
|
|
607
|
-
# @example
|
|
608
|
-
# require "google/cloud/bigquery"
|
|
609
|
-
#
|
|
610
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
611
|
-
#
|
|
612
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
613
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
614
|
-
# csv.max_bad_records = 10
|
|
615
|
-
# end
|
|
616
|
-
#
|
|
617
|
-
# csv_table.max_bad_records #=> 10
|
|
618
|
-
#
|
|
619
|
-
def max_bad_records= new_max_bad_records
|
|
620
|
-
frozen_check!
|
|
621
|
-
@gapi.max_bad_records = new_max_bad_records
|
|
622
|
-
end
|
|
623
|
-
|
|
624
|
-
###
|
|
625
|
-
# Checks if hive partitioning options are set.
|
|
626
|
-
#
|
|
627
|
-
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
|
628
|
-
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
|
629
|
-
# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
|
|
630
|
-
# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
|
|
631
|
-
#
|
|
632
|
-
# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
|
|
633
|
-
#
|
|
634
|
-
# @example
|
|
635
|
-
# require "google/cloud/bigquery"
|
|
636
|
-
#
|
|
637
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
638
|
-
#
|
|
639
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
640
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
641
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
642
|
-
# ext.hive_partitioning_mode = :auto
|
|
643
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
644
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
645
|
-
# end
|
|
646
|
-
#
|
|
647
|
-
# external_data.hive_partitioning? #=> true
|
|
648
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
649
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
650
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
651
|
-
#
|
|
652
|
-
def hive_partitioning?
|
|
653
|
-
!@gapi.hive_partitioning_options.nil?
|
|
654
|
-
end
|
|
655
|
-
|
|
656
|
-
###
|
|
657
|
-
# The mode of hive partitioning to use when reading data. The following modes are supported:
|
|
658
|
-
#
|
|
659
|
-
# 1. `AUTO`: automatically infer partition key name(s) and type(s).
|
|
660
|
-
# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
|
|
661
|
-
# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
|
|
662
|
-
#
|
|
663
|
-
# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
|
|
664
|
-
#
|
|
665
|
-
# @example
|
|
666
|
-
# require "google/cloud/bigquery"
|
|
667
|
-
#
|
|
668
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
669
|
-
#
|
|
670
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
671
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
672
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
673
|
-
# ext.hive_partitioning_mode = :auto
|
|
674
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
675
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
676
|
-
# end
|
|
677
|
-
#
|
|
678
|
-
# external_data.hive_partitioning? #=> true
|
|
679
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
680
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
681
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
682
|
-
#
|
|
683
|
-
def hive_partitioning_mode
|
|
684
|
-
@gapi.hive_partitioning_options.mode if hive_partitioning?
|
|
685
|
-
end
|
|
686
|
-
|
|
687
|
-
##
|
|
688
|
-
# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
|
|
689
|
-
#
|
|
690
|
-
# 1. `auto`: automatically infer partition key name(s) and type(s).
|
|
691
|
-
# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
|
|
692
|
-
# 3. `custom`: partition key schema is encoded in the source URI prefix.
|
|
693
|
-
#
|
|
694
|
-
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
|
695
|
-
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
|
696
|
-
# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
|
|
697
|
-
# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
|
|
698
|
-
#
|
|
699
|
-
# See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
|
|
700
|
-
#
|
|
701
|
-
# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
|
|
702
|
-
#
|
|
703
|
-
# @example
|
|
704
|
-
# require "google/cloud/bigquery"
|
|
705
|
-
#
|
|
706
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
707
|
-
#
|
|
708
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
709
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
710
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
711
|
-
# ext.hive_partitioning_mode = :auto
|
|
712
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
713
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
714
|
-
# end
|
|
715
|
-
#
|
|
716
|
-
# external_data.hive_partitioning? #=> true
|
|
717
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
718
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
719
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
720
|
-
#
|
|
721
|
-
def hive_partitioning_mode= mode
|
|
722
|
-
@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
|
723
|
-
@gapi.hive_partitioning_options.mode = mode.to_s.upcase
|
|
724
|
-
end
|
|
725
|
-
|
|
726
|
-
###
|
|
727
|
-
# Whether queries over the table using this external data source require a partition filter that can be used
|
|
728
|
-
# for partition elimination to be specified. Note that this field should only be true when creating a
|
|
729
|
-
# permanent external table or querying a temporary external table.
|
|
730
|
-
#
|
|
731
|
-
# @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
|
|
732
|
-
#
|
|
733
|
-
# @example
|
|
734
|
-
# require "google/cloud/bigquery"
|
|
735
|
-
#
|
|
736
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
737
|
-
#
|
|
738
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
739
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
740
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
741
|
-
# ext.hive_partitioning_mode = :auto
|
|
742
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
743
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
744
|
-
# end
|
|
745
|
-
#
|
|
746
|
-
# external_data.hive_partitioning? #=> true
|
|
747
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
748
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
749
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
750
|
-
#
|
|
751
|
-
def hive_partitioning_require_partition_filter?
|
|
752
|
-
return false unless hive_partitioning?
|
|
753
|
-
!@gapi.hive_partitioning_options.require_partition_filter.nil?
|
|
754
|
-
end
|
|
755
|
-
|
|
756
|
-
##
|
|
757
|
-
# Sets whether queries over the table using this external data source require a partition filter
|
|
758
|
-
# that can be used for partition elimination to be specified.
|
|
759
|
-
#
|
|
760
|
-
# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
|
|
761
|
-
#
|
|
762
|
-
# @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
|
|
763
|
-
#
|
|
764
|
-
# @example
|
|
765
|
-
# require "google/cloud/bigquery"
|
|
766
|
-
#
|
|
767
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
768
|
-
#
|
|
769
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
770
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
771
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
772
|
-
# ext.hive_partitioning_mode = :auto
|
|
773
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
774
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
775
|
-
# end
|
|
776
|
-
#
|
|
777
|
-
# external_data.hive_partitioning? #=> true
|
|
778
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
779
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
780
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
781
|
-
#
|
|
782
|
-
def hive_partitioning_require_partition_filter= require_partition_filter
  # Refuse to modify a frozen data source, as the other setters in this
  # file do; raises ArgumentError when the object is frozen.
  frozen_check!
  # Lazily create the options container the first time any hive partitioning
  # attribute is assigned.
  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
  @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
end
|
|
786
|
-
|
|
787
|
-
##
|
|
788
|
-
# The common prefix for all source uris when hive partition detection is requested. The prefix must end
|
|
789
|
-
# immediately before the partition key encoding begins. For example, consider files following this data
|
|
790
|
-
# layout:
|
|
791
|
-
#
|
|
792
|
-
# ```
|
|
793
|
-
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
|
794
|
-
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
|
795
|
-
# ```
|
|
796
|
-
#
|
|
797
|
-
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
|
798
|
-
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
|
799
|
-
#
|
|
800
|
-
# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
|
|
801
|
-
#
|
|
802
|
-
# @example
|
|
803
|
-
# require "google/cloud/bigquery"
|
|
804
|
-
#
|
|
805
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
806
|
-
#
|
|
807
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
808
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
809
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
810
|
-
# ext.hive_partitioning_mode = :auto
|
|
811
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
812
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
813
|
-
# end
|
|
814
|
-
#
|
|
815
|
-
# external_data.hive_partitioning? #=> true
|
|
816
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
817
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
818
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
819
|
-
#
|
|
820
|
-
def hive_partitioning_source_uri_prefix
  # No hive partitioning options means no prefix; the result is nil.
  return unless hive_partitioning?

  @gapi.hive_partitioning_options.source_uri_prefix
end
|
|
823
|
-
|
|
824
|
-
##
|
|
825
|
-
# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
|
|
826
|
-
# immediately before the partition key encoding begins. For example, consider files following this data
|
|
827
|
-
# layout:
|
|
828
|
-
#
|
|
829
|
-
# ```
|
|
830
|
-
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
|
831
|
-
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
|
832
|
-
# ```
|
|
833
|
-
#
|
|
834
|
-
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
|
835
|
-
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
|
836
|
-
#
|
|
837
|
-
# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
|
|
838
|
-
#
|
|
839
|
-
# @param [String] source_uri_prefix The common prefix for all source uris.
|
|
840
|
-
#
|
|
841
|
-
# @example
|
|
842
|
-
# require "google/cloud/bigquery"
|
|
843
|
-
#
|
|
844
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
845
|
-
#
|
|
846
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
847
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
848
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
849
|
-
# ext.hive_partitioning_mode = :auto
|
|
850
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
851
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
852
|
-
# end
|
|
853
|
-
#
|
|
854
|
-
# external_data.hive_partitioning? #=> true
|
|
855
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
856
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
857
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
858
|
-
#
|
|
859
|
-
def hive_partitioning_source_uri_prefix= source_uri_prefix
  # Refuse to modify a frozen data source, as the other setters in this
  # file do; raises ArgumentError when the object is frozen.
  frozen_check!
  # Lazily create the options container the first time any hive partitioning
  # attribute is assigned.
  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
  @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
end
|
|
863
|
-
|
|
864
|
-
##
|
|
865
|
-
# @private Google API Client object.
|
|
866
|
-
def to_gapi
  # Hand back the wrapped API object itself, not a copy.
  @gapi
end
|
|
869
|
-
|
|
870
|
-
##
|
|
871
|
-
# @private Google API Client object.
|
|
872
|
-
def self.from_gapi gapi
  # Build an empty instance, then swap in the supplied API object.
  new.tap do |source|
    source.instance_variable_set :@gapi, gapi
  end
end
|
|
877
|
-
|
|
878
|
-
protected
|
|
879
|
-
|
|
880
|
-
def frozen_check!
  # Guard used by setters: a frozen data source must not be modified.
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
|
|
884
|
-
end
|
|
885
|
-
|
|
886
|
-
##
|
|
887
|
-
# # CsvSource
|
|
888
|
-
#
|
|
889
|
-
# {External::CsvSource} is a subclass of {External::DataSource} and
|
|
890
|
-
# represents a CSV external data source that can be queried from
|
|
891
|
-
# directly, such as Google Cloud Storage or Google Drive, even though
|
|
892
|
-
# the data is not stored in BigQuery. Instead of loading or streaming
|
|
893
|
-
# the data, this object references the external data source.
|
|
894
|
-
#
|
|
895
|
-
# @example
|
|
896
|
-
# require "google/cloud/bigquery"
|
|
897
|
-
#
|
|
898
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
899
|
-
#
|
|
900
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
901
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
902
|
-
# csv.autodetect = true
|
|
903
|
-
# csv.skip_leading_rows = 1
|
|
904
|
-
# end
|
|
905
|
-
#
|
|
906
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
907
|
-
# external: { my_ext_table: csv_table }
|
|
908
|
-
#
|
|
909
|
-
# # Iterate over the first page of results
|
|
910
|
-
# data.each do |row|
|
|
911
|
-
# puts row[:name]
|
|
912
|
-
# end
|
|
913
|
-
# # Retrieve the next page of results
|
|
914
|
-
# data = data.next if data.next?
|
|
915
|
-
#
|
|
916
|
-
class CsvSource < External::DataSource
|
|
917
|
-
##
|
|
918
|
-
# @private Create an empty CsvSource object.
|
|
919
|
-
def initialize
  super
  # Start with an empty CSV options object so the accessors below can read
  # and write it without nil checks.
  csv_options = Google::Apis::BigqueryV2::CsvOptions.new
  @gapi.csv_options = csv_options
end
|
|
923
|
-
|
|
924
|
-
##
|
|
925
|
-
# Indicates if BigQuery should accept rows that are missing trailing
|
|
926
|
-
# optional columns.
|
|
927
|
-
#
|
|
928
|
-
# @return [Boolean]
|
|
929
|
-
#
|
|
930
|
-
# @example
|
|
931
|
-
# require "google/cloud/bigquery"
|
|
932
|
-
#
|
|
933
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
934
|
-
#
|
|
935
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
936
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
937
|
-
# csv.jagged_rows = true
|
|
938
|
-
# end
|
|
939
|
-
#
|
|
940
|
-
# csv_table.jagged_rows #=> true
|
|
941
|
-
#
|
|
942
|
-
def jagged_rows
  # Read straight from the wrapped CSV options (`allow_jagged_rows`).
  csv_options = @gapi.csv_options
  csv_options.allow_jagged_rows
end
|
|
945
|
-
|
|
946
|
-
##
|
|
947
|
-
# Set whether BigQuery should accept rows that are missing trailing
|
|
948
|
-
# optional columns.
|
|
949
|
-
#
|
|
950
|
-
# @param [Boolean] new_jagged_rows New jagged_rows value
|
|
951
|
-
#
|
|
952
|
-
# @example
|
|
953
|
-
# require "google/cloud/bigquery"
|
|
954
|
-
#
|
|
955
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
956
|
-
#
|
|
957
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
958
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
959
|
-
# csv.jagged_rows = true
|
|
960
|
-
# end
|
|
961
|
-
#
|
|
962
|
-
# csv_table.jagged_rows #=> true
|
|
963
|
-
#
|
|
964
|
-
def jagged_rows= new_jagged_rows
  # Reject the change when this data source is frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.allow_jagged_rows = new_jagged_rows
end
|
|
968
|
-
|
|
969
|
-
##
|
|
970
|
-
# Indicates if BigQuery should allow quoted data sections that contain
|
|
971
|
-
# newline characters in a CSV file.
|
|
972
|
-
#
|
|
973
|
-
# @return [Boolean]
|
|
974
|
-
#
|
|
975
|
-
# @example
|
|
976
|
-
# require "google/cloud/bigquery"
|
|
977
|
-
#
|
|
978
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
979
|
-
#
|
|
980
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
981
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
982
|
-
# csv.quoted_newlines = true
|
|
983
|
-
# end
|
|
984
|
-
#
|
|
985
|
-
# csv_table.quoted_newlines #=> true
|
|
986
|
-
#
|
|
987
|
-
def quoted_newlines
  # Read straight from the wrapped CSV options (`allow_quoted_newlines`).
  csv_options = @gapi.csv_options
  csv_options.allow_quoted_newlines
end
|
|
990
|
-
|
|
991
|
-
##
|
|
992
|
-
# Set whether BigQuery should allow quoted data sections that contain
|
|
993
|
-
# newline characters in a CSV file.
|
|
994
|
-
#
|
|
995
|
-
# @param [Boolean] new_quoted_newlines New quoted_newlines value
|
|
996
|
-
#
|
|
997
|
-
# @example
|
|
998
|
-
# require "google/cloud/bigquery"
|
|
999
|
-
#
|
|
1000
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1001
|
-
#
|
|
1002
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1003
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1004
|
-
# csv.quoted_newlines = true
|
|
1005
|
-
# end
|
|
1006
|
-
#
|
|
1007
|
-
# csv_table.quoted_newlines #=> true
|
|
1008
|
-
#
|
|
1009
|
-
def quoted_newlines= new_quoted_newlines
  # Reject the change when this data source is frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.allow_quoted_newlines = new_quoted_newlines
end
|
|
1013
|
-
|
|
1014
|
-
##
|
|
1015
|
-
# The character encoding of the data.
|
|
1016
|
-
#
|
|
1017
|
-
# @return [String]
|
|
1018
|
-
#
|
|
1019
|
-
# @example
|
|
1020
|
-
# require "google/cloud/bigquery"
|
|
1021
|
-
#
|
|
1022
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1023
|
-
#
|
|
1024
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1025
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1026
|
-
# csv.encoding = "UTF-8"
|
|
1027
|
-
# end
|
|
1028
|
-
#
|
|
1029
|
-
# csv_table.encoding #=> "UTF-8"
|
|
1030
|
-
#
|
|
1031
|
-
def encoding
  # Read straight from the wrapped CSV options; nil when never set.
  csv_options = @gapi.csv_options
  csv_options.encoding
end
|
|
1034
|
-
|
|
1035
|
-
##
|
|
1036
|
-
# Set the character encoding of the data.
|
|
1037
|
-
#
|
|
1038
|
-
# @param [String] new_encoding New encoding value
|
|
1039
|
-
#
|
|
1040
|
-
# @example
|
|
1041
|
-
# require "google/cloud/bigquery"
|
|
1042
|
-
#
|
|
1043
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1044
|
-
#
|
|
1045
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1046
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1047
|
-
# csv.encoding = "UTF-8"
|
|
1048
|
-
# end
|
|
1049
|
-
#
|
|
1050
|
-
# csv_table.encoding #=> "UTF-8"
|
|
1051
|
-
#
|
|
1052
|
-
def encoding= new_encoding
  # Reject the change when this data source is frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.encoding = new_encoding
end
|
|
1056
|
-
|
|
1057
|
-
##
|
|
1058
|
-
# Checks if the character encoding of the data is "UTF-8". This is the
|
|
1059
|
-
# default.
|
|
1060
|
-
#
|
|
1061
|
-
# @return [Boolean]
|
|
1062
|
-
#
|
|
1063
|
-
# @example
|
|
1064
|
-
# require "google/cloud/bigquery"
|
|
1065
|
-
#
|
|
1066
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1067
|
-
#
|
|
1068
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1069
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1070
|
-
# csv.encoding = "UTF-8"
|
|
1071
|
-
# end
|
|
1072
|
-
#
|
|
1073
|
-
# csv_table.encoding #=> "UTF-8"
|
|
1074
|
-
# csv_table.utf8? #=> true
|
|
1075
|
-
#
|
|
1076
|
-
def utf8?
  # An unset encoding counts as UTF-8.
  encoding.nil? || encoding == "UTF-8"
end
|
|
1080
|
-
|
|
1081
|
-
##
|
|
1082
|
-
# Checks if the character encoding of the data is "ISO-8859-1".
|
|
1083
|
-
#
|
|
1084
|
-
# @return [Boolean]
|
|
1085
|
-
#
|
|
1086
|
-
# @example
|
|
1087
|
-
# require "google/cloud/bigquery"
|
|
1088
|
-
#
|
|
1089
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1090
|
-
#
|
|
1091
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1092
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1093
|
-
# csv.encoding = "ISO-8859-1"
|
|
1094
|
-
# end
|
|
1095
|
-
#
|
|
1096
|
-
# csv_table.encoding #=> "ISO-8859-1"
|
|
1097
|
-
# csv_table.iso8859_1? #=> true
|
|
1098
|
-
#
|
|
1099
|
-
def iso8859_1?
  # Only an explicit "ISO-8859-1" matches; an unset encoding does not.
  current_encoding = encoding
  current_encoding == "ISO-8859-1"
end
|
|
1102
|
-
|
|
1103
|
-
##
|
|
1104
|
-
# The separator for fields in a CSV file.
|
|
1105
|
-
#
|
|
1106
|
-
# @return [String]
|
|
1107
|
-
#
|
|
1108
|
-
# @example
|
|
1109
|
-
# require "google/cloud/bigquery"
|
|
1110
|
-
#
|
|
1111
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1112
|
-
#
|
|
1113
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1114
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1115
|
-
# csv.delimiter = "|"
|
|
1116
|
-
# end
|
|
1117
|
-
#
|
|
1118
|
-
# csv_table.delimiter #=> "|"
|
|
1119
|
-
#
|
|
1120
|
-
def delimiter
  # Read straight from the wrapped CSV options (`field_delimiter`).
  csv_options = @gapi.csv_options
  csv_options.field_delimiter
end
|
|
1123
|
-
|
|
1124
|
-
##
|
|
1125
|
-
# Set the separator for fields in a CSV file.
|
|
1126
|
-
#
|
|
1127
|
-
# @param [String] new_delimiter New delimiter value
|
|
1128
|
-
#
|
|
1129
|
-
# @example
|
|
1130
|
-
# require "google/cloud/bigquery"
|
|
1131
|
-
#
|
|
1132
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1133
|
-
#
|
|
1134
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1135
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1136
|
-
# csv.delimiter = "|"
|
|
1137
|
-
# end
|
|
1138
|
-
#
|
|
1139
|
-
# csv_table.delimiter #=> "|"
|
|
1140
|
-
#
|
|
1141
|
-
def delimiter= new_delimiter
  # Reject the change when this data source is frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.field_delimiter = new_delimiter
end
|
|
1145
|
-
|
|
1146
|
-
##
|
|
1147
|
-
# The value that is used to quote data sections in a CSV file.
|
|
1148
|
-
#
|
|
1149
|
-
# @return [String]
|
|
1150
|
-
#
|
|
1151
|
-
# @example
|
|
1152
|
-
# require "google/cloud/bigquery"
|
|
1153
|
-
#
|
|
1154
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1155
|
-
#
|
|
1156
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1157
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1158
|
-
# csv.quote = "'"
|
|
1159
|
-
# end
|
|
1160
|
-
#
|
|
1161
|
-
# csv_table.quote #=> "'"
|
|
1162
|
-
#
|
|
1163
|
-
def quote
  # Read straight from the wrapped CSV options.
  csv_options = @gapi.csv_options
  csv_options.quote
end
|
|
1166
|
-
|
|
1167
|
-
##
|
|
1168
|
-
# Set the value that is used to quote data sections in a CSV file.
|
|
1169
|
-
#
|
|
1170
|
-
# @param [String] new_quote New quote value
|
|
1171
|
-
#
|
|
1172
|
-
# @example
|
|
1173
|
-
# require "google/cloud/bigquery"
|
|
1174
|
-
#
|
|
1175
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1176
|
-
#
|
|
1177
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1178
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1179
|
-
# csv.quote = "'"
|
|
1180
|
-
# end
|
|
1181
|
-
#
|
|
1182
|
-
# csv_table.quote #=> "'"
|
|
1183
|
-
#
|
|
1184
|
-
def quote= new_quote
  # Reject the change when this data source is frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.quote = new_quote
end
|
|
1188
|
-
|
|
1189
|
-
##
|
|
1190
|
-
# The number of rows at the top of a CSV file that BigQuery will skip
|
|
1191
|
-
# when reading the data.
|
|
1192
|
-
#
|
|
1193
|
-
# @return [Integer]
|
|
1194
|
-
#
|
|
1195
|
-
# @example
|
|
1196
|
-
# require "google/cloud/bigquery"
|
|
1197
|
-
#
|
|
1198
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1199
|
-
#
|
|
1200
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1201
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1202
|
-
# csv.skip_leading_rows = 1
|
|
1203
|
-
# end
|
|
1204
|
-
#
|
|
1205
|
-
# csv_table.skip_leading_rows #=> 1
|
|
1206
|
-
#
|
|
1207
|
-
def skip_leading_rows
  # Read straight from the wrapped CSV options.
  csv_options = @gapi.csv_options
  csv_options.skip_leading_rows
end
|
|
1210
|
-
|
|
1211
|
-
##
|
|
1212
|
-
# Set the number of rows at the top of a CSV file that BigQuery will
|
|
1213
|
-
# skip when reading the data.
|
|
1214
|
-
#
|
|
1215
|
-
# @param [Integer] row_count New skip_leading_rows value
|
|
1216
|
-
#
|
|
1217
|
-
# @example
|
|
1218
|
-
# require "google/cloud/bigquery"
|
|
1219
|
-
#
|
|
1220
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1221
|
-
#
|
|
1222
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1223
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1224
|
-
# csv.skip_leading_rows = 1
|
|
1225
|
-
# end
|
|
1226
|
-
#
|
|
1227
|
-
# csv_table.skip_leading_rows #=> 1
|
|
1228
|
-
#
|
|
1229
|
-
def skip_leading_rows= row_count
  # Reject the change when this data source is frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.skip_leading_rows = row_count
end
|
|
1233
|
-
|
|
1234
|
-
##
|
|
1235
|
-
# The schema for the data.
|
|
1236
|
-
#
|
|
1237
|
-
# @param [Boolean] replace Whether to replace the existing schema with
|
|
1238
|
-
# the new schema. If `true`, the fields will replace the existing
|
|
1239
|
-
# schema. If `false`, the fields will be added to the existing
|
|
1240
|
-
# schema. The default value is `false`.
|
|
1241
|
-
# @yield [schema] a block for setting the schema
|
|
1242
|
-
# @yieldparam [Schema] schema the object accepting the schema
|
|
1243
|
-
#
|
|
1244
|
-
# @return [Google::Cloud::Bigquery::Schema]
|
|
1245
|
-
#
|
|
1246
|
-
# @example
|
|
1247
|
-
# require "google/cloud/bigquery"
|
|
1248
|
-
#
|
|
1249
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1250
|
-
#
|
|
1251
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1252
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1253
|
-
# csv.schema do |schema|
|
|
1254
|
-
# schema.string "name", mode: :required
|
|
1255
|
-
# schema.string "email", mode: :required
|
|
1256
|
-
# schema.integer "age", mode: :required
|
|
1257
|
-
# schema.boolean "active", mode: :required
|
|
1258
|
-
# end
|
|
1259
|
-
# end
|
|
1260
|
-
#
|
|
1261
|
-
def schema replace: false
  # Build the schema wrapper from the API object on first access and reuse
  # it afterwards.
  @schema ||= Schema.from_gapi @gapi.schema
  if replace
    # Replacing is a modification, so it is refused on a frozen source.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # A frozen data source exposes a frozen schema.
  @schema.freeze if frozen?
  # Yield the schema to the optional block, then return it.
  @schema.tap { |current| yield current if block_given? }
end
|
|
1271
|
-
|
|
1272
|
-
##
|
|
1273
|
-
# Set the schema for the data.
|
|
1274
|
-
#
|
|
1275
|
-
# @param [Schema] new_schema The schema object.
|
|
1276
|
-
#
|
|
1277
|
-
# @example
|
|
1278
|
-
# require "google/cloud/bigquery"
|
|
1279
|
-
#
|
|
1280
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1281
|
-
#
|
|
1282
|
-
# csv_schema = bigquery.schema do |schema|
|
|
1283
|
-
# schema.string "name", mode: :required
|
|
1284
|
-
# schema.string "email", mode: :required
|
|
1285
|
-
# schema.integer "age", mode: :required
|
|
1286
|
-
# schema.boolean "active", mode: :required
|
|
1287
|
-
# end
|
|
1288
|
-
#
|
|
1289
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1290
|
-
# csv_table = bigquery.external csv_url
|
|
1291
|
-
# csv_table.schema = csv_schema
|
|
1292
|
-
#
|
|
1293
|
-
def schema= new_schema
  # Reject the change when this data source is frozen. The new schema is
  # only cached here; `to_gapi` copies it onto the API object.
  frozen_check!
  @schema = new_schema
end
|
|
1297
|
-
|
|
1298
|
-
##
|
|
1299
|
-
# The fields of the schema.
|
|
1300
|
-
#
|
|
1301
|
-
# @return [Array<Schema::Field>] An array of field objects.
|
|
1302
|
-
#
|
|
1303
|
-
def fields
  # Delegates to the schema object.
  current_schema = schema
  current_schema.fields
end
|
|
1306
|
-
|
|
1307
|
-
##
|
|
1308
|
-
# The names of the columns in the schema.
|
|
1309
|
-
#
|
|
1310
|
-
# @return [Array<Symbol>] An array of column names.
|
|
1311
|
-
#
|
|
1312
|
-
def headers
  # Delegates to the schema object.
  current_schema = schema
  current_schema.headers
end
|
|
1315
|
-
|
|
1316
|
-
##
|
|
1317
|
-
# The types of the fields in the data in the schema, using the same
|
|
1318
|
-
# format as the optional query parameter types.
|
|
1319
|
-
#
|
|
1320
|
-
# @return [Hash] A hash with field names as keys, and types as values.
|
|
1321
|
-
#
|
|
1322
|
-
def param_types
  # Delegates to the schema object.
  current_schema = schema
  current_schema.param_types
end
|
|
1325
|
-
|
|
1326
|
-
##
|
|
1327
|
-
# @private Google API Client object.
|
|
1328
|
-
def to_gapi
  # Copy the cached schema (if any) onto the API object before returning it.
  @gapi.tap do |gapi|
    gapi.schema = @schema.to_gapi if @schema
  end
end
|
|
1332
|
-
|
|
1333
|
-
##
|
|
1334
|
-
# @private Google API Client object.
|
|
1335
|
-
def self.from_gapi gapi
  # Let the parent build the instance around +gapi+, then attach a schema
  # wrapper built from the API object's schema.
  super.tap do |source|
    source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
  end
end
|
|
1341
|
-
end
|
|
1342
|
-
|
|
1343
|
-
##
|
|
1344
|
-
# # JsonSource
|
|
1345
|
-
#
|
|
1346
|
-
# {External::JsonSource} is a subclass of {External::DataSource} and
|
|
1347
|
-
# represents a JSON external data source that can be queried from
|
|
1348
|
-
# directly, such as Google Cloud Storage or Google Drive, even though
|
|
1349
|
-
# the data is not stored in BigQuery. Instead of loading or streaming
|
|
1350
|
-
# the data, this object references the external data source.
|
|
1351
|
-
#
|
|
1352
|
-
# @example
|
|
1353
|
-
# require "google/cloud/bigquery"
|
|
1354
|
-
#
|
|
1355
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1356
|
-
#
|
|
1357
|
-
# require "google/cloud/bigquery"
|
|
1358
|
-
#
|
|
1359
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1360
|
-
#
|
|
1361
|
-
# json_url = "gs://bucket/path/to/data.json"
|
|
1362
|
-
# json_table = bigquery.external json_url do |json|
|
|
1363
|
-
# json.schema do |schema|
|
|
1364
|
-
# schema.string "name", mode: :required
|
|
1365
|
-
# schema.string "email", mode: :required
|
|
1366
|
-
# schema.integer "age", mode: :required
|
|
1367
|
-
# schema.boolean "active", mode: :required
|
|
1368
|
-
# end
|
|
1369
|
-
# end
|
|
1370
|
-
#
|
|
1371
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
1372
|
-
# external: { my_ext_table: json_table }
|
|
1373
|
-
#
|
|
1374
|
-
# # Iterate over the first page of results
|
|
1375
|
-
# data.each do |row|
|
|
1376
|
-
# puts row[:name]
|
|
1377
|
-
# end
|
|
1378
|
-
# # Retrieve the next page of results
|
|
1379
|
-
# data = data.next if data.next?
|
|
1380
|
-
#
|
|
1381
|
-
class JsonSource < External::DataSource
|
|
1382
|
-
##
|
|
1383
|
-
# The schema for the data.
|
|
1384
|
-
#
|
|
1385
|
-
# @param [Boolean] replace Whether to replace the existing schema with
|
|
1386
|
-
# the new schema. If `true`, the fields will replace the existing
|
|
1387
|
-
# schema. If `false`, the fields will be added to the existing
|
|
1388
|
-
# schema. The default value is `false`.
|
|
1389
|
-
# @yield [schema] a block for setting the schema
|
|
1390
|
-
# @yieldparam [Schema] schema the object accepting the schema
|
|
1391
|
-
#
|
|
1392
|
-
# @return [Google::Cloud::Bigquery::Schema]
|
|
1393
|
-
#
|
|
1394
|
-
# @example
|
|
1395
|
-
# require "google/cloud/bigquery"
|
|
1396
|
-
#
|
|
1397
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1398
|
-
#
|
|
1399
|
-
# json_url = "gs://bucket/path/to/data.json"
|
|
1400
|
-
# json_table = bigquery.external json_url do |json|
|
|
1401
|
-
# json.schema do |schema|
|
|
1402
|
-
# schema.string "name", mode: :required
|
|
1403
|
-
# schema.string "email", mode: :required
|
|
1404
|
-
# schema.integer "age", mode: :required
|
|
1405
|
-
# schema.boolean "active", mode: :required
|
|
1406
|
-
# end
|
|
1407
|
-
# end
|
|
1408
|
-
#
|
|
1409
|
-
def schema replace: false
  # Build the schema wrapper from the API object on first access and reuse
  # it afterwards.
  @schema ||= Schema.from_gapi @gapi.schema
  if replace
    # Replacing is a modification, so it is refused on a frozen source.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # A frozen data source exposes a frozen schema.
  @schema.freeze if frozen?
  # Yield the schema to the optional block, then return it.
  @schema.tap { |current| yield current if block_given? }
end
|
|
1419
|
-
|
|
1420
|
-
##
|
|
1421
|
-
# Set the schema for the data.
|
|
1422
|
-
#
|
|
1423
|
-
# @param [Schema] new_schema The schema object.
|
|
1424
|
-
#
|
|
1425
|
-
# @example
|
|
1426
|
-
# require "google/cloud/bigquery"
|
|
1427
|
-
#
|
|
1428
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1429
|
-
#
|
|
1430
|
-
# json_schema = bigquery.schema do |schema|
|
|
1431
|
-
# schema.string "name", mode: :required
|
|
1432
|
-
# schema.string "email", mode: :required
|
|
1433
|
-
# schema.integer "age", mode: :required
|
|
1434
|
-
# schema.boolean "active", mode: :required
|
|
1435
|
-
# end
|
|
1436
|
-
#
|
|
1437
|
-
# json_url = "gs://bucket/path/to/data.json"
|
|
1438
|
-
# json_table = bigquery.external json_url
|
|
1439
|
-
# json_table.schema = json_schema
|
|
1440
|
-
#
|
|
1441
|
-
def schema= new_schema
  # Reject the change when this data source is frozen. The new schema is
  # only cached here; `to_gapi` copies it onto the API object.
  frozen_check!
  @schema = new_schema
end
|
|
1445
|
-
|
|
1446
|
-
##
|
|
1447
|
-
# The fields of the schema.
|
|
1448
|
-
#
|
|
1449
|
-
# @return [Array<Schema::Field>] An array of field objects.
|
|
1450
|
-
#
|
|
1451
|
-
def fields
  # Delegates to the schema object.
  current_schema = schema
  current_schema.fields
end
|
|
1454
|
-
|
|
1455
|
-
##
|
|
1456
|
-
# The names of the columns in the schema.
|
|
1457
|
-
#
|
|
1458
|
-
# @return [Array<Symbol>] An array of column names.
|
|
1459
|
-
#
|
|
1460
|
-
def headers
  # Delegates to the schema object.
  current_schema = schema
  current_schema.headers
end
|
|
1463
|
-
|
|
1464
|
-
##
|
|
1465
|
-
# The types of the fields in the data in the schema, using the same
|
|
1466
|
-
# format as the optional query parameter types.
|
|
1467
|
-
#
|
|
1468
|
-
# @return [Hash] A hash with field names as keys, and types as values.
|
|
1469
|
-
#
|
|
1470
|
-
def param_types
  # Delegates to the schema object.
  current_schema = schema
  current_schema.param_types
end
|
|
1473
|
-
|
|
1474
|
-
##
|
|
1475
|
-
# @private Google API Client object.
|
|
1476
|
-
def to_gapi
  # Copy the cached schema (if any) onto the API object before returning it.
  @gapi.tap do |gapi|
    gapi.schema = @schema.to_gapi if @schema
  end
end
|
|
1480
|
-
|
|
1481
|
-
##
|
|
1482
|
-
# @private Google API Client object.
|
|
1483
|
-
def self.from_gapi gapi
  # Let the parent build the instance around +gapi+, then attach a schema
  # wrapper built from the API object's schema.
  super.tap do |source|
    source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
  end
end
|
|
1489
|
-
end
|
|
1490
|
-
|
|
1491
|
-
##
|
|
1492
|
-
# # SheetsSource
|
|
1493
|
-
#
|
|
1494
|
-
# {External::SheetsSource} is a subclass of {External::DataSource} and
|
|
1495
|
-
# represents a Google Sheets external data source that can be queried
|
|
1496
|
-
# from directly, even though the data is not stored in BigQuery. Instead
|
|
1497
|
-
# of loading or streaming the data, this object references the external
|
|
1498
|
-
# data source.
|
|
1499
|
-
#
|
|
1500
|
-
# @example
|
|
1501
|
-
# require "google/cloud/bigquery"
|
|
1502
|
-
#
|
|
1503
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1504
|
-
#
|
|
1505
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
1506
|
-
# sheets_table = bigquery.external sheets_url do |sheets|
|
|
1507
|
-
# sheets.skip_leading_rows = 1
|
|
1508
|
-
# end
|
|
1509
|
-
#
|
|
1510
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
1511
|
-
# external: { my_ext_table: sheets_table }
|
|
1512
|
-
#
|
|
1513
|
-
# # Iterate over the first page of results
|
|
1514
|
-
# data.each do |row|
|
|
1515
|
-
# puts row[:name]
|
|
1516
|
-
# end
|
|
1517
|
-
# # Retrieve the next page of results
|
|
1518
|
-
# data = data.next if data.next?
|
|
1519
|
-
#
|
|
1520
|
-
class SheetsSource < External::DataSource
|
|
1521
|
-
##
|
|
1522
|
-
# @private Create an empty SheetsSource object.
|
|
1523
|
-
def initialize
  super
  # Start with an empty Sheets options object so the accessors below can
  # read and write it without nil checks.
  sheets_options = Google::Apis::BigqueryV2::GoogleSheetsOptions.new
  @gapi.google_sheets_options = sheets_options
end
|
|
1527
|
-
|
|
1528
|
-
##
|
|
1529
|
-
# The number of rows at the top of a sheet that BigQuery will skip
|
|
1530
|
-
# when reading the data. The default value is `0`.
|
|
1531
|
-
#
|
|
1532
|
-
# This property is useful if you have header rows that should be
|
|
1533
|
-
# skipped. When `autodetect` is on, behavior is the following:
|
|
1534
|
-
#
|
|
1535
|
-
# * `nil` - Autodetect tries to detect headers in the first row. If
|
|
1536
|
-
# they are not detected, the row is read as data. Otherwise data is
|
|
1537
|
-
# read starting from the second row.
|
|
1538
|
-
# * `0` - Instructs autodetect that there are no headers and data
|
|
1539
|
-
# should be read starting from the first row.
|
|
1540
|
-
# * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
|
|
1541
|
-
# in row `N`. If headers are not detected, row `N` is just skipped.
|
|
1542
|
-
# Otherwise row `N` is used to extract column names for the detected
|
|
1543
|
-
# schema.
|
|
1544
|
-
#
|
|
1545
|
-
# @return [Integer]
|
|
1546
|
-
#
|
|
1547
|
-
# @example
|
|
1548
|
-
# require "google/cloud/bigquery"
|
|
1549
|
-
#
|
|
1550
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1551
|
-
#
|
|
1552
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
1553
|
-
# sheets_table = bigquery.external sheets_url do |sheets|
|
|
1554
|
-
# sheets.skip_leading_rows = 1
|
|
1555
|
-
# end
|
|
1556
|
-
#
|
|
1557
|
-
# sheets_table.skip_leading_rows #=> 1
|
|
1558
|
-
#
|
|
1559
|
-
def skip_leading_rows
  # Read straight from the wrapped Google Sheets options.
  sheets_options = @gapi.google_sheets_options
  sheets_options.skip_leading_rows
end
|
|
1562
|
-
|
|
1563
|
-
##
|
|
1564
|
-
# Set the number of rows at the top of a sheet that BigQuery will skip
|
|
1565
|
-
# when reading the data.
|
|
1566
|
-
#
|
|
1567
|
-
# @param [Integer] row_count New skip_leading_rows value
|
|
1568
|
-
#
|
|
1569
|
-
# @example
|
|
1570
|
-
# require "google/cloud/bigquery"
|
|
1571
|
-
#
|
|
1572
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1573
|
-
#
|
|
1574
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
1575
|
-
# sheets_table = bigquery.external sheets_url do |sheets|
|
|
1576
|
-
# sheets.skip_leading_rows = 1
|
|
1577
|
-
# end
|
|
1578
|
-
#
|
|
1579
|
-
# sheets_table.skip_leading_rows #=> 1
|
|
1580
|
-
#
|
|
1581
|
-
def skip_leading_rows= row_count
  # Reject the change when this data source is frozen.
  frozen_check!
  sheets_options = @gapi.google_sheets_options
  sheets_options.skip_leading_rows = row_count
end
|
|
1585
|
-
|
|
1586
|
-
##
|
|
1587
|
-
# Range of a sheet to query from. Only used when non-empty. Typical
|
|
1588
|
-
# format: `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
|
|
1589
|
-
#
|
|
1590
|
-
# @return [String] Range of a sheet to query from.
|
|
1591
|
-
#
|
|
1592
|
-
# @example
|
|
1593
|
-
# require "google/cloud/bigquery"
|
|
1594
|
-
#
|
|
1595
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1596
|
-
#
|
|
1597
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
1598
|
-
# sheets_table = bigquery.external sheets_url do |sheets|
|
|
1599
|
-
# sheets.range = "sheet1!A1:B20"
|
|
1600
|
-
# end
|
|
1601
|
-
#
|
|
1602
|
-
# sheets_table.range #=> "sheet1!A1:B20"
|
|
1603
|
-
#
|
|
1604
|
-
def range
  # Read straight from the wrapped Google Sheets options.
  sheets_options = @gapi.google_sheets_options
  sheets_options.range
end
|
|
1607
|
-
|
|
1608
|
-
##
|
|
1609
|
-
# Set the range of a sheet to query from. Only used when non-empty.
|
|
1610
|
-
# Typical format:
|
|
1611
|
-
# `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
|
|
1612
|
-
#
|
|
1613
|
-
# @param [String] new_range New range of a sheet to query from.
|
|
1614
|
-
#
|
|
1615
|
-
# @example
|
|
1616
|
-
# require "google/cloud/bigquery"
|
|
1617
|
-
#
|
|
1618
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1619
|
-
#
|
|
1620
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
1621
|
-
# sheets_table = bigquery.external sheets_url do |sheets|
|
|
1622
|
-
# sheets.range = "sheet1!A1:B20"
|
|
1623
|
-
# end
|
|
1624
|
-
#
|
|
1625
|
-
# sheets_table.range #=> "sheet1!A1:B20"
|
|
1626
|
-
#
|
|
1627
|
-
def range= new_range
  # Reject the change when this data source is frozen.
  frozen_check!
  sheets_options = @gapi.google_sheets_options
  sheets_options.range = new_range
end
|
|
1631
|
-
end
|
|
1632
|
-
|
|
1633
|
-
##
|
|
1634
|
-
# # BigtableSource
|
|
1635
|
-
#
|
|
1636
|
-
# {External::BigtableSource} is a subclass of {External::DataSource} and
|
|
1637
|
-
# represents a Bigtable external data source that can be queried from
|
|
1638
|
-
# directly, even though the data is not stored in BigQuery. Instead of
|
|
1639
|
-
# loading or streaming the data, this object references the external
|
|
1640
|
-
# data source.
|
|
1641
|
-
#
|
|
1642
|
-
# @example
|
|
1643
|
-
# require "google/cloud/bigquery"
|
|
1644
|
-
#
|
|
1645
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1646
|
-
#
|
|
1647
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1648
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1649
|
-
# bt.rowkey_as_string = true
|
|
1650
|
-
# bt.add_family "user" do |u|
|
|
1651
|
-
# u.add_string "name"
|
|
1652
|
-
# u.add_string "email"
|
|
1653
|
-
# u.add_integer "age"
|
|
1654
|
-
# u.add_boolean "active"
|
|
1655
|
-
# end
|
|
1656
|
-
# end
|
|
1657
|
-
#
|
|
1658
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
1659
|
-
# external: { my_ext_table: bigtable_table }
|
|
1660
|
-
#
|
|
1661
|
-
# # Iterate over the first page of results
|
|
1662
|
-
# data.each do |row|
|
|
1663
|
-
# puts row[:name]
|
|
1664
|
-
# end
|
|
1665
|
-
# # Retrieve the next page of results
|
|
1666
|
-
# data = data.next if data.next?
|
|
1667
|
-
#
|
|
1668
|
-
class BigtableSource < External::DataSource
|
|
1669
|
-
##
|
|
1670
|
-
# @private Create an empty BigtableSource object.
|
|
1671
|
-
def initialize
  super
  # Start with no column families; #add_family appends to this list.
  @families = []
  @gapi.bigtable_options = Google::Apis::BigqueryV2::BigtableOptions.new
end
|
|
1676
|
-
|
|
1677
|
-
##
|
|
1678
|
-
# List of column families to expose in the table schema along with
|
|
1679
|
-
# their types. This list restricts the column families that can be
|
|
1680
|
-
# referenced in queries and specifies their value types. You can use
|
|
1681
|
-
# this list to do type conversions - see
|
|
1682
|
-
# {BigtableSource::ColumnFamily#type} for more details. If you leave
|
|
1683
|
-
# this list empty, all column families are present in the table schema
|
|
1684
|
-
# and their values are read as `BYTES`. During a query only the column
|
|
1685
|
-
# families referenced in that query are read from Bigtable.
|
|
1686
|
-
#
|
|
1687
|
-
# @return [Array<BigtableSource::ColumnFamily>]
|
|
1688
|
-
#
|
|
1689
|
-
# @example
|
|
1690
|
-
# require "google/cloud/bigquery"
|
|
1691
|
-
#
|
|
1692
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1693
|
-
#
|
|
1694
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1695
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1696
|
-
# bt.rowkey_as_string = true
|
|
1697
|
-
# bt.add_family "user" do |u|
|
|
1698
|
-
# u.add_string "name"
|
|
1699
|
-
# u.add_string "email"
|
|
1700
|
-
# u.add_integer "age"
|
|
1701
|
-
# u.add_boolean "active"
|
|
1702
|
-
# end
|
|
1703
|
-
# end
|
|
1704
|
-
#
|
|
1705
|
-
# bigtable_table.families.count #=> 1
|
|
1706
|
-
#
|
|
1707
|
-
def families
  # Returns the internal array itself, not a copy.
  @families
end
|
|
1710
|
-
|
|
1711
|
-
##
|
|
1712
|
-
# Add a column family to expose in the table schema along with its
|
|
1713
|
-
# types. Columns belonging to the column family may also be exposed.
|
|
1714
|
-
#
|
|
1715
|
-
# @param [String] family_id Identifier of the column family. See
|
|
1716
|
-
# {BigtableSource::ColumnFamily#family_id}.
|
|
1717
|
-
# @param [String] encoding The encoding of the values when the type is
|
|
1718
|
-
# not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
|
|
1719
|
-
# @param [Boolean] latest Whether only the latest version of value is
|
|
1720
|
-
# exposed for all columns in this column family. See
|
|
1721
|
-
# {BigtableSource::ColumnFamily#latest}.
|
|
1722
|
-
# @param [String] type The type to convert the value in cells of this
|
|
1723
|
-
# column. See {BigtableSource::ColumnFamily#type}.
|
|
1724
|
-
#
|
|
1725
|
-
# @yield [family] a block for setting the family
|
|
1726
|
-
# @yieldparam [BigtableSource::ColumnFamily] family the family object
|
|
1727
|
-
#
|
|
1728
|
-
# @return [BigtableSource::ColumnFamily]
|
|
1729
|
-
#
|
|
1730
|
-
# @example
|
|
1731
|
-
# require "google/cloud/bigquery"
|
|
1732
|
-
#
|
|
1733
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1734
|
-
#
|
|
1735
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1736
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1737
|
-
# bt.rowkey_as_string = true
|
|
1738
|
-
# bt.add_family "user" do |u|
|
|
1739
|
-
# u.add_string "name"
|
|
1740
|
-
# u.add_string "email"
|
|
1741
|
-
# u.add_integer "age"
|
|
1742
|
-
# u.add_boolean "active"
|
|
1743
|
-
# end
|
|
1744
|
-
# end
|
|
1745
|
-
#
|
|
1746
|
-
def add_family(family_id, encoding: nil, latest: nil, type: nil)
  frozen_check!
  family = BigtableSource::ColumnFamily.new.tap do |fam|
    fam.family_id = family_id
    # Optional settings are applied only when a truthy value was supplied.
    fam.encoding = encoding if encoding
    fam.latest = latest if latest
    fam.type = type if type
  end
  # The caller's block configures the family before it is appended.
  yield family if block_given?
  @families << family
  family
end
|
|
1757
|
-
|
|
1758
|
-
##
|
|
1759
|
-
# Whether the rowkey column families will be read and converted to
|
|
1760
|
-
# string. Otherwise they are read with `BYTES` type values and users
|
|
1761
|
-
# need to manually cast them with `CAST` if necessary. The default
|
|
1762
|
-
# value is `false`.
|
|
1763
|
-
#
|
|
1764
|
-
# @return [Boolean]
|
|
1765
|
-
#
|
|
1766
|
-
# @example
|
|
1767
|
-
# require "google/cloud/bigquery"
|
|
1768
|
-
#
|
|
1769
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1770
|
-
#
|
|
1771
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1772
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1773
|
-
# bt.rowkey_as_string = true
|
|
1774
|
-
# end
|
|
1775
|
-
#
|
|
1776
|
-
# bigtable_table.rowkey_as_string #=> true
|
|
1777
|
-
#
|
|
1778
|
-
def rowkey_as_string
  # Read from the Bigtable-specific options of the wrapped gapi object.
  bigtable_options = @gapi.bigtable_options
  bigtable_options.read_rowkey_as_string
end
|
|
1781
|
-
|
|
1782
|
-
##
|
|
1783
|
-
# Set whether the rowkey column families will be read and converted to
|
|
1784
|
-
# string.
|
|
1785
|
-
#
|
|
1786
|
-
# @param [Boolean] row_rowkey New rowkey_as_string value
|
|
1787
|
-
#
|
|
1788
|
-
# @example
|
|
1789
|
-
# require "google/cloud/bigquery"
|
|
1790
|
-
#
|
|
1791
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1792
|
-
#
|
|
1793
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1794
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1795
|
-
# bt.rowkey_as_string = true
|
|
1796
|
-
# end
|
|
1797
|
-
#
|
|
1798
|
-
# bigtable_table.rowkey_as_string #=> true
|
|
1799
|
-
#
|
|
1800
|
-
def rowkey_as_string=(row_rowkey)
  frozen_check!
  # The value is stored on the Bigtable-specific options of the wrapped gapi object.
  bigtable_options = @gapi.bigtable_options
  bigtable_options.read_rowkey_as_string = row_rowkey
end
|
|
1804
|
-
|
|
1805
|
-
##
|
|
1806
|
-
# @private Google API Client object.
|
|
1807
|
-
def to_gapi
  # Re-sync the gapi object's column families from the wrapper list on every call.
  family_gapis = @families.map { |family| family.to_gapi }
  @gapi.bigtable_options.column_families = family_gapis
  @gapi
end
|
|
1811
|
-
|
|
1812
|
-
##
|
|
1813
|
-
# @private Google API Client object.
|
|
1814
|
-
def self.from_gapi(gapi)
  source = super
  # Array() turns a nil column_families into an empty list.
  family_gapis = Array gapi.bigtable_options.column_families
  wrapped = family_gapis.map { |family_gapi| BigtableSource::ColumnFamily.from_gapi family_gapi }
  source.instance_variable_set :@families, wrapped
  source
end
|
|
1821
|
-
|
|
1822
|
-
##
|
|
1823
|
-
# @private
|
|
1824
|
-
# @private
# Freezes every column family, then the family list, then this object.
#
# The previous body called `freeze!`, which Array does not define, so
# freezing a Bigtable source raised NoMethodError. Plain `freeze` is the
# method that exists on every object.
#
# @return [self] the frozen object, as returned by `super`
def freeze
  @families.each(&:freeze)
  @families.freeze
  super
end
|
|
1829
|
-
|
|
1830
|
-
protected
|
|
1831
|
-
|
|
1832
|
-
# Guard used by the mutators: raises when this object has been frozen,
# otherwise returns nil.
def frozen_check!
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
|
|
1836
|
-
|
|
1837
|
-
##
|
|
1838
|
-
# # BigtableSource::ColumnFamily
|
|
1839
|
-
#
|
|
1840
|
-
# A Bigtable column family used to expose in the table schema along
|
|
1841
|
-
# with its types and columns.
|
|
1842
|
-
#
|
|
1843
|
-
# @example
|
|
1844
|
-
# require "google/cloud/bigquery"
|
|
1845
|
-
#
|
|
1846
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1847
|
-
#
|
|
1848
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1849
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1850
|
-
# bt.rowkey_as_string = true
|
|
1851
|
-
# bt.add_family "user" do |u|
|
|
1852
|
-
# u.add_string "name"
|
|
1853
|
-
# u.add_string "email"
|
|
1854
|
-
# u.add_integer "age"
|
|
1855
|
-
# u.add_boolean "active"
|
|
1856
|
-
# end
|
|
1857
|
-
# end
|
|
1858
|
-
#
|
|
1859
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
1860
|
-
# external: { my_ext_table: bigtable_table }
|
|
1861
|
-
#
|
|
1862
|
-
# # Iterate over the first page of results
|
|
1863
|
-
# data.each do |row|
|
|
1864
|
-
# puts row[:name]
|
|
1865
|
-
# end
|
|
1866
|
-
# # Retrieve the next page of results
|
|
1867
|
-
# data = data.next if data.next?
|
|
1868
|
-
#
|
|
1869
|
-
class ColumnFamily
|
|
1870
|
-
##
|
|
1871
|
-
# @private Create an empty BigtableSource::ColumnFamily object.
|
|
1872
|
-
def initialize
  # Start with no columns; #add_column appends to this list.
  @columns = []
  @gapi = Google::Apis::BigqueryV2::BigtableColumnFamily.new
end
|
|
1876
|
-
|
|
1877
|
-
##
|
|
1878
|
-
# The encoding of the values when the type is not `STRING`.
|
|
1879
|
-
#
|
|
1880
|
-
# @return [String]
|
|
1881
|
-
#
|
|
1882
|
-
# @example
|
|
1883
|
-
# require "google/cloud/bigquery"
|
|
1884
|
-
#
|
|
1885
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1886
|
-
#
|
|
1887
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1888
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1889
|
-
# bt.add_family "user" do |u|
|
|
1890
|
-
# u.encoding = "UTF-8"
|
|
1891
|
-
# end
|
|
1892
|
-
# end
|
|
1893
|
-
#
|
|
1894
|
-
# bigtable_table.families[0].encoding #=> "UTF-8"
|
|
1895
|
-
#
|
|
1896
|
-
def encoding
  # Delegates to the wrapped gapi object.
  @gapi.encoding
end
|
|
1899
|
-
|
|
1900
|
-
##
|
|
1901
|
-
# Set the encoding of the values when the type is not `STRING`.
|
|
1902
|
-
# Acceptable encoding values are:
|
|
1903
|
-
#
|
|
1904
|
-
# * `TEXT` - indicates values are alphanumeric text strings.
|
|
1905
|
-
# * `BINARY` - indicates values are encoded using HBase
|
|
1906
|
-
# `Bytes.toBytes` family of functions. This can be overridden on a
|
|
1907
|
-
# column.
|
|
1908
|
-
#
|
|
1909
|
-
# @param [String] new_encoding New encoding value
|
|
1910
|
-
#
|
|
1911
|
-
# @example
|
|
1912
|
-
# require "google/cloud/bigquery"
|
|
1913
|
-
#
|
|
1914
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1915
|
-
#
|
|
1916
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1917
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1918
|
-
# bt.add_family "user" do |u|
|
|
1919
|
-
# u.encoding = "UTF-8"
|
|
1920
|
-
# end
|
|
1921
|
-
# end
|
|
1922
|
-
#
|
|
1923
|
-
# bigtable_table.families[0].encoding #=> "UTF-8"
|
|
1924
|
-
#
|
|
1925
|
-
def encoding=(new_encoding)
  # Refuse to mutate a frozen object, then store on the wrapped gapi object.
  frozen_check!
  @gapi.encoding = new_encoding
end
|
|
1929
|
-
|
|
1930
|
-
##
|
|
1931
|
-
# Identifier of the column family.
|
|
1932
|
-
#
|
|
1933
|
-
# @return [String]
|
|
1934
|
-
#
|
|
1935
|
-
# @example
|
|
1936
|
-
# require "google/cloud/bigquery"
|
|
1937
|
-
#
|
|
1938
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1939
|
-
#
|
|
1940
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1941
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1942
|
-
# bt.add_family "user"
|
|
1943
|
-
# end
|
|
1944
|
-
#
|
|
1945
|
-
# bigtable_table.families[0].family_id #=> "user"
|
|
1946
|
-
#
|
|
1947
|
-
def family_id
  # Delegates to the wrapped gapi object.
  @gapi.family_id
end
|
|
1950
|
-
|
|
1951
|
-
##
|
|
1952
|
-
# Set the identifier of the column family.
|
|
1953
|
-
#
|
|
1954
|
-
# @param [String] new_family_id New family_id value
|
|
1955
|
-
#
|
|
1956
|
-
# @example
|
|
1957
|
-
# require "google/cloud/bigquery"
|
|
1958
|
-
#
|
|
1959
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1960
|
-
#
|
|
1961
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1962
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1963
|
-
# bt.add_family "user"
|
|
1964
|
-
# end
|
|
1965
|
-
#
|
|
1966
|
-
# bigtable_table.families[0].family_id #=> "user"
|
|
1967
|
-
# bigtable_table.families[0].family_id = "User"
|
|
1968
|
-
# bigtable_table.families[0].family_id #=> "User"
|
|
1969
|
-
#
|
|
1970
|
-
def family_id=(new_family_id)
  # Refuse to mutate a frozen object, then store on the wrapped gapi object.
  frozen_check!
  @gapi.family_id = new_family_id
end
|
|
1974
|
-
|
|
1975
|
-
##
|
|
1976
|
-
# Whether only the latest version of value is exposed for all
|
|
1977
|
-
# columns in this column family.
|
|
1978
|
-
#
|
|
1979
|
-
# @return [Boolean]
|
|
1980
|
-
#
|
|
1981
|
-
# @example
|
|
1982
|
-
# require "google/cloud/bigquery"
|
|
1983
|
-
#
|
|
1984
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1985
|
-
#
|
|
1986
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1987
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1988
|
-
# bt.add_family "user" do |u|
|
|
1989
|
-
# u.latest = true
|
|
1990
|
-
# end
|
|
1991
|
-
# end
|
|
1992
|
-
#
|
|
1993
|
-
# bigtable_table.families[0].latest #=> true
|
|
1994
|
-
#
|
|
1995
|
-
def latest
  # Backed by the gapi object's `only_read_latest` attribute.
  @gapi.only_read_latest
end
|
|
1998
|
-
|
|
1999
|
-
##
|
|
2000
|
-
# Set whether only the latest version of value is exposed for all
|
|
2001
|
-
# columns in this column family.
|
|
2002
|
-
#
|
|
2003
|
-
# @param [Boolean] new_latest New latest value
|
|
2004
|
-
#
|
|
2005
|
-
# @example
|
|
2006
|
-
# require "google/cloud/bigquery"
|
|
2007
|
-
#
|
|
2008
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2009
|
-
#
|
|
2010
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2011
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2012
|
-
# bt.add_family "user" do |u|
|
|
2013
|
-
# u.latest = true
|
|
2014
|
-
# end
|
|
2015
|
-
# end
|
|
2016
|
-
#
|
|
2017
|
-
# bigtable_table.families[0].latest #=> true
|
|
2018
|
-
#
|
|
2019
|
-
def latest=(new_latest)
  # Refuse to mutate a frozen object, then store as `only_read_latest` on the gapi object.
  frozen_check!
  @gapi.only_read_latest = new_latest
end
|
|
2023
|
-
|
|
2024
|
-
##
|
|
2025
|
-
# The type to convert the value in cells of this column family. The
|
|
2026
|
-
# values are expected to be encoded using HBase `Bytes.toBytes`
|
|
2027
|
-
# function when using the `BINARY` encoding value. The following
|
|
2028
|
-
# BigQuery types are allowed:
|
|
2029
|
-
#
|
|
2030
|
-
# * `BYTES`
|
|
2031
|
-
# * `STRING`
|
|
2032
|
-
# * `INTEGER`
|
|
2033
|
-
# * `FLOAT`
|
|
2034
|
-
# * `BOOLEAN`
|
|
2035
|
-
#
|
|
2036
|
-
# Default type is `BYTES`. This can be overridden on a column.
|
|
2037
|
-
#
|
|
2038
|
-
# @return [String]
|
|
2039
|
-
#
|
|
2040
|
-
# @example
|
|
2041
|
-
# require "google/cloud/bigquery"
|
|
2042
|
-
#
|
|
2043
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2044
|
-
#
|
|
2045
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2046
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2047
|
-
# bt.add_family "user" do |u|
|
|
2048
|
-
# u.type = "STRING"
|
|
2049
|
-
# end
|
|
2050
|
-
# end
|
|
2051
|
-
#
|
|
2052
|
-
# bigtable_table.families[0].type #=> "STRING"
|
|
2053
|
-
#
|
|
2054
|
-
def type
  # Delegates to the wrapped gapi object.
  @gapi.type
end
|
|
2057
|
-
|
|
2058
|
-
##
|
|
2059
|
-
# Set the type to convert the value in cells of this column family.
|
|
2060
|
-
# The values are expected to be encoded using HBase `Bytes.toBytes`
|
|
2061
|
-
# function when using the `BINARY` encoding value. The following
|
|
2062
|
-
# BigQuery types are allowed:
|
|
2063
|
-
#
|
|
2064
|
-
# * `BYTES`
|
|
2065
|
-
# * `STRING`
|
|
2066
|
-
# * `INTEGER`
|
|
2067
|
-
# * `FLOAT`
|
|
2068
|
-
# * `BOOLEAN`
|
|
2069
|
-
#
|
|
2070
|
-
# Default type is `BYTES`. This can be overridden on a column.
|
|
2071
|
-
#
|
|
2072
|
-
# @param [String] new_type New type value
|
|
2073
|
-
#
|
|
2074
|
-
# @example
|
|
2075
|
-
# require "google/cloud/bigquery"
|
|
2076
|
-
#
|
|
2077
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2078
|
-
#
|
|
2079
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2080
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2081
|
-
# bt.add_family "user" do |u|
|
|
2082
|
-
# u.type = "STRING"
|
|
2083
|
-
# end
|
|
2084
|
-
# end
|
|
2085
|
-
#
|
|
2086
|
-
# bigtable_table.families[0].type #=> "STRING"
|
|
2087
|
-
#
|
|
2088
|
-
def type=(new_type)
  # Refuse to mutate a frozen object, then store on the wrapped gapi object.
  frozen_check!
  @gapi.type = new_type
end
|
|
2092
|
-
|
|
2093
|
-
##
|
|
2094
|
-
# Lists of columns that should be exposed as individual fields.
|
|
2095
|
-
#
|
|
2096
|
-
# @return [Array<BigtableSource::Column>]
|
|
2097
|
-
#
|
|
2098
|
-
# @example
|
|
2099
|
-
# require "google/cloud/bigquery"
|
|
2100
|
-
#
|
|
2101
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2102
|
-
#
|
|
2103
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2104
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2105
|
-
# bt.rowkey_as_string = true
|
|
2106
|
-
# bt.add_family "user" do |u|
|
|
2107
|
-
# u.add_string "name"
|
|
2108
|
-
# u.add_string "email"
|
|
2109
|
-
# u.add_integer "age"
|
|
2110
|
-
# u.add_boolean "active"
|
|
2111
|
-
# end
|
|
2112
|
-
# end
|
|
2113
|
-
#
|
|
2114
|
-
# bigtable_table.families[0].columns.count #=> 4
|
|
2115
|
-
#
|
|
2116
|
-
def columns
  # Returns the internal array itself, not a copy.
  @columns
end
|
|
2119
|
-
|
|
2120
|
-
##
|
|
2121
|
-
# Add a column to the column family to expose in the table schema
|
|
2122
|
-
# along with its types.
|
|
2123
|
-
#
|
|
2124
|
-
# @param [String] qualifier Qualifier of the column. See
|
|
2125
|
-
# {BigtableSource::Column#qualifier}.
|
|
2126
|
-
# @param [String] as A valid identifier to be used as the column
|
|
2127
|
-
# field name if the qualifier is not a valid BigQuery field
|
|
2128
|
-
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
|
2129
|
-
# {BigtableSource::Column#field_name}.
|
|
2130
|
-
# @param [String] type The type to convert the value in cells of
|
|
2131
|
-
# this column. See {BigtableSource::Column#type}. The following
|
|
2132
|
-
# BigQuery types are allowed:
|
|
2133
|
-
#
|
|
2134
|
-
# * `BYTES`
|
|
2135
|
-
# * `STRING`
|
|
2136
|
-
# * `INTEGER`
|
|
2137
|
-
# * `FLOAT`
|
|
2138
|
-
# * `BOOLEAN`
|
|
2139
|
-
#
|
|
2140
|
-
# @yield [column] a block for setting the column
|
|
2141
|
-
# @yieldparam [BigtableSource::Column] column the column object
|
|
2142
|
-
#
|
|
2143
|
-
# @return [BigtableSource::Column]
|
|
2144
|
-
#
|
|
2145
|
-
# @example
|
|
2146
|
-
# require "google/cloud/bigquery"
|
|
2147
|
-
#
|
|
2148
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2149
|
-
#
|
|
2150
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2151
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2152
|
-
# bt.rowkey_as_string = true
|
|
2153
|
-
# bt.add_family "user" do |u|
|
|
2154
|
-
# u.add_column "name", type: "STRING"
|
|
2155
|
-
# end
|
|
2156
|
-
# end
|
|
2157
|
-
#
|
|
2158
|
-
def add_column(qualifier, as: nil, type: nil)
  frozen_check!
  column = BigtableSource::Column.new.tap do |new_column|
    new_column.qualifier = qualifier
    # Optional settings are applied only when a truthy value was supplied.
    new_column.field_name = as if as
    new_column.type = type if type
  end
  # The caller's block configures the column before it is appended.
  yield column if block_given?
  @columns << column
  column
end
|
|
2168
|
-
|
|
2169
|
-
##
|
|
2170
|
-
# Add a column to the column family to expose in the table schema
|
|
2171
|
-
# that is specified as the `BYTES` type.
|
|
2172
|
-
#
|
|
2173
|
-
# @param [String] qualifier Qualifier of the column. See
|
|
2174
|
-
# {BigtableSource::Column#qualifier}.
|
|
2175
|
-
# @param [String] as A valid identifier to be used as the column
|
|
2176
|
-
# field name if the qualifier is not a valid BigQuery field
|
|
2177
|
-
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
|
2178
|
-
# {BigtableSource::Column#field_name}.
|
|
2179
|
-
#
|
|
2180
|
-
# @yield [column] a block for setting the column
|
|
2181
|
-
# @yieldparam [BigtableSource::Column] column the column object
|
|
2182
|
-
#
|
|
2183
|
-
# @return [BigtableSource::Column]
|
|
2184
|
-
#
|
|
2185
|
-
# @example
|
|
2186
|
-
# require "google/cloud/bigquery"
|
|
2187
|
-
#
|
|
2188
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2189
|
-
#
|
|
2190
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2191
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2192
|
-
# bt.rowkey_as_string = true
|
|
2193
|
-
# bt.add_family "user" do |u|
|
|
2194
|
-
# u.add_bytes "avatar"
|
|
2195
|
-
# end
|
|
2196
|
-
# end
|
|
2197
|
-
#
|
|
2198
|
-
def add_bytes(qualifier, as: nil)
  add_column(qualifier, as: as, type: "BYTES").tap do |column|
    # The block is not forwarded to add_column; it runs here, after that call returns.
    yield column if block_given?
  end
end
|
|
2203
|
-
|
|
2204
|
-
##
|
|
2205
|
-
# Add a column to the column family to expose in the table schema
|
|
2206
|
-
# that is specified as the `STRING` type.
|
|
2207
|
-
#
|
|
2208
|
-
# @param [String] qualifier Qualifier of the column. See
|
|
2209
|
-
# {BigtableSource::Column#qualifier}.
|
|
2210
|
-
# @param [String] as A valid identifier to be used as the column
|
|
2211
|
-
# field name if the qualifier is not a valid BigQuery field
|
|
2212
|
-
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
|
2213
|
-
# {BigtableSource::Column#field_name}.
|
|
2214
|
-
#
|
|
2215
|
-
# @yield [column] a block for setting the column
|
|
2216
|
-
# @yieldparam [BigtableSource::Column] column the column object
|
|
2217
|
-
#
|
|
2218
|
-
# @return [BigtableSource::Column]
|
|
2219
|
-
#
|
|
2220
|
-
# @example
|
|
2221
|
-
# require "google/cloud/bigquery"
|
|
2222
|
-
#
|
|
2223
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2224
|
-
#
|
|
2225
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2226
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2227
|
-
# bt.rowkey_as_string = true
|
|
2228
|
-
# bt.add_family "user" do |u|
|
|
2229
|
-
# u.add_string "name"
|
|
2230
|
-
# end
|
|
2231
|
-
# end
|
|
2232
|
-
#
|
|
2233
|
-
def add_string(qualifier, as: nil)
  add_column(qualifier, as: as, type: "STRING").tap do |column|
    # The block is not forwarded to add_column; it runs here, after that call returns.
    yield column if block_given?
  end
end
|
|
2238
|
-
|
|
2239
|
-
##
|
|
2240
|
-
# Add a column to the column family to expose in the table schema
|
|
2241
|
-
# that is specified as the `INTEGER` type.
|
|
2242
|
-
#
|
|
2243
|
-
# @param [String] qualifier Qualifier of the column. See
|
|
2244
|
-
# {BigtableSource::Column#qualifier}.
|
|
2245
|
-
# @param [String] as A valid identifier to be used as the column
|
|
2246
|
-
# field name if the qualifier is not a valid BigQuery field
|
|
2247
|
-
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
|
2248
|
-
# {BigtableSource::Column#field_name}.
|
|
2249
|
-
#
|
|
2250
|
-
# @yield [column] a block for setting the column
|
|
2251
|
-
# @yieldparam [BigtableSource::Column] column the column object
|
|
2252
|
-
#
|
|
2253
|
-
# @return [BigtableSource::Column]
|
|
2254
|
-
#
|
|
2255
|
-
# @example
|
|
2256
|
-
# require "google/cloud/bigquery"
|
|
2257
|
-
#
|
|
2258
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2259
|
-
#
|
|
2260
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2261
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2262
|
-
# bt.rowkey_as_string = true
|
|
2263
|
-
# bt.add_family "user" do |u|
|
|
2264
|
-
# u.add_integer "age"
|
|
2265
|
-
# end
|
|
2266
|
-
# end
|
|
2267
|
-
#
|
|
2268
|
-
def add_integer(qualifier, as: nil)
  add_column(qualifier, as: as, type: "INTEGER").tap do |column|
    # The block is not forwarded to add_column; it runs here, after that call returns.
    yield column if block_given?
  end
end
|
|
2273
|
-
|
|
2274
|
-
##
|
|
2275
|
-
# Add a column to the column family to expose in the table schema
|
|
2276
|
-
# that is specified as the `FLOAT` type.
|
|
2277
|
-
#
|
|
2278
|
-
# @param [String] qualifier Qualifier of the column. See
|
|
2279
|
-
# {BigtableSource::Column#qualifier}.
|
|
2280
|
-
# @param [String] as A valid identifier to be used as the column
|
|
2281
|
-
# field name if the qualifier is not a valid BigQuery field
|
|
2282
|
-
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
|
2283
|
-
# {BigtableSource::Column#field_name}.
|
|
2284
|
-
#
|
|
2285
|
-
# @yield [column] a block for setting the column
|
|
2286
|
-
# @yieldparam [BigtableSource::Column] column the column object
|
|
2287
|
-
#
|
|
2288
|
-
# @return [BigtableSource::Column]
|
|
2289
|
-
#
|
|
2290
|
-
# @example
|
|
2291
|
-
# require "google/cloud/bigquery"
|
|
2292
|
-
#
|
|
2293
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2294
|
-
#
|
|
2295
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2296
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2297
|
-
# bt.rowkey_as_string = true
|
|
2298
|
-
# bt.add_family "user" do |u|
|
|
2299
|
-
# u.add_float "score"
|
|
2300
|
-
# end
|
|
2301
|
-
# end
|
|
2302
|
-
#
|
|
2303
|
-
def add_float(qualifier, as: nil)
  add_column(qualifier, as: as, type: "FLOAT").tap do |column|
    # The block is not forwarded to add_column; it runs here, after that call returns.
    yield column if block_given?
  end
end
|
|
2308
|
-
|
|
2309
|
-
##
|
|
2310
|
-
# Add a column to the column family to expose in the table schema
|
|
2311
|
-
# that is specified as the `BOOLEAN` type.
|
|
2312
|
-
#
|
|
2313
|
-
# @param [String] qualifier Qualifier of the column. See
|
|
2314
|
-
# {BigtableSource::Column#qualifier}.
|
|
2315
|
-
# @param [String] as A valid identifier to be used as the column
|
|
2316
|
-
# field name if the qualifier is not a valid BigQuery field
|
|
2317
|
-
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
|
2318
|
-
# {BigtableSource::Column#field_name}.
|
|
2319
|
-
#
|
|
2320
|
-
# @yield [column] a block for setting the column
|
|
2321
|
-
# @yieldparam [BigtableSource::Column] column the column object
|
|
2322
|
-
#
|
|
2323
|
-
# @return [BigtableSource::Column]
|
|
2324
|
-
#
|
|
2325
|
-
# @example
|
|
2326
|
-
# require "google/cloud/bigquery"
|
|
2327
|
-
#
|
|
2328
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2329
|
-
#
|
|
2330
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2331
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2332
|
-
# bt.rowkey_as_string = true
|
|
2333
|
-
# bt.add_family "user" do |u|
|
|
2334
|
-
# u.add_boolean "active"
|
|
2335
|
-
# end
|
|
2336
|
-
# end
|
|
2337
|
-
#
|
|
2338
|
-
def add_boolean(qualifier, as: nil)
  add_column(qualifier, as: as, type: "BOOLEAN").tap do |column|
    # The block is not forwarded to add_column; it runs here, after that call returns.
    yield column if block_given?
  end
end
|
|
2343
|
-
|
|
2344
|
-
##
|
|
2345
|
-
# @private Google API Client object.
|
|
2346
|
-
def to_gapi
  # Re-sync the gapi object's columns from the wrapper list on every call.
  column_gapis = @columns.map { |column| column.to_gapi }
  @gapi.columns = column_gapis
  @gapi
end
|
|
2350
|
-
|
|
2351
|
-
##
|
|
2352
|
-
# @private Google API Client object.
|
|
2353
|
-
def self.from_gapi(gapi)
  # Array() turns a nil columns list into an empty one.
  column_wrappers = Array(gapi.columns).map { |col_gapi| BigtableSource::Column.from_gapi col_gapi }
  new.tap do |family|
    # Replace the blank gapi object and column list created by #initialize.
    family.instance_variable_set :@gapi, gapi
    family.instance_variable_set :@columns, column_wrappers
  end
end
|
|
2360
|
-
|
|
2361
|
-
##
|
|
2362
|
-
# @private
|
|
2363
|
-
# @private
# Freezes every column, then the column list, then this object.
#
# The previous body called `freeze!`, which Array does not define, so
# freezing a column family raised NoMethodError. Plain `freeze` is the
# method that exists on every object.
#
# @return [self] the frozen object, as returned by `super`
def freeze
  @columns.each(&:freeze)
  @columns.freeze
  super
end
|
|
2368
|
-
|
|
2369
|
-
protected
|
|
2370
|
-
|
|
2371
|
-
# Guard used by the mutators: raises when this object has been frozen,
# otherwise returns nil.
def frozen_check!
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
|
|
2375
|
-
end
|
|
2376
|
-
|
|
2377
|
-
##
|
|
2378
|
-
# # BigtableSource::Column
|
|
2379
|
-
#
|
|
2380
|
-
# A Bigtable column to expose in the table schema along with its
|
|
2381
|
-
# types.
|
|
2382
|
-
#
|
|
2383
|
-
# @example
|
|
2384
|
-
# require "google/cloud/bigquery"
|
|
2385
|
-
#
|
|
2386
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2387
|
-
#
|
|
2388
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2389
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2390
|
-
# bt.rowkey_as_string = true
|
|
2391
|
-
# bt.add_family "user" do |u|
|
|
2392
|
-
# u.add_string "name"
|
|
2393
|
-
# u.add_string "email"
|
|
2394
|
-
# u.add_integer "age"
|
|
2395
|
-
# u.add_boolean "active"
|
|
2396
|
-
# end
|
|
2397
|
-
# end
|
|
2398
|
-
#
|
|
2399
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
2400
|
-
# external: { my_ext_table: bigtable_table }
|
|
2401
|
-
#
|
|
2402
|
-
# # Iterate over the first page of results
|
|
2403
|
-
# data.each do |row|
|
|
2404
|
-
# puts row[:name]
|
|
2405
|
-
# end
|
|
2406
|
-
# # Retrieve the next page of results
|
|
2407
|
-
# data = data.next if data.next?
|
|
2408
|
-
#
|
|
2409
|
-
class Column
|
|
2410
|
-
##
|
|
2411
|
-
# @private Create an empty BigtableSource::Column object.
|
|
2412
|
-
def initialize
  # Every column wraps its own, initially blank, gapi object.
  @gapi = Google::Apis::BigqueryV2::BigtableColumn.new
end
|
|
2415
|
-
|
|
2416
|
-
##
|
|
2417
|
-
# Qualifier of the column. Columns in the parent column family that
|
|
2418
|
-
# has this exact qualifier are exposed as `.` field. If the
|
|
2419
|
-
# qualifier is valid UTF-8 string, it will be represented as a UTF-8
|
|
2420
|
-
# string. Otherwise, it will be represented as an ASCII-8BIT string.
|
|
2421
|
-
#
|
|
2422
|
-
# If the qualifier is not a valid BigQuery field identifier (does
|
|
2423
|
-
# not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
|
|
2424
|
-
# provided as `field_name`.
|
|
2425
|
-
#
|
|
2426
|
-
# @return [String]
|
|
2427
|
-
#
|
|
2428
|
-
# @example
|
|
2429
|
-
# require "google/cloud/bigquery"
|
|
2430
|
-
#
|
|
2431
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2432
|
-
#
|
|
2433
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2434
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2435
|
-
# bt.add_family "user" do |u|
|
|
2436
|
-
# u.add_string "name" do |col|
|
|
2437
|
-
# col.qualifier # "name"
|
|
2438
|
-
# col.qualifier = "User"
|
|
2439
|
-
# col.qualifier # "User"
|
|
2440
|
-
# end
|
|
2441
|
-
# end
|
|
2442
|
-
# end
|
|
2443
|
-
#
|
|
2444
|
-
def qualifier
  # Prefer the plain string form when it is set.
  plain = @gapi.qualifier_string
  return plain if plain

  # Otherwise decode the Base64 form; a nil value becomes "" via to_s.
  Base64.strict_decode64 @gapi.qualifier_encoded.to_s
end
|
|
2447
|
-
|
|
2448
|
-
##
|
|
2449
|
-
# Set the qualifier of the column. Columns in the parent column
|
|
2450
|
-
# family that has this exact qualifier are exposed as `.` field.
|
|
2451
|
-
# Values that are valid UTF-8 strings will be treated as such. All
|
|
2452
|
-
# other values will be treated as `BINARY`.
|
|
2453
|
-
#
|
|
2454
|
-
# @param [String] new_qualifier New qualifier value
|
|
2455
|
-
#
|
|
2456
|
-
# @example
|
|
2457
|
-
# require "google/cloud/bigquery"
|
|
2458
|
-
#
|
|
2459
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2460
|
-
#
|
|
2461
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2462
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2463
|
-
# bt.add_family "user" do |u|
|
|
2464
|
-
# u.add_string "name" do |col|
|
|
2465
|
-
# col.qualifier # "name"
|
|
2466
|
-
# col.qualifier = "User"
|
|
2467
|
-
# col.qualifier # "User"
|
|
2468
|
-
# end
|
|
2469
|
-
# end
|
|
2470
|
-
# end
|
|
2471
|
-
#
|
|
2472
|
-
def qualifier= new_qualifier
  frozen_check!
  raise ArgumentError, "new_qualifier is required" if new_qualifier.nil?

  # Try to obtain a valid UTF-8 rendering of the qualifier. Both failure
  # modes (conversion raising EncodingError, or a result that is not valid
  # UTF-8) collapse to nil so there is a single Base64 fallback path below.
  utf8_qualifier = begin
    converted = new_qualifier.encode Encoding::UTF_8
    converted if converted.valid_encoding?
  rescue EncodingError
    nil
  end

  if utf8_qualifier
    @gapi.qualifier_string = utf8_qualifier
    stale_ivar = :@qualifier_encoded
  else
    # Not representable as UTF-8 text: store the original bytes as Base64.
    @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
    stale_ivar = :@qualifier_string
  end

  # Only one representation is kept: the other instance variable is removed
  # outright (not merely set to nil) when it is present on the API object.
  @gapi.remove_instance_variable stale_ivar if @gapi.instance_variable_defined? stale_ivar
end
|
|
2492
|
-
|
|
2493
|
-
##
|
|
2494
|
-
# The encoding of the values when the type is not `STRING`.
|
|
2495
|
-
#
|
|
2496
|
-
# @return [String]
|
|
2497
|
-
#
|
|
2498
|
-
# @example
|
|
2499
|
-
# require "google/cloud/bigquery"
|
|
2500
|
-
#
|
|
2501
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2502
|
-
#
|
|
2503
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2504
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2505
|
-
# bt.add_family "user" do |u|
|
|
2506
|
-
# u.add_bytes "name" do |col|
|
|
2507
|
-
# col.encoding = "TEXT"
|
|
2508
|
-
# col.encoding # "TEXT"
|
|
2509
|
-
# end
|
|
2510
|
-
# end
|
|
2511
|
-
# end
|
|
2512
|
-
#
|
|
2513
|
-
def encoding
  # Plain delegate to the wrapped API object.
  @gapi.encoding
end
|
|
2516
|
-
|
|
2517
|
-
##
|
|
2518
|
-
# Set the encoding of the values when the type is not `STRING`.
|
|
2519
|
-
# Acceptable encoding values are:
|
|
2520
|
-
#
|
|
2521
|
-
# * `TEXT` - indicates values are alphanumeric text strings.
|
|
2522
|
-
# * `BINARY` - indicates values are encoded using HBase
|
|
2523
|
-
# `Bytes.toBytes` family of functions. This can be overridden on a
|
|
2524
|
-
# column.
|
|
2525
|
-
#
|
|
2526
|
-
# @param [String] new_encoding New encoding value
|
|
2527
|
-
#
|
|
2528
|
-
# @example
|
|
2529
|
-
# require "google/cloud/bigquery"
|
|
2530
|
-
#
|
|
2531
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2532
|
-
#
|
|
2533
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2534
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2535
|
-
# bt.add_family "user" do |u|
|
|
2536
|
-
# u.add_bytes "name" do |col|
|
|
2537
|
-
# col.encoding = "TEXT"
|
|
2538
|
-
# col.encoding # "TEXT"
|
|
2539
|
-
# end
|
|
2540
|
-
# end
|
|
2541
|
-
# end
|
|
2542
|
-
#
|
|
2543
|
-
def encoding= new_encoding
  # Refuse the write when this object has been frozen.
  frozen_check!
  @gapi.encoding = new_encoding
end
|
|
2547
|
-
|
|
2548
|
-
##
|
|
2549
|
-
# If the qualifier is not a valid BigQuery field identifier (does
|
|
2550
|
-
# not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
|
|
2551
|
-
# provided as the column field name and is used as field name in
|
|
2552
|
-
# queries.
|
|
2553
|
-
#
|
|
2554
|
-
# @return [String]
|
|
2555
|
-
#
|
|
2556
|
-
# @example
|
|
2557
|
-
# require "google/cloud/bigquery"
|
|
2558
|
-
#
|
|
2559
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2560
|
-
#
|
|
2561
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2562
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2563
|
-
# bt.add_family "user" do |u|
|
|
2564
|
-
# u.add_string "001_name", as: "user" do |col|
|
|
2565
|
-
# col.field_name # "user"
|
|
2566
|
-
# col.field_name = "User"
|
|
2567
|
-
# col.field_name # "User"
|
|
2568
|
-
# end
|
|
2569
|
-
# end
|
|
2570
|
-
# end
|
|
2571
|
-
#
|
|
2572
|
-
def field_name
  # Plain delegate to the wrapped API object.
  @gapi.field_name
end
|
|
2575
|
-
|
|
2576
|
-
##
|
|
2577
|
-
# Sets the identifier to be used as the column field name in queries
|
|
2578
|
-
# when the qualifier is not a valid BigQuery field identifier (does
|
|
2579
|
-
# not match `[a-zA-Z][a-zA-Z0-9_]*`).
|
|
2580
|
-
#
|
|
2581
|
-
# @param [String] new_field_name New field_name value
|
|
2582
|
-
#
|
|
2583
|
-
# @example
|
|
2584
|
-
# require "google/cloud/bigquery"
|
|
2585
|
-
#
|
|
2586
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2587
|
-
#
|
|
2588
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2589
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2590
|
-
# bt.add_family "user" do |u|
|
|
2591
|
-
# u.add_string "001_name", as: "user" do |col|
|
|
2592
|
-
# col.field_name # "user"
|
|
2593
|
-
# col.field_name = "User"
|
|
2594
|
-
# col.field_name # "User"
|
|
2595
|
-
# end
|
|
2596
|
-
# end
|
|
2597
|
-
# end
|
|
2598
|
-
#
|
|
2599
|
-
def field_name= new_field_name
  # Refuse the write when this object has been frozen.
  frozen_check!
  @gapi.field_name = new_field_name
end
|
|
2603
|
-
|
|
2604
|
-
##
|
|
2605
|
-
# Whether only the latest version of the value in this column is
|
|
2606
|
-
# exposed. Can also be set at the column family level. However, this
|
|
2607
|
-
# value takes precedence when set at both levels.
|
|
2608
|
-
#
|
|
2609
|
-
# @return [Boolean]
|
|
2610
|
-
#
|
|
2611
|
-
# @example
|
|
2612
|
-
# require "google/cloud/bigquery"
|
|
2613
|
-
#
|
|
2614
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2615
|
-
#
|
|
2616
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2617
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2618
|
-
# bt.add_family "user" do |u|
|
|
2619
|
-
# u.add_string "name" do |col|
|
|
2620
|
-
# col.latest = true
|
|
2621
|
-
# col.latest # true
|
|
2622
|
-
# end
|
|
2623
|
-
# end
|
|
2624
|
-
# end
|
|
2625
|
-
#
|
|
2626
|
-
def latest
  # Exposed as `latest`, stored on the API object as `only_read_latest`.
  @gapi.only_read_latest
end
|
|
2629
|
-
|
|
2630
|
-
##
|
|
2631
|
-
# Set whether only the latest version of the value in this column is
|
|
2632
|
-
# exposed. Can also be set at the column family level. However, this
|
|
2633
|
-
# value takes precedence when set at both levels.
|
|
2634
|
-
#
|
|
2635
|
-
# @param [Boolean] new_latest New latest value
|
|
2636
|
-
#
|
|
2637
|
-
# @example
|
|
2638
|
-
# require "google/cloud/bigquery"
|
|
2639
|
-
#
|
|
2640
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2641
|
-
#
|
|
2642
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2643
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2644
|
-
# bt.add_family "user" do |u|
|
|
2645
|
-
# u.add_string "name" do |col|
|
|
2646
|
-
# col.latest = true
|
|
2647
|
-
# col.latest # true
|
|
2648
|
-
# end
|
|
2649
|
-
# end
|
|
2650
|
-
# end
|
|
2651
|
-
#
|
|
2652
|
-
def latest= new_latest
  # Refuse the write when this object has been frozen.
  frozen_check!
  # Stored on the API object under the name `only_read_latest`.
  @gapi.only_read_latest = new_latest
end
|
|
2656
|
-
|
|
2657
|
-
##
|
|
2658
|
-
# The type to convert the value in cells of this column. The values
|
|
2659
|
-
# are expected to be encoded using HBase `Bytes.toBytes` function
|
|
2660
|
-
# when using the `BINARY` encoding value. The following BigQuery
|
|
2661
|
-
# types are allowed:
|
|
2662
|
-
#
|
|
2663
|
-
# * `BYTES`
|
|
2664
|
-
# * `STRING`
|
|
2665
|
-
# * `INTEGER`
|
|
2666
|
-
# * `FLOAT`
|
|
2667
|
-
# * `BOOLEAN`
|
|
2668
|
-
#
|
|
2669
|
-
# Default type is `BYTES`. Can also be set at the column family
|
|
2670
|
-
# level. However, this value takes precedence when set at both
|
|
2671
|
-
# levels.
|
|
2672
|
-
#
|
|
2673
|
-
# @return [String]
|
|
2674
|
-
#
|
|
2675
|
-
# @example
|
|
2676
|
-
# require "google/cloud/bigquery"
|
|
2677
|
-
#
|
|
2678
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2679
|
-
#
|
|
2680
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2681
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2682
|
-
# bt.add_family "user" do |u|
|
|
2683
|
-
# u.add_string "name" do |col|
|
|
2684
|
-
# col.type # "STRING"
|
|
2685
|
-
# end
|
|
2686
|
-
# end
|
|
2687
|
-
# end
|
|
2688
|
-
#
|
|
2689
|
-
def type
  # Plain delegate to the wrapped API object.
  @gapi.type
end
|
|
2692
|
-
|
|
2693
|
-
##
|
|
2694
|
-
# Set the type to convert the value in cells of this column. The
|
|
2695
|
-
# values are expected to be encoded using HBase `Bytes.toBytes`
|
|
2696
|
-
# function when using the `BINARY` encoding value. The following
|
|
2697
|
-
# BigQuery types are allowed:
|
|
2698
|
-
#
|
|
2699
|
-
# * `BYTES`
|
|
2700
|
-
# * `STRING`
|
|
2701
|
-
# * `INTEGER`
|
|
2702
|
-
# * `FLOAT`
|
|
2703
|
-
# * `BOOLEAN`
|
|
2704
|
-
#
|
|
2705
|
-
# Default type is `BYTES`. Can also be set at the column family
|
|
2706
|
-
# level. However, this value takes precedence when set at both
|
|
2707
|
-
# levels.
|
|
2708
|
-
#
|
|
2709
|
-
# @param [String] new_type New type value
|
|
2710
|
-
#
|
|
2711
|
-
# @example
|
|
2712
|
-
# require "google/cloud/bigquery"
|
|
2713
|
-
#
|
|
2714
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
2715
|
-
#
|
|
2716
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
2717
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
2718
|
-
# bt.add_family "user" do |u|
|
|
2719
|
-
# u.add_string "name" do |col|
|
|
2720
|
-
# col.type # "STRING"
|
|
2721
|
-
# col.type = "BYTES"
|
|
2722
|
-
# col.type # "BYTES"
|
|
2723
|
-
# end
|
|
2724
|
-
# end
|
|
2725
|
-
# end
|
|
2726
|
-
#
|
|
2727
|
-
def type= new_type
  # Refuse the write when this object has been frozen.
  frozen_check!
  @gapi.type = new_type
end
|
|
2731
|
-
|
|
2732
|
-
##
|
|
2733
|
-
# @private Google API Client object.
|
|
2734
|
-
def to_gapi
  # Hands back the wrapped API object itself, not a copy.
  @gapi
end
|
|
2737
|
-
|
|
2738
|
-
##
|
|
2739
|
-
# @private Google API Client object.
|
|
2740
|
-
def self.from_gapi gapi
  # Build a blank instance, then install the given API object as its @gapi.
  new.tap { |col| col.instance_variable_set :@gapi, gapi }
end
|
|
2745
|
-
|
|
2746
|
-
protected
|
|
2747
|
-
|
|
2748
|
-
def frozen_check!
  # Guard used by the setters: a frozen object must not be mutated.
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
|
|
2752
|
-
end
|
|
2753
|
-
end
|
|
2754
144
|
end
|
|
2755
145
|
end
|
|
2756
146
|
end
|