google-cloud-bigquery 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -71,9 +71,9 @@ module Google
|
|
71
71
|
def next
|
72
72
|
return nil unless next?
|
73
73
|
ensure_service!
|
74
|
-
options = { all: @hidden, token: token, max: @max }
|
74
|
+
options = { all: @hidden, filter: @filter, token: token, max: @max }
|
75
75
|
gapi = @service.list_datasets options
|
76
|
-
self.class.from_gapi gapi, @service, @hidden, @max
|
76
|
+
self.class.from_gapi gapi, @service, @hidden, @filter, @max
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
@@ -140,7 +140,8 @@ module Google
|
|
140
140
|
|
141
141
|
##
|
142
142
|
# @private New Dataset::List from a response object.
|
143
|
-
def self.from_gapi gapi_list, service, hidden = nil,
|
143
|
+
def self.from_gapi gapi_list, service, hidden = nil, filter = nil,
|
144
|
+
max = nil
|
144
145
|
datasets = List.new(Array(gapi_list.datasets).map do |gapi_object|
|
145
146
|
Dataset.from_gapi gapi_object, service
|
146
147
|
end)
|
@@ -148,6 +149,7 @@ module Google
|
|
148
149
|
datasets.instance_variable_set :@etag, gapi_list.etag
|
149
150
|
datasets.instance_variable_set :@service, service
|
150
151
|
datasets.instance_variable_set :@hidden, hidden
|
152
|
+
datasets.instance_variable_set :@filter, filter
|
151
153
|
datasets.instance_variable_set :@max, max
|
152
154
|
datasets
|
153
155
|
end
|
@@ -0,0 +1,2353 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/apis/bigquery_v2"
|
17
|
+
require "base64"
|
18
|
+
|
19
|
+
module Google
|
20
|
+
module Cloud
|
21
|
+
module Bigquery
|
22
|
+
##
|
23
|
+
# # External
|
24
|
+
#
|
25
|
+
# Creates a new {External::DataSource} (or subclass) object that
|
26
|
+
# represents the external data source that can be queried from directly,
|
27
|
+
# even though the data is not stored in BigQuery. Instead of loading or
|
28
|
+
# streaming the data, this object references the external data source.
|
29
|
+
#
|
30
|
+
# See {External::DataSource}, {External::CsvSource},
|
31
|
+
# {External::JsonSource}, {External::SheetsSource},
|
32
|
+
# {External::BigtableSource}
|
33
|
+
#
|
34
|
+
# @example
|
35
|
+
# require "google/cloud/bigquery"
|
36
|
+
#
|
37
|
+
# bigquery = Google::Cloud::Bigquery.new
|
38
|
+
#
|
39
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
40
|
+
# csv_table = bigquery.external csv_url do |csv|
|
41
|
+
# csv.autodetect = true
|
42
|
+
# csv.skip_leading_rows = 1
|
43
|
+
# end
|
44
|
+
#
|
45
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
46
|
+
# external: { my_ext_table: csv_table }
|
47
|
+
#
|
48
|
+
# data.each do |row|
|
49
|
+
# puts row[:name]
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
module External
|
53
|
+
##
|
54
|
+
# @private New External from URLs and format
|
55
|
+
def self.from_urls urls, format = nil
|
56
|
+
external_format = source_format_for urls, format
|
57
|
+
if external_format.nil?
|
58
|
+
fail ArgumentError, "Unable to determine external table format"
|
59
|
+
end
|
60
|
+
external_class = table_class_for external_format
|
61
|
+
external_class.new.tap do |e|
|
62
|
+
e.gapi.source_uris = Array(urls)
|
63
|
+
e.gapi.source_format = external_format
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
##
|
68
|
+
# @private Google API Client object.
|
69
|
+
def self.from_gapi gapi
|
70
|
+
external_format = source_format_for gapi.source_uris,
|
71
|
+
gapi.source_format
|
72
|
+
if external_format.nil?
|
73
|
+
fail ArgumentError, "Unable to determine external table format"
|
74
|
+
end
|
75
|
+
external_class = table_class_for external_format
|
76
|
+
external_class.from_gapi gapi
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# @private Determine source_format from inputs
|
81
|
+
def self.source_format_for urls, format
|
82
|
+
val = { "csv" => "CSV",
|
83
|
+
"json" => "NEWLINE_DELIMITED_JSON",
|
84
|
+
"newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
|
85
|
+
"sheets" => "GOOGLE_SHEETS",
|
86
|
+
"google_sheets" => "GOOGLE_SHEETS",
|
87
|
+
"avro" => "AVRO",
|
88
|
+
"datastore" => "DATASTORE_BACKUP",
|
89
|
+
"backup" => "DATASTORE_BACKUP",
|
90
|
+
"datastore_backup" => "DATASTORE_BACKUP",
|
91
|
+
"bigtable" => "BIGTABLE"
|
92
|
+
}[format.to_s.downcase]
|
93
|
+
return val unless val.nil?
|
94
|
+
Array(urls).each do |url|
|
95
|
+
return "CSV" if url.end_with? ".csv"
|
96
|
+
return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
|
97
|
+
return "AVRO" if url.end_with? ".avro"
|
98
|
+
return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
|
99
|
+
if url.start_with? "https://docs.google.com/spreadsheets/"
|
100
|
+
return "GOOGLE_SHEETS"
|
101
|
+
end
|
102
|
+
if url.start_with? "https://googleapis.com/bigtable/projects/"
|
103
|
+
return "BIGTABLE"
|
104
|
+
end
|
105
|
+
end
|
106
|
+
nil
|
107
|
+
end
|
108
|
+
|
109
|
+
##
|
110
|
+
# @private Determine table class from source_format
|
111
|
+
def self.table_class_for format
|
112
|
+
case format
|
113
|
+
when "CSV" then External::CsvSource
|
114
|
+
when "NEWLINE_DELIMITED_JSON" then External::JsonSource
|
115
|
+
when "GOOGLE_SHEETS" then External::SheetsSource
|
116
|
+
when "BIGTABLE" then External::BigtableSource
|
117
|
+
else
|
118
|
+
# AVRO and DATASTORE_BACKUP
|
119
|
+
External::DataSource
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
# # DataSource
|
125
|
+
#
|
126
|
+
# External::DataSource and its subclasses represents an external data
|
127
|
+
# source that can be queried from directly, even though the data is not
|
128
|
+
# stored in BigQuery. Instead of loading or streaming the data, this
|
129
|
+
# object references the external data source.
|
130
|
+
#
|
131
|
+
# The AVRO and Datastore Backup formats use {External::DataSource}. See
|
132
|
+
# {External::CsvSource}, {External::JsonSource},
|
133
|
+
# {External::SheetsSource}, {External::BigtableSource} for the other
|
134
|
+
# formats.
|
135
|
+
#
|
136
|
+
# @example
|
137
|
+
# require "google/cloud/bigquery"
|
138
|
+
#
|
139
|
+
# bigquery = Google::Cloud::Bigquery.new
|
140
|
+
#
|
141
|
+
# avro_url = "gs://bucket/path/to/data.avro"
|
142
|
+
# avro_table = bigquery.external avro_url do |avro|
|
143
|
+
# avro.autodetect = true
|
144
|
+
# end
|
145
|
+
#
|
146
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
147
|
+
# external: { my_ext_table: avro_table }
|
148
|
+
#
|
149
|
+
# data.each do |row|
|
150
|
+
# puts row[:name]
|
151
|
+
# end
|
152
|
+
#
|
153
|
+
class DataSource
|
154
|
+
##
|
155
|
+
# @private The Google API Client object.
|
156
|
+
attr_accessor :gapi
|
157
|
+
|
158
|
+
##
|
159
|
+
# @private Create an empty Table object.
|
160
|
+
def initialize
|
161
|
+
@gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# The data format. For CSV files, specify "CSV". For Google sheets,
|
166
|
+
# specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
|
167
|
+
# "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
|
168
|
+
# Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
|
169
|
+
# Google Cloud Bigtable, specify "BIGTABLE".
|
170
|
+
#
|
171
|
+
# @return [String]
|
172
|
+
#
|
173
|
+
# @example
|
174
|
+
# require "google/cloud/bigquery"
|
175
|
+
#
|
176
|
+
# bigquery = Google::Cloud::Bigquery.new
|
177
|
+
#
|
178
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
179
|
+
# csv_table = bigquery.external csv_url
|
180
|
+
#
|
181
|
+
# csv_table.format #=> "CSV"
|
182
|
+
#
|
183
|
+
def format
|
184
|
+
@gapi.source_format
|
185
|
+
end
|
186
|
+
|
187
|
+
##
|
188
|
+
# Whether the data format is "CSV".
|
189
|
+
#
|
190
|
+
# @return [Boolean]
|
191
|
+
#
|
192
|
+
# @example
|
193
|
+
# require "google/cloud/bigquery"
|
194
|
+
#
|
195
|
+
# bigquery = Google::Cloud::Bigquery.new
|
196
|
+
#
|
197
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
198
|
+
# csv_table = bigquery.external csv_url
|
199
|
+
#
|
200
|
+
# csv_table.format #=> "CSV"
|
201
|
+
# csv_table.csv? #=> true
|
202
|
+
#
|
203
|
+
def csv?
|
204
|
+
@gapi.source_format == "CSV"
|
205
|
+
end
|
206
|
+
|
207
|
+
##
|
208
|
+
# Whether the data format is "NEWLINE_DELIMITED_JSON".
|
209
|
+
#
|
210
|
+
# @return [Boolean]
|
211
|
+
#
|
212
|
+
# @example
|
213
|
+
# require "google/cloud/bigquery"
|
214
|
+
#
|
215
|
+
# bigquery = Google::Cloud::Bigquery.new
|
216
|
+
#
|
217
|
+
# json_url = "gs://bucket/path/to/data.json"
|
218
|
+
# json_table = bigquery.external json_url
|
219
|
+
#
|
220
|
+
# json_table.format #=> "NEWLINE_DELIMITED_JSON"
|
221
|
+
# json_table.json? #=> true
|
222
|
+
#
|
223
|
+
def json?
|
224
|
+
@gapi.source_format == "NEWLINE_DELIMITED_JSON"
|
225
|
+
end
|
226
|
+
|
227
|
+
##
|
228
|
+
# Whether the data format is "GOOGLE_SHEETS".
|
229
|
+
#
|
230
|
+
# @return [Boolean]
|
231
|
+
#
|
232
|
+
# @example
|
233
|
+
# require "google/cloud/bigquery"
|
234
|
+
#
|
235
|
+
# bigquery = Google::Cloud::Bigquery.new
|
236
|
+
#
|
237
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
238
|
+
# sheets_table = bigquery.external sheets_url
|
239
|
+
#
|
240
|
+
# sheets_table.format #=> "GOOGLE_SHEETS"
|
241
|
+
# sheets_table.sheets? #=> true
|
242
|
+
#
|
243
|
+
def sheets?
|
244
|
+
@gapi.source_format == "GOOGLE_SHEETS"
|
245
|
+
end
|
246
|
+
|
247
|
+
##
|
248
|
+
# Whether the data format is "AVRO".
|
249
|
+
#
|
250
|
+
# @return [Boolean]
|
251
|
+
#
|
252
|
+
# @example
|
253
|
+
# require "google/cloud/bigquery"
|
254
|
+
#
|
255
|
+
# bigquery = Google::Cloud::Bigquery.new
|
256
|
+
#
|
257
|
+
# avro_url = "gs://bucket/path/to/data.avro"
|
258
|
+
# avro_table = bigquery.external avro_url
|
259
|
+
#
|
260
|
+
# avro_table.format #=> "AVRO"
|
261
|
+
# avro_table.avro? #=> true
|
262
|
+
#
|
263
|
+
def avro?
|
264
|
+
@gapi.source_format == "AVRO"
|
265
|
+
end
|
266
|
+
|
267
|
+
##
|
268
|
+
# Whether the data format is "DATASTORE_BACKUP".
|
269
|
+
#
|
270
|
+
# @return [Boolean]
|
271
|
+
#
|
272
|
+
# @example
|
273
|
+
# require "google/cloud/bigquery"
|
274
|
+
#
|
275
|
+
# bigquery = Google::Cloud::Bigquery.new
|
276
|
+
#
|
277
|
+
# backup_url = "gs://bucket/path/to/data.backup_info"
|
278
|
+
# backup_table = bigquery.external backup_url
|
279
|
+
#
|
280
|
+
# backup_table.format #=> "DATASTORE_BACKUP"
|
281
|
+
# backup_table.backup? #=> true
|
282
|
+
#
|
283
|
+
def backup?
|
284
|
+
@gapi.source_format == "DATASTORE_BACKUP"
|
285
|
+
end
|
286
|
+
|
287
|
+
##
|
288
|
+
# Whether the data format is "BIGTABLE".
|
289
|
+
#
|
290
|
+
# @return [Boolean]
|
291
|
+
#
|
292
|
+
# @example
|
293
|
+
# require "google/cloud/bigquery"
|
294
|
+
#
|
295
|
+
# bigquery = Google::Cloud::Bigquery.new
|
296
|
+
#
|
297
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
298
|
+
# bigtable_table = bigquery.external bigtable_url
|
299
|
+
#
|
300
|
+
# bigtable_table.format #=> "BIGTABLE"
|
301
|
+
# bigtable_table.bigtable? #=> true
|
302
|
+
#
|
303
|
+
def bigtable?
|
304
|
+
@gapi.source_format == "BIGTABLE"
|
305
|
+
end
|
306
|
+
|
307
|
+
##
|
308
|
+
# The fully-qualified URIs that point to your data in Google Cloud.
|
309
|
+
# For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
|
310
|
+
# character and it must come after the 'bucket' name. Size limits
|
311
|
+
# related to load jobs apply to external data sources. For Google
|
312
|
+
# Cloud Bigtable URIs: Exactly one URI can be specified and it has be
|
313
|
+
# a fully specified and valid HTTPS URL for a Google Cloud Bigtable
|
314
|
+
# table. For Google Cloud Datastore backups, exactly one URI can be
|
315
|
+
# specified, and it must end with '.backup_info'. Also, the '*'
|
316
|
+
# wildcard character is not allowed.
|
317
|
+
#
|
318
|
+
# @return [Array<String>]
|
319
|
+
#
|
320
|
+
# @example
|
321
|
+
# require "google/cloud/bigquery"
|
322
|
+
#
|
323
|
+
# bigquery = Google::Cloud::Bigquery.new
|
324
|
+
#
|
325
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
326
|
+
# csv_table = bigquery.external csv_url
|
327
|
+
#
|
328
|
+
# csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
|
329
|
+
#
|
330
|
+
def urls
|
331
|
+
@gapi.source_uris
|
332
|
+
end
|
333
|
+
|
334
|
+
##
|
335
|
+
# Indicates if the schema and format options are detected
|
336
|
+
# automatically.
|
337
|
+
#
|
338
|
+
# @return [Boolean]
|
339
|
+
#
|
340
|
+
# @example
|
341
|
+
# require "google/cloud/bigquery"
|
342
|
+
#
|
343
|
+
# bigquery = Google::Cloud::Bigquery.new
|
344
|
+
#
|
345
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
346
|
+
# csv_table = bigquery.external csv_url do |csv|
|
347
|
+
# csv.autodetect = true
|
348
|
+
# end
|
349
|
+
#
|
350
|
+
# csv_table.autodetect #=> true
|
351
|
+
#
|
352
|
+
def autodetect
|
353
|
+
@gapi.autodetect
|
354
|
+
end
|
355
|
+
|
356
|
+
##
|
357
|
+
# Set whether to detect schema and format options automatically. Any
|
358
|
+
# option specified explicitly will be honored.
|
359
|
+
#
|
360
|
+
# @param [Boolean] new_autodetect New autodetect value
|
361
|
+
#
|
362
|
+
# @example
|
363
|
+
# require "google/cloud/bigquery"
|
364
|
+
#
|
365
|
+
# bigquery = Google::Cloud::Bigquery.new
|
366
|
+
#
|
367
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
368
|
+
# csv_table = bigquery.external csv_url do |csv|
|
369
|
+
# csv.autodetect = true
|
370
|
+
# end
|
371
|
+
#
|
372
|
+
# csv_table.autodetect #=> true
|
373
|
+
#
|
374
|
+
def autodetect= new_autodetect
|
375
|
+
frozen_check!
|
376
|
+
@gapi.autodetect = new_autodetect
|
377
|
+
end
|
378
|
+
|
379
|
+
##
|
380
|
+
# The compression type of the data source. Possible values include
|
381
|
+
# `"GZIP"` and `nil`. The default value is `nil`. This setting is
|
382
|
+
# ignored for Google Cloud Bigtable, Google Cloud Datastore backups
|
383
|
+
# and Avro formats. Optional.
|
384
|
+
#
|
385
|
+
# @return [String]
|
386
|
+
#
|
387
|
+
# @example
|
388
|
+
# require "google/cloud/bigquery"
|
389
|
+
#
|
390
|
+
# bigquery = Google::Cloud::Bigquery.new
|
391
|
+
#
|
392
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
393
|
+
# csv_table = bigquery.external csv_url do |csv|
|
394
|
+
# csv.compression = "GZIP"
|
395
|
+
# end
|
396
|
+
#
|
397
|
+
# csv_table.compression #=> "GZIP"
|
398
|
+
def compression
|
399
|
+
@gapi.compression
|
400
|
+
end
|
401
|
+
|
402
|
+
##
|
403
|
+
# Set the compression type of the data source. Possible values include
|
404
|
+
# `"GZIP"` and `nil`. The default value is `nil`. This setting is
|
405
|
+
# ignored for Google Cloud Bigtable, Google Cloud Datastore backups
|
406
|
+
# and Avro formats. Optional.
|
407
|
+
#
|
408
|
+
# @param [String] new_compression New compression value
|
409
|
+
#
|
410
|
+
# @example
|
411
|
+
# require "google/cloud/bigquery"
|
412
|
+
#
|
413
|
+
# bigquery = Google::Cloud::Bigquery.new
|
414
|
+
#
|
415
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
416
|
+
# csv_table = bigquery.external csv_url do |csv|
|
417
|
+
# csv.compression = "GZIP"
|
418
|
+
# end
|
419
|
+
#
|
420
|
+
# csv_table.compression #=> "GZIP"
|
421
|
+
#
|
422
|
+
def compression= new_compression
|
423
|
+
frozen_check!
|
424
|
+
@gapi.compression = new_compression
|
425
|
+
end
|
426
|
+
|
427
|
+
##
|
428
|
+
# Indicates if BigQuery should allow extra values that are not
|
429
|
+
# represented in the table schema. If `true`, the extra values are
|
430
|
+
# ignored. If `false`, records with extra columns are treated as bad
|
431
|
+
# records, and if there are too many bad records, an invalid error is
|
432
|
+
# returned in the job result. The default value is `false`.
|
433
|
+
#
|
434
|
+
# BigQuery treats trailing columns as an extra in `CSV`, named values
|
435
|
+
# that don't match any column names in `JSON`. This setting is ignored
|
436
|
+
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
437
|
+
# formats. Optional.
|
438
|
+
#
|
439
|
+
# @return [Boolean]
|
440
|
+
#
|
441
|
+
# @example
|
442
|
+
# require "google/cloud/bigquery"
|
443
|
+
#
|
444
|
+
# bigquery = Google::Cloud::Bigquery.new
|
445
|
+
#
|
446
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
447
|
+
# csv_table = bigquery.external csv_url do |csv|
|
448
|
+
# csv.ignore_unknown = true
|
449
|
+
# end
|
450
|
+
#
|
451
|
+
# csv_table.ignore_unknown #=> true
|
452
|
+
#
|
453
|
+
def ignore_unknown
|
454
|
+
@gapi.ignore_unknown_values
|
455
|
+
end
|
456
|
+
|
457
|
+
##
|
458
|
+
# Set whether BigQuery should allow extra values that are not
|
459
|
+
# represented in the table schema. If `true`, the extra values are
|
460
|
+
# ignored. If `false`, records with extra columns are treated as bad
|
461
|
+
# records, and if there are too many bad records, an invalid error is
|
462
|
+
# returned in the job result. The default value is `false`.
|
463
|
+
#
|
464
|
+
# BigQuery treats trailing columns as an extra in `CSV`, named values
|
465
|
+
# that don't match any column names in `JSON`. This setting is ignored
|
466
|
+
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
467
|
+
# formats. Optional.
|
468
|
+
#
|
469
|
+
# @param [Boolean] new_ignore_unknown New ignore_unknown value
|
470
|
+
#
|
471
|
+
# @example
|
472
|
+
# require "google/cloud/bigquery"
|
473
|
+
#
|
474
|
+
# bigquery = Google::Cloud::Bigquery.new
|
475
|
+
#
|
476
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
477
|
+
# csv_table = bigquery.external csv_url do |csv|
|
478
|
+
# csv.ignore_unknown = true
|
479
|
+
# end
|
480
|
+
#
|
481
|
+
# csv_table.ignore_unknown #=> true
|
482
|
+
#
|
483
|
+
def ignore_unknown= new_ignore_unknown
|
484
|
+
frozen_check!
|
485
|
+
@gapi.ignore_unknown_values = new_ignore_unknown
|
486
|
+
end
|
487
|
+
|
488
|
+
##
|
489
|
+
# The maximum number of bad records that BigQuery can ignore when
|
490
|
+
# reading data. If the number of bad records exceeds this value, an
|
491
|
+
# invalid error is returned in the job result. The default value is 0,
|
492
|
+
# which requires that all records are valid. This setting is ignored
|
493
|
+
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
494
|
+
# formats.
|
495
|
+
#
|
496
|
+
# @return [Integer]
|
497
|
+
#
|
498
|
+
# @example
|
499
|
+
# require "google/cloud/bigquery"
|
500
|
+
#
|
501
|
+
# bigquery = Google::Cloud::Bigquery.new
|
502
|
+
#
|
503
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
504
|
+
# csv_table = bigquery.external csv_url do |csv|
|
505
|
+
# csv.max_bad_records = 10
|
506
|
+
# end
|
507
|
+
#
|
508
|
+
# csv_table.max_bad_records #=> 10
|
509
|
+
#
|
510
|
+
def max_bad_records
|
511
|
+
@gapi.max_bad_records
|
512
|
+
end
|
513
|
+
|
514
|
+
##
|
515
|
+
# Set the maximum number of bad records that BigQuery can ignore when
|
516
|
+
# reading data. If the number of bad records exceeds this value, an
|
517
|
+
# invalid error is returned in the job result. The default value is 0,
|
518
|
+
# which requires that all records are valid. This setting is ignored
|
519
|
+
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
520
|
+
# formats.
|
521
|
+
#
|
522
|
+
# @param [Integer] new_max_bad_records New max_bad_records value
|
523
|
+
#
|
524
|
+
# @example
|
525
|
+
# require "google/cloud/bigquery"
|
526
|
+
#
|
527
|
+
# bigquery = Google::Cloud::Bigquery.new
|
528
|
+
#
|
529
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
530
|
+
# csv_table = bigquery.external csv_url do |csv|
|
531
|
+
# csv.max_bad_records = 10
|
532
|
+
# end
|
533
|
+
#
|
534
|
+
# csv_table.max_bad_records #=> 10
|
535
|
+
#
|
536
|
+
def max_bad_records= new_max_bad_records
|
537
|
+
frozen_check!
|
538
|
+
@gapi.max_bad_records = new_max_bad_records
|
539
|
+
end
|
540
|
+
|
541
|
+
##
|
542
|
+
# @private Google API Client object.
|
543
|
+
def to_gapi
|
544
|
+
@gapi
|
545
|
+
end
|
546
|
+
|
547
|
+
##
|
548
|
+
# @private Google API Client object.
|
549
|
+
def self.from_gapi gapi
|
550
|
+
new_table = new
|
551
|
+
new_table.instance_variable_set :@gapi, gapi
|
552
|
+
new_table
|
553
|
+
end
|
554
|
+
|
555
|
+
protected
|
556
|
+
|
557
|
+
def frozen_check!
|
558
|
+
return unless frozen?
|
559
|
+
fail ArgumentError, "Cannot modify external data source when frozen"
|
560
|
+
end
|
561
|
+
end
|
562
|
+
|
563
|
+
##
|
564
|
+
# # CsvSource
|
565
|
+
#
|
566
|
+
# {External::CsvSource} is a subclass of {External::DataSource} and
|
567
|
+
# represents a CSV external data source that can be queried from
|
568
|
+
# directly, such as Google Cloud Storage or Google Drive, even though
|
569
|
+
# the data is not stored in BigQuery. Instead of loading or streaming
|
570
|
+
# the data, this object references the external data source.
|
571
|
+
#
|
572
|
+
# @example
|
573
|
+
# require "google/cloud/bigquery"
|
574
|
+
#
|
575
|
+
# bigquery = Google::Cloud::Bigquery.new
|
576
|
+
#
|
577
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
578
|
+
# csv_table = bigquery.external csv_url do |csv|
|
579
|
+
# csv.autodetect = true
|
580
|
+
# csv.skip_leading_rows = 1
|
581
|
+
# end
|
582
|
+
#
|
583
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
584
|
+
# external: { my_ext_table: csv_table }
|
585
|
+
#
|
586
|
+
# data.each do |row|
|
587
|
+
# puts row[:name]
|
588
|
+
# end
|
589
|
+
#
|
590
|
+
class CsvSource < External::DataSource
|
591
|
+
##
|
592
|
+
# @private Create an empty CsvSource object.
|
593
|
+
def initialize
|
594
|
+
super
|
595
|
+
@gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
|
596
|
+
end
|
597
|
+
|
598
|
+
##
|
599
|
+
# Indicates if BigQuery should accept rows that are missing trailing
|
600
|
+
# optional columns.
|
601
|
+
#
|
602
|
+
# @return [Boolean]
|
603
|
+
#
|
604
|
+
# @example
|
605
|
+
# require "google/cloud/bigquery"
|
606
|
+
#
|
607
|
+
# bigquery = Google::Cloud::Bigquery.new
|
608
|
+
#
|
609
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
610
|
+
# csv_table = bigquery.external csv_url do |csv|
|
611
|
+
# csv.jagged_rows = true
|
612
|
+
# end
|
613
|
+
#
|
614
|
+
# csv_table.jagged_rows #=> true
|
615
|
+
#
|
616
|
+
def jagged_rows
|
617
|
+
@gapi.csv_options.allow_jagged_rows
|
618
|
+
end
|
619
|
+
|
620
|
+
##
|
621
|
+
# Set whether BigQuery should accept rows that are missing trailing
|
622
|
+
# optional columns.
|
623
|
+
#
|
624
|
+
# @param [Boolean] new_jagged_rows New jagged_rows value
|
625
|
+
#
|
626
|
+
# @example
|
627
|
+
# require "google/cloud/bigquery"
|
628
|
+
#
|
629
|
+
# bigquery = Google::Cloud::Bigquery.new
|
630
|
+
#
|
631
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
632
|
+
# csv_table = bigquery.external csv_url do |csv|
|
633
|
+
# csv.jagged_rows = true
|
634
|
+
# end
|
635
|
+
#
|
636
|
+
# csv_table.jagged_rows #=> true
|
637
|
+
#
|
638
|
+
def jagged_rows= new_jagged_rows
|
639
|
+
frozen_check!
|
640
|
+
@gapi.csv_options.allow_jagged_rows = new_jagged_rows
|
641
|
+
end
|
642
|
+
|
643
|
+
##
|
644
|
+
# Indicates if BigQuery should allow quoted data sections that contain
|
645
|
+
# newline characters in a CSV file.
|
646
|
+
#
|
647
|
+
# @return [Boolean]
|
648
|
+
#
|
649
|
+
# @example
|
650
|
+
# require "google/cloud/bigquery"
|
651
|
+
#
|
652
|
+
# bigquery = Google::Cloud::Bigquery.new
|
653
|
+
#
|
654
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
655
|
+
# csv_table = bigquery.external csv_url do |csv|
|
656
|
+
# csv.quoted_newlines = true
|
657
|
+
# end
|
658
|
+
#
|
659
|
+
# csv_table.quoted_newlines #=> true
|
660
|
+
#
|
661
|
+
def quoted_newlines
|
662
|
+
@gapi.csv_options.allow_quoted_newlines
|
663
|
+
end
|
664
|
+
|
665
|
+
##
|
666
|
+
# Set whether BigQuery should allow quoted data sections that contain
|
667
|
+
# newline characters in a CSV file.
|
668
|
+
#
|
669
|
+
# @param [Boolean] new_quoted_newlines New quoted_newlines value
|
670
|
+
#
|
671
|
+
# @example
|
672
|
+
# require "google/cloud/bigquery"
|
673
|
+
#
|
674
|
+
# bigquery = Google::Cloud::Bigquery.new
|
675
|
+
#
|
676
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
677
|
+
# csv_table = bigquery.external csv_url do |csv|
|
678
|
+
# csv.quoted_newlines = true
|
679
|
+
# end
|
680
|
+
#
|
681
|
+
# csv_table.quoted_newlines #=> true
|
682
|
+
#
|
683
|
+
def quoted_newlines= new_quoted_newlines
|
684
|
+
frozen_check!
|
685
|
+
@gapi.csv_options.allow_quoted_newlines = new_quoted_newlines
|
686
|
+
end
|
687
|
+
|
688
|
+
##
|
689
|
+
# The character encoding of the data.
|
690
|
+
#
|
691
|
+
# @return [String]
|
692
|
+
#
|
693
|
+
# @example
|
694
|
+
# require "google/cloud/bigquery"
|
695
|
+
#
|
696
|
+
# bigquery = Google::Cloud::Bigquery.new
|
697
|
+
#
|
698
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
699
|
+
# csv_table = bigquery.external csv_url do |csv|
|
700
|
+
# csv.encoding = "UTF-8"
|
701
|
+
# end
|
702
|
+
#
|
703
|
+
# csv_table.encoding #=> "UTF-8"
|
704
|
+
#
|
705
|
+
def encoding
|
706
|
+
@gapi.csv_options.encoding
|
707
|
+
end
|
708
|
+
|
709
|
+
##
|
710
|
+
# Set the character encoding of the data.
|
711
|
+
#
|
712
|
+
# @param [String] new_encoding New encoding value
|
713
|
+
#
|
714
|
+
# @example
|
715
|
+
# require "google/cloud/bigquery"
|
716
|
+
#
|
717
|
+
# bigquery = Google::Cloud::Bigquery.new
|
718
|
+
#
|
719
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
720
|
+
# csv_table = bigquery.external csv_url do |csv|
|
721
|
+
# csv.encoding = "UTF-8"
|
722
|
+
# end
|
723
|
+
#
|
724
|
+
# csv_table.encoding #=> "UTF-8"
|
725
|
+
#
|
726
|
+
def encoding= new_encoding
|
727
|
+
frozen_check!
|
728
|
+
@gapi.csv_options.encoding = new_encoding
|
729
|
+
end
|
730
|
+
|
731
|
+
##
|
732
|
+
# Checks if the character encoding of the data is "UTF-8". This is the
|
733
|
+
# default.
|
734
|
+
#
|
735
|
+
# @return [Boolean]
|
736
|
+
#
|
737
|
+
# @example
|
738
|
+
# require "google/cloud/bigquery"
|
739
|
+
#
|
740
|
+
# bigquery = Google::Cloud::Bigquery.new
|
741
|
+
#
|
742
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
743
|
+
# csv_table = bigquery.external csv_url do |csv|
|
744
|
+
# csv.encoding = "UTF-8"
|
745
|
+
# end
|
746
|
+
#
|
747
|
+
# csv_table.encoding #=> "UTF-8"
|
748
|
+
# csv_table.utf8? #=> true
|
749
|
+
#
|
750
|
+
def utf8?
|
751
|
+
return true if encoding.nil?
|
752
|
+
encoding == "UTF-8"
|
753
|
+
end
|
754
|
+
|
755
|
+
##
|
756
|
+
# Checks if the character encoding of the data is "ISO-8859-1".
|
757
|
+
#
|
758
|
+
# @return [Boolean]
|
759
|
+
#
|
760
|
+
# @example
|
761
|
+
# require "google/cloud/bigquery"
|
762
|
+
#
|
763
|
+
# bigquery = Google::Cloud::Bigquery.new
|
764
|
+
#
|
765
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
766
|
+
# csv_table = bigquery.external csv_url do |csv|
|
767
|
+
# csv.encoding = "ISO-8859-1"
|
768
|
+
# end
|
769
|
+
#
|
770
|
+
# csv_table.encoding #=> "ISO-8859-1"
|
771
|
+
# csv_table.iso8859_1? #=> true
|
772
|
+
#
|
773
|
+
def iso8859_1?
|
774
|
+
encoding == "ISO-8859-1"
|
775
|
+
end
|
776
|
+
|
777
|
+
##
|
778
|
+
# The separator for fields in a CSV file.
|
779
|
+
#
|
780
|
+
# @return [String]
|
781
|
+
#
|
782
|
+
# @example
|
783
|
+
# require "google/cloud/bigquery"
|
784
|
+
#
|
785
|
+
# bigquery = Google::Cloud::Bigquery.new
|
786
|
+
#
|
787
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
788
|
+
# csv_table = bigquery.external csv_url do |csv|
|
789
|
+
# csv.delimiter = "|"
|
790
|
+
# end
|
791
|
+
#
|
792
|
+
# csv_table.delimiter #=> "|"
|
793
|
+
#
|
794
|
+
def delimiter
|
795
|
+
@gapi.csv_options.field_delimiter
|
796
|
+
end
|
797
|
+
|
798
|
+
##
|
799
|
+
# Set the separator for fields in a CSV file.
|
800
|
+
#
|
801
|
+
# @param [String] new_delimiter New delimiter value
|
802
|
+
#
|
803
|
+
# @example
|
804
|
+
# require "google/cloud/bigquery"
|
805
|
+
#
|
806
|
+
# bigquery = Google::Cloud::Bigquery.new
|
807
|
+
#
|
808
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
809
|
+
# csv_table = bigquery.external csv_url do |csv|
|
810
|
+
# csv.delimiter = "|"
|
811
|
+
# end
|
812
|
+
#
|
813
|
+
# csv_table.delimiter #=> "|"
|
814
|
+
#
|
815
|
+
def delimiter= new_delimiter
|
816
|
+
frozen_check!
|
817
|
+
@gapi.csv_options.field_delimiter = new_delimiter
|
818
|
+
end
|
819
|
+
|
820
|
+
##
|
821
|
+
# The value that is used to quote data sections in a CSV file.
|
822
|
+
#
|
823
|
+
# @return [String]
|
824
|
+
#
|
825
|
+
# @example
|
826
|
+
# require "google/cloud/bigquery"
|
827
|
+
#
|
828
|
+
# bigquery = Google::Cloud::Bigquery.new
|
829
|
+
#
|
830
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
831
|
+
# csv_table = bigquery.external csv_url do |csv|
|
832
|
+
# csv.quote = "'"
|
833
|
+
# end
|
834
|
+
#
|
835
|
+
# csv_table.quote #=> "'"
|
836
|
+
#
|
837
|
+
def quote
|
838
|
+
@gapi.csv_options.quote
|
839
|
+
end
|
840
|
+
|
841
|
+
##
|
842
|
+
# Set the value that is used to quote data sections in a CSV file.
|
843
|
+
#
|
844
|
+
# @param [String] new_quote New quote value
|
845
|
+
#
|
846
|
+
# @example
|
847
|
+
# require "google/cloud/bigquery"
|
848
|
+
#
|
849
|
+
# bigquery = Google::Cloud::Bigquery.new
|
850
|
+
#
|
851
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
852
|
+
# csv_table = bigquery.external csv_url do |csv|
|
853
|
+
# csv.quote = "'"
|
854
|
+
# end
|
855
|
+
#
|
856
|
+
# csv_table.quote #=> "'"
|
857
|
+
#
|
858
|
+
def quote= new_quote
|
859
|
+
frozen_check!
|
860
|
+
@gapi.csv_options.quote = new_quote
|
861
|
+
end
|
862
|
+
|
863
|
+
##
|
864
|
+
# The number of rows at the top of a CSV file that BigQuery will skip
|
865
|
+
# when reading the data.
|
866
|
+
#
|
867
|
+
# @return [Integer]
|
868
|
+
#
|
869
|
+
# @example
|
870
|
+
# require "google/cloud/bigquery"
|
871
|
+
#
|
872
|
+
# bigquery = Google::Cloud::Bigquery.new
|
873
|
+
#
|
874
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
875
|
+
# csv_table = bigquery.external csv_url do |csv|
|
876
|
+
# csv.skip_leading_rows = 1
|
877
|
+
# end
|
878
|
+
#
|
879
|
+
# csv_table.skip_leading_rows #=> 1
|
880
|
+
#
|
881
|
+
def skip_leading_rows
|
882
|
+
@gapi.csv_options.skip_leading_rows
|
883
|
+
end
|
884
|
+
|
885
|
+
##
|
886
|
+
# Set the number of rows at the top of a CSV file that BigQuery will
|
887
|
+
# skip when reading the data.
|
888
|
+
#
|
889
|
+
# @param [Integer] row_count New skip_leading_rows value
|
890
|
+
#
|
891
|
+
# @example
|
892
|
+
# require "google/cloud/bigquery"
|
893
|
+
#
|
894
|
+
# bigquery = Google::Cloud::Bigquery.new
|
895
|
+
#
|
896
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
897
|
+
# csv_table = bigquery.external csv_url do |csv|
|
898
|
+
# csv.skip_leading_rows = 1
|
899
|
+
# end
|
900
|
+
#
|
901
|
+
# csv_table.skip_leading_rows #=> 1
|
902
|
+
#
|
903
|
+
def skip_leading_rows= row_count
|
904
|
+
frozen_check!
|
905
|
+
@gapi.csv_options.skip_leading_rows = row_count
|
906
|
+
end
|
907
|
+
|
908
|
+
##
|
909
|
+
# The schema for the data.
|
910
|
+
#
|
911
|
+
# @param [Boolean] replace Whether to replace the existing schema with
|
912
|
+
# the new schema. If `true`, the fields will replace the existing
|
913
|
+
# schema. If `false`, the fields will be added to the existing
|
914
|
+
# schema. The default value is `false`.
|
915
|
+
# @yield [schema] a block for setting the schema
|
916
|
+
# @yieldparam [Schema] schema the object accepting the schema
|
917
|
+
#
|
918
|
+
# @return [Google::Cloud::Bigquery::Schema]
|
919
|
+
#
|
920
|
+
# @example
|
921
|
+
# require "google/cloud/bigquery"
|
922
|
+
#
|
923
|
+
# bigquery = Google::Cloud::Bigquery.new
|
924
|
+
#
|
925
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
926
|
+
# csv_table = bigquery.external csv_url do |csv|
|
927
|
+
# csv.schema do |schema|
|
928
|
+
# schema.string "name", mode: :required
|
929
|
+
# schema.string "email", mode: :required
|
930
|
+
# schema.integer "age", mode: :required
|
931
|
+
# schema.boolean "active", mode: :required
|
932
|
+
# end
|
933
|
+
# end
|
934
|
+
#
|
935
|
+
def schema replace: false
|
936
|
+
@schema ||= Schema.from_gapi @gapi.schema
|
937
|
+
if replace
|
938
|
+
frozen_check!
|
939
|
+
@schema = Schema.from_gapi
|
940
|
+
end
|
941
|
+
@schema.freeze if frozen?
|
942
|
+
yield @schema if block_given?
|
943
|
+
@schema
|
944
|
+
end
|
945
|
+
|
946
|
+
##
|
947
|
+
# Set the schema for the data.
|
948
|
+
#
|
949
|
+
# @param [Schema] new_schema The schema object.
|
950
|
+
#
|
951
|
+
# @example
|
952
|
+
# require "google/cloud/bigquery"
|
953
|
+
#
|
954
|
+
# bigquery = Google::Cloud::Bigquery.new
|
955
|
+
#
|
956
|
+
# csv_shema = bigquery.schema do |schema|
|
957
|
+
# schema.string "name", mode: :required
|
958
|
+
# schema.string "email", mode: :required
|
959
|
+
# schema.integer "age", mode: :required
|
960
|
+
# schema.boolean "active", mode: :required
|
961
|
+
# end
|
962
|
+
#
|
963
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
964
|
+
# csv_table = bigquery.external csv_url
|
965
|
+
# csv_table.schema = csv_shema
|
966
|
+
#
|
967
|
+
def schema= new_schema
|
968
|
+
frozen_check!
|
969
|
+
@schema = new_schema
|
970
|
+
end
|
971
|
+
|
972
|
+
##
|
973
|
+
# The fields of the schema.
|
974
|
+
#
|
975
|
+
def fields
|
976
|
+
schema.fields
|
977
|
+
end
|
978
|
+
|
979
|
+
##
|
980
|
+
# The names of the columns in the schema.
|
981
|
+
#
|
982
|
+
def headers
|
983
|
+
schema.headers
|
984
|
+
end
|
985
|
+
|
986
|
+
##
|
987
|
+
# @private Google API Client object.
|
988
|
+
def to_gapi
|
989
|
+
@gapi.schema = @schema.to_gapi if @schema
|
990
|
+
@gapi
|
991
|
+
end
|
992
|
+
|
993
|
+
##
|
994
|
+
# @private Google API Client object.
|
995
|
+
def self.from_gapi gapi
|
996
|
+
new_table = super
|
997
|
+
schema = Schema.from_gapi gapi.schema
|
998
|
+
new_table.instance_variable_set :@schema, schema
|
999
|
+
new_table
|
1000
|
+
end
|
1001
|
+
end
|
1002
|
+
|
1003
|
+
##
|
1004
|
+
# # JsonSource
|
1005
|
+
#
|
1006
|
+
# {External::JsonSource} is a subclass of {External::DataSource} and
|
1007
|
+
# represents a JSON external data source that can be queried from
|
1008
|
+
# directly, such as Google Cloud Storage or Google Drive, even though
|
1009
|
+
# the data is not stored in BigQuery. Instead of loading or streaming
|
1010
|
+
# the data, this object references the external data source.
|
1011
|
+
#
|
1012
|
+
# @example
|
1013
|
+
# require "google/cloud/bigquery"
|
1014
|
+
#
|
1015
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1016
|
+
#
|
1017
|
+
# require "google/cloud/bigquery"
|
1018
|
+
#
|
1019
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1020
|
+
#
|
1021
|
+
# json_url = "gs://bucket/path/to/data.json"
|
1022
|
+
# json_table = bigquery.external json_url do |json|
|
1023
|
+
# json.schema do |schema|
|
1024
|
+
# schema.string "name", mode: :required
|
1025
|
+
# schema.string "email", mode: :required
|
1026
|
+
# schema.integer "age", mode: :required
|
1027
|
+
# schema.boolean "active", mode: :required
|
1028
|
+
# end
|
1029
|
+
# end
|
1030
|
+
#
|
1031
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
1032
|
+
# external: { my_ext_table: json_table }
|
1033
|
+
#
|
1034
|
+
# data.each do |row|
|
1035
|
+
# puts row[:name]
|
1036
|
+
# end
|
1037
|
+
#
|
1038
|
+
class JsonSource < External::DataSource
|
1039
|
+
##
|
1040
|
+
# The schema for the data.
|
1041
|
+
#
|
1042
|
+
# @param [Boolean] replace Whether to replace the existing schema with
|
1043
|
+
# the new schema. If `true`, the fields will replace the existing
|
1044
|
+
# schema. If `false`, the fields will be added to the existing
|
1045
|
+
# schema. The default value is `false`.
|
1046
|
+
# @yield [schema] a block for setting the schema
|
1047
|
+
# @yieldparam [Schema] schema the object accepting the schema
|
1048
|
+
#
|
1049
|
+
# @return [Google::Cloud::Bigquery::Schema]
|
1050
|
+
#
|
1051
|
+
# @example
|
1052
|
+
# require "google/cloud/bigquery"
|
1053
|
+
#
|
1054
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1055
|
+
#
|
1056
|
+
# json_url = "gs://bucket/path/to/data.json"
|
1057
|
+
# json_table = bigquery.external json_url do |json|
|
1058
|
+
# json.schema do |schema|
|
1059
|
+
# schema.string "name", mode: :required
|
1060
|
+
# schema.string "email", mode: :required
|
1061
|
+
# schema.integer "age", mode: :required
|
1062
|
+
# schema.boolean "active", mode: :required
|
1063
|
+
# end
|
1064
|
+
# end
|
1065
|
+
#
|
1066
|
+
def schema replace: false
|
1067
|
+
@schema ||= Schema.from_gapi @gapi.schema
|
1068
|
+
if replace
|
1069
|
+
frozen_check!
|
1070
|
+
@schema = Schema.from_gapi
|
1071
|
+
end
|
1072
|
+
@schema.freeze if frozen?
|
1073
|
+
yield @schema if block_given?
|
1074
|
+
@schema
|
1075
|
+
end
|
1076
|
+
|
1077
|
+
##
|
1078
|
+
# Set the schema for the data.
|
1079
|
+
#
|
1080
|
+
# @param [Schema] new_schema The schema object.
|
1081
|
+
#
|
1082
|
+
# @example
|
1083
|
+
# require "google/cloud/bigquery"
|
1084
|
+
#
|
1085
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1086
|
+
#
|
1087
|
+
# json_shema = bigquery.schema do |schema|
|
1088
|
+
# schema.string "name", mode: :required
|
1089
|
+
# schema.string "email", mode: :required
|
1090
|
+
# schema.integer "age", mode: :required
|
1091
|
+
# schema.boolean "active", mode: :required
|
1092
|
+
# end
|
1093
|
+
#
|
1094
|
+
# json_url = "gs://bucket/path/to/data.json"
|
1095
|
+
# json_table = bigquery.external json_url
|
1096
|
+
# json_table.schema = json_shema
|
1097
|
+
#
|
1098
|
+
def schema= new_schema
|
1099
|
+
frozen_check!
|
1100
|
+
@schema = new_schema
|
1101
|
+
end
|
1102
|
+
|
1103
|
+
##
|
1104
|
+
# The fields of the schema.
|
1105
|
+
#
|
1106
|
+
def fields
|
1107
|
+
schema.fields
|
1108
|
+
end
|
1109
|
+
|
1110
|
+
##
|
1111
|
+
# The names of the columns in the schema.
|
1112
|
+
#
|
1113
|
+
def headers
|
1114
|
+
schema.headers
|
1115
|
+
end
|
1116
|
+
|
1117
|
+
##
|
1118
|
+
# @private Google API Client object.
|
1119
|
+
def to_gapi
|
1120
|
+
@gapi.schema = @schema.to_gapi if @schema
|
1121
|
+
@gapi
|
1122
|
+
end
|
1123
|
+
|
1124
|
+
##
|
1125
|
+
# @private Google API Client object.
|
1126
|
+
def self.from_gapi gapi
|
1127
|
+
new_table = super
|
1128
|
+
schema = Schema.from_gapi gapi.schema
|
1129
|
+
new_table.instance_variable_set :@schema, schema
|
1130
|
+
new_table
|
1131
|
+
end
|
1132
|
+
end
|
1133
|
+
|
1134
|
+
##
|
1135
|
+
# # SheetsSource
|
1136
|
+
#
|
1137
|
+
# {External::SheetsSource} is a subclass of {External::DataSource} and
|
1138
|
+
# represents a Google Sheets external data source that can be queried
|
1139
|
+
# from directly, even though the data is not stored in BigQuery. Instead
|
1140
|
+
# of loading or streaming the data, this object references the external
|
1141
|
+
# data source.
|
1142
|
+
#
|
1143
|
+
# @example
|
1144
|
+
# require "google/cloud/bigquery"
|
1145
|
+
#
|
1146
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1147
|
+
#
|
1148
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
1149
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
1150
|
+
# sheets.skip_leading_rows = 1
|
1151
|
+
# end
|
1152
|
+
#
|
1153
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
1154
|
+
# external: { my_ext_table: sheets_table }
|
1155
|
+
#
|
1156
|
+
# data.each do |row|
|
1157
|
+
# puts row[:name]
|
1158
|
+
# end
|
1159
|
+
#
|
1160
|
+
class SheetsSource < External::DataSource
|
1161
|
+
##
|
1162
|
+
# @private Create an empty SheetsSource object.
|
1163
|
+
def initialize
|
1164
|
+
super
|
1165
|
+
@gapi.google_sheets_options = \
|
1166
|
+
Google::Apis::BigqueryV2::GoogleSheetsOptions.new
|
1167
|
+
end
|
1168
|
+
|
1169
|
+
##
|
1170
|
+
# The number of rows at the top of a sheet that BigQuery will skip
|
1171
|
+
# when reading the data. The default value is `0`.
|
1172
|
+
#
|
1173
|
+
# This property is useful if you have header rows that should be
|
1174
|
+
# skipped. When `autodetect` is on, behavior is the following:
|
1175
|
+
#
|
1176
|
+
# * `nil` - Autodetect tries to detect headers in the first row. If
|
1177
|
+
# they are not detected, the row is read as data. Otherwise data is
|
1178
|
+
# read starting from the second row.
|
1179
|
+
# * `0` - Instructs autodetect that there are no headers and data
|
1180
|
+
# should be read starting from the first row.
|
1181
|
+
# * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
|
1182
|
+
# in row `N`. If headers are not detected, row `N` is just skipped.
|
1183
|
+
# Otherwise row `N` is used to extract column names for the detected
|
1184
|
+
# schema.
|
1185
|
+
#
|
1186
|
+
# @return [Integer]
|
1187
|
+
#
|
1188
|
+
# @example
|
1189
|
+
# require "google/cloud/bigquery"
|
1190
|
+
#
|
1191
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1192
|
+
#
|
1193
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
1194
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
1195
|
+
# sheets.skip_leading_rows = 1
|
1196
|
+
# end
|
1197
|
+
#
|
1198
|
+
# sheets_table.skip_leading_rows #=> 1
|
1199
|
+
#
|
1200
|
+
def skip_leading_rows
|
1201
|
+
@gapi.google_sheets_options.skip_leading_rows
|
1202
|
+
end
|
1203
|
+
|
1204
|
+
##
|
1205
|
+
# Set the number of rows at the top of a sheet that BigQuery will skip
|
1206
|
+
# when reading the data.
|
1207
|
+
#
|
1208
|
+
# @param [Integer] row_count New skip_leading_rows value
|
1209
|
+
#
|
1210
|
+
# @example
|
1211
|
+
# require "google/cloud/bigquery"
|
1212
|
+
#
|
1213
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1214
|
+
#
|
1215
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
1216
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
1217
|
+
# sheets.skip_leading_rows = 1
|
1218
|
+
# end
|
1219
|
+
#
|
1220
|
+
# sheets_table.skip_leading_rows #=> 1
|
1221
|
+
#
|
1222
|
+
def skip_leading_rows= row_count
|
1223
|
+
frozen_check!
|
1224
|
+
@gapi.google_sheets_options.skip_leading_rows = row_count
|
1225
|
+
end
|
1226
|
+
end
|
1227
|
+
|
1228
|
+
##
|
1229
|
+
# # BigtableSource
|
1230
|
+
#
|
1231
|
+
# {External::BigtableSource} is a subclass of {External::DataSource} and
|
1232
|
+
# represents a Bigtable external data source that can be queried from
|
1233
|
+
# directly, even though the data is not stored in BigQuery. Instead of
|
1234
|
+
# loading or streaming the data, this object references the external
|
1235
|
+
# data source.
|
1236
|
+
#
|
1237
|
+
# @example
|
1238
|
+
# require "google/cloud/bigquery"
|
1239
|
+
#
|
1240
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1241
|
+
#
|
1242
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1243
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1244
|
+
# bt.rowkey_as_string = true
|
1245
|
+
# bt.add_family "user" do |u|
|
1246
|
+
# u.add_string "name"
|
1247
|
+
# u.add_string "email"
|
1248
|
+
# u.add_integer "age"
|
1249
|
+
# u.add_boolean "active"
|
1250
|
+
# end
|
1251
|
+
# end
|
1252
|
+
#
|
1253
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
1254
|
+
# external: { my_ext_table: bigtable_table }
|
1255
|
+
#
|
1256
|
+
# data.each do |row|
|
1257
|
+
# puts row[:name]
|
1258
|
+
# end
|
1259
|
+
#
|
1260
|
+
class BigtableSource < External::DataSource
|
1261
|
+
##
|
1262
|
+
# @private Create an empty BigtableSource object.
|
1263
|
+
def initialize
|
1264
|
+
super
|
1265
|
+
@gapi.bigtable_options = \
|
1266
|
+
Google::Apis::BigqueryV2::BigtableOptions.new
|
1267
|
+
@families = []
|
1268
|
+
end
|
1269
|
+
|
1270
|
+
##
|
1271
|
+
# List of column families to expose in the table schema along with
|
1272
|
+
# their types. This list restricts the column families that can be
|
1273
|
+
# referenced in queries and specifies their value types. You can use
|
1274
|
+
# this list to do type conversions - see
|
1275
|
+
# {BigtableSource::ColumnFamily#type} for more details. If you leave
|
1276
|
+
# this list empty, all column families are present in the table schema
|
1277
|
+
# and their values are read as `BYTES`. During a query only the column
|
1278
|
+
# families referenced in that query are read from Bigtable.
|
1279
|
+
#
|
1280
|
+
# @return [Array<BigtableSource::ColumnFamily>]
|
1281
|
+
#
|
1282
|
+
# @example
|
1283
|
+
# require "google/cloud/bigquery"
|
1284
|
+
#
|
1285
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1286
|
+
#
|
1287
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1288
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1289
|
+
# bt.rowkey_as_string = true
|
1290
|
+
# bt.add_family "user" do |u|
|
1291
|
+
# u.add_string "name"
|
1292
|
+
# u.add_string "email"
|
1293
|
+
# u.add_integer "age"
|
1294
|
+
# u.add_boolean "active"
|
1295
|
+
# end
|
1296
|
+
# end
|
1297
|
+
#
|
1298
|
+
# bigtable_table.families.count #=> 1
|
1299
|
+
#
|
1300
|
+
def families
|
1301
|
+
@families
|
1302
|
+
end
|
1303
|
+
|
1304
|
+
##
|
1305
|
+
# Add a column family to expose in the table schema along with its
|
1306
|
+
# types. Columns belonging to the column family may also be exposed.
|
1307
|
+
#
|
1308
|
+
# @param [String] family_id Identifier of the column family. See
|
1309
|
+
# {BigtableSource::ColumnFamily#family_id}.
|
1310
|
+
# @param [String] encoding The encoding of the values when the type is
|
1311
|
+
# not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
|
1312
|
+
# @param [Boolean] latest Whether only the latest version of value are
|
1313
|
+
# exposed for all columns in this column family. See
|
1314
|
+
# {BigtableSource::ColumnFamily#latest}.
|
1315
|
+
# @param [String] type The type to convert the value in cells of this
|
1316
|
+
# column. See {BigtableSource::ColumnFamily#type}.
|
1317
|
+
#
|
1318
|
+
# @yield [family] a block for setting the family
|
1319
|
+
# @yieldparam [BigtableSource::ColumnFamily] family the family object
|
1320
|
+
#
|
1321
|
+
# @return [BigtableSource::ColumnFamily]
|
1322
|
+
#
|
1323
|
+
# @example
|
1324
|
+
# require "google/cloud/bigquery"
|
1325
|
+
#
|
1326
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1327
|
+
#
|
1328
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1329
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1330
|
+
# bt.rowkey_as_string = true
|
1331
|
+
# bt.add_family "user" do |u|
|
1332
|
+
# u.add_string "name"
|
1333
|
+
# u.add_string "email"
|
1334
|
+
# u.add_integer "age"
|
1335
|
+
# u.add_boolean "active"
|
1336
|
+
# end
|
1337
|
+
# end
|
1338
|
+
#
|
1339
|
+
def add_family family_id, encoding: nil, latest: nil, type: nil
|
1340
|
+
frozen_check!
|
1341
|
+
fam = BigtableSource::ColumnFamily.new
|
1342
|
+
fam.family_id = family_id
|
1343
|
+
fam.encoding = encoding if encoding
|
1344
|
+
fam.latest = latest if latest
|
1345
|
+
fam.type = type if type
|
1346
|
+
yield fam if block_given?
|
1347
|
+
@families << fam
|
1348
|
+
fam
|
1349
|
+
end
|
1350
|
+
|
1351
|
+
##
|
1352
|
+
# Whether the rowkey column families will be read and converted to
|
1353
|
+
# string. Otherwise they are read with `BYTES` type values and users
|
1354
|
+
# need to manually cast them with `CAST` if necessary. The default
|
1355
|
+
# value is `false`.
|
1356
|
+
#
|
1357
|
+
# @return [Boolean]
|
1358
|
+
#
|
1359
|
+
# @example
|
1360
|
+
# require "google/cloud/bigquery"
|
1361
|
+
#
|
1362
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1363
|
+
#
|
1364
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1365
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1366
|
+
# bt.rowkey_as_string = true
|
1367
|
+
# end
|
1368
|
+
#
|
1369
|
+
# bigtable_table.rowkey_as_string #=> true
|
1370
|
+
#
|
1371
|
+
def rowkey_as_string
|
1372
|
+
@gapi.bigtable_options.read_rowkey_as_string
|
1373
|
+
end
|
1374
|
+
|
1375
|
+
##
|
1376
|
+
# Set the number of rows at the top of a sheet that BigQuery will skip
|
1377
|
+
# when reading the data.
|
1378
|
+
#
|
1379
|
+
# @param [Boolean] row_rowkey New rowkey_as_string value
|
1380
|
+
#
|
1381
|
+
# @example
|
1382
|
+
# require "google/cloud/bigquery"
|
1383
|
+
#
|
1384
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1385
|
+
#
|
1386
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1387
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1388
|
+
# bt.rowkey_as_string = true
|
1389
|
+
# end
|
1390
|
+
#
|
1391
|
+
# bigtable_table.rowkey_as_string #=> true
|
1392
|
+
#
|
1393
|
+
def rowkey_as_string= row_rowkey
|
1394
|
+
frozen_check!
|
1395
|
+
@gapi.bigtable_options.read_rowkey_as_string = row_rowkey
|
1396
|
+
end
|
1397
|
+
|
1398
|
+
##
|
1399
|
+
# @private Google API Client object.
|
1400
|
+
def to_gapi
|
1401
|
+
@gapi.bigtable_options.column_families = @families.map(&:to_gapi)
|
1402
|
+
@gapi
|
1403
|
+
end
|
1404
|
+
|
1405
|
+
##
|
1406
|
+
# @private Google API Client object.
|
1407
|
+
def self.from_gapi gapi
|
1408
|
+
new_table = super
|
1409
|
+
families = Array gapi.bigtable_options.column_families
|
1410
|
+
families = families.map do |fam_gapi|
|
1411
|
+
BigtableSource::ColumnFamily.from_gapi fam_gapi
|
1412
|
+
end
|
1413
|
+
new_table.instance_variable_set :@families, families
|
1414
|
+
new_table
|
1415
|
+
end
|
1416
|
+
|
1417
|
+
##
|
1418
|
+
# @private
|
1419
|
+
def freeze
|
1420
|
+
@families.map(&:freeze!)
|
1421
|
+
@families.freeze!
|
1422
|
+
super
|
1423
|
+
end
|
1424
|
+
|
1425
|
+
protected
|
1426
|
+
|
1427
|
+
def frozen_check!
|
1428
|
+
return unless frozen?
|
1429
|
+
fail ArgumentError, "Cannot modify external data source when frozen"
|
1430
|
+
end
|
1431
|
+
|
1432
|
+
##
|
1433
|
+
# # BigtableSource::ColumnFamily
|
1434
|
+
#
|
1435
|
+
# A Bigtable column family used to expose in the table schema along
|
1436
|
+
# with its types and columns.
|
1437
|
+
#
|
1438
|
+
# @example
|
1439
|
+
# require "google/cloud/bigquery"
|
1440
|
+
#
|
1441
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1442
|
+
#
|
1443
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1444
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1445
|
+
# bt.rowkey_as_string = true
|
1446
|
+
# bt.add_family "user" do |u|
|
1447
|
+
# u.add_string "name"
|
1448
|
+
# u.add_string "email"
|
1449
|
+
# u.add_integer "age"
|
1450
|
+
# u.add_boolean "active"
|
1451
|
+
# end
|
1452
|
+
# end
|
1453
|
+
#
|
1454
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
1455
|
+
# external: { my_ext_table: bigtable_table }
|
1456
|
+
#
|
1457
|
+
# data.each do |row|
|
1458
|
+
# puts row[:name]
|
1459
|
+
# end
|
1460
|
+
#
|
1461
|
+
class ColumnFamily
|
1462
|
+
##
|
1463
|
+
# @private Create an empty BigtableSource::ColumnFamily object.
|
1464
|
+
def initialize
|
1465
|
+
@gapi = Google::Apis::BigqueryV2::BigtableColumnFamily.new
|
1466
|
+
@columns = []
|
1467
|
+
end
|
1468
|
+
|
1469
|
+
##
|
1470
|
+
# The encoding of the values when the type is not `STRING`.
|
1471
|
+
#
|
1472
|
+
# @return [String]
|
1473
|
+
#
|
1474
|
+
# @example
|
1475
|
+
# require "google/cloud/bigquery"
|
1476
|
+
#
|
1477
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1478
|
+
#
|
1479
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1480
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1481
|
+
# bt.add_family "user" do |u|
|
1482
|
+
# u.encoding = "UTF-8"
|
1483
|
+
# end
|
1484
|
+
# end
|
1485
|
+
#
|
1486
|
+
# bigtable_table.families[0].encoding #=> "UTF-8"
|
1487
|
+
#
|
1488
|
+
def encoding
|
1489
|
+
@gapi.encoding
|
1490
|
+
end
|
1491
|
+
|
1492
|
+
##
|
1493
|
+
# Set the encoding of the values when the type is not `STRING`.
|
1494
|
+
# Acceptable encoding values are:
|
1495
|
+
#
|
1496
|
+
# * `TEXT` - indicates values are alphanumeric text strings.
|
1497
|
+
# * `BINARY` - indicates values are encoded using HBase
|
1498
|
+
# `Bytes.toBytes` family of functions. This can be overridden on a
|
1499
|
+
# column.
|
1500
|
+
#
|
1501
|
+
# @param [String] new_encoding New encoding value
|
1502
|
+
#
|
1503
|
+
# @example
|
1504
|
+
# require "google/cloud/bigquery"
|
1505
|
+
#
|
1506
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1507
|
+
#
|
1508
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1509
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1510
|
+
# bt.add_family "user" do |u|
|
1511
|
+
# u.encoding = "UTF-8"
|
1512
|
+
# end
|
1513
|
+
# end
|
1514
|
+
#
|
1515
|
+
# bigtable_table.families[0].encoding #=> "UTF-8"
|
1516
|
+
#
|
1517
|
+
def encoding= new_encoding
|
1518
|
+
frozen_check!
|
1519
|
+
@gapi.encoding = new_encoding
|
1520
|
+
end
|
1521
|
+
|
1522
|
+
##
|
1523
|
+
# Identifier of the column family.
|
1524
|
+
#
|
1525
|
+
# @return [String]
|
1526
|
+
#
|
1527
|
+
# @example
|
1528
|
+
# require "google/cloud/bigquery"
|
1529
|
+
#
|
1530
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1531
|
+
#
|
1532
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1533
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1534
|
+
# bt.add_family "user"
|
1535
|
+
# end
|
1536
|
+
#
|
1537
|
+
# bigtable_table.families[0].family_id #=> "user"
|
1538
|
+
#
|
1539
|
+
def family_id
|
1540
|
+
@gapi.family_id
|
1541
|
+
end
|
1542
|
+
|
1543
|
+
##
|
1544
|
+
# Set the identifier of the column family.
|
1545
|
+
#
|
1546
|
+
# @param [String] new_family_id New family_id value
|
1547
|
+
#
|
1548
|
+
# @example
|
1549
|
+
# require "google/cloud/bigquery"
|
1550
|
+
#
|
1551
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1552
|
+
#
|
1553
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1554
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1555
|
+
# bt.add_family "user"
|
1556
|
+
# end
|
1557
|
+
#
|
1558
|
+
# bigtable_table.families[0].family_id #=> "user"
|
1559
|
+
# bigtable_table.families[0].family_id = "User"
|
1560
|
+
# bigtable_table.families[0].family_id #=> "User"
|
1561
|
+
#
|
1562
|
+
def family_id= new_family_id
|
1563
|
+
frozen_check!
|
1564
|
+
@gapi.family_id = new_family_id
|
1565
|
+
end
|
1566
|
+
|
1567
|
+
##
|
1568
|
+
# Whether only the latest version of value are exposed for all
|
1569
|
+
# columns in this column family.
|
1570
|
+
#
|
1571
|
+
# @return [Boolean]
|
1572
|
+
#
|
1573
|
+
# @example
|
1574
|
+
# require "google/cloud/bigquery"
|
1575
|
+
#
|
1576
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1577
|
+
#
|
1578
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1579
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1580
|
+
# bt.add_family "user" do |u|
|
1581
|
+
# u.latest = true
|
1582
|
+
# end
|
1583
|
+
# end
|
1584
|
+
#
|
1585
|
+
# bigtable_table.families[0].latest #=> true
|
1586
|
+
#
|
1587
|
+
def latest
|
1588
|
+
@gapi.only_read_latest
|
1589
|
+
end
|
1590
|
+
|
1591
|
+
##
|
1592
|
+
# Set whether only the latest version of value are exposed for all
|
1593
|
+
# columns in this column family.
|
1594
|
+
#
|
1595
|
+
# @param [Boolean] new_latest New latest value
|
1596
|
+
#
|
1597
|
+
# @example
|
1598
|
+
# require "google/cloud/bigquery"
|
1599
|
+
#
|
1600
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1601
|
+
#
|
1602
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1603
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1604
|
+
# bt.add_family "user" do |u|
|
1605
|
+
# u.latest = true
|
1606
|
+
# end
|
1607
|
+
# end
|
1608
|
+
#
|
1609
|
+
# bigtable_table.families[0].latest #=> true
|
1610
|
+
#
|
1611
|
+
def latest= new_latest
|
1612
|
+
frozen_check!
|
1613
|
+
@gapi.only_read_latest = new_latest
|
1614
|
+
end
|
1615
|
+
|
1616
|
+
##
|
1617
|
+
# The type to convert the value in cells of this column family. The
|
1618
|
+
# values are expected to be encoded using HBase `Bytes.toBytes`
|
1619
|
+
# function when using the `BINARY` encoding value. The following
|
1620
|
+
# BigQuery types are allowed:
|
1621
|
+
#
|
1622
|
+
# * `BYTES`
|
1623
|
+
# * `STRING`
|
1624
|
+
# * `INTEGER`
|
1625
|
+
# * `FLOAT`
|
1626
|
+
# * `BOOLEAN`
|
1627
|
+
#
|
1628
|
+
# Default type is `BYTES`. This can be overridden on a column.
|
1629
|
+
#
|
1630
|
+
# @return [String]
|
1631
|
+
#
|
1632
|
+
# @example
|
1633
|
+
# require "google/cloud/bigquery"
|
1634
|
+
#
|
1635
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1636
|
+
#
|
1637
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1638
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1639
|
+
# bt.add_family "user" do |u|
|
1640
|
+
# u.type = "STRING"
|
1641
|
+
# end
|
1642
|
+
# end
|
1643
|
+
#
|
1644
|
+
# bigtable_table.families[0].type #=> "STRING"
|
1645
|
+
#
|
1646
|
+
def type
|
1647
|
+
@gapi.type
|
1648
|
+
end
|
1649
|
+
|
1650
|
+
##
|
1651
|
+
# Set the type to convert the value in cells of this column family.
|
1652
|
+
# The values are expected to be encoded using HBase `Bytes.toBytes`
|
1653
|
+
# function when using the `BINARY` encoding value. The following
|
1654
|
+
# BigQuery types are allowed:
|
1655
|
+
#
|
1656
|
+
# * `BYTES`
|
1657
|
+
# * `STRING`
|
1658
|
+
# * `INTEGER`
|
1659
|
+
# * `FLOAT`
|
1660
|
+
# * `BOOLEAN`
|
1661
|
+
#
|
1662
|
+
# Default type is `BYTES`. This can be overridden on a column.
|
1663
|
+
#
|
1664
|
+
# @param [String] new_type New type value
|
1665
|
+
#
|
1666
|
+
# @example
|
1667
|
+
# require "google/cloud/bigquery"
|
1668
|
+
#
|
1669
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1670
|
+
#
|
1671
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1672
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1673
|
+
# bt.add_family "user" do |u|
|
1674
|
+
# u.type = "STRING"
|
1675
|
+
# end
|
1676
|
+
# end
|
1677
|
+
#
|
1678
|
+
# bigtable_table.families[0].type #=> "STRING"
|
1679
|
+
#
|
1680
|
+
def type= new_type
|
1681
|
+
frozen_check!
|
1682
|
+
@gapi.type = new_type
|
1683
|
+
end
|
1684
|
+
|
1685
|
+
##
|
1686
|
+
# Lists of columns that should be exposed as individual fields.
|
1687
|
+
#
|
1688
|
+
# @return [Array<BigtableSource::Column>]
|
1689
|
+
#
|
1690
|
+
# @example
|
1691
|
+
# require "google/cloud/bigquery"
|
1692
|
+
#
|
1693
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1694
|
+
#
|
1695
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1696
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1697
|
+
# bt.rowkey_as_string = true
|
1698
|
+
# bt.add_family "user" do |u|
|
1699
|
+
# u.add_string "name"
|
1700
|
+
# u.add_string "email"
|
1701
|
+
# u.add_integer "age"
|
1702
|
+
# u.add_boolean "active"
|
1703
|
+
# end
|
1704
|
+
# end
|
1705
|
+
#
|
1706
|
+
# bigtable_table.families[0].columns.count #=> 4
|
1707
|
+
#
|
1708
|
+
def columns
|
1709
|
+
@columns
|
1710
|
+
end
|
1711
|
+
|
1712
|
+
##
|
1713
|
+
# Add a column to the column family to expose in the table schema
|
1714
|
+
# along with its types.
|
1715
|
+
#
|
1716
|
+
# @param [String] qualifier Qualifier of the column. See
|
1717
|
+
# {BigtableSource::Column#qualifier}.
|
1718
|
+
# @param [String] as A valid identifier to be used as the column
|
1719
|
+
# field name if the qualifier is not a valid BigQuery field
|
1720
|
+
# identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
|
1721
|
+
# {BigtableSource::Column#field_name}.
|
1722
|
+
# @param [String] type The type to convert the value in cells of
|
1723
|
+
# this column. See {BigtableSource::Column#type}. The following
|
1724
|
+
# BigQuery types are allowed:
|
1725
|
+
#
|
1726
|
+
# * `BYTES`
|
1727
|
+
# * `STRING`
|
1728
|
+
# * `INTEGER`
|
1729
|
+
# * `FLOAT`
|
1730
|
+
# * `BOOLEAN`
|
1731
|
+
#
|
1732
|
+
# @yield [column] a block for setting the column
|
1733
|
+
# @yieldparam [BigtableSource::Column] column the column object
|
1734
|
+
#
|
1735
|
+
# @return [Array<BigtableSource::Column>]
|
1736
|
+
#
|
1737
|
+
# @example
|
1738
|
+
# require "google/cloud/bigquery"
|
1739
|
+
#
|
1740
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1741
|
+
#
|
1742
|
+
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
1743
|
+
# bigtable_table = bigquery.external bigtable_url do |bt|
|
1744
|
+
# bt.rowkey_as_string = true
|
1745
|
+
# bt.add_family "user" do |u|
|
1746
|
+
# u.add_column "name", type: "STRING"
|
1747
|
+
# end
|
1748
|
+
# end
|
1749
|
+
#
|
1750
|
+
def add_column qualifier, as: nil, type: nil
|
1751
|
+
frozen_check!
|
1752
|
+
col = BigtableSource::Column.new
|
1753
|
+
col.qualifier = qualifier
|
1754
|
+
col.field_name = as if as
|
1755
|
+
col.type = type if type
|
1756
|
+
yield col if block_given?
|
1757
|
+
@columns << col
|
1758
|
+
col
|
1759
|
+
end
|
1760
|
+
|
1761
|
+
##
# Adds a column of type `BYTES` to the column family, exposing it
# in the table schema.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as A valid identifier to be used as the column
#   field name when the qualifier is not a valid BigQuery field
#   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for setting the column
# @yieldparam [BigtableSource::Column] column the column object
#
# @return [BigtableSource::Column] the newly added column
#
def add_bytes qualifier, as: nil
  column = add_column qualifier, as: as, type: "BYTES"
  yield column if block_given?
  column
end
|
1795
|
+
|
1796
|
+
##
# Adds a column of type `STRING` to the column family, exposing it
# in the table schema.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as A valid identifier to be used as the column
#   field name when the qualifier is not a valid BigQuery field
#   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for setting the column
# @yieldparam [BigtableSource::Column] column the column object
#
# @return [BigtableSource::Column] the newly added column
#
def add_string qualifier, as: nil
  column = add_column qualifier, as: as, type: "STRING"
  yield column if block_given?
  column
end
|
1830
|
+
|
1831
|
+
##
# Adds a column of type `INTEGER` to the column family, exposing it
# in the table schema.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as A valid identifier to be used as the column
#   field name when the qualifier is not a valid BigQuery field
#   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for setting the column
# @yieldparam [BigtableSource::Column] column the column object
#
# @return [BigtableSource::Column] the newly added column
#
def add_integer qualifier, as: nil
  column = add_column qualifier, as: as, type: "INTEGER"
  yield column if block_given?
  column
end
|
1865
|
+
|
1866
|
+
##
# Adds a column of type `FLOAT` to the column family, exposing it
# in the table schema.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as A valid identifier to be used as the column
#   field name when the qualifier is not a valid BigQuery field
#   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for setting the column
# @yieldparam [BigtableSource::Column] column the column object
#
# @return [BigtableSource::Column] the newly added column
#
def add_float qualifier, as: nil
  column = add_column qualifier, as: as, type: "FLOAT"
  yield column if block_given?
  column
end
|
1900
|
+
|
1901
|
+
##
# Adds a column of type `BOOLEAN` to the column family, exposing it
# in the table schema.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as A valid identifier to be used as the column
#   field name when the qualifier is not a valid BigQuery field
#   identifier (i.e. does not match `[a-zA-Z][a-zA-Z0-9_]*`). See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for setting the column
# @yieldparam [BigtableSource::Column] column the column object
#
# @return [BigtableSource::Column] the newly added column
#
def add_boolean qualifier, as: nil
  column = add_column qualifier, as: as, type: "BOOLEAN"
  yield column if block_given?
  column
end
|
1935
|
+
|
1936
|
+
##
# @private Returns the Google API Client object, with the current
# columns serialized into it.
def to_gapi
  @gapi.tap do |gapi|
    gapi.columns = @columns.map(&:to_gapi)
  end
end
|
1942
|
+
|
1943
|
+
##
# @private New ColumnFamily from a Google API Client object.
def self.from_gapi gapi
  columns = Array(gapi.columns).map do |column_gapi|
    BigtableSource::Column.from_gapi column_gapi
  end
  new.tap do |family|
    family.instance_variable_set :@gapi, gapi
    family.instance_variable_set :@columns, columns
  end
end
|
1954
|
+
|
1955
|
+
##
# @private
#
# Freezes this column family and all of its columns so that further
# mutation raises via #frozen_check!.
def freeze
  # Use Object#freeze — the previous `freeze!` calls raised
  # NoMethodError, since neither Column nor Array defines `freeze!`.
  @columns.each(&:freeze)
  @columns.freeze
  super
end
|
1962
|
+
|
1963
|
+
protected
|
1964
|
+
|
1965
|
+
##
# Raises when this object has been frozen; guards every mutator.
def frozen_check!
  if frozen?
    raise ArgumentError,
          "Cannot modify external data source when frozen"
  end
end
|
1970
|
+
end
|
1971
|
+
|
1972
|
+
##
# # BigtableSource::Column
#
# A Bigtable column to expose in the table schema along with its
# types.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: bigtable_table }
#
#   data.each do |row|
#     puts row[:name]
#   end
#
class Column
  ##
  # @private Create an empty BigtableSource::Column object.
  def initialize
    @gapi = Google::Apis::BigqueryV2::BigtableColumn.new
  end

  ##
  # Qualifier of the column. Columns in the parent column family
  # that have this exact qualifier are exposed as a `.` field. A
  # qualifier stored as valid UTF-8 is returned as a UTF-8 string;
  # otherwise the Base64-encoded bytes are decoded and returned.
  #
  # If the qualifier is not a valid BigQuery field identifier (does
  # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
  # provided as `field_name`.
  #
  # @return [String]
  #
  def qualifier
    @gapi.qualifier_string || \
      Base64.strict_decode64(@gapi.qualifier_encoded.to_s)
  end

  ##
  # Set the qualifier of the column. Values that are valid UTF-8
  # strings are stored as such; all other values are treated as
  # `BINARY` and stored Base64-encoded.
  #
  # @param [String] new_qualifier New qualifier value
  # @raise [ArgumentError] if `new_qualifier` is `nil`, or if this
  #   object is frozen
  #
  def qualifier= new_qualifier
    frozen_check!
    raise ArgumentError if new_qualifier.nil?

    utf8_qualifier = new_qualifier.encode Encoding::UTF_8
    if utf8_qualifier.valid_encoding?
      @gapi.qualifier_string = utf8_qualifier
      clear_gapi_attr :@qualifier_encoded
    else
      set_encoded_qualifier new_qualifier
    end
  rescue EncodingError
    # Not representable as UTF-8; fall back to the binary form.
    set_encoded_qualifier new_qualifier
  end

  ##
  # The encoding of the values when the type is not `STRING`.
  #
  # @return [String]
  #
  def encoding
    @gapi.encoding
  end

  ##
  # Set the encoding of the values when the type is not `STRING`.
  # Acceptable encoding values are:
  #
  # * `TEXT` - indicates values are alphanumeric text strings.
  # * `BINARY` - indicates values are encoded using HBase
  #   `Bytes.toBytes` family of functions.
  #
  # @param [String] new_encoding New encoding value
  #
  def encoding= new_encoding
    frozen_check!
    @gapi.encoding = new_encoding
  end

  ##
  # The identifier used as the column field name in queries when the
  # qualifier is not a valid BigQuery field identifier (does not
  # match `[a-zA-Z][a-zA-Z0-9_]*`).
  #
  # @return [String]
  #
  def field_name
    @gapi.field_name
  end

  ##
  # Sets the identifier to be used as the column field name in
  # queries when the qualifier is not a valid BigQuery field
  # identifier (does not match `[a-zA-Z][a-zA-Z0-9_]*`).
  #
  # @param [String] new_field_name New field_name value
  #
  def field_name= new_field_name
    frozen_check!
    @gapi.field_name = new_field_name
  end

  ##
  # Whether only the latest version of value in this column is
  # exposed. Can also be set at the column family level; however,
  # this value takes precedence when set at both levels.
  #
  # @return [Boolean]
  #
  def latest
    @gapi.only_read_latest
  end

  ##
  # Set whether only the latest version of value in this column is
  # exposed. Can also be set at the column family level; however,
  # this value takes precedence when set at both levels.
  #
  # @param [Boolean] new_latest New latest value
  #
  def latest= new_latest
    frozen_check!
    @gapi.only_read_latest = new_latest
  end

  ##
  # The type to convert the value in cells of this column. The
  # values are expected to be encoded using HBase `Bytes.toBytes`
  # when using the `BINARY` encoding value. Allowed BigQuery types:
  # `BYTES`, `STRING`, `INTEGER`, `FLOAT`, `BOOLEAN`. Default is
  # `BYTES`. Can also be set at the column family level; however,
  # this value takes precedence when set at both levels.
  #
  # @return [String]
  #
  def type
    @gapi.type
  end

  ##
  # Set the type to convert the value in cells of this column.
  # Allowed BigQuery types: `BYTES`, `STRING`, `INTEGER`, `FLOAT`,
  # `BOOLEAN`. Default is `BYTES`. Can also be set at the column
  # family level; however, this value takes precedence when set at
  # both levels.
  #
  # @param [String] new_type New type value
  #
  def type= new_type
    frozen_check!
    @gapi.type = new_type
  end

  ##
  # @private Google API Client object.
  def to_gapi
    @gapi
  end

  ##
  # @private New Column from a Google API Client object.
  def self.from_gapi gapi
    new_col = new
    new_col.instance_variable_set :@gapi, gapi
    new_col
  end

  protected

  ##
  # Raises when this object has been frozen; guards every mutator.
  def frozen_check!
    return unless frozen?
    raise ArgumentError,
          "Cannot modify external data source when frozen"
  end

  ##
  # Stores the qualifier Base64-encoded in its binary form and
  # clears any previously stored string form. Extracted because the
  # same fallback was duplicated in #qualifier= and its rescue.
  def set_encoded_qualifier raw_qualifier
    @gapi.qualifier_encoded = Base64.strict_encode64 raw_qualifier
    clear_gapi_attr :@qualifier_string
  end

  ##
  # Removes the given instance variable from the underlying gapi
  # object, if set, so a stale value is not serialized.
  def clear_gapi_attr name
    return unless @gapi.instance_variables.include? name
    @gapi.remove_instance_variable name
  end
end
|
2349
|
+
end
|
2350
|
+
end
|
2351
|
+
end
|
2352
|
+
end
|
2353
|
+
end
|