google-cloud-bigquery 1.31.0 → 1.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/google/cloud/bigquery/external.rb +9 -2619
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/load_job.rb +103 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +23 -9
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
# Copyright 2021 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
require "google/apis/bigquery_v2"
|
|
17
|
+
|
|
18
|
+
module Google
|
|
19
|
+
module Cloud
|
|
20
|
+
module Bigquery
|
|
21
|
+
module External
|
|
22
|
+
##
|
|
23
|
+
# # CsvSource
|
|
24
|
+
#
|
|
25
|
+
# {External::CsvSource} is a subclass of {External::DataSource} and
|
|
26
|
+
# represents a CSV external data source that can be queried
|
|
27
|
+
# directly, such as Google Cloud Storage or Google Drive, even though
|
|
28
|
+
# the data is not stored in BigQuery. Instead of loading or streaming
|
|
29
|
+
# the data, this object references the external data source.
|
|
30
|
+
#
|
|
31
|
+
# @example
|
|
32
|
+
# require "google/cloud/bigquery"
|
|
33
|
+
#
|
|
34
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
35
|
+
#
|
|
36
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
37
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
38
|
+
# csv.autodetect = true
|
|
39
|
+
# csv.skip_leading_rows = 1
|
|
40
|
+
# end
|
|
41
|
+
#
|
|
42
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
43
|
+
# external: { my_ext_table: csv_table }
|
|
44
|
+
#
|
|
45
|
+
# # Iterate over the first page of results
|
|
46
|
+
# data.each do |row|
|
|
47
|
+
# puts row[:name]
|
|
48
|
+
# end
|
|
49
|
+
# # Retrieve the next page of results
|
|
50
|
+
# data = data.next if data.next?
|
|
51
|
+
#
|
|
52
|
+
class CsvSource < External::DataSource
|
|
53
|
+
##
|
|
54
|
+
# @private Create an empty CsvSource object.
|
|
55
|
+
def initialize
  # Run the parent initializer first; the assignment below relies on it
  # having set @gapi.
  super
  # Start every CSV source with an empty CsvOptions object so the
  # accessors in this class always have something to read and write.
  blank_options = Google::Apis::BigqueryV2::CsvOptions.new
  @gapi.csv_options = blank_options
end
|
|
59
|
+
|
|
60
|
+
##
|
|
61
|
+
# Indicates if BigQuery should accept rows that are missing trailing
|
|
62
|
+
# optional columns.
|
|
63
|
+
#
|
|
64
|
+
# @return [Boolean]
|
|
65
|
+
#
|
|
66
|
+
# @example
|
|
67
|
+
# require "google/cloud/bigquery"
|
|
68
|
+
#
|
|
69
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
70
|
+
#
|
|
71
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
72
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
73
|
+
# csv.jagged_rows = true
|
|
74
|
+
# end
|
|
75
|
+
#
|
|
76
|
+
# csv_table.jagged_rows #=> true
|
|
77
|
+
#
|
|
78
|
+
def jagged_rows
  # Read the flag from the CSV options held by the underlying API object.
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows
end
|
|
81
|
+
|
|
82
|
+
##
|
|
83
|
+
# Set whether BigQuery should accept rows that are missing trailing
|
|
84
|
+
# optional columns.
|
|
85
|
+
#
|
|
86
|
+
# @param [Boolean] new_jagged_rows New jagged_rows value
|
|
87
|
+
#
|
|
88
|
+
# @example
|
|
89
|
+
# require "google/cloud/bigquery"
|
|
90
|
+
#
|
|
91
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
92
|
+
#
|
|
93
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
94
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
95
|
+
# csv.jagged_rows = true
|
|
96
|
+
# end
|
|
97
|
+
#
|
|
98
|
+
# csv_table.jagged_rows #=> true
|
|
99
|
+
#
|
|
100
|
+
def jagged_rows= new_jagged_rows
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows = new_jagged_rows
end
|
|
104
|
+
|
|
105
|
+
##
|
|
106
|
+
# Indicates if BigQuery should allow quoted data sections that contain
|
|
107
|
+
# newline characters in a CSV file.
|
|
108
|
+
#
|
|
109
|
+
# @return [Boolean]
|
|
110
|
+
#
|
|
111
|
+
# @example
|
|
112
|
+
# require "google/cloud/bigquery"
|
|
113
|
+
#
|
|
114
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
115
|
+
#
|
|
116
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
117
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
118
|
+
# csv.quoted_newlines = true
|
|
119
|
+
# end
|
|
120
|
+
#
|
|
121
|
+
# csv_table.quoted_newlines #=> true
|
|
122
|
+
#
|
|
123
|
+
def quoted_newlines
  # Read the flag from the CSV options held by the underlying API object.
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines
end
|
|
126
|
+
|
|
127
|
+
##
|
|
128
|
+
# Set whether BigQuery should allow quoted data sections that contain
|
|
129
|
+
# newline characters in a CSV file.
|
|
130
|
+
#
|
|
131
|
+
# @param [Boolean] new_quoted_newlines New quoted_newlines value
|
|
132
|
+
#
|
|
133
|
+
# @example
|
|
134
|
+
# require "google/cloud/bigquery"
|
|
135
|
+
#
|
|
136
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
137
|
+
#
|
|
138
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
139
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
140
|
+
# csv.quoted_newlines = true
|
|
141
|
+
# end
|
|
142
|
+
#
|
|
143
|
+
# csv_table.quoted_newlines #=> true
|
|
144
|
+
#
|
|
145
|
+
def quoted_newlines= new_quoted_newlines
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines = new_quoted_newlines
end
|
|
149
|
+
|
|
150
|
+
##
|
|
151
|
+
# The character encoding of the data.
|
|
152
|
+
#
|
|
153
|
+
# @return [String]
|
|
154
|
+
#
|
|
155
|
+
# @example
|
|
156
|
+
# require "google/cloud/bigquery"
|
|
157
|
+
#
|
|
158
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
159
|
+
#
|
|
160
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
161
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
162
|
+
# csv.encoding = "UTF-8"
|
|
163
|
+
# end
|
|
164
|
+
#
|
|
165
|
+
# csv_table.encoding #=> "UTF-8"
|
|
166
|
+
#
|
|
167
|
+
def encoding
  # Read the encoding from the CSV options held by the underlying API object.
  csv_opts = @gapi.csv_options
  csv_opts.encoding
end
|
|
170
|
+
|
|
171
|
+
##
|
|
172
|
+
# Set the character encoding of the data.
|
|
173
|
+
#
|
|
174
|
+
# @param [String] new_encoding New encoding value
|
|
175
|
+
#
|
|
176
|
+
# @example
|
|
177
|
+
# require "google/cloud/bigquery"
|
|
178
|
+
#
|
|
179
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
180
|
+
#
|
|
181
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
182
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
183
|
+
# csv.encoding = "UTF-8"
|
|
184
|
+
# end
|
|
185
|
+
#
|
|
186
|
+
# csv_table.encoding #=> "UTF-8"
|
|
187
|
+
#
|
|
188
|
+
def encoding= new_encoding
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.encoding = new_encoding
end
|
|
192
|
+
|
|
193
|
+
##
|
|
194
|
+
# Checks if the character encoding of the data is "UTF-8". This is the
|
|
195
|
+
# default.
|
|
196
|
+
#
|
|
197
|
+
# @return [Boolean]
|
|
198
|
+
#
|
|
199
|
+
# @example
|
|
200
|
+
# require "google/cloud/bigquery"
|
|
201
|
+
#
|
|
202
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
203
|
+
#
|
|
204
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
205
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
206
|
+
# csv.encoding = "UTF-8"
|
|
207
|
+
# end
|
|
208
|
+
#
|
|
209
|
+
# csv_table.encoding #=> "UTF-8"
|
|
210
|
+
# csv_table.utf8? #=> true
|
|
211
|
+
#
|
|
212
|
+
def utf8?
  # An unset (nil) encoding is reported as UTF-8, which is the default.
  current_encoding = encoding
  current_encoding.nil? || current_encoding == "UTF-8"
end
|
|
216
|
+
|
|
217
|
+
##
|
|
218
|
+
# Checks if the character encoding of the data is "ISO-8859-1".
|
|
219
|
+
#
|
|
220
|
+
# @return [Boolean]
|
|
221
|
+
#
|
|
222
|
+
# @example
|
|
223
|
+
# require "google/cloud/bigquery"
|
|
224
|
+
#
|
|
225
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
226
|
+
#
|
|
227
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
228
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
229
|
+
# csv.encoding = "ISO-8859-1"
|
|
230
|
+
# end
|
|
231
|
+
#
|
|
232
|
+
# csv_table.encoding #=> "ISO-8859-1"
|
|
233
|
+
# csv_table.iso8859_1? #=> true
|
|
234
|
+
#
|
|
235
|
+
def iso8859_1?
  # True only for an exact "ISO-8859-1" value; an unset encoding is false.
  current_encoding = encoding
  current_encoding == "ISO-8859-1"
end
|
|
238
|
+
|
|
239
|
+
##
|
|
240
|
+
# The separator for fields in a CSV file.
|
|
241
|
+
#
|
|
242
|
+
# @return [String]
|
|
243
|
+
#
|
|
244
|
+
# @example
|
|
245
|
+
# require "google/cloud/bigquery"
|
|
246
|
+
#
|
|
247
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
248
|
+
#
|
|
249
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
250
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
251
|
+
# csv.delimiter = "|"
|
|
252
|
+
# end
|
|
253
|
+
#
|
|
254
|
+
# csv_table.delimiter #=> "|"
|
|
255
|
+
#
|
|
256
|
+
def delimiter
  # Exposed as `delimiter`, stored as `field_delimiter` on the CSV options.
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter
end
|
|
259
|
+
|
|
260
|
+
##
|
|
261
|
+
# Set the separator for fields in a CSV file.
|
|
262
|
+
#
|
|
263
|
+
# @param [String] new_delimiter New delimiter value
|
|
264
|
+
#
|
|
265
|
+
# @example
|
|
266
|
+
# require "google/cloud/bigquery"
|
|
267
|
+
#
|
|
268
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
269
|
+
#
|
|
270
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
271
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
272
|
+
# csv.delimiter = "|"
|
|
273
|
+
# end
|
|
274
|
+
#
|
|
275
|
+
# csv_table.delimiter #=> "|"
|
|
276
|
+
#
|
|
277
|
+
def delimiter= new_delimiter
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  # Exposed as `delimiter`, stored as `field_delimiter` on the CSV options.
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter = new_delimiter
end
|
|
281
|
+
|
|
282
|
+
##
|
|
283
|
+
# The value that is used to quote data sections in a CSV file.
|
|
284
|
+
#
|
|
285
|
+
# @return [String]
|
|
286
|
+
#
|
|
287
|
+
# @example
|
|
288
|
+
# require "google/cloud/bigquery"
|
|
289
|
+
#
|
|
290
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
291
|
+
#
|
|
292
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
293
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
294
|
+
# csv.quote = "'"
|
|
295
|
+
# end
|
|
296
|
+
#
|
|
297
|
+
# csv_table.quote #=> "'"
|
|
298
|
+
#
|
|
299
|
+
def quote
  # Read the quote value from the CSV options held by the underlying API object.
  csv_opts = @gapi.csv_options
  csv_opts.quote
end
|
|
302
|
+
|
|
303
|
+
##
|
|
304
|
+
# Set the value that is used to quote data sections in a CSV file.
|
|
305
|
+
#
|
|
306
|
+
# @param [String] new_quote New quote value
|
|
307
|
+
#
|
|
308
|
+
# @example
|
|
309
|
+
# require "google/cloud/bigquery"
|
|
310
|
+
#
|
|
311
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
312
|
+
#
|
|
313
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
314
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
315
|
+
# csv.quote = "'"
|
|
316
|
+
# end
|
|
317
|
+
#
|
|
318
|
+
# csv_table.quote #=> "'"
|
|
319
|
+
#
|
|
320
|
+
def quote= new_quote
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.quote = new_quote
end
|
|
324
|
+
|
|
325
|
+
##
|
|
326
|
+
# The number of rows at the top of a CSV file that BigQuery will skip
|
|
327
|
+
# when reading the data.
|
|
328
|
+
#
|
|
329
|
+
# @return [Integer]
|
|
330
|
+
#
|
|
331
|
+
# @example
|
|
332
|
+
# require "google/cloud/bigquery"
|
|
333
|
+
#
|
|
334
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
335
|
+
#
|
|
336
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
337
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
338
|
+
# csv.skip_leading_rows = 1
|
|
339
|
+
# end
|
|
340
|
+
#
|
|
341
|
+
# csv_table.skip_leading_rows #=> 1
|
|
342
|
+
#
|
|
343
|
+
def skip_leading_rows
  # Read the row count from the CSV options held by the underlying API object.
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows
end
|
|
346
|
+
|
|
347
|
+
##
|
|
348
|
+
# Set the number of rows at the top of a CSV file that BigQuery will
|
|
349
|
+
# skip when reading the data.
|
|
350
|
+
#
|
|
351
|
+
# @param [Integer] row_count New skip_leading_rows value
|
|
352
|
+
#
|
|
353
|
+
# @example
|
|
354
|
+
# require "google/cloud/bigquery"
|
|
355
|
+
#
|
|
356
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
357
|
+
#
|
|
358
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
359
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
360
|
+
# csv.skip_leading_rows = 1
|
|
361
|
+
# end
|
|
362
|
+
#
|
|
363
|
+
# csv_table.skip_leading_rows #=> 1
|
|
364
|
+
#
|
|
365
|
+
def skip_leading_rows= row_count
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows = row_count
end
|
|
369
|
+
|
|
370
|
+
##
|
|
371
|
+
# The schema for the data.
|
|
372
|
+
#
|
|
373
|
+
# @param [Boolean] replace Whether to replace the existing schema with
|
|
374
|
+
# the new schema. If `true`, the fields will replace the existing
|
|
375
|
+
# schema. If `false`, the fields will be added to the existing
|
|
376
|
+
# schema. The default value is `false`.
|
|
377
|
+
# @yield [schema] a block for setting the schema
|
|
378
|
+
# @yieldparam [Schema] schema the object accepting the schema
|
|
379
|
+
#
|
|
380
|
+
# @return [Google::Cloud::Bigquery::Schema]
|
|
381
|
+
#
|
|
382
|
+
# @example
|
|
383
|
+
# require "google/cloud/bigquery"
|
|
384
|
+
#
|
|
385
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
386
|
+
#
|
|
387
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
388
|
+
# csv_table = bigquery.external csv_url do |csv|
|
|
389
|
+
# csv.schema do |schema|
|
|
390
|
+
# schema.string "name", mode: :required
|
|
391
|
+
# schema.string "email", mode: :required
|
|
392
|
+
# schema.integer "age", mode: :required
|
|
393
|
+
# schema.boolean "active", mode: :required
|
|
394
|
+
# end
|
|
395
|
+
# end
|
|
396
|
+
#
|
|
397
|
+
def schema replace: false
  # On first access, build and cache a Schema from the API object's schema.
  @schema = Schema.from_gapi(@gapi.schema) unless @schema
  if replace
    # Replacing is a mutation, so the frozen guard runs before the cached
    # schema is swapped for an empty one.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # A frozen source hands out a frozen schema as well.
  @schema.freeze if frozen?
  # Let the caller configure the schema in place when a block is given.
  yield @schema if block_given?
  @schema
end
|
|
407
|
+
|
|
408
|
+
##
|
|
409
|
+
# Set the schema for the data.
|
|
410
|
+
#
|
|
411
|
+
# @param [Schema] new_schema The schema object.
|
|
412
|
+
#
|
|
413
|
+
# @example
|
|
414
|
+
# require "google/cloud/bigquery"
|
|
415
|
+
#
|
|
416
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
417
|
+
#
|
|
418
|
+
# csv_schema = bigquery.schema do |schema|
|
|
419
|
+
# schema.string "name", mode: :required
|
|
420
|
+
# schema.string "email", mode: :required
|
|
421
|
+
# schema.integer "age", mode: :required
|
|
422
|
+
# schema.boolean "active", mode: :required
|
|
423
|
+
# end
|
|
424
|
+
#
|
|
425
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
|
426
|
+
# csv_table = bigquery.external csv_url
|
|
427
|
+
# csv_table.schema = csv_schema
|
|
428
|
+
#
|
|
429
|
+
def schema= new_schema
  # frozen_check! is not defined in this file; presumably it raises when
  # the source is frozen (confirm in DataSource). It runs before the write.
  frozen_check!
  # Only the cached schema is replaced here; @gapi.schema is updated later,
  # when to_gapi is called.
  @schema = new_schema
end
|
|
433
|
+
|
|
434
|
+
##
|
|
435
|
+
# The fields of the schema.
|
|
436
|
+
#
|
|
437
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
|
438
|
+
#
|
|
439
|
+
def fields
  # Delegate to the Schema returned by #schema.
  current_schema = schema
  current_schema.fields
end
|
|
442
|
+
|
|
443
|
+
##
|
|
444
|
+
# The names of the columns in the schema.
|
|
445
|
+
#
|
|
446
|
+
# @return [Array<Symbol>] An array of column names.
|
|
447
|
+
#
|
|
448
|
+
def headers
  # Delegate to the Schema returned by #schema.
  current_schema = schema
  current_schema.headers
end
|
|
451
|
+
|
|
452
|
+
##
|
|
453
|
+
# The types of the fields in the data in the schema, using the same
|
|
454
|
+
# format as the optional query parameter types.
|
|
455
|
+
#
|
|
456
|
+
# @return [Hash] A hash with field names as keys, and types as values.
|
|
457
|
+
#
|
|
458
|
+
def param_types
  # Delegate to the Schema returned by #schema.
  current_schema = schema
  current_schema.param_types
end
|
|
461
|
+
|
|
462
|
+
##
|
|
463
|
+
# @private Google API Client object.
|
|
464
|
+
def to_gapi
  # Copy the cached schema, if one exists, onto the API object before
  # handing it out.
  if @schema
    @gapi.schema = @schema.to_gapi
  end
  @gapi
end
|
|
468
|
+
|
|
469
|
+
##
|
|
470
|
+
# @private Google API Client object.
|
|
471
|
+
def self.from_gapi gapi
  # The parent from_gapi (outside this file) builds the source object; its
  # cached schema is then seeded from the same API object.
  super.tap do |table|
    table.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
  end
end
|
|
477
|
+
end
|
|
478
|
+
end
|
|
479
|
+
end
|
|
480
|
+
end
|
|
481
|
+
end
|