google-cloud-bigquery 1.27.0 → 1.32.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +58 -0
- data/CONTRIBUTING.md +3 -4
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +15 -14
- data/lib/google/cloud/bigquery/convert.rb +72 -76
- data/lib/google/cloud/bigquery/copy_job.rb +1 -0
- data/lib/google/cloud/bigquery/data.rb +2 -2
- data/lib/google/cloud/bigquery/dataset.rb +181 -62
- data/lib/google/cloud/bigquery/dataset/access.rb +3 -3
- data/lib/google/cloud/bigquery/dataset/list.rb +2 -2
- data/lib/google/cloud/bigquery/external.rb +9 -2619
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/extract_job.rb +4 -2
- data/lib/google/cloud/bigquery/job.rb +9 -3
- data/lib/google/cloud/bigquery/job/list.rb +4 -4
- data/lib/google/cloud/bigquery/load_job.rb +178 -19
- data/lib/google/cloud/bigquery/model/list.rb +2 -2
- data/lib/google/cloud/bigquery/policy.rb +2 -1
- data/lib/google/cloud/bigquery/project.rb +47 -43
- data/lib/google/cloud/bigquery/project/list.rb +2 -2
- data/lib/google/cloud/bigquery/query_job.rb +84 -62
- data/lib/google/cloud/bigquery/routine.rb +1 -4
- data/lib/google/cloud/bigquery/routine/list.rb +2 -2
- data/lib/google/cloud/bigquery/schema.rb +39 -3
- data/lib/google/cloud/bigquery/schema/field.rb +63 -13
- data/lib/google/cloud/bigquery/service.rb +11 -13
- data/lib/google/cloud/bigquery/standard_sql.rb +15 -3
- data/lib/google/cloud/bigquery/table.rb +312 -69
- data/lib/google/cloud/bigquery/table/async_inserter.rb +44 -17
- data/lib/google/cloud/bigquery/table/list.rb +2 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +28 -14
@@ -0,0 +1,481 @@
|
|
1
|
+
# Copyright 2021 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/apis/bigquery_v2"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Bigquery
|
21
|
+
module External
|
22
|
+
##
|
23
|
+
# # CsvSource
|
24
|
+
#
|
25
|
+
# {External::CsvSource} is a subclass of {External::DataSource} and
|
26
|
+
# represents a CSV external data source that can be queried
|
27
|
+
# directly, such as Google Cloud Storage or Google Drive, even though
|
28
|
+
# the data is not stored in BigQuery. Instead of loading or streaming
|
29
|
+
# the data, this object references the external data source.
|
30
|
+
#
|
31
|
+
# @example
|
32
|
+
# require "google/cloud/bigquery"
|
33
|
+
#
|
34
|
+
# bigquery = Google::Cloud::Bigquery.new
|
35
|
+
#
|
36
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
37
|
+
# csv_table = bigquery.external csv_url do |csv|
|
38
|
+
# csv.autodetect = true
|
39
|
+
# csv.skip_leading_rows = 1
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
43
|
+
# external: { my_ext_table: csv_table }
|
44
|
+
#
|
45
|
+
# # Iterate over the first page of results
|
46
|
+
# data.each do |row|
|
47
|
+
# puts row[:name]
|
48
|
+
# end
|
49
|
+
# # Retrieve the next page of results
|
50
|
+
# data = data.next if data.next?
|
51
|
+
#
|
52
|
+
class CsvSource < External::DataSource
|
53
|
+
##
|
54
|
+
# @private Create an empty CsvSource object.
|
55
|
+
def initialize
|
56
|
+
super
|
57
|
+
@gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
|
58
|
+
end
|
59
|
+
|
60
|
+
##
|
61
|
+
# Indicates if BigQuery should accept rows that are missing trailing
|
62
|
+
# optional columns.
|
63
|
+
#
|
64
|
+
# @return [Boolean]
|
65
|
+
#
|
66
|
+
# @example
|
67
|
+
# require "google/cloud/bigquery"
|
68
|
+
#
|
69
|
+
# bigquery = Google::Cloud::Bigquery.new
|
70
|
+
#
|
71
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
72
|
+
# csv_table = bigquery.external csv_url do |csv|
|
73
|
+
# csv.jagged_rows = true
|
74
|
+
# end
|
75
|
+
#
|
76
|
+
# csv_table.jagged_rows #=> true
|
77
|
+
#
|
78
|
+
def jagged_rows
|
79
|
+
@gapi.csv_options.allow_jagged_rows
|
80
|
+
end
|
81
|
+
|
82
|
+
##
|
83
|
+
# Set whether BigQuery should accept rows that are missing trailing
|
84
|
+
# optional columns.
|
85
|
+
#
|
86
|
+
# @param [Boolean] new_jagged_rows New jagged_rows value
|
87
|
+
#
|
88
|
+
# @example
|
89
|
+
# require "google/cloud/bigquery"
|
90
|
+
#
|
91
|
+
# bigquery = Google::Cloud::Bigquery.new
|
92
|
+
#
|
93
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
94
|
+
# csv_table = bigquery.external csv_url do |csv|
|
95
|
+
# csv.jagged_rows = true
|
96
|
+
# end
|
97
|
+
#
|
98
|
+
# csv_table.jagged_rows #=> true
|
99
|
+
#
|
100
|
+
def jagged_rows= new_jagged_rows
|
101
|
+
frozen_check!
|
102
|
+
@gapi.csv_options.allow_jagged_rows = new_jagged_rows
|
103
|
+
end
|
104
|
+
|
105
|
+
##
|
106
|
+
# Indicates if BigQuery should allow quoted data sections that contain
|
107
|
+
# newline characters in a CSV file.
|
108
|
+
#
|
109
|
+
# @return [Boolean]
|
110
|
+
#
|
111
|
+
# @example
|
112
|
+
# require "google/cloud/bigquery"
|
113
|
+
#
|
114
|
+
# bigquery = Google::Cloud::Bigquery.new
|
115
|
+
#
|
116
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
117
|
+
# csv_table = bigquery.external csv_url do |csv|
|
118
|
+
# csv.quoted_newlines = true
|
119
|
+
# end
|
120
|
+
#
|
121
|
+
# csv_table.quoted_newlines #=> true
|
122
|
+
#
|
123
|
+
def quoted_newlines
|
124
|
+
@gapi.csv_options.allow_quoted_newlines
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Set whether BigQuery should allow quoted data sections that contain
|
129
|
+
# newline characters in a CSV file.
|
130
|
+
#
|
131
|
+
# @param [Boolean] new_quoted_newlines New quoted_newlines value
|
132
|
+
#
|
133
|
+
# @example
|
134
|
+
# require "google/cloud/bigquery"
|
135
|
+
#
|
136
|
+
# bigquery = Google::Cloud::Bigquery.new
|
137
|
+
#
|
138
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
139
|
+
# csv_table = bigquery.external csv_url do |csv|
|
140
|
+
# csv.quoted_newlines = true
|
141
|
+
# end
|
142
|
+
#
|
143
|
+
# csv_table.quoted_newlines #=> true
|
144
|
+
#
|
145
|
+
def quoted_newlines= new_quoted_newlines
|
146
|
+
frozen_check!
|
147
|
+
@gapi.csv_options.allow_quoted_newlines = new_quoted_newlines
|
148
|
+
end
|
149
|
+
|
150
|
+
##
|
151
|
+
# The character encoding of the data.
|
152
|
+
#
|
153
|
+
# @return [String]
|
154
|
+
#
|
155
|
+
# @example
|
156
|
+
# require "google/cloud/bigquery"
|
157
|
+
#
|
158
|
+
# bigquery = Google::Cloud::Bigquery.new
|
159
|
+
#
|
160
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
161
|
+
# csv_table = bigquery.external csv_url do |csv|
|
162
|
+
# csv.encoding = "UTF-8"
|
163
|
+
# end
|
164
|
+
#
|
165
|
+
# csv_table.encoding #=> "UTF-8"
|
166
|
+
#
|
167
|
+
def encoding
|
168
|
+
@gapi.csv_options.encoding
|
169
|
+
end
|
170
|
+
|
171
|
+
##
|
172
|
+
# Set the character encoding of the data.
|
173
|
+
#
|
174
|
+
# @param [String] new_encoding New encoding value
|
175
|
+
#
|
176
|
+
# @example
|
177
|
+
# require "google/cloud/bigquery"
|
178
|
+
#
|
179
|
+
# bigquery = Google::Cloud::Bigquery.new
|
180
|
+
#
|
181
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
182
|
+
# csv_table = bigquery.external csv_url do |csv|
|
183
|
+
# csv.encoding = "UTF-8"
|
184
|
+
# end
|
185
|
+
#
|
186
|
+
# csv_table.encoding #=> "UTF-8"
|
187
|
+
#
|
188
|
+
def encoding= new_encoding
|
189
|
+
frozen_check!
|
190
|
+
@gapi.csv_options.encoding = new_encoding
|
191
|
+
end
|
192
|
+
|
193
|
+
##
|
194
|
+
# Checks if the character encoding of the data is "UTF-8". This is the
|
195
|
+
# default.
|
196
|
+
#
|
197
|
+
# @return [Boolean]
|
198
|
+
#
|
199
|
+
# @example
|
200
|
+
# require "google/cloud/bigquery"
|
201
|
+
#
|
202
|
+
# bigquery = Google::Cloud::Bigquery.new
|
203
|
+
#
|
204
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
205
|
+
# csv_table = bigquery.external csv_url do |csv|
|
206
|
+
# csv.encoding = "UTF-8"
|
207
|
+
# end
|
208
|
+
#
|
209
|
+
# csv_table.encoding #=> "UTF-8"
|
210
|
+
# csv_table.utf8? #=> true
|
211
|
+
#
|
212
|
+
def utf8?
|
213
|
+
return true if encoding.nil?
|
214
|
+
encoding == "UTF-8"
|
215
|
+
end
|
216
|
+
|
217
|
+
##
|
218
|
+
# Checks if the character encoding of the data is "ISO-8859-1".
|
219
|
+
#
|
220
|
+
# @return [Boolean]
|
221
|
+
#
|
222
|
+
# @example
|
223
|
+
# require "google/cloud/bigquery"
|
224
|
+
#
|
225
|
+
# bigquery = Google::Cloud::Bigquery.new
|
226
|
+
#
|
227
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
228
|
+
# csv_table = bigquery.external csv_url do |csv|
|
229
|
+
# csv.encoding = "ISO-8859-1"
|
230
|
+
# end
|
231
|
+
#
|
232
|
+
# csv_table.encoding #=> "ISO-8859-1"
|
233
|
+
# csv_table.iso8859_1? #=> true
|
234
|
+
#
|
235
|
+
def iso8859_1?
|
236
|
+
encoding == "ISO-8859-1"
|
237
|
+
end
|
238
|
+
|
239
|
+
##
|
240
|
+
# The separator for fields in a CSV file.
|
241
|
+
#
|
242
|
+
# @return [String]
|
243
|
+
#
|
244
|
+
# @example
|
245
|
+
# require "google/cloud/bigquery"
|
246
|
+
#
|
247
|
+
# bigquery = Google::Cloud::Bigquery.new
|
248
|
+
#
|
249
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
250
|
+
# csv_table = bigquery.external csv_url do |csv|
|
251
|
+
# csv.delimiter = "|"
|
252
|
+
# end
|
253
|
+
#
|
254
|
+
# csv_table.delimiter #=> "|"
|
255
|
+
#
|
256
|
+
def delimiter
|
257
|
+
@gapi.csv_options.field_delimiter
|
258
|
+
end
|
259
|
+
|
260
|
+
##
|
261
|
+
# Set the separator for fields in a CSV file.
|
262
|
+
#
|
263
|
+
# @param [String] new_delimiter New delimiter value
|
264
|
+
#
|
265
|
+
# @example
|
266
|
+
# require "google/cloud/bigquery"
|
267
|
+
#
|
268
|
+
# bigquery = Google::Cloud::Bigquery.new
|
269
|
+
#
|
270
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
271
|
+
# csv_table = bigquery.external csv_url do |csv|
|
272
|
+
# csv.delimiter = "|"
|
273
|
+
# end
|
274
|
+
#
|
275
|
+
# csv_table.delimiter #=> "|"
|
276
|
+
#
|
277
|
+
def delimiter= new_delimiter
|
278
|
+
frozen_check!
|
279
|
+
@gapi.csv_options.field_delimiter = new_delimiter
|
280
|
+
end
|
281
|
+
|
282
|
+
##
|
283
|
+
# The value that is used to quote data sections in a CSV file.
|
284
|
+
#
|
285
|
+
# @return [String]
|
286
|
+
#
|
287
|
+
# @example
|
288
|
+
# require "google/cloud/bigquery"
|
289
|
+
#
|
290
|
+
# bigquery = Google::Cloud::Bigquery.new
|
291
|
+
#
|
292
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
293
|
+
# csv_table = bigquery.external csv_url do |csv|
|
294
|
+
# csv.quote = "'"
|
295
|
+
# end
|
296
|
+
#
|
297
|
+
# csv_table.quote #=> "'"
|
298
|
+
#
|
299
|
+
def quote
|
300
|
+
@gapi.csv_options.quote
|
301
|
+
end
|
302
|
+
|
303
|
+
##
|
304
|
+
# Set the value that is used to quote data sections in a CSV file.
|
305
|
+
#
|
306
|
+
# @param [String] new_quote New quote value
|
307
|
+
#
|
308
|
+
# @example
|
309
|
+
# require "google/cloud/bigquery"
|
310
|
+
#
|
311
|
+
# bigquery = Google::Cloud::Bigquery.new
|
312
|
+
#
|
313
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
314
|
+
# csv_table = bigquery.external csv_url do |csv|
|
315
|
+
# csv.quote = "'"
|
316
|
+
# end
|
317
|
+
#
|
318
|
+
# csv_table.quote #=> "'"
|
319
|
+
#
|
320
|
+
def quote= new_quote
|
321
|
+
frozen_check!
|
322
|
+
@gapi.csv_options.quote = new_quote
|
323
|
+
end
|
324
|
+
|
325
|
+
##
|
326
|
+
# The number of rows at the top of a CSV file that BigQuery will skip
|
327
|
+
# when reading the data.
|
328
|
+
#
|
329
|
+
# @return [Integer]
|
330
|
+
#
|
331
|
+
# @example
|
332
|
+
# require "google/cloud/bigquery"
|
333
|
+
#
|
334
|
+
# bigquery = Google::Cloud::Bigquery.new
|
335
|
+
#
|
336
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
337
|
+
# csv_table = bigquery.external csv_url do |csv|
|
338
|
+
# csv.skip_leading_rows = 1
|
339
|
+
# end
|
340
|
+
#
|
341
|
+
# csv_table.skip_leading_rows #=> 1
|
342
|
+
#
|
343
|
+
def skip_leading_rows
|
344
|
+
@gapi.csv_options.skip_leading_rows
|
345
|
+
end
|
346
|
+
|
347
|
+
##
|
348
|
+
# Set the number of rows at the top of a CSV file that BigQuery will
|
349
|
+
# skip when reading the data.
|
350
|
+
#
|
351
|
+
# @param [Integer] row_count New skip_leading_rows value
|
352
|
+
#
|
353
|
+
# @example
|
354
|
+
# require "google/cloud/bigquery"
|
355
|
+
#
|
356
|
+
# bigquery = Google::Cloud::Bigquery.new
|
357
|
+
#
|
358
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
359
|
+
# csv_table = bigquery.external csv_url do |csv|
|
360
|
+
# csv.skip_leading_rows = 1
|
361
|
+
# end
|
362
|
+
#
|
363
|
+
# csv_table.skip_leading_rows #=> 1
|
364
|
+
#
|
365
|
+
def skip_leading_rows= row_count
|
366
|
+
frozen_check!
|
367
|
+
@gapi.csv_options.skip_leading_rows = row_count
|
368
|
+
end
|
369
|
+
|
370
|
+
##
|
371
|
+
# The schema for the data.
|
372
|
+
#
|
373
|
+
# @param [Boolean] replace Whether to replace the existing schema with
|
374
|
+
# the new schema. If `true`, the fields will replace the existing
|
375
|
+
# schema. If `false`, the fields will be added to the existing
|
376
|
+
# schema. The default value is `false`.
|
377
|
+
# @yield [schema] a block for setting the schema
|
378
|
+
# @yieldparam [Schema] schema the object accepting the schema
|
379
|
+
#
|
380
|
+
# @return [Google::Cloud::Bigquery::Schema]
|
381
|
+
#
|
382
|
+
# @example
|
383
|
+
# require "google/cloud/bigquery"
|
384
|
+
#
|
385
|
+
# bigquery = Google::Cloud::Bigquery.new
|
386
|
+
#
|
387
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
388
|
+
# csv_table = bigquery.external csv_url do |csv|
|
389
|
+
# csv.schema do |schema|
|
390
|
+
# schema.string "name", mode: :required
|
391
|
+
# schema.string "email", mode: :required
|
392
|
+
# schema.integer "age", mode: :required
|
393
|
+
# schema.boolean "active", mode: :required
|
394
|
+
# end
|
395
|
+
# end
|
396
|
+
#
|
397
|
+
def schema replace: false
|
398
|
+
@schema ||= Schema.from_gapi @gapi.schema
|
399
|
+
if replace
|
400
|
+
frozen_check!
|
401
|
+
@schema = Schema.from_gapi
|
402
|
+
end
|
403
|
+
@schema.freeze if frozen?
|
404
|
+
yield @schema if block_given?
|
405
|
+
@schema
|
406
|
+
end
|
407
|
+
|
408
|
+
##
|
409
|
+
# Set the schema for the data.
|
410
|
+
#
|
411
|
+
# @param [Schema] new_schema The schema object.
|
412
|
+
#
|
413
|
+
# @example
|
414
|
+
# require "google/cloud/bigquery"
|
415
|
+
#
|
416
|
+
# bigquery = Google::Cloud::Bigquery.new
|
417
|
+
#
|
418
|
+
# csv_schema = bigquery.schema do |schema|
|
419
|
+
# schema.string "name", mode: :required
|
420
|
+
# schema.string "email", mode: :required
|
421
|
+
# schema.integer "age", mode: :required
|
422
|
+
# schema.boolean "active", mode: :required
|
423
|
+
# end
|
424
|
+
#
|
425
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
426
|
+
# csv_table = bigquery.external csv_url
|
427
|
+
# csv_table.schema = csv_schema
|
428
|
+
#
|
429
|
+
def schema= new_schema
|
430
|
+
frozen_check!
|
431
|
+
@schema = new_schema
|
432
|
+
end
|
433
|
+
|
434
|
+
##
|
435
|
+
# The fields of the schema.
|
436
|
+
#
|
437
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
438
|
+
#
|
439
|
+
def fields
|
440
|
+
schema.fields
|
441
|
+
end
|
442
|
+
|
443
|
+
##
|
444
|
+
# The names of the columns in the schema.
|
445
|
+
#
|
446
|
+
# @return [Array<Symbol>] An array of column names.
|
447
|
+
#
|
448
|
+
def headers
|
449
|
+
schema.headers
|
450
|
+
end
|
451
|
+
|
452
|
+
##
|
453
|
+
# The types of the fields in the data in the schema, using the same
|
454
|
+
# format as the optional query parameter types.
|
455
|
+
#
|
456
|
+
# @return [Hash] A hash with field names as keys, and types as values.
|
457
|
+
#
|
458
|
+
def param_types
|
459
|
+
schema.param_types
|
460
|
+
end
|
461
|
+
|
462
|
+
##
|
463
|
+
# @private Google API Client object.
|
464
|
+
def to_gapi
|
465
|
+
@gapi.schema = @schema.to_gapi if @schema
|
466
|
+
@gapi
|
467
|
+
end
|
468
|
+
|
469
|
+
##
|
470
|
+
# @private Google API Client object.
|
471
|
+
def self.from_gapi gapi
|
472
|
+
new_table = super
|
473
|
+
schema = Schema.from_gapi gapi.schema
|
474
|
+
new_table.instance_variable_set :@schema, schema
|
475
|
+
new_table
|
476
|
+
end
|
477
|
+
end
|
478
|
+
end
|
479
|
+
end
|
480
|
+
end
|
481
|
+
end
|