google-cloud-bigquery 1.14.0 → 1.42.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
@@ -18,89 +18,226 @@ module Google
|
|
18
18
|
module Bigquery
|
19
19
|
##
|
20
20
|
# BigQuery standard SQL is compliant with the SQL 2011 standard and has
|
21
|
-
# extensions that support querying nested and repeated data.
|
21
|
+
# extensions that support querying nested and repeated data. See {Routine} and {Argument}.
|
22
|
+
#
|
23
|
+
# @example
|
24
|
+
# require "google/cloud/bigquery"
|
25
|
+
#
|
26
|
+
# bigquery = Google::Cloud::Bigquery.new
|
27
|
+
# dataset = bigquery.dataset "my_dataset"
|
28
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
29
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
30
|
+
# r.language = :SQL
|
31
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
32
|
+
# r.arguments = [
|
33
|
+
# Google::Cloud::Bigquery::Argument.new(
|
34
|
+
# name: "arr",
|
35
|
+
# argument_kind: "FIXED_TYPE",
|
36
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
37
|
+
# type_kind: "ARRAY",
|
38
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
39
|
+
# type_kind: "STRUCT",
|
40
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
41
|
+
# fields: [
|
42
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
43
|
+
# name: "name",
|
44
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
45
|
+
# ),
|
46
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
47
|
+
# name: "val",
|
48
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
49
|
+
# )
|
50
|
+
# ]
|
51
|
+
# )
|
52
|
+
# )
|
53
|
+
# )
|
54
|
+
# )
|
55
|
+
# ]
|
56
|
+
# end
|
57
|
+
#
|
22
58
|
module StandardSql
|
23
59
|
##
|
24
|
-
# A field or a column.
|
60
|
+
# A field or a column. See {Routine} and {Argument}.
|
61
|
+
#
|
62
|
+
# @example
|
63
|
+
# require "google/cloud/bigquery"
|
64
|
+
#
|
65
|
+
# bigquery = Google::Cloud::Bigquery.new
|
66
|
+
# dataset = bigquery.dataset "my_dataset"
|
67
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
68
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
69
|
+
# r.language = :SQL
|
70
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
71
|
+
# r.arguments = [
|
72
|
+
# Google::Cloud::Bigquery::Argument.new(
|
73
|
+
# name: "arr",
|
74
|
+
# argument_kind: "FIXED_TYPE",
|
75
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
76
|
+
# type_kind: "ARRAY",
|
77
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
78
|
+
# type_kind: "STRUCT",
|
79
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
80
|
+
# fields: [
|
81
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
82
|
+
# name: "name",
|
83
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
84
|
+
# ),
|
85
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
86
|
+
# name: "val",
|
87
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
88
|
+
# )
|
89
|
+
# ]
|
90
|
+
# )
|
91
|
+
# )
|
92
|
+
# )
|
93
|
+
# )
|
94
|
+
# ]
|
95
|
+
# end
|
96
|
+
#
|
25
97
|
class Field
|
26
98
|
##
|
27
|
-
#
|
28
|
-
|
29
|
-
|
99
|
+
# Creates a new, immutable StandardSql::Field object.
|
100
|
+
#
|
101
|
+
# @overload initialize(name, type)
|
102
|
+
# @param [String] name The name of the field. Optional. Can be absent for struct fields.
|
103
|
+
# @param [StandardSql::DataType, String] type The type of the field. Optional. Absent if not explicitly
|
104
|
+
# specified (e.g., `CREATE FUNCTION` statement can omit the return type; in this case the output parameter
|
105
|
+
# does not have this "type" field).
|
106
|
+
#
|
107
|
+
def initialize **kwargs
|
108
|
+
# Convert client object kwargs to a gapi object
|
109
|
+
kwargs[:type] = DataType.gapi_from_string_or_data_type kwargs[:type] if kwargs[:type]
|
110
|
+
@gapi = Google::Apis::BigqueryV2::StandardSqlField.new(**kwargs)
|
30
111
|
end
|
31
112
|
|
32
113
|
##
|
33
|
-
# The name of the field.
|
114
|
+
# The name of the field. Optional. Can be absent for struct fields.
|
34
115
|
#
|
35
116
|
# @return [String, nil]
|
36
117
|
#
|
37
118
|
def name
|
38
|
-
return
|
39
|
-
|
40
|
-
@gapi_json[:name]
|
119
|
+
return if @gapi.name == "".freeze
|
120
|
+
@gapi.name
|
41
121
|
end
|
42
122
|
|
43
123
|
##
|
44
|
-
# The type of the field.
|
124
|
+
# The type of the field. Optional. Absent if not explicitly specified (e.g., `CREATE FUNCTION` statement can
|
125
|
+
# omit the return type; in this case the output parameter does not have this "type" field).
|
45
126
|
#
|
46
|
-
# @return [DataType]
|
127
|
+
# @return [DataType, nil] The type of the field.
|
47
128
|
#
|
48
129
|
def type
|
49
|
-
DataType.
|
130
|
+
DataType.from_gapi @gapi.type if @gapi.type
|
131
|
+
end
|
132
|
+
|
133
|
+
##
|
134
|
+
# @private New Google::Apis::BigqueryV2::StandardSqlField object.
|
135
|
+
def to_gapi
|
136
|
+
@gapi
|
50
137
|
end
|
51
138
|
|
52
139
|
##
|
53
|
-
# @private New StandardSql::Field from a
|
54
|
-
def self.
|
140
|
+
# @private New StandardSql::Field from a Google::Apis::BigqueryV2::StandardSqlField object.
|
141
|
+
def self.from_gapi gapi
|
55
142
|
new.tap do |f|
|
56
|
-
f.instance_variable_set :@
|
143
|
+
f.instance_variable_set :@gapi, gapi
|
57
144
|
end
|
58
145
|
end
|
59
146
|
end
|
60
147
|
|
61
148
|
##
|
62
|
-
# The type of a
|
149
|
+
# The type of a variable, e.g., a function argument. See {Routine} and {Argument}.
|
150
|
+
#
|
151
|
+
# @example
|
152
|
+
# require "google/cloud/bigquery"
|
153
|
+
#
|
154
|
+
# bigquery = Google::Cloud::Bigquery.new
|
155
|
+
# dataset = bigquery.dataset "my_dataset"
|
156
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
157
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
158
|
+
# r.language = :SQL
|
159
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
160
|
+
# r.arguments = [
|
161
|
+
# Google::Cloud::Bigquery::Argument.new(
|
162
|
+
# name: "arr",
|
163
|
+
# argument_kind: "FIXED_TYPE",
|
164
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
165
|
+
# type_kind: "ARRAY",
|
166
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
167
|
+
# type_kind: "STRUCT",
|
168
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
169
|
+
# fields: [
|
170
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
171
|
+
# name: "name",
|
172
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
173
|
+
# ),
|
174
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
175
|
+
# name: "val",
|
176
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
177
|
+
# )
|
178
|
+
# ]
|
179
|
+
# )
|
180
|
+
# )
|
181
|
+
# )
|
182
|
+
# )
|
183
|
+
# ]
|
184
|
+
# end
|
185
|
+
#
|
186
|
+
# @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
|
187
|
+
#
|
63
188
|
class DataType
|
64
189
|
##
|
65
|
-
#
|
66
|
-
|
67
|
-
|
190
|
+
# Creates a new, immutable StandardSql::DataType object.
|
191
|
+
#
|
192
|
+
# @overload initialize(type_kind, array_element_type, struct_type)
|
193
|
+
# @param [String] type_kind The top level type of this field. Required. Can be [any standard SQL data
|
194
|
+
# type](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) (e.g., `INT64`, `DATE`,
|
195
|
+
# `ARRAY`).
|
196
|
+
# @param [DataType, String] array_element_type The type of the array's elements, if {#type_kind} is `ARRAY`.
|
197
|
+
# See {#array?}. Optional.
|
198
|
+
# @param [StructType] struct_type The fields of the struct, in order, if {#type_kind} is `STRUCT`. See
|
199
|
+
# {#struct?}. Optional.
|
200
|
+
#
|
201
|
+
def initialize **kwargs
|
202
|
+
# Convert client object kwargs to a gapi object
|
203
|
+
if kwargs[:array_element_type]
|
204
|
+
kwargs[:array_element_type] = self.class.gapi_from_string_or_data_type kwargs[:array_element_type]
|
205
|
+
end
|
206
|
+
kwargs[:struct_type] = kwargs[:struct_type].to_gapi if kwargs[:struct_type]
|
207
|
+
|
208
|
+
@gapi = Google::Apis::BigqueryV2::StandardSqlDataType.new(**kwargs)
|
68
209
|
end
|
69
210
|
|
70
211
|
##
|
71
|
-
# The top level type of this field.
|
212
|
+
# The top level type of this field. Required. Can be any standard SQL data type (e.g., `INT64`, `DATE`,
|
213
|
+
# `ARRAY`).
|
72
214
|
#
|
73
|
-
#
|
215
|
+
# @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
|
74
216
|
#
|
75
|
-
# @
|
76
|
-
# Standard SQL Data Types
|
77
|
-
#
|
78
|
-
# @return [String]
|
217
|
+
# @return [String] The upper case type.
|
79
218
|
#
|
80
219
|
def type_kind
|
81
|
-
@
|
220
|
+
@gapi.type_kind
|
82
221
|
end
|
83
222
|
|
84
223
|
##
|
85
|
-
# The type of
|
224
|
+
# The type of the array's elements, if {#type_kind} is `ARRAY`. See {#array?}. Optional.
|
86
225
|
#
|
87
226
|
# @return [DataType, nil]
|
88
227
|
#
|
89
228
|
def array_element_type
|
90
|
-
return if @
|
91
|
-
|
92
|
-
DataType.from_gapi_json @gapi_json[:arrayElementType]
|
229
|
+
return if @gapi.array_element_type.nil?
|
230
|
+
DataType.from_gapi @gapi.array_element_type
|
93
231
|
end
|
94
232
|
|
95
233
|
##
|
96
|
-
# The fields of the struct
|
234
|
+
# The fields of the struct, in order, if {#type_kind} is `STRUCT`. See {#struct?}. Optional.
|
97
235
|
#
|
98
236
|
# @return [StructType, nil]
|
99
237
|
#
|
100
238
|
def struct_type
|
101
|
-
return if @
|
102
|
-
|
103
|
-
StructType.from_gapi_json @gapi_json[:structType]
|
239
|
+
return if @gapi.struct_type.nil?
|
240
|
+
StructType.from_gapi @gapi.struct_type
|
104
241
|
end
|
105
242
|
|
106
243
|
##
|
@@ -136,6 +273,17 @@ module Google
|
|
136
273
|
type_kind == "NUMERIC".freeze
|
137
274
|
end
|
138
275
|
|
276
|
+
##
|
277
|
+
# Checks if the {#type_kind} of the field is `BIGNUMERIC`.
|
278
|
+
#
|
279
|
+
# @return [Boolean] `true` when `BIGNUMERIC`, `false` otherwise.
|
280
|
+
#
|
281
|
+
# @!group Helpers
|
282
|
+
#
|
283
|
+
def bignumeric?
|
284
|
+
type_kind == "BIGNUMERIC".freeze
|
285
|
+
end
|
286
|
+
|
139
287
|
##
|
140
288
|
# Checks if the {#type_kind} of the field is `BOOL`.
|
141
289
|
#
|
@@ -247,41 +395,109 @@ module Google
|
|
247
395
|
end
|
248
396
|
|
249
397
|
##
|
250
|
-
# @private New
|
251
|
-
def
|
252
|
-
|
253
|
-
|
398
|
+
# @private New Google::Apis::BigqueryV2::StandardSqlDataType object.
|
399
|
+
def to_gapi
|
400
|
+
@gapi
|
401
|
+
end
|
402
|
+
|
403
|
+
##
|
404
|
+
# @private New StandardSql::DataType from a Google::Apis::BigqueryV2::StandardSqlDataType object.
|
405
|
+
def self.from_gapi gapi
|
406
|
+
new.tap do |f|
|
407
|
+
f.instance_variable_set :@gapi, gapi
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
411
|
+
##
|
412
|
+
# @private New Google::Apis::BigqueryV2::StandardSqlDataType from a String or StandardSql::DataType object.
|
413
|
+
def self.gapi_from_string_or_data_type data_type
|
414
|
+
return if data_type.nil?
|
415
|
+
case data_type
|
416
|
+
when StandardSql::DataType
|
417
|
+
data_type.to_gapi
|
418
|
+
when Hash
|
419
|
+
data_type
|
420
|
+
when String, Symbol
|
421
|
+
Google::Apis::BigqueryV2::StandardSqlDataType.new type_kind: data_type.to_s.upcase
|
422
|
+
else
|
423
|
+
raise ArgumentError, "Unable to convert #{data_type} to Google::Apis::BigqueryV2::StandardSqlDataType"
|
254
424
|
end
|
255
425
|
end
|
256
426
|
end
|
257
427
|
|
258
428
|
##
|
259
|
-
# The
|
429
|
+
# The fields of a `STRUCT` type. See {DataType#struct_type}. See {Routine} and {Argument}.
|
430
|
+
#
|
431
|
+
# @example
|
432
|
+
# require "google/cloud/bigquery"
|
433
|
+
#
|
434
|
+
# bigquery = Google::Cloud::Bigquery.new
|
435
|
+
# dataset = bigquery.dataset "my_dataset"
|
436
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
437
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
438
|
+
# r.language = :SQL
|
439
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
440
|
+
# r.arguments = [
|
441
|
+
# Google::Cloud::Bigquery::Argument.new(
|
442
|
+
# name: "arr",
|
443
|
+
# argument_kind: "FIXED_TYPE",
|
444
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
445
|
+
# type_kind: "ARRAY",
|
446
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
447
|
+
# type_kind: "STRUCT",
|
448
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
449
|
+
# fields: [
|
450
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
451
|
+
# name: "name",
|
452
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
453
|
+
# ),
|
454
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
455
|
+
# name: "val",
|
456
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
457
|
+
# )
|
458
|
+
# ]
|
459
|
+
# )
|
460
|
+
# )
|
461
|
+
# )
|
462
|
+
# )
|
463
|
+
# ]
|
464
|
+
# end
|
465
|
+
#
|
260
466
|
class StructType
|
261
467
|
##
|
262
|
-
#
|
263
|
-
|
264
|
-
|
468
|
+
# Creates a new, immutable StandardSql::StructType object.
|
469
|
+
#
|
470
|
+
# @overload initialize(fields)
|
471
|
+
# @param [Array<Field>] fields The fields of the struct. Required.
|
472
|
+
#
|
473
|
+
def initialize **kwargs
|
474
|
+
# Convert each field client object to gapi object, if fields given (self.from_gapi does not pass kwargs)
|
475
|
+
kwargs[:fields] = kwargs[:fields]&.map(&:to_gapi) if kwargs[:fields]
|
476
|
+
@gapi = Google::Apis::BigqueryV2::StandardSqlStructType.new(**kwargs)
|
265
477
|
end
|
266
478
|
|
267
479
|
##
|
268
|
-
# The
|
480
|
+
# The fields of the struct.
|
269
481
|
#
|
270
|
-
#
|
271
|
-
#
|
272
|
-
# @return [Array<Field>]
|
482
|
+
# @return [Array<Field>] A frozen array of fields.
|
273
483
|
#
|
274
484
|
def fields
|
275
|
-
Array(@
|
276
|
-
Field.
|
277
|
-
end
|
485
|
+
Array(@gapi.fields).map do |field_gapi|
|
486
|
+
Field.from_gapi field_gapi
|
487
|
+
end.freeze
|
278
488
|
end
|
279
489
|
|
280
490
|
##
|
281
|
-
# @private New
|
282
|
-
def
|
283
|
-
|
284
|
-
|
491
|
+
# @private New Google::Apis::BigqueryV2::StandardSqlStructType object.
|
492
|
+
def to_gapi
|
493
|
+
@gapi
|
494
|
+
end
|
495
|
+
|
496
|
+
##
|
497
|
+
# @private New StandardSql::StructType from a Google::Apis::BigqueryV2::StandardSqlStructType object.
|
498
|
+
def self.from_gapi gapi
|
499
|
+
new.tap do |f|
|
500
|
+
f.instance_variable_set :@gapi, gapi
|
285
501
|
end
|
286
502
|
end
|
287
503
|
end
|
@@ -64,16 +64,21 @@ module Google
|
|
64
64
|
class AsyncInserter
|
65
65
|
include MonitorMixin
|
66
66
|
|
67
|
-
attr_reader :max_bytes
|
67
|
+
attr_reader :max_bytes
|
68
|
+
attr_reader :max_rows
|
69
|
+
attr_reader :interval
|
70
|
+
attr_reader :threads
|
68
71
|
##
|
69
72
|
# @private Implementation accessors
|
70
73
|
attr_reader :table, :batch
|
71
74
|
|
72
75
|
##
|
73
76
|
# @private
|
74
|
-
def initialize table, skip_invalid: nil, ignore_unknown: nil,
|
75
|
-
|
76
|
-
|
77
|
+
def initialize table, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
|
78
|
+
interval: 10, threads: 4, &block
|
79
|
+
# init MonitorMixin
|
80
|
+
super()
|
81
|
+
|
77
82
|
@table = table
|
78
83
|
@skip_invalid = skip_invalid
|
79
84
|
@ignore_unknown = ignore_unknown
|
@@ -86,13 +91,9 @@ module Google
|
|
86
91
|
|
87
92
|
@batch = nil
|
88
93
|
|
89
|
-
@thread_pool = Concurrent::ThreadPoolExecutor.new
|
90
|
-
max_threads: @threads
|
94
|
+
@thread_pool = Concurrent::ThreadPoolExecutor.new max_threads: @threads
|
91
95
|
|
92
96
|
@cond = new_cond
|
93
|
-
|
94
|
-
# init MonitorMixin
|
95
|
-
super()
|
96
97
|
end
|
97
98
|
|
98
99
|
##
|
@@ -100,14 +101,47 @@ module Google
|
|
100
101
|
# collected in batches and inserted together.
|
101
102
|
# See {Google::Cloud::Bigquery::Table#insert_async}.
|
102
103
|
#
|
103
|
-
#
|
104
|
-
#
|
105
|
-
#
|
106
|
-
#
|
107
|
-
#
|
108
|
-
#
|
109
|
-
#
|
110
|
-
#
|
104
|
+
# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's
|
105
|
+
# more complex types:
|
106
|
+
#
|
107
|
+
# | BigQuery | Ruby | Notes |
|
108
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
109
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
110
|
+
# | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
|
111
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
112
|
+
# | `DATE` | `Date` | |
|
113
|
+
# | `GEOGRAPHY` | `String` | |
|
114
|
+
# | `TIMESTAMP` | `Time` | |
|
115
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
116
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
117
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
118
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
119
|
+
#
|
120
|
+
# Because BigQuery's streaming API is designed for high insertion
|
121
|
+
# rates, modifications to the underlying table metadata are eventually
|
122
|
+
# consistent when interacting with the streaming system. In most cases
|
123
|
+
# metadata changes are propagated within minutes, but during this
|
124
|
+
# period API responses may reflect the inconsistent state of the
|
125
|
+
# table.
|
126
|
+
#
|
127
|
+
# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
|
128
|
+
# Streaming Data Into BigQuery
|
129
|
+
#
|
130
|
+
# @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
|
131
|
+
# BigQuery Troubleshooting: Metadata errors for streaming inserts
|
132
|
+
#
|
133
|
+
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
|
134
|
+
# containing the data. Required. `BigDecimal` values will be rounded to
|
135
|
+
# scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
|
136
|
+
# rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
|
137
|
+
# instead of `BigDecimal`.
|
138
|
+
# @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
|
139
|
+
# detect duplicate insertion requests on a best-effort basis. For more information, see [data
|
140
|
+
# consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
|
141
|
+
# not provided, the client library will assign a UUID to each row before the request is sent.
|
142
|
+
#
|
143
|
+
# The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of
|
144
|
+
# an ID for a specific row in the array.
|
111
145
|
#
|
112
146
|
def insert rows, insert_ids: nil
|
113
147
|
return nil if rows.nil?
|
@@ -123,8 +157,7 @@ module Google
|
|
123
157
|
unless @batch.try_insert row, insert_id
|
124
158
|
push_batch_request!
|
125
159
|
|
126
|
-
@batch = Batch.new max_bytes: @max_bytes,
|
127
|
-
max_rows: @max_rows
|
160
|
+
@batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
|
128
161
|
@batch.insert row, insert_id
|
129
162
|
end
|
130
163
|
end
|
@@ -214,10 +247,14 @@ module Google
|
|
214
247
|
|
215
248
|
def validate_insert_args rows, insert_ids
|
216
249
|
rows = [rows] if rows.is_a? Hash
|
250
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
251
|
+
|
252
|
+
insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
|
217
253
|
insert_ids = Array insert_ids
|
218
|
-
if insert_ids.count
|
254
|
+
if insert_ids.count.positive? && insert_ids.count != rows.count
|
219
255
|
raise ArgumentError, "insert_ids must be the same size as rows"
|
220
256
|
end
|
257
|
+
|
221
258
|
[rows, insert_ids]
|
222
259
|
end
|
223
260
|
|
@@ -250,22 +287,19 @@ module Google
|
|
250
287
|
json_rows = @batch.json_rows
|
251
288
|
insert_ids = @batch.insert_ids
|
252
289
|
Concurrent::Future.new executor: @thread_pool do
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
ensure
|
267
|
-
@callback.call result if @callback
|
268
|
-
end
|
290
|
+
raise ArgumentError, "No rows provided" if json_rows.empty?
|
291
|
+
insert_resp = @table.service.insert_tabledata_json_rows @table.dataset_id,
|
292
|
+
@table.table_id,
|
293
|
+
json_rows,
|
294
|
+
skip_invalid: @skip_invalid,
|
295
|
+
ignore_unknown: @ignore_unknown,
|
296
|
+
insert_ids: insert_ids
|
297
|
+
|
298
|
+
result = Result.new InsertResponse.from_gapi(orig_rows, insert_resp)
|
299
|
+
rescue StandardError => e
|
300
|
+
result = Result.new nil, e
|
301
|
+
ensure
|
302
|
+
@callback&.call result
|
269
303
|
end.execute
|
270
304
|
|
271
305
|
@batch = nil
|
@@ -275,9 +309,13 @@ module Google
|
|
275
309
|
##
|
276
310
|
# @private
|
277
311
|
class Batch
|
278
|
-
attr_reader :max_bytes
|
312
|
+
attr_reader :max_bytes
|
313
|
+
attr_reader :max_rows
|
314
|
+
attr_reader :rows
|
315
|
+
attr_reader :json_rows
|
316
|
+
attr_reader :insert_ids
|
279
317
|
|
280
|
-
def initialize max_bytes:
|
318
|
+
def initialize max_bytes: 10_000_000, max_rows: 500
|
281
319
|
@max_bytes = max_bytes
|
282
320
|
@max_rows = max_rows
|
283
321
|
@rows = []
|
@@ -293,8 +331,7 @@ module Google
|
|
293
331
|
insert_id ||= SecureRandom.uuid
|
294
332
|
json_row = to_json_row row
|
295
333
|
|
296
|
-
insert_rows_bytes
|
297
|
-
row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
|
334
|
+
insert_rows_bytes row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
|
298
335
|
end
|
299
336
|
|
300
337
|
def try_insert row, insert_id
|
@@ -327,8 +364,13 @@ module Google
|
|
327
364
|
end
|
328
365
|
|
329
366
|
def addl_bytes_for json_row, insert_id
|
330
|
-
|
331
|
-
|
367
|
+
if insert_id == :skip
|
368
|
+
# "{\"json\":},".bytesize #=> 10
|
369
|
+
10 + json_row.to_json.bytesize
|
370
|
+
else
|
371
|
+
# "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
|
372
|
+
24 + json_row.to_json.bytesize + insert_id.bytesize
|
373
|
+
end
|
332
374
|
end
|
333
375
|
end
|
334
376
|
|
@@ -380,7 +422,8 @@ module Google
|
|
380
422
|
@error = error
|
381
423
|
end
|
382
424
|
|
383
|
-
attr_reader :insert_response
|
425
|
+
attr_reader :insert_response
|
426
|
+
attr_reader :error
|
384
427
|
|
385
428
|
##
|
386
429
|
# Checks if an error is present, meaning that the insert operation
|
@@ -78,8 +78,7 @@ module Google
|
|
78
78
|
def next
|
79
79
|
return nil unless next?
|
80
80
|
ensure_service!
|
81
|
-
|
82
|
-
gapi = @service.list_tables @dataset_id, options
|
81
|
+
gapi = @service.list_tables @dataset_id, token: token, max: @max
|
83
82
|
self.class.from_gapi gapi, @service, @dataset_id, @max
|
84
83
|
end
|
85
84
|
|
@@ -131,17 +130,15 @@ module Google
|
|
131
130
|
# puts table.name
|
132
131
|
# end
|
133
132
|
#
|
134
|
-
def all request_limit: nil
|
133
|
+
def all request_limit: nil, &block
|
135
134
|
request_limit = request_limit.to_i if request_limit
|
136
|
-
unless block_given?
|
137
|
-
return enum_for :all, request_limit: request_limit
|
138
|
-
end
|
135
|
+
return enum_for :all, request_limit: request_limit unless block_given?
|
139
136
|
results = self
|
140
137
|
loop do
|
141
|
-
results.each
|
138
|
+
results.each(&block)
|
142
139
|
if request_limit
|
143
140
|
request_limit -= 1
|
144
|
-
break if request_limit
|
141
|
+
break if request_limit.negative?
|
145
142
|
end
|
146
143
|
break unless results.next?
|
147
144
|
results = results.next
|
@@ -151,9 +148,7 @@ module Google
|
|
151
148
|
##
|
152
149
|
# @private New Table::List from a response object.
|
153
150
|
def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
|
154
|
-
tables = List.new(Array(gapi_list.tables).map
|
155
|
-
Table.from_gapi gapi_object, service
|
156
|
-
end)
|
151
|
+
tables = List.new(Array(gapi_list.tables).map { |gapi_object| Table.from_gapi gapi_object, service })
|
157
152
|
tables.instance_variable_set :@token, gapi_list.next_page_token
|
158
153
|
tables.instance_variable_set :@etag, gapi_list.etag
|
159
154
|
tables.instance_variable_set :@total, gapi_list.total_items
|