google-cloud-bigquery 1.14.0 → 1.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
@@ -18,89 +18,226 @@ module Google
|
|
18
18
|
module Bigquery
|
19
19
|
##
|
20
20
|
# BigQuery standard SQL is compliant with the SQL 2011 standard and has
|
21
|
-
# extensions that support querying nested and repeated data.
|
21
|
+
# extensions that support querying nested and repeated data. See {Routine} and {Argument}.
|
22
|
+
#
|
23
|
+
# @example
|
24
|
+
# require "google/cloud/bigquery"
|
25
|
+
#
|
26
|
+
# bigquery = Google::Cloud::Bigquery.new
|
27
|
+
# dataset = bigquery.dataset "my_dataset"
|
28
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
29
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
30
|
+
# r.language = :SQL
|
31
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
32
|
+
# r.arguments = [
|
33
|
+
# Google::Cloud::Bigquery::Argument.new(
|
34
|
+
# name: "arr",
|
35
|
+
# argument_kind: "FIXED_TYPE",
|
36
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
37
|
+
# type_kind: "ARRAY",
|
38
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
39
|
+
# type_kind: "STRUCT",
|
40
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
41
|
+
# fields: [
|
42
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
43
|
+
# name: "name",
|
44
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
45
|
+
# ),
|
46
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
47
|
+
# name: "val",
|
48
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
49
|
+
# )
|
50
|
+
# ]
|
51
|
+
# )
|
52
|
+
# )
|
53
|
+
# )
|
54
|
+
# )
|
55
|
+
# ]
|
56
|
+
# end
|
57
|
+
#
|
22
58
|
module StandardSql
|
23
59
|
##
|
24
|
-
# A field or a column.
|
60
|
+
# A field or a column. See {Routine} and {Argument}.
|
61
|
+
#
|
62
|
+
# @example
|
63
|
+
# require "google/cloud/bigquery"
|
64
|
+
#
|
65
|
+
# bigquery = Google::Cloud::Bigquery.new
|
66
|
+
# dataset = bigquery.dataset "my_dataset"
|
67
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
68
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
69
|
+
# r.language = :SQL
|
70
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
71
|
+
# r.arguments = [
|
72
|
+
# Google::Cloud::Bigquery::Argument.new(
|
73
|
+
# name: "arr",
|
74
|
+
# argument_kind: "FIXED_TYPE",
|
75
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
76
|
+
# type_kind: "ARRAY",
|
77
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
78
|
+
# type_kind: "STRUCT",
|
79
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
80
|
+
# fields: [
|
81
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
82
|
+
# name: "name",
|
83
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
84
|
+
# ),
|
85
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
86
|
+
# name: "val",
|
87
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
88
|
+
# )
|
89
|
+
# ]
|
90
|
+
# )
|
91
|
+
# )
|
92
|
+
# )
|
93
|
+
# )
|
94
|
+
# ]
|
95
|
+
# end
|
96
|
+
#
|
25
97
|
class Field
|
26
98
|
##
|
27
|
-
#
|
28
|
-
|
29
|
-
|
99
|
+
# Creates a new, immutable StandardSql::Field object.
|
100
|
+
#
|
101
|
+
# @overload initialize(name, type)
|
102
|
+
# @param [String] name The name of the field. Optional. Can be absent for struct fields.
|
103
|
+
# @param [StandardSql::DataType, String] type The type of the field. Optional. Absent if not explicitly
|
104
|
+
# specified (e.g., `CREATE FUNCTION` statement can omit the return type; in this case the output parameter
|
105
|
+
# does not have this "type" field).
|
106
|
+
#
|
107
|
+
def initialize **kwargs
|
108
|
+
# Convert client object kwargs to a gapi object
|
109
|
+
kwargs[:type] = DataType.gapi_from_string_or_data_type kwargs[:type] if kwargs[:type]
|
110
|
+
@gapi = Google::Apis::BigqueryV2::StandardSqlField.new(**kwargs)
|
30
111
|
end
|
31
112
|
|
32
113
|
##
|
33
|
-
# The name of the field.
|
114
|
+
# The name of the field. Optional. Can be absent for struct fields.
|
34
115
|
#
|
35
116
|
# @return [String, nil]
|
36
117
|
#
|
37
118
|
def name
|
38
|
-
return
|
39
|
-
|
40
|
-
@gapi_json[:name]
|
119
|
+
return if @gapi.name == "".freeze
|
120
|
+
@gapi.name
|
41
121
|
end
|
42
122
|
|
43
123
|
##
|
44
|
-
# The type of the field.
|
124
|
+
# The type of the field. Optional. Absent if not explicitly specified (e.g., `CREATE FUNCTION` statement can
|
125
|
+
# omit the return type; in this case the output parameter does not have this "type" field).
|
45
126
|
#
|
46
|
-
# @return [DataType]
|
127
|
+
# @return [DataType, nil] The type of the field.
|
47
128
|
#
|
48
129
|
def type
|
49
|
-
DataType.
|
130
|
+
DataType.from_gapi @gapi.type if @gapi.type
|
131
|
+
end
|
132
|
+
|
133
|
+
##
|
134
|
+
# @private Returns the underlying Google::Apis::BigqueryV2::StandardSqlField object.
|
135
|
+
def to_gapi
|
136
|
+
@gapi
|
50
137
|
end
|
51
138
|
|
52
139
|
##
|
53
|
-
# @private New StandardSql::Field from a
|
54
|
-
def self.
|
140
|
+
# @private New StandardSql::Field from a Google::Apis::BigqueryV2::StandardSqlField object.
|
141
|
+
def self.from_gapi gapi
|
55
142
|
new.tap do |f|
|
56
|
-
f.instance_variable_set :@
|
143
|
+
f.instance_variable_set :@gapi, gapi
|
57
144
|
end
|
58
145
|
end
|
59
146
|
end
|
60
147
|
|
61
148
|
##
|
62
|
-
# The type of a
|
149
|
+
# The type of a variable, e.g., a function argument. See {Routine} and {Argument}.
|
150
|
+
#
|
151
|
+
# @example
|
152
|
+
# require "google/cloud/bigquery"
|
153
|
+
#
|
154
|
+
# bigquery = Google::Cloud::Bigquery.new
|
155
|
+
# dataset = bigquery.dataset "my_dataset"
|
156
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
157
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
158
|
+
# r.language = :SQL
|
159
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
160
|
+
# r.arguments = [
|
161
|
+
# Google::Cloud::Bigquery::Argument.new(
|
162
|
+
# name: "arr",
|
163
|
+
# argument_kind: "FIXED_TYPE",
|
164
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
165
|
+
# type_kind: "ARRAY",
|
166
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
167
|
+
# type_kind: "STRUCT",
|
168
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
169
|
+
# fields: [
|
170
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
171
|
+
# name: "name",
|
172
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
173
|
+
# ),
|
174
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
175
|
+
# name: "val",
|
176
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
177
|
+
# )
|
178
|
+
# ]
|
179
|
+
# )
|
180
|
+
# )
|
181
|
+
# )
|
182
|
+
# )
|
183
|
+
# ]
|
184
|
+
# end
|
185
|
+
#
|
186
|
+
# @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
|
187
|
+
#
|
63
188
|
class DataType
|
64
189
|
##
|
65
|
-
#
|
66
|
-
|
67
|
-
|
190
|
+
# Creates a new, immutable StandardSql::DataType object.
|
191
|
+
#
|
192
|
+
# @overload initialize(type_kind, array_element_type, struct_type)
|
193
|
+
# @param [String] type_kind The top level type of this field. Required. Can be [any standard SQL data
|
194
|
+
# type](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) (e.g., `INT64`, `DATE`,
|
195
|
+
# `ARRAY`).
|
196
|
+
# @param [DataType, String] array_element_type The type of the array's elements, if {#type_kind} is `ARRAY`.
|
197
|
+
# See {#array?}. Optional.
|
198
|
+
# @param [StructType] struct_type The fields of the struct, in order, if {#type_kind} is `STRUCT`. See
|
199
|
+
# {#struct?}. Optional.
|
200
|
+
#
|
201
|
+
def initialize **kwargs
|
202
|
+
# Convert client object kwargs to a gapi object
|
203
|
+
if kwargs[:array_element_type]
|
204
|
+
kwargs[:array_element_type] = self.class.gapi_from_string_or_data_type kwargs[:array_element_type]
|
205
|
+
end
|
206
|
+
kwargs[:struct_type] = kwargs[:struct_type].to_gapi if kwargs[:struct_type]
|
207
|
+
|
208
|
+
@gapi = Google::Apis::BigqueryV2::StandardSqlDataType.new(**kwargs)
|
68
209
|
end
|
69
210
|
|
70
211
|
##
|
71
|
-
# The top level type of this field.
|
212
|
+
# The top level type of this field. Required. Can be any standard SQL data type (e.g., `INT64`, `DATE`,
|
213
|
+
# `ARRAY`).
|
72
214
|
#
|
73
|
-
#
|
215
|
+
# @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
|
74
216
|
#
|
75
|
-
# @
|
76
|
-
# Standard SQL Data Types
|
77
|
-
#
|
78
|
-
# @return [String]
|
217
|
+
# @return [String] The upper case type.
|
79
218
|
#
|
80
219
|
def type_kind
|
81
|
-
@
|
220
|
+
@gapi.type_kind
|
82
221
|
end
|
83
222
|
|
84
223
|
##
|
85
|
-
# The type of
|
224
|
+
# The type of the array's elements, if {#type_kind} is `ARRAY`. See {#array?}. Optional.
|
86
225
|
#
|
87
226
|
# @return [DataType, nil]
|
88
227
|
#
|
89
228
|
def array_element_type
|
90
|
-
return if @
|
91
|
-
|
92
|
-
DataType.from_gapi_json @gapi_json[:arrayElementType]
|
229
|
+
return if @gapi.array_element_type.nil?
|
230
|
+
DataType.from_gapi @gapi.array_element_type
|
93
231
|
end
|
94
232
|
|
95
233
|
##
|
96
|
-
# The fields of the struct
|
234
|
+
# The fields of the struct, in order, if {#type_kind} is `STRUCT`. See {#struct?}. Optional.
|
97
235
|
#
|
98
236
|
# @return [StructType, nil]
|
99
237
|
#
|
100
238
|
def struct_type
|
101
|
-
return if @
|
102
|
-
|
103
|
-
StructType.from_gapi_json @gapi_json[:structType]
|
239
|
+
return if @gapi.struct_type.nil?
|
240
|
+
StructType.from_gapi @gapi.struct_type
|
104
241
|
end
|
105
242
|
|
106
243
|
##
|
@@ -136,6 +273,17 @@ module Google
|
|
136
273
|
type_kind == "NUMERIC".freeze
|
137
274
|
end
|
138
275
|
|
276
|
+
##
|
277
|
+
# Checks if the {#type_kind} of the field is `BIGNUMERIC`.
|
278
|
+
#
|
279
|
+
# @return [Boolean] `true` when `BIGNUMERIC`, `false` otherwise.
|
280
|
+
#
|
281
|
+
# @!group Helpers
|
282
|
+
#
|
283
|
+
def bignumeric?
|
284
|
+
type_kind == "BIGNUMERIC".freeze
|
285
|
+
end
|
286
|
+
|
139
287
|
##
|
140
288
|
# Checks if the {#type_kind} of the field is `BOOL`.
|
141
289
|
#
|
@@ -247,41 +395,109 @@ module Google
|
|
247
395
|
end
|
248
396
|
|
249
397
|
##
|
250
|
-
# @private New
|
251
|
-
def
|
252
|
-
|
253
|
-
|
398
|
+
# @private Returns the underlying Google::Apis::BigqueryV2::StandardSqlDataType object.
|
399
|
+
def to_gapi
|
400
|
+
@gapi
|
401
|
+
end
|
402
|
+
|
403
|
+
##
|
404
|
+
# @private New StandardSql::DataType from a Google::Apis::BigqueryV2::StandardSqlDataType object.
|
405
|
+
def self.from_gapi gapi
|
406
|
+
new.tap do |f|
|
407
|
+
f.instance_variable_set :@gapi, gapi
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
411
|
+
##
|
412
|
+
# @private Google::Apis::BigqueryV2::StandardSqlDataType from a String, Symbol, or StandardSql::DataType object (a Hash is passed through unchanged).
|
413
|
+
def self.gapi_from_string_or_data_type data_type
|
414
|
+
return if data_type.nil?
|
415
|
+
case data_type
|
416
|
+
when StandardSql::DataType
|
417
|
+
data_type.to_gapi
|
418
|
+
when Hash
|
419
|
+
data_type
|
420
|
+
when String, Symbol
|
421
|
+
Google::Apis::BigqueryV2::StandardSqlDataType.new type_kind: data_type.to_s.upcase
|
422
|
+
else
|
423
|
+
raise ArgumentError, "Unable to convert #{data_type} to Google::Apis::BigqueryV2::StandardSqlDataType"
|
254
424
|
end
|
255
425
|
end
|
256
426
|
end
|
257
427
|
|
258
428
|
##
|
259
|
-
# The
|
429
|
+
# The fields of a `STRUCT` type. See {DataType#struct_type}. See {Routine} and {Argument}.
|
430
|
+
#
|
431
|
+
# @example
|
432
|
+
# require "google/cloud/bigquery"
|
433
|
+
#
|
434
|
+
# bigquery = Google::Cloud::Bigquery.new
|
435
|
+
# dataset = bigquery.dataset "my_dataset"
|
436
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
437
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
438
|
+
# r.language = :SQL
|
439
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
440
|
+
# r.arguments = [
|
441
|
+
# Google::Cloud::Bigquery::Argument.new(
|
442
|
+
# name: "arr",
|
443
|
+
# argument_kind: "FIXED_TYPE",
|
444
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
445
|
+
# type_kind: "ARRAY",
|
446
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
447
|
+
# type_kind: "STRUCT",
|
448
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
449
|
+
# fields: [
|
450
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
451
|
+
# name: "name",
|
452
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
453
|
+
# ),
|
454
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
455
|
+
# name: "val",
|
456
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
457
|
+
# )
|
458
|
+
# ]
|
459
|
+
# )
|
460
|
+
# )
|
461
|
+
# )
|
462
|
+
# )
|
463
|
+
# ]
|
464
|
+
# end
|
465
|
+
#
|
260
466
|
class StructType
|
261
467
|
##
|
262
|
-
#
|
263
|
-
|
264
|
-
|
468
|
+
# Creates a new, immutable StandardSql::StructType object.
|
469
|
+
#
|
470
|
+
# @overload initialize(fields)
|
471
|
+
# @param [Array<Field>] fields The fields of the struct. Required.
|
472
|
+
#
|
473
|
+
def initialize **kwargs
|
474
|
+
# Convert each field client object to gapi object, if fields given (self.from_gapi does not pass kwargs)
|
475
|
+
kwargs[:fields] = kwargs[:fields]&.map(&:to_gapi) if kwargs[:fields]
|
476
|
+
@gapi = Google::Apis::BigqueryV2::StandardSqlStructType.new(**kwargs)
|
265
477
|
end
|
266
478
|
|
267
479
|
##
|
268
|
-
# The
|
480
|
+
# The fields of the struct.
|
269
481
|
#
|
270
|
-
#
|
271
|
-
#
|
272
|
-
# @return [Array<Field>]
|
482
|
+
# @return [Array<Field>] A frozen array of fields.
|
273
483
|
#
|
274
484
|
def fields
|
275
|
-
Array(@
|
276
|
-
Field.
|
277
|
-
end
|
485
|
+
Array(@gapi.fields).map do |field_gapi|
|
486
|
+
Field.from_gapi field_gapi
|
487
|
+
end.freeze
|
278
488
|
end
|
279
489
|
|
280
490
|
##
|
281
|
-
# @private New
|
282
|
-
def
|
283
|
-
|
284
|
-
|
491
|
+
# @private Returns the underlying Google::Apis::BigqueryV2::StandardSqlStructType object.
|
492
|
+
def to_gapi
|
493
|
+
@gapi
|
494
|
+
end
|
495
|
+
|
496
|
+
##
|
497
|
+
# @private New StandardSql::StructType from a Google::Apis::BigqueryV2::StandardSqlStructType object.
|
498
|
+
def self.from_gapi gapi
|
499
|
+
new.tap do |f|
|
500
|
+
f.instance_variable_set :@gapi, gapi
|
285
501
|
end
|
286
502
|
end
|
287
503
|
end
|
@@ -64,16 +64,21 @@ module Google
|
|
64
64
|
class AsyncInserter
|
65
65
|
include MonitorMixin
|
66
66
|
|
67
|
-
attr_reader :max_bytes
|
67
|
+
attr_reader :max_bytes
|
68
|
+
attr_reader :max_rows
|
69
|
+
attr_reader :interval
|
70
|
+
attr_reader :threads
|
68
71
|
##
|
69
72
|
# @private Implementation accessors
|
70
73
|
attr_reader :table, :batch
|
71
74
|
|
72
75
|
##
|
73
76
|
# @private
|
74
|
-
def initialize table, skip_invalid: nil, ignore_unknown: nil,
|
75
|
-
|
76
|
-
|
77
|
+
def initialize table, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
|
78
|
+
interval: 10, threads: 4, &block
|
79
|
+
# init MonitorMixin
|
80
|
+
super()
|
81
|
+
|
77
82
|
@table = table
|
78
83
|
@skip_invalid = skip_invalid
|
79
84
|
@ignore_unknown = ignore_unknown
|
@@ -86,13 +91,9 @@ module Google
|
|
86
91
|
|
87
92
|
@batch = nil
|
88
93
|
|
89
|
-
@thread_pool = Concurrent::ThreadPoolExecutor.new
|
90
|
-
max_threads: @threads
|
94
|
+
@thread_pool = Concurrent::ThreadPoolExecutor.new max_threads: @threads
|
91
95
|
|
92
96
|
@cond = new_cond
|
93
|
-
|
94
|
-
# init MonitorMixin
|
95
|
-
super()
|
96
97
|
end
|
97
98
|
|
98
99
|
##
|
@@ -100,14 +101,47 @@ module Google
|
|
100
101
|
# collected in batches and inserted together.
|
101
102
|
# See {Google::Cloud::Bigquery::Table#insert_async}.
|
102
103
|
#
|
103
|
-
#
|
104
|
-
#
|
105
|
-
#
|
106
|
-
#
|
107
|
-
#
|
108
|
-
#
|
109
|
-
#
|
110
|
-
#
|
104
|
+
# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's
|
105
|
+
# more complex types:
|
106
|
+
#
|
107
|
+
# | BigQuery | Ruby | Notes |
|
108
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
109
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
110
|
+
# | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
|
111
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
112
|
+
# | `DATE` | `Date` | |
|
113
|
+
# | `GEOGRAPHY` | `String` | |
|
114
|
+
# | `TIMESTAMP` | `Time` | |
|
115
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
116
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
117
|
+
# | `ARRAY` | `Array` | Nested arrays and `nil` values are not supported. |
|
118
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
119
|
+
#
|
120
|
+
# Because BigQuery's streaming API is designed for high insertion
|
121
|
+
# rates, modifications to the underlying table metadata are eventually
|
122
|
+
# consistent when interacting with the streaming system. In most cases
|
123
|
+
# metadata changes are propagated within minutes, but during this
|
124
|
+
# period API responses may reflect the inconsistent state of the
|
125
|
+
# table.
|
126
|
+
#
|
127
|
+
# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
|
128
|
+
# Streaming Data Into BigQuery
|
129
|
+
#
|
130
|
+
# @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
|
131
|
+
# BigQuery Troubleshooting: Metadata errors for streaming inserts
|
132
|
+
#
|
133
|
+
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
|
134
|
+
# containing the data. Required. `BigDecimal` values will be rounded to
|
135
|
+
# scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
|
136
|
+
# rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
|
137
|
+
# instead of `BigDecimal`.
|
138
|
+
# @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
|
139
|
+
# detect duplicate insertion requests on a best-effort basis. For more information, see [data
|
140
|
+
# consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
|
141
|
+
# not provided, the client library will assign a UUID to each row before the request is sent.
|
142
|
+
#
|
143
|
+
# The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of
|
144
|
+
# an ID for a specific row in the array.
|
111
145
|
#
|
112
146
|
def insert rows, insert_ids: nil
|
113
147
|
return nil if rows.nil?
|
@@ -123,8 +157,7 @@ module Google
|
|
123
157
|
unless @batch.try_insert row, insert_id
|
124
158
|
push_batch_request!
|
125
159
|
|
126
|
-
@batch = Batch.new max_bytes: @max_bytes,
|
127
|
-
max_rows: @max_rows
|
160
|
+
@batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
|
128
161
|
@batch.insert row, insert_id
|
129
162
|
end
|
130
163
|
end
|
@@ -214,10 +247,14 @@ module Google
|
|
214
247
|
|
215
248
|
def validate_insert_args rows, insert_ids
|
216
249
|
rows = [rows] if rows.is_a? Hash
|
250
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
251
|
+
|
252
|
+
insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
|
217
253
|
insert_ids = Array insert_ids
|
218
|
-
if insert_ids.count
|
254
|
+
if insert_ids.count.positive? && insert_ids.count != rows.count
|
219
255
|
raise ArgumentError, "insert_ids must be the same size as rows"
|
220
256
|
end
|
257
|
+
|
221
258
|
[rows, insert_ids]
|
222
259
|
end
|
223
260
|
|
@@ -250,22 +287,19 @@ module Google
|
|
250
287
|
json_rows = @batch.json_rows
|
251
288
|
insert_ids = @batch.insert_ids
|
252
289
|
Concurrent::Future.new executor: @thread_pool do
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
ensure
|
267
|
-
@callback.call result if @callback
|
268
|
-
end
|
290
|
+
raise ArgumentError, "No rows provided" if json_rows.empty?
|
291
|
+
insert_resp = @table.service.insert_tabledata_json_rows @table.dataset_id,
|
292
|
+
@table.table_id,
|
293
|
+
json_rows,
|
294
|
+
skip_invalid: @skip_invalid,
|
295
|
+
ignore_unknown: @ignore_unknown,
|
296
|
+
insert_ids: insert_ids
|
297
|
+
|
298
|
+
result = Result.new InsertResponse.from_gapi(orig_rows, insert_resp)
|
299
|
+
rescue StandardError => e
|
300
|
+
result = Result.new nil, e
|
301
|
+
ensure
|
302
|
+
@callback&.call result
|
269
303
|
end.execute
|
270
304
|
|
271
305
|
@batch = nil
|
@@ -275,9 +309,13 @@ module Google
|
|
275
309
|
##
|
276
310
|
# @private
|
277
311
|
class Batch
|
278
|
-
attr_reader :max_bytes
|
312
|
+
attr_reader :max_bytes
|
313
|
+
attr_reader :max_rows
|
314
|
+
attr_reader :rows
|
315
|
+
attr_reader :json_rows
|
316
|
+
attr_reader :insert_ids
|
279
317
|
|
280
|
-
def initialize max_bytes:
|
318
|
+
def initialize max_bytes: 10_000_000, max_rows: 500
|
281
319
|
@max_bytes = max_bytes
|
282
320
|
@max_rows = max_rows
|
283
321
|
@rows = []
|
@@ -293,8 +331,7 @@ module Google
|
|
293
331
|
insert_id ||= SecureRandom.uuid
|
294
332
|
json_row = to_json_row row
|
295
333
|
|
296
|
-
insert_rows_bytes
|
297
|
-
row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
|
334
|
+
insert_rows_bytes row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
|
298
335
|
end
|
299
336
|
|
300
337
|
def try_insert row, insert_id
|
@@ -327,8 +364,13 @@ module Google
|
|
327
364
|
end
|
328
365
|
|
329
366
|
def addl_bytes_for json_row, insert_id
|
330
|
-
|
331
|
-
|
367
|
+
if insert_id == :skip
|
368
|
+
# "{\"json\":},".bytesize #=> 10
|
369
|
+
10 + json_row.to_json.bytesize
|
370
|
+
else
|
371
|
+
# "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
|
372
|
+
24 + json_row.to_json.bytesize + insert_id.bytesize
|
373
|
+
end
|
332
374
|
end
|
333
375
|
end
|
334
376
|
|
@@ -380,7 +422,8 @@ module Google
|
|
380
422
|
@error = error
|
381
423
|
end
|
382
424
|
|
383
|
-
attr_reader :insert_response
|
425
|
+
attr_reader :insert_response
|
426
|
+
attr_reader :error
|
384
427
|
|
385
428
|
##
|
386
429
|
# Checks if an error is present, meaning that the insert operation
|
@@ -78,8 +78,7 @@ module Google
|
|
78
78
|
def next
|
79
79
|
return nil unless next?
|
80
80
|
ensure_service!
|
81
|
-
|
82
|
-
gapi = @service.list_tables @dataset_id, options
|
81
|
+
gapi = @service.list_tables @dataset_id, token: token, max: @max
|
83
82
|
self.class.from_gapi gapi, @service, @dataset_id, @max
|
84
83
|
end
|
85
84
|
|
@@ -131,17 +130,15 @@ module Google
|
|
131
130
|
# puts table.name
|
132
131
|
# end
|
133
132
|
#
|
134
|
-
def all request_limit: nil
|
133
|
+
def all request_limit: nil, &block
|
135
134
|
request_limit = request_limit.to_i if request_limit
|
136
|
-
unless block_given?
|
137
|
-
return enum_for :all, request_limit: request_limit
|
138
|
-
end
|
135
|
+
return enum_for :all, request_limit: request_limit unless block_given?
|
139
136
|
results = self
|
140
137
|
loop do
|
141
|
-
results.each
|
138
|
+
results.each(&block)
|
142
139
|
if request_limit
|
143
140
|
request_limit -= 1
|
144
|
-
break if request_limit
|
141
|
+
break if request_limit.negative?
|
145
142
|
end
|
146
143
|
break unless results.next?
|
147
144
|
results = results.next
|
@@ -151,9 +148,7 @@ module Google
|
|
151
148
|
##
|
152
149
|
# @private New Table::List from a response object.
|
153
150
|
def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
|
154
|
-
tables = List.new(Array(gapi_list.tables).map
|
155
|
-
Table.from_gapi gapi_object, service
|
156
|
-
end)
|
151
|
+
tables = List.new(Array(gapi_list.tables).map { |gapi_object| Table.from_gapi gapi_object, service })
|
157
152
|
tables.instance_variable_set :@token, gapi_list.next_page_token
|
158
153
|
tables.instance_variable_set :@etag, gapi_list.etag
|
159
154
|
tables.instance_variable_set :@total, gapi_list.total_items
|