google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
data/lib/google/cloud/bigquery/standard_sql.rb
@@ -18,89 +18,226 @@ module Google
     module Bigquery
       ##
       # BigQuery standard SQL is compliant with the SQL 2011 standard and has
-      # extensions that support querying nested and repeated data.
+      # extensions that support querying nested and repeated data. See {Routine} and {Argument}.
+      #
+      # @example
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   routine = dataset.create_routine "my_routine" do |r|
+      #     r.routine_type = "SCALAR_FUNCTION"
+      #     r.language = :SQL
+      #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+      #     r.arguments = [
+      #       Google::Cloud::Bigquery::Argument.new(
+      #         name: "arr",
+      #         argument_kind: "FIXED_TYPE",
+      #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+      #           type_kind: "ARRAY",
+      #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+      #             type_kind: "STRUCT",
+      #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+      #               fields: [
+      #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+      #                   name: "name",
+      #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+      #                 ),
+      #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+      #                   name: "val",
+      #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+      #                 )
+      #               ]
+      #             )
+      #           )
+      #         )
+      #       )
+      #     ]
+      #   end
+      #
       module StandardSql
         ##
-        # A field or a column.
+        # A field or a column. See {Routine} and {Argument}.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   routine = dataset.create_routine "my_routine" do |r|
+        #     r.routine_type = "SCALAR_FUNCTION"
+        #     r.language = :SQL
+        #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+        #     r.arguments = [
+        #       Google::Cloud::Bigquery::Argument.new(
+        #         name: "arr",
+        #         argument_kind: "FIXED_TYPE",
+        #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+        #           type_kind: "ARRAY",
+        #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+        #             type_kind: "STRUCT",
+        #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+        #               fields: [
+        #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+        #                   name: "name",
+        #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+        #                 ),
+        #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+        #                   name: "val",
+        #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+        #                 )
+        #               ]
+        #             )
+        #           )
+        #         )
+        #       )
+        #     ]
+        #   end
+        #
         class Field
           ##
-          # @private Create an empty StandardSql::Field object.
-          def initialize
-            @gapi_json = nil
+          # Creates a new, immutable StandardSql::Field object.
+          #
+          # @overload initialize(name, type)
+          #   @param [String] name The name of the field. Optional. Can be absent for struct fields.
+          #   @param [StandardSql::DataType, String] type The type of the field. Optional. Absent if not explicitly
+          #     specified (e.g., `CREATE FUNCTION` statement can omit the return type; in this case the output parameter
+          #     does not have this "type" field).
+          #
+          def initialize **kwargs
+            # Convert client object kwargs to a gapi object
+            kwargs[:type] = DataType.gapi_from_string_or_data_type kwargs[:type] if kwargs[:type]
+            @gapi = Google::Apis::BigqueryV2::StandardSqlField.new(**kwargs)
           end
 
           ##
-          # The name of the field. (Can be absent for struct fields.)
+          # The name of the field. Optional. Can be absent for struct fields.
           #
           # @return [String, nil]
           #
           def name
-            return nil if @gapi_json[:name] == "".freeze
-
-            @gapi_json[:name]
+            return if @gapi.name == "".freeze
+            @gapi.name
           end
 
           ##
-          # The type of the field.
+          # The type of the field. Optional. Absent if not explicitly specified (e.g., `CREATE FUNCTION` statement can
+          # omit the return type; in this case the output parameter does not have this "type" field).
           #
-          # @return [DataType]
+          # @return [DataType, nil] The type of the field.
           #
           def type
-            DataType.from_gapi_json @gapi_json[:type]
+            DataType.from_gapi @gapi.type if @gapi.type
+          end
+
+          ##
+          # @private New Google::Apis::BigqueryV2::StandardSqlField object.
+          def to_gapi
+            @gapi
           end
 
           ##
-          # @private New StandardSql::Field from a JSON object.
-          def self.from_gapi_json gapi_json
+          # @private New StandardSql::Field from a Google::Apis::BigqueryV2::StandardSqlField object.
+          def self.from_gapi gapi
            new.tap do |f|
-              f.instance_variable_set :@gapi_json, gapi_json
+              f.instance_variable_set :@gapi, gapi
            end
          end
        end
 
        ##
-        # The type of a field or a column.
+        # The type of a variable, e.g., a function argument. See {Routine} and {Argument}.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   routine = dataset.create_routine "my_routine" do |r|
+        #     r.routine_type = "SCALAR_FUNCTION"
+        #     r.language = :SQL
+        #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+        #     r.arguments = [
+        #       Google::Cloud::Bigquery::Argument.new(
+        #         name: "arr",
+        #         argument_kind: "FIXED_TYPE",
+        #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+        #           type_kind: "ARRAY",
+        #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+        #             type_kind: "STRUCT",
+        #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+        #               fields: [
+        #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+        #                   name: "name",
+        #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+        #                 ),
+        #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+        #                   name: "val",
+        #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+        #                 )
+        #               ]
+        #             )
+        #           )
+        #         )
+        #       )
+        #     ]
+        #   end
+        #
+        # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
+        #
        class DataType
          ##
-          # @private Create an empty StandardSql::DataType object.
-          def initialize
-            @gapi_json = nil
+          # Creates a new, immutable StandardSql::DataType object.
+          #
+          # @overload initialize(type_kind, array_element_type, struct_type)
+          #   @param [String] type_kind The top level type of this field. Required. Can be [any standard SQL data
+          #     type](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) (e.g., `INT64`, `DATE`,
+          #     `ARRAY`).
+          #   @param [DataType, String] array_element_type The type of the array's elements, if {#type_kind} is `ARRAY`.
+          #     See {#array?}. Optional.
+          #   @param [StructType] struct_type The fields of the struct, in order, if {#type_kind} is `STRUCT`. See
+          #     {#struct?}. Optional.
+          #
+          def initialize **kwargs
+            # Convert client object kwargs to a gapi object
+            if kwargs[:array_element_type]
+              kwargs[:array_element_type] = self.class.gapi_from_string_or_data_type kwargs[:array_element_type]
+            end
+            kwargs[:struct_type] = kwargs[:struct_type].to_gapi if kwargs[:struct_type]
+
+            @gapi = Google::Apis::BigqueryV2::StandardSqlDataType.new(**kwargs)
          end
 
          ##
-          # The top level type of this field.
+          # The top level type of this field. Required. Can be any standard SQL data type (e.g., `INT64`, `DATE`,
+          # `ARRAY`).
          #
-          # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+          # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
          #
-          # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
-          #   Standard SQL Data Types
-          #
-          # @return [String]
+          # @return [String] The upper case type.
          #
          def type_kind
-            @gapi_json[:typeKind]
+            @gapi.type_kind
          end
 
          ##
-          # The type of a fields when DataType is an Array. (See #array?)
+          # The type of the array's elements, if {#type_kind} is `ARRAY`. See {#array?}. Optional.
          #
          # @return [DataType, nil]
          #
          def array_element_type
-            return if @gapi_json[:arrayElementType].nil?
-
-            DataType.from_gapi_json @gapi_json[:arrayElementType]
+            return if @gapi.array_element_type.nil?
+            DataType.from_gapi @gapi.array_element_type
          end
 
          ##
-          # The fields of the struct. (See #struct?)
+          # The fields of the struct, in order, if {#type_kind} is `STRUCT`. See {#struct?}. Optional.
          #
          # @return [StructType, nil]
          #
          def struct_type
-            return if @gapi_json[:structType].nil?
-
-            StructType.from_gapi_json @gapi_json[:structType]
+            return if @gapi.struct_type.nil?
+            StructType.from_gapi @gapi.struct_type
          end
 
          ##
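Note: the `Field.new`/`DataType.new` constructors above are new public API in this release (previously both classes were only built internally from JSON). A minimal sketch of the `type` coercion they introduce, assuming only the code in this hunk (`"amount"` is an illustrative field name):

    require "google/cloud/bigquery"

    # A String or Symbol type is upcased and wrapped in a gapi DataType by
    # DataType.gapi_from_string_or_data_type, so these two fields are equivalent:
    field_a = Google::Cloud::Bigquery::StandardSql::Field.new(
      name: "amount",
      type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
    )
    field_b = Google::Cloud::Bigquery::StandardSql::Field.new name: "amount", type: :int64

    field_b.type.type_kind #=> "INT64"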
@@ -136,6 +273,17 @@ module Google
             type_kind == "NUMERIC".freeze
           end
 
+          ##
+          # Checks if the {#type_kind} of the field is `BIGNUMERIC`.
+          #
+          # @return [Boolean] `true` when `BIGNUMERIC`, `false` otherwise.
+          #
+          # @!group Helpers
+          #
+          def bignumeric?
+            type_kind == "BIGNUMERIC".freeze
+          end
+
           ##
           # Checks if the {#type_kind} of the field is `BOOL`.
           #
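A quick illustration of the new helper next to the existing `numeric?` shown above (behavior follows directly from the two definitions in this hunk):

    bignum = Google::Cloud::Bigquery::StandardSql::DataType.new type_kind: "BIGNUMERIC"
    bignum.bignumeric? #=> true
    bignum.numeric?    #=> false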
@@ -247,41 +395,109 @@ module Google
          end
 
          ##
-          # @private New StandardSql::DataType from a JSON object.
-          def self.from_gapi_json gapi_json
-            new.tap do |dt|
-              dt.instance_variable_set :@gapi_json, gapi_json
+          # @private New Google::Apis::BigqueryV2::StandardSqlDataType object.
+          def to_gapi
+            @gapi
+          end
+
+          ##
+          # @private New StandardSql::DataType from a Google::Apis::BigqueryV2::StandardSqlDataType object.
+          def self.from_gapi gapi
+            new.tap do |f|
+              f.instance_variable_set :@gapi, gapi
+            end
+          end
+
+          ##
+          # @private New Google::Apis::BigqueryV2::StandardSqlDataType from a String or StandardSql::DataType object.
+          def self.gapi_from_string_or_data_type data_type
+            return if data_type.nil?
+            case data_type
+            when StandardSql::DataType
+              data_type.to_gapi
+            when Hash
+              data_type
+            when String, Symbol
+              Google::Apis::BigqueryV2::StandardSqlDataType.new type_kind: data_type.to_s.upcase
+            else
+              raise ArgumentError, "Unable to convert #{data_type} to Google::Apis::BigqueryV2::StandardSqlDataType"
            end
          end
        end
 
        ##
-        # The type of a `STRUCT` field or a column.
+        # The fields of a `STRUCT` type. See {DataType#struct_type}. See {Routine} and {Argument}.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   routine = dataset.create_routine "my_routine" do |r|
+        #     r.routine_type = "SCALAR_FUNCTION"
+        #     r.language = :SQL
+        #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+        #     r.arguments = [
+        #       Google::Cloud::Bigquery::Argument.new(
+        #         name: "arr",
+        #         argument_kind: "FIXED_TYPE",
+        #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+        #           type_kind: "ARRAY",
+        #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+        #             type_kind: "STRUCT",
+        #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+        #               fields: [
+        #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+        #                   name: "name",
+        #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+        #                 ),
+        #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+        #                   name: "val",
+        #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+        #                 )
+        #               ]
+        #             )
+        #           )
+        #         )
+        #       )
+        #     ]
+        #   end
+        #
        class StructType
          ##
-          # @private Create an empty StandardSql::DataType object.
-          def initialize
-            @gapi_json = nil
+          # Creates a new, immutable StandardSql::StructType object.
+          #
+          # @overload initialize(fields)
+          #   @param [Array<Field>] fields The fields of the struct. Required.
+          #
+          def initialize **kwargs
+            # Convert each field client object to gapi object, if fields given (self.from_gapi does not pass kwargs)
+            kwargs[:fields] = kwargs[:fields]&.map(&:to_gapi) if kwargs[:fields]
+            @gapi = Google::Apis::BigqueryV2::StandardSqlStructType.new(**kwargs)
          end
 
          ##
-          # The top level type of this field.
+          # The fields of the struct.
          #
-          # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
-          #
-          # @return [Array<Field>]
+          # @return [Array<Field>] A frozen array of fields.
          #
          def fields
-            Array(@gapi_json[:fields]).map do |field_gapi_json|
-              Field.from_gapi_json field_gapi_json
-            end
+            Array(@gapi.fields).map do |field_gapi|
+              Field.from_gapi field_gapi
+            end.freeze
          end
 
          ##
-          # @private New StandardSql::StructType from a JSON object.
-          def self.from_gapi_json gapi_json
-            new.tap do |st|
-              st.instance_variable_set :@gapi_json, gapi_json
+          # @private New Google::Apis::BigqueryV2::StandardSqlStructType object.
+          def to_gapi
+            @gapi
+          end
+
+          ##
+          # @private New StandardSql::StructType from a Google::Apis::BigqueryV2::StandardSqlStructType object.
+          def self.from_gapi gapi
+            new.tap do |f|
+              f.instance_variable_set :@gapi, gapi
            end
          end
        end
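Taken together, the new constructors let a `STRUCT` type be built directly in client code. A sketch assuming only the API shown in these hunks (`struct?` is one of the helper predicates referenced above; field names are illustrative):

    fields = [
      Google::Cloud::Bigquery::StandardSql::Field.new(name: "id", type: "INT64"),
      Google::Cloud::Bigquery::StandardSql::Field.new(name: "name", type: "STRING")
    ]
    struct = Google::Cloud::Bigquery::StandardSql::StructType.new fields: fields
    record = Google::Cloud::Bigquery::StandardSql::DataType.new(
      type_kind: "STRUCT", struct_type: struct
    )

    record.struct?                          #=> true
    record.struct_type.fields.map(&:name)   #=> ["id", "name"] (a frozen array of Field)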
data/lib/google/cloud/bigquery/table/async_inserter.rb
@@ -64,16 +64,21 @@ module Google
       class AsyncInserter
         include MonitorMixin
 
-        attr_reader :max_bytes, :max_rows, :interval, :threads
+        attr_reader :max_bytes
+        attr_reader :max_rows
+        attr_reader :interval
+        attr_reader :threads
         ##
         # @private Implementation accessors
         attr_reader :table, :batch
 
         ##
         # @private
-        def initialize table, skip_invalid: nil, ignore_unknown: nil,
-                       max_bytes: 10000000, max_rows: 500, interval: 10,
-                       threads: 4, &block
+        def initialize table, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
+                       interval: 10, threads: 4, &block
+          # init MonitorMixin
+          super()
+
           @table = table
           @skip_invalid = skip_invalid
           @ignore_unknown = ignore_unknown
@@ -86,12 +91,9 @@ module Google
 
           @batch = nil
 
-          @thread_pool = Concurrent::FixedThreadPool.new @threads
+          @thread_pool = Concurrent::ThreadPoolExecutor.new max_threads: @threads
 
           @cond = new_cond
-
-          # init MonitorMixin
-          super()
         end
 
         ##
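`AsyncInserter` is not constructed directly; it comes from `Table#insert_async`, whose signature carries the same tuning keywords shown in the new `initialize`. A usage sketch ("my_dataset" and "my_table" are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table = bigquery.dataset("my_dataset").table("my_table")

    # Batches are pushed when they reach max_bytes or max_rows, or after
    # `interval` seconds; `threads` caps the ThreadPoolExecutor above.
    inserter = table.insert_async max_bytes: 5_000_000, max_rows: 200,
                                  interval: 5, threads: 2
    inserter.insert [{ "name" => "Heidi" }]

    inserter.stop.wait! # flush pending rows and shut down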
@@ -99,14 +101,47 @@ module Google
         # collected in batches and inserted together.
         # See {Google::Cloud::Bigquery::Table#insert_async}.
         #
-        # @param [Hash, Array<Hash>] rows A hash object or array of hash
-        #   objects containing the data.
-        # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
-        #   uses this property to detect duplicate insertion requests on a
-        #   best-effort basis. For more information, see [data
-        #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
-        #   Optional. If not provided, the client library will assign a UUID
-        #   to each row before the request is sent.
+        # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's
+        # more complex types:
+        #
+        # | BigQuery     | Ruby                                 | Notes                                            |
+        # |--------------|--------------------------------------|--------------------------------------------------|
+        # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.  |
+        # | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.   |
+        # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.           |
+        # | `DATE`       | `Date`                               |                                                  |
+        # | `GEOGRAPHY`  | `String`                             |                                                  |
+        # | `TIMESTAMP`  | `Time`                               |                                                  |
+        # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                  |
+        # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                  |
+        # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.   |
+        # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.             |
+        #
+        # Because BigQuery's streaming API is designed for high insertion
+        # rates, modifications to the underlying table metadata are eventually
+        # consistent when interacting with the streaming system. In most cases
+        # metadata changes are propagated within minutes, but during this
+        # period API responses may reflect the inconsistent state of the
+        # table.
+        #
+        # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
+        #   Streaming Data Into BigQuery
+        #
+        # @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+        #   BigQuery Troubleshooting: Metadata errors for streaming inserts
+        #
+        # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
+        #   containing the data. Required. `BigDecimal` values will be rounded to
+        #   scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+        #   rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+        #   instead of `BigDecimal`.
+        # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
+        #   detect duplicate insertion requests on a best-effort basis. For more information, see [data
+        #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
+        #   not provided, the client library will assign a UUID to each row before the request is sent.
+        #
+        #   The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of
+        #   an ID for a specific row in the array.
         #
         def insert rows, insert_ids: nil
           return nil if rows.nil?
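A sketch of the `insert_ids` options documented above, using an `inserter` created via `Table#insert_async` (row contents are illustrative):

    rows = [
      { "id" => "a1", "big" => "123456789.123456789012345678" }, # BIGNUMERIC as String
      { "id" => "a2", "big" => "0.000000000000000001" }
    ]

    # Skip insert ID generation for every row:
    inserter.insert rows, insert_ids: :skip

    # Or supply an ID per row, skipping only the second:
    inserter.insert rows, insert_ids: ["dup-check-1", :skip]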
@@ -122,8 +157,7 @@ module Google
             unless @batch.try_insert row, insert_id
               push_batch_request!
 
-              @batch = Batch.new max_bytes: @max_bytes,
-                                 max_rows: @max_rows
+              @batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
               @batch.insert row, insert_id
             end
           end
@@ -213,10 +247,14 @@ module Google
 
         def validate_insert_args rows, insert_ids
           rows = [rows] if rows.is_a? Hash
+          raise ArgumentError, "No rows provided" if rows.empty?
+
+          insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
           insert_ids = Array insert_ids
-          if insert_ids.count > 0 && insert_ids.count != rows.count
+          if insert_ids.count.positive? && insert_ids.count != rows.count
             raise ArgumentError, "insert_ids must be the same size as rows"
           end
+
           [rows, insert_ids]
         end
 
@@ -249,22 +287,19 @@ module Google
           json_rows = @batch.json_rows
           insert_ids = @batch.insert_ids
           Concurrent::Future.new executor: @thread_pool do
-            begin
-              raise ArgumentError, "No rows provided" if json_rows.empty?
-              options = { skip_invalid: @skip_invalid,
-                          ignore_unknown: @ignore_unknown,
-                          insert_ids: insert_ids }
-              insert_resp = @table.service.insert_tabledata_json_rows(
-                @table.dataset_id, @table.table_id, json_rows, options
-              )
-              result = Result.new(
-                InsertResponse.from_gapi(orig_rows, insert_resp)
-              )
-            rescue StandardError => e
-              result = Result.new nil, e
-            ensure
-              @callback.call result if @callback
-            end
+            raise ArgumentError, "No rows provided" if json_rows.empty?
+            insert_resp = @table.service.insert_tabledata_json_rows @table.dataset_id,
+                                                                    @table.table_id,
+                                                                    json_rows,
+                                                                    skip_invalid: @skip_invalid,
+                                                                    ignore_unknown: @ignore_unknown,
+                                                                    insert_ids: insert_ids
+
+            result = Result.new InsertResponse.from_gapi(orig_rows, insert_resp)
+          rescue StandardError => e
+            result = Result.new nil, e
+          ensure
+            @callback&.call result
           end.execute
 
           @batch = nil
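The rewrite can drop the explicit `begin`/`end` because Ruby 2.5+ permits `rescue`/`ensure` clauses directly inside a `do...end` block. A standalone illustration of that language feature with `Concurrent::Future` (not code from the gem):

    require "concurrent"

    pool = Concurrent::ThreadPoolExecutor.new max_threads: 2
    future = Concurrent::Future.new executor: pool do
      raise "boom"
    rescue StandardError => e
      e.message # the rescue's value becomes the future's value
    ensure
      # runs whether or not the block raised
    end
    future.execute
    future.value #=> "boom"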
@@ -274,9 +309,13 @@ module Google
         ##
         # @private
         class Batch
-          attr_reader :max_bytes, :max_rows, :rows, :json_rows, :insert_ids
+          attr_reader :max_bytes
+          attr_reader :max_rows
+          attr_reader :rows
+          attr_reader :json_rows
+          attr_reader :insert_ids
 
-          def initialize max_bytes: 10000000, max_rows: 500
+          def initialize max_bytes: 10_000_000, max_rows: 500
             @max_bytes = max_bytes
             @max_rows = max_rows
             @rows = []
@@ -292,8 +331,7 @@ module Google
            insert_id ||= SecureRandom.uuid
            json_row = to_json_row row
 
-            insert_rows_bytes \
-              row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
+            insert_rows_bytes row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
          end
 
          def try_insert row, insert_id
@@ -326,8 +364,13 @@ module Google
          end
 
          def addl_bytes_for json_row, insert_id
-            # "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
-            24 + json_row.to_json.bytesize + insert_id.bytesize
+            if insert_id == :skip
+              # "{\"json\":},".bytesize #=> 10
+              10 + json_row.to_json.bytesize
+            else
+              # "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
+              24 + json_row.to_json.bytesize + insert_id.bytesize
+            end
          end
        end
 
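The constants 10 and 24 are the byte counts of the JSON wrapper around each row in the request body, with and without an `insertId`. A worked example using the same arithmetic:

    require "json"

    json_row = { "name" => "Heidi" } # to_json => {"name":"Heidi"} (16 bytes)

    # With an ID "a1": {"insertId":"a1","json":{"name":"Heidi"}},
    24 + json_row.to_json.bytesize + "a1".bytesize #=> 42

    # With insert_id == :skip: {"json":{"name":"Heidi"}},
    10 + json_row.to_json.bytesize                 #=> 26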
@@ -379,7 +422,8 @@ module Google
            @error = error
          end
 
-          attr_reader :insert_response, :error
+          attr_reader :insert_response
+          attr_reader :error
 
          ##
          # Checks if an error is present, meaning that the insert operation
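`Result` objects are handed to the callback registered with `Table#insert_async`; a usage sketch (`log_error` and `log_insert` are hypothetical helpers, not part of the gem):

    inserter = table.insert_async do |result|
      if result.error?
        log_error result.error # hypothetical helper
      else
        log_insert "inserted #{result.insert_count} rows " \
                   "with #{result.error_count} errors" # hypothetical helper
      end
    end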
data/lib/google/cloud/bigquery/table/list.rb
@@ -78,8 +78,7 @@ module Google
        def next
          return nil unless next?
          ensure_service!
-          options = { token: token, max: @max }
-          gapi = @service.list_tables @dataset_id, options
+          gapi = @service.list_tables @dataset_id, token: token, max: @max
          self.class.from_gapi gapi, @service, @dataset_id, @max
        end
 
@@ -131,17 +130,15 @@ module Google
        #     puts table.name
        #   end
        #
-        def all request_limit: nil
+        def all request_limit: nil, &block
          request_limit = request_limit.to_i if request_limit
-          unless block_given?
-            return enum_for :all, request_limit: request_limit
-          end
+          return enum_for :all, request_limit: request_limit unless block_given?
          results = self
          loop do
-            results.each { |r| yield r }
+            results.each(&block)
            if request_limit
              request_limit -= 1
-              break if request_limit < 0
+              break if request_limit.negative?
            end
            break unless results.next?
            results = results.next
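A short sketch of `all` with the retained `request_limit` behavior ("my_dataset" is a placeholder); without a block it now returns an `Enumerator` via `enum_for`:

    dataset = bigquery.dataset "my_dataset"

    # At most the first page plus request_limit additional page fetches:
    dataset.tables.all(request_limit: 2) do |table|
      puts table.table_id
    end

    # No block: a lazy enumerator over every table.
    names = dataset.tables.all.map(&:table_id)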
@@ -151,9 +148,7 @@ module Google
        ##
        # @private New Table::List from a response object.
        def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
-          tables = List.new(Array(gapi_list.tables).map do |gapi_object|
-            Table.from_gapi gapi_object, service
-          end)
+          tables = List.new(Array(gapi_list.tables).map { |gapi_object| Table.from_gapi gapi_object, service })
          tables.instance_variable_set :@token, gapi_list.next_page_token
          tables.instance_variable_set :@etag, gapi_list.etag
          tables.instance_variable_set :@total, gapi_list.total_items