google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -18,89 +18,226 @@ module Google
  module Bigquery
    ##
    # BigQuery standard SQL is compliant with the SQL 2011 standard and has
-   # extensions that support querying nested and repeated data.
+   # extensions that support querying nested and repeated data. See {Routine} and {Argument}.
+   #
+   # @example
+   #   require "google/cloud/bigquery"
+   #
+   #   bigquery = Google::Cloud::Bigquery.new
+   #   dataset = bigquery.dataset "my_dataset"
+   #   routine = dataset.create_routine "my_routine" do |r|
+   #     r.routine_type = "SCALAR_FUNCTION"
+   #     r.language = :SQL
+   #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+   #     r.arguments = [
+   #       Google::Cloud::Bigquery::Argument.new(
+   #         name: "arr",
+   #         argument_kind: "FIXED_TYPE",
+   #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+   #           type_kind: "ARRAY",
+   #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+   #             type_kind: "STRUCT",
+   #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+   #               fields: [
+   #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+   #                   name: "name",
+   #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+   #                 ),
+   #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+   #                   name: "val",
+   #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+   #                 )
+   #               ]
+   #             )
+   #           )
+   #         )
+   #       )
+   #     ]
+   #   end
+   #
    module StandardSql
      ##
-     # A field or a column.
+     # A field or a column. See {Routine} and {Argument}.
+     #
+     # @example
+     #   require "google/cloud/bigquery"
+     #
+     #   bigquery = Google::Cloud::Bigquery.new
+     #   dataset = bigquery.dataset "my_dataset"
+     #   routine = dataset.create_routine "my_routine" do |r|
+     #     r.routine_type = "SCALAR_FUNCTION"
+     #     r.language = :SQL
+     #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+     #     r.arguments = [
+     #       Google::Cloud::Bigquery::Argument.new(
+     #         name: "arr",
+     #         argument_kind: "FIXED_TYPE",
+     #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+     #           type_kind: "ARRAY",
+     #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+     #             type_kind: "STRUCT",
+     #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+     #               fields: [
+     #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+     #                   name: "name",
+     #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+     #                 ),
+     #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+     #                   name: "val",
+     #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+     #                 )
+     #               ]
+     #             )
+     #           )
+     #         )
+     #       )
+     #     ]
+     #   end
+     #
      class Field
        ##
-       # @private Create an empty StandardSql::Field object.
-       def initialize
-         @gapi_json = nil
+       # Creates a new, immutable StandardSql::Field object.
+       #
+       # @overload initialize(name, type)
+       #   @param [String] name The name of the field. Optional. Can be absent for struct fields.
+       #   @param [StandardSql::DataType, String] type The type of the field. Optional. Absent if not explicitly
+       #     specified (e.g., `CREATE FUNCTION` statement can omit the return type; in this case the output parameter
+       #     does not have this "type" field).
+       #
+       def initialize **kwargs
+         # Convert client object kwargs to a gapi object
+         kwargs[:type] = DataType.gapi_from_string_or_data_type kwargs[:type] if kwargs[:type]
+         @gapi = Google::Apis::BigqueryV2::StandardSqlField.new(**kwargs)
        end
 
        ##
-       # The name of the field. (Can be absent for struct fields.)
+       # The name of the field. Optional. Can be absent for struct fields.
        #
        # @return [String, nil]
        #
        def name
-         return nil if @gapi_json[:name] == "".freeze
-
-         @gapi_json[:name]
+         return if @gapi.name == "".freeze
+         @gapi.name
        end
 
        ##
-       # The type of the field.
+       # The type of the field. Optional. Absent if not explicitly specified (e.g., `CREATE FUNCTION` statement can
+       # omit the return type; in this case the output parameter does not have this "type" field).
        #
-       # @return [DataType]
+       # @return [DataType, nil] The type of the field.
        #
        def type
-         DataType.from_gapi_json @gapi_json[:type]
+         DataType.from_gapi @gapi.type if @gapi.type
+       end
+
+       ##
+       # @private New Google::Apis::BigqueryV2::StandardSqlField object.
+       def to_gapi
+         @gapi
        end
 
        ##
-       # @private New StandardSql::Field from a JSON object.
-       def self.from_gapi_json gapi_json
+       # @private New StandardSql::Field from a Google::Apis::BigqueryV2::StandardSqlField object.
+       def self.from_gapi gapi
          new.tap do |f|
-           f.instance_variable_set :@gapi_json, gapi_json
+           f.instance_variable_set :@gapi, gapi
          end
        end
      end
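
The new public constructor replaces the former `@private` one. Per `gapi_from_string_or_data_type` (added further down in this diff), the `type:` argument may be a full `StandardSql::DataType` or just a type-name String or Symbol. A minimal sketch of the shorthand, assuming only the API shown in this diff:

```ruby
require "google/cloud/bigquery"

# String and Symbol type names are upcased and wrapped in a DataType.
name_field = Google::Cloud::Bigquery::StandardSql::Field.new name: "name", type: "STRING"
val_field  = Google::Cloud::Bigquery::StandardSql::Field.new name: "val", type: :int64

name_field.type.type_kind #=> "STRING"
val_field.type.type_kind  #=> "INT64"
```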

      ##
-     # The type of a field or a column.
+     # The type of a variable, e.g., a function argument. See {Routine} and {Argument}.
+     #
+     # @example
+     #   require "google/cloud/bigquery"
+     #
+     #   bigquery = Google::Cloud::Bigquery.new
+     #   dataset = bigquery.dataset "my_dataset"
+     #   routine = dataset.create_routine "my_routine" do |r|
+     #     r.routine_type = "SCALAR_FUNCTION"
+     #     r.language = :SQL
+     #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+     #     r.arguments = [
+     #       Google::Cloud::Bigquery::Argument.new(
+     #         name: "arr",
+     #         argument_kind: "FIXED_TYPE",
+     #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+     #           type_kind: "ARRAY",
+     #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+     #             type_kind: "STRUCT",
+     #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+     #               fields: [
+     #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+     #                   name: "name",
+     #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+     #                 ),
+     #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+     #                   name: "val",
+     #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+     #                 )
+     #               ]
+     #             )
+     #           )
+     #         )
+     #       )
+     #     ]
+     #   end
+     #
+     # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
+     #
      class DataType
        ##
-       # @private Create an empty StandardSql::DataType object.
-       def initialize
-         @gapi_json = nil
+       # Creates a new, immutable StandardSql::DataType object.
+       #
+       # @overload initialize(type_kind, array_element_type, struct_type)
+       #   @param [String] type_kind The top level type of this field. Required. Can be [any standard SQL data
+       #     type](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) (e.g., `INT64`, `DATE`,
+       #     `ARRAY`).
+       #   @param [DataType, String] array_element_type The type of the array's elements, if {#type_kind} is `ARRAY`.
+       #     See {#array?}. Optional.
+       #   @param [StructType] struct_type The fields of the struct, in order, if {#type_kind} is `STRUCT`. See
+       #     {#struct?}. Optional.
+       #
+       def initialize **kwargs
+         # Convert client object kwargs to a gapi object
+         if kwargs[:array_element_type]
+           kwargs[:array_element_type] = self.class.gapi_from_string_or_data_type kwargs[:array_element_type]
+         end
+         kwargs[:struct_type] = kwargs[:struct_type].to_gapi if kwargs[:struct_type]
+
+         @gapi = Google::Apis::BigqueryV2::StandardSqlDataType.new(**kwargs)
        end
 
        ##
-       # The top level type of this field.
+       # The top level type of this field. Required. Can be any standard SQL data type (e.g., `INT64`, `DATE`,
+       # `ARRAY`).
        #
-       # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+       # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types Standard SQL Data Types
        #
-       # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
-       #   Standard SQL Data Types
-       #
-       # @return [String]
+       # @return [String] The upper case type.
        #
        def type_kind
-         @gapi_json[:typeKind]
+         @gapi.type_kind
        end
 
        ##
-       # The type of a fields when DataType is an Array. (See #array?)
+       # The type of the array's elements, if {#type_kind} is `ARRAY`. See {#array?}. Optional.
        #
        # @return [DataType, nil]
        #
        def array_element_type
-         return if @gapi_json[:arrayElementType].nil?
-
-         DataType.from_gapi_json @gapi_json[:arrayElementType]
+         return if @gapi.array_element_type.nil?
+         DataType.from_gapi @gapi.array_element_type
        end
 
        ##
-       # The fields of the struct. (See #struct?)
+       # The fields of the struct, in order, if {#type_kind} is `STRUCT`. See {#struct?}. Optional.
        #
        # @return [StructType, nil]
        #
        def struct_type
-         return if @gapi_json[:structType].nil?
-
-         StructType.from_gapi_json @gapi_json[:structType]
+         return if @gapi.struct_type.nil?
+         StructType.from_gapi @gapi.struct_type
        end
 
        ##
@@ -136,6 +273,17 @@ module Google
          type_kind == "NUMERIC".freeze
        end
 
+       ##
+       # Checks if the {#type_kind} of the field is `BIGNUMERIC`.
+       #
+       # @return [Boolean] `true` when `BIGNUMERIC`, `false` otherwise.
+       #
+       # @!group Helpers
+       #
+       def bignumeric?
+         type_kind == "BIGNUMERIC".freeze
+       end
+
        ##
        # Checks if the {#type_kind} of the field is `BOOL`.
        #
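
The new `bignumeric?` predicate follows the same pattern as the existing helpers (`numeric?`, `bool?`, and friends): a string comparison against `type_kind`. A quick sketch:

```ruby
require "google/cloud/bigquery"

data_type = Google::Cloud::Bigquery::StandardSql::DataType.new type_kind: "BIGNUMERIC"

data_type.bignumeric? #=> true
data_type.numeric?    #=> false
```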
@@ -247,41 +395,109 @@ module Google
        end
 
        ##
-       # @private New StandardSql::DataType from a JSON object.
-       def self.from_gapi_json gapi_json
-         new.tap do |dt|
-           dt.instance_variable_set :@gapi_json, gapi_json
+       # @private New Google::Apis::BigqueryV2::StandardSqlDataType object.
+       def to_gapi
+         @gapi
+       end
+
+       ##
+       # @private New StandardSql::DataType from a Google::Apis::BigqueryV2::StandardSqlDataType object.
+       def self.from_gapi gapi
+         new.tap do |f|
+           f.instance_variable_set :@gapi, gapi
+         end
+       end
+
+       ##
+       # @private New Google::Apis::BigqueryV2::StandardSqlDataType from a String or StandardSql::DataType object.
+       def self.gapi_from_string_or_data_type data_type
+         return if data_type.nil?
+         case data_type
+         when StandardSql::DataType
+           data_type.to_gapi
+         when Hash
+           data_type
+         when String, Symbol
+           Google::Apis::BigqueryV2::StandardSqlDataType.new type_kind: data_type.to_s.upcase
+         else
+           raise ArgumentError, "Unable to convert #{data_type} to Google::Apis::BigqueryV2::StandardSqlDataType"
          end
        end
      end
427
 
258
428
  ##
259
- # The type of a `STRUCT` field or a column.
429
+ # The fields of a `STRUCT` type. See {DataType#struct_type}. See {Routine} and {Argument}.
430
+ #
431
+ # @example
432
+ # require "google/cloud/bigquery"
433
+ #
434
+ # bigquery = Google::Cloud::Bigquery.new
435
+ # dataset = bigquery.dataset "my_dataset"
436
+ # routine = dataset.create_routine "my_routine" do |r|
437
+ # r.routine_type = "SCALAR_FUNCTION"
438
+ # r.language = :SQL
439
+ # r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
440
+ # r.arguments = [
441
+ # Google::Cloud::Bigquery::Argument.new(
442
+ # name: "arr",
443
+ # argument_kind: "FIXED_TYPE",
444
+ # data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
445
+ # type_kind: "ARRAY",
446
+ # array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
447
+ # type_kind: "STRUCT",
448
+ # struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
449
+ # fields: [
450
+ # Google::Cloud::Bigquery::StandardSql::Field.new(
451
+ # name: "name",
452
+ # type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
453
+ # ),
454
+ # Google::Cloud::Bigquery::StandardSql::Field.new(
455
+ # name: "val",
456
+ # type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
457
+ # )
458
+ # ]
459
+ # )
460
+ # )
461
+ # )
462
+ # )
463
+ # ]
464
+ # end
465
+ #
260
466
  class StructType
261
467
  ##
262
- # @private Create an empty StandardSql::DataType object.
263
- def initialize
264
- @gapi_json = nil
468
+ # Creates a new, immutable StandardSql::StructType object.
469
+ #
470
+ # @overload initialize(fields)
471
+ # @param [Array<Field>] fields The fields of the struct. Required.
472
+ #
473
+ def initialize **kwargs
474
+ # Convert each field client object to gapi object, if fields given (self.from_gapi does not pass kwargs)
475
+ kwargs[:fields] = kwargs[:fields]&.map(&:to_gapi) if kwargs[:fields]
476
+ @gapi = Google::Apis::BigqueryV2::StandardSqlStructType.new(**kwargs)
265
477
  end
266
478
 
267
479
  ##
268
- # The top level type of this field.
480
+ # The fields of the struct.
269
481
  #
270
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
271
- #
272
- # @return [Array<Field>]
482
+ # @return [Array<Field>] A frozen array of fields.
273
483
  #
274
484
  def fields
275
- Array(@gapi_json[:fields]).map do |field_gapi_json|
276
- Field.from_gapi_json field_gapi_json
277
- end
485
+ Array(@gapi.fields).map do |field_gapi|
486
+ Field.from_gapi field_gapi
487
+ end.freeze
278
488
  end
279
489
 
280
490
  ##
281
- # @private New StandardSql::StructType from a JSON object.
282
- def self.from_gapi_json gapi_json
283
- new.tap do |st|
284
- st.instance_variable_set :@gapi_json, gapi_json
491
+ # @private New Google::Apis::BigqueryV2::StandardSqlStructType object.
492
+ def to_gapi
493
+ @gapi
494
+ end
495
+
496
+ ##
497
+ # @private New StandardSql::StructType from a Google::Apis::BigqueryV2::StandardSqlStructType object.
498
+ def self.from_gapi gapi
499
+ new.tap do |f|
500
+ f.instance_variable_set :@gapi, gapi
285
501
  end
286
502
  end
287
503
  end
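
Round-tripping works as well: `fields` converts the stored gapi objects back into wrapper objects and freezes the resulting array. A short sketch, reusing the String type shorthand from above:

```ruby
require "google/cloud/bigquery"

struct_type = Google::Cloud::Bigquery::StandardSql::StructType.new(
  fields: [
    Google::Cloud::Bigquery::StandardSql::Field.new(name: "name", type: "STRING"),
    Google::Cloud::Bigquery::StandardSql::Field.new(name: "val", type: "INT64")
  ]
)

struct_type.fields.map(&:name) #=> ["name", "val"]
struct_type.fields.frozen?     #=> true
```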
@@ -64,16 +64,21 @@ module Google
  class AsyncInserter
    include MonitorMixin
 
-   attr_reader :max_bytes, :max_rows, :interval, :threads
+   attr_reader :max_bytes
+   attr_reader :max_rows
+   attr_reader :interval
+   attr_reader :threads
    ##
    # @private Implementation accessors
    attr_reader :table, :batch
 
    ##
    # @private
-   def initialize table, skip_invalid: nil, ignore_unknown: nil,
-                  max_bytes: 10000000, max_rows: 500, interval: 10,
-                  threads: 4, &block
+   def initialize table, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
+                  interval: 10, threads: 4, &block
+     # init MonitorMixin
+     super()
+
      @table = table
      @skip_invalid = skip_invalid
      @ignore_unknown = ignore_unknown
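
The constructor stays `@private`; an `AsyncInserter` is obtained through `Table#insert_async`, which forwards these keyword arguments. A minimal sketch, assuming a `my_dataset.my_table` table exists:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table "my_table"

# A batch is flushed when it reaches max_bytes or max_rows, or after
# `interval` seconds, whichever comes first.
inserter = table.insert_async max_bytes: 5_000_000, max_rows: 200, interval: 5

inserter.insert [{ "first_name" => "Alice", "age" => 21 }]

inserter.stop.wait!
```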
@@ -86,13 +91,9 @@ module Google
 
      @batch = nil
 
-     @thread_pool = Concurrent::ThreadPoolExecutor.new \
-       max_threads: @threads
+     @thread_pool = Concurrent::ThreadPoolExecutor.new max_threads: @threads
 
      @cond = new_cond
-
-     # init MonitorMixin
-     super()
    end
 
    ##
@@ -100,14 +101,47 @@ module Google
    # collected in batches and inserted together.
    # See {Google::Cloud::Bigquery::Table#insert_async}.
    #
-   # @param [Hash, Array<Hash>] rows A hash object or array of hash
-   #   objects containing the data.
-   # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
-   #   uses this property to detect duplicate insertion requests on a
-   #   best-effort basis. For more information, see [data
-   #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
-   #   Optional. If not provided, the client library will assign a UUID
-   #   to each row before the request is sent.
+   # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's
+   # more complex types:
+   #
+   # | BigQuery     | Ruby                                 | Notes                                           |
+   # |--------------|--------------------------------------|-------------------------------------------------|
+   # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9. |
+   # | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.  |
+   # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.          |
+   # | `DATE`       | `Date`                               |                                                 |
+   # | `GEOGRAPHY`  | `String`                             |                                                 |
+   # | `TIMESTAMP`  | `Time`                               |                                                 |
+   # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                 |
+   # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                 |
+   # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.  |
+   # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.            |
+   #
+   # Because BigQuery's streaming API is designed for high insertion
+   # rates, modifications to the underlying table metadata are eventually
+   # consistent when interacting with the streaming system. In most cases
+   # metadata changes are propagated within minutes, but during this
+   # period API responses may reflect the inconsistent state of the
+   # table.
+   #
+   # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
+   #   Streaming Data Into BigQuery
+   #
+   # @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+   #   BigQuery Troubleshooting: Metadata errors for streaming inserts
+   #
+   # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
+   #   containing the data. Required. `BigDecimal` values will be rounded to
+   #   scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+   #   rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+   #   instead of `BigDecimal`.
+   # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
+   #   detect duplicate insertion requests on a best-effort basis. For more information, see [data
+   #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
+   #   not provided, the client library will assign a UUID to each row before the request is sent.
+   #
+   #   The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of
+   #   an ID for a specific row in the array.
    #
    def insert rows, insert_ids: nil
      return nil if rows.nil?
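
A sketch of the new `insert_ids` behavior described above, reusing the `inserter` from the earlier sketch; `:skip` disables client-side ID generation (and the best-effort deduplication it enables) either for all rows or per row:

```ruby
rows = [
  { "first_name" => "Alice", "age" => 21 },
  { "first_name" => "Bob",   "age" => 22 }
]

# Skip ID generation for every row...
inserter.insert rows, insert_ids: :skip

# ...or mix explicit IDs with :skip for individual rows.
inserter.insert rows, insert_ids: ["row-alice", :skip]
```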
@@ -123,8 +157,7 @@ module Google
        unless @batch.try_insert row, insert_id
          push_batch_request!
 
-         @batch = Batch.new max_bytes: @max_bytes,
-                            max_rows: @max_rows
+         @batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
          @batch.insert row, insert_id
        end
      end
@@ -214,10 +247,14 @@ module Google
 
    def validate_insert_args rows, insert_ids
      rows = [rows] if rows.is_a? Hash
+     raise ArgumentError, "No rows provided" if rows.empty?
+
+     insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
      insert_ids = Array insert_ids
-     if insert_ids.count > 0 && insert_ids.count != rows.count
+     if insert_ids.count.positive? && insert_ids.count != rows.count
        raise ArgumentError, "insert_ids must be the same size as rows"
      end
+
      [rows, insert_ids]
    end
 
@@ -250,22 +287,19 @@ module Google
      json_rows = @batch.json_rows
      insert_ids = @batch.insert_ids
      Concurrent::Future.new executor: @thread_pool do
-       begin
-         raise ArgumentError, "No rows provided" if json_rows.empty?
-         options = { skip_invalid: @skip_invalid,
-                     ignore_unknown: @ignore_unknown,
-                     insert_ids: insert_ids }
-         insert_resp = @table.service.insert_tabledata_json_rows(
-           @table.dataset_id, @table.table_id, json_rows, options
-         )
-         result = Result.new(
-           InsertResponse.from_gapi(orig_rows, insert_resp)
-         )
-       rescue StandardError => e
-         result = Result.new nil, e
-       ensure
-         @callback.call result if @callback
-       end
+       raise ArgumentError, "No rows provided" if json_rows.empty?
+       insert_resp = @table.service.insert_tabledata_json_rows @table.dataset_id,
+                                                               @table.table_id,
+                                                               json_rows,
+                                                               skip_invalid: @skip_invalid,
+                                                               ignore_unknown: @ignore_unknown,
+                                                               insert_ids: insert_ids
+
+       result = Result.new InsertResponse.from_gapi(orig_rows, insert_resp)
+     rescue StandardError => e
+       result = Result.new nil, e
+     ensure
+       @callback&.call result
      end.execute
 
      @batch = nil
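
Each flushed batch runs on the thread pool, and the callback passed to `Table#insert_async` (stored as `@callback`) receives a `Result` wrapping either the `InsertResponse` or the raised error. A hedged sketch, assuming the `error?` and `insert_count` helpers this version exposes on the result objects:

```ruby
inserter = table.insert_async do |result|
  if result.error?
    puts "batch failed: #{result.error.message}"
  else
    puts "batch inserted #{result.insert_response.insert_count} rows"
  end
end
```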
@@ -275,9 +309,13 @@ module Google
    ##
    # @private
    class Batch
-     attr_reader :max_bytes, :max_rows, :rows, :json_rows, :insert_ids
+     attr_reader :max_bytes
+     attr_reader :max_rows
+     attr_reader :rows
+     attr_reader :json_rows
+     attr_reader :insert_ids
 
-     def initialize max_bytes: 10000000, max_rows: 500
+     def initialize max_bytes: 10_000_000, max_rows: 500
        @max_bytes = max_bytes
        @max_rows = max_rows
        @rows = []
@@ -293,8 +331,7 @@ module Google
        insert_id ||= SecureRandom.uuid
        json_row = to_json_row row
 
-       insert_rows_bytes \
-         row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
+       insert_rows_bytes row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
      end
 
      def try_insert row, insert_id
@@ -327,8 +364,13 @@ module Google
      end
 
      def addl_bytes_for json_row, insert_id
-       # "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
-       24 + json_row.to_json.bytesize + insert_id.bytesize
+       if insert_id == :skip
+         # "{\"json\":},".bytesize #=> 10
+         10 + json_row.to_json.bytesize
+       else
+         # "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
+         24 + json_row.to_json.bytesize + insert_id.bytesize
+       end
      end
    end
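
The 24- and 10-byte constants are the size of the JSON wrapper each row receives in the request body, with and without an `insertId`. A worked check of the arithmetic:

```ruby
require "json"

json_row = { "first_name" => "Alice" }   # to_json is 22 bytes
insert_id = "a-uuid-string"              # 13 bytes

"{\"insertId\":\"\",\"json\":},".bytesize              #=> 24
24 + json_row.to_json.bytesize + insert_id.bytesize #=> 59

"{\"json\":},".bytesize          #=> 10
10 + json_row.to_json.bytesize  #=> 32
```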
 
@@ -380,7 +422,8 @@ module Google
        @error = error
      end
 
-     attr_reader :insert_response, :error
+     attr_reader :insert_response
+     attr_reader :error
 
      ##
      # Checks if an error is present, meaning that the insert operation
@@ -78,8 +78,7 @@ module Google
    def next
      return nil unless next?
      ensure_service!
-     options = { token: token, max: @max }
-     gapi = @service.list_tables @dataset_id, options
+     gapi = @service.list_tables @dataset_id, token: token, max: @max
      self.class.from_gapi gapi, @service, @dataset_id, @max
    end
 
@@ -131,17 +130,15 @@ module Google
    #     puts table.name
    #   end
    #
-   def all request_limit: nil
+   def all request_limit: nil, &block
      request_limit = request_limit.to_i if request_limit
-     unless block_given?
-       return enum_for :all, request_limit: request_limit
-     end
+     return enum_for :all, request_limit: request_limit unless block_given?
      results = self
      loop do
-       results.each { |r| yield r }
+       results.each(&block)
        if request_limit
          request_limit -= 1
-         break if request_limit < 0
+         break if request_limit.negative?
        end
        break unless results.next?
        results = results.next
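
`all` now forwards its block (and returns an `Enumerator` via `enum_for` when none is given), while `request_limit` still caps how many additional API requests may be made. A sketch of both forms:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

# Block form: further pages are fetched transparently, at most one extra request here.
dataset.tables.all(request_limit: 1) { |table| puts table.table_id }

# Enumerator form, courtesy of enum_for:
first_ten = dataset.tables.all.first 10
```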
@@ -151,9 +148,7 @@ module Google
    ##
    # @private New Table::List from a response object.
    def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
-     tables = List.new(Array(gapi_list.tables).map do |gapi_object|
-       Table.from_gapi gapi_object, service
-     end)
+     tables = List.new(Array(gapi_list.tables).map { |gapi_object| Table.from_gapi gapi_object, service })
      tables.instance_variable_set :@token, gapi_list.next_page_token
      tables.instance_variable_set :@etag, gapi_list.etag
      tables.instance_variable_set :@total, gapi_list.total_items