google-cloud-bigquery 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -34,6 +34,7 @@ module Google
|
|
34
34
|
#
|
35
35
|
# field = table.schema.field "name"
|
36
36
|
# field.required? #=> true
|
37
|
+
#
|
37
38
|
class Field
|
38
39
|
# @private
|
39
40
|
MODES = %w( NULLABLE REQUIRED REPEATED )
|
@@ -45,6 +46,11 @@ module Google
|
|
45
46
|
##
|
46
47
|
# The name of the field.
|
47
48
|
#
|
49
|
+
# @return [String] The field name. The name must contain only
|
50
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
51
|
+
# start with a letter or underscore. The maximum length is 128
|
52
|
+
# characters.
|
53
|
+
#
|
48
54
|
def name
|
49
55
|
@gapi.name
|
50
56
|
end
|
@@ -52,19 +58,38 @@ module Google
|
|
52
58
|
##
|
53
59
|
# Updates the name of the field.
|
54
60
|
#
|
61
|
+
# @param [String] new_name The field name. The name must contain only
|
62
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
63
|
+
# start with a letter or underscore. The maximum length is 128
|
64
|
+
# characters.
|
65
|
+
#
|
55
66
|
def name= new_name
|
56
67
|
@gapi.update! name: String(new_name)
|
57
68
|
end
|
58
69
|
|
59
70
|
##
|
60
|
-
# The type of the field.
|
71
|
+
# The data type of the field.
|
72
|
+
#
|
73
|
+
# @return [String] The field data type. Possible values include
|
74
|
+
# `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
|
75
|
+
# `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
|
76
|
+
# `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
|
77
|
+
# (where `RECORD` indicates that the field contains a nested schema)
|
78
|
+
# or `STRUCT` (same as `RECORD`).
|
61
79
|
#
|
62
80
|
def type
|
63
81
|
@gapi.type
|
64
82
|
end
|
65
83
|
|
66
84
|
##
|
67
|
-
# Updates the type of the field.
|
85
|
+
# Updates the data type of the field.
|
86
|
+
#
|
87
|
+
# @param [String] new_type The data type. Possible values include
|
88
|
+
# `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
|
89
|
+
# `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
|
90
|
+
# `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
|
91
|
+
# (where `RECORD` indicates that the field contains a nested schema)
|
92
|
+
# or `STRUCT` (same as `RECORD`).
|
68
93
|
#
|
69
94
|
def type= new_type
|
70
95
|
@gapi.update! type: verify_type(new_type)
|
@@ -72,18 +97,27 @@ module Google
|
|
72
97
|
|
73
98
|
##
|
74
99
|
# Checks if the mode of the field is `NULLABLE`.
|
100
|
+
#
|
101
|
+
# @return [Boolean] `true` when `NULLABLE`, `false` otherwise.
|
102
|
+
#
|
75
103
|
def nullable?
|
76
104
|
mode == "NULLABLE"
|
77
105
|
end
|
78
106
|
|
79
107
|
##
|
80
108
|
# Checks if the mode of the field is `REQUIRED`.
|
109
|
+
#
|
110
|
+
# @return [Boolean] `true` when `REQUIRED`, `false` otherwise.
|
111
|
+
#
|
81
112
|
def required?
|
82
113
|
mode == "REQUIRED"
|
83
114
|
end
|
84
115
|
|
85
116
|
##
|
86
117
|
# Checks if the mode of the field is `REPEATED`.
|
118
|
+
#
|
119
|
+
# @return [Boolean] `true` when `REPEATED`, `false` otherwise.
|
120
|
+
#
|
87
121
|
def repeated?
|
88
122
|
mode == "REPEATED"
|
89
123
|
end
|
@@ -91,6 +125,9 @@ module Google
|
|
91
125
|
##
|
92
126
|
# The description of the field.
|
93
127
|
#
|
128
|
+
# @return [String] The field description. The maximum length is 1,024
|
129
|
+
# characters.
|
130
|
+
#
|
94
131
|
def description
|
95
132
|
@gapi.description
|
96
133
|
end
|
@@ -98,6 +135,9 @@ module Google
|
|
98
135
|
##
|
99
136
|
# Updates the description of the field.
|
100
137
|
#
|
138
|
+
# @param [String] new_description The field description. The maximum
|
139
|
+
# length is 1,024 characters.
|
140
|
+
#
|
101
141
|
def description= new_description
|
102
142
|
@gapi.update! description: new_description
|
103
143
|
end
|
@@ -105,6 +145,9 @@ module Google
|
|
105
145
|
##
|
106
146
|
# The mode of the field.
|
107
147
|
#
|
148
|
+
# @return [String] The field mode. Possible values include `NULLABLE`,
|
149
|
+
# `REQUIRED` and `REPEATED`. The default value is `NULLABLE`.
|
150
|
+
#
|
108
151
|
def mode
|
109
152
|
@gapi.mode
|
110
153
|
end
|
@@ -112,66 +155,100 @@ module Google
|
|
112
155
|
##
|
113
156
|
# Updates the mode of the field.
|
114
157
|
#
|
158
|
+
# @param [String] new_mode The field mode. Possible values include
|
159
|
+
# `NULLABLE`, `REQUIRED` and `REPEATED`. The default value is
|
160
|
+
# `NULLABLE`.
|
161
|
+
#
|
115
162
|
def mode= new_mode
|
116
163
|
@gapi.update! mode: verify_mode(new_mode)
|
117
164
|
end
|
118
165
|
|
119
166
|
##
|
120
167
|
# Checks if the mode of the field is `STRING`.
|
168
|
+
#
|
169
|
+
# @return [Boolean] `true` when `STRING`, `false` otherwise.
|
170
|
+
#
|
121
171
|
def string?
|
122
172
|
mode == "STRING"
|
123
173
|
end
|
124
174
|
|
125
175
|
##
|
126
176
|
# Checks if the mode of the field is `INTEGER`.
|
177
|
+
#
|
178
|
+
# @return [Boolean] `true` when `INTEGER`, `false` otherwise.
|
179
|
+
#
|
127
180
|
def integer?
|
128
181
|
mode == "INTEGER"
|
129
182
|
end
|
130
183
|
|
131
184
|
##
|
132
185
|
# Checks if the mode of the field is `FLOAT`.
|
186
|
+
#
|
187
|
+
# @return [Boolean] `true` when `FLOAT`, `false` otherwise.
|
188
|
+
#
|
133
189
|
def float?
|
134
190
|
mode == "FLOAT"
|
135
191
|
end
|
136
192
|
|
137
193
|
##
|
138
194
|
# Checks if the mode of the field is `BOOLEAN`.
|
195
|
+
#
|
196
|
+
# @return [Boolean] `true` when `BOOLEAN`, `false` otherwise.
|
197
|
+
#
|
139
198
|
def boolean?
|
140
199
|
mode == "BOOLEAN"
|
141
200
|
end
|
142
201
|
|
143
202
|
##
|
144
203
|
# Checks if the mode of the field is `BYTES`.
|
204
|
+
#
|
205
|
+
# @return [Boolean] `true` when `BYTES`, `false` otherwise.
|
206
|
+
#
|
145
207
|
def bytes?
|
146
208
|
mode == "BYTES"
|
147
209
|
end
|
148
210
|
|
149
211
|
##
|
150
212
|
# Checks if the mode of the field is `TIMESTAMP`.
|
213
|
+
#
|
214
|
+
# @return [Boolean] `true` when `TIMESTAMP`, `false` otherwise.
|
215
|
+
#
|
151
216
|
def timestamp?
|
152
217
|
mode == "TIMESTAMP"
|
153
218
|
end
|
154
219
|
|
155
220
|
##
|
156
221
|
# Checks if the mode of the field is `TIME`.
|
222
|
+
#
|
223
|
+
# @return [Boolean] `true` when `TIME`, `false` otherwise.
|
224
|
+
#
|
157
225
|
def time?
|
158
226
|
mode == "TIME"
|
159
227
|
end
|
160
228
|
|
161
229
|
##
|
162
230
|
# Checks if the mode of the field is `DATETIME`.
|
231
|
+
#
|
232
|
+
# @return [Boolean] `true` when `DATETIME`, `false` otherwise.
|
233
|
+
#
|
163
234
|
def datetime?
|
164
235
|
mode == "DATETIME"
|
165
236
|
end
|
166
237
|
|
167
238
|
##
|
168
239
|
# Checks if the mode of the field is `DATE`.
|
240
|
+
#
|
241
|
+
# @return [Boolean] `true` when `DATE`, `false` otherwise.
|
242
|
+
#
|
169
243
|
def date?
|
170
244
|
mode == "DATE"
|
171
245
|
end
|
172
246
|
|
173
247
|
##
|
174
248
|
# Checks if the mode of the field is `RECORD`.
|
249
|
+
#
|
250
|
+
# @return [Boolean] `true` when `RECORD`, `false` otherwise.
|
251
|
+
#
|
175
252
|
def record?
|
176
253
|
mode == "RECORD"
|
177
254
|
end
|
@@ -179,6 +256,10 @@ module Google
|
|
179
256
|
##
|
180
257
|
# The nested fields if the type property is set to `RECORD`. Will be
|
181
258
|
# empty otherwise.
|
259
|
+
#
|
260
|
+
# @return [Array<Field>, nil] The nested schema fields if the type
|
261
|
+
# is set to `RECORD`.
|
262
|
+
#
|
182
263
|
def fields
|
183
264
|
if frozen?
|
184
265
|
Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
|
@@ -190,13 +271,20 @@ module Google
|
|
190
271
|
##
|
191
272
|
# The names of the nested fields as symbols if the type property is
|
192
273
|
# set to `RECORD`. Will be empty otherwise.
|
274
|
+
#
|
275
|
+
# @return [Array<Symbol>, nil] The names of the nested schema fields
|
276
|
+
# if the type is set to `RECORD`.
|
277
|
+
#
|
193
278
|
def headers
|
194
279
|
fields.map(&:name).map(&:to_sym)
|
195
280
|
end
|
196
281
|
|
197
282
|
##
|
198
|
-
#
|
283
|
+
# Retrieve a nested field by name, if the type property is
|
199
284
|
# set to `RECORD`. Will return `nil` otherwise.
|
285
|
+
#
|
286
|
+
# @return [Field, nil] The nested schema field object, or `nil`.
|
287
|
+
#
|
200
288
|
def field name
|
201
289
|
f = fields.find { |fld| fld.name == name.to_s }
|
202
290
|
return nil if f.nil?
|
@@ -205,7 +293,7 @@ module Google
|
|
205
293
|
end
|
206
294
|
|
207
295
|
##
|
208
|
-
# Adds a string field to the schema.
|
296
|
+
# Adds a string field to the nested schema of a record field.
|
209
297
|
#
|
210
298
|
# This can only be called on fields that are of type `RECORD`.
|
211
299
|
#
|
@@ -217,6 +305,7 @@ module Google
|
|
217
305
|
# @param [Symbol] mode The field's mode. The possible values are
|
218
306
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
219
307
|
# `:nullable`.
|
308
|
+
#
|
220
309
|
def string name, description: nil, mode: :nullable
|
221
310
|
record_check!
|
222
311
|
|
@@ -224,7 +313,7 @@ module Google
|
|
224
313
|
end
|
225
314
|
|
226
315
|
##
|
227
|
-
# Adds an integer field to the schema.
|
316
|
+
# Adds an integer field to the nested schema of a record field.
|
228
317
|
#
|
229
318
|
# This can only be called on fields that are of type `RECORD`.
|
230
319
|
#
|
@@ -236,6 +325,7 @@ module Google
|
|
236
325
|
# @param [Symbol] mode The field's mode. The possible values are
|
237
326
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
238
327
|
# `:nullable`.
|
328
|
+
#
|
239
329
|
def integer name, description: nil, mode: :nullable
|
240
330
|
record_check!
|
241
331
|
|
@@ -243,7 +333,8 @@ module Google
|
|
243
333
|
end
|
244
334
|
|
245
335
|
##
|
246
|
-
# Adds a floating-point number field to the schema.
|
336
|
+
# Adds a floating-point number field to the nested schema of a record
|
337
|
+
# field.
|
247
338
|
#
|
248
339
|
# This can only be called on fields that are of type `RECORD`.
|
249
340
|
#
|
@@ -255,6 +346,7 @@ module Google
|
|
255
346
|
# @param [Symbol] mode The field's mode. The possible values are
|
256
347
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
257
348
|
# `:nullable`.
|
349
|
+
#
|
258
350
|
def float name, description: nil, mode: :nullable
|
259
351
|
record_check!
|
260
352
|
|
@@ -262,7 +354,7 @@ module Google
|
|
262
354
|
end
|
263
355
|
|
264
356
|
##
|
265
|
-
# Adds a boolean field to the schema.
|
357
|
+
# Adds a boolean field to the nested schema of a record field.
|
266
358
|
#
|
267
359
|
# This can only be called on fields that are of type `RECORD`.
|
268
360
|
#
|
@@ -274,6 +366,7 @@ module Google
|
|
274
366
|
# @param [Symbol] mode The field's mode. The possible values are
|
275
367
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
276
368
|
# `:nullable`.
|
369
|
+
#
|
277
370
|
def boolean name, description: nil, mode: :nullable
|
278
371
|
record_check!
|
279
372
|
|
@@ -281,7 +374,7 @@ module Google
|
|
281
374
|
end
|
282
375
|
|
283
376
|
##
|
284
|
-
# Adds a bytes field to the schema.
|
377
|
+
# Adds a bytes field to the nested schema of a record field.
|
285
378
|
#
|
286
379
|
# This can only be called on fields that are of type `RECORD`.
|
287
380
|
#
|
@@ -293,6 +386,7 @@ module Google
|
|
293
386
|
# @param [Symbol] mode The field's mode. The possible values are
|
294
387
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
295
388
|
# `:nullable`.
|
389
|
+
#
|
296
390
|
def bytes name, description: nil, mode: :nullable
|
297
391
|
record_check!
|
298
392
|
|
@@ -300,7 +394,7 @@ module Google
|
|
300
394
|
end
|
301
395
|
|
302
396
|
##
|
303
|
-
# Adds a timestamp field to the schema.
|
397
|
+
# Adds a timestamp field to the nested schema of a record field.
|
304
398
|
#
|
305
399
|
# This can only be called on fields that are of type `RECORD`.
|
306
400
|
#
|
@@ -312,6 +406,7 @@ module Google
|
|
312
406
|
# @param [Symbol] mode The field's mode. The possible values are
|
313
407
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
314
408
|
# `:nullable`.
|
409
|
+
#
|
315
410
|
def timestamp name, description: nil, mode: :nullable
|
316
411
|
record_check!
|
317
412
|
|
@@ -319,7 +414,7 @@ module Google
|
|
319
414
|
end
|
320
415
|
|
321
416
|
##
|
322
|
-
# Adds a time field to the schema.
|
417
|
+
# Adds a time field to the nested schema of a record field.
|
323
418
|
#
|
324
419
|
# This can only be called on fields that are of type `RECORD`.
|
325
420
|
#
|
@@ -331,6 +426,7 @@ module Google
|
|
331
426
|
# @param [Symbol] mode The field's mode. The possible values are
|
332
427
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
333
428
|
# `:nullable`.
|
429
|
+
#
|
334
430
|
def time name, description: nil, mode: :nullable
|
335
431
|
record_check!
|
336
432
|
|
@@ -338,7 +434,7 @@ module Google
|
|
338
434
|
end
|
339
435
|
|
340
436
|
##
|
341
|
-
# Adds a datetime field to the schema.
|
437
|
+
# Adds a datetime field to the nested schema of a record field.
|
342
438
|
#
|
343
439
|
# This can only be called on fields that are of type `RECORD`.
|
344
440
|
#
|
@@ -350,6 +446,7 @@ module Google
|
|
350
446
|
# @param [Symbol] mode The field's mode. The possible values are
|
351
447
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
352
448
|
# `:nullable`.
|
449
|
+
#
|
353
450
|
def datetime name, description: nil, mode: :nullable
|
354
451
|
record_check!
|
355
452
|
|
@@ -357,7 +454,7 @@ module Google
|
|
357
454
|
end
|
358
455
|
|
359
456
|
##
|
360
|
-
# Adds a date field to the schema.
|
457
|
+
# Adds a date field to the nested schema of a record field.
|
361
458
|
#
|
362
459
|
# This can only be called on fields that are of type `RECORD`.
|
363
460
|
#
|
@@ -369,6 +466,7 @@ module Google
|
|
369
466
|
# @param [Symbol] mode The field's mode. The possible values are
|
370
467
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
371
468
|
# `:nullable`.
|
469
|
+
#
|
372
470
|
def date name, description: nil, mode: :nullable
|
373
471
|
record_check!
|
374
472
|
|
@@ -376,10 +474,10 @@ module Google
|
|
376
474
|
end
|
377
475
|
|
378
476
|
##
|
379
|
-
# Adds a record field to the schema. A block must be passed describing
|
380
|
-
# the nested fields of the record. For more information about nested
|
381
|
-
# and repeated records, see [Preparing Data for BigQuery
|
382
|
-
# ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
|
477
|
+
# Adds a record field to the nested schema of a record field. A block
|
478
|
+
# must be passed describing the nested fields of the record. For more
|
479
|
+
# information about nested and repeated records, see [Preparing Data
|
480
|
+
# for BigQuery](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
|
383
481
|
#
|
384
482
|
# This can only be called on fields that are of type `RECORD`.
|
385
483
|
#
|
@@ -405,7 +503,10 @@ module Google
|
|
405
503
|
# table.schema do |schema|
|
406
504
|
# schema.string "first_name", mode: :required
|
407
505
|
# schema.record "cities_lived", mode: :repeated do |cities_lived|
|
408
|
-
# cities_lived.
|
506
|
+
# cities_lived.record "city", mode: :required do |city|
|
507
|
+
# city.string "name", mode: :required
|
508
|
+
# city.string "country", mode: :required
|
509
|
+
# end
|
409
510
|
# cities_lived.integer "number_of_years", mode: :required
|
410
511
|
# end
|
411
512
|
# end
|
@@ -18,7 +18,7 @@ require "google/cloud/bigquery/convert"
|
|
18
18
|
require "google/cloud/errors"
|
19
19
|
require "google/apis/bigquery_v2"
|
20
20
|
require "pathname"
|
21
|
-
require "
|
21
|
+
require "securerandom"
|
22
22
|
require "mime/types"
|
23
23
|
require "date"
|
24
24
|
|
@@ -61,7 +61,7 @@ module Google
|
|
61
61
|
service.client_options.open_timeout_sec = timeout
|
62
62
|
service.client_options.read_timeout_sec = timeout
|
63
63
|
service.client_options.send_timeout_sec = timeout
|
64
|
-
service.request_options.retries =
|
64
|
+
service.request_options.retries = 0 # handle retries in #execute
|
65
65
|
service.request_options.header ||= {}
|
66
66
|
service.request_options.header["x-goog-api-client"] = \
|
67
67
|
"gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
|
@@ -75,17 +75,19 @@ module Google
|
|
75
75
|
# Lists all datasets in the specified project to which you have
|
76
76
|
# been granted the READER dataset role.
|
77
77
|
def list_datasets options = {}
|
78
|
-
|
78
|
+
# The list operation is considered idempotent
|
79
|
+
execute backoff: true do
|
79
80
|
service.list_datasets \
|
80
|
-
@project, all: options[:all],
|
81
|
-
page_token: options[:token]
|
81
|
+
@project, all: options[:all], filter: options[:filter],
|
82
|
+
max_results: options[:max], page_token: options[:token]
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
85
86
|
##
|
86
87
|
# Returns the dataset specified by datasetID.
|
87
88
|
def get_dataset dataset_id
|
88
|
-
|
89
|
+
# The get operation is considered idempotent
|
90
|
+
execute(backoff: true) { service.get_dataset @project, dataset_id }
|
89
91
|
end
|
90
92
|
|
91
93
|
##
|
@@ -98,8 +100,16 @@ module Google
|
|
98
100
|
# Updates information in an existing dataset, only replacing
|
99
101
|
# fields that are provided in the submitted dataset resource.
|
100
102
|
def patch_dataset dataset_id, patched_dataset_gapi
|
101
|
-
|
102
|
-
|
103
|
+
patch_with_backoff = false
|
104
|
+
options = {}
|
105
|
+
if patched_dataset_gapi.etag
|
106
|
+
options[:header] = { "If-Match" => patched_dataset_gapi.etag }
|
107
|
+
# The patch with etag operation is considered idempotent
|
108
|
+
patch_with_backoff = true
|
109
|
+
end
|
110
|
+
execute backoff: patch_with_backoff do
|
111
|
+
service.patch_dataset @project, dataset_id, patched_dataset_gapi,
|
112
|
+
options: options
|
103
113
|
end
|
104
114
|
end
|
105
115
|
|
@@ -119,7 +129,8 @@ module Google
|
|
119
129
|
# Lists all tables in the specified dataset.
|
120
130
|
# Requires the READER dataset role.
|
121
131
|
def list_tables dataset_id, options = {}
|
122
|
-
|
132
|
+
# The list operation is considered idempotent
|
133
|
+
execute backoff: true do
|
123
134
|
service.list_tables @project, dataset_id,
|
124
135
|
max_results: options[:max],
|
125
136
|
page_token: options[:token]
|
@@ -127,7 +138,10 @@ module Google
|
|
127
138
|
end
|
128
139
|
|
129
140
|
def get_project_table project_id, dataset_id, table_id
|
130
|
-
|
141
|
+
# The get operation is considered idempotent
|
142
|
+
execute backoff: true do
|
143
|
+
service.get_table project_id, dataset_id, table_id
|
144
|
+
end
|
131
145
|
end
|
132
146
|
|
133
147
|
##
|
@@ -136,7 +150,10 @@ module Google
|
|
136
150
|
# it only returns the table resource,
|
137
151
|
# which describes the structure of this table.
|
138
152
|
def get_table dataset_id, table_id
|
139
|
-
|
153
|
+
# The get operation is considered idempotent
|
154
|
+
execute backoff: true do
|
155
|
+
get_project_table @project, dataset_id, table_id
|
156
|
+
end
|
140
157
|
end
|
141
158
|
|
142
159
|
##
|
@@ -149,9 +166,16 @@ module Google
|
|
149
166
|
# Updates information in an existing table, replacing fields that
|
150
167
|
# are provided in the submitted table resource.
|
151
168
|
def patch_table dataset_id, table_id, patched_table_gapi
|
152
|
-
|
169
|
+
patch_with_backoff = false
|
170
|
+
options = {}
|
171
|
+
if patched_table_gapi.etag
|
172
|
+
options[:header] = { "If-Match" => patched_table_gapi.etag }
|
173
|
+
# The patch with etag operation is considered idempotent
|
174
|
+
patch_with_backoff = true
|
175
|
+
end
|
176
|
+
execute backoff: patch_with_backoff do
|
153
177
|
service.patch_table @project, dataset_id, table_id,
|
154
|
-
patched_table_gapi
|
178
|
+
patched_table_gapi, options: options
|
155
179
|
end
|
156
180
|
end
|
157
181
|
|
@@ -165,7 +189,8 @@ module Google
|
|
165
189
|
##
|
166
190
|
# Retrieves data from the table.
|
167
191
|
def list_tabledata dataset_id, table_id, options = {}
|
168
|
-
|
192
|
+
# The list operation is considered idempotent
|
193
|
+
execute backoff: true do
|
169
194
|
service.list_table_data @project, dataset_id, table_id,
|
170
195
|
max_results: options.delete(:max),
|
171
196
|
page_token: options.delete(:token),
|
@@ -176,8 +201,8 @@ module Google
|
|
176
201
|
def insert_tabledata dataset_id, table_id, rows, options = {}
|
177
202
|
insert_rows = Array(rows).map do |row|
|
178
203
|
Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(
|
179
|
-
insert_id:
|
180
|
-
json: row
|
204
|
+
insert_id: SecureRandom.uuid,
|
205
|
+
json: Convert.to_json_row(row)
|
181
206
|
)
|
182
207
|
end
|
183
208
|
insert_req = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(
|
@@ -186,7 +211,8 @@ module Google
|
|
186
211
|
skip_invalid_rows: options[:skip_invalid]
|
187
212
|
)
|
188
213
|
|
189
|
-
|
214
|
+
# The insertAll with insertId operation is considered idempotent
|
215
|
+
execute backoff: true do
|
190
216
|
service.insert_all_table_data(
|
191
217
|
@project, dataset_id, table_id, insert_req)
|
192
218
|
end
|
@@ -196,7 +222,8 @@ module Google
|
|
196
222
|
# Lists all jobs in the specified project to which you have
|
197
223
|
# been granted the READER job role.
|
198
224
|
def list_jobs options = {}
|
199
|
-
|
225
|
+
# The list operation is considered idempotent
|
226
|
+
execute backoff: true do
|
200
227
|
service.list_jobs \
|
201
228
|
@project, all_users: options[:all], max_results: options[:max],
|
202
229
|
page_token: options[:token], projection: "full",
|
@@ -207,35 +234,37 @@ module Google
|
|
207
234
|
##
|
208
235
|
# Cancel the job specified by jobId.
|
209
236
|
def cancel_job job_id
|
210
|
-
|
237
|
+
# The BigQuery team has told us cancelling is considered idempotent
|
238
|
+
execute(backoff: true) { service.cancel_job @project, job_id }
|
211
239
|
end
|
212
240
|
|
213
241
|
##
|
214
242
|
# Returns the job specified by jobID.
|
215
243
|
def get_job job_id
|
216
|
-
|
244
|
+
# The get operation is considered idempotent
|
245
|
+
execute(backoff: true) { service.get_job @project, job_id }
|
217
246
|
end
|
218
247
|
|
219
248
|
def insert_job config
|
220
249
|
job_object = API::Job.new(
|
250
|
+
job_reference: job_ref_from(nil, nil),
|
221
251
|
configuration: config
|
222
252
|
)
|
223
|
-
|
253
|
+
# Jobs have generated id, so this operation is considered idempotent
|
254
|
+
execute(backoff: true) { service.insert_job @project, job_object }
|
224
255
|
end
|
225
256
|
|
226
257
|
def query_job query, options = {}
|
227
258
|
config = query_table_config(query, options)
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
def query query, options = {}
|
232
|
-
execute { service.query_job @project, query_config(query, options) }
|
259
|
+
# Jobs have generated id, so this operation is considered idempotent
|
260
|
+
execute(backoff: true) { service.insert_job @project, config }
|
233
261
|
end
|
234
262
|
|
235
263
|
##
|
236
264
|
# Returns the query data for the job
|
237
265
|
def job_query_results job_id, options = {}
|
238
|
-
|
266
|
+
# The get operation is considered idempotent
|
267
|
+
execute backoff: true do
|
239
268
|
service.get_job_query_results @project,
|
240
269
|
job_id,
|
241
270
|
max_results: options.delete(:max),
|
@@ -246,21 +275,24 @@ module Google
|
|
246
275
|
end
|
247
276
|
|
248
277
|
def copy_table source, target, options = {}
|
249
|
-
|
278
|
+
# Jobs have generated id, so this operation is considered idempotent
|
279
|
+
execute backoff: true do
|
250
280
|
service.insert_job @project, copy_table_config(
|
251
281
|
source, target, options)
|
252
282
|
end
|
253
283
|
end
|
254
284
|
|
255
285
|
def extract_table table, storage_files, options = {}
|
256
|
-
|
286
|
+
# Jobs have generated id, so this operation is considered idempotent
|
287
|
+
execute backoff: true do
|
257
288
|
service.insert_job \
|
258
289
|
@project, extract_table_config(table, storage_files, options)
|
259
290
|
end
|
260
291
|
end
|
261
292
|
|
262
293
|
def load_table_gs_url dataset_id, table_id, url, options = {}
|
263
|
-
|
294
|
+
# Jobs have generated id, so this operation is considered idempotent
|
295
|
+
execute backoff: true do
|
264
296
|
service.insert_job \
|
265
297
|
@project, load_table_url_config(dataset_id, table_id,
|
266
298
|
url, options)
|
@@ -268,7 +300,8 @@ module Google
|
|
268
300
|
end
|
269
301
|
|
270
302
|
def load_table_file dataset_id, table_id, file, options = {}
|
271
|
-
|
303
|
+
# Jobs have generated id, so this operation is considered idempotent
|
304
|
+
execute backoff: true do
|
272
305
|
service.insert_job \
|
273
306
|
@project, load_table_file_config(
|
274
307
|
dataset_id, table_id, file, options),
|
@@ -299,7 +332,7 @@ module Google
|
|
299
332
|
##
|
300
333
|
# Lists all projects to which you have been granted any project role.
|
301
334
|
def list_projects options = {}
|
302
|
-
execute do
|
335
|
+
execute backoff: true do
|
303
336
|
service.list_projects max_results: options[:max],
|
304
337
|
page_token: options[:token]
|
305
338
|
end
|
@@ -335,6 +368,23 @@ module Google
|
|
335
368
|
end
|
336
369
|
end
|
337
370
|
|
371
|
+
# Generate a random string similar to the BigQuery service job IDs.
|
372
|
+
def generate_id
|
373
|
+
SecureRandom.urlsafe_base64(21)
|
374
|
+
end
|
375
|
+
|
376
|
+
# If no job_id or prefix is given, always generate a client-side job ID
|
377
|
+
# anyway, for idempotent retry in the google-api-client layer.
|
378
|
+
# See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
|
379
|
+
def job_ref_from job_id, prefix
|
380
|
+
prefix ||= "job_"
|
381
|
+
job_id ||= "#{prefix}#{generate_id}"
|
382
|
+
API::JobReference.new(
|
383
|
+
project_id: @project,
|
384
|
+
job_id: job_id
|
385
|
+
)
|
386
|
+
end
|
387
|
+
|
338
388
|
def load_table_file_opts dataset_id, table_id, file, options = {}
|
339
389
|
path = Pathname(file).to_path
|
340
390
|
{
|
@@ -346,21 +396,26 @@ module Google
|
|
346
396
|
projection_fields: projection_fields(options[:projection_fields]),
|
347
397
|
allow_jagged_rows: options[:jagged_rows],
|
348
398
|
allow_quoted_newlines: options[:quoted_newlines],
|
399
|
+
autodetect: options[:autodetect],
|
349
400
|
encoding: options[:encoding], field_delimiter: options[:delimiter],
|
350
401
|
ignore_unknown_values: options[:ignore_unknown],
|
351
|
-
max_bad_records: options[:max_bad_records],
|
402
|
+
max_bad_records: options[:max_bad_records],
|
403
|
+
null_marker: options[:null_marker], quote: options[:quote],
|
352
404
|
schema: options[:schema], skip_leading_rows: options[:skip_leading]
|
353
405
|
}.delete_if { |_, v| v.nil? }
|
354
406
|
end
|
355
407
|
|
356
408
|
def load_table_file_config dataset_id, table_id, file, options = {}
|
357
409
|
load_opts = load_table_file_opts dataset_id, table_id, file, options
|
358
|
-
API::Job.new(
|
410
|
+
req = API::Job.new(
|
411
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
359
412
|
configuration: API::JobConfiguration.new(
|
360
413
|
load: API::JobConfigurationLoad.new(load_opts),
|
361
414
|
dry_run: options[:dryrun]
|
362
415
|
)
|
363
416
|
)
|
417
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
418
|
+
req
|
364
419
|
end
|
365
420
|
|
366
421
|
def load_table_url_opts dataset_id, table_id, url, options = {}
|
@@ -374,21 +429,26 @@ module Google
|
|
374
429
|
projection_fields: projection_fields(options[:projection_fields]),
|
375
430
|
allow_jagged_rows: options[:jagged_rows],
|
376
431
|
allow_quoted_newlines: options[:quoted_newlines],
|
432
|
+
autodetect: options[:autodetect],
|
377
433
|
encoding: options[:encoding], field_delimiter: options[:delimiter],
|
378
434
|
ignore_unknown_values: options[:ignore_unknown],
|
379
|
-
max_bad_records: options[:max_bad_records],
|
435
|
+
max_bad_records: options[:max_bad_records],
|
436
|
+
null_marker: options[:null_marker], quote: options[:quote],
|
380
437
|
schema: options[:schema], skip_leading_rows: options[:skip_leading]
|
381
438
|
}.delete_if { |_, v| v.nil? }
|
382
439
|
end
|
383
440
|
|
384
441
|
def load_table_url_config dataset_id, table_id, url, options = {}
|
385
442
|
load_opts = load_table_url_opts dataset_id, table_id, url, options
|
386
|
-
API::Job.new(
|
443
|
+
req = API::Job.new(
|
444
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
387
445
|
configuration: API::JobConfiguration.new(
|
388
446
|
load: API::JobConfigurationLoad.new(load_opts),
|
389
447
|
dry_run: options[:dryrun]
|
390
448
|
)
|
391
449
|
)
|
450
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
451
|
+
req
|
392
452
|
end
|
393
453
|
|
394
454
|
# rubocop:disable all
|
@@ -397,8 +457,9 @@ module Google
|
|
397
457
|
# Job description for query job
|
398
458
|
def query_table_config query, options
|
399
459
|
dest_table = table_ref_from options[:table]
|
400
|
-
|
460
|
+
dataset_config = dataset_ref_from options[:dataset], options[:project]
|
401
461
|
req = API::Job.new(
|
462
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
402
463
|
configuration: API::JobConfiguration.new(
|
403
464
|
query: API::JobConfigurationQuery.new(
|
404
465
|
query: query,
|
@@ -410,14 +471,16 @@ module Google
|
|
410
471
|
write_disposition: write_disposition(options[:write]),
|
411
472
|
allow_large_results: options[:large_results],
|
412
473
|
flatten_results: options[:flatten],
|
413
|
-
default_dataset:
|
474
|
+
default_dataset: dataset_config,
|
414
475
|
use_legacy_sql: Convert.resolve_legacy_sql(
|
415
476
|
options[:standard_sql], options[:legacy_sql]),
|
416
477
|
maximum_billing_tier: options[:maximum_billing_tier],
|
417
|
-
maximum_bytes_billed: options[:maximum_bytes_billed]
|
478
|
+
maximum_bytes_billed: options[:maximum_bytes_billed],
|
479
|
+
user_defined_function_resources: udfs(options[:udfs])
|
418
480
|
)
|
419
481
|
)
|
420
482
|
)
|
483
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
421
484
|
|
422
485
|
if options[:params]
|
423
486
|
if Array === options[:params]
|
@@ -439,6 +502,14 @@ module Google
|
|
439
502
|
end
|
440
503
|
end
|
441
504
|
|
505
|
+
if options[:external]
|
506
|
+
external_table_pairs = options[:external].map do |name, obj|
|
507
|
+
[String(name), obj.to_gapi]
|
508
|
+
end
|
509
|
+
external_table_hash = Hash[external_table_pairs]
|
510
|
+
req.configuration.query.table_definitions = external_table_hash
|
511
|
+
end
|
512
|
+
|
442
513
|
req
|
443
514
|
end
|
444
515
|
|
@@ -484,7 +555,8 @@ module Google
|
|
484
555
|
##
|
485
556
|
# Job description for copy job
|
486
557
|
def copy_table_config source, target, options = {}
|
487
|
-
API::Job.new(
|
558
|
+
req = API::Job.new(
|
559
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
488
560
|
configuration: API::JobConfiguration.new(
|
489
561
|
copy: API::JobConfigurationTableCopy.new(
|
490
562
|
source_table: source,
|
@@ -495,6 +567,8 @@ module Google
|
|
495
567
|
dry_run: options[:dryrun]
|
496
568
|
)
|
497
569
|
)
|
570
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
571
|
+
req
|
498
572
|
end
|
499
573
|
|
500
574
|
def extract_table_config table, storage_files, options = {}
|
@@ -502,7 +576,8 @@ module Google
|
|
502
576
|
url.respond_to?(:to_gs_url) ? url.to_gs_url : url
|
503
577
|
end
|
504
578
|
dest_format = source_format storage_urls.first, options[:format]
|
505
|
-
API::Job.new(
|
579
|
+
req = API::Job.new(
|
580
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
506
581
|
configuration: API::JobConfiguration.new(
|
507
582
|
extract: API::JobConfigurationExtract.new(
|
508
583
|
destination_uris: Array(storage_urls),
|
@@ -515,6 +590,8 @@ module Google
|
|
515
590
|
dry_run: options[:dryrun]
|
516
591
|
)
|
517
592
|
)
|
593
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
594
|
+
req
|
518
595
|
end
|
519
596
|
|
520
597
|
def create_disposition str
|
@@ -550,6 +627,7 @@ module Google
|
|
550
627
|
"newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
|
551
628
|
"avro" => "AVRO",
|
552
629
|
"datastore" => "DATASTORE_BACKUP",
|
630
|
+
"backup" => "DATASTORE_BACKUP",
|
553
631
|
"datastore_backup" => "DATASTORE_BACKUP"
|
554
632
|
}[format.to_s.downcase]
|
555
633
|
return val unless val.nil?
|
@@ -573,11 +651,86 @@ module Google
|
|
573
651
|
nil
|
574
652
|
end
|
575
653
|
|
576
|
-
def
|
577
|
-
|
654
|
+
##
# Builds one API::UserDefinedFunctionResource per entry of the given
# value (a single entry, an array of entries, or nil).
#
# An entry that starts with "gs://" is stored as the resource URI;
# any other entry is stored as inline code.
def udfs array_or_str
  Array(array_or_str).map do |source|
    API::UserDefinedFunctionResource.new.tap do |udf|
      if source.start_with?("gs://")
        udf.resource_uri = source
      else
        udf.inline_code = source
      end
    end
  end
end
|
665
|
+
|
666
|
+
##
# Yields to the given block and returns its result. When `backoff` is
# truthy the block is run through a Backoff built with this object's
# `retries` value; otherwise it is yielded to directly.
#
# Any Google::Apis::Error raised along the way is converted with
# Google::Cloud::Error.from_error and re-raised.
def execute backoff: nil
  return yield unless backoff

  Backoff.new(retries: retries).execute { yield }
rescue Google::Apis::Error => e
  raise Google::Cloud::Error.from_error(e)
end
|
675
|
+
|
676
|
+
##
# Runs a block and retries it, after a back-off delay, while the
# Google::Apis::Error it raises carries a JSON body whose listed error
# reasons are all in the configured list of retryable reasons.
#
# Class-level `retries`, `reasons` and `backoff` hold the defaults used
# when the corresponding option is not given to `new`.
class Backoff
  class << self
    attr_accessor :retries
    attr_accessor :reasons
    attr_accessor :backoff
  end
  self.retries = 5
  self.reasons = %w(rateLimitExceeded backendError)
  self.backoff = lambda do |retries|
    # Max delay is 32 seconds
    # See "Back-off Requirements" here:
    # https://cloud.google.com/bigquery/sla
    retries = 5 if retries > 5
    delay = 2 ** retries
    sleep delay
  end

  ##
  # @param options [Hash] optional overrides:
  #   `:retries` (maximum number of retries), `:reasons` (retryable
  #   error reasons) and `:backoff` (callable invoked with the number
  #   of retries made so far before each new attempt).
  def initialize options = {}
    @retries = (options[:retries] || Backoff.retries).to_i
    @reasons = (options[:reasons] || Backoff.reasons).to_a
    @backoff = options[:backoff] || Backoff.backoff
  end

  ##
  # Yields to the block and returns its value. A Google::Apis::Error
  # is re-raised unless `retry?` accepts it; otherwise the back-off
  # callable is invoked and the block is attempted again.
  #
  # Uses begin/rescue/retry rather than Kernel#loop: `loop` rescues
  # StopIteration, which would silently swallow a StopIteration raised
  # by the caller's block.
  def execute
    current_retries = 0
    begin
      yield
    rescue Google::Apis::Error => e
      raise e unless retry? e.body, current_retries

      @backoff.call current_retries
      current_retries += 1
      retry # bounded: retry? returns false once @retries is reached
    end
  end

  private

  # True while retries remain and the error body is retryable.
  def retry? result, current_retries #:nodoc:
    current_retries < @retries && retry_error_reason?(result)
  end

  # True only when the body parses as JSON, lists at least one error,
  # and every listed error has a retryable reason. Any failure to parse
  # or traverse the body is treated as "not retryable".
  def retry_error_reason? err_body
    err_hash = JSON.parse err_body
    json_errors = Array err_hash["error"]["errors"]
    return false if json_errors.empty?

    json_errors.all? { |json_error| @reasons.include? json_error["reason"] }
  rescue StandardError
    false
  end
end
|
581
734
|
end
|
582
735
|
end
|
583
736
|
end
|