google-cloud-bigquery 0.28.0 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -34,6 +34,7 @@ module Google
|
|
34
34
|
#
|
35
35
|
# field = table.schema.field "name"
|
36
36
|
# field.required? #=> true
|
37
|
+
#
|
37
38
|
class Field
|
38
39
|
# @private
|
39
40
|
MODES = %w( NULLABLE REQUIRED REPEATED )
|
@@ -45,6 +46,11 @@ module Google
|
|
45
46
|
##
|
46
47
|
# The name of the field.
|
47
48
|
#
|
49
|
+
# @return [String] The field name. The name must contain only
|
50
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
51
|
+
# start with a letter or underscore. The maximum length is 128
|
52
|
+
# characters.
|
53
|
+
#
|
48
54
|
def name
|
49
55
|
@gapi.name
|
50
56
|
end
|
@@ -52,19 +58,38 @@ module Google
|
|
52
58
|
##
|
53
59
|
# Updates the name of the field.
|
54
60
|
#
|
61
|
+
# @param [String] new_name The field name. The name must contain only
|
62
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
63
|
+
# start with a letter or underscore. The maximum length is 128
|
64
|
+
# characters.
|
65
|
+
#
|
55
66
|
def name= new_name
|
56
67
|
@gapi.update! name: String(new_name)
|
57
68
|
end
|
58
69
|
|
59
70
|
##
|
60
|
-
# The type of the field.
|
71
|
+
# The data type of the field.
|
72
|
+
#
|
73
|
+
# @return [String] The field data type. Possible values include
|
74
|
+
# `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
|
75
|
+
# `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
|
76
|
+
# `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
|
77
|
+
# (where `RECORD` indicates that the field contains a nested schema)
|
78
|
+
# or `STRUCT` (same as `RECORD`).
|
61
79
|
#
|
62
80
|
def type
|
63
81
|
@gapi.type
|
64
82
|
end
|
65
83
|
|
66
84
|
##
|
67
|
-
# Updates the type of the field.
|
85
|
+
# Updates the data type of the field.
|
86
|
+
#
|
87
|
+
# @param [String] new_type The data type. Possible values include
|
88
|
+
# `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
|
89
|
+
# `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
|
90
|
+
# `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
|
91
|
+
# (where `RECORD` indicates that the field contains a nested schema)
|
92
|
+
# or `STRUCT` (same as `RECORD`).
|
68
93
|
#
|
69
94
|
def type= new_type
|
70
95
|
@gapi.update! type: verify_type(new_type)
|
@@ -72,18 +97,27 @@ module Google
|
|
72
97
|
|
73
98
|
##
|
74
99
|
# Checks if the mode of the field is `NULLABLE`.
|
100
|
+
#
|
101
|
+
# @return [Boolean] `true` when `NULLABLE`, `false` otherwise.
|
102
|
+
#
|
75
103
|
def nullable?
|
76
104
|
mode == "NULLABLE"
|
77
105
|
end
|
78
106
|
|
79
107
|
##
|
80
108
|
# Checks if the mode of the field is `REQUIRED`.
|
109
|
+
#
|
110
|
+
# @return [Boolean] `true` when `REQUIRED`, `false` otherwise.
|
111
|
+
#
|
81
112
|
def required?
|
82
113
|
mode == "REQUIRED"
|
83
114
|
end
|
84
115
|
|
85
116
|
##
|
86
117
|
# Checks if the mode of the field is `REPEATED`.
|
118
|
+
#
|
119
|
+
# @return [Boolean] `true` when `REPEATED`, `false` otherwise.
|
120
|
+
#
|
87
121
|
def repeated?
|
88
122
|
mode == "REPEATED"
|
89
123
|
end
|
@@ -91,6 +125,9 @@ module Google
|
|
91
125
|
##
|
92
126
|
# The description of the field.
|
93
127
|
#
|
128
|
+
# @return [String] The field description. The maximum length is 1,024
|
129
|
+
# characters.
|
130
|
+
#
|
94
131
|
def description
|
95
132
|
@gapi.description
|
96
133
|
end
|
@@ -98,6 +135,9 @@ module Google
|
|
98
135
|
##
|
99
136
|
# Updates the description of the field.
|
100
137
|
#
|
138
|
+
# @param [String] new_description The field description. The maximum
|
139
|
+
# length is 1,024 characters.
|
140
|
+
#
|
101
141
|
def description= new_description
|
102
142
|
@gapi.update! description: new_description
|
103
143
|
end
|
@@ -105,6 +145,9 @@ module Google
|
|
105
145
|
##
|
106
146
|
# The mode of the field.
|
107
147
|
#
|
148
|
+
# @return [String] The field mode. Possible values include `NULLABLE`,
|
149
|
+
# `REQUIRED` and `REPEATED`. The default value is `NULLABLE`.
|
150
|
+
#
|
108
151
|
def mode
|
109
152
|
@gapi.mode
|
110
153
|
end
|
@@ -112,66 +155,100 @@ module Google
|
|
112
155
|
##
|
113
156
|
# Updates the mode of the field.
|
114
157
|
#
|
158
|
+
# @param [String] new_mode The field mode. Possible values include
|
159
|
+
# `NULLABLE`, `REQUIRED` and `REPEATED`. The default value is
|
160
|
+
# `NULLABLE`.
|
161
|
+
#
|
115
162
|
def mode= new_mode
|
116
163
|
@gapi.update! mode: verify_mode(new_mode)
|
117
164
|
end
|
118
165
|
|
119
166
|
##
|
120
167
|
# Checks if the mode of the field is `STRING`.
|
168
|
+
#
|
169
|
+
# @return [Boolean] `true` when `STRING`, `false` otherwise.
|
170
|
+
#
|
121
171
|
def string?
|
122
172
|
mode == "STRING"
|
123
173
|
end
|
124
174
|
|
125
175
|
##
|
126
176
|
# Checks if the mode of the field is `INTEGER`.
|
177
|
+
#
|
178
|
+
# @return [Boolean] `true` when `INTEGER`, `false` otherwise.
|
179
|
+
#
|
127
180
|
def integer?
|
128
181
|
mode == "INTEGER"
|
129
182
|
end
|
130
183
|
|
131
184
|
##
|
132
185
|
# Checks if the mode of the field is `FLOAT`.
|
186
|
+
#
|
187
|
+
# @return [Boolean] `true` when `FLOAT`, `false` otherwise.
|
188
|
+
#
|
133
189
|
def float?
|
134
190
|
mode == "FLOAT"
|
135
191
|
end
|
136
192
|
|
137
193
|
##
|
138
194
|
# Checks if the mode of the field is `BOOLEAN`.
|
195
|
+
#
|
196
|
+
# @return [Boolean] `true` when `BOOLEAN`, `false` otherwise.
|
197
|
+
#
|
139
198
|
def boolean?
|
140
199
|
mode == "BOOLEAN"
|
141
200
|
end
|
142
201
|
|
143
202
|
##
|
144
203
|
# Checks if the mode of the field is `BYTES`.
|
204
|
+
#
|
205
|
+
# @return [Boolean] `true` when `BYTES`, `false` otherwise.
|
206
|
+
#
|
145
207
|
def bytes?
|
146
208
|
mode == "BYTES"
|
147
209
|
end
|
148
210
|
|
149
211
|
##
|
150
212
|
# Checks if the mode of the field is `TIMESTAMP`.
|
213
|
+
#
|
214
|
+
# @return [Boolean] `true` when `TIMESTAMP`, `false` otherwise.
|
215
|
+
#
|
151
216
|
def timestamp?
|
152
217
|
mode == "TIMESTAMP"
|
153
218
|
end
|
154
219
|
|
155
220
|
##
|
156
221
|
# Checks if the mode of the field is `TIME`.
|
222
|
+
#
|
223
|
+
# @return [Boolean] `true` when `TIME`, `false` otherwise.
|
224
|
+
#
|
157
225
|
def time?
|
158
226
|
mode == "TIME"
|
159
227
|
end
|
160
228
|
|
161
229
|
##
|
162
230
|
# Checks if the mode of the field is `DATETIME`.
|
231
|
+
#
|
232
|
+
# @return [Boolean] `true` when `DATETIME`, `false` otherwise.
|
233
|
+
#
|
163
234
|
def datetime?
|
164
235
|
mode == "DATETIME"
|
165
236
|
end
|
166
237
|
|
167
238
|
##
|
168
239
|
# Checks if the mode of the field is `DATE`.
|
240
|
+
#
|
241
|
+
# @return [Boolean] `true` when `DATE`, `false` otherwise.
|
242
|
+
#
|
169
243
|
def date?
|
170
244
|
mode == "DATE"
|
171
245
|
end
|
172
246
|
|
173
247
|
##
|
174
248
|
# Checks if the mode of the field is `RECORD`.
|
249
|
+
#
|
250
|
+
# @return [Boolean] `true` when `RECORD`, `false` otherwise.
|
251
|
+
#
|
175
252
|
def record?
|
176
253
|
mode == "RECORD"
|
177
254
|
end
|
@@ -179,6 +256,10 @@ module Google
|
|
179
256
|
##
|
180
257
|
# The nested fields if the type property is set to `RECORD`. Will be
|
181
258
|
# empty otherwise.
|
259
|
+
#
|
260
|
+
# @return [Array<Field>, nil] The nested schema fields if the type
|
261
|
+
# is set to `RECORD`.
|
262
|
+
#
|
182
263
|
def fields
|
183
264
|
if frozen?
|
184
265
|
Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
|
@@ -190,13 +271,20 @@ module Google
|
|
190
271
|
##
|
191
272
|
# The names of the nested fields as symbols if the type property is
|
192
273
|
# set to `RECORD`. Will be empty otherwise.
|
274
|
+
#
|
275
|
+
# @return [Array<Symbol>, nil] The names of the nested schema fields
|
276
|
+
# if the type is set to `RECORD`.
|
277
|
+
#
|
193
278
|
def headers
|
194
279
|
fields.map(&:name).map(&:to_sym)
|
195
280
|
end
|
196
281
|
|
197
282
|
##
|
198
|
-
#
|
283
|
+
# Retrieve a nested field by name, if the type property is
|
199
284
|
# set to `RECORD`. Will return `nil` otherwise.
|
285
|
+
#
|
286
|
+
# @return [Field, nil] The nested schema field object, or `nil`.
|
287
|
+
#
|
200
288
|
def field name
|
201
289
|
f = fields.find { |fld| fld.name == name.to_s }
|
202
290
|
return nil if f.nil?
|
@@ -205,7 +293,7 @@ module Google
|
|
205
293
|
end
|
206
294
|
|
207
295
|
##
|
208
|
-
# Adds a string field to the schema.
|
296
|
+
# Adds a string field to the nested schema of a record field.
|
209
297
|
#
|
210
298
|
# This can only be called on fields that are of type `RECORD`.
|
211
299
|
#
|
@@ -217,6 +305,7 @@ module Google
|
|
217
305
|
# @param [Symbol] mode The field's mode. The possible values are
|
218
306
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
219
307
|
# `:nullable`.
|
308
|
+
#
|
220
309
|
def string name, description: nil, mode: :nullable
|
221
310
|
record_check!
|
222
311
|
|
@@ -224,7 +313,7 @@ module Google
|
|
224
313
|
end
|
225
314
|
|
226
315
|
##
|
227
|
-
# Adds an integer field to the schema.
|
316
|
+
# Adds an integer field to the nested schema of a record field.
|
228
317
|
#
|
229
318
|
# This can only be called on fields that are of type `RECORD`.
|
230
319
|
#
|
@@ -236,6 +325,7 @@ module Google
|
|
236
325
|
# @param [Symbol] mode The field's mode. The possible values are
|
237
326
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
238
327
|
# `:nullable`.
|
328
|
+
#
|
239
329
|
def integer name, description: nil, mode: :nullable
|
240
330
|
record_check!
|
241
331
|
|
@@ -243,7 +333,8 @@ module Google
|
|
243
333
|
end
|
244
334
|
|
245
335
|
##
|
246
|
-
# Adds a floating-point number field to the schema
|
336
|
+
# Adds a floating-point number field to the nested schema of a record
|
337
|
+
# field.
|
247
338
|
#
|
248
339
|
# This can only be called on fields that are of type `RECORD`.
|
249
340
|
#
|
@@ -255,6 +346,7 @@ module Google
|
|
255
346
|
# @param [Symbol] mode The field's mode. The possible values are
|
256
347
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
257
348
|
# `:nullable`.
|
349
|
+
#
|
258
350
|
def float name, description: nil, mode: :nullable
|
259
351
|
record_check!
|
260
352
|
|
@@ -262,7 +354,7 @@ module Google
|
|
262
354
|
end
|
263
355
|
|
264
356
|
##
|
265
|
-
# Adds a boolean field to the schema.
|
357
|
+
# Adds a boolean field to the nested schema of a record field.
|
266
358
|
#
|
267
359
|
# This can only be called on fields that are of type `RECORD`.
|
268
360
|
#
|
@@ -274,6 +366,7 @@ module Google
|
|
274
366
|
# @param [Symbol] mode The field's mode. The possible values are
|
275
367
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
276
368
|
# `:nullable`.
|
369
|
+
#
|
277
370
|
def boolean name, description: nil, mode: :nullable
|
278
371
|
record_check!
|
279
372
|
|
@@ -281,7 +374,7 @@ module Google
|
|
281
374
|
end
|
282
375
|
|
283
376
|
##
|
284
|
-
# Adds a bytes field to the schema.
|
377
|
+
# Adds a bytes field to the nested schema of a record field.
|
285
378
|
#
|
286
379
|
# This can only be called on fields that are of type `RECORD`.
|
287
380
|
#
|
@@ -293,6 +386,7 @@ module Google
|
|
293
386
|
# @param [Symbol] mode The field's mode. The possible values are
|
294
387
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
295
388
|
# `:nullable`.
|
389
|
+
#
|
296
390
|
def bytes name, description: nil, mode: :nullable
|
297
391
|
record_check!
|
298
392
|
|
@@ -300,7 +394,7 @@ module Google
|
|
300
394
|
end
|
301
395
|
|
302
396
|
##
|
303
|
-
# Adds a timestamp field to the schema.
|
397
|
+
# Adds a timestamp field to the nested schema of a record field.
|
304
398
|
#
|
305
399
|
# This can only be called on fields that are of type `RECORD`.
|
306
400
|
#
|
@@ -312,6 +406,7 @@ module Google
|
|
312
406
|
# @param [Symbol] mode The field's mode. The possible values are
|
313
407
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
314
408
|
# `:nullable`.
|
409
|
+
#
|
315
410
|
def timestamp name, description: nil, mode: :nullable
|
316
411
|
record_check!
|
317
412
|
|
@@ -319,7 +414,7 @@ module Google
|
|
319
414
|
end
|
320
415
|
|
321
416
|
##
|
322
|
-
# Adds a time field to the schema.
|
417
|
+
# Adds a time field to the nested schema of a record field.
|
323
418
|
#
|
324
419
|
# This can only be called on fields that are of type `RECORD`.
|
325
420
|
#
|
@@ -331,6 +426,7 @@ module Google
|
|
331
426
|
# @param [Symbol] mode The field's mode. The possible values are
|
332
427
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
333
428
|
# `:nullable`.
|
429
|
+
#
|
334
430
|
def time name, description: nil, mode: :nullable
|
335
431
|
record_check!
|
336
432
|
|
@@ -338,7 +434,7 @@ module Google
|
|
338
434
|
end
|
339
435
|
|
340
436
|
##
|
341
|
-
# Adds a datetime field to the schema.
|
437
|
+
# Adds a datetime field to the nested schema of a record field.
|
342
438
|
#
|
343
439
|
# This can only be called on fields that are of type `RECORD`.
|
344
440
|
#
|
@@ -350,6 +446,7 @@ module Google
|
|
350
446
|
# @param [Symbol] mode The field's mode. The possible values are
|
351
447
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
352
448
|
# `:nullable`.
|
449
|
+
#
|
353
450
|
def datetime name, description: nil, mode: :nullable
|
354
451
|
record_check!
|
355
452
|
|
@@ -357,7 +454,7 @@ module Google
|
|
357
454
|
end
|
358
455
|
|
359
456
|
##
|
360
|
-
# Adds a date field to the schema.
|
457
|
+
# Adds a date field to the nested schema of a record field.
|
361
458
|
#
|
362
459
|
# This can only be called on fields that are of type `RECORD`.
|
363
460
|
#
|
@@ -369,6 +466,7 @@ module Google
|
|
369
466
|
# @param [Symbol] mode The field's mode. The possible values are
|
370
467
|
# `:nullable`, `:required`, and `:repeated`. The default value is
|
371
468
|
# `:nullable`.
|
469
|
+
#
|
372
470
|
def date name, description: nil, mode: :nullable
|
373
471
|
record_check!
|
374
472
|
|
@@ -376,10 +474,10 @@ module Google
|
|
376
474
|
end
|
377
475
|
|
378
476
|
##
|
379
|
-
# Adds a record field to the schema
|
380
|
-
# the nested fields of the record. For more
|
381
|
-
# and repeated records, see [Preparing Data
|
382
|
-
# ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
|
477
|
+
# Adds a record field to the nested schema of a record field. A block
|
478
|
+
# must be passed describing the nested fields of the record. For more
|
479
|
+
# information about nested and repeated records, see [Preparing Data
|
480
|
+
# for BigQuery](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
|
383
481
|
#
|
384
482
|
# This can only be called on fields that are of type `RECORD`.
|
385
483
|
#
|
@@ -405,7 +503,10 @@ module Google
|
|
405
503
|
# table.schema do |schema|
|
406
504
|
# schema.string "first_name", mode: :required
|
407
505
|
# schema.record "cities_lived", mode: :repeated do |cities_lived|
|
408
|
-
# cities_lived.
|
506
|
+
# cities_lived.record "city", mode: :required do |city|
|
507
|
+
# city.string "name", mode: :required
|
508
|
+
# city.string "country", mode: :required
|
509
|
+
# end
|
409
510
|
# cities_lived.integer "number_of_years", mode: :required
|
410
511
|
# end
|
411
512
|
# end
|
@@ -18,7 +18,7 @@ require "google/cloud/bigquery/convert"
|
|
18
18
|
require "google/cloud/errors"
|
19
19
|
require "google/apis/bigquery_v2"
|
20
20
|
require "pathname"
|
21
|
-
require "
|
21
|
+
require "securerandom"
|
22
22
|
require "mime/types"
|
23
23
|
require "date"
|
24
24
|
|
@@ -61,7 +61,7 @@ module Google
|
|
61
61
|
service.client_options.open_timeout_sec = timeout
|
62
62
|
service.client_options.read_timeout_sec = timeout
|
63
63
|
service.client_options.send_timeout_sec = timeout
|
64
|
-
service.request_options.retries =
|
64
|
+
service.request_options.retries = 0 # handle retries in #execute
|
65
65
|
service.request_options.header ||= {}
|
66
66
|
service.request_options.header["x-goog-api-client"] = \
|
67
67
|
"gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
|
@@ -75,17 +75,19 @@ module Google
|
|
75
75
|
# Lists all datasets in the specified project to which you have
|
76
76
|
# been granted the READER dataset role.
|
77
77
|
def list_datasets options = {}
|
78
|
-
|
78
|
+
# The list operation is considered idempotent
|
79
|
+
execute backoff: true do
|
79
80
|
service.list_datasets \
|
80
|
-
@project, all: options[:all],
|
81
|
-
page_token: options[:token]
|
81
|
+
@project, all: options[:all], filter: options[:filter],
|
82
|
+
max_results: options[:max], page_token: options[:token]
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
85
86
|
##
|
86
87
|
# Returns the dataset specified by datasetID.
|
87
88
|
def get_dataset dataset_id
|
88
|
-
|
89
|
+
# The get operation is considered idempotent
|
90
|
+
execute(backoff: true) { service.get_dataset @project, dataset_id }
|
89
91
|
end
|
90
92
|
|
91
93
|
##
|
@@ -98,8 +100,16 @@ module Google
|
|
98
100
|
# Updates information in an existing dataset, only replacing
|
99
101
|
# fields that are provided in the submitted dataset resource.
|
100
102
|
def patch_dataset dataset_id, patched_dataset_gapi
|
101
|
-
|
102
|
-
|
103
|
+
patch_with_backoff = false
|
104
|
+
options = {}
|
105
|
+
if patched_dataset_gapi.etag
|
106
|
+
options[:header] = { "If-Match" => patched_dataset_gapi.etag }
|
107
|
+
# The patch with etag operation is considered idempotent
|
108
|
+
patch_with_backoff = true
|
109
|
+
end
|
110
|
+
execute backoff: patch_with_backoff do
|
111
|
+
service.patch_dataset @project, dataset_id, patched_dataset_gapi,
|
112
|
+
options: options
|
103
113
|
end
|
104
114
|
end
|
105
115
|
|
@@ -119,7 +129,8 @@ module Google
|
|
119
129
|
# Lists all tables in the specified dataset.
|
120
130
|
# Requires the READER dataset role.
|
121
131
|
def list_tables dataset_id, options = {}
|
122
|
-
|
132
|
+
# The list operation is considered idempotent
|
133
|
+
execute backoff: true do
|
123
134
|
service.list_tables @project, dataset_id,
|
124
135
|
max_results: options[:max],
|
125
136
|
page_token: options[:token]
|
@@ -127,7 +138,10 @@ module Google
|
|
127
138
|
end
|
128
139
|
|
129
140
|
def get_project_table project_id, dataset_id, table_id
|
130
|
-
|
141
|
+
# The get operation is considered idempotent
|
142
|
+
execute backoff: true do
|
143
|
+
service.get_table project_id, dataset_id, table_id
|
144
|
+
end
|
131
145
|
end
|
132
146
|
|
133
147
|
##
|
@@ -136,7 +150,10 @@ module Google
|
|
136
150
|
# it only returns the table resource,
|
137
151
|
# which describes the structure of this table.
|
138
152
|
def get_table dataset_id, table_id
|
139
|
-
|
153
|
+
# The get operation is considered idempotent
|
154
|
+
execute backoff: true do
|
155
|
+
get_project_table @project, dataset_id, table_id
|
156
|
+
end
|
140
157
|
end
|
141
158
|
|
142
159
|
##
|
@@ -149,9 +166,16 @@ module Google
|
|
149
166
|
# Updates information in an existing table, replacing fields that
|
150
167
|
# are provided in the submitted table resource.
|
151
168
|
def patch_table dataset_id, table_id, patched_table_gapi
|
152
|
-
|
169
|
+
patch_with_backoff = false
|
170
|
+
options = {}
|
171
|
+
if patched_table_gapi.etag
|
172
|
+
options[:header] = { "If-Match" => patched_table_gapi.etag }
|
173
|
+
# The patch with etag operation is considered idempotent
|
174
|
+
patch_with_backoff = true
|
175
|
+
end
|
176
|
+
execute backoff: patch_with_backoff do
|
153
177
|
service.patch_table @project, dataset_id, table_id,
|
154
|
-
patched_table_gapi
|
178
|
+
patched_table_gapi, options: options
|
155
179
|
end
|
156
180
|
end
|
157
181
|
|
@@ -165,7 +189,8 @@ module Google
|
|
165
189
|
##
|
166
190
|
# Retrieves data from the table.
|
167
191
|
def list_tabledata dataset_id, table_id, options = {}
|
168
|
-
|
192
|
+
# The list operation is considered idempotent
|
193
|
+
execute backoff: true do
|
169
194
|
service.list_table_data @project, dataset_id, table_id,
|
170
195
|
max_results: options.delete(:max),
|
171
196
|
page_token: options.delete(:token),
|
@@ -176,8 +201,8 @@ module Google
|
|
176
201
|
def insert_tabledata dataset_id, table_id, rows, options = {}
|
177
202
|
insert_rows = Array(rows).map do |row|
|
178
203
|
Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(
|
179
|
-
insert_id:
|
180
|
-
json: row
|
204
|
+
insert_id: SecureRandom.uuid,
|
205
|
+
json: Convert.to_json_row(row)
|
181
206
|
)
|
182
207
|
end
|
183
208
|
insert_req = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(
|
@@ -186,7 +211,8 @@ module Google
|
|
186
211
|
skip_invalid_rows: options[:skip_invalid]
|
187
212
|
)
|
188
213
|
|
189
|
-
|
214
|
+
# The insertAll with insertId operation is considered idempotent
|
215
|
+
execute backoff: true do
|
190
216
|
service.insert_all_table_data(
|
191
217
|
@project, dataset_id, table_id, insert_req)
|
192
218
|
end
|
@@ -196,7 +222,8 @@ module Google
|
|
196
222
|
# Lists all jobs in the specified project to which you have
|
197
223
|
# been granted the READER job role.
|
198
224
|
def list_jobs options = {}
|
199
|
-
|
225
|
+
# The list operation is considered idempotent
|
226
|
+
execute backoff: true do
|
200
227
|
service.list_jobs \
|
201
228
|
@project, all_users: options[:all], max_results: options[:max],
|
202
229
|
page_token: options[:token], projection: "full",
|
@@ -207,35 +234,37 @@ module Google
|
|
207
234
|
##
|
208
235
|
# Cancel the job specified by jobId.
|
209
236
|
def cancel_job job_id
|
210
|
-
|
237
|
+
# The BigQuery team has told us cancelling is considered idempotent
|
238
|
+
execute(backoff: true) { service.cancel_job @project, job_id }
|
211
239
|
end
|
212
240
|
|
213
241
|
##
|
214
242
|
# Returns the job specified by jobID.
|
215
243
|
def get_job job_id
|
216
|
-
|
244
|
+
# The get operation is considered idempotent
|
245
|
+
execute(backoff: true) { service.get_job @project, job_id }
|
217
246
|
end
|
218
247
|
|
219
248
|
def insert_job config
|
220
249
|
job_object = API::Job.new(
|
250
|
+
job_reference: job_ref_from(nil, nil),
|
221
251
|
configuration: config
|
222
252
|
)
|
223
|
-
|
253
|
+
# Jobs have generated id, so this operation is considered idempotent
|
254
|
+
execute(backoff: true) { service.insert_job @project, job_object }
|
224
255
|
end
|
225
256
|
|
226
257
|
def query_job query, options = {}
|
227
258
|
config = query_table_config(query, options)
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
def query query, options = {}
|
232
|
-
execute { service.query_job @project, query_config(query, options) }
|
259
|
+
# Jobs have generated id, so this operation is considered idempotent
|
260
|
+
execute(backoff: true) { service.insert_job @project, config }
|
233
261
|
end
|
234
262
|
|
235
263
|
##
|
236
264
|
# Returns the query data for the job
|
237
265
|
def job_query_results job_id, options = {}
|
238
|
-
|
266
|
+
# The get operation is considered idempotent
|
267
|
+
execute backoff: true do
|
239
268
|
service.get_job_query_results @project,
|
240
269
|
job_id,
|
241
270
|
max_results: options.delete(:max),
|
@@ -246,21 +275,24 @@ module Google
|
|
246
275
|
end
|
247
276
|
|
248
277
|
def copy_table source, target, options = {}
|
249
|
-
|
278
|
+
# Jobs have generated id, so this operation is considered idempotent
|
279
|
+
execute backoff: true do
|
250
280
|
service.insert_job @project, copy_table_config(
|
251
281
|
source, target, options)
|
252
282
|
end
|
253
283
|
end
|
254
284
|
|
255
285
|
def extract_table table, storage_files, options = {}
|
256
|
-
|
286
|
+
# Jobs have generated id, so this operation is considered idempotent
|
287
|
+
execute backoff: true do
|
257
288
|
service.insert_job \
|
258
289
|
@project, extract_table_config(table, storage_files, options)
|
259
290
|
end
|
260
291
|
end
|
261
292
|
|
262
293
|
def load_table_gs_url dataset_id, table_id, url, options = {}
|
263
|
-
|
294
|
+
# Jobs have generated id, so this operation is considered idempotent
|
295
|
+
execute backoff: true do
|
264
296
|
service.insert_job \
|
265
297
|
@project, load_table_url_config(dataset_id, table_id,
|
266
298
|
url, options)
|
@@ -268,7 +300,8 @@ module Google
|
|
268
300
|
end
|
269
301
|
|
270
302
|
def load_table_file dataset_id, table_id, file, options = {}
|
271
|
-
|
303
|
+
# Jobs have generated id, so this operation is considered idempotent
|
304
|
+
execute backoff: true do
|
272
305
|
service.insert_job \
|
273
306
|
@project, load_table_file_config(
|
274
307
|
dataset_id, table_id, file, options),
|
@@ -299,7 +332,7 @@ module Google
|
|
299
332
|
##
|
300
333
|
# Lists all projects to which you have been granted any project role.
|
301
334
|
def list_projects options = {}
|
302
|
-
execute do
|
335
|
+
execute backoff: true do
|
303
336
|
service.list_projects max_results: options[:max],
|
304
337
|
page_token: options[:token]
|
305
338
|
end
|
@@ -335,6 +368,23 @@ module Google
|
|
335
368
|
end
|
336
369
|
end
|
337
370
|
|
371
|
+
# Generate a random string similar to the BigQuery service job IDs.
|
372
|
+
def generate_id
|
373
|
+
SecureRandom.urlsafe_base64(21)
|
374
|
+
end
|
375
|
+
|
376
|
+
# If no job_id or prefix is given, always generate a client-side job ID
|
377
|
+
# anyway, for idempotent retry in the google-api-client layer.
|
378
|
+
# See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
|
379
|
+
def job_ref_from job_id, prefix
|
380
|
+
prefix ||= "job_"
|
381
|
+
job_id ||= "#{prefix}#{generate_id}"
|
382
|
+
API::JobReference.new(
|
383
|
+
project_id: @project,
|
384
|
+
job_id: job_id
|
385
|
+
)
|
386
|
+
end
|
387
|
+
|
338
388
|
def load_table_file_opts dataset_id, table_id, file, options = {}
|
339
389
|
path = Pathname(file).to_path
|
340
390
|
{
|
@@ -346,21 +396,26 @@ module Google
|
|
346
396
|
projection_fields: projection_fields(options[:projection_fields]),
|
347
397
|
allow_jagged_rows: options[:jagged_rows],
|
348
398
|
allow_quoted_newlines: options[:quoted_newlines],
|
399
|
+
autodetect: options[:autodetect],
|
349
400
|
encoding: options[:encoding], field_delimiter: options[:delimiter],
|
350
401
|
ignore_unknown_values: options[:ignore_unknown],
|
351
|
-
max_bad_records: options[:max_bad_records],
|
402
|
+
max_bad_records: options[:max_bad_records],
|
403
|
+
null_marker: options[:null_marker], quote: options[:quote],
|
352
404
|
schema: options[:schema], skip_leading_rows: options[:skip_leading]
|
353
405
|
}.delete_if { |_, v| v.nil? }
|
354
406
|
end
|
355
407
|
|
356
408
|
def load_table_file_config dataset_id, table_id, file, options = {}
|
357
409
|
load_opts = load_table_file_opts dataset_id, table_id, file, options
|
358
|
-
API::Job.new(
|
410
|
+
req = API::Job.new(
|
411
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
359
412
|
configuration: API::JobConfiguration.new(
|
360
413
|
load: API::JobConfigurationLoad.new(load_opts),
|
361
414
|
dry_run: options[:dryrun]
|
362
415
|
)
|
363
416
|
)
|
417
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
418
|
+
req
|
364
419
|
end
|
365
420
|
|
366
421
|
def load_table_url_opts dataset_id, table_id, url, options = {}
|
@@ -374,21 +429,26 @@ module Google
|
|
374
429
|
projection_fields: projection_fields(options[:projection_fields]),
|
375
430
|
allow_jagged_rows: options[:jagged_rows],
|
376
431
|
allow_quoted_newlines: options[:quoted_newlines],
|
432
|
+
autodetect: options[:autodetect],
|
377
433
|
encoding: options[:encoding], field_delimiter: options[:delimiter],
|
378
434
|
ignore_unknown_values: options[:ignore_unknown],
|
379
|
-
max_bad_records: options[:max_bad_records],
|
435
|
+
max_bad_records: options[:max_bad_records],
|
436
|
+
null_marker: options[:null_marker], quote: options[:quote],
|
380
437
|
schema: options[:schema], skip_leading_rows: options[:skip_leading]
|
381
438
|
}.delete_if { |_, v| v.nil? }
|
382
439
|
end
|
383
440
|
|
384
441
|
def load_table_url_config dataset_id, table_id, url, options = {}
|
385
442
|
load_opts = load_table_url_opts dataset_id, table_id, url, options
|
386
|
-
API::Job.new(
|
443
|
+
req = API::Job.new(
|
444
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
387
445
|
configuration: API::JobConfiguration.new(
|
388
446
|
load: API::JobConfigurationLoad.new(load_opts),
|
389
447
|
dry_run: options[:dryrun]
|
390
448
|
)
|
391
449
|
)
|
450
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
451
|
+
req
|
392
452
|
end
|
393
453
|
|
394
454
|
# rubocop:disable all
|
@@ -397,8 +457,9 @@ module Google
|
|
397
457
|
# Job description for query job
|
398
458
|
def query_table_config query, options
|
399
459
|
dest_table = table_ref_from options[:table]
|
400
|
-
|
460
|
+
dataset_config = dataset_ref_from options[:dataset], options[:project]
|
401
461
|
req = API::Job.new(
|
462
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
402
463
|
configuration: API::JobConfiguration.new(
|
403
464
|
query: API::JobConfigurationQuery.new(
|
404
465
|
query: query,
|
@@ -410,14 +471,16 @@ module Google
|
|
410
471
|
write_disposition: write_disposition(options[:write]),
|
411
472
|
allow_large_results: options[:large_results],
|
412
473
|
flatten_results: options[:flatten],
|
413
|
-
default_dataset:
|
474
|
+
default_dataset: dataset_config,
|
414
475
|
use_legacy_sql: Convert.resolve_legacy_sql(
|
415
476
|
options[:standard_sql], options[:legacy_sql]),
|
416
477
|
maximum_billing_tier: options[:maximum_billing_tier],
|
417
|
-
maximum_bytes_billed: options[:maximum_bytes_billed]
|
478
|
+
maximum_bytes_billed: options[:maximum_bytes_billed],
|
479
|
+
user_defined_function_resources: udfs(options[:udfs])
|
418
480
|
)
|
419
481
|
)
|
420
482
|
)
|
483
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
421
484
|
|
422
485
|
if options[:params]
|
423
486
|
if Array === options[:params]
|
@@ -439,6 +502,14 @@ module Google
|
|
439
502
|
end
|
440
503
|
end
|
441
504
|
|
505
|
+
if options[:external]
|
506
|
+
external_table_pairs = options[:external].map do |name, obj|
|
507
|
+
[String(name), obj.to_gapi]
|
508
|
+
end
|
509
|
+
external_table_hash = Hash[external_table_pairs]
|
510
|
+
req.configuration.query.table_definitions = external_table_hash
|
511
|
+
end
|
512
|
+
|
442
513
|
req
|
443
514
|
end
|
444
515
|
|
@@ -484,7 +555,8 @@ module Google
|
|
484
555
|
##
|
485
556
|
# Job description for copy job
|
486
557
|
def copy_table_config source, target, options = {}
|
487
|
-
API::Job.new(
|
558
|
+
req = API::Job.new(
|
559
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
488
560
|
configuration: API::JobConfiguration.new(
|
489
561
|
copy: API::JobConfigurationTableCopy.new(
|
490
562
|
source_table: source,
|
@@ -495,6 +567,8 @@ module Google
|
|
495
567
|
dry_run: options[:dryrun]
|
496
568
|
)
|
497
569
|
)
|
570
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
571
|
+
req
|
498
572
|
end
|
499
573
|
|
500
574
|
def extract_table_config table, storage_files, options = {}
|
@@ -502,7 +576,8 @@ module Google
|
|
502
576
|
url.respond_to?(:to_gs_url) ? url.to_gs_url : url
|
503
577
|
end
|
504
578
|
dest_format = source_format storage_urls.first, options[:format]
|
505
|
-
API::Job.new(
|
579
|
+
req = API::Job.new(
|
580
|
+
job_reference: job_ref_from(options[:job_id], options[:prefix]),
|
506
581
|
configuration: API::JobConfiguration.new(
|
507
582
|
extract: API::JobConfigurationExtract.new(
|
508
583
|
destination_uris: Array(storage_urls),
|
@@ -515,6 +590,8 @@ module Google
|
|
515
590
|
dry_run: options[:dryrun]
|
516
591
|
)
|
517
592
|
)
|
593
|
+
req.configuration.labels = options[:labels] if options[:labels]
|
594
|
+
req
|
518
595
|
end
|
519
596
|
|
520
597
|
def create_disposition str
|
@@ -550,6 +627,7 @@ module Google
|
|
550
627
|
"newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
|
551
628
|
"avro" => "AVRO",
|
552
629
|
"datastore" => "DATASTORE_BACKUP",
|
630
|
+
"backup" => "DATASTORE_BACKUP",
|
553
631
|
"datastore_backup" => "DATASTORE_BACKUP"
|
554
632
|
}[format.to_s.downcase]
|
555
633
|
return val unless val.nil?
|
@@ -573,11 +651,86 @@ module Google
|
|
573
651
|
nil
|
574
652
|
end
|
575
653
|
|
576
|
-
def
|
577
|
-
|
654
|
+
def udfs array_or_str
|
655
|
+
Array(array_or_str).map do |uri_or_code|
|
656
|
+
resource = API::UserDefinedFunctionResource.new
|
657
|
+
if uri_or_code.start_with?("gs://")
|
658
|
+
resource.resource_uri = uri_or_code
|
659
|
+
else
|
660
|
+
resource.inline_code = uri_or_code
|
661
|
+
end
|
662
|
+
resource
|
663
|
+
end
|
664
|
+
end
|
665
|
+
|
666
|
+
def execute backoff: nil
|
667
|
+
if backoff
|
668
|
+
Backoff.new(retries: retries).execute { yield }
|
669
|
+
else
|
670
|
+
yield
|
671
|
+
end
|
578
672
|
rescue Google::Apis::Error => e
|
579
673
|
raise Google::Cloud::Error.from_error(e)
|
580
674
|
end
|
675
|
+
|
676
|
+
class Backoff
|
677
|
+
class << self
|
678
|
+
attr_accessor :retries
|
679
|
+
attr_accessor :reasons
|
680
|
+
attr_accessor :backoff
|
681
|
+
end
|
682
|
+
self.retries = 5
|
683
|
+
self.reasons = %w(rateLimitExceeded backendError)
|
684
|
+
self.backoff = lambda do |retries|
|
685
|
+
# Max delay is 32 seconds
|
686
|
+
# See "Back-off Requirements" here:
|
687
|
+
# https://cloud.google.com/bigquery/sla
|
688
|
+
retries = 5 if retries > 5
|
689
|
+
delay = 2 ** retries
|
690
|
+
sleep delay
|
691
|
+
end
|
692
|
+
|
693
|
+
def initialize options = {}
|
694
|
+
@retries = (options[:retries] || Backoff.retries).to_i
|
695
|
+
@reasons = (options[:reasons] || Backoff.reasons).to_a
|
696
|
+
@backoff = options[:backoff] || Backoff.backoff
|
697
|
+
end
|
698
|
+
|
699
|
+
def execute
|
700
|
+
current_retries = 0
|
701
|
+
loop do
|
702
|
+
begin
|
703
|
+
return yield
|
704
|
+
rescue Google::Apis::Error => e
|
705
|
+
raise e unless retry? e.body, current_retries
|
706
|
+
|
707
|
+
@backoff.call current_retries
|
708
|
+
current_retries += 1
|
709
|
+
end
|
710
|
+
end
|
711
|
+
end
|
712
|
+
|
713
|
+
protected
|
714
|
+
|
715
|
+
def retry? result, current_retries #:nodoc:
|
716
|
+
if current_retries < @retries
|
717
|
+
return true if retry_error_reason? result
|
718
|
+
end
|
719
|
+
false
|
720
|
+
end
|
721
|
+
|
722
|
+
def retry_error_reason? err_body
|
723
|
+
err_hash = JSON.parse err_body
|
724
|
+
json_errors = Array err_hash["error"]["errors"]
|
725
|
+
return false if json_errors.empty?
|
726
|
+
json_errors.each do |json_error|
|
727
|
+
return false unless @reasons.include? json_error["reason"]
|
728
|
+
end
|
729
|
+
true
|
730
|
+
rescue
|
731
|
+
false
|
732
|
+
end
|
733
|
+
end
|
581
734
|
end
|
582
735
|
end
|
583
736
|
end
|