google-cloud-bigquery 0.28.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -34,6 +34,7 @@ module Google
34
34
  #
35
35
  # field = table.schema.field "name"
36
36
  # field.required? #=> true
37
+ #
37
38
  class Field
38
39
  # @private
39
40
  MODES = %w( NULLABLE REQUIRED REPEATED )
@@ -45,6 +46,11 @@ module Google
45
46
  ##
46
47
  # The name of the field.
47
48
  #
49
+ # @return [String] The field name. The name must contain only
50
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
51
+ # start with a letter or underscore. The maximum length is 128
52
+ # characters.
53
+ #
48
54
  def name
49
55
  @gapi.name
50
56
  end
@@ -52,19 +58,38 @@ module Google
52
58
  ##
53
59
  # Updates the name of the field.
54
60
  #
61
+ # @param [String] new_name The field name. The name must contain only
62
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
63
+ # start with a letter or underscore. The maximum length is 128
64
+ # characters.
65
+ #
55
66
  def name= new_name
56
67
  @gapi.update! name: String(new_name)
57
68
  end
58
69
 
59
70
  ##
60
- # The type of the field.
71
+ # The data type of the field.
72
+ #
73
+ # @return [String] The field data type. Possible values include
74
+ # `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
75
+ # `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
76
+ # `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
77
+ # (where `RECORD` indicates that the field contains a nested schema)
78
+ # or `STRUCT` (same as `RECORD`).
61
79
  #
62
80
  def type
63
81
  @gapi.type
64
82
  end
65
83
 
66
84
  ##
67
- # Updates the type of the field.
85
+ # Updates the data type of the field.
86
+ #
87
+ # @param [String] new_type The data type. Possible values include
88
+ # `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
89
+ # `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
90
+ # `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
91
+ # (where `RECORD` indicates that the field contains a nested schema)
92
+ # or `STRUCT` (same as `RECORD`).
68
93
  #
69
94
  def type= new_type
70
95
  @gapi.update! type: verify_type(new_type)
@@ -72,18 +97,27 @@ module Google
72
97
 
73
98
  ##
74
99
  # Checks if the mode of the field is `NULLABLE`.
100
+ #
101
+ # @return [Boolean] `true` when `NULLABLE`, `false` otherwise.
102
+ #
75
103
  def nullable?
76
104
  mode == "NULLABLE"
77
105
  end
78
106
 
79
107
  ##
80
108
  # Checks if the mode of the field is `REQUIRED`.
109
+ #
110
+ # @return [Boolean] `true` when `REQUIRED`, `false` otherwise.
111
+ #
81
112
  def required?
82
113
  mode == "REQUIRED"
83
114
  end
84
115
 
85
116
  ##
86
117
  # Checks if the mode of the field is `REPEATED`.
118
+ #
119
+ # @return [Boolean] `true` when `REPEATED`, `false` otherwise.
120
+ #
87
121
  def repeated?
88
122
  mode == "REPEATED"
89
123
  end
@@ -91,6 +125,9 @@ module Google
91
125
  ##
92
126
  # The description of the field.
93
127
  #
128
+ # @return [String] The field description. The maximum length is 1,024
129
+ # characters.
130
+ #
94
131
  def description
95
132
  @gapi.description
96
133
  end
@@ -98,6 +135,9 @@ module Google
98
135
  ##
99
136
  # Updates the description of the field.
100
137
  #
138
+ # @param [String] new_description The field description. The maximum
139
+ # length is 1,024 characters.
140
+ #
101
141
  def description= new_description
102
142
  @gapi.update! description: new_description
103
143
  end
@@ -105,6 +145,9 @@ module Google
105
145
  ##
106
146
  # The mode of the field.
107
147
  #
148
+ # @return [String] The field mode. Possible values include `NULLABLE`,
149
+ # `REQUIRED` and `REPEATED`. The default value is `NULLABLE`.
150
+ #
108
151
  def mode
109
152
  @gapi.mode
110
153
  end
@@ -112,66 +155,100 @@ module Google
112
155
  ##
113
156
  # Updates the mode of the field.
114
157
  #
158
+ # @param [String] new_mode The field mode. Possible values include
159
+ # `NULLABLE`, `REQUIRED` and `REPEATED`. The default value is
160
+ # `NULLABLE`.
161
+ #
115
162
  def mode= new_mode
116
163
  @gapi.update! mode: verify_mode(new_mode)
117
164
  end
118
165
 
119
166
  ##
120
167
  # Checks if the mode of the field is `STRING`.
168
+ #
169
+ # @return [Boolean] `true` when `STRING`, `false` otherwise.
170
+ #
121
171
  def string?
122
172
  mode == "STRING"
123
173
  end
124
174
 
125
175
  ##
126
176
  # Checks if the mode of the field is `INTEGER`.
177
+ #
178
+ # @return [Boolean] `true` when `INTEGER`, `false` otherwise.
179
+ #
127
180
  def integer?
128
181
  mode == "INTEGER"
129
182
  end
130
183
 
131
184
  ##
132
185
  # Checks if the mode of the field is `FLOAT`.
186
+ #
187
+ # @return [Boolean] `true` when `FLOAT`, `false` otherwise.
188
+ #
133
189
  def float?
134
190
  mode == "FLOAT"
135
191
  end
136
192
 
137
193
  ##
138
194
  # Checks if the mode of the field is `BOOLEAN`.
195
+ #
196
+ # @return [Boolean] `true` when `BOOLEAN`, `false` otherwise.
197
+ #
139
198
  def boolean?
140
199
  mode == "BOOLEAN"
141
200
  end
142
201
 
143
202
  ##
144
203
  # Checks if the mode of the field is `BYTES`.
204
+ #
205
+ # @return [Boolean] `true` when `BYTES`, `false` otherwise.
206
+ #
145
207
  def bytes?
146
208
  mode == "BYTES"
147
209
  end
148
210
 
149
211
  ##
150
212
  # Checks if the mode of the field is `TIMESTAMP`.
213
+ #
214
+ # @return [Boolean] `true` when `TIMESTAMP`, `false` otherwise.
215
+ #
151
216
  def timestamp?
152
217
  mode == "TIMESTAMP"
153
218
  end
154
219
 
155
220
  ##
156
221
  # Checks if the mode of the field is `TIME`.
222
+ #
223
+ # @return [Boolean] `true` when `TIME`, `false` otherwise.
224
+ #
157
225
  def time?
158
226
  mode == "TIME"
159
227
  end
160
228
 
161
229
  ##
162
230
  # Checks if the mode of the field is `DATETIME`.
231
+ #
232
+ # @return [Boolean] `true` when `DATETIME`, `false` otherwise.
233
+ #
163
234
  def datetime?
164
235
  mode == "DATETIME"
165
236
  end
166
237
 
167
238
  ##
168
239
  # Checks if the mode of the field is `DATE`.
240
+ #
241
+ # @return [Boolean] `true` when `DATE`, `false` otherwise.
242
+ #
169
243
  def date?
170
244
  mode == "DATE"
171
245
  end
172
246
 
173
247
  ##
174
248
  # Checks if the mode of the field is `RECORD`.
249
+ #
250
+ # @return [Boolean] `true` when `RECORD`, `false` otherwise.
251
+ #
175
252
  def record?
176
253
  mode == "RECORD"
177
254
  end
@@ -179,6 +256,10 @@ module Google
179
256
  ##
180
257
  # The nested fields if the type property is set to `RECORD`. Will be
181
258
  # empty otherwise.
259
+ #
260
+ # @return [Array<Field>, nil] The nested schema fields if the type
261
+ # is set to `RECORD`.
262
+ #
182
263
  def fields
183
264
  if frozen?
184
265
  Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
@@ -190,13 +271,20 @@ module Google
190
271
  ##
191
272
  # The names of the nested fields as symbols if the type property is
192
273
  # set to `RECORD`. Will be empty otherwise.
274
+ #
275
+ # @return [Array<Symbol>, nil] The names of the nested schema fields
276
+ # if the type is set to `RECORD`.
277
+ #
193
278
  def headers
194
279
  fields.map(&:name).map(&:to_sym)
195
280
  end
196
281
 
197
282
  ##
198
- # Retreive a nested fields by name, if the type property is
283
+ # Retrieve a nested field by name, if the type property is
199
284
  # set to `RECORD`. Will return `nil` otherwise.
285
+ #
286
+ # @return [Field, nil] The nested schema field object, or `nil`.
287
+ #
200
288
  def field name
201
289
  f = fields.find { |fld| fld.name == name.to_s }
202
290
  return nil if f.nil?
@@ -205,7 +293,7 @@ module Google
205
293
  end
206
294
 
207
295
  ##
208
- # Adds a string field to the schema.
296
+ # Adds a string field to the nested schema of a record field.
209
297
  #
210
298
  # This can only be called on fields that are of type `RECORD`.
211
299
  #
@@ -217,6 +305,7 @@ module Google
217
305
  # @param [Symbol] mode The field's mode. The possible values are
218
306
  # `:nullable`, `:required`, and `:repeated`. The default value is
219
307
  # `:nullable`.
308
+ #
220
309
  def string name, description: nil, mode: :nullable
221
310
  record_check!
222
311
 
@@ -224,7 +313,7 @@ module Google
224
313
  end
225
314
 
226
315
  ##
227
- # Adds an integer field to the schema.
316
+ # Adds an integer field to the nested schema of a record field.
228
317
  #
229
318
  # This can only be called on fields that are of type `RECORD`.
230
319
  #
@@ -236,6 +325,7 @@ module Google
236
325
  # @param [Symbol] mode The field's mode. The possible values are
237
326
  # `:nullable`, `:required`, and `:repeated`. The default value is
238
327
  # `:nullable`.
328
+ #
239
329
  def integer name, description: nil, mode: :nullable
240
330
  record_check!
241
331
 
@@ -243,7 +333,8 @@ module Google
243
333
  end
244
334
 
245
335
  ##
246
- # Adds a floating-point number field to the schema.
336
+ # Adds a floating-point number field to the nested schema of a record
337
+ # field.
247
338
  #
248
339
  # This can only be called on fields that are of type `RECORD`.
249
340
  #
@@ -255,6 +346,7 @@ module Google
255
346
  # @param [Symbol] mode The field's mode. The possible values are
256
347
  # `:nullable`, `:required`, and `:repeated`. The default value is
257
348
  # `:nullable`.
349
+ #
258
350
  def float name, description: nil, mode: :nullable
259
351
  record_check!
260
352
 
@@ -262,7 +354,7 @@ module Google
262
354
  end
263
355
 
264
356
  ##
265
- # Adds a boolean field to the schema.
357
+ # Adds a boolean field to the nested schema of a record field.
266
358
  #
267
359
  # This can only be called on fields that are of type `RECORD`.
268
360
  #
@@ -274,6 +366,7 @@ module Google
274
366
  # @param [Symbol] mode The field's mode. The possible values are
275
367
  # `:nullable`, `:required`, and `:repeated`. The default value is
276
368
  # `:nullable`.
369
+ #
277
370
  def boolean name, description: nil, mode: :nullable
278
371
  record_check!
279
372
 
@@ -281,7 +374,7 @@ module Google
281
374
  end
282
375
 
283
376
  ##
284
- # Adds a bytes field to the schema.
377
+ # Adds a bytes field to the nested schema of a record field.
285
378
  #
286
379
  # This can only be called on fields that are of type `RECORD`.
287
380
  #
@@ -293,6 +386,7 @@ module Google
293
386
  # @param [Symbol] mode The field's mode. The possible values are
294
387
  # `:nullable`, `:required`, and `:repeated`. The default value is
295
388
  # `:nullable`.
389
+ #
296
390
  def bytes name, description: nil, mode: :nullable
297
391
  record_check!
298
392
 
@@ -300,7 +394,7 @@ module Google
300
394
  end
301
395
 
302
396
  ##
303
- # Adds a timestamp field to the schema.
397
+ # Adds a timestamp field to the nested schema of a record field.
304
398
  #
305
399
  # This can only be called on fields that are of type `RECORD`.
306
400
  #
@@ -312,6 +406,7 @@ module Google
312
406
  # @param [Symbol] mode The field's mode. The possible values are
313
407
  # `:nullable`, `:required`, and `:repeated`. The default value is
314
408
  # `:nullable`.
409
+ #
315
410
  def timestamp name, description: nil, mode: :nullable
316
411
  record_check!
317
412
 
@@ -319,7 +414,7 @@ module Google
319
414
  end
320
415
 
321
416
  ##
322
- # Adds a time field to the schema.
417
+ # Adds a time field to the nested schema of a record field.
323
418
  #
324
419
  # This can only be called on fields that are of type `RECORD`.
325
420
  #
@@ -331,6 +426,7 @@ module Google
331
426
  # @param [Symbol] mode The field's mode. The possible values are
332
427
  # `:nullable`, `:required`, and `:repeated`. The default value is
333
428
  # `:nullable`.
429
+ #
334
430
  def time name, description: nil, mode: :nullable
335
431
  record_check!
336
432
 
@@ -338,7 +434,7 @@ module Google
338
434
  end
339
435
 
340
436
  ##
341
- # Adds a datetime field to the schema.
437
+ # Adds a datetime field to the nested schema of a record field.
342
438
  #
343
439
  # This can only be called on fields that are of type `RECORD`.
344
440
  #
@@ -350,6 +446,7 @@ module Google
350
446
  # @param [Symbol] mode The field's mode. The possible values are
351
447
  # `:nullable`, `:required`, and `:repeated`. The default value is
352
448
  # `:nullable`.
449
+ #
353
450
  def datetime name, description: nil, mode: :nullable
354
451
  record_check!
355
452
 
@@ -357,7 +454,7 @@ module Google
357
454
  end
358
455
 
359
456
  ##
360
- # Adds a date field to the schema.
457
+ # Adds a date field to the nested schema of a record field.
361
458
  #
362
459
  # This can only be called on fields that are of type `RECORD`.
363
460
  #
@@ -369,6 +466,7 @@ module Google
369
466
  # @param [Symbol] mode The field's mode. The possible values are
370
467
  # `:nullable`, `:required`, and `:repeated`. The default value is
371
468
  # `:nullable`.
469
+ #
372
470
  def date name, description: nil, mode: :nullable
373
471
  record_check!
374
472
 
@@ -376,10 +474,10 @@ module Google
376
474
  end
377
475
 
378
476
  ##
379
- # Adds a record field to the schema. A block must be passed describing
380
- # the nested fields of the record. For more information about nested
381
- # and repeated records, see [Preparing Data for BigQuery
382
- # ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
477
+ # Adds a record field to the nested schema of a record field. A block
478
+ # must be passed describing the nested fields of the record. For more
479
+ # information about nested and repeated records, see [Preparing Data
480
+ # for BigQuery](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
383
481
  #
384
482
  # This can only be called on fields that are of type `RECORD`.
385
483
  #
@@ -405,7 +503,10 @@ module Google
405
503
  # table.schema do |schema|
406
504
  # schema.string "first_name", mode: :required
407
505
  # schema.record "cities_lived", mode: :repeated do |cities_lived|
408
- # cities_lived.string "place", mode: :required
506
+ # cities_lived.record "city", mode: :required do |city|
507
+ # city.string "name", mode: :required
508
+ # city.string "country", mode: :required
509
+ # end
409
510
  # cities_lived.integer "number_of_years", mode: :required
410
511
  # end
411
512
  # end
@@ -18,7 +18,7 @@ require "google/cloud/bigquery/convert"
18
18
  require "google/cloud/errors"
19
19
  require "google/apis/bigquery_v2"
20
20
  require "pathname"
21
- require "digest/md5"
21
+ require "securerandom"
22
22
  require "mime/types"
23
23
  require "date"
24
24
 
@@ -61,7 +61,7 @@ module Google
61
61
  service.client_options.open_timeout_sec = timeout
62
62
  service.client_options.read_timeout_sec = timeout
63
63
  service.client_options.send_timeout_sec = timeout
64
- service.request_options.retries = @retries || 3
64
+ service.request_options.retries = 0 # handle retries in #execute
65
65
  service.request_options.header ||= {}
66
66
  service.request_options.header["x-goog-api-client"] = \
67
67
  "gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
@@ -75,17 +75,19 @@ module Google
75
75
  # Lists all datasets in the specified project to which you have
76
76
  # been granted the READER dataset role.
77
77
  def list_datasets options = {}
78
- execute do
78
+ # The list operation is considered idempotent
79
+ execute backoff: true do
79
80
  service.list_datasets \
80
- @project, all: options[:all], max_results: options[:max],
81
- page_token: options[:token]
81
+ @project, all: options[:all], filter: options[:filter],
82
+ max_results: options[:max], page_token: options[:token]
82
83
  end
83
84
  end
84
85
 
85
86
  ##
86
87
  # Returns the dataset specified by datasetID.
87
88
  def get_dataset dataset_id
88
- execute { service.get_dataset @project, dataset_id }
89
+ # The get operation is considered idempotent
90
+ execute(backoff: true) { service.get_dataset @project, dataset_id }
89
91
  end
90
92
 
91
93
  ##
@@ -98,8 +100,16 @@ module Google
98
100
  # Updates information in an existing dataset, only replacing
99
101
  # fields that are provided in the submitted dataset resource.
100
102
  def patch_dataset dataset_id, patched_dataset_gapi
101
- execute do
102
- service.patch_dataset @project, dataset_id, patched_dataset_gapi
103
+ patch_with_backoff = false
104
+ options = {}
105
+ if patched_dataset_gapi.etag
106
+ options[:header] = { "If-Match" => patched_dataset_gapi.etag }
107
+ # The patch with etag operation is considered idempotent
108
+ patch_with_backoff = true
109
+ end
110
+ execute backoff: patch_with_backoff do
111
+ service.patch_dataset @project, dataset_id, patched_dataset_gapi,
112
+ options: options
103
113
  end
104
114
  end
105
115
 
@@ -119,7 +129,8 @@ module Google
119
129
  # Lists all tables in the specified dataset.
120
130
  # Requires the READER dataset role.
121
131
  def list_tables dataset_id, options = {}
122
- execute do
132
+ # The list operation is considered idempotent
133
+ execute backoff: true do
123
134
  service.list_tables @project, dataset_id,
124
135
  max_results: options[:max],
125
136
  page_token: options[:token]
@@ -127,7 +138,10 @@ module Google
127
138
  end
128
139
 
129
140
  def get_project_table project_id, dataset_id, table_id
130
- execute { service.get_table project_id, dataset_id, table_id }
141
+ # The get operation is considered idempotent
142
+ execute backoff: true do
143
+ service.get_table project_id, dataset_id, table_id
144
+ end
131
145
  end
132
146
 
133
147
  ##
@@ -136,7 +150,10 @@ module Google
136
150
  # it only returns the table resource,
137
151
  # which describes the structure of this table.
138
152
  def get_table dataset_id, table_id
139
- execute { get_project_table @project, dataset_id, table_id }
153
+ # The get operation is considered idempotent
154
+ execute backoff: true do
155
+ get_project_table @project, dataset_id, table_id
156
+ end
140
157
  end
141
158
 
142
159
  ##
@@ -149,9 +166,16 @@ module Google
149
166
  # Updates information in an existing table, replacing fields that
150
167
  # are provided in the submitted table resource.
151
168
  def patch_table dataset_id, table_id, patched_table_gapi
152
- execute do
169
+ patch_with_backoff = false
170
+ options = {}
171
+ if patched_table_gapi.etag
172
+ options[:header] = { "If-Match" => patched_table_gapi.etag }
173
+ # The patch with etag operation is considered idempotent
174
+ patch_with_backoff = true
175
+ end
176
+ execute backoff: patch_with_backoff do
153
177
  service.patch_table @project, dataset_id, table_id,
154
- patched_table_gapi
178
+ patched_table_gapi, options: options
155
179
  end
156
180
  end
157
181
 
@@ -165,7 +189,8 @@ module Google
165
189
  ##
166
190
  # Retrieves data from the table.
167
191
  def list_tabledata dataset_id, table_id, options = {}
168
- execute do
192
+ # The list operation is considered idempotent
193
+ execute backoff: true do
169
194
  service.list_table_data @project, dataset_id, table_id,
170
195
  max_results: options.delete(:max),
171
196
  page_token: options.delete(:token),
@@ -176,8 +201,8 @@ module Google
176
201
  def insert_tabledata dataset_id, table_id, rows, options = {}
177
202
  insert_rows = Array(rows).map do |row|
178
203
  Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(
179
- insert_id: Digest::MD5.base64digest(row.to_json),
180
- json: row
204
+ insert_id: SecureRandom.uuid,
205
+ json: Convert.to_json_row(row)
181
206
  )
182
207
  end
183
208
  insert_req = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(
@@ -186,7 +211,8 @@ module Google
186
211
  skip_invalid_rows: options[:skip_invalid]
187
212
  )
188
213
 
189
- execute do
214
+ # The insertAll with insertId operation is considered idempotent
215
+ execute backoff: true do
190
216
  service.insert_all_table_data(
191
217
  @project, dataset_id, table_id, insert_req)
192
218
  end
@@ -196,7 +222,8 @@ module Google
196
222
  # Lists all jobs in the specified project to which you have
197
223
  # been granted the READER job role.
198
224
  def list_jobs options = {}
199
- execute do
225
+ # The list operation is considered idempotent
226
+ execute backoff: true do
200
227
  service.list_jobs \
201
228
  @project, all_users: options[:all], max_results: options[:max],
202
229
  page_token: options[:token], projection: "full",
@@ -207,35 +234,37 @@ module Google
207
234
  ##
208
235
  # Cancel the job specified by jobId.
209
236
  def cancel_job job_id
210
- execute { service.cancel_job @project, job_id }
237
+ # The BigQuery team has told us cancelling is considered idempotent
238
+ execute(backoff: true) { service.cancel_job @project, job_id }
211
239
  end
212
240
 
213
241
  ##
214
242
  # Returns the job specified by jobID.
215
243
  def get_job job_id
216
- execute { service.get_job @project, job_id }
244
+ # The get operation is considered idempotent
245
+ execute(backoff: true) { service.get_job @project, job_id }
217
246
  end
218
247
 
219
248
  def insert_job config
220
249
  job_object = API::Job.new(
250
+ job_reference: job_ref_from(nil, nil),
221
251
  configuration: config
222
252
  )
223
- execute { service.insert_job @project, job_object }
253
+ # Jobs have generated id, so this operation is considered idempotent
254
+ execute(backoff: true) { service.insert_job @project, job_object }
224
255
  end
225
256
 
226
257
  def query_job query, options = {}
227
258
  config = query_table_config(query, options)
228
- execute { service.insert_job @project, config }
229
- end
230
-
231
- def query query, options = {}
232
- execute { service.query_job @project, query_config(query, options) }
259
+ # Jobs have generated id, so this operation is considered idempotent
260
+ execute(backoff: true) { service.insert_job @project, config }
233
261
  end
234
262
 
235
263
  ##
236
264
  # Returns the query data for the job
237
265
  def job_query_results job_id, options = {}
238
- execute do
266
+ # The get operation is considered idempotent
267
+ execute backoff: true do
239
268
  service.get_job_query_results @project,
240
269
  job_id,
241
270
  max_results: options.delete(:max),
@@ -246,21 +275,24 @@ module Google
246
275
  end
247
276
 
248
277
  def copy_table source, target, options = {}
249
- execute do
278
+ # Jobs have generated id, so this operation is considered idempotent
279
+ execute backoff: true do
250
280
  service.insert_job @project, copy_table_config(
251
281
  source, target, options)
252
282
  end
253
283
  end
254
284
 
255
285
  def extract_table table, storage_files, options = {}
256
- execute do
286
+ # Jobs have generated id, so this operation is considered idempotent
287
+ execute backoff: true do
257
288
  service.insert_job \
258
289
  @project, extract_table_config(table, storage_files, options)
259
290
  end
260
291
  end
261
292
 
262
293
  def load_table_gs_url dataset_id, table_id, url, options = {}
263
- execute do
294
+ # Jobs have generated id, so this operation is considered idempotent
295
+ execute backoff: true do
264
296
  service.insert_job \
265
297
  @project, load_table_url_config(dataset_id, table_id,
266
298
  url, options)
@@ -268,7 +300,8 @@ module Google
268
300
  end
269
301
 
270
302
  def load_table_file dataset_id, table_id, file, options = {}
271
- execute do
303
+ # Jobs have generated id, so this operation is considered idempotent
304
+ execute backoff: true do
272
305
  service.insert_job \
273
306
  @project, load_table_file_config(
274
307
  dataset_id, table_id, file, options),
@@ -299,7 +332,7 @@ module Google
299
332
  ##
300
333
  # Lists all projects to which you have been granted any project role.
301
334
  def list_projects options = {}
302
- execute do
335
+ execute backoff: true do
303
336
  service.list_projects max_results: options[:max],
304
337
  page_token: options[:token]
305
338
  end
@@ -335,6 +368,23 @@ module Google
335
368
  end
336
369
  end
337
370
 
371
+ # Generate a random string similar to the BigQuery service job IDs.
372
+ def generate_id
373
+ SecureRandom.urlsafe_base64(21)
374
+ end
375
+
376
+ # If no job_id or prefix is given, always generate a client-side job ID
377
+ # anyway, so that a job insert retried by #execute stays idempotent.
378
+ # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
379
+ def job_ref_from job_id, prefix
380
+ prefix ||= "job_"
381
+ job_id ||= "#{prefix}#{generate_id}"
382
+ API::JobReference.new(
383
+ project_id: @project,
384
+ job_id: job_id
385
+ )
386
+ end
387
+
338
388
  def load_table_file_opts dataset_id, table_id, file, options = {}
339
389
  path = Pathname(file).to_path
340
390
  {
@@ -346,21 +396,26 @@ module Google
346
396
  projection_fields: projection_fields(options[:projection_fields]),
347
397
  allow_jagged_rows: options[:jagged_rows],
348
398
  allow_quoted_newlines: options[:quoted_newlines],
399
+ autodetect: options[:autodetect],
349
400
  encoding: options[:encoding], field_delimiter: options[:delimiter],
350
401
  ignore_unknown_values: options[:ignore_unknown],
351
- max_bad_records: options[:max_bad_records], quote: options[:quote],
402
+ max_bad_records: options[:max_bad_records],
403
+ null_marker: options[:null_marker], quote: options[:quote],
352
404
  schema: options[:schema], skip_leading_rows: options[:skip_leading]
353
405
  }.delete_if { |_, v| v.nil? }
354
406
  end
355
407
 
356
408
  def load_table_file_config dataset_id, table_id, file, options = {}
357
409
  load_opts = load_table_file_opts dataset_id, table_id, file, options
358
- API::Job.new(
410
+ req = API::Job.new(
411
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
359
412
  configuration: API::JobConfiguration.new(
360
413
  load: API::JobConfigurationLoad.new(load_opts),
361
414
  dry_run: options[:dryrun]
362
415
  )
363
416
  )
417
+ req.configuration.labels = options[:labels] if options[:labels]
418
+ req
364
419
  end
365
420
 
366
421
  def load_table_url_opts dataset_id, table_id, url, options = {}
@@ -374,21 +429,26 @@ module Google
374
429
  projection_fields: projection_fields(options[:projection_fields]),
375
430
  allow_jagged_rows: options[:jagged_rows],
376
431
  allow_quoted_newlines: options[:quoted_newlines],
432
+ autodetect: options[:autodetect],
377
433
  encoding: options[:encoding], field_delimiter: options[:delimiter],
378
434
  ignore_unknown_values: options[:ignore_unknown],
379
- max_bad_records: options[:max_bad_records], quote: options[:quote],
435
+ max_bad_records: options[:max_bad_records],
436
+ null_marker: options[:null_marker], quote: options[:quote],
380
437
  schema: options[:schema], skip_leading_rows: options[:skip_leading]
381
438
  }.delete_if { |_, v| v.nil? }
382
439
  end
383
440
 
384
441
  def load_table_url_config dataset_id, table_id, url, options = {}
385
442
  load_opts = load_table_url_opts dataset_id, table_id, url, options
386
- API::Job.new(
443
+ req = API::Job.new(
444
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
387
445
  configuration: API::JobConfiguration.new(
388
446
  load: API::JobConfigurationLoad.new(load_opts),
389
447
  dry_run: options[:dryrun]
390
448
  )
391
449
  )
450
+ req.configuration.labels = options[:labels] if options[:labels]
451
+ req
392
452
  end
393
453
 
394
454
  # rubocop:disable all
@@ -397,8 +457,9 @@ module Google
397
457
  # Job description for query job
398
458
  def query_table_config query, options
399
459
  dest_table = table_ref_from options[:table]
400
- default_dataset = dataset_ref_from options[:dataset]
460
+ dataset_config = dataset_ref_from options[:dataset], options[:project]
401
461
  req = API::Job.new(
462
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
402
463
  configuration: API::JobConfiguration.new(
403
464
  query: API::JobConfigurationQuery.new(
404
465
  query: query,
@@ -410,14 +471,16 @@ module Google
410
471
  write_disposition: write_disposition(options[:write]),
411
472
  allow_large_results: options[:large_results],
412
473
  flatten_results: options[:flatten],
413
- default_dataset: default_dataset,
474
+ default_dataset: dataset_config,
414
475
  use_legacy_sql: Convert.resolve_legacy_sql(
415
476
  options[:standard_sql], options[:legacy_sql]),
416
477
  maximum_billing_tier: options[:maximum_billing_tier],
417
- maximum_bytes_billed: options[:maximum_bytes_billed]
478
+ maximum_bytes_billed: options[:maximum_bytes_billed],
479
+ user_defined_function_resources: udfs(options[:udfs])
418
480
  )
419
481
  )
420
482
  )
483
+ req.configuration.labels = options[:labels] if options[:labels]
421
484
 
422
485
  if options[:params]
423
486
  if Array === options[:params]
@@ -439,6 +502,14 @@ module Google
439
502
  end
440
503
  end
441
504
 
505
+ if options[:external]
506
+ external_table_pairs = options[:external].map do |name, obj|
507
+ [String(name), obj.to_gapi]
508
+ end
509
+ external_table_hash = Hash[external_table_pairs]
510
+ req.configuration.query.table_definitions = external_table_hash
511
+ end
512
+
442
513
  req
443
514
  end
444
515
 
@@ -484,7 +555,8 @@ module Google
484
555
  ##
485
556
  # Job description for copy job
486
557
  def copy_table_config source, target, options = {}
487
- API::Job.new(
558
+ req = API::Job.new(
559
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
488
560
  configuration: API::JobConfiguration.new(
489
561
  copy: API::JobConfigurationTableCopy.new(
490
562
  source_table: source,
@@ -495,6 +567,8 @@ module Google
495
567
  dry_run: options[:dryrun]
496
568
  )
497
569
  )
570
+ req.configuration.labels = options[:labels] if options[:labels]
571
+ req
498
572
  end
499
573
 
500
574
  def extract_table_config table, storage_files, options = {}
@@ -502,7 +576,8 @@ module Google
502
576
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
503
577
  end
504
578
  dest_format = source_format storage_urls.first, options[:format]
505
- API::Job.new(
579
+ req = API::Job.new(
580
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
506
581
  configuration: API::JobConfiguration.new(
507
582
  extract: API::JobConfigurationExtract.new(
508
583
  destination_uris: Array(storage_urls),
@@ -515,6 +590,8 @@ module Google
515
590
  dry_run: options[:dryrun]
516
591
  )
517
592
  )
593
+ req.configuration.labels = options[:labels] if options[:labels]
594
+ req
518
595
  end
519
596
 
520
597
  def create_disposition str
@@ -550,6 +627,7 @@ module Google
550
627
  "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
551
628
  "avro" => "AVRO",
552
629
  "datastore" => "DATASTORE_BACKUP",
630
+ "backup" => "DATASTORE_BACKUP",
553
631
  "datastore_backup" => "DATASTORE_BACKUP"
554
632
  }[format.to_s.downcase]
555
633
  return val unless val.nil?
@@ -573,11 +651,86 @@ module Google
573
651
  nil
574
652
  end
575
653
 
576
- def execute
577
- yield
654
+ def udfs array_or_str
655
+ Array(array_or_str).map do |uri_or_code|
656
+ resource = API::UserDefinedFunctionResource.new
657
+ if uri_or_code.start_with?("gs://")
658
+ resource.resource_uri = uri_or_code
659
+ else
660
+ resource.inline_code = uri_or_code
661
+ end
662
+ resource
663
+ end
664
+ end
665
+
666
+ def execute backoff: nil
667
+ if backoff
668
+ Backoff.new(retries: retries).execute { yield }
669
+ else
670
+ yield
671
+ end
578
672
  rescue Google::Apis::Error => e
579
673
  raise Google::Cloud::Error.from_error(e)
580
674
  end
675
+
676
+ class Backoff
677
+ class << self
678
+ attr_accessor :retries
679
+ attr_accessor :reasons
680
+ attr_accessor :backoff
681
+ end
682
+ self.retries = 5
683
+ self.reasons = %w(rateLimitExceeded backendError)
684
+ self.backoff = lambda do |retries|
685
+ # Max delay is 32 seconds
686
+ # See "Back-off Requirements" here:
687
+ # https://cloud.google.com/bigquery/sla
688
+ retries = 5 if retries > 5
689
+ delay = 2 ** retries
690
+ sleep delay
691
+ end
692
+
693
+ def initialize options = {}
694
+ @retries = (options[:retries] || Backoff.retries).to_i
695
+ @reasons = (options[:reasons] || Backoff.reasons).to_a
696
+ @backoff = options[:backoff] || Backoff.backoff
697
+ end
698
+
699
+ def execute
700
+ current_retries = 0
701
+ loop do
702
+ begin
703
+ return yield
704
+ rescue Google::Apis::Error => e
705
+ raise e unless retry? e.body, current_retries
706
+
707
+ @backoff.call current_retries
708
+ current_retries += 1
709
+ end
710
+ end
711
+ end
712
+
713
+ protected
714
+
715
+ def retry? result, current_retries #:nodoc:
716
+ if current_retries < @retries
717
+ return true if retry_error_reason? result
718
+ end
719
+ false
720
+ end
721
+
722
+ def retry_error_reason? err_body
723
+ err_hash = JSON.parse err_body
724
+ json_errors = Array err_hash["error"]["errors"]
725
+ return false if json_errors.empty?
726
+ json_errors.each do |json_error|
727
+ return false unless @reasons.include? json_error["reason"]
728
+ end
729
+ true
730
+ rescue
731
+ false
732
+ end
733
+ end
581
734
  end
582
735
  end
583
736
  end