google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,6 +34,7 @@ module Google
34
34
  #
35
35
  # field = table.schema.field "name"
36
36
  # field.required? #=> true
37
+ #
37
38
  class Field
38
39
  # @private
39
40
  MODES = %w( NULLABLE REQUIRED REPEATED )
@@ -45,6 +46,11 @@ module Google
45
46
  ##
46
47
  # The name of the field.
47
48
  #
49
+ # @return [String] The field name. The name must contain only
50
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
51
+ # start with a letter or underscore. The maximum length is 128
52
+ # characters.
53
+ #
48
54
  def name
49
55
  @gapi.name
50
56
  end
@@ -52,19 +58,38 @@ module Google
52
58
  ##
53
59
  # Updates the name of the field.
54
60
  #
61
+ # @param [String] new_name The field name. The name must contain only
62
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
63
+ # start with a letter or underscore. The maximum length is 128
64
+ # characters.
65
+ #
55
66
  def name= new_name
56
67
  @gapi.update! name: String(new_name)
57
68
  end
58
69
 
59
70
  ##
60
- # The type of the field.
71
+ # The data type of the field.
72
+ #
73
+ # @return [String] The field data type. Possible values include
74
+ # `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
75
+ # `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
76
+ # `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
77
+ # (where `RECORD` indicates that the field contains a nested schema)
78
+ # or `STRUCT` (same as `RECORD`).
61
79
  #
62
80
  def type
63
81
  @gapi.type
64
82
  end
65
83
 
66
84
  ##
67
- # Updates the type of the field.
85
+ # Updates the data type of the field.
86
+ #
87
+ # @param [String] new_type The data type. Possible values include
88
+ # `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
89
+ # `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
90
+ # `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
91
+ # (where `RECORD` indicates that the field contains a nested schema)
92
+ # or `STRUCT` (same as `RECORD`).
68
93
  #
69
94
  def type= new_type
70
95
  @gapi.update! type: verify_type(new_type)
@@ -72,18 +97,27 @@ module Google
72
97
 
73
98
  ##
74
99
  # Checks if the mode of the field is `NULLABLE`.
100
+ #
101
+ # @return [Boolean] `true` when `NULLABLE`, `false` otherwise.
102
+ #
75
103
  def nullable?
76
104
  mode == "NULLABLE"
77
105
  end
78
106
 
79
107
  ##
80
108
  # Checks if the mode of the field is `REQUIRED`.
109
+ #
110
+ # @return [Boolean] `true` when `REQUIRED`, `false` otherwise.
111
+ #
81
112
  def required?
82
113
  mode == "REQUIRED"
83
114
  end
84
115
 
85
116
  ##
86
117
  # Checks if the mode of the field is `REPEATED`.
118
+ #
119
+ # @return [Boolean] `true` when `REPEATED`, `false` otherwise.
120
+ #
87
121
  def repeated?
88
122
  mode == "REPEATED"
89
123
  end
@@ -91,6 +125,9 @@ module Google
91
125
  ##
92
126
  # The description of the field.
93
127
  #
128
+ # @return [String] The field description. The maximum length is 1,024
129
+ # characters.
130
+ #
94
131
  def description
95
132
  @gapi.description
96
133
  end
@@ -98,6 +135,9 @@ module Google
98
135
  ##
99
136
  # Updates the description of the field.
100
137
  #
138
+ # @param [String] new_description The field description. The maximum
139
+ # length is 1,024 characters.
140
+ #
101
141
  def description= new_description
102
142
  @gapi.update! description: new_description
103
143
  end
@@ -105,6 +145,9 @@ module Google
105
145
  ##
106
146
  # The mode of the field.
107
147
  #
148
+ # @return [String] The field mode. Possible values include `NULLABLE`,
149
+ # `REQUIRED` and `REPEATED`. The default value is `NULLABLE`.
150
+ #
108
151
  def mode
109
152
  @gapi.mode
110
153
  end
@@ -112,66 +155,100 @@ module Google
112
155
  ##
113
156
  # Updates the mode of the field.
114
157
  #
158
+ # @param [String] new_mode The field mode. Possible values include
159
+ # `NULLABLE`, `REQUIRED` and `REPEATED`. The default value is
160
+ # `NULLABLE`.
161
+ #
115
162
  def mode= new_mode
116
163
  @gapi.update! mode: verify_mode(new_mode)
117
164
  end
118
165
 
119
166
  ##
120
167
  # Checks if the mode of the field is `STRING`.
168
+ #
169
+ # @return [Boolean] `true` when `STRING`, `false` otherwise.
170
+ #
121
171
  def string?
122
172
  mode == "STRING"
123
173
  end
124
174
 
125
175
  ##
126
176
  # Checks if the mode of the field is `INTEGER`.
177
+ #
178
+ # @return [Boolean] `true` when `INTEGER`, `false` otherwise.
179
+ #
127
180
  def integer?
128
181
  mode == "INTEGER"
129
182
  end
130
183
 
131
184
  ##
132
185
  # Checks if the mode of the field is `FLOAT`.
186
+ #
187
+ # @return [Boolean] `true` when `FLOAT`, `false` otherwise.
188
+ #
133
189
  def float?
134
190
  mode == "FLOAT"
135
191
  end
136
192
 
137
193
  ##
138
194
  # Checks if the mode of the field is `BOOLEAN`.
195
+ #
196
+ # @return [Boolean] `true` when `BOOLEAN`, `false` otherwise.
197
+ #
139
198
  def boolean?
140
199
  mode == "BOOLEAN"
141
200
  end
142
201
 
143
202
  ##
144
203
  # Checks if the mode of the field is `BYTES`.
204
+ #
205
+ # @return [Boolean] `true` when `BYTES`, `false` otherwise.
206
+ #
145
207
  def bytes?
146
208
  mode == "BYTES"
147
209
  end
148
210
 
149
211
  ##
150
212
  # Checks if the mode of the field is `TIMESTAMP`.
213
+ #
214
+ # @return [Boolean] `true` when `TIMESTAMP`, `false` otherwise.
215
+ #
151
216
  def timestamp?
152
217
  mode == "TIMESTAMP"
153
218
  end
154
219
 
155
220
  ##
156
221
  # Checks if the mode of the field is `TIME`.
222
+ #
223
+ # @return [Boolean] `true` when `TIME`, `false` otherwise.
224
+ #
157
225
  def time?
158
226
  mode == "TIME"
159
227
  end
160
228
 
161
229
  ##
162
230
  # Checks if the mode of the field is `DATETIME`.
231
+ #
232
+ # @return [Boolean] `true` when `DATETIME`, `false` otherwise.
233
+ #
163
234
  def datetime?
164
235
  mode == "DATETIME"
165
236
  end
166
237
 
167
238
  ##
168
239
  # Checks if the mode of the field is `DATE`.
240
+ #
241
+ # @return [Boolean] `true` when `DATE`, `false` otherwise.
242
+ #
169
243
  def date?
170
244
  mode == "DATE"
171
245
  end
172
246
 
173
247
  ##
174
248
  # Checks if the mode of the field is `RECORD`.
249
+ #
250
+ # @return [Boolean] `true` when `RECORD`, `false` otherwise.
251
+ #
175
252
  def record?
176
253
  mode == "RECORD"
177
254
  end
@@ -179,6 +256,10 @@ module Google
179
256
  ##
180
257
  # The nested fields if the type property is set to `RECORD`. Will be
181
258
  # empty otherwise.
259
+ #
260
+ # @return [Array<Field>, nil] The nested schema fields if the type
261
+ # is set to `RECORD`.
262
+ #
182
263
  def fields
183
264
  if frozen?
184
265
  Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
@@ -190,13 +271,20 @@ module Google
190
271
  ##
191
272
  # The names of the nested fields as symbols if the type property is
192
273
  # set to `RECORD`. Will be empty otherwise.
274
+ #
275
+ # @return [Array<Symbol>, nil] The names of the nested schema fields
276
+ # if the type is set to `RECORD`.
277
+ #
193
278
  def headers
194
279
  fields.map(&:name).map(&:to_sym)
195
280
  end
196
281
 
197
282
  ##
198
- # Retreive a nested fields by name, if the type property is
283
+ # Retrieve a nested field by name, if the type property is
199
284
  # set to `RECORD`. Will return `nil` otherwise.
285
+ #
286
+ # @return [Field, nil] The nested schema field object, or `nil`.
287
+ #
200
288
  def field name
201
289
  f = fields.find { |fld| fld.name == name.to_s }
202
290
  return nil if f.nil?
@@ -205,7 +293,7 @@ module Google
205
293
  end
206
294
 
207
295
  ##
208
- # Adds a string field to the schema.
296
+ # Adds a string field to the nested schema of a record field.
209
297
  #
210
298
  # This can only be called on fields that are of type `RECORD`.
211
299
  #
@@ -217,6 +305,7 @@ module Google
217
305
  # @param [Symbol] mode The field's mode. The possible values are
218
306
  # `:nullable`, `:required`, and `:repeated`. The default value is
219
307
  # `:nullable`.
308
+ #
220
309
  def string name, description: nil, mode: :nullable
221
310
  record_check!
222
311
 
@@ -224,7 +313,7 @@ module Google
224
313
  end
225
314
 
226
315
  ##
227
- # Adds an integer field to the schema.
316
+ # Adds an integer field to the nested schema of a record field.
228
317
  #
229
318
  # This can only be called on fields that are of type `RECORD`.
230
319
  #
@@ -236,6 +325,7 @@ module Google
236
325
  # @param [Symbol] mode The field's mode. The possible values are
237
326
  # `:nullable`, `:required`, and `:repeated`. The default value is
238
327
  # `:nullable`.
328
+ #
239
329
  def integer name, description: nil, mode: :nullable
240
330
  record_check!
241
331
 
@@ -243,7 +333,8 @@ module Google
243
333
  end
244
334
 
245
335
  ##
246
- # Adds a floating-point number field to the schema.
336
+ # Adds a floating-point number field to the nested schema of a record
337
+ # field.
247
338
  #
248
339
  # This can only be called on fields that are of type `RECORD`.
249
340
  #
@@ -255,6 +346,7 @@ module Google
255
346
  # @param [Symbol] mode The field's mode. The possible values are
256
347
  # `:nullable`, `:required`, and `:repeated`. The default value is
257
348
  # `:nullable`.
349
+ #
258
350
  def float name, description: nil, mode: :nullable
259
351
  record_check!
260
352
 
@@ -262,7 +354,7 @@ module Google
262
354
  end
263
355
 
264
356
  ##
265
- # Adds a boolean field to the schema.
357
+ # Adds a boolean field to the nested schema of a record field.
266
358
  #
267
359
  # This can only be called on fields that are of type `RECORD`.
268
360
  #
@@ -274,6 +366,7 @@ module Google
274
366
  # @param [Symbol] mode The field's mode. The possible values are
275
367
  # `:nullable`, `:required`, and `:repeated`. The default value is
276
368
  # `:nullable`.
369
+ #
277
370
  def boolean name, description: nil, mode: :nullable
278
371
  record_check!
279
372
 
@@ -281,7 +374,7 @@ module Google
281
374
  end
282
375
 
283
376
  ##
284
- # Adds a bytes field to the schema.
377
+ # Adds a bytes field to the nested schema of a record field.
285
378
  #
286
379
  # This can only be called on fields that are of type `RECORD`.
287
380
  #
@@ -293,6 +386,7 @@ module Google
293
386
  # @param [Symbol] mode The field's mode. The possible values are
294
387
  # `:nullable`, `:required`, and `:repeated`. The default value is
295
388
  # `:nullable`.
389
+ #
296
390
  def bytes name, description: nil, mode: :nullable
297
391
  record_check!
298
392
 
@@ -300,7 +394,7 @@ module Google
300
394
  end
301
395
 
302
396
  ##
303
- # Adds a timestamp field to the schema.
397
+ # Adds a timestamp field to the nested schema of a record field.
304
398
  #
305
399
  # This can only be called on fields that are of type `RECORD`.
306
400
  #
@@ -312,6 +406,7 @@ module Google
312
406
  # @param [Symbol] mode The field's mode. The possible values are
313
407
  # `:nullable`, `:required`, and `:repeated`. The default value is
314
408
  # `:nullable`.
409
+ #
315
410
  def timestamp name, description: nil, mode: :nullable
316
411
  record_check!
317
412
 
@@ -319,7 +414,7 @@ module Google
319
414
  end
320
415
 
321
416
  ##
322
- # Adds a time field to the schema.
417
+ # Adds a time field to the nested schema of a record field.
323
418
  #
324
419
  # This can only be called on fields that are of type `RECORD`.
325
420
  #
@@ -331,6 +426,7 @@ module Google
331
426
  # @param [Symbol] mode The field's mode. The possible values are
332
427
  # `:nullable`, `:required`, and `:repeated`. The default value is
333
428
  # `:nullable`.
429
+ #
334
430
  def time name, description: nil, mode: :nullable
335
431
  record_check!
336
432
 
@@ -338,7 +434,7 @@ module Google
338
434
  end
339
435
 
340
436
  ##
341
- # Adds a datetime field to the schema.
437
+ # Adds a datetime field to the nested schema of a record field.
342
438
  #
343
439
  # This can only be called on fields that are of type `RECORD`.
344
440
  #
@@ -350,6 +446,7 @@ module Google
350
446
  # @param [Symbol] mode The field's mode. The possible values are
351
447
  # `:nullable`, `:required`, and `:repeated`. The default value is
352
448
  # `:nullable`.
449
+ #
353
450
  def datetime name, description: nil, mode: :nullable
354
451
  record_check!
355
452
 
@@ -357,7 +454,7 @@ module Google
357
454
  end
358
455
 
359
456
  ##
360
- # Adds a date field to the schema.
457
+ # Adds a date field to the nested schema of a record field.
361
458
  #
362
459
  # This can only be called on fields that are of type `RECORD`.
363
460
  #
@@ -369,6 +466,7 @@ module Google
369
466
  # @param [Symbol] mode The field's mode. The possible values are
370
467
  # `:nullable`, `:required`, and `:repeated`. The default value is
371
468
  # `:nullable`.
469
+ #
372
470
  def date name, description: nil, mode: :nullable
373
471
  record_check!
374
472
 
@@ -376,10 +474,10 @@ module Google
376
474
  end
377
475
 
378
476
  ##
379
- # Adds a record field to the schema. A block must be passed describing
380
- # the nested fields of the record. For more information about nested
381
- # and repeated records, see [Preparing Data for BigQuery
382
- # ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
477
+ # Adds a record field to the nested schema of a record field. A block
478
+ # must be passed describing the nested fields of the record. For more
479
+ # information about nested and repeated records, see [Preparing Data
480
+ # for BigQuery](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
383
481
  #
384
482
  # This can only be called on fields that are of type `RECORD`.
385
483
  #
@@ -405,7 +503,10 @@ module Google
405
503
  # table.schema do |schema|
406
504
  # schema.string "first_name", mode: :required
407
505
  # schema.record "cities_lived", mode: :repeated do |cities_lived|
408
- # cities_lived.string "place", mode: :required
506
+ # cities_lived.record "city", mode: :required do |city|
507
+ # city.string "name", mode: :required
508
+ # city.string "country", mode: :required
509
+ # end
409
510
  # cities_lived.integer "number_of_years", mode: :required
410
511
  # end
411
512
  # end
@@ -18,7 +18,7 @@ require "google/cloud/bigquery/convert"
18
18
  require "google/cloud/errors"
19
19
  require "google/apis/bigquery_v2"
20
20
  require "pathname"
21
- require "digest/md5"
21
+ require "securerandom"
22
22
  require "mime/types"
23
23
  require "date"
24
24
 
@@ -61,7 +61,7 @@ module Google
61
61
  service.client_options.open_timeout_sec = timeout
62
62
  service.client_options.read_timeout_sec = timeout
63
63
  service.client_options.send_timeout_sec = timeout
64
- service.request_options.retries = @retries || 3
64
+ service.request_options.retries = 0 # handle retries in #execute
65
65
  service.request_options.header ||= {}
66
66
  service.request_options.header["x-goog-api-client"] = \
67
67
  "gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
@@ -75,17 +75,19 @@ module Google
75
75
  # Lists all datasets in the specified project to which you have
76
76
  # been granted the READER dataset role.
77
77
  def list_datasets options = {}
78
- execute do
78
+ # The list operation is considered idempotent
79
+ execute backoff: true do
79
80
  service.list_datasets \
80
- @project, all: options[:all], max_results: options[:max],
81
- page_token: options[:token]
81
+ @project, all: options[:all], filter: options[:filter],
82
+ max_results: options[:max], page_token: options[:token]
82
83
  end
83
84
  end
84
85
 
85
86
  ##
86
87
  # Returns the dataset specified by datasetID.
87
88
  def get_dataset dataset_id
88
- execute { service.get_dataset @project, dataset_id }
89
+ # The get operation is considered idempotent
90
+ execute(backoff: true) { service.get_dataset @project, dataset_id }
89
91
  end
90
92
 
91
93
  ##
@@ -98,8 +100,16 @@ module Google
98
100
  # Updates information in an existing dataset, only replacing
99
101
  # fields that are provided in the submitted dataset resource.
100
102
  def patch_dataset dataset_id, patched_dataset_gapi
101
- execute do
102
- service.patch_dataset @project, dataset_id, patched_dataset_gapi
103
+ patch_with_backoff = false
104
+ options = {}
105
+ if patched_dataset_gapi.etag
106
+ options[:header] = { "If-Match" => patched_dataset_gapi.etag }
107
+ # The patch with etag operation is considered idempotent
108
+ patch_with_backoff = true
109
+ end
110
+ execute backoff: patch_with_backoff do
111
+ service.patch_dataset @project, dataset_id, patched_dataset_gapi,
112
+ options: options
103
113
  end
104
114
  end
105
115
 
@@ -119,7 +129,8 @@ module Google
119
129
  # Lists all tables in the specified dataset.
120
130
  # Requires the READER dataset role.
121
131
  def list_tables dataset_id, options = {}
122
- execute do
132
+ # The list operation is considered idempotent
133
+ execute backoff: true do
123
134
  service.list_tables @project, dataset_id,
124
135
  max_results: options[:max],
125
136
  page_token: options[:token]
@@ -127,7 +138,10 @@ module Google
127
138
  end
128
139
 
129
140
  def get_project_table project_id, dataset_id, table_id
130
- execute { service.get_table project_id, dataset_id, table_id }
141
+ # The get operation is considered idempotent
142
+ execute backoff: true do
143
+ service.get_table project_id, dataset_id, table_id
144
+ end
131
145
  end
132
146
 
133
147
  ##
@@ -136,7 +150,10 @@ module Google
136
150
  # it only returns the table resource,
137
151
  # which describes the structure of this table.
138
152
  def get_table dataset_id, table_id
139
- execute { get_project_table @project, dataset_id, table_id }
153
+ # The get operation is considered idempotent
154
+ execute backoff: true do
155
+ get_project_table @project, dataset_id, table_id
156
+ end
140
157
  end
141
158
 
142
159
  ##
@@ -149,9 +166,16 @@ module Google
149
166
  # Updates information in an existing table, replacing fields that
150
167
  # are provided in the submitted table resource.
151
168
  def patch_table dataset_id, table_id, patched_table_gapi
152
- execute do
169
+ patch_with_backoff = false
170
+ options = {}
171
+ if patched_table_gapi.etag
172
+ options[:header] = { "If-Match" => patched_table_gapi.etag }
173
+ # The patch with etag operation is considered idempotent
174
+ patch_with_backoff = true
175
+ end
176
+ execute backoff: patch_with_backoff do
153
177
  service.patch_table @project, dataset_id, table_id,
154
- patched_table_gapi
178
+ patched_table_gapi, options: options
155
179
  end
156
180
  end
157
181
 
@@ -165,7 +189,8 @@ module Google
165
189
  ##
166
190
  # Retrieves data from the table.
167
191
  def list_tabledata dataset_id, table_id, options = {}
168
- execute do
192
+ # The list operation is considered idempotent
193
+ execute backoff: true do
169
194
  service.list_table_data @project, dataset_id, table_id,
170
195
  max_results: options.delete(:max),
171
196
  page_token: options.delete(:token),
@@ -176,8 +201,8 @@ module Google
176
201
  def insert_tabledata dataset_id, table_id, rows, options = {}
177
202
  insert_rows = Array(rows).map do |row|
178
203
  Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(
179
- insert_id: Digest::MD5.base64digest(row.to_json),
180
- json: row
204
+ insert_id: SecureRandom.uuid,
205
+ json: Convert.to_json_row(row)
181
206
  )
182
207
  end
183
208
  insert_req = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(
@@ -186,7 +211,8 @@ module Google
186
211
  skip_invalid_rows: options[:skip_invalid]
187
212
  )
188
213
 
189
- execute do
214
+ # The insertAll with insertId operation is considered idempotent
215
+ execute backoff: true do
190
216
  service.insert_all_table_data(
191
217
  @project, dataset_id, table_id, insert_req)
192
218
  end
@@ -196,7 +222,8 @@ module Google
196
222
  # Lists all jobs in the specified project to which you have
197
223
  # been granted the READER job role.
198
224
  def list_jobs options = {}
199
- execute do
225
+ # The list operation is considered idempotent
226
+ execute backoff: true do
200
227
  service.list_jobs \
201
228
  @project, all_users: options[:all], max_results: options[:max],
202
229
  page_token: options[:token], projection: "full",
@@ -207,35 +234,37 @@ module Google
207
234
  ##
208
235
  # Cancel the job specified by jobId.
209
236
  def cancel_job job_id
210
- execute { service.cancel_job @project, job_id }
237
+ # The BigQuery team has told us cancelling is considered idempotent
238
+ execute(backoff: true) { service.cancel_job @project, job_id }
211
239
  end
212
240
 
213
241
  ##
214
242
  # Returns the job specified by jobID.
215
243
  def get_job job_id
216
- execute { service.get_job @project, job_id }
244
+ # The get operation is considered idempotent
245
+ execute(backoff: true) { service.get_job @project, job_id }
217
246
  end
218
247
 
219
248
  def insert_job config
220
249
  job_object = API::Job.new(
250
+ job_reference: job_ref_from(nil, nil),
221
251
  configuration: config
222
252
  )
223
- execute { service.insert_job @project, job_object }
253
+ # Jobs have generated id, so this operation is considered idempotent
254
+ execute(backoff: true) { service.insert_job @project, job_object }
224
255
  end
225
256
 
226
257
  def query_job query, options = {}
227
258
  config = query_table_config(query, options)
228
- execute { service.insert_job @project, config }
229
- end
230
-
231
- def query query, options = {}
232
- execute { service.query_job @project, query_config(query, options) }
259
+ # Jobs have generated id, so this operation is considered idempotent
260
+ execute(backoff: true) { service.insert_job @project, config }
233
261
  end
234
262
 
235
263
  ##
236
264
  # Returns the query data for the job
237
265
  def job_query_results job_id, options = {}
238
- execute do
266
+ # The get operation is considered idempotent
267
+ execute backoff: true do
239
268
  service.get_job_query_results @project,
240
269
  job_id,
241
270
  max_results: options.delete(:max),
@@ -246,21 +275,24 @@ module Google
246
275
  end
247
276
 
248
277
  def copy_table source, target, options = {}
249
- execute do
278
+ # Jobs have generated id, so this operation is considered idempotent
279
+ execute backoff: true do
250
280
  service.insert_job @project, copy_table_config(
251
281
  source, target, options)
252
282
  end
253
283
  end
254
284
 
255
285
  def extract_table table, storage_files, options = {}
256
- execute do
286
+ # Jobs have generated id, so this operation is considered idempotent
287
+ execute backoff: true do
257
288
  service.insert_job \
258
289
  @project, extract_table_config(table, storage_files, options)
259
290
  end
260
291
  end
261
292
 
262
293
  def load_table_gs_url dataset_id, table_id, url, options = {}
263
- execute do
294
+ # Jobs have generated id, so this operation is considered idempotent
295
+ execute backoff: true do
264
296
  service.insert_job \
265
297
  @project, load_table_url_config(dataset_id, table_id,
266
298
  url, options)
@@ -268,7 +300,8 @@ module Google
268
300
  end
269
301
 
270
302
  def load_table_file dataset_id, table_id, file, options = {}
271
- execute do
303
+ # Jobs have generated id, so this operation is considered idempotent
304
+ execute backoff: true do
272
305
  service.insert_job \
273
306
  @project, load_table_file_config(
274
307
  dataset_id, table_id, file, options),
@@ -299,7 +332,7 @@ module Google
299
332
  ##
300
333
  # Lists all projects to which you have been granted any project role.
301
334
  def list_projects options = {}
302
- execute do
335
+ execute backoff: true do
303
336
  service.list_projects max_results: options[:max],
304
337
  page_token: options[:token]
305
338
  end
@@ -335,6 +368,23 @@ module Google
335
368
  end
336
369
  end
337
370
 
371
+ # Generate a random string similar to the BigQuery service job IDs.
372
+ def generate_id
373
+ SecureRandom.urlsafe_base64(21)
374
+ end
375
+
376
+ # If no job_id or prefix is given, always generate a client-side job ID
377
+ # anyway, so that the retries performed in #execute are idempotent.
378
+ # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
379
+ def job_ref_from job_id, prefix
380
+ prefix ||= "job_"
381
+ job_id ||= "#{prefix}#{generate_id}"
382
+ API::JobReference.new(
383
+ project_id: @project,
384
+ job_id: job_id
385
+ )
386
+ end
387
+
338
388
  def load_table_file_opts dataset_id, table_id, file, options = {}
339
389
  path = Pathname(file).to_path
340
390
  {
@@ -346,21 +396,26 @@ module Google
346
396
  projection_fields: projection_fields(options[:projection_fields]),
347
397
  allow_jagged_rows: options[:jagged_rows],
348
398
  allow_quoted_newlines: options[:quoted_newlines],
399
+ autodetect: options[:autodetect],
349
400
  encoding: options[:encoding], field_delimiter: options[:delimiter],
350
401
  ignore_unknown_values: options[:ignore_unknown],
351
- max_bad_records: options[:max_bad_records], quote: options[:quote],
402
+ max_bad_records: options[:max_bad_records],
403
+ null_marker: options[:null_marker], quote: options[:quote],
352
404
  schema: options[:schema], skip_leading_rows: options[:skip_leading]
353
405
  }.delete_if { |_, v| v.nil? }
354
406
  end
355
407
 
356
408
  def load_table_file_config dataset_id, table_id, file, options = {}
357
409
  load_opts = load_table_file_opts dataset_id, table_id, file, options
358
- API::Job.new(
410
+ req = API::Job.new(
411
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
359
412
  configuration: API::JobConfiguration.new(
360
413
  load: API::JobConfigurationLoad.new(load_opts),
361
414
  dry_run: options[:dryrun]
362
415
  )
363
416
  )
417
+ req.configuration.labels = options[:labels] if options[:labels]
418
+ req
364
419
  end
365
420
 
366
421
  def load_table_url_opts dataset_id, table_id, url, options = {}
@@ -374,21 +429,26 @@ module Google
374
429
  projection_fields: projection_fields(options[:projection_fields]),
375
430
  allow_jagged_rows: options[:jagged_rows],
376
431
  allow_quoted_newlines: options[:quoted_newlines],
432
+ autodetect: options[:autodetect],
377
433
  encoding: options[:encoding], field_delimiter: options[:delimiter],
378
434
  ignore_unknown_values: options[:ignore_unknown],
379
- max_bad_records: options[:max_bad_records], quote: options[:quote],
435
+ max_bad_records: options[:max_bad_records],
436
+ null_marker: options[:null_marker], quote: options[:quote],
380
437
  schema: options[:schema], skip_leading_rows: options[:skip_leading]
381
438
  }.delete_if { |_, v| v.nil? }
382
439
  end
383
440
 
384
441
  def load_table_url_config dataset_id, table_id, url, options = {}
385
442
  load_opts = load_table_url_opts dataset_id, table_id, url, options
386
- API::Job.new(
443
+ req = API::Job.new(
444
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
387
445
  configuration: API::JobConfiguration.new(
388
446
  load: API::JobConfigurationLoad.new(load_opts),
389
447
  dry_run: options[:dryrun]
390
448
  )
391
449
  )
450
+ req.configuration.labels = options[:labels] if options[:labels]
451
+ req
392
452
  end
393
453
 
394
454
  # rubocop:disable all
@@ -397,8 +457,9 @@ module Google
397
457
  # Job description for query job
398
458
  def query_table_config query, options
399
459
  dest_table = table_ref_from options[:table]
400
- default_dataset = dataset_ref_from options[:dataset]
460
+ dataset_config = dataset_ref_from options[:dataset], options[:project]
401
461
  req = API::Job.new(
462
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
402
463
  configuration: API::JobConfiguration.new(
403
464
  query: API::JobConfigurationQuery.new(
404
465
  query: query,
@@ -410,14 +471,16 @@ module Google
410
471
  write_disposition: write_disposition(options[:write]),
411
472
  allow_large_results: options[:large_results],
412
473
  flatten_results: options[:flatten],
413
- default_dataset: default_dataset,
474
+ default_dataset: dataset_config,
414
475
  use_legacy_sql: Convert.resolve_legacy_sql(
415
476
  options[:standard_sql], options[:legacy_sql]),
416
477
  maximum_billing_tier: options[:maximum_billing_tier],
417
- maximum_bytes_billed: options[:maximum_bytes_billed]
478
+ maximum_bytes_billed: options[:maximum_bytes_billed],
479
+ user_defined_function_resources: udfs(options[:udfs])
418
480
  )
419
481
  )
420
482
  )
483
+ req.configuration.labels = options[:labels] if options[:labels]
421
484
 
422
485
  if options[:params]
423
486
  if Array === options[:params]
@@ -439,6 +502,14 @@ module Google
439
502
  end
440
503
  end
441
504
 
505
+ if options[:external]
506
+ external_table_pairs = options[:external].map do |name, obj|
507
+ [String(name), obj.to_gapi]
508
+ end
509
+ external_table_hash = Hash[external_table_pairs]
510
+ req.configuration.query.table_definitions = external_table_hash
511
+ end
512
+
442
513
  req
443
514
  end
444
515
 
@@ -484,7 +555,8 @@ module Google
484
555
  ##
485
556
  # Job description for copy job
486
557
  def copy_table_config source, target, options = {}
487
- API::Job.new(
558
+ req = API::Job.new(
559
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
488
560
  configuration: API::JobConfiguration.new(
489
561
  copy: API::JobConfigurationTableCopy.new(
490
562
  source_table: source,
@@ -495,6 +567,8 @@ module Google
495
567
  dry_run: options[:dryrun]
496
568
  )
497
569
  )
570
+ req.configuration.labels = options[:labels] if options[:labels]
571
+ req
498
572
  end
499
573
 
500
574
  def extract_table_config table, storage_files, options = {}
@@ -502,7 +576,8 @@ module Google
502
576
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
503
577
  end
504
578
  dest_format = source_format storage_urls.first, options[:format]
505
- API::Job.new(
579
+ req = API::Job.new(
580
+ job_reference: job_ref_from(options[:job_id], options[:prefix]),
506
581
  configuration: API::JobConfiguration.new(
507
582
  extract: API::JobConfigurationExtract.new(
508
583
  destination_uris: Array(storage_urls),
@@ -515,6 +590,8 @@ module Google
515
590
  dry_run: options[:dryrun]
516
591
  )
517
592
  )
593
+ req.configuration.labels = options[:labels] if options[:labels]
594
+ req
518
595
  end
519
596
 
520
597
  def create_disposition str
@@ -550,6 +627,7 @@ module Google
550
627
  "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
551
628
  "avro" => "AVRO",
552
629
  "datastore" => "DATASTORE_BACKUP",
630
+ "backup" => "DATASTORE_BACKUP",
553
631
  "datastore_backup" => "DATASTORE_BACKUP"
554
632
  }[format.to_s.downcase]
555
633
  return val unless val.nil?
@@ -573,11 +651,86 @@ module Google
573
651
  nil
574
652
  end
575
653
 
576
- def execute
577
- yield
654
+ def udfs array_or_str
655
+ Array(array_or_str).map do |uri_or_code|
656
+ resource = API::UserDefinedFunctionResource.new
657
+ if uri_or_code.start_with?("gs://")
658
+ resource.resource_uri = uri_or_code
659
+ else
660
+ resource.inline_code = uri_or_code
661
+ end
662
+ resource
663
+ end
664
+ end
665
+
666
+ def execute backoff: nil
667
+ if backoff
668
+ Backoff.new(retries: retries).execute { yield }
669
+ else
670
+ yield
671
+ end
578
672
  rescue Google::Apis::Error => e
579
673
  raise Google::Cloud::Error.from_error(e)
580
674
  end
675
+
676
+ class Backoff
677
+ class << self
678
+ attr_accessor :retries
679
+ attr_accessor :reasons
680
+ attr_accessor :backoff
681
+ end
682
+ self.retries = 5
683
+ self.reasons = %w(rateLimitExceeded backendError)
684
+ self.backoff = lambda do |retries|
685
+ # Max delay is 32 seconds
686
+ # See "Back-off Requirements" here:
687
+ # https://cloud.google.com/bigquery/sla
688
+ retries = 5 if retries > 5
689
+ delay = 2 ** retries
690
+ sleep delay
691
+ end
692
+
693
+ def initialize options = {}
694
+ @retries = (options[:retries] || Backoff.retries).to_i
695
+ @reasons = (options[:reasons] || Backoff.reasons).to_a
696
+ @backoff = options[:backoff] || Backoff.backoff
697
+ end
698
+
699
+ def execute
700
+ current_retries = 0
701
+ loop do
702
+ begin
703
+ return yield
704
+ rescue Google::Apis::Error => e
705
+ raise e unless retry? e.body, current_retries
706
+
707
+ @backoff.call current_retries
708
+ current_retries += 1
709
+ end
710
+ end
711
+ end
712
+
713
+ protected
714
+
715
+ def retry? result, current_retries #:nodoc:
716
+ if current_retries < @retries
717
+ return true if retry_error_reason? result
718
+ end
719
+ false
720
+ end
721
+
722
+ def retry_error_reason? err_body
723
+ err_hash = JSON.parse err_body
724
+ json_errors = Array err_hash["error"]["errors"]
725
+ return false if json_errors.empty?
726
+ json_errors.each do |json_error|
727
+ return false unless @reasons.include? json_error["reason"]
728
+ end
729
+ true
730
+ rescue
731
+ false
732
+ end
733
+ end
581
734
  end
582
735
  end
583
736
  end