google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,13 +21,25 @@ module Google
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
23
  # on a {Table}. An ExtractJob instance is created when you call
24
- # {Table#extract}.
24
+ # {Table#extract_job}.
25
25
  #
26
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
26
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
27
27
  # Exporting Data From BigQuery
28
28
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
29
  # reference
30
30
  #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ # dataset = bigquery.dataset "my_dataset"
36
+ # table = dataset.table "my_table"
37
+ #
38
+ # extract_job = table.extract_job "gs://my-bucket/file-name.json",
39
+ # format: "json"
40
+ # extract_job.wait_until_done!
41
+ # extract_job.done? #=> true
42
+ #
31
43
  class ExtractJob < Job
32
44
  ##
33
45
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -38,7 +50,10 @@ module Google
38
50
 
39
51
  ##
40
52
  # The table from which the data is exported. This is the table upon
41
- # which {Table#extract} was called. Returns a {Table} instance.
53
+ # which {Table#extract_job} was called.
54
+ #
55
+ # @return [Table] A table instance.
56
+ #
42
57
  def source
43
58
  table = @gapi.configuration.extract.source_table
44
59
  return nil unless table
@@ -50,6 +65,9 @@ module Google
50
65
  ##
51
66
  # Checks if the export operation compresses the data using gzip. The
52
67
  # default is `false`.
68
+ #
69
+ # @return [Boolean] `true` when `GZIP`, `false` otherwise.
70
+ #
53
71
  def compression?
54
72
  val = @gapi.configuration.extract.compression
55
73
  val == "GZIP"
@@ -58,6 +76,10 @@ module Google
58
76
  ##
59
77
  # Checks if the destination format for the data is [newline-delimited
60
78
  # JSON](http://jsonlines.org/). The default is `false`.
79
+ #
80
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
81
+ # otherwise.
82
+ #
61
83
  def json?
62
84
  val = @gapi.configuration.extract.destination_format
63
85
  val == "NEWLINE_DELIMITED_JSON"
@@ -67,6 +89,9 @@ module Google
67
89
  # Checks if the destination format for the data is CSV. Tables with
68
90
  # nested or repeated fields cannot be exported as CSV. The default is
69
91
  # `true`.
92
+ #
93
+ # @return [Boolean] `true` when `CSV`, `false` otherwise.
94
+ #
70
95
  def csv?
71
96
  val = @gapi.configuration.extract.destination_format
72
97
  return true if val.nil?
@@ -76,14 +101,20 @@ module Google
76
101
  ##
77
102
  # Checks if the destination format for the data is
78
103
  # [Avro](http://avro.apache.org/). The default is `false`.
104
+ #
105
+ # @return [Boolean] `true` when `AVRO`, `false` otherwise.
106
+ #
79
107
  def avro?
80
108
  val = @gapi.configuration.extract.destination_format
81
109
  val == "AVRO"
82
110
  end
83
111
 
84
112
  ##
85
- # The symbol the operation uses to delimit fields in the exported data.
86
- # The default is a comma (,).
113
+ # The character or symbol the operation uses to delimit fields in the
114
+ # exported data. The default is a comma (,).
115
+ #
116
+ # @return [String] A string containing the character, such as `","`.
117
+ #
87
118
  def delimiter
88
119
  val = @gapi.configuration.extract.field_delimiter
89
120
  val = "," if val.nil?
@@ -93,6 +124,10 @@ module Google
93
124
  ##
94
125
  # Checks if the exported data contains a header row. The default is
95
126
  # `true`.
127
+ #
128
+ # @return [Boolean] `true` when the print header configuration is
129
+ # present or `nil`, `false` otherwise.
130
+ #
96
131
  def print_header?
97
132
  val = @gapi.configuration.extract.print_header
98
133
  val = true if val.nil?
@@ -100,17 +135,23 @@ module Google
100
135
  end
101
136
 
102
137
  ##
103
- # The count of files per destination URI or URI pattern specified in
104
- # {#destinations}. Returns an Array of values in the same order as the
105
- # URI patterns.
138
+ # The number of files per destination URI or URI pattern specified in
139
+ # {#destinations}.
140
+ #
141
+ # @return [Array<Integer>] An array of values in the same order as the
142
+ # URI patterns.
143
+ #
106
144
  def destinations_file_counts
107
145
  Array @gapi.statistics.extract.destination_uri_file_counts
108
146
  end
109
147
 
110
148
  ##
111
- # The count of files per destination URI or URI pattern specified in
112
- # {#destinations}. Returns a Hash with the URI patterns as keys and the
113
- # counts as values.
149
+ # A hash containing the URI or URI pattern specified in
150
+ # {#destinations} mapped to the counts of files per destination.
151
+ #
152
+ # @return [Hash<String, Integer>] A Hash with the URI patterns as keys
153
+ # and the counts as values.
154
+ #
114
155
  def destinations_counts
115
156
  Hash[destinations.zip destinations_file_counts]
116
157
  end
@@ -20,6 +20,28 @@ module Google
20
20
  module Bigquery
21
21
  ##
22
22
  # InsertResponse
23
+ #
24
+ # Represents the response from BigQuery when data is inserted into a table
25
+ # for near-immediate querying, without the need to complete a load
26
+ # operation before the data can appear in query results. See
27
+ # {Dataset#insert} and {Table#insert}.
28
+ #
29
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
30
+ # Streaming Data Into BigQuery
31
+ #
32
+ # @example
33
+ # require "google/cloud/bigquery"
34
+ #
35
+ # bigquery = Google::Cloud::Bigquery.new
36
+ # dataset = bigquery.dataset "my_dataset"
37
+ #
38
+ # rows = [
39
+ # { "first_name" => "Alice", "age" => 21 },
40
+ # { "first_name" => "Bob", "age" => 22 }
41
+ # ]
42
+ #
43
+ # insert_response = dataset.insert "my_table", rows
44
+ #
23
45
  class InsertResponse
24
46
  # @private
25
47
  def initialize rows, gapi
@@ -27,18 +49,43 @@ module Google
27
49
  @gapi = gapi
28
50
  end
29
51
 
52
+ ##
53
+ # Checks if the error count is zero, meaning that all of the rows were
54
+ # inserted. Use {#insert_errors} to access the errors.
55
+ #
56
+ # @return [Boolean] `true` when the error count is zero, `false`
57
+ # otherwise.
58
+ #
30
59
  def success?
31
60
  error_count.zero?
32
61
  end
33
62
 
63
+
64
+ ##
65
+ # The count of rows in the response, minus the count of errors for rows
66
+ # that were not inserted.
67
+ #
68
+ # @return [Integer] The number of rows inserted.
69
+ #
34
70
  def insert_count
35
71
  @rows.count - error_count
36
72
  end
37
73
 
74
+
75
+ ##
76
+ # The count of errors for rows that were not inserted.
77
+ #
78
+ # @return [Integer] The number of errors.
79
+ #
38
80
  def error_count
39
81
  Array(@gapi.insert_errors).count
40
82
  end
41
83
 
84
+ ##
85
+ # The error objects for rows that were not inserted.
86
+ #
87
+ # @return [Array<InsertError>] An array containing error objects.
88
+ #
42
89
  def insert_errors
43
90
  Array(@gapi.insert_errors).map do |ie|
44
91
  row = @rows[ie.index]
@@ -47,23 +94,54 @@ module Google
47
94
  end
48
95
  end
49
96
 
97
+ ##
98
+ # The rows that were not inserted.
99
+ #
100
+ # @return [Array<Hash>] An array of hash objects containing the row
101
+ # data.
102
+ #
50
103
  def error_rows
51
104
  Array(@gapi.insert_errors).map do |ie|
52
105
  @rows[ie.index]
53
106
  end
54
107
  end
55
108
 
109
+ ##
110
+ # Returns the error object for a row that was not inserted.
111
+ #
112
+ # @param [Hash] row A hash containing the data for a row.
113
+ #
114
+ # @return [InsertError, nil] An error object, or `nil` if no error is
115
+ # found in the response for the row.
116
+ #
56
117
  def insert_error_for row
57
- json_row = Convert.to_json_row(row)
58
- insert_errors.detect { |e| e.row == json_row }
118
+ insert_errors.detect { |e| e.row == row }
59
119
  end
60
120
 
121
+ ##
122
+ # Returns the error hashes for a row that was not inserted. Each error
123
+ # hash contains the following keys: `reason`, `location`, `debugInfo`,
124
+ # and `message`.
125
+ #
126
+ # @param [Hash] row A hash containing the data for a row.
127
+ #
128
+ # @return [Array<Hash>] An array of error hashes, or an empty array if
129
+ # no errors are found in the response for the row.
130
+ #
61
131
  def errors_for row
62
132
  ie = insert_error_for row
63
133
  return ie.errors if ie
64
134
  []
65
135
  end
66
136
 
137
+ ##
138
+ # Returns the index for a row that was not inserted.
139
+ #
140
+ # @param [Hash] row A hash containing the data for a row.
141
+ #
142
+ # @return [Integer, nil] The index of the row, or `nil` if no error is
143
+ # found in the response for the row.
144
+ #
67
145
  def index_for row
68
146
  ie = insert_error_for row
69
147
  return ie.index if ie
@@ -78,6 +156,16 @@ module Google
78
156
 
79
157
  ##
80
158
  # InsertError
159
+ #
160
+ # Represents the errors for a row that was not inserted.
161
+ #
162
+ # @attr_reader [Integer] index The index of the row that the error
162
+ # applies to.
163
+ # @attr_reader [Hash] row The row that the error applies to.
165
+ # @attr_reader [Array<Hash>] errors Error information for the row
166
+ # indicated by the index property. Each error hash has the following
167
+ # keys: `reason`, `location`, `debugInfo`, and `message`.
168
+ #
81
169
  class InsertError
82
170
  attr_reader :index
83
171
  attr_reader :row
@@ -15,7 +15,6 @@
15
15
 
16
16
  require "google/cloud/errors"
17
17
  require "google/cloud/bigquery/service"
18
- require "google/cloud/bigquery/query_data"
19
18
  require "google/cloud/bigquery/job/list"
20
19
  require "json"
21
20
 
@@ -31,11 +30,11 @@ module Google
31
30
  # {CopyJob}, {ExtractJob}, {LoadJob}, and {QueryJob}.
32
31
  #
33
32
  # A job instance is created when you call {Project#query_job},
34
- # {Dataset#query_job}, {Table#copy}, {Table#extract}, {Table#load}, or
35
- # {View#data}.
33
+ # {Dataset#query_job}, {Table#copy_job}, {Table#extract_job},
34
+ # {Table#load_job}, or {View#data}.
36
35
  #
37
- # @see https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects
38
- # Managing Jobs, Datasets, and Projects
36
+ # @see https://cloud.google.com/bigquery/docs/managing-jobs Running and
37
+ # Managing Jobs
39
38
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
40
39
  # reference
41
40
  #
@@ -52,7 +51,7 @@ module Google
52
51
  # if job.failed?
53
52
  # puts job.error
54
53
  # else
55
- # puts job.query_results.first
54
+ # puts job.data.first
56
55
  # end
57
56
  #
58
57
  class Job
@@ -73,21 +72,41 @@ module Google
73
72
 
74
73
  ##
75
74
  # The ID of the job.
75
+ #
76
+ # @return [String] The ID must contain only letters (a-z, A-Z), numbers
77
+ # (0-9), underscores (_), or dashes (-). The maximum length is 1,024
78
+ # characters.
79
+ #
76
80
  def job_id
77
81
  @gapi.job_reference.job_id
78
82
  end
79
83
 
80
84
  ##
81
85
  # The ID of the project containing the job.
86
+ #
87
+ # @return [String] The project ID.
88
+ #
82
89
  def project_id
83
90
  @gapi.job_reference.project_id
84
91
  end
85
92
 
86
93
  ##
87
- # The current state of the job. The possible values are `PENDING`,
88
- # `RUNNING`, and `DONE`. A `DONE` state does not mean that the job
89
- # completed successfully. Use {#failed?} to discover if an error
94
+ # The email address of the user who ran the job.
95
+ #
96
+ # @return [String] The email address.
97
+ #
98
+ def user_email
99
+ @gapi.user_email
100
+ end
101
+
102
+ ##
103
+ # The current state of the job. A `DONE` state does not mean that the
104
+ # job completed successfully. Use {#failed?} to discover if an error
90
105
  # occurred or if the job was successful.
106
+ #
107
+ # @return [String] The state code. The possible values are `PENDING`,
108
+ # `RUNNING`, and `DONE`.
109
+ #
91
110
  def state
92
111
  return nil if @gapi.status.nil?
93
112
  @gapi.status.state
@@ -95,6 +114,9 @@ module Google
95
114
 
96
115
  ##
97
116
  # Checks if the job's state is `RUNNING`.
117
+ #
118
+ # @return [Boolean] `true` when `RUNNING`, `false` otherwise.
119
+ #
98
120
  def running?
99
121
  return false if state.nil?
100
122
  "running".casecmp(state).zero?
@@ -102,6 +124,9 @@ module Google
102
124
 
103
125
  ##
104
126
  # Checks if the job's state is `PENDING`.
127
+ #
128
+ # @return [Boolean] `true` when `PENDING`, `false` otherwise.
129
+ #
105
130
  def pending?
106
131
  return false if state.nil?
107
132
  "pending".casecmp(state).zero?
@@ -112,19 +137,29 @@ module Google
112
137
  # running. However, a `DONE` state does not mean that the job completed
113
138
  # successfully. Use {#failed?} to detect if an error occurred or if the
114
139
  # job was successful.
140
+ #
141
+ # @return [Boolean] `true` when `DONE`, `false` otherwise.
142
+ #
115
143
  def done?
116
144
  return false if state.nil?
117
145
  "done".casecmp(state).zero?
118
146
  end
119
147
 
120
148
  ##
121
- # Checks if an error is present.
149
+ # Checks if an error is present. Use {#error} to access the error
150
+ # object.
151
+ #
152
+ # @return [Boolean] `true` when there is an error, `false` otherwise.
153
+ #
122
154
  def failed?
123
155
  !error.nil?
124
156
  end
125
157
 
126
158
  ##
127
159
  # The time when the job was created.
160
+ #
161
+ # @return [Time, nil] The creation time from the job statistics.
162
+ #
128
163
  def created_at
129
164
  ::Time.at(Integer(@gapi.statistics.creation_time) / 1000.0)
130
165
  rescue
@@ -135,6 +170,9 @@ module Google
135
170
  # The time when the job was started.
136
171
  # This field is present after the job's state changes from `PENDING`
137
172
  # to either `RUNNING` or `DONE`.
173
+ #
174
+ # @return [Time, nil] The start time from the job statistics.
175
+ #
138
176
  def started_at
139
177
  ::Time.at(Integer(@gapi.statistics.start_time) / 1000.0)
140
178
  rescue
@@ -144,6 +182,9 @@ module Google
144
182
  ##
145
183
  # The time when the job ended.
146
184
  # This field is present when the job's state is `DONE`.
185
+ #
186
+ # @return [Time, nil] The end time from the job statistics.
187
+ #
147
188
  def ended_at
148
189
  ::Time.at(Integer(@gapi.statistics.end_time) / 1000.0)
149
190
  rescue
@@ -165,6 +206,9 @@ module Google
165
206
  #
166
207
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
167
208
  # reference
209
+ #
210
+ # @return [Hash] The job statistics.
211
+ #
168
212
  def statistics
169
213
  JSON.parse @gapi.statistics.to_json
170
214
  end
@@ -173,6 +217,9 @@ module Google
173
217
  ##
174
218
  # The job's status. Returns a hash. The values contained in the hash are
175
219
  # also exposed by {#state}, {#error}, and {#errors}.
220
+ #
221
+ # @return [Hash] The job status.
222
+ #
176
223
  def status
177
224
  JSON.parse @gapi.status.to_json
178
225
  end
@@ -184,7 +231,8 @@ module Google
184
231
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
185
232
  # reference
186
233
  #
187
- # @return [Hash] Returns a hash containing `reason` and `message` keys:
234
+ # @return [Hash, nil] Returns a hash containing `reason` and `message`
235
+ # keys:
188
236
  #
189
237
  # {
190
238
  # "reason"=>"notFound",
@@ -192,21 +240,55 @@ module Google
192
240
  # }
193
241
  #
194
242
  def error
195
- return nil if @gapi.status.nil?
196
- return nil if @gapi.status.error_result.nil?
197
- JSON.parse @gapi.status.error_result.to_json
243
+ status["errorResult"]
198
244
  end
199
245
 
200
246
  ##
201
247
  # The errors for the job, if any errors have occurred. Returns an array
202
248
  # of hash objects. See {#error}.
249
+ #
250
+ # @return [Array<Hash>] Returns an array of hashes containing
251
+ # `reason` and `message` keys:
252
+ #
253
+ # {
254
+ # "reason"=>"notFound",
255
+ # "message"=>"Not found: Table publicdata:samples.BAD_ID"
256
+ # }
257
+ #
203
258
  def errors
204
- return [] if @gapi.status.nil?
205
- Array(@gapi.status.errors).map { |e| JSON.parse e.to_json }
259
+ Array status["errors"]
260
+ end
261
+
262
+ ##
263
+ # A hash of user-provided labels associated with this job. Labels can be
264
+ # provided when the job is created, and used to organize and group jobs.
265
+ #
266
+ # The returned hash is frozen and changes are not allowed. Use
267
+ # {#labels=} to replace the entire hash.
268
+ #
269
+ # @return [Hash] The job labels.
270
+ #
271
+ # @!group Attributes
272
+ #
273
+ def labels
274
+ m = @gapi.configuration.labels
275
+ m = m.to_h if m.respond_to? :to_h
276
+ m.dup.freeze
206
277
  end
207
278
 
208
279
  ##
209
280
  # Cancels the job.
281
+ #
282
+ # @example
283
+ # require "google/cloud/bigquery"
284
+ #
285
+ # bigquery = Google::Cloud::Bigquery.new
286
+ #
287
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
288
+ # "publicdata.samples.shakespeare"
289
+ #
290
+ # job.cancel
291
+ #
210
292
  def cancel
211
293
  ensure_service!
212
294
  resp = service.cancel_job job_id
@@ -216,6 +298,18 @@ module Google
216
298
 
217
299
  ##
218
300
  # Creates a new job with the current configuration.
301
+ #
302
+ # @example
303
+ # require "google/cloud/bigquery"
304
+ #
305
+ # bigquery = Google::Cloud::Bigquery.new
306
+ #
307
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
308
+ # "publicdata.samples.shakespeare"
309
+ #
310
+ # job.wait_until_done!
311
+ # job.rerun!
312
+ #
219
313
  def rerun!
220
314
  ensure_service!
221
315
  gapi = service.insert_job @gapi.configuration
@@ -224,6 +318,19 @@ module Google
224
318
 
225
319
  ##
226
320
  # Reloads the job with current data from the BigQuery service.
321
+ #
322
+ # @example
323
+ # require "google/cloud/bigquery"
324
+ #
325
+ # bigquery = Google::Cloud::Bigquery.new
326
+ #
327
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
328
+ # "publicdata.samples.shakespeare"
329
+ #
330
+ # job.done?
331
+ # job.reload!
332
+ # job.done? #=> true
333
+ #
227
334
  def reload!
228
335
  ensure_service!
229
336
  gapi = service.get_job job_id
@@ -232,8 +339,9 @@ module Google
232
339
  alias_method :refresh!, :reload!
233
340
 
234
341
  ##
235
- # Refreshes the job until the job is `DONE`.
236
- # The delay between refreshes will incrementally increase.
342
+ # Refreshes the job until the job is `DONE`. The delay between refreshes
343
+ # starts at 5 seconds and increases quadratically to a maximum of 60
344
+ # seconds.
237
345
  #
238
346
  # @example
239
347
  # require "google/cloud/bigquery"
@@ -242,12 +350,16 @@ module Google
242
350
  # dataset = bigquery.dataset "my_dataset"
243
351
  # table = dataset.table "my_table"
244
352
  #
245
- # extract_job = table.extract "gs://my-bucket/file-name.json",
246
- # format: "json"
353
+ # extract_job = table.extract_job "gs://my-bucket/file-name.json",
354
+ # format: "json"
247
355
  # extract_job.wait_until_done!
248
356
  # extract_job.done? #=> true
357
+ #
249
358
  def wait_until_done!
250
- backoff = ->(retries) { sleep 2 * retries + 5 }
359
+ backoff = lambda do |retries|
360
+ delay = [retries ** 2 + 5, 60].min # Maximum delay is 60
361
+ sleep delay
362
+ end
251
363
  retries = 0
252
364
  until done?
253
365
  backoff.call retries
@@ -266,6 +378,20 @@ module Google
266
378
  end
267
379
  end
268
380
 
381
+ ##
382
+ # @private New Google::Apis::Error with job failure details
383
+ def gapi_error
384
+ return nil unless failed?
385
+
386
+ error_status_code = status_code_for_reason error["reason"]
387
+ error_body = error
388
+ error_body["errors"] = errors
389
+
390
+ Google::Apis::Error.new error["message"],
391
+ status_code: error_status_code,
392
+ body: error_body
393
+ end
394
+
269
395
  protected
270
396
 
271
397
  ##
@@ -296,6 +422,19 @@ module Google
296
422
  rescue Google::Cloud::NotFoundError
297
423
  nil
298
424
  end
425
+
426
+ def status_code_for_reason reason
427
+ codes = { "accessDenied" => 403, "backendError" => 500,
428
+ "billingNotEnabled" => 403,
429
+ "billingTierLimitExceeded" => 400, "blocked" => 403,
430
+ "duplicate" => 409, "internalError" =>500, "invalid" => 400,
431
+ "invalidQuery" => 400, "notFound" =>404,
432
+ "notImplemented" => 501, "quotaExceeded" => 403,
433
+ "rateLimitExceeded" => 403, "resourceInUse" => 400,
434
+ "resourcesExceeded" => 400, "responseTooLarge" => 403,
435
+ "tableUnavailable" => 400 }
436
+ codes[reason] || 0
437
+ end
299
438
  end
300
439
  end
301
440
  end