google-cloud-bigquery 0.28.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,13 +21,25 @@ module Google
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
23
  # on a {Table}. An ExtractJob instance is created when you call
24
- # {Table#extract}.
24
+ # {Table#extract_job}.
25
25
  #
26
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
26
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
27
27
  # Exporting Data From BigQuery
28
28
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
29
  # reference
30
30
  #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ # dataset = bigquery.dataset "my_dataset"
36
+ # table = dataset.table "my_table"
37
+ #
38
+ # extract_job = table.extract_job "gs://my-bucket/file-name.json",
39
+ # format: "json"
40
+ # extract_job.wait_until_done!
41
+ # extract_job.done? #=> true
42
+ #
31
43
  class ExtractJob < Job
32
44
  ##
33
45
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -38,7 +50,10 @@ module Google
38
50
 
39
51
  ##
40
52
  # The table from which the data is exported. This is the table upon
41
- # which {Table#extract} was called. Returns a {Table} instance.
53
+ # which {Table#extract_job} was called.
54
+ #
55
+ # @return [Table] A table instance.
56
+ #
42
57
  def source
43
58
  table = @gapi.configuration.extract.source_table
44
59
  return nil unless table
@@ -50,6 +65,9 @@ module Google
50
65
  ##
51
66
  # Checks if the export operation compresses the data using gzip. The
52
67
  # default is `false`.
68
+ #
69
+ # @return [Boolean] `true` when `GZIP`, `false` otherwise.
70
+ #
53
71
  def compression?
54
72
  val = @gapi.configuration.extract.compression
55
73
  val == "GZIP"
@@ -58,6 +76,10 @@ module Google
58
76
  ##
59
77
  # Checks if the destination format for the data is [newline-delimited
60
78
  # JSON](http://jsonlines.org/). The default is `false`.
79
+ #
80
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
81
+ # otherwise.
82
+ #
61
83
  def json?
62
84
  val = @gapi.configuration.extract.destination_format
63
85
  val == "NEWLINE_DELIMITED_JSON"
@@ -67,6 +89,9 @@ module Google
67
89
  # Checks if the destination format for the data is CSV. Tables with
68
90
  # nested or repeated fields cannot be exported as CSV. The default is
69
91
  # `true`.
92
+ #
93
+ # @return [Boolean] `true` when `CSV`, `false` otherwise.
94
+ #
70
95
  def csv?
71
96
  val = @gapi.configuration.extract.destination_format
72
97
  return true if val.nil?
@@ -76,14 +101,20 @@ module Google
76
101
  ##
77
102
  # Checks if the destination format for the data is
78
103
  # [Avro](http://avro.apache.org/). The default is `false`.
104
+ #
105
+ # @return [Boolean] `true` when `AVRO`, `false` otherwise.
106
+ #
79
107
  def avro?
80
108
  val = @gapi.configuration.extract.destination_format
81
109
  val == "AVRO"
82
110
  end
83
111
 
84
112
  ##
85
- # The symbol the operation uses to delimit fields in the exported data.
86
- # The default is a comma (,).
113
+ # The character or symbol the operation uses to delimit fields in the
114
+ # exported data. The default is a comma (,).
115
+ #
116
+ # @return [String] A string containing the character, such as `","`.
117
+ #
87
118
  def delimiter
88
119
  val = @gapi.configuration.extract.field_delimiter
89
120
  val = "," if val.nil?
@@ -93,6 +124,10 @@ module Google
93
124
  ##
94
125
  # Checks if the exported data contains a header row. The default is
95
126
  # `true`.
127
+ #
128
+ # @return [Boolean] `true` when the print header configuration is
129
+ # `true` or `nil`, `false` otherwise.
130
+ #
96
131
  def print_header?
97
132
  val = @gapi.configuration.extract.print_header
98
133
  val = true if val.nil?
@@ -100,17 +135,23 @@ module Google
100
135
  end
101
136
 
102
137
  ##
103
- # The count of files per destination URI or URI pattern specified in
104
- # {#destinations}. Returns an Array of values in the same order as the
105
- # URI patterns.
138
+ # The number of files per destination URI or URI pattern specified in
139
+ # {#destinations}.
140
+ #
141
+ # @return [Array<Integer>] An array of values in the same order as the
142
+ # URI patterns.
143
+ #
106
144
  def destinations_file_counts
107
145
  Array @gapi.statistics.extract.destination_uri_file_counts
108
146
  end
109
147
 
110
148
  ##
111
- # The count of files per destination URI or URI pattern specified in
112
- # {#destinations}. Returns a Hash with the URI patterns as keys and the
113
- # counts as values.
149
+ # A hash containing the URI or URI pattern specified in
150
+ # {#destinations} mapped to the counts of files per destination.
151
+ #
152
+ # @return [Hash<String, Integer>] A Hash with the URI patterns as keys
153
+ # and the counts as values.
154
+ #
114
155
  def destinations_counts
115
156
  Hash[destinations.zip destinations_file_counts]
116
157
  end
@@ -20,6 +20,28 @@ module Google
20
20
  module Bigquery
21
21
  ##
22
22
  # InsertResponse
23
+ #
24
+ # Represents the response from BigQuery when data is inserted into a table
25
+ # for near-immediate querying, without the need to complete a load
26
+ # operation before the data can appear in query results. See
27
+ # {Dataset#insert} and {Table#insert}.
28
+ #
29
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
30
+ # Streaming Data Into BigQuery
31
+ #
32
+ # @example
33
+ # require "google/cloud/bigquery"
34
+ #
35
+ # bigquery = Google::Cloud::Bigquery.new
36
+ # dataset = bigquery.dataset "my_dataset"
37
+ #
38
+ # rows = [
39
+ # { "first_name" => "Alice", "age" => 21 },
40
+ # { "first_name" => "Bob", "age" => 22 }
41
+ # ]
42
+ #
43
+ # insert_response = dataset.insert "my_table", rows
44
+ #
23
45
  class InsertResponse
24
46
  # @private
25
47
  def initialize rows, gapi
@@ -27,18 +49,43 @@ module Google
27
49
  @gapi = gapi
28
50
  end
29
51
 
52
+ ##
53
+ # Checks if the error count is zero, meaning that all of the rows were
54
+ # inserted. Use {#insert_errors} to access the errors.
55
+ #
56
+ # @return [Boolean] `true` when the error count is zero, `false`
57
+ # otherwise.
58
+ #
30
59
  def success?
31
60
  error_count.zero?
32
61
  end
33
62
 
63
+
64
+ ##
65
+ # The count of rows in the response, minus the count of errors for rows
66
+ # that were not inserted.
67
+ #
68
+ # @return [Integer] The number of rows inserted.
69
+ #
34
70
  def insert_count
35
71
  @rows.count - error_count
36
72
  end
37
73
 
74
+
75
+ ##
76
+ # The count of errors for rows that were not inserted.
77
+ #
78
+ # @return [Integer] The number of errors.
79
+ #
38
80
  def error_count
39
81
  Array(@gapi.insert_errors).count
40
82
  end
41
83
 
84
+ ##
85
+ # The error objects for rows that were not inserted.
86
+ #
87
+ # @return [Array<InsertError>] An array containing error objects.
88
+ #
42
89
  def insert_errors
43
90
  Array(@gapi.insert_errors).map do |ie|
44
91
  row = @rows[ie.index]
@@ -47,23 +94,54 @@ module Google
47
94
  end
48
95
  end
49
96
 
97
+ ##
98
+ # The rows that were not inserted.
99
+ #
100
+ # @return [Array<Hash>] An array of hash objects containing the row
101
+ # data.
102
+ #
50
103
  def error_rows
51
104
  Array(@gapi.insert_errors).map do |ie|
52
105
  @rows[ie.index]
53
106
  end
54
107
  end
55
108
 
109
+ ##
110
+ # Returns the error object for a row that was not inserted.
111
+ #
112
+ # @param [Hash] row A hash containing the data for a row.
113
+ #
114
+ # @return [InsertError, nil] An error object, or `nil` if no error is
115
+ # found in the response for the row.
116
+ #
56
117
  def insert_error_for row
57
- json_row = Convert.to_json_row(row)
58
- insert_errors.detect { |e| e.row == json_row }
118
+ insert_errors.detect { |e| e.row == row }
59
119
  end
60
120
 
121
+ ##
122
+ # Returns the error hashes for a row that was not inserted. Each error
123
+ # hash contains the following keys: `reason`, `location`, `debugInfo`,
124
+ # and `message`.
125
+ #
126
+ # @param [Hash] row A hash containing the data for a row.
127
+ #
128
+ # @return [Array<Hash>] An array of error hashes, or an empty array if no
129
+ # errors are found in the response for the row.
130
+ #
61
131
  def errors_for row
62
132
  ie = insert_error_for row
63
133
  return ie.errors if ie
64
134
  []
65
135
  end
66
136
 
137
+ ##
138
+ # Returns the index for a row that was not inserted.
139
+ #
140
+ # @param [Hash] row A hash containing the data for a row.
141
+ #
142
+ # @return [Integer, nil] The index of the row, or `nil` if no error is
143
+ # found in the response for the row.
144
+ #
67
145
  def index_for row
68
146
  ie = insert_error_for row
69
147
  return ie.index if ie
@@ -78,6 +156,16 @@ module Google
78
156
 
79
157
  ##
80
158
  # InsertError
159
+ #
160
+ # Represents the errors for a row that was not inserted.
161
+ #
162
+ # @attr_reader [Integer] index The index of the row that the error applies
163
+ # to.
164
+ # @attr_reader [Hash] row The row that the error applies to.
165
+ # @attr_reader [Hash] errors Error information for the row indicated by
166
+ # the index property, with the following keys: `reason`, `location`,
167
+ # `debugInfo`, and `message`.
168
+ #
81
169
  class InsertError
82
170
  attr_reader :index
83
171
  attr_reader :row
@@ -15,7 +15,6 @@
15
15
 
16
16
  require "google/cloud/errors"
17
17
  require "google/cloud/bigquery/service"
18
- require "google/cloud/bigquery/query_data"
19
18
  require "google/cloud/bigquery/job/list"
20
19
  require "json"
21
20
 
@@ -31,11 +30,11 @@ module Google
31
30
  # {CopyJob}, {ExtractJob}, {LoadJob}, and {QueryJob}.
32
31
  #
33
32
  # A job instance is created when you call {Project#query_job},
34
- # {Dataset#query_job}, {Table#copy}, {Table#extract}, {Table#load}, or
35
- # {View#data}.
33
+ # {Dataset#query_job}, {Table#copy_job}, {Table#extract_job},
34
+ # {Table#load_job}, or {View#data}.
36
35
  #
37
- # @see https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects
38
- # Managing Jobs, Datasets, and Projects
36
+ # @see https://cloud.google.com/bigquery/docs/managing-jobs Running and
37
+ # Managing Jobs
39
38
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
40
39
  # reference
41
40
  #
@@ -52,7 +51,7 @@ module Google
52
51
  # if job.failed?
53
52
  # puts job.error
54
53
  # else
55
- # puts job.query_results.first
54
+ # puts job.data.first
56
55
  # end
57
56
  #
58
57
  class Job
@@ -73,21 +72,41 @@ module Google
73
72
 
74
73
  ##
75
74
  # The ID of the job.
75
+ #
76
+ # @return [String] The ID must contain only letters (a-z, A-Z), numbers
77
+ # (0-9), underscores (_), or dashes (-). The maximum length is 1,024
78
+ # characters.
79
+ #
76
80
  def job_id
77
81
  @gapi.job_reference.job_id
78
82
  end
79
83
 
80
84
  ##
81
85
  # The ID of the project containing the job.
86
+ #
87
+ # @return [String] The project ID.
88
+ #
82
89
  def project_id
83
90
  @gapi.job_reference.project_id
84
91
  end
85
92
 
86
93
  ##
87
- # The current state of the job. The possible values are `PENDING`,
88
- # `RUNNING`, and `DONE`. A `DONE` state does not mean that the job
89
- # completed successfully. Use {#failed?} to discover if an error
94
+ # The email address of the user who ran the job.
95
+ #
96
+ # @return [String] The email address.
97
+ #
98
+ def user_email
99
+ @gapi.user_email
100
+ end
101
+
102
+ ##
103
+ # The current state of the job. A `DONE` state does not mean that the
104
+ # job completed successfully. Use {#failed?} to discover if an error
90
105
  # occurred or if the job was successful.
106
+ #
107
+ # @return [String] The state code. The possible values are `PENDING`,
108
+ # `RUNNING`, and `DONE`.
109
+ #
91
110
  def state
92
111
  return nil if @gapi.status.nil?
93
112
  @gapi.status.state
@@ -95,6 +114,9 @@ module Google
95
114
 
96
115
  ##
97
116
  # Checks if the job's state is `RUNNING`.
117
+ #
118
+ # @return [Boolean] `true` when `RUNNING`, `false` otherwise.
119
+ #
98
120
  def running?
99
121
  return false if state.nil?
100
122
  "running".casecmp(state).zero?
@@ -102,6 +124,9 @@ module Google
102
124
 
103
125
  ##
104
126
  # Checks if the job's state is `PENDING`.
127
+ #
128
+ # @return [Boolean] `true` when `PENDING`, `false` otherwise.
129
+ #
105
130
  def pending?
106
131
  return false if state.nil?
107
132
  "pending".casecmp(state).zero?
@@ -112,19 +137,29 @@ module Google
112
137
  # running. However, a `DONE` state does not mean that the job completed
113
138
  # successfully. Use {#failed?} to detect if an error occurred or if the
114
139
  # job was successful.
140
+ #
141
+ # @return [Boolean] `true` when `DONE`, `false` otherwise.
142
+ #
115
143
  def done?
116
144
  return false if state.nil?
117
145
  "done".casecmp(state).zero?
118
146
  end
119
147
 
120
148
  ##
121
- # Checks if an error is present.
149
+ # Checks if an error is present. Use {#error} to access the error
150
+ # object.
151
+ #
152
+ # @return [Boolean] `true` when there is an error, `false` otherwise.
153
+ #
122
154
  def failed?
123
155
  !error.nil?
124
156
  end
125
157
 
126
158
  ##
127
159
  # The time when the job was created.
160
+ #
161
+ # @return [Time, nil] The creation time from the job statistics.
162
+ #
128
163
  def created_at
129
164
  ::Time.at(Integer(@gapi.statistics.creation_time) / 1000.0)
130
165
  rescue
@@ -135,6 +170,9 @@ module Google
135
170
  # The time when the job was started.
136
171
  # This field is present after the job's state changes from `PENDING`
137
172
  # to either `RUNNING` or `DONE`.
173
+ #
174
+ # @return [Time, nil] The start time from the job statistics.
175
+ #
138
176
  def started_at
139
177
  ::Time.at(Integer(@gapi.statistics.start_time) / 1000.0)
140
178
  rescue
@@ -144,6 +182,9 @@ module Google
144
182
  ##
145
183
  # The time when the job ended.
146
184
  # This field is present when the job's state is `DONE`.
185
+ #
186
+ # @return [Time, nil] The end time from the job statistics.
187
+ #
147
188
  def ended_at
148
189
  ::Time.at(Integer(@gapi.statistics.end_time) / 1000.0)
149
190
  rescue
@@ -165,6 +206,9 @@ module Google
165
206
  #
166
207
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
167
208
  # reference
209
+ #
210
+ # @return [Hash] The job statistics.
211
+ #
168
212
  def statistics
169
213
  JSON.parse @gapi.statistics.to_json
170
214
  end
@@ -173,6 +217,9 @@ module Google
173
217
  ##
174
218
  # The job's status. Returns a hash. The values contained in the hash are
175
219
  # also exposed by {#state}, {#error}, and {#errors}.
220
+ #
221
+ # @return [Hash] The job status.
222
+ #
176
223
  def status
177
224
  JSON.parse @gapi.status.to_json
178
225
  end
@@ -184,7 +231,8 @@ module Google
184
231
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
185
232
  # reference
186
233
  #
187
- # @return [Hash] Returns a hash containing `reason` and `message` keys:
234
+ # @return [Hash, nil] Returns a hash containing `reason` and `message`
235
+ # keys:
188
236
  #
189
237
  # {
190
238
  # "reason"=>"notFound",
@@ -192,21 +240,55 @@ module Google
192
240
  # }
193
241
  #
194
242
  def error
195
- return nil if @gapi.status.nil?
196
- return nil if @gapi.status.error_result.nil?
197
- JSON.parse @gapi.status.error_result.to_json
243
+ status["errorResult"]
198
244
  end
199
245
 
200
246
  ##
201
247
  # The errors for the job, if any errors have occurred. Returns an array
202
248
  # of hash objects. See {#error}.
249
+ #
250
+ # @return [Array<Hash>] Returns an array of hashes containing
251
+ # `reason` and `message` keys:
252
+ #
253
+ # {
254
+ # "reason"=>"notFound",
255
+ # "message"=>"Not found: Table publicdata:samples.BAD_ID"
256
+ # }
257
+ #
203
258
  def errors
204
- return [] if @gapi.status.nil?
205
- Array(@gapi.status.errors).map { |e| JSON.parse e.to_json }
259
+ Array status["errors"]
260
+ end
261
+
262
+ ##
263
+ # A hash of user-provided labels associated with this job. Labels can be
264
+ # provided when the job is created, and used to organize and group jobs.
265
+ #
266
+ # The returned hash is frozen and changes are not allowed. Use
267
+ # {#labels=} to replace the entire hash.
268
+ #
269
+ # @return [Hash] The job labels.
270
+ #
271
+ # @!group Attributes
272
+ #
273
+ def labels
274
+ m = @gapi.configuration.labels
275
+ m = m.to_h if m.respond_to? :to_h
276
+ m.dup.freeze
206
277
  end
207
278
 
208
279
  ##
209
280
  # Cancels the job.
281
+ #
282
+ # @example
283
+ # require "google/cloud/bigquery"
284
+ #
285
+ # bigquery = Google::Cloud::Bigquery.new
286
+ #
287
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
288
+ # "publicdata.samples.shakespeare"
289
+ #
290
+ # job.cancel
291
+ #
210
292
  def cancel
211
293
  ensure_service!
212
294
  resp = service.cancel_job job_id
@@ -216,6 +298,18 @@ module Google
216
298
 
217
299
  ##
218
300
  # Creates a new job with the current configuration.
301
+ #
302
+ # @example
303
+ # require "google/cloud/bigquery"
304
+ #
305
+ # bigquery = Google::Cloud::Bigquery.new
306
+ #
307
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
308
+ # "publicdata.samples.shakespeare"
309
+ #
310
+ # job.wait_until_done!
311
+ # job.rerun!
312
+ #
219
313
  def rerun!
220
314
  ensure_service!
221
315
  gapi = service.insert_job @gapi.configuration
@@ -224,6 +318,19 @@ module Google
224
318
 
225
319
  ##
226
320
  # Reloads the job with current data from the BigQuery service.
321
+ #
322
+ # @example
323
+ # require "google/cloud/bigquery"
324
+ #
325
+ # bigquery = Google::Cloud::Bigquery.new
326
+ #
327
+ # job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
328
+ # "publicdata.samples.shakespeare"
329
+ #
330
+ # job.done?
331
+ # job.reload!
332
+ # job.done? #=> true
333
+ #
227
334
  def reload!
228
335
  ensure_service!
229
336
  gapi = service.get_job job_id
@@ -232,8 +339,9 @@ module Google
232
339
  alias_method :refresh!, :reload!
233
340
 
234
341
  ##
235
- # Refreshes the job until the job is `DONE`.
236
- # The delay between refreshes will incrementally increase.
342
+ # Refreshes the job until the job is `DONE`. The delay between refreshes
343
+ # starts at 5 seconds and increases quadratically to a maximum of 60
344
+ # seconds.
237
345
  #
238
346
  # @example
239
347
  # require "google/cloud/bigquery"
@@ -242,12 +350,16 @@ module Google
242
350
  # dataset = bigquery.dataset "my_dataset"
243
351
  # table = dataset.table "my_table"
244
352
  #
245
- # extract_job = table.extract "gs://my-bucket/file-name.json",
246
- # format: "json"
353
+ # extract_job = table.extract_job "gs://my-bucket/file-name.json",
354
+ # format: "json"
247
355
  # extract_job.wait_until_done!
248
356
  # extract_job.done? #=> true
357
+ #
249
358
  def wait_until_done!
250
- backoff = ->(retries) { sleep 2 * retries + 5 }
359
+ backoff = lambda do |retries|
360
+ delay = [retries ** 2 + 5, 60].min # Maximum delay is 60
361
+ sleep delay
362
+ end
251
363
  retries = 0
252
364
  until done?
253
365
  backoff.call retries
@@ -266,6 +378,20 @@ module Google
266
378
  end
267
379
  end
268
380
 
381
+ ##
382
+ # @private New Google::Apis::Error with job failure details
383
+ def gapi_error
384
+ return nil unless failed?
385
+
386
+ error_status_code = status_code_for_reason error["reason"]
387
+ error_body = error
388
+ error_body["errors"] = errors
389
+
390
+ Google::Apis::Error.new error["message"],
391
+ status_code: error_status_code,
392
+ body: error_body
393
+ end
394
+
269
395
  protected
270
396
 
271
397
  ##
@@ -296,6 +422,19 @@ module Google
296
422
  rescue Google::Cloud::NotFoundError
297
423
  nil
298
424
  end
425
+
426
+ def status_code_for_reason reason
427
+ codes = { "accessDenied" => 403, "backendError" => 500,
428
+ "billingNotEnabled" => 403,
429
+ "billingTierLimitExceeded" => 400, "blocked" => 403,
430
+ "duplicate" => 409, "internalError" =>500, "invalid" => 400,
431
+ "invalidQuery" => 400, "notFound" =>404,
432
+ "notImplemented" => 501, "quotaExceeded" => 403,
433
+ "rateLimitExceeded" => 403, "resourceInUse" => 400,
434
+ "resourcesExceeded" => 400, "responseTooLarge" => 403,
435
+ "tableUnavailable" => 400 }
436
+ codes[reason] || 0
437
+ end
299
438
  end
300
439
  end
301
440
  end