google-cloud-bigquery 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,171 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
require "delegate"

module Google
  module Cloud
    module Bigquery
      class Dataset
        ##
        # Dataset::List is a special case Array with additional values.
        class List < DelegateClass(::Array)
          ##
          # If not empty, indicates that there are more records that match
          # the request and this value should be passed to continue.
          attr_accessor :token

          ##
          # A hash of this page of results.
          attr_accessor :etag

          ##
          # @private Create a new Dataset::List with an array of datasets.
          def initialize arr = []
            super arr
          end

          ##
          # Whether there is a next page of datasets.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   datasets = bigquery.datasets
          #   if datasets.next?
          #     next_datasets = datasets.next
          #   end
          def next?
            !token.nil?
          end

          ##
          # Retrieve the next page of datasets.
          #
          # @return [Dataset::List]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   datasets = bigquery.datasets
          #   if datasets.next?
          #     next_datasets = datasets.next
          #   end
          def next
            return nil unless next?
            ensure_service!
            next_options = { all: @hidden, token: token, max: @max }
            next_gapi = @service.list_datasets next_options
            self.class.from_gapi next_gapi, @service, @hidden, @max
          end

          ##
          # Retrieves all datasets by repeatedly loading {#next} until {#next?}
          # returns `false`. Calls the given block once for each dataset, which
          # is passed as the parameter.
          #
          # An Enumerator is returned if no block is given.
          #
          # This method may make several API calls until all datasets are
          # retrieved. Be sure to use as narrow a search criteria as possible.
          # Please use with caution.
          #
          # @param [Integer] request_limit The upper limit of API requests to
          #   make to load all datasets. Default is no limit.
          # @yield [dataset] The block for accessing each dataset.
          # @yieldparam [Dataset] dataset The dataset object.
          #
          # @return [Enumerator]
          #
          # @example Iterating each result by passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.datasets.all do |dataset|
          #     puts dataset.name
          #   end
          #
          # @example Using the enumerator by not passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   all_names = bigquery.datasets.all.map do |dataset|
          #     dataset.name
          #   end
          #
          # @example Limit the number of API calls made:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.datasets.all(request_limit: 10) do |dataset|
          #     puts dataset.name
          #   end
          #
          def all request_limit: nil
            request_limit = request_limit.to_i if request_limit
            return enum_for(:all, request_limit: request_limit) unless
              block_given?
            page = self
            loop do
              page.each { |dataset| yield dataset }
              # request_limit counts *additional* API calls after this page.
              if request_limit
                request_limit -= 1
                break if request_limit < 0
              end
              break unless page.next?
              page = page.next
            end
          end

          ##
          # @private New Dataset::List from a response object.
          def self.from_gapi gapi_list, service, hidden = nil, max = nil
            items = Array(gapi_list.datasets).map do |item_gapi|
              Dataset.from_gapi item_gapi, service
            end
            list = List.new items
            # Stash paging state so #next can re-issue the original request.
            { token: gapi_list.next_page_token,
              etag: gapi_list.etag,
              service: service,
              hidden: hidden,
              max: max }.each do |name, value|
              list.instance_variable_set :"@#{name}", value
            end
            list
          end

          protected

          ##
          # Raise an error unless an active service is available.
          def ensure_service!
            fail "Must have active connection" unless @service
          end
        end
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
module Google
  module Cloud
    module Bigquery
      ##
      # # ExtractJob
      #
      # A {Job} subclass representing an export operation that may be performed
      # on a {Table}. A ExtractJob instance is created when you call
      # {Table#extract}.
      #
      # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
      #   Exporting Data From BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class ExtractJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files to which
        # the data is exported.
        def destinations
          Array extract_config.destination_uris
        end

        ##
        # The table from which the data is exported. This is the table upon
        # which {Table#extract} was called. Returns a {Table} instance.
        def source
          ref = extract_config.source_table
          return nil if ref.nil?
          retrieve_table ref.project_id, ref.dataset_id, ref.table_id
        end

        ##
        # Checks if the export operation compresses the data using gzip. The
        # default is `false`.
        def compression?
          "GZIP" == extract_config.compression
        end

        ##
        # Checks if the destination format for the data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        def json?
          "NEWLINE_DELIMITED_JSON" == extract_config.destination_format
        end

        ##
        # Checks if the destination format for the data is CSV. Tables with
        # nested or repeated fields cannot be exported as CSV. The default is
        # `true`.
        def csv?
          format = extract_config.destination_format
          # An unset destination_format means the service default, CSV.
          format.nil? || format == "CSV"
        end

        ##
        # Checks if the destination format for the data is
        # [Avro](http://avro.apache.org/). The default is `false`.
        def avro?
          "AVRO" == extract_config.destination_format
        end

        ##
        # The symbol the operation uses to delimit fields in the exported data.
        # The default is a comma (,).
        def delimiter
          extract_config.field_delimiter || ","
        end

        ##
        # Checks if the exported data contains a header row. The default is
        # `true`.
        def print_header?
          header = extract_config.print_header
          # print_header may legitimately be false; only nil means "default".
          header.nil? ? true : header
        end

        ##
        # The count of files per destination URI or URI pattern specified in
        # {#destinations}. Returns an Array of values in the same order as the
        # URI patterns.
        def destinations_file_counts
          Array @gapi.statistics.extract.destination_uri_file_counts
        end

        ##
        # The count of files per destination URI or URI pattern specified in
        # {#destinations}. Returns a Hash with the URI patterns as keys and the
        # counts as values.
        def destinations_counts
          pairs = destinations.zip destinations_file_counts
          Hash[pairs]
        end

        protected

        ##
        # The extract fragment of the job's configuration.
        def extract_config
          @gapi.configuration.extract
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
require "json"

module Google
  module Cloud
    module Bigquery
      ##
      # InsertResponse
      class InsertResponse
        # @private
        def initialize rows, gapi
          @rows = rows
          @gapi = gapi
        end

        # Whether every row was inserted without error.
        def success?
          error_count.zero?
        end

        # The number of rows that were inserted successfully.
        def insert_count
          @rows.count - error_count
        end

        # The number of rows that failed to insert.
        def error_count
          gapi_error_entries.count
        end

        # The failures as {InsertError} objects, pairing each failed row with
        # its errors (each error rendered as a plain Hash).
        def insert_errors
          gapi_error_entries.map do |entry|
            affected_row = @rows[entry.index]
            parsed_errors = entry.errors.map { |err| JSON.parse err.to_json }
            InsertError.new affected_row, parsed_errors
          end
        end

        # The original row objects that failed to insert.
        def error_rows
          gapi_error_entries.map { |entry| @rows[entry.index] }
        end

        # The errors for the given row, or an empty array if it succeeded.
        def errors_for row
          found = insert_errors.detect { |ie| ie.row == row }
          found ? found.errors : []
        end

        # @private New InsertResponse from the inserted rows and a
        # Google::Apis::BigqueryV2::InsertAllTableDataResponse object.
        def self.from_gapi rows, gapi
          new rows, gapi
        end

        ##
        # InsertError
        class InsertError
          attr_reader :row
          attr_reader :errors

          # @private
          def initialize row, errors
            @row = row
            @errors = errors
          end
        end

        protected

        # The raw insert error entries from the API response; always an Array.
        def gapi_error_entries
          Array @gapi.insert_errors
        end
      end
    end
  end
end
@@ -0,0 +1,301 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/bigquery/service"
18
+ require "google/cloud/bigquery/query_data"
19
+ require "google/cloud/bigquery/job/list"
20
+ require "json"
21
+
22
module Google
  module Cloud
    module Bigquery
      ##
      # # Job
      #
      # Represents a generic Job that may be performed on a {Table}.
      #
      # The subclasses of Job represent the specific BigQuery job types:
      # {CopyJob}, {ExtractJob}, {LoadJob}, and {QueryJob}.
      #
      # A job instance is created when you call {Project#query_job},
      # {Dataset#query_job}, {Table#copy}, {Table#extract}, {Table#load}, or
      # {View#data}.
      #
      # @see https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects
      #   Managing Jobs, Datasets, and Projects
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #
      #   q = "SELECT COUNT(word) as count FROM publicdata:samples.shakespeare"
      #   job = bigquery.query_job q
      #
      #   job.wait_until_done!
      #
      #   if job.failed?
      #     puts job.error
      #   else
      #     puts job.query_results.first
      #   end
      #
      class Job
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # @private The Google API Client object.
        attr_accessor :gapi

        ##
        # @private Create an empty Job object.
        def initialize
          @service = nil
          @gapi = {}
        end

        ##
        # The ID of the job.
        def job_id
          @gapi.job_reference.job_id
        end

        ##
        # The ID of the project containing the job.
        def project_id
          @gapi.job_reference.project_id
        end

        ##
        # The current state of the job. The possible values are `PENDING`,
        # `RUNNING`, and `DONE`. A `DONE` state does not mean that the job
        # completed successfully. Use {#failed?} to discover if an error
        # occurred or if the job was successful.
        def state
          status_gapi = @gapi.status
          return nil if status_gapi.nil?
          status_gapi.state
        end

        ##
        # Checks if the job's state is `RUNNING`.
        def running?
          state_matches? "running"
        end

        ##
        # Checks if the job's state is `PENDING`.
        def pending?
          state_matches? "pending"
        end

        ##
        # Checks if the job's state is `DONE`. When `true`, the job has stopped
        # running. However, a `DONE` state does not mean that the job completed
        # successfully. Use {#failed?} to detect if an error occurred or if the
        # job was successful.
        def done?
          state_matches? "done"
        end

        ##
        # Checks if an error is present.
        def failed?
          !error.nil?
        end

        ##
        # The time when the job was created.
        def created_at
          timestamp_from_millis { @gapi.statistics.creation_time }
        end

        ##
        # The time when the job was started.
        # This field is present after the job's state changes from `PENDING`
        # to either `RUNNING` or `DONE`.
        def started_at
          timestamp_from_millis { @gapi.statistics.start_time }
        end

        ##
        # The time when the job ended.
        # This field is present when the job's state is `DONE`.
        def ended_at
          timestamp_from_millis { @gapi.statistics.end_time }
        end

        ##
        # The configuration for the job. Returns a hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
        #   reference
        def configuration
          JSON.parse @gapi.configuration.to_json
        end
        alias_method :config, :configuration

        ##
        # The statistics for the job. Returns a hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
        #   reference
        def statistics
          JSON.parse @gapi.statistics.to_json
        end
        alias_method :stats, :statistics

        ##
        # The job's status. Returns a hash. The values contained in the hash are
        # also exposed by {#state}, {#error}, and {#errors}.
        def status
          JSON.parse @gapi.status.to_json
        end

        ##
        # The last error for the job, if any errors have occurred. Returns a
        # hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
        #   reference
        #
        # @return [Hash] Returns a hash containing `reason` and `message` keys:
        #
        #   {
        #     "reason"=>"notFound",
        #     "message"=>"Not found: Table publicdata:samples.BAD_ID"
        #   }
        #
        def error
          status_gapi = @gapi.status
          return nil if status_gapi.nil?
          return nil if status_gapi.error_result.nil?
          JSON.parse status_gapi.error_result.to_json
        end

        ##
        # The errors for the job, if any errors have occurred. Returns an array
        # of hash objects. See {#error}.
        def errors
          return [] if @gapi.status.nil?
          Array(@gapi.status.errors).map { |e| JSON.parse e.to_json }
        end

        ##
        # Created a new job with the current configuration.
        def rerun!
          ensure_service!
          rerun_gapi = service.insert_job @gapi.configuration
          Job.from_gapi rerun_gapi, service
        end

        ##
        # Reloads the job with current data from the BigQuery service.
        def reload!
          ensure_service!
          @gapi = service.get_job job_id
        end
        alias_method :refresh!, :reload!

        ##
        # Refreshes the job until the job is `DONE`.
        # The delay between refreshes will incrementally increase.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #   dataset = bigquery.dataset "my_dataset"
        #   table = dataset.table "my_table"
        #
        #   extract_job = table.extract "gs://my-bucket/file-name.json",
        #                               format: "json"
        #   extract_job.wait_until_done!
        #   extract_job.done? #=> true
        def wait_until_done!
          attempts = 0
          until done?
            # Linear backoff: 5s, 7s, 9s, ...
            sleep((2 * attempts) + 5)
            attempts += 1
            reload!
          end
        end

        ##
        # @private New Job from a Google API Client object.
        def self.from_gapi gapi, conn
          klass = klass_for gapi
          job = klass.new
          job.gapi = gapi
          job.service = conn
          job
        end

        protected

        ##
        # Raise an error unless an active connection is available.
        def ensure_service!
          fail "Must have active connection" unless service
        end

        ##
        # Whether the job's state case-insensitively equals the given value.
        def state_matches? value
          return false if state.nil?
          value.casecmp(state).zero?
        end

        ##
        # Converts the millisecond epoch value produced by the block to a
        # Time; returns nil when the value is missing or malformed.
        def timestamp_from_millis
          Time.at(Integer(yield) / 1000.0)
        rescue
          nil
        end

        ##
        # Get the subclass for a job type
        def self.klass_for gapi
          config = gapi.configuration
          return CopyJob if config.copy
          return ExtractJob if config.extract
          return LoadJob if config.load
          return QueryJob if config.query
          Job
        end

        def retrieve_table project_id, dataset_id, table_id
          ensure_service!
          table_gapi = service.get_project_table project_id, dataset_id,
                                                 table_id
          Table.from_gapi table_gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end
      end
    end
  end
end
296
+
297
+ # We need Job to be defined before loading these.
298
+ require "google/cloud/bigquery/copy_job"
299
+ require "google/cloud/bigquery/extract_job"
300
+ require "google/cloud/bigquery/load_job"
301
+ require "google/cloud/bigquery/query_job"