google-cloud-bigquery 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,171 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "delegate"
17
+
18
module Google
  module Cloud
    module Bigquery
      class Dataset
        ##
        # Dataset::List is a special case Array with additional values.
        class List < DelegateClass(::Array)
          ##
          # If not empty, indicates that there are more records that match
          # the request and this value should be passed to continue.
          attr_accessor :token

          # A hash of this page of results.
          attr_accessor :etag

          ##
          # @private Create a new Dataset::List with an array of datasets.
          def initialize arr = []
            super arr
          end

          ##
          # Whether there is a next page of datasets.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   datasets = bigquery.datasets
          #   if datasets.next?
          #     next_datasets = datasets.next
          #   end
          def next?
            !token.nil?
          end

          ##
          # Retrieves the next page of datasets, or `nil` when {#next?}
          # is `false`.
          #
          # @return [Dataset::List]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   datasets = bigquery.datasets
          #   if datasets.next?
          #     next_datasets = datasets.next
          #   end
          def next
            return nil unless next?
            ensure_service!
            next_options = { all: @hidden, token: token, max: @max }
            next_gapi = @service.list_datasets next_options
            self.class.from_gapi next_gapi, @service, @hidden, @max
          end

          ##
          # Retrieves all datasets by repeatedly loading {#next} until
          # {#next?} returns `false`. Calls the given block once for each
          # dataset, which is passed as the parameter. An Enumerator is
          # returned if no block is given.
          #
          # This method may make several API calls until all datasets are
          # retrieved. Be sure to use as narrow a search criteria as
          # possible. Please use with caution.
          #
          # @param [Integer] request_limit The upper limit of API requests to
          #   make to load all datasets. Default is no limit.
          # @yield [dataset] The block for accessing each dataset.
          # @yieldparam [Dataset] dataset The dataset object.
          #
          # @return [Enumerator]
          #
          # @example Iterating each result by passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.datasets.all do |dataset|
          #     puts dataset.name
          #   end
          #
          # @example Using the enumerator by not passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   all_names = bigquery.datasets.all.map do |dataset|
          #     dataset.name
          #   end
          #
          # @example Limit the number of API calls made:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.datasets.all(request_limit: 10) do |dataset|
          #     puts dataset.name
          #   end
          #
          def all request_limit: nil
            request_limit = request_limit.to_i if request_limit
            unless block_given?
              return enum_for(:all, request_limit: request_limit)
            end
            page = self
            loop do
              page.each { |dataset| yield dataset }
              if request_limit
                request_limit -= 1
                # Stop once the allowed number of list calls is spent.
                break if request_limit < 0
              end
              break unless page.next?
              page = page.next
            end
          end

          ##
          # @private New Dataset::List from a response object.
          def self.from_gapi gapi_list, service, hidden = nil, max = nil
            items = Array(gapi_list.datasets).map do |gapi_object|
              Dataset.from_gapi gapi_object, service
            end
            List.new(items).tap do |list|
              list.instance_variable_set :@token, gapi_list.next_page_token
              list.instance_variable_set :@etag, gapi_list.etag
              list.instance_variable_set :@service, service
              list.instance_variable_set :@hidden, hidden
              list.instance_variable_set :@max, max
            end
          end

          protected

          ##
          # Raise an error unless an active service is available.
          def ensure_service!
            fail "Must have active connection" unless @service
          end
        end
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
module Google
  module Cloud
    module Bigquery
      ##
      # # ExtractJob
      #
      # A {Job} subclass representing an export operation that may be
      # performed on a {Table}. An ExtractJob instance is created when you
      # call {Table#extract}.
      #
      # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
      #   Exporting Data From BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class ExtractJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files to
        # which the data is exported.
        def destinations
          Array extract_config.destination_uris
        end

        ##
        # The table from which the data is exported. This is the table upon
        # which {Table#extract} was called. Returns a {Table} instance.
        def source
          table_ref = extract_config.source_table
          return nil if table_ref.nil?
          retrieve_table table_ref.project_id,
                         table_ref.dataset_id,
                         table_ref.table_id
        end

        ##
        # Checks if the export operation compresses the data using gzip. The
        # default is `false`.
        def compression?
          extract_config.compression == "GZIP"
        end

        ##
        # Checks if the destination format for the data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        def json?
          extract_config.destination_format == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the destination format for the data is CSV. Tables with
        # nested or repeated fields cannot be exported as CSV. The default is
        # `true`.
        def csv?
          format = extract_config.destination_format
          # An unset destination format means CSV, the service default.
          format.nil? || format == "CSV"
        end

        ##
        # Checks if the destination format for the data is
        # [Avro](http://avro.apache.org/). The default is `false`.
        def avro?
          extract_config.destination_format == "AVRO"
        end

        ##
        # The symbol the operation uses to delimit fields in the exported
        # data. The default is a comma (,).
        def delimiter
          separator = extract_config.field_delimiter
          separator.nil? ? "," : separator
        end

        ##
        # Checks if the exported data contains a header row. The default is
        # `true`.
        def print_header?
          header = extract_config.print_header
          header.nil? ? true : header
        end

        ##
        # The count of files per destination URI or URI pattern specified in
        # {#destinations}. Returns an Array of values in the same order as
        # the URI patterns.
        def destinations_file_counts
          Array @gapi.statistics.extract.destination_uri_file_counts
        end

        ##
        # The count of files per destination URI or URI pattern specified in
        # {#destinations}. Returns a Hash with the URI patterns as keys and
        # the counts as values.
        def destinations_counts
          destinations.zip(destinations_file_counts).to_h
        end

        protected

        ##
        # @private The extract portion of the job configuration.
        def extract_config
          @gapi.configuration.extract
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "json"
17
+
18
module Google
  module Cloud
    module Bigquery
      ##
      # InsertResponse
      #
      # Represents the result of inserting rows into a table, pairing the
      # submitted rows with any per-row errors returned by the service.
      class InsertResponse
        # @private
        def initialize rows, gapi
          @rows = rows
          @gapi = gapi
        end

        ##
        # Checks if every row was inserted without error.
        def success?
          error_count.zero?
        end

        ##
        # The count of rows that were inserted successfully.
        def insert_count
          @rows.count - error_count
        end

        ##
        # The count of rows that failed to insert.
        def error_count
          gapi_errors.count
        end

        ##
        # The failed rows paired with their errors. Returns an Array of
        # {InsertError} objects.
        def insert_errors
          gapi_errors.map do |gapi_error|
            failed_row = @rows[gapi_error.index]
            row_errors = gapi_error.errors.map { |e| JSON.parse e.to_json }
            InsertError.new failed_row, row_errors
          end
        end

        ##
        # The rows that failed to insert.
        def error_rows
          gapi_errors.map { |gapi_error| @rows[gapi_error.index] }
        end

        ##
        # The error hashes for the given row, or an empty Array when the row
        # has no errors.
        def errors_for row
          found = insert_errors.detect { |ie| ie.row == row }
          found ? found.errors : []
        end

        # @private New InsertResponse from the inserted rows and a
        # Google::Apis::BigqueryV2::InsertAllTableDataResponse object.
        def self.from_gapi rows, gapi
          new rows, gapi
        end

        ##
        # InsertError
        #
        # Pairs a row that failed to insert with its error hashes.
        class InsertError
          attr_reader :row
          attr_reader :errors

          # @private
          def initialize row, errors
            @row = row
            @errors = errors
          end
        end

        protected

        ##
        # @private The raw insert errors from the API response, as an Array.
        def gapi_errors
          Array @gapi.insert_errors
        end
      end
    end
  end
end
@@ -0,0 +1,301 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/bigquery/service"
18
+ require "google/cloud/bigquery/query_data"
19
+ require "google/cloud/bigquery/job/list"
20
+ require "json"
21
+
22
module Google
  module Cloud
    module Bigquery
      ##
      # # Job
      #
      # Represents a generic Job that may be performed on a {Table}.
      #
      # The subclasses of Job represent the specific BigQuery job types:
      # {CopyJob}, {ExtractJob}, {LoadJob}, and {QueryJob}.
      #
      # A job instance is created when you call {Project#query_job},
      # {Dataset#query_job}, {Table#copy}, {Table#extract}, {Table#load}, or
      # {View#data}.
      #
      # @see https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects
      #   Managing Jobs, Datasets, and Projects
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #
      #   q = "SELECT COUNT(word) as count FROM publicdata:samples.shakespeare"
      #   job = bigquery.query_job q
      #
      #   job.wait_until_done!
      #
      #   if job.failed?
      #     puts job.error
      #   else
      #     puts job.query_results.first
      #   end
      #
      class Job
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # @private The Google API Client object.
        attr_accessor :gapi

        ##
        # @private Create an empty Job object.
        def initialize
          @service = nil
          @gapi = {}
        end

        ##
        # The ID of the job.
        def job_id
          @gapi.job_reference.job_id
        end

        ##
        # The ID of the project containing the job.
        def project_id
          @gapi.job_reference.project_id
        end

        ##
        # The current state of the job. The possible values are `PENDING`,
        # `RUNNING`, and `DONE`. A `DONE` state does not mean that the job
        # completed successfully. Use {#failed?} to discover if an error
        # occurred or if the job was successful.
        def state
          @gapi.status.state unless @gapi.status.nil?
        end

        ##
        # Checks if the job's state is `RUNNING`.
        def running?
          state_is? "running"
        end

        ##
        # Checks if the job's state is `PENDING`.
        def pending?
          state_is? "pending"
        end

        ##
        # Checks if the job's state is `DONE`. When `true`, the job has
        # stopped running. However, a `DONE` state does not mean that the job
        # completed successfully. Use {#failed?} to detect if an error
        # occurred or if the job was successful.
        def done?
          state_is? "done"
        end

        ##
        # Checks if an error is present.
        def failed?
          !error.nil?
        end

        ##
        # The time when the job was created.
        def created_at
          timestamp_of :creation_time
        end

        ##
        # The time when the job was started.
        # This field is present after the job's state changes from `PENDING`
        # to either `RUNNING` or `DONE`.
        def started_at
          timestamp_of :start_time
        end

        ##
        # The time when the job ended.
        # This field is present when the job's state is `DONE`.
        def ended_at
          timestamp_of :end_time
        end

        ##
        # The configuration for the job. Returns a hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs
        #   API reference
        def configuration
          JSON.parse @gapi.configuration.to_json
        end
        alias_method :config, :configuration

        ##
        # The statistics for the job. Returns a hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs
        #   API reference
        def statistics
          JSON.parse @gapi.statistics.to_json
        end
        alias_method :stats, :statistics

        ##
        # The job's status. Returns a hash. The values contained in the hash
        # are also exposed by {#state}, {#error}, and {#errors}.
        def status
          JSON.parse @gapi.status.to_json
        end

        ##
        # The last error for the job, if any errors have occurred. Returns a
        # hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs
        #   API reference
        #
        # @return [Hash] Returns a hash containing `reason` and `message`
        #   keys:
        #
        #     {
        #       "reason"=>"notFound",
        #       "message"=>"Not found: Table publicdata:samples.BAD_ID"
        #     }
        #
        def error
          status_gapi = @gapi.status
          return nil if status_gapi.nil? || status_gapi.error_result.nil?
          JSON.parse status_gapi.error_result.to_json
        end

        ##
        # The errors for the job, if any errors have occurred. Returns an
        # array of hash objects. See {#error}.
        def errors
          status_gapi = @gapi.status
          return [] if status_gapi.nil?
          Array(status_gapi.errors).map { |e| JSON.parse e.to_json }
        end

        ##
        # Created a new job with the current configuration.
        def rerun!
          ensure_service!
          new_gapi = service.insert_job @gapi.configuration
          Job.from_gapi new_gapi, service
        end

        ##
        # Reloads the job with current data from the BigQuery service.
        def reload!
          ensure_service!
          @gapi = service.get_job job_id
        end
        alias_method :refresh!, :reload!

        ##
        # Refreshes the job until the job is `DONE`.
        # The delay between refreshes will incrementally increase.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #   dataset = bigquery.dataset "my_dataset"
        #   table = dataset.table "my_table"
        #
        #   extract_job = table.extract "gs://my-bucket/file-name.json",
        #                               format: "json"
        #   extract_job.wait_until_done!
        #   extract_job.done? #=> true
        def wait_until_done!
          attempts = 0
          until done?
            # Incremental backoff: 5s, 7s, 9s, ...
            sleep 2 * attempts + 5
            attempts += 1
            reload!
          end
        end

        ##
        # @private New Job from a Google API Client object.
        def self.from_gapi gapi, conn
          klass_for(gapi).new.tap do |job|
            job.gapi = gapi
            job.service = conn
          end
        end

        ##
        # @private Get the Job subclass matching the job type in the API
        # object; plain Job when no specific type is configured.
        def self.klass_for gapi
          config = gapi.configuration
          return CopyJob if config.copy
          return ExtractJob if config.extract
          return LoadJob if config.load
          return QueryJob if config.query
          Job
        end

        protected

        ##
        # Raise an error unless an active connection is available.
        def ensure_service!
          fail "Must have active connection" unless service
        end

        ##
        # @private Convert a millisecond statistics field to a Time, or nil
        # when the field is missing or malformed.
        def timestamp_of field
          Time.at Integer(@gapi.statistics.send(field)) / 1000.0
        rescue StandardError
          nil
        end

        ##
        # @private Case-insensitive check of {#state} against the given
        # value; false when the state is unset.
        def state_is? target
          return false if state.nil?
          target.casecmp(state).zero?
        end

        def retrieve_table project_id, dataset_id, table_id
          ensure_service!
          table_gapi = service.get_project_table project_id, dataset_id,
                                                 table_id
          Table.from_gapi table_gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end
      end
    end
  end
end
296
+
297
+ # We need Job to be defined before loading these.
298
+ require "google/cloud/bigquery/copy_job"
299
+ require "google/cloud/bigquery/extract_job"
300
+ require "google/cloud/bigquery/load_job"
301
+ require "google/cloud/bigquery/query_job"