google-cloud-bigquery 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "delegate"
17
+
18
module Google
  module Cloud
    module Bigquery
      class Job
        ##
        # Job::List is a special case Array with additional values for
        # paging through a larger set of jobs.
        class List < DelegateClass(::Array)
          ##
          # If not empty, indicates that there are more records that match
          # the request and this value should be passed to continue.
          attr_accessor :token

          # A hash of this page of results.
          attr_accessor :etag

          ##
          # @private Create a new Job::List wrapping an array of jobs.
          def initialize arr = []
            super arr
          end

          ##
          # Whether there is a next page of jobs.
          #
          # @return [Boolean]
          def next?
            !token.nil?
          end

          ##
          # Retrieve the next page of jobs.
          #
          # @return [Job::List, nil] Returns `nil` when {#next?} is `false`.
          def next
            return nil unless next?
            ensure_service!
            page_opts = { all: @hidden, token: token, max: @max,
                          filter: @filter }
            page_gapi = @service.list_jobs page_opts
            self.class.from_gapi page_gapi, @service, @hidden, @max, @filter
          end

          ##
          # Retrieves all jobs by repeatedly loading {#next} until {#next?}
          # returns `false`, yielding each job to the given block. An
          # Enumerator is returned if no block is given.
          #
          # This method may make several API calls until all jobs are
          # retrieved. Be sure to use as narrow a search criteria as
          # possible. Please use with caution.
          #
          # @param [Integer] request_limit The upper limit of API requests
          #   to make to load all jobs. Default is no limit.
          # @yield [job] The block for accessing each job.
          # @yieldparam [Job] job The job object.
          #
          # @return [Enumerator]
          def all request_limit: nil
            request_limit = request_limit.to_i if request_limit
            unless block_given?
              return enum_for(:all, request_limit: request_limit)
            end
            page = self
            loop do
              page.each { |job| yield job }
              if request_limit
                request_limit -= 1
                break if request_limit < 0
              end
              break unless page.next?
              page = page.next
            end
          end

          ##
          # @private New Job::List from a Google API Client
          # Google::Apis::BigqueryV2::JobList object.
          def self.from_gapi gapi_list, service, hidden = nil, max = nil,
                             filter = nil
            jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
              Job.from_gapi gapi_object, service
            end)
            state = { token: gapi_list.next_page_token, etag: gapi_list.etag,
                      service: service, hidden: hidden, max: max,
                      filter: filter }
            state.each { |name, value| jobs.instance_variable_set :"@#{name}", value }
            jobs
          end

          protected

          ##
          # Raise an error unless an active service is available.
          def ensure_service!
            raise "Must have active connection" unless @service
          end
        end
      end
    end
  end
end
@@ -0,0 +1,203 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/bigquery/service"
17
+
18
module Google
  module Cloud
    module Bigquery
      ##
      # # LoadJob
      #
      # A {Job} subclass representing a load operation that may be performed
      # on a {Table}. A LoadJob instance is created when you call {Table#load}.
      #
      # @see https://cloud.google.com/bigquery/loading-data-into-bigquery
      #   Loading Data Into BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class LoadJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files from
        # which the operation loads data.
        #
        # @return [Array<String>]
        def sources
          Array @gapi.configuration.load.source_uris
        end

        ##
        # The table into which the operation loads data. This is the table on
        # which {Table#load} was invoked. Returns a {Table} instance.
        #
        # @return [Table, nil] Returns `nil` if no destination table is set.
        def destination
          table = @gapi.configuration.load.destination_table
          return nil unless table
          retrieve_table table.project_id,
                         table.dataset_id,
                         table.table_id
        end

        ##
        # The delimiter used between fields in the source data. The default is
        # a comma (,).
        #
        # @return [String]
        def delimiter
          @gapi.configuration.load.field_delimiter || ","
        end

        ##
        # The number of header rows at the top of a CSV file to skip. The
        # default value is `0`.
        #
        # @return [Integer]
        def skip_leading_rows
          @gapi.configuration.load.skip_leading_rows || 0
        end

        ##
        # Checks if the character encoding of the data is UTF-8. This is the
        # default.
        #
        # @return [Boolean]
        def utf8?
          val = @gapi.configuration.load.encoding
          return true if val.nil?
          val == "UTF-8"
        end

        ##
        # Checks if the character encoding of the data is ISO-8859-1.
        #
        # @return [Boolean]
        def iso8859_1?
          val = @gapi.configuration.load.encoding
          val == "ISO-8859-1"
        end

        ##
        # The value that is used to quote data sections in a CSV file. The
        # default value is a double-quote (`"`). If your data does not contain
        # quoted sections, the value should be an empty string. If your data
        # contains quoted newline characters, {#quoted_newlines?} should return
        # `true`.
        #
        # @return [String]
        def quote
          val = @gapi.configuration.load.quote
          val = "\"" if val.nil?
          val
        end

        ##
        # The maximum number of bad records that the load operation can ignore.
        # If the number of bad records exceeds this value, an error is returned.
        # The default value is `0`, which requires that all records be valid.
        #
        # @return [Integer]
        def max_bad_records
          val = @gapi.configuration.load.max_bad_records
          val = 0 if val.nil?
          val
        end

        ##
        # Checks if quoted data sections may contain newline characters in a CSV
        # file. The default is `false`.
        #
        # @return [Boolean]
        def quoted_newlines?
          val = @gapi.configuration.load.allow_quoted_newlines
          # The allowQuotedNewlines API default is false; a missing value
          # therefore means false. (Previously this incorrectly defaulted to
          # true, contradicting the documented default.)
          val = false if val.nil?
          val
        end

        ##
        # Checks if the format of the source data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        #
        # @return [Boolean]
        def json?
          val = @gapi.configuration.load.source_format
          val == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the format of the source data is CSV. The default is `true`.
        #
        # @return [Boolean]
        def csv?
          val = @gapi.configuration.load.source_format
          return true if val.nil?
          val == "CSV"
        end

        ##
        # Checks if the source data is a Google Cloud Datastore backup.
        #
        # @return [Boolean]
        def backup?
          val = @gapi.configuration.load.source_format
          val == "DATASTORE_BACKUP"
        end

        ##
        # Checks if the load operation accepts rows that are missing trailing
        # optional columns. The missing values are treated as nulls. If `false`,
        # records with missing trailing columns are treated as bad records, and
        # if there are too many bad records, an error is returned. The default
        # value is `false`. Only applicable to CSV, ignored for other formats.
        #
        # @return [Boolean]
        def allow_jagged_rows?
          val = @gapi.configuration.load.allow_jagged_rows
          val = false if val.nil?
          val
        end

        ##
        # Checks if the load operation allows extra values that are not
        # represented in the table schema. If `true`, the extra values are
        # ignored. If `false`, records with extra columns are treated as bad
        # records, and if there are too many bad records, an invalid error is
        # returned. The default is `false`.
        #
        # @return [Boolean]
        def ignore_unknown_values?
          val = @gapi.configuration.load.ignore_unknown_values
          val = false if val.nil?
          val
        end

        ##
        # The schema for the data. Returns a hash. Can be empty if the table
        # already has the correct schema (see {Table#schema}), or if the schema
        # can be inferred from the loaded data.
        def schema
          Schema.from_gapi(@gapi.configuration.load.schema).freeze
        end

        ##
        # The number of source files.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is unavailable.
        def input_files
          Integer @gapi.statistics.load.input_files
        rescue
          nil
        end

        ##
        # The number of bytes of source data.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is unavailable.
        def input_file_bytes
          Integer @gapi.statistics.load.input_file_bytes
        rescue
          nil
        end

        ##
        # The number of rows that have been loaded into the table. While an
        # import job is in the running state, this value may change.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is unavailable.
        def output_rows
          Integer @gapi.statistics.load.output_rows
        rescue
          nil
        end

        ##
        # The number of bytes that have been loaded into the table. While an
        # import job is in the running state, this value may change.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is unavailable.
        def output_bytes
          Integer @gapi.statistics.load.output_bytes
        rescue
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,481 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/core/gce"
17
+ require "google/cloud/errors"
18
+ require "google/cloud/bigquery/service"
19
+ require "google/cloud/bigquery/credentials"
20
+ require "google/cloud/bigquery/dataset"
21
+ require "google/cloud/bigquery/job"
22
+ require "google/cloud/bigquery/query_data"
23
+
24
module Google
  module Cloud
    module Bigquery
      ##
      # # Project
      #
      # Projects are top-level containers in Google Cloud Platform. They store
      # information about billing and authorized users, and they contain
      # BigQuery data. Each project has a friendly name and a unique ID.
      #
      # Google::Cloud::Bigquery::Project is the main object for interacting
      # with Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are
      # created, accessed, and deleted by Google::Cloud::Bigquery::Project.
      #
      # See {Google::Cloud#bigquery}
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      class Project
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # @private Creates a new Project backed by the given Service.
        #
        # See {Google::Cloud.bigquery}
        def initialize service
          @service = service
        end

        ##
        # The ID of the BigQuery project connected to.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new "my-todo-project",
        #                              "/path/to/keyfile.json"
        #   bigquery = gcloud.bigquery
        #
        #   bigquery.project #=> "my-todo-project"
        #
        def project
          service.project
        end

        ##
        # @private Default project: environment variables first, then the
        # GCE metadata service.
        def self.default_project
          ENV["BIGQUERY_PROJECT"] ||
            ENV["GOOGLE_CLOUD_PROJECT"] ||
            ENV["GCLOUD_PROJECT"] ||
            Google::Cloud::Core::GCE.project_id
        end

        ##
        # Queries data using the [asynchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [String] priority Specifies a priority for the query.
        #   Possible values include `INTERACTIVE` and `BATCH`. The default
        #   value is `INTERACTIVE`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true.
        # @param [Table] table The destination table where the query results
        #   should be stored. If not present, a new table will be created to
        #   store the results.
        # @param [String] create Specifies whether the job is allowed to
        #   create new tables. Supported values are `needed` (create the table
        #   if it does not exist) and `never` (the table must already exist; a
        #   'notFound' error is raised if it does not).
        # @param [String] write Specifies the action that occurs if the
        #   destination table already exists. Supported values are `truncate`
        #   (overwrite the table data), `append` (append the data), and
        #   `empty` (return a 'duplicate' error in the job result if the table
        #   exists and contains data).
        # @param [Boolean] large_results If `true`, allows the query to
        #   produce arbitrarily large result tables at a slight cost in
        #   performance. Requires `table` parameter to be set.
        # @param [Boolean] flatten Flattens all nested and repeated fields in
        #   the query results. The default value is `true`. `large_results`
        #   parameter must be `true` if this is set to `false`.
        # @param [Dataset, String] dataset Specifies the default dataset to
        #   use for unqualified table names in the query.
        #
        # @return [Google::Cloud::Bigquery::QueryJob]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "[my_proj:my_data.my_table]"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.query_results.each do |row|
        #       puts row["name"]
        #     end
        #   end
        #
        def query_job query, priority: "INTERACTIVE", cache: true, table: nil,
                      create: nil, write: nil, large_results: nil, flatten: nil,
                      dataset: nil
          ensure_service!
          job_opts = { priority: priority, cache: cache, table: table,
                       create: create, write: write,
                       large_results: large_results, flatten: flatten,
                       dataset: dataset }
          job_gapi = service.query_job query, job_opts
          Job.from_gapi job_gapi, service
        end

        ##
        # Queries data using the [synchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [Integer] max The maximum number of rows of data to return
        #   per page of results. Setting this flag to a small value such as
        #   1000 and then paging through results might improve reliability
        #   when the query result set is large. In addition to this limit,
        #   responses are also limited to 10 MB. By default, there is no
        #   maximum row count, and only the byte limit applies.
        # @param [Integer] timeout How long to wait for the query to complete,
        #   in milliseconds, before the request times out and returns. Note
        #   that this is only a timeout for the request, not the query. If the
        #   query takes longer to run than the timeout value, the call returns
        #   without any results and with QueryData#complete? set to false. The
        #   default value is 10000 milliseconds (10 seconds).
        # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
        #   job. Instead, if the query is valid, BigQuery returns statistics
        #   about the job such as how many bytes would be processed. If the
        #   query is invalid, an error returns. The default value is `false`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true.
        # @param [String] dataset Specifies the default datasetId and
        #   projectId to assume for any unqualified table names in the query.
        #   If not set, all table names in the query string must be qualified
        #   in the format 'datasetId.tableId'.
        # @param [String] project Specifies the default projectId to assume
        #   for any unqualified table names in the query. Only used if
        #   `dataset` option is set.
        #
        # @return [Google::Cloud::Bigquery::QueryData]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.each do |row|
        #     puts row["name"]
        #   end
        #
        def query query, max: nil, timeout: 10000, dryrun: nil, cache: true,
                  dataset: nil, project: nil
          ensure_service!
          query_opts = { max: max, timeout: timeout, dryrun: dryrun,
                         cache: cache, dataset: dataset, project: project }
          query_gapi = service.query query, query_opts
          QueryData.from_gapi query_gapi, service
        end

        ##
        # Retrieves an existing dataset by ID.
        #
        # @param [String] dataset_id The ID of a dataset.
        #
        # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if the
        #   dataset does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.dataset "my_dataset"
        #   puts dataset.name
        #
        def dataset dataset_id
          ensure_service!
          ds_gapi = service.get_dataset dataset_id
          Dataset.from_gapi ds_gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Creates a new dataset.
        #
        # @param [String] dataset_id A unique ID for this dataset, without the
        #   project name. The ID must contain only letters (a-z, A-Z), numbers
        #   (0-9), or underscores (_). The maximum length is 1,024 characters.
        # @param [String] name A descriptive name for the dataset.
        # @param [String] description A user-friendly description of the
        #   dataset.
        # @param [Integer] expiration The default lifetime of all tables in
        #   the dataset, in milliseconds. The minimum value is 3600000
        #   milliseconds (one hour).
        # @param [String] location The geographic location where the dataset
        #   should reside. Possible values include `EU` and `US`. The default
        #   value is `US`.
        # @yield [access] a block for setting rules
        # @yieldparam [Dataset::Access] access the object accepting rules
        #
        # @return [Google::Cloud::Bigquery::Dataset]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset",
        #                                     name: "My Dataset",
        #                                     description: "This is my Dataset"
        #
        # @example Or, configure access with a block: (See {Dataset::Access})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset" do |access|
        #     access.add_writer_user "writers@example.com"
        #   end
        #
        def create_dataset dataset_id, name: nil, description: nil,
                           expiration: nil, location: nil
          ensure_service!

          ds_ref = Google::Apis::BigqueryV2::DatasetReference.new(
            project_id: project, dataset_id: dataset_id)
          new_ds = Google::Apis::BigqueryV2::Dataset.new(
            dataset_reference: ds_ref)

          # Can set location only on creation, no Dataset#location method
          new_ds.update! location: location unless location.nil?

          updater = Dataset::Updater.new new_ds
          updater.name = name unless name.nil?
          updater.description = description unless description.nil?
          updater.default_expiration = expiration unless expiration.nil?

          if block_given?
            yield updater
            updater.check_for_mutated_access!
          end

          ds_gapi = service.insert_dataset new_ds
          Dataset.from_gapi ds_gapi, service
        end

        ##
        # Retrieves the list of datasets belonging to the project.
        #
        # @param [Boolean] all Whether to list all datasets, including hidden
        #   ones. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of datasets to return.
        #
        # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
        #   {Google::Cloud::Bigquery::Dataset::List})
        #
        # @example Retrieve all datasets: (See {Dataset::List#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   datasets = bigquery.datasets
        #   datasets.all do |dataset|
        #     puts dataset.name
        #   end
        #
        def datasets all: nil, token: nil, max: nil
          ensure_service!
          list_opts = { all: all, token: token, max: max }
          list_gapi = service.list_datasets list_opts
          Dataset::List.from_gapi list_gapi, service, all, max
        end

        ##
        # Retrieves an existing job by ID.
        #
        # @param [String] job_id The ID of a job.
        #
        # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the job
        #   does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.job "my_job"
        #
        def job job_id
          ensure_service!
          job_gapi = service.get_job job_id
          Job.from_gapi job_gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Retrieves the list of jobs belonging to the project.
        #
        # @param [Boolean] all Whether to display jobs owned by all users in
        #   the project. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of jobs to return.
        # @param [String] filter A filter for job state. Acceptable values
        #   are `done` (finished jobs), `pending` (pending jobs), and
        #   `running` (running jobs).
        #
        # @return [Array<Google::Cloud::Bigquery::Job>] (See
        #   {Google::Cloud::Bigquery::Job::List})
        #
        # @example Retrieve only running jobs using the `filter` optional arg:
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   running_jobs = bigquery.jobs filter: "running"
        #   running_jobs.each do |job|
        #     # process job
        #   end
        #
        def jobs all: nil, token: nil, max: nil, filter: nil
          ensure_service!
          list_opts = { all: all, token: token, max: max, filter: filter }
          list_gapi = service.list_jobs list_opts
          Job::List.from_gapi list_gapi, service, all, max, filter
        end

        protected

        ##
        # Raise an error unless an active service is available.
        def ensure_service!
          raise "Must have active connection" unless service
        end
      end
    end
  end
end