google-cloud-bigquery 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,174 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "delegate"
17
+
18
module Google
  module Cloud
    module Bigquery
      class Job
        ##
        # Job::List is a special-case Array with additional values for
        # paging through list results returned by the BigQuery jobs API.
        class List < DelegateClass(::Array)
          ##
          # If not empty, indicates that there are more records that match
          # the request and this value should be passed to continue.
          attr_accessor :token

          ##
          # A hash of this page of results.
          attr_accessor :etag

          ##
          # @private Create a new Job::List backed by an array of jobs.
          def initialize jobs = []
            super jobs
          end

          ##
          # Whether there is a next page of jobs.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   jobs = bigquery.jobs
          #   if jobs.next?
          #     next_jobs = jobs.next
          #   end
          def next?
            !token.nil?
          end

          ##
          # Retrieve the next page of jobs, or `nil` when there is none.
          #
          # @return [Job::List]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   jobs = bigquery.jobs
          #   if jobs.next?
          #     next_jobs = jobs.next
          #   end
          def next
            return nil unless next?
            ensure_service!
            # Re-issue the original list call with the continuation token,
            # preserving the filters captured by from_gapi.
            next_options = { all: @hidden, token: token, max: @max,
                             filter: @filter }
            next_gapi = @service.list_jobs next_options
            self.class.from_gapi next_gapi, @service, @hidden, @max, @filter
          end

          ##
          # Retrieves all jobs by repeatedly loading {#next} until {#next?}
          # returns `false`, calling the given block once per job. Returns
          # an Enumerator when no block is given.
          #
          # This method may make several API calls until all jobs are
          # retrieved. Be sure to use as narrow a search criteria as
          # possible. Please use with caution.
          #
          # @param [Integer] request_limit The upper limit of API requests
          #   to make to load all jobs. Default is no limit.
          # @yield [job] The block for accessing each job.
          # @yieldparam [Job] job The job object.
          #
          # @return [Enumerator]
          #
          # @example Iterating each job by passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.jobs.all do |job|
          #     puts job.state
          #   end
          #
          # @example Limit the number of API calls made:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.jobs.all(request_limit: 10) do |job|
          #     puts job.state
          #   end
          #
          def all request_limit: nil
            request_limit = request_limit.to_i if request_limit
            return enum_for(:all, request_limit: request_limit) unless
              block_given?
            page = self
            loop do
              page.each { |job| yield job }
              if request_limit
                # The first page does not count against the limit; stop once
                # the allotted number of follow-up requests is exhausted.
                request_limit -= 1
                break if request_limit < 0
              end
              break unless page.next?
              page = page.next
            end
          end

          ##
          # @private New Job::List from a Google API Client
          # Google::Apis::BigqueryV2::JobList object.
          def self.from_gapi gapi_list, service, hidden = nil, max = nil,
                             filter = nil
            wrapped = Array(gapi_list.jobs).map do |job_gapi|
              Job.from_gapi job_gapi, service
            end
            list = List.new wrapped
            list.instance_variable_set :@token, gapi_list.next_page_token
            list.instance_variable_set :@etag, gapi_list.etag
            list.instance_variable_set :@service, service
            list.instance_variable_set :@hidden, hidden
            list.instance_variable_set :@max, max
            list.instance_variable_set :@filter, filter
            list
          end

          protected

          ##
          # Raise an error unless an active service is available.
          def ensure_service!
            fail "Must have active connection" unless @service
          end
        end
      end
    end
  end
end
@@ -0,0 +1,203 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/bigquery/service"
17
+
18
module Google
  module Cloud
    module Bigquery
      ##
      # # LoadJob
      #
      # A {Job} subclass representing a load operation that may be performed
      # on a {Table}. A LoadJob instance is created when you call {Table#load}.
      #
      # @see https://cloud.google.com/bigquery/loading-data-into-bigquery
      #   Loading Data Into BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class LoadJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files from
        # which the operation loads data.
        def sources
          Array @gapi.configuration.load.source_uris
        end

        ##
        # The table into which the operation loads data. This is the table on
        # which {Table#load} was invoked. Returns a {Table} instance, or `nil`
        # if no destination table is present in the job configuration.
        def destination
          table = @gapi.configuration.load.destination_table
          return nil unless table
          retrieve_table table.project_id,
                         table.dataset_id,
                         table.table_id
        end

        ##
        # The delimiter used between fields in the source data. The default is
        # a comma (,).
        def delimiter
          @gapi.configuration.load.field_delimiter || ","
        end

        ##
        # The number of header rows at the top of a CSV file to skip. The
        # default value is `0`.
        def skip_leading_rows
          @gapi.configuration.load.skip_leading_rows || 0
        end

        ##
        # Checks if the character encoding of the data is UTF-8. This is the
        # default.
        def utf8?
          val = @gapi.configuration.load.encoding
          return true if val.nil?
          val == "UTF-8"
        end

        ##
        # Checks if the character encoding of the data is ISO-8859-1.
        def iso8859_1?
          val = @gapi.configuration.load.encoding
          val == "ISO-8859-1"
        end

        ##
        # The value that is used to quote data sections in a CSV file. The
        # default value is a double-quote (`"`). If your data does not contain
        # quoted sections, the value should be an empty string. If your data
        # contains quoted newline characters, {#quoted_newlines?} should
        # return `true`.
        def quote
          val = @gapi.configuration.load.quote
          val = "\"" if val.nil?
          val
        end

        ##
        # The maximum number of bad records that the load operation can
        # ignore. If the number of bad records exceeds this value, an error is
        # returned. The default value is `0`, which requires that all records
        # be valid.
        def max_bad_records
          val = @gapi.configuration.load.max_bad_records
          val = 0 if val.nil?
          val
        end

        ##
        # Checks if quoted data sections may contain newline characters in a
        # CSV file. The default is `false`.
        def quoted_newlines?
          val = @gapi.configuration.load.allow_quoted_newlines
          # When the service omits the field, fall back to `false` — the
          # documented default here and the BigQuery API default for
          # allowQuotedNewlines. (Previously this incorrectly defaulted to
          # `true`, contradicting the documentation above.)
          val = false if val.nil?
          val
        end

        ##
        # Checks if the format of the source data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        def json?
          val = @gapi.configuration.load.source_format
          val == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the format of the source data is CSV. The default is
        # `true`.
        def csv?
          val = @gapi.configuration.load.source_format
          return true if val.nil?
          val == "CSV"
        end

        ##
        # Checks if the source data is a Google Cloud Datastore backup.
        def backup?
          val = @gapi.configuration.load.source_format
          val == "DATASTORE_BACKUP"
        end

        ##
        # Checks if the load operation accepts rows that are missing trailing
        # optional columns. The missing values are treated as nulls. If
        # `false`, records with missing trailing columns are treated as bad
        # records, and if there are too many bad records, an error is
        # returned. The default value is `false`. Only applicable to CSV,
        # ignored for other formats.
        def allow_jagged_rows?
          val = @gapi.configuration.load.allow_jagged_rows
          val = false if val.nil?
          val
        end

        ##
        # Checks if the load operation allows extra values that are not
        # represented in the table schema. If `true`, the extra values are
        # ignored. If `false`, records with extra columns are treated as bad
        # records, and if there are too many bad records, an invalid error is
        # returned. The default is `false`.
        def ignore_unknown_values?
          val = @gapi.configuration.load.ignore_unknown_values
          val = false if val.nil?
          val
        end

        ##
        # The schema for the data. Returns a hash. Can be empty if the table
        # already has the correct schema (see {Table#schema}), or if the
        # schema can be inferred from the loaded data.
        def schema
          Schema.from_gapi(@gapi.configuration.load.schema).freeze
        end

        ##
        # The number of source files, or `nil` if the statistic is not
        # present or not numeric.
        def input_files
          Integer @gapi.statistics.load.input_files
        rescue
          nil
        end

        ##
        # The number of bytes of source data, or `nil` if the statistic is
        # not present or not numeric.
        def input_file_bytes
          Integer @gapi.statistics.load.input_file_bytes
        rescue
          nil
        end

        ##
        # The number of rows that have been loaded into the table. While an
        # import job is in the running state, this value may change. Returns
        # `nil` if the statistic is not present or not numeric.
        def output_rows
          Integer @gapi.statistics.load.output_rows
        rescue
          nil
        end

        ##
        # The number of bytes that have been loaded into the table. While an
        # import job is in the running state, this value may change. Returns
        # `nil` if the statistic is not present or not numeric.
        def output_bytes
          Integer @gapi.statistics.load.output_bytes
        rescue
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,481 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/core/gce"
17
+ require "google/cloud/errors"
18
+ require "google/cloud/bigquery/service"
19
+ require "google/cloud/bigquery/credentials"
20
+ require "google/cloud/bigquery/dataset"
21
+ require "google/cloud/bigquery/job"
22
+ require "google/cloud/bigquery/query_data"
23
+
24
module Google
  module Cloud
    module Bigquery
      ##
      # # Project
      #
      # Projects are top-level containers in Google Cloud Platform. They store
      # information about billing and authorized users, and they contain
      # BigQuery data. Each project has a friendly name and a unique ID.
      #
      # Google::Cloud::Bigquery::Project is the main object for interacting
      # with Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are
      # created, accessed, and deleted by Google::Cloud::Bigquery::Project.
      #
      # See {Google::Cloud#bigquery}
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      class Project
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # Creates a new Project instance.
        #
        # See {Google::Cloud.bigquery}
        def initialize service
          @service = service
        end

        ##
        # The ID of the BigQuery project connected to.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new "my-todo-project",
        #                              "/path/to/keyfile.json"
        #   bigquery = gcloud.bigquery
        #
        #   bigquery.project #=> "my-todo-project"
        #
        def project
          service.project
        end

        ##
        # @private Default project, resolved from the environment, falling
        # back to the GCE metadata server when running on Compute Engine.
        def self.default_project
          ENV["BIGQUERY_PROJECT"] ||
            ENV["GOOGLE_CLOUD_PROJECT"] ||
            ENV["GCLOUD_PROJECT"] ||
            Google::Cloud::Core::GCE.project_id
        end

        ##
        # Queries data using the [asynchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [String] priority Specifies a priority for the query.
        #   Possible values include `INTERACTIVE` and `BATCH`. The default
        #   value is `INTERACTIVE`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true.
        # @param [Table] table The destination table where the query results
        #   should be stored. If not present, a new table will be created to
        #   store the results.
        # @param [String] create Specifies whether the job is allowed to
        #   create new tables. Supported values: `needed` (create the table
        #   if it does not exist) and `never` (the table must already exist;
        #   a 'notFound' error is raised otherwise).
        # @param [String] write Specifies the action that occurs if the
        #   destination table already exists. Supported values: `truncate`
        #   (overwrite the table data), `append` (append the data), and
        #   `empty` (a 'duplicate' error is returned in the job result if the
        #   table exists and contains data).
        # @param [Boolean] large_results If `true`, allows the query to
        #   produce arbitrarily large result tables at a slight cost in
        #   performance. Requires `table` parameter to be set.
        # @param [Boolean] flatten Flattens all nested and repeated fields in
        #   the query results. The default value is `true`. `large_results`
        #   parameter must be `true` if this is set to `false`.
        # @param [Dataset, String] dataset Specifies the default dataset to
        #   use for unqualified table names in the query.
        #
        # @return [Google::Cloud::Bigquery::QueryJob]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "[my_proj:my_data.my_table]"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.query_results.each do |row|
        #       puts row["name"]
        #     end
        #   end
        #
        def query_job query, priority: "INTERACTIVE", cache: true, table: nil,
                      create: nil, write: nil, large_results: nil,
                      flatten: nil, dataset: nil
          ensure_service!
          job_opts = {
            priority: priority, cache: cache, table: table, create: create,
            write: write, large_results: large_results, flatten: flatten,
            dataset: dataset
          }
          Job.from_gapi service.query_job(query, job_opts), service
        end

        ##
        # Queries data using the [synchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [Integer] max The maximum number of rows of data to return
        #   per page of results. Setting a small value such as 1000 and paging
        #   through results might improve reliability when the result set is
        #   large. In addition to this limit, responses are also limited to
        #   10 MB. By default, there is no maximum row count, and only the
        #   byte limit applies.
        # @param [Integer] timeout How long to wait for the query to complete,
        #   in milliseconds, before the request times out and returns. This is
        #   only a timeout for the request, not the query; if the query takes
        #   longer, the call returns without results and with
        #   QueryData#complete? set to false. The default value is 10000
        #   milliseconds (10 seconds).
        # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
        #   job. Instead, if the query is valid, BigQuery returns statistics
        #   about the job such as how many bytes would be processed. If the
        #   query is invalid, an error returns. The default value is `false`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true.
        # @param [String] dataset Specifies the default datasetId and
        #   projectId to assume for any unqualified table names in the query.
        #   If not set, all table names in the query string must be qualified
        #   in the format 'datasetId.tableId'.
        # @param [String] project Specifies the default projectId to assume
        #   for any unqualified table names in the query. Only used if
        #   `dataset` option is set.
        #
        # @return [Google::Cloud::Bigquery::QueryData]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.each do |row|
        #     puts row["name"]
        #   end
        #
        # @example Retrieve all rows: (See {QueryData#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.all do |row|
        #     puts row["name"]
        #   end
        #
        def query query, max: nil, timeout: 10000, dryrun: nil, cache: true,
                  dataset: nil, project: nil
          ensure_service!
          query_opts = { max: max, timeout: timeout, dryrun: dryrun,
                         cache: cache, dataset: dataset, project: project }
          QueryData.from_gapi service.query(query, query_opts), service
        end

        ##
        # Retrieves an existing dataset by ID.
        #
        # @param [String] dataset_id The ID of a dataset.
        #
        # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if the
        #   dataset does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.dataset "my_dataset"
        #   puts dataset.name
        #
        def dataset dataset_id
          ensure_service!
          Dataset.from_gapi service.get_dataset(dataset_id), service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Creates a new dataset.
        #
        # @param [String] dataset_id A unique ID for this dataset, without the
        #   project name. The ID must contain only letters (a-z, A-Z), numbers
        #   (0-9), or underscores (_). The maximum length is 1,024 characters.
        # @param [String] name A descriptive name for the dataset.
        # @param [String] description A user-friendly description of the
        #   dataset.
        # @param [Integer] expiration The default lifetime of all tables in
        #   the dataset, in milliseconds. The minimum value is 3600000
        #   milliseconds (one hour).
        # @param [String] location The geographic location where the dataset
        #   should reside. Possible values include `EU` and `US`. The default
        #   value is `US`.
        # @yield [access] a block for setting rules
        # @yieldparam [Dataset::Access] access the object accepting rules
        #
        # @return [Google::Cloud::Bigquery::Dataset]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset"
        #
        # @example A name and description can be provided:
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset",
        #                                     name: "My Dataset",
        #                                     description: "This is my Dataset"
        #
        # @example Or, configure access with a block: (See {Dataset::Access})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset" do |access|
        #     access.add_writer_user "writers@example.com"
        #   end
        #
        def create_dataset dataset_id, name: nil, description: nil,
                           expiration: nil, location: nil
          ensure_service!

          ref = Google::Apis::BigqueryV2::DatasetReference.new(
            project_id: project, dataset_id: dataset_id)
          ds_gapi = Google::Apis::BigqueryV2::Dataset.new(
            dataset_reference: ref)

          # Location can only be set at creation time; there is no
          # Dataset#location writer.
          ds_gapi.update! location: location unless location.nil?

          updater = Dataset::Updater.new ds_gapi
          updater.name = name unless name.nil?
          updater.description = description unless description.nil?
          updater.default_expiration = expiration unless expiration.nil?

          if block_given?
            yield updater
            updater.check_for_mutated_access!
          end

          Dataset.from_gapi service.insert_dataset(ds_gapi), service
        end

        ##
        # Retrieves the list of datasets belonging to the project.
        #
        # @param [Boolean] all Whether to list all datasets, including hidden
        #   ones. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of datasets to return.
        #
        # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
        #   {Google::Cloud::Bigquery::Dataset::List})
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   datasets = bigquery.datasets
        #   datasets.each do |dataset|
        #     puts dataset.name
        #   end
        #
        # @example Retrieve all datasets: (See {Dataset::List#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   datasets = bigquery.datasets
        #   datasets.all do |dataset|
        #     puts dataset.name
        #   end
        #
        def datasets all: nil, token: nil, max: nil
          ensure_service!
          list_opts = { all: all, token: token, max: max }
          Dataset::List.from_gapi service.list_datasets(list_opts),
                                  service, all, max
        end

        ##
        # Retrieves an existing job by ID.
        #
        # @param [String] job_id The ID of a job.
        #
        # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the job
        #   does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.job "my_job"
        #
        def job job_id
          ensure_service!
          Job.from_gapi service.get_job(job_id), service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Retrieves the list of jobs belonging to the project.
        #
        # @param [Boolean] all Whether to display jobs owned by all users in
        #   the project. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of jobs to return.
        # @param [String] filter A filter for job state. Acceptable values
        #   are: `done` (finished jobs), `pending` (pending jobs), and
        #   `running` (running jobs).
        #
        # @return [Array<Google::Cloud::Bigquery::Job>] (See
        #   {Google::Cloud::Bigquery::Job::List})
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   jobs = bigquery.jobs
        #   jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve only running jobs using the `filter` optional arg:
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   running_jobs = bigquery.jobs filter: "running"
        #   running_jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve all jobs: (See {Job::List#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   jobs = bigquery.jobs
        #   jobs.all do |job|
        #     # process job
        #   end
        #
        def jobs all: nil, token: nil, max: nil, filter: nil
          ensure_service!
          list_opts = { all: all, token: token, max: max, filter: filter }
          Job::List.from_gapi service.list_jobs(list_opts),
                              service, all, max, filter
        end

        protected

        ##
        # Raise an error unless an active service is available.
        def ensure_service!
          fail "Must have active connection" unless service
        end
      end
    end
  end
end