google-cloud-bigquery 1.21.2

Files changed (44)
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
@@ -0,0 +1,164 @@
+ # Copyright 2019 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "delegate"
+
+ module Google
+ module Cloud
+ module Bigquery
+ class Model
+ ##
+ # Model::List is a special case Array with additional values.
+ class List < DelegateClass(::Array)
+ ##
+ # If not empty, indicates that there are more records that match
+ # the request and this value should be passed to continue.
+ attr_accessor :token
+
+ ##
+ # @private Create a new Model::List with an array of models.
+ def initialize arr = []
+ super arr
+ end
+
+ ##
+ # Whether there is a next page of models.
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # models = dataset.models
+ # if models.next?
+ # next_models = models.next
+ # end
+ #
+ def next?
+ !token.nil?
+ end
+
+ ##
+ # Retrieve the next page of models.
+ #
+ # @return [Model::List]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # models = dataset.models
+ # if models.next?
+ # next_models = models.next
+ # end
+ #
+ def next
+ return nil unless next?
+ ensure_service!
+ gapi = @service.list_models @dataset_id, token: token, max: @max
+ self.class.from_gapi gapi, @service, @dataset_id, @max
+ end
+
+ ##
+ # Retrieves remaining results by repeatedly invoking {#next} until
+ # {#next?} returns `false`. Calls the given block once for each
+ # result, which is passed as the argument to the block.
+ #
+ # An Enumerator is returned if no block is given.
+ #
+ # This method will make repeated API calls until all remaining results
+ # are retrieved. (Unlike `#each`, for example, which merely iterates
+ # over the results returned by a single API call.) Use with caution.
+ #
+ # @param [Integer] request_limit The upper limit of API requests to
+ # make to load all models. Default is no limit.
+ # @yield [model] The block for accessing each model.
+ # @yieldparam [Model] model The model object.
+ #
+ # @return [Enumerator]
+ #
+ # @example Iterating each result by passing a block:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # dataset.models.all do |model|
+ # puts model.model_id
+ # end
+ #
+ # @example Using the enumerator by not passing a block:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # all_names = dataset.models.all.map do |model|
+ # model.model_id
+ # end
+ #
+ # @example Limit the number of API requests made:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # dataset.models.all(request_limit: 10) do |model|
+ # puts model.model_id
+ # end
+ #
+ def all request_limit: nil
+ request_limit = request_limit.to_i if request_limit
+ return enum_for :all, request_limit: request_limit unless block_given?
+ results = self
+ loop do
+ results.each { |r| yield r }
+ if request_limit
+ request_limit -= 1
+ break if request_limit.negative?
+ end
+ break unless results.next?
+ results = results.next
+ end
+ end
+
+ ##
+ # @private New Model::List from a response object.
+ def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
+ models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
+ models.instance_variable_set :@token, gapi_list[:nextPageToken]
+ models.instance_variable_set :@service, service
+ models.instance_variable_set :@dataset_id, dataset_id
+ models.instance_variable_set :@max, max
+ models
+ end
+
+ protected
+
+ ##
+ # Raise an error unless an active service is available.
+ def ensure_service!
+ raise "Must have active connection" unless @service
+ end
+ end
+ end
+ end
+ end
+ end
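
For readers new to this paging pattern, here is a minimal sketch (not part of the gem source) of how the token-driven methods above are typically consumed; "my_dataset" is a hypothetical dataset ID, and Model::List#all simply performs this loop internally:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

models = dataset.models          # first page, a Model::List
loop do
  models.each { |model| puts model.model_id }
  break unless models.next?      # false once no nextPageToken was returned
  models = models.next           # fetches the next page via list_models
end
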
@@ -0,0 +1,1655 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/bigquery/service"
18
+ require "google/cloud/bigquery/credentials"
19
+ require "google/cloud/bigquery/dataset"
20
+ require "google/cloud/bigquery/job"
21
+ require "google/cloud/bigquery/external"
22
+ require "google/cloud/bigquery/project/list"
23
+ require "google/cloud/bigquery/time"
24
+ require "google/cloud/bigquery/schema"
25
+
26
+ module Google
27
+ module Cloud
28
+ module Bigquery
29
+ ##
30
+ # # Project
31
+ #
32
+ # Projects are top-level containers in Google Cloud Platform. They store
33
+ # information about billing and authorized users, and they contain
34
+ # BigQuery data. Each project has a friendly name and a unique ID.
35
+ #
36
+ # Google::Cloud::Bigquery::Project is the main object for interacting with
37
+ # Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are created,
38
+ # accessed, and deleted by Google::Cloud::Bigquery::Project.
39
+ #
40
+ # See {Google::Cloud#bigquery}.
41
+ #
42
+ # @attr_reader [String, nil] name The descriptive name of the project.
43
+ # Can only be present if the project was retrieved with {#projects}.
44
+ # @attr_reader [Integer, nil] numeric_id The numeric ID of the project.
45
+ # Can only be present if the project was retrieved with {#projects}.
46
+ #
47
+ # @example
48
+ # require "google/cloud/bigquery"
49
+ #
50
+ # bigquery = Google::Cloud::Bigquery.new
51
+ # dataset = bigquery.dataset "my_dataset"
52
+ # table = dataset.table "my_table"
53
+ #
54
+ class Project
55
+ ##
56
+ # @private The Service object.
57
+ attr_accessor :service
58
+
59
+ attr_reader :name, :numeric_id
60
+
61
+ ##
62
+ # Creates a new Service instance.
63
+ #
64
+ # See {Google::Cloud.bigquery}
65
+ def initialize service
66
+ @service = service
67
+ end
68
+
69
+ ##
70
+ # The BigQuery project connected to.
71
+ #
72
+ # @example
73
+ # require "google/cloud/bigquery"
74
+ #
75
+ # bigquery = Google::Cloud::Bigquery.new(
76
+ # project_id: "my-project",
77
+ # credentials: "/path/to/keyfile.json"
78
+ # )
79
+ #
80
+ # bigquery.project_id #=> "my-project"
81
+ #
82
+ def project_id
83
+ service.project
84
+ end
85
+ alias project project_id
86
+
87
+ ##
88
+ # The email address of the service account for the project used to
89
+ # connect to BigQuery. (See also {#project_id}.)
90
+ #
91
+ # @return [String] The service account email address.
92
+ #
93
+ def service_account_email
94
+ @service_account_email ||= service.project_service_account.email
95
+ end
96
+
97
+ ##
98
+ # Copies the data from the source table to the destination table using
99
+ # an asynchronous method. In this method, a {CopyJob} is immediately
100
+ # returned. The caller may poll the service by repeatedly calling
101
+ # {Job#reload!} and {Job#done?} to detect when the job is done, or
102
+ # simply block until the job is done by calling #{Job#wait_until_done!}.
103
+ # See {#copy} for the synchronous version. Use this method instead of
104
+ # {Table#copy_job} to copy from source tables in other projects.
105
+ #
106
+ # The geographic location for the job ("US", "EU", etc.) can be set via
107
+ # {CopyJob::Updater#location=} in a block passed to this method.
108
+ #
109
+ # @param [String, Table] source_table The source table for the
110
+ # copied data. This can be a table object; or a string ID as specified
111
+ # by the [Standard SQL Query
112
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
113
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
114
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
115
+ # (`project-name:dataset_id.table_id`).
116
+ # @param [String, Table] destination_table The destination table for the
117
+ # copied data. This can be a table object; or a string ID as specified
118
+ # by the [Standard SQL Query
119
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
120
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
121
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
122
+ # (`project-name:dataset_id.table_id`).
123
+ # @param [String] create Specifies whether the job is allowed to create
124
+ # new tables. The default value is `needed`.
125
+ #
126
+ # The following values are supported:
127
+ #
128
+ # * `needed` - Create the table if it does not exist.
129
+ # * `never` - The table must already exist. A 'notFound' error is
130
+ # raised if the table does not exist.
131
+ # @param [String] write Specifies how to handle data already present in
132
+ # the destination table. The default value is `empty`.
133
+ #
134
+ # The following values are supported:
135
+ #
136
+ # * `truncate` - BigQuery overwrites the table data.
137
+ # * `append` - BigQuery appends the data to the table.
138
+ # * `empty` - An error will be returned if the destination table
139
+ # already contains data.
140
+ # @param [String] job_id A user-defined ID for the copy job. The ID
141
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
142
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
143
+ # `job_id` is provided, then `prefix` will not be used.
144
+ #
145
+ # See [Generating a job
146
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
147
+ # @param [String] prefix A string, usually human-readable, that will be
148
+ # prepended to a generated value to produce a unique job ID. For
149
+ # example, the prefix `daily_import_job_` can be given to generate a
150
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
151
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
152
+ # underscores (_), or dashes (-). The maximum length of the entire ID
153
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
154
+ # be used.
155
+ # @param [Hash] labels A hash of user-provided labels associated with
156
+ # the job. You can use these to organize and group your jobs. Label
157
+ # keys and values can be no longer than 63 characters, can only
158
+ # contain lowercase letters, numeric characters, underscores and
159
+ # dashes. International characters are allowed. Label values are
160
+ # optional. Label keys must start with a letter and each label in the
161
+ # list must have a different key. See [Requirements for
162
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
163
+ # @yield [job] a job configuration object
164
+ # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
165
+ # configuration object for setting additional options.
166
+ #
167
+ # @return [Google::Cloud::Bigquery::CopyJob]
168
+ #
169
+ # @example
170
+ # require "google/cloud/bigquery"
171
+ #
172
+ # bigquery = Google::Cloud::Bigquery.new
173
+ # dataset = bigquery.dataset "my_dataset"
174
+ # source_table_id = "bigquery-public-data.samples.shakespeare"
175
+ # destination_table = dataset.table "my_destination_table"
176
+ #
177
+ # copy_job = bigquery.copy_job source_table_id, destination_table
178
+ #
179
+ # copy_job.wait_until_done!
180
+ # copy_job.done? #=> true
181
+ #
182
+ # @!group Data
183
+ #
184
+ def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
185
+ ensure_service!
186
+ options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }
187
+
188
+ updater = CopyJob::Updater.from_options(
189
+ service,
190
+ Service.get_table_ref(source_table, default_ref: project_ref),
191
+ Service.get_table_ref(destination_table, default_ref: project_ref),
192
+ options
193
+ )
194
+
195
+ yield updater if block_given?
196
+
197
+ job_gapi = updater.to_gapi
198
+ gapi = service.copy_table job_gapi
199
+ Job.from_gapi gapi, service
200
+ end
201
+
202
+ ##
203
+ # Copies the data from the source table to the destination table using a
204
+ # synchronous method that blocks for a response. Timeouts and transient
205
+ # errors are generally handled as needed to complete the job. See
206
+ # {#copy_job} for the asynchronous version. Use this method instead of
207
+ # {Table#copy} to copy from source tables in other projects.
208
+ #
209
+ # The geographic location for the job ("US", "EU", etc.) can be set via
210
+ # {CopyJob::Updater#location=} in a block passed to this method.
211
+ #
212
+ # @param [String, Table] source_table The source table for the
213
+ # copied data. This can be a table object; or a string ID as specified
214
+ # by the [Standard SQL Query
215
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
216
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
217
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
218
+ # (`project-name:dataset_id.table_id`).
219
+ # @param [String, Table] destination_table The destination table for the
220
+ # copied data. This can be a table object; or a string ID as specified
221
+ # by the [Standard SQL Query
222
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
223
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
224
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
225
+ # (`project-name:dataset_id.table_id`).
226
+ # @param [String] create Specifies whether the job is allowed to create
227
+ # new tables. The default value is `needed`.
228
+ #
229
+ # The following values are supported:
230
+ #
231
+ # * `needed` - Create the table if it does not exist.
232
+ # * `never` - The table must already exist. A 'notFound' error is
233
+ # raised if the table does not exist.
234
+ # @param [String] write Specifies how to handle data already present in
235
+ # the destination table. The default value is `empty`.
236
+ #
237
+ # The following values are supported:
238
+ #
239
+ # * `truncate` - BigQuery overwrites the table data.
240
+ # * `append` - BigQuery appends the data to the table.
241
+ # * `empty` - An error will be returned if the destination table
242
+ # already contains data.
243
+ # @yield [job] a job configuration object
244
+ # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
245
+ # configuration object for setting additional options.
246
+ #
247
+ # @return [Boolean] Returns `true` if the copy operation succeeded.
248
+ #
249
+ # @example
250
+ # require "google/cloud/bigquery"
251
+ #
252
+ # bigquery = Google::Cloud::Bigquery.new
253
+ # dataset = bigquery.dataset "my_dataset"
254
+ # destination_table = dataset.table "my_destination_table"
255
+ #
256
+ # bigquery.copy "bigquery-public-data.samples.shakespeare",
257
+ # destination_table
258
+ #
259
+ # @!group Data
260
+ #
261
+ def copy source_table, destination_table, create: nil, write: nil, &block
262
+ job = copy_job source_table, destination_table, create: create, write: write, &block
263
+ job.wait_until_done!
264
+ ensure_job_succeeded! job
265
+ true
266
+ end
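
A companion sketch (illustrative only) showing the synchronous form with the job location set in a block, as described above; identifiers are hypothetical and both tables are assumed to reside in the EU:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_eu_dataset"
dest     = dataset.table "my_destination_table"

# Blocks until the job completes and returns true on success.
bigquery.copy "my-other-project.eu_dataset.source_table", dest, write: "append" do |j|
  j.location = "EU"   # the copy job must run where the tables reside
end
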
267
+
268
+ ##
269
+ # Queries data by creating a [query
270
+ # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
271
+ #
272
+ # The geographic location for the job ("US", "EU", etc.) can be set via
273
+ # {QueryJob::Updater#location=} in a block passed to this method.
274
+ #
275
+ # @param [String] query A query string, following the BigQuery [query
276
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
277
+ # query to execute. Example: "SELECT count(f1) FROM
278
+ # [myProjectId:myDatasetId.myTableId]".
279
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
280
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
280
+ # query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
282
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
283
+ # true.
284
+ #
285
+ # Ruby types are mapped to BigQuery types as follows:
286
+ #
287
+ # | BigQuery | Ruby | Notes |
288
+ # |-------------|--------------------------------------|------------------------------------------------|
289
+ # | `BOOL` | `true`/`false` | |
290
+ # | `INT64` | `Integer` | |
291
+ # | `FLOAT64` | `Float` | |
292
+ # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
293
+ # | `STRING` | `String` | |
294
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
295
+ # | `DATE` | `Date` | |
296
+ # | `TIMESTAMP` | `Time` | |
297
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
298
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
299
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
300
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
301
+ #
302
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
303
+ # of each BigQuery data type, including allowed values.
304
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
305
+ # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
306
+ # type for these values.
307
+ #
308
+ # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
309
+ # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
310
+ # type codes from the following list:
311
+ #
312
+ # * `:BOOL`
313
+ # * `:INT64`
314
+ # * `:FLOAT64`
315
+ # * `:NUMERIC`
316
+ # * `:STRING`
317
+ # * `:DATETIME`
318
+ # * `:DATE`
319
+ # * `:TIMESTAMP`
320
+ # * `:TIME`
321
+ # * `:BYTES`
322
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
323
+ # is specified as `[:INT64]`.
324
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
325
+ # match the `params` hash, and the values are the type codes that match the data.
326
+ #
327
+ # Types are optional.
328
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
329
+ # that represents the mapping of the external tables to the table
330
+ # names used in the SQL query. The hash keys are the table names, and
331
+ # the hash values are the external table objects. See {Project#query}.
332
+ # @param [String] priority Specifies a priority for the query. Possible
333
+ # values include `INTERACTIVE` and `BATCH`. The default value is
334
+ # `INTERACTIVE`.
335
+ # @param [Boolean] cache Whether to look for the result in the query
336
+ # cache. The query cache is a best-effort cache that will be flushed
337
+ # whenever tables in the query are modified. The default value is
338
+ # true. For more information, see [query
339
+ # caching](https://developers.google.com/bigquery/querying-data).
340
+ # @param [Table] table The destination table where the query results
341
+ # should be stored. If not present, a new table will be created to
342
+ # store the results.
343
+ # @param [String] create Specifies whether the job is allowed to create
344
+ # new tables. The default value is `needed`.
345
+ #
346
+ # The following values are supported:
347
+ #
348
+ # * `needed` - Create the table if it does not exist.
349
+ # * `never` - The table must already exist. A 'notFound' error is
350
+ # raised if the table does not exist.
351
+ # @param [String] write Specifies the action that occurs if the
352
+ # destination table already exists. The default value is `empty`.
353
+ #
354
+ # The following values are supported:
355
+ #
356
+ # * `truncate` - BigQuery overwrites the table data.
357
+ # * `append` - BigQuery appends the data to the table.
358
+ # * `empty` - A 'duplicate' error is returned in the job result if the
359
+ # table exists and contains data.
360
+ # @param [Boolean] dryrun If set to true, BigQuery doesn't run the job.
361
+ # Instead, if the query is valid, BigQuery returns statistics about
362
+ # the job such as how many bytes would be processed. If the query is
363
+ # invalid, an error returns. The default value is false.
364
+ # @param [Dataset, String] dataset The default dataset to use for
365
+ # unqualified table names in the query. Optional.
366
+ # @param [String] project Specifies the default projectId to assume for
367
+ # any unqualified table names in the query. Only used if `dataset`
368
+ # option is set.
369
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
370
+ # [standard
371
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
372
+ # dialect for this query. If set to true, the query will use standard
373
+ # SQL rather than the [legacy
374
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
375
+ # dialect. Optional. The default value is true.
376
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
377
+ # [legacy
378
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
379
+ # dialect for this query. If set to false, the query will use
380
+ # BigQuery's [standard
381
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
382
+ # dialect. Optional. The default value is false.
383
+ # @param [Boolean] large_results This option is specific to Legacy SQL.
384
+ # If `true`, allows the query to produce arbitrarily large result
385
+ # tables at a slight cost in performance. Requires `table` parameter
386
+ # to be set.
387
+ # @param [Boolean] flatten This option is specific to Legacy SQL.
388
+ # Flattens all nested and repeated fields in the query results. The
389
+ # default value is `true`. `large_results` parameter must be `true` if
390
+ # this is set to `false`.
391
+ # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
392
+ # job. Queries that will have bytes billed beyond this limit will fail
393
+ # (without incurring a charge). Optional. If unspecified, this will be
394
+ # set to your project default.
395
+ # @param [String] job_id A user-defined ID for the query job. The ID
396
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
397
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
398
+ # `job_id` is provided, then `prefix` will not be used.
399
+ #
400
+ # See [Generating a job
401
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
402
+ # @param [String] prefix A string, usually human-readable, that will be
403
+ # prepended to a generated value to produce a unique job ID. For
404
+ # example, the prefix `daily_import_job_` can be given to generate a
405
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
406
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
407
+ # underscores (_), or dashes (-). The maximum length of the entire ID
408
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
409
+ # be used.
410
+ #
411
+ # See [Generating a job
412
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
413
+ # @param [Hash] labels A hash of user-provided labels associated with
414
+ # the job. You can use these to organize and group your jobs. Label
415
+ # keys and values can be no longer than 63 characters, can only
416
+ # contain lowercase letters, numeric characters, underscores and
417
+ # dashes. International characters are allowed. Label values are
418
+ # optional. Label keys must start with a letter and each label in the
419
+ # list must have a different key. See [Requirements for
420
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
421
+ # @param [Array<String>, String] udfs User-defined function resources
422
+ # used in a legacy SQL query. May be either a code resource to load from
423
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
424
+ # that contains code for a user-defined function (UDF). Providing an
425
+ # inline code resource is equivalent to providing a URI for a file
426
+ # containing the same code.
427
+ #
428
+ # This parameter is used for defining User Defined Function (UDF)
429
+ # resources only when using legacy SQL. Users of standard SQL should
430
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
431
+ # Routines API to define UDF resources.
432
+ #
433
+ # For additional information on migrating, see: [Migrating to
434
+ # standard SQL - Differences in user-defined JavaScript
435
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
436
+ # @param [Integer] maximum_billing_tier Deprecated: Change the billing
437
+ # tier to allow high-compute queries.
438
+ # @yield [job] a job configuration object
439
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
440
+ # configuration object for setting query options.
441
+ #
442
+ # @return [Google::Cloud::Bigquery::QueryJob]
443
+ #
444
+ # @example Query using standard SQL:
445
+ # require "google/cloud/bigquery"
446
+ #
447
+ # bigquery = Google::Cloud::Bigquery.new
448
+ #
449
+ # job = bigquery.query_job "SELECT name FROM " \
450
+ # "`my_project.my_dataset.my_table`"
451
+ #
452
+ # job.wait_until_done!
453
+ # if !job.failed?
454
+ # job.data.each do |row|
455
+ # puts row[:name]
456
+ # end
457
+ # end
458
+ #
459
+ # @example Query using legacy SQL:
460
+ # require "google/cloud/bigquery"
461
+ #
462
+ # bigquery = Google::Cloud::Bigquery.new
463
+ #
464
+ # job = bigquery.query_job "SELECT name FROM " \
465
+ # " [my_project:my_dataset.my_table]",
466
+ # legacy_sql: true
467
+ #
468
+ # job.wait_until_done!
469
+ # if !job.failed?
470
+ # job.data.each do |row|
471
+ # puts row[:name]
472
+ # end
473
+ # end
474
+ #
475
+ # @example Query using positional query parameters:
476
+ # require "google/cloud/bigquery"
477
+ #
478
+ # bigquery = Google::Cloud::Bigquery.new
479
+ #
480
+ # job = bigquery.query_job "SELECT name FROM " \
481
+ # "`my_dataset.my_table` " \
482
+ # "WHERE id = ?",
483
+ # params: [1]
484
+ #
485
+ # job.wait_until_done!
486
+ # if !job.failed?
487
+ # job.data.each do |row|
488
+ # puts row[:name]
489
+ # end
490
+ # end
491
+ #
492
+ # @example Query using named query parameters:
493
+ # require "google/cloud/bigquery"
494
+ #
495
+ # bigquery = Google::Cloud::Bigquery.new
496
+ #
497
+ # job = bigquery.query_job "SELECT name FROM " \
498
+ # "`my_dataset.my_table` " \
499
+ # "WHERE id = @id",
500
+ # params: { id: 1 }
501
+ #
502
+ # job.wait_until_done!
503
+ # if !job.failed?
504
+ # job.data.each do |row|
505
+ # puts row[:name]
506
+ # end
507
+ # end
508
+ #
509
+ # @example Query using named query parameters with types:
510
+ # require "google/cloud/bigquery"
511
+ #
512
+ # bigquery = Google::Cloud::Bigquery.new
513
+ #
514
+ # job = bigquery.query_job "SELECT name FROM " \
515
+ # "`my_dataset.my_table` " \
516
+ # "WHERE id IN UNNEST(@ids)",
517
+ # params: { ids: [] },
518
+ # types: { ids: [:INT64] }
519
+ #
520
+ # job.wait_until_done!
521
+ # if !job.failed?
522
+ # job.data.each do |row|
523
+ # puts row[:name]
524
+ # end
525
+ # end
526
+ #
527
+ # @example Execute a DDL statement:
528
+ # require "google/cloud/bigquery"
529
+ #
530
+ # bigquery = Google::Cloud::Bigquery.new
531
+ #
532
+ # job = bigquery.query_job "CREATE TABLE " \
533
+ # "`my_dataset.my_table` " \
534
+ # "(x INT64)"
535
+ #
536
+ # job.wait_until_done!
537
+ # if !job.failed?
538
+ # table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
539
+ # end
540
+ #
541
+ # @example Execute a DML statement:
542
+ # require "google/cloud/bigquery"
543
+ #
544
+ # bigquery = Google::Cloud::Bigquery.new
545
+ #
546
+ # job = bigquery.query_job "UPDATE " \
547
+ # "`my_dataset.my_table` " \
548
+ # "SET x = x + 1 " \
549
+ # "WHERE x IS NOT NULL"
550
+ #
551
+ # job.wait_until_done!
552
+ # if !job.failed?
553
+ # puts job.num_dml_affected_rows
554
+ # end
555
+ #
556
+ # @example Query using external data source, set destination:
557
+ # require "google/cloud/bigquery"
558
+ #
559
+ # bigquery = Google::Cloud::Bigquery.new
560
+ #
561
+ # csv_url = "gs://bucket/path/to/data.csv"
562
+ # csv_table = bigquery.external csv_url do |csv|
563
+ # csv.autodetect = true
564
+ # csv.skip_leading_rows = 1
565
+ # end
566
+ #
567
+ # job = bigquery.query_job "SELECT * FROM my_ext_table" do |query|
568
+ # query.external = { my_ext_table: csv_table }
569
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
570
+ # query.table = dataset.table "my_table", skip_lookup: true
571
+ # end
572
+ #
573
+ # job.wait_until_done!
574
+ # if !job.failed?
575
+ # job.data.each do |row|
576
+ # puts row[:name]
577
+ # end
578
+ # end
579
+ #
580
+ def query_job query, params: nil, types: nil, external: nil, priority: "INTERACTIVE", cache: true, table: nil,
581
+ create: nil, write: nil, dryrun: nil, dataset: nil, project: nil, standard_sql: nil,
582
+ legacy_sql: nil, large_results: nil, flatten: nil, maximum_billing_tier: nil,
583
+ maximum_bytes_billed: nil, job_id: nil, prefix: nil, labels: nil, udfs: nil
584
+ ensure_service!
585
+ options = { params: params, types: types, external: external, priority: priority, cache: cache, table: table,
586
+ create: create, write: write, dryrun: dryrun, dataset: dataset,
587
+ project: (project || self.project), standard_sql: standard_sql, legacy_sql: legacy_sql,
588
+ large_results: large_results, flatten: flatten, maximum_billing_tier: maximum_billing_tier,
589
+ maximum_bytes_billed: maximum_bytes_billed, job_id: job_id, prefix: prefix, labels: labels,
590
+ udfs: udfs }
591
+
592
+ updater = QueryJob::Updater.from_options service, query, options
593
+
594
+ yield updater if block_given?
595
+
596
+ gapi = service.query_job updater.to_gapi
597
+ Job.from_gapi gapi, service
598
+ end
599
+
600
+ ##
601
+ # Queries data and waits for the results. In this method, a {QueryJob}
602
+ # is created and its results are saved to a temporary table, then read
603
+ # from the table. Timeouts and transient errors are generally handled
604
+ # as needed to complete the query. When used for executing DDL/DML
605
+ # statements, this method does not return row data.
606
+ #
607
+ # The geographic location for the job ("US", "EU", etc.) can be set via
608
+ # {QueryJob::Updater#location=} in a block passed to this method.
609
+ #
610
+ # @see https://cloud.google.com/bigquery/querying-data Querying Data
611
+ #
612
+ # @param [String] query A query string, following the BigQuery [query
613
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
614
+ # query to execute. Example: "SELECT count(f1) FROM
615
+ # [myProjectId:myDatasetId.myTableId]".
616
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
617
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
618
+ # query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
619
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
620
+ # true.
621
+ #
622
+ # Ruby types are mapped to BigQuery types as follows:
623
+ #
624
+ # | BigQuery | Ruby | Notes |
625
+ # |-------------|--------------------------------------|------------------------------------------------|
626
+ # | `BOOL` | `true`/`false` | |
627
+ # | `INT64` | `Integer` | |
628
+ # | `FLOAT64` | `Float` | |
629
+ # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
630
+ # | `STRING` | `String` | |
631
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
632
+ # | `DATE` | `Date` | |
633
+ # | `TIMESTAMP` | `Time` | |
634
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
635
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
636
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
637
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
638
+ #
639
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
640
+ # of each BigQuery data type, including allowed values.
641
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
642
+ # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
643
+ # type for these values.
644
+ #
645
+ # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
646
+ # parameters. This must be a `Hash` when the query uses named query parameters. The values should be BigQuery
647
+ # type codes from the following list:
648
+ #
649
+ # * `:BOOL`
650
+ # * `:INT64`
651
+ # * `:FLOAT64`
652
+ # * `:NUMERIC`
653
+ # * `:STRING`
654
+ # * `:DATETIME`
655
+ # * `:DATE`
656
+ # * `:TIMESTAMP`
657
+ # * `:TIME`
658
+ # * `:BYTES`
659
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
660
+ # is specified as `[:INT64]`.
661
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
662
+ # match the `params` hash, and the values are the type codes that match the data.
663
+ #
664
+ # Types are optional.
665
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
666
+ # that represents the mapping of the external tables to the table
667
+ # names used in the SQL query. The hash keys are the table names, and
668
+ # the hash values are the external table objects. See {Project#query}.
669
+ # @param [Integer] max The maximum number of rows of data to return per
670
+ # page of results. Setting this flag to a small value such as 1000 and
671
+ # then paging through results might improve reliability when the query
672
+ # result set is large. In addition to this limit, responses are also
673
+ # limited to 10 MB. By default, there is no maximum row count, and
674
+ # only the byte limit applies.
675
+ # @param [Boolean] cache Whether to look for the result in the query
676
+ # cache. The query cache is a best-effort cache that will be flushed
677
+ # whenever tables in the query are modified. The default value is
678
+ # true. For more information, see [query
679
+ # caching](https://developers.google.com/bigquery/querying-data).
680
+ # @param [String] dataset Specifies the default datasetId and projectId
681
+ # to assume for any unqualified table names in the query. If not set,
682
+ # all table names in the query string must be qualified in the format
683
+ # 'datasetId.tableId'.
684
+ # @param [String] project Specifies the default projectId to assume for
685
+ # any unqualified table names in the query. Only used if `dataset`
686
+ # option is set.
687
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
688
+ # [standard
689
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
690
+ # dialect for this query. If set to true, the query will use standard
691
+ # SQL rather than the [legacy
692
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
693
+ # dialect. When set to true, the values of `large_results` and
694
+ # `flatten` are ignored; the query will be run as if `large_results`
695
+ # is true and `flatten` is false. Optional. The default value is
696
+ # true.
697
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
698
+ # [legacy
699
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
700
+ # dialect for this query. If set to false, the query will use
701
+ # BigQuery's [standard
702
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
703
+ # dialect. When set to false, the values of `large_results` and `flatten` are
704
+ # ignored; the query will be run as if `large_results` is true and
705
+ # `flatten` is false. Optional. The default value is false.
706
+ # @yield [job] a job configuration object
707
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
708
+ # configuration object for setting additional options for the query.
709
+ #
710
+ # @return [Google::Cloud::Bigquery::Data]
711
+ #
712
+ # @example Query using standard SQL:
713
+ # require "google/cloud/bigquery"
714
+ #
715
+ # bigquery = Google::Cloud::Bigquery.new
716
+ #
717
+ # sql = "SELECT name FROM `my_project.my_dataset.my_table`"
718
+ # data = bigquery.query sql
719
+ #
720
+ # # Iterate over the first page of results
721
+ # data.each do |row|
722
+ # puts row[:name]
723
+ # end
724
+ # # Retrieve the next page of results
725
+ # data = data.next if data.next?
726
+ #
727
+ # @example Query using legacy SQL:
728
+ # require "google/cloud/bigquery"
729
+ #
730
+ # bigquery = Google::Cloud::Bigquery.new
731
+ #
732
+ # sql = "SELECT name FROM [my_project:my_dataset.my_table]"
733
+ # data = bigquery.query sql, legacy_sql: true
734
+ #
735
+ # # Iterate over the first page of results
736
+ # data.each do |row|
737
+ # puts row[:name]
738
+ # end
739
+ # # Retrieve the next page of results
740
+ # data = data.next if data.next?
741
+ #
742
+ # @example Retrieve all rows: (See {Data#all})
743
+ # require "google/cloud/bigquery"
744
+ #
745
+ # bigquery = Google::Cloud::Bigquery.new
746
+ #
747
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table`"
748
+ #
749
+ # data.all do |row|
750
+ # puts row[:name]
751
+ # end
752
+ #
753
+ # @example Query using positional query parameters:
754
+ # require "google/cloud/bigquery"
755
+ #
756
+ # bigquery = Google::Cloud::Bigquery.new
757
+ #
758
+ # data = bigquery.query "SELECT name " \
759
+ # "FROM `my_dataset.my_table` " \
760
+ # "WHERE id = ?",
761
+ # params: [1]
762
+ #
763
+ # # Iterate over the first page of results
764
+ # data.each do |row|
765
+ # puts row[:name]
766
+ # end
767
+ # # Retrieve the next page of results
768
+ # data = data.next if data.next?
769
+ #
770
+ # @example Query using named query parameters:
771
+ # require "google/cloud/bigquery"
772
+ #
773
+ # bigquery = Google::Cloud::Bigquery.new
774
+ #
775
+ # data = bigquery.query "SELECT name " \
776
+ # "FROM `my_dataset.my_table` " \
777
+ # "WHERE id = @id",
778
+ # params: { id: 1 }
779
+ #
780
+ # # Iterate over the first page of results
781
+ # data.each do |row|
782
+ # puts row[:name]
783
+ # end
784
+ # # Retrieve the next page of results
785
+ # data = data.next if data.next?
786
+ #
787
+ # @example Query using named query parameters with types:
788
+ # require "google/cloud/bigquery"
789
+ #
790
+ # bigquery = Google::Cloud::Bigquery.new
791
+ #
792
+ # data = bigquery.query "SELECT name FROM " \
793
+ # "`my_dataset.my_table` " \
794
+ # "WHERE id IN UNNEST(@ids)",
795
+ # params: { ids: [] },
796
+ # types: { ids: [:INT64] }
797
+ #
798
+ # # Iterate over the first page of results
799
+ # data.each do |row|
800
+ # puts row[:name]
801
+ # end
802
+ # # Retrieve the next page of results
803
+ # data = data.next if data.next?
804
+ #
805
+ # @example Execute a DDL statement:
806
+ # require "google/cloud/bigquery"
807
+ #
808
+ # bigquery = Google::Cloud::Bigquery.new
809
+ #
810
+ # data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
811
+ #
812
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
813
+ #
814
+ # @example Execute a DML statement:
815
+ # require "google/cloud/bigquery"
816
+ #
817
+ # bigquery = Google::Cloud::Bigquery.new
818
+ #
819
+ # data = bigquery.query "UPDATE `my_dataset.my_table` " \
820
+ # "SET x = x + 1 " \
821
+ # "WHERE x IS NOT NULL"
822
+ #
823
+ # puts data.num_dml_affected_rows
824
+ #
825
+ # @example Query using external data source, set destination:
826
+ # require "google/cloud/bigquery"
827
+ #
828
+ # bigquery = Google::Cloud::Bigquery.new
829
+ #
830
+ # csv_url = "gs://bucket/path/to/data.csv"
831
+ # csv_table = bigquery.external csv_url do |csv|
832
+ # csv.autodetect = true
833
+ # csv.skip_leading_rows = 1
834
+ # end
835
+ #
836
+ # data = bigquery.query "SELECT * FROM my_ext_table" do |query|
837
+ # query.external = { my_ext_table: csv_table }
838
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
839
+ # query.table = dataset.table "my_table", skip_lookup: true
840
+ # end
841
+ #
842
+ # # Iterate over the first page of results
843
+ # data.each do |row|
844
+ # puts row[:name]
845
+ # end
846
+ # # Retrieve the next page of results
847
+ # data = data.next if data.next?
848
+ #
849
+ def query query, params: nil, types: nil, external: nil, max: nil, cache: true, dataset: nil, project: nil,
850
+ standard_sql: nil, legacy_sql: nil, &block
851
+ job = query_job query, params: params, types: types, external: external, cache: cache, dataset: dataset,
852
+ project: project, standard_sql: standard_sql, legacy_sql: legacy_sql, &block
853
+ job.wait_until_done!
854
+
855
+ if job.failed?
856
+ begin
857
+ # raise to activate ruby exception cause handling
858
+ raise job.gapi_error
859
+ rescue StandardError => e
860
+ # wrap Google::Apis::Error with Google::Cloud::Error
861
+ raise Google::Cloud::Error.from_error(e)
862
+ end
863
+ end
864
+
865
+ job.data max: max
866
+ end
867
+
868
+ ##
869
+ # Creates a new External::DataSource (or subclass) object that
870
+ # represents the external data source that can be queried from directly,
871
+ # even though the data is not stored in BigQuery. Instead of loading or
872
+ # streaming the data, this object references the external data source.
873
+ #
874
+ # @see https://cloud.google.com/bigquery/external-data-sources Querying
875
+ # External Data Sources
876
+ #
877
+ # @param [String, Array<String>] url The fully-qualified URL(s) that
878
+ # point to your data in Google Cloud. An attempt will be made to
879
+ # derive the format from the URLs provided.
880
+ # @param [String|Symbol] format The data format. This value will be used
881
+ # even if the provided URLs are recognized as a different format.
882
+ # Optional.
883
+ #
884
+ # The following values are supported:
885
+ #
886
+ # * `csv` - CSV
887
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
888
+ # * `avro` - [Avro](http://avro.apache.org/)
889
+ # * `sheets` - Google Sheets
890
+ # * `datastore_backup` - Cloud Datastore backup
891
+ # * `bigtable` - Bigtable
892
+ #
893
+ # @return [External::DataSource] External data source.
894
+ #
895
+ # @example
896
+ # require "google/cloud/bigquery"
897
+ #
898
+ # bigquery = Google::Cloud::Bigquery.new
899
+ #
900
+ # csv_url = "gs://bucket/path/to/data.csv"
901
+ # csv_table = bigquery.external csv_url do |csv|
902
+ # csv.autodetect = true
903
+ # csv.skip_leading_rows = 1
904
+ # end
905
+ #
906
+ # data = bigquery.query "SELECT * FROM my_ext_table",
907
+ # external: { my_ext_table: csv_table }
908
+ #
909
+ # # Iterate over the first page of results
910
+ # data.each do |row|
911
+ # puts row[:name]
912
+ # end
913
+ # # Retrieve the next page of results
914
+ # data = data.next if data.next?
915
+ #
916
+ def external url, format: nil
917
+ ext = External.from_urls url, format
918
+ yield ext if block_given?
919
+ ext
920
+ end
921
+
922
+ ##
923
+ # Retrieves an existing dataset by ID.
924
+ #
925
+ # @param [String] dataset_id The ID of a dataset.
926
+ # @param [Boolean] skip_lookup Optionally create just a local reference
927
+ # object without verifying that the resource exists on the BigQuery
928
+ # service. Calls made on this object will raise errors if the resource
929
+ # does not exist. Default is `false`. Optional.
930
+ #
931
+ # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if the
932
+ # dataset does not exist.
933
+ #
934
+ # @example
935
+ # require "google/cloud/bigquery"
936
+ #
937
+ # bigquery = Google::Cloud::Bigquery.new
938
+ #
939
+ # dataset = bigquery.dataset "my_dataset"
940
+ # puts dataset.name
941
+ #
942
+ # @example Avoid retrieving the dataset resource with `skip_lookup`:
943
+ # require "google/cloud/bigquery"
944
+ #
945
+ # bigquery = Google::Cloud::Bigquery.new
946
+ #
947
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
948
+ #
949
+ def dataset dataset_id, skip_lookup: nil
950
+ ensure_service!
951
+ return Dataset.new_reference project, dataset_id, service if skip_lookup
952
+ gapi = service.get_dataset dataset_id
953
+ Dataset.from_gapi gapi, service
954
+ rescue Google::Cloud::NotFoundError
955
+ nil
956
+ end
957
+
958
+ ##
959
+ # Creates a new dataset.
960
+ #
961
+ # @param [String] dataset_id A unique ID for this dataset, without the
962
+ # project name. The ID must contain only letters (a-z, A-Z), numbers
963
+ # (0-9), or underscores (_). The maximum length is 1,024 characters.
964
+ # @param [String] name A descriptive name for the dataset.
965
+ # @param [String] description A user-friendly description of the
966
+ # dataset.
967
+ # @param [Integer] expiration The default lifetime of all tables in the
968
+ # dataset, in milliseconds. The minimum value is 3600000 milliseconds
969
+ # (one hour).
970
+ # @param [String] location The geographic location where the dataset
971
+ # should reside. Possible values include `EU` and `US`. The default
972
+ # value is `US`.
973
+ # @yield [access] a block for setting rules
974
+ # @yieldparam [Google::Cloud::Bigquery::Dataset] access the object
975
+ # accepting rules
976
+ #
977
+ # @return [Google::Cloud::Bigquery::Dataset]
978
+ #
979
+ # @example
980
+ # require "google/cloud/bigquery"
981
+ #
982
+ # bigquery = Google::Cloud::Bigquery.new
983
+ #
984
+ # dataset = bigquery.create_dataset "my_dataset"
985
+ #
986
+ # @example A name and description can be provided:
987
+ # require "google/cloud/bigquery"
988
+ #
989
+ # bigquery = Google::Cloud::Bigquery.new
990
+ #
991
+ # dataset = bigquery.create_dataset "my_dataset",
992
+ # name: "My Dataset",
993
+ # description: "This is my Dataset"
994
+ #
995
+ # @example Or, configure access with a block: (See {Dataset::Access})
996
+ # require "google/cloud/bigquery"
997
+ #
998
+ # bigquery = Google::Cloud::Bigquery.new
999
+ #
1000
+ # dataset = bigquery.create_dataset "my_dataset" do |dataset|
1001
+ # dataset.access.add_writer_user "writers@example.com"
1002
+ # end
1003
+ #
1004
+ def create_dataset dataset_id, name: nil, description: nil,
1005
+ expiration: nil, location: nil
1006
+ ensure_service!
1007
+
1008
+ new_ds = Google::Apis::BigqueryV2::Dataset.new(
1009
+ dataset_reference: Google::Apis::BigqueryV2::DatasetReference.new(
1010
+ project_id: project, dataset_id: dataset_id
1011
+ )
1012
+ )
1013
+
1014
+ # Can set location only on creation, no Dataset#location method
1015
+ new_ds.update! location: location unless location.nil?
1016
+
1017
+ updater = Dataset::Updater.new(new_ds).tap do |b|
1018
+ b.name = name unless name.nil?
1019
+ b.description = description unless description.nil?
1020
+ b.default_expiration = expiration unless expiration.nil?
1021
+ end
1022
+
1023
+ if block_given?
1024
+ yield updater
1025
+ updater.check_for_mutated_access!
1026
+ end
1027
+
1028
+ gapi = service.insert_dataset new_ds
1029
+ Dataset.from_gapi gapi, service
1030
+ end
1031
+
1032
+ ##
1033
+ # Retrieves the list of datasets belonging to the project.
1034
+ #
1035
+ # @param [Boolean] all Whether to list all datasets, including hidden
1036
+ # ones. The default is `false`.
1037
+ # @param [String] filter An expression for filtering the results of the
1038
+ # request by label. The syntax is `labels.<name>[:<value>]`.
1039
+ # Multiple filters can be `AND`ed together by connecting with a space.
1040
+ # Example: `labels.department:receiving labels.active`. See [Filtering
1041
+ # datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
1042
+ # @param [String] token A previously-returned page token representing
1043
+ # part of the larger set of results to view.
1044
+ # @param [Integer] max Maximum number of datasets to return.
1045
+ #
1046
+ # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
1047
+ # {Google::Cloud::Bigquery::Dataset::List})
1048
+ #
1049
+ # @example
1050
+ # require "google/cloud/bigquery"
1051
+ #
1052
+ # bigquery = Google::Cloud::Bigquery.new
1053
+ #
1054
+ # datasets = bigquery.datasets
1055
+ # datasets.each do |dataset|
1056
+ # puts dataset.name
1057
+ # end
1058
+ #
1059
+ # @example Retrieve hidden datasets with the `all` optional arg:
1060
+ # require "google/cloud/bigquery"
1061
+ #
1062
+ # bigquery = Google::Cloud::Bigquery.new
1063
+ #
1064
+ # all_datasets = bigquery.datasets all: true
1065
+ #
1066
+ # @example Retrieve all datasets: (See {Dataset::List#all})
1067
+ # require "google/cloud/bigquery"
1068
+ #
1069
+ # bigquery = Google::Cloud::Bigquery.new
1070
+ #
1071
+ # datasets = bigquery.datasets
1072
+ # datasets.all do |dataset|
1073
+ # puts dataset.name
1074
+ # end
1075
+ #
1076
+ def datasets all: nil, filter: nil, token: nil, max: nil
1077
+ ensure_service!
1078
+ gapi = service.list_datasets all: all, filter: filter, token: token, max: max
1079
+ Dataset::List.from_gapi gapi, service, all, filter, max
1080
+ end
1081
+
1082
+ ##
1083
+ # Retrieves an existing job by ID.
1084
+ #
1085
+ # @param [String] job_id The ID of a job.
1086
+ # @param [String] location The geographic location where the job was
1087
+ # created. Required except for US and EU.
1088
+ #
1089
+ # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the job
1090
+ # does not exist.
1091
+ #
1092
+ # @example
1093
+ # require "google/cloud/bigquery"
1094
+ #
1095
+ # bigquery = Google::Cloud::Bigquery.new
1096
+ #
1097
+ # job = bigquery.job "my_job"
1098
+ #
1099
+ def job job_id, location: nil
1100
+ ensure_service!
1101
+ gapi = service.get_job job_id, location: location
1102
+ Job.from_gapi gapi, service
1103
+ rescue Google::Cloud::NotFoundError
1104
+ nil
1105
+ end
1106
+
1107
+ ##
1108
+ # Retrieves the list of jobs belonging to the project.
1109
+ #
1110
+ # @param [Boolean] all Whether to display jobs owned by all users in the
1111
+ # project. The default is `false`. Optional.
1112
+ # @param [String] token A previously-returned page token representing
1113
+ # part of the larger set of results to view. Optional.
1114
+ # @param [Integer] max Maximum number of jobs to return. Optional.
1115
+ # @param [String] filter A filter for job state. Optional.
1116
+ #
1117
+ # Acceptable values are:
1118
+ #
1119
+ # * `done` - Finished jobs
1120
+ # * `pending` - Pending jobs
1121
+ # * `running` - Running jobs
1122
+ # @param [Time] min_created_at Min value for {Job#created_at}. When
1123
+ # provided, only jobs created after or at this time are returned.
1124
+ # Optional.
1125
+ # @param [Time] max_created_at Max value for {Job#created_at}. When
1126
+ # provided, only jobs created before or at this time are returned.
1127
+ # Optional.
1128
+ # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
1129
+ # object or a job ID. If set, retrieve only child jobs of the
1130
+ # specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
1131
+ # and {Job#parent_job_id}.
1132
+ #
1133
+ # @return [Array<Google::Cloud::Bigquery::Job>] (See
1134
+ # {Google::Cloud::Bigquery::Job::List})
1135
+ #
1136
+ # @example
1137
+ # require "google/cloud/bigquery"
1138
+ #
1139
+ # bigquery = Google::Cloud::Bigquery.new
1140
+ #
1141
+ # jobs = bigquery.jobs
1142
+ # jobs.each do |job|
1143
+ # # process job
1144
+ # end
1145
+ #
1146
+ # @example Retrieve only running jobs using the `filter` optional arg:
1147
+ # require "google/cloud/bigquery"
1148
+ #
1149
+ # bigquery = Google::Cloud::Bigquery.new
1150
+ #
1151
+ # running_jobs = bigquery.jobs filter: "running"
1152
+ # running_jobs.each do |job|
1153
+ # # process job
1154
+ # end
1155
+ #
1156
+ # @example Retrieve only jobs created within provided times:
1157
+ # require "google/cloud/bigquery"
1158
+ #
1159
+ # bigquery = Google::Cloud::Bigquery.new
1160
+ #
1161
+ # two_days_ago = Time.now - 60*60*24*2
1162
+ # three_days_ago = Time.now - 60*60*24*3
1163
+ #
1164
+ # jobs = bigquery.jobs min_created_at: three_days_ago,
1165
+ # max_created_at: two_days_ago
1166
+ # jobs.each do |job|
1167
+ # # process job
1168
+ # end
1169
+ #
1170
+ # @example Retrieve all jobs: (See {Job::List#all})
1171
+ # require "google/cloud/bigquery"
1172
+ #
1173
+ # bigquery = Google::Cloud::Bigquery.new
1174
+ #
1175
+ # jobs = bigquery.jobs
1176
+ # jobs.all do |job|
1177
+ # # process job
1178
+ # end
1179
+ #
1180
+ # @example Retrieve child jobs by setting `parent_job`:
1181
+ # require "google/cloud/bigquery"
1182
+ #
1183
+ # bigquery = Google::Cloud::Bigquery.new
1184
+ #
1185
+ # multi_statement_sql = <<~SQL
1186
+ # -- Declare a variable to hold names as an array.
1187
+ # DECLARE top_names ARRAY<STRING>;
1188
+ # -- Build an array of the top 100 names from the year 2017.
1189
+ # SET top_names = (
1190
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
1191
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
1192
+ # WHERE year = 2017
1193
+ # );
1194
+ # -- Which names appear as words in Shakespeare's plays?
1195
+ # SELECT
1196
+ # name AS shakespeare_name
1197
+ # FROM UNNEST(top_names) AS name
1198
+ # WHERE name IN (
1199
+ # SELECT word
1200
+ # FROM `bigquery-public-data.samples.shakespeare`
1201
+ # );
1202
+ # SQL
1203
+ #
1204
+ # job = bigquery.query_job multi_statement_sql
1205
+ #
1206
+ # job.wait_until_done!
1207
+ #
1208
+ # child_jobs = bigquery.jobs parent_job: job
1209
+ #
1210
+ # child_jobs.each do |child_job|
1211
+ # script_statistics = child_job.script_statistics
1212
+ # puts script_statistics.evaluation_kind
1213
+ # script_statistics.stack_frames.each do |stack_frame|
1214
+ # puts stack_frame.text
1215
+ # end
1216
+ # end
1217
+ #
1218
+ def jobs all: nil,
1219
+ token: nil,
1220
+ max: nil,
1221
+ filter: nil,
1222
+ min_created_at: nil,
1223
+ max_created_at: nil,
1224
+ parent_job: nil
1225
+ ensure_service!
1226
+ parent_job = parent_job.job_id if parent_job.is_a? Job
1227
+ options = {
1228
+ parent_job_id: parent_job,
1229
+ all: all,
1230
+ token: token,
1231
+ max: max, filter: filter,
1232
+ min_created_at: min_created_at,
1233
+ max_created_at: max_created_at
1234
+ }
1235
+ gapi = service.list_jobs(**options)
1236
+ Job::List.from_gapi gapi, service, **options
1237
+ end
1238
+
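A short sketch combining several of the `jobs` options documented above; the 24-hour window is illustrative, and `all: true` assumes the caller is allowed to see other users' jobs in the project.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Finished jobs from the last 24 hours, across all users in the project.
one_day_ago = Time.now - 60 * 60 * 24

jobs = bigquery.jobs all: true, filter: "done", min_created_at: one_day_ago
jobs.all do |job|
  puts "#{job.job_id} created at #{job.created_at}"
end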
1239
+ ##
1240
+ # Retrieves the list of all projects for which the currently authorized
1241
+ # account has been granted any project role. The returned project
1242
+ # instances share the same credentials as the project used to retrieve
1243
+ # them, but lazily create a new API connection for interactions with the
1244
+ # BigQuery service.
1245
+ #
1246
+ # @param [String] token A previously-returned page token representing
1247
+ # part of the larger set of results to view.
1248
+ # @param [Integer] max Maximum number of projects to return.
1249
+ #
1250
+ # @return [Array<Google::Cloud::Bigquery::Project>] (See
1251
+ # {Google::Cloud::Bigquery::Project::List})
1252
+ #
1253
+ # @example
1254
+ # require "google/cloud/bigquery"
1255
+ #
1256
+ # bigquery = Google::Cloud::Bigquery.new
1257
+ #
1258
+ # projects = bigquery.projects
1259
+ # projects.each do |project|
1260
+ # puts project.name
1261
+ # project.datasets.all.each do |dataset|
1262
+ # puts dataset.name
1263
+ # end
1264
+ # end
1265
+ #
1266
+ # @example Retrieve all projects: (See {Project::List#all})
1267
+ # require "google/cloud/bigquery"
1268
+ #
1269
+ # bigquery = Google::Cloud::Bigquery.new
1270
+ #
1271
+ # projects = bigquery.projects
1272
+ #
1273
+ # projects.all do |project|
1274
+ # puts project.name
1275
+ # project.datasets.all.each do |dataset|
1276
+ # puts dataset.name
1277
+ # end
1278
+ # end
1279
+ #
1280
+ def projects token: nil, max: nil
1281
+ ensure_service!
1282
+ gapi = service.list_projects token: token, max: max
1283
+ Project::List.from_gapi gapi, service, max
1284
+ end
1285
+
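A minimal paging sketch using the `max` parameter and the page token returned on the list, as documented above.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Walk all accessible projects five at a time using explicit page tokens.
page = bigquery.projects max: 5
loop do
  page.each { |project| puts project.project_id }
  break if page.token.nil?
  page = bigquery.projects max: 5, token: page.token
end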
1286
+ ##
1287
+ # Creates a Bigquery::Time object to represent a time, independent of a
1288
+ # specific date.
1289
+ #
1290
+ # @param [Integer] hour Hour, valid values from 0 to 23.
1291
+ # @param [Integer] minute Minute, valid values from 0 to 59.
1292
+ # @param [Integer, Float] second Second, valid values from 0 to 59. Can
1293
+ # contain microsecond precision.
1294
+ #
1295
+ # @return [Bigquery::Time]
1296
+ #
1297
+ # @example
1298
+ # require "google/cloud/bigquery"
1299
+ #
1300
+ # bigquery = Google::Cloud::Bigquery.new
1301
+ #
1302
+ # fourpm = bigquery.time 16, 0, 0
1303
+ # data = bigquery.query "SELECT name " \
1304
+ # "FROM `my_dataset.my_table`" \
1305
+ # "WHERE time_of_date = @time",
1306
+ # params: { time: fourpm }
1307
+ #
1308
+ # # Iterate over the first page of results
1309
+ # data.each do |row|
1310
+ # puts row[:name]
1311
+ # end
1312
+ # # Retrieve the next page of results
1313
+ # data = data.next if data.next?
1314
+ #
1315
+ # @example Create Time with fractional seconds:
1316
+ # require "google/cloud/bigquery"
1317
+ #
1318
+ # bigquery = Google::Cloud::Bigquery.new
1319
+ #
1320
+ # precise_time = bigquery.time 16, 35, 15.376541
1321
+ # data = bigquery.query "SELECT name " \
1322
+ # "FROM `my_dataset.my_table`" \
1323
+ # "WHERE time_of_date >= @time",
1324
+ # params: { time: precise_time }
1325
+ #
1326
+ # # Iterate over the first page of results
1327
+ # data.each do |row|
1328
+ # puts row[:name]
1329
+ # end
1330
+ # # Retrieve the next page of results
1331
+ # data = data.next if data.next?
1332
+ #
1333
+ def time hour, minute, second
1334
+ Bigquery::Time.new "#{hour}:#{minute}:#{second}"
1335
+ end
1336
+
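A brief sketch using two `time` values as query parameters for a range comparison; the table name and the `opens_at` TIME column are hypothetical.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Hypothetical table with a TIME column named opens_at.
opening = bigquery.time 9, 0, 0
closing = bigquery.time 17, 30, 0

data = bigquery.query "SELECT name FROM `my_dataset.my_table` " \
                      "WHERE opens_at BETWEEN @opening AND @closing",
                      params: { opening: opening, closing: closing }

data.each { |row| puts row[:name] }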
1337
+ ##
1338
+ # Creates a new schema instance. An optional block may be given to
1339
+ # configure the schema, otherwise the schema is returned empty and may
1340
+ # be configured directly.
1341
+ #
1342
+ # The returned schema can be passed to {Dataset#load} using the
1343
+ # `schema` option. However, for most use cases, the block yielded by
1344
+ # {Dataset#load} is a more convenient way to configure the schema
1345
+ # for the destination table.
1346
+ #
1347
+ # @yield [schema] a block for setting the schema
1348
+ # @yieldparam [Schema] schema the object accepting the schema
1349
+ #
1350
+ # @return [Google::Cloud::Bigquery::Schema]
1351
+ #
1352
+ # @example
1353
+ # require "google/cloud/bigquery"
1354
+ #
1355
+ # bigquery = Google::Cloud::Bigquery.new
1356
+ #
1357
+ # schema = bigquery.schema do |s|
1358
+ # s.string "first_name", mode: :required
1359
+ # s.record "cities_lived", mode: :repeated do |nested_schema|
1360
+ # nested_schema.string "place", mode: :required
1361
+ # nested_schema.integer "number_of_years", mode: :required
1362
+ # end
1363
+ # end
1364
+ #
1365
+ # dataset = bigquery.dataset "my_dataset"
1366
+ #
1367
+ # gs_url = "gs://my-bucket/file-name.csv"
1368
+ # load_job = dataset.load_job "my_new_table", gs_url, schema: schema
1369
+ #
1370
+ def schema
1371
+ s = Schema.from_gapi
1372
+ yield s if block_given?
1373
+ s
1374
+ end
1375
+
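A small sketch reusing one schema for a synchronous load, as the note above suggests; the dataset, table, bucket, and field names are placeholders.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Build the schema once, outside any load block, and reuse it.
schema = bigquery.schema do |s|
  s.string    "email",        mode: :required
  s.timestamp "signed_up_at", mode: :nullable
end

dataset = bigquery.dataset "my_dataset"
dataset.load "my_new_table", "gs://my-bucket/users.csv", schema: schema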
1376
+ ##
1377
+ # Creates a new Bigquery::EncryptionConfiguration instance.
1378
+ #
1379
+ # This method does not execute an API call. Use the encryption
1380
+ # configuration to encrypt a table when creating one via
1381
+ # Bigquery::Dataset#create_table, Bigquery::Dataset#load,
1382
+ # Bigquery::Table#copy, or Bigquery::Project#query.
1383
+ #
1384
+ # @param [String] kms_key Name of the Cloud KMS encryption key that
1385
+ # will be used to protect the destination BigQuery table. The BigQuery
1386
+ # Service Account associated with your project requires access to this
1387
+ # encryption key.
1388
+ #
1389
+ # @example Encrypt a new table
1390
+ # require "google/cloud/bigquery"
1391
+ #
1392
+ # bigquery = Google::Cloud::Bigquery.new
1393
+ # dataset = bigquery.dataset "my_dataset"
1394
+ #
1395
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1396
+ # encrypt_config = bigquery.encryption kms_key: key_name
1397
+ #
1398
+ # table = dataset.create_table "my_table" do |updater|
1399
+ # updater.encryption = encrypt_config
1400
+ # end
1401
+ #
1402
+ # @example Encrypt a load destination table
1403
+ # require "google/cloud/bigquery"
1404
+ #
1405
+ # bigquery = Google::Cloud::Bigquery.new
1406
+ # dataset = bigquery.dataset "my_dataset"
1407
+ #
1408
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1409
+ # encrypt_config = bigquery.encryption kms_key: key_name
1410
+ # job = dataset.load_job "my_table", "gs://abc/file" do |job|
1411
+ # job.encryption = encrypt_config
1412
+ # end
1413
+ #
1414
+ # @example Encrypt a copy destination table
1415
+ # require "google/cloud/bigquery"
1416
+ #
1417
+ # bigquery = Google::Cloud::Bigquery.new
1418
+ # dataset = bigquery.dataset "my_dataset"
1419
+ # table = dataset.table "my_table"
1420
+ #
1421
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1422
+ # encrypt_config = bigquery.encryption kms_key: key_name
1423
+ # job = table.copy_job "my_dataset.new_table" do |job|
1424
+ # job.encryption = encrypt_config
1425
+ # end
1426
+ #
1427
+ # @example Encrypt a query destination table
1428
+ # require "google/cloud/bigquery"
1429
+ #
1430
+ # bigquery = Google::Cloud::Bigquery.new
1431
+ # dataset = bigquery.dataset "my_dataset"
1432
+ #
1433
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1434
+ # encrypt_config = bigquery.encryption kms_key: key_name
1435
+ # job = bigquery.query_job "SELECT 1;" do |query|
1436
+ # query.table = dataset.table "my_table", skip_lookup: true
1437
+ # query.encryption = encrypt_config
1438
+ # end
1439
+ #
1440
+ # @return [Google::Cloud::Bigquery::EncryptionConfiguration]
1441
+ def encryption kms_key: nil
1442
+ encrypt_config = Bigquery::EncryptionConfiguration.new
1443
+ encrypt_config.kms_key = kms_key unless kms_key.nil?
1444
+ encrypt_config
1445
+ end
1446
+
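A sketch reusing one encryption configuration for a load job, mirroring the examples above; the KMS key path, dataset, table, and bucket are placeholders.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Placeholder Cloud KMS key; the BigQuery service account needs access to it.
key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
encrypt_config = bigquery.encryption kms_key: key_name

job = dataset.load_job "my_encrypted_table", "gs://my-bucket/data.csv" do |load|
  load.encryption = encrypt_config
end
job.wait_until_done!
puts job.failed? ? "Load failed" : "Load succeeded"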
1447
+ ##
1448
+ # Extracts the data from the provided table to a Google Cloud Storage
1449
+ # file using an asynchronous method. In this method, an {ExtractJob} is
1450
+ # immediately returned. The caller may poll the service by repeatedly
1451
+ # calling {Job#reload!} and {Job#done?} to detect when the job is done,
1452
+ # or simply block until the job is done by calling
1453
+ # {Job#wait_until_done!}. See {#extract} for the synchronous version.
1454
+ # Use this method instead of {Table#extract_job} to extract data from
1455
+ # source tables in other projects.
1456
+ #
1457
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1458
+ # {ExtractJob::Updater#location=} in a block passed to this method.
1459
+ #
1460
+ # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
1461
+ # Exporting Data From BigQuery
1462
+ #
1463
+ # @param [String, Table] table The source table from which to extract
1464
+ # data. This can be a table object; or a string ID as specified by the
1465
+ # [Standard SQL Query
1466
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
1467
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
1468
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
1469
+ # (`project-name:dataset_id.table_id`).
1470
+ # @param [Google::Cloud::Storage::File, String, Array<String>]
1471
+ # extract_url The Google Storage file or file URI pattern(s) to which
1472
+ # BigQuery should extract the table data.
1473
+ # @param [String] format The exported file format. The default value is
1474
+ # `csv`.
1475
+ #
1476
+ # The following values are supported:
1477
+ #
1478
+ # * `csv` - CSV
1479
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1480
+ # * `avro` - [Avro](http://avro.apache.org/)
1481
+ # @param [String] compression The compression type to use for exported
1482
+ # files. Possible values include `GZIP` and `NONE`. The default value
1483
+ # is `NONE`.
1484
+ # @param [String] delimiter Delimiter to use between fields in the
1485
+ # exported data. Default is <code>,</code>.
1486
+ # @param [Boolean] header Whether to print out a header row in the
1487
+ # results. Default is `true`.
1488
+ # @param [String] job_id A user-defined ID for the extract job. The ID
1489
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
1490
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
1491
+ # `job_id` is provided, then `prefix` will not be used.
1492
+ #
1493
+ # See [Generating a job
1494
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
1495
+ # @param [String] prefix A string, usually human-readable, that will be
1496
+ # prepended to a generated value to produce a unique job ID. For
1497
+ # example, the prefix `daily_import_job_` can be given to generate a
1498
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1499
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1500
+ # underscores (_), or dashes (-). The maximum length of the entire ID
1501
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1502
+ # be used.
1503
+ # @param [Hash] labels A hash of user-provided labels associated with
1504
+ # the job. You can use these to organize and group your jobs. Label
1505
+ # keys and values can be no longer than 63 characters, can only
1506
+ # contain lowercase letters, numeric characters, underscores and
1507
+ # dashes. International characters are allowed. Label values are
1508
+ # optional. Label keys must start with a letter and each label in the
1509
+ # list must have a different key. See [Requirements for
1510
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1511
+ # @yield [job] a job configuration object
1512
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
1513
+ # configuration object for setting additional options.
1514
+ #
1515
+ # @return [Google::Cloud::Bigquery::ExtractJob]
1516
+ #
1517
+ # @example
1518
+ # require "google/cloud/bigquery"
1519
+ #
1520
+ # bigquery = Google::Cloud::Bigquery.new
1521
+ #
1522
+ # table_id = "bigquery-public-data.samples.shakespeare"
1523
+ # extract_job = bigquery.extract_job table_id,
1524
+ # "gs://my-bucket/shakespeare.csv"
1525
+ # extract_job.wait_until_done!
1526
+ # extract_job.done? #=> true
1527
+ #
1528
+ # @!group Data
1529
+ #
1530
+ def extract_job table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
1531
+ prefix: nil, labels: nil
1532
+ ensure_service!
1533
+ options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
1534
+ prefix: prefix, labels: labels }
1535
+
1536
+ table_ref = Service.get_table_ref table, default_ref: project_ref
1537
+ updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
1538
+
1539
+ yield updater if block_given?
1540
+
1541
+ job_gapi = updater.to_gapi
1542
+ gapi = service.extract_table job_gapi
1543
+ Job.from_gapi gapi, service
1544
+ end
1545
+
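A sketch of an asynchronous export to newline-delimited JSON with the job location set in the block, per the options documented above; the bucket and wildcard URI are placeholders.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Export a public table as newline-delimited JSON, sharded across files
# via the wildcard URI, and pin the job to the US multi-region.
extract_job = bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                                   "gs://my-bucket/shakespeare-*.json",
                                   format: "json" do |job|
  job.location = "US"
end

extract_job.wait_until_done!
puts extract_job.failed? ? "Extract failed" : "Extract done"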
1546
+ ##
1547
+ # Extracts the data from the provided table to a Google Cloud Storage
1548
+ # file using a synchronous method that blocks for a response. Timeouts
1549
+ # and transient errors are generally handled as needed to complete the
1550
+ # job. See {#extract_job} for the asynchronous version. Use this method
1551
+ # instead of {Table#extract} to extract data from source tables in other
1552
+ # projects.
1553
+ #
1554
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1555
+ # {ExtractJob::Updater#location=} in a block passed to this method.
1556
+ #
1557
+ # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
1558
+ # Exporting Data From BigQuery
1559
+ #
1560
+ # @param [String, Table] table The source table from which to extract
1561
+ # data. This can be a table object; or a string ID as specified by the
1562
+ # [Standard SQL Query
1563
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
1564
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
1565
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
1566
+ # (`project-name:dataset_id.table_id`).
1567
+ # @param [Google::Cloud::Storage::File, String, Array<String>]
1568
+ # extract_url The Google Storage file or file URI pattern(s) to which
1569
+ # BigQuery should extract the table data.
1570
+ # @param [String] format The exported file format. The default value is
1571
+ # `csv`.
1572
+ #
1573
+ # The following values are supported:
1574
+ #
1575
+ # * `csv` - CSV
1576
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1577
+ # * `avro` - [Avro](http://avro.apache.org/)
1578
+ # @param [String] compression The compression type to use for exported
1579
+ # files. Possible values include `GZIP` and `NONE`. The default value
1580
+ # is `NONE`.
1581
+ # @param [String] delimiter Delimiter to use between fields in the
1582
+ # exported data. Default is <code>,</code>.
1583
+ # @param [Boolean] header Whether to print out a header row in the
1584
+ # results. Default is `true`.
1585
+ # @yield [job] a job configuration object
1586
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
1587
+ # configuration object for setting additional options.
1588
+ #
1589
+ # @return [Boolean] Returns `true` if the extract operation succeeded.
1590
+ #
1591
+ # @example
1592
+ # require "google/cloud/bigquery"
1593
+ #
1594
+ # bigquery = Google::Cloud::Bigquery.new
1595
+ #
1596
+ # bigquery.extract "bigquery-public-data.samples.shakespeare",
1597
+ # "gs://my-bucket/shakespeare.csv"
1598
+ #
1599
+ # @!group Data
1600
+ #
1601
+ def extract table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
1602
+ job = extract_job table, extract_url,
1603
+ format: format,
1604
+ compression: compression,
1605
+ delimiter: delimiter,
1606
+ header: header,
1607
+ &block
1608
+ job.wait_until_done!
1609
+ ensure_job_succeeded! job
1610
+ true
1611
+ end
1612
+
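A one-call sketch of the synchronous version with gzip compression; it blocks until the job completes and returns `true` on success. The bucket is a placeholder.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

bigquery.extract "bigquery-public-data.samples.shakespeare",
                 "gs://my-bucket/shakespeare.csv.gz",
                 compression: "GZIP"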
1613
+ ##
1614
+ # @private New Project from a Google API Client object, using the
1615
+ # same Credentials as this project.
1616
+ def self.from_gapi gapi, service
1617
+ project_service = Service.new gapi.project_reference.project_id,
1618
+ service.credentials,
1619
+ retries: service.retries,
1620
+ timeout: service.timeout
1621
+ new(project_service).tap do |p|
1622
+ p.instance_variable_set :@name, gapi.friendly_name
1623
+
1624
+ # TODO: remove `Integer` and set normally after migrating to Gax or
1625
+ # to google-api-client 0.10 (See google/google-api-ruby-client#439)
1626
+ p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
1627
+ end
1628
+ end
1629
+
1630
+ protected
1631
+
1632
+ ##
1633
+ # Raise an error unless an active service is available.
1634
+ def ensure_service!
1635
+ raise "Must have active connection" unless service
1636
+ end
1637
+
1638
+ def ensure_job_succeeded! job
1639
+ return unless job.failed?
1640
+ begin
1641
+ # raise to activate ruby exception cause handling
1642
+ raise job.gapi_error
1643
+ rescue StandardError => e
1644
+ # wrap Google::Apis::Error with Google::Cloud::Error
1645
+ raise Google::Cloud::Error.from_error(e)
1646
+ end
1647
+ end
1648
+
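A caller-side sketch of what the raise/rescue pattern above buys: the `Google::Cloud::Error` raised to the caller can carry the underlying API error as its `cause`. The failing destination URI here is deliberately invalid and purely illustrative.

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

begin
  # An invalid destination URI so the extract fails; placeholder values only.
  bigquery.extract "bigquery-public-data.samples.shakespeare", "not-a-gcs-uri"
rescue Google::Cloud::Error => e
  puts e.message        # the wrapped error raised by this library
  puts e.cause.inspect  # the underlying Google::Apis::Error, when one was rescued
end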
1649
+ def project_ref
1650
+ Google::Apis::BigqueryV2::ProjectReference.new project_id: project_id
1651
+ end
1652
+ end
1653
+ end
1654
+ end
1655
+ end