google-cloud-bigquery 1.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
data/lib/google/cloud/bigquery/model/list.rb
@@ -0,0 +1,164 @@
1
+ # Copyright 2019 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "delegate"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ class Model
22
+ ##
23
+ # Model::List is a special case Array with additional values.
24
+ class List < DelegateClass(::Array)
25
+ ##
26
+ # If not empty, indicates that there are more records that match
27
+ # the request and this value should be passed to continue.
28
+ attr_accessor :token
29
+
30
+ ##
31
+ # @private Create a new Model::List with an array of models.
32
+ def initialize arr = []
33
+ super arr
34
+ end
35
+
36
+ ##
37
+ # Whether there is a next page of models.
38
+ #
39
+ # @return [Boolean]
40
+ #
41
+ # @example
42
+ # require "google/cloud/bigquery"
43
+ #
44
+ # bigquery = Google::Cloud::Bigquery.new
45
+ # dataset = bigquery.dataset "my_dataset"
46
+ #
47
+ # models = dataset.models
48
+ # if models.next?
49
+ # next_models = models.next
50
+ # end
51
+ #
52
+ def next?
53
+ !token.nil?
54
+ end
55
+
56
+ ##
57
+ # Retrieve the next page of models.
58
+ #
59
+ # @return [Model::List]
60
+ #
61
+ # @example
62
+ # require "google/cloud/bigquery"
63
+ #
64
+ # bigquery = Google::Cloud::Bigquery.new
65
+ # dataset = bigquery.dataset "my_dataset"
66
+ #
67
+ # models = dataset.models
68
+ # if models.next?
69
+ # next_models = models.next
70
+ # end
71
+ #
72
+ def next
73
+ return nil unless next?
74
+ ensure_service!
75
+ gapi = @service.list_models @dataset_id, token: token, max: @max
76
+ self.class.from_gapi gapi, @service, @dataset_id, @max
77
+ end
78
+
79
+ ##
80
+ # Retrieves remaining results by repeatedly invoking {#next} until
81
+ # {#next?} returns `false`. Calls the given block once for each
82
+ # result, which is passed as the argument to the block.
83
+ #
84
+ # An Enumerator is returned if no block is given.
85
+ #
86
+ # This method will make repeated API calls until all remaining results
87
+ # are retrieved. (Unlike `#each`, for example, which merely iterates
88
+ # over the results returned by a single API call.) Use with caution.
89
+ #
90
+ # @param [Integer] request_limit The upper limit of API requests to
91
+ # make to load all models. Default is no limit.
92
+ # @yield [model] The block for accessing each model.
93
+ # @yieldparam [Model] model The model object.
94
+ #
95
+ # @return [Enumerator]
96
+ #
97
+ # @example Iterating each result by passing a block:
98
+ # require "google/cloud/bigquery"
99
+ #
100
+ # bigquery = Google::Cloud::Bigquery.new
101
+ # dataset = bigquery.dataset "my_dataset"
102
+ #
103
+ # dataset.models.all do |model|
104
+ # puts model.model_id
105
+ # end
106
+ #
107
+ # @example Using the enumerator by not passing a block:
108
+ # require "google/cloud/bigquery"
109
+ #
110
+ # bigquery = Google::Cloud::Bigquery.new
111
+ # dataset = bigquery.dataset "my_dataset"
112
+ #
113
+ # all_names = dataset.models.all.map do |model|
114
+ # model.model_id
115
+ # end
116
+ #
117
+ # @example Limit the number of API requests made:
118
+ # require "google/cloud/bigquery"
119
+ #
120
+ # bigquery = Google::Cloud::Bigquery.new
121
+ # dataset = bigquery.dataset "my_dataset"
122
+ #
123
+ # dataset.models.all(request_limit: 10) do |model|
124
+ # puts model.model_id
125
+ # end
126
+ #
127
+ def all request_limit: nil
128
+ request_limit = request_limit.to_i if request_limit
129
+ return enum_for :all, request_limit: request_limit unless block_given?
130
+ results = self
131
+ loop do
132
+ results.each { |r| yield r }
133
+ if request_limit
134
+ request_limit -= 1
135
+ break if request_limit.negative?
136
+ end
137
+ break unless results.next?
138
+ results = results.next
139
+ end
140
+ end
141
+
142
+ ##
143
+ # @private New Model::List from a response object.
144
+ def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
145
+ models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
146
+ models.instance_variable_set :@token, gapi_list[:nextPageToken]
147
+ models.instance_variable_set :@service, service
148
+ models.instance_variable_set :@dataset_id, dataset_id
149
+ models.instance_variable_set :@max, max
150
+ models
151
+ end
152
+
153
+ protected
154
+
155
+ ##
156
+ # Raise an error unless an active service is available.
157
+ def ensure_service!
158
+ raise "Must have active connection" unless @service
159
+ end
160
+ end
161
+ end
162
+ end
163
+ end
164
+ end
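
Taken together, `#next?`, `#next`, and `#all` give two paging styles over `Model::List`. A minimal sketch of both, assuming a dataset named "my_dataset" that already contains models (not part of the diff):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # Manual paging: walk pages while a next-page token remains.
    models = dataset.models
    loop do
      models.each { |model| puts model.model_id }
      break unless models.next?
      models = models.next
    end

    # Automatic paging: #all calls #next internally, capped here at 10 API requests.
    dataset.models.all(request_limit: 10) { |model| puts model.model_id }
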
data/lib/google/cloud/bigquery/project.rb
@@ -0,0 +1,1655 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/bigquery/service"
18
+ require "google/cloud/bigquery/credentials"
19
+ require "google/cloud/bigquery/dataset"
20
+ require "google/cloud/bigquery/job"
21
+ require "google/cloud/bigquery/external"
22
+ require "google/cloud/bigquery/project/list"
23
+ require "google/cloud/bigquery/time"
24
+ require "google/cloud/bigquery/schema"
25
+
26
+ module Google
27
+ module Cloud
28
+ module Bigquery
29
+ ##
30
+ # # Project
31
+ #
32
+ # Projects are top-level containers in Google Cloud Platform. They store
33
+ # information about billing and authorized users, and they contain
34
+ # BigQuery data. Each project has a friendly name and a unique ID.
35
+ #
36
+ # Google::Cloud::Bigquery::Project is the main object for interacting with
37
+ # Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are created,
38
+ # accessed, and deleted by Google::Cloud::Bigquery::Project.
39
+ #
40
+ # See {Google::Cloud#bigquery}.
41
+ #
42
+ # @attr_reader [String, nil] name The descriptive name of the project.
43
+ # Can only be present if the project was retrieved with {#projects}.
44
+ # @attr_reader [Integer, nil] numeric_id The numeric ID of the project.
45
+ # Can only be present if the project was retrieved with {#projects}.
46
+ #
47
+ # @example
48
+ # require "google/cloud/bigquery"
49
+ #
50
+ # bigquery = Google::Cloud::Bigquery.new
51
+ # dataset = bigquery.dataset "my_dataset"
52
+ # table = dataset.table "my_table"
53
+ #
54
+ class Project
55
+ ##
56
+ # @private The Service object.
57
+ attr_accessor :service
58
+
59
+ attr_reader :name, :numeric_id
60
+
61
+ ##
62
+ # Creates a new Project instance.
63
+ #
64
+ # See {Google::Cloud.bigquery}
65
+ def initialize service
66
+ @service = service
67
+ end
68
+
69
+ ##
70
+ # The BigQuery project connected to.
71
+ #
72
+ # @example
73
+ # require "google/cloud/bigquery"
74
+ #
75
+ # bigquery = Google::Cloud::Bigquery.new(
76
+ # project_id: "my-project",
77
+ # credentials: "/path/to/keyfile.json"
78
+ # )
79
+ #
80
+ # bigquery.project_id #=> "my-project"
81
+ #
82
+ def project_id
83
+ service.project
84
+ end
85
+ alias project project_id
86
+
87
+ ##
88
+ # The email address of the service account for the project used to
89
+ # connect to BigQuery. (See also {#project_id}.)
90
+ #
91
+ # @return [String] The service account email address.
92
+ #
93
+ def service_account_email
94
+ @service_account_email ||= service.project_service_account.email
95
+ end
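
Unlike most accessors in this class, `#service_account_email` ships without a usage example; a one-line sketch (the printed address depends on the project):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    puts bigquery.service_account_email
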
96
+
97
+ ##
98
+ # Copies the data from the source table to the destination table using
99
+ # an asynchronous method. In this method, a {CopyJob} is immediately
100
+ # returned. The caller may poll the service by repeatedly calling
101
+ # {Job#reload!} and {Job#done?} to detect when the job is done, or
102
+ # simply block until the job is done by calling {Job#wait_until_done!}.
103
+ # See {#copy} for the synchronous version. Use this method instead of
104
+ # {Table#copy_job} to copy from source tables in other projects.
105
+ #
106
+ # The geographic location for the job ("US", "EU", etc.) can be set via
107
+ # {CopyJob::Updater#location=} in a block passed to this method.
108
+ #
109
+ # @param [String, Table] source_table The source table for the
110
+ # copied data. This can be a table object; or a string ID as specified
111
+ # by the [Standard SQL Query
112
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
113
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
114
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
115
+ # (`project-name:dataset_id.table_id`).
116
+ # @param [String, Table] destination_table The destination table for the
117
+ # copied data. This can be a table object; or a string ID as specified
118
+ # by the [Standard SQL Query
119
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
120
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
121
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
122
+ # (`project-name:dataset_id.table_id`).
123
+ # @param [String] create Specifies whether the job is allowed to create
124
+ # new tables. The default value is `needed`.
125
+ #
126
+ # The following values are supported:
127
+ #
128
+ # * `needed` - Create the table if it does not exist.
129
+ # * `never` - The table must already exist. A 'notFound' error is
130
+ # raised if the table does not exist.
131
+ # @param [String] write Specifies how to handle data already present in
132
+ # the destination table. The default value is `empty`.
133
+ #
134
+ # The following values are supported:
135
+ #
136
+ # * `truncate` - BigQuery overwrites the table data.
137
+ # * `append` - BigQuery appends the data to the table.
138
+ # * `empty` - An error will be returned if the destination table
139
+ # already contains data.
140
+ # @param [String] job_id A user-defined ID for the copy job. The ID
141
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
142
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
143
+ # `job_id` is provided, then `prefix` will not be used.
144
+ #
145
+ # See [Generating a job
146
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
147
+ # @param [String] prefix A string, usually human-readable, that will be
148
+ # prepended to a generated value to produce a unique job ID. For
149
+ # example, the prefix `daily_import_job_` can be given to generate a
150
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
151
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
152
+ # underscores (_), or dashes (-). The maximum length of the entire ID
153
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
154
+ # be used.
155
+ # @param [Hash] labels A hash of user-provided labels associated with
156
+ # the job. You can use these to organize and group your jobs. Label
157
+ # keys and values can be no longer than 63 characters, can only
158
+ # contain lowercase letters, numeric characters, underscores and
159
+ # dashes. International characters are allowed. Label values are
160
+ # optional. Label keys must start with a letter and each label in the
161
+ # list must have a different key. See [Requirements for
162
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
163
+ # @yield [job] a job configuration object
164
+ # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
165
+ # configuration object for setting additional options.
166
+ #
167
+ # @return [Google::Cloud::Bigquery::CopyJob]
168
+ #
169
+ # @example
170
+ # require "google/cloud/bigquery"
171
+ #
172
+ # bigquery = Google::Cloud::Bigquery.new
173
+ # dataset = bigquery.dataset "my_dataset"
174
+ # source_table_id = "bigquery-public-data.samples.shakespeare"
175
+ # destination_table = dataset.table "my_destination_table"
176
+ #
177
+ # copy_job = bigquery.copy_job source_table_id, destination_table
178
+ #
179
+ # copy_job.wait_until_done!
180
+ # copy_job.done? #=> true
181
+ #
182
+ # @!group Data
183
+ #
184
+ def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
185
+ ensure_service!
186
+ options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }
187
+
188
+ updater = CopyJob::Updater.from_options(
189
+ service,
190
+ Service.get_table_ref(source_table, default_ref: project_ref),
191
+ Service.get_table_ref(destination_table, default_ref: project_ref),
192
+ options
193
+ )
194
+
195
+ yield updater if block_given?
196
+
197
+ job_gapi = updater.to_gapi
198
+ gapi = service.copy_table job_gapi
199
+ Job.from_gapi gapi, service
200
+ end
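
The comment above notes that the job's geographic location can be set through `CopyJob::Updater#location=` in the block, but none of the examples show it; a sketch with placeholder table IDs and location:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    copy_job = bigquery.copy_job "other-project.other_dataset.source_table",
                                 "my-project.my_dataset.dest_table",
                                 write: "truncate" do |job|
      job.location = "EU" # run the copy job in the EU multi-region
    end

    copy_job.wait_until_done!
    copy_job.done? #=> true
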
201
+
202
+ ##
203
+ # Copies the data from the source table to the destination table using a
204
+ # synchronous method that blocks for a response. Timeouts and transient
205
+ # errors are generally handled as needed to complete the job. See
206
+ # {#copy_job} for the asynchronous version. Use this method instead of
207
+ # {Table#copy} to copy from source tables in other projects.
208
+ #
209
+ # The geographic location for the job ("US", "EU", etc.) can be set via
210
+ # {CopyJob::Updater#location=} in a block passed to this method.
211
+ #
212
+ # @param [String, Table] source_table The source table for the
213
+ # copied data. This can be a table object; or a string ID as specified
214
+ # by the [Standard SQL Query
215
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
216
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
217
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
218
+ # (`project-name:dataset_id.table_id`).
219
+ # @param [String, Table] destination_table The destination table for the
220
+ # copied data. This can be a table object; or a string ID as specified
221
+ # by the [Standard SQL Query
222
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
223
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
224
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
225
+ # (`project-name:dataset_id.table_id`).
226
+ # @param [String] create Specifies whether the job is allowed to create
227
+ # new tables. The default value is `needed`.
228
+ #
229
+ # The following values are supported:
230
+ #
231
+ # * `needed` - Create the table if it does not exist.
232
+ # * `never` - The table must already exist. A 'notFound' error is
233
+ # raised if the table does not exist.
234
+ # @param [String] write Specifies how to handle data already present in
235
+ # the destination table. The default value is `empty`.
236
+ #
237
+ # The following values are supported:
238
+ #
239
+ # * `truncate` - BigQuery overwrites the table data.
240
+ # * `append` - BigQuery appends the data to the table.
241
+ # * `empty` - An error will be returned if the destination table
242
+ # already contains data.
243
+ # @yield [job] a job configuration object
244
+ # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
245
+ # configuration object for setting additional options.
246
+ #
247
+ # @return [Boolean] Returns `true` if the copy operation succeeded.
248
+ #
249
+ # @example
250
+ # require "google/cloud/bigquery"
251
+ #
252
+ # bigquery = Google::Cloud::Bigquery.new
253
+ # dataset = bigquery.dataset "my_dataset"
254
+ # destination_table = dataset.table "my_destination_table"
255
+ #
256
+ # bigquery.copy "bigquery-public-data.samples.shakespeare",
257
+ # destination_table
258
+ #
259
+ # @!group Data
260
+ #
261
+ def copy source_table, destination_table, create: nil, write: nil, &block
262
+ job = copy_job source_table, destination_table, create: create, write: write, &block
263
+ job.wait_until_done!
264
+ ensure_job_succeeded! job
265
+ true
266
+ end
267
+
268
+ ##
269
+ # Queries data by creating a [query
270
+ # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
271
+ #
272
+ # The geographic location for the job ("US", "EU", etc.) can be set via
273
+ # {QueryJob::Updater#location=} in a block passed to this method.
274
+ #
275
+ # @param [String] query A query string, following the BigQuery [query
276
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
277
+ # query to execute. Example: "SELECT count(f1) FROM
278
+ # [myProjectId:myDatasetId.myTableId]".
279
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
280
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
281
+ # query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
282
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
283
+ # true.
284
+ #
285
+ # Ruby types are mapped to BigQuery types as follows:
286
+ #
287
+ # | BigQuery | Ruby | Notes |
288
+ # |-------------|--------------------------------------|------------------------------------------------|
289
+ # | `BOOL` | `true`/`false` | |
290
+ # | `INT64` | `Integer` | |
291
+ # | `FLOAT64` | `Float` | |
292
+ # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
293
+ # | `STRING` | `String` | |
294
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
295
+ # | `DATE` | `Date` | |
296
+ # | `TIMESTAMP` | `Time` | |
297
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
298
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
299
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
300
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
301
+ #
302
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
303
+ # of each BigQuery data type, including allowed values.
304
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
305
+ # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
306
+ # type for these values.
307
+ #
308
+ # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
309
+ # parameters. This must be a `Hash` when the query uses named query parameters. The values should be BigQuery
310
+ # type codes from the following list:
311
+ #
312
+ # * `:BOOL`
313
+ # * `:INT64`
314
+ # * `:FLOAT64`
315
+ # * `:NUMERIC`
316
+ # * `:STRING`
317
+ # * `:DATETIME`
318
+ # * `:DATE`
319
+ # * `:TIMESTAMP`
320
+ # * `:TIME`
321
+ # * `:BYTES`
322
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
323
+ # is specified as `[:INT64]`.
324
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
325
+ # match the `params` hash, and the values are the type codes that match the data.
326
+ #
327
+ # Types are optional.
328
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
329
+ # that represents the mapping of the external tables to the table
330
+ # names used in the SQL query. The hash keys are the table names, and
331
+ # the hash values are the external table objects. See {Project#query}.
332
+ # @param [String] priority Specifies a priority for the query. Possible
333
+ # values include `INTERACTIVE` and `BATCH`. The default value is
334
+ # `INTERACTIVE`.
335
+ # @param [Boolean] cache Whether to look for the result in the query
336
+ # cache. The query cache is a best-effort cache that will be flushed
337
+ # whenever tables in the query are modified. The default value is
338
+ # true. For more information, see [query
339
+ # caching](https://developers.google.com/bigquery/querying-data).
340
+ # @param [Table] table The destination table where the query results
341
+ # should be stored. If not present, a new table will be created to
342
+ # store the results.
343
+ # @param [String] create Specifies whether the job is allowed to create
344
+ # new tables. The default value is `needed`.
345
+ #
346
+ # The following values are supported:
347
+ #
348
+ # * `needed` - Create the table if it does not exist.
349
+ # * `never` - The table must already exist. A 'notFound' error is
350
+ # raised if the table does not exist.
351
+ # @param [String] write Specifies the action that occurs if the
352
+ # destination table already exists. The default value is `empty`.
353
+ #
354
+ # The following values are supported:
355
+ #
356
+ # * `truncate` - BigQuery overwrites the table data.
357
+ # * `append` - BigQuery appends the data to the table.
358
+ # * `empty` - A 'duplicate' error is returned in the job result if the
359
+ # table exists and contains data.
360
+ # @param [Boolean] dryrun If set to true, BigQuery doesn't run the job.
361
+ # Instead, if the query is valid, BigQuery returns statistics about
362
+ # the job such as how many bytes would be processed. If the query is
363
+ # invalid, an error returns. The default value is false.
364
+ # @param [Dataset, String] dataset The default dataset to use for
365
+ # unqualified table names in the query. Optional.
366
+ # @param [String] project Specifies the default projectId to assume for
367
+ # any unqualified table names in the query. Only used if `dataset`
368
+ # option is set.
369
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
370
+ # [standard
371
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
372
+ # dialect for this query. If set to true, the query will use standard
373
+ # SQL rather than the [legacy
374
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
375
+ # dialect. Optional. The default value is true.
376
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
377
+ # [legacy
378
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
379
+ # dialect for this query. If set to false, the query will use
380
+ # BigQuery's [standard
381
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
382
+ # dialect. Optional. The default value is false.
383
+ # @param [Boolean] large_results This option is specific to Legacy SQL.
384
+ # If `true`, allows the query to produce arbitrarily large result
385
+ # tables at a slight cost in performance. Requires `table` parameter
386
+ # to be set.
387
+ # @param [Boolean] flatten This option is specific to Legacy SQL.
388
+ # Flattens all nested and repeated fields in the query results. The
389
+ # default value is `true`. `large_results` parameter must be `true` if
390
+ # this is set to `false`.
391
+ # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
392
+ # job. Queries that will have bytes billed beyond this limit will fail
393
+ # (without incurring a charge). Optional. If unspecified, this will be
394
+ # set to your project default.
395
+ # @param [String] job_id A user-defined ID for the query job. The ID
396
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
397
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
398
+ # `job_id` is provided, then `prefix` will not be used.
399
+ #
400
+ # See [Generating a job
401
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
402
+ # @param [String] prefix A string, usually human-readable, that will be
403
+ # prepended to a generated value to produce a unique job ID. For
404
+ # example, the prefix `daily_import_job_` can be given to generate a
405
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
406
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
407
+ # underscores (_), or dashes (-). The maximum length of the entire ID
408
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
409
+ # be used.
410
+ #
411
+ # See [Generating a job
412
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
413
+ # @param [Hash] labels A hash of user-provided labels associated with
414
+ # the job. You can use these to organize and group your jobs. Label
415
+ # keys and values can be no longer than 63 characters, can only
416
+ # contain lowercase letters, numeric characters, underscores and
417
+ # dashes. International characters are allowed. Label values are
418
+ # optional. Label keys must start with a letter and each label in the
419
+ # list must have a different key. See [Requirements for
420
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
421
+ # @param [Array<String>, String] udfs User-defined function resources
422
+ # used in a legacy SQL query. May be either a code resource to load from
423
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
424
+ # that contains code for a user-defined function (UDF). Providing an
425
+ # inline code resource is equivalent to providing a URI for a file
426
+ # containing the same code.
427
+ #
428
+ # This parameter is used for defining User Defined Function (UDF)
429
+ # resources only when using legacy SQL. Users of standard SQL should
430
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
431
+ # Routines API to define UDF resources.
432
+ #
433
+ # For additional information on migrating, see: [Migrating to
434
+ # standard SQL - Differences in user-defined JavaScript
435
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
436
+ # @param [Integer] maximum_billing_tier Deprecated: Change the billing
437
+ # tier to allow high-compute queries.
438
+ # @yield [job] a job configuration object
439
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
440
+ # configuration object for setting query options.
441
+ #
442
+ # @return [Google::Cloud::Bigquery::QueryJob]
443
+ #
444
+ # @example Query using standard SQL:
445
+ # require "google/cloud/bigquery"
446
+ #
447
+ # bigquery = Google::Cloud::Bigquery.new
448
+ #
449
+ # job = bigquery.query_job "SELECT name FROM " \
450
+ # "`my_project.my_dataset.my_table`"
451
+ #
452
+ # job.wait_until_done!
453
+ # if !job.failed?
454
+ # job.data.each do |row|
455
+ # puts row[:name]
456
+ # end
457
+ # end
458
+ #
459
+ # @example Query using legacy SQL:
460
+ # require "google/cloud/bigquery"
461
+ #
462
+ # bigquery = Google::Cloud::Bigquery.new
463
+ #
464
+ # job = bigquery.query_job "SELECT name FROM " \
465
+ # " [my_project:my_dataset.my_table]",
466
+ # legacy_sql: true
467
+ #
468
+ # job.wait_until_done!
469
+ # if !job.failed?
470
+ # job.data.each do |row|
471
+ # puts row[:name]
472
+ # end
473
+ # end
474
+ #
475
+ # @example Query using positional query parameters:
476
+ # require "google/cloud/bigquery"
477
+ #
478
+ # bigquery = Google::Cloud::Bigquery.new
479
+ #
480
+ # job = bigquery.query_job "SELECT name FROM " \
481
+ # "`my_dataset.my_table` " \
482
+ # "WHERE id = ?",
483
+ # params: [1]
484
+ #
485
+ # job.wait_until_done!
486
+ # if !job.failed?
487
+ # job.data.each do |row|
488
+ # puts row[:name]
489
+ # end
490
+ # end
491
+ #
492
+ # @example Query using named query parameters:
493
+ # require "google/cloud/bigquery"
494
+ #
495
+ # bigquery = Google::Cloud::Bigquery.new
496
+ #
497
+ # job = bigquery.query_job "SELECT name FROM " \
498
+ # "`my_dataset.my_table` " \
499
+ # "WHERE id = @id",
500
+ # params: { id: 1 }
501
+ #
502
+ # job.wait_until_done!
503
+ # if !job.failed?
504
+ # job.data.each do |row|
505
+ # puts row[:name]
506
+ # end
507
+ # end
508
+ #
509
+ # @example Query using named query parameters with types:
510
+ # require "google/cloud/bigquery"
511
+ #
512
+ # bigquery = Google::Cloud::Bigquery.new
513
+ #
514
+ # job = bigquery.query_job "SELECT name FROM " \
515
+ # "`my_dataset.my_table` " \
516
+ # "WHERE id IN UNNEST(@ids)",
517
+ # params: { ids: [] },
518
+ # types: { ids: [:INT64] }
519
+ #
520
+ # job.wait_until_done!
521
+ # if !job.failed?
522
+ # job.data.each do |row|
523
+ # puts row[:name]
524
+ # end
525
+ # end
526
+ #
527
+ # @example Execute a DDL statement:
528
+ # require "google/cloud/bigquery"
529
+ #
530
+ # bigquery = Google::Cloud::Bigquery.new
531
+ #
532
+ # job = bigquery.query_job "CREATE TABLE " \
533
+ # "`my_dataset.my_table` " \
534
+ # "(x INT64)"
535
+ #
536
+ # job.wait_until_done!
537
+ # if !job.failed?
538
+ # table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
539
+ # end
540
+ #
541
+ # @example Execute a DML statement:
542
+ # require "google/cloud/bigquery"
543
+ #
544
+ # bigquery = Google::Cloud::Bigquery.new
545
+ #
546
+ # job = bigquery.query_job "UPDATE " \
547
+ # "`my_dataset.my_table` " \
548
+ # "SET x = x + 1 " \
549
+ # "WHERE x IS NOT NULL"
550
+ #
551
+ # job.wait_until_done!
552
+ # if !job.failed?
553
+ # puts job.num_dml_affected_rows
554
+ # end
555
+ #
556
+ # @example Query using external data source, set destination:
557
+ # require "google/cloud/bigquery"
558
+ #
559
+ # bigquery = Google::Cloud::Bigquery.new
560
+ #
561
+ # csv_url = "gs://bucket/path/to/data.csv"
562
+ # csv_table = bigquery.external csv_url do |csv|
563
+ # csv.autodetect = true
564
+ # csv.skip_leading_rows = 1
565
+ # end
566
+ #
567
+ # job = bigquery.query_job "SELECT * FROM my_ext_table" do |query|
568
+ # query.external = { my_ext_table: csv_table }
569
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
570
+ # query.table = dataset.table "my_table", skip_lookup: true
571
+ # end
572
+ #
573
+ # job.wait_until_done!
574
+ # if !job.failed?
575
+ # job.data.each do |row|
576
+ # puts row[:name]
577
+ # end
578
+ # end
579
+ #
580
+ def query_job query, params: nil, types: nil, external: nil, priority: "INTERACTIVE", cache: true, table: nil,
581
+ create: nil, write: nil, dryrun: nil, dataset: nil, project: nil, standard_sql: nil,
582
+ legacy_sql: nil, large_results: nil, flatten: nil, maximum_billing_tier: nil,
583
+ maximum_bytes_billed: nil, job_id: nil, prefix: nil, labels: nil, udfs: nil
584
+ ensure_service!
585
+ options = { params: params, types: types, external: external, priority: priority, cache: cache, table: table,
586
+ create: create, write: write, dryrun: dryrun, dataset: dataset,
587
+ project: (project || self.project), standard_sql: standard_sql, legacy_sql: legacy_sql,
588
+ large_results: large_results, flatten: flatten, maximum_billing_tier: maximum_billing_tier,
589
+ maximum_bytes_billed: maximum_bytes_billed, job_id: job_id, prefix: prefix, labels: labels,
590
+ udfs: udfs }
591
+
592
+ updater = QueryJob::Updater.from_options service, query, options
593
+
594
+ yield updater if block_given?
595
+
596
+ gapi = service.query_job updater.to_gapi
597
+ Job.from_gapi gapi, service
598
+ end
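
`priority`, `maximum_bytes_billed`, and `labels` are documented above but not exercised in the examples; a sketch with placeholder table and values:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    job = bigquery.query_job "SELECT COUNT(*) AS c FROM `my_dataset.my_table`",
                             priority: "BATCH",                   # queue instead of running interactively
                             maximum_bytes_billed: 1_000_000_000, # fail rather than bill past ~1 GB
                             labels: { "team" => "analytics" }

    job.wait_until_done!
    puts job.data.first[:c] unless job.failed?
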
599
+
600
+ ##
601
+ # Queries data and waits for the results. In this method, a {QueryJob}
602
+ # is created and its results are saved to a temporary table, then read
603
+ # from the table. Timeouts and transient errors are generally handled
604
+ # as needed to complete the query. When used for executing DDL/DML
605
+ # statements, this method does not return row data.
606
+ #
607
+ # The geographic location for the job ("US", "EU", etc.) can be set via
608
+ # {QueryJob::Updater#location=} in a block passed to this method.
609
+ #
610
+ # @see https://cloud.google.com/bigquery/querying-data Querying Data
611
+ #
612
+ # @param [String] query A query string, following the BigQuery [query
613
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
614
+ # query to execute. Example: "SELECT count(f1) FROM
615
+ # [myProjectId:myDatasetId.myTableId]".
616
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
617
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
618
+ # query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
619
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
620
+ # true.
621
+ #
622
+ # Ruby types are mapped to BigQuery types as follows:
623
+ #
624
+ # | BigQuery | Ruby | Notes |
625
+ # |-------------|--------------------------------------|------------------------------------------------|
626
+ # | `BOOL` | `true`/`false` | |
627
+ # | `INT64` | `Integer` | |
628
+ # | `FLOAT64` | `Float` | |
629
+ # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
630
+ # | `STRING` | `String` | |
631
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
632
+ # | `DATE` | `Date` | |
633
+ # | `TIMESTAMP` | `Time` | |
634
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
635
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
636
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
637
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
638
+ #
639
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
640
+ # of each BigQuery data type, including allowed values.
641
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
642
+ # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
643
+ # type for these values.
644
+ #
645
+ # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
646
+ # parameters. This must be a `Hash` when the query uses named query parameters. The values should be BigQuery
647
+ # type codes from the following list:
648
+ #
649
+ # * `:BOOL`
650
+ # * `:INT64`
651
+ # * `:FLOAT64`
652
+ # * `:NUMERIC`
653
+ # * `:STRING`
654
+ # * `:DATETIME`
655
+ # * `:DATE`
656
+ # * `:TIMESTAMP`
657
+ # * `:TIME`
658
+ # * `:BYTES`
659
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
660
+ # is specified as `[:INT64]`.
661
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
662
+ # match the `params` hash, and the values are the type codes that match the data.
663
+ #
664
+ # Types are optional.
665
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
666
+ # that represents the mapping of the external tables to the table
667
+ # names used in the SQL query. The hash keys are the table names, and
668
+ # the hash values are the external table objects. See {Project#query}.
669
+ # @param [Integer] max The maximum number of rows of data to return per
670
+ # page of results. Setting this flag to a small value such as 1000 and
671
+ # then paging through results might improve reliability when the query
672
+ # result set is large. In addition to this limit, responses are also
673
+ # limited to 10 MB. By default, there is no maximum row count, and
674
+ # only the byte limit applies.
675
+ # @param [Boolean] cache Whether to look for the result in the query
676
+ # cache. The query cache is a best-effort cache that will be flushed
677
+ # whenever tables in the query are modified. The default value is
678
+ # true. For more information, see [query
679
+ # caching](https://developers.google.com/bigquery/querying-data).
680
+ # @param [String] dataset Specifies the default datasetId and projectId
681
+ # to assume for any unqualified table names in the query. If not set,
682
+ # all table names in the query string must be qualified in the format
683
+ # 'datasetId.tableId'.
684
+ # @param [String] project Specifies the default projectId to assume for
685
+ # any unqualified table names in the query. Only used if `dataset`
686
+ # option is set.
687
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
688
+ # [standard
689
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
690
+ # dialect for this query. If set to true, the query will use standard
691
+ # SQL rather than the [legacy
692
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
693
+ # dialect. When set to true, the values of `large_results` and
694
+ # `flatten` are ignored; the query will be run as if `large_results`
695
+ # is true and `flatten` is false. Optional. The default value is
696
+ # true.
697
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
698
+ # [legacy
699
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
700
+ # dialect for this query. If set to false, the query will use
701
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/) dialect.
702
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
703
+ # When set to false, the values of `large_results` and `flatten` are
704
+ # ignored; the query will be run as if `large_results` is true and
705
+ # `flatten` is false. Optional. The default value is false.
706
+ # @yield [job] a job configuration object
707
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
708
+ # configuration object for setting additional options for the query.
709
+ #
710
+ # @return [Google::Cloud::Bigquery::Data]
711
+ #
712
+ # @example Query using standard SQL:
713
+ # require "google/cloud/bigquery"
714
+ #
715
+ # bigquery = Google::Cloud::Bigquery.new
716
+ #
717
+ # sql = "SELECT name FROM `my_project.my_dataset.my_table`"
718
+ # data = bigquery.query sql
719
+ #
720
+ # # Iterate over the first page of results
721
+ # data.each do |row|
722
+ # puts row[:name]
723
+ # end
724
+ # # Retrieve the next page of results
725
+ # data = data.next if data.next?
726
+ #
727
+ # @example Query using legacy SQL:
728
+ # require "google/cloud/bigquery"
729
+ #
730
+ # bigquery = Google::Cloud::Bigquery.new
731
+ #
732
+ # sql = "SELECT name FROM [my_project:my_dataset.my_table]"
733
+ # data = bigquery.query sql, legacy_sql: true
734
+ #
735
+ # # Iterate over the first page of results
736
+ # data.each do |row|
737
+ # puts row[:name]
738
+ # end
739
+ # # Retrieve the next page of results
740
+ # data = data.next if data.next?
741
+ #
742
+ # @example Retrieve all rows: (See {Data#all})
743
+ # require "google/cloud/bigquery"
744
+ #
745
+ # bigquery = Google::Cloud::Bigquery.new
746
+ #
747
+ # data = bigquery.query "SELECT name FROM `my_dataset.my_table`"
748
+ #
749
+ # data.all do |row|
750
+ # puts row[:name]
751
+ # end
752
+ #
753
+ # @example Query using positional query parameters:
754
+ # require "google/cloud/bigquery"
755
+ #
756
+ # bigquery = Google::Cloud::Bigquery.new
757
+ #
758
+ # data = bigquery.query "SELECT name " \
759
+ # "FROM `my_dataset.my_table` " \
760
+ # "WHERE id = ?",
761
+ # params: [1]
762
+ #
763
+ # # Iterate over the first page of results
764
+ # data.each do |row|
765
+ # puts row[:name]
766
+ # end
767
+ # # Retrieve the next page of results
768
+ # data = data.next if data.next?
769
+ #
770
+ # @example Query using named query parameters:
771
+ # require "google/cloud/bigquery"
772
+ #
773
+ # bigquery = Google::Cloud::Bigquery.new
774
+ #
775
+ # data = bigquery.query "SELECT name " \
776
+ # "FROM `my_dataset.my_table` " \
777
+ # "WHERE id = @id",
778
+ # params: { id: 1 }
779
+ #
780
+ # # Iterate over the first page of results
781
+ # data.each do |row|
782
+ # puts row[:name]
783
+ # end
784
+ # # Retrieve the next page of results
785
+ # data = data.next if data.next?
786
+ #
787
+ # @example Query using named query parameters with types:
788
+ # require "google/cloud/bigquery"
789
+ #
790
+ # bigquery = Google::Cloud::Bigquery.new
791
+ #
792
+ # data = bigquery.query "SELECT name FROM " \
793
+ # "`my_dataset.my_table` " \
794
+ # "WHERE id IN UNNEST(@ids)",
795
+ # params: { ids: [] },
796
+ # types: { ids: [:INT64] }
797
+ #
798
+ # # Iterate over the first page of results
799
+ # data.each do |row|
800
+ # puts row[:name]
801
+ # end
802
+ # # Retrieve the next page of results
803
+ # data = data.next if data.next?
804
+ #
805
+ # @example Execute a DDL statement:
806
+ # require "google/cloud/bigquery"
807
+ #
808
+ # bigquery = Google::Cloud::Bigquery.new
809
+ #
810
+ # data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
811
+ #
812
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
813
+ #
814
+ # @example Execute a DML statement:
815
+ # require "google/cloud/bigquery"
816
+ #
817
+ # bigquery = Google::Cloud::Bigquery.new
818
+ #
819
+ # data = bigquery.query "UPDATE `my_dataset.my_table` " \
820
+ # "SET x = x + 1 " \
821
+ # "WHERE x IS NOT NULL"
822
+ #
823
+ # puts data.num_dml_affected_rows
824
+ #
825
+ # @example Query using external data source, set destination:
826
+ # require "google/cloud/bigquery"
827
+ #
828
+ # bigquery = Google::Cloud::Bigquery.new
829
+ #
830
+ # csv_url = "gs://bucket/path/to/data.csv"
831
+ # csv_table = bigquery.external csv_url do |csv|
832
+ # csv.autodetect = true
833
+ # csv.skip_leading_rows = 1
834
+ # end
835
+ #
836
+ # data = bigquery.query "SELECT * FROM my_ext_table" do |query|
837
+ # query.external = { my_ext_table: csv_table }
838
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
839
+ # query.table = dataset.table "my_table", skip_lookup: true
840
+ # end
841
+ #
842
+ # # Iterate over the first page of results
843
+ # data.each do |row|
844
+ # puts row[:name]
845
+ # end
846
+ # # Retrieve the next page of results
847
+ # data = data.next if data.next?
848
+ #
849
+ def query query, params: nil, types: nil, external: nil, max: nil, cache: true, dataset: nil, project: nil,
850
+ standard_sql: nil, legacy_sql: nil, &block
851
+ job = query_job query, params: params, types: types, external: external, cache: cache, dataset: dataset,
852
+ project: project, standard_sql: standard_sql, legacy_sql: legacy_sql, &block
853
+ job.wait_until_done!
854
+
855
+ if job.failed?
856
+ begin
857
+ # raise to activate ruby exception cause handling
858
+ raise job.gapi_error
859
+ rescue StandardError => e
860
+ # wrap Google::Apis::Error with Google::Cloud::Error
861
+ raise Google::Cloud::Error.from_error(e)
862
+ end
863
+ end
864
+
865
+ job.data max: max
866
+ end
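
As the rescue clause above shows, a failed synchronous query surfaces as a `Google::Cloud::Error` wrapping the underlying `Google::Apis::Error`, so callers can rescue it directly; a minimal sketch with a placeholder table:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    begin
      data = bigquery.query "SELECT name FROM `my_dataset.my_table`"
      data.each { |row| puts row[:name] }
    rescue Google::Cloud::Error => e
      # Raised when the underlying query job fails (e.g. table not found, SQL error).
      warn "Query failed: #{e.message}"
    end
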
867
+
868
+ ##
869
+ # Creates a new External::DataSource (or subclass) object that
870
+ # represents the external data source that can be queried directly,
871
+ # even though the data is not stored in BigQuery. Instead of loading or
872
+ # streaming the data, this object references the external data source.
873
+ #
874
+ # @see https://cloud.google.com/bigquery/external-data-sources Querying
875
+ # External Data Sources
876
+ #
877
+ # @param [String, Array<String>] url The fully-qualified URL(s) that
878
+ # point to your data in Google Cloud. An attempt will be made to
879
+ # derive the format from the URLs provided.
880
+ # @param [String|Symbol] format The data format. This value will be used
881
+ # even if the provided URLs are recognized as a different format.
882
+ # Optional.
883
+ #
884
+ # The following values are supported:
885
+ #
886
+ # * `csv` - CSV
887
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
888
+ # * `avro` - [Avro](http://avro.apache.org/)
889
+ # * `sheets` - Google Sheets
890
+ # * `datastore_backup` - Cloud Datastore backup
891
+ # * `bigtable` - Bigtable
892
+ #
893
+ # @return [External::DataSource] External data source.
894
+ #
895
+ # @example
896
+ # require "google/cloud/bigquery"
897
+ #
898
+ # bigquery = Google::Cloud::Bigquery.new
899
+ #
900
+ # csv_url = "gs://bucket/path/to/data.csv"
901
+ # csv_table = bigquery.external csv_url do |csv|
902
+ # csv.autodetect = true
903
+ # csv.skip_leading_rows = 1
904
+ # end
905
+ #
906
+ # data = bigquery.query "SELECT * FROM my_ext_table",
907
+ # external: { my_ext_table: csv_table }
908
+ #
909
+ # # Iterate over the first page of results
910
+ # data.each do |row|
911
+ # puts row[:name]
912
+ # end
913
+ # # Retrieve the next page of results
914
+ # data = data.next if data.next?
915
+ #
916
+ def external url, format: nil
917
+ ext = External.from_urls url, format
918
+ yield ext if block_given?
919
+ ext
920
+ end
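
The `format` argument overrides whatever format is derived from the URL, which matters when the URL has no recognizable extension; a sketch with an assumed bucket path:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Extension-less export files: nothing can be derived from the URL, so state the format explicitly.
    json_url   = "gs://bucket/path/to/export-000000000000"
    json_table = bigquery.external json_url, format: :json

    data = bigquery.query "SELECT * FROM logs",
                          external: { logs: json_table }
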
921
+
922
+ ##
923
+ # Retrieves an existing dataset by ID.
924
+ #
925
+ # @param [String] dataset_id The ID of a dataset.
926
+ # @param [Boolean] skip_lookup Optionally create just a local reference
927
+ # object without verifying that the resource exists on the BigQuery
928
+ # service. Calls made on this object will raise errors if the resource
929
+ # does not exist. Default is `false`. Optional.
930
+ #
931
+ # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if the
932
+ # dataset does not exist.
933
+ #
934
+ # @example
935
+ # require "google/cloud/bigquery"
936
+ #
937
+ # bigquery = Google::Cloud::Bigquery.new
938
+ #
939
+ # dataset = bigquery.dataset "my_dataset"
940
+ # puts dataset.name
941
+ #
942
+ # @example Avoid retrieving the dataset resource with `skip_lookup`:
943
+ # require "google/cloud/bigquery"
944
+ #
945
+ # bigquery = Google::Cloud::Bigquery.new
946
+ #
947
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
948
+ #
949
+ def dataset dataset_id, skip_lookup: nil
950
+ ensure_service!
951
+ return Dataset.new_reference project, dataset_id, service if skip_lookup
952
+ gapi = service.get_dataset dataset_id
953
+ Dataset.from_gapi gapi, service
954
+ rescue Google::Cloud::NotFoundError
955
+ nil
956
+ end
957
+
958
+ ##
959
+ # Creates a new dataset.
960
+ #
961
+ # @param [String] dataset_id A unique ID for this dataset, without the
962
+ # project name. The ID must contain only letters (a-z, A-Z), numbers
963
+ # (0-9), or underscores (_). The maximum length is 1,024 characters.
964
+ # @param [String] name A descriptive name for the dataset.
965
+ # @param [String] description A user-friendly description of the
966
+ # dataset.
967
+ # @param [Integer] expiration The default lifetime of all tables in the
968
+ # dataset, in milliseconds. The minimum value is 3600000 milliseconds
969
+ # (one hour).
970
+ # @param [String] location The geographic location where the dataset
971
+ # should reside. Possible values include `EU` and `US`. The default
972
+ # value is `US`.
973
+ # @yield [access] a block for setting rules
974
+ # @yieldparam [Google::Cloud::Bigquery::Dataset] access the object
975
+ # accepting rules
976
+ #
977
+ # @return [Google::Cloud::Bigquery::Dataset]
978
+ #
979
+ # @example
980
+ # require "google/cloud/bigquery"
981
+ #
982
+ # bigquery = Google::Cloud::Bigquery.new
983
+ #
984
+ # dataset = bigquery.create_dataset "my_dataset"
985
+ #
986
+ # @example A name and description can be provided:
987
+ # require "google/cloud/bigquery"
988
+ #
989
+ # bigquery = Google::Cloud::Bigquery.new
990
+ #
991
+ # dataset = bigquery.create_dataset "my_dataset",
992
+ # name: "My Dataset",
993
+ # description: "This is my Dataset"
994
+ #
995
+ # @example Or, configure access with a block: (See {Dataset::Access})
996
+ # require "google/cloud/bigquery"
997
+ #
998
+ # bigquery = Google::Cloud::Bigquery.new
999
+ #
1000
+ # dataset = bigquery.create_dataset "my_dataset" do |dataset|
1001
+ # dataset.access.add_writer_user "writers@example.com"
1002
+ # end
1003
+ #
1004
+ def create_dataset dataset_id, name: nil, description: nil,
1005
+ expiration: nil, location: nil
1006
+ ensure_service!
1007
+
1008
+ new_ds = Google::Apis::BigqueryV2::Dataset.new(
1009
+ dataset_reference: Google::Apis::BigqueryV2::DatasetReference.new(
1010
+ project_id: project, dataset_id: dataset_id
1011
+ )
1012
+ )
1013
+
1014
+ # Can set location only on creation, no Dataset#location method
1015
+ new_ds.update! location: location unless location.nil?
1016
+
1017
+ updater = Dataset::Updater.new(new_ds).tap do |b|
1018
+ b.name = name unless name.nil?
1019
+ b.description = description unless description.nil?
1020
+ b.default_expiration = expiration unless expiration.nil?
1021
+ end
1022
+
1023
+ if block_given?
1024
+ yield updater
1025
+ updater.check_for_mutated_access!
1026
+ end
1027
+
1028
+ gapi = service.insert_dataset new_ds
1029
+ Dataset.from_gapi gapi, service
1030
+ end
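
`location` and `expiration` are accepted here, but only `name`, `description`, and the access block appear in the examples; a sketch using both (values are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    dataset = bigquery.create_dataset "my_eu_dataset",
                                      location: "EU",       # fixed at creation time
                                      expiration: 3_600_000 # default table lifetime in ms (the documented minimum)
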
1031
+
1032
+ ##
1033
+ # Retrieves the list of datasets belonging to the project.
1034
+ #
1035
+ # @param [Boolean] all Whether to list all datasets, including hidden
1036
+ # ones. The default is `false`.
1037
+ # @param [String] filter An expression for filtering the results of the
1038
+ # request by label. The syntax is `labels.<name>[:<value>]`.
1039
+ # Multiple filters can be `AND`ed together by connecting with a space.
1040
+ # Example: `labels.department:receiving labels.active`. See [Filtering
1041
+ # datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
1042
+ # @param [String] token A previously-returned page token representing
1043
+ # part of the larger set of results to view.
1044
+ # @param [Integer] max Maximum number of datasets to return.
1045
+ #
1046
+ # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
1047
+ # {Google::Cloud::Bigquery::Dataset::List})
1048
+ #
1049
+ # @example
1050
+ # require "google/cloud/bigquery"
1051
+ #
1052
+ # bigquery = Google::Cloud::Bigquery.new
1053
+ #
1054
+ # datasets = bigquery.datasets
1055
+ # datasets.each do |dataset|
1056
+ # puts dataset.name
1057
+ # end
1058
+ #
1059
+ # @example Retrieve hidden datasets with the `all` optional arg:
1060
+ # require "google/cloud/bigquery"
1061
+ #
1062
+ # bigquery = Google::Cloud::Bigquery.new
1063
+ #
1064
+ # all_datasets = bigquery.datasets all: true
1065
+ #
1066
+ # @example Retrieve all datasets: (See {Dataset::List#all})
1067
+ # require "google/cloud/bigquery"
1068
+ #
1069
+ # bigquery = Google::Cloud::Bigquery.new
1070
+ #
1071
+ # datasets = bigquery.datasets
1072
+ # datasets.all do |dataset|
1073
+ # puts dataset.name
1074
+ # end
1075
+ #
1076
+ def datasets all: nil, filter: nil, token: nil, max: nil
1077
+ ensure_service!
1078
+ gapi = service.list_datasets all: all, filter: filter, token: token, max: max
1079
+ Dataset::List.from_gapi gapi, service, all, filter, max
1080
+ end
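
The label `filter` syntax described above is not shown in the examples; a sketch reusing the filter string from the parameter docs:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Datasets labeled department=receiving that also carry an "active" label.
    datasets = bigquery.datasets filter: "labels.department:receiving labels.active"
    datasets.all { |dataset| puts dataset.dataset_id }
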
1081
+
1082
+ ##
1083
+ # Retrieves an existing job by ID.
1084
+ #
1085
+ # @param [String] job_id The ID of a job.
1086
+ # @param [String] location The geographic location where the job was
1087
+ # created. Required except for US and EU.
1088
+ #
1089
+ # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the job
1090
+ # does not exist.
1091
+ #
1092
+ # @example
1093
+ # require "google/cloud/bigquery"
1094
+ #
1095
+ # bigquery = Google::Cloud::Bigquery.new
1096
+ #
1097
+ # job = bigquery.job "my_job"
1098
+ #
1099
+ def job job_id, location: nil
1100
+ ensure_service!
1101
+ gapi = service.get_job job_id, location: location
1102
+ Job.from_gapi gapi, service
1103
+ rescue Google::Cloud::NotFoundError
1104
+ nil
1105
+ end
1106
+
1107
+ ##
1108
+ # Retrieves the list of jobs belonging to the project.
1109
+ #
1110
+ # @param [Boolean] all Whether to display jobs owned by all users in the
1111
+ # project. The default is `false`. Optional.
1112
+ # @param [String] token A previously-returned page token representing
1113
+ # part of the larger set of results to view. Optional.
1114
+ # @param [Integer] max Maximum number of jobs to return. Optional.
1115
+ # @param [String] filter A filter for job state. Optional.
1116
+ #
1117
+ # Acceptable values are:
1118
+ #
1119
+ # * `done` - Finished jobs
1120
+ # * `pending` - Pending jobs
1121
+ # * `running` - Running jobs
1122
+ # @param [Time] min_created_at Min value for {Job#created_at}. When
1123
+ # provided, only jobs created after or at this time are returned.
1124
+ # Optional.
1125
+ # @param [Time] max_created_at Max value for {Job#created_at}. When
1126
+ # provided, only jobs created before or at this time are returned.
1127
+ # Optional.
1128
+ # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
1129
+ # object or a job ID. If set, retrieve only child jobs of the
1130
+ # specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
1131
+ # and {Job#parent_job_id}.
1132
+ #
1133
+ # @return [Array<Google::Cloud::Bigquery::Job>] (See
1134
+ # {Google::Cloud::Bigquery::Job::List})
1135
+ #
1136
+ # @example
1137
+ # require "google/cloud/bigquery"
1138
+ #
1139
+ # bigquery = Google::Cloud::Bigquery.new
1140
+ #
1141
+ # jobs = bigquery.jobs
1142
+ # jobs.each do |job|
1143
+ # # process job
1144
+ # end
1145
+ #
1146
+ # @example Retrieve only running jobs using the `filter` optional arg:
1147
+ # require "google/cloud/bigquery"
1148
+ #
1149
+ # bigquery = Google::Cloud::Bigquery.new
1150
+ #
1151
+ # running_jobs = bigquery.jobs filter: "running"
1152
+ # running_jobs.each do |job|
1153
+ # # process job
1154
+ # end
1155
+ #
1156
+ # @example Retrieve only jobs created within provided times:
1157
+ # require "google/cloud/bigquery"
1158
+ #
1159
+ # bigquery = Google::Cloud::Bigquery.new
1160
+ #
1161
+ # two_days_ago = Time.now - 60*60*24*2
1162
+ # three_days_ago = Time.now - 60*60*24*3
1163
+ #
1164
+ # jobs = bigquery.jobs min_created_at: three_days_ago,
1165
+ # max_created_at: two_days_ago
1166
+ # jobs.each do |job|
1167
+ # # process job
1168
+ # end
1169
+ #
1170
+ # @example Retrieve all jobs: (See {Job::List#all})
1171
+ # require "google/cloud/bigquery"
1172
+ #
1173
+ # bigquery = Google::Cloud::Bigquery.new
1174
+ #
1175
+ # jobs = bigquery.jobs
1176
+ # jobs.all do |job|
1177
+ # # process job
1178
+ # end
1179
+ #
1180
+ # @example Retrieve child jobs by setting `parent_job`:
1181
+ # require "google/cloud/bigquery"
1182
+ #
1183
+ # bigquery = Google::Cloud::Bigquery.new
1184
+ #
1185
+ # multi_statement_sql = <<~SQL
1186
+ # -- Declare a variable to hold names as an array.
1187
+ # DECLARE top_names ARRAY<STRING>;
1188
+ # -- Build an array of the top 100 names from the year 2017.
1189
+ # SET top_names = (
1190
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
1191
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
1192
+ # WHERE year = 2017
1193
+ # );
1194
+ # -- Which names appear as words in Shakespeare's plays?
1195
+ # SELECT
1196
+ # name AS shakespeare_name
1197
+ # FROM UNNEST(top_names) AS name
1198
+ # WHERE name IN (
1199
+ # SELECT word
1200
+ # FROM `bigquery-public-data.samples.shakespeare`
1201
+ # );
1202
+ # SQL
1203
+ #
1204
+ # job = bigquery.query_job multi_statement_sql
1205
+ #
1206
+ # job.wait_until_done!
1207
+ #
1208
+ # child_jobs = bigquery.jobs parent_job: job
1209
+ #
1210
+ # child_jobs.each do |child_job|
1211
+ # script_statistics = child_job.script_statistics
1212
+ # puts script_statistics.evaluation_kind
1213
+ # script_statistics.stack_frames.each do |stack_frame|
1214
+ # puts stack_frame.text
1215
+ # end
1216
+ # end
1217
+ #
1218
+ def jobs all: nil,
1219
+ token: nil,
1220
+ max: nil,
1221
+ filter: nil,
1222
+ min_created_at: nil,
1223
+ max_created_at: nil,
1224
+ parent_job: nil
1225
+ ensure_service!
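+ # Accept either a Job object or a plain job ID string for parent_job.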
1226
+ parent_job = parent_job.job_id if parent_job.is_a? Job
1227
+ options = {
1228
+ parent_job_id: parent_job,
1229
+ all: all,
1230
+ token: token,
1231
+ max: max,
+ filter: filter,
1232
+ min_created_at: min_created_at,
1233
+ max_created_at: max_created_at
1234
+ }
1235
+ gapi = service.list_jobs(**options)
1236
+ Job::List.from_gapi gapi, service, **options
1237
+ end
1238
+
1239
+ ##
1240
+ # Retrieves the list of all projects for which the currently authorized
1241
+ # account has been granted any project role. The returned project
1242
+ # instances share the same credentials as the project used to retrieve
1243
+ # them, but lazily create a new API connection for interactions with the
1244
+ # BigQuery service.
1245
+ #
1246
+ # @param [String] token A previously-returned page token representing
1247
+ # part of the larger set of results to view.
1248
+ # @param [Integer] max Maximum number of projects to return.
1249
+ #
1250
+ # @return [Array<Google::Cloud::Bigquery::Project>] (See
1251
+ # {Google::Cloud::Bigquery::Project::List})
1252
+ #
1253
+ # @example
1254
+ # require "google/cloud/bigquery"
1255
+ #
1256
+ # bigquery = Google::Cloud::Bigquery.new
1257
+ #
1258
+ # projects = bigquery.projects
1259
+ # projects.each do |project|
1260
+ # puts project.name
1261
+ # project.datasets.all.each do |dataset|
1262
+ # puts dataset.name
1263
+ # end
1264
+ # end
1265
+ #
1266
+ # @example Retrieve all projects: (See {Project::List#all})
1267
+ # require "google/cloud/bigquery"
1268
+ #
1269
+ # bigquery = Google::Cloud::Bigquery.new
1270
+ #
1271
+ # projects = bigquery.projects
1272
+ #
1273
+ # projects.all do |project|
1274
+ # puts project.name
1275
+ # project.datasets.all.each do |dataset|
1276
+ # puts dataset.name
1277
+ # end
1278
+ # end
1279
+ #
1280
+ def projects token: nil, max: nil
1281
+ ensure_service!
1282
+ gapi = service.list_projects token: token, max: max
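+ # Each project in the returned list reuses this project's credentials.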
1283
+ Project::List.from_gapi gapi, service, max
1284
+ end
1285
+
1286
+ ##
1287
+ # Creates a Bigquery::Time object to represent a time, independent of a
1288
+ # specific date.
1289
+ #
1290
+ # @param [Integer] hour Hour, valid values from 0 to 23.
1291
+ # @param [Integer] minute Minute, valid values from 0 to 59.
1292
+ # @param [Integer, Float] second Second, valid values from 0 to 59. Can
1293
+ # contain microsecond precision.
1294
+ #
1295
+ # @return [Bigquery::Time]
1296
+ #
1297
+ # @example
1298
+ # require "google/cloud/bigquery"
1299
+ #
1300
+ # bigquery = Google::Cloud::Bigquery.new
1301
+ #
1302
+ # fourpm = bigquery.time 16, 0, 0
1303
+ # data = bigquery.query "SELECT name " \
1304
+ # "FROM `my_dataset.my_table`" \
1305
+ # "WHERE time_of_date = @time",
1306
+ # params: { time: fourpm }
1307
+ #
1308
+ # # Iterate over the first page of results
1309
+ # data.each do |row|
1310
+ # puts row[:name]
1311
+ # end
1312
+ # # Retrieve the next page of results
1313
+ # data = data.next if data.next?
1314
+ #
1315
+ # @example Create Time with fractional seconds:
1316
+ # require "google/cloud/bigquery"
1317
+ #
1318
+ # bigquery = Google::Cloud::Bigquery.new
1319
+ #
1320
+ # precise_time = bigquery.time 16, 35, 15.376541
1321
+ # data = bigquery.query "SELECT name " \
1322
+ # "FROM `my_dataset.my_table`" \
1323
+ # "WHERE time_of_date >= @time",
1324
+ # params: { time: precise_time }
1325
+ #
1326
+ # # Iterate over the first page of results
1327
+ # data.each do |row|
1328
+ # puts row[:name]
1329
+ # end
1330
+ # # Retrieve the next page of results
1331
+ # data = data.next if data.next?
1332
+ #
1333
+ def time hour, minute, second
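+ # Compose the TIME value from its parts and wrap it in Bigquery::Time.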
1334
+ Bigquery::Time.new "#{hour}:#{minute}:#{second}"
1335
+ end
1336
+
1337
+ ##
1338
+ # Creates a new schema instance. An optional block may be given to
1339
+ # configure the schema, otherwise the schema is returned empty and may
1340
+ # be configured directly.
1341
+ #
1342
+ # The returned schema can be passed to {Dataset#load} using the
1343
+ # `schema` option. However, for most use cases, the block yielded by
1344
+ # {Dataset#load} is a more convenient way to configure the schema
1345
+ # for the destination table.
1346
+ #
1347
+ # @yield [schema] a block for setting the schema
1348
+ # @yieldparam [Schema] schema the object accepting the schema
1349
+ #
1350
+ # @return [Google::Cloud::Bigquery::Schema]
1351
+ #
1352
+ # @example
1353
+ # require "google/cloud/bigquery"
1354
+ #
1355
+ # bigquery = Google::Cloud::Bigquery.new
1356
+ #
1357
+ # schema = bigquery.schema do |s|
1358
+ # s.string "first_name", mode: :required
1359
+ # s.record "cities_lived", mode: :repeated do |nested_schema|
1360
+ # nested_schema.string "place", mode: :required
1361
+ # nested_schema.integer "number_of_years", mode: :required
1362
+ # end
1363
+ # end
1364
+ #
1365
+ # dataset = bigquery.dataset "my_dataset"
1366
+ #
1367
+ # gs_url = "gs://my-bucket/file-name.csv"
1368
+ # load_job = dataset.load_job "my_new_table", gs_url, schema: schema
1369
+ #
1370
+ def schema
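+ # Calling Schema.from_gapi with no arguments builds an empty, mutable schema.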
1371
+ s = Schema.from_gapi
1372
+ yield s if block_given?
1373
+ s
1374
+ end
1375
+
1376
+ ##
1377
+ # Creates a new Bigquery::EncryptionConfiguration instance.
1378
+ #
1379
+ # This method does not execute an API call. Use the encryption
1380
+ # configuration to encrypt a table when creating one via
1381
+ # {Bigquery::Dataset#create_table}, {Bigquery::Dataset#load},
1382
+ # {Bigquery::Table#copy}, or {Bigquery::Project#query}.
1383
+ #
1384
+ # @param [String] kms_key Name of the Cloud KMS encryption key that
1385
+ # will be used to protect the destination BigQuery table. The BigQuery
1386
+ # Service Account associated with your project requires access to this
1387
+ # encryption key.
1388
+ #
1389
+ # @example Encrypt a new table
1390
+ # require "google/cloud/bigquery"
1391
+ #
1392
+ # bigquery = Google::Cloud::Bigquery.new
1393
+ # dataset = bigquery.dataset "my_dataset"
1394
+ #
1395
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1396
+ # encrypt_config = bigquery.encryption kms_key: key_name
1397
+ #
1398
+ # table = dataset.create_table "my_table" do |updater|
1399
+ # updater.encryption = encrypt_config
1400
+ # end
1401
+ #
1402
+ # @example Encrypt a load destination table
1403
+ # require "google/cloud/bigquery"
1404
+ #
1405
+ # bigquery = Google::Cloud::Bigquery.new
1406
+ # dataset = bigquery.dataset "my_dataset"
1407
+ #
1408
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1409
+ # encrypt_config = bigquery.encryption kms_key: key_name
1410
+ # job = dataset.load_job "my_table", "gs://abc/file" do |job|
1411
+ # job.encryption = encrypt_config
1412
+ # end
1413
+ #
1414
+ # @example Encrypt a copy destination table
1415
+ # require "google/cloud/bigquery"
1416
+ #
1417
+ # bigquery = Google::Cloud::Bigquery.new
1418
+ # dataset = bigquery.dataset "my_dataset"
1419
+ # table = dataset.table "my_table"
1420
+ #
1421
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1422
+ # encrypt_config = bigquery.encryption kms_key: key_name
1423
+ # job = table.copy_job "my_dataset.new_table" do |job|
1424
+ # job.encryption = encrypt_config
1425
+ # end
1426
+ #
1427
+ # @example Encrypt a query destination table
1428
+ # require "google/cloud/bigquery"
1429
+ #
1430
+ # bigquery = Google::Cloud::Bigquery.new
1431
+ # dataset = bigquery.dataset "my_dataset"
1432
+ #
1433
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
1434
+ # encrypt_config = bigquery.encryption kms_key: key_name
1435
+ # job = bigquery.query_job "SELECT 1;" do |query|
1436
+ # query.table = dataset.table "my_table", skip_lookup: true
1437
+ # query.encryption = encrypt_config
1438
+ # end
1439
+ #
1440
+ # @return [Google::Cloud::Bigquery::EncryptionConfiguration]
1441
+ def encryption kms_key: nil
1442
+ encrypt_config = Bigquery::EncryptionConfiguration.new
1443
+ encrypt_config.kms_key = kms_key unless kms_key.nil?
1444
+ encrypt_config
1445
+ end
1446
+
1447
+ ##
1448
+ # Extracts the data from the provided table to a Google Cloud Storage
1449
+ # file using an asynchronous method. In this method, an {ExtractJob} is
1450
+ # immediately returned. The caller may poll the service by repeatedly
1451
+ # calling {Job#reload!} and {Job#done?} to detect when the job is done,
1452
+ # or simply block until the job is done by calling
1453
+ # {Job#wait_until_done!}. See {#extract} for the synchronous version.
1454
+ # Use this method instead of {Table#extract_job} to extract data from
1455
+ # source tables in other projects.
1456
+ #
1457
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1458
+ # {ExtractJob::Updater#location=} in a block passed to this method.
1459
+ #
1460
+ # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
1461
+ # Exporting Data From BigQuery
1462
+ #
1463
+ # @param [String, Table] table The source table from which to extract
1464
+ # data. This can be a table object; or a string ID as specified by the
1465
+ # [Standard SQL Query
1466
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
1467
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
1468
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
1469
+ # (`project-name:dataset_id.table_id`).
1470
+ # @param [Google::Cloud::Storage::File, String, Array<String>]
1471
+ # extract_url The Google Storage file or file URI pattern(s) to which
1472
+ # BigQuery should extract the table data.
1473
+ # @param [String] format The exported file format. The default value is
1474
+ # `csv`.
1475
+ #
1476
+ # The following values are supported:
1477
+ #
1478
+ # * `csv` - CSV
1479
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1480
+ # * `avro` - [Avro](http://avro.apache.org/)
1481
+ # @param [String] compression The compression type to use for exported
1482
+ # files. Possible values include `GZIP` and `NONE`. The default value
1483
+ # is `NONE`.
1484
+ # @param [String] delimiter Delimiter to use between fields in the
1485
+ # exported data. Default is <code>,</code>.
1486
+ # @param [Boolean] header Whether to print out a header row in the
1487
+ # results. Default is `true`.
1488
+ # @param [String] job_id A user-defined ID for the extract job. The ID
1489
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
1490
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
1491
+ # `job_id` is provided, then `prefix` will not be used.
1492
+ #
1493
+ # See [Generating a job
1494
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
1495
+ # @param [String] prefix A string, usually human-readable, that will be
1496
+ # prepended to a generated value to produce a unique job ID. For
1497
+ # example, the prefix `daily_import_job_` can be given to generate a
1498
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1499
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1500
+ # underscores (_), or dashes (-). The maximum length of the entire ID
1501
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1502
+ # be used.
1503
+ # @param [Hash] labels A hash of user-provided labels associated with
1504
+ # the job. You can use these to organize and group your jobs. Label
1505
+ # keys and values can be no longer than 63 characters, can only
1506
+ # contain lowercase letters, numeric characters, underscores and
1507
+ # dashes. International characters are allowed. Label values are
1508
+ # optional. Label keys must start with a letter and each label in the
1509
+ # list must have a different key. See [Requirements for
1510
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1511
+ # @yield [job] a job configuration object
1512
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
1513
+ # configuration object for setting additional options.
1514
+ #
1515
+ # @return [Google::Cloud::Bigquery::ExtractJob]
1516
+ #
1517
+ # @example
1518
+ # require "google/cloud/bigquery"
1519
+ #
1520
+ # bigquery = Google::Cloud::Bigquery.new
1521
+ #
1522
+ # table_id = "bigquery-public-data.samples.shakespeare"
1523
+ # extract_job = bigquery.extract_job table_id,
1524
+ # "gs://my-bucket/shakespeare.csv"
1525
+ # extract_job.wait_until_done!
1526
+ # extract_job.done? #=> true
1527
+ #
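+ # @example Set the job location in a block (an illustrative sketch; the
+ #   table, bucket, and region below are placeholders and must match the
+ #   source table's location):
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   extract_job = bigquery.extract_job "my-project.my_dataset.my_table",
+ #   "gs://my-bucket/my_table.csv" do |job|
+ #   job.location = "EU"
+ #   end
+ #   extract_job.wait_until_done!
+ #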
1528
+ # @!group Data
1529
+ #
1530
+ def extract_job table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
1531
+ prefix: nil, labels: nil
1532
+ ensure_service!
1533
+ options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
1534
+ prefix: prefix, labels: labels }
1535
+
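+ # Resolve the table argument (Table object or string ID) to a table
+ # reference, defaulting to this project.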
1536
+ table_ref = Service.get_table_ref table, default_ref: project_ref
1537
+ updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
1538
+
1539
+ yield updater if block_given?
1540
+
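+ # Build the job resource and start the extract job via the service.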
1541
+ job_gapi = updater.to_gapi
1542
+ gapi = service.extract_table job_gapi
1543
+ Job.from_gapi gapi, service
1544
+ end
1545
+
1546
+ ##
1547
+ # Extracts the data from the provided table to a Google Cloud Storage
1548
+ # file using a synchronous method that blocks for a response. Timeouts
1549
+ # and transient errors are generally handled as needed to complete the
1550
+ # job. See {#extract_job} for the asynchronous version. Use this method
1551
+ # instead of {Table#extract} to extract data from source tables in other
1552
+ # projects.
1553
+ #
1554
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1555
+ # {ExtractJob::Updater#location=} in a block passed to this method.
1556
+ #
1557
+ # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
1558
+ # Exporting Data From BigQuery
1559
+ #
1560
+ # @param [String, Table] table The source table from which to extract
1561
+ # data. This can be a table object; or a string ID as specified by the
1562
+ # [Standard SQL Query
1563
+ # Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
1564
+ # (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
1565
+ # Reference](https://cloud.google.com/bigquery/query-reference#from)
1566
+ # (`project-name:dataset_id.table_id`).
1567
+ # @param [Google::Cloud::Storage::File, String, Array<String>]
1568
+ # extract_url The Google Storage file or file URI pattern(s) to which
1569
+ # BigQuery should extract the table data.
1570
+ # @param [String] format The exported file format. The default value is
1571
+ # `csv`.
1572
+ #
1573
+ # The following values are supported:
1574
+ #
1575
+ # * `csv` - CSV
1576
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1577
+ # * `avro` - [Avro](http://avro.apache.org/)
1578
+ # @param [String] compression The compression type to use for exported
1579
+ # files. Possible values include `GZIP` and `NONE`. The default value
1580
+ # is `NONE`.
1581
+ # @param [String] delimiter Delimiter to use between fields in the
1582
+ # exported data. Default is <code>,</code>.
1583
+ # @param [Boolean] header Whether to print out a header row in the
1584
+ # results. Default is `true`.
1585
+ # @yield [job] a job configuration object
1586
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
1587
+ # configuration object for setting additional options.
1588
+ #
1589
+ # @return [Boolean] Returns `true` if the extract operation succeeded.
1590
+ #
1591
+ # @example
1592
+ # require "google/cloud/bigquery"
1593
+ #
1594
+ # bigquery = Google::Cloud::Bigquery.new
1595
+ #
1596
+ # bigquery.extract "bigquery-public-data.samples.shakespeare",
1597
+ # "gs://my-bucket/shakespeare.csv"
1598
+ #
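+ # @example Export as newline-delimited JSON (an illustrative sketch; the
+ #   table and bucket names are placeholders):
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   bigquery.extract "my-project.my_dataset.my_table",
+ #   "gs://my-bucket/my_table.json",
+ #   format: "json"
+ #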
1599
+ # @!group Data
1600
+ #
1601
+ def extract table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
1602
+ job = extract_job table, extract_url,
1603
+ format: format,
1604
+ compression: compression,
1605
+ delimiter: delimiter,
1606
+ header: header,
1607
+ &block
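+ # Block until the job finishes and raise if it did not succeed.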
1608
+ job.wait_until_done!
1609
+ ensure_job_succeeded! job
1610
+ true
1611
+ end
1612
+
1613
+ ##
1614
+ # @private New Project from a Google API Client object, using the
1615
+ # same Credentials as this project.
1616
+ def self.from_gapi gapi, service
1617
+ project_service = Service.new gapi.project_reference.project_id,
1618
+ service.credentials,
1619
+ retries: service.retries,
1620
+ timeout: service.timeout
1621
+ new(project_service).tap do |p|
1622
+ p.instance_variable_set :@name, gapi.friendly_name
1623
+
1624
+ # TODO: remove `Integer` and set normally after migrating to Gax or
1625
+ # to google-api-client 0.10 (See google/google-api-ruby-client#439)
1626
+ p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
1627
+ end
1628
+ end
1629
+
1630
+ protected
1631
+
1632
+ ##
1633
+ # Raise an error unless an active service is available.
1634
+ def ensure_service!
1635
+ raise "Must have active connection" unless service
1636
+ end
1637
+
1638
+ def ensure_job_succeeded! job
1639
+ return unless job.failed?
1640
+ begin
1641
+ # raise to activate ruby exception cause handling
1642
+ raise job.gapi_error
1643
+ rescue StandardError => e
1644
+ # wrap Google::Apis::Error with Google::Cloud::Error
1645
+ raise Google::Cloud::Error.from_error(e)
1646
+ end
1647
+ end
1648
+
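+ ##
+ # A ProjectReference for the current project, used as the default when
+ # resolving table references.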
1649
+ def project_ref
1650
+ Google::Apis::BigqueryV2::ProjectReference.new project_id: project_id
1651
+ end
1652
+ end
1653
+ end
1654
+ end
1655
+ end