google-cloud-bigquery 1.21.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +16 -0
- data/AUTHENTICATION.md +158 -0
- data/CHANGELOG.md +397 -0
- data/CODE_OF_CONDUCT.md +40 -0
- data/CONTRIBUTING.md +188 -0
- data/LICENSE +201 -0
- data/LOGGING.md +27 -0
- data/OVERVIEW.md +463 -0
- data/TROUBLESHOOTING.md +31 -0
- data/lib/google-cloud-bigquery.rb +139 -0
- data/lib/google/cloud/bigquery.rb +145 -0
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +383 -0
- data/lib/google/cloud/bigquery/copy_job.rb +316 -0
- data/lib/google/cloud/bigquery/credentials.rb +50 -0
- data/lib/google/cloud/bigquery/data.rb +526 -0
- data/lib/google/cloud/bigquery/dataset.rb +2845 -0
- data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
- data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
- data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
- data/lib/google/cloud/bigquery/external.rb +2432 -0
- data/lib/google/cloud/bigquery/extract_job.rb +368 -0
- data/lib/google/cloud/bigquery/insert_response.rb +180 -0
- data/lib/google/cloud/bigquery/job.rb +657 -0
- data/lib/google/cloud/bigquery/job/list.rb +162 -0
- data/lib/google/cloud/bigquery/load_job.rb +1704 -0
- data/lib/google/cloud/bigquery/model.rb +740 -0
- data/lib/google/cloud/bigquery/model/list.rb +164 -0
- data/lib/google/cloud/bigquery/project.rb +1655 -0
- data/lib/google/cloud/bigquery/project/list.rb +161 -0
- data/lib/google/cloud/bigquery/query_job.rb +1695 -0
- data/lib/google/cloud/bigquery/routine.rb +1108 -0
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/schema.rb +564 -0
- data/lib/google/cloud/bigquery/schema/field.rb +668 -0
- data/lib/google/cloud/bigquery/service.rb +589 -0
- data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
- data/lib/google/cloud/bigquery/table.rb +3340 -0
- data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
- data/lib/google/cloud/bigquery/table/list.rb +172 -0
- data/lib/google/cloud/bigquery/time.rb +65 -0
- data/lib/google/cloud/bigquery/version.rb +22 -0
- metadata +297 -0
data/lib/google/cloud/bigquery/project/list.rb
@@ -0,0 +1,161 @@
+# Copyright 2016 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+require "delegate"
+
+module Google
+  module Cloud
+    module Bigquery
+      class Project
+        ##
+        # Project::List is a special case Array with additional values.
+        class List < DelegateClass(::Array)
+          ##
+          # If not empty, indicates that there are more records that match
+          # the request and this value should be passed to continue.
+          attr_accessor :token
+
+          # A hash of this page of results.
+          attr_accessor :etag
+
+          ##
+          # @private Create a new Project::List with an array of
+          # Project instances.
+          def initialize arr = []
+            super arr
+          end
+
+          ##
+          # Whether there is a next page of projects.
+          #
+          # @return [Boolean]
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   projects = bigquery.projects
+          #   if projects.next?
+          #     next_projects = projects.next
+          #   end
+          def next?
+            !token.nil?
+          end
+
+          ##
+          # Retrieve the next page of projects.
+          #
+          # @return [Project::List]
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   projects = bigquery.projects
+          #   if projects.next?
+          #     next_projects = projects.next
+          #   end
+          def next
+            return nil unless next?
+            ensure_service!
+            gapi = @service.list_projects token: token, max: @max
+            self.class.from_gapi gapi, @service, @max
+          end
+
+          ##
+          # Retrieves remaining results by repeatedly invoking {#next} until
+          # {#next?} returns `false`. Calls the given block once for each
+          # result, which is passed as the argument to the block.
+          #
+          # An Enumerator is returned if no block is given.
+          #
+          # This method will make repeated API calls until all remaining results
+          # are retrieved. (Unlike `#each`, for example, which merely iterates
+          # over the results returned by a single API call.) Use with caution.
+          #
+          # @param [Integer] request_limit The upper limit of API requests to
+          #   make to load all projects. Default is no limit.
+          # @yield [project] The block for accessing each project.
+          # @yieldparam [Project] project The project object.
+          #
+          # @return [Enumerator]
+          #
+          # @example Iterating each result by passing a block:
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   bigquery.projects.all do |project|
+          #     puts project.name
+          #   end
+          #
+          # @example Using the enumerator by not passing a block:
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   all_project_ids = bigquery.projects.all.map do |project|
+          #     project.name
+          #   end
+          #
+          # @example Limit the number of API calls made:
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   bigquery.projects.all(request_limit: 10) do |project|
+          #     puts project.name
+          #   end
+          #
+          def all request_limit: nil
+            request_limit = request_limit.to_i if request_limit
+            return enum_for :all, request_limit: request_limit unless block_given?
+            results = self
+            loop do
+              results.each { |r| yield r }
+              if request_limit
+                request_limit -= 1
+                break if request_limit.negative?
+              end
+              break unless results.next?
+              results = results.next
+            end
+          end
+
+          ##
+          # @private New Project::List from a response object.
+          def self.from_gapi gapi_list, service, max = nil
+            projects = List.new(Array(gapi_list.projects).map { |gapi_object| Project.from_gapi gapi_object, service })
+            projects.instance_variable_set :@token, gapi_list.next_page_token
+            projects.instance_variable_set :@etag, gapi_list.etag
+            projects.instance_variable_set :@service, service
+            projects.instance_variable_set :@max, max
+            projects
+          end
+
+          protected
+
+          ##
+          # Raise an error unless an active service is available.
+          def ensure_service!
+            raise "Must have active connection" unless @service
+          end
+        end
+      end
+    end
+  end
+end
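Taken together, `Project::List` is pagination sugar over `@service.list_projects`: `#next?` checks the page token, `#next` re-issues the list call, and `#all` loops the two with an optional cap on API requests. A minimal usage sketch (not part of the diff), assuming Application Default Credentials with permission to list projects:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Walk every page; #all keeps calling #next until #next? is false,
    # capped here at 5 list_projects API requests.
    bigquery.projects.all(request_limit: 5) do |project|
      puts project.project_id
    end

    # Without a block, #all returns an Enumerator backed by the same paging.
    names = bigquery.projects.all.map(&:name)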
@@ -0,0 +1,1695 @@
|
|
1
|
+
# Copyright 2015 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/bigquery/service"
|
17
|
+
require "google/cloud/bigquery/data"
|
18
|
+
require "google/cloud/bigquery/encryption_configuration"
|
19
|
+
require "google/apis/bigquery_v2"
|
20
|
+
|
21
|
+
module Google
|
22
|
+
module Cloud
|
23
|
+
module Bigquery
|
24
|
+
##
|
25
|
+
# # QueryJob
|
26
|
+
#
|
27
|
+
# A {Job} subclass representing a query operation that may be performed
|
28
|
+
# on a {Table}. A QueryJob instance is created when you call
|
29
|
+
# {Project#query_job}, {Dataset#query_job}.
|
30
|
+
#
|
31
|
+
# @see https://cloud.google.com/bigquery/querying-data Querying Data
|
32
|
+
# @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
|
33
|
+
# reference
|
34
|
+
#
|
35
|
+
# @example
|
36
|
+
# require "google/cloud/bigquery"
|
37
|
+
#
|
38
|
+
# bigquery = Google::Cloud::Bigquery.new
|
39
|
+
#
|
40
|
+
# job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
|
41
|
+
# "`bigquery-public-data.samples.shakespeare`"
|
42
|
+
#
|
43
|
+
# job.wait_until_done!
|
44
|
+
#
|
45
|
+
# if job.failed?
|
46
|
+
# puts job.error
|
47
|
+
# else
|
48
|
+
# puts job.data.first
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# @example With multiple statements and child jobs:
|
52
|
+
# require "google/cloud/bigquery"
|
53
|
+
#
|
54
|
+
# bigquery = Google::Cloud::Bigquery.new
|
55
|
+
#
|
56
|
+
# multi_statement_sql = <<~SQL
|
57
|
+
# -- Declare a variable to hold names as an array.
|
58
|
+
# DECLARE top_names ARRAY<STRING>;
|
59
|
+
# -- Build an array of the top 100 names from the year 2017.
|
60
|
+
# SET top_names = (
|
61
|
+
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
|
62
|
+
# FROM `bigquery-public-data.usa_names.usa_1910_current`
|
63
|
+
# WHERE year = 2017
|
64
|
+
# );
|
65
|
+
# -- Which names appear as words in Shakespeare's plays?
|
66
|
+
# SELECT
|
67
|
+
# name AS shakespeare_name
|
68
|
+
# FROM UNNEST(top_names) AS name
|
69
|
+
# WHERE name IN (
|
70
|
+
# SELECT word
|
71
|
+
# FROM `bigquery-public-data.samples.shakespeare`
|
72
|
+
# );
|
73
|
+
# SQL
|
74
|
+
#
|
75
|
+
# job = bigquery.query_job multi_statement_sql
|
76
|
+
#
|
77
|
+
# job.wait_until_done!
|
78
|
+
#
|
79
|
+
# child_jobs = bigquery.jobs parent_job: job
|
80
|
+
#
|
81
|
+
# child_jobs.each do |child_job|
|
82
|
+
# script_statistics = child_job.script_statistics
|
83
|
+
# puts script_statistics.evaluation_kind
|
84
|
+
# script_statistics.stack_frames.each do |stack_frame|
|
85
|
+
# puts stack_frame.text
|
86
|
+
# end
|
87
|
+
# end
|
88
|
+
#
|
89
|
+
class QueryJob < Job
|
90
|
+
##
|
91
|
+
# Checks if the priority for the query is `BATCH`.
|
92
|
+
#
|
93
|
+
# @return [Boolean] `true` when the priority is `BATCH`, `false`
|
94
|
+
# otherwise.
|
95
|
+
#
|
96
|
+
def batch?
|
97
|
+
val = @gapi.configuration.query.priority
|
98
|
+
val == "BATCH"
|
99
|
+
end
|
100
|
+
|
101
|
+
##
|
102
|
+
# Checks if the priority for the query is `INTERACTIVE`.
|
103
|
+
#
|
104
|
+
# @return [Boolean] `true` when the priority is `INTERACTIVE`, `false`
|
105
|
+
# otherwise.
|
106
|
+
#
|
107
|
+
def interactive?
|
108
|
+
val = @gapi.configuration.query.priority
|
109
|
+
return true if val.nil?
|
110
|
+
val == "INTERACTIVE"
|
111
|
+
end
|
112
|
+
|
113
|
+
##
|
114
|
+
# Checks if the the query job allows arbitrarily large results at a
|
115
|
+
# slight cost to performance.
|
116
|
+
#
|
117
|
+
# @return [Boolean] `true` when large results are allowed, `false`
|
118
|
+
# otherwise.
|
119
|
+
#
|
120
|
+
def large_results?
|
121
|
+
val = @gapi.configuration.query.allow_large_results
|
122
|
+
return false if val.nil?
|
123
|
+
val
|
124
|
+
end
|
125
|
+
|
126
|
+
##
|
127
|
+
# Checks if the query job looks for an existing result in the query
|
128
|
+
# cache. For more information, see [Query
|
129
|
+
# Caching](https://cloud.google.com/bigquery/querying-data#querycaching).
|
130
|
+
#
|
131
|
+
# @return [Boolean] `true` when the query cache will be used, `false`
|
132
|
+
# otherwise.
|
133
|
+
#
|
134
|
+
def cache?
|
135
|
+
val = @gapi.configuration.query.use_query_cache
|
136
|
+
return false if val.nil?
|
137
|
+
val
|
138
|
+
end
|
139
|
+
|
140
|
+
##
|
141
|
+
# If set, don't actually run this job. A valid query will return a
|
142
|
+
# mostly empty response with some processing statistics, while an
|
143
|
+
# invalid query will return the same error it would if it wasn't a dry
|
144
|
+
# run.
|
145
|
+
#
|
146
|
+
# @return [Boolean] `true` when the dry run flag is set for the query
|
147
|
+
# job, `false` otherwise.
|
148
|
+
#
|
149
|
+
def dryrun?
|
150
|
+
@gapi.configuration.dry_run
|
151
|
+
end
|
152
|
+
alias dryrun dryrun?
|
153
|
+
alias dry_run dryrun?
|
154
|
+
alias dry_run? dryrun?
|
155
|
+
|
156
|
+
##
|
157
|
+
# Checks if the query job flattens nested and repeated fields in the
|
158
|
+
# query results. The default is `true`. If the value is `false`,
|
159
|
+
# #large_results? should return `true`.
|
160
|
+
#
|
161
|
+
# @return [Boolean] `true` when the job flattens results, `false`
|
162
|
+
# otherwise.
|
163
|
+
#
|
164
|
+
def flatten?
|
165
|
+
val = @gapi.configuration.query.flatten_results
|
166
|
+
return true if val.nil?
|
167
|
+
val
|
168
|
+
end
|
169
|
+
|
170
|
+
##
|
171
|
+
# Limits the billing tier for this job. Queries that have resource usage
|
172
|
+
# beyond this tier will raise (without incurring a charge). If
|
173
|
+
# unspecified, this will be set to your project default. For more
|
174
|
+
# information, see [High-Compute
|
175
|
+
# queries](https://cloud.google.com/bigquery/pricing#high-compute).
|
176
|
+
#
|
177
|
+
# @return [Integer, nil] The tier number, or `nil` for the project
|
178
|
+
# default.
|
179
|
+
#
|
180
|
+
def maximum_billing_tier
|
181
|
+
@gapi.configuration.query.maximum_billing_tier
|
182
|
+
end
|
183
|
+
|
184
|
+
##
|
185
|
+
# Limits the bytes billed for this job. Queries that will have bytes
|
186
|
+
# billed beyond this limit will raise (without incurring a charge). If
|
187
|
+
# `nil`, this will be set to your project default.
|
188
|
+
#
|
189
|
+
# @return [Integer, nil] The number of bytes, or `nil` for the project
|
190
|
+
# default.
|
191
|
+
#
|
192
|
+
def maximum_bytes_billed
|
193
|
+
Integer @gapi.configuration.query.maximum_bytes_billed
|
194
|
+
rescue StandardError
|
195
|
+
nil
|
196
|
+
end
|
197
|
+
|
198
|
+
##
|
199
|
+
# Checks if the query results are from the query cache.
|
200
|
+
#
|
201
|
+
# @return [Boolean] `true` when the job statistics indicate a cache hit,
|
202
|
+
# `false` otherwise.
|
203
|
+
#
|
204
|
+
def cache_hit?
|
205
|
+
return false unless @gapi.statistics.query
|
206
|
+
@gapi.statistics.query.cache_hit
|
207
|
+
end
|
208
|
+
|
209
|
+
##
|
210
|
+
# The number of bytes processed by the query.
|
211
|
+
#
|
212
|
+
# @return [Integer, nil] Total bytes processed for the job.
|
213
|
+
#
|
214
|
+
def bytes_processed
|
215
|
+
Integer @gapi.statistics.query.total_bytes_processed
|
216
|
+
rescue StandardError
|
217
|
+
nil
|
218
|
+
end
|
219
|
+
|
220
|
+
##
|
221
|
+
# Describes the execution plan for the query.
|
222
|
+
#
|
223
|
+
# @return [Array<Google::Cloud::Bigquery::QueryJob::Stage>, nil] An
|
224
|
+
# array containing the stages of the execution plan.
|
225
|
+
#
|
226
|
+
# @example
|
227
|
+
# require "google/cloud/bigquery"
|
228
|
+
#
|
229
|
+
# bigquery = Google::Cloud::Bigquery.new
|
230
|
+
#
|
231
|
+
# sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
|
232
|
+
# job = bigquery.query_job sql
|
233
|
+
#
|
234
|
+
# job.wait_until_done!
|
235
|
+
#
|
236
|
+
# stages = job.query_plan
|
237
|
+
# stages.each do |stage|
|
238
|
+
# puts stage.name
|
239
|
+
# stage.steps.each do |step|
|
240
|
+
# puts step.kind
|
241
|
+
# puts step.substeps.inspect
|
242
|
+
# end
|
243
|
+
# end
|
244
|
+
#
|
245
|
+
def query_plan
|
246
|
+
return nil unless @gapi&.statistics&.query&.query_plan
|
247
|
+
Array(@gapi.statistics.query.query_plan).map { |stage| Stage.from_gapi stage }
|
248
|
+
end
|
249
|
+
|
250
|
+
##
|
251
|
+
# The type of query statement, if valid. Possible values (new values
|
252
|
+
# might be added in the future):
|
253
|
+
#
|
254
|
+
# * "CREATE_MODEL": DDL statement, see [Using Data Definition Language
|
255
|
+
# Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
256
|
+
# * "CREATE_TABLE": DDL statement, see [Using Data Definition Language
|
257
|
+
# Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
258
|
+
# * "CREATE_TABLE_AS_SELECT": DDL statement, see [Using Data Definition
|
259
|
+
# Language Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
260
|
+
# * "CREATE_VIEW": DDL statement, see [Using Data Definition Language
|
261
|
+
# Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
262
|
+
# * "DELETE": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
|
263
|
+
# * "DROP_MODEL": DDL statement, see [Using Data Definition Language
|
264
|
+
# Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
265
|
+
# * "DROP_TABLE": DDL statement, see [Using Data Definition Language
|
266
|
+
# Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
267
|
+
# * "DROP_VIEW": DDL statement, see [Using Data Definition Language
|
268
|
+
# Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
|
269
|
+
# * "INSERT": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
|
270
|
+
# * "MERGE": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
|
271
|
+
# * "SELECT": SQL query, see [Standard SQL Query Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax)
|
272
|
+
# * "UPDATE": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
|
273
|
+
#
|
274
|
+
# @return [String, nil] The type of query statement.
|
275
|
+
#
|
276
|
+
def statement_type
|
277
|
+
return nil unless @gapi.statistics.query
|
278
|
+
@gapi.statistics.query.statement_type
|
279
|
+
end
|
280
|
+
|
281
|
+
##
|
282
|
+
# Whether the query is a DDL statement.
|
283
|
+
#
|
284
|
+
# @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language
|
285
|
+
# Using Data Definition Language Statements
|
286
|
+
#
|
287
|
+
# @return [Boolean]
|
288
|
+
#
|
289
|
+
# @example
|
290
|
+
# require "google/cloud/bigquery"
|
291
|
+
#
|
292
|
+
# bigquery = Google::Cloud::Bigquery.new
|
293
|
+
# query_job = bigquery.query_job "CREATE TABLE my_table (x INT64)"
|
294
|
+
#
|
295
|
+
# query_job.statement_type #=> "CREATE_TABLE"
|
296
|
+
# query_job.ddl? #=> true
|
297
|
+
#
|
298
|
+
def ddl?
|
299
|
+
["CREATE_MODEL", "CREATE_TABLE", "CREATE_TABLE_AS_SELECT", "CREATE_VIEW", "\n", "DROP_MODEL", "DROP_TABLE",
|
300
|
+
"DROP_VIEW"].include? statement_type
|
301
|
+
end
|
302
|
+
|
303
|
+
##
|
304
|
+
# Whether the query is a DML statement.
|
305
|
+
#
|
306
|
+
# @see https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax
|
307
|
+
# Data Manipulation Language Syntax
|
308
|
+
#
|
309
|
+
# @return [Boolean]
|
310
|
+
#
|
311
|
+
# @example
|
312
|
+
# require "google/cloud/bigquery"
|
313
|
+
#
|
314
|
+
# bigquery = Google::Cloud::Bigquery.new
|
315
|
+
# query_job = bigquery.query_job "UPDATE my_table " \
|
316
|
+
# "SET x = x + 1 " \
|
317
|
+
# "WHERE x IS NOT NULL"
|
318
|
+
#
|
319
|
+
# query_job.statement_type #=> "UPDATE"
|
320
|
+
# query_job.dml? #=> true
|
321
|
+
#
|
322
|
+
def dml?
|
323
|
+
["INSERT", "UPDATE", "MERGE", "DELETE"].include? statement_type
|
324
|
+
end
|
325
|
+
|
326
|
+
##
|
327
|
+
# The DDL operation performed, possibly dependent on the pre-existence
|
328
|
+
# of the DDL target. (See {#ddl_target_table}.) Possible values (new
|
329
|
+
# values might be added in the future):
|
330
|
+
#
|
331
|
+
# * "CREATE": The query created the DDL target.
|
332
|
+
# * "SKIP": No-op. Example cases: the query is
|
333
|
+
# `CREATE TABLE IF NOT EXISTS` while the table already exists, or the
|
334
|
+
# query is `DROP TABLE IF EXISTS` while the table does not exist.
|
335
|
+
# * "REPLACE": The query replaced the DDL target. Example case: the
|
336
|
+
# query is `CREATE OR REPLACE TABLE`, and the table already exists.
|
337
|
+
# * "DROP": The query deleted the DDL target.
|
338
|
+
#
|
339
|
+
# @return [String, nil] The DDL operation performed.
|
340
|
+
#
|
341
|
+
def ddl_operation_performed
|
342
|
+
return nil unless @gapi.statistics.query
|
343
|
+
@gapi.statistics.query.ddl_operation_performed
|
344
|
+
end
|
345
|
+
|
346
|
+
##
|
347
|
+
# The DDL target routine, in reference state. (See {Routine#reference?}.)
|
348
|
+
# Present only for `CREATE/DROP FUNCTION/PROCEDURE` queries. (See
|
349
|
+
# {#statement_type}.)
|
350
|
+
#
|
351
|
+
# @return [Google::Cloud::Bigquery::Routine, nil] The DDL target routine, in
|
352
|
+
# reference state.
|
353
|
+
#
|
354
|
+
def ddl_target_routine
|
355
|
+
return nil unless @gapi.statistics.query
|
356
|
+
ensure_service!
|
357
|
+
routine = @gapi.statistics.query.ddl_target_routine
|
358
|
+
return nil unless routine
|
359
|
+
Google::Cloud::Bigquery::Routine.new_reference_from_gapi routine, service
|
360
|
+
end
|
361
|
+
|
362
|
+
##
|
363
|
+
# The DDL target table, in reference state. (See {Table#reference?}.)
|
364
|
+
# Present only for `CREATE/DROP TABLE/VIEW` queries. (See
|
365
|
+
# {#statement_type}.)
|
366
|
+
#
|
367
|
+
# @return [Google::Cloud::Bigquery::Table, nil] The DDL target table, in
|
368
|
+
# reference state.
|
369
|
+
#
|
370
|
+
def ddl_target_table
|
371
|
+
return nil unless @gapi.statistics.query
|
372
|
+
ensure_service!
|
373
|
+
table = @gapi.statistics.query.ddl_target_table
|
374
|
+
return nil unless table
|
375
|
+
Google::Cloud::Bigquery::Table.new_reference_from_gapi table, service
|
376
|
+
end
|
377
|
+
|
378
|
+
##
|
379
|
+
# The number of rows affected by a DML statement. Present only for DML
|
380
|
+
# statements `INSERT`, `UPDATE` or `DELETE`. (See {#statement_type}.)
|
381
|
+
#
|
382
|
+
# @return [Integer, nil] The number of rows affected by a DML statement,
|
383
|
+
# or `nil` if the query is not a DML statement.
|
384
|
+
#
|
385
|
+
def num_dml_affected_rows
|
386
|
+
return nil unless @gapi.statistics.query
|
387
|
+
@gapi.statistics.query.num_dml_affected_rows
|
388
|
+
end
|
389
|
+
|
390
|
+
##
|
391
|
+
# The table in which the query results are stored.
|
392
|
+
#
|
393
|
+
# @return [Table] A table instance.
|
394
|
+
#
|
395
|
+
def destination
|
396
|
+
table = @gapi.configuration.query.destination_table
|
397
|
+
return nil unless table
|
398
|
+
retrieve_table table.project_id,
|
399
|
+
table.dataset_id,
|
400
|
+
table.table_id
|
401
|
+
end
|
402
|
+
|
403
|
+
##
|
404
|
+
# Checks if the query job is using legacy sql.
|
405
|
+
#
|
406
|
+
# @return [Boolean] `true` when legacy sql is used, `false` otherwise.
|
407
|
+
#
|
408
|
+
def legacy_sql?
|
409
|
+
val = @gapi.configuration.query.use_legacy_sql
|
410
|
+
return true if val.nil?
|
411
|
+
val
|
412
|
+
end
|
413
|
+
|
414
|
+
##
|
415
|
+
# Checks if the query job is using standard sql.
|
416
|
+
#
|
417
|
+
# @return [Boolean] `true` when standard sql is used, `false` otherwise.
|
418
|
+
#
|
419
|
+
def standard_sql?
|
420
|
+
!legacy_sql?
|
421
|
+
end
|
422
|
+
|
423
|
+
##
|
424
|
+
# The user-defined function resources used in the query. May be either a
|
425
|
+
# code resource to load from a Google Cloud Storage URI
|
426
|
+
# (`gs://bucket/path`), or an inline resource that contains code for a
|
427
|
+
# user-defined function (UDF). Providing an inline code resource is
|
428
|
+
# equivalent to providing a URI for a file containing the same code. See
|
429
|
+
# [User-Defined Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
430
|
+
#
|
431
|
+
# @return [Array<String>] An array containing Google Cloud Storage URIs
|
432
|
+
# and/or inline source code.
|
433
|
+
#
|
434
|
+
def udfs
|
435
|
+
udfs_gapi = @gapi.configuration.query.user_defined_function_resources
|
436
|
+
return nil unless udfs_gapi
|
437
|
+
Array(udfs_gapi).map { |udf| udf.inline_code || udf.resource_uri }
|
438
|
+
end
|
439
|
+
|
440
|
+
##
|
441
|
+
# The encryption configuration of the destination table.
|
442
|
+
#
|
443
|
+
# @return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom
|
444
|
+
# encryption configuration (e.g., Cloud KMS keys).
|
445
|
+
#
|
446
|
+
# @!group Attributes
|
447
|
+
def encryption
|
448
|
+
EncryptionConfiguration.from_gapi @gapi.configuration.query.destination_encryption_configuration
|
449
|
+
end
|
450
|
+
|
451
|
+
###
|
452
|
+
# Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
|
453
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
454
|
+
#
|
455
|
+
# @return [Boolean] `true` when the table is range partitioned, or `false` otherwise.
|
456
|
+
#
|
457
|
+
# @!group Attributes
|
458
|
+
#
|
459
|
+
def range_partitioning?
|
460
|
+
!@gapi.configuration.query.range_partitioning.nil?
|
461
|
+
end
|
462
|
+
|
463
|
+
###
|
464
|
+
# The field on which the destination table will be range partitioned, if any. The field must be a
|
465
|
+
# top-level `NULLABLE/REQUIRED` field. The only supported type is `INTEGER/INT64`. See
|
466
|
+
# [Creating and using integer range partitioned
|
467
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
468
|
+
#
|
469
|
+
# @return [String, nil] The partition field, if a field was configured, or `nil` if not range partitioned.
|
470
|
+
#
|
471
|
+
# @!group Attributes
|
472
|
+
#
|
473
|
+
def range_partitioning_field
|
474
|
+
@gapi.configuration.query.range_partitioning.field if range_partitioning?
|
475
|
+
end
|
476
|
+
|
477
|
+
###
|
478
|
+
# The start of range partitioning, inclusive. See [Creating and using integer range partitioned
|
479
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
480
|
+
#
|
481
|
+
# @return [Integer, nil] The start of range partitioning, inclusive, or `nil` if not range partitioned.
|
482
|
+
#
|
483
|
+
# @!group Attributes
|
484
|
+
#
|
485
|
+
def range_partitioning_start
|
486
|
+
@gapi.configuration.query.range_partitioning.range.start if range_partitioning?
|
487
|
+
end
|
488
|
+
|
489
|
+
###
|
490
|
+
# The width of each interval. See [Creating and using integer range partitioned
|
491
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
492
|
+
#
|
493
|
+
# @return [Integer, nil] The width of each interval, for data in range partitions, or `nil` if not range
|
494
|
+
# partitioned.
|
495
|
+
#
|
496
|
+
# @!group Attributes
|
497
|
+
#
|
498
|
+
def range_partitioning_interval
|
499
|
+
@gapi.configuration.query.range_partitioning.range.interval if range_partitioning?
|
500
|
+
end
|
501
|
+
|
502
|
+
###
|
503
|
+
# The end of range partitioning, exclusive. See [Creating and using integer range partitioned
|
504
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
505
|
+
#
|
506
|
+
# @return [Integer, nil] The end of range partitioning, exclusive, or `nil` if not range partitioned.
|
507
|
+
#
|
508
|
+
# @!group Attributes
|
509
|
+
#
|
510
|
+
def range_partitioning_end
|
511
|
+
@gapi.configuration.query.range_partitioning.range.end if range_partitioning?
|
512
|
+
end
|
513
|
+
|
514
|
+
###
|
515
|
+
# Checks if the destination table will be time-partitioned. See
|
516
|
+
# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
517
|
+
#
|
518
|
+
# @return [Boolean, nil] `true` when the table will be time-partitioned,
|
519
|
+
# or `false` otherwise.
|
520
|
+
#
|
521
|
+
# @!group Attributes
|
522
|
+
#
|
523
|
+
def time_partitioning?
|
524
|
+
!@gapi.configuration.query.time_partitioning.nil?
|
525
|
+
end
|
526
|
+
|
527
|
+
###
|
528
|
+
# The period for which the destination table will be partitioned, if
|
529
|
+
# any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
530
|
+
#
|
531
|
+
# @return [String, nil] The partition type. Currently the only supported
|
532
|
+
# value is "DAY", or `nil` if not present.
|
533
|
+
#
|
534
|
+
# @!group Attributes
|
535
|
+
#
|
536
|
+
def time_partitioning_type
|
537
|
+
@gapi.configuration.query.time_partitioning.type if time_partitioning?
|
538
|
+
end
|
539
|
+
|
540
|
+
###
|
541
|
+
# The field on which the destination table will be partitioned, if any.
|
542
|
+
# If not set, the destination table will be partitioned by pseudo column
|
543
|
+
# `_PARTITIONTIME`; if set, the table will be partitioned by this field.
|
544
|
+
# See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
545
|
+
#
|
546
|
+
# @return [String, nil] The partition field, if a field was configured.
|
547
|
+
# `nil` if not partitioned or not set (partitioned by pseudo column
|
548
|
+
# '_PARTITIONTIME').
|
549
|
+
#
|
550
|
+
# @!group Attributes
|
551
|
+
#
|
552
|
+
def time_partitioning_field
|
553
|
+
return nil unless time_partitioning?
|
554
|
+
@gapi.configuration.query.time_partitioning.field
|
555
|
+
end
|
556
|
+
|
557
|
+
###
|
558
|
+
# The expiration for the destination table partitions, if any, in
|
559
|
+
# seconds. See [Partitioned
|
560
|
+
# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
561
|
+
#
|
562
|
+
# @return [Integer, nil] The expiration time, in seconds, for data in
|
563
|
+
# partitions, or `nil` if not present.
|
564
|
+
#
|
565
|
+
# @!group Attributes
|
566
|
+
#
|
567
|
+
def time_partitioning_expiration
|
568
|
+
tp = @gapi.configuration.query.time_partitioning
|
569
|
+
tp.expiration_ms / 1_000 if tp && !tp.expiration_ms.nil?
|
570
|
+
end
|
571
|
+
|
572
|
+
###
|
573
|
+
# If set to true, queries over the destination table will require a
|
574
|
+
# partition filter that can be used for partition elimination to be
|
575
|
+
# specified. See [Partitioned
|
576
|
+
# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
577
|
+
#
|
578
|
+
# @return [Boolean] `true` when a partition filter will be required,
|
579
|
+
# or `false` otherwise.
|
580
|
+
#
|
581
|
+
# @!group Attributes
|
582
|
+
#
|
583
|
+
def time_partitioning_require_filter?
|
584
|
+
tp = @gapi.configuration.query.time_partitioning
|
585
|
+
return false if tp.nil? || tp.require_partition_filter.nil?
|
586
|
+
tp.require_partition_filter
|
587
|
+
end
|
588
|
+
|
589
|
+
###
|
590
|
+
# Checks if the destination table will be clustered.
|
591
|
+
#
|
592
|
+
# @see https://cloud.google.com/bigquery/docs/clustered-tables
|
593
|
+
# Introduction to Clustered Tables
|
594
|
+
#
|
595
|
+
# @return [Boolean, nil] `true` when the table will be clustered,
|
596
|
+
# or `false` otherwise.
|
597
|
+
#
|
598
|
+
# @!group Attributes
|
599
|
+
#
|
600
|
+
def clustering?
|
601
|
+
!@gapi.configuration.query.clustering.nil?
|
602
|
+
end
|
603
|
+
|
604
|
+
###
|
605
|
+
# One or more fields on which the destination table should be clustered.
|
606
|
+
# Must be specified with time-based partitioning, data in the table will
|
607
|
+
# be first partitioned and subsequently clustered. The order of the
|
608
|
+
# returned fields determines the sort order of the data.
|
609
|
+
#
|
610
|
+
# See {QueryJob::Updater#clustering_fields=}.
|
611
|
+
#
|
612
|
+
# @see https://cloud.google.com/bigquery/docs/partitioned-tables
|
613
|
+
# Partitioned Tables
|
614
|
+
# @see https://cloud.google.com/bigquery/docs/clustered-tables
|
615
|
+
# Introduction to Clustered Tables
|
616
|
+
# @see https://cloud.google.com/bigquery/docs/creating-clustered-tables
|
617
|
+
# Creating and Using Clustered Tables
|
618
|
+
#
|
619
|
+
# @return [Array<String>, nil] The clustering fields, or `nil` if the
|
620
|
+
# destination table will not be clustered.
|
621
|
+
#
|
622
|
+
# @!group Attributes
|
623
|
+
#
|
624
|
+
def clustering_fields
|
625
|
+
@gapi.configuration.query.clustering.fields if clustering?
|
626
|
+
end
|
627
|
+
|
628
|
+
##
|
629
|
+
# Refreshes the job until the job is `DONE`.
|
630
|
+
# The delay between refreshes will incrementally increase.
|
631
|
+
#
|
632
|
+
# @example
|
633
|
+
# require "google/cloud/bigquery"
|
634
|
+
#
|
635
|
+
# bigquery = Google::Cloud::Bigquery.new
|
636
|
+
#
|
637
|
+
# sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
|
638
|
+
# job = bigquery.query_job sql
|
639
|
+
#
|
640
|
+
# job.wait_until_done!
|
641
|
+
# job.done? #=> true
|
642
|
+
#
|
643
|
+
def wait_until_done!
|
644
|
+
return if done?
|
645
|
+
|
646
|
+
ensure_service!
|
647
|
+
loop do
|
648
|
+
query_results_gapi = service.job_query_results job_id, location: location, max: 0
|
649
|
+
if query_results_gapi.job_complete
|
650
|
+
@destination_schema_gapi = query_results_gapi.schema
|
651
|
+
break
|
652
|
+
end
|
653
|
+
end
|
654
|
+
reload!
|
655
|
+
end
|
656
|
+
|
657
|
+
##
|
658
|
+
# Retrieves the query results for the job.
|
659
|
+
#
|
660
|
+
# @param [String] token Page token, returned by a previous call,
|
661
|
+
# identifying the result set.
|
662
|
+
# @param [Integer] max Maximum number of results to return.
|
663
|
+
# @param [Integer] start Zero-based index of the starting row to read.
|
664
|
+
#
|
665
|
+
# @return [Google::Cloud::Bigquery::Data] An object providing access to
|
666
|
+
# data read from the destination table for the job.
|
667
|
+
#
|
668
|
+
# @example
|
669
|
+
# require "google/cloud/bigquery"
|
670
|
+
#
|
671
|
+
# bigquery = Google::Cloud::Bigquery.new
|
672
|
+
#
|
673
|
+
# sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
|
674
|
+
# job = bigquery.query_job sql
|
675
|
+
#
|
676
|
+
# job.wait_until_done!
|
677
|
+
# data = job.data
|
678
|
+
#
|
679
|
+
# # Iterate over the first page of results
|
680
|
+
# data.each do |row|
|
681
|
+
# puts row[:word]
|
682
|
+
# end
|
683
|
+
# # Retrieve the next page of results
|
684
|
+
# data = data.next if data.next?
|
685
|
+
#
|
686
|
+
def data token: nil, max: nil, start: nil
|
687
|
+
return nil unless done?
|
688
|
+
return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun?
|
689
|
+
if ddl? || dml?
|
690
|
+
data_hash = { totalRows: nil, rows: [] }
|
691
|
+
return Data.from_gapi_json data_hash, nil, @gapi, service
|
692
|
+
end
|
693
|
+
ensure_schema!
|
694
|
+
|
695
|
+
options = { token: token, max: max, start: start }
|
696
|
+
data_hash = service.list_tabledata destination_table_dataset_id, destination_table_table_id, options
|
697
|
+
Data.from_gapi_json data_hash, destination_table_gapi, @gapi, service
|
698
|
+
end
|
699
|
+
alias query_results data
|
700
|
+
|
701
|
+
##
|
702
|
+
# Yielded to a block to accumulate changes for a patch request.
|
703
|
+
class Updater < QueryJob
|
704
|
+
##
|
705
|
+
# @private Create an Updater object.
|
706
|
+
def initialize service, gapi
|
707
|
+
@service = service
|
708
|
+
@gapi = gapi
|
709
|
+
end
|
710
|
+
|
711
|
+
# rubocop:disable all
|
712
|
+
|
713
|
+
##
|
714
|
+
# @private Create an Updater from an options hash.
|
715
|
+
#
|
716
|
+
# @return [Google::Cloud::Bigquery::QueryJob::Updater] A job
|
717
|
+
# configuration object for setting query options.
|
718
|
+
def self.from_options service, query, options
|
719
|
+
job_ref = service.job_ref_from options[:job_id], options[:prefix]
|
720
|
+
dataset_config = service.dataset_ref_from options[:dataset],
|
721
|
+
options[:project]
|
722
|
+
req = Google::Apis::BigqueryV2::Job.new(
|
723
|
+
job_reference: job_ref,
|
724
|
+
configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
|
725
|
+
query: Google::Apis::BigqueryV2::JobConfigurationQuery.new(
|
726
|
+
query: query,
|
727
|
+
default_dataset: dataset_config,
|
728
|
+
maximum_billing_tier: options[:maximum_billing_tier]
|
729
|
+
)
|
730
|
+
)
|
731
|
+
)
|
732
|
+
|
733
|
+
updater = QueryJob::Updater.new service, req
|
734
|
+
updater.set_params_and_types options[:params], options[:types] if options[:params]
|
735
|
+
updater.create = options[:create]
|
736
|
+
updater.write = options[:write]
|
737
|
+
updater.table = options[:table]
|
738
|
+
updater.dryrun = options[:dryrun]
|
739
|
+
updater.maximum_bytes_billed = options[:maximum_bytes_billed]
|
740
|
+
updater.labels = options[:labels] if options[:labels]
|
741
|
+
updater.legacy_sql = Convert.resolve_legacy_sql options[:standard_sql], options[:legacy_sql]
|
742
|
+
updater.external = options[:external] if options[:external]
|
743
|
+
updater.priority = options[:priority]
|
744
|
+
updater.cache = options[:cache]
|
745
|
+
updater.large_results = options[:large_results]
|
746
|
+
updater.flatten = options[:flatten]
|
747
|
+
updater.udfs = options[:udfs]
|
748
|
+
updater
|
749
|
+
end
|
750
|
+
|
751
|
+
# rubocop:enable all
|
752
|
+
|
753
|
+
##
|
754
|
+
# Sets the geographic location where the job should run. Required
|
755
|
+
# except for US and EU.
|
756
|
+
#
|
757
|
+
# @param [String] value A geographic location, such as "US", "EU" or
|
758
|
+
# "asia-northeast1". Required except for US and EU.
|
759
|
+
#
|
760
|
+
# @example
|
761
|
+
# require "google/cloud/bigquery"
|
762
|
+
#
|
763
|
+
# bigquery = Google::Cloud::Bigquery.new
|
764
|
+
# dataset = bigquery.dataset "my_dataset"
|
765
|
+
#
|
766
|
+
# job = bigquery.query_job "SELECT 1;" do |query|
|
767
|
+
# query.table = dataset.table "my_table", skip_lookup: true
|
768
|
+
# query.location = "EU"
|
769
|
+
# end
|
770
|
+
#
|
771
|
+
# @!group Attributes
|
772
|
+
def location= value
|
773
|
+
@gapi.job_reference.location = value
|
774
|
+
return unless value.nil?
|
775
|
+
|
776
|
+
# Treat assigning value of nil the same as unsetting the value.
|
777
|
+
unset = @gapi.job_reference.instance_variables.include? :@location
|
778
|
+
@gapi.job_reference.remove_instance_variable :@location if unset
|
779
|
+
end
|
780
|
+
|
781
|
+
##
|
782
|
+
# Sets the priority of the query.
|
783
|
+
#
|
784
|
+
# @param [String] value Specifies a priority for the query. Possible
|
785
|
+
# values include `INTERACTIVE` and `BATCH`.
|
786
|
+
#
|
787
|
+
# @!group Attributes
|
788
|
+
def priority= value
|
789
|
+
@gapi.configuration.query.priority = priority_value value
|
790
|
+
end
|
791
|
+
|
792
|
+
##
|
793
|
+
# Specifies to look in the query cache for results.
|
794
|
+
#
|
795
|
+
# @param [Boolean] value Whether to look for the result in the query
|
796
|
+
# cache. The query cache is a best-effort cache that will be flushed
|
797
|
+
# whenever tables in the query are modified. The default value is
|
798
|
+
# true. For more information, see [query
|
799
|
+
# caching](https://developers.google.com/bigquery/querying-data).
|
800
|
+
#
|
801
|
+
# @!group Attributes
|
802
|
+
def cache= value
|
803
|
+
@gapi.configuration.query.use_query_cache = value
|
804
|
+
end
|
805
|
+
|
806
|
+
##
|
807
|
+
# Allow large results for a legacy SQL query.
|
808
|
+
#
|
809
|
+
# @param [Boolean] value This option is specific to Legacy SQL.
|
810
|
+
# If `true`, allows the query to produce arbitrarily large result
|
811
|
+
# tables at a slight cost in performance. Requires `table` parameter
|
812
|
+
# to be set.
|
813
|
+
#
|
814
|
+
# @!group Attributes
|
815
|
+
def large_results= value
|
816
|
+
@gapi.configuration.query.allow_large_results = value
|
817
|
+
end
|
818
|
+
|
819
|
+
##
|
820
|
+
# Flatten nested and repeated fields in legacy SQL queries.
|
821
|
+
#
|
822
|
+
# @param [Boolean] value This option is specific to Legacy SQL.
|
823
|
+
# Flattens all nested and repeated fields in the query results. The
|
824
|
+
# default value is `true`. `large_results` parameter must be `true`
|
825
|
+
# if this is set to `false`.
|
826
|
+
#
|
827
|
+
# @!group Attributes
|
828
|
+
def flatten= value
|
829
|
+
@gapi.configuration.query.flatten_results = value
|
830
|
+
end
|
831
|
+
|
832
|
+
##
|
833
|
+
# Sets the default dataset of tables referenced in the query.
|
834
|
+
#
|
835
|
+
# @param [Dataset] value The default dataset to use for unqualified
|
836
|
+
# table names in the query.
|
837
|
+
#
|
838
|
+
# @!group Attributes
|
839
|
+
def dataset= value
|
840
|
+
@gapi.configuration.query.default_dataset = @service.dataset_ref_from value
|
841
|
+
end
|
842
|
+
|
843
|
+
##
|
844
|
+
# Sets the query parameters. Standard SQL only.
|
845
|
+
#
|
846
|
+
# Use {set_params_and_types} to set both params and types.
|
847
|
+
#
|
848
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
849
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
850
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
851
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql`
|
852
|
+
# to true.
|
853
|
+
#
|
854
|
+
# Ruby types are mapped to BigQuery types as follows:
|
855
|
+
#
|
856
|
+
# | BigQuery | Ruby | Notes |
|
857
|
+
# |-------------|--------------------------------------|------------------------------------------------|
|
858
|
+
# | `BOOL` | `true`/`false` | |
|
859
|
+
# | `INT64` | `Integer` | |
|
860
|
+
# | `FLOAT64` | `Float` | |
|
861
|
+
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
862
|
+
# | `STRING` | `String` | |
|
863
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
864
|
+
# | `DATE` | `Date` | |
|
865
|
+
# | `TIMESTAMP` | `Time` | |
|
866
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
867
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
868
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
869
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
870
|
+
#
|
871
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
872
|
+
# of each BigQuery data type, including allowed values.
|
873
|
+
#
|
874
|
+
# @!group Attributes
|
875
|
+
def params= params
|
876
|
+
set_params_and_types params
|
877
|
+
end
|
878
|
+
|
879
|
+
##
|
880
|
+
# Sets the query parameters. Standard SQL only.
|
881
|
+
#
|
882
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
883
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
884
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
885
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql`
|
886
|
+
# to true.
|
887
|
+
#
|
888
|
+
# Ruby types are mapped to BigQuery types as follows:
|
889
|
+
#
|
890
|
+
# | BigQuery | Ruby | Notes |
|
891
|
+
# |-------------|--------------------------------------|------------------------------------------------|
|
892
|
+
# | `BOOL` | `true`/`false` | |
|
893
|
+
# | `INT64` | `Integer` | |
|
894
|
+
# | `FLOAT64` | `Float` | |
|
895
|
+
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
896
|
+
# | `STRING` | `String` | |
|
897
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
898
|
+
# | `DATE` | `Date` | |
|
899
|
+
# | `TIMESTAMP` | `Time` | |
|
900
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
901
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
902
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
903
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
904
|
+
#
|
905
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
906
|
+
# of each BigQuery data type, including allowed values.
|
907
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
|
908
|
+
# infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
|
909
|
+
# type for these values.
|
910
|
+
#
|
911
|
+
# Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
|
912
|
+
# parameters. This must be an `Hash` when the query uses named query parameters. The values should be
|
913
|
+
# BigQuery type codes from the following list:
|
914
|
+
#
|
915
|
+
# * `:BOOL`
|
916
|
+
# * `:INT64`
|
917
|
+
# * `:FLOAT64`
|
918
|
+
# * `:NUMERIC`
|
919
|
+
# * `:STRING`
|
920
|
+
# * `:DATETIME`
|
921
|
+
# * `:DATE`
|
922
|
+
# * `:TIMESTAMP`
|
923
|
+
# * `:TIME`
|
924
|
+
# * `:BYTES`
|
925
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
926
|
+
# are specified as `[:INT64]`.
|
927
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
928
|
+
# match the `params` hash, and the values are the types value that matches the data.
|
929
|
+
#
|
930
|
+
# Types are optional.
|
931
|
+
#
|
932
|
+
# @!group Attributes
|
933
|
+
def set_params_and_types params, types = nil
|
934
|
+
types ||= params.class.new
|
935
|
+
raise ArgumentError, "types must use the same format as params" if types.class != params.class
|
936
|
+
|
937
|
+
case params
|
938
|
+
when Array then
|
939
|
+
@gapi.configuration.query.use_legacy_sql = false
|
940
|
+
@gapi.configuration.query.parameter_mode = "POSITIONAL"
|
941
|
+
@gapi.configuration.query.query_parameters = params.zip(types).map do |param, type|
|
942
|
+
Convert.to_query_param param, type
|
943
|
+
end
|
944
|
+
when Hash then
|
945
|
+
@gapi.configuration.query.use_legacy_sql = false
|
946
|
+
@gapi.configuration.query.parameter_mode = "NAMED"
|
947
|
+
@gapi.configuration.query.query_parameters = params.map do |name, param|
|
948
|
+
type = types[name]
|
949
|
+
Convert.to_query_param(param, type).tap { |named_param| named_param.name = String name }
|
950
|
+
end
|
951
|
+
else
|
952
|
+
raise ArgumentError, "params must be an Array or a Hash"
|
953
|
+
end
|
954
|
+
end
|
955
|
+
|
956
|
+
##
|
957
|
+
# Sets the create disposition for creating the query results table.
|
958
|
+
#
|
959
|
+
# @param [String] value Specifies whether the job is allowed to
|
960
|
+
# create new tables. The default value is `needed`.
|
961
|
+
#
|
962
|
+
# The following values are supported:
|
963
|
+
#
|
964
|
+
# * `needed` - Create the table if it does not exist.
|
965
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
966
|
+
# raised if the table does not exist.
|
967
|
+
#
|
968
|
+
# @!group Attributes
|
969
|
+
def create= value
|
970
|
+
@gapi.configuration.query.create_disposition = Convert.create_disposition value
|
971
|
+
end
|
972
|
+
|
973
|
+
##
|
974
|
+
# Sets the write disposition for when the query results table exists.
|
975
|
+
#
|
976
|
+
# @param [String] value Specifies the action that occurs if the
|
977
|
+
# destination table already exists. The default value is `empty`.
|
978
|
+
#
|
979
|
+
# The following values are supported:
|
980
|
+
#
|
981
|
+
# * `truncate` - BigQuery overwrites the table data.
|
982
|
+
# * `append` - BigQuery appends the data to the table.
|
983
|
+
# * `empty` - A 'duplicate' error is returned in the job result if
|
984
|
+
# the table exists and contains data.
|
985
|
+
#
|
986
|
+
# @!group Attributes
|
987
|
+
def write= value
|
988
|
+
@gapi.configuration.query.write_disposition = Convert.write_disposition value
|
989
|
+
end
|
990
|
+
|
991
|
+
##
|
992
|
+
# Sets the dry run flag for the query job.
|
993
|
+
#
|
994
|
+
# @param [Boolean] value If set, don't actually run this job. A valid
|
995
|
+
# query will return a mostly empty response with some processing
|
996
|
+
# statistics, while an invalid query will return the same error it
|
997
|
+
# would if it wasn't a dry run..
|
998
|
+
#
|
999
|
+
# @!group Attributes
|
1000
|
+
def dryrun= value
|
1001
|
+
@gapi.configuration.dry_run = value
|
1002
|
+
end
|
1003
|
+
alias dry_run= dryrun=
|
1004
|
+
|
1005
|
+
##
|
1006
|
+
# Sets the destination for the query results table.
|
1007
|
+
#
|
1008
|
+
# @param [Table] value The destination table where the query results
|
1009
|
+
# should be stored. If not present, a new table will be created
|
1010
|
+
# according to the create disposition to store the results.
|
1011
|
+
#
|
1012
|
+
# @!group Attributes
|
1013
|
+
def table= value
|
1014
|
+
@gapi.configuration.query.destination_table = table_ref_from value
|
1015
|
+
end
|
1016
|
+
|
1017
|
+
##
|
1018
|
+
# Sets the maximum bytes billed for the query.
|
1019
|
+
#
|
1020
|
+
# @param [Integer] value Limits the bytes billed for this job.
|
1021
|
+
# Queries that will have bytes billed beyond this limit will fail
|
1022
|
+
# (without incurring a charge). Optional. If unspecified, this will
|
1023
|
+
# be set to your project default.
|
1024
|
+
#
|
1025
|
+
# @!group Attributes
|
1026
|
+
def maximum_bytes_billed= value
|
1027
|
+
@gapi.configuration.query.maximum_bytes_billed = value
|
1028
|
+
end
|
1029
|
+
|
1030
|
+
##
|
1031
|
+
# Sets the labels to use for the job.
|
1032
|
+
#
|
1033
|
+
# @param [Hash] value A hash of user-provided labels associated with
|
1034
|
+
# the job. You can use these to organize and group your jobs. Label
|
1035
|
+
# keys and values can be no longer than 63 characters, can only
|
1036
|
+
# contain lowercase letters, numeric characters, underscores and
|
1037
|
+
# dashes. International characters are allowed. Label values are
|
1038
|
+
# optional. Label keys must start with a letter and each label in
|
1039
|
+
# the list must have a different key.
|
1040
|
+
#
|
1041
|
+
# @!group Attributes
|
1042
|
+
#
|
1043
|
+
def labels= value
|
1044
|
+
@gapi.configuration.update! labels: value
|
1045
|
+
end
|
1046
|
+
|
1047
|
+
##
|
1048
|
+
# Sets the query syntax to legacy SQL.
|
1049
|
+
#
|
1050
|
+
# @param [Boolean] value Specifies whether to use BigQuery's [legacy
|
1051
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
1052
|
+
# dialect for this query. If set to false, the query will use
|
1053
|
+
# BigQuery's [standard
|
1054
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
1055
|
+
# dialect. Optional. The default value is false.
|
1056
|
+
#
|
1057
|
+
# @!group Attributes
|
1058
|
+
#
|
1059
|
+
def legacy_sql= value
|
1060
|
+
@gapi.configuration.query.use_legacy_sql = value
|
1061
|
+
end
|
1062
|
+
|
1063
|
+
##
|
1064
|
+
# Sets the query syntax to standard SQL.
|
1065
|
+
#
|
1066
|
+
# @param [Boolean] value Specifies whether to use BigQuery's [standard
|
1067
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
1068
|
+
# dialect for this query. If set to true, the query will use
|
1069
|
+
# standard SQL rather than the [legacy
|
1070
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
1071
|
+
# dialect. Optional. The default value is true.
|
1072
|
+
#
|
1073
|
+
# @!group Attributes
|
1074
|
+
#
|
1075
|
+
def standard_sql= value
|
1076
|
+
@gapi.configuration.query.use_legacy_sql = !value
|
1077
|
+
end
|
1078
|
+
|
1079
|
+
##
|
1080
|
+
# Sets definitions for external tables used in the query.
|
1081
|
+
#
|
1082
|
+
# @param [Hash<String|Symbol, External::DataSource>] value A Hash
|
1083
|
+
# that represents the mapping of the external tables to the table
|
1084
|
+
# names used in the SQL query. The hash keys are the table names,
|
1085
|
+
# and the hash values are the external table objects.
|
1086
|
+
#
|
1087
|
+
# @!group Attributes
|
1088
|
+
#
|
1089
|
+
def external= value
|
1090
|
+
external_table_pairs = value.map { |name, obj| [String(name), obj.to_gapi] }
|
1091
|
+
external_table_hash = Hash[external_table_pairs]
|
1092
|
+
@gapi.configuration.query.table_definitions = external_table_hash
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
##
|
1096
|
+
# Sets user defined functions for the query.
|
1097
|
+
#
|
1098
|
+
# @param [Array<String>, String] value User-defined function resources
|
1099
|
+
# used in the query. May be either a code resource to load from a
|
1100
|
+
# Google Cloud Storage URI (`gs://bucket/path`), or an inline
|
1101
|
+
# resource that contains code for a user-defined function (UDF).
|
1102
|
+
# Providing an inline code resource is equivalent to providing a URI
|
1103
|
+
# for a file containing the same code. See [User-Defined
|
1104
|
+
# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
1105
|
+
#
|
1106
|
+
# @!group Attributes
|
1107
|
+
def udfs= value
|
1108
|
+
@gapi.configuration.query.user_defined_function_resources = udfs_gapi_from value
|
1109
|
+
end
|
1110
|
+
|
1111
|
+
##
|
1112
|
+
# Sets the encryption configuration of the destination table.
|
1113
|
+
#
|
1114
|
+
# @param [Google::Cloud::BigQuery::EncryptionConfiguration] val
|
1115
|
+
# Custom encryption configuration (e.g., Cloud KMS keys).
|
1116
|
+
#
|
1117
|
+
# @example
|
1118
|
+
# require "google/cloud/bigquery"
|
1119
|
+
#
|
1120
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1121
|
+
# dataset = bigquery.dataset "my_dataset"
|
1122
|
+
#
|
1123
|
+
# key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
|
1124
|
+
# encrypt_config = bigquery.encryption kms_key: key_name
|
1125
|
+
# job = bigquery.query_job "SELECT 1;" do |job|
|
1126
|
+
# job.table = dataset.table "my_table", skip_lookup: true
|
1127
|
+
# job.encryption = encrypt_config
|
1128
|
+
# end
|
1129
|
+
#
|
1130
|
+
# @!group Attributes
|
1131
|
+
def encryption= val
|
1132
|
+
@gapi.configuration.query.update! destination_encryption_configuration: val.to_gapi
|
1133
|
+
end

          ##
          # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
          # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # See {#range_partitioning_start=}, {#range_partitioning_interval=} and {#range_partitioning_end=}.
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # @param [String] field The range partition field. The destination table is partitioned by this
          #   field. The field must be a top-level `NULLABLE/REQUIRED` field. The only supported
          #   type is `INTEGER/INT64`.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
          #     job.table = destination_table
          #     job.range_partitioning_field = "num"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_field= field
            @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.query.range_partitioning.field = field
          end

          ##
          # Sets the start of range partitioning, inclusive, for the destination table. See [Creating and using integer
          # range partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # See {#range_partitioning_field=}, {#range_partitioning_interval=} and {#range_partitioning_end=}.
          #
          # @param [Integer] range_start The start of range partitioning, inclusive.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
          #     job.table = destination_table
          #     job.range_partitioning_field = "num"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_start= range_start
            @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.query.range_partitioning.range.start = range_start
          end

          ##
          # Sets the width of each interval for data in range partitions. See [Creating and using integer range
          # partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # See {#range_partitioning_field=}, {#range_partitioning_start=} and {#range_partitioning_end=}.
          #
          # @param [Integer] range_interval The width of each interval of data in range partitions.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
          #     job.table = destination_table
          #     job.range_partitioning_field = "num"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_interval= range_interval
            @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.query.range_partitioning.range.interval = range_interval
          end

          ##
          # Sets the end of range partitioning, exclusive, for the destination table. See [Creating and using integer
          # range partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # See {#range_partitioning_start=}, {#range_partitioning_interval=} and {#range_partitioning_field=}.
          #
          # @param [Integer] range_end The end of range partitioning, exclusive.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
          #     job.table = destination_table
          #     job.range_partitioning_field = "num"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_end= range_end
            @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.query.range_partitioning.range.end = range_end
          end
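
          # Taken together, the four range-partitioning setters above lazily
          # populate one gapi value; the shared example yields the equivalent
          # of this sketch (using the classes referenced in this file):
          #
          #   Google::Apis::BigqueryV2::RangePartitioning.new(
          #     field: "num",
          #     range: Google::Apis::BigqueryV2::RangePartitioning::Range.new(
          #       start: 0, interval: 10, end: 100
          #     )
          #   )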

          ##
          # Sets the partitioning for the destination table. See [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # You can only set partitioning when creating a table. BigQuery does
          # not allow you to change partitioning on an existing table.
          #
          # @param [String] type The partition type. Currently the only
          #   supported value is "DAY".
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = dataset.query_job "SELECT * FROM UNNEST(" \
          #                           "GENERATE_TIMESTAMP_ARRAY(" \
          #                           "'2018-10-01 00:00:00', " \
          #                           "'2018-10-10 00:00:00', " \
          #                           "INTERVAL 1 DAY)) AS dob" do |job|
          #     job.table = destination_table
          #     job.time_partitioning_type = "DAY"
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def time_partitioning_type= type
            @gapi.configuration.query.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.query.time_partitioning.update! type: type
          end

          ##
          # Sets the field on which to partition the destination table. If not
          # set, the destination table is partitioned by pseudo column
          # `_PARTITIONTIME`; if set, the table is partitioned by this field.
          # See [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # The destination table must also be partitioned. See
          # {#time_partitioning_type=}.
          #
          # You can only set the partitioning field while creating a table.
          # BigQuery does not allow you to change partitioning on an existing
          # table.
          #
          # @param [String] field The partition field. The field must be a
          #   top-level TIMESTAMP or DATE field. Its mode must be NULLABLE or
          #   REQUIRED.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = dataset.query_job "SELECT * FROM UNNEST(" \
          #                           "GENERATE_TIMESTAMP_ARRAY(" \
          #                           "'2018-10-01 00:00:00', " \
          #                           "'2018-10-10 00:00:00', " \
          #                           "INTERVAL 1 DAY)) AS dob" do |job|
          #     job.table = destination_table
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_field = "dob"
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def time_partitioning_field= field
            @gapi.configuration.query.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.query.time_partitioning.update! field: field
          end

          ##
          # Sets the partition expiration for the destination table. See
          # [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # The destination table must also be partitioned. See
          # {#time_partitioning_type=}.
          #
          # @param [Integer] expiration An expiration time, in seconds,
          #   for data in partitions.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = dataset.query_job "SELECT * FROM UNNEST(" \
          #                           "GENERATE_TIMESTAMP_ARRAY(" \
          #                           "'2018-10-01 00:00:00', " \
          #                           "'2018-10-10 00:00:00', " \
          #                           "INTERVAL 1 DAY)) AS dob" do |job|
          #     job.table = destination_table
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_expiration = 86_400
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def time_partitioning_expiration= expiration
            @gapi.configuration.query.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
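            # The gapi field expiration_ms is in milliseconds, so the seconds
            # value accepted by this setter is converted below.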
            @gapi.configuration.query.time_partitioning.update! expiration_ms: expiration * 1000
          end

          ##
          # If set to true, queries over the destination table require that a
          # partition filter (usable for partition elimination) be specified.
          # See [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # @param [Boolean] val Indicates if queries over the destination table
          #   will require a partition filter. The default value is `false`.
          #
          # @!group Attributes
          #
          def time_partitioning_require_filter= val
            @gapi.configuration.query.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.query.time_partitioning.update! require_partition_filter: val
          end
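
          # A minimal usage sketch for {#time_partitioning_require_filter=}
          # (dataset and table names are hypothetical), following the pattern
          # of the other partitioning examples in this class:
          #
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = dataset.query_job "SELECT * FROM my_table" do |job|
          #     job.table = destination_table
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_require_filter = true
          #   end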

          ##
          # Sets one or more fields on which the destination table should be
          # clustered. Must be specified with time-based partitioning; data in
          # the table will first be partitioned and subsequently clustered.
          #
          # Only top-level, non-repeated, simple-type fields are supported. When
          # you cluster a table using multiple columns, the order of columns you
          # specify is important. The order of the specified columns determines
          # the sort order of the data.
          #
          # See {QueryJob#clustering_fields}.
          #
          # @see https://cloud.google.com/bigquery/docs/partitioned-tables
          #   Partitioned Tables
          # @see https://cloud.google.com/bigquery/docs/clustered-tables
          #   Introduction to Clustered Tables
          # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables
          #   Creating and Using Clustered Tables
          #
          # @param [Array<String>] fields The clustering fields. Only top-level,
          #   non-repeated, simple-type fields are supported.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   destination_table = dataset.table "my_destination_table",
          #                                     skip_lookup: true
          #
          #   job = dataset.query_job "SELECT * FROM my_table" do |job|
          #     job.table = destination_table
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_field = "dob"
          #     job.clustering_fields = ["last_name", "first_name"]
          #   end
          #
          #   job.wait_until_done!
          #   job.done? #=> true
          #
          # @!group Attributes
          #
          def clustering_fields= fields
            @gapi.configuration.query.clustering ||= Google::Apis::BigqueryV2::Clustering.new
            @gapi.configuration.query.clustering.fields = fields
          end
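
          # The setter above lazily builds the underlying gapi value; the
          # example produces the equivalent of this sketch (using the class
          # referenced in this file):
          #
          #   Google::Apis::BigqueryV2::Clustering.new fields: ["last_name", "first_name"]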
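
          # These job lifecycle methods (#cancel, #rerun!, #reload!,
          # #wait_until_done!) are not supported on the updater; each simply
          # raises.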
          def cancel
            raise "not implemented in #{self.class}"
          end

          def rerun!
            raise "not implemented in #{self.class}"
          end

          def reload!
            raise "not implemented in #{self.class}"
          end
          alias refresh! reload!

          def wait_until_done!
            raise "not implemented in #{self.class}"
          end

          ##
          # @private Returns the Google API client library representation of
          # this job.
          #
          # @return [Google::Apis::BigqueryV2::Job] (See
          #   {Google::Apis::BigqueryV2::Job})
          def to_gapi
            @gapi
          end

          protected

          # Creates a table reference from a table object.
          def table_ref_from tbl
            return nil if tbl.nil?
            Google::Apis::BigqueryV2::TableReference.new(
              project_id: tbl.project_id,
              dataset_id: tbl.dataset_id,
              table_id: tbl.table_id
            )
          end

          def priority_value str
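            # Case-insensitive lookup, e.g. "Batch" #=> "BATCH"; unrecognized
            # values map to nil.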
            { "batch" => "BATCH", "interactive" => "INTERACTIVE" }[str.to_s.downcase]
          end

          def udfs_gapi_from array_or_str
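            # Accepts a single string or an array of strings; `gs://` values
            # become resource URIs, anything else is treated as inline code.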
            Array(array_or_str).map do |uri_or_code|
              resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
              if uri_or_code.start_with? "gs://"
                resource.resource_uri = uri_or_code
              else
                resource.inline_code = uri_or_code
              end
              resource
            end
          end
        end

        ##
        # Represents a stage in the execution plan for the query.
        #
        # @attr_reader [Float] compute_ratio_avg Relative amount of time the
        #   average shard spent on CPU-bound tasks.
        # @attr_reader [Float] compute_ratio_max Relative amount of time the
        #   slowest shard spent on CPU-bound tasks.
        # @attr_reader [Integer] id Unique ID for the stage within the query
        #   plan.
        # @attr_reader [String] name Human-readable name for the stage.
        # @attr_reader [Float] read_ratio_avg Relative amount of time the
        #   average shard spent reading input.
        # @attr_reader [Float] read_ratio_max Relative amount of time the
        #   slowest shard spent reading input.
        # @attr_reader [Integer] records_read Number of records read into the
        #   stage.
        # @attr_reader [Integer] records_written Number of records written by
        #   the stage.
        # @attr_reader [Array<Step>] steps List of operations within the stage
        #   in dependency order (approximately chronological).
        # @attr_reader [Float] wait_ratio_avg Relative amount of time the
        #   average shard spent waiting to be scheduled.
        # @attr_reader [Float] wait_ratio_max Relative amount of time the
        #   slowest shard spent waiting to be scheduled.
        # @attr_reader [Float] write_ratio_avg Relative amount of time the
        #   average shard spent on writing output.
        # @attr_reader [Float] write_ratio_max Relative amount of time the
        #   slowest shard spent on writing output.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
        #   job = bigquery.query_job sql
        #
        #   job.wait_until_done!
        #
        #   stages = job.query_plan
        #   stages.each do |stage|
        #     puts stage.name
        #     stage.steps.each do |step|
        #       puts step.kind
        #       puts step.substeps.inspect
        #     end
        #   end
        #
        class Stage
          attr_reader :compute_ratio_avg, :compute_ratio_max, :id, :name, :read_ratio_avg, :read_ratio_max,
                      :records_read, :records_written, :status, :steps, :wait_ratio_avg, :wait_ratio_max,
                      :write_ratio_avg, :write_ratio_max

          ##
          # @private Creates a new Stage instance.
          def initialize compute_ratio_avg, compute_ratio_max, id, name, read_ratio_avg, read_ratio_max, records_read,
                         records_written, status, steps, wait_ratio_avg, wait_ratio_max, write_ratio_avg,
                         write_ratio_max
            @compute_ratio_avg = compute_ratio_avg
            @compute_ratio_max = compute_ratio_max
            @id = id
            @name = name
            @read_ratio_avg = read_ratio_avg
            @read_ratio_max = read_ratio_max
            @records_read = records_read
            @records_written = records_written
            @status = status
            @steps = steps
            @wait_ratio_avg = wait_ratio_avg
            @wait_ratio_max = wait_ratio_max
            @write_ratio_avg = write_ratio_avg
            @write_ratio_max = write_ratio_max
          end

          ##
          # @private New Stage from a statistics.query.queryPlan element.
          def self.from_gapi gapi
            steps = Array(gapi.steps).map { |g| Step.from_gapi g }
            new gapi.compute_ratio_avg, gapi.compute_ratio_max, gapi.id, gapi.name, gapi.read_ratio_avg,
                gapi.read_ratio_max, gapi.records_read, gapi.records_written, gapi.status, steps, gapi.wait_ratio_avg,
                gapi.wait_ratio_max, gapi.write_ratio_avg, gapi.write_ratio_max
          end
        end

        ##
        # Represents an operation in a stage in the execution plan for the
        # query.
        #
        # @attr_reader [String] kind Machine-readable operation type. For a full
        #   list of operation types, see [Steps
        #   metadata](https://cloud.google.com/bigquery/query-plan-explanation#steps_metadata).
        # @attr_reader [Array<String>] substeps Human-readable descriptions of
        #   the operations within the step.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
        #   job = bigquery.query_job sql
        #
        #   job.wait_until_done!
        #
        #   stages = job.query_plan
        #   stages.each do |stage|
        #     puts stage.name
        #     stage.steps.each do |step|
        #       puts step.kind
        #       puts step.substeps.inspect
        #     end
        #   end
        #
        class Step
          attr_reader :kind, :substeps

          ##
          # @private Creates a new Step instance.
          def initialize kind, substeps
            @kind = kind
            @substeps = substeps
          end

          ##
          # @private New Step from a statistics.query.queryPlan[].steps element.
          def self.from_gapi gapi
            new gapi.kind, Array(gapi.substeps)
          end
        end

        protected

        def ensure_schema!
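          # Lazily fetch the destination schema from the job's query results;
          # max: 0 asks the service for no rows, just metadata such as the
          # schema.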
          return unless destination_schema.nil?

          query_results_gapi = service.job_query_results job_id, location: location, max: 0
          # raise "unable to retrieve schema" if query_results_gapi.schema.nil?
          @destination_schema_gapi = query_results_gapi.schema
        end

        def destination_schema
          @destination_schema_gapi
        end

        def destination_table_dataset_id
          @gapi.configuration.query.destination_table.dataset_id
        end

        def destination_table_table_id
          @gapi.configuration.query.destination_table.table_id
        end

        def destination_table_gapi
          Google::Apis::BigqueryV2::Table.new(
            table_reference: @gapi.configuration.query.destination_table,
            schema: destination_schema
          )
        end
      end
    end
  end
end