google-cloud-bigquery 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -0,0 +1,280 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/bigquery/convert"
|
17
|
+
require "monitor"
|
18
|
+
require "concurrent"
|
19
|
+
|
20
|
+
module Google
|
21
|
+
module Cloud
|
22
|
+
module Bigquery
|
23
|
+
class Table
|
24
|
+
##
|
25
|
+
# # AsyncInserter
|
26
|
+
#
|
27
|
+
# Used to insert multiple rows in batches to a table. See
|
28
|
+
# {Google::Cloud::Bigquery::Table#insert_async}.
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# require "google/cloud/bigquery"
|
32
|
+
#
|
33
|
+
# bigquery = Google::Cloud::Bigquery.new
|
34
|
+
# dataset = bigquery.dataset "my_dataset"
|
35
|
+
# table = dataset.table "my_table"
|
36
|
+
# inserter = table.insert_async do |response|
|
37
|
+
# log_insert "inserted #{response.insert_count} rows " \
|
38
|
+
# "with #{response.error_count} errors"
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# rows = [
|
42
|
+
# { "first_name" => "Alice", "age" => 21 },
|
43
|
+
# { "first_name" => "Bob", "age" => 22 }
|
44
|
+
# ]
|
45
|
+
# inserter.insert rows
|
46
|
+
#
|
47
|
+
# inserter.stop.wait!
|
48
|
+
#
|
49
|
+
# @attr_reader [Integer] max_bytes The maximum size of rows to be
|
50
|
+
# collected before the batch is inserted. Default is 10,000,000
|
51
|
+
# (10MB).
|
52
|
+
# @attr_reader [Integer] max_rows The maximum number of rows to be
|
53
|
+
# collected before the batch is inserted. Default is 500.
|
54
|
+
# @attr_reader [Numeric] interval The number of seconds to collect rows
|
55
|
+
# before the batch is inserted. Default is 10.
|
56
|
+
# @attr_reader [Integer] threads The number of threads used to insert
|
57
|
+
# rows. Default is 4.
|
58
|
+
#
|
59
|
+
class AsyncInserter
|
60
|
+
include MonitorMixin
|
61
|
+
|
62
|
+
attr_reader :max_bytes, :max_rows, :interval, :threads
|
63
|
+
##
|
64
|
+
# @private Implementation accessors
|
65
|
+
attr_reader :table, :batch
|
66
|
+
|
67
|
+
##
|
68
|
+
# @private
|
69
|
+
def initialize table, skip_invalid: nil, ignore_unknown: nil,
|
70
|
+
max_bytes: 10000000, max_rows: 500, interval: 10,
|
71
|
+
threads: 4, &block
|
72
|
+
@table = table
|
73
|
+
@skip_invalid = skip_invalid
|
74
|
+
@ignore_unknown = ignore_unknown
|
75
|
+
|
76
|
+
@max_bytes = max_bytes
|
77
|
+
@max_rows = max_rows
|
78
|
+
@interval = interval
|
79
|
+
@threads = threads
|
80
|
+
@callback = block
|
81
|
+
|
82
|
+
@batch = nil
|
83
|
+
|
84
|
+
@thread_pool = Concurrent::FixedThreadPool.new @threads
|
85
|
+
|
86
|
+
@cond = new_cond
|
87
|
+
|
88
|
+
# init MonitorMixin
|
89
|
+
super()
|
90
|
+
end
|
91
|
+
|
92
|
+
##
|
93
|
+
# Adds rows to the async inserter to be inserted. Rows will be
|
94
|
+
# collected in batches and inserted together.
|
95
|
+
# See {Google::Cloud::Bigquery::Table#insert_async}.
|
96
|
+
#
|
97
|
+
# @param [Hash, Array<Hash>] rows A hash object or array of hash
|
98
|
+
# objects containing the data.
|
99
|
+
#
|
100
|
+
def insert rows
|
101
|
+
return nil if rows.nil?
|
102
|
+
return nil if rows.is_a?(Array) && rows.empty?
|
103
|
+
rows = [rows] if rows.is_a? Hash
|
104
|
+
|
105
|
+
synchronize do
|
106
|
+
rows.each do |row|
|
107
|
+
if @batch.nil?
|
108
|
+
@batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
|
109
|
+
@batch.insert row
|
110
|
+
else
|
111
|
+
unless @batch.try_insert row
|
112
|
+
push_batch_request!
|
113
|
+
|
114
|
+
@batch = Batch.new max_bytes: @max_bytes,
|
115
|
+
max_rows: @max_rows
|
116
|
+
@batch.insert row
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
@batch_created_at ||= ::Time.now
|
121
|
+
@background_thread ||= Thread.new { run_background }
|
122
|
+
|
123
|
+
push_batch_request! if @batch.ready?
|
124
|
+
end
|
125
|
+
|
126
|
+
@cond.signal
|
127
|
+
end
|
128
|
+
|
129
|
+
true
|
130
|
+
end
|
131
|
+
|
132
|
+
##
|
133
|
+
# Begins the process of stopping the inserter. Rows already in the
|
134
|
+
# queue will be inserted, but no new rows can be added. Use {#wait!}
|
135
|
+
# to block until the inserter is fully stopped and all pending rows
|
136
|
+
# have been inserted.
|
137
|
+
#
|
138
|
+
# @return [AsyncInserter] returns self so calls can be chained.
|
139
|
+
#
|
140
|
+
def stop
|
141
|
+
synchronize do
|
142
|
+
break if @stopped
|
143
|
+
|
144
|
+
@stopped = true
|
145
|
+
push_batch_request!
|
146
|
+
@cond.signal
|
147
|
+
end
|
148
|
+
|
149
|
+
self
|
150
|
+
end
|
151
|
+
|
152
|
+
##
|
153
|
+
# Blocks until the inserter is fully stopped, all pending rows
|
154
|
+
# have been inserted, and all callbacks have completed. Does not stop
|
155
|
+
# the inserter. To stop the inserter, first call {#stop} and then
|
156
|
+
# call {#wait!} to block until the inserter is stopped.
|
157
|
+
#
|
158
|
+
# @return [AsyncInserter] returns self so calls can be chained.
|
159
|
+
#
|
160
|
+
def wait! timeout = nil
|
161
|
+
synchronize do
|
162
|
+
@thread_pool.shutdown
|
163
|
+
@thread_pool.wait_for_termination timeout
|
164
|
+
end
|
165
|
+
|
166
|
+
self
|
167
|
+
end
|
168
|
+
|
169
|
+
##
|
170
|
+
# Forces all rows in the current batch to be inserted immediately.
|
171
|
+
#
|
172
|
+
# @return [AsyncInserter] returns self so calls can be chained.
|
173
|
+
#
|
174
|
+
def flush
|
175
|
+
synchronize do
|
176
|
+
push_batch_request!
|
177
|
+
@cond.signal
|
178
|
+
end
|
179
|
+
|
180
|
+
self
|
181
|
+
end
|
182
|
+
|
183
|
+
##
|
184
|
+
# Whether the inserter has been started.
|
185
|
+
#
|
186
|
+
# @return [Boolean] `true` when started, `false` otherwise.
|
187
|
+
#
|
188
|
+
def started?
|
189
|
+
!stopped?
|
190
|
+
end
|
191
|
+
|
192
|
+
##
|
193
|
+
# Whether the inserter has been stopped.
|
194
|
+
#
|
195
|
+
# @return [Boolean] `true` when stopped, `false` otherwise.
|
196
|
+
#
|
197
|
+
def stopped?
|
198
|
+
synchronize { @stopped }
|
199
|
+
end
|
200
|
+
|
201
|
+
protected
|
202
|
+
|
203
|
+
def run_background
|
204
|
+
synchronize do
|
205
|
+
until @stopped
|
206
|
+
if @batch.nil?
|
207
|
+
@cond.wait
|
208
|
+
next
|
209
|
+
end
|
210
|
+
|
211
|
+
time_since_first_publish = ::Time.now - @batch_created_at
|
212
|
+
if time_since_first_publish < @interval
|
213
|
+
# still waiting for the interval to insert the batch...
|
214
|
+
@cond.wait(@interval - time_since_first_publish)
|
215
|
+
else
|
216
|
+
# interval met, insert the batch...
|
217
|
+
push_batch_request!
|
218
|
+
@cond.wait
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
def push_batch_request!
|
225
|
+
return unless @batch
|
226
|
+
|
227
|
+
batch_rows = @batch.rows
|
228
|
+
Concurrent::Future.new(executor: @thread_pool) do
|
229
|
+
begin
|
230
|
+
response = @table.insert batch_rows,
|
231
|
+
skip_invalid: @skip_invalid,
|
232
|
+
ignore_unknown: @ignore_unknown
|
233
|
+
@callback.call response if @callback
|
234
|
+
rescue => e
|
235
|
+
raise e.inspect
|
236
|
+
end
|
237
|
+
end.execute
|
238
|
+
|
239
|
+
@batch = nil
|
240
|
+
@batch_created_at = nil
|
241
|
+
end
|
242
|
+
|
243
|
+
##
|
244
|
+
# @private
|
245
|
+
class Batch
|
246
|
+
attr_reader :max_bytes, :max_rows, :rows
|
247
|
+
|
248
|
+
def initialize max_bytes: 10000000, max_rows: 500
|
249
|
+
@max_bytes = max_bytes
|
250
|
+
@max_rows = max_rows
|
251
|
+
@rows = []
|
252
|
+
end
|
253
|
+
|
254
|
+
def insert row
|
255
|
+
@rows << row
|
256
|
+
end
|
257
|
+
|
258
|
+
def try_insert row
|
259
|
+
addl_bytes = row.to_json.bytes.size + 1
|
260
|
+
return false if current_bytes + addl_bytes >= @max_bytes
|
261
|
+
return false if @rows.count + 1 >= @max_rows
|
262
|
+
|
263
|
+
insert row
|
264
|
+
true
|
265
|
+
end
|
266
|
+
|
267
|
+
def ready?
|
268
|
+
current_bytes >= @max_bytes || rows.count >= @max_rows
|
269
|
+
end
|
270
|
+
|
271
|
+
def current_bytes
|
272
|
+
# TODO: add to a counter instead of calling #to_json each time
|
273
|
+
Convert.to_json_rows(rows).to_json.bytes.size
|
274
|
+
end
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
@@ -15,7 +15,6 @@
|
|
15
15
|
|
16
16
|
require "google/cloud/errors"
|
17
17
|
require "google/cloud/bigquery/service"
|
18
|
-
require "google/cloud/bigquery/data"
|
19
18
|
require "google/cloud/bigquery/table/list"
|
20
19
|
require "google/apis/bigquery_v2"
|
21
20
|
|
@@ -52,16 +51,17 @@ module Google
|
|
52
51
|
attr_accessor :gapi
|
53
52
|
|
54
53
|
##
|
55
|
-
# @private Create an empty
|
54
|
+
# @private Create an empty View object.
|
56
55
|
def initialize
|
57
56
|
@service = nil
|
58
57
|
@gapi = Google::Apis::BigqueryV2::Table.new
|
59
58
|
end
|
60
59
|
|
61
60
|
##
|
62
|
-
# A unique ID for this
|
63
|
-
#
|
64
|
-
#
|
61
|
+
# A unique ID for this view.
|
62
|
+
#
|
63
|
+
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
64
|
+
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
65
65
|
#
|
66
66
|
# @!group Attributes
|
67
67
|
#
|
@@ -70,7 +70,10 @@ module Google
|
|
70
70
|
end
|
71
71
|
|
72
72
|
##
|
73
|
-
# The ID of the `Dataset` containing this
|
73
|
+
# The ID of the `Dataset` containing this view.
|
74
|
+
#
|
75
|
+
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
76
|
+
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
74
77
|
#
|
75
78
|
# @!group Attributes
|
76
79
|
#
|
@@ -79,7 +82,9 @@ module Google
|
|
79
82
|
end
|
80
83
|
|
81
84
|
##
|
82
|
-
# The ID of the `Project` containing this
|
85
|
+
# The ID of the `Project` containing this view.
|
86
|
+
#
|
87
|
+
# @return [String] The project ID.
|
83
88
|
#
|
84
89
|
# @!group Attributes
|
85
90
|
#
|
@@ -97,7 +102,7 @@ module Google
|
|
97
102
|
end
|
98
103
|
|
99
104
|
##
|
100
|
-
# The combined Project ID, Dataset ID, and Table ID for this
|
105
|
+
# The combined Project ID, Dataset ID, and Table ID for this view, in
|
101
106
|
# the format specified by the [Legacy SQL Query
|
102
107
|
# Reference](https://cloud.google.com/bigquery/query-reference#from):
|
103
108
|
# `project_name:datasetId.tableId`. To use this value in queries see
|
@@ -144,7 +149,9 @@ module Google
|
|
144
149
|
end
|
145
150
|
|
146
151
|
##
|
147
|
-
# The name of the
|
152
|
+
# The name of the view.
|
153
|
+
#
|
154
|
+
# @return [String] The friendly name.
|
148
155
|
#
|
149
156
|
# @!group Attributes
|
150
157
|
#
|
@@ -153,7 +160,9 @@ module Google
|
|
153
160
|
end
|
154
161
|
|
155
162
|
##
|
156
|
-
# Updates the name of the
|
163
|
+
# Updates the name of the view.
|
164
|
+
#
|
165
|
+
# @param [String] new_name The new friendly name.
|
157
166
|
#
|
158
167
|
# @!group Attributes
|
159
168
|
#
|
@@ -163,7 +172,9 @@ module Google
|
|
163
172
|
end
|
164
173
|
|
165
174
|
##
|
166
|
-
#
|
175
|
+
# The ETag hash of the view.
|
176
|
+
#
|
177
|
+
# @return [String] The ETag hash.
|
167
178
|
#
|
168
179
|
# @!group Attributes
|
169
180
|
#
|
@@ -173,7 +184,9 @@ module Google
|
|
173
184
|
end
|
174
185
|
|
175
186
|
##
|
176
|
-
# A URL that can be used to access the
|
187
|
+
# A URL that can be used to access the view using the REST API.
|
188
|
+
#
|
189
|
+
# @return [String] A REST URL for the resource.
|
177
190
|
#
|
178
191
|
# @!group Attributes
|
179
192
|
#
|
@@ -183,7 +196,9 @@ module Google
|
|
183
196
|
end
|
184
197
|
|
185
198
|
##
|
186
|
-
#
|
199
|
+
# A user-friendly description of the view.
|
200
|
+
#
|
201
|
+
# @return [String] The description.
|
187
202
|
#
|
188
203
|
# @!group Attributes
|
189
204
|
#
|
@@ -193,7 +208,9 @@ module Google
|
|
193
208
|
end
|
194
209
|
|
195
210
|
##
|
196
|
-
# Updates the description of the
|
211
|
+
# Updates the user-friendly description of the view.
|
212
|
+
#
|
213
|
+
# @param [String] new_description The new user-friendly description.
|
197
214
|
#
|
198
215
|
# @!group Attributes
|
199
216
|
#
|
@@ -203,7 +220,9 @@ module Google
|
|
203
220
|
end
|
204
221
|
|
205
222
|
##
|
206
|
-
# The time when this
|
223
|
+
# The time when this view was created.
|
224
|
+
#
|
225
|
+
# @return [Time, nil] The creation time.
|
207
226
|
#
|
208
227
|
# @!group Attributes
|
209
228
|
#
|
@@ -217,9 +236,11 @@ module Google
|
|
217
236
|
end
|
218
237
|
|
219
238
|
##
|
220
|
-
# The time when this
|
221
|
-
# If not present, the
|
222
|
-
# Expired
|
239
|
+
# The time when this view expires.
|
240
|
+
# If not present, the view will persist indefinitely.
|
241
|
+
# Expired views will be deleted and their storage reclaimed.
|
242
|
+
#
|
243
|
+
# @return [Time, nil] The expiration time.
|
223
244
|
#
|
224
245
|
# @!group Attributes
|
225
246
|
#
|
@@ -233,7 +254,9 @@ module Google
|
|
233
254
|
end
|
234
255
|
|
235
256
|
##
|
236
|
-
# The date when this
|
257
|
+
# The date when this view was last modified.
|
258
|
+
#
|
259
|
+
# @return [Time, nil] The last modified time.
|
237
260
|
#
|
238
261
|
# @!group Attributes
|
239
262
|
#
|
@@ -247,7 +270,9 @@ module Google
|
|
247
270
|
end
|
248
271
|
|
249
272
|
##
|
250
|
-
# Checks if the
|
273
|
+
# Checks if the view's type is "TABLE".
|
274
|
+
#
|
275
|
+
# @return [Boolean] `true` when the type is `TABLE`, `false` otherwise.
|
251
276
|
#
|
252
277
|
# @!group Attributes
|
253
278
|
#
|
@@ -256,7 +281,9 @@ module Google
|
|
256
281
|
end
|
257
282
|
|
258
283
|
##
|
259
|
-
# Checks if the
|
284
|
+
# Checks if the view's type is "VIEW".
|
285
|
+
#
|
286
|
+
# @return [Boolean] `true` when the type is `VIEW`, `false` otherwise.
|
260
287
|
#
|
261
288
|
# @!group Attributes
|
262
289
|
#
|
@@ -265,8 +292,22 @@ module Google
|
|
265
292
|
end
|
266
293
|
|
267
294
|
##
|
268
|
-
#
|
269
|
-
#
|
295
|
+
# Checks if the view's type is "EXTERNAL".
|
296
|
+
#
|
297
|
+
# @return [Boolean] `true` when the type is `EXTERNAL`, `false`
|
298
|
+
# otherwise.
|
299
|
+
#
|
300
|
+
# @!group Attributes
|
301
|
+
#
|
302
|
+
def external?
|
303
|
+
@gapi.type == "EXTERNAL"
|
304
|
+
end
|
305
|
+
|
306
|
+
##
|
307
|
+
# The geographic location where the view should reside. Possible
|
308
|
+
# values include `EU` and `US`. The default value is `US`.
|
309
|
+
#
|
310
|
+
# @return [String] The location code.
|
270
311
|
#
|
271
312
|
# @!group Attributes
|
272
313
|
#
|
@@ -275,9 +316,83 @@ module Google
|
|
275
316
|
@gapi.location
|
276
317
|
end
|
277
318
|
|
319
|
+
##
|
320
|
+
# A hash of user-provided labels associated with this view. Labels
|
321
|
+
# are used to organize and group tables and views. See [Using
|
322
|
+
# Labels](https://cloud.google.com/bigquery/docs/labels).
|
323
|
+
#
|
324
|
+
# The returned hash is frozen and changes are not allowed. Use
|
325
|
+
# {#labels=} to replace the entire hash.
|
326
|
+
#
|
327
|
+
# @return [Hash<String, String>] A hash containing key/value pairs.
|
328
|
+
#
|
329
|
+
# @example
|
330
|
+
# require "google/cloud/bigquery"
|
331
|
+
#
|
332
|
+
# bigquery = Google::Cloud::Bigquery.new
|
333
|
+
# dataset = bigquery.dataset "my_dataset"
|
334
|
+
# view = dataset.table "my_view"
|
335
|
+
#
|
336
|
+
# labels = view.labels
|
337
|
+
# labels["department"] #=> "shipping"
|
338
|
+
#
|
339
|
+
# @!group Attributes
|
340
|
+
#
|
341
|
+
def labels
|
342
|
+
m = @gapi.labels
|
343
|
+
m = m.to_h if m.respond_to? :to_h
|
344
|
+
m.dup.freeze
|
345
|
+
end
|
346
|
+
|
347
|
+
##
|
348
|
+
# Updates the hash of user-provided labels associated with this view.
|
349
|
+
# Labels are used to organize and group tables and views. See [Using
|
350
|
+
# Labels](https://cloud.google.com/bigquery/docs/labels).
|
351
|
+
#
|
352
|
+
# @param [Hash<String, String>] labels A hash containing key/value
|
353
|
+
# pairs.
|
354
|
+
#
|
355
|
+
# * Label keys and values can be no longer than 63 characters.
|
356
|
+
# * Label keys and values can contain only lowercase letters, numbers,
|
357
|
+
# underscores, hyphens, and international characters.
|
358
|
+
# * Label keys and values cannot exceed 128 bytes in size.
|
359
|
+
# * Label keys must begin with a letter.
|
360
|
+
# * Label keys must be unique within a view.
|
361
|
+
#
|
362
|
+
# @example
|
363
|
+
# require "google/cloud/bigquery"
|
364
|
+
#
|
365
|
+
# bigquery = Google::Cloud::Bigquery.new
|
366
|
+
# dataset = bigquery.dataset "my_dataset"
|
367
|
+
# view = dataset.table "my_view"
|
368
|
+
#
|
369
|
+
# view.labels = { "department" => "shipping" }
|
370
|
+
#
|
371
|
+
# @!group Attributes
|
372
|
+
#
|
373
|
+
def labels= labels
|
374
|
+
@gapi.labels = labels
|
375
|
+
patch_gapi! :labels
|
376
|
+
end
|
377
|
+
|
278
378
|
##
|
279
379
|
# The schema of the view.
|
280
380
|
#
|
381
|
+
# The returned object is frozen and changes are not allowed.
|
382
|
+
#
|
383
|
+
# @return [Schema] A schema object.
|
384
|
+
#
|
385
|
+
# @example
|
386
|
+
# require "google/cloud/bigquery"
|
387
|
+
#
|
388
|
+
# bigquery = Google::Cloud::Bigquery.new
|
389
|
+
# dataset = bigquery.dataset "my_dataset"
|
390
|
+
# view = dataset.table "my_view"
|
391
|
+
#
|
392
|
+
# schema = view.schema
|
393
|
+
# field = schema.field "name"
|
394
|
+
# field.required? #=> true
|
395
|
+
#
|
281
396
|
# @!group Attributes
|
282
397
|
#
|
283
398
|
def schema
|
@@ -286,7 +401,20 @@ module Google
|
|
286
401
|
end
|
287
402
|
|
288
403
|
##
|
289
|
-
# The fields of the view.
|
404
|
+
# The fields of the view, obtained from its schema.
|
405
|
+
#
|
406
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
407
|
+
#
|
408
|
+
# @example
|
409
|
+
# require "google/cloud/bigquery"
|
410
|
+
#
|
411
|
+
# bigquery = Google::Cloud::Bigquery.new
|
412
|
+
# dataset = bigquery.dataset "my_dataset"
|
413
|
+
# view = dataset.table "my_view"
|
414
|
+
#
|
415
|
+
# view.fields.each do |field|
|
416
|
+
# puts field.name
|
417
|
+
# end
|
290
418
|
#
|
291
419
|
# @!group Attributes
|
292
420
|
#
|
@@ -295,7 +423,20 @@ module Google
|
|
295
423
|
end
|
296
424
|
|
297
425
|
##
|
298
|
-
# The names of the columns in the view.
|
426
|
+
# The names of the columns in the view, obtained from its schema.
|
427
|
+
#
|
428
|
+
# @return [Array<Symbol>] An array of column names.
|
429
|
+
#
|
430
|
+
# @example
|
431
|
+
# require "google/cloud/bigquery"
|
432
|
+
#
|
433
|
+
# bigquery = Google::Cloud::Bigquery.new
|
434
|
+
# dataset = bigquery.dataset "my_dataset"
|
435
|
+
# view = dataset.table "my_view"
|
436
|
+
#
|
437
|
+
# view.headers.each do |header|
|
438
|
+
# puts header
|
439
|
+
# end
|
299
440
|
#
|
300
441
|
# @!group Attributes
|
301
442
|
#
|
@@ -306,6 +447,8 @@ module Google
|
|
306
447
|
##
|
307
448
|
# The query that executes each time the view is loaded.
|
308
449
|
#
|
450
|
+
# @return [String] The query that defines the view.
|
451
|
+
#
|
309
452
|
# @!group Attributes
|
310
453
|
#
|
311
454
|
def query
|
@@ -315,10 +458,39 @@ module Google
|
|
315
458
|
##
|
316
459
|
# Updates the query that executes each time the view is loaded.
|
317
460
|
#
|
461
|
+
# This sets the query using standard SQL. To specify legacy SQL or to
|
462
|
+
# use user-defined function resources, use {#set_query} instead.
|
463
|
+
#
|
318
464
|
# @see https://cloud.google.com/bigquery/query-reference BigQuery Query
|
319
465
|
# Reference
|
320
466
|
#
|
321
467
|
# @param [String] new_query The query that defines the view.
|
468
|
+
#
|
469
|
+
# @example
|
470
|
+
# require "google/cloud/bigquery"
|
471
|
+
#
|
472
|
+
# bigquery = Google::Cloud::Bigquery.new
|
473
|
+
# dataset = bigquery.dataset "my_dataset"
|
474
|
+
# view = dataset.table "my_view"
|
475
|
+
#
|
476
|
+
# view.query = "SELECT first_name FROM " \
|
477
|
+
# "`my_project.my_dataset.my_table`"
|
478
|
+
#
|
479
|
+
# @!group Lifecycle
|
480
|
+
#
|
481
|
+
def query= new_query
|
482
|
+
set_query new_query
|
483
|
+
end
|
484
|
+
|
485
|
+
##
|
486
|
+
# Updates the query that executes each time the view is loaded. Allows
|
487
|
+
# setting of standard vs. legacy SQL and user-defined function
|
488
|
+
# resources.
|
489
|
+
#
|
490
|
+
# @see https://cloud.google.com/bigquery/query-reference BigQuery Query
|
491
|
+
# Reference
|
492
|
+
#
|
493
|
+
# @param [String] query The query that defines the view.
|
322
494
|
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
|
323
495
|
# [standard
|
324
496
|
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
@@ -327,6 +499,13 @@ module Google
|
|
327
499
|
# [legacy
|
328
500
|
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
329
501
|
# dialect. Optional. The default value is false.
|
502
|
+
# @param [Array<String>, String] udfs User-defined function resources
|
503
|
+
# used in the query. May be either a code resource to load from a
|
504
|
+
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
505
|
+
# that contains code for a user-defined function (UDF). Providing an
|
506
|
+
# inline code resource is equivalent to providing a URI for a file
|
507
|
+
# containing the same code. See [User-Defined
|
508
|
+
# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
330
509
|
#
|
331
510
|
# @example
|
332
511
|
# require "google/cloud/bigquery"
|
@@ -335,21 +514,71 @@ module Google
|
|
335
514
|
# dataset = bigquery.dataset "my_dataset"
|
336
515
|
# view = dataset.table "my_view"
|
337
516
|
#
|
338
|
-
# view.
|
339
|
-
#
|
517
|
+
# view.set_query "SELECT first_name FROM " \
|
518
|
+
# "`my_project.my_dataset.my_table`",
|
519
|
+
# standard_sql: true
|
340
520
|
#
|
341
521
|
# @!group Lifecycle
|
342
522
|
#
|
343
|
-
def query
|
344
|
-
@gapi.view
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
523
|
+
def set_query query, standard_sql: nil, legacy_sql: nil, udfs: nil
|
524
|
+
@gapi.view = Google::Apis::BigqueryV2::ViewDefinition.new \
|
525
|
+
query: query,
|
526
|
+
use_legacy_sql: Convert.resolve_legacy_sql(standard_sql,
|
527
|
+
legacy_sql),
|
528
|
+
user_defined_function_resources: udfs_gapi(udfs)
|
529
|
+
patch_view_gapi!
|
349
530
|
end
|
350
531
|
|
351
532
|
##
|
352
|
-
#
|
533
|
+
# Checks if the view's query is using legacy SQL.
|
534
|
+
#
|
535
|
+
# @return [Boolean] `true` when legacy SQL is used, `false` otherwise.
|
536
|
+
#
|
537
|
+
# @!group Attributes
|
538
|
+
#
|
539
|
+
def query_legacy_sql?
|
540
|
+
val = @gapi.view.use_legacy_sql
|
541
|
+
return true if val.nil?
|
542
|
+
val
|
543
|
+
end
|
544
|
+
|
545
|
+
##
|
546
|
+
# Checks if the view's query is using standard SQL.
|
547
|
+
#
|
548
|
+
# @return [Boolean] `true` when standard SQL is used, `false` otherwise.
|
549
|
+
#
|
550
|
+
# @!group Attributes
|
551
|
+
#
|
552
|
+
def query_standard_sql?
|
553
|
+
!query_legacy_sql?
|
554
|
+
end
|
555
|
+
|
556
|
+
##
|
557
|
+
# The user-defined function resources used in the view's query. May be
|
558
|
+
# either a code resource to load from a Google Cloud Storage URI
|
559
|
+
# (`gs://bucket/path`), or an inline resource that contains code for a
|
560
|
+
# user-defined function (UDF). Providing an inline code resource is
|
561
|
+
# equivalent to providing a URI for a file containing the same code. See
|
562
|
+
# [User-Defined
|
563
|
+
# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
564
|
+
#
|
565
|
+
# @return [Array<String>] An array containing Google Cloud Storage URIs
|
566
|
+
# and/or inline source code.
|
567
|
+
#
|
568
|
+
# @!group Attributes
|
569
|
+
#
|
570
|
+
def query_udfs
|
571
|
+
udfs_gapi = @gapi.view.user_defined_function_resources
|
572
|
+
return [] if udfs_gapi.nil?
|
573
|
+
Array(udfs_gapi).map { |udf| udf.inline_code || udf.resource_uri }
|
574
|
+
end
|
575
|
+
|
576
|
+
##
|
577
|
+
# Runs a query to retrieve all data from the view, in a synchronous
|
578
|
+
# method that blocks for a response. In this method, a {QueryJob} is
|
579
|
+
# created and its results are saved to a temporary table, then read from
|
580
|
+
# the table. Timeouts and transient errors are generally handled as
|
581
|
+
# needed to complete the query.
|
353
582
|
#
|
354
583
|
# @param [Integer] max The maximum number of rows of data to return per
|
355
584
|
# page of results. Setting this flag to a small value such as 1000 and
|
@@ -357,23 +586,13 @@ module Google
|
|
357
586
|
# result set is large. In addition to this limit, responses are also
|
358
587
|
# limited to 10 MB. By default, there is no maximum row count, and
|
359
588
|
# only the byte limit applies.
|
360
|
-
# @param [Integer] timeout How long to wait for the query to complete,
|
361
|
-
# in milliseconds, before the request times out and returns. Note that
|
362
|
-
# this is only a timeout for the request, not the query. If the query
|
363
|
-
# takes longer to run than the timeout value, the call returns without
|
364
|
-
# any results and with QueryData#complete? set to false. The default
|
365
|
-
# value is 10000 milliseconds (10 seconds).
|
366
589
|
# @param [Boolean] cache Whether to look for the result in the query
|
367
590
|
# cache. The query cache is a best-effort cache that will be flushed
|
368
591
|
# whenever tables in the query are modified. The default value is
|
369
592
|
# true. For more information, see [query
|
370
593
|
# caching](https://developers.google.com/bigquery/querying-data).
|
371
|
-
# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
|
372
|
-
# job. Instead, if the query is valid, BigQuery returns statistics
|
373
|
-
# about the job such as how many bytes would be processed. If the
|
374
|
-
# query is invalid, an error returns. The default value is `false`.
|
375
594
|
#
|
376
|
-
# @return [Google::Cloud::Bigquery::
|
595
|
+
# @return [Google::Cloud::Bigquery::Data]
|
377
596
|
#
|
378
597
|
# @example
|
379
598
|
# require "google/cloud/bigquery"
|
@@ -390,18 +609,31 @@ module Google
|
|
390
609
|
#
|
391
610
|
# @!group Data
|
392
611
|
#
|
393
|
-
def data max: nil,
|
612
|
+
def data max: nil, cache: true
|
394
613
|
sql = "SELECT * FROM #{query_id}"
|
395
614
|
ensure_service!
|
396
|
-
|
397
|
-
gapi = service.
|
398
|
-
|
615
|
+
|
616
|
+
gapi = service.query_job sql, cache: cache
|
617
|
+
job = Job.from_gapi gapi, service
|
618
|
+
job.wait_until_done!
|
619
|
+
|
620
|
+
if job.failed?
|
621
|
+
begin
|
622
|
+
# raise to activate ruby exception cause handling
|
623
|
+
fail job.gapi_error
|
624
|
+
rescue => e
|
625
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
626
|
+
raise Google::Cloud::Error.from_error(e)
|
627
|
+
end
|
628
|
+
end
|
629
|
+
|
630
|
+
job.data max: max
|
399
631
|
end
|
400
632
|
|
401
633
|
##
|
402
|
-
# Permanently deletes the
|
634
|
+
# Permanently deletes the view.
|
403
635
|
#
|
404
|
-
# @return [Boolean] Returns `true` if the
|
636
|
+
# @return [Boolean] Returns `true` if the view was deleted.
|
405
637
|
#
|
406
638
|
# @example
|
407
639
|
# require "google/cloud/bigquery"
|
@@ -421,7 +653,7 @@ module Google
|
|
421
653
|
end
|
422
654
|
|
423
655
|
##
|
424
|
-
# Reloads the
|
656
|
+
# Reloads the view with current data from the BigQuery service.
|
425
657
|
#
|
426
658
|
# @!group Lifecycle
|
427
659
|
#
|
@@ -449,12 +681,6 @@ module Google
|
|
449
681
|
fail "Must have active connection" unless service
|
450
682
|
end
|
451
683
|
|
452
|
-
def resolve_legacy_sql legacy_sql, standard_sql
|
453
|
-
return legacy_sql unless legacy_sql.nil?
|
454
|
-
return !standard_sql unless standard_sql.nil?
|
455
|
-
false
|
456
|
-
end
|
457
|
-
|
458
684
|
def patch_gapi! *attributes
|
459
685
|
return if attributes.empty?
|
460
686
|
patch_args = Hash[attributes.map do |attr|
|
@@ -463,21 +689,19 @@ module Google
|
|
463
689
|
patch_table_gapi patch_args
|
464
690
|
end
|
465
691
|
|
466
|
-
def patch_view_gapi! *attributes
|
467
|
-
return if attributes.empty?
|
468
|
-
patch_args = Hash[attributes.map do |attr|
|
469
|
-
[attr, @gapi.view.send(attr)]
|
470
|
-
end]
|
471
|
-
patch_view_args = Google::Apis::BigqueryV2::ViewDefinition.new(
|
472
|
-
patch_args
|
473
|
-
)
|
474
|
-
patch_table_gapi view: patch_view_args
|
475
|
-
end
|
476
|
-
|
477
692
|
def patch_table_gapi patch_args
|
478
693
|
ensure_service!
|
479
694
|
patch_gapi = Google::Apis::BigqueryV2::Table.new patch_args
|
695
|
+
patch_gapi.etag = etag if etag
|
480
696
|
@gapi = service.patch_table dataset_id, table_id, patch_gapi
|
697
|
+
|
698
|
+
# TODO: restore original impl after acceptance test indicates that
|
699
|
+
# service etag bug is fixed
|
700
|
+
reload!
|
701
|
+
end
|
702
|
+
|
703
|
+
def patch_view_gapi!
|
704
|
+
patch_table_gapi view: @gapi.view
|
481
705
|
end
|
482
706
|
|
483
707
|
##
|
@@ -496,6 +720,19 @@ module Google
|
|
496
720
|
def data_complete?
|
497
721
|
@gapi.is_a? Google::Apis::BigqueryV2::Table
|
498
722
|
end
|
723
|
+
|
724
|
+
def udfs_gapi array_or_str
|
725
|
+
return [] if array_or_str.nil?
|
726
|
+
Array(array_or_str).map do |uri_or_code|
|
727
|
+
resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
|
728
|
+
if uri_or_code.start_with?("gs://")
|
729
|
+
resource.resource_uri = uri_or_code
|
730
|
+
else
|
731
|
+
resource.inline_code = uri_or_code
|
732
|
+
end
|
733
|
+
resource
|
734
|
+
end
|
735
|
+
end
|
499
736
|
end
|
500
737
|
end
|
501
738
|
end
|