google-cloud-bigquery 0.28.0 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -0,0 +1,280 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/bigquery/convert"
|
17
|
+
require "monitor"
|
18
|
+
require "concurrent"
|
19
|
+
|
20
|
+
module Google
|
21
|
+
module Cloud
|
22
|
+
module Bigquery
|
23
|
+
class Table
|
24
|
+
##
|
25
|
+
# # AsyncInserter
|
26
|
+
#
|
27
|
+
# Used to insert multiple rows in batches to a table. See
|
28
|
+
# {Google::Cloud::Bigquery::Table#insert_async}.
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# require "google/cloud/bigquery"
|
32
|
+
#
|
33
|
+
# bigquery = Google::Cloud::Bigquery.new
|
34
|
+
# dataset = bigquery.dataset "my_dataset"
|
35
|
+
# table = dataset.table "my_table"
|
36
|
+
# inserter = table.insert_async do |response|
|
37
|
+
# log_insert "inserted #{response.insert_count} rows " \
|
38
|
+
# "with #{response.error_count} errors"
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# rows = [
|
42
|
+
# { "first_name" => "Alice", "age" => 21 },
|
43
|
+
# { "first_name" => "Bob", "age" => 22 }
|
44
|
+
# ]
|
45
|
+
# inserter.insert rows
|
46
|
+
#
|
47
|
+
# inserter.stop.wait!
|
48
|
+
#
|
49
|
+
# @attr_reader [Integer] max_bytes The maximum size of rows to be
|
50
|
+
# collected before the batch is inserted. Default is 10,000,000
|
51
|
+
# (10MB).
|
52
|
+
# @attr_reader [Integer] max_rows The maximum number of rows to be
|
53
|
+
# collected before the batch is inserted. Default is 500.
|
54
|
+
# @attr_reader [Numeric] interval The number of seconds to collect rows
|
55
|
+
# before the batch is inserted. Default is 10.
|
56
|
+
# @attr_reader [Integer] threads The number of threads used to insert
|
57
|
+
# rows. Default is 4.
|
58
|
+
#
|
59
|
+
class AsyncInserter
  include MonitorMixin

  attr_reader :max_bytes, :max_rows, :interval, :threads
  ##
  # @private Implementation accessors
  attr_reader :table, :batch

  ##
  # @private
  #
  # Stores the batching limits, the optional per-batch callback, and
  # creates the thread pool used to run the insert requests.
  def initialize table, skip_invalid: nil, ignore_unknown: nil,
                 max_bytes: 10000000, max_rows: 500, interval: 10,
                 threads: 4, &block
    @table = table
    @skip_invalid = skip_invalid
    @ignore_unknown = ignore_unknown

    @max_bytes = max_bytes
    @max_rows = max_rows
    @interval = interval
    @threads = threads
    @callback = block

    @batch = nil
    @batch_created_at = nil
    @background_thread = nil
    # Explicit false so that #stopped? returns a real boolean before #stop.
    @stopped = false

    @thread_pool = Concurrent::FixedThreadPool.new @threads

    # init MonitorMixin first; the condition variable belongs to the
    # monitor and must only be created once the monitor exists.
    super()

    @cond = new_cond
  end

  ##
  # Adds rows to the async inserter to be inserted. Rows will be
  # collected in batches and inserted together.
  # See {Google::Cloud::Bigquery::Table#insert_async}.
  #
  # @param [Hash, Array<Hash>] rows A hash object or array of hash
  #   objects containing the data.
  #
  # @return [true, nil] `nil` when `rows` is `nil` or an empty array,
  #   `true` otherwise.
  #
  def insert rows
    return nil if rows.nil?
    return nil if rows.is_a?(Array) && rows.empty?
    rows = [rows] if rows.is_a? Hash

    # NOTE(review): rows are still accepted after #stop, although the
    # #stop documentation says no new rows can be added. Confirm the
    # intended contract before rejecting them here.
    synchronize do
      rows.each do |row|
        # Start a new batch when there is none, or when the current one
        # cannot take this row. push_batch_request! is a no-op when
        # there is no current batch.
        if @batch.nil? || !@batch.try_insert(row)
          push_batch_request!

          @batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
          @batch.insert row
        end

        @batch_created_at ||= ::Time.now
        @background_thread ||= Thread.new { run_background }

        push_batch_request! if @batch.ready?
      end

      @cond.signal
    end

    true
  end

  ##
  # Begins the process of stopping the inserter. Rows already in the
  # queue will be inserted, but no new rows can be added. Use {#wait!}
  # to block until the inserter is fully stopped and all pending rows
  # have been inserted.
  #
  # @return [AsyncInserter] returns self so calls can be chained.
  #
  def stop
    synchronize do
      break if @stopped

      @stopped = true
      push_batch_request!
      @cond.signal
    end

    self
  end

  ##
  # Blocks until the inserter is fully stopped, all pending rows
  # have been inserted, and all callbacks have completed. Does not stop
  # the inserter. To stop the inserter, first call {#stop} and then
  # call {#wait!} to block until the inserter is stopped.
  #
  # @param [Numeric, nil] timeout The value passed to the thread pool's
  #   `wait_for_termination`. Default is `nil`.
  #
  # @return [AsyncInserter] returns self so calls can be chained.
  #
  def wait! timeout = nil
    synchronize { @thread_pool.shutdown }

    # Wait outside of the monitor. Holding it while blocked here would
    # stall every other thread that needs the inserter, including a
    # callback that calls back into it (e.g. #stopped?).
    @thread_pool.wait_for_termination timeout

    self
  end

  ##
  # Forces all rows in the current batch to be inserted immediately.
  #
  # @return [AsyncInserter] returns self so calls can be chained.
  #
  def flush
    synchronize do
      push_batch_request!
      @cond.signal
    end

    self
  end

  ##
  # Whether the inserter has been started.
  #
  # @return [Boolean] `true` when started, `false` otherwise.
  #
  def started?
    !stopped?
  end

  ##
  # Whether the inserter has been stopped.
  #
  # @return [Boolean] `true` when stopped, `false` otherwise.
  #
  def stopped?
    synchronize { @stopped }
  end

  protected

  ##
  # Body of the background thread. Until the inserter is stopped, it
  # sleeps on the condition variable and pushes the current batch once
  # it has been open for at least `interval` seconds.
  def run_background
    synchronize do
      until @stopped
        if @batch.nil?
          @cond.wait
          next
        end

        time_since_first_publish = ::Time.now - @batch_created_at
        if time_since_first_publish < @interval
          # still waiting for the interval to insert the batch...
          @cond.wait(@interval - time_since_first_publish)
        else
          # interval met, insert the batch...
          push_batch_request!
          @cond.wait
        end
      end
    end
  end

  ##
  # Hands the rows of the current batch to the thread pool for insertion
  # and clears the current batch. Does nothing when there is no batch.
  # Callers are expected to hold the monitor.
  def push_batch_request!
    return unless @batch

    batch_rows = @batch.rows
    Concurrent::Future.new(executor: @thread_pool) do
      # Errors are deliberately not rescued here, so the original
      # exception (class, message and backtrace) reaches the future
      # unchanged instead of being flattened into a RuntimeError.
      response = @table.insert batch_rows,
                               skip_invalid: @skip_invalid,
                               ignore_unknown: @ignore_unknown
      @callback.call response if @callback
    end.execute

    @batch = nil
    @batch_created_at = nil
  end

  ##
  # @private
  #
  # A set of rows collected for a single insert request, bounded by a
  # serialized byte size and a row count.
  class Batch
    attr_reader :max_bytes, :max_rows, :rows

    def initialize max_bytes: 10000000, max_rows: 500
      @max_bytes = max_bytes
      @max_rows = max_rows
      @rows = []
    end

    ##
    # Appends the row without checking any limit.
    def insert row
      @rows << row
    end

    ##
    # Appends the row only when the batch stays below both limits.
    #
    # @return [Boolean] `true` when the row was added, `false` otherwise.
    def try_insert row
      # +1 for the separator between rows in the serialized array
      addl_bytes = row.to_json.bytesize + 1
      return false if current_bytes + addl_bytes >= @max_bytes
      return false if @rows.count + 1 >= @max_rows

      insert row
      true
    end

    ##
    # Whether the batch has reached either limit and should be sent.
    def ready?
      current_bytes >= @max_bytes || rows.count >= @max_rows
    end

    ##
    # The byte size of the batch's rows when serialized to JSON.
    def current_bytes
      # TODO: add to a counter instead of calling #to_json each time
      Convert.to_json_rows(rows).to_json.bytesize
    end
  end
end
|
277
|
+
end
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
@@ -15,7 +15,6 @@
|
|
15
15
|
|
16
16
|
require "google/cloud/errors"
|
17
17
|
require "google/cloud/bigquery/service"
|
18
|
-
require "google/cloud/bigquery/data"
|
19
18
|
require "google/cloud/bigquery/table/list"
|
20
19
|
require "google/apis/bigquery_v2"
|
21
20
|
|
@@ -52,16 +51,17 @@ module Google
|
|
52
51
|
attr_accessor :gapi
|
53
52
|
|
54
53
|
##
|
55
|
-
# @private Create an empty
|
54
|
+
# @private Create an empty View object.
|
56
55
|
def initialize
|
57
56
|
@service = nil
|
58
57
|
@gapi = Google::Apis::BigqueryV2::Table.new
|
59
58
|
end
|
60
59
|
|
61
60
|
##
|
62
|
-
# A unique ID for this
|
63
|
-
#
|
64
|
-
#
|
61
|
+
# A unique ID for this view.
|
62
|
+
#
|
63
|
+
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
64
|
+
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
65
65
|
#
|
66
66
|
# @!group Attributes
|
67
67
|
#
|
@@ -70,7 +70,10 @@ module Google
|
|
70
70
|
end
|
71
71
|
|
72
72
|
##
|
73
|
-
# The ID of the `Dataset` containing this
|
73
|
+
# The ID of the `Dataset` containing this view.
|
74
|
+
#
|
75
|
+
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
76
|
+
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
74
77
|
#
|
75
78
|
# @!group Attributes
|
76
79
|
#
|
@@ -79,7 +82,9 @@ module Google
|
|
79
82
|
end
|
80
83
|
|
81
84
|
##
|
82
|
-
# The ID of the `Project` containing this
|
85
|
+
# The ID of the `Project` containing this view.
|
86
|
+
#
|
87
|
+
# @return [String] The project ID.
|
83
88
|
#
|
84
89
|
# @!group Attributes
|
85
90
|
#
|
@@ -97,7 +102,7 @@ module Google
|
|
97
102
|
end
|
98
103
|
|
99
104
|
##
|
100
|
-
# The combined Project ID, Dataset ID, and Table ID for this
|
105
|
+
# The combined Project ID, Dataset ID, and Table ID for this view, in
|
101
106
|
# the format specified by the [Legacy SQL Query
|
102
107
|
# Reference](https://cloud.google.com/bigquery/query-reference#from):
|
103
108
|
# `project_name:datasetId.tableId`. To use this value in queries see
|
@@ -144,7 +149,9 @@ module Google
|
|
144
149
|
end
|
145
150
|
|
146
151
|
##
|
147
|
-
# The name of the
|
152
|
+
# The name of the view.
|
153
|
+
#
|
154
|
+
# @return [String] The friendly name.
|
148
155
|
#
|
149
156
|
# @!group Attributes
|
150
157
|
#
|
@@ -153,7 +160,9 @@ module Google
|
|
153
160
|
end
|
154
161
|
|
155
162
|
##
|
156
|
-
# Updates the name of the
|
163
|
+
# Updates the name of the view.
|
164
|
+
#
|
165
|
+
# @param [String] new_name The new friendly name.
|
157
166
|
#
|
158
167
|
# @!group Attributes
|
159
168
|
#
|
@@ -163,7 +172,9 @@ module Google
|
|
163
172
|
end
|
164
173
|
|
165
174
|
##
|
166
|
-
#
|
175
|
+
# The ETag hash of the view.
|
176
|
+
#
|
177
|
+
# @return [String] The ETag hash.
|
167
178
|
#
|
168
179
|
# @!group Attributes
|
169
180
|
#
|
@@ -173,7 +184,9 @@ module Google
|
|
173
184
|
end
|
174
185
|
|
175
186
|
##
|
176
|
-
# A URL that can be used to access the
|
187
|
+
# A URL that can be used to access the view using the REST API.
|
188
|
+
#
|
189
|
+
# @return [String] A REST URL for the resource.
|
177
190
|
#
|
178
191
|
# @!group Attributes
|
179
192
|
#
|
@@ -183,7 +196,9 @@ module Google
|
|
183
196
|
end
|
184
197
|
|
185
198
|
##
|
186
|
-
#
|
199
|
+
# A user-friendly description of the view.
|
200
|
+
#
|
201
|
+
# @return [String] The description.
|
187
202
|
#
|
188
203
|
# @!group Attributes
|
189
204
|
#
|
@@ -193,7 +208,9 @@ module Google
|
|
193
208
|
end
|
194
209
|
|
195
210
|
##
|
196
|
-
# Updates the description of the
|
211
|
+
# Updates the user-friendly description of the view.
|
212
|
+
#
|
213
|
+
# @param [String] new_description The new user-friendly description.
|
197
214
|
#
|
198
215
|
# @!group Attributes
|
199
216
|
#
|
@@ -203,7 +220,9 @@ module Google
|
|
203
220
|
end
|
204
221
|
|
205
222
|
##
|
206
|
-
# The time when this
|
223
|
+
# The time when this view was created.
|
224
|
+
#
|
225
|
+
# @return [Time, nil] The creation time.
|
207
226
|
#
|
208
227
|
# @!group Attributes
|
209
228
|
#
|
@@ -217,9 +236,11 @@ module Google
|
|
217
236
|
end
|
218
237
|
|
219
238
|
##
|
220
|
-
# The time when this
|
221
|
-
# If not present, the
|
222
|
-
# Expired
|
239
|
+
# The time when this view expires.
|
240
|
+
# If not present, the view will persist indefinitely.
|
241
|
+
# Expired views will be deleted and their storage reclaimed.
|
242
|
+
#
|
243
|
+
# @return [Time, nil] The expiration time.
|
223
244
|
#
|
224
245
|
# @!group Attributes
|
225
246
|
#
|
@@ -233,7 +254,9 @@ module Google
|
|
233
254
|
end
|
234
255
|
|
235
256
|
##
|
236
|
-
# The date when this
|
257
|
+
# The date when this view was last modified.
|
258
|
+
#
|
259
|
+
# @return [Time, nil] The last modified time.
|
237
260
|
#
|
238
261
|
# @!group Attributes
|
239
262
|
#
|
@@ -247,7 +270,9 @@ module Google
|
|
247
270
|
end
|
248
271
|
|
249
272
|
##
|
250
|
-
# Checks if the
|
273
|
+
# Checks if the view's type is "TABLE".
|
274
|
+
#
|
275
|
+
# @return [Boolean] `true` when the type is `TABLE`, `false` otherwise.
|
251
276
|
#
|
252
277
|
# @!group Attributes
|
253
278
|
#
|
@@ -256,7 +281,9 @@ module Google
|
|
256
281
|
end
|
257
282
|
|
258
283
|
##
|
259
|
-
# Checks if the
|
284
|
+
# Checks if the view's type is "VIEW".
|
285
|
+
#
|
286
|
+
# @return [Boolean] `true` when the type is `VIEW`, `false` otherwise.
|
260
287
|
#
|
261
288
|
# @!group Attributes
|
262
289
|
#
|
@@ -265,8 +292,22 @@ module Google
|
|
265
292
|
end
|
266
293
|
|
267
294
|
##
|
268
|
-
#
|
269
|
-
#
|
295
|
+
# Checks if the view's type is "EXTERNAL".
|
296
|
+
#
|
297
|
+
# @return [Boolean] `true` when the type is `EXTERNAL`, `false`
|
298
|
+
# otherwise.
|
299
|
+
#
|
300
|
+
# @!group Attributes
|
301
|
+
#
|
302
|
+
# Compares the type reported by the underlying API object with "EXTERNAL".
def external?
  resource_type = @gapi.type
  resource_type == "EXTERNAL"
end
|
305
|
+
|
306
|
+
##
|
307
|
+
# The geographic location where the view should reside. Possible
|
308
|
+
# values include `EU` and `US`. The default value is `US`.
|
309
|
+
#
|
310
|
+
# @return [String] The location code.
|
270
311
|
#
|
271
312
|
# @!group Attributes
|
272
313
|
#
|
@@ -275,9 +316,83 @@ module Google
|
|
275
316
|
@gapi.location
|
276
317
|
end
|
277
318
|
|
319
|
+
##
|
320
|
+
# A hash of user-provided labels associated with this view. Labels
|
321
|
+
# are used to organize and group tables and views. See [Using
|
322
|
+
# Labels](https://cloud.google.com/bigquery/docs/labels).
|
323
|
+
#
|
324
|
+
# The returned hash is frozen and changes are not allowed. Use
|
325
|
+
# {#labels=} to replace the entire hash.
|
326
|
+
#
|
327
|
+
# @return [Hash<String, String>] A hash containing key/value pairs.
|
328
|
+
#
|
329
|
+
# @example
|
330
|
+
# require "google/cloud/bigquery"
|
331
|
+
#
|
332
|
+
# bigquery = Google::Cloud::Bigquery.new
|
333
|
+
# dataset = bigquery.dataset "my_dataset"
|
334
|
+
# view = dataset.table "my_view"
|
335
|
+
#
|
336
|
+
# labels = view.labels
|
337
|
+
# labels["department"] #=> "shipping"
|
338
|
+
#
|
339
|
+
# @!group Attributes
|
340
|
+
#
|
341
|
+
# Returns a frozen copy of the labels held by the underlying API object,
# converted with #to_h when the value supports it.
def labels
  raw = @gapi.labels
  hash = raw.respond_to?(:to_h) ? raw.to_h : raw
  hash.dup.freeze
end
|
346
|
+
|
347
|
+
##
|
348
|
+
# Updates the hash of user-provided labels associated with this view.
|
349
|
+
# Labels are used to organize and group tables and views. See [Using
|
350
|
+
# Labels](https://cloud.google.com/bigquery/docs/labels).
|
351
|
+
#
|
352
|
+
# @param [Hash<String, String>] labels A hash containing key/value
|
353
|
+
# pairs.
|
354
|
+
#
|
355
|
+
# * Label keys and values can be no longer than 63 characters.
|
356
|
+
# * Label keys and values can contain only lowercase letters, numbers,
|
357
|
+
# underscores, hyphens, and international characters.
|
358
|
+
# * Label keys and values cannot exceed 128 bytes in size.
|
359
|
+
# * Label keys must begin with a letter.
|
360
|
+
# * Label keys must be unique within a view.
|
361
|
+
#
|
362
|
+
# @example
|
363
|
+
# require "google/cloud/bigquery"
|
364
|
+
#
|
365
|
+
# bigquery = Google::Cloud::Bigquery.new
|
366
|
+
# dataset = bigquery.dataset "my_dataset"
|
367
|
+
# view = dataset.table "my_view"
|
368
|
+
#
|
369
|
+
# view.labels = { "department" => "shipping" }
|
370
|
+
#
|
371
|
+
# @!group Attributes
|
372
|
+
#
|
373
|
+
# Stores the new labels on the underlying API object, then patches the
# :labels attribute through patch_gapi!.
def labels= labels
  @gapi.labels = labels
  patch_gapi!(:labels)
end
|
377
|
+
|
278
378
|
##
|
279
379
|
# The schema of the view.
|
280
380
|
#
|
381
|
+
# The returned object is frozen and changes are not allowed.
|
382
|
+
#
|
383
|
+
# @return [Schema] A schema object.
|
384
|
+
#
|
385
|
+
# @example
|
386
|
+
# require "google/cloud/bigquery"
|
387
|
+
#
|
388
|
+
# bigquery = Google::Cloud::Bigquery.new
|
389
|
+
# dataset = bigquery.dataset "my_dataset"
|
390
|
+
# view = dataset.table "my_view"
|
391
|
+
#
|
392
|
+
# schema = view.schema
|
393
|
+
# field = schema.field "name"
|
394
|
+
# field.required? #=> true
|
395
|
+
#
|
281
396
|
# @!group Attributes
|
282
397
|
#
|
283
398
|
def schema
|
@@ -286,7 +401,20 @@ module Google
|
|
286
401
|
end
|
287
402
|
|
288
403
|
##
|
289
|
-
# The fields of the view.
|
404
|
+
# The fields of the view, obtained from its schema.
|
405
|
+
#
|
406
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
407
|
+
#
|
408
|
+
# @example
|
409
|
+
# require "google/cloud/bigquery"
|
410
|
+
#
|
411
|
+
# bigquery = Google::Cloud::Bigquery.new
|
412
|
+
# dataset = bigquery.dataset "my_dataset"
|
413
|
+
# view = dataset.table "my_view"
|
414
|
+
#
|
415
|
+
# view.fields.each do |field|
|
416
|
+
# puts field.name
|
417
|
+
# end
|
290
418
|
#
|
291
419
|
# @!group Attributes
|
292
420
|
#
|
@@ -295,7 +423,20 @@ module Google
|
|
295
423
|
end
|
296
424
|
|
297
425
|
##
|
298
|
-
# The names of the columns in the view.
|
426
|
+
# The names of the columns in the view, obtained from its schema.
|
427
|
+
#
|
428
|
+
# @return [Array<Symbol>] An array of column names.
|
429
|
+
#
|
430
|
+
# @example
|
431
|
+
# require "google/cloud/bigquery"
|
432
|
+
#
|
433
|
+
# bigquery = Google::Cloud::Bigquery.new
|
434
|
+
# dataset = bigquery.dataset "my_dataset"
|
435
|
+
# view = dataset.table "my_view"
|
436
|
+
#
|
437
|
+
# view.headers.each do |header|
|
438
|
+
# puts header
|
439
|
+
# end
|
299
440
|
#
|
300
441
|
# @!group Attributes
|
301
442
|
#
|
@@ -306,6 +447,8 @@ module Google
|
|
306
447
|
##
|
307
448
|
# The query that executes each time the view is loaded.
|
308
449
|
#
|
450
|
+
# @return [String] The query that defines the view.
|
451
|
+
#
|
309
452
|
# @!group Attributes
|
310
453
|
#
|
311
454
|
def query
|
@@ -315,10 +458,39 @@ module Google
|
|
315
458
|
##
|
316
459
|
# Updates the query that executes each time the view is loaded.
|
317
460
|
#
|
461
|
+
# This sets the query using standard SQL. To specify legacy SQL or to
|
462
|
+
# use user-defined function resources use {#set_query} instead.
|
463
|
+
#
|
318
464
|
# @see https://cloud.google.com/bigquery/query-reference BigQuery Query
|
319
465
|
# Reference
|
320
466
|
#
|
321
467
|
# @param [String] new_query The query that defines the view.
|
468
|
+
#
|
469
|
+
# @example
|
470
|
+
# require "google/cloud/bigquery"
|
471
|
+
#
|
472
|
+
# bigquery = Google::Cloud::Bigquery.new
|
473
|
+
# dataset = bigquery.dataset "my_dataset"
|
474
|
+
# view = dataset.table "my_view"
|
475
|
+
#
|
476
|
+
# view.query = "SELECT first_name FROM " \
|
477
|
+
# "`my_project.my_dataset.my_table`"
|
478
|
+
#
|
479
|
+
# @!group Lifecycle
|
480
|
+
#
|
481
|
+
# Delegates to #set_query with only the query text, leaving every option
# at its default.
def query= new_query
  set_query(new_query)
end
|
484
|
+
|
485
|
+
##
|
486
|
+
# Updates the query that executes each time the view is loaded. Allows
|
487
|
+
# setting of standard vs. legacy SQL and user-defined function
|
488
|
+
# resources.
|
489
|
+
#
|
490
|
+
# @see https://cloud.google.com/bigquery/query-reference BigQuery Query
|
491
|
+
# Reference
|
492
|
+
#
|
493
|
+
# @param [String] query The query that defines the view.
|
322
494
|
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
|
323
495
|
# [standard
|
324
496
|
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
@@ -327,6 +499,13 @@ module Google
|
|
327
499
|
# [legacy
|
328
500
|
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
329
501
|
# dialect. Optional. The default value is false.
|
502
|
+
# @param [Array<String>, String] udfs User-defined function resources
|
503
|
+
# used in the query. May be either a code resource to load from a
|
504
|
+
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
505
|
+
# that contains code for a user-defined function (UDF). Providing an
|
506
|
+
# inline code resource is equivalent to providing a URI for a file
|
507
|
+
# containing the same code. See [User-Defined
|
508
|
+
# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
330
509
|
#
|
331
510
|
# @example
|
332
511
|
# require "google/cloud/bigquery"
|
@@ -335,21 +514,71 @@ module Google
|
|
335
514
|
# dataset = bigquery.dataset "my_dataset"
|
336
515
|
# view = dataset.table "my_view"
|
337
516
|
#
|
338
|
-
# view.
|
339
|
-
#
|
517
|
+
# view.set_query "SELECT first_name FROM " \
|
518
|
+
# "`my_project.my_dataset.my_table`",
|
519
|
+
# standard_sql: true
|
340
520
|
#
|
341
521
|
# @!group Lifecycle
|
342
522
|
#
|
343
|
-
def query
|
344
|
-
@gapi.view
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
523
|
+
# Builds a new view definition from the query, the resolved SQL dialect
# flag and the UDF resources, assigns it to the underlying API object and
# patches the view.
def set_query query, standard_sql: nil, legacy_sql: nil, udfs: nil
  use_legacy = Convert.resolve_legacy_sql standard_sql, legacy_sql
  udf_resources = udfs_gapi udfs

  @gapi.view = Google::Apis::BigqueryV2::ViewDefinition.new(
    query: query,
    use_legacy_sql: use_legacy,
    user_defined_function_resources: udf_resources
  )
  patch_view_gapi!
end
|
350
531
|
|
351
532
|
##
|
352
|
-
#
|
533
|
+
# Checks if the view's query is using legacy sql.
|
534
|
+
#
|
535
|
+
# @return [Boolean] `true` when legacy sql is used, `false` otherwise.
|
536
|
+
#
|
537
|
+
# @!group Attributes
|
538
|
+
#
|
539
|
+
# Reads the legacy SQL flag from the view definition; a missing (nil)
# flag is reported as true.
def query_legacy_sql?
  flag = @gapi.view.use_legacy_sql
  flag.nil? ? true : flag
end
|
544
|
+
|
545
|
+
##
|
546
|
+
# Checks if the view's query is using standard sql.
|
547
|
+
#
|
548
|
+
# @return [Boolean] `true` when standard sql is used, `false` otherwise.
|
549
|
+
#
|
550
|
+
# @!group Attributes
|
551
|
+
#
|
552
|
+
# The logical negation of #query_legacy_sql?.
def query_standard_sql?
  query_legacy_sql? ? false : true
end
|
555
|
+
|
556
|
+
##
|
557
|
+
# The user-defined function resources used in the view's query. May be
|
558
|
+
# either a code resource to load from a Google Cloud Storage URI
|
559
|
+
# (`gs://bucket/path`), or an inline resource that contains code for a
|
560
|
+
# user-defined function (UDF). Providing an inline code resource is
|
561
|
+
# equivalent to providing a URI for a file containing the same code. See
|
562
|
+
# [User-Defined
|
563
|
+
# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
564
|
+
#
|
565
|
+
# @return [Array<String>] An array containing Google Cloud Storage URIs
|
566
|
+
# and/or inline source code.
|
567
|
+
#
|
568
|
+
# @!group Attributes
|
569
|
+
#
|
570
|
+
# Collects, for each UDF resource of the view definition, its inline code
# or, when that is not set, its resource URI. Returns an empty array when
# the definition has no UDF resources.
def query_udfs
  resources = @gapi.view.user_defined_function_resources
  return [] if resources.nil?

  Array(resources).each_with_object([]) do |resource, sources|
    sources << (resource.inline_code || resource.resource_uri)
  end
end
|
575
|
+
|
576
|
+
##
|
577
|
+
# Runs a query to retrieve all data from the view, in a synchronous
|
578
|
+
# method that blocks for a response. In this method, a {QueryJob} is
|
579
|
+
# created and its results are saved to a temporary table, then read from
|
580
|
+
# the table. Timeouts and transient errors are generally handled as
|
581
|
+
# needed to complete the query.
|
353
582
|
#
|
354
583
|
# @param [Integer] max The maximum number of rows of data to return per
|
355
584
|
# page of results. Setting this flag to a small value such as 1000 and
|
@@ -357,23 +586,13 @@ module Google
|
|
357
586
|
# result set is large. In addition to this limit, responses are also
|
358
587
|
# limited to 10 MB. By default, there is no maximum row count, and
|
359
588
|
# only the byte limit applies.
|
360
|
-
# @param [Integer] timeout How long to wait for the query to complete,
|
361
|
-
# in milliseconds, before the request times out and returns. Note that
|
362
|
-
# this is only a timeout for the request, not the query. If the query
|
363
|
-
# takes longer to run than the timeout value, the call returns without
|
364
|
-
# any results and with QueryData#complete? set to false. The default
|
365
|
-
# value is 10000 milliseconds (10 seconds).
|
366
589
|
# @param [Boolean] cache Whether to look for the result in the query
|
367
590
|
# cache. The query cache is a best-effort cache that will be flushed
|
368
591
|
# whenever tables in the query are modified. The default value is
|
369
592
|
# true. For more information, see [query
|
370
593
|
# caching](https://developers.google.com/bigquery/querying-data).
|
371
|
-
# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
|
372
|
-
# job. Instead, if the query is valid, BigQuery returns statistics
|
373
|
-
# about the job such as how many bytes would be processed. If the
|
374
|
-
# query is invalid, an error returns. The default value is `false`.
|
375
594
|
#
|
376
|
-
# @return [Google::Cloud::Bigquery::
|
595
|
+
# @return [Google::Cloud::Bigquery::Data]
|
377
596
|
#
|
378
597
|
# @example
|
379
598
|
# require "google/cloud/bigquery"
|
@@ -390,18 +609,31 @@ module Google
|
|
390
609
|
#
|
391
610
|
# @!group Data
|
392
611
|
#
|
393
|
-
def data max: nil,
|
612
|
+
# Runs "SELECT * FROM <query_id>" as a query job, waits for the job to
# finish, and returns the job's data limited by `max`. A failed job's
# error is re-raised wrapped by Google::Cloud::Error.from_error.
def data max: nil, cache: true
  sql = "SELECT * FROM #{query_id}"
  ensure_service!

  job_gapi = service.query_job sql, cache: cache
  job = Job.from_gapi job_gapi, service
  job.wait_until_done!

  return job.data(max: max) unless job.failed?

  begin
    # raise to activate ruby exception cause handling
    fail job.gapi_error
  rescue => e
    # wrap Google::Apis::Error with Google::Cloud::Error
    raise Google::Cloud::Error.from_error(e)
  end
end
|
400
632
|
|
401
633
|
##
|
402
|
-
# Permanently deletes the
|
634
|
+
# Permanently deletes the view.
|
403
635
|
#
|
404
|
-
# @return [Boolean] Returns `true` if the
|
636
|
+
# @return [Boolean] Returns `true` if the view was deleted.
|
405
637
|
#
|
406
638
|
# @example
|
407
639
|
# require "google/cloud/bigquery"
|
@@ -421,7 +653,7 @@ module Google
|
|
421
653
|
end
|
422
654
|
|
423
655
|
##
|
424
|
-
# Reloads the
|
656
|
+
# Reloads the view with current data from the BigQuery service.
|
425
657
|
#
|
426
658
|
# @!group Lifecycle
|
427
659
|
#
|
@@ -449,12 +681,6 @@ module Google
|
|
449
681
|
fail "Must have active connection" unless service
|
450
682
|
end
|
451
683
|
|
452
|
-
def resolve_legacy_sql legacy_sql, standard_sql
|
453
|
-
return legacy_sql unless legacy_sql.nil?
|
454
|
-
return !standard_sql unless standard_sql.nil?
|
455
|
-
false
|
456
|
-
end
|
457
|
-
|
458
684
|
def patch_gapi! *attributes
|
459
685
|
return if attributes.empty?
|
460
686
|
patch_args = Hash[attributes.map do |attr|
|
@@ -463,21 +689,19 @@ module Google
|
|
463
689
|
patch_table_gapi patch_args
|
464
690
|
end
|
465
691
|
|
466
|
-
def patch_view_gapi! *attributes
|
467
|
-
return if attributes.empty?
|
468
|
-
patch_args = Hash[attributes.map do |attr|
|
469
|
-
[attr, @gapi.view.send(attr)]
|
470
|
-
end]
|
471
|
-
patch_view_args = Google::Apis::BigqueryV2::ViewDefinition.new(
|
472
|
-
patch_args
|
473
|
-
)
|
474
|
-
patch_table_gapi view: patch_view_args
|
475
|
-
end
|
476
|
-
|
477
692
|
# Builds a Table patch object from patch_args (adding the current etag
# when there is one), sends it through the service, stores the response,
# and then reloads the resource.
def patch_table_gapi patch_args
  ensure_service!

  patch_gapi = Google::Apis::BigqueryV2::Table.new patch_args
  if etag
    patch_gapi.etag = etag
  end
  @gapi = service.patch_table dataset_id, table_id, patch_gapi

  # TODO: restore original impl after acceptance test indicates that
  # service etag bug is fixed
  reload!
end
|
702
|
+
|
703
|
+
# Patches the table resource with the current view definition.
def patch_view_gapi!
  view_definition = @gapi.view
  patch_table_gapi({ view: view_definition })
end
|
482
706
|
|
483
707
|
##
|
@@ -496,6 +720,19 @@ module Google
|
|
496
720
|
def data_complete?
|
497
721
|
@gapi.is_a? Google::Apis::BigqueryV2::Table
|
498
722
|
end
|
723
|
+
|
724
|
+
# Converts one string or an array of strings into UDF resource objects.
# Strings starting with "gs://" are stored as the resource URI; all other
# strings are stored as inline code. Returns an empty array for nil.
def udfs_gapi array_or_str
  return [] if array_or_str.nil?

  Array(array_or_str).map do |source|
    Google::Apis::BigqueryV2::UserDefinedFunctionResource.new.tap do |udf|
      if source.start_with?("gs://")
        udf.resource_uri = source
      else
        udf.inline_code = source
      end
    end
  end
end
|
499
736
|
end
|
500
737
|
end
|
501
738
|
end
|