google-cloud-bigquery 1.21.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +16 -0
- data/AUTHENTICATION.md +158 -0
- data/CHANGELOG.md +397 -0
- data/CODE_OF_CONDUCT.md +40 -0
- data/CONTRIBUTING.md +188 -0
- data/LICENSE +201 -0
- data/LOGGING.md +27 -0
- data/OVERVIEW.md +463 -0
- data/TROUBLESHOOTING.md +31 -0
- data/lib/google-cloud-bigquery.rb +139 -0
- data/lib/google/cloud/bigquery.rb +145 -0
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +383 -0
- data/lib/google/cloud/bigquery/copy_job.rb +316 -0
- data/lib/google/cloud/bigquery/credentials.rb +50 -0
- data/lib/google/cloud/bigquery/data.rb +526 -0
- data/lib/google/cloud/bigquery/dataset.rb +2845 -0
- data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
- data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
- data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
- data/lib/google/cloud/bigquery/external.rb +2432 -0
- data/lib/google/cloud/bigquery/extract_job.rb +368 -0
- data/lib/google/cloud/bigquery/insert_response.rb +180 -0
- data/lib/google/cloud/bigquery/job.rb +657 -0
- data/lib/google/cloud/bigquery/job/list.rb +162 -0
- data/lib/google/cloud/bigquery/load_job.rb +1704 -0
- data/lib/google/cloud/bigquery/model.rb +740 -0
- data/lib/google/cloud/bigquery/model/list.rb +164 -0
- data/lib/google/cloud/bigquery/project.rb +1655 -0
- data/lib/google/cloud/bigquery/project/list.rb +161 -0
- data/lib/google/cloud/bigquery/query_job.rb +1695 -0
- data/lib/google/cloud/bigquery/routine.rb +1108 -0
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/schema.rb +564 -0
- data/lib/google/cloud/bigquery/schema/field.rb +668 -0
- data/lib/google/cloud/bigquery/service.rb +589 -0
- data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
- data/lib/google/cloud/bigquery/table.rb +3340 -0
- data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
- data/lib/google/cloud/bigquery/table/list.rb +172 -0
- data/lib/google/cloud/bigquery/time.rb +65 -0
- data/lib/google/cloud/bigquery/version.rb +22 -0
- metadata +297 -0
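
For orientation before the raw file contents below, here is a minimal usage sketch of the Dataset API that data/lib/google/cloud/bigquery/dataset.rb (the largest file in this release) implements. It is assembled from the examples embedded in that file's own documentation; names such as "my_dataset", "my_table", and the column names are placeholders, and the sketch is illustrative only, not part of the released gem.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Create a dataset, then a table with an inline schema.
    dataset = bigquery.create_dataset "my_dataset", name: "My Dataset"
    table = dataset.create_table "my_table" do |t|
      t.schema do |s|
        s.string "first_name", mode: :required
        s.timestamp "dob", mode: :required
      end
    end

    # Run a query job scoped to the dataset and read the results.
    job = dataset.query_job "SELECT first_name FROM my_table"
    job.wait_until_done!
    job.data.each { |row| puts row[:first_name] } unless job.failed?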
@@ -0,0 +1,2845 @@
|
|
1
|
+
# Copyright 2015 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "json"
|
17
|
+
require "google/cloud/errors"
|
18
|
+
require "google/cloud/bigquery/service"
|
19
|
+
require "google/cloud/bigquery/table"
|
20
|
+
require "google/cloud/bigquery/model"
|
21
|
+
require "google/cloud/bigquery/routine"
|
22
|
+
require "google/cloud/bigquery/external"
|
23
|
+
require "google/cloud/bigquery/dataset/list"
|
24
|
+
require "google/cloud/bigquery/dataset/access"
|
25
|
+
require "google/cloud/bigquery/convert"
|
26
|
+
require "google/apis/bigquery_v2"
|
27
|
+
|
28
|
+
module Google
|
29
|
+
module Cloud
|
30
|
+
module Bigquery
|
31
|
+
##
|
32
|
+
# # Dataset
|
33
|
+
#
|
34
|
+
# Represents a Dataset. A dataset is a grouping mechanism that holds zero
|
35
|
+
# or more tables. Datasets are the lowest level unit of access control;
|
36
|
+
# you cannot control access at the table level. A dataset is contained
|
37
|
+
# within a specific project.
|
38
|
+
#
|
39
|
+
# @example
|
40
|
+
# require "google/cloud/bigquery"
|
41
|
+
#
|
42
|
+
# bigquery = Google::Cloud::Bigquery.new
|
43
|
+
#
|
44
|
+
# dataset = bigquery.create_dataset "my_dataset",
|
45
|
+
# name: "My Dataset",
|
46
|
+
# description: "This is my Dataset"
|
47
|
+
#
|
48
|
+
class Dataset
|
49
|
+
##
|
50
|
+
# @private The Connection object.
|
51
|
+
attr_accessor :service
|
52
|
+
|
53
|
+
##
|
54
|
+
# @private The Google API Client object.
|
55
|
+
attr_accessor :gapi
|
56
|
+
|
57
|
+
##
|
58
|
+
# @private A Google API Client Dataset Reference object.
|
59
|
+
attr_reader :reference
|
60
|
+
|
61
|
+
##
|
62
|
+
# @private Create an empty Dataset object.
|
63
|
+
def initialize
|
64
|
+
@service = nil
|
65
|
+
@gapi = nil
|
66
|
+
@reference = nil
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# A unique ID for this dataset, without the project name.
|
71
|
+
#
|
72
|
+
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
73
|
+
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
74
|
+
#
|
75
|
+
# @!group Attributes
|
76
|
+
#
|
77
|
+
def dataset_id
|
78
|
+
return reference.dataset_id if reference?
|
79
|
+
@gapi.dataset_reference.dataset_id
|
80
|
+
end
|
81
|
+
|
82
|
+
##
|
83
|
+
# The ID of the project containing this dataset.
|
84
|
+
#
|
85
|
+
# @return [String] The project ID.
|
86
|
+
#
|
87
|
+
# @!group Attributes
|
88
|
+
#
|
89
|
+
def project_id
|
90
|
+
return reference.project_id if reference?
|
91
|
+
@gapi.dataset_reference.project_id
|
92
|
+
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# @private
|
96
|
+
# The gapi fragment containing the Project ID and Dataset ID as a
|
97
|
+
# camel-cased hash.
|
98
|
+
def dataset_ref
|
99
|
+
dataset_ref = reference? ? reference : @gapi.dataset_reference
|
100
|
+
dataset_ref = dataset_ref.to_h if dataset_ref.respond_to? :to_h
|
101
|
+
dataset_ref
|
102
|
+
end
|
103
|
+
|
104
|
+
##
|
105
|
+
# A descriptive name for the dataset.
|
106
|
+
#
|
107
|
+
# @return [String, nil] The friendly name, or `nil` if the object is
|
108
|
+
# a reference (see {#reference?}).
|
109
|
+
#
|
110
|
+
# @!group Attributes
|
111
|
+
#
|
112
|
+
def name
|
113
|
+
return nil if reference?
|
114
|
+
@gapi.friendly_name
|
115
|
+
end
|
116
|
+
|
117
|
+
##
|
118
|
+
# Updates the descriptive name for the dataset.
|
119
|
+
#
|
120
|
+
# If the dataset is not a full resource representation (see
|
121
|
+
# {#resource_full?}), the full representation will be retrieved before
|
122
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
123
|
+
#
|
124
|
+
# @param [String] new_name The new friendly name, or `nil` if the object
|
125
|
+
# is a reference (see {#reference?}).
|
126
|
+
#
|
127
|
+
# @!group Attributes
|
128
|
+
#
|
129
|
+
def name= new_name
|
130
|
+
reload! unless resource_full?
|
131
|
+
@gapi.update! friendly_name: new_name
|
132
|
+
patch_gapi! :friendly_name
|
133
|
+
end
|
134
|
+
|
135
|
+
##
|
136
|
+
# The ETag hash of the dataset.
|
137
|
+
#
|
138
|
+
# @return [String, nil] The ETag hash, or `nil` if the object is a
|
139
|
+
# reference (see {#reference?}).
|
140
|
+
#
|
141
|
+
# @!group Attributes
|
142
|
+
#
|
143
|
+
def etag
|
144
|
+
return nil if reference?
|
145
|
+
ensure_full_data!
|
146
|
+
@gapi.etag
|
147
|
+
end
|
148
|
+
|
149
|
+
##
|
150
|
+
# A URL that can be used to access the dataset using the REST API.
|
151
|
+
#
|
152
|
+
# @return [String, nil] A REST URL for the resource, or `nil` if the
|
153
|
+
# object is a reference (see {#reference?}).
|
154
|
+
#
|
155
|
+
# @!group Attributes
|
156
|
+
#
|
157
|
+
def api_url
|
158
|
+
return nil if reference?
|
159
|
+
ensure_full_data!
|
160
|
+
@gapi.self_link
|
161
|
+
end
|
162
|
+
|
163
|
+
##
|
164
|
+
# A user-friendly description of the dataset.
|
165
|
+
#
|
166
|
+
# @return [String, nil] The description, or `nil` if the object is a
|
167
|
+
# reference (see {#reference?}).
|
168
|
+
#
|
169
|
+
# @!group Attributes
|
170
|
+
#
|
171
|
+
def description
|
172
|
+
return nil if reference?
|
173
|
+
ensure_full_data!
|
174
|
+
@gapi.description
|
175
|
+
end
|
176
|
+
|
177
|
+
##
|
178
|
+
# Updates the user-friendly description of the dataset.
|
179
|
+
#
|
180
|
+
# If the dataset is not a full resource representation (see
|
181
|
+
# {#resource_full?}), the full representation will be retrieved before
|
182
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
183
|
+
#
|
184
|
+
# @param [String] new_description The new description for the dataset.
|
185
|
+
#
|
186
|
+
# @!group Attributes
|
187
|
+
#
|
188
|
+
def description= new_description
|
189
|
+
reload! unless resource_full?
|
190
|
+
@gapi.update! description: new_description
|
191
|
+
patch_gapi! :description
|
192
|
+
end
|
193
|
+
|
194
|
+
##
|
195
|
+
# The default lifetime of all tables in the dataset, in milliseconds.
|
196
|
+
#
|
197
|
+
# @return [Integer, nil] The default table expiration in milliseconds,
|
198
|
+
# or `nil` if not present or the object is a reference (see
|
199
|
+
# {#reference?}).
|
200
|
+
#
|
201
|
+
# @!group Attributes
|
202
|
+
#
|
203
|
+
def default_expiration
|
204
|
+
return nil if reference?
|
205
|
+
ensure_full_data!
|
206
|
+
begin
|
207
|
+
Integer @gapi.default_table_expiration_ms
|
208
|
+
rescue StandardError
|
209
|
+
nil
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
##
|
214
|
+
# Updates the default lifetime of all tables in the dataset, in
|
215
|
+
# milliseconds.
|
216
|
+
#
|
217
|
+
# If the dataset is not a full resource representation (see
|
218
|
+
# {#resource_full?}), the full representation will be retrieved before
|
219
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
220
|
+
#
|
221
|
+
# @param [Integer] new_default_expiration The new default table
|
222
|
+
# expiration in milliseconds.
|
223
|
+
#
|
224
|
+
# @!group Attributes
|
225
|
+
#
|
226
|
+
def default_expiration= new_default_expiration
|
227
|
+
reload! unless resource_full?
|
228
|
+
@gapi.update! default_table_expiration_ms: new_default_expiration
|
229
|
+
patch_gapi! :default_table_expiration_ms
|
230
|
+
end
|
231
|
+
|
232
|
+
##
|
233
|
+
# The time when this dataset was created.
|
234
|
+
#
|
235
|
+
# @return [Time, nil] The creation time, or `nil` if not present or the
|
236
|
+
# object is a reference (see {#reference?}).
|
237
|
+
#
|
238
|
+
# @!group Attributes
|
239
|
+
#
|
240
|
+
def created_at
|
241
|
+
return nil if reference?
|
242
|
+
ensure_full_data!
|
243
|
+
Convert.millis_to_time @gapi.creation_time
|
244
|
+
end
|
245
|
+
|
246
|
+
##
|
247
|
+
# The date when this dataset or any of its tables was last modified.
|
248
|
+
#
|
249
|
+
# @return [Time, nil] The last modified time, or `nil` if not present or
|
250
|
+
# the object is a reference (see {#reference?}).
|
251
|
+
#
|
252
|
+
# @!group Attributes
|
253
|
+
#
|
254
|
+
def modified_at
|
255
|
+
return nil if reference?
|
256
|
+
ensure_full_data!
|
257
|
+
Convert.millis_to_time @gapi.last_modified_time
|
258
|
+
end
|
259
|
+
|
260
|
+
##
|
261
|
+
# The geographic location where the dataset should reside. Possible
|
262
|
+
# values include `EU` and `US`. The default value is `US`.
|
263
|
+
#
|
264
|
+
# @return [String, nil] The geographic location, or `nil` if the object
|
265
|
+
# is a reference (see {#reference?}).
|
266
|
+
#
|
267
|
+
# @!group Attributes
|
268
|
+
#
|
269
|
+
def location
|
270
|
+
return nil if reference?
|
271
|
+
ensure_full_data!
|
272
|
+
@gapi.location
|
273
|
+
end
|
274
|
+
|
275
|
+
##
|
276
|
+
# A hash of user-provided labels associated with this dataset. Labels
|
277
|
+
# are used to organize and group datasets. See [Using
|
278
|
+
# Labels](https://cloud.google.com/bigquery/docs/labels).
|
279
|
+
#
|
280
|
+
# The returned hash is frozen and changes are not allowed. Use
|
281
|
+
# {#labels=} to replace the entire hash.
|
282
|
+
#
|
283
|
+
# @return [Hash<String, String>, nil] A hash containing key/value pairs,
|
284
|
+
# or `nil` if the object is a reference (see {#reference?}).
|
285
|
+
#
|
286
|
+
# @example
|
287
|
+
# require "google/cloud/bigquery"
|
288
|
+
#
|
289
|
+
# bigquery = Google::Cloud::Bigquery.new
|
290
|
+
# dataset = bigquery.dataset "my_dataset"
|
291
|
+
#
|
292
|
+
# labels = dataset.labels
|
293
|
+
# labels["department"] #=> "shipping"
|
294
|
+
#
|
295
|
+
# @!group Attributes
|
296
|
+
#
|
297
|
+
def labels
|
298
|
+
return nil if reference?
|
299
|
+
m = @gapi.labels
|
300
|
+
m = m.to_h if m.respond_to? :to_h
|
301
|
+
m.dup.freeze
|
302
|
+
end
|
303
|
+
|
304
|
+
##
|
305
|
+
# Updates the hash of user-provided labels associated with this dataset.
|
306
|
+
# Labels are used to organize and group datasets. See [Using
|
307
|
+
# Labels](https://cloud.google.com/bigquery/docs/labels).
|
308
|
+
#
|
309
|
+
# If the dataset is not a full resource representation (see
|
310
|
+
# {#resource_full?}), the full representation will be retrieved before
|
311
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
312
|
+
#
|
313
|
+
# @param [Hash<String, String>] labels A hash containing key/value
|
314
|
+
# pairs.
|
315
|
+
#
|
316
|
+
# * Label keys and values can be no longer than 63 characters.
|
317
|
+
# * Label keys and values can contain only lowercase letters, numbers,
|
318
|
+
# underscores, hyphens, and international characters.
|
319
|
+
# * Label keys and values cannot exceed 128 bytes in size.
|
320
|
+
# * Label keys must begin with a letter.
|
321
|
+
# * Label keys must be unique within a dataset.
|
322
|
+
#
|
323
|
+
# @example
|
324
|
+
# require "google/cloud/bigquery"
|
325
|
+
#
|
326
|
+
# bigquery = Google::Cloud::Bigquery.new
|
327
|
+
# dataset = bigquery.dataset "my_dataset"
|
328
|
+
#
|
329
|
+
# dataset.labels = { "department" => "shipping" }
|
330
|
+
#
|
331
|
+
# @!group Attributes
|
332
|
+
#
|
333
|
+
def labels= labels
|
334
|
+
reload! unless resource_full?
|
335
|
+
@gapi.labels = labels
|
336
|
+
patch_gapi! :labels
|
337
|
+
end
|
338
|
+
|
339
|
+
##
|
340
|
+
# The {EncryptionConfiguration} object that represents the default
|
341
|
+
# encryption method for all tables and models in the dataset. Once this
|
342
|
+
# property is set, all newly-created partitioned tables and models in
|
343
|
+
# the dataset will have their encryption set to this value, unless table
|
344
|
+
# creation request (or query) overrides it.
|
345
|
+
#
|
346
|
+
# Present only if this dataset is using custom default encryption.
|
347
|
+
#
|
348
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
349
|
+
# Protecting Data with Cloud KMS Keys
|
350
|
+
#
|
351
|
+
# @return [EncryptionConfiguration, nil] The default encryption
|
352
|
+
# configuration.
|
353
|
+
#
|
354
|
+
# @!group Attributes
|
355
|
+
#
|
356
|
+
# @example
|
357
|
+
# require "google/cloud/bigquery"
|
358
|
+
#
|
359
|
+
# bigquery = Google::Cloud::Bigquery.new
|
360
|
+
# dataset = bigquery.dataset "my_dataset"
|
361
|
+
#
|
362
|
+
# encrypt_config = dataset.default_encryption
|
363
|
+
#
|
364
|
+
# @!group Attributes
|
365
|
+
#
|
366
|
+
def default_encryption
|
367
|
+
return nil if reference?
|
368
|
+
ensure_full_data!
|
369
|
+
return nil if @gapi.default_encryption_configuration.nil?
|
370
|
+
EncryptionConfiguration.from_gapi(@gapi.default_encryption_configuration).freeze
|
371
|
+
end
|
372
|
+
|
373
|
+
##
|
374
|
+
# Set the {EncryptionConfiguration} object that represents the default
|
375
|
+
# encryption method for all tables and models in the dataset. Once this
|
376
|
+
# property is set, all newly-created partitioned tables and models in
|
377
|
+
# the dataset will have their encryption set to this value, unless table
|
378
|
+
# creation request (or query) overrides it.
|
379
|
+
#
|
380
|
+
# If the dataset is not a full resource representation (see
|
381
|
+
# {#resource_full?}), the full representation will be retrieved before
|
382
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
383
|
+
#
|
384
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
385
|
+
# Protecting Data with Cloud KMS Keys
|
386
|
+
#
|
387
|
+
# @param [EncryptionConfiguration] value The new encryption config.
|
388
|
+
#
|
389
|
+
# @example
|
390
|
+
# require "google/cloud/bigquery"
|
391
|
+
#
|
392
|
+
# bigquery = Google::Cloud::Bigquery.new
|
393
|
+
# dataset = bigquery.dataset "my_dataset"
|
394
|
+
#
|
395
|
+
# key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
|
396
|
+
# encrypt_config = bigquery.encryption kms_key: key_name
|
397
|
+
#
|
398
|
+
# dataset.default_encryption = encrypt_config
|
399
|
+
#
|
400
|
+
# @!group Attributes
|
401
|
+
#
|
402
|
+
def default_encryption= value
|
403
|
+
ensure_full_data!
|
404
|
+
@gapi.default_encryption_configuration = value.to_gapi
|
405
|
+
patch_gapi! :default_encryption_configuration
|
406
|
+
end
|
407
|
+
|
408
|
+
##
|
409
|
+
# Retrieves the access rules for a Dataset. The rules can be updated
|
410
|
+
# when passing a block, see {Dataset::Access} for all the methods
|
411
|
+
# available.
|
412
|
+
#
|
413
|
+
# If the dataset is not a full resource representation (see
|
414
|
+
# {#resource_full?}), the full representation will be retrieved before
|
415
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
416
|
+
#
|
417
|
+
# @see https://cloud.google.com/bigquery/access-control BigQuery Access
|
418
|
+
# Control
|
419
|
+
#
|
420
|
+
# @yield [access] a block for setting rules
|
421
|
+
# @yieldparam [Dataset::Access] access the object accepting rules
|
422
|
+
#
|
423
|
+
# @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
|
424
|
+
#
|
425
|
+
# @example
|
426
|
+
# require "google/cloud/bigquery"
|
427
|
+
#
|
428
|
+
# bigquery = Google::Cloud::Bigquery.new
|
429
|
+
# dataset = bigquery.dataset "my_dataset"
|
430
|
+
#
|
431
|
+
# access = dataset.access
|
432
|
+
# access.writer_user? "reader@example.com" #=> false
|
433
|
+
#
|
434
|
+
# @example Manage the access rules by passing a block:
|
435
|
+
# require "google/cloud/bigquery"
|
436
|
+
#
|
437
|
+
# bigquery = Google::Cloud::Bigquery.new
|
438
|
+
# dataset = bigquery.dataset "my_dataset"
|
439
|
+
#
|
440
|
+
# dataset.access do |access|
|
441
|
+
# access.add_owner_group "owners@example.com"
|
442
|
+
# access.add_writer_user "writer@example.com"
|
443
|
+
# access.remove_writer_user "readers@example.com"
|
444
|
+
# access.add_reader_special :all
|
445
|
+
# access.add_reader_view other_dataset_view_object
|
446
|
+
# end
|
447
|
+
#
|
448
|
+
def access
|
449
|
+
ensure_full_data!
|
450
|
+
reload! unless resource_full?
|
451
|
+
access_builder = Access.from_gapi @gapi
|
452
|
+
if block_given?
|
453
|
+
yield access_builder
|
454
|
+
if access_builder.changed?
|
455
|
+
@gapi.update! access: access_builder.to_gapi
|
456
|
+
patch_gapi! :access
|
457
|
+
end
|
458
|
+
end
|
459
|
+
access_builder.freeze
|
460
|
+
end
|
461
|
+
|
462
|
+
##
|
463
|
+
# Permanently deletes the dataset. The dataset must be empty before it
|
464
|
+
# can be deleted unless the `force` option is set to `true`.
|
465
|
+
#
|
466
|
+
# @param [Boolean] force If `true`, delete all the tables in the
|
467
|
+
# dataset. If `false` and the dataset contains tables, the request
|
468
|
+
# will fail. Default is `false`.
|
469
|
+
#
|
470
|
+
# @return [Boolean] Returns `true` if the dataset was deleted.
|
471
|
+
#
|
472
|
+
# @example
|
473
|
+
# require "google/cloud/bigquery"
|
474
|
+
#
|
475
|
+
# bigquery = Google::Cloud::Bigquery.new
|
476
|
+
# dataset = bigquery.dataset "my_dataset"
|
477
|
+
#
|
478
|
+
# dataset.delete
|
479
|
+
#
|
480
|
+
# @!group Lifecycle
|
481
|
+
#
|
482
|
+
def delete force: nil
|
483
|
+
ensure_service!
|
484
|
+
service.delete_dataset dataset_id, force
|
485
|
+
# Set flag for #exists?
|
486
|
+
@exists = false
|
487
|
+
true
|
488
|
+
end
|
489
|
+
|
490
|
+
##
|
491
|
+
# Creates a new table. If you are adapting existing code that was
|
492
|
+
# written for the [Rest API
|
493
|
+
# ](https://cloud.google.com/bigquery/docs/reference/v2/tables#resource),
|
494
|
+
# you can pass the table's schema as a hash (see example.)
|
495
|
+
#
|
496
|
+
# @param [String] table_id The ID of the table. The ID must contain only
|
497
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
|
498
|
+
# length is 1,024 characters.
|
499
|
+
# @param [String] name A descriptive name for the table.
|
500
|
+
# @param [String] description A user-friendly description of the table.
|
501
|
+
# @yield [table] a block for setting the table
|
502
|
+
# @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
|
503
|
+
# to set additional properties on the table in the API request to
|
504
|
+
# create it.
|
505
|
+
#
|
506
|
+
# @return [Google::Cloud::Bigquery::Table] A new table object.
|
507
|
+
#
|
508
|
+
# @example
|
509
|
+
# require "google/cloud/bigquery"
|
510
|
+
#
|
511
|
+
# bigquery = Google::Cloud::Bigquery.new
|
512
|
+
# dataset = bigquery.dataset "my_dataset"
|
513
|
+
#
|
514
|
+
# table = dataset.create_table "my_table"
|
515
|
+
#
|
516
|
+
# @example You can also pass name and description options.
|
517
|
+
# require "google/cloud/bigquery"
|
518
|
+
#
|
519
|
+
# bigquery = Google::Cloud::Bigquery.new
|
520
|
+
# dataset = bigquery.dataset "my_dataset"
|
521
|
+
#
|
522
|
+
# table = dataset.create_table "my_table",
|
523
|
+
# name: "My Table",
|
524
|
+
# description: "A description of table."
|
525
|
+
#
|
526
|
+
# @example Or the table's schema can be configured with the block.
|
527
|
+
# require "google/cloud/bigquery"
|
528
|
+
#
|
529
|
+
# bigquery = Google::Cloud::Bigquery.new
|
530
|
+
# dataset = bigquery.dataset "my_dataset"
|
531
|
+
#
|
532
|
+
# table = dataset.create_table "my_table" do |t|
|
533
|
+
# t.schema.string "first_name", mode: :required
|
534
|
+
# t.schema.record "cities_lived", mode: :required do |s|
|
535
|
+
# s.string "place", mode: :required
|
536
|
+
# s.integer "number_of_years", mode: :required
|
537
|
+
# end
|
538
|
+
# end
|
539
|
+
#
|
540
|
+
# @example You can define the schema using a nested block.
|
541
|
+
# require "google/cloud/bigquery"
|
542
|
+
#
|
543
|
+
# bigquery = Google::Cloud::Bigquery.new
|
544
|
+
# dataset = bigquery.dataset "my_dataset"
|
545
|
+
#
|
546
|
+
# table = dataset.create_table "my_table" do |t|
|
547
|
+
# t.name = "My Table"
|
548
|
+
# t.description = "A description of my table."
|
549
|
+
# t.schema do |s|
|
550
|
+
# s.string "first_name", mode: :required
|
551
|
+
# s.record "cities_lived", mode: :repeated do |r|
|
552
|
+
# r.string "place", mode: :required
|
553
|
+
# r.integer "number_of_years", mode: :required
|
554
|
+
# end
|
555
|
+
# end
|
556
|
+
# end
|
557
|
+
#
|
558
|
+
# @example With time partitioning and clustering.
|
559
|
+
# require "google/cloud/bigquery"
|
560
|
+
#
|
561
|
+
# bigquery = Google::Cloud::Bigquery.new
|
562
|
+
# dataset = bigquery.dataset "my_dataset"
|
563
|
+
#
|
564
|
+
# table = dataset.create_table "my_table" do |t|
|
565
|
+
# t.schema do |schema|
|
566
|
+
# schema.timestamp "dob", mode: :required
|
567
|
+
# schema.string "first_name", mode: :required
|
568
|
+
# schema.string "last_name", mode: :required
|
569
|
+
# end
|
570
|
+
# t.time_partitioning_type = "DAY"
|
571
|
+
# t.time_partitioning_field = "dob"
|
572
|
+
# t.clustering_fields = ["last_name", "first_name"]
|
573
|
+
# end
|
574
|
+
#
|
575
|
+
# @example With range partitioning.
|
576
|
+
# require "google/cloud/bigquery"
|
577
|
+
#
|
578
|
+
# bigquery = Google::Cloud::Bigquery.new
|
579
|
+
# dataset = bigquery.dataset "my_dataset"
|
580
|
+
#
|
581
|
+
# table = dataset.create_table "my_table" do |t|
|
582
|
+
# t.schema do |schema|
|
583
|
+
# schema.integer "my_table_id", mode: :required
|
584
|
+
# schema.string "my_table_data", mode: :required
|
585
|
+
# end
|
586
|
+
# t.range_partitioning_field = "my_table_id"
|
587
|
+
# t.range_partitioning_start = 0
|
588
|
+
# t.range_partitioning_interval = 10
|
589
|
+
# t.range_partitioning_end = 100
|
590
|
+
# end
|
591
|
+
#
|
592
|
+
# @!group Table
|
593
|
+
#
|
594
|
+
def create_table table_id, name: nil, description: nil
|
595
|
+
ensure_service!
|
596
|
+
new_tb = Google::Apis::BigqueryV2::Table.new(
|
597
|
+
table_reference: Google::Apis::BigqueryV2::TableReference.new(
|
598
|
+
project_id: project_id, dataset_id: dataset_id,
|
599
|
+
table_id: table_id
|
600
|
+
)
|
601
|
+
)
|
602
|
+
updater = Table::Updater.new(new_tb).tap do |tb|
|
603
|
+
tb.name = name unless name.nil?
|
604
|
+
tb.description = description unless description.nil?
|
605
|
+
end
|
606
|
+
|
607
|
+
yield updater if block_given?
|
608
|
+
|
609
|
+
gapi = service.insert_table dataset_id, updater.to_gapi
|
610
|
+
Table.from_gapi gapi, service
|
611
|
+
end
|
612
|
+
|
613
|
+
##
|
614
|
+
# Creates a new [view](https://cloud.google.com/bigquery/docs/views)
|
615
|
+
# table, which is a virtual table defined by the given SQL query.
|
616
|
+
#
|
617
|
+
# BigQuery's views are logical views, not materialized views, which
|
618
|
+
# means that the query that defines the view is re-executed every time
|
619
|
+
# the view is queried. Queries are billed according to the total amount
|
620
|
+
# of data in all table fields referenced directly or indirectly by the
|
621
|
+
# top-level query. (See {Table#view?} and {Table#query}.)
|
622
|
+
#
|
623
|
+
# @param [String] table_id The ID of the view table. The ID must contain
|
624
|
+
# only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
|
625
|
+
# maximum length is 1,024 characters.
|
626
|
+
# @param [String] query The query that BigQuery executes when the view
|
627
|
+
# is referenced.
|
628
|
+
# @param [String] name A descriptive name for the table.
|
629
|
+
# @param [String] description A user-friendly description of the table.
|
630
|
+
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
|
631
|
+
# [standard
|
632
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
633
|
+
# dialect. Optional. The default value is true.
|
634
|
+
# @param [Boolean] legacy_sql Specifies whether to use BigQuery's
|
635
|
+
# [legacy
|
636
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
637
|
+
# dialect. Optional. The default value is false.
|
638
|
+
# @param [Array<String>, String] udfs User-defined function resources
|
639
|
+
# used in a legacy SQL query. May be either a code resource to load from
|
640
|
+
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
641
|
+
# that contains code for a user-defined function (UDF). Providing an
|
642
|
+
# inline code resource is equivalent to providing a URI for a file
|
643
|
+
# containing the same code.
|
644
|
+
#
|
645
|
+
# This parameter is used for defining User Defined Function (UDF)
|
646
|
+
# resources only when using legacy SQL. Users of standard SQL should
|
647
|
+
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
|
648
|
+
# Routines API to define UDF resources.
|
649
|
+
#
|
650
|
+
# For additional information on migrating, see: [Migrating to
|
651
|
+
# standard SQL - Differences in user-defined JavaScript
|
652
|
+
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
|
653
|
+
#
|
654
|
+
# @return [Google::Cloud::Bigquery::Table] A new table object.
|
655
|
+
#
|
656
|
+
# @example
|
657
|
+
# require "google/cloud/bigquery"
|
658
|
+
#
|
659
|
+
# bigquery = Google::Cloud::Bigquery.new
|
660
|
+
# dataset = bigquery.dataset "my_dataset"
|
661
|
+
#
|
662
|
+
# view = dataset.create_view "my_view",
|
663
|
+
# "SELECT name, age FROM proj.dataset.users"
|
664
|
+
#
|
665
|
+
# @example A name and description can be provided:
|
666
|
+
# require "google/cloud/bigquery"
|
667
|
+
#
|
668
|
+
# bigquery = Google::Cloud::Bigquery.new
|
669
|
+
# dataset = bigquery.dataset "my_dataset"
|
670
|
+
#
|
671
|
+
# view = dataset.create_view "my_view",
|
672
|
+
# "SELECT name, age FROM proj.dataset.users",
|
673
|
+
# name: "My View", description: "This is my view"
|
674
|
+
#
|
675
|
+
# @!group Table
|
676
|
+
#
|
677
|
+
def create_view table_id, query, name: nil, description: nil,
|
678
|
+
standard_sql: nil, legacy_sql: nil, udfs: nil
|
679
|
+
use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
|
680
|
+
new_view_opts = {
|
681
|
+
table_reference: Google::Apis::BigqueryV2::TableReference.new(
|
682
|
+
project_id: project_id,
|
683
|
+
dataset_id: dataset_id,
|
684
|
+
table_id: table_id
|
685
|
+
),
|
686
|
+
friendly_name: name,
|
687
|
+
description: description,
|
688
|
+
view: Google::Apis::BigqueryV2::ViewDefinition.new(
|
689
|
+
query: query,
|
690
|
+
use_legacy_sql: use_legacy_sql,
|
691
|
+
user_defined_function_resources: udfs_gapi(udfs)
|
692
|
+
)
|
693
|
+
}.delete_if { |_, v| v.nil? }
|
694
|
+
new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
|
695
|
+
|
696
|
+
gapi = service.insert_table dataset_id, new_view
|
697
|
+
Table.from_gapi gapi, service
|
698
|
+
end
|
699
|
+
|
700
|
+
##
|
701
|
+
# Retrieves an existing table by ID.
|
702
|
+
#
|
703
|
+
# @param [String] table_id The ID of a table.
|
704
|
+
# @param [Boolean] skip_lookup Optionally create just a local reference
|
705
|
+
# object without verifying that the resource exists on the BigQuery
|
706
|
+
# service. Calls made on this object will raise errors if the resource
|
707
|
+
# does not exist. Default is `false`. Optional.
|
708
|
+
#
|
709
|
+
# @return [Google::Cloud::Bigquery::Table, nil] Returns `nil` if the
|
710
|
+
# table does not exist.
|
711
|
+
#
|
712
|
+
# @example
|
713
|
+
# require "google/cloud/bigquery"
|
714
|
+
#
|
715
|
+
# bigquery = Google::Cloud::Bigquery.new
|
716
|
+
# dataset = bigquery.dataset "my_dataset"
|
717
|
+
#
|
718
|
+
# table = dataset.table "my_table"
|
719
|
+
# puts table.name
|
720
|
+
#
|
721
|
+
# @example Avoid retrieving the table resource with `skip_lookup`:
|
722
|
+
# require "google/cloud/bigquery"
|
723
|
+
#
|
724
|
+
# bigquery = Google::Cloud::Bigquery.new
|
725
|
+
#
|
726
|
+
# dataset = bigquery.dataset "my_dataset"
|
727
|
+
#
|
728
|
+
# table = dataset.table "my_table", skip_lookup: true
|
729
|
+
#
|
730
|
+
# @!group Table
|
731
|
+
#
|
732
|
+
def table table_id, skip_lookup: nil
|
733
|
+
ensure_service!
|
734
|
+
return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
|
735
|
+
gapi = service.get_table dataset_id, table_id
|
736
|
+
Table.from_gapi gapi, service
|
737
|
+
rescue Google::Cloud::NotFoundError
|
738
|
+
nil
|
739
|
+
end
|
740
|
+
|
741
|
+
##
|
742
|
+
# Retrieves the list of tables belonging to the dataset.
|
743
|
+
#
|
744
|
+
# @param [String] token A previously-returned page token representing
|
745
|
+
# part of the larger set of results to view.
|
746
|
+
# @param [Integer] max Maximum number of tables to return.
|
747
|
+
#
|
748
|
+
# @return [Array<Google::Cloud::Bigquery::Table>] An array of tables
|
749
|
+
# (See {Google::Cloud::Bigquery::Table::List})
|
750
|
+
#
|
751
|
+
# @example
|
752
|
+
# require "google/cloud/bigquery"
|
753
|
+
#
|
754
|
+
# bigquery = Google::Cloud::Bigquery.new
|
755
|
+
# dataset = bigquery.dataset "my_dataset"
|
756
|
+
#
|
757
|
+
# tables = dataset.tables
|
758
|
+
# tables.each do |table|
|
759
|
+
# puts table.name
|
760
|
+
# end
|
761
|
+
#
|
762
|
+
# @example Retrieve all tables: (See {Table::List#all})
|
763
|
+
# require "google/cloud/bigquery"
|
764
|
+
#
|
765
|
+
# bigquery = Google::Cloud::Bigquery.new
|
766
|
+
# dataset = bigquery.dataset "my_dataset"
|
767
|
+
#
|
768
|
+
# tables = dataset.tables
|
769
|
+
# tables.all do |table|
|
770
|
+
# puts table.name
|
771
|
+
# end
|
772
|
+
#
|
773
|
+
# @!group Table
|
774
|
+
#
|
775
|
+
def tables token: nil, max: nil
|
776
|
+
ensure_service!
|
777
|
+
gapi = service.list_tables dataset_id, token: token, max: max
|
778
|
+
Table::List.from_gapi gapi, service, dataset_id, max
|
779
|
+
end
|
780
|
+
|
781
|
+
##
|
782
|
+
# Retrieves an existing model by ID.
|
783
|
+
#
|
784
|
+
# @param [String] model_id The ID of a model.
|
785
|
+
# @param [Boolean] skip_lookup Optionally create just a local reference
|
786
|
+
# object without verifying that the resource exists on the BigQuery
|
787
|
+
# service. Calls made on this object will raise errors if the resource
|
788
|
+
# does not exist. Default is `false`. Optional.
|
789
|
+
#
|
790
|
+
# @return [Google::Cloud::Bigquery::Model, nil] Returns `nil` if the
|
791
|
+
# model does not exist.
|
792
|
+
#
|
793
|
+
# @example
|
794
|
+
# require "google/cloud/bigquery"
|
795
|
+
#
|
796
|
+
# bigquery = Google::Cloud::Bigquery.new
|
797
|
+
# dataset = bigquery.dataset "my_dataset"
|
798
|
+
#
|
799
|
+
# model = dataset.model "my_model"
|
800
|
+
# puts model.model_id
|
801
|
+
#
|
802
|
+
# @example Avoid retrieving the model resource with `skip_lookup`:
|
803
|
+
# require "google/cloud/bigquery"
|
804
|
+
#
|
805
|
+
# bigquery = Google::Cloud::Bigquery.new
|
806
|
+
#
|
807
|
+
# dataset = bigquery.dataset "my_dataset"
|
808
|
+
#
|
809
|
+
# model = dataset.model "my_model", skip_lookup: true
|
810
|
+
#
|
811
|
+
# @!group Model
|
812
|
+
#
|
813
|
+
def model model_id, skip_lookup: nil
|
814
|
+
ensure_service!
|
815
|
+
return Model.new_reference project_id, dataset_id, model_id, service if skip_lookup
|
816
|
+
gapi = service.get_model dataset_id, model_id
|
817
|
+
Model.from_gapi_json gapi, service
|
818
|
+
rescue Google::Cloud::NotFoundError
|
819
|
+
nil
|
820
|
+
end
|
821
|
+
|
822
|
+
##
|
823
|
+
# Retrieves the list of models belonging to the dataset.
|
824
|
+
#
|
825
|
+
# @param [String] token A previously-returned page token representing
|
826
|
+
# part of the larger set of results to view.
|
827
|
+
# @param [Integer] max Maximum number of models to return.
|
828
|
+
#
|
829
|
+
# @return [Array<Google::Cloud::Bigquery::Model>] An array of models
|
830
|
+
# (See {Google::Cloud::Bigquery::Model::List})
|
831
|
+
#
|
832
|
+
# @example
|
833
|
+
# require "google/cloud/bigquery"
|
834
|
+
#
|
835
|
+
# bigquery = Google::Cloud::Bigquery.new
|
836
|
+
# dataset = bigquery.dataset "my_dataset"
|
837
|
+
#
|
838
|
+
# models = dataset.models
|
839
|
+
# models.each do |model|
|
840
|
+
# puts model.model_id
|
841
|
+
# end
|
842
|
+
#
|
843
|
+
# @example Retrieve all models: (See {Model::List#all})
|
844
|
+
# require "google/cloud/bigquery"
|
845
|
+
#
|
846
|
+
# bigquery = Google::Cloud::Bigquery.new
|
847
|
+
# dataset = bigquery.dataset "my_dataset"
|
848
|
+
#
|
849
|
+
# models = dataset.models
|
850
|
+
# models.all do |model|
|
851
|
+
# puts model.model_id
|
852
|
+
# end
|
853
|
+
#
|
854
|
+
# @!group Model
|
855
|
+
#
|
856
|
+
def models token: nil, max: nil
|
857
|
+
ensure_service!
|
858
|
+
gapi = service.list_models dataset_id, token: token, max: max
|
859
|
+
Model::List.from_gapi gapi, service, dataset_id, max
|
860
|
+
end
|
861
|
+
|
862
|
+
##
|
863
|
+
# Creates a new routine. The following attributes may be set in the yielded block:
|
864
|
+
# {Routine::Updater#routine_type=}, {Routine::Updater#language=}, {Routine::Updater#arguments=},
|
865
|
+
# {Routine::Updater#return_type=}, {Routine::Updater#imported_libraries=}, {Routine::Updater#body=}, and
|
866
|
+
# {Routine::Updater#description=}.
|
867
|
+
#
|
868
|
+
# @param [String] routine_id The ID of the routine. The ID must contain only
|
869
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length
|
870
|
+
# is 256 characters.
|
871
|
+
# @yield [routine] A block for setting properties on the routine.
|
872
|
+
# @yieldparam [Google::Cloud::Bigquery::Routine::Updater] routine An updater to set additional properties on the
|
873
|
+
# routine.
|
874
|
+
#
|
875
|
+
# @return [Google::Cloud::Bigquery::Routine] A new routine object.
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# require "google/cloud/bigquery"
|
879
|
+
#
|
880
|
+
# bigquery = Google::Cloud::Bigquery.new
|
881
|
+
# dataset = bigquery.dataset "my_dataset"
|
882
|
+
#
|
883
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
884
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
885
|
+
# r.language = "SQL"
|
886
|
+
# r.arguments = [
|
887
|
+
# Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
|
888
|
+
# ]
|
889
|
+
# r.body = "x * 3"
|
890
|
+
# r.description = "My routine description"
|
891
|
+
# end
|
892
|
+
#
|
893
|
+
# puts routine.routine_id
|
894
|
+
#
|
895
|
+
# @example Extended example:
|
896
|
+
# require "google/cloud/bigquery"
|
897
|
+
#
|
898
|
+
# bigquery = Google::Cloud::Bigquery.new
|
899
|
+
# dataset = bigquery.dataset "my_dataset"
|
900
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
901
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
902
|
+
# r.language = :SQL
|
903
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
904
|
+
# r.arguments = [
|
905
|
+
# Google::Cloud::Bigquery::Argument.new(
|
906
|
+
# name: "arr",
|
907
|
+
# argument_kind: "FIXED_TYPE",
|
908
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
909
|
+
# type_kind: "ARRAY",
|
910
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
911
|
+
# type_kind: "STRUCT",
|
912
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
913
|
+
# fields: [
|
914
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
915
|
+
# name: "name",
|
916
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
917
|
+
# ),
|
918
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
919
|
+
# name: "val",
|
920
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
921
|
+
# )
|
922
|
+
# ]
|
923
|
+
# )
|
924
|
+
# )
|
925
|
+
# )
|
926
|
+
# )
|
927
|
+
# ]
|
928
|
+
# end
|
929
|
+
#
|
930
|
+
# @!group Routine
|
931
|
+
#
|
932
|
+
def create_routine routine_id
|
933
|
+
ensure_service!
|
934
|
+
new_tb = Google::Apis::BigqueryV2::Routine.new(
|
935
|
+
routine_reference: Google::Apis::BigqueryV2::RoutineReference.new(
|
936
|
+
project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
|
937
|
+
)
|
938
|
+
)
|
939
|
+
updater = Routine::Updater.new new_tb
|
940
|
+
|
941
|
+
yield updater if block_given?
|
942
|
+
|
943
|
+
gapi = service.insert_routine dataset_id, updater.to_gapi
|
944
|
+
Routine.from_gapi gapi, service
|
945
|
+
end
|
946
|
+
|
947
|
+
##
|
948
|
+
# Retrieves an existing routine by ID.
|
949
|
+
#
|
950
|
+
# @param [String] routine_id The ID of a routine.
|
951
|
+
# @param [Boolean] skip_lookup Optionally create just a local reference
|
952
|
+
# object without verifying that the resource exists on the BigQuery
|
953
|
+
# service. Calls made on this object will raise errors if the resource
|
954
|
+
# does not exist. Default is `false`. Optional.
|
955
|
+
#
|
956
|
+
# @return [Google::Cloud::Bigquery::Routine, nil] Returns `nil` if the
|
957
|
+
# routine does not exist.
|
958
|
+
#
|
959
|
+
# @example
|
960
|
+
# require "google/cloud/bigquery"
|
961
|
+
#
|
962
|
+
# bigquery = Google::Cloud::Bigquery.new
|
963
|
+
# dataset = bigquery.dataset "my_dataset"
|
964
|
+
#
|
965
|
+
# routine = dataset.routine "my_routine"
|
966
|
+
# puts routine.routine_id
|
967
|
+
#
|
968
|
+
# @example Avoid retrieving the routine resource with `skip_lookup`:
|
969
|
+
# require "google/cloud/bigquery"
|
970
|
+
#
|
971
|
+
# bigquery = Google::Cloud::Bigquery.new
|
972
|
+
#
|
973
|
+
# dataset = bigquery.dataset "my_dataset"
|
974
|
+
#
|
975
|
+
# routine = dataset.routine "my_routine", skip_lookup: true
|
976
|
+
#
|
977
|
+
# @!group Routine
|
978
|
+
#
|
979
|
+
def routine routine_id, skip_lookup: nil
|
980
|
+
ensure_service!
|
981
|
+
return Routine.new_reference project_id, dataset_id, routine_id, service if skip_lookup
|
982
|
+
gapi = service.get_routine dataset_id, routine_id
|
983
|
+
Routine.from_gapi gapi, service
|
984
|
+
rescue Google::Cloud::NotFoundError
|
985
|
+
nil
|
986
|
+
end
|
987
|
+
|
988
|
+
##
|
989
|
+
# Retrieves the list of routines belonging to the dataset.
|
990
|
+
#
|
991
|
+
# @param [String] token A previously-returned page token representing
|
992
|
+
# part of the larger set of results to view.
|
993
|
+
# @param [Integer] max Maximum number of routines to return.
|
994
|
+
# @param [String] filter If set, then only the routines matching this filter are returned. The current supported
|
995
|
+
# form is `routineType:`, with a {Routine#routine_type} enum value. Example: `routineType:SCALAR_FUNCTION`.
|
996
|
+
#
|
997
|
+
# @return [Array<Google::Cloud::Bigquery::Routine>] An array of routines
|
998
|
+
# (See {Google::Cloud::Bigquery::Routine::List})
|
999
|
+
#
|
1000
|
+
# @example
|
1001
|
+
# require "google/cloud/bigquery"
|
1002
|
+
#
|
1003
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1004
|
+
# dataset = bigquery.dataset "my_dataset"
|
1005
|
+
#
|
1006
|
+
# routines = dataset.routines
|
1007
|
+
# routines.each do |routine|
|
1008
|
+
# puts routine.routine_id
|
1009
|
+
# end
|
1010
|
+
#
|
1011
|
+
# @example Retrieve all routines: (See {Routine::List#all})
|
1012
|
+
# require "google/cloud/bigquery"
|
1013
|
+
#
|
1014
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1015
|
+
# dataset = bigquery.dataset "my_dataset"
|
1016
|
+
#
|
1017
|
+
# routines = dataset.routines
|
1018
|
+
# routines.all do |routine|
|
1019
|
+
# puts routine.routine_id
|
1020
|
+
# end
|
1021
|
+
#
|
1022
|
+
# @!group Routine
|
1023
|
+
#
|
1024
|
+
def routines token: nil, max: nil, filter: nil
|
1025
|
+
ensure_service!
|
1026
|
+
gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
|
1027
|
+
Routine::List.from_gapi gapi, service, dataset_id, max, filter: filter
|
1028
|
+
end
|
1029
|
+
|
1030
|
+
##
|
1031
|
+
# Queries data by creating a [query
|
1032
|
+
# job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
|
1033
|
+
#
|
1034
|
+
# Sets the current dataset as the default dataset in the query. Useful
|
1035
|
+
# for using unqualified table names.
|
1036
|
+
#
|
1037
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1038
|
+
# {QueryJob::Updater#location=} in a block passed to this method. If the
|
1039
|
+
# dataset is a full resource representation (see {#resource_full?}), the
|
1040
|
+
# location of the job will be automatically set to the location of the
|
1041
|
+
# dataset.
|
1042
|
+
#
|
1043
|
+
# @param [String] query A query string, following the BigQuery [query
|
1044
|
+
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
1045
|
+
# query to execute. Example: "SELECT count(f1) FROM
|
1046
|
+
# [myProjectId:myDatasetId.myTableId]".
|
1047
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
1048
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
1049
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
1050
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
|
1051
|
+
# true.
|
1052
|
+
#
|
1053
|
+
# Ruby types are mapped to BigQuery types as follows:
|
1054
|
+
#
|
1055
|
+
# | BigQuery | Ruby | Notes |
|
1056
|
+
# |-------------|--------------------------------------|------------------------------------------------|
|
1057
|
+
# | `BOOL` | `true`/`false` | |
|
1058
|
+
# | `INT64` | `Integer` | |
|
1059
|
+
# | `FLOAT64` | `Float` | |
|
1060
|
+
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
1061
|
+
# | `STRING` | `String` | |
|
1062
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
1063
|
+
# | `DATE` | `Date` | |
|
1064
|
+
# | `TIMESTAMP` | `Time` | |
|
1065
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
1066
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
1067
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
1068
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
1069
|
+
#
|
1070
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
1071
|
+
# of each BigQuery data type, including allowed values.
|
1072
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
|
1073
|
+
# infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
|
1074
|
+
# type for these values.
|
1075
|
+
#
|
1076
|
+
# Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
|
1077
|
+
# parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
|
1078
|
+
# type codes from the following list:
|
1079
|
+
#
|
1080
|
+
# * `:BOOL`
|
1081
|
+
# * `:INT64`
|
1082
|
+
# * `:FLOAT64`
|
1083
|
+
# * `:NUMERIC`
|
1084
|
+
# * `:STRING`
|
1085
|
+
# * `:DATETIME`
|
1086
|
+
# * `:DATE`
|
1087
|
+
# * `:TIMESTAMP`
|
1088
|
+
# * `:TIME`
|
1089
|
+
# * `:BYTES`
|
1090
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
1091
|
+
# are specified as `[:INT64]`.
|
1092
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
1093
|
+
# match the `params` hash, and the values are the types value that matches the data.
|
1094
|
+
#
|
1095
|
+
# Types are optional.
|
1096
|
+
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
1097
|
+
# that represents the mapping of the external tables to the table
|
1098
|
+
# names used in the SQL query. The hash keys are the table names, and
|
1099
|
+
# the hash values are the external table objects. See {Dataset#query}.
|
1100
|
+
# @param [String] priority Specifies a priority for the query. Possible
|
1101
|
+
# values include `INTERACTIVE` and `BATCH`. The default value is
|
1102
|
+
# `INTERACTIVE`.
|
1103
|
+
# @param [Boolean] cache Whether to look for the result in the query
|
1104
|
+
# cache. The query cache is a best-effort cache that will be flushed
|
1105
|
+
# whenever tables in the query are modified. The default value is
|
1106
|
+
# true. For more information, see [query
|
1107
|
+
# caching](https://developers.google.com/bigquery/querying-data).
|
1108
|
+
# @param [Table] table The destination table where the query results
|
1109
|
+
# should be stored. If not present, a new table will be created to
|
1110
|
+
# store the results.
|
1111
|
+
# @param [String] create Specifies whether the job is allowed to create
|
1112
|
+
# new tables. The default value is `needed`.
|
1113
|
+
#
|
1114
|
+
# The following values are supported:
|
1115
|
+
#
|
1116
|
+
# * `needed` - Create the table if it does not exist.
|
1117
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
1118
|
+
# raised if the table does not exist.
|
1119
|
+
# @param [String] write Specifies the action that occurs if the
|
1120
|
+
# destination table already exists. The default value is `empty`.
|
1121
|
+
#
|
1122
|
+
# The following values are supported:
|
1123
|
+
#
|
1124
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1125
|
+
# * `append` - BigQuery appends the data to the table.
|
1126
|
+
# * `empty` - A 'duplicate' error is returned in the job result if the
|
1127
|
+
# table exists and contains data.
|
1128
|
+
# @param [Boolean] dryrun If set to true, BigQuery doesn't run the job.
|
1129
|
+
# Instead, if the query is valid, BigQuery returns statistics about
|
1130
|
+
# the job such as how many bytes would be processed. If the query is
|
1131
|
+
# invalid, an error returns. The default value is false.
|
1132
|
+
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
|
1133
|
+
# [standard
|
1134
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
1135
|
+
# dialect for this query. If set to true, the query will use standard
|
1136
|
+
# SQL rather than the [legacy
|
1137
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
1138
|
+
# dialect. Optional. The default value is true.
|
1139
|
+
# @param [Boolean] legacy_sql Specifies whether to use BigQuery's
|
1140
|
+
# [legacy
|
1141
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
1142
|
+
# dialect for this query. If set to false, the query will use
|
1143
|
+
# BigQuery's [standard
|
1144
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
1145
|
+
# dialect. Optional. The default value is false.
|
1146
|
+
# @param [Boolean] large_results This option is specific to Legacy SQL.
|
1147
|
+
# If `true`, allows the query to produce arbitrarily large result
|
1148
|
+
# tables at a slight cost in performance. Requires `table` parameter
|
1149
|
+
# to be set.
|
1150
|
+
# @param [Boolean] flatten This option is specific to Legacy SQL.
|
1151
|
+
# Flattens all nested and repeated fields in the query results. The
|
1152
|
+
# default value is `true`. `large_results` parameter must be `true` if
|
1153
|
+
# this is set to `false`.
|
1154
|
+
# @param [Integer] maximum_bytes_billed Limits the bytes billed for this
|
1155
|
+
# job. Queries that will have bytes billed beyond this limit will fail
|
1156
|
+
# (without incurring a charge). Optional. If unspecified, this will be
|
1157
|
+
# set to your project default.
|
1158
|
+
# @param [String] job_id A user-defined ID for the query job. The ID
|
1159
|
+
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
1160
|
+
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
1161
|
+
# `job_id` is provided, then `prefix` will not be used.
|
1162
|
+
#
|
1163
|
+
# See [Generating a job
|
1164
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
1165
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
1166
|
+
# prepended to a generated value to produce a unique job ID. For
|
1167
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
1168
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
1169
|
+
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
|
1170
|
+
# underscores (_), or dashes (-). The maximum length of the entire ID
|
1171
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1172
|
+
# be used.
|
1173
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
1174
|
+
# the job. You can use these to organize and group your jobs. Label
|
1175
|
+
# keys and values can be no longer than 63 characters, can only
|
1176
|
+
# contain lowercase letters, numeric characters, underscores and
|
1177
|
+
# dashes. International characters are allowed. Label values are
|
1178
|
+
# optional. Label keys must start with a letter and each label in the
|
1179
|
+
# list must have a different key. See [Requirements for
|
1180
|
+
# labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
|
1181
|
+
# @param [Array<String>, String] udfs User-defined function resources
|
1182
|
+
# used in a legacy SQL query. May be either a code resource to load from
|
1183
|
+
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
1184
|
+
# that contains code for a user-defined function (UDF). Providing an
|
1185
|
+
# inline code resource is equivalent to providing a URI for a file
|
1186
|
+
# containing the same code.
|
1187
|
+
#
|
1188
|
+
# This parameter is used for defining User Defined Function (UDF)
|
1189
|
+
# resources only when using legacy SQL. Users of standard SQL should
|
1190
|
+
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
|
1191
|
+
# Routines API to define UDF resources.
|
1192
|
+
#
|
1193
|
+
# For additional information on migrating, see: [Migrating to
|
1194
|
+
# standard SQL - Differences in user-defined JavaScript
|
1195
|
+
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
|
1196
|
+
# @param [Integer] maximum_billing_tier Deprecated: Change the billing
|
1197
|
+
# tier to allow high-compute queries.
|
1198
|
+
# @yield [job] a job configuration object
|
1199
|
+
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
|
1200
|
+
# configuration object for setting additional options for the query.
|
1201
|
+
#
|
1202
|
+
# @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
|
1203
|
+
#
|
1204
|
+
# @example Query using standard SQL:
|
1205
|
+
# require "google/cloud/bigquery"
|
1206
|
+
#
|
1207
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1208
|
+
# dataset = bigquery.dataset "my_dataset"
|
1209
|
+
#
|
1210
|
+
# job = dataset.query_job "SELECT name FROM my_table"
|
1211
|
+
#
|
1212
|
+
# job.wait_until_done!
|
1213
|
+
# if !job.failed?
|
1214
|
+
# job.data.each do |row|
|
1215
|
+
# puts row[:name]
|
1216
|
+
# end
|
1217
|
+
# end
|
1218
|
+
#
|
1219
|
+
# @example Query using legacy SQL:
|
1220
|
+
# require "google/cloud/bigquery"
|
1221
|
+
#
|
1222
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1223
|
+
# dataset = bigquery.dataset "my_dataset"
|
1224
|
+
#
|
1225
|
+
# job = dataset.query_job "SELECT name FROM my_table",
|
1226
|
+
# legacy_sql: true
|
1227
|
+
#
|
1228
|
+
# job.wait_until_done!
|
1229
|
+
# if !job.failed?
|
1230
|
+
# job.data.each do |row|
|
1231
|
+
# puts row[:name]
|
1232
|
+
# end
|
1233
|
+
# end
|
1234
|
+
#
|
1235
|
+
# @example Query using positional query parameters:
|
1236
|
+
# require "google/cloud/bigquery"
|
1237
|
+
#
|
1238
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1239
|
+
# dataset = bigquery.dataset "my_dataset"
|
1240
|
+
#
|
1241
|
+
# job = dataset.query_job "SELECT name FROM my_table WHERE id = ?",
|
1242
|
+
# params: [1]
|
1243
|
+
#
|
1244
|
+
# job.wait_until_done!
|
1245
|
+
# if !job.failed?
|
1246
|
+
# job.data.each do |row|
|
1247
|
+
# puts row[:name]
|
1248
|
+
# end
|
1249
|
+
# end
|
1250
|
+
#
|
1251
|
+
# @example Query using named query parameters:
|
1252
|
+
# require "google/cloud/bigquery"
|
1253
|
+
#
|
1254
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1255
|
+
# dataset = bigquery.dataset "my_dataset"
|
1256
|
+
#
|
1257
|
+
# job = dataset.query_job "SELECT name FROM my_table WHERE id = @id",
|
1258
|
+
# params: { id: 1 }
|
1259
|
+
#
|
1260
|
+
# job.wait_until_done!
|
1261
|
+
# if !job.failed?
|
1262
|
+
# job.data.each do |row|
|
1263
|
+
# puts row[:name]
|
1264
|
+
# end
|
1265
|
+
# end
|
1266
|
+
#
|
1267
|
+
# @example Query using named query parameters with types:
|
1268
|
+
# require "google/cloud/bigquery"
|
1269
|
+
#
|
1270
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1271
|
+
# dataset = bigquery.dataset "my_dataset"
|
1272
|
+
#
|
1273
|
+
# job = dataset.query_job "SELECT name FROM my_table " \
|
1274
|
+
# "WHERE id IN UNNEST(@ids)",
|
1275
|
+
# params: { ids: [] },
|
1276
|
+
# types: { ids: [:INT64] }
|
1277
|
+
#
|
1278
|
+
# job.wait_until_done!
|
1279
|
+
# if !job.failed?
|
1280
|
+
# job.data.each do |row|
|
1281
|
+
# puts row[:name]
|
1282
|
+
# end
|
1283
|
+
# end
|
1284
|
+
#
|
1285
|
+
# @example Execute a DDL statement:
|
1286
|
+
# require "google/cloud/bigquery"
|
1287
|
+
#
|
1288
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1289
|
+
#
|
1290
|
+
# job = bigquery.query_job "CREATE TABLE my_table (x INT64)"
|
1291
|
+
#
|
1292
|
+
# job.wait_until_done!
|
1293
|
+
# if !job.failed?
|
1294
|
+
# table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
|
1295
|
+
# end
|
1296
|
+
#
|
1297
|
+
# @example Execute a DML statement:
|
1298
|
+
# require "google/cloud/bigquery"
|
1299
|
+
#
|
1300
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1301
|
+
#
|
1302
|
+
# job = bigquery.query_job "UPDATE my_table " \
|
1303
|
+
# "SET x = x + 1 " \
|
1304
|
+
# "WHERE x IS NOT NULL"
|
1305
|
+
#
|
1306
|
+
# job.wait_until_done!
|
1307
|
+
# if !job.failed?
|
1308
|
+
# puts job.num_dml_affected_rows
|
1309
|
+
# end
|
1310
|
+
#
|
1311
|
+
# @example Query using external data source, set destination:
|
1312
|
+
# require "google/cloud/bigquery"
|
1313
|
+
#
|
1314
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1315
|
+
# dataset = bigquery.dataset "my_dataset"
|
1316
|
+
#
|
1317
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
1318
|
+
# csv_table = dataset.external csv_url do |csv|
|
1319
|
+
# csv.autodetect = true
|
1320
|
+
# csv.skip_leading_rows = 1
|
1321
|
+
# end
|
1322
|
+
#
|
1323
|
+
# job = dataset.query_job "SELECT * FROM my_ext_table" do |query|
|
1324
|
+
# query.external = { my_ext_table: csv_table }
|
1325
|
+
# query.table = dataset.table "my_table", skip_lookup: true
|
1326
|
+
# end
|
1327
|
+
#
|
1328
|
+
# job.wait_until_done!
|
1329
|
+
# if !job.failed?
|
1330
|
+
# job.data.each do |row|
|
1331
|
+
# puts row[:name]
|
1332
|
+
# end
|
1333
|
+
# end
|
1334
|
+
#
|
1335
|
+
# @!group Data
|
1336
|
+
#
|
1337
|
+
def query_job query, params: nil, types: nil, external: nil, priority: "INTERACTIVE", cache: true, table: nil,
|
1338
|
+
create: nil, write: nil, dryrun: nil, standard_sql: nil, legacy_sql: nil, large_results: nil,
|
1339
|
+
flatten: nil, maximum_billing_tier: nil, maximum_bytes_billed: nil, job_id: nil, prefix: nil,
|
1340
|
+
labels: nil, udfs: nil
|
1341
|
+
ensure_service!
|
1342
|
+
options = { params: params, types: types, external: external, priority: priority, cache: cache, table: table,
|
1343
|
+
create: create, write: write, dryrun: dryrun, standard_sql: standard_sql, legacy_sql: legacy_sql,
|
1344
|
+
large_results: large_results, flatten: flatten, maximum_billing_tier: maximum_billing_tier,
|
1345
|
+
maximum_bytes_billed: maximum_bytes_billed, job_id: job_id, prefix: prefix, labels: labels,
|
1346
|
+
udfs: udfs }
|
1347
|
+
|
1348
|
+
updater = QueryJob::Updater.from_options service, query, options
|
1349
|
+
updater.dataset = self
|
1350
|
+
updater.location = location if location # may be dataset reference
|
1351
|
+
|
1352
|
+
yield updater if block_given?
|
1353
|
+
|
1354
|
+
gapi = service.query_job updater.to_gapi
|
1355
|
+
Job.from_gapi gapi, service
|
1356
|
+
end
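# A minimal polling sketch for the asynchronous job returned by
# `query_job`, as an alternative to blocking with `wait_until_done!`
# (reuses the "my_dataset"/"my_table" names from the examples above):
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   dataset = bigquery.dataset "my_dataset"
#
#   job = dataset.query_job "SELECT name FROM my_table"
#   until job.done?
#     sleep 5
#     job.reload!
#   end
#   puts job.error.inspect if job.failed?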
|
1357
|
+
|
1358
|
+
##
|
1359
|
+
# Queries data and waits for the results. In this method, a {QueryJob}
|
1360
|
+
# is created and its results are saved to a temporary table, then read
|
1361
|
+
# from the table. Timeouts and transient errors are generally handled
|
1362
|
+
# as needed to complete the query. When used for executing DDL/DML
|
1363
|
+
# statements, this method does not return row data.
|
1364
|
+
#
|
1365
|
+
# Sets the current dataset as the default dataset in the query. Useful
|
1366
|
+
# for using unqualified table names.
|
1367
|
+
#
|
1368
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1369
|
+
# {QueryJob::Updater#location=} in a block passed to this method. If the
|
1370
|
+
# dataset is a full resource representation (see {#resource_full?}), the
|
1371
|
+
# location of the job will be automatically set to the location of the
|
1372
|
+
# dataset.
|
1373
|
+
#
|
1374
|
+
# @see https://cloud.google.com/bigquery/querying-data Querying Data
|
1375
|
+
#
|
1376
|
+
# @param [String] query A query string, following the BigQuery [query
|
1377
|
+
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
1378
|
+
# query to execute. Example: "SELECT count(f1) FROM
|
1379
|
+
# [myProjectId:myDatasetId.myTableId]".
|
1380
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
1381
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
1382
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
1383
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
|
1384
|
+
# true.
|
1385
|
+
#
|
1386
|
+
# Ruby types are mapped to BigQuery types as follows:
|
1387
|
+
#
|
1388
|
+
# | BigQuery | Ruby | Notes |
|
1389
|
+
# |-------------|--------------------------------------|------------------------------------------------|
|
1390
|
+
# | `BOOL` | `true`/`false` | |
|
1391
|
+
# | `INT64` | `Integer` | |
|
1392
|
+
# | `FLOAT64` | `Float` | |
|
1393
|
+
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
1394
|
+
# | `STRING` | `String` | |
|
1395
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
1396
|
+
# | `DATE` | `Date` | |
|
1397
|
+
# | `TIMESTAMP` | `Time` | |
|
1398
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
1399
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
1400
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
1401
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
1402
|
+
#
|
1403
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
1404
|
+
# of each BigQuery data type, including allowed values.
|
1405
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
|
1406
|
+
# infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
|
1407
|
+
# type for these values.
|
1408
|
+
#
|
1409
|
+
# Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
|
1410
|
+
# parameters. This must be a `Hash` when the query uses named query parameters. The values should be BigQuery
|
1411
|
+
# type codes from the following list:
|
1412
|
+
#
|
1413
|
+
# * `:BOOL`
|
1414
|
+
# * `:INT64`
|
1415
|
+
# * `:FLOAT64`
|
1416
|
+
# * `:NUMERIC`
|
1417
|
+
# * `:STRING`
|
1418
|
+
# * `:DATETIME`
|
1419
|
+
# * `:DATE`
|
1420
|
+
# * `:TIMESTAMP`
|
1421
|
+
# * `:TIME`
|
1422
|
+
# * `:BYTES`
|
1423
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
1424
|
+
# are specified as `[:INT64]`.
|
1425
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
1426
|
+
# match the `params` hash, and the values are the type codes (from this list) that match the corresponding data.
|
1427
|
+
#
|
1428
|
+
# Types are optional.
|
1429
|
+
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
1430
|
+
# that represents the mapping of the external tables to the table
|
1431
|
+
# names used in the SQL query. The hash keys are the table names, and
|
1432
|
+
# the hash values are the external table objects. See {Dataset#query}.
|
1433
|
+
# @param [Integer] max The maximum number of rows of data to return per
|
1434
|
+
# page of results. Setting this flag to a small value such as 1000 and
|
1435
|
+
# then paging through results might improve reliability when the query
|
1436
|
+
# result set is large. In addition to this limit, responses are also
|
1437
|
+
# limited to 10 MB. By default, there is no maximum row count, and
|
1438
|
+
# only the byte limit applies.
|
1439
|
+
# @param [Boolean] cache Whether to look for the result in the query
|
1440
|
+
# cache. The query cache is a best-effort cache that will be flushed
|
1441
|
+
# whenever tables in the query are modified. The default value is
|
1442
|
+
# true. For more information, see [query
|
1443
|
+
# caching](https://developers.google.com/bigquery/querying-data).
|
1444
|
+
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
|
1445
|
+
# [standard
|
1446
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
|
1447
|
+
# dialect for this query. If set to true, the query will use standard
|
1448
|
+
# SQL rather than the [legacy
|
1449
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
1450
|
+
# dialect. When set to true, the values of `large_results` and
|
1451
|
+
# `flatten` are ignored; the query will be run as if `large_results`
|
1452
|
+
# is true and `flatten` is false. Optional. The default value is
|
1453
|
+
# true.
|
1454
|
+
# @param [Boolean] legacy_sql Specifies whether to use BigQuery's
|
1455
|
+
# [legacy
|
1456
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
1457
|
+
# dialect for this query. If set to false, the query will use
|
1458
|
+
# BigQuery's [standard
|
1459
|
+
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/) dialect.
|
1460
|
+
# When set to false, the values of `large_results` and `flatten` are
|
1461
|
+
# ignored; the query will be run as if `large_results` is true and
|
1462
|
+
# `flatten` is false. Optional. The default value is false.
|
1463
|
+
# @yield [job] a job configuration object
|
1464
|
+
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
|
1465
|
+
# configuration object for setting additional options for the query.
|
1466
|
+
#
|
1467
|
+
# @return [Google::Cloud::Bigquery::Data] A new data object.
|
1468
|
+
#
|
1469
|
+
# @example Query using standard SQL:
|
1470
|
+
# require "google/cloud/bigquery"
|
1471
|
+
#
|
1472
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1473
|
+
# dataset = bigquery.dataset "my_dataset"
|
1474
|
+
#
|
1475
|
+
# data = dataset.query "SELECT name FROM my_table"
|
1476
|
+
#
|
1477
|
+
# # Iterate over the first page of results
|
1478
|
+
# data.each do |row|
|
1479
|
+
# puts row[:name]
|
1480
|
+
# end
|
1481
|
+
# # Retrieve the next page of results
|
1482
|
+
# data = data.next if data.next?
|
1483
|
+
#
|
1484
|
+
# @example Query using legacy SQL:
|
1485
|
+
# require "google/cloud/bigquery"
|
1486
|
+
#
|
1487
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1488
|
+
# dataset = bigquery.dataset "my_dataset"
|
1489
|
+
#
|
1490
|
+
# data = dataset.query "SELECT name FROM my_table",
|
1491
|
+
# legacy_sql: true
|
1492
|
+
#
|
1493
|
+
# # Iterate over the first page of results
|
1494
|
+
# data.each do |row|
|
1495
|
+
# puts row[:name]
|
1496
|
+
# end
|
1497
|
+
# # Retrieve the next page of results
|
1498
|
+
# data = data.next if data.next?
|
1499
|
+
#
|
1500
|
+
# @example Query using positional query parameters:
|
1501
|
+
# require "google/cloud/bigquery"
|
1502
|
+
#
|
1503
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1504
|
+
# dataset = bigquery.dataset "my_dataset"
|
1505
|
+
#
|
1506
|
+
# data = dataset.query "SELECT name FROM my_table WHERE id = ?",
|
1507
|
+
# params: [1]
|
1508
|
+
#
|
1509
|
+
# # Iterate over the first page of results
|
1510
|
+
# data.each do |row|
|
1511
|
+
# puts row[:name]
|
1512
|
+
# end
|
1513
|
+
# # Retrieve the next page of results
|
1514
|
+
# data = data.next if data.next?
|
1515
|
+
#
|
1516
|
+
# @example Query using named query parameters:
|
1517
|
+
# require "google/cloud/bigquery"
|
1518
|
+
#
|
1519
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1520
|
+
# dataset = bigquery.dataset "my_dataset"
|
1521
|
+
#
|
1522
|
+
# data = dataset.query "SELECT name FROM my_table WHERE id = @id",
|
1523
|
+
# params: { id: 1 }
|
1524
|
+
#
|
1525
|
+
# # Iterate over the first page of results
|
1526
|
+
# data.each do |row|
|
1527
|
+
# puts row[:name]
|
1528
|
+
# end
|
1529
|
+
# # Retrieve the next page of results
|
1530
|
+
# data = data.next if data.next?
|
1531
|
+
#
|
1532
|
+
# @example Query using named query parameters with types:
|
1533
|
+
# require "google/cloud/bigquery"
|
1534
|
+
#
|
1535
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1536
|
+
# dataset = bigquery.dataset "my_dataset"
|
1537
|
+
#
|
1538
|
+
# data = dataset.query "SELECT name FROM my_table " \
|
1539
|
+
# "WHERE id IN UNNEST(@ids)",
|
1540
|
+
# params: { ids: [] },
|
1541
|
+
# types: { ids: [:INT64] }
|
1542
|
+
#
|
1543
|
+
# # Iterate over the first page of results
|
1544
|
+
# data.each do |row|
|
1545
|
+
# puts row[:name]
|
1546
|
+
# end
|
1547
|
+
# # Retrieve the next page of results
|
1548
|
+
# data = data.next if data.next?
|
1549
|
+
#
|
1550
|
+
# @example Execute a DDL statement:
|
1551
|
+
# require "google/cloud/bigquery"
|
1552
|
+
#
|
1553
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1554
|
+
#
|
1555
|
+
# data = bigquery.query "CREATE TABLE my_table (x INT64)"
|
1556
|
+
#
|
1557
|
+
# table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
|
1558
|
+
#
|
1559
|
+
# @example Execute a DML statement:
|
1560
|
+
# require "google/cloud/bigquery"
|
1561
|
+
#
|
1562
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1563
|
+
#
|
1564
|
+
# data = bigquery.query "UPDATE my_table " \
|
1565
|
+
# "SET x = x + 1 " \
|
1566
|
+
# "WHERE x IS NOT NULL"
|
1567
|
+
#
|
1568
|
+
# puts data.num_dml_affected_rows
|
1569
|
+
#
|
1570
|
+
# @example Query using external data source, set destination:
|
1571
|
+
# require "google/cloud/bigquery"
|
1572
|
+
#
|
1573
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1574
|
+
# dataset = bigquery.dataset "my_dataset"
|
1575
|
+
#
|
1576
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
1577
|
+
# csv_table = dataset.external csv_url do |csv|
|
1578
|
+
# csv.autodetect = true
|
1579
|
+
# csv.skip_leading_rows = 1
|
1580
|
+
# end
|
1581
|
+
#
|
1582
|
+
# data = dataset.query "SELECT * FROM my_ext_table" do |query|
|
1583
|
+
# query.external = { my_ext_table: csv_table }
|
1584
|
+
# query.table = dataset.table "my_table", skip_lookup: true
|
1585
|
+
# end
|
1586
|
+
#
|
1587
|
+
# # Iterate over the first page of results
|
1588
|
+
# data.each do |row|
|
1589
|
+
# puts row[:name]
|
1590
|
+
# end
|
1591
|
+
# # Retrieve the next page of results
|
1592
|
+
# data = data.next if data.next?
|
1593
|
+
#
|
1594
|
+
# @!group Data
|
1595
|
+
#
|
1596
|
+
def query query, params: nil, types: nil, external: nil, max: nil, cache: true,
|
1597
|
+
standard_sql: nil, legacy_sql: nil, &block
|
1598
|
+
job = query_job query, params: params, types: types, external: external, cache: cache,
|
1599
|
+
standard_sql: standard_sql, legacy_sql: legacy_sql, &block
|
1600
|
+
job.wait_until_done!
|
1601
|
+
ensure_job_succeeded! job
|
1602
|
+
|
1603
|
+
job.data max: max
|
1604
|
+
end
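# A sketch of consuming every page of results from `query` instead of a
# single page; it assumes the `all` enumerator on {Data}, which issues
# additional API calls as pages are exhausted:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   dataset = bigquery.dataset "my_dataset"
#
#   data = dataset.query "SELECT name FROM my_table"
#   data.all do |row|
#     puts row[:name]
#   end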
|
1605
|
+
|
1606
|
+
##
|
1607
|
+
# Creates a new External::DataSource (or subclass) object that
|
1608
|
+
# represents an external data source that can be queried directly,
|
1609
|
+
# even though the data is not stored in BigQuery. Instead of loading or
|
1610
|
+
# streaming the data, this object references the external data source.
|
1611
|
+
#
|
1612
|
+
# @see https://cloud.google.com/bigquery/external-data-sources Querying
|
1613
|
+
# External Data Sources
|
1614
|
+
#
|
1615
|
+
# @param [String, Array<String>] url The fully-qualified URL(s) that
|
1616
|
+
# point to your data in Google Cloud. An attempt will be made to
|
1617
|
+
# derive the format from the URLs provided.
|
1618
|
+
# @param [String|Symbol] format The data format. This value will be used
|
1619
|
+
# even if the provided URLs are recognized as a different format.
|
1620
|
+
# Optional.
|
1621
|
+
#
|
1622
|
+
# The following values are supported:
|
1623
|
+
#
|
1624
|
+
# * `csv` - CSV
|
1625
|
+
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1626
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
1627
|
+
# * `sheets` - Google Sheets
|
1628
|
+
# * `datastore_backup` - Cloud Datastore backup
|
1629
|
+
# * `bigtable` - Bigtable
|
1630
|
+
#
|
1631
|
+
# @return [External::DataSource] External data source.
|
1632
|
+
#
|
1633
|
+
# @example
|
1634
|
+
# require "google/cloud/bigquery"
|
1635
|
+
#
|
1636
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1637
|
+
#
|
1638
|
+
# dataset = bigquery.dataset "my_dataset"
|
1639
|
+
#
|
1640
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
1641
|
+
# csv_table = dataset.external csv_url do |csv|
|
1642
|
+
# csv.autodetect = true
|
1643
|
+
# csv.skip_leading_rows = 1
|
1644
|
+
# end
|
1645
|
+
#
|
1646
|
+
# data = dataset.query "SELECT * FROM my_ext_table",
|
1647
|
+
# external: { my_ext_table: csv_table }
|
1648
|
+
#
|
1649
|
+
# data.each do |row|
|
1650
|
+
# puts row[:name]
|
1651
|
+
# end
|
1652
|
+
#
|
1653
|
+
def external url, format: nil
|
1654
|
+
ext = External.from_urls url, format
|
1655
|
+
yield ext if block_given?
|
1656
|
+
ext
|
1657
|
+
end
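# A sketch of overriding format autodetection when the URL's extension does
# not reveal the format; the `gs://` path is a placeholder and `format: :csv`
# maps to the `format` parameter documented above:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   dataset = bigquery.dataset "my_dataset"
#
#   ext = dataset.external "gs://bucket/path/to/data.txt", format: :csv do |csv|
#     csv.skip_leading_rows = 1
#   end
#
#   data = dataset.query "SELECT * FROM my_ext_table",
#                        external: { my_ext_table: ext }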
|
1658
|
+
|
1659
|
+
##
|
1660
|
+
# Loads data into the provided destination table using an asynchronous
|
1661
|
+
# method. In this method, a {LoadJob} is immediately returned. The
|
1662
|
+
# caller may poll the service by repeatedly calling {Job#reload!} and
|
1663
|
+
# {Job#done?} to detect when the job is done, or simply block until the
|
1664
|
+
# job is done by calling {Job#wait_until_done!}. See also {#load}.
|
1665
|
+
#
|
1666
|
+
# For the source of the data, you can pass a Google Cloud Storage file
|
1667
|
+
# path or a google-cloud-storage `File` instance. Or, you can upload a
|
1668
|
+
# file directly. See [Loading Data with a POST
|
1669
|
+
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
|
1670
|
+
#
|
1671
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1672
|
+
# {LoadJob::Updater#location=} in a block passed to this method. If the
|
1673
|
+
# dataset is a full resource representation (see {#resource_full?}), the
|
1674
|
+
# location of the job will be automatically set to the location of the
|
1675
|
+
# dataset.
|
1676
|
+
#
|
1677
|
+
# @param [String] table_id The destination table to load the data into.
|
1678
|
+
# @param [File, Google::Cloud::Storage::File, String, URI,
|
1679
|
+
# Array<Google::Cloud::Storage::File, String, URI>] files
|
1680
|
+
# A file or the URI of a Google Cloud Storage file, or an Array of
|
1681
|
+
# those, containing data to load into the table.
|
1682
|
+
# @param [String] format The file format of the source data. The default value is
|
1683
|
+
# `csv`.
|
1684
|
+
#
|
1685
|
+
# The following values are supported:
|
1686
|
+
#
|
1687
|
+
# * `csv` - CSV
|
1688
|
+
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1689
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
1690
|
+
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
|
1691
|
+
# * `parquet` - [Parquet](https://parquet.apache.org/)
|
1692
|
+
# * `datastore_backup` - Cloud Datastore backup
|
1693
|
+
# @param [String] create Specifies whether the job is allowed to create
|
1694
|
+
# new tables. The default value is `needed`.
|
1695
|
+
#
|
1696
|
+
# The following values are supported:
|
1697
|
+
#
|
1698
|
+
# * `needed` - Create the table if it does not exist.
|
1699
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
1700
|
+
# raised if the table does not exist.
|
1701
|
+
# @param [String] write Specifies how to handle data already present in
|
1702
|
+
# the table. The default value is `append`.
|
1703
|
+
#
|
1704
|
+
# The following values are supported:
|
1705
|
+
#
|
1706
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1707
|
+
# * `append` - BigQuery appends the data to the table.
|
1708
|
+
# * `empty` - An error will be returned if the table already contains
|
1709
|
+
# data.
|
1710
|
+
# @param [Array<String>] projection_fields If the `format` option is set
|
1711
|
+
# to `datastore_backup`, indicates which entity properties to load
|
1712
|
+
# from a Cloud Datastore backup. Property names are case sensitive and
|
1713
|
+
# must be top-level properties. If not set, BigQuery loads all
|
1714
|
+
# properties. If any named property isn't found in the Cloud Datastore
|
1715
|
+
# backup, an invalid error is returned.
|
1716
|
+
# @param [Boolean] jagged_rows Accept rows that are missing trailing
|
1717
|
+
# optional columns. The missing values are treated as nulls. If
|
1718
|
+
# `false`, records with missing trailing columns are treated as bad
|
1719
|
+
# records, and if there are too many bad records, an invalid error is
|
1720
|
+
# returned in the job result. The default value is `false`. Only
|
1721
|
+
# applicable to CSV, ignored for other formats.
|
1722
|
+
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
|
1723
|
+
# quoted data sections that contain newline characters in a CSV file.
|
1724
|
+
# The default value is `false`.
|
1725
|
+
# @param [Boolean] autodetect Indicates if BigQuery should
|
1726
|
+
# automatically infer the options and schema for CSV and JSON sources.
|
1727
|
+
# The default value is `false`.
|
1728
|
+
# @param [String] encoding The character encoding of the data. The
|
1729
|
+
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
|
1730
|
+
# `UTF-8`.
|
1731
|
+
# @param [String] delimiter Specifies the separator for fields in a CSV
|
1732
|
+
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
|
1733
|
+
# then uses the first byte of the encoded string to split the data in
|
1734
|
+
# its raw, binary state. Default is <code>,</code>.
|
1735
|
+
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
|
1736
|
+
# extra values that are not represented in the table schema. If true,
|
1737
|
+
# the extra values are ignored. If false, records with extra columns
|
1738
|
+
# are treated as bad records, and if there are too many bad records,
|
1739
|
+
# an invalid error is returned in the job result. The default value is
|
1740
|
+
# `false`.
|
1741
|
+
#
|
1742
|
+
# The `format` property determines what BigQuery treats as an extra
|
1743
|
+
# value:
|
1744
|
+
#
|
1745
|
+
# * `CSV`: Trailing columns
|
1746
|
+
# * `JSON`: Named values that don't match any column names
|
1747
|
+
# @param [Integer] max_bad_records The maximum number of bad records
|
1748
|
+
# that BigQuery can ignore when running the job. If the number of bad
|
1749
|
+
# records exceeds this value, an invalid error is returned in the job
|
1750
|
+
# result. The default value is `0`, which requires that all records
|
1751
|
+
# are valid.
|
1752
|
+
# @param [String] null_marker Specifies a string that represents a null
|
1753
|
+
# value in a CSV file. For example, if you specify `\N`, BigQuery
|
1754
|
+
# interprets `\N` as a null value when loading a CSV file. The default
|
1755
|
+
# value is the empty string. If you set this property to a custom
|
1756
|
+
# value, BigQuery throws an error if an empty string is present for
|
1757
|
+
# all data types except for STRING and BYTE. For STRING and BYTE
|
1758
|
+
# columns, BigQuery interprets the empty string as an empty value.
|
1759
|
+
# @param [String] quote The value that is used to quote data sections in
|
1760
|
+
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
|
1761
|
+
# then uses the first byte of the encoded string to split the data in
|
1762
|
+
# its raw, binary state. The default value is a double-quote
|
1763
|
+
# <code>"</code>. If your data does not contain quoted sections, set
|
1764
|
+
# the property value to an empty string. If your data contains quoted
|
1765
|
+
# newline characters, you must also set the allowQuotedNewlines
|
1766
|
+
# property to true.
|
1767
|
+
# @param [Integer] skip_leading The number of rows at the top of a CSV
|
1768
|
+
# file that BigQuery will skip when loading the data. The default
|
1769
|
+
# value is `0`. This property is useful if you have header rows in the
|
1770
|
+
# file that should be skipped.
|
1771
|
+
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
|
1772
|
+
# destination table. Optional. The schema can be omitted if the
|
1773
|
+
# destination table already exists, or if you're loading data from a
|
1774
|
+
# Google Cloud Datastore backup.
|
1775
|
+
#
|
1776
|
+
# See {Project#schema} for the creation of the schema for use with
|
1777
|
+
# this option. Also note that for most use cases, the block yielded by
|
1778
|
+
# this method is a more convenient way to configure the schema.
|
1779
|
+
# @param [String] job_id A user-defined ID for the load job. The ID
|
1780
|
+
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
1781
|
+
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
1782
|
+
# `job_id` is provided, then `prefix` will not be used.
|
1783
|
+
#
|
1784
|
+
# See [Generating a job
|
1785
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
1786
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
1787
|
+
# prepended to a generated value to produce a unique job ID. For
|
1788
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
1789
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
1790
|
+
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
|
1791
|
+
# underscores (_), or dashes (-). The maximum length of the entire ID
|
1792
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1793
|
+
# be used.
|
1794
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
1795
|
+
# the job. You can use these to organize and group your jobs. Label
|
1796
|
+
# keys and values can be no longer than 63 characters, can only
|
1797
|
+
# contain lowercase letters, numeric characters, underscores and
|
1798
|
+
# dashes. International characters are allowed. Label values are
|
1799
|
+
# optional. Label keys must start with a letter and each label in the
|
1800
|
+
# list must have a different key. See [Requirements for
|
1801
|
+
# labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
|
1802
|
+
# @yield [updater] A block for setting the schema and other
|
1803
|
+
# options for the destination table. The schema can be omitted if the
|
1804
|
+
# destination table already exists, or if you're loading data from a
|
1805
|
+
# Google Cloud Datastore backup.
|
1806
|
+
# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
|
1807
|
+
# updater to modify the load job and its schema.
|
1808
|
+
# @param [Boolean] dryrun If set, don't actually run this job. However,
|
1809
|
+
# behavior is undefined for non-query jobs and may result in an error.
|
1810
|
+
# Deprecated.
|
1811
|
+
#
|
1812
|
+
# @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
|
1813
|
+
#
|
1814
|
+
# @example
|
1815
|
+
# require "google/cloud/bigquery"
|
1816
|
+
#
|
1817
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1818
|
+
# dataset = bigquery.dataset "my_dataset"
|
1819
|
+
#
|
1820
|
+
# gs_url = "gs://my-bucket/file-name.csv"
|
1821
|
+
# load_job = dataset.load_job "my_new_table", gs_url do |schema|
|
1822
|
+
# schema.string "first_name", mode: :required
|
1823
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1824
|
+
# nested_schema.string "place", mode: :required
|
1825
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1826
|
+
# end
|
1827
|
+
# end
|
1828
|
+
#
|
1829
|
+
# @example Pass a google-cloud-storage `File` instance:
|
1830
|
+
# require "google/cloud/bigquery"
|
1831
|
+
# require "google/cloud/storage"
|
1832
|
+
#
|
1833
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1834
|
+
# dataset = bigquery.dataset "my_dataset"
|
1835
|
+
#
|
1836
|
+
# storage = Google::Cloud::Storage.new
|
1837
|
+
# bucket = storage.bucket "my-bucket"
|
1838
|
+
# file = bucket.file "file-name.csv"
|
1839
|
+
# load_job = dataset.load_job "my_new_table", file do |schema|
|
1840
|
+
# schema.string "first_name", mode: :required
|
1841
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1842
|
+
# nested_schema.string "place", mode: :required
|
1843
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1844
|
+
# end
|
1845
|
+
# end
|
1846
|
+
#
|
1847
|
+
# @example Pass a list of google-cloud-storage files:
|
1848
|
+
# require "google/cloud/bigquery"
|
1849
|
+
# require "google/cloud/storage"
|
1850
|
+
#
|
1851
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1852
|
+
# dataset = bigquery.dataset "my_dataset"
|
1853
|
+
#
|
1854
|
+
# storage = Google::Cloud::Storage.new
|
1855
|
+
# bucket = storage.bucket "my-bucket"
|
1856
|
+
# file = bucket.file "file-name.csv"
|
1857
|
+
# list = [file, "gs://my-bucket/file-name2.csv"]
|
1858
|
+
# load_job = dataset.load_job "my_new_table", list do |schema|
|
1859
|
+
# schema.string "first_name", mode: :required
|
1860
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1861
|
+
# nested_schema.string "place", mode: :required
|
1862
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1863
|
+
# end
|
1864
|
+
# end
|
1865
|
+
#
|
1866
|
+
# @example Upload a file directly:
|
1867
|
+
# require "google/cloud/bigquery"
|
1868
|
+
#
|
1869
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1870
|
+
# dataset = bigquery.dataset "my_dataset"
|
1871
|
+
#
|
1872
|
+
# file = File.open "my_data.csv"
|
1873
|
+
# load_job = dataset.load_job "my_new_table", file do |schema|
|
1874
|
+
# schema.string "first_name", mode: :required
|
1875
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1876
|
+
# nested_schema.string "place", mode: :required
|
1877
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1878
|
+
# end
|
1879
|
+
# end
|
1880
|
+
#
|
1881
|
+
# @example Schema is not required with a Cloud Datastore backup:
|
1882
|
+
# require "google/cloud/bigquery"
|
1883
|
+
#
|
1884
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1885
|
+
# dataset = bigquery.dataset "my_dataset"
|
1886
|
+
#
|
1887
|
+
# load_job = dataset.load_job(
|
1888
|
+
# "my_new_table",
|
1889
|
+
# "gs://my-bucket/xxxx.kind_name.backup_info") do |j|
|
1890
|
+
# j.format = "datastore_backup"
|
1891
|
+
# end
|
1892
|
+
#
|
1893
|
+
# @!group Data
|
1894
|
+
#
|
1895
|
+
def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
|
1896
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
|
1897
|
+
quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
|
1898
|
+
null_marker: nil, dryrun: nil
|
1899
|
+
ensure_service!
|
1900
|
+
|
1901
|
+
updater = load_job_updater table_id,
|
1902
|
+
format: format, create: create, write: write, projection_fields: projection_fields,
|
1903
|
+
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
|
1904
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown,
|
1905
|
+
max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
|
1906
|
+
dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
|
1907
|
+
autodetect: autodetect, null_marker: null_marker
|
1908
|
+
|
1909
|
+
yield updater if block_given?
|
1910
|
+
|
1911
|
+
load_local_or_uri files, updater
|
1912
|
+
end
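# A sketch of polling the returned load job with {Job#reload!}/{Job#done?}
# as described in the method docs, instead of blocking; the GCS URL is a
# placeholder:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   dataset = bigquery.dataset "my_dataset"
#
#   load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv"
#   until load_job.done?
#     sleep 10
#     load_job.reload!
#   end
#   puts "load failed" if load_job.failed?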
|
1913
|
+
|
1914
|
+
##
|
1915
|
+
# Loads data into the provided destination table using a synchronous
|
1916
|
+
# method that blocks for a response. Timeouts and transient errors are
|
1917
|
+
# generally handled as needed to complete the job. See also
|
1918
|
+
# {#load_job}.
|
1919
|
+
#
|
1920
|
+
# For the source of the data, you can pass a Google Cloud Storage file
|
1921
|
+
# path or a google-cloud-storage `File` instance. Or, you can upload a
|
1922
|
+
# file directly. See [Loading Data with a POST
|
1923
|
+
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
|
1924
|
+
#
|
1925
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1926
|
+
# {LoadJob::Updater#location=} in a block passed to this method. If the
|
1927
|
+
# dataset is a full resource representation (see {#resource_full?}), the
|
1928
|
+
# location of the job will be automatically set to the location of the
|
1929
|
+
# dataset.
|
1930
|
+
#
|
1931
|
+
# @param [String] table_id The destination table to load the data into.
|
1932
|
+
# @param [File, Google::Cloud::Storage::File, String, URI,
|
1933
|
+
# Array<Google::Cloud::Storage::File, String, URI>] files
|
1934
|
+
# A file or the URI of a Google Cloud Storage file, or an Array of
|
1935
|
+
# those, containing data to load into the table.
|
1936
|
+
# @param [String] format The file format of the source data. The default value is
|
1937
|
+
# `csv`.
|
1938
|
+
#
|
1939
|
+
# The following values are supported:
|
1940
|
+
#
|
1941
|
+
# * `csv` - CSV
|
1942
|
+
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1943
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
1944
|
+
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
|
1945
|
+
# * `parquet` - [Parquet](https://parquet.apache.org/)
|
1946
|
+
# * `datastore_backup` - Cloud Datastore backup
|
1947
|
+
# @param [String] create Specifies whether the job is allowed to create
|
1948
|
+
# new tables. The default value is `needed`.
|
1949
|
+
#
|
1950
|
+
# The following values are supported:
|
1951
|
+
#
|
1952
|
+
# * `needed` - Create the table if it does not exist.
|
1953
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
1954
|
+
# raised if the table does not exist.
|
1955
|
+
# @param [String] write Specifies how to handle data already present in
|
1956
|
+
# the table. The default value is `append`.
|
1957
|
+
#
|
1958
|
+
# The following values are supported:
|
1959
|
+
#
|
1960
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1961
|
+
# * `append` - BigQuery appends the data to the table.
|
1962
|
+
# * `empty` - An error will be returned if the table already contains
|
1963
|
+
# data.
|
1964
|
+
# @param [Array<String>] projection_fields If the `format` option is set
|
1965
|
+
# to `datastore_backup`, indicates which entity properties to load
|
1966
|
+
# from a Cloud Datastore backup. Property names are case sensitive and
|
1967
|
+
# must be top-level properties. If not set, BigQuery loads all
|
1968
|
+
# properties. If any named property isn't found in the Cloud Datastore
|
1969
|
+
# backup, an invalid error is returned.
|
1970
|
+
# @param [Boolean] jagged_rows Accept rows that are missing trailing
|
1971
|
+
# optional columns. The missing values are treated as nulls. If
|
1972
|
+
# `false`, records with missing trailing columns are treated as bad
|
1973
|
+
# records, and if there are too many bad records, an invalid error is
|
1974
|
+
# returned in the job result. The default value is `false`. Only
|
1975
|
+
# applicable to CSV, ignored for other formats.
|
1976
|
+
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
|
1977
|
+
# quoted data sections that contain newline characters in a CSV file.
|
1978
|
+
# The default value is `false`.
|
1979
|
+
# @param [Boolean] autodetect Indicates if BigQuery should
|
1980
|
+
# automatically infer the options and schema for CSV and JSON sources.
|
1981
|
+
# The default value is `false`.
|
1982
|
+
# @param [String] encoding The character encoding of the data. The
|
1983
|
+
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
|
1984
|
+
# `UTF-8`.
|
1985
|
+
# @param [String] delimiter Specifies the separator for fields in a CSV
|
1986
|
+
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
|
1987
|
+
# then uses the first byte of the encoded string to split the data in
|
1988
|
+
# its raw, binary state. Default is <code>,</code>.
|
1989
|
+
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
|
1990
|
+
# extra values that are not represented in the table schema. If true,
|
1991
|
+
# the extra values are ignored. If false, records with extra columns
|
1992
|
+
# are treated as bad records, and if there are too many bad records,
|
1993
|
+
# an invalid error is returned in the job result. The default value is
|
1994
|
+
# `false`.
|
1995
|
+
#
|
1996
|
+
# The `format` property determines what BigQuery treats as an extra
|
1997
|
+
# value:
|
1998
|
+
#
|
1999
|
+
# * `CSV`: Trailing columns
|
2000
|
+
# * `JSON`: Named values that don't match any column names
|
2001
|
+
# @param [Integer] max_bad_records The maximum number of bad records
|
2002
|
+
# that BigQuery can ignore when running the job. If the number of bad
|
2003
|
+
# records exceeds this value, an invalid error is returned in the job
|
2004
|
+
# result. The default value is `0`, which requires that all records
|
2005
|
+
# are valid.
|
2006
|
+
# @param [String] null_marker Specifies a string that represents a null
|
2007
|
+
# value in a CSV file. For example, if you specify `\N`, BigQuery
|
2008
|
+
# interprets `\N` as a null value when loading a CSV file. The default
|
2009
|
+
# value is the empty string. If you set this property to a custom
|
2010
|
+
# value, BigQuery throws an error if an empty string is present for
|
2011
|
+
# all data types except for STRING and BYTE. For STRING and BYTE
|
2012
|
+
# columns, BigQuery interprets the empty string as an empty value.
|
2013
|
+
# @param [String] quote The value that is used to quote data sections in
|
2014
|
+
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
|
2015
|
+
# then uses the first byte of the encoded string to split the data in
|
2016
|
+
# its raw, binary state. The default value is a double-quote
|
2017
|
+
# <code>"</code>. If your data does not contain quoted sections, set
|
2018
|
+
# the property value to an empty string. If your data contains quoted
|
2019
|
+
# newline characters, you must also set the allowQuotedNewlines
|
2020
|
+
# property to true.
|
2021
|
+
# @param [Integer] skip_leading The number of rows at the top of a CSV
|
2022
|
+
# file that BigQuery will skip when loading the data. The default
|
2023
|
+
# value is `0`. This property is useful if you have header rows in the
|
2024
|
+
# file that should be skipped.
|
2025
|
+
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
|
2026
|
+
# destination table. Optional. The schema can be omitted if the
|
2027
|
+
# destination table already exists, or if you're loading data from a
|
2028
|
+
# Google Cloud Datastore backup.
|
2029
|
+
#
|
2030
|
+
# See {Project#schema} for the creation of the schema for use with
|
2031
|
+
# this option. Also note that for most use cases, the block yielded by
|
2032
|
+
# this method is a more convenient way to configure the schema.
|
2033
|
+
#
|
2034
|
+
# @yield [updater] A block for setting the schema of the destination
|
2035
|
+
# table and other options for the load job. The schema can be omitted
|
2036
|
+
# if the destination table already exists, or if you're loading data
|
2037
|
+
# from a Google Cloud Datastore backup.
|
2038
|
+
# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
|
2039
|
+
# updater to modify the load job and its schema.
|
2040
|
+
#
|
2041
|
+
# @return [Boolean] Returns `true` if the load job was successful.
|
2042
|
+
#
|
2043
|
+
# @example
|
2044
|
+
# require "google/cloud/bigquery"
|
2045
|
+
#
|
2046
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2047
|
+
# dataset = bigquery.dataset "my_dataset"
|
2048
|
+
#
|
2049
|
+
# gs_url = "gs://my-bucket/file-name.csv"
|
2050
|
+
# dataset.load "my_new_table", gs_url do |schema|
|
2051
|
+
# schema.string "first_name", mode: :required
|
2052
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
2053
|
+
# nested_schema.string "place", mode: :required
|
2054
|
+
# nested_schema.integer "number_of_years", mode: :required
|
2055
|
+
# end
|
2056
|
+
# end
|
2057
|
+
#
|
2058
|
+
# @example Pass a google-cloud-storage `File` instance:
|
2059
|
+
# require "google/cloud/bigquery"
|
2060
|
+
# require "google/cloud/storage"
|
2061
|
+
#
|
2062
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2063
|
+
# dataset = bigquery.dataset "my_dataset"
|
2064
|
+
#
|
2065
|
+
# storage = Google::Cloud::Storage.new
|
2066
|
+
# bucket = storage.bucket "my-bucket"
|
2067
|
+
# file = bucket.file "file-name.csv"
|
2068
|
+
# dataset.load "my_new_table", file do |schema|
|
2069
|
+
# schema.string "first_name", mode: :required
|
2070
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
2071
|
+
# nested_schema.string "place", mode: :required
|
2072
|
+
# nested_schema.integer "number_of_years", mode: :required
|
2073
|
+
# end
|
2074
|
+
# end
|
2075
|
+
#
|
2076
|
+
# @example Pass a list of google-cloud-storage files:
|
2077
|
+
# require "google/cloud/bigquery"
|
2078
|
+
# require "google/cloud/storage"
|
2079
|
+
#
|
2080
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2081
|
+
# dataset = bigquery.dataset "my_dataset"
|
2082
|
+
#
|
2083
|
+
# storage = Google::Cloud::Storage.new
|
2084
|
+
# bucket = storage.bucket "my-bucket"
|
2085
|
+
# file = bucket.file "file-name.csv"
|
2086
|
+
# list = [file, "gs://my-bucket/file-name2.csv"]
|
2087
|
+
# dataset.load "my_new_table", list do |schema|
|
2088
|
+
# schema.string "first_name", mode: :required
|
2089
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
2090
|
+
# nested_schema.string "place", mode: :required
|
2091
|
+
# nested_schema.integer "number_of_years", mode: :required
|
2092
|
+
# end
|
2093
|
+
# end
|
2094
|
+
#
|
2095
|
+
# @example Upload a file directly:
|
2096
|
+
# require "google/cloud/bigquery"
|
2097
|
+
#
|
2098
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2099
|
+
# dataset = bigquery.dataset "my_dataset"
|
2100
|
+
#
|
2101
|
+
# file = File.open "my_data.csv"
|
2102
|
+
# dataset.load "my_new_table", file do |schema|
|
2103
|
+
# schema.string "first_name", mode: :required
|
2104
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
2105
|
+
# nested_schema.string "place", mode: :required
|
2106
|
+
# nested_schema.integer "number_of_years", mode: :required
|
2107
|
+
# end
|
2108
|
+
# end
|
2109
|
+
#
|
2110
|
+
# @example Schema is not required with a Cloud Datastore backup:
|
2111
|
+
# require "google/cloud/bigquery"
|
2112
|
+
#
|
2113
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2114
|
+
# dataset = bigquery.dataset "my_dataset"
|
2115
|
+
#
|
2116
|
+
# dataset.load "my_new_table",
|
2117
|
+
# "gs://my-bucket/xxxx.kind_name.backup_info" do |j|
|
2118
|
+
# j.format = "datastore_backup"
|
2119
|
+
# end
|
2120
|
+
#
|
2121
|
+
# @!group Data
|
2122
|
+
#
|
2123
|
+
def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
|
2124
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
|
2125
|
+
quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
|
2126
|
+
job = load_job table_id, files,
|
2127
|
+
format: format, create: create, write: write, projection_fields: projection_fields,
|
2128
|
+
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
|
2129
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
|
2130
|
+
quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
|
2131
|
+
null_marker: null_marker, &block
|
2132
|
+
|
2133
|
+
job.wait_until_done!
|
2134
|
+
ensure_job_succeeded! job
|
2135
|
+
true
|
2136
|
+
end
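# A sketch of loading newline-delimited JSON and replacing any existing
# table data, using the `format` and `write` options documented above; the
# GCS URL is a placeholder:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   dataset = bigquery.dataset "my_dataset"
#
#   dataset.load "my_new_table", "gs://my-bucket/data.json",
#                format: "json", write: "truncate"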
|
2137
|
+
|
2138
|
+
##
|
2139
|
+
# Reloads the dataset with current data from the BigQuery service.
|
2140
|
+
#
|
2141
|
+
# @return [Google::Cloud::Bigquery::Dataset] Returns the reloaded
|
2142
|
+
# dataset.
|
2143
|
+
#
|
2144
|
+
# @example Skip retrieving the dataset from the service, then load it:
|
2145
|
+
# require "google/cloud/bigquery"
|
2146
|
+
#
|
2147
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2148
|
+
#
|
2149
|
+
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
|
2150
|
+
# dataset.reload!
|
2151
|
+
#
|
2152
|
+
def reload!
|
2153
|
+
ensure_service!
|
2154
|
+
@gapi = service.get_dataset dataset_id
|
2155
|
+
@reference = nil
|
2156
|
+
@exists = nil
|
2157
|
+
self
|
2158
|
+
end
|
2159
|
+
alias refresh! reload!
|
2160
|
+
|
2161
|
+
##
|
2162
|
+
# Determines whether the dataset exists in the BigQuery service. The
|
2163
|
+
# result is cached locally. To refresh state, set `force` to `true`.
|
2164
|
+
#
|
2165
|
+
# @param [Boolean] force Force the latest resource representation to be
|
2166
|
+
# retrieved from the BigQuery service when `true`. Otherwise the
|
2167
|
+
# return value of this method will be memoized to reduce the number of
|
2168
|
+
# API calls made to the BigQuery service. The default is `false`.
|
2169
|
+
#
|
2170
|
+
# @return [Boolean] `true` when the dataset exists in the BigQuery
|
2171
|
+
# service, `false` otherwise.
|
2172
|
+
#
|
2173
|
+
# @example
|
2174
|
+
# require "google/cloud/bigquery"
|
2175
|
+
#
|
2176
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2177
|
+
#
|
2178
|
+
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
|
2179
|
+
# dataset.exists? # true
|
2180
|
+
#
|
2181
|
+
def exists? force: false
|
2182
|
+
return gapi_exists? if force
|
2183
|
+
# If we have a memoized value, return it
|
2184
|
+
return @exists unless @exists.nil?
|
2185
|
+
# Always true if we have a gapi object
|
2186
|
+
return true if resource?
|
2187
|
+
gapi_exists?
|
2188
|
+
end
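# A sketch of bypassing the memoized result with `force`, at the cost of an
# extra API call on every invocation:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   dataset = bigquery.dataset "my_dataset", skip_lookup: true
#   dataset.exists? force: true # always asks the service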
|
2189
|
+
|
2190
|
+
##
|
2191
|
+
# Whether the dataset was created without retrieving the resource
|
2192
|
+
# representation from the BigQuery service.
|
2193
|
+
#
|
2194
|
+
# @return [Boolean] `true` when the dataset is just a local reference
|
2195
|
+
# object, `false` otherwise.
|
2196
|
+
#
|
2197
|
+
# @example
|
2198
|
+
# require "google/cloud/bigquery"
|
2199
|
+
#
|
2200
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2201
|
+
#
|
2202
|
+
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
|
2203
|
+
#
|
2204
|
+
# dataset.reference? # true
|
2205
|
+
# dataset.reload!
|
2206
|
+
# dataset.reference? # false
|
2207
|
+
#
|
2208
|
+
def reference?
|
2209
|
+
@gapi.nil?
|
2210
|
+
end
|
2211
|
+
|
2212
|
+
##
|
2213
|
+
# Whether the dataset was created with a resource representation from
|
2214
|
+
# the BigQuery service.
|
2215
|
+
#
|
2216
|
+
# @return [Boolean] `true` when the dataset was created with a resource
|
2217
|
+
# representation, `false` otherwise.
|
2218
|
+
#
|
2219
|
+
# @example
|
2220
|
+
# require "google/cloud/bigquery"
|
2221
|
+
#
|
2222
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2223
|
+
#
|
2224
|
+
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
|
2225
|
+
#
|
2226
|
+
# dataset.resource? # false
|
2227
|
+
# dataset.reload!
|
2228
|
+
# dataset.resource? # true
|
2229
|
+
#
|
2230
|
+
def resource?
|
2231
|
+
!@gapi.nil?
|
2232
|
+
end
|
2233
|
+
|
2234
|
+
##
|
2235
|
+
# Whether the dataset was created with a partial resource representation
|
2236
|
+
# from the BigQuery service by retrieval through {Project#datasets}.
|
2237
|
+
# See [Datasets: list
|
2238
|
+
# response](https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#response)
|
2239
|
+
# for the contents of the partial representation. Accessing any
|
2240
|
+
# attribute outside of the partial representation will result in loading
|
2241
|
+
# the full representation.
|
2242
|
+
#
|
2243
|
+
# @return [Boolean] `true` when the dataset was created with a partial
|
2244
|
+
# resource representation, `false` otherwise.
|
2245
|
+
#
|
2246
|
+
# @example
|
2247
|
+
# require "google/cloud/bigquery"
|
2248
|
+
#
|
2249
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2250
|
+
#
|
2251
|
+
# dataset = bigquery.datasets.first
|
2252
|
+
#
|
2253
|
+
# dataset.resource_partial? # true
|
2254
|
+
# dataset.description # Loads the full resource.
|
2255
|
+
# dataset.resource_partial? # false
|
2256
|
+
#
|
2257
|
+
def resource_partial?
|
2258
|
+
@gapi.is_a? Google::Apis::BigqueryV2::DatasetList::Dataset
|
2259
|
+
end
|
2260
|
+
|
2261
|
+
##
|
2262
|
+
# Whether the dataset was created with a full resource representation
|
2263
|
+
# from the BigQuery service.
|
2264
|
+
#
|
2265
|
+
# @return [Boolean] `true` when the dataset was created with a full
|
2266
|
+
# resource representation, `false` otherwise.
|
2267
|
+
#
|
2268
|
+
# @example
|
2269
|
+
# require "google/cloud/bigquery"
|
2270
|
+
#
|
2271
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2272
|
+
#
|
2273
|
+
# dataset = bigquery.dataset "my_dataset"
|
2274
|
+
#
|
2275
|
+
# dataset.resource_full? # true
|
2276
|
+
#
|
2277
|
+
def resource_full?
|
2278
|
+
@gapi.is_a? Google::Apis::BigqueryV2::Dataset
|
2279
|
+
end
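# A sketch of how the predicates above relate over a dataset's lifecycle:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   dataset = bigquery.dataset "my_dataset", skip_lookup: true
#   dataset.reference?     # true, no API call has been made
#   dataset.resource?      # false
#   dataset.reload!
#   dataset.resource_full? # true, full representation loaded
#
#   bigquery.datasets.first.resource_partial? # true, list representation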
|
2280
|
+
|
2281
|
+
##
|
2282
|
+
# @private New Dataset from a Google API Client object.
|
2283
|
+
def self.from_gapi gapi, conn
|
2284
|
+
new.tap do |f|
|
2285
|
+
f.gapi = gapi
|
2286
|
+
f.service = conn
|
2287
|
+
end
|
2288
|
+
end
|
2289
|
+
|
2290
|
+
##
|
2291
|
+
# @private New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
|
2292
|
+
def self.new_reference project_id, dataset_id, service
|
2293
|
+
raise ArgumentError, "dataset_id is required" unless dataset_id
|
2294
|
+
new.tap do |b|
|
2295
|
+
reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new \
|
2296
|
+
project_id: project_id, dataset_id: dataset_id
|
2297
|
+
b.service = service
|
2298
|
+
b.instance_variable_set :@reference, reference_gapi
|
2299
|
+
end
|
2300
|
+
end
|
2301
|
+
|
2302
|
+
##
|
2303
|
+
# Inserts data into the given table for near-immediate querying, without
|
2304
|
+
# the need to complete a load operation before the data can appear in
|
2305
|
+
# query results.
|
2306
|
+
#
|
2307
|
+
# Because BigQuery's streaming API is designed for high insertion rates,
|
2308
|
+
# modifications to the underlying table metadata are eventually
|
2309
|
+
# consistent when interacting with the streaming system. In most cases
|
2310
|
+
# metadata changes are propagated within minutes, but during this period
|
2311
|
+
# API responses may reflect the inconsistent state of the table.
|
2312
|
+
#
|
2313
|
+
# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
|
2314
|
+
# Streaming Data Into BigQuery
|
2315
|
+
#
|
2316
|
+
# @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
|
2317
|
+
# BigQuery Troubleshooting: Metadata errors for streaming inserts
|
2318
|
+
#
|
2319
|
+
# @param [String] table_id The ID of the destination table.
|
2320
|
+
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
|
2321
|
+
# containing the data. Required.
|
2322
|
+
# @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
|
2323
|
+
# detect duplicate insertion requests on a best-effort basis. For more information, see [data
|
2324
|
+
# consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
|
2325
|
+
# not provided, the client library will assign a UUID to each row before the request is sent.
|
2326
|
+
#
|
2327
|
+
# The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of an
|
2328
|
+
# ID for a specific row in the array.
|
2329
|
+
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
|
2330
|
+
# if invalid rows exist. The default value is `false`, which causes
|
2331
|
+
# the entire request to fail if any invalid rows exist.
|
2332
|
+
# @param [Boolean] ignore_unknown Accept rows that contain values that
|
2333
|
+
# do not match the schema. The unknown values are ignored. Default is
|
2334
|
+
# false, which treats unknown values as errors.
|
2335
|
+
# @param [Boolean] autocreate Specifies whether the method should create
|
2336
|
+
# a new table with the given `table_id`, if no table is found for
|
2337
|
+
# `table_id`. The default value is false.
|
2338
|
+
#
|
2339
|
+
# @yield [table] a block for setting the table
|
2340
|
+
# @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
|
2341
|
+
# to set additional properties on the table in the API request to
|
2342
|
+
# create it. Only used when `autocreate` is set and the table does not
|
2343
|
+
# already exist.
|
2344
|
+
#
|
2345
|
+
# @return [Google::Cloud::Bigquery::InsertResponse] An insert response
|
2346
|
+
# object.
|
2347
|
+
#
|
2348
|
+
# @example
|
2349
|
+
# require "google/cloud/bigquery"
|
2350
|
+
#
|
2351
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2352
|
+
# dataset = bigquery.dataset "my_dataset"
|
2353
|
+
#
|
2354
|
+
# rows = [
|
2355
|
+
# { "first_name" => "Alice", "age" => 21 },
|
2356
|
+
# { "first_name" => "Bob", "age" => 22 }
|
2357
|
+
# ]
|
2358
|
+
# dataset.insert "my_table", rows
|
2359
|
+
#
|
2360
|
+
# @example Avoid retrieving the dataset with `skip_lookup`:
|
2361
|
+
# require "google/cloud/bigquery"
|
2362
|
+
#
|
2363
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2364
|
+
#
|
2365
|
+
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
|
2366
|
+
#
|
2367
|
+
# rows = [
|
2368
|
+
# { "first_name" => "Alice", "age" => 21 },
|
2369
|
+
# { "first_name" => "Bob", "age" => 22 }
|
2370
|
+
# ]
|
2371
|
+
# dataset.insert "my_table", rows
|
2372
|
+
#
|
2373
|
+
# @example Using `autocreate` to create a new table if none exists.
|
2374
|
+
# require "google/cloud/bigquery"
|
2375
|
+
#
|
2376
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2377
|
+
# dataset = bigquery.dataset "my_dataset"
|
2378
|
+
#
|
2379
|
+
# rows = [
|
2380
|
+
# { "first_name" => "Alice", "age" => 21 },
|
2381
|
+
# { "first_name" => "Bob", "age" => 22 }
|
2382
|
+
# ]
|
2383
|
+
# dataset.insert "my_table", rows, autocreate: true do |t|
|
2384
|
+
# t.schema.string "first_name", mode: :required
|
2385
|
+
# t.schema.integer "age", mode: :required
|
2386
|
+
# end
|
2387
|
+
#
|
2388
|
+
# @!group Data
|
2389
|
+
#
|
2390
|
+
def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
|
2391
|
+
rows = [rows] if rows.is_a? Hash
|
2392
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
2393
|
+
|
2394
|
+
insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
|
2395
|
+
insert_ids = Array insert_ids
|
2396
|
+
if insert_ids.count.positive? && insert_ids.count != rows.count
|
2397
|
+
raise ArgumentError, "insert_ids must be the same size as rows"
|
2398
|
+
end
|
2399
|
+
|
2400
|
+
if autocreate
|
2401
|
+
insert_data_with_autocreate table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
|
2402
|
+
insert_ids: insert_ids, &block
|
2403
|
+
else
|
2404
|
+
insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
|
2405
|
+
insert_ids: insert_ids
|
2406
|
+
end
|
2407
|
+
end
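# A sketch of supplying explicit insert IDs for best-effort deduplication,
# per the `insert_ids` parameter documented above; the ID values are
# placeholders:
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   dataset = bigquery.dataset "my_dataset"
#
#   rows = [
#     { "first_name" => "Alice", "age" => 21 },
#     { "first_name" => "Bob", "age" => 22 }
#   ]
#   dataset.insert "my_table", rows, insert_ids: ["id-alice", "id-bob"]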
|
2408
|
+
|
2409
|
+
##
|
2410
|
+
# Create an asynchronous inserter object used to insert rows in batches.
|
2411
|
+
#
|
2412
|
+
# @param [String] table_id The ID of the table to insert rows into.
|
2413
|
+
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
|
2414
|
+
# if invalid rows exist. The default value is `false`, which causes
|
2415
|
+
# the entire request to fail if any invalid rows exist.
|
2416
|
+
# @param [Boolean] ignore_unknown Accept rows that contain values that
|
2417
|
+
# do not match the schema. The unknown values are ignored. Default is
|
2418
|
+
# false, which treats unknown values as errors.
|
2419
|
+
# @param [Integer] max_bytes The maximum size of rows to be
|
2420
|
+
# collected before the batch is published. Default is 10,000,000
|
2421
|
+
# (10MB).
|
2422
|
+
# @param [Integer] max_rows The maximum number of rows to be collected
|
2423
|
+
# before the batch is published. Default is 500.
|
2424
|
+
# @param [Numeric] interval The number of seconds to collect
|
2425
|
+
# messages before the batch is published. Default is 10.
|
2426
|
+
# @param [Numeric] threads The number of threads used to insert
|
2427
|
+
# batches of rows. Default is 4.
|
2428
|
+
# @yield [response] the callback for when a batch of rows is inserted
|
2429
|
+
# @yieldparam [Table::AsyncInserter::Result] result the result of the
|
2430
|
+
# asynchronous insert
|
2431
|
+
#
|
2432
|
+
# @return [Table::AsyncInserter] Returns an inserter object.
|
2433
|
+
#
|
2434
|
+
# @example
|
2435
|
+
# require "google/cloud/bigquery"
|
2436
|
+
#
|
2437
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2438
|
+
# dataset = bigquery.dataset "my_dataset"
|
2439
|
+
# inserter = dataset.insert_async "my_table" do |result|
|
2440
|
+
# if result.error?
|
2441
|
+
# log_error result.error
|
2442
|
+
# else
|
2443
|
+
# log_insert "inserted #{result.insert_count} rows " \
|
2444
|
+
# "with #{result.error_count} errors"
|
2445
|
+
# end
|
2446
|
+
# end
|
2447
|
+
#
|
2448
|
+
# rows = [
|
2449
|
+
# { "first_name" => "Alice", "age" => 21 },
|
2450
|
+
# { "first_name" => "Bob", "age" => 22 }
|
2451
|
+
# ]
|
2452
|
+
# inserter.insert rows
|
2453
|
+
#
|
2454
|
+
# inserter.stop.wait!
|
2455
|
+
#
|
2456
|
+
def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
|
2457
|
+
interval: 10, threads: 4, &block
|
2458
|
+
ensure_service!
|
2459
|
+
|
2460
|
+
# Get the table directly; don't use Dataset#table, which returns nil on NotFoundError
|
2461
|
+
gapi = service.get_table dataset_id, table_id
|
2462
|
+
table = Table.from_gapi gapi, service
|
2463
|
+
# Get the AsyncInserter from the table
|
2464
|
+
table.insert_async skip_invalid: skip_invalid,
|
2465
|
+
ignore_unknown: ignore_unknown,
|
2466
|
+
max_bytes: max_bytes, max_rows: max_rows,
|
2467
|
+
interval: interval, threads: threads, &block
|
2468
|
+
end
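# A usage sketch for the batching thresholds accepted above (table name
# and row data are assumptions): per the parameter docs, a batch is
# published when max_bytes, max_rows, or interval is reached.
#
#   inserter = dataset.insert_async "my_table", max_rows: 100, interval: 5 do |result|
#     warn result.error if result.error?
#   end
#
#   inserter.insert rows
#   inserter.stop.wait!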
|
2469
|
+
|
2470
|
+
protected
|
2471
|
+
|
2472
|
+
def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
|
2473
|
+
insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
|
2474
|
+
rescue Google::Cloud::NotFoundError
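# The randomized delay below, and the fixed delay before the retry,
# appear to spread out concurrent autocreate attempts and give the
# newly created table time to become available for streaming inserts.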
|
2475
|
+
sleep rand(1..60)
|
2476
|
+
begin
|
2477
|
+
create_table table_id do |tbl_updater|
|
2478
|
+
yield tbl_updater if block_given?
|
2479
|
+
end
|
2480
|
+
# rubocop:disable Lint/HandleExceptions
|
2481
|
+
rescue Google::Cloud::AlreadyExistsError
|
2482
|
+
end
|
2483
|
+
# rubocop:enable Lint/HandleExceptions
|
2484
|
+
|
2485
|
+
sleep 60
|
2486
|
+
retry
|
2487
|
+
end
|
2488
|
+
|
2489
|
+
def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
|
2490
|
+
rows = [rows] if rows.is_a? Hash
|
2491
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
2492
|
+
ensure_service!
|
2493
|
+
gapi = service.insert_tabledata dataset_id, table_id, rows, skip_invalid: skip_invalid,
|
2494
|
+
ignore_unknown: ignore_unknown,
|
2495
|
+
insert_ids: insert_ids
|
2496
|
+
InsertResponse.from_gapi rows, gapi
|
2497
|
+
end
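# An illustrative look at the InsertResponse returned above (rows and
# table name are assumptions):
#
#   response = insert_data "my_table", rows
#   response.success?      # => true only when every row was inserted
#   response.insert_count  # => number of rows inserted
#   response.insert_errors # => per-row errors, if any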
|
2498
|
+
|
2499
|
+
##
|
2500
|
+
# Raise an error unless an active service is available.
|
2501
|
+
def ensure_service!
|
2502
|
+
raise "Must have active connection" unless service
|
2503
|
+
end
|
2504
|
+
|
2505
|
+
##
|
2506
|
+
# Ensures the Google::Apis::BigqueryV2::Dataset object has been loaded
|
2507
|
+
# from the service.
|
2508
|
+
def ensure_gapi!
|
2509
|
+
ensure_service!
|
2510
|
+
return unless reference?
|
2511
|
+
reload!
|
2512
|
+
end
|
2513
|
+
|
2514
|
+
##
|
2515
|
+
# Fetch gapi and memoize whether resource exists.
|
2516
|
+
def gapi_exists?
|
2517
|
+
reload!
|
2518
|
+
@exists = true
|
2519
|
+
rescue Google::Cloud::NotFoundError
|
2520
|
+
@exists = false
|
2521
|
+
end
|
2522
|
+
|
2523
|
+
def patch_gapi! *attributes
|
2524
|
+
return if attributes.empty?
|
2525
|
+
ensure_service!
|
2526
|
+
patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
|
2527
|
+
patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
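# Sending the current etag makes the patch conditional: if the dataset
# has changed since it was loaded, the API is expected to reject the
# update rather than silently overwrite it.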
|
2528
|
+
patch_gapi.etag = etag if etag
|
2529
|
+
@gapi = service.patch_dataset dataset_id, patch_gapi
|
2530
|
+
end
|
2531
|
+
|
2532
|
+
##
|
2533
|
+
# Load the complete representation of the dataset if it has been
|
2534
|
+
# only partially loaded by a request to the API list method.
|
2535
|
+
def ensure_full_data!
|
2536
|
+
reload! unless resource_full?
|
2537
|
+
end
|
2538
|
+
|
2539
|
+
def ensure_job_succeeded! job
|
2540
|
+
return unless job.failed?
|
2541
|
+
begin
|
2542
|
+
# Raise the original error so the wrapper raised in the rescue below records it as #cause
|
2543
|
+
raise job.gapi_error
|
2544
|
+
rescue StandardError => e
|
2545
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
2546
|
+
raise Google::Cloud::Error.from_error(e)
|
2547
|
+
end
|
2548
|
+
end
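# Illustrative effect for callers (the rescue site is an assumption):
# the wrapped error still exposes the original API error.
#
#   rescue Google::Cloud::Error => e
#     e.cause # => the underlying Google::Apis::Error from the failed job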
|
2549
|
+
|
2550
|
+
def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil
|
2551
|
+
job_ref = service.job_ref_from job_id, prefix
|
2552
|
+
Google::Apis::BigqueryV2::Job.new(
|
2553
|
+
job_reference: job_ref,
|
2554
|
+
configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
|
2555
|
+
load: Google::Apis::BigqueryV2::JobConfigurationLoad.new(
|
2556
|
+
destination_table: Google::Apis::BigqueryV2::TableReference.new(
|
2557
|
+
project_id: @service.project,
|
2558
|
+
dataset_id: dataset_id,
|
2559
|
+
table_id: table_id
|
2560
|
+
)
|
2561
|
+
),
|
2562
|
+
dry_run: dryrun
|
2563
|
+
)
|
2564
|
+
)
|
2565
|
+
end
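# For orientation, the request built above corresponds roughly to this
# Job resource in the REST API (field values are placeholders):
#
#   {
#     "jobReference": { "projectId": "...", "jobId": "..." },
#     "configuration": {
#       "load": {
#         "destinationTable": {
#           "projectId": "...", "datasetId": "...", "tableId": "..."
#         }
#       },
#       "dryRun": false
#     }
#   }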
|
2566
|
+
|
2567
|
+
def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
|
2568
|
+
skip_leading: nil, null_marker: nil
|
2569
|
+
job.jagged_rows = jagged_rows unless jagged_rows.nil?
|
2570
|
+
job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
|
2571
|
+
job.delimiter = delimiter unless delimiter.nil?
|
2572
|
+
job.null_marker = null_marker unless null_marker.nil?
|
2573
|
+
job.quote = quote unless quote.nil?
|
2574
|
+
job.skip_leading = skip_leading unless skip_leading.nil?
|
2575
|
+
end
|
2576
|
+
|
2577
|
+
def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
|
2578
|
+
encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
|
2579
|
+
skip_leading: nil, null_marker: nil
|
2580
|
+
job.format = format unless format.nil?
|
2581
|
+
job.projection_fields = projection_fields unless projection_fields.nil?
|
2582
|
+
job.encoding = encoding unless encoding.nil?
|
2583
|
+
job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
|
2584
|
+
job.max_bad_records = max_bad_records unless max_bad_records.nil?
|
2585
|
+
load_job_csv_options! job, jagged_rows: jagged_rows,
|
2586
|
+
quoted_newlines: quoted_newlines,
|
2587
|
+
delimiter: delimiter,
|
2588
|
+
quote: quote,
|
2589
|
+
skip_leading: skip_leading,
|
2590
|
+
null_marker: null_marker
|
2591
|
+
end
|
2592
|
+
|
2593
|
+
def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
|
2594
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
|
2595
|
+
max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
|
2596
|
+
prefix: nil, labels: nil, autodetect: nil, null_marker: nil
|
2597
|
+
new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
|
2598
|
+
LoadJob::Updater.new(new_job).tap do |job|
|
2599
|
+
job.location = location if location # nil when self is only a dataset reference
|
2600
|
+
job.create = create unless create.nil?
|
2601
|
+
job.write = write unless write.nil?
|
2602
|
+
job.schema = schema unless schema.nil?
|
2603
|
+
job.autodetect = autodetect unless autodetect.nil?
|
2604
|
+
job.labels = labels unless labels.nil?
|
2605
|
+
load_job_file_options! job, format: format,
|
2606
|
+
projection_fields: projection_fields,
|
2607
|
+
jagged_rows: jagged_rows,
|
2608
|
+
quoted_newlines: quoted_newlines,
|
2609
|
+
encoding: encoding,
|
2610
|
+
delimiter: delimiter,
|
2611
|
+
ignore_unknown: ignore_unknown,
|
2612
|
+
max_bad_records: max_bad_records,
|
2613
|
+
quote: quote,
|
2614
|
+
skip_leading: skip_leading,
|
2615
|
+
null_marker: null_marker
|
2616
|
+
end
|
2617
|
+
end
|
2618
|
+
|
2619
|
+
def load_storage urls, job_gapi
|
2620
|
+
# Convert each source to a Cloud Storage URL string
|
2621
|
+
urls = [urls].flatten.map do |url|
|
2622
|
+
if url.respond_to? :to_gs_url
|
2623
|
+
url.to_gs_url
|
2624
|
+
elsif url.is_a? URI
|
2625
|
+
url.to_s
|
2626
|
+
else
|
2627
|
+
url
|
2628
|
+
end
|
2629
|
+
end
|
2630
|
+
|
2631
|
+
unless urls.nil?
|
2632
|
+
job_gapi.configuration.load.update! source_uris: urls
|
2633
|
+
if job_gapi.configuration.load.source_format.nil?
|
2634
|
+
source_format = Convert.derive_source_format_from_list urls
|
2635
|
+
job_gapi.configuration.load.source_format = source_format unless source_format.nil?
|
2636
|
+
end
|
2637
|
+
end
|
2638
|
+
|
2639
|
+
gapi = service.load_table_gs_url job_gapi
|
2640
|
+
Job.from_gapi gapi, service
|
2641
|
+
end
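# Illustrative sources handled by the mapping above (names assumed):
#
#   storage_file                      # responds to #to_gs_url
#   URI("gs://my-bucket/data.json")   # converted with #to_s
#   "gs://my-bucket/data.csv"         # passed through unchanged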
|
2642
|
+
|
2643
|
+
def load_local file, job_gapi
|
2644
|
+
path = Pathname(file).to_path
|
2645
|
+
if job_gapi.configuration.load.source_format.nil?
|
2646
|
+
source_format = Convert.derive_source_format path
|
2647
|
+
job_gapi.configuration.load.source_format = source_format unless source_format.nil?
|
2648
|
+
end
|
2649
|
+
|
2650
|
+
gapi = service.load_table_file file, job_gapi
|
2651
|
+
Job.from_gapi gapi, service
|
2652
|
+
end
|
2653
|
+
|
2654
|
+
def load_local_or_uri file, updater
|
2655
|
+
job_gapi = updater.to_gapi
|
2656
|
+
job = if local_file? file
|
2657
|
+
load_local file, job_gapi
|
2658
|
+
else
|
2659
|
+
load_storage file, job_gapi
|
2660
|
+
end
|
2661
|
+
job
|
2662
|
+
end
|
2663
|
+
|
2664
|
+
def storage_url? files
|
2665
|
+
[files].flatten.all? do |file|
|
2666
|
+
file.respond_to?(:to_gs_url) ||
|
2667
|
+
(file.respond_to?(:to_str) && file.to_str.downcase.start_with?("gs://")) ||
|
2668
|
+
(file.is_a?(URI) && file.to_s.downcase.start_with?("gs://"))
|
2669
|
+
end
|
2670
|
+
end
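# Illustrative behavior (paths assumed):
#
#   storage_url? "gs://my-bucket/file.csv"            # => true
#   storage_url? URI("gs://my-bucket/file.csv")       # => true
#   storage_url? ["gs://bucket/a.csv", "/tmp/b.csv"]  # => false, not all gs://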
|
2671
|
+
|
2672
|
+
def local_file? file
|
2673
|
+
::File.file? file
|
2674
|
+
rescue StandardError
|
2675
|
+
false
|
2676
|
+
end
|
2677
|
+
|
2678
|
+
def udfs_gapi array_or_str
|
2679
|
+
return [] if array_or_str.nil?
|
2680
|
+
Array(array_or_str).map do |uri_or_code|
|
2681
|
+
resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
|
2682
|
+
if uri_or_code.start_with? "gs://"
|
2683
|
+
resource.resource_uri = uri_or_code
|
2684
|
+
else
|
2685
|
+
resource.inline_code = uri_or_code
|
2686
|
+
end
|
2687
|
+
resource
|
2688
|
+
end
|
2689
|
+
end
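# Illustrative mapping (values assumed): a "gs://" string becomes a
# resource_uri, anything else is treated as inline code.
#
#   udfs_gapi "gs://my-bucket/my-udf.js"
#   # => [resource with resource_uri = "gs://my-bucket/my-udf.js"]
#
#   udfs_gapi "return x * 2;"
#   # => [resource with inline_code = "return x * 2;"]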
|
2690
|
+
|
2691
|
+
##
|
2692
|
+
# Yielded to a block to accumulate changes for a create request. See {Project#create_dataset}.
|
2693
|
+
class Updater < Dataset
|
2694
|
+
##
|
2695
|
+
# @private A list of attributes that were updated.
|
2696
|
+
attr_reader :updates
|
2697
|
+
|
2698
|
+
##
|
2699
|
+
# @private Create an Updater object.
|
2700
|
+
def initialize gapi
|
2701
|
+
@updates = []
|
2702
|
+
@gapi = gapi
|
2703
|
+
end
|
2704
|
+
|
2705
|
+
def access
|
2706
|
+
# TODO: make sure to call ensure_full_data! on Dataset#update
|
2707
|
+
@access ||= Access.from_gapi @gapi
|
2708
|
+
if block_given?
|
2709
|
+
yield @access
|
2710
|
+
check_for_mutated_access!
|
2711
|
+
end
|
2712
|
+
# Same as Dataset#access, but not frozen
|
2713
|
+
@access
|
2714
|
+
end
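# A usage sketch for this class (the dataset name and e-mail address
# are assumptions): the Updater is yielded by Project#create_dataset,
# so access rules can be queued before the dataset exists.
#
#   bigquery.create_dataset "my_dataset" do |dataset|
#     dataset.access do |acl|
#       acl.add_reader_user "reader@example.com"
#     end
#   end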
|
2715
|
+
|
2716
|
+
# rubocop:disable Style/MethodDefParentheses
|
2717
|
+
|
2718
|
+
##
|
2719
|
+
# @raise [RuntimeError] not implemented
|
2720
|
+
def delete(*)
|
2721
|
+
raise "not implemented in #{self.class}"
|
2722
|
+
end
|
2723
|
+
|
2724
|
+
##
|
2725
|
+
# @raise [RuntimeError] not implemented
|
2726
|
+
def create_table(*)
|
2727
|
+
raise "not implemented in #{self.class}"
|
2728
|
+
end
|
2729
|
+
|
2730
|
+
##
|
2731
|
+
# @raise [RuntimeError] not implemented
|
2732
|
+
def create_view(*)
|
2733
|
+
raise "not implemented in #{self.class}"
|
2734
|
+
end
|
2735
|
+
|
2736
|
+
##
|
2737
|
+
# @raise [RuntimeError] not implemented
|
2738
|
+
def table(*)
|
2739
|
+
raise "not implemented in #{self.class}"
|
2740
|
+
end
|
2741
|
+
|
2742
|
+
##
|
2743
|
+
# @raise [RuntimeError] not implemented
|
2744
|
+
def tables(*)
|
2745
|
+
raise "not implemented in #{self.class}"
|
2746
|
+
end
|
2747
|
+
|
2748
|
+
##
|
2749
|
+
# @raise [RuntimeError] not implemented
|
2750
|
+
def model(*)
|
2751
|
+
raise "not implemented in #{self.class}"
|
2752
|
+
end
|
2753
|
+
|
2754
|
+
##
|
2755
|
+
# @raise [RuntimeError] not implemented
|
2756
|
+
def models(*)
|
2757
|
+
raise "not implemented in #{self.class}"
|
2758
|
+
end
|
2759
|
+
|
2760
|
+
##
|
2761
|
+
# @raise [RuntimeError] not implemented
|
2762
|
+
def create_routine(*)
|
2763
|
+
raise "not implemented in #{self.class}"
|
2764
|
+
end
|
2765
|
+
|
2766
|
+
##
|
2767
|
+
# @raise [RuntimeError] not implemented
|
2768
|
+
def routine(*)
|
2769
|
+
raise "not implemented in #{self.class}"
|
2770
|
+
end
|
2771
|
+
|
2772
|
+
##
|
2773
|
+
# @raise [RuntimeError] not implemented
|
2774
|
+
def routines(*)
|
2775
|
+
raise "not implemented in #{self.class}"
|
2776
|
+
end
|
2777
|
+
|
2778
|
+
##
|
2779
|
+
# @raise [RuntimeError] not implemented
|
2780
|
+
def query_job(*)
|
2781
|
+
raise "not implemented in #{self.class}"
|
2782
|
+
end
|
2783
|
+
|
2784
|
+
##
|
2785
|
+
# @raise [RuntimeError] not implemented
|
2786
|
+
def query(*)
|
2787
|
+
raise "not implemented in #{self.class}"
|
2788
|
+
end
|
2789
|
+
|
2790
|
+
##
|
2791
|
+
# @raise [RuntimeError] not implemented
|
2792
|
+
def external(*)
|
2793
|
+
raise "not implemented in #{self.class}"
|
2794
|
+
end
|
2795
|
+
|
2796
|
+
##
|
2797
|
+
# @raise [RuntimeError] not implemented
|
2798
|
+
def load_job(*)
|
2799
|
+
raise "not implemented in #{self.class}"
|
2800
|
+
end
|
2801
|
+
|
2802
|
+
##
|
2803
|
+
# @raise [RuntimeError] not implemented
|
2804
|
+
def load(*)
|
2805
|
+
raise "not implemented in #{self.class}"
|
2806
|
+
end
|
2807
|
+
|
2808
|
+
##
|
2809
|
+
# @raise [RuntimeError] not implemented
|
2810
|
+
def reload!
|
2811
|
+
raise "not implemented in #{self.class}"
|
2812
|
+
end
|
2813
|
+
alias refresh! reload!
|
2814
|
+
|
2815
|
+
# rubocop:enable Style/MethodDefParentheses
|
2816
|
+
|
2817
|
+
##
|
2818
|
+
# @private Make sure any access changes are saved
|
2819
|
+
def check_for_mutated_access!
|
2820
|
+
return if @access.nil?
|
2821
|
+
return unless @access.changed?
|
2822
|
+
@gapi.update! access: @access.to_gapi
|
2823
|
+
patch_gapi! :access
|
2824
|
+
end
|
2825
|
+
|
2826
|
+
##
|
2827
|
+
# @private
|
2828
|
+
def to_gapi
|
2829
|
+
check_for_mutated_access!
|
2830
|
+
@gapi
|
2831
|
+
end
|
2832
|
+
|
2833
|
+
protected
|
2834
|
+
|
2835
|
+
##
|
2836
|
+
# Queue up the updates instead of applying them immediately.
|
2837
|
+
def patch_gapi! attribute
|
2838
|
+
@updates << attribute
|
2839
|
+
@updates.uniq!
|
2840
|
+
end
|
2841
|
+
end
|
2842
|
+
end
|
2843
|
+
end
|
2844
|
+
end
|
2845
|
+
end
|