google-cloud-bigquery 1.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
data/lib/google/cloud/bigquery/dataset.rb
@@ -0,0 +1,2845 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "json"
17
+ require "google/cloud/errors"
18
+ require "google/cloud/bigquery/service"
19
+ require "google/cloud/bigquery/table"
20
+ require "google/cloud/bigquery/model"
21
+ require "google/cloud/bigquery/routine"
22
+ require "google/cloud/bigquery/external"
23
+ require "google/cloud/bigquery/dataset/list"
24
+ require "google/cloud/bigquery/dataset/access"
25
+ require "google/cloud/bigquery/convert"
26
+ require "google/apis/bigquery_v2"
27
+
28
+ module Google
29
+ module Cloud
30
+ module Bigquery
31
+ ##
32
+ # # Dataset
33
+ #
34
+ # Represents a Dataset. A dataset is a grouping mechanism that holds zero
35
+ # or more tables. Datasets are the lowest level unit of access control;
36
+ # you cannot control access at the table level. A dataset is contained
37
+ # within a specific project.
38
+ #
39
+ # @example
40
+ # require "google/cloud/bigquery"
41
+ #
42
+ # bigquery = Google::Cloud::Bigquery.new
43
+ #
44
+ # dataset = bigquery.create_dataset "my_dataset",
45
+ # name: "My Dataset",
46
+ # description: "This is my Dataset"
47
+ #
48
+ class Dataset
49
+ ##
50
+ # @private The Connection object.
51
+ attr_accessor :service
52
+
53
+ ##
54
+ # @private The Google API Client object.
55
+ attr_accessor :gapi
56
+
57
+ ##
58
+ # @private A Google API Client Dataset Reference object.
59
+ attr_reader :reference
60
+
61
+ ##
62
+ # @private Create an empty Dataset object.
63
+ def initialize
64
+ @service = nil
65
+ @gapi = nil
66
+ @reference = nil
67
+ end
68
+
69
+ ##
70
+ # A unique ID for this dataset, without the project name.
71
+ #
72
+ # @return [String] The ID must contain only letters (a-z, A-Z), numbers
73
+ # (0-9), or underscores (_). The maximum length is 1,024 characters.
74
+ #
75
+ # @!group Attributes
76
+ #
77
+ def dataset_id
78
+ return reference.dataset_id if reference?
79
+ @gapi.dataset_reference.dataset_id
80
+ end
81
+
82
+ ##
83
+ # The ID of the project containing this dataset.
84
+ #
85
+ # @return [String] The project ID.
86
+ #
87
+ # @!group Attributes
88
+ #
89
+ def project_id
90
+ return reference.project_id if reference?
91
+ @gapi.dataset_reference.project_id
92
+ end
93
+
94
+ ##
95
+ # @private
96
+ # The gapi fragment containing the Project ID and Dataset ID as a
97
+ # camel-cased hash.
98
+ def dataset_ref
99
+ dataset_ref = reference? ? reference : @gapi.dataset_reference
100
+ dataset_ref = dataset_ref.to_h if dataset_ref.respond_to? :to_h
101
+ dataset_ref
102
+ end
103
+
104
+ ##
105
+ # A descriptive name for the dataset.
106
+ #
107
+ # @return [String, nil] The friendly name, or `nil` if the object is
108
+ # a reference (see {#reference?}).
109
+ #
110
+ # @!group Attributes
111
+ #
112
+ def name
113
+ return nil if reference?
114
+ @gapi.friendly_name
115
+ end
116
+
117
+ ##
118
+ # Updates the descriptive name for the dataset.
119
+ #
120
+ # If the dataset is not a full resource representation (see
121
+ # {#resource_full?}), the full representation will be retrieved before
122
+ # the update to comply with ETag-based optimistic concurrency control.
123
+ #
124
+ # @param [String] new_name The new friendly name for the dataset.
126
+ #
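+ # @example A minimal sketch of renaming (assumes a dataset named "my_dataset" already exists):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # dataset.name = "My Dataset"
+ #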
127
+ # @!group Attributes
128
+ #
129
+ def name= new_name
130
+ reload! unless resource_full?
131
+ @gapi.update! friendly_name: new_name
132
+ patch_gapi! :friendly_name
133
+ end
134
+
135
+ ##
136
+ # The ETag hash of the dataset.
137
+ #
138
+ # @return [String, nil] The ETag hash, or `nil` if the object is a
139
+ # reference (see {#reference?}).
140
+ #
141
+ # @!group Attributes
142
+ #
143
+ def etag
144
+ return nil if reference?
145
+ ensure_full_data!
146
+ @gapi.etag
147
+ end
148
+
149
+ ##
150
+ # A URL that can be used to access the dataset using the REST API.
151
+ #
152
+ # @return [String, nil] A REST URL for the resource, or `nil` if the
153
+ # object is a reference (see {#reference?}).
154
+ #
155
+ # @!group Attributes
156
+ #
157
+ def api_url
158
+ return nil if reference?
159
+ ensure_full_data!
160
+ @gapi.self_link
161
+ end
162
+
163
+ ##
164
+ # A user-friendly description of the dataset.
165
+ #
166
+ # @return [String, nil] The description, or `nil` if the object is a
167
+ # reference (see {#reference?}).
168
+ #
169
+ # @!group Attributes
170
+ #
171
+ def description
172
+ return nil if reference?
173
+ ensure_full_data!
174
+ @gapi.description
175
+ end
176
+
177
+ ##
178
+ # Updates the user-friendly description of the dataset.
179
+ #
180
+ # If the dataset is not a full resource representation (see
181
+ # {#resource_full?}), the full representation will be retrieved before
182
+ # the update to comply with ETag-based optimistic concurrency control.
183
+ #
184
+ # @param [String] new_description The new description for the dataset.
185
+ #
186
+ # @!group Attributes
187
+ #
188
+ def description= new_description
189
+ reload! unless resource_full?
190
+ @gapi.update! description: new_description
191
+ patch_gapi! :description
192
+ end
193
+
194
+ ##
195
+ # The default lifetime of all tables in the dataset, in milliseconds.
196
+ #
197
+ # @return [Integer, nil] The default table expiration in milliseconds,
198
+ # or `nil` if not present or the object is a reference (see
199
+ # {#reference?}).
200
+ #
201
+ # @!group Attributes
202
+ #
203
+ def default_expiration
204
+ return nil if reference?
205
+ ensure_full_data!
206
+ begin
207
+ Integer @gapi.default_table_expiration_ms
208
+ rescue StandardError
209
+ nil
210
+ end
211
+ end
212
+
213
+ ##
214
+ # Updates the default lifetime of all tables in the dataset, in
215
+ # milliseconds.
216
+ #
217
+ # If the dataset is not a full resource representation (see
218
+ # {#resource_full?}), the full representation will be retrieved before
219
+ # the update to comply with ETag-based optimistic concurrency control.
220
+ #
221
+ # @param [Integer] new_default_expiration The new default table
222
+ # expiration in milliseconds.
223
+ #
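+ # @example A minimal sketch (assumes "my_dataset" exists; the value is in milliseconds):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # # Tables created in this dataset will expire after one hour
+ # # (3,600,000 milliseconds) unless overridden per table.
+ # dataset.default_expiration = 3_600_000
+ #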
224
+ # @!group Attributes
225
+ #
226
+ def default_expiration= new_default_expiration
227
+ reload! unless resource_full?
228
+ @gapi.update! default_table_expiration_ms: new_default_expiration
229
+ patch_gapi! :default_table_expiration_ms
230
+ end
231
+
232
+ ##
233
+ # The time when this dataset was created.
234
+ #
235
+ # @return [Time, nil] The creation time, or `nil` if not present or the
236
+ # object is a reference (see {#reference?}).
237
+ #
238
+ # @!group Attributes
239
+ #
240
+ def created_at
241
+ return nil if reference?
242
+ ensure_full_data!
243
+ Convert.millis_to_time @gapi.creation_time
244
+ end
245
+
246
+ ##
247
+ # The date when this dataset or any of its tables was last modified.
248
+ #
249
+ # @return [Time, nil] The last modified time, or `nil` if not present or
250
+ # the object is a reference (see {#reference?}).
251
+ #
252
+ # @!group Attributes
253
+ #
254
+ def modified_at
255
+ return nil if reference?
256
+ ensure_full_data!
257
+ Convert.millis_to_time @gapi.last_modified_time
258
+ end
259
+
260
+ ##
261
+ # The geographic location where the dataset should reside. Possible
262
+ # values include `EU` and `US`. The default value is `US`.
263
+ #
264
+ # @return [String, nil] The geographic location, or `nil` if the object
265
+ # is a reference (see {#reference?}).
266
+ #
267
+ # @!group Attributes
268
+ #
269
+ def location
270
+ return nil if reference?
271
+ ensure_full_data!
272
+ @gapi.location
273
+ end
274
+
275
+ ##
276
+ # A hash of user-provided labels associated with this dataset. Labels
277
+ # are used to organize and group datasets. See [Using
278
+ # Labels](https://cloud.google.com/bigquery/docs/labels).
279
+ #
280
+ # The returned hash is frozen and changes are not allowed. Use
281
+ # {#labels=} to replace the entire hash.
282
+ #
283
+ # @return [Hash<String, String>, nil] A hash containing key/value pairs,
284
+ # or `nil` if the object is a reference (see {#reference?}).
285
+ #
286
+ # @example
287
+ # require "google/cloud/bigquery"
288
+ #
289
+ # bigquery = Google::Cloud::Bigquery.new
290
+ # dataset = bigquery.dataset "my_dataset"
291
+ #
292
+ # labels = dataset.labels
293
+ # labels["department"] #=> "shipping"
294
+ #
295
+ # @!group Attributes
296
+ #
297
+ def labels
298
+ return nil if reference?
299
+ m = @gapi.labels
300
+ m = m.to_h if m.respond_to? :to_h
301
+ m.dup.freeze
302
+ end
303
+
304
+ ##
305
+ # Updates the hash of user-provided labels associated with this dataset.
306
+ # Labels are used to organize and group datasets. See [Using
307
+ # Labels](https://cloud.google.com/bigquery/docs/labels).
308
+ #
309
+ # If the dataset is not a full resource representation (see
310
+ # {#resource_full?}), the full representation will be retrieved before
311
+ # the update to comply with ETag-based optimistic concurrency control.
312
+ #
313
+ # @param [Hash<String, String>] labels A hash containing key/value
314
+ # pairs.
315
+ #
316
+ # * Label keys and values can be no longer than 63 characters.
317
+ # * Label keys and values can contain only lowercase letters, numbers,
318
+ # underscores, hyphens, and international characters.
319
+ # * Label keys and values cannot exceed 128 bytes in size.
320
+ # * Label keys must begin with a letter.
321
+ # * Label keys must be unique within a dataset.
322
+ #
323
+ # @example
324
+ # require "google/cloud/bigquery"
325
+ #
326
+ # bigquery = Google::Cloud::Bigquery.new
327
+ # dataset = bigquery.dataset "my_dataset"
328
+ #
329
+ # dataset.labels = { "department" => "shipping" }
330
+ #
331
+ # @!group Attributes
332
+ #
333
+ def labels= labels
334
+ reload! unless resource_full?
335
+ @gapi.labels = labels
336
+ patch_gapi! :labels
337
+ end
338
+
339
+ ##
340
+ # The {EncryptionConfiguration} object that represents the default
341
+ # encryption method for all tables and models in the dataset. Once this
342
+ # property is set, all newly-created partitioned tables and models in
343
+ # the dataset will have their encryption set to this value, unless the
344
+ # table creation request (or query) overrides it.
345
+ #
346
+ # Present only if this dataset is using custom default encryption.
347
+ #
348
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
349
+ # Protecting Data with Cloud KMS Keys
350
+ #
351
+ # @return [EncryptionConfiguration, nil] The default encryption
352
+ # configuration.
353
+ #
354
+ # @!group Attributes
355
+ #
356
+ # @example
357
+ # require "google/cloud/bigquery"
358
+ #
359
+ # bigquery = Google::Cloud::Bigquery.new
360
+ # dataset = bigquery.dataset "my_dataset"
361
+ #
362
+ # encrypt_config = dataset.default_encryption
363
+ #
364
+ # @!group Attributes
365
+ #
366
+ def default_encryption
367
+ return nil if reference?
368
+ ensure_full_data!
369
+ return nil if @gapi.default_encryption_configuration.nil?
370
+ EncryptionConfiguration.from_gapi(@gapi.default_encryption_configuration).freeze
371
+ end
372
+
373
+ ##
374
+ # Set the {EncryptionConfiguration} object that represents the default
375
+ # encryption method for all tables and models in the dataset. Once this
376
+ # property is set, all newly-created partitioned tables and models in
377
+ # the dataset will have their encryption set to this value, unless table
378
+ # creation request (or query) overrides it.
379
+ #
380
+ # If the dataset is not a full resource representation (see
381
+ # {#resource_full?}), the full representation will be retrieved before
382
+ # the update to comply with ETag-based optimistic concurrency control.
383
+ #
384
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
385
+ # Protecting Data with Cloud KMS Keys
386
+ #
387
+ # @param [EncryptionConfiguration] value The new encryption config.
388
+ #
389
+ # @example
390
+ # require "google/cloud/bigquery"
391
+ #
392
+ # bigquery = Google::Cloud::Bigquery.new
393
+ # dataset = bigquery.dataset "my_dataset"
394
+ #
395
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
396
+ # encrypt_config = bigquery.encryption kms_key: key_name
397
+ #
398
+ # dataset.default_encryption = encrypt_config
399
+ #
400
+ # @!group Attributes
401
+ #
402
+ def default_encryption= value
403
+ ensure_full_data!
404
+ @gapi.default_encryption_configuration = value.to_gapi
405
+ patch_gapi! :default_encryption_configuration
406
+ end
407
+
408
+ ##
409
+ # Retrieves the access rules for a Dataset. The rules can be updated
410
+ # when passing a block, see {Dataset::Access} for all the methods
411
+ # available.
412
+ #
413
+ # If the dataset is not a full resource representation (see
414
+ # {#resource_full?}), the full representation will be retrieved before
415
+ # the update to comply with ETag-based optimistic concurrency control.
416
+ #
417
+ # @see https://cloud.google.com/bigquery/access-control BigQuery Access
418
+ # Control
419
+ #
420
+ # @yield [access] a block for setting rules
421
+ # @yieldparam [Dataset::Access] access the object accepting rules
422
+ #
423
+ # @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
424
+ #
425
+ # @example
426
+ # require "google/cloud/bigquery"
427
+ #
428
+ # bigquery = Google::Cloud::Bigquery.new
429
+ # dataset = bigquery.dataset "my_dataset"
430
+ #
431
+ # access = dataset.access
432
+ # access.writer_user? "reader@example.com" #=> false
433
+ #
434
+ # @example Manage the access rules by passing a block:
435
+ # require "google/cloud/bigquery"
436
+ #
437
+ # bigquery = Google::Cloud::Bigquery.new
438
+ # dataset = bigquery.dataset "my_dataset"
439
+ #
440
+ # dataset.access do |access|
441
+ # access.add_owner_group "owners@example.com"
442
+ # access.add_writer_user "writer@example.com"
443
+ # access.remove_writer_user "readers@example.com"
444
+ # access.add_reader_special :all
445
+ # access.add_reader_view other_dataset_view_object
446
+ # end
447
+ #
448
+ def access
449
+ ensure_full_data!
450
+ reload! unless resource_full?
451
+ access_builder = Access.from_gapi @gapi
452
+ if block_given?
453
+ yield access_builder
454
+ if access_builder.changed?
455
+ @gapi.update! access: access_builder.to_gapi
456
+ patch_gapi! :access
457
+ end
458
+ end
459
+ access_builder.freeze
460
+ end
461
+
462
+ ##
463
+ # Permanently deletes the dataset. The dataset must be empty before it
464
+ # can be deleted unless the `force` option is set to `true`.
465
+ #
466
+ # @param [Boolean] force If `true`, delete all the tables in the
467
+ # dataset. If `false` and the dataset contains tables, the request
468
+ # will fail. Default is `false`.
469
+ #
470
+ # @return [Boolean] Returns `true` if the dataset was deleted.
471
+ #
472
+ # @example
473
+ # require "google/cloud/bigquery"
474
+ #
475
+ # bigquery = Google::Cloud::Bigquery.new
476
+ # dataset = bigquery.dataset "my_dataset"
477
+ #
478
+ # dataset.delete
479
+ #
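+ # @example Deleting a non-empty dataset with `force` (a sketch; this also deletes its tables):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # # Passing force: true deletes the dataset's tables as well.
+ # dataset.delete force: true
+ #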
480
+ # @!group Lifecycle
481
+ #
482
+ def delete force: nil
483
+ ensure_service!
484
+ service.delete_dataset dataset_id, force
485
+ # Set flag for #exists?
486
+ @exists = false
487
+ true
488
+ end
489
+
490
+ ##
491
+ # Creates a new table. If you are adapting existing code that was
492
+ # written for the [REST API
493
+ # ](https://cloud.google.com/bigquery/docs/reference/v2/tables#resource),
494
+ # you can pass the table's schema as a hash (see example).
495
+ #
496
+ # @param [String] table_id The ID of the table. The ID must contain only
497
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
498
+ # length is 1,024 characters.
499
+ # @param [String] name A descriptive name for the table.
500
+ # @param [String] description A user-friendly description of the table.
501
+ # @yield [table] a block for setting the table
502
+ # @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
503
+ # to set additional properties on the table in the API request to
504
+ # create it.
505
+ #
506
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
507
+ #
508
+ # @example
509
+ # require "google/cloud/bigquery"
510
+ #
511
+ # bigquery = Google::Cloud::Bigquery.new
512
+ # dataset = bigquery.dataset "my_dataset"
513
+ #
514
+ # table = dataset.create_table "my_table"
515
+ #
516
+ # @example You can also pass name and description options.
517
+ # require "google/cloud/bigquery"
518
+ #
519
+ # bigquery = Google::Cloud::Bigquery.new
520
+ # dataset = bigquery.dataset "my_dataset"
521
+ #
522
+ # table = dataset.create_table "my_table",
523
+ # name: "My Table",
524
+ # description: "A description of table."
525
+ #
526
+ # @example Or the table's schema can be configured with the block.
527
+ # require "google/cloud/bigquery"
528
+ #
529
+ # bigquery = Google::Cloud::Bigquery.new
530
+ # dataset = bigquery.dataset "my_dataset"
531
+ #
532
+ # table = dataset.create_table "my_table" do |t|
533
+ # t.schema.string "first_name", mode: :required
534
+ # t.schema.record "cities_lived", mode: :required do |s|
535
+ # s.string "place", mode: :required
536
+ # s.integer "number_of_years", mode: :required
537
+ # end
538
+ # end
539
+ #
540
+ # @example You can define the schema using a nested block.
541
+ # require "google/cloud/bigquery"
542
+ #
543
+ # bigquery = Google::Cloud::Bigquery.new
544
+ # dataset = bigquery.dataset "my_dataset"
545
+ #
546
+ # table = dataset.create_table "my_table" do |t|
547
+ # t.name = "My Table"
548
+ # t.description = "A description of my table."
549
+ # t.schema do |s|
550
+ # s.string "first_name", mode: :required
551
+ # s.record "cities_lived", mode: :repeated do |r|
552
+ # r.string "place", mode: :required
553
+ # r.integer "number_of_years", mode: :required
554
+ # end
555
+ # end
556
+ # end
557
+ #
558
+ # @example With time partitioning and clustering.
559
+ # require "google/cloud/bigquery"
560
+ #
561
+ # bigquery = Google::Cloud::Bigquery.new
562
+ # dataset = bigquery.dataset "my_dataset"
563
+ #
564
+ # table = dataset.create_table "my_table" do |t|
565
+ # t.schema do |schema|
566
+ # schema.timestamp "dob", mode: :required
567
+ # schema.string "first_name", mode: :required
568
+ # schema.string "last_name", mode: :required
569
+ # end
570
+ # t.time_partitioning_type = "DAY"
571
+ # t.time_partitioning_field = "dob"
572
+ # t.clustering_fields = ["last_name", "first_name"]
573
+ # end
574
+ #
575
+ # @example With range partitioning.
576
+ # require "google/cloud/bigquery"
577
+ #
578
+ # bigquery = Google::Cloud::Bigquery.new
579
+ # dataset = bigquery.dataset "my_dataset"
580
+ #
581
+ # table = dataset.create_table "my_table" do |t|
582
+ # t.schema do |schema|
583
+ # schema.integer "my_table_id", mode: :required
584
+ # schema.string "my_table_data", mode: :required
585
+ # end
586
+ # t.range_partitioning_field = "my_table_id"
587
+ # t.range_partitioning_start = 0
588
+ # t.range_partitioning_interval = 10
589
+ # t.range_partitioning_end = 100
590
+ # end
591
+ #
592
+ # @!group Table
593
+ #
594
+ def create_table table_id, name: nil, description: nil
595
+ ensure_service!
596
+ new_tb = Google::Apis::BigqueryV2::Table.new(
597
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
598
+ project_id: project_id, dataset_id: dataset_id,
599
+ table_id: table_id
600
+ )
601
+ )
602
+ updater = Table::Updater.new(new_tb).tap do |tb|
603
+ tb.name = name unless name.nil?
604
+ tb.description = description unless description.nil?
605
+ end
606
+
607
+ yield updater if block_given?
608
+
609
+ gapi = service.insert_table dataset_id, updater.to_gapi
610
+ Table.from_gapi gapi, service
611
+ end
612
+
613
+ ##
614
+ # Creates a new [view](https://cloud.google.com/bigquery/docs/views)
615
+ # table, which is a virtual table defined by the given SQL query.
616
+ #
617
+ # BigQuery's views are logical views, not materialized views, which
618
+ # means that the query that defines the view is re-executed every time
619
+ # the view is queried. Queries are billed according to the total amount
620
+ # of data in all table fields referenced directly or indirectly by the
621
+ # top-level query. (See {Table#view?} and {Table#query}.)
622
+ #
623
+ # @param [String] table_id The ID of the view table. The ID must contain
624
+ # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
625
+ # maximum length is 1,024 characters.
626
+ # @param [String] query The query that BigQuery executes when the view
627
+ # is referenced.
628
+ # @param [String] name A descriptive name for the table.
629
+ # @param [String] description A user-friendly description of the table.
630
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
631
+ # [standard
632
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
633
+ # dialect. Optional. The default value is true.
634
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
635
+ # [legacy
636
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
637
+ # dialect. Optional. The default value is false.
638
+ # @param [Array<String>, String] udfs User-defined function resources
639
+ # used in a legacy SQL query. May be either a code resource to load from
640
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
641
+ # that contains code for a user-defined function (UDF). Providing an
642
+ # inline code resource is equivalent to providing a URI for a file
643
+ # containing the same code.
644
+ #
645
+ # This parameter is used for defining User Defined Function (UDF)
646
+ # resources only when using legacy SQL. Users of standard SQL should
647
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
648
+ # Routines API to define UDF resources.
649
+ #
650
+ # For additional information on migrating, see: [Migrating to
651
+ # standard SQL - Differences in user-defined JavaScript
652
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
653
+ #
654
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
655
+ #
656
+ # @example
657
+ # require "google/cloud/bigquery"
658
+ #
659
+ # bigquery = Google::Cloud::Bigquery.new
660
+ # dataset = bigquery.dataset "my_dataset"
661
+ #
662
+ # view = dataset.create_view "my_view",
663
+ # "SELECT name, age FROM proj.dataset.users"
664
+ #
665
+ # @example A name and description can be provided:
666
+ # require "google/cloud/bigquery"
667
+ #
668
+ # bigquery = Google::Cloud::Bigquery.new
669
+ # dataset = bigquery.dataset "my_dataset"
670
+ #
671
+ # view = dataset.create_view "my_view",
672
+ # "SELECT name, age FROM proj.dataset.users",
673
+ # name: "My View", description: "This is my view"
674
+ #
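+ # @example A sketch of a legacy SQL view with a UDF resource (the bucket path is hypothetical):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # view = dataset.create_view "my_view",
+ # "SELECT name, age FROM [my_project:my_dataset.users]",
+ # legacy_sql: true,
+ # udfs: ["gs://my-bucket/my-udf.js"]
+ #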
675
+ # @!group Table
676
+ #
677
+ def create_view table_id, query, name: nil, description: nil,
678
+ standard_sql: nil, legacy_sql: nil, udfs: nil
679
+ use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
680
+ new_view_opts = {
681
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
682
+ project_id: project_id,
683
+ dataset_id: dataset_id,
684
+ table_id: table_id
685
+ ),
686
+ friendly_name: name,
687
+ description: description,
688
+ view: Google::Apis::BigqueryV2::ViewDefinition.new(
689
+ query: query,
690
+ use_legacy_sql: use_legacy_sql,
691
+ user_defined_function_resources: udfs_gapi(udfs)
692
+ )
693
+ }.delete_if { |_, v| v.nil? }
694
+ new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
695
+
696
+ gapi = service.insert_table dataset_id, new_view
697
+ Table.from_gapi gapi, service
698
+ end
699
+
700
+ ##
701
+ # Retrieves an existing table by ID.
702
+ #
703
+ # @param [String] table_id The ID of a table.
704
+ # @param [Boolean] skip_lookup Optionally create just a local reference
705
+ # object without verifying that the resource exists on the BigQuery
706
+ # service. Calls made on this object will raise errors if the resource
707
+ # does not exist. Default is `false`. Optional.
708
+ #
709
+ # @return [Google::Cloud::Bigquery::Table, nil] Returns `nil` if the
710
+ # table does not exist.
711
+ #
712
+ # @example
713
+ # require "google/cloud/bigquery"
714
+ #
715
+ # bigquery = Google::Cloud::Bigquery.new
716
+ # dataset = bigquery.dataset "my_dataset"
717
+ #
718
+ # table = dataset.table "my_table"
719
+ # puts table.name
720
+ #
721
+ # @example Avoid retrieving the table resource with `skip_lookup`:
722
+ # require "google/cloud/bigquery"
723
+ #
724
+ # bigquery = Google::Cloud::Bigquery.new
725
+ #
726
+ # dataset = bigquery.dataset "my_dataset"
727
+ #
728
+ # table = dataset.table "my_table", skip_lookup: true
729
+ #
730
+ # @!group Table
731
+ #
732
+ def table table_id, skip_lookup: nil
733
+ ensure_service!
734
+ return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
735
+ gapi = service.get_table dataset_id, table_id
736
+ Table.from_gapi gapi, service
737
+ rescue Google::Cloud::NotFoundError
738
+ nil
739
+ end
740
+
741
+ ##
742
+ # Retrieves the list of tables belonging to the dataset.
743
+ #
744
+ # @param [String] token A previously-returned page token representing
745
+ # part of the larger set of results to view.
746
+ # @param [Integer] max Maximum number of tables to return.
747
+ #
748
+ # @return [Array<Google::Cloud::Bigquery::Table>] An array of tables
749
+ # (See {Google::Cloud::Bigquery::Table::List})
750
+ #
751
+ # @example
752
+ # require "google/cloud/bigquery"
753
+ #
754
+ # bigquery = Google::Cloud::Bigquery.new
755
+ # dataset = bigquery.dataset "my_dataset"
756
+ #
757
+ # tables = dataset.tables
758
+ # tables.each do |table|
759
+ # puts table.name
760
+ # end
761
+ #
762
+ # @example Retrieve all tables: (See {Table::List#all})
763
+ # require "google/cloud/bigquery"
764
+ #
765
+ # bigquery = Google::Cloud::Bigquery.new
766
+ # dataset = bigquery.dataset "my_dataset"
767
+ #
768
+ # tables = dataset.tables
769
+ # tables.all do |table|
770
+ # puts table.name
771
+ # end
772
+ #
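+ # @example Paging manually with `max` (a sketch; assumes the returned list supports `next?`/`next`):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # tables = dataset.tables max: 50
+ # tables.each { |table| puts table.table_id }
+ # tables = tables.next if tables.next?
+ #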
773
+ # @!group Table
774
+ #
775
+ def tables token: nil, max: nil
776
+ ensure_service!
777
+ gapi = service.list_tables dataset_id, token: token, max: max
778
+ Table::List.from_gapi gapi, service, dataset_id, max
779
+ end
780
+
781
+ ##
782
+ # Retrieves an existing model by ID.
783
+ #
784
+ # @param [String] model_id The ID of a model.
785
+ # @param [Boolean] skip_lookup Optionally create just a local reference
786
+ # object without verifying that the resource exists on the BigQuery
787
+ # service. Calls made on this object will raise errors if the resource
788
+ # does not exist. Default is `false`. Optional.
789
+ #
790
+ # @return [Google::Cloud::Bigquery::Model, nil] Returns `nil` if the
791
+ # model does not exist.
792
+ #
793
+ # @example
794
+ # require "google/cloud/bigquery"
795
+ #
796
+ # bigquery = Google::Cloud::Bigquery.new
797
+ # dataset = bigquery.dataset "my_dataset"
798
+ #
799
+ # model = dataset.model "my_model"
800
+ # puts model.model_id
801
+ #
802
+ # @example Avoid retrieving the model resource with `skip_lookup`:
803
+ # require "google/cloud/bigquery"
804
+ #
805
+ # bigquery = Google::Cloud::Bigquery.new
806
+ #
807
+ # dataset = bigquery.dataset "my_dataset"
808
+ #
809
+ # model = dataset.model "my_model", skip_lookup: true
810
+ #
811
+ # @!group Model
812
+ #
813
+ def model model_id, skip_lookup: nil
814
+ ensure_service!
815
+ return Model.new_reference project_id, dataset_id, model_id, service if skip_lookup
816
+ gapi = service.get_model dataset_id, model_id
817
+ Model.from_gapi_json gapi, service
818
+ rescue Google::Cloud::NotFoundError
819
+ nil
820
+ end
821
+
822
+ ##
823
+ # Retrieves the list of models belonging to the dataset.
824
+ #
825
+ # @param [String] token A previously-returned page token representing
826
+ # part of the larger set of results to view.
827
+ # @param [Integer] max Maximum number of models to return.
828
+ #
829
+ # @return [Array<Google::Cloud::Bigquery::Model>] An array of models
830
+ # (See {Google::Cloud::Bigquery::Model::List})
831
+ #
832
+ # @example
833
+ # require "google/cloud/bigquery"
834
+ #
835
+ # bigquery = Google::Cloud::Bigquery.new
836
+ # dataset = bigquery.dataset "my_dataset"
837
+ #
838
+ # models = dataset.models
839
+ # models.each do |model|
840
+ # puts model.model_id
841
+ # end
842
+ #
843
+ # @example Retrieve all models: (See {Model::List#all})
844
+ # require "google/cloud/bigquery"
845
+ #
846
+ # bigquery = Google::Cloud::Bigquery.new
847
+ # dataset = bigquery.dataset "my_dataset"
848
+ #
849
+ # models = dataset.models
850
+ # models.all do |model|
851
+ # puts model.model_id
852
+ # end
853
+ #
854
+ # @!group Model
855
+ #
856
+ def models token: nil, max: nil
857
+ ensure_service!
858
+ gapi = service.list_models dataset_id, token: token, max: max
859
+ Model::List.from_gapi gapi, service, dataset_id, max
860
+ end
861
+
862
+ ##
863
+ # Creates a new routine. The following attributes may be set in the yielded block:
864
+ # {Routine::Updater#routine_type=}, {Routine::Updater#language=}, {Routine::Updater#arguments=},
865
+ # {Routine::Updater#return_type=}, {Routine::Updater#imported_libraries=}, {Routine::Updater#body=}, and
866
+ # {Routine::Updater#description=}.
867
+ #
868
+ # @param [String] routine_id The ID of the routine. The ID must contain only
869
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length
870
+ # is 256 characters.
871
+ # @yield [routine] A block for setting properties on the routine.
872
+ # @yieldparam [Google::Cloud::Bigquery::Routine::Updater] routine An updater to set additional properties on the
873
+ # routine.
874
+ #
875
+ # @return [Google::Cloud::Bigquery::Routine] A new routine object.
876
+ #
877
+ # @example
878
+ # require "google/cloud/bigquery"
879
+ #
880
+ # bigquery = Google::Cloud::Bigquery.new
881
+ # dataset = bigquery.dataset "my_dataset"
882
+ #
883
+ # routine = dataset.create_routine "my_routine" do |r|
884
+ # r.routine_type = "SCALAR_FUNCTION"
885
+ # r.language = "SQL"
886
+ # r.arguments = [
887
+ # Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
888
+ # ]
889
+ # r.body = "x * 3"
890
+ # r.description = "My routine description"
891
+ # end
892
+ #
893
+ # puts routine.routine_id
894
+ #
895
+ # @example Extended example:
896
+ # require "google/cloud/bigquery"
897
+ #
898
+ # bigquery = Google::Cloud::Bigquery.new
899
+ # dataset = bigquery.dataset "my_dataset"
900
+ # routine = dataset.create_routine "my_routine" do |r|
901
+ # r.routine_type = "SCALAR_FUNCTION"
902
+ # r.language = :SQL
903
+ # r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
904
+ # r.arguments = [
905
+ # Google::Cloud::Bigquery::Argument.new(
906
+ # name: "arr",
907
+ # argument_kind: "FIXED_TYPE",
908
+ # data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
909
+ # type_kind: "ARRAY",
910
+ # array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
911
+ # type_kind: "STRUCT",
912
+ # struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
913
+ # fields: [
914
+ # Google::Cloud::Bigquery::StandardSql::Field.new(
915
+ # name: "name",
916
+ # type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
917
+ # ),
918
+ # Google::Cloud::Bigquery::StandardSql::Field.new(
919
+ # name: "val",
920
+ # type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
921
+ # )
922
+ # ]
923
+ # )
924
+ # )
925
+ # )
926
+ # )
927
+ # ]
928
+ # end
929
+ #
930
+ # @!group Routine
931
+ #
932
+ def create_routine routine_id
933
+ ensure_service!
934
+ new_tb = Google::Apis::BigqueryV2::Routine.new(
935
+ routine_reference: Google::Apis::BigqueryV2::RoutineReference.new(
936
+ project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
937
+ )
938
+ )
939
+ updater = Routine::Updater.new new_tb
940
+
941
+ yield updater if block_given?
942
+
943
+ gapi = service.insert_routine dataset_id, updater.to_gapi
944
+ Routine.from_gapi gapi, service
945
+ end
946
+
947
+ ##
948
+ # Retrieves an existing routine by ID.
949
+ #
950
+ # @param [String] routine_id The ID of a routine.
951
+ # @param [Boolean] skip_lookup Optionally create just a local reference
952
+ # object without verifying that the resource exists on the BigQuery
953
+ # service. Calls made on this object will raise errors if the resource
954
+ # does not exist. Default is `false`. Optional.
955
+ #
956
+ # @return [Google::Cloud::Bigquery::Routine, nil] Returns `nil` if the
957
+ # routine does not exist.
958
+ #
959
+ # @example
960
+ # require "google/cloud/bigquery"
961
+ #
962
+ # bigquery = Google::Cloud::Bigquery.new
963
+ # dataset = bigquery.dataset "my_dataset"
964
+ #
965
+ # routine = dataset.routine "my_routine"
966
+ # puts routine.routine_id
967
+ #
968
+ # @example Avoid retrieving the routine resource with `skip_lookup`:
969
+ # require "google/cloud/bigquery"
970
+ #
971
+ # bigquery = Google::Cloud::Bigquery.new
972
+ #
973
+ # dataset = bigquery.dataset "my_dataset"
974
+ #
975
+ # routine = dataset.routine "my_routine", skip_lookup: true
976
+ #
977
+ # @!group Routine
978
+ #
979
+ def routine routine_id, skip_lookup: nil
980
+ ensure_service!
981
+ return Routine.new_reference project_id, dataset_id, routine_id, service if skip_lookup
982
+ gapi = service.get_routine dataset_id, routine_id
983
+ Routine.from_gapi gapi, service
984
+ rescue Google::Cloud::NotFoundError
985
+ nil
986
+ end
987
+
988
+ ##
989
+ # Retrieves the list of routines belonging to the dataset.
990
+ #
991
+ # @param [String] token A previously-returned page token representing
992
+ # part of the larger set of results to view.
993
+ # @param [Integer] max Maximum number of routines to return.
994
+ # @param [String] filter If set, then only the routines matching this filter are returned. The currently supported
995
+ # form is `routineType:`, with a {Routine#routine_type} enum value. Example: `routineType:SCALAR_FUNCTION`.
996
+ #
997
+ # @return [Array<Google::Cloud::Bigquery::Routine>] An array of routines
998
+ # (See {Google::Cloud::Bigquery::Routine::List})
999
+ #
1000
+ # @example
1001
+ # require "google/cloud/bigquery"
1002
+ #
1003
+ # bigquery = Google::Cloud::Bigquery.new
1004
+ # dataset = bigquery.dataset "my_dataset"
1005
+ #
1006
+ # routines = dataset.routines
1007
+ # routines.each do |routine|
1008
+ # puts routine.routine_id
1009
+ # end
1010
+ #
1011
+ # @example Retrieve all routines: (See {Routine::List#all})
1012
+ # require "google/cloud/bigquery"
1013
+ #
1014
+ # bigquery = Google::Cloud::Bigquery.new
1015
+ # dataset = bigquery.dataset "my_dataset"
1016
+ #
1017
+ # routines = dataset.routines
1018
+ # routines.all do |routine|
1019
+ # puts routine.routine_id
1020
+ # end
1021
+ #
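+ # @example Filtering by routine type (a sketch using the documented `filter` form):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # routines = dataset.routines filter: "routineType:SCALAR_FUNCTION"
+ # routines.each do |routine|
+ # puts routine.routine_id
+ # end
+ #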
1022
+ # @!group Routine
1023
+ #
1024
+ def routines token: nil, max: nil, filter: nil
1025
+ ensure_service!
1026
+ gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
1027
+ Routine::List.from_gapi gapi, service, dataset_id, max, filter: filter
1028
+ end
1029
+
1030
+ ##
1031
+ # Queries data by creating a [query
1032
+ # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
1033
+ #
1034
+ # Sets the current dataset as the default dataset in the query. Useful
1035
+ # for using unqualified table names.
1036
+ #
1037
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1038
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
1039
+ # dataset is a full resource representation (see {#resource_full?}), the
1040
+ # location of the job will be automatically set to the location of the
1041
+ # dataset.
1042
+ #
1043
+ # @param [String] query A query string, following the BigQuery [query
1044
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
1045
+ # query to execute. Example: "SELECT count(f1) FROM
1046
+ # [myProjectId:myDatasetId.myTableId]".
1047
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
1048
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
1049
+ # query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
1050
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
1051
+ # true.
1052
+ #
1053
+ # Ruby types are mapped to BigQuery types as follows:
1054
+ #
1055
+ # | BigQuery | Ruby | Notes |
1056
+ # |-------------|--------------------------------------|------------------------------------------------|
1057
+ # | `BOOL` | `true`/`false` | |
1058
+ # | `INT64` | `Integer` | |
1059
+ # | `FLOAT64` | `Float` | |
1060
+ # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
1061
+ # | `STRING` | `String` | |
1062
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
1063
+ # | `DATE` | `Date` | |
1064
+ # | `TIMESTAMP` | `Time` | |
1065
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
1066
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
1067
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
1068
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
1069
+ #
1070
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
1071
+ # of each BigQuery data type, including allowed values.
1072
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
1073
+ # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
1074
+ # type for these values.
1075
+ #
1076
+ # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
1077
+ # parameters. This must be a `Hash` when the query uses named query parameters. The values should be BigQuery
1078
+ # type codes from the following list:
1079
+ #
1080
+ # * `:BOOL`
1081
+ # * `:INT64`
1082
+ # * `:FLOAT64`
1083
+ # * `:NUMERIC`
1084
+ # * `:STRING`
1085
+ # * `:DATETIME`
1086
+ # * `:DATE`
1087
+ # * `:TIMESTAMP`
1088
+ # * `:TIME`
1089
+ # * `:BYTES`
1090
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
1091
+ # is specified as `[:INT64]`.
1092
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
1093
+ # match the `params` hash, and the values are the type codes that match the corresponding data.
1094
+ #
1095
+ # Types are optional.
1096
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
1097
+ # that represents the mapping of the external tables to the table
1098
+ # names used in the SQL query. The hash keys are the table names, and
1099
+ # the hash values are the external table objects. See {Dataset#query}.
1100
+ # @param [String] priority Specifies a priority for the query. Possible
1101
+ # values include `INTERACTIVE` and `BATCH`. The default value is
1102
+ # `INTERACTIVE`.
1103
+ # @param [Boolean] cache Whether to look for the result in the query
1104
+ # cache. The query cache is a best-effort cache that will be flushed
1105
+ # whenever tables in the query are modified. The default value is
1106
+ # true. For more information, see [query
1107
+ # caching](https://developers.google.com/bigquery/querying-data).
1108
+ # @param [Table] table The destination table where the query results
1109
+ # should be stored. If not present, a new table will be created to
1110
+ # store the results.
1111
+ # @param [String] create Specifies whether the job is allowed to create
1112
+ # new tables. The default value is `needed`.
1113
+ #
1114
+ # The following values are supported:
1115
+ #
1116
+ # * `needed` - Create the table if it does not exist.
1117
+ # * `never` - The table must already exist. A 'notFound' error is
1118
+ # raised if the table does not exist.
1119
+ # @param [String] write Specifies the action that occurs if the
1120
+ # destination table already exists. The default value is `empty`.
1121
+ #
1122
+ # The following values are supported:
1123
+ #
1124
+ # * `truncate` - BigQuery overwrites the table data.
1125
+ # * `append` - BigQuery appends the data to the table.
1126
+ # * `empty` - A 'duplicate' error is returned in the job result if the
1127
+ # table exists and contains data.
1128
+ # @param [Boolean] dryrun If set to true, BigQuery doesn't run the job.
1129
+ # Instead, if the query is valid, BigQuery returns statistics about
1130
+ # the job such as how many bytes would be processed. If the query is
1131
+ # invalid, an error is returned. The default value is false.
1132
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
1133
+ # [standard
1134
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
1135
+ # dialect for this query. If set to true, the query will use standard
1136
+ # SQL rather than the [legacy
1137
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
1138
+ # dialect. Optional. The default value is true.
1139
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
1140
+ # [legacy
1141
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
1142
+ # dialect for this query. If set to false, the query will use
1143
+ # BigQuery's [standard
1144
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
1145
+ # dialect. Optional. The default value is false.
1146
+ # @param [Boolean] large_results This option is specific to Legacy SQL.
1147
+ # If `true`, allows the query to produce arbitrarily large result
1148
+ # tables at a slight cost in performance. Requires `table` parameter
1149
+ # to be set.
1150
+ # @param [Boolean] flatten This option is specific to Legacy SQL.
1151
+ # Flattens all nested and repeated fields in the query results. The
1152
+ # default value is `true`. `large_results` parameter must be `true` if
1153
+ # this is set to `false`.
1154
+ # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
1155
+ # job. Queries that will have bytes billed beyond this limit will fail
1156
+ # (without incurring a charge). Optional. If unspecified, this will be
1157
+ # set to your project default.
1158
+ # @param [String] job_id A user-defined ID for the query job. The ID
1159
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
1160
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
1161
+ # `job_id` is provided, then `prefix` will not be used.
1162
+ #
1163
+ # See [Generating a job
1164
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
1165
+ # @param [String] prefix A string, usually human-readable, that will be
1166
+ # prepended to a generated value to produce a unique job ID. For
1167
+ # example, the prefix `daily_import_job_` can be given to generate a
1168
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1169
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1170
+ # underscores (_), or dashes (-). The maximum length of the entire ID
1171
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1172
+ # be used.
1173
+ # @param [Hash] labels A hash of user-provided labels associated with
1174
+ # the job. You can use these to organize and group your jobs. Label
1175
+ # keys and values can be no longer than 63 characters, can only
1176
+ # contain lowercase letters, numeric characters, underscores and
1177
+ # dashes. International characters are allowed. Label values are
1178
+ # optional. Label keys must start with a letter and each label in the
1179
+ # list must have a different key. See [Requirements for
1180
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1181
+ # @param [Array<String>, String] udfs User-defined function resources
1182
+ # used in a legacy SQL query. May be either a code resource to load from
1183
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
1184
+ # that contains code for a user-defined function (UDF). Providing an
1185
+ # inline code resource is equivalent to providing a URI for a file
1186
+ # containing the same code.
1187
+ #
1188
+ # This parameter is used for defining User Defined Function (UDF)
1189
+ # resources only when using legacy SQL. Users of standard SQL should
1190
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
1191
+ # Routines API to define UDF resources.
1192
+ #
1193
+ # For additional information on migrating, see: [Migrating to
1194
+ # standard SQL - Differences in user-defined JavaScript
1195
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
1196
+ # @param [Integer] maximum_billing_tier Deprecated: Change the billing
1197
+ # tier to allow high-compute queries.
1198
+ # @yield [job] a job configuration object
1199
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
1200
+ # configuration object for setting additional options for the query.
1201
+ #
1202
+ # @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
1203
+ #
1204
+ # @example Query using standard SQL:
1205
+ # require "google/cloud/bigquery"
1206
+ #
1207
+ # bigquery = Google::Cloud::Bigquery.new
1208
+ # dataset = bigquery.dataset "my_dataset"
1209
+ #
1210
+ # job = dataset.query_job "SELECT name FROM my_table"
1211
+ #
1212
+ # job.wait_until_done!
1213
+ # if !job.failed?
1214
+ # job.data.each do |row|
1215
+ # puts row[:name]
1216
+ # end
1217
+ # end
1218
+ #
1219
+ # @example Query using legacy SQL:
1220
+ # require "google/cloud/bigquery"
1221
+ #
1222
+ # bigquery = Google::Cloud::Bigquery.new
1223
+ # dataset = bigquery.dataset "my_dataset"
1224
+ #
1225
+ # job = dataset.query_job "SELECT name FROM my_table",
1226
+ # legacy_sql: true
1227
+ #
1228
+ # job.wait_until_done!
1229
+ # if !job.failed?
1230
+ # job.data.each do |row|
1231
+ # puts row[:name]
1232
+ # end
1233
+ # end
1234
+ #
1235
+ # @example Query using positional query parameters:
1236
+ # require "google/cloud/bigquery"
1237
+ #
1238
+ # bigquery = Google::Cloud::Bigquery.new
1239
+ # dataset = bigquery.dataset "my_dataset"
1240
+ #
1241
+ # job = dataset.query_job "SELECT name FROM my_table WHERE id = ?",
1242
+ # params: [1]
1243
+ #
1244
+ # job.wait_until_done!
1245
+ # if !job.failed?
1246
+ # job.data.each do |row|
1247
+ # puts row[:name]
1248
+ # end
1249
+ # end
1250
+ #
1251
+ # @example Query using named query parameters:
1252
+ # require "google/cloud/bigquery"
1253
+ #
1254
+ # bigquery = Google::Cloud::Bigquery.new
1255
+ # dataset = bigquery.dataset "my_dataset"
1256
+ #
1257
+ # job = dataset.query_job "SELECT name FROM my_table WHERE id = @id",
1258
+ # params: { id: 1 }
1259
+ #
1260
+ # job.wait_until_done!
1261
+ # if !job.failed?
1262
+ # job.data.each do |row|
1263
+ # puts row[:name]
1264
+ # end
1265
+ # end
1266
+ #
1267
+ # @example Query using named query parameters with types:
1268
+ # require "google/cloud/bigquery"
1269
+ #
1270
+ # bigquery = Google::Cloud::Bigquery.new
1271
+ # dataset = bigquery.dataset "my_dataset"
1272
+ #
1273
+ # job = dataset.query_job "SELECT name FROM my_table " \
1274
+ # "WHERE id IN UNNEST(@ids)",
1275
+ # params: { ids: [] },
1276
+ # types: { ids: [:INT64] }
1277
+ #
1278
+ # job.wait_until_done!
1279
+ # if !job.failed?
1280
+ # job.data.each do |row|
1281
+ # puts row[:name]
1282
+ # end
1283
+ # end
1284
+ #
1285
+ # @example Execute a DDL statement:
1286
+ # require "google/cloud/bigquery"
1287
+ #
1288
+ # bigquery = Google::Cloud::Bigquery.new
1289
+ #
1290
+ # job = bigquery.query_job "CREATE TABLE my_table (x INT64)"
1291
+ #
1292
+ # job.wait_until_done!
1293
+ # if !job.failed?
1294
+ # table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
1295
+ # end
1296
+ #
1297
+ # @example Execute a DML statement:
1298
+ # require "google/cloud/bigquery"
1299
+ #
1300
+ # bigquery = Google::Cloud::Bigquery.new
1301
+ #
1302
+ # job = bigquery.query_job "UPDATE my_table " \
1303
+ # "SET x = x + 1 " \
1304
+ # "WHERE x IS NOT NULL"
1305
+ #
1306
+ # job.wait_until_done!
1307
+ # if !job.failed?
1308
+ # puts job.num_dml_affected_rows
1309
+ # end
1310
+ #
1311
+ # @example Query using external data source, set destination:
1312
+ # require "google/cloud/bigquery"
1313
+ #
1314
+ # bigquery = Google::Cloud::Bigquery.new
1315
+ # dataset = bigquery.dataset "my_dataset"
1316
+ #
1317
+ # csv_url = "gs://bucket/path/to/data.csv"
1318
+ # csv_table = dataset.external csv_url do |csv|
1319
+ # csv.autodetect = true
1320
+ # csv.skip_leading_rows = 1
1321
+ # end
1322
+ #
1323
+ # job = dataset.query_job "SELECT * FROM my_ext_table" do |query|
1324
+ # query.external = { my_ext_table: csv_table }
1325
+ # query.table = dataset.table "my_table", skip_lookup: true
1326
+ # end
1327
+ #
1328
+ # job.wait_until_done!
1329
+ # if !job.failed?
1330
+ # job.data.each do |row|
1331
+ # puts row[:name]
1332
+ # end
1333
+ # end
1334
+ #
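+ # @example Writing results to a destination table (a sketch using the documented `table`, `create`, and `write` options):
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # destination = dataset.table "my_results", skip_lookup: true
+ # job = dataset.query_job "SELECT name FROM my_table",
+ # table: destination,
+ # create: "needed",
+ # write: "truncate"
+ #
+ # job.wait_until_done!
+ # puts "done" unless job.failed?
+ #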
1335
+ # @!group Data
1336
+ #
1337
+ def query_job query, params: nil, types: nil, external: nil, priority: "INTERACTIVE", cache: true, table: nil,
1338
+ create: nil, write: nil, dryrun: nil, standard_sql: nil, legacy_sql: nil, large_results: nil,
1339
+ flatten: nil, maximum_billing_tier: nil, maximum_bytes_billed: nil, job_id: nil, prefix: nil,
1340
+ labels: nil, udfs: nil
1341
+ ensure_service!
1342
+ options = { params: params, types: types, external: external, priority: priority, cache: cache, table: table,
1343
+ create: create, write: write, dryrun: dryrun, standard_sql: standard_sql, legacy_sql: legacy_sql,
1344
+ large_results: large_results, flatten: flatten, maximum_billing_tier: maximum_billing_tier,
1345
+ maximum_bytes_billed: maximum_bytes_billed, job_id: job_id, prefix: prefix, labels: labels,
1346
+ udfs: udfs }
1347
+
1348
+ updater = QueryJob::Updater.from_options service, query, options
1349
+ updater.dataset = self
1350
+ updater.location = location if location # may be dataset reference
1351
+
1352
+ yield updater if block_given?
1353
+
1354
+ gapi = service.query_job updater.to_gapi
1355
+ Job.from_gapi gapi, service
1356
+ end
1357
+
1358
+ ##
1359
+ # Queries data and waits for the results. In this method, a {QueryJob}
1360
+ # is created and its results are saved to a temporary table, then read
1361
+ # from the table. Timeouts and transient errors are generally handled
1362
+ # as needed to complete the query. When used for executing DDL/DML
1363
+ # statements, this method does not return row data.
1364
+ #
1365
+ # Sets the current dataset as the default dataset in the query. Useful
1366
+ # for using unqualified table names.
1367
+ #
1368
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1369
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
1370
+ # dataset is a full resource representation (see {#resource_full?}), the
1371
+ # location of the job will be automatically set to the location of the
1372
+ # dataset.
1373
+ #
1374
+ # @see https://cloud.google.com/bigquery/querying-data Querying Data
1375
+ #
1376
+ # @param [String] query A query string, following the BigQuery [query
1377
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
1378
+ # query to execute. Example: "SELECT count(f1) FROM
1379
+ # [myProjectId:myDatasetId.myTableId]".
1380
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
1381
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
1382
+ # query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
1383
+ # use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
1384
+ # true.
1385
+ #
1386
+ # Ruby types are mapped to BigQuery types as follows:
1387
+ #
1388
+ # | BigQuery | Ruby | Notes |
1389
+ # |-------------|--------------------------------------|------------------------------------------------|
1390
+ # | `BOOL` | `true`/`false` | |
1391
+ # | `INT64` | `Integer` | |
1392
+ # | `FLOAT64` | `Float` | |
1393
+ # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
1394
+ # | `STRING` | `String` | |
1395
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
1396
+ # | `DATE` | `Date` | |
1397
+ # | `TIMESTAMP` | `Time` | |
1398
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
1399
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
1400
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
1401
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
1402
+ #
1403
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
1404
+ # of each BigQuery data type, including allowed values.
1405
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always possible to
1406
+ # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
1407
+ # type for these values.
1408
+ #
1409
+ # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
1410
+ # parameters. This must be a `Hash` when the query uses named query parameters. The values should be BigQuery
1411
+ # type codes from the following list:
1412
+ #
1413
+ # * `:BOOL`
1414
+ # * `:INT64`
1415
+ # * `:FLOAT64`
1416
+ # * `:NUMERIC`
1417
+ # * `:STRING`
1418
+ # * `:DATETIME`
1419
+ # * `:DATE`
1420
+ # * `:TIMESTAMP`
1421
+ # * `:TIME`
1422
+ # * `:BYTES`
1423
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
1424
+ # is specified as `[:INT64]`.
1425
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
1426
+ # match the `params` hash, and the values are the type codes that match the data.
1427
+ #
1428
+ # Types are optional.
1429
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
1430
+ # that represents the mapping of the external tables to the table
1431
+ # names used in the SQL query. The hash keys are the table names, and
1432
+ # the hash values are the external table objects. See {Dataset#query}.
1433
+ # @param [Integer] max The maximum number of rows of data to return per
1434
+ # page of results. Setting this flag to a small value such as 1000 and
1435
+ # then paging through results might improve reliability when the query
1436
+ # result set is large. In addition to this limit, responses are also
1437
+ # limited to 10 MB. By default, there is no maximum row count, and
1438
+ # only the byte limit applies.
1439
+ # @param [Boolean] cache Whether to look for the result in the query
1440
+ # cache. The query cache is a best-effort cache that will be flushed
1441
+ # whenever tables in the query are modified. The default value is
1442
+ # true. For more information, see [query
1443
+ # caching](https://developers.google.com/bigquery/querying-data).
1444
+ # @param [Boolean] standard_sql Specifies whether to use BigQuery's
1445
+ # [standard
1446
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
1447
+ # dialect for this query. If set to true, the query will use standard
1448
+ # SQL rather than the [legacy
1449
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
1450
+ # dialect. When set to true, the values of `large_results` and
1451
+ # `flatten` are ignored; the query will be run as if `large_results`
1452
+ # is true and `flatten` is false. Optional. The default value is
1453
+ # true.
1454
+ # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
1455
+ # [legacy
1456
+ # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
1457
+ # dialect for this query. If set to false, the query will use
1458
+ # BigQuery's [standard
1459
+ # SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
1460
+ # When set to false, the values of `large_results` and `flatten` are
1461
+ # ignored; the query will be run as if `large_results` is true and
1462
+ # `flatten` is false. Optional. The default value is false.
1463
+ # @yield [job] a job configuration object
1464
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
1465
+ # configuration object for setting additional options for the query.
1466
+ #
1467
+ # @return [Google::Cloud::Bigquery::Data] A new data object.
1468
+ #
1469
+ # @example Query using standard SQL:
1470
+ # require "google/cloud/bigquery"
1471
+ #
1472
+ # bigquery = Google::Cloud::Bigquery.new
1473
+ # dataset = bigquery.dataset "my_dataset"
1474
+ #
1475
+ # data = dataset.query "SELECT name FROM my_table"
1476
+ #
1477
+ # # Iterate over the first page of results
1478
+ # data.each do |row|
1479
+ # puts row[:name]
1480
+ # end
1481
+ # # Retrieve the next page of results
1482
+ # data = data.next if data.next?
1483
+ #
1484
+ # @example Query using legacy SQL:
1485
+ # require "google/cloud/bigquery"
1486
+ #
1487
+ # bigquery = Google::Cloud::Bigquery.new
1488
+ # dataset = bigquery.dataset "my_dataset"
1489
+ #
1490
+ # data = dataset.query "SELECT name FROM my_table",
1491
+ # legacy_sql: true
1492
+ #
1493
+ # # Iterate over the first page of results
1494
+ # data.each do |row|
1495
+ # puts row[:name]
1496
+ # end
1497
+ # # Retrieve the next page of results
1498
+ # data = data.next if data.next?
1499
+ #
1500
+ # @example Query using positional query parameters:
1501
+ # require "google/cloud/bigquery"
1502
+ #
1503
+ # bigquery = Google::Cloud::Bigquery.new
1504
+ # dataset = bigquery.dataset "my_dataset"
1505
+ #
1506
+ # data = dataset.query "SELECT name FROM my_table WHERE id = ?",
1507
+ # params: [1]
1508
+ #
1509
+ # # Iterate over the first page of results
1510
+ # data.each do |row|
1511
+ # puts row[:name]
1512
+ # end
1513
+ # # Retrieve the next page of results
1514
+ # data = data.next if data.next?
1515
+ #
1516
+ # @example Query using named query parameters:
1517
+ # require "google/cloud/bigquery"
1518
+ #
1519
+ # bigquery = Google::Cloud::Bigquery.new
1520
+ # dataset = bigquery.dataset "my_dataset"
1521
+ #
1522
+ # data = dataset.query "SELECT name FROM my_table WHERE id = @id",
1523
+ # params: { id: 1 }
1524
+ #
1525
+ # # Iterate over the first page of results
1526
+ # data.each do |row|
1527
+ # puts row[:name]
1528
+ # end
1529
+ # # Retrieve the next page of results
1530
+ # data = data.next if data.next?
1531
+ #
1532
+ # @example Query using named query parameters with types:
1533
+ # require "google/cloud/bigquery"
1534
+ #
1535
+ # bigquery = Google::Cloud::Bigquery.new
1536
+ # dataset = bigquery.dataset "my_dataset"
1537
+ #
1538
+ # data = dataset.query "SELECT name FROM my_table " \
1539
+ # "WHERE id IN UNNEST(@ids)",
1540
+ # params: { ids: [] },
1541
+ # types: { ids: [:INT64] }
1542
+ #
1543
+ # # Iterate over the first page of results
1544
+ # data.each do |row|
1545
+ # puts row[:name]
1546
+ # end
1547
+ # # Retrieve the next page of results
1548
+ # data = data.next if data.next?
1549
+ #
1550
+ # @example Execute a DDL statement:
1551
+ # require "google/cloud/bigquery"
1552
+ #
1553
+ # bigquery = Google::Cloud::Bigquery.new
1554
+ #
1555
+ # data = bigquery.query "CREATE TABLE my_table (x INT64)"
1556
+ #
1557
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
1558
+ #
1559
+ # @example Execute a DML statement:
1560
+ # require "google/cloud/bigquery"
1561
+ #
1562
+ # bigquery = Google::Cloud::Bigquery.new
1563
+ #
1564
+ # data = bigquery.query "UPDATE my_table " \
1565
+ # "SET x = x + 1 " \
1566
+ # "WHERE x IS NOT NULL"
1567
+ #
1568
+ # puts data.num_dml_affected_rows
1569
+ #
1570
+ # @example Query using external data source, set destination:
1571
+ # require "google/cloud/bigquery"
1572
+ #
1573
+ # bigquery = Google::Cloud::Bigquery.new
1574
+ # dataset = bigquery.dataset "my_dataset"
1575
+ #
1576
+ # csv_url = "gs://bucket/path/to/data.csv"
1577
+ # csv_table = dataset.external csv_url do |csv|
1578
+ # csv.autodetect = true
1579
+ # csv.skip_leading_rows = 1
1580
+ # end
1581
+ #
1582
+ # data = dataset.query "SELECT * FROM my_ext_table" do |query|
1583
+ # query.external = { my_ext_table: csv_table }
1584
+ # query.table = dataset.table "my_table", skip_lookup: true
1585
+ # end
1586
+ #
1587
+ # # Iterate over the first page of results
1588
+ # data.each do |row|
1589
+ # puts row[:name]
1590
+ # end
1591
+ # # Retrieve the next page of results
1592
+ # data = data.next if data.next?
1593
+ #
1594
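+ # @example Query using positional query parameters with explicit types (a minimal sketch):
+ # # Assumes `my_table` has an INT64 column named `id`; type codes follow the list above.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(?)",
+ # params: [[1, 2, 3]],
+ # types: [[:INT64]]
+ #
+ # # Iterate over the first page of results
+ # data.each do |row|
+ # puts row[:name]
+ # end
+ #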
+ # @!group Data
1595
+ #
1596
+ def query query, params: nil, types: nil, external: nil, max: nil, cache: true,
1597
+ standard_sql: nil, legacy_sql: nil, &block
1598
+ job = query_job query, params: params, types: types, external: external, cache: cache,
1599
+ standard_sql: standard_sql, legacy_sql: legacy_sql, &block
1600
+ job.wait_until_done!
1601
+ ensure_job_succeeded! job
1602
+
1603
+ job.data max: max
1604
+ end
1605
+
1606
+ ##
1607
+ # Creates a new External::DataSource (or subclass) object that
1608
+ # represents the external data source that can be queried directly,
1609
+ # even though the data is not stored in BigQuery. Instead of loading or
1610
+ # streaming the data, this object references the external data source.
1611
+ #
1612
+ # @see https://cloud.google.com/bigquery/external-data-sources Querying
1613
+ # External Data Sources
1614
+ #
1615
+ # @param [String, Array<String>] url The fully-qualified URL(s) that
1616
+ # point to your data in Google Cloud. An attempt will be made to
1617
+ # derive the format from the URLs provided.
1618
+ # @param [String|Symbol] format The data format. This value will be used
1619
+ # even if the provided URLs are recognized as a different format.
1620
+ # Optional.
1621
+ #
1622
+ # The following values are supported:
1623
+ #
1624
+ # * `csv` - CSV
1625
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1626
+ # * `avro` - [Avro](http://avro.apache.org/)
1627
+ # * `sheets` - Google Sheets
1628
+ # * `datastore_backup` - Cloud Datastore backup
1629
+ # * `bigtable` - Bigtable
1630
+ #
1631
+ # @return [External::DataSource] External data source.
1632
+ #
1633
+ # @example
1634
+ # require "google/cloud/bigquery"
1635
+ #
1636
+ # bigquery = Google::Cloud::Bigquery.new
1637
+ #
1638
+ # dataset = bigquery.dataset "my_dataset"
1639
+ #
1640
+ # csv_url = "gs://bucket/path/to/data.csv"
1641
+ # csv_table = dataset.external csv_url do |csv|
1642
+ # csv.autodetect = true
1643
+ # csv.skip_leading_rows = 1
1644
+ # end
1645
+ #
1646
+ # data = dataset.query "SELECT * FROM my_ext_table",
1647
+ # external: { my_ext_table: csv_table }
1648
+ #
1649
+ # data.each do |row|
1650
+ # puts row[:name]
1651
+ # end
1652
+ #
1653
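+ # @example Overriding the detected format (a minimal sketch):
+ # # Assumes newline-delimited JSON stored under an extension that cannot be auto-detected;
+ # # the explicit `format` argument takes precedence over the URL.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # json_url = "gs://bucket/path/to/data.log"
+ # json_table = dataset.external json_url, format: :json
+ #
+ # data = dataset.query "SELECT * FROM my_ext_table",
+ # external: { my_ext_table: json_table }
+ #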
+ def external url, format: nil
1654
+ ext = External.from_urls url, format
1655
+ yield ext if block_given?
1656
+ ext
1657
+ end
1658
+
1659
+ ##
1660
+ # Loads data into the provided destination table using an asynchronous
1661
+ # method. In this method, a {LoadJob} is immediately returned. The
1662
+ # caller may poll the service by repeatedly calling {Job#reload!} and
1663
+ # {Job#done?} to detect when the job is done, or simply block until the
1664
+ # job is done by calling {Job#wait_until_done!}. See also {#load}.
1665
+ #
1666
+ # For the source of the data, you can pass a google-cloud storage file
1667
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
1668
+ # file directly. See [Loading Data with a POST
1669
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1670
+ #
1671
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1672
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1673
+ # dataset is a full resource representation (see {#resource_full?}), the
1674
+ # location of the job will be automatically set to the location of the
1675
+ # dataset.
1676
+ #
1677
+ # @param [String] table_id The destination table to load the data into.
1678
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1679
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1680
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1681
+ # those, containing data to load into the table.
1682
+ # @param [String] format The format of the data files. The default value is
1683
+ # `csv`.
1684
+ #
1685
+ # The following values are supported:
1686
+ #
1687
+ # * `csv` - CSV
1688
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1689
+ # * `avro` - [Avro](http://avro.apache.org/)
1690
+ # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
1691
+ # * `parquet` - [Parquet](https://parquet.apache.org/)
1692
+ # * `datastore_backup` - Cloud Datastore backup
1693
+ # @param [String] create Specifies whether the job is allowed to create
1694
+ # new tables. The default value is `needed`.
1695
+ #
1696
+ # The following values are supported:
1697
+ #
1698
+ # * `needed` - Create the table if it does not exist.
1699
+ # * `never` - The table must already exist. A 'notFound' error is
1700
+ # raised if the table does not exist.
1701
+ # @param [String] write Specifies how to handle data already present in
1702
+ # the table. The default value is `append`.
1703
+ #
1704
+ # The following values are supported:
1705
+ #
1706
+ # * `truncate` - BigQuery overwrites the table data.
1707
+ # * `append` - BigQuery appends the data to the table.
1708
+ # * `empty` - An error will be returned if the table already contains
1709
+ # data.
1710
+ # @param [Array<String>] projection_fields If the `format` option is set
1711
+ # to `datastore_backup`, indicates which entity properties to load
1712
+ # from a Cloud Datastore backup. Property names are case sensitive and
1713
+ # must be top-level properties. If not set, BigQuery loads all
1714
+ # properties. If any named property isn't found in the Cloud Datastore
1715
+ # backup, an invalid error is returned.
1716
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
1717
+ # optional columns. The missing values are treated as nulls. If
1718
+ # `false`, records with missing trailing columns are treated as bad
1719
+ # records, and if there are too many bad records, an invalid error is
1720
+ # returned in the job result. The default value is `false`. Only
1721
+ # applicable to CSV, ignored for other formats.
1722
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
1723
+ # quoted data sections that contain newline characters in a CSV file.
1724
+ # The default value is `false`.
1725
+ # @param [Boolean] autodetect Indicates if BigQuery should
1726
+ # automatically infer the options and schema for CSV and JSON sources.
1727
+ # The default value is `false`.
1728
+ # @param [String] encoding The character encoding of the data. The
1729
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
1730
+ # `UTF-8`.
1731
+ # @param [String] delimiter Specifies the separator for fields in a CSV
1732
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
1733
+ # then uses the first byte of the encoded string to split the data in
1734
+ # its raw, binary state. Default is <code>,</code>.
1735
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
1736
+ # extra values that are not represented in the table schema. If true,
1737
+ # the extra values are ignored. If false, records with extra columns
1738
+ # are treated as bad records, and if there are too many bad records,
1739
+ # an invalid error is returned in the job result. The default value is
1740
+ # `false`.
1741
+ #
1742
+ # The `format` property determines what BigQuery treats as an extra
1743
+ # value:
1744
+ #
1745
+ # * `CSV`: Trailing columns
1746
+ # * `JSON`: Named values that don't match any column names
1747
+ # @param [Integer] max_bad_records The maximum number of bad records
1748
+ # that BigQuery can ignore when running the job. If the number of bad
1749
+ # records exceeds this value, an invalid error is returned in the job
1750
+ # result. The default value is `0`, which requires that all records
1751
+ # are valid.
1752
+ # @param [String] null_marker Specifies a string that represents a null
1753
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
1754
+ # interprets `\N` as a null value when loading a CSV file. The default
1755
+ # value is the empty string. If you set this property to a custom
1756
+ # value, BigQuery throws an error if an empty string is present for
1757
+ # all data types except for STRING and BYTE. For STRING and BYTE
1758
+ # columns, BigQuery interprets the empty string as an empty value.
1759
+ # @param [String] quote The value that is used to quote data sections in
1760
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
1761
+ # then uses the first byte of the encoded string to split the data in
1762
+ # its raw, binary state. The default value is a double-quote
1763
+ # <code>"</code>. If your data does not contain quoted sections, set
1764
+ # the property value to an empty string. If your data contains quoted
1765
+ # newline characters, you must also set the allowQuotedNewlines
1766
+ # property to true.
1767
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
1768
+ # file that BigQuery will skip when loading the data. The default
1769
+ # value is `0`. This property is useful if you have header rows in the
1770
+ # file that should be skipped.
1771
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
1772
+ # destination table. Optional. The schema can be omitted if the
1773
+ # destination table already exists, or if you're loading data from a
1774
+ # Google Cloud Datastore backup.
1775
+ #
1776
+ # See {Project#schema} for the creation of the schema for use with
1777
+ # this option. Also note that for most use cases, the block yielded by
1778
+ # this method is a more convenient way to configure the schema.
1779
+ # @param [String] job_id A user-defined ID for the load job. The ID
1780
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
1781
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
1782
+ # `job_id` is provided, then `prefix` will not be used.
1783
+ #
1784
+ # See [Generating a job
1785
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
1786
+ # @param [String] prefix A string, usually human-readable, that will be
1787
+ # prepended to a generated value to produce a unique job ID. For
1788
+ # example, the prefix `daily_import_job_` can be given to generate a
1789
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1790
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1791
+ # underscores (_), or dashes (-). The maximum length of the entire ID
1792
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1793
+ # be used.
1794
+ # @param [Hash] labels A hash of user-provided labels associated with
1795
+ # the job. You can use these to organize and group your jobs. Label
1796
+ # keys and values can be no longer than 63 characters, can only
1797
+ # contain lowercase letters, numeric characters, underscores and
1798
+ # dashes. International characters are allowed. Label values are
1799
+ # optional. Label keys must start with a letter and each label in the
1800
+ # list must have a different key. See [Requirements for
1801
+ # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1802
+ # @yield [updater] A block for setting the schema and other
1803
+ # options for the destination table. The schema can be omitted if the
1804
+ # destination table already exists, or if you're loading data from a
1805
+ # Google Cloud Datastore backup.
1806
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1807
+ # updater to modify the load job and its schema.
1808
+ # @param [Boolean] dryrun If set, don't actually run this job. Behavior
1809
+ # is undefined for non-query jobs, however, and may result in an error.
1810
+ # Deprecated.
1811
+ #
1812
+ # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
1813
+ #
1814
+ # @example
1815
+ # require "google/cloud/bigquery"
1816
+ #
1817
+ # bigquery = Google::Cloud::Bigquery.new
1818
+ # dataset = bigquery.dataset "my_dataset"
1819
+ #
1820
+ # gs_url = "gs://my-bucket/file-name.csv"
1821
+ # load_job = dataset.load_job "my_new_table", gs_url do |schema|
1822
+ # schema.string "first_name", mode: :required
1823
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1824
+ # nested_schema.string "place", mode: :required
1825
+ # nested_schema.integer "number_of_years", mode: :required
1826
+ # end
1827
+ # end
1828
+ #
1829
+ # @example Pass a google-cloud-storage `File` instance:
1830
+ # require "google/cloud/bigquery"
1831
+ # require "google/cloud/storage"
1832
+ #
1833
+ # bigquery = Google::Cloud::Bigquery.new
1834
+ # dataset = bigquery.dataset "my_dataset"
1835
+ #
1836
+ # storage = Google::Cloud::Storage.new
1837
+ # bucket = storage.bucket "my-bucket"
1838
+ # file = bucket.file "file-name.csv"
1839
+ # load_job = dataset.load_job "my_new_table", file do |schema|
1840
+ # schema.string "first_name", mode: :required
1841
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1842
+ # nested_schema.string "place", mode: :required
1843
+ # nested_schema.integer "number_of_years", mode: :required
1844
+ # end
1845
+ # end
1846
+ #
1847
+ # @example Pass a list of google-cloud-storage files:
1848
+ # require "google/cloud/bigquery"
1849
+ # require "google/cloud/storage"
1850
+ #
1851
+ # bigquery = Google::Cloud::Bigquery.new
1852
+ # dataset = bigquery.dataset "my_dataset"
1853
+ #
1854
+ # storage = Google::Cloud::Storage.new
1855
+ # bucket = storage.bucket "my-bucket"
1856
+ # file = bucket.file "file-name.csv"
1857
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1858
+ # load_job = dataset.load_job "my_new_table", list do |schema|
1859
+ # schema.string "first_name", mode: :required
1860
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1861
+ # nested_schema.string "place", mode: :required
1862
+ # nested_schema.integer "number_of_years", mode: :required
1863
+ # end
1864
+ # end
1865
+ #
1866
+ # @example Upload a file directly:
1867
+ # require "google/cloud/bigquery"
1868
+ #
1869
+ # bigquery = Google::Cloud::Bigquery.new
1870
+ # dataset = bigquery.dataset "my_dataset"
1871
+ #
1872
+ # file = File.open "my_data.csv"
1873
+ # load_job = dataset.load_job "my_new_table", file do |schema|
1874
+ # schema.string "first_name", mode: :required
1875
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1876
+ # nested_schema.string "place", mode: :required
1877
+ # nested_schema.integer "number_of_years", mode: :required
1878
+ # end
1879
+ # end
1880
+ #
1881
+ # @example Schema is not required with a Cloud Datastore backup:
1882
+ # require "google/cloud/bigquery"
1883
+ #
1884
+ # bigquery = Google::Cloud::Bigquery.new
1885
+ # dataset = bigquery.dataset "my_dataset"
1886
+ #
1887
+ # load_job = dataset.load_job(
1888
+ # "my_new_table",
1889
+ # "gs://my-bucket/xxxx.kind_name.backup_info") do |j|
1890
+ # j.format = "datastore_backup"
1891
+ # end
1892
+ #
1893
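+ # @example Setting common CSV options (a minimal sketch):
+ # # Assumes a pipe-delimited file with one header row in the bucket used above,
+ # # with the literal `\N` marking null values.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # load_job = dataset.load_job "my_new_table",
+ # "gs://my-bucket/pipe-delimited.csv",
+ # delimiter: "|",
+ # skip_leading: 1,
+ # null_marker: "\\N",
+ # autodetect: true
+ #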
+ # @!group Data
1894
+ #
1895
+ def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
1896
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
1897
+ quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
1898
+ null_marker: nil, dryrun: nil
1899
+ ensure_service!
1900
+
1901
+ updater = load_job_updater table_id,
1902
+ format: format, create: create, write: write, projection_fields: projection_fields,
1903
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
1904
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
1905
+ max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
1906
+ dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
1907
+ autodetect: autodetect, null_marker: null_marker
1908
+
1909
+ yield updater if block_given?
1910
+
1911
+ load_local_or_uri files, updater
1912
+ end
1913
+
1914
+ ##
1915
+ # Loads data into the provided destination table using a synchronous
1916
+ # method that blocks for a response. Timeouts and transient errors are
1917
+ # generally handled as needed to complete the job. See also
1918
+ # {#load_job}.
1919
+ #
1920
+ # For the source of the data, you can pass a google-cloud storage file
1921
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
1922
+ # file directly. See [Loading Data with a POST
1923
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1924
+ #
1925
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1926
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1927
+ # dataset is a full resource representation (see {#resource_full?}), the
1928
+ # location of the job will be automatically set to the location of the
1929
+ # dataset.
1930
+ #
1931
+ # @param [String] table_id The destination table to load the data into.
1932
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1933
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1934
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1935
+ # those, containing data to load into the table.
1936
+ # @param [String] format The format of the data files. The default value is
1937
+ # `csv`.
1938
+ #
1939
+ # The following values are supported:
1940
+ #
1941
+ # * `csv` - CSV
1942
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1943
+ # * `avro` - [Avro](http://avro.apache.org/)
1944
+ # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
1945
+ # * `parquet` - [Parquet](https://parquet.apache.org/)
1946
+ # * `datastore_backup` - Cloud Datastore backup
1947
+ # @param [String] create Specifies whether the job is allowed to create
1948
+ # new tables. The default value is `needed`.
1949
+ #
1950
+ # The following values are supported:
1951
+ #
1952
+ # * `needed` - Create the table if it does not exist.
1953
+ # * `never` - The table must already exist. A 'notFound' error is
1954
+ # raised if the table does not exist.
1955
+ # @param [String] write Specifies how to handle data already present in
1956
+ # the table. The default value is `append`.
1957
+ #
1958
+ # The following values are supported:
1959
+ #
1960
+ # * `truncate` - BigQuery overwrites the table data.
1961
+ # * `append` - BigQuery appends the data to the table.
1962
+ # * `empty` - An error will be returned if the table already contains
1963
+ # data.
1964
+ # @param [Array<String>] projection_fields If the `format` option is set
1965
+ # to `datastore_backup`, indicates which entity properties to load
1966
+ # from a Cloud Datastore backup. Property names are case sensitive and
1967
+ # must be top-level properties. If not set, BigQuery loads all
1968
+ # properties. If any named property isn't found in the Cloud Datastore
1969
+ # backup, an invalid error is returned.
1970
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
1971
+ # optional columns. The missing values are treated as nulls. If
1972
+ # `false`, records with missing trailing columns are treated as bad
1973
+ # records, and if there are too many bad records, an invalid error is
1974
+ # returned in the job result. The default value is `false`. Only
1975
+ # applicable to CSV, ignored for other formats.
1976
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
1977
+ # quoted data sections that contain newline characters in a CSV file.
1978
+ # The default value is `false`.
1979
+ # @param [Boolean] autodetect Indicates if BigQuery should
1980
+ # automatically infer the options and schema for CSV and JSON sources.
1981
+ # The default value is `false`.
1982
+ # @param [String] encoding The character encoding of the data. The
1983
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
1984
+ # `UTF-8`.
1985
+ # @param [String] delimiter Specifies the separator for fields in a CSV
1986
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
1987
+ # then uses the first byte of the encoded string to split the data in
1988
+ # its raw, binary state. Default is <code>,</code>.
1989
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
1990
+ # extra values that are not represented in the table schema. If true,
1991
+ # the extra values are ignored. If false, records with extra columns
1992
+ # are treated as bad records, and if there are too many bad records,
1993
+ # an invalid error is returned in the job result. The default value is
1994
+ # `false`.
1995
+ #
1996
+ # The `format` property determines what BigQuery treats as an extra
1997
+ # value:
1998
+ #
1999
+ # * `CSV`: Trailing columns
2000
+ # * `JSON`: Named values that don't match any column names
2001
+ # @param [Integer] max_bad_records The maximum number of bad records
2002
+ # that BigQuery can ignore when running the job. If the number of bad
2003
+ # records exceeds this value, an invalid error is returned in the job
2004
+ # result. The default value is `0`, which requires that all records
2005
+ # are valid.
2006
+ # @param [String] null_marker Specifies a string that represents a null
2007
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
2008
+ # interprets `\N` as a null value when loading a CSV file. The default
2009
+ # value is the empty string. If you set this property to a custom
2010
+ # value, BigQuery throws an error if an empty string is present for
2011
+ # all data types except for STRING and BYTE. For STRING and BYTE
2012
+ # columns, BigQuery interprets the empty string as an empty value.
2013
+ # @param [String] quote The value that is used to quote data sections in
2014
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
2015
+ # then uses the first byte of the encoded string to split the data in
2016
+ # its raw, binary state. The default value is a double-quote
2017
+ # <code>"</code>. If your data does not contain quoted sections, set
2018
+ # the property value to an empty string. If your data contains quoted
2019
+ # newline characters, you must also set the allowQuotedNewlines
2020
+ # property to true.
2021
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
2022
+ # file that BigQuery will skip when loading the data. The default
2023
+ # value is `0`. This property is useful if you have header rows in the
2024
+ # file that should be skipped.
2025
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
2026
+ # destination table. Optional. The schema can be omitted if the
2027
+ # destination table already exists, or if you're loading data from a
2028
+ # Google Cloud Datastore backup.
2029
+ #
2030
+ # See {Project#schema} for the creation of the schema for use with
2031
+ # this option. Also note that for most use cases, the block yielded by
2032
+ # this method is a more convenient way to configure the schema.
2033
+ #
2034
+ # @yield [updater] A block for setting the schema of the destination
2035
+ # table and other options for the load job. The schema can be omitted
2036
+ # if the destination table already exists, or if you're loading data
2037
+ # from a Google Cloud Datastore backup.
2038
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
2039
+ # updater to modify the load job and its schema.
2040
+ #
2041
+ # @return [Boolean] Returns `true` if the load job was successful.
2042
+ #
2043
+ # @example
2044
+ # require "google/cloud/bigquery"
2045
+ #
2046
+ # bigquery = Google::Cloud::Bigquery.new
2047
+ # dataset = bigquery.dataset "my_dataset"
2048
+ #
2049
+ # gs_url = "gs://my-bucket/file-name.csv"
2050
+ # dataset.load "my_new_table", gs_url do |schema|
2051
+ # schema.string "first_name", mode: :required
2052
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
2053
+ # nested_schema.string "place", mode: :required
2054
+ # nested_schema.integer "number_of_years", mode: :required
2055
+ # end
2056
+ # end
2057
+ #
2058
+ # @example Pass a google-cloud-storage `File` instance:
2059
+ # require "google/cloud/bigquery"
2060
+ # require "google/cloud/storage"
2061
+ #
2062
+ # bigquery = Google::Cloud::Bigquery.new
2063
+ # dataset = bigquery.dataset "my_dataset"
2064
+ #
2065
+ # storage = Google::Cloud::Storage.new
2066
+ # bucket = storage.bucket "my-bucket"
2067
+ # file = bucket.file "file-name.csv"
2068
+ # dataset.load "my_new_table", file do |schema|
2069
+ # schema.string "first_name", mode: :required
2070
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
2071
+ # nested_schema.string "place", mode: :required
2072
+ # nested_schema.integer "number_of_years", mode: :required
2073
+ # end
2074
+ # end
2075
+ #
2076
+ # @example Pass a list of google-cloud-storage files:
2077
+ # require "google/cloud/bigquery"
2078
+ # require "google/cloud/storage"
2079
+ #
2080
+ # bigquery = Google::Cloud::Bigquery.new
2081
+ # dataset = bigquery.dataset "my_dataset"
2082
+ #
2083
+ # storage = Google::Cloud::Storage.new
2084
+ # bucket = storage.bucket "my-bucket"
2085
+ # file = bucket.file "file-name.csv"
2086
+ # list = [file, "gs://my-bucket/file-name2.csv"]
2087
+ # dataset.load "my_new_table", list do |schema|
2088
+ # schema.string "first_name", mode: :required
2089
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
2090
+ # nested_schema.string "place", mode: :required
2091
+ # nested_schema.integer "number_of_years", mode: :required
2092
+ # end
2093
+ # end
2094
+ #
2095
+ # @example Upload a file directly:
2096
+ # require "google/cloud/bigquery"
2097
+ #
2098
+ # bigquery = Google::Cloud::Bigquery.new
2099
+ # dataset = bigquery.dataset "my_dataset"
2100
+ #
2101
+ # file = File.open "my_data.csv"
2102
+ # dataset.load "my_new_table", file do |schema|
2103
+ # schema.string "first_name", mode: :required
2104
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
2105
+ # nested_schema.string "place", mode: :required
2106
+ # nested_schema.integer "number_of_years", mode: :required
2107
+ # end
2108
+ # end
2109
+ #
2110
+ # @example Schema is not required with a Cloud Datastore backup:
2111
+ # require "google/cloud/bigquery"
2112
+ #
2113
+ # bigquery = Google::Cloud::Bigquery.new
2114
+ # dataset = bigquery.dataset "my_dataset"
2115
+ #
2116
+ # dataset.load "my_new_table",
2117
+ # "gs://my-bucket/xxxx.kind_name.backup_info" do |j|
2118
+ # j.format = "datastore_backup"
2119
+ # end
2120
+ #
2121
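+ # @example Replacing existing table data on load (a minimal sketch):
+ # # Assumes the table and bucket from the examples above; `write: "truncate"` overwrites
+ # # any existing rows, and `create: "needed"` creates the table if it is missing.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # dataset.load "my_new_table", "gs://my-bucket/file-name.csv",
+ # create: "needed",
+ # write: "truncate"
+ #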
+ # @!group Data
2122
+ #
2123
+ def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2124
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
2125
+ quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
2126
+ job = load_job table_id, files,
2127
+ format: format, create: create, write: write, projection_fields: projection_fields,
2128
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
2129
+ delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
2130
+ quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
2131
+ null_marker: null_marker, &block
2132
+
2133
+ job.wait_until_done!
2134
+ ensure_job_succeeded! job
2135
+ true
2136
+ end
2137
+
2138
+ ##
2139
+ # Reloads the dataset with current data from the BigQuery service.
2140
+ #
2141
+ # @return [Google::Cloud::Bigquery::Dataset] Returns the reloaded
2142
+ # dataset.
2143
+ #
2144
+ # @example Skip retrieving the dataset from the service, then load it:
2145
+ # require "google/cloud/bigquery"
2146
+ #
2147
+ # bigquery = Google::Cloud::Bigquery.new
2148
+ #
2149
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
2150
+ # dataset.reload!
2151
+ #
2152
+ def reload!
2153
+ ensure_service!
2154
+ @gapi = service.get_dataset dataset_id
2155
+ @reference = nil
2156
+ @exists = nil
2157
+ self
2158
+ end
2159
+ alias refresh! reload!
2160
+
2161
+ ##
2162
+ # Determines whether the dataset exists in the BigQuery service. The
2163
+ # result is cached locally. To refresh state, set `force` to `true`.
2164
+ #
2165
+ # @param [Boolean] force Force the latest resource representation to be
2166
+ # retrieved from the BigQuery service when `true`. Otherwise the
2167
+ # return value of this method will be memoized to reduce the number of
2168
+ # API calls made to the BigQuery service. The default is `false`.
2169
+ #
2170
+ # @return [Boolean] `true` when the dataset exists in the BigQuery
2171
+ # service, `false` otherwise.
2172
+ #
2173
+ # @example
2174
+ # require "google/cloud/bigquery"
2175
+ #
2176
+ # bigquery = Google::Cloud::Bigquery.new
2177
+ #
2178
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
2179
+ # dataset.exists? # true
2180
+ #
2181
+ def exists? force: false
2182
+ return gapi_exists? if force
2183
+ # If we have a memoized value, return it
2184
+ return @exists unless @exists.nil?
2185
+ # Always true if we have a gapi object
2186
+ return true if resource?
2187
+ gapi_exists?
2188
+ end
2189
+
2190
+ ##
2191
+ # Whether the dataset was created without retrieving the resource
2192
+ # representation from the BigQuery service.
2193
+ #
2194
+ # @return [Boolean] `true` when the dataset is just a local reference
2195
+ # object, `false` otherwise.
2196
+ #
2197
+ # @example
2198
+ # require "google/cloud/bigquery"
2199
+ #
2200
+ # bigquery = Google::Cloud::Bigquery.new
2201
+ #
2202
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
2203
+ #
2204
+ # dataset.reference? # true
2205
+ # dataset.reload!
2206
+ # dataset.reference? # false
2207
+ #
2208
+ def reference?
2209
+ @gapi.nil?
2210
+ end
2211
+
2212
+ ##
2213
+ # Whether the dataset was created with a resource representation from
2214
+ # the BigQuery service.
2215
+ #
2216
+ # @return [Boolean] `true` when the dataset was created with a resource
2217
+ # representation, `false` otherwise.
2218
+ #
2219
+ # @example
2220
+ # require "google/cloud/bigquery"
2221
+ #
2222
+ # bigquery = Google::Cloud::Bigquery.new
2223
+ #
2224
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
2225
+ #
2226
+ # dataset.resource? # false
2227
+ # dataset.reload!
2228
+ # dataset.resource? # true
2229
+ #
2230
+ def resource?
2231
+ !@gapi.nil?
2232
+ end
2233
+
2234
+ ##
2235
+ # Whether the dataset was created with a partial resource representation
2236
+ # from the BigQuery service by retrieval through {Project#datasets}.
2237
+ # See [Datasets: list
2238
+ # response](https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#response)
2239
+ # for the contents of the partial representation. Accessing any
2240
+ # attribute outside of the partial representation will result in loading
2241
+ # the full representation.
2242
+ #
2243
+ # @return [Boolean] `true` when the dataset was created with a partial
2244
+ # resource representation, `false` otherwise.
2245
+ #
2246
+ # @example
2247
+ # require "google/cloud/bigquery"
2248
+ #
2249
+ # bigquery = Google::Cloud::Bigquery.new
2250
+ #
2251
+ # dataset = bigquery.datasets.first
2252
+ #
2253
+ # dataset.resource_partial? # true
2254
+ # dataset.description # Loads the full resource.
2255
+ # dataset.resource_partial? # false
2256
+ #
2257
+ def resource_partial?
2258
+ @gapi.is_a? Google::Apis::BigqueryV2::DatasetList::Dataset
2259
+ end
2260
+
2261
+ ##
2262
+ # Whether the dataset was created with a full resource representation
2263
+ # from the BigQuery service.
2264
+ #
2265
+ # @return [Boolean] `true` when the dataset was created with a full
2266
+ # resource representation, `false` otherwise.
2267
+ #
2268
+ # @example
2269
+ # require "google/cloud/bigquery"
2270
+ #
2271
+ # bigquery = Google::Cloud::Bigquery.new
2272
+ #
2273
+ # dataset = bigquery.dataset "my_dataset"
2274
+ #
2275
+ # dataset.resource_full? # true
2276
+ #
2277
+ def resource_full?
2278
+ @gapi.is_a? Google::Apis::BigqueryV2::Dataset
2279
+ end
2280
+
2281
+ ##
2282
+ # @private New Dataset from a Google API Client object.
2283
+ def self.from_gapi gapi, conn
2284
+ new.tap do |f|
2285
+ f.gapi = gapi
2286
+ f.service = conn
2287
+ end
2288
+ end
2289
+
2290
+ ##
2291
+ # @private New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
2292
+ def self.new_reference project_id, dataset_id, service
2293
+ raise ArgumentError, "dataset_id is required" unless dataset_id
2294
+ new.tap do |b|
2295
+ reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new \
2296
+ project_id: project_id, dataset_id: dataset_id
2297
+ b.service = service
2298
+ b.instance_variable_set :@reference, reference_gapi
2299
+ end
2300
+ end
2301
+
2302
+ ##
2303
+ # Inserts data into the given table for near-immediate querying, without
2304
+ # the need to complete a load operation before the data can appear in
2305
+ # query results.
2306
+ #
2307
+ # Because BigQuery's streaming API is designed for high insertion rates,
2308
+ # modifications to the underlying table metadata are eventually
2309
+ # consistent when interacting with the streaming system. In most cases
2310
+ # metadata changes are propagated within minutes, but during this period
2311
+ # API responses may reflect the inconsistent state of the table.
2312
+ #
2313
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
2314
+ # Streaming Data Into BigQuery
2315
+ #
2316
+ # @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
2317
+ # BigQuery Troubleshooting: Metadata errors for streaming inserts
2318
+ #
2319
+ # @param [String] table_id The ID of the destination table.
2320
+ # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
2321
+ # containing the data. Required.
2322
+ # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
2323
+ # detect duplicate insertion requests on a best-effort basis. For more information, see [data
2324
+ # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
2325
+ # not provided, the client library will assign a UUID to each row before the request is sent.
2326
+ #
2327
+ # The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of an
2328
+ # ID for a specific row in the array.
2329
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
2330
+ # if invalid rows exist. The default value is `false`, which causes
2331
+ # the entire request to fail if any invalid rows exist.
2332
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
2333
+ # do not match the schema. The unknown values are ignored. Default is
2334
+ # false, which treats unknown values as errors.
2335
+ # @param [Boolean] autocreate Specifies whether the method should create
2336
+ # a new table with the given `table_id`, if no table is found for
2337
+ # `table_id`. The default value is false.
2338
+ #
2339
+ # @yield [table] a block for setting the table
2340
+ # @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
2341
+ # to set additional properties on the table in the API request to
2342
+ # create it. Only used when `autocreate` is set and the table does not
2343
+ # already exist.
2344
+ #
2345
+ # @return [Google::Cloud::Bigquery::InsertResponse] An insert response
2346
+ # object.
2347
+ #
2348
+ # @example
2349
+ # require "google/cloud/bigquery"
2350
+ #
2351
+ # bigquery = Google::Cloud::Bigquery.new
2352
+ # dataset = bigquery.dataset "my_dataset"
2353
+ #
2354
+ # rows = [
2355
+ # { "first_name" => "Alice", "age" => 21 },
2356
+ # { "first_name" => "Bob", "age" => 22 }
2357
+ # ]
2358
+ # dataset.insert "my_table", rows
2359
+ #
2360
+ # @example Avoid retrieving the dataset with `skip_lookup`:
2361
+ # require "google/cloud/bigquery"
2362
+ #
2363
+ # bigquery = Google::Cloud::Bigquery.new
2364
+ #
2365
+ # dataset = bigquery.dataset "my_dataset", skip_lookup: true
2366
+ #
2367
+ # rows = [
2368
+ # { "first_name" => "Alice", "age" => 21 },
2369
+ # { "first_name" => "Bob", "age" => 22 }
2370
+ # ]
2371
+ # dataset.insert "my_table", rows
2372
+ #
2373
+ # @example Using `autocreate` to create a new table if none exists:
2374
+ # require "google/cloud/bigquery"
2375
+ #
2376
+ # bigquery = Google::Cloud::Bigquery.new
2377
+ # dataset = bigquery.dataset "my_dataset"
2378
+ #
2379
+ # rows = [
2380
+ # { "first_name" => "Alice", "age" => 21 },
2381
+ # { "first_name" => "Bob", "age" => 22 }
2382
+ # ]
2383
+ # dataset.insert "my_table", rows, autocreate: true do |t|
2384
+ # t.schema.string "first_name", mode: :required
2385
+ # t.schema.integer "age", mode: :required
2386
+ # end
2387
+ #
2388
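+ # @example Supplying insert IDs for best-effort deduplication (a minimal sketch):
+ # # Assumes the same rows as above; pass one ID per row, or `:skip` to disable
+ # # client-side ID generation entirely.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # rows = [
+ # { "first_name" => "Alice", "age" => 21 },
+ # { "first_name" => "Bob", "age" => 22 }
+ # ]
+ # dataset.insert "my_table", rows, insert_ids: ["id-alice", "id-bob"]
+ #
+ # # Or skip ID generation for all rows:
+ # dataset.insert "my_table", rows, insert_ids: :skip
+ #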
+ # @!group Data
2389
+ #
2390
+ def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
2391
+ rows = [rows] if rows.is_a? Hash
2392
+ raise ArgumentError, "No rows provided" if rows.empty?
2393
+
2394
+ insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
2395
+ insert_ids = Array insert_ids
2396
+ if insert_ids.count.positive? && insert_ids.count != rows.count
2397
+ raise ArgumentError, "insert_ids must be the same size as rows"
2398
+ end
2399
+
2400
+ if autocreate
2401
+ insert_data_with_autocreate table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
2402
+ insert_ids: insert_ids, &block
2403
+ else
2404
+ insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
2405
+ insert_ids: insert_ids
2406
+ end
2407
+ end
2408
+
2409
+ ##
2410
+ # Create an asynchronous inserter object used to insert rows in batches.
2411
+ #
2412
+ # @param [String] table_id The ID of the table to insert rows into.
2413
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
2414
+ # if invalid rows exist. The default value is `false`, which causes
2415
+ # the entire request to fail if any invalid rows exist.
2416
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
2417
+ # do not match the schema. The unknown values are ignored. Default is
2418
+ # false, which treats unknown values as errors.
2419
+ # @param [Integer] max_bytes The maximum size of rows to be
2420
+ # collected before the batch is published. Default is 10,000,000
2421
+ # (10MB).
2422
+ # @param [Integer] max_rows The maximum number of rows to be collected
2423
+ # before the batch is published. Default is 500.
2424
+ # @param [Numeric] interval The number of seconds to collect
2425
+ # rows before the batch is published. Default is 10.
2426
+ # @param [Numeric] threads The number of threads used to insert
2427
+ # batches of rows. Default is 4.
2428
+ # @yield [result] the callback for when a batch of rows is inserted
2429
+ # @yieldparam [Table::AsyncInserter::Result] result the result of the
2430
+ # asynchronous insert
2431
+ #
2432
+ # @return [Table::AsyncInserter] Returns an inserter object.
2433
+ #
2434
+ # @example
2435
+ # require "google/cloud/bigquery"
2436
+ #
2437
+ # bigquery = Google::Cloud::Bigquery.new
2438
+ # dataset = bigquery.dataset "my_dataset"
2439
+ # inserter = dataset.insert_async "my_table" do |result|
2440
+ # if result.error?
2441
+ # log_error result.error
2442
+ # else
2443
+ # log_insert "inserted #{result.insert_count} rows " \
2444
+ # "with #{result.error_count} errors"
2445
+ # end
2446
+ # end
2447
+ #
2448
+ # rows = [
2449
+ # { "first_name" => "Alice", "age" => 21 },
2450
+ # { "first_name" => "Bob", "age" => 22 }
2451
+ # ]
2452
+ # inserter.insert rows
2453
+ #
2454
+ # inserter.stop.wait!
2455
+ #
2456
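+ # @example Tuning the batching thresholds (a minimal sketch):
+ # # Assumes the same table and rows as above; batches are published after at most
+ # # 100 rows or 5 seconds of collection.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # inserter = dataset.insert_async "my_table", max_rows: 100, interval: 5
+ #
+ # rows = [
+ # { "first_name" => "Alice", "age" => 21 },
+ # { "first_name" => "Bob", "age" => 22 }
+ # ]
+ # inserter.insert rows
+ #
+ # inserter.stop.wait!
+ #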
+ def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
2457
+ interval: 10, threads: 4, &block
2458
+ ensure_service!
2459
+
2460
+ # Get table, don't use Dataset#table which handles NotFoundError
2461
+ gapi = service.get_table dataset_id, table_id
2462
+ table = Table.from_gapi gapi, service
2463
+ # Get the AsyncInserter from the table
2464
+ table.insert_async skip_invalid: skip_invalid,
2465
+ ignore_unknown: ignore_unknown,
2466
+ max_bytes: max_bytes, max_rows: max_rows,
2467
+ interval: interval, threads: threads, &block
2468
+ end
2469
+
2470
+ protected
2471
+
2472
+ def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
2473
+ insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
2474
+ rescue Google::Cloud::NotFoundError
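+ # The table was not found: wait a randomized 1-60 seconds, create the table
+ # (ignoring AlreadyExistsError from a concurrent creation), wait another 60
+ # seconds for the new table to become available, then retry the insert.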
2475
+ sleep rand(1..60)
2476
+ begin
2477
+ create_table table_id do |tbl_updater|
2478
+ yield tbl_updater if block_given?
2479
+ end
2480
+ # rubocop:disable Lint/HandleExceptions
2481
+ rescue Google::Cloud::AlreadyExistsError
2482
+ end
2483
+ # rubocop:enable Lint/HandleExceptions
2484
+
2485
+ sleep 60
2486
+ retry
2487
+ end
2488
+
2489
+ def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
2490
+ rows = [rows] if rows.is_a? Hash
2491
+ raise ArgumentError, "No rows provided" if rows.empty?
2492
+ ensure_service!
2493
+ gapi = service.insert_tabledata dataset_id, table_id, rows, skip_invalid: skip_invalid,
2494
+ ignore_unknown: ignore_unknown,
2495
+ insert_ids: insert_ids
2496
+ InsertResponse.from_gapi rows, gapi
2497
+ end
2498
+
2499
+ ##
2500
+ # Raise an error unless an active service is available.
2501
+ def ensure_service!
2502
+ raise "Must have active connection" unless service
2503
+ end
2504
+
2505
+ ##
2506
+ # Ensures the Google::Apis::BigqueryV2::Dataset object has been loaded
2507
+ # from the service.
2508
+ def ensure_gapi!
2509
+ ensure_service!
2510
+ return unless reference?
2511
+ reload!
2512
+ end
2513
+
2514
+ ##
2515
+ # Fetch gapi and memoize whether resource exists.
2516
+ def gapi_exists?
2517
+ reload!
2518
+ @exists = true
2519
+ rescue Google::Cloud::NotFoundError
2520
+ @exists = false
2521
+ end
2522
+
2523
+ def patch_gapi! *attributes
2524
+ return if attributes.empty?
2525
+ ensure_service!
2526
+ patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
2527
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
2528
+ patch_gapi.etag = etag if etag
2529
+ @gapi = service.patch_dataset dataset_id, patch_gapi
2530
+ end
2531
+
2532
+ ##
2533
+ # Load the complete representation of the dataset if it has been
2534
+ # only partially loaded by a request to the API list method.
2535
+ def ensure_full_data!
2536
+ reload! unless resource_full?
2537
+ end
2538
+
2539
+ def ensure_job_succeeded! job
2540
+ return unless job.failed?
2541
+ begin
2542
+ # raise to activate ruby exception cause handling
2543
+ raise job.gapi_error
2544
+ rescue StandardError => e
2545
+ # wrap Google::Apis::Error with Google::Cloud::Error
2546
+ raise Google::Cloud::Error.from_error(e)
2547
+ end
2548
+ end
2549
+
2550
+ def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil
2551
+ job_ref = service.job_ref_from job_id, prefix
2552
+ Google::Apis::BigqueryV2::Job.new(
2553
+ job_reference: job_ref,
2554
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
2555
+ load: Google::Apis::BigqueryV2::JobConfigurationLoad.new(
2556
+ destination_table: Google::Apis::BigqueryV2::TableReference.new(
2557
+ project_id: @service.project,
2558
+ dataset_id: dataset_id,
2559
+ table_id: table_id
2560
+ )
2561
+ ),
2562
+ dry_run: dryrun
2563
+ )
2564
+ )
2565
+ end
2566
+
2567
+ def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
2568
+ skip_leading: nil, null_marker: nil
2569
+ job.jagged_rows = jagged_rows unless jagged_rows.nil?
2570
+ job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
2571
+ job.delimiter = delimiter unless delimiter.nil?
2572
+ job.null_marker = null_marker unless null_marker.nil?
2573
+ job.quote = quote unless quote.nil?
2574
+ job.skip_leading = skip_leading unless skip_leading.nil?
2575
+ end
2576
+
2577
+ def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
2578
+ encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
2579
+ skip_leading: nil, null_marker: nil
2580
+ job.format = format unless format.nil?
2581
+ job.projection_fields = projection_fields unless projection_fields.nil?
2582
+ job.encoding = encoding unless encoding.nil?
2583
+ job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
2584
+ job.max_bad_records = max_bad_records unless max_bad_records.nil?
2585
+ load_job_csv_options! job, jagged_rows: jagged_rows,
2586
+ quoted_newlines: quoted_newlines,
2587
+ delimiter: delimiter,
2588
+ quote: quote,
2589
+ skip_leading: skip_leading,
2590
+ null_marker: null_marker
2591
+ end
2592
+
2593
+ def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2594
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
2595
+ max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
2596
+ prefix: nil, labels: nil, autodetect: nil, null_marker: nil
2597
+ new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
2598
+ LoadJob::Updater.new(new_job).tap do |job|
2599
+ job.location = location if location # may be dataset reference
2600
+ job.create = create unless create.nil?
2601
+ job.write = write unless write.nil?
2602
+ job.schema = schema unless schema.nil?
2603
+ job.autodetect = autodetect unless autodetect.nil?
2604
+ job.labels = labels unless labels.nil?
2605
+ load_job_file_options! job, format: format,
2606
+ projection_fields: projection_fields,
2607
+ jagged_rows: jagged_rows,
2608
+ quoted_newlines: quoted_newlines,
2609
+ encoding: encoding,
2610
+ delimiter: delimiter,
2611
+ ignore_unknown: ignore_unknown,
2612
+ max_bad_records: max_bad_records,
2613
+ quote: quote,
2614
+ skip_leading: skip_leading,
2615
+ null_marker: null_marker
2616
+ end
2617
+ end
2618
+
2619
+ def load_storage urls, job_gapi
2620
+ # Convert to storage URL
2621
+ urls = [urls].flatten.map do |url|
2622
+ if url.respond_to? :to_gs_url
2623
+ url.to_gs_url
2624
+ elsif url.is_a? URI
2625
+ url.to_s
2626
+ else
2627
+ url
2628
+ end
2629
+ end
2630
+
2631
+ unless urls.nil?
2632
+ job_gapi.configuration.load.update! source_uris: urls
2633
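+ # Derive the source format from the URLs only when the caller has not set one explicitly.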
+ if job_gapi.configuration.load.source_format.nil?
2634
+ source_format = Convert.derive_source_format_from_list urls
2635
+ job_gapi.configuration.load.source_format = source_format unless source_format.nil?
2636
+ end
2637
+ end
2638
+
2639
+ gapi = service.load_table_gs_url job_gapi
2640
+ Job.from_gapi gapi, service
2641
+ end
2642
+
2643
+ def load_local file, job_gapi
2644
+ path = Pathname(file).to_path
2645
+ if job_gapi.configuration.load.source_format.nil?
2646
+ source_format = Convert.derive_source_format path
2647
+ job_gapi.configuration.load.source_format = source_format unless source_format.nil?
2648
+ end
2649
+
2650
+ gapi = service.load_table_file file, job_gapi
2651
+ Job.from_gapi gapi, service
2652
+ end
2653
+
2654
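+ ##
+ # @private Dispatch to a direct file upload for local files, or to a
+ # Cloud Storage URI load for everything else.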
+ def load_local_or_uri file, updater
2655
+ job_gapi = updater.to_gapi
2656
+ job = if local_file? file
2657
+ load_local file, job_gapi
2658
+ else
2659
+ load_storage file, job_gapi
2660
+ end
2661
+ job
2662
+ end
2663
+
2664
+ def storage_url? files
2665
+ [files].flatten.all? do |file|
2666
+ file.respond_to?(:to_gs_url) ||
2667
+ (file.respond_to?(:to_str) && file.to_str.downcase.start_with?("gs://")) ||
2668
+ (file.is_a?(URI) && file.to_s.downcase.start_with?("gs://"))
2669
+ end
2670
+ end
2671
+
2672
+ def local_file? file
2673
+ ::File.file? file
2674
+ rescue StandardError
2675
+ false
2676
+ end
2677
+
2678
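+ ##
+ # @private Convert UDF values to API resources: strings beginning with
+ # `gs://` become resource URIs, everything else is treated as inline code.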
+ def udfs_gapi array_or_str
2679
+ return [] if array_or_str.nil?
2680
+ Array(array_or_str).map do |uri_or_code|
2681
+ resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
2682
+ if uri_or_code.start_with? "gs://"
2683
+ resource.resource_uri = uri_or_code
2684
+ else
2685
+ resource.inline_code = uri_or_code
2686
+ end
2687
+ resource
2688
+ end
2689
+ end
2690
+
2691
+ ##
2692
+ # Yielded to a block to accumulate changes for a create request. See {Project#create_dataset}.
2693
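+ #
+ # @example A minimal sketch of the block form of {Project#create_dataset}:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # dataset = bigquery.create_dataset "my_new_dataset" do |ds|
+ # ds.name = "My new dataset"
+ # ds.description = "This is my new dataset"
+ # end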
+ class Updater < Dataset
2694
+ ##
2695
+ # @private A list of attributes that were updated.
2696
+ attr_reader :updates
2697
+
2698
+ ##
2699
+ # @private Create an Updater object.
2700
+ def initialize gapi
2701
+ @updates = []
2702
+ @gapi = gapi
2703
+ end
2704
+
2705
+ def access
2706
+ # TODO: make sure to call ensure_full_data! on Dataset#update
2707
+ @access ||= Access.from_gapi @gapi
2708
+ if block_given?
2709
+ yield @access
2710
+ check_for_mutated_access!
2711
+ end
2712
+ # Same as Dataset#access, but not frozen
2713
+ @access
2714
+ end
2715
+
2716
+ # rubocop:disable Style/MethodDefParentheses
2717
+
2718
+ ##
2719
+ # @raise [RuntimeError] not implemented
2720
+ def delete(*)
2721
+ raise "not implemented in #{self.class}"
2722
+ end
2723
+
2724
+ ##
2725
+ # @raise [RuntimeError] not implemented
2726
+ def create_table(*)
2727
+ raise "not implemented in #{self.class}"
2728
+ end
2729
+
2730
+ ##
2731
+ # @raise [RuntimeError] not implemented
2732
+ def create_view(*)
2733
+ raise "not implemented in #{self.class}"
2734
+ end
2735
+
2736
+ ##
2737
+ # @raise [RuntimeError] not implemented
2738
+ def table(*)
2739
+ raise "not implemented in #{self.class}"
2740
+ end
2741
+
2742
+ ##
2743
+ # @raise [RuntimeError] not implemented
2744
+ def tables(*)
2745
+ raise "not implemented in #{self.class}"
2746
+ end
2747
+
2748
+ ##
2749
+ # @raise [RuntimeError] not implemented
2750
+ def model(*)
2751
+ raise "not implemented in #{self.class}"
2752
+ end
2753
+
2754
+ ##
2755
+ # @raise [RuntimeError] not implemented
2756
+ def models(*)
2757
+ raise "not implemented in #{self.class}"
2758
+ end
2759
+
2760
+ ##
2761
+ # @raise [RuntimeError] not implemented
2762
+ def create_routine(*)
2763
+ raise "not implemented in #{self.class}"
2764
+ end
2765
+
2766
+ ##
2767
+ # @raise [RuntimeError] not implemented
2768
+ def routine(*)
2769
+ raise "not implemented in #{self.class}"
2770
+ end
2771
+
2772
+ ##
2773
+ # @raise [RuntimeError] not implemented
2774
+ def routines(*)
2775
+ raise "not implemented in #{self.class}"
2776
+ end
2777
+
2778
+ ##
2779
+ # @raise [RuntimeError] not implemented
2780
+ def query_job(*)
2781
+ raise "not implemented in #{self.class}"
2782
+ end
2783
+
2784
+ ##
2785
+ # @raise [RuntimeError] not implemented
2786
+ def query(*)
2787
+ raise "not implemented in #{self.class}"
2788
+ end
2789
+
2790
+ ##
2791
+ # @raise [RuntimeError] not implemented
2792
+ def external(*)
2793
+ raise "not implemented in #{self.class}"
2794
+ end
2795
+
2796
+ ##
2797
+ # @raise [RuntimeError] not implemented
2798
+ def load_job(*)
2799
+ raise "not implemented in #{self.class}"
2800
+ end
2801
+
2802
+ ##
2803
+ # @raise [RuntimeError] not implemented
2804
+ def load(*)
2805
+ raise "not implemented in #{self.class}"
2806
+ end
2807
+
2808
+ ##
2809
+ # @raise [RuntimeError] not implemented
2810
+ def reload!
2811
+ raise "not implemented in #{self.class}"
2812
+ end
2813
+ alias refresh! reload!
2814
+
2815
+ # rubocop:enable Style/MethodDefParentheses
2816
+
2817
+ ##
2818
+ # @private Make sure any access changes are saved
2819
+ def check_for_mutated_access!
2820
+ return if @access.nil?
2821
+ return unless @access.changed?
2822
+ @gapi.update! access: @access.to_gapi
2823
+ patch_gapi! :access
2824
+ end
2825
+
2826
+ ##
2827
+ # @private
2828
+ def to_gapi
2829
+ check_for_mutated_access!
2830
+ @gapi
2831
+ end
2832
+
2833
+ protected
2834
+
2835
+ ##
2836
+ # Queue up all the updates instead of making them.
2837
+ def patch_gapi! attribute
2838
+ @updates << attribute
2839
+ @updates.uniq!
2840
+ end
2841
+ end
2842
+ end
2843
+ end
2844
+ end
2845
+ end