google-cloud-bigquery 0.28.0 → 0.29.0
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
data/lib/google/cloud/bigquery/table.rb:

@@ -19,7 +19,9 @@ require "google/cloud/bigquery/view"
 require "google/cloud/bigquery/data"
 require "google/cloud/bigquery/table/list"
 require "google/cloud/bigquery/schema"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/insert_response"
+require "google/cloud/bigquery/table/async_inserter"
 require "google/apis/bigquery_v2"
 
 module Google
@@ -82,8 +84,9 @@ module Google
 
 ##
 # A unique ID for this table.
-#
-#
+#
+# @return [String] The ID must contain only letters (a-z, A-Z), numbers
+#   (0-9), or underscores (_). The maximum length is 1,024 characters.
 #
 # @!group Attributes
 #
@@ -94,6 +97,9 @@ module Google
 ##
 # The ID of the `Dataset` containing this table.
 #
+# @return [String] The ID must contain only letters (a-z, A-Z), numbers
+#   (0-9), or underscores (_). The maximum length is 1,024 characters.
+#
 # @!group Attributes
 #
 def dataset_id
@@ -103,6 +109,8 @@ module Google
 ##
 # The ID of the `Project` containing this table.
 #
+# @return [String] The project ID.
+#
 # @!group Attributes
 #
 def project_id
@@ -119,7 +127,11 @@ module Google
 end
 
 ###
-#
+# Checks if the table is time-partitioned. See [Partitioned
+# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+#
+# @return [Boolean] `true` when the table is time-partitioned, `false`
+#   otherwise.
 #
 # @!group Attributes
 #
@@ -128,7 +140,11 @@ module Google
 end
 
 ###
-# The period for which the table is partitioned, if any.
+# The period for which the table is partitioned, if any. See
+# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+#
+# @return [String, nil] The partition type. Currently the only supported
+#   value is "DAY".
 #
 # @!group Attributes
 #
@@ -138,15 +154,15 @@ module Google
 end
 
 ##
-# Sets the partitioning for the table. See [Partitioned
-# ](https://cloud.google.com/bigquery/docs/partitioned-tables).
+# Sets the partitioning for the table. See [Partitioned
+# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
 #
 # You can only set partitioning when creating a table as in
 # the example below. BigQuery does not allow you to change partitioning
 # on an existing table.
 #
 # @param [String] type The partition type. Currently the only
-#
+#   supported value is "DAY".
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -168,7 +184,11 @@ module Google
 
 
 ###
-# The expiration for the table partitions, if any, in seconds.
+# The expiration for the table partitions, if any, in seconds. See
+# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+#
+# @return [Integer, nil] The expiration time, in seconds, for data in
+#   partitions.
 #
 # @!group Attributes
 #
@@ -180,14 +200,14 @@ module Google
 end
 
 ##
-# Sets the partition expiration for the table. See [Partitioned
-# ](https://cloud.google.com/bigquery/docs/partitioned-tables).
-# table must also be partitioned.
+# Sets the partition expiration for the table. See [Partitioned
+# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+# The table must also be partitioned.
 #
 # See {Table#time_partitioning_type=}.
 #
 # @param [Integer] expiration An expiration time, in seconds,
-#
+#   for data in partitions.
 #
 # @example
 #   require "google/cloud/bigquery"
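Both partitioning setters only take effect while the table is being created, so the natural place to call them is inside the `Dataset#create_table` block. A short sketch (the dataset and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # Partitioning cannot be changed later, so configure it at creation.
    table = dataset.create_table "my_partitioned_table" do |t|
      t.time_partitioning_type = "DAY"          # the only supported type
      t.time_partitioning_expiration = 86_400   # keep partitions one day
    end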
@@ -215,6 +235,8 @@ module Google
 #   `project_name:datasetId.tableId`. To use this value in queries see
 #   {#query_id}.
 #
+# @return [String] The combined ID.
+#
 # @!group Attributes
 #
 def id
@@ -236,6 +258,9 @@ module Google
 #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
 #   dialect. Optional. The default value is false.
 #
+# @return [String] The appropriate table ID for use in queries,
+#   depending on SQL type.
+#
 # @example
 #   require "google/cloud/bigquery"
 #
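Because `#query_id` applies the quoting that matches the SQL dialect, it can be interpolated directly into a query string. A minimal sketch, assuming the `my_dataset.my_table` fixtures used throughout these docs:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table = bigquery.dataset("my_dataset").table "my_table"

    # With standard SQL (the default), query_id emits the
    # backtick-quoted `project.dataset.table` form.
    data = bigquery.query "SELECT first_name FROM #{table.query_id}"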
@@ -258,6 +283,8 @@ module Google
 ##
 # The name of the table.
 #
+# @return [String] The friendly name.
+#
 # @!group Attributes
 #
 def name
@@ -267,6 +294,8 @@ module Google
 ##
 # Updates the name of the table.
 #
+# @param [String] new_name The new friendly name.
+#
 # @!group Attributes
 #
 def name= new_name
@@ -275,7 +304,9 @@ module Google
 end
 
 ##
-#
+# The ETag hash of the table.
+#
+# @return [String] The ETag hash.
 #
 # @!group Attributes
 #
@@ -285,7 +316,9 @@ module Google
 end
 
 ##
-# A URL that can be used to access the
+# A URL that can be used to access the table using the REST API.
+#
+# @return [String] A REST URL for the resource.
 #
 # @!group Attributes
 #
@@ -295,7 +328,9 @@ module Google
 end
 
 ##
-#
+# A user-friendly description of the table.
+#
+# @return [String] The description.
 #
 # @!group Attributes
 #
@@ -305,7 +340,9 @@ module Google
 end
 
 ##
-# Updates the description of the table.
+# Updates the user-friendly description of the table.
+#
+# @param [String] new_description The new user-friendly description.
 #
 # @!group Attributes
 #
@@ -317,6 +354,8 @@ module Google
 ##
 # The number of bytes in the table.
 #
+# @return [Integer] The count of bytes in the table.
+#
 # @!group Data
 #
 def bytes_count
@@ -331,6 +370,8 @@ module Google
 ##
 # The number of rows in the table.
 #
+# @return [Integer] The count of rows in the table.
+#
 # @!group Data
 #
 def rows_count
@@ -345,6 +386,8 @@ module Google
 ##
 # The time when this table was created.
 #
+# @return [Time, nil] The creation time.
+#
 # @!group Attributes
 #
 def created_at
@@ -361,6 +404,8 @@ module Google
 #   If not present, the table will persist indefinitely.
 #   Expired tables will be deleted and their storage reclaimed.
 #
+# @return [Time, nil] The expiration time.
+#
 # @!group Attributes
 #
 def expires_at
@@ -375,6 +420,8 @@ module Google
 ##
 # The date when this table was last modified.
 #
+# @return [Time, nil] The last modified time.
+#
 # @!group Attributes
 #
 def modified_at
@@ -389,6 +436,8 @@ module Google
 ##
 # Checks if the table's type is "TABLE".
 #
+# @return [Boolean] `true` when the type is `TABLE`, `false` otherwise.
+#
 # @!group Attributes
 #
 def table?
@@ -398,15 +447,31 @@ module Google
 ##
 # Checks if the table's type is "VIEW".
 #
+# @return [Boolean] `true` when the type is `VIEW`, `false` otherwise.
+#
 # @!group Attributes
 #
 def view?
   @gapi.type == "VIEW"
 end
 
+##
+# Checks if the table's type is "EXTERNAL".
+#
+# @return [Boolean] `true` when the type is `EXTERNAL`, `false`
+#   otherwise.
+#
+# @!group Attributes
+#
+def external?
+  @gapi.type == "EXTERNAL"
+end
+
 ##
 # The geographic location where the table should reside. Possible
-# values include EU and US
+# values include `EU` and `US`. The default value is `US`.
+#
+# @return [String] The location code.
 #
 # @!group Attributes
 #
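The three predicates map directly onto the service's `type` values, so callers can branch on them. A small illustrative sketch:

    table = dataset.table "my_table"

    kind = if table.view?
             "logical view, defined by a query"
           elsif table.external?
             "external data source, stored outside BigQuery"
           else
             "regular managed table"
           end
    puts kind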
@@ -415,6 +480,65 @@ module Google
   @gapi.location
 end
 
+##
+# A hash of user-provided labels associated with this table. Labels
+# are used to organize and group tables. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# The returned hash is frozen and changes are not allowed. Use
+# {#labels=} to replace the entire hash.
+#
+# @return [Hash<String, String>] A hash containing key/value pairs.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   labels = table.labels
+#   labels["department"] #=> "shipping"
+#
+# @!group Attributes
+#
+def labels
+  m = @gapi.labels
+  m = m.to_h if m.respond_to? :to_h
+  m.dup.freeze
+end
+
+##
+# Updates the hash of user-provided labels associated with this table.
+# Labels are used to organize and group tables. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# @param [Hash<String, String>] labels A hash containing key/value
+#   pairs.
+#
+#   * Label keys and values can be no longer than 63 characters.
+#   * Label keys and values can contain only lowercase letters, numbers,
+#     underscores, hyphens, and international characters.
+#   * Label keys and values cannot exceed 128 bytes in size.
+#   * Label keys must begin with a letter.
+#   * Label keys must be unique within a table.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   table.labels = { "department" => "shipping" }
+#
+# @!group Attributes
+#
+def labels= labels
+  @gapi.labels = labels
+  patch_gapi! :labels
+end
+
 ##
 # Returns the table's schema. This method can also be used to set,
 # replace, or add to the schema by passing a block. See {Schema} for
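Because the getter hands back a frozen hash and the setter replaces the whole collection with a single patch, in-place edits go through a copy. A short sketch of the read-modify-replace pattern:

    labels = table.labels.dup    # the getter returns a frozen hash
    labels["env"] = "production"
    table.labels = labels        # replaces the entire hash on the service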
@@ -428,7 +552,7 @@ module Google
 # @yield [schema] a block for setting the schema
 # @yieldparam [Schema] schema the object accepting the schema
 #
-# @return [Google::Cloud::Bigquery::Schema]
+# @return [Google::Cloud::Bigquery::Schema] A frozen schema object.
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -462,7 +586,20 @@ module Google
 end
 
 ##
-# The fields of the table.
+# The fields of the table, obtained from its schema.
+#
+# @return [Array<Schema::Field>] An array of field objects.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   table.fields.each do |field|
+#     puts field.name
+#   end
 #
 # @!group Attributes
 #
@@ -471,7 +608,20 @@ module Google
 end
 
 ##
-# The names of the columns in the table.
+# The names of the columns in the table, obtained from its schema.
+#
+# @return [Array<Symbol>] An array of column names.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   table.headers.each do |header|
+#     puts header
+#   end
 #
 # @!group Attributes
 #
@@ -479,6 +629,100 @@ module Google
   schema.headers
 end
 
+##
+# The {External::DataSource} (or subclass) object that represents the
+# external data source that the table represents. Data can be queried
+# from the table, even though the data is not stored in BigQuery.
+# Instead of loading or streaming the data, this object references the
+# external data source.
+#
+# Present only if the table represents an External Data Source. See
+# {#external?} and {External::DataSource}.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources
+#   Querying External Data Sources
+#
+# @return [External::DataSource] The external data source.
+#
+# @!group Attributes
+#
+def external
+  return nil if @gapi.external_data_configuration.nil?
+  External.from_gapi(@gapi.external_data_configuration).freeze
+end
+
+##
+# Set the {External::DataSource} (or subclass) object that represents
+# the external data source that the table represents. Data can be
+# queried from the table, even though the data is not stored in
+# BigQuery. Instead of loading or streaming the data, this object
+# references the external data source.
+#
+# Use only if the table represents an External Data Source. See
+# {#external?} and {External::DataSource}.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources
+#   Querying External Data Sources
+#
+# @param [External::DataSource] external An external data source.
+#
+# @!group Attributes
+#
+def external= external
+  @gapi.external_data_configuration = external.to_gapi
+  patch_gapi! :external_data_configuration
+end
+
+##
+# A lower-bound estimate of the number of bytes currently in this
+# table's streaming buffer, if one is present. This field will be absent
+# if the table is not being streamed to or if there is no data in the
+# streaming buffer.
+#
+# @return [Integer] The estimated number of bytes in the buffer.
+#
+# @!group Attributes
+#
+def buffer_bytes
+  ensure_full_data!
+  @gapi.streaming_buffer.estimated_bytes if @gapi.streaming_buffer
+end
+
+##
+# A lower-bound estimate of the number of rows currently in this
+# table's streaming buffer, if one is present. This field will be absent
+# if the table is not being streamed to or if there is no data in the
+# streaming buffer.
+#
+# @return [Integer] The estimated number of rows in the buffer.
+#
+# @!group Attributes
+#
+def buffer_rows
+  ensure_full_data!
+  @gapi.streaming_buffer.estimated_rows if @gapi.streaming_buffer
+end
+
+##
+# The time of the oldest entry currently in this table's streaming
+# buffer, if one is present. This field will be absent if the table is
+# not being streamed to or if there is no data in the streaming buffer.
+#
+# @return [Time, nil] The oldest entry time.
+#
+# @!group Attributes
+#
+def buffer_oldest_at
+  ensure_full_data!
+  return nil unless @gapi.streaming_buffer
+  oldest_entry_time = @gapi.streaming_buffer.oldest_entry_time
+  begin
+    ::Time.at(Integer(oldest_entry_time) / 1000.0)
+  rescue
+    nil
+  end
+end
+
 ##
 # Retrieves data from the table.
 #
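The new `External` module (added in this release as `external.rb`, +2353 lines in the file list above) supplies the data-source objects these accessors expect. A sketch of wiring a CSV file in Cloud Storage to a table — the `bigquery.external` factory and the CSV option names are taken from that module's docs, so treat them as assumptions here:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"
    table = dataset.table "my_external_table"

    # Build an External::CsvSource; the format is inferred from the URL.
    csv_source = bigquery.external "gs://my-bucket/data.csv" do |csv|
      csv.autodetect = true        # let BigQuery infer the schema
      csv.skip_leading_rows = 1    # skip the header row
    end

    table.external = csv_source    # patches externalDataConfiguration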
@@ -522,19 +766,23 @@ module Google
 def data token: nil, max: nil, start: nil
   ensure_service!
   options = { token: token, max: max, start: start }
-  gapi = service.list_tabledata dataset_id, table_id, options
-  Data.from_gapi gapi,
+  data_gapi = service.list_tabledata dataset_id, table_id, options
+  Data.from_gapi data_gapi, gapi, service
 end
 
 ##
-# Copies the data from the table to another table
-#
-#
-#
-#
+# Copies the data from the table to another table using an asynchronous
+# method. In this method, a {CopyJob} is immediately returned. The
+# caller may poll the service by repeatedly calling {Job#reload!} and
+# {Job#done?} to detect when the job is done, or simply block until the
+# job is done by calling {Job#wait_until_done!}. See also {#copy}.
 #
 # @param [Table, String] destination_table The destination for the
-#   copied data.
+#   copied data. This can also be a string identifier as specified by
+#   the [Query
+#   Reference](https://cloud.google.com/bigquery/query-reference#from):
+#   `project_name:datasetId.tableId`. This is useful for referencing
+#   tables in other projects and datasets.
 # @param [String] create Specifies whether the job is allowed to create
 #   new tables. The default value is `needed`.
 #
@@ -552,6 +800,28 @@ module Google
 #   * `append` - BigQuery appends the data to the table.
 #   * `empty` - An error will be returned if the destination table
 #     already contains data.
+# @param [String] job_id A user-defined ID for the copy job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs. Label
+#   keys and values can be no longer than 63 characters, can only
+#   contain lowercase letters, numeric characters, underscores and
+#   dashes. International characters are allowed. Label values are
+#   optional. Label keys must start with a letter and each label in the
+#   list must have a different key.
 #
 # @return [Google::Cloud::Bigquery::CopyJob]
 #
@@ -563,7 +833,7 @@ module Google
 #   table = dataset.table "my_table"
 #   destination_table = dataset.table "my_destination_table"
 #
-#   copy_job = table.copy destination_table
+#   copy_job = table.copy_job destination_table
 #
 # @example Passing a string identifier for the destination table:
 #   require "google/cloud/bigquery"
@@ -572,13 +842,15 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #   table = dataset.table "my_table"
 #
-#   copy_job = table.copy "other-project:other_dataset.other_table"
+#   copy_job = table.copy_job "other-project:other_dataset.other_table"
 #
 # @!group Data
 #
-def copy destination_table, create: nil, write: nil, dryrun: nil
+def copy_job destination_table, create: nil, write: nil, dryrun: nil,
+             job_id: nil, prefix: nil, labels: nil
   ensure_service!
-  options = { create: create, write: write, dryrun: dryrun }
+  options = { create: create, write: write, dryrun: dryrun,
+              job_id: job_id, prefix: prefix, labels: labels }
   gapi = service.copy_table table_ref,
                             get_table_ref(destination_table),
                             options
@@ -586,7 +858,82 @@ module Google
 end
 
 ##
-#
+# Copies the data from the table to another table using a synchronous
+# method that blocks for a response. Timeouts and transient errors are
+# generally handled as needed to complete the job. See also
+# {#copy_job}.
+#
+# @param [Table, String] destination_table The destination for the
+#   copied data. This can also be a string identifier as specified by
+#   the [Query
+#   Reference](https://cloud.google.com/bigquery/query-reference#from):
+#   `project_name:datasetId.tableId`. This is useful for referencing
+#   tables in other projects and datasets.
+# @param [String] create Specifies whether the job is allowed to create
+#   new tables. The default value is `needed`.
+#
+#   The following values are supported:
+#
+#   * `needed` - Create the table if it does not exist.
+#   * `never` - The table must already exist. A 'notFound' error is
+#     raised if the table does not exist.
+# @param [String] write Specifies how to handle data already present in
+#   the destination table. The default value is `empty`.
+#
+#   The following values are supported:
+#
+#   * `truncate` - BigQuery overwrites the table data.
+#   * `append` - BigQuery appends the data to the table.
+#   * `empty` - An error will be returned if the destination table
+#     already contains data.
+#
+# @return [Boolean] Returns `true` if the copy operation succeeded.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#   destination_table = dataset.table "my_destination_table"
+#
+#   table.copy destination_table
+#
+# @example Passing a string identifier for the destination table:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   table.copy "other-project:other_dataset.other_table"
+#
+# @!group Data
+#
+def copy destination_table, create: nil, write: nil
+  job = copy_job destination_table, create: create, write: write
+  job.wait_until_done!
+
+  if job.failed?
+    begin
+      # raise to activate ruby exception cause handling
+      fail job.gapi_error
+    rescue => e
+      # wrap Google::Apis::Error with Google::Cloud::Error
+      raise Google::Cloud::Error.from_error(e)
+    end
+  end
+
+  true
+end
+
+##
+# Extracts the data from the table to a Google Cloud Storage file using
+# an asynchronous method. In this method, an {ExtractJob} is immediately
+# returned. The caller may poll the service by repeatedly calling
+# {Job#reload!} and {Job#done?} to detect when the job is done, or
+# simply block until the job is done by calling {Job#wait_until_done!}.
+# See also {#extract}.
 #
 # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
 #   Exporting Data From BigQuery
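The new pair follows a single convention across the class: `*_job` methods return immediately with a job handle, while the short-named methods block and convert failures into raised `Google::Cloud::Error`s. A brief sketch of both styles (the prefix and label values are illustrative):

    # Asynchronous: get a CopyJob handle back right away.
    job = table.copy_job destination_table, prefix: "daily_copy_",
                         labels: { "team" => "reporting" }
    job.wait_until_done!
    puts "copy failed: #{job.error}" if job.failed?

    # Synchronous: blocks, returns true, raises on failure.
    table.copy destination_table, write: "truncate"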
@@ -609,6 +956,28 @@ module Google
 #   exported data. Default is <code>,</code>.
 # @param [Boolean] header Whether to print out a header row in the
 #   results. Default is `true`.
+# @param [String] job_id A user-defined ID for the extract job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs. Label
+#   keys and values can be no longer than 63 characters, can only
+#   contain lowercase letters, numeric characters, underscores and
+#   dashes. International characters are allowed. Label values are
+#   optional. Label keys must start with a letter and each label in the
+#   list must have a different key.
 #
 #
 # @return [Google::Cloud::Bigquery::ExtractJob]
@@ -620,20 +989,84 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #   table = dataset.table "my_table"
 #
-#   extract_job = table.extract "gs://my-bucket/file-name.json",
+#   extract_job = table.extract_job "gs://my-bucket/file-name.json",
 #     format: "json"
 #
 # @!group Data
 #
-def extract extract_url, format: nil, compression: nil,
-            delimiter: nil, header: nil, dryrun: nil
+def extract_job extract_url, format: nil, compression: nil,
+                delimiter: nil, header: nil, dryrun: nil, job_id: nil,
+                prefix: nil, labels: nil
   ensure_service!
   options = { format: format, compression: compression,
-              delimiter: delimiter, header: header, dryrun: dryrun }
+              delimiter: delimiter, header: header, dryrun: dryrun,
+              job_id: job_id, prefix: prefix, labels: labels }
   gapi = service.extract_table table_ref, extract_url, options
   Job.from_gapi gapi, service
 end
 
+##
+# Extracts the data from the table to a Google Cloud Storage file using
+# a synchronous method that blocks for a response. Timeouts and
+# transient errors are generally handled as needed to complete the job.
+# See also {#extract_job}.
+#
+# @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
+#   Exporting Data From BigQuery
+#
+# @param [Google::Cloud::Storage::File, String, Array<String>]
+#   extract_url The Google Storage file or file URI pattern(s) to which
+#   BigQuery should extract the table data.
+# @param [String] format The exported file format. The default value is
+#   `csv`.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+# @param [String] compression The compression type to use for exported
+#   files. Possible values include `GZIP` and `NONE`. The default value
+#   is `NONE`.
+# @param [String] delimiter Delimiter to use between fields in the
+#   exported data. Default is <code>,</code>.
+# @param [Boolean] header Whether to print out a header row in the
+#   results. Default is `true`.
+#
+#
+# @return [Boolean] Returns `true` if the extract operation succeeded.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   table.extract "gs://my-bucket/file-name.json", format: "json"
+#
+# @!group Data
+#
+def extract extract_url, format: nil, compression: nil, delimiter: nil,
+            header: nil
+  job = extract_job extract_url, format: format,
+                    compression: compression,
+                    delimiter: delimiter, header: header
+  job.wait_until_done!
+
+  if job.failed?
+    begin
+      # raise to activate ruby exception cause handling
+      fail job.gapi_error
+    rescue => e
+      # wrap Google::Apis::Error with Google::Cloud::Error
+      raise Google::Cloud::Error.from_error(e)
+    end
+  end
+
+  true
+end
+
 ##
 # Loads data into the table. You can pass a google-cloud storage file
 # path or a google-cloud storage file instance. Or, you can upload a
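Wildcard expansion and sharding happen on the service side, so a sketch needs only the options documented above; the URI and prefix values are illustrative:

    # Export to sharded, gzipped CSV files; BigQuery expands the `*`.
    job = table.extract_job "gs://my-bucket/export-*.csv.gz",
                            compression: "GZIP",
                            prefix: "nightly_extract_"
    job.wait_until_done!
    puts job.failed? ? "extract failed" : "extract done"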
@@ -684,6 +1117,9 @@ module Google
 # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
 #   quoted data sections that contain newline characters in a CSV file.
 #   The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+#   automatically infer the options and schema for CSV and JSON sources.
+#   The default value is `false`.
 # @param [String] encoding The character encoding of the data. The
 #   supported values are `UTF-8` or `ISO-8859-1`. The default value is
 #   `UTF-8`.
@@ -708,6 +1144,13 @@ module Google
 #   records exceeds this value, an invalid error is returned in the job
 #   result. The default value is `0`, which requires that all records
 #   are valid.
+# @param [String] null_marker Specifies a string that represents a null
+#   value in a CSV file. For example, if you specify `\N`, BigQuery
+#   interprets `\N` as a null value when loading a CSV file. The default
+#   value is the empty string. If you set this property to a custom
+#   value, BigQuery throws an error if an empty string is present for
+#   all data types except for STRING and BYTE. For STRING and BYTE
+#   columns, BigQuery interprets the empty string as an empty value.
 # @param [String] quote The value that is used to quote data sections in
 #   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
 #   then uses the first byte of the encoded string to split the data in
@@ -720,6 +1163,28 @@ module Google
 #   file that BigQuery will skip when loading the data. The default
 #   value is `0`. This property is useful if you have header rows in the
 #   file that should be skipped.
+# @param [String] job_id A user-defined ID for the load job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs. Label
+#   keys and values can be no longer than 63 characters, can only
+#   contain lowercase letters, numeric characters, underscores and
+#   dashes. International characters are allowed. Label values are
+#   optional. Label keys must start with a letter and each label in the
+#   list must have a different key.
 #
 # @return [Google::Cloud::Bigquery::LoadJob]
 #
@@ -730,7 +1195,7 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #   table = dataset.table "my_table"
 #
-#   load_job = table.load "gs://my-bucket/file-name.csv"
+#   load_job = table.load_job "gs://my-bucket/file-name.csv"
 #
 # @example Pass a google-cloud-storage `File` instance:
 #   require "google/cloud/bigquery"
@@ -743,7 +1208,7 @@ module Google
 #   storage = Google::Cloud::Storage.new
 #   bucket = storage.bucket "my-bucket"
 #   file = bucket.file "file-name.csv"
-#   load_job = table.load file
+#   load_job = table.load_job file
 #
 # @example Upload a file directly:
 #   require "google/cloud/bigquery"
@@ -753,15 +1218,16 @@ module Google
 #   table = dataset.table "my_table"
 #
 #   file = File.open "my_data.csv"
-#   load_job = table.load file
+#   load_job = table.load_job file
 #
 # @!group Data
 #
-def load file, format: nil, create: nil, write: nil,
-         projection_fields: nil, jagged_rows: nil,
-         quoted_newlines: nil, encoding: nil, delimiter: nil,
-         ignore_unknown: nil, max_bad_records: nil, quote: nil,
-         skip_leading: nil, dryrun: nil
+def load_job file, format: nil, create: nil, write: nil,
+             projection_fields: nil, jagged_rows: nil,
+             quoted_newlines: nil, encoding: nil, delimiter: nil,
+             ignore_unknown: nil, max_bad_records: nil, quote: nil,
+             skip_leading: nil, dryrun: nil, job_id: nil, prefix: nil,
+             labels: nil, autodetect: nil, null_marker: nil
   ensure_service!
   options = { format: format, create: create, write: write,
               projection_fields: projection_fields,
@@ -769,22 +1235,187 @@ module Google
               quoted_newlines: quoted_newlines, encoding: encoding,
               delimiter: delimiter, ignore_unknown: ignore_unknown,
               max_bad_records: max_bad_records, quote: quote,
-              skip_leading: skip_leading, dryrun: dryrun }
+              skip_leading: skip_leading, dryrun: dryrun,
+              job_id: job_id, prefix: prefix, labels: labels,
+              autodetect: autodetect, null_marker: null_marker }
   return load_storage(file, options) if storage_url? file
   return load_local(file, options) if local_file? file
   fail Google::Cloud::Error, "Don't know how to load #{file}"
 end
 
+##
+# Loads data into the table. You can pass a google-cloud storage file
+# path or a google-cloud storage file instance. Or, you can upload a
+# file directly. See [Loading Data with a POST Request](
+# https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+#
+# @param [File, Google::Cloud::Storage::File, String] file A file or the
+#   URI of a Google Cloud Storage file containing data to load into the
+#   table.
+# @param [String] format The exported file format. The default value is
+#   `csv`.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+#   * `datastore_backup` - Cloud Datastore backup
+# @param [String] create Specifies whether the job is allowed to create
+#   new tables. The default value is `needed`.
+#
+#   The following values are supported:
+#
+#   * `needed` - Create the table if it does not exist.
+#   * `never` - The table must already exist. A 'notFound' error is
+#     raised if the table does not exist.
+# @param [String] write Specifies how to handle data already present in
+#   the table. The default value is `append`.
+#
+#   The following values are supported:
+#
+#   * `truncate` - BigQuery overwrites the table data.
+#   * `append` - BigQuery appends the data to the table.
+#   * `empty` - An error will be returned if the table already contains
+#     data.
+# @param [Array<String>] projection_fields If the `format` option is set
+#   to `datastore_backup`, indicates which entity properties to load
+#   from a Cloud Datastore backup. Property names are case sensitive and
+#   must be top-level properties. If not set, BigQuery loads all
+#   properties. If any named property isn't found in the Cloud Datastore
+#   backup, an invalid error is returned.
+# @param [Boolean] jagged_rows Accept rows that are missing trailing
+#   optional columns. The missing values are treated as nulls. If
+#   `false`, records with missing trailing columns are treated as bad
+#   records, and if there are too many bad records, an invalid error is
+#   returned in the job result. The default value is `false`. Only
+#   applicable to CSV, ignored for other formats.
+# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+#   quoted data sections that contain newline characters in a CSV file.
+#   The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+#   automatically infer the options and schema for CSV and JSON sources.
+#   The default value is `false`.
+# @param [String] encoding The character encoding of the data. The
+#   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+#   `UTF-8`.
+# @param [String] delimiter Specifies the separator for fields in a CSV
+#   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+#   then uses the first byte of the encoded string to split the data in
+#   its raw, binary state. Default is <code>,</code>.
+# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+#   extra values that are not represented in the table schema. If true,
+#   the extra values are ignored. If false, records with extra columns
+#   are treated as bad records, and if there are too many bad records,
+#   an invalid error is returned in the job result. The default value is
+#   `false`.
+#
+#   The `format` property determines what BigQuery treats as an extra
+#   value:
+#
+#   * `CSV`: Trailing columns
+#   * `JSON`: Named values that don't match any column names
+# @param [Integer] max_bad_records The maximum number of bad records
+#   that BigQuery can ignore when running the job. If the number of bad
+#   records exceeds this value, an invalid error is returned in the job
+#   result. The default value is `0`, which requires that all records
+#   are valid.
+# @param [String] null_marker Specifies a string that represents a null
+#   value in a CSV file. For example, if you specify `\N`, BigQuery
+#   interprets `\N` as a null value when loading a CSV file. The default
+#   value is the empty string. If you set this property to a custom
+#   value, BigQuery throws an error if an empty string is present for
+#   all data types except for STRING and BYTE. For STRING and BYTE
+#   columns, BigQuery interprets the empty string as an empty value.
+# @param [String] quote The value that is used to quote data sections in
+#   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+#   then uses the first byte of the encoded string to split the data in
+#   its raw, binary state. The default value is a double-quote
+#   <code>"</code>. If your data does not contain quoted sections, set
+#   the property value to an empty string. If your data contains quoted
+#   newline characters, you must also set the allowQuotedNewlines
+#   property to true.
+# @param [Integer] skip_leading The number of rows at the top of a CSV
+#   file that BigQuery will skip when loading the data. The default
+#   value is `0`. This property is useful if you have header rows in the
+#   file that should be skipped.
+#
+# @return [Google::Cloud::Bigquery::LoadJob]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   load_job = table.load_job "gs://my-bucket/file-name.csv"
+#
+# @example Pass a google-cloud-storage `File` instance:
+#   require "google/cloud/bigquery"
+#   require "google/cloud/storage"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   storage = Google::Cloud::Storage.new
+#   bucket = storage.bucket "my-bucket"
+#   file = bucket.file "file-name.csv"
+#   load_job = table.load_job file
+#
+# @example Upload a file directly:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#
+#   file = File.open "my_data.csv"
+#   load_job = table.load_job file
+#
+# @!group Data
+#
+def load file, format: nil, create: nil, write: nil,
+         projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
+         encoding: nil, delimiter: nil, ignore_unknown: nil,
+         max_bad_records: nil, quote: nil, skip_leading: nil,
+         autodetect: nil, null_marker: nil
+  job = load_job file, format: format, create: create, write: write,
+                 projection_fields: projection_fields,
+                 jagged_rows: jagged_rows,
+                 quoted_newlines: quoted_newlines,
+                 encoding: encoding, delimiter: delimiter,
+                 ignore_unknown: ignore_unknown,
+                 max_bad_records: max_bad_records, quote: quote,
+                 skip_leading: skip_leading,
+                 autodetect: autodetect, null_marker: null_marker
+
+  job.wait_until_done!
+
+  if job.failed?
+    begin
+      # raise to activate ruby exception cause handling
+      fail job.gapi_error
+    rescue => e
+      # wrap Google::Apis::Error with Google::Cloud::Error
+      raise Google::Cloud::Error.from_error(e)
+    end
+  end
+
+  true
+end
+
 ##
 # Inserts data into the table for near-immediate querying, without the
-# need to complete a
+# need to complete a load operation before the data can appear in query
 # results.
 #
 # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
 #   Streaming Data Into BigQuery
 #
 # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
-#   containing the data.
+#   containing the data. Required.
 # @param [Boolean] skip_invalid Insert all valid rows of a request, even
 #   if invalid rows exist. The default value is `false`, which causes
 #   the entire request to fail if any invalid rows exist.
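The two new load options pair naturally: schema autodetection plus an explicit null sentinel. A small sketch using the blocking form (the values are illustrative):

    # Blocks until the load job finishes; raises Google::Cloud::Error
    # if the job fails, otherwise returns true.
    table.load "gs://my-bucket/file-name.csv",
               autodetect: true,      # infer schema and CSV options
               null_marker: "\\N",    # treat \N in the file as NULL
               skip_leading: 1        # skip the header row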
@@ -811,7 +1442,7 @@ module Google
 #
 def insert rows, skip_invalid: nil, ignore_unknown: nil
   rows = [rows] if rows.is_a? Hash
-  rows
+  fail ArgumentError, "No rows provided" if rows.empty?
   ensure_service!
   options = { skip_invalid: skip_invalid,
               ignore_unknown: ignore_unknown }
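With the new guard an empty batch fails fast instead of sending an empty streaming request to the service:

    table.insert []   # now raises ArgumentError, "No rows provided"

    rows = [{ "first_name" => "Alice", "age" => 21 }]
    response = table.insert rows
    puts response.insert_count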
@@ -819,6 +1450,60 @@ module Google
   InsertResponse.from_gapi rows, gapi
 end
 
+##
+# Create an asynchronous inserter object used to insert rows in batches.
+#
+# @param [Boolean] skip_invalid Insert all valid rows of a request, even
+#   if invalid rows exist. The default value is `false`, which causes
+#   the entire request to fail if any invalid rows exist.
+# @param [Boolean] ignore_unknown Accept rows that contain values that
+#   do not match the schema. The unknown values are ignored. Default is
+#   false, which treats unknown values as errors.
+# @param [Integer] max_bytes The maximum size of rows to be
+#   collected before the batch is published. Default is 10,000,000
+#   (10MB).
+# @param [Integer] max_rows The maximum number of rows to be collected
+#   before the batch is published. Default is 500.
+# @param [Numeric] interval The number of seconds to collect
+#   messages before the batch is published. Default is 10.
+# @param [Numeric] threads The number of threads used to insert
+#   batches of rows. Default is 4.
+# @yield [response] the callback for when a batch of rows is inserted
+# @yieldparam [InsertResponse] response the result of the asynchronous
+#   insert
+#
+# @return [Table::AsyncInserter] Returns inserter object.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#   table = dataset.table "my_table"
+#   inserter = table.insert_async do |response|
+#     log_insert "inserted #{response.insert_count} rows " \
+#       "with #{response.error_count} errors"
+#   end
+#
+#   rows = [
+#     { "first_name" => "Alice", "age" => 21 },
+#     { "first_name" => "Bob", "age" => 22 }
+#   ]
+#   inserter.insert rows
+#
+#   inserter.stop.wait!
+#
+def insert_async skip_invalid: nil, ignore_unknown: nil,
+                 max_bytes: 10000000, max_rows: 500, interval: 10,
+                 threads: 4, &block
+  ensure_service!
+
+  AsyncInserter.new self, skip_invalid: skip_invalid,
+                          ignore_unknown: ignore_unknown,
+                          max_bytes: max_bytes, max_rows: max_rows,
+                          interval: interval, threads: threads, &block
+end
+
 ##
 # Permanently deletes the table.
 #
|
|
878
1563
|
[attr, @gapi.send(attr)]
|
879
1564
|
end]
|
880
1565
|
patch_gapi = Google::Apis::BigqueryV2::Table.new patch_args
|
1566
|
+
patch_gapi.etag = etag if etag
|
881
1567
|
@gapi = service.patch_table dataset_id, table_id, patch_gapi
|
1568
|
+
|
1569
|
+
# TODO: restore original impl after acceptance test indicates that
|
1570
|
+
# service etag bug is fixed
|
1571
|
+
reload!
|
882
1572
|
end
|
883
1573
|
|
884
1574
|
def self.class_for gapi
|