google-cloud-bigquery 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -19,7 +19,9 @@ require "google/cloud/bigquery/view"
 require "google/cloud/bigquery/data"
 require "google/cloud/bigquery/table/list"
 require "google/cloud/bigquery/schema"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/insert_response"
+require "google/cloud/bigquery/table/async_inserter"
 require "google/apis/bigquery_v2"

 module Google
@@ -82,8 +84,9 @@ module Google

 ##
 # A unique ID for this table.
-#
-#
+#
+# @return [String] The ID must contain only letters (a-z, A-Z), numbers
+# (0-9), or underscores (_). The maximum length is 1,024 characters.
 #
 # @!group Attributes
 #
@@ -94,6 +97,9 @@ module Google
 ##
 # The ID of the `Dataset` containing this table.
 #
+# @return [String] The ID must contain only letters (a-z, A-Z), numbers
+# (0-9), or underscores (_). The maximum length is 1,024 characters.
+#
 # @!group Attributes
 #
 def dataset_id
@@ -103,6 +109,8 @@ module Google
 ##
 # The ID of the `Project` containing this table.
 #
+# @return [String] The project ID.
+#
 # @!group Attributes
 #
 def project_id
@@ -119,7 +127,11 @@ module Google
 end

 ###
-#
+# Checks if the table is time-partitioned. See [Partitioned
+# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+#
+# @return [Boolean] `true` when the table is time-partitioned, `false`
+# otherwise.
 #
 # @!group Attributes
 #
@@ -128,7 +140,11 @@ module Google
 end

 ###
-# The period for which the table is partitioned, if any.
+# The period for which the table is partitioned, if any. See
+# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+#
+# @return [String, nil] The partition type. Currently the only supported
+# value is "DAY".
 #
 # @!group Attributes
 #
@@ -138,15 +154,15 @@ module Google
 end

 ##
-# Sets the partitioning for the table. See [Partitioned
-# ](https://cloud.google.com/bigquery/docs/partitioned-tables).
+# Sets the partitioning for the table. See [Partitioned
+# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
 #
 # You can only set partitioning when creating a table as in
 # the example below. BigQuery does not allow you to change partitioning
 # on an existing table.
 #
 # @param [String] type The partition type. Currently the only
-#
+# supported value is "DAY".
 #
 # @example
 # require "google/cloud/bigquery"
@@ -168,7 +184,11 @@ module Google


 ###
-# The expiration for the table partitions, if any, in seconds.
+# The expiration for the table partitions, if any, in seconds. See
+# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+#
+# @return [Integer, nil] The expiration time, in seconds, for data in
+# partitions.
 #
 # @!group Attributes
 #
@@ -180,14 +200,14 @@ module Google
 end

 ##
-# Sets the partition expiration for the table. See [Partitioned
-# ](https://cloud.google.com/bigquery/docs/partitioned-tables).
-# table must also be partitioned.
+# Sets the partition expiration for the table. See [Partitioned
+# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
+# The table must also be partitioned.
 #
 # See {Table#time_partitioning_type=}.
 #
 # @param [Integer] expiration An expiration time, in seconds,
-#
+# for data in partitions.
 #
 # @example
 # require "google/cloud/bigquery"
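To make the partitioning hunks above concrete: a minimal sketch of creating a day-partitioned table, assuming a dataset named `my_dataset` exists and default application credentials are configured. Per the documentation above, the writers can only be used while the table is being created.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Partitioning must be set while the table is being created.
table = dataset.create_table "my_partitioned_table" do |t|
  t.time_partitioning_type = "DAY"              # currently the only supported type
  t.time_partitioning_expiration = 7 * 86_400   # keep partition data for 7 days
end

table.time_partitioning?           #=> true
table.time_partitioning_expiration #=> 604800
```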
@@ -215,6 +235,8 @@ module Google
 # `project_name:datasetId.tableId`. To use this value in queries see
 # {#query_id}.
 #
+# @return [String] The combined ID.
+#
 # @!group Attributes
 #
 def id
@@ -236,6 +258,9 @@ module Google
 # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
 # dialect. Optional. The default value is false.
 #
+# @return [String] The appropriate table ID for use in queries,
+# depending on SQL type.
+#
 # @example
 # require "google/cloud/bigquery"
 #
@@ -258,6 +283,8 @@ module Google
 ##
 # The name of the table.
 #
+# @return [String] The friendly name.
+#
 # @!group Attributes
 #
 def name
@@ -267,6 +294,8 @@ module Google
 ##
 # Updates the name of the table.
 #
+# @param [String] new_name The new friendly name.
+#
 # @!group Attributes
 #
 def name= new_name
@@ -275,7 +304,9 @@ module Google
 end

 ##
-#
+# The ETag hash of the table.
+#
+# @return [String] The ETag hash.
 #
 # @!group Attributes
 #
@@ -285,7 +316,9 @@ module Google
 end

 ##
-# A URL that can be used to access the
+# A URL that can be used to access the table using the REST API.
+#
+# @return [String] A REST URL for the resource.
 #
 # @!group Attributes
 #
@@ -295,7 +328,9 @@ module Google
 end

 ##
-#
+# A user-friendly description of the table.
+#
+# @return [String] The description.
 #
 # @!group Attributes
 #
@@ -305,7 +340,9 @@ module Google
 end

 ##
-# Updates the description of the table.
+# Updates the user-friendly description of the table.
+#
+# @param [String] new_description The new user-friendly description.
 #
 # @!group Attributes
 #
@@ -317,6 +354,8 @@ module Google
 ##
 # The number of bytes in the table.
 #
+# @return [Integer] The count of bytes in the table.
+#
 # @!group Data
 #
 def bytes_count
@@ -331,6 +370,8 @@ module Google
 ##
 # The number of rows in the table.
 #
+# @return [Integer] The count of rows in the table.
+#
 # @!group Data
 #
 def rows_count
@@ -345,6 +386,8 @@ module Google
 ##
 # The time when this table was created.
 #
+# @return [Time, nil] The creation time.
+#
 # @!group Attributes
 #
 def created_at
@@ -361,6 +404,8 @@ module Google
 # If not present, the table will persist indefinitely.
 # Expired tables will be deleted and their storage reclaimed.
 #
+# @return [Time, nil] The expiration time.
+#
 # @!group Attributes
 #
 def expires_at
@@ -375,6 +420,8 @@ module Google
 ##
 # The date when this table was last modified.
 #
+# @return [Time, nil] The last modified time.
+#
 # @!group Attributes
 #
 def modified_at
@@ -389,6 +436,8 @@ module Google
 ##
 # Checks if the table's type is "TABLE".
 #
+# @return [Boolean] `true` when the type is `TABLE`, `false` otherwise.
+#
 # @!group Attributes
 #
 def table?
@@ -398,15 +447,31 @@ module Google
 ##
 # Checks if the table's type is "VIEW".
 #
+# @return [Boolean] `true` when the type is `VIEW`, `false` otherwise.
+#
 # @!group Attributes
 #
 def view?
 @gapi.type == "VIEW"
 end

+##
+# Checks if the table's type is "EXTERNAL".
+#
+# @return [Boolean] `true` when the type is `EXTERNAL`, `false`
+# otherwise.
+#
+# @!group Attributes
+#
+def external?
+@gapi.type == "EXTERNAL"
+end
+
 ##
 # The geographic location where the table should reside. Possible
-# values include EU and US
+# values include `EU` and `US`. The default value is `US`.
+#
+# @return [String] The location code.
 #
 # @!group Attributes
 #
@@ -415,6 +480,65 @@ module Google
 @gapi.location
 end

+##
+# A hash of user-provided labels associated with this table. Labels
+# are used to organize and group tables. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# The returned hash is frozen and changes are not allowed. Use
+# {#labels=} to replace the entire hash.
+#
+# @return [Hash<String, String>] A hash containing key/value pairs.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# labels = table.labels
+# labels["department"] #=> "shipping"
+#
+# @!group Attributes
+#
+def labels
+m = @gapi.labels
+m = m.to_h if m.respond_to? :to_h
+m.dup.freeze
+end
+
+##
+# Updates the hash of user-provided labels associated with this table.
+# Labels are used to organize and group tables. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# @param [Hash<String, String>] labels A hash containing key/value
+# pairs.
+#
+# * Label keys and values can be no longer than 63 characters.
+# * Label keys and values can contain only lowercase letters, numbers,
+# underscores, hyphens, and international characters.
+# * Label keys and values cannot exceed 128 bytes in size.
+# * Label keys must begin with a letter.
+# * Label keys must be unique within a table.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# table.labels = { "department" => "shipping" }
+#
+# @!group Attributes
+#
+def labels= labels
+@gapi.labels = labels
+patch_gapi! :labels
+end
+
 ##
 # Returns the table's schema. This method can also be used to set,
 # replace, or add to the schema by passing a block. See {Schema} for
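Since `labels` returns a frozen hash and `labels=` replaces the entire hash, updating one label means building a new hash and assigning it back. A small sketch, with placeholder dataset and table names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

# merge returns a new hash, so the frozen reader value is left untouched.
table.labels = table.labels.merge("department" => "shipping")

table.labels["department"] #=> "shipping"
```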
@@ -428,7 +552,7 @@ module Google
 # @yield [schema] a block for setting the schema
 # @yieldparam [Schema] schema the object accepting the schema
 #
-# @return [Google::Cloud::Bigquery::Schema]
+# @return [Google::Cloud::Bigquery::Schema] A frozen schema object.
 #
 # @example
 # require "google/cloud/bigquery"
@@ -462,7 +586,20 @@ module Google
 end

 ##
-# The fields of the table.
+# The fields of the table, obtained from its schema.
+#
+# @return [Array<Schema::Field>] An array of field objects.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# table.fields.each do |field|
+# puts field.name
+# end
 #
 # @!group Attributes
 #
@@ -471,7 +608,20 @@ module Google
 end

 ##
-# The names of the columns in the table.
+# The names of the columns in the table, obtained from its schema.
+#
+# @return [Array<Symbol>] An array of column names.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# table.headers.each do |header|
+# puts header
+# end
 #
 # @!group Attributes
 #
@@ -479,6 +629,100 @@ module Google
 schema.headers
 end

+##
+# The {External::DataSource} (or subclass) object that represents the
+# external data source that the table represents. Data can be queried
+# the table, even though the data is not stored in BigQuery. Instead of
+# loading or streaming the data, this object references the external
+# data source.
+#
+# Present only if the table represents an External Data Source. See
+# {#external?} and {External::DataSource}.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources
+# Querying External Data Sources
+#
+# @return [External::DataSource] The external data source.
+#
+# @!group Attributes
+#
+def external
+return nil if @gapi.external_data_configuration.nil?
+External.from_gapi(@gapi.external_data_configuration).freeze
+end
+
+##
+# Set the {External::DataSource} (or subclass) object that represents
+# the external data source that the table represents. Data can be
+# queried the table, even though the data is not stored in BigQuery.
+# Instead of loading or streaming the data, this object references the
+# external data source.
+#
+# Use only if the table represents an External Data Source. See
+# {#external?} and {External::DataSource}.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources
+# Querying External Data Sources
+#
+# @param [External::DataSource] external An external data source.
+#
+# @!group Attributes
+#
+def external= external
+@gapi.external_data_configuration = external.to_gapi
+patch_gapi! :external_data_configuration
+end
+
+##
+# A lower-bound estimate of the number of bytes currently in this
+# table's streaming buffer, if one is present. This field will be absent
+# if the table is not being streamed to or if there is no data in the
+# streaming buffer.
+#
+# @return [Integer] The estimated number of bytes in the buffer.
+#
+# @!group Attributes
+#
+def buffer_bytes
+ensure_full_data!
+@gapi.streaming_buffer.estimated_bytes if @gapi.streaming_buffer
+end
+
+##
+# A lower-bound estimate of the number of rows currently in this
+# table's streaming buffer, if one is present. This field will be absent
+# if the table is not being streamed to or if there is no data in the
+# streaming buffer.
+#
+# @return [Integer] The estimated number of rows in the buffer.
+#
+# @!group Attributes
+#
+def buffer_rows
+ensure_full_data!
+@gapi.streaming_buffer.estimated_rows if @gapi.streaming_buffer
+end
+
+##
+# The time of the oldest entry currently in this table's streaming
+# buffer, if one is present. This field will be absent if the table is
+# not being streamed to or if there is no data in the streaming buffer.
+#
+# @return [Time, nil] The oldest entry time.
+#
+# @!group Attributes
+#
+def buffer_oldest_at
+ensure_full_data!
+return nil unless @gapi.streaming_buffer
+oldest_entry_time = @gapi.streaming_buffer.oldest_entry_time
+begin
+::Time.at(Integer(oldest_entry_time) / 1000.0)
+rescue
+nil
+end
+end
+
 ##
 # Retrieves data from the table.
 #
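A short sketch exercising the new `external?`, `external`, and streaming-buffer readers added above. It assumes a table that may or may not be backed by an external data source; the buffer readers return `nil` when nothing is currently buffered.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

if table.external?
  # A frozen External::DataSource describing where the data really lives.
  puts table.external.inspect
else
  # Lower-bound estimates for rows streamed but not yet committed.
  puts "buffered rows:  #{table.buffer_rows.inspect}"
  puts "buffered bytes: #{table.buffer_bytes.inspect}"
  puts "oldest entry:   #{table.buffer_oldest_at.inspect}"
end
```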
@@ -522,19 +766,23 @@ module Google
 def data token: nil, max: nil, start: nil
 ensure_service!
 options = { token: token, max: max, start: start }
-
-Data.from_gapi gapi,
+data_gapi = service.list_tabledata dataset_id, table_id, options
+Data.from_gapi data_gapi, gapi, service
 end

 ##
-# Copies the data from the table to another table
-#
-#
-#
-#
+# Copies the data from the table to another table using an asynchronous
+# method. In this method, a {CopyJob} is immediately returned. The
+# caller may poll the service by repeatedly calling {Job#reload!} and
+# {Job#done?} to detect when the job is done, or simply block until the
+# job is done by calling #{Job#wait_until_done!}. See also {#copy}.
 #
 # @param [Table, String] destination_table The destination for the
-# copied data.
+# copied data. This can also be a string identifier as specified by
+# the [Query
+# Reference](https://cloud.google.com/bigquery/query-reference#from):
+# `project_name:datasetId.tableId`. This is useful for referencing
+# tables in other projects and datasets.
 # @param [String] create Specifies whether the job is allowed to create
 # new tables. The default value is `needed`.
 #
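The `data` method above now fetches a page of rows from the service and wraps it in a `Data` object. A paging sketch, assuming the returned object exposes the next-page token via `token` as in other releases of this gem:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

data = table.data max: 1_000
loop do
  data.each { |row| puts row.inspect }
  break unless data.token                        # nil when there are no more pages
  data = table.data token: data.token, max: 1_000
end
```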
@@ -552,6 +800,28 @@ module Google
 # * `append` - BigQuery appends the data to the table.
 # * `empty` - An error will be returned if the destination table
 # already contains data.
+# @param [String] job_id A user-defined ID for the copy job. The ID
+# must contain only letters (a-z, A-Z), numbers (0-9), underscores
+# (_), or dashes (-). The maximum length is 1,024 characters. If
+# `job_id` is provided, then `prefix` will not be used.
+#
+# See [Generating a job
+# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+# prepended to a generated value to produce a unique job ID. For
+# example, the prefix `daily_import_job_` can be given to generate a
+# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+# prefix must contain only letters (a-z, A-Z), numbers (0-9),
+# underscores (_), or dashes (-). The maximum length of the entire ID
+# is 1,024 characters. If `job_id` is provided, then `prefix` will not
+# be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+# the job. You can use these to organize and group your jobs. Label
+# keys and values can be no longer than 63 characters, can only
+# contain lowercase letters, numeric characters, underscores and
+# dashes. International characters are allowed. Label values are
+# optional. Label keys must start with a letter and each label in the
+# list must have a different key.
 #
 # @return [Google::Cloud::Bigquery::CopyJob]
 #
@@ -563,7 +833,7 @@ module Google
 # table = dataset.table "my_table"
 # destination_table = dataset.table "my_destination_table"
 #
-# copy_job = table.
+# copy_job = table.copy_job destination_table
 #
 # @example Passing a string identifier for the destination table:
 # require "google/cloud/bigquery"
@@ -572,13 +842,15 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 # table = dataset.table "my_table"
 #
-# copy_job = table.
+# copy_job = table.copy_job "other-project:other_dataset.other_table"
 #
 # @!group Data
 #
-def
+def copy_job destination_table, create: nil, write: nil, dryrun: nil,
+job_id: nil, prefix: nil, labels: nil
 ensure_service!
-options = { create: create, write: write, dryrun: dryrun
+options = { create: create, write: write, dryrun: dryrun,
+job_id: job_id, prefix: prefix, labels: labels }
 gapi = service.copy_table table_ref,
 get_table_ref(destination_table),
 options
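Putting the new `copy_job` options together: a sketch that copies into a table in another project and attaches a user-supplied job ID and labels. The job ID and label values here are purely illustrative.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

copy_job = table.copy_job "other-project:other_dataset.other_table",
                          create: "needed",
                          write:  "truncate",
                          job_id: "nightly_copy_001",          # illustrative ID
                          labels: { "pipeline" => "nightly" }

copy_job.wait_until_done!
puts copy_job.failed? ? copy_job.error : "copy complete"
```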
@@ -586,7 +858,82 @@ module Google
 end

 ##
-#
+# Copies the data from the table to another table using a synchronous
+# method that blocks for a response. Timeouts and transient errors are
+# generally handled as needed to complete the job. See also
+# {#copy_job}.
+#
+# @param [Table, String] destination_table The destination for the
+# copied data. This can also be a string identifier as specified by
+# the [Query
+# Reference](https://cloud.google.com/bigquery/query-reference#from):
+# `project_name:datasetId.tableId`. This is useful for referencing
+# tables in other projects and datasets.
+# @param [String] create Specifies whether the job is allowed to create
+# new tables. The default value is `needed`.
+#
+# The following values are supported:
+#
+# * `needed` - Create the table if it does not exist.
+# * `never` - The table must already exist. A 'notFound' error is
+# raised if the table does not exist.
+# @param [String] write Specifies how to handle data already present in
+# the destination table. The default value is `empty`.
+#
+# The following values are supported:
+#
+# * `truncate` - BigQuery overwrites the table data.
+# * `append` - BigQuery appends the data to the table.
+# * `empty` - An error will be returned if the destination table
+# already contains data.
+#
+# @return [Boolean] Returns `true` if the copy operation succeeded.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+# destination_table = dataset.table "my_destination_table"
+#
+# table.copy destination_table
+#
+# @example Passing a string identifier for the destination table:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# table.copy "other-project:other_dataset.other_table"
+#
+# @!group Data
+#
+def copy destination_table, create: nil, write: nil
+job = copy_job destination_table, create: create, write: write
+job.wait_until_done!
+
+if job.failed?
+begin
+# raise to activate ruby exception cause handling
+fail job.gapi_error
+rescue => e
+# wrap Google::Apis::Error with Google::Cloud::Error
+raise Google::Cloud::Error.from_error(e)
+end
+end
+
+true
+end
+
+##
+# Extracts the data from the table to a Google Cloud Storage file using
+# an asynchronous method. In this method, an {ExtractJob} is immediately
+# returned. The caller may poll the service by repeatedly calling
+# {Job#reload!} and {Job#done?} to detect when the job is done, or
+# simply block until the job is done by calling #{Job#wait_until_done!}.
+# See also {#extract}.
 #
 # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
 # Exporting Data From BigQuery
@@ -609,6 +956,28 @@ module Google
 # exported data. Default is <code>,</code>.
 # @param [Boolean] header Whether to print out a header row in the
 # results. Default is `true`.
+# @param [String] job_id A user-defined ID for the extract job. The ID
+# must contain only letters (a-z, A-Z), numbers (0-9), underscores
+# (_), or dashes (-). The maximum length is 1,024 characters. If
+# `job_id` is provided, then `prefix` will not be used.
+#
+# See [Generating a job
+# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+# prepended to a generated value to produce a unique job ID. For
+# example, the prefix `daily_import_job_` can be given to generate a
+# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+# prefix must contain only letters (a-z, A-Z), numbers (0-9),
+# underscores (_), or dashes (-). The maximum length of the entire ID
+# is 1,024 characters. If `job_id` is provided, then `prefix` will not
+# be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+# the job. You can use these to organize and group your jobs. Label
+# keys and values can be no longer than 63 characters, can only
+# contain lowercase letters, numeric characters, underscores and
+# dashes. International characters are allowed. Label values are
+# optional. Label keys must start with a letter and each label in the
+# list must have a different key.
 #
 #
 # @return [Google::Cloud::Bigquery::ExtractJob]
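The same `job_id`, `prefix`, and `labels` options now apply to extract jobs. A sketch exporting to newline-delimited JSON with a generated, prefixed job ID; the bucket and paths are placeholders.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

extract_job = table.extract_job "gs://my-bucket/exports/my_table-*.json",
                                format:      "json",
                                compression: "GZIP",
                                prefix:      "nightly_export_",
                                labels:      { "kind" => "export" }

extract_job.wait_until_done!
puts extract_job.failed? ? extract_job.error : "export complete"
```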
@@ -620,20 +989,84 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 # table = dataset.table "my_table"
 #
-# extract_job = table.
+# extract_job = table.extract_job "gs://my-bucket/file-name.json",
 # format: "json"
 #
 # @!group Data
 #
-def
-
+def extract_job extract_url, format: nil, compression: nil,
+delimiter: nil, header: nil, dryrun: nil, job_id: nil,
+prefix: nil, labels: nil
 ensure_service!
 options = { format: format, compression: compression,
-delimiter: delimiter, header: header, dryrun: dryrun
+delimiter: delimiter, header: header, dryrun: dryrun,
+job_id: job_id, prefix: prefix, labels: labels }
 gapi = service.extract_table table_ref, extract_url, options
 Job.from_gapi gapi, service
 end

+##
+# Extracts the data from the table to a Google Cloud Storage file using
+# a synchronous method that blocks for a response. Timeouts and
+# transient errors are generally handled as needed to complete the job.
+# See also {#extract_job}.
+#
+# @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
+# Exporting Data From BigQuery
+#
+# @param [Google::Cloud::Storage::File, String, Array<String>]
+# extract_url The Google Storage file or file URI pattern(s) to which
+# BigQuery should extract the table data.
+# @param [String] format The exported file format. The default value is
+# `csv`.
+#
+# The following values are supported:
+#
+# * `csv` - CSV
+# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+# * `avro` - [Avro](http://avro.apache.org/)
+# @param [String] compression The compression type to use for exported
+# files. Possible values include `GZIP` and `NONE`. The default value
+# is `NONE`.
+# @param [String] delimiter Delimiter to use between fields in the
+# exported data. Default is <code>,</code>.
+# @param [Boolean] header Whether to print out a header row in the
+# results. Default is `true`.
+#
+#
+# @return [Boolean] Returns `true` if the extract operation succeeded.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# table.extract "gs://my-bucket/file-name.json", format: "json"
+#
+# @!group Data
+#
+def extract extract_url, format: nil, compression: nil, delimiter: nil,
+header: nil
+job = extract_job extract_url, format: format,
+compression: compression,
+delimiter: delimiter, header: header
+job.wait_until_done!
+
+if job.failed?
+begin
+# raise to activate ruby exception cause handling
+fail job.gapi_error
+rescue => e
+# wrap Google::Apis::Error with Google::Cloud::Error
+raise Google::Cloud::Error.from_error(e)
+end
+end
+
+true
+end
+
 ##
 # Loads data into the table. You can pass a google-cloud storage file
 # path or a google-cloud storage file instance. Or, you can upload a
@@ -684,6 +1117,9 @@ module Google
 # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
 # quoted data sections that contain newline characters in a CSV file.
 # The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+# automatically infer the options and schema for CSV and JSON sources.
+# The default value is `false`.
 # @param [String] encoding The character encoding of the data. The
 # supported values are `UTF-8` or `ISO-8859-1`. The default value is
 # `UTF-8`.
@@ -708,6 +1144,13 @@ module Google
 # records exceeds this value, an invalid error is returned in the job
 # result. The default value is `0`, which requires that all records
 # are valid.
+# @param [String] null_marker Specifies a string that represents a null
+# value in a CSV file. For example, if you specify `\N`, BigQuery
+# interprets `\N` as a null value when loading a CSV file. The default
+# value is the empty string. If you set this property to a custom
+# value, BigQuery throws an error if an empty string is present for
+# all data types except for STRING and BYTE. For STRING and BYTE
+# columns, BigQuery interprets the empty string as an empty value.
 # @param [String] quote The value that is used to quote data sections in
 # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
 # then uses the first byte of the encoded string to split the data in
@@ -720,6 +1163,28 @@ module Google
 # file that BigQuery will skip when loading the data. The default
 # value is `0`. This property is useful if you have header rows in the
 # file that should be skipped.
+# @param [String] job_id A user-defined ID for the load job. The ID
+# must contain only letters (a-z, A-Z), numbers (0-9), underscores
+# (_), or dashes (-). The maximum length is 1,024 characters. If
+# `job_id` is provided, then `prefix` will not be used.
+#
+# See [Generating a job
+# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+# prepended to a generated value to produce a unique job ID. For
+# example, the prefix `daily_import_job_` can be given to generate a
+# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+# prefix must contain only letters (a-z, A-Z), numbers (0-9),
+# underscores (_), or dashes (-). The maximum length of the entire ID
+# is 1,024 characters. If `job_id` is provided, then `prefix` will not
+# be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+# the job. You can use these to organize and group your jobs. Label
+# keys and values can be no longer than 63 characters, can only
+# contain lowercase letters, numeric characters, underscores and
+# dashes. International characters are allowed. Label values are
+# optional. Label keys must start with a letter and each label in the
+# list must have a different key.
 #
 # @return [Google::Cloud::Bigquery::LoadJob]
 #
@@ -730,7 +1195,7 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 # table = dataset.table "my_table"
 #
-# load_job = table.
+# load_job = table.load_job "gs://my-bucket/file-name.csv"
 #
 # @example Pass a google-cloud-storage `File` instance:
 # require "google/cloud/bigquery"
@@ -743,7 +1208,7 @@ module Google
 # storage = Google::Cloud::Storage.new
 # bucket = storage.bucket "my-bucket"
 # file = bucket.file "file-name.csv"
-# load_job = table.
+# load_job = table.load_job file
 #
 # @example Upload a file directly:
 # require "google/cloud/bigquery"
@@ -753,15 +1218,16 @@ module Google
 # table = dataset.table "my_table"
 #
 # file = File.open "my_data.csv"
-# load_job = table.
+# load_job = table.load_job file
 #
 # @!group Data
 #
-def
-
-
-
-
+def load_job file, format: nil, create: nil, write: nil,
+projection_fields: nil, jagged_rows: nil,
+quoted_newlines: nil, encoding: nil, delimiter: nil,
+ignore_unknown: nil, max_bad_records: nil, quote: nil,
+skip_leading: nil, dryrun: nil, job_id: nil, prefix: nil,
+labels: nil, autodetect: nil, null_marker: nil
 ensure_service!
 options = { format: format, create: create, write: write,
 projection_fields: projection_fields,
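A sketch of `load_job` using the options introduced in this release (`autodetect` and `null_marker`), assuming a CSV in Cloud Storage with one header row; the URI is a placeholder.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

load_job = table.load_job "gs://my-bucket/daily/events.csv",
                          autodetect:      true,    # infer schema and options
                          null_marker:     "\\N",   # treat \N as NULL
                          skip_leading:    1,       # skip the header row
                          max_bad_records: 10,
                          prefix:          "daily_import_job_"

load_job.wait_until_done!
if load_job.failed?
  puts load_job.error
else
  table.reload!
  puts "table now has #{table.rows_count} rows"
end
```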
@@ -769,22 +1235,187 @@ module Google
 quoted_newlines: quoted_newlines, encoding: encoding,
 delimiter: delimiter, ignore_unknown: ignore_unknown,
 max_bad_records: max_bad_records, quote: quote,
-skip_leading: skip_leading, dryrun: dryrun
+skip_leading: skip_leading, dryrun: dryrun,
+job_id: job_id, prefix: prefix, labels: labels,
+autodetect: autodetect, null_marker: null_marker }
 return load_storage(file, options) if storage_url? file
 return load_local(file, options) if local_file? file
 fail Google::Cloud::Error, "Don't know how to load #{file}"
 end

+##
+# Loads data into the table. You can pass a google-cloud storage file
+# path or a google-cloud storage file instance. Or, you can upload a
+# file directly. See [Loading Data with a POST Request](
+# https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+#
+# @param [File, Google::Cloud::Storage::File, String] file A file or the
+# URI of a Google Cloud Storage file containing data to load into the
+# table.
+# @param [String] format The exported file format. The default value is
+# `csv`.
+#
+# The following values are supported:
+#
+# * `csv` - CSV
+# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+# * `avro` - [Avro](http://avro.apache.org/)
+# * `datastore_backup` - Cloud Datastore backup
+# @param [String] create Specifies whether the job is allowed to create
+# new tables. The default value is `needed`.
+#
+# The following values are supported:
+#
+# * `needed` - Create the table if it does not exist.
+# * `never` - The table must already exist. A 'notFound' error is
+# raised if the table does not exist.
+# @param [String] write Specifies how to handle data already present in
+# the table. The default value is `append`.
+#
+# The following values are supported:
+#
+# * `truncate` - BigQuery overwrites the table data.
+# * `append` - BigQuery appends the data to the table.
+# * `empty` - An error will be returned if the table already contains
+# data.
+# @param [Array<String>] projection_fields If the `format` option is set
+# to `datastore_backup`, indicates which entity properties to load
+# from a Cloud Datastore backup. Property names are case sensitive and
+# must be top-level properties. If not set, BigQuery loads all
+# properties. If any named property isn't found in the Cloud Datastore
+# backup, an invalid error is returned.
+# @param [Boolean] jagged_rows Accept rows that are missing trailing
+# optional columns. The missing values are treated as nulls. If
+# `false`, records with missing trailing columns are treated as bad
+# records, and if there are too many bad records, an invalid error is
+# returned in the job result. The default value is `false`. Only
+# applicable to CSV, ignored for other formats.
+# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+# quoted data sections that contain newline characters in a CSV file.
+# The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+# automatically infer the options and schema for CSV and JSON sources.
+# The default value is `false`.
+# @param [String] encoding The character encoding of the data. The
+# supported values are `UTF-8` or `ISO-8859-1`. The default value is
+# `UTF-8`.
+# @param [String] delimiter Specifices the separator for fields in a CSV
+# file. BigQuery converts the string to `ISO-8859-1` encoding, and
+# then uses the first byte of the encoded string to split the data in
+# its raw, binary state. Default is <code>,</code>.
+# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+# extra values that are not represented in the table schema. If true,
+# the extra values are ignored. If false, records with extra columns
+# are treated as bad records, and if there are too many bad records,
+# an invalid error is returned in the job result. The default value is
+# `false`.
+#
+# The `format` property determines what BigQuery treats as an extra
+# value:
+#
+# * `CSV`: Trailing columns
+# * `JSON`: Named values that don't match any column names
+# @param [Integer] max_bad_records The maximum number of bad records
+# that BigQuery can ignore when running the job. If the number of bad
+# records exceeds this value, an invalid error is returned in the job
+# result. The default value is `0`, which requires that all records
+# are valid.
+# @param [String] null_marker Specifies a string that represents a null
+# value in a CSV file. For example, if you specify `\N`, BigQuery
+# interprets `\N` as a null value when loading a CSV file. The default
+# value is the empty string. If you set this property to a custom
+# value, BigQuery throws an error if an empty string is present for
+# all data types except for STRING and BYTE. For STRING and BYTE
+# columns, BigQuery interprets the empty string as an empty value.
+# @param [String] quote The value that is used to quote data sections in
+# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+# then uses the first byte of the encoded string to split the data in
+# its raw, binary state. The default value is a double-quote
+# <code>"</code>. If your data does not contain quoted sections, set
+# the property value to an empty string. If your data contains quoted
+# newline characters, you must also set the allowQuotedNewlines
+# property to true.
+# @param [Integer] skip_leading The number of rows at the top of a CSV
+# file that BigQuery will skip when loading the data. The default
+# value is `0`. This property is useful if you have header rows in the
+# file that should be skipped.
+#
+# @return [Google::Cloud::Bigquery::LoadJob]
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# load_job = table.load_job "gs://my-bucket/file-name.csv"
+#
+# @example Pass a google-cloud-storage `File` instance:
+# require "google/cloud/bigquery"
+# require "google/cloud/storage"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# storage = Google::Cloud::Storage.new
+# bucket = storage.bucket "my-bucket"
+# file = bucket.file "file-name.csv"
+# load_job = table.load_job file
+#
+# @example Upload a file directly:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+#
+# file = File.open "my_data.csv"
+# load_job = table.load_job file
+#
+# @!group Data
+#
+def load file, format: nil, create: nil, write: nil,
+projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
+encoding: nil, delimiter: nil, ignore_unknown: nil,
+max_bad_records: nil, quote: nil, skip_leading: nil,
+autodetect: nil, null_marker: nil
+job = load_job file, format: format, create: create, write: write,
+projection_fields: projection_fields,
+jagged_rows: jagged_rows,
+quoted_newlines: quoted_newlines,
+encoding: encoding, delimiter: delimiter,
+ignore_unknown: ignore_unknown,
+max_bad_records: max_bad_records, quote: quote,
+skip_leading: skip_leading,
+autodetect: autodetect, null_marker: null_marker
+
+job.wait_until_done!
+
+if job.failed?
+begin
+# raise to activate ruby exception cause handling
+fail job.gapi_error
+rescue => e
+# wrap Google::Apis::Error with Google::Cloud::Error
+raise Google::Cloud::Error.from_error(e)
+end
+end
+
+true
+end
+
 ##
 # Inserts data into the table for near-immediate querying, without the
-# need to complete a
+# need to complete a load operation before the data can appear in query
 # results.
 #
 # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
 # Streaming Data Into BigQuery
 #
 # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
-# containing the data.
+# containing the data. Required.
 # @param [Boolean] skip_invalid Insert all valid rows of a request, even
 # if invalid rows exist. The default value is `false`, which causes
 # the entire request to fail if any invalid rows exist.
@@ -811,7 +1442,7 @@ module Google
 #
 def insert rows, skip_invalid: nil, ignore_unknown: nil
 rows = [rows] if rows.is_a? Hash
-rows
+fail ArgumentError, "No rows provided" if rows.empty?
 ensure_service!
 options = { skip_invalid: skip_invalid,
 ignore_unknown: ignore_unknown }
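Note that `insert` now raises `ArgumentError` when given an empty array. A sketch of the streaming path, assuming the response exposes `success?` and per-row `insert_errors` as documented for this release:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

rows = [
  { "first_name" => "Alice", "age" => 21 },
  { "first_name" => "Bob",   "age" => 22 }
]

response = table.insert rows, skip_invalid: true, ignore_unknown: true
unless response.success?
  response.insert_errors.each { |err| puts err.errors.inspect }
end
```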
@@ -819,6 +1450,60 @@ module Google
 InsertResponse.from_gapi rows, gapi
 end

+##
+# Create an asynchonous inserter object used to insert rows in batches.
+#
+# @param [Boolean] skip_invalid Insert all valid rows of a request, even
+# if invalid rows exist. The default value is `false`, which causes
+# the entire request to fail if any invalid rows exist.
+# @param [Boolean] ignore_unknown Accept rows that contain values that
+# do not match the schema. The unknown values are ignored. Default is
+# false, which treats unknown values as errors.
+# @attr_reader [Integer] max_bytes The maximum size of rows to be
+# collected before the batch is published. Default is 10,000,000
+# (10MB).
+# @param [Integer] max_rows The maximum number of rows to be collected
+# before the batch is published. Default is 500.
+# @attr_reader [Numeric] interval The number of seconds to collect
+# messages before the batch is published. Default is 10.
+# @attr_reader [Numeric] threads The number of threads used to insert
+# batches of rows. Default is 4.
+# @yield [response] the callback for when a batch of rows is inserted
+# @yieldparam [InsertResponse] response the result of the asynchonous
+# insert
+#
+# @return [Table::AsyncInserter] Returns inserter object.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+# inserter = table.insert_async do |response|
+# log_insert "inserted #{response.insert_count} rows " \
+# "with #{response.error_count} errors"
+# end
+#
+# rows = [
+# { "first_name" => "Alice", "age" => 21 },
+# { "first_name" => "Bob", "age" => 22 }
+# ]
+# inserter.insert rows
+#
+# inserter.stop.wait!
+#
+def insert_async skip_invalid: nil, ignore_unknown: nil,
+max_bytes: 10000000, max_rows: 500, interval: 10,
+threads: 4, &block
+ensure_service!
+
+AsyncInserter.new self, skip_invalid: skip_invalid,
+ignore_unknown: ignore_unknown,
+max_bytes: max_bytes, max_rows: max_rows,
+interval: interval, threads: threads, &block
+end
+
 ##
 # Permanently deletes the table.
 #
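The new `AsyncInserter` batches rows by count, byte size, and time interval. A sketch that tightens the batching thresholds and logs failures from the callback; the thresholds and row values here are arbitrary.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table("my_table")

inserter = table.insert_async max_rows: 100, interval: 5 do |response|
  warn "#{response.error_count} rows failed" if response.error_count > 0
end

1_000.times do |i|
  inserter.insert({ "id" => i, "name" => "row-#{i}" })
end

# Flush buffered rows and wait for in-flight batches before exiting.
inserter.stop.wait!
```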
@@ -878,7 +1563,12 @@ module Google
 [attr, @gapi.send(attr)]
 end]
 patch_gapi = Google::Apis::BigqueryV2::Table.new patch_args
+patch_gapi.etag = etag if etag
 @gapi = service.patch_table dataset_id, table_id, patch_gapi
+
+# TODO: restore original impl after acceptance test indicates that
+# service etag bug is fixed
+reload!
 end

 def self.class_for gapi