google-cloud-bigquery 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/google-cloud-bigquery.rb +25 -0
- data/lib/google/cloud/bigquery.rb +61 -11
- data/lib/google/cloud/bigquery/convert.rb +1 -1
- data/lib/google/cloud/bigquery/credentials.rb +6 -6
- data/lib/google/cloud/bigquery/data.rb +6 -6
- data/lib/google/cloud/bigquery/dataset.rb +16 -15
- data/lib/google/cloud/bigquery/dataset/access.rb +38 -30
- data/lib/google/cloud/bigquery/dataset/list.rb +1 -1
- data/lib/google/cloud/bigquery/external.rb +22 -20
- data/lib/google/cloud/bigquery/insert_response.rb +0 -2
- data/lib/google/cloud/bigquery/job.rb +39 -31
- data/lib/google/cloud/bigquery/job/list.rb +1 -1
- data/lib/google/cloud/bigquery/load_job.rb +4 -4
- data/lib/google/cloud/bigquery/project.rb +7 -15
- data/lib/google/cloud/bigquery/project/list.rb +1 -1
- data/lib/google/cloud/bigquery/query_job.rb +12 -12
- data/lib/google/cloud/bigquery/schema.rb +7 -7
- data/lib/google/cloud/bigquery/schema/field.rb +12 -12
- data/lib/google/cloud/bigquery/service.rb +44 -29
- data/lib/google/cloud/bigquery/table.rb +78 -21
- data/lib/google/cloud/bigquery/table/async_inserter.rb +42 -17
- data/lib/google/cloud/bigquery/table/list.rb +1 -1
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +11 -11
@@ -300,8 +300,8 @@ module Google
|
|
300
300
|
# end
|
301
301
|
#
|
302
302
|
def record name, description: nil, mode: nil
|
303
|
-
# TODO: do we need to
|
304
|
-
|
303
|
+
# TODO: do we need to raise if no block was given?
|
304
|
+
raise ArgumentError, "a block is required" unless block_given?
|
305
305
|
|
306
306
|
nested_field = add_field name, :record, description: description,
|
307
307
|
mode: mode
|
@@ -343,7 +343,7 @@ module Google
|
|
343
343
|
|
344
344
|
def frozen_check!
|
345
345
|
return unless frozen?
|
346
|
-
|
346
|
+
raise ArgumentError, "Cannot modify a frozen schema"
|
347
347
|
end
|
348
348
|
|
349
349
|
def add_field name, type, description: nil, mode: :nullable
|
@@ -354,7 +354,8 @@ module Google
|
|
354
354
|
type: verify_type(type),
|
355
355
|
description: description,
|
356
356
|
mode: verify_mode(mode),
|
357
|
-
fields: []
|
357
|
+
fields: []
|
358
|
+
)
|
358
359
|
|
359
360
|
# Remove any existing field of this name
|
360
361
|
@gapi.fields ||= []
|
@@ -370,8 +371,7 @@ module Google
|
|
370
371
|
def verify_type type
|
371
372
|
type = type.to_s.upcase
|
372
373
|
unless Field::TYPES.include? type
|
373
|
-
|
374
|
-
"Type '#{type}' not found"
|
374
|
+
raise ArgumentError, "Type '#{type}' not found"
|
375
375
|
end
|
376
376
|
type
|
377
377
|
end
|
@@ -380,7 +380,7 @@ module Google
|
|
380
380
|
mode = :nullable if mode.nil?
|
381
381
|
mode = mode.to_s.upcase
|
382
382
|
unless Field::MODES.include? mode
|
383
|
-
|
383
|
+
raise ArgumentError "Unable to determine mode for '#{mode}'"
|
384
384
|
end
|
385
385
|
mode
|
386
386
|
end
|
@@ -37,11 +37,11 @@ module Google
|
|
37
37
|
#
|
38
38
|
class Field
|
39
39
|
# @private
|
40
|
-
MODES = %w
|
40
|
+
MODES = %w[NULLABLE REQUIRED REPEATED].freeze
|
41
41
|
|
42
42
|
# @private
|
43
|
-
TYPES = %w
|
44
|
-
|
43
|
+
TYPES = %w[STRING INTEGER FLOAT BOOLEAN BYTES TIMESTAMP TIME DATETIME
|
44
|
+
DATE RECORD].freeze
|
45
45
|
|
46
46
|
##
|
47
47
|
# The name of the field.
|
@@ -514,8 +514,8 @@ module Google
|
|
514
514
|
def record name, description: nil, mode: nil
|
515
515
|
record_check!
|
516
516
|
|
517
|
-
# TODO: do we need to
|
518
|
-
|
517
|
+
# TODO: do we need to raise if no block was given?
|
518
|
+
raise ArgumentError, "a block is required" unless block_given?
|
519
519
|
|
520
520
|
nested_field = add_field name, :record, description: description,
|
521
521
|
mode: mode
|
@@ -546,13 +546,13 @@ module Google
|
|
546
546
|
|
547
547
|
def frozen_check!
|
548
548
|
return unless frozen?
|
549
|
-
|
549
|
+
raise ArgumentError, "Cannot modify a frozen field"
|
550
550
|
end
|
551
551
|
|
552
552
|
def record_check!
|
553
553
|
return unless type != "RECORD"
|
554
|
-
|
555
|
-
|
554
|
+
raise ArgumentError,
|
555
|
+
"Cannot add fields to a non-RECORD field (#{type})"
|
556
556
|
end
|
557
557
|
|
558
558
|
def add_field name, type, description: nil, mode: :nullable
|
@@ -563,7 +563,8 @@ module Google
|
|
563
563
|
type: verify_type(type),
|
564
564
|
description: description,
|
565
565
|
mode: verify_mode(mode),
|
566
|
-
fields: []
|
566
|
+
fields: []
|
567
|
+
)
|
567
568
|
|
568
569
|
# Remove any existing field of this name
|
569
570
|
@gapi.fields ||= []
|
@@ -578,8 +579,7 @@ module Google
|
|
578
579
|
def verify_type type
|
579
580
|
type = type.to_s.upcase
|
580
581
|
unless TYPES.include? type
|
581
|
-
|
582
|
-
"Type '#{type}' not found in #{TYPES.inspect}"
|
582
|
+
raise ArgumentError, "Type '#{type}' not found"
|
583
583
|
end
|
584
584
|
type
|
585
585
|
end
|
@@ -588,7 +588,7 @@ module Google
|
|
588
588
|
mode = :nullable if mode.nil?
|
589
589
|
mode = mode.to_s.upcase
|
590
590
|
unless MODES.include? mode
|
591
|
-
|
591
|
+
raise ArgumentError "Unable to determine mode for '#{mode}'"
|
592
592
|
end
|
593
593
|
mode
|
594
594
|
end
|
@@ -201,22 +201,32 @@ module Google
|
|
201
201
|
end
|
202
202
|
|
203
203
|
def insert_tabledata dataset_id, table_id, rows, options = {}
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
204
|
+
json_rows = Array(rows).map { |row| Convert.to_json_row row }
|
205
|
+
|
206
|
+
insert_tabledata_json_rows dataset_id, table_id, json_rows, options
|
207
|
+
end
|
208
|
+
|
209
|
+
def insert_tabledata_json_rows dataset_id, table_id, json_rows,
|
210
|
+
options = {}
|
211
|
+
insert_rows = Array(json_rows).map do |json_row|
|
212
|
+
{
|
213
|
+
insertId: SecureRandom.uuid,
|
214
|
+
json: json_row
|
215
|
+
}
|
209
216
|
end
|
210
|
-
|
217
|
+
|
218
|
+
insert_req = {
|
211
219
|
rows: insert_rows,
|
212
|
-
|
213
|
-
|
214
|
-
|
220
|
+
ignoreUnknownValues: options[:ignore_unknown],
|
221
|
+
skipInvalidRows: options[:skip_invalid]
|
222
|
+
}.to_json
|
215
223
|
|
216
224
|
# The insertAll with insertId operation is considered idempotent
|
217
225
|
execute backoff: true do
|
218
226
|
service.insert_all_table_data(
|
219
|
-
@project, dataset_id, table_id, insert_req
|
227
|
+
@project, dataset_id, table_id, insert_req,
|
228
|
+
options: { skip_serialization: true }
|
229
|
+
)
|
220
230
|
end
|
221
231
|
end
|
222
232
|
|
@@ -280,7 +290,8 @@ module Google
|
|
280
290
|
# Jobs have generated id, so this operation is considered idempotent
|
281
291
|
execute backoff: true do
|
282
292
|
service.insert_job @project, copy_table_config(
|
283
|
-
source, target, options
|
293
|
+
source, target, options
|
294
|
+
)
|
284
295
|
end
|
285
296
|
end
|
286
297
|
|
@@ -306,7 +317,8 @@ module Google
|
|
306
317
|
execute backoff: true do
|
307
318
|
service.insert_job \
|
308
319
|
@project, load_table_file_config(
|
309
|
-
dataset_id, table_id, file, options
|
320
|
+
dataset_id, table_id, file, options
|
321
|
+
),
|
310
322
|
upload_source: file, content_type: mime_type_for(file)
|
311
323
|
end
|
312
324
|
end
|
@@ -320,7 +332,7 @@ module Google
|
|
320
332
|
str = str.to_s
|
321
333
|
m = /\A(((?<prj>\S*):)?(?<dts>\S*)\.)?(?<tbl>\S*)\z/.match str
|
322
334
|
unless m
|
323
|
-
|
335
|
+
raise ArgumentError, "unable to identify table from #{str.inspect}"
|
324
336
|
end
|
325
337
|
str_table_ref_hash = {
|
326
338
|
project_id: m["prj"],
|
@@ -391,7 +403,8 @@ module Google
|
|
391
403
|
path = Pathname(file).to_path
|
392
404
|
{
|
393
405
|
destination_table: Google::Apis::BigqueryV2::TableReference.new(
|
394
|
-
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
406
|
+
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
407
|
+
),
|
395
408
|
create_disposition: create_disposition(options[:create]),
|
396
409
|
write_disposition: write_disposition(options[:write]),
|
397
410
|
source_format: source_format(path, options[:format]),
|
@@ -423,7 +436,8 @@ module Google
|
|
423
436
|
def load_table_url_opts dataset_id, table_id, url, options = {}
|
424
437
|
{
|
425
438
|
destination_table: Google::Apis::BigqueryV2::TableReference.new(
|
426
|
-
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
439
|
+
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
440
|
+
),
|
427
441
|
source_uris: Array(url),
|
428
442
|
create_disposition: create_disposition(options[:create]),
|
429
443
|
write_disposition: write_disposition(options[:write]),
|
@@ -500,7 +514,7 @@ module Google
|
|
500
514
|
end
|
501
515
|
end
|
502
516
|
else
|
503
|
-
|
517
|
+
raise "Query parameters must be an Array or a Hash."
|
504
518
|
end
|
505
519
|
end
|
506
520
|
|
@@ -545,7 +559,7 @@ module Google
|
|
545
559
|
end
|
546
560
|
end
|
547
561
|
else
|
548
|
-
|
562
|
+
raise "Query parameters must be an Array or a Hash."
|
549
563
|
end
|
550
564
|
end
|
551
565
|
|
@@ -624,14 +638,15 @@ module Google
|
|
624
638
|
end
|
625
639
|
|
626
640
|
def source_format path, format
|
627
|
-
val = {
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
641
|
+
val = {
|
642
|
+
"csv" => "CSV",
|
643
|
+
"json" => "NEWLINE_DELIMITED_JSON",
|
644
|
+
"newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
|
645
|
+
"avro" => "AVRO",
|
646
|
+
"datastore" => "DATASTORE_BACKUP",
|
647
|
+
"backup" => "DATASTORE_BACKUP",
|
648
|
+
"datastore_backup" => "DATASTORE_BACKUP"
|
649
|
+
}[format.to_s.downcase]
|
635
650
|
return val unless val.nil?
|
636
651
|
return nil if path.nil?
|
637
652
|
return "CSV" if path.end_with? ".csv"
|
@@ -649,7 +664,7 @@ module Google
|
|
649
664
|
mime_type = MIME::Types.of(Pathname(file).to_path).first.to_s
|
650
665
|
return nil if mime_type.empty?
|
651
666
|
mime_type
|
652
|
-
rescue
|
667
|
+
rescue StandardError
|
653
668
|
nil
|
654
669
|
end
|
655
670
|
|
@@ -682,13 +697,13 @@ module Google
|
|
682
697
|
attr_accessor :backoff
|
683
698
|
end
|
684
699
|
self.retries = 5
|
685
|
-
self.reasons = %w
|
700
|
+
self.reasons = %w[rateLimitExceeded backendError]
|
686
701
|
self.backoff = lambda do |retries|
|
687
702
|
# Max delay is 32 seconds
|
688
703
|
# See "Back-off Requirements" here:
|
689
704
|
# https://cloud.google.com/bigquery/sla
|
690
705
|
retries = 5 if retries > 5
|
691
|
-
delay = 2
|
706
|
+
delay = 2**retries
|
692
707
|
sleep delay
|
693
708
|
end
|
694
709
|
|
@@ -729,7 +744,7 @@ module Google
|
|
729
744
|
return false unless @reasons.include? json_error["reason"]
|
730
745
|
end
|
731
746
|
true
|
732
|
-
rescue
|
747
|
+
rescue StandardError
|
733
748
|
false
|
734
749
|
end
|
735
750
|
end
|
@@ -211,12 +211,69 @@ module Google
|
|
211
211
|
#
|
212
212
|
def time_partitioning_type= type
|
213
213
|
reload! unless resource_full?
|
214
|
-
@gapi.time_partitioning ||=
|
215
|
-
|
214
|
+
@gapi.time_partitioning ||= \
|
215
|
+
Google::Apis::BigqueryV2::TimePartitioning.new
|
216
216
|
@gapi.time_partitioning.type = type
|
217
217
|
patch_gapi! :time_partitioning
|
218
218
|
end
|
219
219
|
|
220
|
+
###
|
221
|
+
# The field on which the table is partitioned, if any. See
|
222
|
+
# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
223
|
+
#
|
224
|
+
# @return [String, nil] The partition field, if a field was configured.
|
225
|
+
# `nil` if not partitioned, not set (partitioned by pseudo column
|
226
|
+
# '_PARTITIONTIME') or the object is a reference (see {#reference?}).
|
227
|
+
#
|
228
|
+
# @!group Attributes
|
229
|
+
#
|
230
|
+
def time_partitioning_field
|
231
|
+
return nil if reference?
|
232
|
+
ensure_full_data!
|
233
|
+
@gapi.time_partitioning.field if time_partitioning?
|
234
|
+
end
|
235
|
+
|
236
|
+
##
|
237
|
+
# Sets the field on which to partition the table. See [Partitioned
|
238
|
+
# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
239
|
+
# The table must also be partitioned.
|
240
|
+
#
|
241
|
+
# See {Table#time_partitioning_type=}.
|
242
|
+
#
|
243
|
+
# You can only set the partitioning field while creating a table as in
|
244
|
+
# the example below. BigQuery does not allow you to change partitioning
|
245
|
+
# on an existing table.
|
246
|
+
#
|
247
|
+
# If the table is not a full resource representation (see
|
248
|
+
# {#resource_full?}), the full representation will be retrieved before
|
249
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
250
|
+
#
|
251
|
+
# @param [String] field The partition field. The field must be a
|
252
|
+
# top-level TIMESTAMP or DATE field. Its mode must be NULLABLE or
|
253
|
+
# REQUIRED.
|
254
|
+
#
|
255
|
+
# @example
|
256
|
+
# require "google/cloud/bigquery"
|
257
|
+
#
|
258
|
+
# bigquery = Google::Cloud::Bigquery.new
|
259
|
+
# dataset = bigquery.dataset "my_dataset"
|
260
|
+
# table = dataset.create_table "my_table" do |table|
|
261
|
+
# table.time_partitioning_type = "DAY"
|
262
|
+
# table.time_partitioning_field = "dob"
|
263
|
+
# table.schema do |schema|
|
264
|
+
# schema.timestamp "dob", mode: :required
|
265
|
+
# end
|
266
|
+
# end
|
267
|
+
#
|
268
|
+
# @!group Attributes
|
269
|
+
#
|
270
|
+
def time_partitioning_field= field
|
271
|
+
reload! unless resource_full?
|
272
|
+
@gapi.time_partitioning ||= \
|
273
|
+
Google::Apis::BigqueryV2::TimePartitioning.new
|
274
|
+
@gapi.time_partitioning.field = field
|
275
|
+
patch_gapi! :time_partitioning
|
276
|
+
end
|
220
277
|
|
221
278
|
###
|
222
279
|
# The expiration for the table partitions, if any, in seconds. See
|
@@ -264,8 +321,8 @@ module Google
|
|
264
321
|
#
|
265
322
|
def time_partitioning_expiration= expiration
|
266
323
|
reload! unless resource_full?
|
267
|
-
@gapi.time_partitioning ||=
|
268
|
-
|
324
|
+
@gapi.time_partitioning ||= \
|
325
|
+
Google::Apis::BigqueryV2::TimePartitioning.new
|
269
326
|
@gapi.time_partitioning.expiration_ms = expiration * 1000
|
270
327
|
patch_gapi! :time_partitioning
|
271
328
|
end
|
@@ -426,7 +483,7 @@ module Google
|
|
426
483
|
ensure_full_data!
|
427
484
|
begin
|
428
485
|
Integer @gapi.num_bytes
|
429
|
-
rescue
|
486
|
+
rescue StandardError
|
430
487
|
nil
|
431
488
|
end
|
432
489
|
end
|
@@ -444,7 +501,7 @@ module Google
|
|
444
501
|
ensure_full_data!
|
445
502
|
begin
|
446
503
|
Integer @gapi.num_rows
|
447
|
-
rescue
|
504
|
+
rescue StandardError
|
448
505
|
nil
|
449
506
|
end
|
450
507
|
end
|
@@ -462,7 +519,7 @@ module Google
|
|
462
519
|
ensure_full_data!
|
463
520
|
begin
|
464
521
|
::Time.at(Integer(@gapi.creation_time) / 1000.0)
|
465
|
-
rescue
|
522
|
+
rescue StandardError
|
466
523
|
nil
|
467
524
|
end
|
468
525
|
end
|
@@ -482,7 +539,7 @@ module Google
|
|
482
539
|
ensure_full_data!
|
483
540
|
begin
|
484
541
|
::Time.at(Integer(@gapi.expiration_time) / 1000.0)
|
485
|
-
rescue
|
542
|
+
rescue StandardError
|
486
543
|
nil
|
487
544
|
end
|
488
545
|
end
|
@@ -500,7 +557,7 @@ module Google
|
|
500
557
|
ensure_full_data!
|
501
558
|
begin
|
502
559
|
::Time.at(Integer(@gapi.last_modified_time) / 1000.0)
|
503
|
-
rescue
|
560
|
+
rescue StandardError
|
504
561
|
nil
|
505
562
|
end
|
506
563
|
end
|
@@ -829,7 +886,7 @@ module Google
|
|
829
886
|
oldest_entry_time = @gapi.streaming_buffer.oldest_entry_time
|
830
887
|
begin
|
831
888
|
::Time.at(Integer(oldest_entry_time) / 1000.0)
|
832
|
-
rescue
|
889
|
+
rescue StandardError
|
833
890
|
nil
|
834
891
|
end
|
835
892
|
end
|
@@ -1163,8 +1220,8 @@ module Google
|
|
1163
1220
|
if job.failed?
|
1164
1221
|
begin
|
1165
1222
|
# raise to activate ruby exception cause handling
|
1166
|
-
|
1167
|
-
rescue => e
|
1223
|
+
raise job.gapi_error
|
1224
|
+
rescue StandardError => e
|
1168
1225
|
# wrap Google::Apis::Error with Google::Cloud::Error
|
1169
1226
|
raise Google::Cloud::Error.from_error(e)
|
1170
1227
|
end
|
@@ -1303,8 +1360,8 @@ module Google
|
|
1303
1360
|
if job.failed?
|
1304
1361
|
begin
|
1305
1362
|
# raise to activate ruby exception cause handling
|
1306
|
-
|
1307
|
-
rescue => e
|
1363
|
+
raise job.gapi_error
|
1364
|
+
rescue StandardError => e
|
1308
1365
|
# wrap Google::Apis::Error with Google::Cloud::Error
|
1309
1366
|
raise Google::Cloud::Error.from_error(e)
|
1310
1367
|
end
|
@@ -1486,7 +1543,7 @@ module Google
|
|
1486
1543
|
autodetect: autodetect, null_marker: null_marker }
|
1487
1544
|
return load_storage(file, options) if storage_url? file
|
1488
1545
|
return load_local(file, options) if local_file? file
|
1489
|
-
|
1546
|
+
raise Google::Cloud::Error, "Don't know how to load #{file}"
|
1490
1547
|
end
|
1491
1548
|
|
1492
1549
|
##
|
@@ -1642,8 +1699,8 @@ module Google
|
|
1642
1699
|
if job.failed?
|
1643
1700
|
begin
|
1644
1701
|
# raise to activate ruby exception cause handling
|
1645
|
-
|
1646
|
-
rescue => e
|
1702
|
+
raise job.gapi_error
|
1703
|
+
rescue StandardError => e
|
1647
1704
|
# wrap Google::Apis::Error with Google::Cloud::Error
|
1648
1705
|
raise Google::Cloud::Error.from_error(e)
|
1649
1706
|
end
|
@@ -1701,7 +1758,7 @@ module Google
|
|
1701
1758
|
#
|
1702
1759
|
def insert rows, skip_invalid: nil, ignore_unknown: nil
|
1703
1760
|
rows = [rows] if rows.is_a? Hash
|
1704
|
-
|
1761
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
1705
1762
|
ensure_service!
|
1706
1763
|
options = { skip_invalid: skip_invalid,
|
1707
1764
|
ignore_unknown: ignore_unknown }
|
@@ -1812,7 +1869,7 @@ module Google
|
|
1812
1869
|
gapi = service.get_table dataset_id, table_id
|
1813
1870
|
@gapi = gapi
|
1814
1871
|
end
|
1815
|
-
|
1872
|
+
alias refresh! reload!
|
1816
1873
|
|
1817
1874
|
##
|
1818
1875
|
# Determines whether the table exists in the BigQuery service. The
|
@@ -1965,7 +2022,7 @@ module Google
|
|
1965
2022
|
##
|
1966
2023
|
# Raise an error unless an active service is available.
|
1967
2024
|
def ensure_service!
|
1968
|
-
|
2025
|
+
raise "Must have active connection" unless service
|
1969
2026
|
end
|
1970
2027
|
|
1971
2028
|
##
|
@@ -2019,7 +2076,7 @@ module Google
|
|
2019
2076
|
|
2020
2077
|
def local_file? file
|
2021
2078
|
::File.file? file
|
2022
|
-
rescue
|
2079
|
+
rescue StandardError
|
2023
2080
|
false
|
2024
2081
|
end
|
2025
2082
|
|