google-cloud-bigquery 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google-cloud-bigquery.rb +25 -0
- data/lib/google/cloud/bigquery.rb +61 -11
- data/lib/google/cloud/bigquery/convert.rb +1 -1
- data/lib/google/cloud/bigquery/credentials.rb +6 -6
- data/lib/google/cloud/bigquery/data.rb +6 -6
- data/lib/google/cloud/bigquery/dataset.rb +16 -15
- data/lib/google/cloud/bigquery/dataset/access.rb +38 -30
- data/lib/google/cloud/bigquery/dataset/list.rb +1 -1
- data/lib/google/cloud/bigquery/external.rb +22 -20
- data/lib/google/cloud/bigquery/insert_response.rb +0 -2
- data/lib/google/cloud/bigquery/job.rb +39 -31
- data/lib/google/cloud/bigquery/job/list.rb +1 -1
- data/lib/google/cloud/bigquery/load_job.rb +4 -4
- data/lib/google/cloud/bigquery/project.rb +7 -15
- data/lib/google/cloud/bigquery/project/list.rb +1 -1
- data/lib/google/cloud/bigquery/query_job.rb +12 -12
- data/lib/google/cloud/bigquery/schema.rb +7 -7
- data/lib/google/cloud/bigquery/schema/field.rb +12 -12
- data/lib/google/cloud/bigquery/service.rb +44 -29
- data/lib/google/cloud/bigquery/table.rb +78 -21
- data/lib/google/cloud/bigquery/table/async_inserter.rb +42 -17
- data/lib/google/cloud/bigquery/table/list.rb +1 -1
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +11 -11
@@ -300,8 +300,8 @@ module Google
|
|
300
300
|
# end
|
301
301
|
#
|
302
302
|
def record name, description: nil, mode: nil
|
303
|
-
# TODO: do we need to
|
304
|
-
|
303
|
+
# TODO: do we need to raise if no block was given?
|
304
|
+
raise ArgumentError, "a block is required" unless block_given?
|
305
305
|
|
306
306
|
nested_field = add_field name, :record, description: description,
|
307
307
|
mode: mode
|
@@ -343,7 +343,7 @@ module Google
|
|
343
343
|
|
344
344
|
def frozen_check!
|
345
345
|
return unless frozen?
|
346
|
-
|
346
|
+
raise ArgumentError, "Cannot modify a frozen schema"
|
347
347
|
end
|
348
348
|
|
349
349
|
def add_field name, type, description: nil, mode: :nullable
|
@@ -354,7 +354,8 @@ module Google
|
|
354
354
|
type: verify_type(type),
|
355
355
|
description: description,
|
356
356
|
mode: verify_mode(mode),
|
357
|
-
fields: []
|
357
|
+
fields: []
|
358
|
+
)
|
358
359
|
|
359
360
|
# Remove any existing field of this name
|
360
361
|
@gapi.fields ||= []
|
@@ -370,8 +371,7 @@ module Google
|
|
370
371
|
def verify_type type
|
371
372
|
type = type.to_s.upcase
|
372
373
|
unless Field::TYPES.include? type
|
373
|
-
|
374
|
-
"Type '#{type}' not found"
|
374
|
+
raise ArgumentError, "Type '#{type}' not found"
|
375
375
|
end
|
376
376
|
type
|
377
377
|
end
|
@@ -380,7 +380,7 @@ module Google
|
|
380
380
|
mode = :nullable if mode.nil?
|
381
381
|
mode = mode.to_s.upcase
|
382
382
|
unless Field::MODES.include? mode
|
383
|
-
|
383
|
+
raise ArgumentError "Unable to determine mode for '#{mode}'"
|
384
384
|
end
|
385
385
|
mode
|
386
386
|
end
|
@@ -37,11 +37,11 @@ module Google
|
|
37
37
|
#
|
38
38
|
class Field
|
39
39
|
# @private
|
40
|
-
MODES = %w
|
40
|
+
MODES = %w[NULLABLE REQUIRED REPEATED].freeze
|
41
41
|
|
42
42
|
# @private
|
43
|
-
TYPES = %w
|
44
|
-
|
43
|
+
TYPES = %w[STRING INTEGER FLOAT BOOLEAN BYTES TIMESTAMP TIME DATETIME
|
44
|
+
DATE RECORD].freeze
|
45
45
|
|
46
46
|
##
|
47
47
|
# The name of the field.
|
@@ -514,8 +514,8 @@ module Google
|
|
514
514
|
def record name, description: nil, mode: nil
|
515
515
|
record_check!
|
516
516
|
|
517
|
-
# TODO: do we need to
|
518
|
-
|
517
|
+
# TODO: do we need to raise if no block was given?
|
518
|
+
raise ArgumentError, "a block is required" unless block_given?
|
519
519
|
|
520
520
|
nested_field = add_field name, :record, description: description,
|
521
521
|
mode: mode
|
@@ -546,13 +546,13 @@ module Google
|
|
546
546
|
|
547
547
|
def frozen_check!
|
548
548
|
return unless frozen?
|
549
|
-
|
549
|
+
raise ArgumentError, "Cannot modify a frozen field"
|
550
550
|
end
|
551
551
|
|
552
552
|
def record_check!
|
553
553
|
return unless type != "RECORD"
|
554
|
-
|
555
|
-
|
554
|
+
raise ArgumentError,
|
555
|
+
"Cannot add fields to a non-RECORD field (#{type})"
|
556
556
|
end
|
557
557
|
|
558
558
|
def add_field name, type, description: nil, mode: :nullable
|
@@ -563,7 +563,8 @@ module Google
|
|
563
563
|
type: verify_type(type),
|
564
564
|
description: description,
|
565
565
|
mode: verify_mode(mode),
|
566
|
-
fields: []
|
566
|
+
fields: []
|
567
|
+
)
|
567
568
|
|
568
569
|
# Remove any existing field of this name
|
569
570
|
@gapi.fields ||= []
|
@@ -578,8 +579,7 @@ module Google
|
|
578
579
|
def verify_type type
|
579
580
|
type = type.to_s.upcase
|
580
581
|
unless TYPES.include? type
|
581
|
-
|
582
|
-
"Type '#{type}' not found in #{TYPES.inspect}"
|
582
|
+
raise ArgumentError, "Type '#{type}' not found"
|
583
583
|
end
|
584
584
|
type
|
585
585
|
end
|
@@ -588,7 +588,7 @@ module Google
|
|
588
588
|
mode = :nullable if mode.nil?
|
589
589
|
mode = mode.to_s.upcase
|
590
590
|
unless MODES.include? mode
|
591
|
-
|
591
|
+
raise ArgumentError "Unable to determine mode for '#{mode}'"
|
592
592
|
end
|
593
593
|
mode
|
594
594
|
end
|
@@ -201,22 +201,32 @@ module Google
|
|
201
201
|
end
|
202
202
|
|
203
203
|
def insert_tabledata dataset_id, table_id, rows, options = {}
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
204
|
+
json_rows = Array(rows).map { |row| Convert.to_json_row row }
|
205
|
+
|
206
|
+
insert_tabledata_json_rows dataset_id, table_id, json_rows, options
|
207
|
+
end
|
208
|
+
|
209
|
+
def insert_tabledata_json_rows dataset_id, table_id, json_rows,
|
210
|
+
options = {}
|
211
|
+
insert_rows = Array(json_rows).map do |json_row|
|
212
|
+
{
|
213
|
+
insertId: SecureRandom.uuid,
|
214
|
+
json: json_row
|
215
|
+
}
|
209
216
|
end
|
210
|
-
|
217
|
+
|
218
|
+
insert_req = {
|
211
219
|
rows: insert_rows,
|
212
|
-
|
213
|
-
|
214
|
-
|
220
|
+
ignoreUnknownValues: options[:ignore_unknown],
|
221
|
+
skipInvalidRows: options[:skip_invalid]
|
222
|
+
}.to_json
|
215
223
|
|
216
224
|
# The insertAll with insertId operation is considered idempotent
|
217
225
|
execute backoff: true do
|
218
226
|
service.insert_all_table_data(
|
219
|
-
@project, dataset_id, table_id, insert_req
|
227
|
+
@project, dataset_id, table_id, insert_req,
|
228
|
+
options: { skip_serialization: true }
|
229
|
+
)
|
220
230
|
end
|
221
231
|
end
|
222
232
|
|
@@ -280,7 +290,8 @@ module Google
|
|
280
290
|
# Jobs have generated id, so this operation is considered idempotent
|
281
291
|
execute backoff: true do
|
282
292
|
service.insert_job @project, copy_table_config(
|
283
|
-
source, target, options
|
293
|
+
source, target, options
|
294
|
+
)
|
284
295
|
end
|
285
296
|
end
|
286
297
|
|
@@ -306,7 +317,8 @@ module Google
|
|
306
317
|
execute backoff: true do
|
307
318
|
service.insert_job \
|
308
319
|
@project, load_table_file_config(
|
309
|
-
dataset_id, table_id, file, options
|
320
|
+
dataset_id, table_id, file, options
|
321
|
+
),
|
310
322
|
upload_source: file, content_type: mime_type_for(file)
|
311
323
|
end
|
312
324
|
end
|
@@ -320,7 +332,7 @@ module Google
|
|
320
332
|
str = str.to_s
|
321
333
|
m = /\A(((?<prj>\S*):)?(?<dts>\S*)\.)?(?<tbl>\S*)\z/.match str
|
322
334
|
unless m
|
323
|
-
|
335
|
+
raise ArgumentError, "unable to identify table from #{str.inspect}"
|
324
336
|
end
|
325
337
|
str_table_ref_hash = {
|
326
338
|
project_id: m["prj"],
|
@@ -391,7 +403,8 @@ module Google
|
|
391
403
|
path = Pathname(file).to_path
|
392
404
|
{
|
393
405
|
destination_table: Google::Apis::BigqueryV2::TableReference.new(
|
394
|
-
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
406
|
+
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
407
|
+
),
|
395
408
|
create_disposition: create_disposition(options[:create]),
|
396
409
|
write_disposition: write_disposition(options[:write]),
|
397
410
|
source_format: source_format(path, options[:format]),
|
@@ -423,7 +436,8 @@ module Google
|
|
423
436
|
def load_table_url_opts dataset_id, table_id, url, options = {}
|
424
437
|
{
|
425
438
|
destination_table: Google::Apis::BigqueryV2::TableReference.new(
|
426
|
-
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
439
|
+
project_id: @project, dataset_id: dataset_id, table_id: table_id
|
440
|
+
),
|
427
441
|
source_uris: Array(url),
|
428
442
|
create_disposition: create_disposition(options[:create]),
|
429
443
|
write_disposition: write_disposition(options[:write]),
|
@@ -500,7 +514,7 @@ module Google
|
|
500
514
|
end
|
501
515
|
end
|
502
516
|
else
|
503
|
-
|
517
|
+
raise "Query parameters must be an Array or a Hash."
|
504
518
|
end
|
505
519
|
end
|
506
520
|
|
@@ -545,7 +559,7 @@ module Google
|
|
545
559
|
end
|
546
560
|
end
|
547
561
|
else
|
548
|
-
|
562
|
+
raise "Query parameters must be an Array or a Hash."
|
549
563
|
end
|
550
564
|
end
|
551
565
|
|
@@ -624,14 +638,15 @@ module Google
|
|
624
638
|
end
|
625
639
|
|
626
640
|
def source_format path, format
|
627
|
-
val = {
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
641
|
+
val = {
|
642
|
+
"csv" => "CSV",
|
643
|
+
"json" => "NEWLINE_DELIMITED_JSON",
|
644
|
+
"newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
|
645
|
+
"avro" => "AVRO",
|
646
|
+
"datastore" => "DATASTORE_BACKUP",
|
647
|
+
"backup" => "DATASTORE_BACKUP",
|
648
|
+
"datastore_backup" => "DATASTORE_BACKUP"
|
649
|
+
}[format.to_s.downcase]
|
635
650
|
return val unless val.nil?
|
636
651
|
return nil if path.nil?
|
637
652
|
return "CSV" if path.end_with? ".csv"
|
@@ -649,7 +664,7 @@ module Google
|
|
649
664
|
mime_type = MIME::Types.of(Pathname(file).to_path).first.to_s
|
650
665
|
return nil if mime_type.empty?
|
651
666
|
mime_type
|
652
|
-
rescue
|
667
|
+
rescue StandardError
|
653
668
|
nil
|
654
669
|
end
|
655
670
|
|
@@ -682,13 +697,13 @@ module Google
|
|
682
697
|
attr_accessor :backoff
|
683
698
|
end
|
684
699
|
self.retries = 5
|
685
|
-
self.reasons = %w
|
700
|
+
self.reasons = %w[rateLimitExceeded backendError]
|
686
701
|
self.backoff = lambda do |retries|
|
687
702
|
# Max delay is 32 seconds
|
688
703
|
# See "Back-off Requirements" here:
|
689
704
|
# https://cloud.google.com/bigquery/sla
|
690
705
|
retries = 5 if retries > 5
|
691
|
-
delay = 2
|
706
|
+
delay = 2**retries
|
692
707
|
sleep delay
|
693
708
|
end
|
694
709
|
|
@@ -729,7 +744,7 @@ module Google
|
|
729
744
|
return false unless @reasons.include? json_error["reason"]
|
730
745
|
end
|
731
746
|
true
|
732
|
-
rescue
|
747
|
+
rescue StandardError
|
733
748
|
false
|
734
749
|
end
|
735
750
|
end
|
@@ -211,12 +211,69 @@ module Google
|
|
211
211
|
#
|
212
212
|
def time_partitioning_type= type
|
213
213
|
reload! unless resource_full?
|
214
|
-
@gapi.time_partitioning ||=
|
215
|
-
|
214
|
+
@gapi.time_partitioning ||= \
|
215
|
+
Google::Apis::BigqueryV2::TimePartitioning.new
|
216
216
|
@gapi.time_partitioning.type = type
|
217
217
|
patch_gapi! :time_partitioning
|
218
218
|
end
|
219
219
|
|
220
|
+
###
|
221
|
+
# The field on which the table is partitioned, if any. See
|
222
|
+
# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
223
|
+
#
|
224
|
+
# @return [String, nil] The partition field, if a field was configured.
|
225
|
+
# `nil` if not partitioned, not set (partitioned by pseudo column
|
226
|
+
# '_PARTITIONTIME') or the object is a reference (see {#reference?}).
|
227
|
+
#
|
228
|
+
# @!group Attributes
|
229
|
+
#
|
230
|
+
def time_partitioning_field
|
231
|
+
return nil if reference?
|
232
|
+
ensure_full_data!
|
233
|
+
@gapi.time_partitioning.field if time_partitioning?
|
234
|
+
end
|
235
|
+
|
236
|
+
##
|
237
|
+
# Sets the field on which to partition the table. See [Partitioned
|
238
|
+
# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
239
|
+
# The table must also be partitioned.
|
240
|
+
#
|
241
|
+
# See {Table#time_partitioning_type=}.
|
242
|
+
#
|
243
|
+
# You can only set the partitioning field while creating a table as in
|
244
|
+
# the example below. BigQuery does not allow you to change partitioning
|
245
|
+
# on an existing table.
|
246
|
+
#
|
247
|
+
# If the table is not a full resource representation (see
|
248
|
+
# {#resource_full?}), the full representation will be retrieved before
|
249
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
250
|
+
#
|
251
|
+
# @param [String] field The partition field. The field must be a
|
252
|
+
# top-level TIMESTAMP or DATE field. Its mode must be NULLABLE or
|
253
|
+
# REQUIRED.
|
254
|
+
#
|
255
|
+
# @example
|
256
|
+
# require "google/cloud/bigquery"
|
257
|
+
#
|
258
|
+
# bigquery = Google::Cloud::Bigquery.new
|
259
|
+
# dataset = bigquery.dataset "my_dataset"
|
260
|
+
# table = dataset.create_table "my_table" do |table|
|
261
|
+
# table.time_partitioning_type = "DAY"
|
262
|
+
# table.time_partitioning_field = "dob"
|
263
|
+
# table.schema do |schema|
|
264
|
+
# schema.timestamp "dob", mode: :required
|
265
|
+
# end
|
266
|
+
# end
|
267
|
+
#
|
268
|
+
# @!group Attributes
|
269
|
+
#
|
270
|
+
def time_partitioning_field= field
|
271
|
+
reload! unless resource_full?
|
272
|
+
@gapi.time_partitioning ||= \
|
273
|
+
Google::Apis::BigqueryV2::TimePartitioning.new
|
274
|
+
@gapi.time_partitioning.field = field
|
275
|
+
patch_gapi! :time_partitioning
|
276
|
+
end
|
220
277
|
|
221
278
|
###
|
222
279
|
# The expiration for the table partitions, if any, in seconds. See
|
@@ -264,8 +321,8 @@ module Google
|
|
264
321
|
#
|
265
322
|
def time_partitioning_expiration= expiration
|
266
323
|
reload! unless resource_full?
|
267
|
-
@gapi.time_partitioning ||=
|
268
|
-
|
324
|
+
@gapi.time_partitioning ||= \
|
325
|
+
Google::Apis::BigqueryV2::TimePartitioning.new
|
269
326
|
@gapi.time_partitioning.expiration_ms = expiration * 1000
|
270
327
|
patch_gapi! :time_partitioning
|
271
328
|
end
|
@@ -426,7 +483,7 @@ module Google
|
|
426
483
|
ensure_full_data!
|
427
484
|
begin
|
428
485
|
Integer @gapi.num_bytes
|
429
|
-
rescue
|
486
|
+
rescue StandardError
|
430
487
|
nil
|
431
488
|
end
|
432
489
|
end
|
@@ -444,7 +501,7 @@ module Google
|
|
444
501
|
ensure_full_data!
|
445
502
|
begin
|
446
503
|
Integer @gapi.num_rows
|
447
|
-
rescue
|
504
|
+
rescue StandardError
|
448
505
|
nil
|
449
506
|
end
|
450
507
|
end
|
@@ -462,7 +519,7 @@ module Google
|
|
462
519
|
ensure_full_data!
|
463
520
|
begin
|
464
521
|
::Time.at(Integer(@gapi.creation_time) / 1000.0)
|
465
|
-
rescue
|
522
|
+
rescue StandardError
|
466
523
|
nil
|
467
524
|
end
|
468
525
|
end
|
@@ -482,7 +539,7 @@ module Google
|
|
482
539
|
ensure_full_data!
|
483
540
|
begin
|
484
541
|
::Time.at(Integer(@gapi.expiration_time) / 1000.0)
|
485
|
-
rescue
|
542
|
+
rescue StandardError
|
486
543
|
nil
|
487
544
|
end
|
488
545
|
end
|
@@ -500,7 +557,7 @@ module Google
|
|
500
557
|
ensure_full_data!
|
501
558
|
begin
|
502
559
|
::Time.at(Integer(@gapi.last_modified_time) / 1000.0)
|
503
|
-
rescue
|
560
|
+
rescue StandardError
|
504
561
|
nil
|
505
562
|
end
|
506
563
|
end
|
@@ -829,7 +886,7 @@ module Google
|
|
829
886
|
oldest_entry_time = @gapi.streaming_buffer.oldest_entry_time
|
830
887
|
begin
|
831
888
|
::Time.at(Integer(oldest_entry_time) / 1000.0)
|
832
|
-
rescue
|
889
|
+
rescue StandardError
|
833
890
|
nil
|
834
891
|
end
|
835
892
|
end
|
@@ -1163,8 +1220,8 @@ module Google
|
|
1163
1220
|
if job.failed?
|
1164
1221
|
begin
|
1165
1222
|
# raise to activate ruby exception cause handling
|
1166
|
-
|
1167
|
-
rescue => e
|
1223
|
+
raise job.gapi_error
|
1224
|
+
rescue StandardError => e
|
1168
1225
|
# wrap Google::Apis::Error with Google::Cloud::Error
|
1169
1226
|
raise Google::Cloud::Error.from_error(e)
|
1170
1227
|
end
|
@@ -1303,8 +1360,8 @@ module Google
|
|
1303
1360
|
if job.failed?
|
1304
1361
|
begin
|
1305
1362
|
# raise to activate ruby exception cause handling
|
1306
|
-
|
1307
|
-
rescue => e
|
1363
|
+
raise job.gapi_error
|
1364
|
+
rescue StandardError => e
|
1308
1365
|
# wrap Google::Apis::Error with Google::Cloud::Error
|
1309
1366
|
raise Google::Cloud::Error.from_error(e)
|
1310
1367
|
end
|
@@ -1486,7 +1543,7 @@ module Google
|
|
1486
1543
|
autodetect: autodetect, null_marker: null_marker }
|
1487
1544
|
return load_storage(file, options) if storage_url? file
|
1488
1545
|
return load_local(file, options) if local_file? file
|
1489
|
-
|
1546
|
+
raise Google::Cloud::Error, "Don't know how to load #{file}"
|
1490
1547
|
end
|
1491
1548
|
|
1492
1549
|
##
|
@@ -1642,8 +1699,8 @@ module Google
|
|
1642
1699
|
if job.failed?
|
1643
1700
|
begin
|
1644
1701
|
# raise to activate ruby exception cause handling
|
1645
|
-
|
1646
|
-
rescue => e
|
1702
|
+
raise job.gapi_error
|
1703
|
+
rescue StandardError => e
|
1647
1704
|
# wrap Google::Apis::Error with Google::Cloud::Error
|
1648
1705
|
raise Google::Cloud::Error.from_error(e)
|
1649
1706
|
end
|
@@ -1701,7 +1758,7 @@ module Google
|
|
1701
1758
|
#
|
1702
1759
|
def insert rows, skip_invalid: nil, ignore_unknown: nil
|
1703
1760
|
rows = [rows] if rows.is_a? Hash
|
1704
|
-
|
1761
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
1705
1762
|
ensure_service!
|
1706
1763
|
options = { skip_invalid: skip_invalid,
|
1707
1764
|
ignore_unknown: ignore_unknown }
|
@@ -1812,7 +1869,7 @@ module Google
|
|
1812
1869
|
gapi = service.get_table dataset_id, table_id
|
1813
1870
|
@gapi = gapi
|
1814
1871
|
end
|
1815
|
-
|
1872
|
+
alias refresh! reload!
|
1816
1873
|
|
1817
1874
|
##
|
1818
1875
|
# Determines whether the table exists in the BigQuery service. The
|
@@ -1965,7 +2022,7 @@ module Google
|
|
1965
2022
|
##
|
1966
2023
|
# Raise an error unless an active service is available.
|
1967
2024
|
def ensure_service!
|
1968
|
-
|
2025
|
+
raise "Must have active connection" unless service
|
1969
2026
|
end
|
1970
2027
|
|
1971
2028
|
##
|
@@ -2019,7 +2076,7 @@ module Google
|
|
2019
2076
|
|
2020
2077
|
def local_file? file
|
2021
2078
|
::File.file? file
|
2022
|
-
rescue
|
2079
|
+
rescue StandardError
|
2023
2080
|
false
|
2024
2081
|
end
|
2025
2082
|
|