google-cloud-bigquery 1.14.0 → 1.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +17 -54
- data/CHANGELOG.md +377 -0
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +155 -173
- data/lib/google/cloud/bigquery/copy_job.rb +74 -26
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
- data/lib/google/cloud/bigquery/dataset.rb +1044 -287
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +226 -61
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +10 -14
- data/lib/google/cloud/bigquery/job.rb +289 -14
- data/lib/google/cloud/bigquery/load_job.rb +810 -136
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +509 -250
- data/lib/google/cloud/bigquery/query_job.rb +594 -128
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +204 -112
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1470 -377
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -6
- data/lib/google-cloud-bigquery.rb +14 -13
- metadata +66 -38
@@ -74,8 +74,8 @@ module Google
|
|
74
74
|
##
|
75
75
|
# The ID of the job.
|
76
76
|
#
|
77
|
-
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
78
|
-
# (0-9), underscores (_), or dashes (-). The maximum length is 1,024
|
77
|
+
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
|
78
|
+
# (`[0-9]`), underscores (`_`), or dashes (`-`). The maximum length is 1,024
|
79
79
|
# characters.
|
80
80
|
#
|
81
81
|
def job_id
|
@@ -197,6 +197,102 @@ module Google
|
|
197
197
|
Convert.millis_to_time @gapi.statistics.end_time
|
198
198
|
end
|
199
199
|
|
200
|
+
##
|
201
|
+
# The number of child jobs executed.
|
202
|
+
#
|
203
|
+
# @return [Integer] The number of child jobs executed.
|
204
|
+
#
|
205
|
+
def num_child_jobs
|
206
|
+
@gapi.statistics.num_child_jobs || 0
|
207
|
+
end
|
208
|
+
|
209
|
+
##
|
210
|
+
# If this is a child job, the id of the parent.
|
211
|
+
#
|
212
|
+
# @return [String, nil] The ID of the parent job, or `nil` if not a child job.
|
213
|
+
#
|
214
|
+
def parent_job_id
|
215
|
+
@gapi.statistics.parent_job_id
|
216
|
+
end
|
217
|
+
|
218
|
+
##
|
219
|
+
# An array containing the job resource usage breakdown by reservation, if present. Reservation usage statistics
|
220
|
+
# are only reported for jobs that are executed within reservations. On-demand jobs do not report this data.
|
221
|
+
#
|
222
|
+
# @return [Array<Google::Cloud::Bigquery::Job::ReservationUsage>, nil] The reservation usage, if present.
|
223
|
+
#
|
224
|
+
def reservation_usage
|
225
|
+
return nil unless @gapi.statistics.reservation_usage
|
226
|
+
Array(@gapi.statistics.reservation_usage).map { |g| ReservationUsage.from_gapi g }
|
227
|
+
end
|
228
|
+
|
229
|
+
##
|
230
|
+
# The ID of the session if this job is part of one. See the `create_session` param in {Project#query_job} and
|
231
|
+
# {Dataset#query_job}.
|
232
|
+
#
|
233
|
+
# @return [String, nil] The session ID, or `nil` if not associated with a session.
|
234
|
+
#
|
235
|
+
def session_id
|
236
|
+
@gapi.statistics.session_info&.session_id
|
237
|
+
end
|
238
|
+
|
239
|
+
##
|
240
|
+
# The ID of a multi-statement transaction.
|
241
|
+
#
|
242
|
+
# @return [String, nil] The transaction ID, or `nil` if not associated with a transaction.
|
243
|
+
#
|
244
|
+
def transaction_id
|
245
|
+
@gapi.statistics.transaction_info&.transaction_id
|
246
|
+
end
|
247
|
+
|
248
|
+
##
|
249
|
+
# The statistics including stack frames for a child job of a script.
|
250
|
+
#
|
251
|
+
# @return [Google::Cloud::Bigquery::Job::ScriptStatistics, nil] The script statistics, or `nil` if the job is
|
252
|
+
# not a child job.
|
253
|
+
#
|
254
|
+
# @example
|
255
|
+
# require "google/cloud/bigquery"
|
256
|
+
#
|
257
|
+
# bigquery = Google::Cloud::Bigquery.new
|
258
|
+
#
|
259
|
+
# multi_statement_sql = <<~SQL
|
260
|
+
# -- Declare a variable to hold names as an array.
|
261
|
+
# DECLARE top_names ARRAY<STRING>;
|
262
|
+
# -- Build an array of the top 100 names from the year 2017.
|
263
|
+
# SET top_names = (
|
264
|
+
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
|
265
|
+
# FROM `bigquery-public-data.usa_names.usa_1910_current`
|
266
|
+
# WHERE year = 2017
|
267
|
+
# );
|
268
|
+
# -- Which names appear as words in Shakespeare's plays?
|
269
|
+
# SELECT
|
270
|
+
# name AS shakespeare_name
|
271
|
+
# FROM UNNEST(top_names) AS name
|
272
|
+
# WHERE name IN (
|
273
|
+
# SELECT word
|
274
|
+
# FROM `bigquery-public-data.samples.shakespeare`
|
275
|
+
# );
|
276
|
+
# SQL
|
277
|
+
#
|
278
|
+
# job = bigquery.query_job multi_statement_sql
|
279
|
+
#
|
280
|
+
# job.wait_until_done!
|
281
|
+
#
|
282
|
+
# child_jobs = bigquery.jobs parent_job: job
|
283
|
+
#
|
284
|
+
# child_jobs.each do |child_job|
|
285
|
+
# script_statistics = child_job.script_statistics
|
286
|
+
# puts script_statistics.evaluation_kind
|
287
|
+
# script_statistics.stack_frames.each do |stack_frame|
|
288
|
+
# puts stack_frame.text
|
289
|
+
# end
|
290
|
+
# end
|
291
|
+
#
|
292
|
+
def script_statistics
|
293
|
+
ScriptStatistics.from_gapi @gapi.statistics.script_statistics if @gapi.statistics.script_statistics
|
294
|
+
end
|
295
|
+
|
200
296
|
##
|
201
297
|
# The configuration for the job. Returns a hash.
|
202
298
|
#
|
@@ -306,6 +402,28 @@ module Google
|
|
306
402
|
true
|
307
403
|
end
|
308
404
|
|
405
|
+
##
|
406
|
+
# Requests that a job is deleted. This call will return when the job is deleted.
|
407
|
+
#
|
408
|
+
# @return [Boolean] Returns `true` if the job was deleted.
|
409
|
+
#
|
410
|
+
# @example
|
411
|
+
# require "google/cloud/bigquery"
|
412
|
+
#
|
413
|
+
# bigquery = Google::Cloud::Bigquery.new
|
414
|
+
#
|
415
|
+
# job = bigquery.job "my_job"
|
416
|
+
#
|
417
|
+
# job.delete
|
418
|
+
#
|
419
|
+
# @!group Lifecycle
|
420
|
+
#
|
421
|
+
def delete
|
422
|
+
ensure_service!
|
423
|
+
service.delete_job job_id, location: location
|
424
|
+
true
|
425
|
+
end
|
426
|
+
|
309
427
|
##
|
310
428
|
# Created a new job with the current configuration.
|
311
429
|
#
|
@@ -371,7 +489,7 @@ module Google
|
|
371
489
|
#
|
372
490
|
def wait_until_done!
|
373
491
|
backoff = lambda do |retries|
|
374
|
-
delay = [retries**2 + 5, 60].min # Maximum delay is 60
|
492
|
+
delay = [(retries**2) + 5, 60].min # Maximum delay is 60
|
375
493
|
sleep delay
|
376
494
|
end
|
377
495
|
retries = 0
|
@@ -423,6 +541,167 @@ module Google
|
|
423
541
|
end
|
424
542
|
end
|
425
543
|
|
544
|
+
##
|
545
|
+
# Represents Job resource usage breakdown by reservation.
|
546
|
+
#
|
547
|
+
# @attr_reader [String] name The reservation name or "unreserved" for on-demand resources usage.
|
548
|
+
# @attr_reader [Fixnum] slot_ms The slot-milliseconds the job spent in the given reservation.
|
549
|
+
#
|
550
|
+
class ReservationUsage
|
551
|
+
attr_reader :name
|
552
|
+
attr_reader :slot_ms
|
553
|
+
|
554
|
+
##
|
555
|
+
# @private Creates a new ReservationUsage instance.
|
556
|
+
def initialize name, slot_ms
|
557
|
+
@name = name
|
558
|
+
@slot_ms = slot_ms
|
559
|
+
end
|
560
|
+
|
561
|
+
##
|
562
|
+
# @private New ReservationUsage from a statistics.reservation_usage value.
|
563
|
+
def self.from_gapi gapi
|
564
|
+
new gapi.name, gapi.slot_ms
|
565
|
+
end
|
566
|
+
end
|
567
|
+
|
568
|
+
##
|
569
|
+
# Represents statistics for a child job of a script.
|
570
|
+
#
|
571
|
+
# @attr_reader [String] evaluation_kind Indicates the type of child job. Possible values include `STATEMENT` and
|
572
|
+
# `EXPRESSION`.
|
573
|
+
# @attr_reader [Array<Google::Cloud::Bigquery::Job::ScriptStackFrame>] stack_frames Stack trace where the
|
574
|
+
# current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where
|
575
|
+
# the current evaluation happened. The leaf frame is first, the primary script is last.
|
576
|
+
#
|
577
|
+
# @example
|
578
|
+
# require "google/cloud/bigquery"
|
579
|
+
#
|
580
|
+
# bigquery = Google::Cloud::Bigquery.new
|
581
|
+
#
|
582
|
+
# multi_statement_sql = <<~SQL
|
583
|
+
# -- Declare a variable to hold names as an array.
|
584
|
+
# DECLARE top_names ARRAY<STRING>;
|
585
|
+
# -- Build an array of the top 100 names from the year 2017.
|
586
|
+
# SET top_names = (
|
587
|
+
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
|
588
|
+
# FROM `bigquery-public-data.usa_names.usa_1910_current`
|
589
|
+
# WHERE year = 2017
|
590
|
+
# );
|
591
|
+
# -- Which names appear as words in Shakespeare's plays?
|
592
|
+
# SELECT
|
593
|
+
# name AS shakespeare_name
|
594
|
+
# FROM UNNEST(top_names) AS name
|
595
|
+
# WHERE name IN (
|
596
|
+
# SELECT word
|
597
|
+
# FROM `bigquery-public-data.samples.shakespeare`
|
598
|
+
# );
|
599
|
+
# SQL
|
600
|
+
#
|
601
|
+
# job = bigquery.query_job multi_statement_sql
|
602
|
+
#
|
603
|
+
# job.wait_until_done!
|
604
|
+
#
|
605
|
+
# child_jobs = bigquery.jobs parent_job: job
|
606
|
+
#
|
607
|
+
# child_jobs.each do |child_job|
|
608
|
+
# script_statistics = child_job.script_statistics
|
609
|
+
# puts script_statistics.evaluation_kind
|
610
|
+
# script_statistics.stack_frames.each do |stack_frame|
|
611
|
+
# puts stack_frame.text
|
612
|
+
# end
|
613
|
+
# end
|
614
|
+
#
|
615
|
+
class ScriptStatistics
|
616
|
+
attr_reader :evaluation_kind
|
617
|
+
attr_reader :stack_frames
|
618
|
+
|
619
|
+
##
|
620
|
+
# @private Creates a new ScriptStatistics instance.
|
621
|
+
def initialize evaluation_kind, stack_frames
|
622
|
+
@evaluation_kind = evaluation_kind
|
623
|
+
@stack_frames = stack_frames
|
624
|
+
end
|
625
|
+
|
626
|
+
##
|
627
|
+
# @private New ScriptStatistics from a statistics.script_statistics value.
|
628
|
+
def self.from_gapi gapi
|
629
|
+
frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
|
630
|
+
new gapi.evaluation_kind, frames
|
631
|
+
end
|
632
|
+
end
|
633
|
+
|
634
|
+
##
|
635
|
+
# Represents a stack frame showing the line/column/procedure name where the current evaluation happened.
|
636
|
+
#
|
637
|
+
# @attr_reader [Integer] start_line One-based start line.
|
638
|
+
# @attr_reader [Integer] start_column One-based start column.
|
639
|
+
# @attr_reader [Integer] end_line One-based end line.
|
640
|
+
# @attr_reader [Integer] end_column One-based end column.
|
641
|
+
# @attr_reader [String] text Text of the current statement/expression.
|
642
|
+
#
|
643
|
+
# @example
|
644
|
+
# require "google/cloud/bigquery"
|
645
|
+
#
|
646
|
+
# bigquery = Google::Cloud::Bigquery.new
|
647
|
+
#
|
648
|
+
# multi_statement_sql = <<~SQL
|
649
|
+
# -- Declare a variable to hold names as an array.
|
650
|
+
# DECLARE top_names ARRAY<STRING>;
|
651
|
+
# -- Build an array of the top 100 names from the year 2017.
|
652
|
+
# SET top_names = (
|
653
|
+
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
|
654
|
+
# FROM `bigquery-public-data.usa_names.usa_1910_current`
|
655
|
+
# WHERE year = 2017
|
656
|
+
# );
|
657
|
+
# -- Which names appear as words in Shakespeare's plays?
|
658
|
+
# SELECT
|
659
|
+
# name AS shakespeare_name
|
660
|
+
# FROM UNNEST(top_names) AS name
|
661
|
+
# WHERE name IN (
|
662
|
+
# SELECT word
|
663
|
+
# FROM `bigquery-public-data.samples.shakespeare`
|
664
|
+
# );
|
665
|
+
# SQL
|
666
|
+
#
|
667
|
+
# job = bigquery.query_job multi_statement_sql
|
668
|
+
#
|
669
|
+
# job.wait_until_done!
|
670
|
+
#
|
671
|
+
# child_jobs = bigquery.jobs parent_job: job
|
672
|
+
#
|
673
|
+
# child_jobs.each do |child_job|
|
674
|
+
# script_statistics = child_job.script_statistics
|
675
|
+
# puts script_statistics.evaluation_kind
|
676
|
+
# script_statistics.stack_frames.each do |stack_frame|
|
677
|
+
# puts stack_frame.text
|
678
|
+
# end
|
679
|
+
# end
|
680
|
+
#
|
681
|
+
class ScriptStackFrame
|
682
|
+
attr_reader :start_line
|
683
|
+
attr_reader :start_column
|
684
|
+
attr_reader :end_line
|
685
|
+
attr_reader :end_column
|
686
|
+
attr_reader :text
|
687
|
+
|
688
|
+
##
|
689
|
+
# @private Creates a new ScriptStackFrame instance.
|
690
|
+
def initialize start_line, start_column, end_line, end_column, text
|
691
|
+
@start_line = start_line
|
692
|
+
@start_column = start_column
|
693
|
+
@end_line = end_line
|
694
|
+
@end_column = end_column
|
695
|
+
@text = text
|
696
|
+
end
|
697
|
+
|
698
|
+
##
|
699
|
+
# @private New ScriptStackFrame from a statistics.script_statistics[].stack_frames element.
|
700
|
+
def self.from_gapi gapi
|
701
|
+
new gapi.start_line, gapi.start_column, gapi.end_line, gapi.end_column, gapi.text
|
702
|
+
end
|
703
|
+
end
|
704
|
+
|
426
705
|
protected
|
427
706
|
|
428
707
|
##
|
@@ -431,24 +710,20 @@ module Google
|
|
431
710
|
raise "Must have active connection" unless service
|
432
711
|
end
|
433
712
|
|
434
|
-
def retrieve_table project_id, dataset_id, table_id
|
713
|
+
def retrieve_table project_id, dataset_id, table_id, metadata_view: nil
|
435
714
|
ensure_service!
|
436
|
-
gapi = service.get_project_table project_id, dataset_id, table_id
|
715
|
+
gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: metadata_view
|
437
716
|
Table.from_gapi gapi, service
|
438
717
|
rescue Google::Cloud::NotFoundError
|
439
718
|
nil
|
440
719
|
end
|
441
720
|
|
442
721
|
def status_code_for_reason reason
|
443
|
-
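codes = { "accessDenied" => 403, "backendError" => 500,
|
444
|
-
"billingNotEnabled" => 403,
|
445
|
-
"billingTierLimitExceeded" => 400, "blocked" => 403,
|
446
|
-
"duplicate" => 409, "internalError" => 500,
|
447
|
-
"invalid" => 400, "invalidQuery" => 400, "notFound" => 404,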
|
448
|
-
"notImplemented" => 501, "quotaExceeded" => 403,
|
449
|
-
"rateLimitExceeded" => 403, "resourceInUse" => 400,
|
450
|
-
"resourcesExceeded" => 400, "responseTooLarge" => 403,
|
451
|
-
"tableUnavailable" => 400 }
|
722
|
+
codes = { "accessDenied" => 403, "backendError" => 500, "billingNotEnabled" => 403,
|
723
|
+
"billingTierLimitExceeded" => 400, "blocked" => 403, "duplicate" => 409, "internalError" => 500,
|
724
|
+
"invalid" => 400, "invalidQuery" => 400, "notFound" => 404, "notImplemented" => 501,
|
725
|
+
"quotaExceeded" => 403, "rateLimitExceeded" => 403, "resourceInUse" => 400,
|
726
|
+
"resourcesExceeded" => 400, "responseTooLarge" => 403, "tableUnavailable" => 400 }
|
452
727
|
codes[reason] || 0
|
453
728
|
end
|
454
729
|
end
|