google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -74,8 +74,8 @@ module Google
74
74
  ##
75
75
  # The ID of the job.
76
76
  #
77
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
78
- # (0-9), underscores (_), or dashes (-). The maximum length is 1,024
77
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
78
+ # (`[0-9]`), underscores (`_`), or dashes (`-`). The maximum length is 1,024
79
79
  # characters.
80
80
  #
81
81
  def job_id
@@ -197,6 +197,102 @@ module Google
197
197
  Convert.millis_to_time @gapi.statistics.end_time
198
198
  end
199
199
 
200
+ ##
201
+ # The number of child jobs executed.
202
+ #
203
+ # @return [Integer] The number of child jobs executed.
204
+ #
205
+ def num_child_jobs
206
+ @gapi.statistics.num_child_jobs || 0
207
+ end
208
+
209
+ ##
210
+ # If this is a child job, the ID of the parent job.
211
+ #
212
+ # @return [String, nil] The ID of the parent job, or `nil` if not a child job.
213
+ #
214
+ def parent_job_id
215
+ @gapi.statistics.parent_job_id
216
+ end
217
+
218
+ ##
219
+ # An array containing the job resource usage breakdown by reservation, if present. Reservation usage statistics
220
+ # are only reported for jobs that are executed within reservations. On-demand jobs do not report this data.
221
+ #
222
+ # @return [Array<Google::Cloud::Bigquery::Job::ReservationUsage>, nil] The reservation usage, if present.
223
+ #
224
+ def reservation_usage
225
+ return nil unless @gapi.statistics.reservation_usage
226
+ Array(@gapi.statistics.reservation_usage).map { |g| ReservationUsage.from_gapi g }
227
+ end
228
+
229
+ ##
230
+ # The ID of the session if this job is part of one. See the `create_session` param in {Project#query_job} and
231
+ # {Dataset#query_job}.
232
+ #
233
+ # @return [String, nil] The session ID, or `nil` if not associated with a session.
234
+ #
235
+ def session_id
236
+ @gapi.statistics.session_info&.session_id
237
+ end
238
+
239
+ ##
240
+ # The ID of a multi-statement transaction.
241
+ #
242
+ # @return [String, nil] The transaction ID, or `nil` if not associated with a transaction.
243
+ #
244
+ def transaction_id
245
+ @gapi.statistics.transaction_info&.transaction_id
246
+ end
247
+
248
+ ##
249
+ # The statistics including stack frames for a child job of a script.
250
+ #
251
+ # @return [Google::Cloud::Bigquery::Job::ScriptStatistics, nil] The script statistics, or `nil` if the job is
252
+ # not a child job.
253
+ #
254
+ # @example
255
+ # require "google/cloud/bigquery"
256
+ #
257
+ # bigquery = Google::Cloud::Bigquery.new
258
+ #
259
+ # multi_statement_sql = <<~SQL
260
+ # -- Declare a variable to hold names as an array.
261
+ # DECLARE top_names ARRAY<STRING>;
262
+ # -- Build an array of the top 100 names from the year 2017.
263
+ # SET top_names = (
264
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
265
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
266
+ # WHERE year = 2017
267
+ # );
268
+ # -- Which names appear as words in Shakespeare's plays?
269
+ # SELECT
270
+ # name AS shakespeare_name
271
+ # FROM UNNEST(top_names) AS name
272
+ # WHERE name IN (
273
+ # SELECT word
274
+ # FROM `bigquery-public-data.samples.shakespeare`
275
+ # );
276
+ # SQL
277
+ #
278
+ # job = bigquery.query_job multi_statement_sql
279
+ #
280
+ # job.wait_until_done!
281
+ #
282
+ # child_jobs = bigquery.jobs parent_job: job
283
+ #
284
+ # child_jobs.each do |child_job|
285
+ # script_statistics = child_job.script_statistics
286
+ # puts script_statistics.evaluation_kind
287
+ # script_statistics.stack_frames.each do |stack_frame|
288
+ # puts stack_frame.text
289
+ # end
290
+ # end
291
+ #
292
+ def script_statistics
293
+ ScriptStatistics.from_gapi @gapi.statistics.script_statistics if @gapi.statistics.script_statistics
294
+ end
295
+
200
296
  ##
201
297
  # The configuration for the job. Returns a hash.
202
298
  #
@@ -306,6 +402,28 @@ module Google
306
402
  true
307
403
  end
308
404
 
405
+ ##
406
+ # Requests that a job be deleted. This call returns once the job has been deleted.
407
+ #
408
+ # @return [Boolean] Returns `true` if the job was deleted.
409
+ #
410
+ # @example
411
+ # require "google/cloud/bigquery"
412
+ #
413
+ # bigquery = Google::Cloud::Bigquery.new
414
+ #
415
+ # job = bigquery.job "my_job"
416
+ #
417
+ # job.delete
418
+ #
419
+ # @!group Lifecycle
420
+ #
421
+ def delete
422
+ ensure_service!
423
+ service.delete_job job_id, location: location
424
+ true
425
+ end
426
+
309
427
  ##
310
428
  # Creates a new job with the current configuration.
311
429
  #
@@ -371,7 +489,7 @@ module Google
371
489
  #
372
490
  def wait_until_done!
373
491
  backoff = lambda do |retries|
374
- delay = [retries**2 + 5, 60].min # Maximum delay is 60
492
+ delay = [(retries**2) + 5, 60].min # Maximum delay is 60
375
493
  sleep delay
376
494
  end
377
495
  retries = 0
@@ -423,6 +541,167 @@ module Google
423
541
  end
424
542
  end
425
543
 
544
+ ##
545
+ # Represents Job resource usage breakdown by reservation.
546
+ #
547
+ # @attr_reader [String] name The reservation name, or "unreserved" for on-demand resource usage.
548
+ # @attr_reader [Integer] slot_ms The slot-milliseconds the job spent in the given reservation.
549
+ #
550
+ class ReservationUsage
551
+ attr_reader :name
552
+ attr_reader :slot_ms
553
+
554
+ ##
555
+ # @private Creates a new ReservationUsage instance.
556
+ def initialize name, slot_ms
557
+ @name = name
558
+ @slot_ms = slot_ms
559
+ end
560
+
561
+ ##
562
+ # @private New ReservationUsage from a statistics.reservation_usage value.
563
+ def self.from_gapi gapi
564
+ new gapi.name, gapi.slot_ms
565
+ end
566
+ end
567
+
568
+ ##
569
+ # Represents statistics for a child job of a script.
570
+ #
571
+ # @attr_reader [String] evaluation_kind Indicates the type of child job. Possible values include `STATEMENT` and
572
+ # `EXPRESSION`.
573
+ # @attr_reader [Array<Google::Cloud::Bigquery::Job::ScriptStackFrame>] stack_frames Stack trace where the
574
+ # current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where
575
+ # the current evaluation happened. The leaf frame is first, the primary script is last.
576
+ #
577
+ # @example
578
+ # require "google/cloud/bigquery"
579
+ #
580
+ # bigquery = Google::Cloud::Bigquery.new
581
+ #
582
+ # multi_statement_sql = <<~SQL
583
+ # -- Declare a variable to hold names as an array.
584
+ # DECLARE top_names ARRAY<STRING>;
585
+ # -- Build an array of the top 100 names from the year 2017.
586
+ # SET top_names = (
587
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
588
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
589
+ # WHERE year = 2017
590
+ # );
591
+ # -- Which names appear as words in Shakespeare's plays?
592
+ # SELECT
593
+ # name AS shakespeare_name
594
+ # FROM UNNEST(top_names) AS name
595
+ # WHERE name IN (
596
+ # SELECT word
597
+ # FROM `bigquery-public-data.samples.shakespeare`
598
+ # );
599
+ # SQL
600
+ #
601
+ # job = bigquery.query_job multi_statement_sql
602
+ #
603
+ # job.wait_until_done!
604
+ #
605
+ # child_jobs = bigquery.jobs parent_job: job
606
+ #
607
+ # child_jobs.each do |child_job|
608
+ # script_statistics = child_job.script_statistics
609
+ # puts script_statistics.evaluation_kind
610
+ # script_statistics.stack_frames.each do |stack_frame|
611
+ # puts stack_frame.text
612
+ # end
613
+ # end
614
+ #
615
+ class ScriptStatistics
616
+ attr_reader :evaluation_kind
617
+ attr_reader :stack_frames
618
+
619
+ ##
620
+ # @private Creates a new ScriptStatistics instance.
621
+ def initialize evaluation_kind, stack_frames
622
+ @evaluation_kind = evaluation_kind
623
+ @stack_frames = stack_frames
624
+ end
625
+
626
+ ##
627
+ # @private New ScriptStatistics from a statistics.script_statistics value.
628
+ def self.from_gapi gapi
629
+ frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
630
+ new gapi.evaluation_kind, frames
631
+ end
632
+ end
633
+
634
+ ##
635
+ # Represents a stack frame showing the line/column/procedure name where the current evaluation happened.
636
+ #
637
+ # @attr_reader [Integer] start_line One-based start line.
638
+ # @attr_reader [Integer] start_column One-based start column.
639
+ # @attr_reader [Integer] end_line One-based end line.
640
+ # @attr_reader [Integer] end_column One-based end column.
641
+ # @attr_reader [String] text Text of the current statement/expression.
642
+ #
643
+ # @example
644
+ # require "google/cloud/bigquery"
645
+ #
646
+ # bigquery = Google::Cloud::Bigquery.new
647
+ #
648
+ # multi_statement_sql = <<~SQL
649
+ # -- Declare a variable to hold names as an array.
650
+ # DECLARE top_names ARRAY<STRING>;
651
+ # -- Build an array of the top 100 names from the year 2017.
652
+ # SET top_names = (
653
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
654
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
655
+ # WHERE year = 2017
656
+ # );
657
+ # -- Which names appear as words in Shakespeare's plays?
658
+ # SELECT
659
+ # name AS shakespeare_name
660
+ # FROM UNNEST(top_names) AS name
661
+ # WHERE name IN (
662
+ # SELECT word
663
+ # FROM `bigquery-public-data.samples.shakespeare`
664
+ # );
665
+ # SQL
666
+ #
667
+ # job = bigquery.query_job multi_statement_sql
668
+ #
669
+ # job.wait_until_done!
670
+ #
671
+ # child_jobs = bigquery.jobs parent_job: job
672
+ #
673
+ # child_jobs.each do |child_job|
674
+ # script_statistics = child_job.script_statistics
675
+ # puts script_statistics.evaluation_kind
676
+ # script_statistics.stack_frames.each do |stack_frame|
677
+ # puts stack_frame.text
678
+ # end
679
+ # end
680
+ #
681
+ class ScriptStackFrame
682
+ attr_reader :start_line
683
+ attr_reader :start_column
684
+ attr_reader :end_line
685
+ attr_reader :end_column
686
+ attr_reader :text
687
+
688
+ ##
689
+ # @private Creates a new ScriptStackFrame instance.
690
+ def initialize start_line, start_column, end_line, end_column, text
691
+ @start_line = start_line
692
+ @start_column = start_column
693
+ @end_line = end_line
694
+ @end_column = end_column
695
+ @text = text
696
+ end
697
+
698
+ ##
699
+ # @private New ScriptStackFrame from a statistics.script_statistics.stack_frames[] element.
700
+ def self.from_gapi gapi
701
+ new gapi.start_line, gapi.start_column, gapi.end_line, gapi.end_column, gapi.text
702
+ end
703
+ end
704
+
426
705
  protected
427
706
 
428
707
  ##
@@ -431,24 +710,20 @@ module Google
431
710
  raise "Must have active connection" unless service
432
711
  end
433
712
 
434
- def retrieve_table project_id, dataset_id, table_id
713
+ def retrieve_table project_id, dataset_id, table_id, metadata_view: nil
435
714
  ensure_service!
436
- gapi = service.get_project_table project_id, dataset_id, table_id
715
+ gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: metadata_view
437
716
  Table.from_gapi gapi, service
438
717
  rescue Google::Cloud::NotFoundError
439
718
  nil
440
719
  end
441
720
 
442
721
  def status_code_for_reason reason
443
- codes = { "accessDenied" => 403, "backendError" => 500,
444
- "billingNotEnabled" => 403,
445
- "billingTierLimitExceeded" => 400, "blocked" => 403,
446
- "duplicate" => 409, "internalError" => 500,
447
- "invalid" => 400, "invalidQuery" => 400, "notFound" => 404,
448
- "notImplemented" => 501, "quotaExceeded" => 403,
449
- "rateLimitExceeded" => 403, "resourceInUse" => 400,
450
- "resourcesExceeded" => 400, "responseTooLarge" => 403,
451
- "tableUnavailable" => 400 }
722
+ codes = { "accessDenied" => 403, "backendError" => 500, "billingNotEnabled" => 403,
723
+ "billingTierLimitExceeded" => 400, "blocked" => 403, "duplicate" => 409, "internalError" => 500,
724
+ "invalid" => 400, "invalidQuery" => 400, "notFound" => 404, "notImplemented" => 501,
725
+ "quotaExceeded" => 403, "rateLimitExceeded" => 403, "resourceInUse" => 400,
726
+ "resourcesExceeded" => 400, "responseTooLarge" => 403, "tableUnavailable" => 400 }
452
727
  codes[reason] || 0
453
728
  end
454
729
  end