google-cloud-bigquery 1.14.0 → 1.42.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -74,8 +74,8 @@ module Google
74
74
  ##
75
75
  # The ID of the job.
76
76
  #
77
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
78
- # (0-9), underscores (_), or dashes (-). The maximum length is 1,024
77
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
78
+ # (`[0-9]`), underscores (`_`), or dashes (`-`). The maximum length is 1,024
79
79
  # characters.
80
80
  #
81
81
  def job_id
@@ -197,6 +197,102 @@ module Google
197
197
  Convert.millis_to_time @gapi.statistics.end_time
198
198
  end
199
199
 
200
+ ##
201
+ # The number of child jobs executed.
202
+ #
203
+ # @return [Integer] The number of child jobs executed.
204
+ #
205
+ def num_child_jobs
206
+ @gapi.statistics.num_child_jobs || 0
207
+ end
208
+
209
+ ##
210
+ # If this is a child job, the ID of the parent job.
211
+ #
212
+ # @return [String, nil] The ID of the parent job, or `nil` if not a child job.
213
+ #
214
+ def parent_job_id
215
+ @gapi.statistics.parent_job_id
216
+ end
217
+
218
+ ##
219
+ # An array containing the job resource usage breakdown by reservation, if present. Reservation usage statistics
220
+ # are only reported for jobs that are executed within reservations. On-demand jobs do not report this data.
221
+ #
222
+ # @return [Array<Google::Cloud::Bigquery::Job::ReservationUsage>, nil] The reservation usage, if present.
223
+ #
224
+ def reservation_usage
225
+ return nil unless @gapi.statistics.reservation_usage
226
+ Array(@gapi.statistics.reservation_usage).map { |g| ReservationUsage.from_gapi g }
227
+ end
228
+
229
+ ##
230
+ # The ID of the session if this job is part of one. See the `create_session` param in {Project#query_job} and
231
+ # {Dataset#query_job}.
232
+ #
233
+ # @return [String, nil] The session ID, or `nil` if not associated with a session.
234
+ #
235
+ def session_id
236
+ @gapi.statistics.session_info&.session_id
237
+ end
238
+
239
+ ##
240
+ # The ID of a multi-statement transaction.
241
+ #
242
+ # @return [String, nil] The transaction ID, or `nil` if not associated with a transaction.
243
+ #
244
+ def transaction_id
245
+ @gapi.statistics.transaction_info&.transaction_id
246
+ end
247
+
248
+ ##
249
+ # The statistics including stack frames for a child job of a script.
250
+ #
251
+ # @return [Google::Cloud::Bigquery::Job::ScriptStatistics, nil] The script statistics, or `nil` if the job is
252
+ # not a child job.
253
+ #
254
+ # @example
255
+ # require "google/cloud/bigquery"
256
+ #
257
+ # bigquery = Google::Cloud::Bigquery.new
258
+ #
259
+ # multi_statement_sql = <<~SQL
260
+ # -- Declare a variable to hold names as an array.
261
+ # DECLARE top_names ARRAY<STRING>;
262
+ # -- Build an array of the top 100 names from the year 2017.
263
+ # SET top_names = (
264
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
265
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
266
+ # WHERE year = 2017
267
+ # );
268
+ # -- Which names appear as words in Shakespeare's plays?
269
+ # SELECT
270
+ # name AS shakespeare_name
271
+ # FROM UNNEST(top_names) AS name
272
+ # WHERE name IN (
273
+ # SELECT word
274
+ # FROM `bigquery-public-data.samples.shakespeare`
275
+ # );
276
+ # SQL
277
+ #
278
+ # job = bigquery.query_job multi_statement_sql
279
+ #
280
+ # job.wait_until_done!
281
+ #
282
+ # child_jobs = bigquery.jobs parent_job: job
283
+ #
284
+ # child_jobs.each do |child_job|
285
+ # script_statistics = child_job.script_statistics
286
+ # puts script_statistics.evaluation_kind
287
+ # script_statistics.stack_frames.each do |stack_frame|
288
+ # puts stack_frame.text
289
+ # end
290
+ # end
291
+ #
292
+ def script_statistics
293
+ ScriptStatistics.from_gapi @gapi.statistics.script_statistics if @gapi.statistics.script_statistics
294
+ end
295
+
200
296
  ##
201
297
  # The configuration for the job. Returns a hash.
202
298
  #
@@ -306,6 +402,28 @@ module Google
306
402
  true
307
403
  end
308
404
 
405
+ ##
406
+ # Requests that a job is deleted. This call will return when the job is deleted.
407
+ #
408
+ # @return [Boolean] Returns `true` if the job was deleted.
409
+ #
410
+ # @example
411
+ # require "google/cloud/bigquery"
412
+ #
413
+ # bigquery = Google::Cloud::Bigquery.new
414
+ #
415
+ # job = bigquery.job "my_job"
416
+ #
417
+ # job.delete
418
+ #
419
+ # @!group Lifecycle
420
+ #
421
+ def delete
422
+ ensure_service!
423
+ service.delete_job job_id, location: location
424
+ true
425
+ end
426
+
309
427
  ##
310
428
  # Creates a new job with the current configuration.
311
429
  #
@@ -371,7 +489,7 @@ module Google
371
489
  #
372
490
  def wait_until_done!
373
491
  backoff = lambda do |retries|
374
- delay = [retries**2 + 5, 60].min # Maximum delay is 60
492
+ delay = [(retries**2) + 5, 60].min # Maximum delay is 60
375
493
  sleep delay
376
494
  end
377
495
  retries = 0
@@ -423,6 +541,167 @@ module Google
423
541
  end
424
542
  end
425
543
 
544
+ ##
545
+ # Represents Job resource usage breakdown by reservation.
546
+ #
547
+ # @attr_reader [String] name The reservation name or "unreserved" for on-demand resources usage.
548
+ # @attr_reader [Integer] slot_ms The slot-milliseconds the job spent in the given reservation.
549
+ #
550
+ class ReservationUsage
551
+ attr_reader :name
552
+ attr_reader :slot_ms
553
+
554
+ ##
555
+ # @private Creates a new ReservationUsage instance.
556
+ def initialize name, slot_ms
557
+ @name = name
558
+ @slot_ms = slot_ms
559
+ end
560
+
561
+ ##
562
+ # @private New ReservationUsage from a statistics.reservation_usage value.
563
+ def self.from_gapi gapi
564
+ new gapi.name, gapi.slot_ms
565
+ end
566
+ end
567
+
568
+ ##
569
+ # Represents statistics for a child job of a script.
570
+ #
571
+ # @attr_reader [String] evaluation_kind Indicates the type of child job. Possible values include `STATEMENT` and
572
+ # `EXPRESSION`.
573
+ # @attr_reader [Array<Google::Cloud::Bigquery::Job::ScriptStackFrame>] stack_frames Stack trace where the
574
+ # current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where
575
+ # the current evaluation happened. The leaf frame is first, the primary script is last.
576
+ #
577
+ # @example
578
+ # require "google/cloud/bigquery"
579
+ #
580
+ # bigquery = Google::Cloud::Bigquery.new
581
+ #
582
+ # multi_statement_sql = <<~SQL
583
+ # -- Declare a variable to hold names as an array.
584
+ # DECLARE top_names ARRAY<STRING>;
585
+ # -- Build an array of the top 100 names from the year 2017.
586
+ # SET top_names = (
587
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
588
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
589
+ # WHERE year = 2017
590
+ # );
591
+ # -- Which names appear as words in Shakespeare's plays?
592
+ # SELECT
593
+ # name AS shakespeare_name
594
+ # FROM UNNEST(top_names) AS name
595
+ # WHERE name IN (
596
+ # SELECT word
597
+ # FROM `bigquery-public-data.samples.shakespeare`
598
+ # );
599
+ # SQL
600
+ #
601
+ # job = bigquery.query_job multi_statement_sql
602
+ #
603
+ # job.wait_until_done!
604
+ #
605
+ # child_jobs = bigquery.jobs parent_job: job
606
+ #
607
+ # child_jobs.each do |child_job|
608
+ # script_statistics = child_job.script_statistics
609
+ # puts script_statistics.evaluation_kind
610
+ # script_statistics.stack_frames.each do |stack_frame|
611
+ # puts stack_frame.text
612
+ # end
613
+ # end
614
+ #
615
+ class ScriptStatistics
616
+ attr_reader :evaluation_kind
617
+ attr_reader :stack_frames
618
+
619
+ ##
620
+ # @private Creates a new ScriptStatistics instance.
621
+ def initialize evaluation_kind, stack_frames
622
+ @evaluation_kind = evaluation_kind
623
+ @stack_frames = stack_frames
624
+ end
625
+
626
+ ##
627
+ # @private New ScriptStatistics from a statistics.script_statistics value.
628
+ def self.from_gapi gapi
629
+ frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
630
+ new gapi.evaluation_kind, frames
631
+ end
632
+ end
633
+
634
+ ##
635
+ # Represents a stack frame showing the line/column/procedure name where the current evaluation happened.
636
+ #
637
+ # @attr_reader [Integer] start_line One-based start line.
638
+ # @attr_reader [Integer] start_column One-based start column.
639
+ # @attr_reader [Integer] end_line One-based end line.
640
+ # @attr_reader [Integer] end_column One-based end column.
641
+ # @attr_reader [String] text Text of the current statement/expression.
642
+ #
643
+ # @example
644
+ # require "google/cloud/bigquery"
645
+ #
646
+ # bigquery = Google::Cloud::Bigquery.new
647
+ #
648
+ # multi_statement_sql = <<~SQL
649
+ # -- Declare a variable to hold names as an array.
650
+ # DECLARE top_names ARRAY<STRING>;
651
+ # -- Build an array of the top 100 names from the year 2017.
652
+ # SET top_names = (
653
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
654
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
655
+ # WHERE year = 2017
656
+ # );
657
+ # -- Which names appear as words in Shakespeare's plays?
658
+ # SELECT
659
+ # name AS shakespeare_name
660
+ # FROM UNNEST(top_names) AS name
661
+ # WHERE name IN (
662
+ # SELECT word
663
+ # FROM `bigquery-public-data.samples.shakespeare`
664
+ # );
665
+ # SQL
666
+ #
667
+ # job = bigquery.query_job multi_statement_sql
668
+ #
669
+ # job.wait_until_done!
670
+ #
671
+ # child_jobs = bigquery.jobs parent_job: job
672
+ #
673
+ # child_jobs.each do |child_job|
674
+ # script_statistics = child_job.script_statistics
675
+ # puts script_statistics.evaluation_kind
676
+ # script_statistics.stack_frames.each do |stack_frame|
677
+ # puts stack_frame.text
678
+ # end
679
+ # end
680
+ #
681
+ class ScriptStackFrame
682
+ attr_reader :start_line
683
+ attr_reader :start_column
684
+ attr_reader :end_line
685
+ attr_reader :end_column
686
+ attr_reader :text
687
+
688
+ ##
689
+ # @private Creates a new ScriptStackFrame instance.
690
+ def initialize start_line, start_column, end_line, end_column, text
691
+ @start_line = start_line
692
+ @start_column = start_column
693
+ @end_line = end_line
694
+ @end_column = end_column
695
+ @text = text
696
+ end
697
+
698
+ ##
699
+ # @private New ScriptStackFrame from a statistics.script_statistics.stack_frames[] element.
700
+ def self.from_gapi gapi
701
+ new gapi.start_line, gapi.start_column, gapi.end_line, gapi.end_column, gapi.text
702
+ end
703
+ end
704
+
426
705
  protected
427
706
 
428
707
  ##
@@ -431,24 +710,20 @@ module Google
431
710
  raise "Must have active connection" unless service
432
711
  end
433
712
 
434
- def retrieve_table project_id, dataset_id, table_id
713
+ def retrieve_table project_id, dataset_id, table_id, metadata_view: nil
435
714
  ensure_service!
436
- gapi = service.get_project_table project_id, dataset_id, table_id
715
+ gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: metadata_view
437
716
  Table.from_gapi gapi, service
438
717
  rescue Google::Cloud::NotFoundError
439
718
  nil
440
719
  end
441
720
 
442
721
  def status_code_for_reason reason
443
- codes = { "accessDenied" => 403, "backendError" => 500,
444
- "billingNotEnabled" => 403,
445
- "billingTierLimitExceeded" => 400, "blocked" => 403,
446
- "duplicate" => 409, "internalError" => 500,
447
- "invalid" => 400, "invalidQuery" => 400, "notFound" => 404,
448
- "notImplemented" => 501, "quotaExceeded" => 403,
449
- "rateLimitExceeded" => 403, "resourceInUse" => 400,
450
- "resourcesExceeded" => 400, "responseTooLarge" => 403,
451
- "tableUnavailable" => 400 }
722
+ codes = { "accessDenied" => 403, "backendError" => 500, "billingNotEnabled" => 403,
723
+ "billingTierLimitExceeded" => 400, "blocked" => 403, "duplicate" => 409, "internalError" => 500,
724
+ "invalid" => 400, "invalidQuery" => 400, "notFound" => 404, "notImplemented" => 501,
725
+ "quotaExceeded" => 403, "rateLimitExceeded" => 403, "resourceInUse" => 400,
726
+ "resourcesExceeded" => 400, "responseTooLarge" => 403, "tableUnavailable" => 400 }
452
727
  codes[reason] || 0
453
728
  end
454
729
  end