google-cloud-bigquery 1.14.0 → 1.42.0

Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -18,9 +18,11 @@ require "google/cloud/errors"
  require "google/cloud/bigquery/service"
  require "google/cloud/bigquery/table"
  require "google/cloud/bigquery/model"
+ require "google/cloud/bigquery/routine"
  require "google/cloud/bigquery/external"
  require "google/cloud/bigquery/dataset/list"
  require "google/cloud/bigquery/dataset/access"
+ require "google/cloud/bigquery/dataset/tag"
  require "google/cloud/bigquery/convert"
  require "google/apis/bigquery_v2"
 
@@ -68,8 +70,8 @@ module Google
  ##
  # A unique ID for this dataset, without the project name.
  #
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
- #   (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+ #   (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -312,12 +314,19 @@ module Google
  # @param [Hash<String, String>] labels A hash containing key/value
  #   pairs.
  #
- #   * Label keys and values can be no longer than 63 characters.
- #   * Label keys and values can contain only lowercase letters, numbers,
- #     underscores, hyphens, and international characters.
- #   * Label keys and values cannot exceed 128 bytes in size.
- #   * Label keys must begin with a letter.
- #   * Label keys must be unique within a dataset.
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  #
  # @example
  #   require "google/cloud/bigquery"
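For reference, a minimal sketch of applying labels that satisfy the rules above (the dataset ID and label values are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset" # hypothetical dataset ID

    # Keys must start with a lowercase letter; values may be empty.
    dataset.labels = { "env" => "production", "team" => "data-eng" }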
@@ -335,6 +344,75 @@ module Google
  patch_gapi! :labels
  end
 
+ ##
+ # The {EncryptionConfiguration} object that represents the default
+ # encryption method for all tables and models in the dataset. Once this
+ # property is set, all newly-created partitioned tables and models in
+ # the dataset will have their encryption set to this value, unless the
+ # table creation request (or query) overrides it.
+ #
+ # Present only if this dataset is using custom default encryption.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ #   Protecting Data with Cloud KMS Keys
+ #
+ # @return [EncryptionConfiguration, nil] The default encryption
+ #   configuration.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   encrypt_config = dataset.default_encryption
+ #
+ # @!group Attributes
+ #
+ def default_encryption
+   return nil if reference?
+   ensure_full_data!
+   return nil if @gapi.default_encryption_configuration.nil?
+   EncryptionConfiguration.from_gapi(@gapi.default_encryption_configuration).freeze
+ end
+
+ ##
+ # Set the {EncryptionConfiguration} object that represents the default
+ # encryption method for all tables and models in the dataset. Once this
+ # property is set, all newly-created partitioned tables and models in
+ # the dataset will have their encryption set to this value, unless the
+ # table creation request (or query) overrides it.
+ #
+ # If the dataset is not a full resource representation (see
+ # {#resource_full?}), the full representation will be retrieved before
+ # the update to comply with ETag-based optimistic concurrency control.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ #   Protecting Data with Cloud KMS Keys
+ #
+ # @param [EncryptionConfiguration] value The new encryption config.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
+ #   encrypt_config = bigquery.encryption kms_key: key_name
+ #
+ #   dataset.default_encryption = encrypt_config
+ #
+ # @!group Attributes
+ #
+ def default_encryption= value
+   ensure_full_data!
+   @gapi.default_encryption_configuration = value.to_gapi
+   patch_gapi! :default_encryption_configuration
+ end
+
  ##
  # Retrieves the access rules for a Dataset. The rules can be updated
  # when passing a block, see {Dataset::Access} for all the methods
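A quick sketch of reading the default encryption back after setting it; the `kms_key` reader on {EncryptionConfiguration} is assumed from the gem's encryption docs:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # nil means the dataset uses Google-managed encryption.
    if (config = dataset.default_encryption)
      puts "Default CMEK key: #{config.kms_key}"
    else
      puts "Dataset uses default Google-managed encryption"
    end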
@@ -389,6 +467,21 @@ module Google
  access_builder.freeze
  end
 
+ ##
+ # Retrieves the tags associated with this dataset. Tag keys are
+ # globally unique and are managed via the Resource Manager API.
+ #
+ # @see https://cloud.google.com/resource-manager/docs/tags/tags-overview
+ #   for more information.
+ #
+ # @return [Array<Google::Cloud::Bigquery::Dataset::Tag>, nil] The list of
+ #   tags, or `nil` if the dataset has no tags.
+ #
+ def tags
+   ensure_full_data!
+   return nil if @gapi.tags.nil?
+   @gapi.tags.map { |gapi| Tag.from_gapi(gapi) }
+ end
+
  ##
  # Permanently deletes the dataset. The dataset must be empty before it
  # can be deleted unless the `force` option is set to `true`.
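The new #tags reader ships without an @example; a minimal usage sketch, assuming each Dataset::Tag exposes tag_key and tag_value readers (see data/lib/google/cloud/bigquery/dataset/tag.rb in the file list):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # #tags returns nil when the dataset has no tags.
    (dataset.tags || []).each do |tag|
      puts "#{tag.tag_key} => #{tag.tag_value}"
    end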
@@ -424,7 +517,7 @@ module Google
  # you can pass the table's schema as a hash (see example.)
  #
  # @param [String] table_id The ID of the table. The ID must contain only
- #   letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
+ #   letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum
  #   length is 1,024 characters.
  # @param [String] name A descriptive name for the table.
  # @param [String] description A user-friendly description of the table.
@@ -485,6 +578,40 @@ module Google
  #     end
  #   end
  #
+ # @example With time partitioning and clustering.
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   table = dataset.create_table "my_table" do |t|
+ #     t.schema do |schema|
+ #       schema.timestamp "dob", mode: :required
+ #       schema.string "first_name", mode: :required
+ #       schema.string "last_name", mode: :required
+ #     end
+ #     t.time_partitioning_type = "DAY"
+ #     t.time_partitioning_field = "dob"
+ #     t.clustering_fields = ["last_name", "first_name"]
+ #   end
+ #
+ # @example With range partitioning.
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   table = dataset.create_table "my_table" do |t|
+ #     t.schema do |schema|
+ #       schema.integer "my_table_id", mode: :required
+ #       schema.string "my_table_data", mode: :required
+ #     end
+ #     t.range_partitioning_field = "my_table_id"
+ #     t.range_partitioning_start = 0
+ #     t.range_partitioning_interval = 10
+ #     t.range_partitioning_end = 100
+ #   end
+ #
  # @!group Table
  #
  def create_table table_id, name: nil, description: nil
@@ -507,17 +634,19 @@ module Google
  end
 
  ##
- # Creates a new [view](https://cloud.google.com/bigquery/docs/views)
- # table, which is a virtual table defined by the given SQL query.
+ # Creates a new view, which is a virtual table defined by the given SQL query.
  #
- # BigQuery's views are logical views, not materialized views, which
- # means that the query that defines the view is re-executed every time
- # the view is queried. Queries are billed according to the total amount
+ # With BigQuery's logical views, the query that defines the view is re-executed
+ # every time the view is queried. Queries are billed according to the total amount
  # of data in all table fields referenced directly or indirectly by the
  # top-level query. (See {Table#view?} and {Table#query}.)
  #
+ # For materialized views, see {#create_materialized_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/views Creating views
+ #
  # @param [String] table_id The ID of the view table. The ID must contain
- #   only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
+ #   only letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The
  #   maximum length is 1,024 characters.
  # @param [String] query The query that BigQuery executes when the view
  #   is referenced.
@@ -532,12 +661,20 @@ module Google
  #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
  #   dialect. Optional. The default value is false.
  # @param [Array<String>, String] udfs User-defined function resources
- #   used in the query. May be either a code resource to load from a
- #   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ #   used in a legacy SQL query. May be either a code resource to load from
+ #   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  #   that contains code for a user-defined function (UDF). Providing an
  #   inline code resource is equivalent to providing a URI for a file
- #   containing the same code. See [User-Defined
- #   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
+ #   containing the same code.
+ #
+ #   This parameter is used for defining User Defined Function (UDF)
+ #   resources only when using legacy SQL. Users of standard SQL should
+ #   leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+ #   Routines API to define UDF resources.
+ #
+ #   For additional information on migrating, see: [Migrating to
+ #   standard SQL - Differences in user-defined JavaScript
+ #   functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
  #
  # @return [Google::Cloud::Bigquery::Table] A new table object.
  #
@@ -548,7 +685,7 @@ module Google
  #   dataset = bigquery.dataset "my_dataset"
  #
  #   view = dataset.create_view "my_view",
- #     "SELECT name, age FROM proj.dataset.users"
+ #                               "SELECT name, age FROM proj.dataset.users"
  #
  # @example A name and description can be provided:
  #   require "google/cloud/bigquery"
@@ -557,13 +694,18 @@ module Google
  #   dataset = bigquery.dataset "my_dataset"
  #
  #   view = dataset.create_view "my_view",
- #     "SELECT name, age FROM proj.dataset.users",
- #     name: "My View", description: "This is my view"
+ #                               "SELECT name, age FROM proj.dataset.users",
+ #                               name: "My View", description: "This is my view"
  #
  # @!group Table
  #
- def create_view table_id, query, name: nil, description: nil,
-                 standard_sql: nil, legacy_sql: nil, udfs: nil
+ def create_view table_id,
+                 query,
+                 name: nil,
+                 description: nil,
+                 standard_sql: nil,
+                 legacy_sql: nil,
+                 udfs: nil
    use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
    new_view_opts = {
      table_reference: Google::Apis::BigqueryV2::TableReference.new(
@@ -579,7 +721,81 @@ module Google
        user_defined_function_resources: udfs_gapi(udfs)
      )
    }.delete_if { |_, v| v.nil? }
-   new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
+   new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+   gapi = service.insert_table dataset_id, new_view
+   Table.from_gapi gapi, service
+ end
+
+ ##
+ # Creates a new materialized view.
+ #
+ # Materialized views are precomputed views that periodically cache results of a query for increased performance
+ # and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+ # only delta changes from the base table to compute up-to-date results.
+ #
+ # Queries that use materialized views are generally faster and consume fewer resources than queries that retrieve
+ # the same data only from the base table. Materialized views can significantly boost the performance of
+ # workloads with common, repeated queries.
+ #
+ # For logical views, see {#create_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+ #
+ # @param [String] table_id The ID of the materialized view table. The ID must contain only letters (`[A-Za-z]`),
+ #   numbers (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
+ # @param [String] query The query that BigQuery executes when the materialized view is referenced.
+ # @param [String] name A descriptive name for the table.
+ # @param [String] description A user-friendly description of the table.
+ # @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+ #   updated. Optional. The default value is true.
+ # @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+ #   will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+ #
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   materialized_view = dataset.create_materialized_view "my_materialized_view",
+ #                                                        "SELECT name, age FROM proj.dataset.users"
+ #
+ # @example Automatic refresh can be disabled:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   materialized_view = dataset.create_materialized_view "my_materialized_view",
+ #                                                        "SELECT name, age FROM proj.dataset.users",
+ #                                                        enable_refresh: false
+ #
+ # @!group Table
+ #
+ def create_materialized_view table_id,
+                              query,
+                              name: nil,
+                              description: nil,
+                              enable_refresh: nil,
+                              refresh_interval_ms: nil
+   new_view_opts = {
+     table_reference: Google::Apis::BigqueryV2::TableReference.new(
+       project_id: project_id,
+       dataset_id: dataset_id,
+       table_id: table_id
+     ),
+     friendly_name: name,
+     description: description,
+     materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+       enable_refresh: enable_refresh,
+       query: query,
+       refresh_interval_ms: refresh_interval_ms
+     )
+   }.delete_if { |_, v| v.nil? }
+   new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
 
    gapi = service.insert_table dataset_id, new_view
    Table.from_gapi gapi, service
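Building on the parameters documented above, a sketch of a materialized view with a custom refresh cadence (the query and IDs are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # Refresh at most once per hour instead of the 30-minute default.
    materialized_view = dataset.create_materialized_view "my_materialized_view",
                                                         "SELECT name, COUNT(*) AS n FROM my_table GROUP BY name",
                                                         refresh_interval_ms: 3_600_000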
@@ -593,6 +809,11 @@ module Google
  #   object without verifying that the resource exists on the BigQuery
  #   service. Calls made on this object will raise errors if the resource
  #   does not exist. Default is `false`. Optional.
+ # @param [String] view Specifies the view that determines which table information is returned.
+ #   By default, basic table information and storage statistics (STORAGE_STATS) are returned.
+ #   Accepted values include `:unspecified`, `:basic`, `:storage`, and
+ #   `:full`. For more information, see [BigQuery Classes](@todo: Update the link).
+ #   The default value is the `:unspecified` view type.
  #
  # @return [Google::Cloud::Bigquery::Table, nil] Returns `nil` if the
  #   table does not exist.
@@ -615,15 +836,22 @@ module Google
  #
  #   table = dataset.table "my_table", skip_lookup: true
  #
+ # @example Avoid retrieving transient stats of the table with `view`:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   table = dataset.table "my_table", view: "basic"
+ #
  # @!group Table
  #
- def table table_id, skip_lookup: nil
+ def table table_id, skip_lookup: nil, view: nil
    ensure_service!
-   if skip_lookup
-     return Table.new_reference project_id, dataset_id, table_id, service
-   end
-   gapi = service.get_table dataset_id, table_id
-   Table.from_gapi gapi, service
+   return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
+   gapi = service.get_table dataset_id, table_id, metadata_view: view
+   Table.from_gapi gapi, service, metadata_view: view
  rescue Google::Cloud::NotFoundError
    nil
  end
@@ -664,8 +892,7 @@ module Google
  #
  def tables token: nil, max: nil
    ensure_service!
-   options = { token: token, max: max }
-   gapi = service.list_tables dataset_id, options
+   gapi = service.list_tables dataset_id, token: token, max: max
    Table::List.from_gapi gapi, service, dataset_id, max
  end
 
@@ -703,9 +930,7 @@ module Google
  #
  def model model_id, skip_lookup: nil
    ensure_service!
-   if skip_lookup
-     return Model.new_reference project_id, dataset_id, model_id, service
-   end
+   return Model.new_reference project_id, dataset_id, model_id, service if skip_lookup
    gapi = service.get_model dataset_id, model_id
    Model.from_gapi_json gapi, service
  rescue Google::Cloud::NotFoundError
@@ -752,6 +977,174 @@ module Google
    Model::List.from_gapi gapi, service, dataset_id, max
  end
 
+ ##
+ # Creates a new routine. The following attributes may be set in the yielded block:
+ # {Routine::Updater#routine_type=}, {Routine::Updater#language=}, {Routine::Updater#arguments=},
+ # {Routine::Updater#return_type=}, {Routine::Updater#imported_libraries=}, {Routine::Updater#body=}, and
+ # {Routine::Updater#description=}.
+ #
+ # @param [String] routine_id The ID of the routine. The ID must contain only
+ #   letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum length
+ #   is 256 characters.
+ # @yield [routine] A block for setting properties on the routine.
+ # @yieldparam [Google::Cloud::Bigquery::Routine::Updater] routine An updater to set additional properties on the
+ #   routine.
+ #
+ # @return [Google::Cloud::Bigquery::Routine] A new routine object.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   routine = dataset.create_routine "my_routine" do |r|
+ #     r.routine_type = "SCALAR_FUNCTION"
+ #     r.language = "SQL"
+ #     r.arguments = [
+ #       Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
+ #     ]
+ #     r.body = "x * 3"
+ #     r.description = "My routine description"
+ #   end
+ #
+ #   puts routine.routine_id
+ #
+ # @example Extended example:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #   routine = dataset.create_routine "my_routine" do |r|
+ #     r.routine_type = "SCALAR_FUNCTION"
+ #     r.language = :SQL
+ #     r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+ #     r.arguments = [
+ #       Google::Cloud::Bigquery::Argument.new(
+ #         name: "arr",
+ #         argument_kind: "FIXED_TYPE",
+ #         data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+ #           type_kind: "ARRAY",
+ #           array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+ #             type_kind: "STRUCT",
+ #             struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+ #               fields: [
+ #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+ #                   name: "name",
+ #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+ #                 ),
+ #                 Google::Cloud::Bigquery::StandardSql::Field.new(
+ #                   name: "val",
+ #                   type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+ #                 )
+ #               ]
+ #             )
+ #           )
+ #         )
+ #       )
+ #     ]
+ #   end
+ #
+ # @!group Routine
+ #
+ def create_routine routine_id
+   ensure_service!
+   new_tb = Google::Apis::BigqueryV2::Routine.new(
+     routine_reference: Google::Apis::BigqueryV2::RoutineReference.new(
+       project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
+     )
+   )
+   updater = Routine::Updater.new new_tb
+
+   yield updater if block_given?
+
+   gapi = service.insert_routine dataset_id, updater.to_gapi
+   Routine.from_gapi gapi, service
+ end
+
+ ##
+ # Retrieves an existing routine by ID.
+ #
+ # @param [String] routine_id The ID of a routine.
+ # @param [Boolean] skip_lookup Optionally create just a local reference
+ #   object without verifying that the resource exists on the BigQuery
+ #   service. Calls made on this object will raise errors if the resource
+ #   does not exist. Default is `false`. Optional.
+ #
+ # @return [Google::Cloud::Bigquery::Routine, nil] Returns `nil` if the
+ #   routine does not exist.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   routine = dataset.routine "my_routine"
+ #   puts routine.routine_id
+ #
+ # @example Avoid retrieving the routine resource with `skip_lookup`:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   routine = dataset.routine "my_routine", skip_lookup: true
+ #
+ # @!group Routine
+ #
+ def routine routine_id, skip_lookup: nil
+   ensure_service!
+   return Routine.new_reference project_id, dataset_id, routine_id, service if skip_lookup
+   gapi = service.get_routine dataset_id, routine_id
+   Routine.from_gapi gapi, service
+ rescue Google::Cloud::NotFoundError
+   nil
+ end
+
+ ##
+ # Retrieves the list of routines belonging to the dataset.
+ #
+ # @param [String] token A previously-returned page token representing
+ #   part of the larger set of results to view.
+ # @param [Integer] max Maximum number of routines to return.
+ # @param [String] filter If set, then only the routines matching this filter are returned. The currently supported
+ #   form is `routineType:`, with a {Routine#routine_type} enum value. Example: `routineType:SCALAR_FUNCTION`.
+ #
+ # @return [Array<Google::Cloud::Bigquery::Routine>] An array of routines
+ #   (See {Google::Cloud::Bigquery::Routine::List})
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   routines = dataset.routines
+ #   routines.each do |routine|
+ #     puts routine.routine_id
+ #   end
+ #
+ # @example Retrieve all routines: (See {Routine::List#all})
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   routines = dataset.routines
+ #   routines.all do |routine|
+ #     puts routine.routine_id
+ #   end
+ #
+ # @!group Routine
+ #
+ def routines token: nil, max: nil, filter: nil
+   ensure_service!
+   gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
+   Routine::List.from_gapi gapi, service, dataset_id, max, filter: filter
+ end
+
  ##
  # Queries data by creating a [query
  # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
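Once created, the scalar SQL function from the create_routine example above can be invoked from a query. Because Dataset#query sets the dataset as the query's default, the routine name should need no qualification (a sketch, not from the gem docs):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # my_routine(x INT64) returns x * 3, per the example above.
    data = dataset.query "SELECT my_routine(10) AS tripled"
    data.each { |row| puts row[:tripled] } # => 30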
@@ -759,27 +1152,6 @@ module Google
  # Sets the current dataset as the default dataset in the query. Useful
  # for using unqualified table names.
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery    | Ruby           | Notes |
- # |-------------|----------------|---|
- # | `BOOL`      | `true`/`false` | |
- # | `INT64`     | `Integer`      | |
- # | `FLOAT64`   | `Float`        | |
- # | `NUMERIC`   | `BigDecimal`   | Will be rounded to 9 decimal places |
- # | `STRING`    | `String`       | |
- # | `DATETIME`  | `DateTime`     | `DATETIME` does not support time zone. |
- # | `DATE`      | `Date`         | |
- # | `TIMESTAMP` | `Time`         | |
- # | `TIME`      | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY`     | `Array`        | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`         | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method. If the
  # dataset is a full resource representation (see {#resource_full?}), the
@@ -790,13 +1162,60 @@ module Google
  #   syntax](https://cloud.google.com/bigquery/query-reference), of the
  #   query to execute. Example: "SELECT count(f1) FROM
  #   [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- #   arguments when the `query` string contains either positional (`?`)
- #   or named (`@myparam`) query parameters. If value passed is an array
- #   `["foo"]`, the query must use positional query parameters. If value
- #   passed is a hash `{ myparam: "foo" }`, the query must use named
- #   query parameters. When set, `legacy_sql` will automatically be set
- #   to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ #   either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
+ #   query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
+ #   use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+ #   true.
+ #
+ #   BigQuery types are converted from Ruby types as follows:
+ #
+ #   | BigQuery     | Ruby                                 | Notes                                              |
+ #   |--------------|--------------------------------------|----------------------------------------------------|
+ #   | `BOOL`       | `true`/`false`                       |                                                    |
+ #   | `INT64`      | `Integer`                            |                                                    |
+ #   | `FLOAT64`    | `Float`                              |                                                    |
+ #   | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ #   | `BIGNUMERIC` | `BigDecimal`                         | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ #   | `STRING`     | `String`                             |                                                    |
+ #   | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ #   | `DATE`       | `Date`                               |                                                    |
+ #   | `GEOGRAPHY`  | `String` (WKT or GeoJSON)            | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ #   | `TIMESTAMP`  | `Time`                               |                                                    |
+ #   | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ #   | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ #   | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ #   | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+ #
+ #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ #   of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ #   GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ #   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ #   specify the SQL type for these values.
+ #
+ #   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ #   positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ #   should be BigQuery type codes from the following list:
+ #
+ #   * `:BOOL`
+ #   * `:INT64`
+ #   * `:FLOAT64`
+ #   * `:NUMERIC`
+ #   * `:BIGNUMERIC`
+ #   * `:STRING`
+ #   * `:DATETIME`
+ #   * `:DATE`
+ #   * `:GEOGRAPHY`
+ #   * `:TIMESTAMP`
+ #   * `:TIME`
+ #   * `:BYTES`
+ #   * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ #     is specified as `[:INT64]`.
+ #   * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ #     match the `params` hash, and the values are the type codes that match the data.
+ #
+ #   Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  #   that represents the mapping of the external tables to the table
  #   names used in the SQL query. The hash keys are the table names, and
@@ -855,13 +1274,19 @@ module Google
  #   Flattens all nested and repeated fields in the query results. The
  #   default value is `true`. `large_results` parameter must be `true` if
  #   this is set to `false`.
+ # @param [Integer] maximum_billing_tier Limits the billing tier for this
+ #   job. Queries that have resource usage beyond this tier will fail
+ #   (without incurring a charge). WARNING: The billed byte amount can be
+ #   multiplied by an amount up to this number! Most users should not need
+ #   to alter this setting, and we recommend that you avoid introducing new
+ #   uses of it. Deprecated.
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
  #   job. Queries that will have bytes billed beyond this limit will fail
  #   (without incurring a charge). Optional. If unspecified, this will be
  #   set to your project default.
  # @param [String] job_id A user-defined ID for the query job. The ID
- #   must contain only letters (a-z, A-Z), numbers (0-9), underscores
- #   (_), or dashes (-). The maximum length is 1,024 characters. If
+ #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  #   `job_id` is provided, then `prefix` will not be used.
  #
  #   See [Generating a job
@@ -870,27 +1295,48 @@ module Google
  #   prepended to a generated value to produce a unique job ID. For
  #   example, the prefix `daily_import_job_` can be given to generate a
  #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- #   prefix must contain only letters (a-z, A-Z), numbers (0-9),
- #   underscores (_), or dashes (-). The maximum length of the entire ID
+ #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
  #   be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- #   the job. You can use these to organize and group your jobs. Label
- #   keys and values can be no longer than 63 characters, can only
- #   contain lowercase letters, numeric characters, underscores and
- #   dashes. International characters are allowed. Label values are
- #   optional. Label keys must start with a letter and each label in the
- #   list must have a different key. See [Requirements for
- #   labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ #   the job. You can use these to organize and group your jobs.
+ #
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  # @param [Array<String>, String] udfs User-defined function resources
- #   used in the query. May be either a code resource to load from a
- #   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ #   used in a legacy SQL query. May be either a code resource to load from
+ #   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  #   that contains code for a user-defined function (UDF). Providing an
  #   inline code resource is equivalent to providing a URI for a file
- #   containing the same code. See [User-Defined
- #   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
- # @param [Integer] maximum_billing_tier Deprecated: Change the billing
- #   tier to allow high-compute queries.
+ #   containing the same code.
+ #
+ #   This parameter is used for defining User Defined Function (UDF)
+ #   resources only when using legacy SQL. Users of standard SQL should
+ #   leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+ #   Routines API to define UDF resources.
+ #
+ #   For additional information on migrating, see: [Migrating to
+ #   standard SQL - Differences in user-defined JavaScript
+ #   functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
+ # @param [Boolean] create_session If true, creates a new session, where the
+ #   session ID will be a server-generated random ID. If false, runs the query
+ #   with an existing session ID when one is provided in the `session_id`
+ #   param, otherwise runs the query in non-session mode. See {Job#session_id}.
+ #   The default value is false.
+ # @param [String] session_id The ID of an existing session. See also the
+ #   `create_session` param and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  #   configuration object for setting additional options for the query.
@@ -960,32 +1406,62 @@ module Google
  #     end
  #   end
  #
+ # @example Query using named query parameters with types:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   job = dataset.query_job "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
+ #                           params: { ids: [] },
+ #                           types: { ids: [:INT64] }
+ #
+ #   job.wait_until_done!
+ #   if !job.failed?
+ #     job.data.each do |row|
+ #       puts row[:name]
+ #     end
+ #   end
+ #
  # @example Execute a DDL statement:
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
  #
- #   job = bigquery.query_job "CREATE TABLE my_table (x INT64)"
+ #   job = dataset.query_job "CREATE TABLE my_table (x INT64)"
  #
  #   job.wait_until_done!
  #   if !job.failed?
- #     table_ref = job.ddl_target_table
+ #     table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  #   end
  #
  # @example Execute a DML statement:
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
  #
- #   job = bigquery.query_job "UPDATE my_table " \
- #                            "SET x = x + 1 " \
- #                            "WHERE x IS NOT NULL"
+ #   job = dataset.query_job "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
  #
  #   job.wait_until_done!
  #   if !job.failed?
  #     puts job.num_dml_affected_rows
  #   end
  #
+ # @example Run query in a session:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
+ #
+ #   job.wait_until_done!
+ #
+ #   session_id = job.session_id
+ #   data = dataset.query "SELECT * FROM temptable", session_id: session_id
+ #
  # @example Query using external data source, set destination:
  #   require "google/cloud/bigquery"
  #
@@ -1012,21 +1488,52 @@ module Google
  #
  # @!group Data
  #
- def query_job query, params: nil, external: nil,
-               priority: "INTERACTIVE", cache: true, table: nil,
-               create: nil, write: nil, dryrun: nil, standard_sql: nil,
-               legacy_sql: nil, large_results: nil, flatten: nil,
-               maximum_billing_tier: nil, maximum_bytes_billed: nil,
-               job_id: nil, prefix: nil, labels: nil, udfs: nil
+ def query_job query,
+               params: nil,
+               types: nil,
+               external: nil,
+               priority: "INTERACTIVE",
+               cache: true,
+               table: nil,
+               create: nil,
+               write: nil,
+               dryrun: nil,
+               standard_sql: nil,
+               legacy_sql: nil,
+               large_results: nil,
+               flatten: nil,
+               maximum_billing_tier: nil,
+               maximum_bytes_billed: nil,
+               job_id: nil,
+               prefix: nil,
+               labels: nil,
+               udfs: nil,
+               create_session: nil,
+               session_id: nil
    ensure_service!
-   options = { priority: priority, cache: cache, table: table,
-               create: create, write: write, dryrun: dryrun,
-               large_results: large_results, flatten: flatten,
-               legacy_sql: legacy_sql, standard_sql: standard_sql,
-               maximum_billing_tier: maximum_billing_tier,
-               maximum_bytes_billed: maximum_bytes_billed,
-               job_id: job_id, prefix: prefix, params: params,
-               external: external, labels: labels, udfs: udfs }
+   options = {
+     params: params,
+     types: types,
+     external: external,
+     priority: priority,
+     cache: cache,
+     table: table,
+     create: create,
+     write: write,
+     dryrun: dryrun,
+     standard_sql: standard_sql,
+     legacy_sql: legacy_sql,
+     large_results: large_results,
+     flatten: flatten,
+     maximum_billing_tier: maximum_billing_tier,
+     maximum_bytes_billed: maximum_bytes_billed,
+     job_id: job_id,
+     prefix: prefix,
+     labels: labels,
+     udfs: udfs,
+     create_session: create_session,
+     session_id: session_id
+   }
 
    updater = QueryJob::Updater.from_options service, query, options
    updater.dataset = self
@@ -1048,27 +1555,6 @@ module Google
  # Sets the current dataset as the default dataset in the query. Useful
  # for using unqualified table names.
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery    | Ruby           | Notes |
- # |-------------|----------------|---|
- # | `BOOL`      | `true`/`false` | |
- # | `INT64`     | `Integer`      | |
- # | `FLOAT64`   | `Float`        | |
- # | `NUMERIC`   | `BigDecimal`   | Will be rounded to 9 decimal places |
- # | `STRING`    | `String`       | |
- # | `DATETIME`  | `DateTime`     | `DATETIME` does not support time zone. |
- # | `DATE`      | `Date`         | |
- # | `TIMESTAMP` | `Time`         | |
- # | `TIME`      | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY`     | `Array`        | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`         | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method. If the
  # dataset is a full resource representation (see {#resource_full?}), the
@@ -1081,13 +1567,60 @@ module Google
  #   syntax](https://cloud.google.com/bigquery/query-reference), of the
  #   query to execute. Example: "SELECT count(f1) FROM
  #   [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- #   arguments when the `query` string contains either positional (`?`)
- #   or named (`@myparam`) query parameters. If value passed is an array
- #   `["foo"]`, the query must use positional query parameters. If value
- #   passed is a hash `{ myparam: "foo" }`, the query must use named
- #   query parameters. When set, `legacy_sql` will automatically be set
- #   to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ #   either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`, the
+ #   query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query must
+ #   use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
+ #   true.
+ #
+ #   BigQuery types are converted from Ruby types as follows:
+ #
+ #   | BigQuery     | Ruby                                 | Notes                                              |
+ #   |--------------|--------------------------------------|----------------------------------------------------|
+ #   | `BOOL`       | `true`/`false`                       |                                                    |
+ #   | `INT64`      | `Integer`                            |                                                    |
+ #   | `FLOAT64`    | `Float`                              |                                                    |
+ #   | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ #   | `BIGNUMERIC` | `BigDecimal`                         | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ #   | `STRING`     | `String`                             |                                                    |
+ #   | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ #   | `DATE`       | `Date`                               |                                                    |
+ #   | `GEOGRAPHY`  | `String` (WKT or GeoJSON)            | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ #   | `TIMESTAMP`  | `Time`                               |                                                    |
+ #   | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ #   | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ #   | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ #   | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+ #
+ #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ #   of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ #   GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ #   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ #   specify the SQL type for these values.
+ #
+ #   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ #   positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ #   should be BigQuery type codes from the following list:
+ #
+ #   * `:BOOL`
+ #   * `:INT64`
+ #   * `:FLOAT64`
+ #   * `:NUMERIC`
+ #   * `:BIGNUMERIC`
+ #   * `:STRING`
+ #   * `:DATETIME`
+ #   * `:DATE`
+ #   * `:GEOGRAPHY`
+ #   * `:TIMESTAMP`
+ #   * `:TIME`
+ #   * `:BYTES`
+ #   * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ #     is specified as `[:INT64]`.
+ #   * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ #     match the `params` hash, and the values are the type codes that match the data.
+ #
+ #   Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  #   that represents the mapping of the external tables to the table
  #   names used in the SQL query. The hash keys are the table names, and
@@ -1122,6 +1655,8 @@ module Google
  #   When set to false, the values of `large_results` and `flatten` are
  #   ignored; the query will be run as if `large_results` is true and
  #   `flatten` is false. Optional. The default value is false.
+ # @param [String] session_id The ID of an existing session. See the
+ #   `create_session` param in {#query_job} and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  #   configuration object for setting additional options for the query.
@@ -1136,9 +1671,12 @@ module Google
  #
  #   data = dataset.query "SELECT name FROM my_table"
  #
+ #   # Iterate over the first page of results
  #   data.each do |row|
  #     puts row[:name]
  #   end
+ #   # Retrieve the next page of results
+ #   data = data.next if data.next?
  #
  # @example Query using legacy SQL:
  #   require "google/cloud/bigquery"
@@ -1149,9 +1687,12 @@ module Google
  #   data = dataset.query "SELECT name FROM my_table",
  #                        legacy_sql: true
  #
+ #   # Iterate over the first page of results
  #   data.each do |row|
  #     puts row[:name]
  #   end
+ #   # Retrieve the next page of results
+ #   data = data.next if data.next?
  #
  # @example Query using positional query parameters:
  #   require "google/cloud/bigquery"
@@ -1162,9 +1703,12 @@ module Google
  #   data = dataset.query "SELECT name FROM my_table WHERE id = ?",
  #                        params: [1]
  #
+ #   # Iterate over the first page of results
  #   data.each do |row|
  #     puts row[:name]
  #   end
+ #   # Retrieve the next page of results
+ #   data = data.next if data.next?
  #
  # @example Query using named query parameters:
  #   require "google/cloud/bigquery"
@@ -1175,30 +1719,63 @@ module Google
  #   data = dataset.query "SELECT name FROM my_table WHERE id = @id",
  #                        params: { id: 1 }
  #
+ #   # Iterate over the first page of results
+ #   data.each do |row|
+ #     puts row[:name]
+ #   end
+ #   # Retrieve the next page of results
+ #   data = data.next if data.next?
+ #
+ # @example Query using named query parameters with types:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
+ #                        params: { ids: [] },
+ #                        types: { ids: [:INT64] }
+ #
+ #   # Iterate over the first page of results
  #   data.each do |row|
  #     puts row[:name]
  #   end
+ #   # Retrieve the next page of results
+ #   data = data.next if data.next?
  #
  # @example Execute a DDL statement:
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
  #
- #   data = bigquery.query "CREATE TABLE my_table (x INT64)"
+ #   data = dataset.query "CREATE TABLE my_table (x INT64)"
  #
- #   table_ref = data.ddl_target_table
+ #   table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  #
  # @example Execute a DML statement:
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
  #
- #   data = bigquery.query "UPDATE my_table " \
- #                         "SET x = x + 1 " \
- #                         "WHERE x IS NOT NULL"
+ #   data = dataset.query "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
  #
  #   puts data.num_dml_affected_rows
  #
+ # @example Run query in a session:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
+ #
+ #   job.wait_until_done!
+ #
+ #   session_id = job.session_id
+ #   data = dataset.query "SELECT * FROM temptable", session_id: session_id
+ #
  # @example Query using external data source, set destination:
  #   require "google/cloud/bigquery"
  #
@@ -1216,17 +1793,34 @@ module Google
  #     query.table = dataset.table "my_table", skip_lookup: true
  #   end
  #
+ #   # Iterate over the first page of results
  #   data.each do |row|
  #     puts row[:name]
  #   end
+ #   # Retrieve the next page of results
+ #   data = data.next if data.next?
  #
  # @!group Data
  #
- def query query, params: nil, external: nil, max: nil, cache: true,
-           standard_sql: nil, legacy_sql: nil, &block
-   job = query_job query, params: params, external: external,
-                   cache: cache, standard_sql: standard_sql,
-                   legacy_sql: legacy_sql, &block
+ def query query,
+           params: nil,
+           types: nil,
+           external: nil,
+           max: nil,
+           cache: true,
+           standard_sql: nil,
+           legacy_sql: nil,
+           session_id: nil,
+           &block
+   job = query_job query,
+                   params: params,
+                   types: types,
+                   external: external,
+                   cache: cache,
+                   standard_sql: standard_sql,
+                   legacy_sql: legacy_sql,
+                   session_id: session_id,
+                   &block
    job.wait_until_done!
    ensure_job_succeeded! job
 
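The examples above pair `types` only with named parameters; with positional parameters the same declarations are passed as a parallel array, per the documented Array form (a sketch):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # An empty array carries no value to infer an element type from,
    # so it is declared explicitly as ARRAY<INT64>.
    data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(?)",
                         params: [[]],
                         types: [[:INT64]]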
@@ -1252,7 +1846,7 @@ module Google
  #   The following values are supported:
  #
  #   * `csv` - CSV
- #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  #   * `avro` - [Avro](http://avro.apache.org/)
  #   * `sheets` - Google Sheets
  #   * `datastore_backup` - Cloud Datastore backup
@@ -1315,7 +1909,7 @@ module Google
  #   The following values are supported:
  #
  #   * `csv` - CSV
- #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  #   * `avro` - [Avro](http://avro.apache.org/)
  #   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
  #   * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -1407,8 +2001,8 @@ module Google
  #   this option. Also note that for most use cases, the block yielded by
  #   this method is a more convenient way to configure the schema.
  # @param [String] job_id A user-defined ID for the load job. The ID
- #   must contain only letters (a-z, A-Z), numbers (0-9), underscores
- #   (_), or dashes (-). The maximum length is 1,024 characters. If
+ #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  #   `job_id` is provided, then `prefix` will not be used.
  #
  #   See [Generating a job
@@ -1417,18 +2011,26 @@ module Google
1417
2011
  # prepended to a generated value to produce a unique job ID. For
1418
2012
  # example, the prefix `daily_import_job_` can be given to generate a
1419
2013
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1420
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1421
- # underscores (_), or dashes (-). The maximum length of the entire ID
2014
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
2015
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
1422
2016
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1423
2017
  # be used.
1424
2018
  # @param [Hash] labels A hash of user-provided labels associated with
1425
- # the job. You can use these to organize and group your jobs. Label
1426
- # keys and values can be no longer than 63 characters, can only
1427
- # contain lowercase letters, numeric characters, underscores and
1428
- # dashes. International characters are allowed. Label values are
1429
- # optional. Label keys must start with a letter and each label in the
1430
- # list must have a different key. See [Requirements for
1431
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
2019
+ # the job. You can use these to organize and group your jobs.
2020
+ #
2021
+ # The labels applied to a resource must meet the following requirements:
2022
+ #
2023
+ # * Each resource can have multiple labels, up to a maximum of 64.
2024
+ # * Each label must be a key-value pair.
2025
+ # * Keys have a minimum length of 1 character and a maximum length of
2026
+ # 63 characters, and cannot be empty. Values can be empty, and have
2027
+ # a maximum length of 63 characters.
2028
+ # * Keys and values can contain only lowercase letters, numeric characters,
2029
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
2030
+ # international characters are allowed.
2031
+ # * The key portion of a label must be unique. However, you can use the
2032
+ # same key with multiple resources.
2033
+ # * Keys must start with a lowercase letter or international character.
1432
2034
  # @yield [updater] A block for setting the schema and other
1433
2035
  # options for the destination table. The schema can be omitted if the
1434
2036
  # destination table already exists, or if you're loading data from a
@@ -1522,29 +2124,19 @@ module Google
1522
2124
  #
1523
2125
  # @!group Data
1524
2126
  #
1525
- def load_job table_id, files, format: nil, create: nil, write: nil,
1526
- projection_fields: nil, jagged_rows: nil,
1527
- quoted_newlines: nil, encoding: nil, delimiter: nil,
1528
- ignore_unknown: nil, max_bad_records: nil, quote: nil,
1529
- skip_leading: nil, schema: nil, job_id: nil, prefix: nil,
1530
- labels: nil, autodetect: nil, null_marker: nil, dryrun: nil
2127
+ def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2128
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
2129
+ quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
2130
+ null_marker: nil, dryrun: nil
1531
2131
  ensure_service!
1532
2132
 
1533
2133
  updater = load_job_updater table_id,
1534
- format: format, create: create,
1535
- write: write,
1536
- projection_fields: projection_fields,
1537
- jagged_rows: jagged_rows,
1538
- quoted_newlines: quoted_newlines,
1539
- encoding: encoding,
1540
- delimiter: delimiter,
1541
- ignore_unknown: ignore_unknown,
1542
- max_bad_records: max_bad_records,
1543
- quote: quote, skip_leading: skip_leading,
1544
- dryrun: dryrun, schema: schema,
1545
- job_id: job_id, prefix: prefix,
1546
- labels: labels, autodetect: autodetect,
1547
- null_marker: null_marker
2134
+ format: format, create: create, write: write, projection_fields: projection_fields,
2135
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
2136
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
2137
+ max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
2138
+ dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
2139
+ autodetect: autodetect, null_marker: null_marker
1548
2140
 
1549
2141
  yield updater if block_given?
1550
2142
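The `prefix` and `labels` options documented above combine like this (the bucket name and label are assumptions):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    load_job = dataset.load_job "my_table",
                                "gs://my-bucket/file.csv",
                                prefix: "daily_import_job_",
                                labels: { "team" => "data-eng" }
    load_job.wait_until_done!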
 
@@ -1579,7 +2171,7 @@ module Google
1579
2171
  # The following values are supported:
1580
2172
  #
1581
2173
  # * `csv` - CSV
1582
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
2174
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1583
2175
  # * `avro` - [Avro](http://avro.apache.org/)
1584
2176
  # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
1585
2177
  # * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -1760,21 +2352,14 @@ module Google
1760
2352
  #
1761
2353
  # @!group Data
1762
2354
  #
1763
- def load table_id, files, format: nil, create: nil, write: nil,
1764
- projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1765
- encoding: nil, delimiter: nil, ignore_unknown: nil,
1766
- max_bad_records: nil, quote: nil, skip_leading: nil,
1767
- schema: nil, autodetect: nil, null_marker: nil, &block
2355
+ def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2356
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
2357
+ quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
1768
2358
  job = load_job table_id, files,
1769
- format: format, create: create, write: write,
1770
- projection_fields: projection_fields,
1771
- jagged_rows: jagged_rows,
1772
- quoted_newlines: quoted_newlines,
1773
- encoding: encoding, delimiter: delimiter,
1774
- ignore_unknown: ignore_unknown,
1775
- max_bad_records: max_bad_records,
1776
- quote: quote, skip_leading: skip_leading,
1777
- schema: schema, autodetect: autodetect,
2359
+ format: format, create: create, write: write, projection_fields: projection_fields,
2360
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
2361
+ delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
2362
+ quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
1778
2363
  null_marker: null_marker, &block
1779
2364
 
1780
2365
  job.wait_until_done!
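For comparison, the synchronous `load` wraps the same updater-based job and blocks until it completes. A sketch, assuming a CSV file in Cloud Storage and a destination table that does not yet exist:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    dataset.load "my_table", "gs://my-bucket/file.csv" do |schema|
      schema.string "first_name", mode: :required
      schema.integer "age"
    end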
@@ -1825,7 +2410,7 @@ module Google
1825
2410
  # dataset = bigquery.dataset "my_dataset", skip_lookup: true
1826
2411
  # dataset.exists? # true
1827
2412
  #
1828
- def exists? force: nil
2413
+ def exists? force: false
1829
2414
  return gapi_exists? if force
1830
2415
  # If we have a memoized value, return it
1831
2416
  return @exists unless @exists.nil?
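The new `force: false` default keeps the memoized answer; passing `force: true` always verifies against the service, at the cost of an extra API call:

    dataset = bigquery.dataset "my_dataset", skip_lookup: true
    dataset.exists? force: true # skips the cached value and asks the API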
@@ -1935,14 +2520,12 @@ module Google
1935
2520
  end
1936
2521
 
1937
2522
  ##
1938
- # @private New lazy Dataset object without making an HTTP request.
2523
+ # @private New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
1939
2524
  def self.new_reference project_id, dataset_id, service
1940
2525
  raise ArgumentError, "dataset_id is required" unless dataset_id
1941
2526
  new.tap do |b|
1942
- reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new(
1943
- project_id: project_id,
1944
- dataset_id: dataset_id
1945
- )
2527
+ reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new \
2528
+ project_id: project_id, dataset_id: dataset_id
1946
2529
  b.service = service
1947
2530
  b.instance_variable_set :@reference, reference_gapi
1948
2531
  end
@@ -1953,18 +2536,47 @@ module Google
1953
2536
  # the need to complete a load operation before the data can appear in
1954
2537
  # query results.
1955
2538
  #
2539
+ # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
2540
+ # complex types:
2541
+ #
2542
+ # | BigQuery | Ruby | Notes |
2543
+ # |--------------|--------------------------------------|----------------------------------------------------|
2544
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
2545
+ # | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
2546
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
2547
+ # | `DATE` | `Date` | |
2548
+ # | `GEOGRAPHY` | `String` | |
2549
+ # | `TIMESTAMP` | `Time` | |
2550
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
2551
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
2552
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
2553
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
2554
+ #
2555
+ # Because BigQuery's streaming API is designed for high insertion rates,
2556
+ # modifications to the underlying table metadata are eventually
2557
+ # consistent when interacting with the streaming system. In most cases
2558
+ # metadata changes are propagated within minutes, but during this period
2559
+ # API responses may reflect the inconsistent state of the table.
2560
+ #
1956
2561
  # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
1957
2562
  # Streaming Data Into BigQuery
1958
2563
  #
2564
+ # @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
2565
+ # BigQuery Troubleshooting: Metadata errors for streaming inserts
2566
+ #
1959
2567
  # @param [String] table_id The ID of the destination table.
1960
2568
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
1961
- # containing the data. Required.
1962
- # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
1963
- # uses this property to detect duplicate insertion requests on a
1964
- # best-effort basis. For more information, see [data
1965
- # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
1966
- # Optional. If not provided, the client library will assign a UUID to
1967
- # each row before the request is sent.
2569
+ # containing the data. Required. `BigDecimal` values will be rounded to
2570
+ # scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
2571
+ # rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
2572
+ # instead of `BigDecimal`.
2573
+ # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
2574
+ # detect duplicate insertion requests on a best-effort basis. For more information, see [data
2575
+ # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
2576
+ # not provided, the client library will assign a UUID to each row before the request is sent.
2577
+ #
2578
+ # The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of an
2579
+ # ID for a specific row in the array.
1968
2580
  # @param [Boolean] skip_invalid Insert all valid rows of a request, even
1969
2581
  # if invalid rows exist. The default value is `false`, which causes
1970
2582
  # the entire request to fail if any invalid rows exist.
@@ -1975,6 +2587,12 @@ module Google
1975
2587
  # a new table with the given `table_id`, if no table is found for
1976
2588
  # `table_id`. The default value is false.
1977
2589
  #
2590
+ # @yield [table] a block for setting the table
2591
+ # @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
2592
+ # to set additional properties on the table in the API request to
2593
+ # create it. Only used when `autocreate` is set and the table does not
2594
+ # already exist.
2595
+ #
1978
2596
  # @return [Google::Cloud::Bigquery::InsertResponse] An insert response
1979
2597
  # object.
1980
2598
  #
@@ -2018,42 +2636,36 @@ module Google
2018
2636
  # t.schema.integer "age", mode: :required
2019
2637
  # end
2020
2638
  #
2639
+ # @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
2640
+ # require "google/cloud/bigquery"
2641
+ #
2642
+ # bigquery = Google::Cloud::Bigquery.new
2643
+ # dataset = bigquery.dataset "my_dataset"
2644
+ #
2645
+ # row = {
2646
+ # "my_numeric" => BigDecimal("123456798.987654321"),
2647
+ # "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
2648
+ # }
2649
+ # dataset.insert "my_table", row
2650
+ #
2021
2651
  # @!group Data
2022
2652
  #
2023
- def insert table_id, rows, insert_ids: nil, skip_invalid: nil,
2024
- ignore_unknown: nil, autocreate: nil
2653
+ def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
2025
2654
  rows = [rows] if rows.is_a? Hash
2655
+ raise ArgumentError, "No rows provided" if rows.empty?
2656
+
2657
+ insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
2026
2658
  insert_ids = Array insert_ids
2027
- if insert_ids.count > 0 && insert_ids.count != rows.count
2659
+ if insert_ids.count.positive? && insert_ids.count != rows.count
2028
2660
  raise ArgumentError, "insert_ids must be the same size as rows"
2029
2661
  end
2030
2662
 
2031
2663
  if autocreate
2032
- begin
2033
- insert_data table_id, rows, skip_invalid: skip_invalid,
2034
- ignore_unknown: ignore_unknown,
2035
- insert_ids: insert_ids
2036
- rescue Google::Cloud::NotFoundError
2037
- sleep rand(1..60)
2038
- begin
2039
- create_table table_id do |tbl_updater|
2040
- yield tbl_updater if block_given?
2041
- end
2042
- # rubocop:disable Lint/HandleExceptions
2043
- rescue Google::Cloud::AlreadyExistsError
2044
- end
2045
- # rubocop:enable Lint/HandleExceptions
2046
-
2047
- sleep 60
2048
- insert table_id, rows, skip_invalid: skip_invalid,
2049
- ignore_unknown: ignore_unknown,
2050
- autocreate: true,
2051
- insert_ids: insert_ids
2052
- end
2664
+ insert_data_with_autocreate table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
2665
+ insert_ids: insert_ids, &block
2053
2666
  else
2054
- insert_data table_id, rows, skip_invalid: skip_invalid,
2055
- ignore_unknown: ignore_unknown,
2056
- insert_ids: insert_ids
2667
+ insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
2668
+ insert_ids: insert_ids
2057
2669
  end
2058
2670
  end
2059
2671
 
@@ -2076,6 +2688,11 @@ module Google
2076
2688
  # messages before the batch is published. Default is 10.
2077
2689
  # @attr_reader [Numeric] threads The number of threads used to insert
2078
2690
  # batches of rows. Default is 4.
2691
+ # @param [String] view Specifies the view that determines which table information is returned.
2692
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
2693
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and

2694
+ # `:full`.
2695
+ # The default value is the `:unspecified` view type.
2079
2696
  # @yield [response] the callback for when a batch of rows is inserted
2080
2697
  # @yieldparam [Table::AsyncInserter::Result] result the result of the
2081
2698
  # asynchronous insert
@@ -2104,14 +2721,35 @@ module Google
2104
2721
  #
2105
2722
  # inserter.stop.wait!
2106
2723
  #
2107
- def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
2108
- max_bytes: 10000000, max_rows: 500, interval: 10,
2109
- threads: 4, &block
2724
+ # @example Avoid retrieving transient stats of the table while inserting:
2725
+ # require "google/cloud/bigquery"
2726
+ #
2727
+ # bigquery = Google::Cloud::Bigquery.new
2728
+ # dataset = bigquery.dataset "my_dataset"
2729
+ # inserter = dataset.insert_async("my_table", view: "basic") do |result|
2730
+ # if result.error?
2731
+ # log_error result.error
2732
+ # else
2733
+ # log_insert "inserted #{result.insert_count} rows " \
2734
+ # "with #{result.error_count} errors"
2735
+ # end
2736
+ # end
2737
+ #
2738
+ # rows = [
2739
+ # { "first_name" => "Alice", "age" => 21 },
2740
+ # { "first_name" => "Bob", "age" => 22 }
2741
+ # ]
2742
+ # inserter.insert rows
2743
+ #
2744
+ # inserter.stop.wait!
2745
+ #
2746
+ def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
2747
+ interval: 10, threads: 4, view: nil, &block
2110
2748
  ensure_service!
2111
2749
 
2112
2750
  # Get table, don't use Dataset#table which handles NotFoundError
2113
- gapi = service.get_table dataset_id, table_id
2114
- table = Table.from_gapi gapi, service
2751
+ gapi = service.get_table dataset_id, table_id, metadata_view: view
2752
+ table = Table.from_gapi gapi, service, metadata_view: view
2115
2753
  # Get the AsyncInserter from the table
2116
2754
  table.insert_async skip_invalid: skip_invalid,
2117
2755
  ignore_unknown: ignore_unknown,
@@ -2119,17 +2757,53 @@ module Google
2119
2757
  interval: interval, threads: threads, &block
2120
2758
  end
2121
2759
 
2760
+ ##
2761
+ # Build a Google::Apis::BigqueryV2::DatasetAccessEntry object that references
2762
+ # this dataset.
2763
+ #
2764
+ # @param [Array<String>] target_types The list of target types within the dataset.
2765
+ #
2766
+ # @return [Google::Apis::BigqueryV2::DatasetAccessEntry] Returns a DatasetAccessEntry object.
2767
+ #
2768
+ # @example
2769
+ # require "google/cloud/bigquery"
2770
+ #
2771
+ # bigquery = Google::Cloud::Bigquery.new
2772
+ # dataset = bigquery.dataset "my_dataset"
2773
+ # dataset_access_entry = dataset.build_access_entry target_types: ["VIEWS"]
2774
+ #
2775
+ def build_access_entry target_types: nil
2776
+ params = {
2777
+ dataset: dataset_ref,
2778
+ target_types: target_types
2779
+ }.delete_if { |_, v| v.nil? }
2780
+ Google::Apis::BigqueryV2::DatasetAccessEntry.new(**params)
2781
+ end
2782
+
2122
2783
  protected
2123
2784
 
2124
- def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil,
2125
- insert_ids: nil
2785
+ def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
2786
+ insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
2787
+ rescue Google::Cloud::NotFoundError
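+ # The table is missing: pause a random interval to stagger concurrent callers before creating it.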
2788
+ sleep rand(1..60)
2789
+ begin
2790
+ create_table table_id do |tbl_updater|
2791
+ yield tbl_updater if block_given?
2792
+ end
2793
+ rescue Google::Cloud::AlreadyExistsError
2794
+ # Do nothing if it already exists
2795
+ end
2796
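+ # Give the new table's metadata time to propagate before retrying the streaming insert.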
+ sleep 60
2797
+ retry
2798
+ end
2799
+
2800
+ def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
2126
2801
  rows = [rows] if rows.is_a? Hash
2127
2802
  raise ArgumentError, "No rows provided" if rows.empty?
2128
2803
  ensure_service!
2129
- options = { skip_invalid: skip_invalid,
2130
- ignore_unknown: ignore_unknown,
2131
- insert_ids: insert_ids }
2132
- gapi = service.insert_tabledata dataset_id, table_id, rows, options
2804
+ gapi = service.insert_tabledata dataset_id, table_id, rows, skip_invalid: skip_invalid,
2805
+ ignore_unknown: ignore_unknown,
2806
+ insert_ids: insert_ids
2133
2807
  InsertResponse.from_gapi rows, gapi
2134
2808
  end
2135
2809
 
@@ -2160,10 +2834,8 @@ module Google
2160
2834
  def patch_gapi! *attributes
2161
2835
  return if attributes.empty?
2162
2836
  ensure_service!
2163
- patch_args = Hash[attributes.map do |attr|
2164
- [attr, @gapi.send(attr)]
2165
- end]
2166
- patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
2837
+ patch_args = attributes.to_h { |attr| [attr, @gapi.send(attr)] }
2838
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
2167
2839
  patch_gapi.etag = etag if etag
2168
2840
  @gapi = service.patch_dataset dataset_id, patch_gapi
2169
2841
  end
@@ -2172,7 +2844,7 @@ module Google
2172
2844
  # Load the complete representation of the dataset if it has been
2173
2845
  # only partially loaded by a request to the API list method.
2174
2846
  def ensure_full_data!
2175
- reload! if resource_partial?
2847
+ reload! unless resource_full?
2176
2848
  end
2177
2849
 
2178
2850
  def ensure_job_succeeded! job
@@ -2203,11 +2875,8 @@ module Google
2203
2875
  )
2204
2876
  end
2205
2877
 
2206
- def load_job_csv_options! job, jagged_rows: nil,
2207
- quoted_newlines: nil,
2208
- delimiter: nil,
2209
- quote: nil, skip_leading: nil,
2210
- null_marker: nil
2878
+ def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
2879
+ skip_leading: nil, null_marker: nil
2211
2880
  job.jagged_rows = jagged_rows unless jagged_rows.nil?
2212
2881
  job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
2213
2882
  job.delimiter = delimiter unless delimiter.nil?
@@ -2216,17 +2885,11 @@ module Google
2216
2885
  job.skip_leading = skip_leading unless skip_leading.nil?
2217
2886
  end
2218
2887
 
2219
- def load_job_file_options! job, format: nil,
2220
- projection_fields: nil,
2221
- jagged_rows: nil, quoted_newlines: nil,
2222
- encoding: nil, delimiter: nil,
2223
- ignore_unknown: nil, max_bad_records: nil,
2224
- quote: nil, skip_leading: nil,
2225
- null_marker: nil
2888
+ def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
2889
+ encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
2890
+ skip_leading: nil, null_marker: nil
2226
2891
  job.format = format unless format.nil?
2227
- unless projection_fields.nil?
2228
- job.projection_fields = projection_fields
2229
- end
2892
+ job.projection_fields = projection_fields unless projection_fields.nil?
2230
2893
  job.encoding = encoding unless encoding.nil?
2231
2894
  job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
2232
2895
  job.max_bad_records = max_bad_records unless max_bad_records.nil?
@@ -2238,16 +2901,11 @@ module Google
2238
2901
  null_marker: null_marker
2239
2902
  end
2240
2903
 
2241
- def load_job_updater table_id, format: nil, create: nil,
2242
- write: nil, projection_fields: nil,
2243
- jagged_rows: nil, quoted_newlines: nil,
2244
- encoding: nil, delimiter: nil,
2245
- ignore_unknown: nil, max_bad_records: nil,
2246
- quote: nil, skip_leading: nil, dryrun: nil,
2247
- schema: nil, job_id: nil, prefix: nil, labels: nil,
2248
- autodetect: nil, null_marker: nil
2249
- new_job = load_job_gapi table_id, dryrun, job_id: job_id,
2250
- prefix: prefix
2904
+ def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2905
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
2906
+ max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
2907
+ prefix: nil, labels: nil, autodetect: nil, null_marker: nil
2908
+ new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
2251
2909
  LoadJob::Updater.new(new_job).tap do |job|
2252
2910
  job.location = location if location # may be dataset reference
2253
2911
  job.create = create unless create.nil?
@@ -2285,9 +2943,7 @@ module Google
2285
2943
  job_gapi.configuration.load.update! source_uris: urls
2286
2944
  if job_gapi.configuration.load.source_format.nil?
2287
2945
  source_format = Convert.derive_source_format_from_list urls
2288
- unless source_format.nil?
2289
- job_gapi.configuration.load.source_format = source_format
2290
- end
2946
+ job_gapi.configuration.load.source_format = source_format unless source_format.nil?
2291
2947
  end
2292
2948
  end
2293
2949
 
@@ -2299,9 +2955,7 @@ module Google
2299
2955
  path = Pathname(file).to_path
2300
2956
  if job_gapi.configuration.load.source_format.nil?
2301
2957
  source_format = Convert.derive_source_format path
2302
- unless source_format.nil?
2303
- job_gapi.configuration.load.source_format = source_format
2304
- end
2958
+ job_gapi.configuration.load.source_format = source_format unless source_format.nil?
2305
2959
  end
2306
2960
 
2307
2961
  gapi = service.load_table_file file, job_gapi
@@ -2310,21 +2964,18 @@ module Google
2310
2964
 
2311
2965
  def load_local_or_uri file, updater
2312
2966
  job_gapi = updater.to_gapi
2313
- job = if local_file? file
2314
- load_local file, job_gapi
2315
- else
2316
- load_storage file, job_gapi
2317
- end
2318
- job
2967
+ if local_file? file
2968
+ load_local file, job_gapi
2969
+ else
2970
+ load_storage file, job_gapi
2971
+ end
2319
2972
  end
2320
2973
 
2321
2974
  def storage_url? files
2322
2975
  [files].flatten.all? do |file|
2323
2976
  file.respond_to?(:to_gs_url) ||
2324
- (file.respond_to?(:to_str) &&
2325
- file.to_str.downcase.start_with?("gs://")) ||
2326
- (file.is_a?(URI) &&
2327
- file.to_s.downcase.start_with?("gs://"))
2977
+ (file.respond_to?(:to_str) && file.to_str.downcase.start_with?("gs://")) ||
2978
+ (file.is_a?(URI) && file.to_s.downcase.start_with?("gs://"))
2328
2979
  end
2329
2980
  end
2330
2981
 
@@ -2348,15 +2999,16 @@ module Google
2348
2999
  end
2349
3000
 
2350
3001
  ##
2351
- # Yielded to a block to accumulate changes for a patch request.
3002
+ # Yielded to a block to accumulate changes for a create request. See {Project#create_dataset}.
2352
3003
  class Updater < Dataset
2353
3004
  ##
2354
- # A list of attributes that were updated.
3005
+ # @private A list of attributes that were updated.
2355
3006
  attr_reader :updates
2356
3007
 
2357
3008
  ##
2358
- # Create an Updater object.
3009
+ # @private Create an Updater object.
2359
3010
  def initialize gapi
3011
+ super()
2360
3012
  @updates = []
2361
3013
  @gapi = gapi
2362
3014
  end
@@ -2373,7 +3025,110 @@ module Google
2373
3025
  end
2374
3026
 
2375
3027
  ##
2376
- # Make sure any access changes are saved
3028
+ # @raise [RuntimeError] not implemented
3029
+ def delete(*)
3030
+ raise "not implemented in #{self.class}"
3031
+ end
3032
+
3033
+ ##
3034
+ # @raise [RuntimeError] not implemented
3035
+ def create_table(*)
3036
+ raise "not implemented in #{self.class}"
3037
+ end
3038
+
3039
+ ##
3040
+ # @raise [RuntimeError] not implemented
3041
+ def create_view(*)
3042
+ raise "not implemented in #{self.class}"
3043
+ end
3044
+
3045
+ ##
3046
+ # @raise [RuntimeError] not implemented
3047
+ def create_materialized_view(*)
3048
+ raise "not implemented in #{self.class}"
3049
+ end
3050
+
3051
+ ##
3052
+ # @raise [RuntimeError] not implemented
3053
+ def table(*)
3054
+ raise "not implemented in #{self.class}"
3055
+ end
3056
+
3057
+ ##
3058
+ # @raise [RuntimeError] not implemented
3059
+ def tables(*)
3060
+ raise "not implemented in #{self.class}"
3061
+ end
3062
+
3063
+ ##
3064
+ # @raise [RuntimeError] not implemented
3065
+ def model(*)
3066
+ raise "not implemented in #{self.class}"
3067
+ end
3068
+
3069
+ ##
3070
+ # @raise [RuntimeError] not implemented
3071
+ def models(*)
3072
+ raise "not implemented in #{self.class}"
3073
+ end
3074
+
3075
+ ##
3076
+ # @raise [RuntimeError] not implemented
3077
+ def create_routine(*)
3078
+ raise "not implemented in #{self.class}"
3079
+ end
3080
+
3081
+ ##
3082
+ # @raise [RuntimeError] not implemented
3083
+ def routine(*)
3084
+ raise "not implemented in #{self.class}"
3085
+ end
3086
+
3087
+ ##
3088
+ # @raise [RuntimeError] not implemented
3089
+ def routines(*)
3090
+ raise "not implemented in #{self.class}"
3091
+ end
3092
+
3093
+ ##
3094
+ # @raise [RuntimeError] not implemented
3095
+ def query_job(*)
3096
+ raise "not implemented in #{self.class}"
3097
+ end
3098
+
3099
+ ##
3100
+ # @raise [RuntimeError] not implemented
3101
+ def query(*)
3102
+ raise "not implemented in #{self.class}"
3103
+ end
3104
+
3105
+ ##
3106
+ # @raise [RuntimeError] not implemented
3107
+ def external(*)
3108
+ raise "not implemented in #{self.class}"
3109
+ end
3110
+
3111
+ ##
3112
+ # @raise [RuntimeError] not implemented
3113
+ def load_job(*)
3114
+ raise "not implemented in #{self.class}"
3115
+ end
3116
+
3117
+ ##
3118
+ # @raise [RuntimeError] not implemented
3119
+ def load(*)
3120
+ raise "not implemented in #{self.class}"
3121
+ end
3122
+
3123
+ ##
3124
+ # @raise [RuntimeError] not implemented
3125
+ def reload!
3126
+ raise "not implemented in #{self.class}"
3127
+ end
3128
+ alias refresh! reload!
3129
+
3130
+ ##
3131
+ # @private Make sure any access changes are saved
2377
3132
  def check_for_mutated_access!
2378
3133
  return if @access.nil?
2379
3134
  return unless @access.changed?
@@ -2381,6 +3136,8 @@ module Google
2381
3136
  patch_gapi! :access
2382
3137
  end
2383
3138
 
3139
+ ##
3140
+ # @private
2384
3141
  def to_gapi
2385
3142
  check_for_mutated_access!
2386
3143
  @gapi