google-cloud-bigquery 1.14.0 → 1.42.0

This diff shows the content of publicly released package versions as published to their public registries, and is provided for informational purposes only.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -18,9 +18,11 @@ require "google/cloud/errors"
  require "google/cloud/bigquery/service"
  require "google/cloud/bigquery/table"
  require "google/cloud/bigquery/model"
+ require "google/cloud/bigquery/routine"
  require "google/cloud/bigquery/external"
  require "google/cloud/bigquery/dataset/list"
  require "google/cloud/bigquery/dataset/access"
+ require "google/cloud/bigquery/dataset/tag"
  require "google/cloud/bigquery/convert"
  require "google/apis/bigquery_v2"
 
@@ -68,8 +70,8 @@ module Google
  ##
  # A unique ID for this dataset, without the project name.
  #
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -312,12 +314,19 @@ module Google
  # @param [Hash<String, String>] labels A hash containing key/value
  # pairs.
  #
- # * Label keys and values can be no longer than 63 characters.
- # * Label keys and values can contain only lowercase letters, numbers,
- # underscores, hyphens, and international characters.
- # * Label keys and values cannot exceed 128 bytes in size.
- # * Label keys must begin with a letter.
- # * Label keys must be unique within a dataset.
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -335,6 +344,75 @@ module Google
    patch_gapi! :labels
  end
 
+ ##
+ # The {EncryptionConfiguration} object that represents the default
+ # encryption method for all tables and models in the dataset. Once this
+ # property is set, all newly-created partitioned tables and models in
+ # the dataset will have their encryption set to this value, unless the
+ # table creation request (or query) overrides it.
+ #
+ # Present only if this dataset is using custom default encryption.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ # Protecting Data with Cloud KMS Keys
+ #
+ # @return [EncryptionConfiguration, nil] The default encryption
+ # configuration.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # encrypt_config = dataset.default_encryption
+ #
+ # @!group Attributes
+ #
+ def default_encryption
+   return nil if reference?
+   ensure_full_data!
+   return nil if @gapi.default_encryption_configuration.nil?
+   EncryptionConfiguration.from_gapi(@gapi.default_encryption_configuration).freeze
+ end
+
+ ##
+ # Set the {EncryptionConfiguration} object that represents the default
+ # encryption method for all tables and models in the dataset. Once this
+ # property is set, all newly-created partitioned tables and models in
+ # the dataset will have their encryption set to this value, unless the
+ # table creation request (or query) overrides it.
+ #
+ # If the dataset is not a full resource representation (see
+ # {#resource_full?}), the full representation will be retrieved before
+ # the update to comply with ETag-based optimistic concurrency control.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ # Protecting Data with Cloud KMS Keys
+ #
+ # @param [EncryptionConfiguration] value The new encryption config.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
+ # encrypt_config = bigquery.encryption kms_key: key_name
+ #
+ # dataset.default_encryption = encrypt_config
+ #
+ # @!group Attributes
+ #
+ def default_encryption= value
+   ensure_full_data!
+   @gapi.default_encryption_configuration = value.to_gapi
+   patch_gapi! :default_encryption_configuration
+ end
+
  ##
  # Retrieves the access rules for a Dataset. The rules can be updated
  # when passing a block, see {Dataset::Access} for all the methods
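The two accessors above are designed to round-trip: the setter accepts an {EncryptionConfiguration} built from a Cloud KMS key, and the getter returns a frozen copy of the dataset's current default. A minimal sketch of that round trip, assuming a pre-provisioned KMS key (the key path below is a placeholder):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Placeholder path; substitute a real Cloud KMS key in your project.
key_name = "projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key"

# Build an encryption configuration and set it as the dataset default.
dataset.default_encryption = bigquery.encryption kms_key: key_name

# Read it back; the getter returns a frozen configuration (or nil).
config = dataset.default_encryption
puts config.kms_key if config
```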
@@ -389,6 +467,21 @@ module Google
    access_builder.freeze
  end
 
+ ##
+ # Retrieves the tags associated with this dataset. Tag keys are
+ # globally unique and managed via the Resource Manager API.
+ #
+ # @see https://cloud.google.com/resource-manager/docs/tags/tags-overview
+ # for more information.
+ #
+ # @return [Array<Google::Cloud::Bigquery::Dataset::Tag>, nil] The tags
+ # associated with the dataset, or `nil` if none are set.
+ #
+ def tags
+   ensure_full_data!
+   return nil if @gapi.tags.nil?
+   @gapi.tags.map { |gapi| Tag.from_gapi(gapi) }
+ end
+
  ##
  # Permanently deletes the dataset. The dataset must be empty before it
  # can be deleted unless the `force` option is set to `true`.
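Because `#tags` returns `nil` for a dataset with no tags, callers should guard before iterating. A minimal sketch, assuming the `Tag` objects added in the new `dataset/tag.rb` expose `tag_key` and `tag_value` readers:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Array() converts the nil returned for untagged datasets into [].
Array(dataset.tags).each do |tag|
  # tag_key is the namespaced key, e.g. "my-project/environment".
  puts "#{tag.tag_key} => #{tag.tag_value}"
end
```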
@@ -424,7 +517,7 @@ module Google
  # you can pass the table's schema as a hash (see example.)
  #
  # @param [String] table_id The ID of the table. The ID must contain only
- # letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
+ # letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum
  # length is 1,024 characters.
  # @param [String] name A descriptive name for the table.
  # @param [String] description A user-friendly description of the table.
@@ -485,6 +578,40 @@ module Google
  #   end
  # end
  #
+ # @example With time partitioning and clustering.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # table = dataset.create_table "my_table" do |t|
+ #   t.schema do |schema|
+ #     schema.timestamp "dob", mode: :required
+ #     schema.string "first_name", mode: :required
+ #     schema.string "last_name", mode: :required
+ #   end
+ #   t.time_partitioning_type = "DAY"
+ #   t.time_partitioning_field = "dob"
+ #   t.clustering_fields = ["last_name", "first_name"]
+ # end
+ #
+ # @example With range partitioning.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # table = dataset.create_table "my_table" do |t|
+ #   t.schema do |schema|
+ #     schema.integer "my_table_id", mode: :required
+ #     schema.string "my_table_data", mode: :required
+ #   end
+ #   t.range_partitioning_field = "my_table_id"
+ #   t.range_partitioning_start = 0
+ #   t.range_partitioning_interval = 10
+ #   t.range_partitioning_end = 100
+ # end
+ #
  # @!group Table
  #
  def create_table table_id, name: nil, description: nil
@@ -507,17 +634,19 @@ module Google
  end
 
  ##
- # Creates a new [view](https://cloud.google.com/bigquery/docs/views)
- # table, which is a virtual table defined by the given SQL query.
+ # Creates a new view, which is a virtual table defined by the given SQL query.
  #
- # BigQuery's views are logical views, not materialized views, which
- # means that the query that defines the view is re-executed every time
- # the view is queried. Queries are billed according to the total amount
+ # With BigQuery's logical views, the query that defines the view is re-executed
+ # every time the view is queried. Queries are billed according to the total amount
  # of data in all table fields referenced directly or indirectly by the
  # top-level query. (See {Table#view?} and {Table#query}.)
  #
+ # For materialized views, see {#create_materialized_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/views Creating views
+ #
  # @param [String] table_id The ID of the view table. The ID must contain
- # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
+ # only letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The
  # maximum length is 1,024 characters.
  # @param [String] query The query that BigQuery executes when the view
  # is referenced.
@@ -532,12 +661,20 @@ module Google
  # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
  # dialect. Optional. The default value is false.
  # @param [Array<String>, String] udfs User-defined function resources
- # used in the query. May be either a code resource to load from a
- # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ # used in a legacy SQL query. May be either a code resource to load from
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  # that contains code for a user-defined function (UDF). Providing an
  # inline code resource is equivalent to providing a URI for a file
- # containing the same code. See [User-Defined
- # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
+ # containing the same code.
+ #
+ # This parameter is used for defining User Defined Function (UDF)
+ # resources only when using legacy SQL. Users of standard SQL should
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+ # Routines API to define UDF resources.
+ #
+ # For additional information on migrating, see: [Migrating to
+ # standard SQL - Differences in user-defined JavaScript
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
  #
  # @return [Google::Cloud::Bigquery::Table] A new table object.
  #
@@ -548,7 +685,7 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- #   "SELECT name, age FROM proj.dataset.users"
+ #                            "SELECT name, age FROM proj.dataset.users"
  #
  # @example A name and description can be provided:
  # require "google/cloud/bigquery"
@@ -557,13 +694,18 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- #   "SELECT name, age FROM proj.dataset.users",
- #   name: "My View", description: "This is my view"
+ #                            "SELECT name, age FROM proj.dataset.users",
+ #                            name: "My View", description: "This is my view"
  #
  # @!group Table
  #
- def create_view table_id, query, name: nil, description: nil,
-                 standard_sql: nil, legacy_sql: nil, udfs: nil
+ def create_view table_id,
+                 query,
+                 name: nil,
+                 description: nil,
+                 standard_sql: nil,
+                 legacy_sql: nil,
+                 udfs: nil
    use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
    new_view_opts = {
      table_reference: Google::Apis::BigqueryV2::TableReference.new(
@@ -579,7 +721,81 @@ module Google
        user_defined_function_resources: udfs_gapi(udfs)
      )
    }.delete_if { |_, v| v.nil? }
-   new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
+   new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+   gapi = service.insert_table dataset_id, new_view
+   Table.from_gapi gapi, service
+ end
+
+ ##
+ # Creates a new materialized view.
+ #
+ # Materialized views are precomputed views that periodically cache results of a query for increased performance
+ # and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+ # only delta changes from the base table to compute up-to-date results.
+ #
+ # Queries that use materialized views are generally faster and consume fewer resources than queries that
+ # retrieve the same data only from the base table. Materialized views can significantly boost the performance
+ # of workloads with common, repeated queries.
+ #
+ # For logical views, see {#create_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+ #
+ # @param [String] table_id The ID of the materialized view table. The ID must contain only letters (`[A-Za-z]`),
+ # numbers (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
+ # @param [String] query The query that BigQuery executes when the materialized view is referenced.
+ # @param [String] name A descriptive name for the table.
+ # @param [String] description A user-friendly description of the table.
+ # @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+ # updated. Optional. The default value is true.
+ # @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+ # will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+ #
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ #                                                      "SELECT name, age FROM proj.dataset.users"
+ #
+ # @example Automatic refresh can be disabled:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ #                                                      "SELECT name, age FROM proj.dataset.users",
+ #                                                      enable_refresh: false
+ #
+ # @!group Table
+ #
+ def create_materialized_view table_id,
+                              query,
+                              name: nil,
+                              description: nil,
+                              enable_refresh: nil,
+                              refresh_interval_ms: nil
+   new_view_opts = {
+     table_reference: Google::Apis::BigqueryV2::TableReference.new(
+       project_id: project_id,
+       dataset_id: dataset_id,
+       table_id: table_id
+     ),
+     friendly_name: name,
+     description: description,
+     materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+       enable_refresh: enable_refresh,
+       query: query,
+       refresh_interval_ms: refresh_interval_ms
+     )
+   }.delete_if { |_, v| v.nil? }
+   new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
 
    gapi = service.insert_table dataset_id, new_view
    Table.from_gapi gapi, service
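The two refresh parameters above can also be combined, for example keeping automatic refresh enabled while widening the interval beyond the 30-minute default. A minimal sketch using only the parameters documented in this hunk:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Refresh automatically, but at most once every two hours.
materialized_view = dataset.create_materialized_view "my_materialized_view",
                                                     "SELECT name, age FROM proj.dataset.users",
                                                     enable_refresh: true,
                                                     refresh_interval_ms: 2 * 60 * 60 * 1000
```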
@@ -593,6 +809,11 @@ module Google
  # object without verifying that the resource exists on the BigQuery
  # service. Calls made on this object will raise errors if the resource
  # does not exist. Default is `false`. Optional.
+ # @param [String] view Specifies the view that determines which table information is returned.
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
+ # `:full`. For more information, see [BigQuery Classes](@todo: Update the link).
+ # The default value is the `:unspecified` view type.
  #
  # @return [Google::Cloud::Bigquery::Table, nil] Returns `nil` if the
  # table does not exist.
@@ -615,15 +836,22 @@ module Google
  #
  # table = dataset.table "my_table", skip_lookup: true
  #
+ # @example Avoid retrieving transient stats of the table with `view`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # table = dataset.table "my_table", view: "basic"
+ #
  # @!group Table
  #
- def table table_id, skip_lookup: nil
+ def table table_id, skip_lookup: nil, view: nil
    ensure_service!
-   if skip_lookup
-     return Table.new_reference project_id, dataset_id, table_id, service
-   end
-   gapi = service.get_table dataset_id, table_id
-   Table.from_gapi gapi, service
+   return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
+   gapi = service.get_table dataset_id, table_id, metadata_view: view
+   Table.from_gapi gapi, service, metadata_view: view
  rescue Google::Cloud::NotFoundError
    nil
  end
@@ -664,8 +892,7 @@ module Google
  #
  def tables token: nil, max: nil
    ensure_service!
-   options = { token: token, max: max }
-   gapi = service.list_tables dataset_id, options
+   gapi = service.list_tables dataset_id, token: token, max: max
    Table::List.from_gapi gapi, service, dataset_id, max
  end
 
@@ -703,9 +930,7 @@ module Google
  #
  def model model_id, skip_lookup: nil
    ensure_service!
-   if skip_lookup
-     return Model.new_reference project_id, dataset_id, model_id, service
-   end
+   return Model.new_reference project_id, dataset_id, model_id, service if skip_lookup
    gapi = service.get_model dataset_id, model_id
    Model.from_gapi_json gapi, service
  rescue Google::Cloud::NotFoundError
@@ -752,6 +977,174 @@ module Google
    Model::List.from_gapi gapi, service, dataset_id, max
  end
 
+ ##
+ # Creates a new routine. The following attributes may be set in the yielded block:
+ # {Routine::Updater#routine_type=}, {Routine::Updater#language=}, {Routine::Updater#arguments=},
+ # {Routine::Updater#return_type=}, {Routine::Updater#imported_libraries=}, {Routine::Updater#body=}, and
+ # {Routine::Updater#description=}.
+ #
+ # @param [String] routine_id The ID of the routine. The ID must contain only
+ # letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum length
+ # is 256 characters.
+ # @yield [routine] A block for setting properties on the routine.
+ # @yieldparam [Google::Cloud::Bigquery::Routine::Updater] routine An updater to set additional properties on the
+ # routine.
+ #
+ # @return [Google::Cloud::Bigquery::Routine] A new routine object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # routine = dataset.create_routine "my_routine" do |r|
+ #   r.routine_type = "SCALAR_FUNCTION"
+ #   r.language = "SQL"
+ #   r.arguments = [
+ #     Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
+ #   ]
+ #   r.body = "x * 3"
+ #   r.description = "My routine description"
+ # end
+ #
+ # puts routine.routine_id
+ #
+ # @example Extended example:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # routine = dataset.create_routine "my_routine" do |r|
+ #   r.routine_type = "SCALAR_FUNCTION"
+ #   r.language = :SQL
+ #   r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
+ #   r.arguments = [
+ #     Google::Cloud::Bigquery::Argument.new(
+ #       name: "arr",
+ #       argument_kind: "FIXED_TYPE",
+ #       data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+ #         type_kind: "ARRAY",
+ #         array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
+ #           type_kind: "STRUCT",
+ #           struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
+ #             fields: [
+ #               Google::Cloud::Bigquery::StandardSql::Field.new(
+ #                 name: "name",
+ #                 type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
+ #               ),
+ #               Google::Cloud::Bigquery::StandardSql::Field.new(
+ #                 name: "val",
+ #                 type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
+ #               )
+ #             ]
+ #           )
+ #         )
+ #       )
+ #     )
+ #   ]
+ # end
+ #
+ # @!group Routine
+ #
+ def create_routine routine_id
+   ensure_service!
+   new_tb = Google::Apis::BigqueryV2::Routine.new(
+     routine_reference: Google::Apis::BigqueryV2::RoutineReference.new(
+       project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
+     )
+   )
+   updater = Routine::Updater.new new_tb
+
+   yield updater if block_given?
+
+   gapi = service.insert_routine dataset_id, updater.to_gapi
+   Routine.from_gapi gapi, service
+ end
+
+ ##
+ # Retrieves an existing routine by ID.
+ #
+ # @param [String] routine_id The ID of a routine.
+ # @param [Boolean] skip_lookup Optionally create just a local reference
+ # object without verifying that the resource exists on the BigQuery
+ # service. Calls made on this object will raise errors if the resource
+ # does not exist. Default is `false`. Optional.
+ #
+ # @return [Google::Cloud::Bigquery::Routine, nil] Returns `nil` if the
+ # routine does not exist.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # routine = dataset.routine "my_routine"
+ # puts routine.routine_id
+ #
+ # @example Avoid retrieving the routine resource with `skip_lookup`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # routine = dataset.routine "my_routine", skip_lookup: true
+ #
+ # @!group Routine
+ #
+ def routine routine_id, skip_lookup: nil
+   ensure_service!
+   return Routine.new_reference project_id, dataset_id, routine_id, service if skip_lookup
+   gapi = service.get_routine dataset_id, routine_id
+   Routine.from_gapi gapi, service
+ rescue Google::Cloud::NotFoundError
+   nil
+ end
+
+ ##
+ # Retrieves the list of routines belonging to the dataset.
+ #
+ # @param [String] token A previously-returned page token representing
+ # part of the larger set of results to view.
+ # @param [Integer] max Maximum number of routines to return.
+ # @param [String] filter If set, then only the routines matching this filter are returned. The currently
+ # supported form is `routineType:`, with a {Routine#routine_type} enum value. Example:
+ # `routineType:SCALAR_FUNCTION`.
+ #
+ # @return [Array<Google::Cloud::Bigquery::Routine>] An array of routines
+ # (See {Google::Cloud::Bigquery::Routine::List})
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # routines = dataset.routines
+ # routines.each do |routine|
+ #   puts routine.routine_id
+ # end
+ #
+ # @example Retrieve all routines: (See {Routine::List#all})
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # routines = dataset.routines
+ # routines.all do |routine|
+ #   puts routine.routine_id
+ # end
+ #
+ # @!group Routine
+ #
+ def routines token: nil, max: nil, filter: nil
+   ensure_service!
+   gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
+   Routine::List.from_gapi gapi, service, dataset_id, max, filter: filter
+ end
+
  ##
  # Queries data by creating a [query
  # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
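A routine created through `#create_routine` is callable from SQL like any built-in function. A minimal sketch pairing creation with invocation; the function is qualified with its dataset ID in the query, and the routine and column names are illustrative:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Define a scalar SQL function that triples its argument.
routine = dataset.create_routine "triple" do |r|
  r.routine_type = "SCALAR_FUNCTION"
  r.language = "SQL"
  r.arguments = [
    Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
  ]
  r.body = "x * 3"
end

# Invoke it from SQL, qualified with the dataset ID.
data = dataset.query "SELECT my_dataset.triple(14) AS val"
data.each { |row| puts row[:val] } # => 42
```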
@@ -759,27 +1152,6 @@ module Google
  # Sets the current dataset as the default dataset in the query. Useful
  # for using unqualified table names.
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery | Ruby | Notes |
- # |-------------|----------------|---|
- # | `BOOL` | `true`/`false` | |
- # | `INT64` | `Integer` | |
- # | `FLOAT64` | `Float` | |
- # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
- # | `STRING` | `String` | |
- # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
- # | `DATE` | `Date` | |
- # | `TIMESTAMP` | `Time` | |
- # | `TIME` | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
- # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method. If the
  # dataset is a full resource representation (see {#resource_full?}), the
@@ -790,13 +1162,60 @@ module Google
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
  # [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- # arguments when the `query` string contains either positional (`?`)
- # or named (`@myparam`) query parameters. If value passed is an array
- # `["foo"]`, the query must use positional query parameters. If value
- # passed is a hash `{ myparam: "foo" }`, the query must use named
- # query parameters. When set, `legacy_sql` will automatically be set
- # to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`,
+ # the query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query
+ # must use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql`
+ # to true.
+ #
+ # BigQuery types are converted from Ruby types as follows:
+ #
+ # | BigQuery | Ruby | Notes |
+ # |--------------|--------------------------------------|------------------------------------------------------|
+ # | `BOOL` | `true`/`false` | |
+ # | `INT64` | `Integer` | |
+ # | `FLOAT64` | `Float` | |
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
+ # | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below. |
+ # | `STRING` | `String` | |
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
+ # | `DATE` | `Date` | |
+ # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below. |
+ # | `TIMESTAMP` | `Time` | |
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
+ #
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ # of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ # GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
+ #
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
+ #
+ # * `:BOOL`
+ # * `:INT64`
+ # * `:FLOAT64`
+ # * `:NUMERIC`
+ # * `:BIGNUMERIC`
+ # * `:STRING`
+ # * `:DATETIME`
+ # * `:DATE`
+ # * `:GEOGRAPHY`
+ # * `:TIMESTAMP`
+ # * `:TIME`
+ # * `:BYTES`
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ # is specified as `[:INT64]`.
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ # match the `params` hash and the values are the corresponding type codes.
+ #
+ # Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  # that represents the mapping of the external tables to the table
  # names used in the SQL query. The hash keys are the table names, and
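The `types` override matters chiefly when a Ruby value is ambiguous, such as an empty array or a `BigDecimal` intended as `BIGNUMERIC` rather than `NUMERIC`. A minimal sketch with positional parameters, using only type codes from the list above:

```ruby
require "google/cloud/bigquery"
require "bigdecimal"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Without types, the BigDecimal would be sent as NUMERIC and the
# element type of the empty array could not be inferred.
job = dataset.query_job "SELECT id FROM my_table WHERE amount = ? AND id IN UNNEST(?)",
                        params: [BigDecimal("123456789.123456789012345678"), []],
                        types: [:BIGNUMERIC, [:INT64]]

job.wait_until_done!
job.data.each { |row| puts row[:id] } unless job.failed?
```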
@@ -855,13 +1274,19 @@ module Google
  # Flattens all nested and repeated fields in the query results. The
  # default value is `true`. `large_results` parameter must be `true` if
  # this is set to `false`.
+ # @param [Integer] maximum_billing_tier Limits the billing tier for this
+ # job. Queries that have resource usage beyond this tier will fail
+ # (without incurring a charge). WARNING: The billed byte amount can be
+ # multiplied by an amount up to this number! Most users should not need
+ # to alter this setting, and we recommend that you avoid introducing new
+ # uses of it. Deprecated.
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
  # job. Queries that will have bytes billed beyond this limit will fail
  # (without incurring a charge). Optional. If unspecified, this will be
  # set to your project default.
  # @param [String] job_id A user-defined ID for the query job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -870,27 +1295,48 @@ module Google
  # prepended to a generated value to produce a unique job ID. For
  # example, the prefix `daily_import_job_` can be given to generate a
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or dashes (-). The maximum length of the entire ID
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
  # be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  # @param [Array<String>, String] udfs User-defined function resources
- # used in the query. May be either a code resource to load from a
- # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ # used in a legacy SQL query. May be either a code resource to load from
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  # that contains code for a user-defined function (UDF). Providing an
  # inline code resource is equivalent to providing a URI for a file
- # containing the same code. See [User-Defined
- # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
- # @param [Integer] maximum_billing_tier Deprecated: Change the billing
- # tier to allow high-compute queries.
+ # containing the same code.
+ #
+ # This parameter is used for defining User Defined Function (UDF)
+ # resources only when using legacy SQL. Users of standard SQL should
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
+ # Routines API to define UDF resources.
+ #
+ # For additional information on migrating, see: [Migrating to
+ # standard SQL - Differences in user-defined JavaScript
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
+ # @param [Boolean] create_session If true, creates a new session, where the
+ # session ID will be a server-generated random ID. If false, runs the query
+ # with an existing session ID when one is provided in the `session_id`
+ # param, and otherwise runs the query in non-session mode. See {Job#session_id}.
+ # The default value is false.
+ # @param [String] session_id The ID of an existing session. See also the
+ # `create_session` param and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  # configuration object for setting additional options for the query.
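Label violations are rejected server-side, so it pays to keep keys lowercase and letter-initial when tagging jobs. A minimal sketch passing compliant labels to `query_job`:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Keys are lowercase and start with a letter; values may be empty.
job = dataset.query_job "SELECT name FROM my_table",
                        labels: { "team" => "data-eng", "env" => "prod" }

job.wait_until_done!
```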
@@ -960,32 +1406,62 @@ module Google
  #   end
  # end
  #
+ # @example Query using named query parameters with types:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # job = dataset.query_job "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
+ #                         params: { ids: [] },
+ #                         types: { ids: [:INT64] }
+ #
+ # job.wait_until_done!
+ # if !job.failed?
+ #   job.data.each do |row|
+ #     puts row[:name]
+ #   end
+ # end
+ #
  # @example Execute a DDL statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
  #
- # job = bigquery.query_job "CREATE TABLE my_table (x INT64)"
+ # job = dataset.query_job "CREATE TABLE my_table (x INT64)"
  #
  # job.wait_until_done!
  # if !job.failed?
- #   table_ref = job.ddl_target_table
+ #   table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  # end
  #
  # @example Execute a DML statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
  #
- # job = bigquery.query_job "UPDATE my_table " \
- #                          "SET x = x + 1 " \
- #                          "WHERE x IS NOT NULL"
+ # job = dataset.query_job "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
  #
  # job.wait_until_done!
  # if !job.failed?
  #   puts job.num_dml_affected_rows
  # end
  #
+ # @example Run query in a session:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
+ #
+ # job.wait_until_done!
+ #
+ # session_id = job.session_id
+ # data = dataset.query "SELECT * FROM temptable", session_id: session_id
+ #
  # @example Query using external data source, set destination:
  # require "google/cloud/bigquery"
  #
@@ -1012,21 +1488,52 @@ module Google
  #
  # @!group Data
  #
- def query_job query, params: nil, external: nil,
-               priority: "INTERACTIVE", cache: true, table: nil,
-               create: nil, write: nil, dryrun: nil, standard_sql: nil,
-               legacy_sql: nil, large_results: nil, flatten: nil,
-               maximum_billing_tier: nil, maximum_bytes_billed: nil,
-               job_id: nil, prefix: nil, labels: nil, udfs: nil
+ def query_job query,
+               params: nil,
+               types: nil,
+               external: nil,
+               priority: "INTERACTIVE",
+               cache: true,
+               table: nil,
+               create: nil,
+               write: nil,
+               dryrun: nil,
+               standard_sql: nil,
+               legacy_sql: nil,
+               large_results: nil,
+               flatten: nil,
+               maximum_billing_tier: nil,
+               maximum_bytes_billed: nil,
+               job_id: nil,
+               prefix: nil,
+               labels: nil,
+               udfs: nil,
+               create_session: nil,
+               session_id: nil
    ensure_service!
-   options = { priority: priority, cache: cache, table: table,
-               create: create, write: write, dryrun: dryrun,
-               large_results: large_results, flatten: flatten,
-               legacy_sql: legacy_sql, standard_sql: standard_sql,
-               maximum_billing_tier: maximum_billing_tier,
-               maximum_bytes_billed: maximum_bytes_billed,
-               job_id: job_id, prefix: prefix, params: params,
-               external: external, labels: labels, udfs: udfs }
+   options = {
+     params: params,
+     types: types,
+     external: external,
+     priority: priority,
+     cache: cache,
+     table: table,
+     create: create,
+     write: write,
+     dryrun: dryrun,
+     standard_sql: standard_sql,
+     legacy_sql: legacy_sql,
+     large_results: large_results,
+     flatten: flatten,
+     maximum_billing_tier: maximum_billing_tier,
+     maximum_bytes_billed: maximum_bytes_billed,
+     job_id: job_id,
+     prefix: prefix,
+     labels: labels,
+     udfs: udfs,
+     create_session: create_session,
+     session_id: session_id
+   }
 
    updater = QueryJob::Updater.from_options service, query, options
    updater.dataset = self
@@ -1048,27 +1555,6 @@ module Google
  # Sets the current dataset as the default dataset in the query. Useful
  # for using unqualified table names.
  #
- # When using standard SQL and passing arguments using `params`, Ruby
- # types are mapped to BigQuery types as follows:
- #
- # | BigQuery | Ruby | Notes |
- # |-------------|----------------|---|
- # | `BOOL` | `true`/`false` | |
- # | `INT64` | `Integer` | |
- # | `FLOAT64` | `Float` | |
- # | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
- # | `STRING` | `String` | |
- # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
- # | `DATE` | `Date` | |
- # | `TIMESTAMP` | `Time` | |
- # | `TIME` | `Google::Cloud::BigQuery::Time` | |
- # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
- # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
- # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
- #
- # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
- # for an overview of each BigQuery data type, including allowed values.
- #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {QueryJob::Updater#location=} in a block passed to this method. If the
  # dataset is a full resource representation (see {#resource_full?}), the
@@ -1081,13 +1567,60 @@ module Google
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
  # [myProjectId:myDatasetId.myTableId]".
- # @param [Array, Hash] params Standard SQL only. Used to pass query
- # arguments when the `query` string contains either positional (`?`)
- # or named (`@myparam`) query parameters. If value passed is an array
- # `["foo"]`, the query must use positional query parameters. If value
- # passed is a hash `{ myparam: "foo" }`, the query must use named
- # query parameters. When set, `legacy_sql` will automatically be set
- # to false and `standard_sql` to true.
+ # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
+ # either positional (`?`) or named (`@myparam`) query parameters. If the value passed is an array `["foo"]`,
+ # the query must use positional query parameters. If the value passed is a hash `{ myparam: "foo" }`, the query
+ # must use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql`
+ # to true.
+ #
+ # BigQuery types are converted from Ruby types as follows:
+ #
+ # | BigQuery | Ruby | Notes |
+ # |--------------|--------------------------------------|------------------------------------------------------|
+ # | `BOOL` | `true`/`false` | |
+ # | `INT64` | `Integer` | |
+ # | `FLOAT64` | `Float` | |
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
+ # | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below. |
+ # | `STRING` | `String` | |
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
+ # | `DATE` | `Date` | |
+ # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below. |
+ # | `TIMESTAMP` | `Time` | |
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
+ #
+ # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
+ # of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
+ # GIS data](https://cloud.google.com/bigquery/docs/gis-data).
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
+ #
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
+ #
+ # * `:BOOL`
+ # * `:INT64`
+ # * `:FLOAT64`
+ # * `:NUMERIC`
+ # * `:BIGNUMERIC`
+ # * `:STRING`
+ # * `:DATETIME`
+ # * `:DATE`
+ # * `:GEOGRAPHY`
+ # * `:TIMESTAMP`
+ # * `:TIME`
+ # * `:BYTES`
+ # * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
+ # is specified as `[:INT64]`.
+ # * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
+ # match the `params` hash and the values are the corresponding type codes.
+ #
+ # Types are optional.
  # @param [Hash<String|Symbol, External::DataSource>] external A Hash
  # that represents the mapping of the external tables to the table
  # names used in the SQL query. The hash keys are the table names, and
@@ -1122,6 +1655,8 @@ module Google
  # When set to false, the values of `large_results` and `flatten` are
  # ignored; the query will be run as if `large_results` is true and
  # `flatten` is false. Optional. The default value is false.
+ # @param [String] session_id The ID of an existing session. See the
+ # `create_session` param in {#query_job} and {Job#session_id}.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
  # configuration object for setting additional options for the query.
@@ -1136,9 +1671,12 @@ module Google
  #
  # data = dataset.query "SELECT name FROM my_table"
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  #   puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using legacy SQL:
  # require "google/cloud/bigquery"
@@ -1149,9 +1687,12 @@ module Google
  # data = dataset.query "SELECT name FROM my_table",
  #                      legacy_sql: true
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  #   puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using positional query parameters:
  # require "google/cloud/bigquery"
@@ -1162,9 +1703,12 @@ module Google
  # data = dataset.query "SELECT name FROM my_table WHERE id = ?",
  #                      params: [1]
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  #   puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Query using named query parameters:
  # require "google/cloud/bigquery"
@@ -1175,30 +1719,63 @@ module Google
  # data = dataset.query "SELECT name FROM my_table WHERE id = @id",
  #                      params: { id: 1 }
  #
+ # # Iterate over the first page of results
+ # data.each do |row|
+ #   puts row[:name]
+ # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
+ #
+ # @example Query using named query parameters with types:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
+ #                      params: { ids: [] },
+ #                      types: { ids: [:INT64] }
+ #
+ # # Iterate over the first page of results
  # data.each do |row|
  #   puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @example Execute a DDL statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
  #
- # data = bigquery.query "CREATE TABLE my_table (x INT64)"
+ # data = dataset.query "CREATE TABLE my_table (x INT64)"
  #
- # table_ref = data.ddl_target_table
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
  #
  # @example Execute a DML statement:
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
  #
- # data = bigquery.query "UPDATE my_table " \
- #                       "SET x = x + 1 " \
- #                       "WHERE x IS NOT NULL"
+ # data = dataset.query "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
  #
  # puts data.num_dml_affected_rows
  #
+ # @example Run query in a session:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
+ #
+ # job.wait_until_done!
+ #
+ # session_id = job.session_id
+ # data = dataset.query "SELECT * FROM temptable", session_id: session_id
+ #
  # @example Query using external data source, set destination:
  # require "google/cloud/bigquery"
  #
@@ -1216,17 +1793,34 @@ module Google
  #   query.table = dataset.table "my_table", skip_lookup: true
  # end
  #
+ # # Iterate over the first page of results
  # data.each do |row|
  #   puts row[:name]
  # end
+ # # Retrieve the next page of results
+ # data = data.next if data.next?
  #
  # @!group Data
  #
- def query query, params: nil, external: nil, max: nil, cache: true,
-           standard_sql: nil, legacy_sql: nil, &block
-   job = query_job query, params: params, external: external,
-                   cache: cache, standard_sql: standard_sql,
-                   legacy_sql: legacy_sql, &block
+ def query query,
+           params: nil,
+           types: nil,
+           external: nil,
+           max: nil,
+           cache: true,
+           standard_sql: nil,
+           legacy_sql: nil,
+           session_id: nil,
+           &block
+   job = query_job query,
+                   params: params,
+                   types: types,
+                   external: external,
+                   cache: cache,
+                   standard_sql: standard_sql,
+                   legacy_sql: legacy_sql,
+                   session_id: session_id,
+                   &block
    job.wait_until_done!
    ensure_job_succeeded! job
 
@@ -1252,7 +1846,7 @@ module Google
  # The following values are supported:
  #
  # * `csv` - CSV
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
  # * `sheets` - Google Sheets
  # * `datastore_backup` - Cloud Datastore backup
@@ -1315,7 +1909,7 @@ module Google
  # The following values are supported:
  #
  # * `csv` - CSV
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
  # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
  # * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -1407,8 +2001,8 @@ module Google
  # this option. Also note that for most use cases, the block yielded by
  # this method is a more convenient way to configure the schema.
  # @param [String] job_id A user-defined ID for the load job. The ID
- # must contain only letters (a-z, A-Z), numbers (0-9), underscores
- # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
  # `job_id` is provided, then `prefix` will not be used.
  #
  # See [Generating a job
@@ -1417,18 +2011,26 @@ module Google
1417
2011
  # prepended to a generated value to produce a unique job ID. For
1418
2012
  # example, the prefix `daily_import_job_` can be given to generate a
1419
2013
  # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1420
- # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1421
- # underscores (_), or dashes (-). The maximum length of the entire ID
2014
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
2015
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
1422
2016
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1423
2017
  # be used.
1424
2018
  # @param [Hash] labels A hash of user-provided labels associated with
1425
- # the job. You can use these to organize and group your jobs. Label
1426
- # keys and values can be no longer than 63 characters, can only
1427
- # contain lowercase letters, numeric characters, underscores and
1428
- # dashes. International characters are allowed. Label values are
1429
- # optional. Label keys must start with a letter and each label in the
1430
- # list must have a different key. See [Requirements for
1431
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
2019
+ # the job. You can use these to organize and group your jobs. (A small
+ # client-side validation sketch follows the list below.)
2020
+ #
2021
+ # The labels applied to a resource must meet the following requirements:
2022
+ #
2023
+ # * Each resource can have multiple labels, up to a maximum of 64.
2024
+ # * Each label must be a key-value pair.
2025
+ # * Keys have a minimum length of 1 character and a maximum length of
2026
+ # 63 characters, and cannot be empty. Values can be empty, and have
2027
+ # a maximum length of 63 characters.
2028
+ # * Keys and values can contain only lowercase letters, numeric characters,
2029
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
2030
+ # international characters are allowed.
2031
+ # * The key portion of a label must be unique. However, you can use the
2032
+ # same key with multiple resources.
2033
+ # * Keys must start with a lowercase letter or international character.
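The label rules above are mechanical enough to check client-side before submitting a job. A minimal sketch of such a check (a hypothetical helper, not part of this gem); `\p{Ll}` matches lowercase letters and `\p{Lo}` matches caseless letters, which covers the allowed international characters:

    # Hypothetical validator for one label pair, per the rules listed above.
    def valid_label? key, value
      key.match?(/\A[\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{Nd}_-]{0,62}\z/) &&
        (value.empty? || value.match?(/\A[\p{Ll}\p{Lo}\p{Nd}_-]{1,63}\z/))
    end

    valid_label? "cost_center", "marketing-42" # => true
    valid_label? "Env", "prod"                 # => false: keys must be lowercase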
1432
2034
  # @yield [updater] A block for setting the schema and other
1433
2035
  # options for the destination table. The schema can be omitted if the
1434
2036
  # destination table already exists, or if you're loading data from a
@@ -1522,29 +2124,19 @@ module Google
1522
2124
  #
1523
2125
  # @!group Data
1524
2126
  #
1525
- def load_job table_id, files, format: nil, create: nil, write: nil,
1526
- projection_fields: nil, jagged_rows: nil,
1527
- quoted_newlines: nil, encoding: nil, delimiter: nil,
1528
- ignore_unknown: nil, max_bad_records: nil, quote: nil,
1529
- skip_leading: nil, schema: nil, job_id: nil, prefix: nil,
1530
- labels: nil, autodetect: nil, null_marker: nil, dryrun: nil
2127
+ def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2128
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
2129
+ quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
2130
+ null_marker: nil, dryrun: nil
1531
2131
  ensure_service!
1532
2132
 
1533
2133
  updater = load_job_updater table_id,
1534
- format: format, create: create,
1535
- write: write,
1536
- projection_fields: projection_fields,
1537
- jagged_rows: jagged_rows,
1538
- quoted_newlines: quoted_newlines,
1539
- encoding: encoding,
1540
- delimiter: delimiter,
1541
- ignore_unknown: ignore_unknown,
1542
- max_bad_records: max_bad_records,
1543
- quote: quote, skip_leading: skip_leading,
1544
- dryrun: dryrun, schema: schema,
1545
- job_id: job_id, prefix: prefix,
1546
- labels: labels, autodetect: autodetect,
1547
- null_marker: null_marker
2134
+ format: format, create: create, write: write, projection_fields: projection_fields,
2135
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
2136
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
2137
+ max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
2138
+ dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
2139
+ autodetect: autodetect, null_marker: null_marker
1548
2140
 
1549
2141
  yield updater if block_given?
1550
2142
 
@@ -1579,7 +2171,7 @@ module Google
1579
2171
  # The following values are supported:
1580
2172
  #
1581
2173
  # * `csv` - CSV
1582
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
2174
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1583
2175
  # * `avro` - [Avro](http://avro.apache.org/)
1584
2176
  # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
1585
2177
  # * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -1760,21 +2352,14 @@ module Google
1760
2352
  #
1761
2353
  # @!group Data
1762
2354
  #
1763
- def load table_id, files, format: nil, create: nil, write: nil,
1764
- projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1765
- encoding: nil, delimiter: nil, ignore_unknown: nil,
1766
- max_bad_records: nil, quote: nil, skip_leading: nil,
1767
- schema: nil, autodetect: nil, null_marker: nil, &block
2355
+ def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2356
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
2357
+ quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
1768
2358
  job = load_job table_id, files,
1769
- format: format, create: create, write: write,
1770
- projection_fields: projection_fields,
1771
- jagged_rows: jagged_rows,
1772
- quoted_newlines: quoted_newlines,
1773
- encoding: encoding, delimiter: delimiter,
1774
- ignore_unknown: ignore_unknown,
1775
- max_bad_records: max_bad_records,
1776
- quote: quote, skip_leading: skip_leading,
1777
- schema: schema, autodetect: autodetect,
2359
+ format: format, create: create, write: write, projection_fields: projection_fields,
2360
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
2361
+ delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
2362
+ quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
1778
2363
  null_marker: null_marker, &block
1779
2364
 
1780
2365
  job.wait_until_done!
@@ -1825,7 +2410,7 @@ module Google
1825
2410
  # dataset = bigquery.dataset "my_dataset", skip_lookup: true
1826
2411
  # dataset.exists? # true
1827
2412
  #
1828
- def exists? force: nil
2413
+ def exists? force: false
1829
2414
  return gapi_exists? if force
1830
2415
  # If we have a memoized value, return it
1831
2416
  return @exists unless @exists.nil?
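Switching the default from `nil` to `false` is behavior-preserving (both are falsey). Given the memoization above, the flag works like this (a short sketch, reusing the `skip_lookup` dataset from the example):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset", skip_lookup: true

    dataset.exists?             # first call asks the API; the answer is memoized
    dataset.exists?             # answered from the memo, no API call
    dataset.exists? force: true # bypasses the memo and asks the API again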
@@ -1935,14 +2520,12 @@ module Google
1935
2520
  end
1936
2521
 
1937
2522
  ##
1938
- # @private New lazy Dataset object without making an HTTP request.
2523
+ # @private New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
1939
2524
  def self.new_reference project_id, dataset_id, service
1940
2525
  raise ArgumentError, "dataset_id is required" unless dataset_id
1941
2526
  new.tap do |b|
1942
- reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new(
1943
- project_id: project_id,
1944
- dataset_id: dataset_id
1945
- )
2527
+ reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new \
2528
+ project_id: project_id, dataset_id: dataset_id
1946
2529
  b.service = service
1947
2530
  b.instance_variable_set :@reference, reference_gapi
1948
2531
  end
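A short sketch of what such a reference-only object looks like from the caller's side, assuming the documented `skip_lookup` option and the `reference?` predicate:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset", skip_lookup: true

    dataset.reference? # => true: only project_id/dataset_id are held locally
    dataset.dataset_id # => "my_dataset", served without an API request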
@@ -1953,18 +2536,47 @@ module Google
1953
2536
  # the need to complete a load operation before the data can appear in
1954
2537
  # query results.
1955
2538
  #
2539
+ # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
2540
+ # complex types (a short rounding sketch follows this section):
2541
+ #
2542
+ # | BigQuery | Ruby | Notes |
2543
+ # |--------------|--------------------------------------|----------------------------------------------------|
2544
+ # | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
2545
+ # | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
2546
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
2547
+ # | `DATE` | `Date` | |
2548
+ # | `GEOGRAPHY` | `String` | |
2549
+ # | `TIMESTAMP` | `Time` | |
2550
+ # | `TIME` | `Google::Cloud::BigQuery::Time` | |
2551
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
2552
+ # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
2553
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
2554
+ #
2555
+ # Because BigQuery's streaming API is designed for high insertion rates,
2556
+ # modifications to the underlying table metadata are eventually
2557
+ # consistent when interacting with the streaming system. In most cases
2558
+ # metadata changes are propagated within minutes, but during this period
2559
+ # API responses may reflect the inconsistent state of the table.
2560
+ #
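A minimal sketch of the `NUMERIC`/`BIGNUMERIC` rows in the table above; the gem documents that `BigDecimal` values are rounded to scale 9, so a `String` is the safe carrier when more fractional digits must survive:

    require "bigdecimal"

    value = BigDecimal "123456798.98765432100001"
    value.round 9 # => 0.123456798987654321e9 -- the trailing digits are gone

    # BigDecimal is fine for NUMERIC (scale 9 suffices); use a plain-decimal
    # String for BIGNUMERIC values with higher scale:
    row = { "my_numeric" => value, "my_bignumeric" => value.to_s("F") }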
1956
2561
  # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
1957
2562
  # Streaming Data Into BigQuery
1958
2563
  #
2564
+ # @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
2565
+ # BigQuery Troubleshooting: Metadata errors for streaming inserts
2566
+ #
1959
2567
  # @param [String] table_id The ID of the destination table.
1960
2568
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
1961
- # containing the data. Required.
1962
- # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
1963
- # uses this property to detect duplicate insertion requests on a
1964
- # best-effort basis. For more information, see [data
1965
- # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
1966
- # Optional. If not provided, the client library will assign a UUID to
1967
- # each row before the request is sent.
2569
+ # containing the data. Required. `BigDecimal` values will be rounded to
2570
+ # scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
2571
+ # rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
2572
+ # instead of `BigDecimal`.
2573
+ # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
2574
+ # detect duplicate insertion requests on a best-effort basis. For more information, see [data
2575
+ # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
2576
+ # not provided, the client library will assign a UUID to each row before the request is sent.
2577
+ #
2578
+ # The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of an
2579
+ # ID for a specific row in the array. (A sketch of both modes follows.)
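A minimal sketch of both insert-ID modes described above:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"
    rows = [{ "name" => "Alice" }, { "name" => "Bob" }]

    # Explicit IDs, one per row, for best-effort de-duplication:
    dataset.insert "my_table", rows, insert_ids: ["row-1", "row-2"]

    # Opt out of ID generation entirely (no de-duplication):
    dataset.insert "my_table", rows, insert_ids: :skip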
1968
2580
  # @param [Boolean] skip_invalid Insert all valid rows of a request, even
1969
2581
  # if invalid rows exist. The default value is `false`, which causes
1970
2582
  # the entire request to fail if any invalid rows exist.
@@ -1975,6 +2587,12 @@ module Google
1975
2587
  # a new table with the given `table_id`, if no table is found for
1976
2588
  # `table_id`. The default value is false.
1977
2589
  #
2590
+ # @yield [table] A block for setting the table
2591
+ # @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
2592
+ # to set additional properties on the table in the API request to
2593
+ # create it. Only used when `autocreate` is set and the table does not
2594
+ # already exist.
2595
+ #
1978
2596
  # @return [Google::Cloud::Bigquery::InsertResponse] An insert response
1979
2597
  # object.
1980
2598
  #
@@ -2018,42 +2636,36 @@ module Google
2018
2636
  # t.schema.integer "age", mode: :required
2019
2637
  # end
2020
2638
  #
2639
+ # @example Pass a `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
2640
+ # require "google/cloud/bigquery"
2641
+ #
2642
+ # bigquery = Google::Cloud::Bigquery.new
2643
+ # dataset = bigquery.dataset "my_dataset"
2644
+ #
2645
+ # row = {
2646
+ # "my_numeric" => BigDecimal("123456798.987654321"),
2647
+ # "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
2648
+ # }
2649
+ # dataset.insert "my_table", row
2650
+ #
2021
2651
  # @!group Data
2022
2652
  #
2023
- def insert table_id, rows, insert_ids: nil, skip_invalid: nil,
2024
- ignore_unknown: nil, autocreate: nil
2653
+ def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
2025
2654
  rows = [rows] if rows.is_a? Hash
2655
+ raise ArgumentError, "No rows provided" if rows.empty?
2656
+
2657
+ insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
2026
2658
  insert_ids = Array insert_ids
2027
- if insert_ids.count > 0 && insert_ids.count != rows.count
2659
+ if insert_ids.count.positive? && insert_ids.count != rows.count
2028
2660
  raise ArgumentError, "insert_ids must be the same size as rows"
2029
2661
  end
2030
2662
 
2031
2663
  if autocreate
2032
- begin
2033
- insert_data table_id, rows, skip_invalid: skip_invalid,
2034
- ignore_unknown: ignore_unknown,
2035
- insert_ids: insert_ids
2036
- rescue Google::Cloud::NotFoundError
2037
- sleep rand(1..60)
2038
- begin
2039
- create_table table_id do |tbl_updater|
2040
- yield tbl_updater if block_given?
2041
- end
2042
- # rubocop:disable Lint/HandleExceptions
2043
- rescue Google::Cloud::AlreadyExistsError
2044
- end
2045
- # rubocop:enable Lint/HandleExceptions
2046
-
2047
- sleep 60
2048
- insert table_id, rows, skip_invalid: skip_invalid,
2049
- ignore_unknown: ignore_unknown,
2050
- autocreate: true,
2051
- insert_ids: insert_ids
2052
- end
2664
+ insert_data_with_autocreate table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
2665
+ insert_ids: insert_ids, &block
2053
2666
  else
2054
- insert_data table_id, rows, skip_invalid: skip_invalid,
2055
- ignore_unknown: ignore_unknown,
2056
- insert_ids: insert_ids
2667
+ insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
2668
+ insert_ids: insert_ids
2057
2669
  end
2058
2670
  end
2059
2671
 
@@ -2076,6 +2688,11 @@ module Google
2076
2688
  # messages before the batch is published. Default is 10.
2077
2689
  # @attr_reader [Numeric] threads The number of threads used to insert
2078
2690
  # batches of rows. Default is 4.
2691
+ # @param [String] view Specifies the view that determines which table information is returned.
2692
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
2693
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
2694
+ # `:full`. For more information, see the BigQuery API documentation for table metadata views.
2695
+ # The default value is the `:unspecified` view type.
2079
2696
  # @yield [response] the callback for when a batch of rows is inserted
2080
2697
  # @yieldparam [Table::AsyncInserter::Result] result the result of the
2081
2698
  # asynchronous insert
@@ -2104,14 +2721,35 @@ module Google
2104
2721
  #
2105
2722
  # inserter.stop.wait!
2106
2723
  #
2107
- def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
2108
- max_bytes: 10000000, max_rows: 500, interval: 10,
2109
- threads: 4, &block
2724
+ # @example Avoid retrieving transient stats of the table while inserting:
2725
+ # require "google/cloud/bigquery"
2726
+ #
2727
+ # bigquery = Google::Cloud::Bigquery.new
2728
+ # dataset = bigquery.dataset "my_dataset"
2729
+ # inserter = dataset.insert_async("my_table", view: "basic") do |result|
2730
+ # if result.error?
2731
+ # log_error result.error
2732
+ # else
2733
+ # log_insert "inserted #{result.insert_count} rows " \
2734
+ # "with #{result.error_count} errors"
2735
+ # end
2736
+ # end
2737
+ #
2738
+ # rows = [
2739
+ # { "first_name" => "Alice", "age" => 21 },
2740
+ # { "first_name" => "Bob", "age" => 22 }
2741
+ # ]
2742
+ # inserter.insert rows
2743
+ #
2744
+ # inserter.stop.wait!
2745
+ #
2746
+ def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
2747
+ interval: 10, threads: 4, view: nil, &block
2110
2748
  ensure_service!
2111
2749
 
2112
2750
  # Get table, don't use Dataset#table which handles NotFoundError
2113
- gapi = service.get_table dataset_id, table_id
2114
- table = Table.from_gapi gapi, service
2751
+ gapi = service.get_table dataset_id, table_id, metadata_view: view
2752
+ table = Table.from_gapi gapi, service, metadata_view: view
2115
2753
  # Get the AsyncInserter from the table
2116
2754
  table.insert_async skip_invalid: skip_invalid,
2117
2755
  ignore_unknown: ignore_unknown,
@@ -2119,17 +2757,53 @@ module Google
2119
2757
  interval: interval, threads: threads, &block
2120
2758
  end
2121
2759
 
2760
+ ##
2761
+ # Build an object of type Google::Apis::BigqueryV2::DatasetAccessEntry from
2762
+ # this dataset.
2763
+ #
2764
+ # @param [Array<String>] target_types The list of target types within the dataset.
2765
+ #
2766
+ # @return [Google::Apis::BigqueryV2::DatasetAccessEntry] Returns a DatasetAccessEntry object.
2767
+ #
2768
+ # @example
2769
+ # require "google/cloud/bigquery"
2770
+ #
2771
+ # bigquery = Google::Cloud::Bigquery.new
2772
+ # dataset = bigquery.dataset "my_dataset"
2773
+ # dataset_access_entry = dataset.build_access_entry target_types: ["VIEWS"]
2774
+ #
2775
+ def build_access_entry target_types: nil
2776
+ params = {
2777
+ dataset: dataset_ref,
2778
+ target_types: target_types
2779
+ }.delete_if { |_, v| v.nil? }
2780
+ Google::Apis::BigqueryV2::DatasetAccessEntry.new(**params)
2781
+ end
2782
+
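The returned entry is intended to be passed into another dataset's access rules. A sketch, assuming the `add_reader_dataset` rule that this release adds to `Dataset::Access` (see `dataset/access.rb` in the file list):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"
    other    = bigquery.dataset "other_dataset"

    entry = dataset.build_access_entry target_types: ["VIEWS"]
    other.access do |access|
      # Assumption: grants the views in `dataset` read access to `other`.
      access.add_reader_dataset entry
    end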
2122
2783
  protected
2123
2784
 
2124
- def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil,
2125
- insert_ids: nil
2785
+ def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
2786
+ insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
2787
+ rescue Google::Cloud::NotFoundError
2788
+ sleep rand(1..60) # random jitter: spread concurrent callers racing to create the table
2789
+ begin
2790
+ create_table table_id do |tbl_updater|
2791
+ yield tbl_updater if block_given?
2792
+ end
2793
+ rescue Google::Cloud::AlreadyExistsError
2794
+ # Do nothing if it already exists
2795
+ end
2796
+ sleep 60 # give the new table time to become visible to the streaming backend
2797
+ retry
2798
+ end
2799
+
2800
+ def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
2126
2801
  rows = [rows] if rows.is_a? Hash
2127
2802
  raise ArgumentError, "No rows provided" if rows.empty?
2128
2803
  ensure_service!
2129
- options = { skip_invalid: skip_invalid,
2130
- ignore_unknown: ignore_unknown,
2131
- insert_ids: insert_ids }
2132
- gapi = service.insert_tabledata dataset_id, table_id, rows, options
2804
+ gapi = service.insert_tabledata dataset_id, table_id, rows, skip_invalid: skip_invalid,
2805
+ ignore_unknown: ignore_unknown,
2806
+ insert_ids: insert_ids
2133
2807
  InsertResponse.from_gapi rows, gapi
2134
2808
  end
2135
2809
 
@@ -2160,10 +2834,8 @@ module Google
2160
2834
  def patch_gapi! *attributes
2161
2835
  return if attributes.empty?
2162
2836
  ensure_service!
2163
- patch_args = Hash[attributes.map do |attr|
2164
- [attr, @gapi.send(attr)]
2165
- end]
2166
- patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
2837
+ patch_args = attributes.to_h { |attr| [attr, @gapi.send(attr)] }
2838
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
2167
2839
  patch_gapi.etag = etag if etag
2168
2840
  @gapi = service.patch_dataset dataset_id, patch_gapi
2169
2841
  end
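The `Hash[...]` construction gives way to the block form of `Array#to_h`, available since Ruby 2.6; a self-contained illustration of the same pairing:

    require "ostruct"

    gapi = OpenStruct.new name: "my_dataset", description: "Stats tables"
    [:name, :description].to_h { |attr| [attr, gapi.send(attr)] }
    # => { name: "my_dataset", description: "Stats tables" }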
@@ -2172,7 +2844,7 @@ module Google
2172
2844
  # Load the complete representation of the dataset if it has been
2173
2845
  # only partially loaded by a request to the API list method.
2174
2846
  def ensure_full_data!
2175
- reload! if resource_partial?
2847
+ reload! unless resource_full?
2176
2848
  end
2177
2849
 
2178
2850
  def ensure_job_succeeded! job
@@ -2203,11 +2875,8 @@ module Google
2203
2875
  )
2204
2876
  end
2205
2877
 
2206
- def load_job_csv_options! job, jagged_rows: nil,
2207
- quoted_newlines: nil,
2208
- delimiter: nil,
2209
- quote: nil, skip_leading: nil,
2210
- null_marker: nil
2878
+ def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
2879
+ skip_leading: nil, null_marker: nil
2211
2880
  job.jagged_rows = jagged_rows unless jagged_rows.nil?
2212
2881
  job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
2213
2882
  job.delimiter = delimiter unless delimiter.nil?
@@ -2216,17 +2885,11 @@ module Google
2216
2885
  job.skip_leading = skip_leading unless skip_leading.nil?
2217
2886
  end
2218
2887
 
2219
- def load_job_file_options! job, format: nil,
2220
- projection_fields: nil,
2221
- jagged_rows: nil, quoted_newlines: nil,
2222
- encoding: nil, delimiter: nil,
2223
- ignore_unknown: nil, max_bad_records: nil,
2224
- quote: nil, skip_leading: nil,
2225
- null_marker: nil
2888
+ def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
2889
+ encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
2890
+ skip_leading: nil, null_marker: nil
2226
2891
  job.format = format unless format.nil?
2227
- unless projection_fields.nil?
2228
- job.projection_fields = projection_fields
2229
- end
2892
+ job.projection_fields = projection_fields unless projection_fields.nil?
2230
2893
  job.encoding = encoding unless encoding.nil?
2231
2894
  job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
2232
2895
  job.max_bad_records = max_bad_records unless max_bad_records.nil?
@@ -2238,16 +2901,11 @@ module Google
2238
2901
  null_marker: null_marker
2239
2902
  end
2240
2903
 
2241
- def load_job_updater table_id, format: nil, create: nil,
2242
- write: nil, projection_fields: nil,
2243
- jagged_rows: nil, quoted_newlines: nil,
2244
- encoding: nil, delimiter: nil,
2245
- ignore_unknown: nil, max_bad_records: nil,
2246
- quote: nil, skip_leading: nil, dryrun: nil,
2247
- schema: nil, job_id: nil, prefix: nil, labels: nil,
2248
- autodetect: nil, null_marker: nil
2249
- new_job = load_job_gapi table_id, dryrun, job_id: job_id,
2250
- prefix: prefix
2904
+ def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
2905
+ quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
2906
+ max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
2907
+ prefix: nil, labels: nil, autodetect: nil, null_marker: nil
2908
+ new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
2251
2909
  LoadJob::Updater.new(new_job).tap do |job|
2252
2910
  job.location = location if location # may be dataset reference
2253
2911
  job.create = create unless create.nil?
@@ -2285,9 +2943,7 @@ module Google
2285
2943
  job_gapi.configuration.load.update! source_uris: urls
2286
2944
  if job_gapi.configuration.load.source_format.nil?
2287
2945
  source_format = Convert.derive_source_format_from_list urls
2288
- unless source_format.nil?
2289
- job_gapi.configuration.load.source_format = source_format
2290
- end
2946
+ job_gapi.configuration.load.source_format = source_format unless source_format.nil?
2291
2947
  end
2292
2948
  end
2293
2949
 
@@ -2299,9 +2955,7 @@ module Google
2299
2955
  path = Pathname(file).to_path
2300
2956
  if job_gapi.configuration.load.source_format.nil?
2301
2957
  source_format = Convert.derive_source_format path
2302
- unless source_format.nil?
2303
- job_gapi.configuration.load.source_format = source_format
2304
- end
2958
+ job_gapi.configuration.load.source_format = source_format unless source_format.nil?
2305
2959
  end
2306
2960
 
2307
2961
  gapi = service.load_table_file file, job_gapi
@@ -2310,21 +2964,18 @@ module Google
2310
2964
 
2311
2965
  def load_local_or_uri file, updater
2312
2966
  job_gapi = updater.to_gapi
2313
- job = if local_file? file
2314
- load_local file, job_gapi
2315
- else
2316
- load_storage file, job_gapi
2317
- end
2318
- job
2967
+ if local_file? file
2968
+ load_local file, job_gapi
2969
+ else
2970
+ load_storage file, job_gapi
2971
+ end
2319
2972
  end
2320
2973
 
2321
2974
  def storage_url? files
2322
2975
  [files].flatten.all? do |file|
2323
2976
  file.respond_to?(:to_gs_url) ||
2324
- (file.respond_to?(:to_str) &&
2325
- file.to_str.downcase.start_with?("gs://")) ||
2326
- (file.is_a?(URI) &&
2327
- file.to_s.downcase.start_with?("gs://"))
2977
+ (file.respond_to?(:to_str) && file.to_str.downcase.start_with?("gs://")) ||
2978
+ (file.is_a?(URI) && file.to_s.downcase.start_with?("gs://"))
2328
2979
  end
2329
2980
  end
2330
2981
 
@@ -2348,15 +2999,16 @@ module Google
2348
2999
  end
2349
3000
 
2350
3001
  ##
2351
- # Yielded to a block to accumulate changes for a patch request.
3002
+ # Yielded to a block to accumulate changes for a create request. See {Project#create_dataset}.
2352
3003
  class Updater < Dataset
2353
3004
  ##
2354
- # A list of attributes that were updated.
3005
+ # @private A list of attributes that were updated.
2355
3006
  attr_reader :updates
2356
3007
 
2357
3008
  ##
2358
- # Create an Updater object.
3009
+ # @private Create an Updater object.
2359
3010
  def initialize gapi
3011
+ super()
2360
3012
  @updates = []
2361
3013
  @gapi = gapi
2362
3014
  end
@@ -2373,7 +3025,110 @@ module Google
2373
3025
  end
2374
3026
 
2375
3027
  ##
2376
- # Make sure any access changes are saved
3028
+ # @raise [RuntimeError] not implemented
3029
+ def delete(*)
3030
+ raise "not implemented in #{self.class}"
3031
+ end
3032
+
3033
+ ##
3034
+ # @raise [RuntimeError] not implemented
3035
+ def create_table(*)
3036
+ raise "not implemented in #{self.class}"
3037
+ end
3038
+
3039
+ ##
3040
+ # @raise [RuntimeError] not implemented
3041
+ def create_view(*)
3042
+ raise "not implemented in #{self.class}"
3043
+ end
3044
+
3045
+ ##
3046
+ # @raise [RuntimeError] not implemented
3047
+ def create_materialized_view(*)
3048
+ raise "not implemented in #{self.class}"
3049
+ end
3050
+
3051
+ ##
3052
+ # @raise [RuntimeError] not implemented
3053
+ def table(*)
3054
+ raise "not implemented in #{self.class}"
3055
+ end
3056
+
3057
+ ##
3058
+ # @raise [RuntimeError] not implemented
3059
+ def tables(*)
3060
+ raise "not implemented in #{self.class}"
3061
+ end
3062
+
3063
+ ##
3064
+ # @raise [RuntimeError] not implemented
3065
+ def model(*)
3066
+ raise "not implemented in #{self.class}"
3067
+ end
3068
+
3069
+ ##
3070
+ # @raise [RuntimeError] not implemented
3071
+ def models(*)
3072
+ raise "not implemented in #{self.class}"
3073
+ end
3074
+
3075
+ ##
3076
+ # @raise [RuntimeError] not implemented
3077
+ def create_routine(*)
3078
+ raise "not implemented in #{self.class}"
3079
+ end
3080
+
3081
+ ##
3082
+ # @raise [RuntimeError] not implemented
3083
+ def routine(*)
3084
+ raise "not implemented in #{self.class}"
3085
+ end
3086
+
3087
+ ##
3088
+ # @raise [RuntimeError] not implemented
3089
+ def routines(*)
3090
+ raise "not implemented in #{self.class}"
3091
+ end
3092
+
3093
+ ##
3094
+ # @raise [RuntimeError] not implemented
3095
+ def query_job(*)
3096
+ raise "not implemented in #{self.class}"
3097
+ end
3098
+
3099
+ ##
3100
+ # @raise [RuntimeError] not implemented
3101
+ def query(*)
3102
+ raise "not implemented in #{self.class}"
3103
+ end
3104
+
3105
+ ##
3106
+ # @raise [RuntimeError] not implemented
3107
+ def external(*)
3108
+ raise "not implemented in #{self.class}"
3109
+ end
3110
+
3111
+ ##
3112
+ # @raise [RuntimeError] not implemented
3113
+ def load_job(*)
3114
+ raise "not implemented in #{self.class}"
3115
+ end
3116
+
3117
+ ##
3118
+ # @raise [RuntimeError] not implemented
3119
+ def load(*)
3120
+ raise "not implemented in #{self.class}"
3121
+ end
3122
+
3123
+ ##
3124
+ # @raise [RuntimeError] not implemented
3125
+ def reload!
3126
+ raise "not implemented in #{self.class}"
3127
+ end
3128
+ alias refresh! reload!
3129
+
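All of these overrides exist because the `Updater` is yielded before the dataset exists server-side, so table, routine, query, and load calls have nothing to operate on. A sketch of what a caller sees (`name=` is a normal accumulated update):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    bigquery.create_dataset "my_new_dataset" do |updater|
      updater.name = "My new dataset" # fine: recorded for the create request
      updater.table "my_table"        # RuntimeError: not implemented in ...Updater
    end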
3130
+ ##
3131
+ # @private Make sure any access changes are saved
2377
3132
  def check_for_mutated_access!
2378
3133
  return if @access.nil?
2379
3134
  return unless @access.changed?
@@ -2381,6 +3136,8 @@ module Google
2381
3136
  patch_gapi! :access
2382
3137
  end
2383
3138
 
3139
+ ##
3140
+ # @private
2384
3141
  def to_gapi
2385
3142
  check_for_mutated_access!
2386
3143
  @gapi