google-cloud-bigquery 1.38.1 → 1.42.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,6 +22,7 @@ require "google/cloud/bigquery/routine"
22
22
  require "google/cloud/bigquery/external"
23
23
  require "google/cloud/bigquery/dataset/list"
24
24
  require "google/cloud/bigquery/dataset/access"
25
+ require "google/cloud/bigquery/dataset/tag"
25
26
  require "google/cloud/bigquery/convert"
26
27
  require "google/apis/bigquery_v2"
27
28
 
@@ -466,6 +467,21 @@ module Google
466
467
  access_builder.freeze
467
468
  end
468
469
 
470
+ ##
471
+ # Retrieves the tags associated with this dataset. Tag keys are
472
+ # globally unique, and managed via the resource manager API.
473
+ #
474
+ # @see https://cloud.google.com/resource-manager/docs/tags/tags-overview
475
+ # for more information.
476
+ #
477
+ # @return [Array<Google::Cloud::Bigquery::Dataset::Tag>, nil] The list of tags, or `nil` if no tags are set.
478
+ #
479
+ def tags
480
+ ensure_full_data!
481
+ return nil if @gapi.tags.nil?
482
+ @gapi.tags.map { |gapi| Tag.from_gapi(gapi) }
483
+ end
484
+
469
485
  ##
470
486
  # Permanently deletes the dataset. The dataset must be empty before it
471
487
  # can be deleted unless the `force` option is set to `true`.
@@ -793,6 +809,11 @@ module Google
793
809
  # object without verifying that the resource exists on the BigQuery
794
810
  # service. Calls made on this object will raise errors if the resource
795
811
  # does not exist. Default is `false`. Optional.
812
+ # @param [String] view Specifies the view that determines which table information is returned.
813
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
814
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
815
+ # `:full`. For more information, see the `view` parameter of the BigQuery `tables.get` REST API.
816
+ # The default value is the `:unspecified` view type.
796
817
  #
797
818
  # @return [Google::Cloud::Bigquery::Table, nil] Returns `nil` if the
798
819
  # table does not exist.
@@ -815,13 +836,22 @@ module Google
815
836
  #
816
837
  # table = dataset.table "my_table", skip_lookup: true
817
838
  #
839
+ # @example Avoid retrieving transient stats of the table with `view`:
840
+ # require "google/cloud/bigquery"
841
+ #
842
+ # bigquery = Google::Cloud::Bigquery.new
843
+ #
844
+ # dataset = bigquery.dataset "my_dataset"
845
+ #
846
+ # table = dataset.table "my_table", view: "basic"
847
+ #
818
848
  # @!group Table
819
849
  #
820
- def table table_id, skip_lookup: nil
850
+ def table table_id, skip_lookup: nil, view: nil
821
851
  ensure_service!
822
852
  return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
823
- gapi = service.get_table dataset_id, table_id
824
- Table.from_gapi gapi, service
853
+ gapi = service.get_table dataset_id, table_id, metadata_view: view
854
+ Table.from_gapi gapi, service, metadata_view: view
825
855
  rescue Google::Cloud::NotFoundError
826
856
  nil
827
857
  end
@@ -1816,7 +1846,7 @@ module Google
1816
1846
  # The following values are supported:
1817
1847
  #
1818
1848
  # * `csv` - CSV
1819
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1849
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1820
1850
  # * `avro` - [Avro](http://avro.apache.org/)
1821
1851
  # * `sheets` - Google Sheets
1822
1852
  # * `datastore_backup` - Cloud Datastore backup
@@ -1879,7 +1909,7 @@ module Google
1879
1909
  # The following values are supported:
1880
1910
  #
1881
1911
  # * `csv` - CSV
1882
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1912
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1883
1913
  # * `avro` - [Avro](http://avro.apache.org/)
1884
1914
  # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
1885
1915
  # * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -2141,7 +2171,7 @@ module Google
2141
2171
  # The following values are supported:
2142
2172
  #
2143
2173
  # * `csv` - CSV
2144
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
2174
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
2145
2175
  # * `avro` - [Avro](http://avro.apache.org/)
2146
2176
  # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
2147
2177
  # * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -2658,6 +2688,11 @@ module Google
2658
2688
  # messages before the batch is published. Default is 10.
2659
2689
  # @attr_reader [Numeric] threads The number of threads used to insert
2660
2690
  # batches of rows. Default is 4.
2691
+ # @param [String] view Specifies the view that determines which table information is returned.
2692
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
2693
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
2694
+ # `:full`. For more information, see the `view` parameter of the BigQuery `tables.get` REST API.
2695
+ # The default value is the `:unspecified` view type.
2661
2696
  # @yield [response] the callback for when a batch of rows is inserted
2662
2697
  # @yieldparam [Table::AsyncInserter::Result] result the result of the
2663
2698
  # asynchronous insert
@@ -2686,13 +2721,35 @@ module Google
2686
2721
  #
2687
2722
  # inserter.stop.wait!
2688
2723
  #
2724
+ # @example Avoid retrieving transient stats of the table while inserting:
2725
+ # require "google/cloud/bigquery"
2726
+ #
2727
+ # bigquery = Google::Cloud::Bigquery.new
2728
+ # dataset = bigquery.dataset "my_dataset"
2729
+ # inserter = dataset.insert_async("my_table", view: "basic") do |result|
2730
+ # if result.error?
2731
+ # log_error result.error
2732
+ # else
2733
+ # log_insert "inserted #{result.insert_count} rows " \
2734
+ # "with #{result.error_count} errors"
2735
+ # end
2736
+ # end
2737
+ #
2738
+ # rows = [
2739
+ # { "first_name" => "Alice", "age" => 21 },
2740
+ # { "first_name" => "Bob", "age" => 22 }
2741
+ # ]
2742
+ # inserter.insert rows
2743
+ #
2744
+ # inserter.stop.wait!
2745
+ #
2689
2746
  def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
2690
- interval: 10, threads: 4, &block
2747
+ interval: 10, threads: 4, view: nil, &block
2691
2748
  ensure_service!
2692
2749
 
2693
2750
  # Get table, don't use Dataset#table which handles NotFoundError
2694
- gapi = service.get_table dataset_id, table_id
2695
- table = Table.from_gapi gapi, service
2751
+ gapi = service.get_table dataset_id, table_id, metadata_view: view
2752
+ table = Table.from_gapi gapi, service, metadata_view: view
2696
2753
  # Get the AsyncInserter from the table
2697
2754
  table.insert_async skip_invalid: skip_invalid,
2698
2755
  ignore_unknown: ignore_unknown,
@@ -2700,6 +2757,29 @@ module Google
2700
2757
  interval: interval, threads: threads, &block
2701
2758
  end
2702
2759
 
2760
+ ##
2761
+ # Build an object of type Google::Apis::BigqueryV2::DatasetAccessEntry from
2762
+ # this dataset.
2763
+ #
2764
+ # @param [Array<String>] target_types The list of target types within the dataset.
2765
+ #
2766
+ # @return [Google::Apis::BigqueryV2::DatasetAccessEntry] Returns a DatasetAccessEntry object.
2767
+ #
2768
+ # @example
2769
+ # require "google/cloud/bigquery"
2770
+ #
2771
+ # bigquery = Google::Cloud::Bigquery.new
2772
+ # dataset = bigquery.dataset "my_dataset"
2773
+ # dataset_access_entry = dataset.build_access_entry target_types: ["VIEWS"]
2774
+ #
2775
+ def build_access_entry target_types: nil
2776
+ params = {
2777
+ dataset: dataset_ref,
2778
+ target_types: target_types
2779
+ }.delete_if { |_, v| v.nil? }
2780
+ Google::Apis::BigqueryV2::DatasetAccessEntry.new(**params)
2781
+ end
2782
+
2703
2783
  protected
2704
2784
 
2705
2785
  def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
@@ -2754,7 +2834,7 @@ module Google
2754
2834
  def patch_gapi! *attributes
2755
2835
  return if attributes.empty?
2756
2836
  ensure_service!
2757
- patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
2837
+ patch_args = attributes.to_h { |attr| [attr, @gapi.send(attr)] }
2758
2838
  patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
2759
2839
  patch_gapi.etag = etag if etag
2760
2840
  @gapi = service.patch_dataset dataset_id, patch_gapi
@@ -2944,8 +3024,6 @@ module Google
2944
3024
  @access
2945
3025
  end
2946
3026
 
2947
- # rubocop:disable Style/MethodDefParentheses
2948
-
2949
3027
  ##
2950
3028
  # @raise [RuntimeError] not implemented
2951
3029
  def delete(*)
@@ -3049,8 +3127,6 @@ module Google
3049
3127
  end
3050
3128
  alias refresh! reload!
3051
3129
 
3052
- # rubocop:enable Style/MethodDefParentheses
3053
-
3054
3130
  ##
3055
3131
  # @private Make sure any access changes are saved
3056
3132
  def check_for_mutated_access!
@@ -65,11 +65,17 @@ module Google
65
65
  ##
66
66
  # The table or model which is exported.
67
67
  #
68
+ # @param [String] view Specifies the view that determines which table information is returned.
69
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
70
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
71
+ # `:full`. For more information, see the `view` parameter of the BigQuery `tables.get` REST API.
72
+ # The default value is the `:unspecified` view type.
73
+ #
68
74
  # @return [Table, Model, nil] A table or model instance, or `nil`.
69
75
  #
70
- def source
76
+ def source view: nil
71
77
  if (table = @gapi.configuration.extract.source_table)
72
- retrieve_table table.project_id, table.dataset_id, table.table_id
78
+ retrieve_table table.project_id, table.dataset_id, table.table_id, metadata_view: view
73
79
  elsif (model = @gapi.configuration.extract.source_model)
74
80
  retrieve_model model.project_id, model.dataset_id, model.model_id
75
81
  end
@@ -108,7 +114,7 @@ module Google
108
114
 
109
115
  ##
110
116
  # Checks if the destination format for the table data is [newline-delimited
111
- # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
117
+ # JSON](https://jsonlines.org/). The default is `false`. Not applicable when
112
118
  # extracting models.
113
119
  #
114
120
  # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
@@ -221,7 +227,7 @@ module Google
221
227
  # and the counts as values.
222
228
  #
223
229
  def destinations_counts
224
- Hash[destinations.zip destinations_file_counts]
230
+ destinations.zip(destinations_file_counts).to_h
225
231
  end
226
232
 
227
233
  ##
@@ -362,7 +368,7 @@ module Google
362
368
  # Supported values for tables:
363
369
  #
364
370
  # * `csv` - CSV
365
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
371
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
366
372
  # * `avro` - [Avro](http://avro.apache.org/)
367
373
  #
368
374
  # Supported values for models:
@@ -489,7 +489,7 @@ module Google
489
489
  #
490
490
  def wait_until_done!
491
491
  backoff = lambda do |retries|
492
- delay = [retries**2 + 5, 60].min # Maximum delay is 60
492
+ delay = [(retries**2) + 5, 60].min # Maximum delay is 60
493
493
  sleep delay
494
494
  end
495
495
  retries = 0
@@ -710,9 +710,9 @@ module Google
710
710
  raise "Must have active connection" unless service
711
711
  end
712
712
 
713
- def retrieve_table project_id, dataset_id, table_id
713
+ def retrieve_table project_id, dataset_id, table_id, metadata_view: nil
714
714
  ensure_service!
715
- gapi = service.get_project_table project_id, dataset_id, table_id
715
+ gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: metadata_view
716
716
  Table.from_gapi gapi, service
717
717
  rescue Google::Cloud::NotFoundError
718
718
  nil
@@ -62,12 +62,18 @@ module Google
62
62
  # The table into which the operation loads data. This is the table on
63
63
  # which {Table#load_job} was invoked.
64
64
  #
65
+ # @param [String] view Specifies the view that determines which table information is returned.
66
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
67
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
68
+ # `:full`. For more information, see the `view` parameter of the BigQuery `tables.get` REST API.
69
+ # The default value is the `:unspecified` view type.
70
+ #
65
71
  # @return [Table] A table instance.
66
72
  #
67
- def destination
73
+ def destination view: nil
68
74
  table = @gapi.configuration.load.destination_table
69
75
  return nil unless table
70
- retrieve_table table.project_id, table.dataset_id, table.table_id
76
+ retrieve_table table.project_id, table.dataset_id, table.table_id, metadata_view: view
71
77
  end
72
78
 
73
79
  ##
@@ -188,7 +194,7 @@ module Google
188
194
 
189
195
  ##
190
196
  # Checks if the format of the source data is [newline-delimited
191
- # JSON](http://jsonlines.org/). The default is `false`.
197
+ # JSON](https://jsonlines.org/). The default is `false`.
192
198
  #
193
199
  # @return [Boolean] `true` when the source format is
194
200
  # `NEWLINE_DELIMITED_JSON`, `false` otherwise.
@@ -1269,7 +1275,7 @@ module Google
1269
1275
  # The following values are supported:
1270
1276
  #
1271
1277
  # * `csv` - CSV
1272
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1278
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1273
1279
  # * `avro` - [Avro](http://avro.apache.org/)
1274
1280
  # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
1275
1281
  # * `parquet` - [Parquet](https://parquet.apache.org/)
@@ -961,7 +961,7 @@ module Google
961
961
  # The following values are supported:
962
962
  #
963
963
  # * `csv` - CSV
964
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
964
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
965
965
  # * `avro` - [Avro](http://avro.apache.org/)
966
966
  # * `sheets` - Google Sheets
967
967
  # * `datastore_backup` - Cloud Datastore backup
@@ -1554,7 +1554,7 @@ module Google
1554
1554
  # Supported values for tables:
1555
1555
  #
1556
1556
  # * `csv` - CSV
1557
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1557
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1558
1558
  # * `avro` - [Avro](http://avro.apache.org/)
1559
1559
  #
1560
1560
  # Supported values for models:
@@ -1683,7 +1683,7 @@ module Google
1683
1683
  # Supported values for tables:
1684
1684
  #
1685
1685
  # * `csv` - CSV
1686
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1686
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
1687
1687
  # * `avro` - [Avro](http://avro.apache.org/)
1688
1688
  #
1689
1689
  # Supported values for models:
@@ -437,14 +437,21 @@ module Google
437
437
  ##
438
438
  # The table in which the query results are stored.
439
439
  #
440
+ # @param [String] view Specifies the view that determines which table information is returned.
441
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
442
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
443
+ # `:full`. For more information, see the `view` parameter of the BigQuery `tables.get` REST API.
444
+ # The default value is the `:unspecified` view type.
445
+ #
440
446
  # @return [Table] A table instance.
441
447
  #
442
- def destination
448
+ def destination view: nil
443
449
  table = @gapi.configuration.query.destination_table
444
450
  return nil unless table
445
451
  retrieve_table table.project_id,
446
452
  table.dataset_id,
447
- table.table_id
453
+ table.table_id,
454
+ metadata_view: view
448
455
  end
449
456
 
450
457
  ##
@@ -1193,7 +1200,7 @@ module Google
1193
1200
  #
1194
1201
  def external= value
1195
1202
  external_table_pairs = value.map { |name, obj| [String(name), obj.to_gapi] }
1196
- external_table_hash = Hash[external_table_pairs]
1203
+ external_table_hash = external_table_pairs.to_h
1197
1204
  @gapi.configuration.query.table_definitions = external_table_hash
1198
1205
  end
1199
1206
 
@@ -471,7 +471,7 @@ module Google
471
471
  #
472
472
  def param_type
473
473
  param_type = type.to_sym
474
- param_type = Hash[fields.map { |field| [field.name.to_sym, field.param_type] }] if record?
474
+ param_type = fields.to_h { |field| [field.name.to_sym, field.param_type] } if record?
475
475
  param_type = [param_type] if repeated?
476
476
  param_type
477
477
  end
@@ -172,7 +172,7 @@ module Google
172
172
  # schema.param_types
173
173
  #
174
174
  def param_types
175
- Hash[fields.map { |field| [field.name.to_sym, field.param_type] }]
175
+ fields.to_h { |field| [field.name.to_sym, field.param_type] }
176
176
  end
177
177
 
178
178
  ##
@@ -144,10 +144,11 @@ module Google
144
144
 
145
145
  ##
146
146
  # Gets the specified table resource by full table reference.
147
- def get_project_table project_id, dataset_id, table_id
147
+ def get_project_table project_id, dataset_id, table_id, metadata_view: nil
148
+ metadata_view = table_metadata_view_type_for metadata_view
148
149
  # The get operation is considered idempotent
149
150
  execute backoff: true do
150
- service.get_table project_id, dataset_id, table_id
151
+ service.get_table project_id, dataset_id, table_id, view: metadata_view
151
152
  end
152
153
  end
153
154
 
@@ -156,8 +157,8 @@ module Google
156
157
  # This method does not return the data in the table,
157
158
  # it only returns the table resource,
158
159
  # which describes the structure of this table.
159
- def get_table dataset_id, table_id
160
- get_project_table @project, dataset_id, table_id
160
+ def get_table dataset_id, table_id, metadata_view: nil
161
+ get_project_table @project, dataset_id, table_id, metadata_view: metadata_view
161
162
  end
162
163
 
163
164
  ##
@@ -496,6 +497,19 @@ module Google
496
497
  ref
497
498
  end
498
499
 
500
+ ##
501
+ # Converts a hash to a Google::Apis::BigqueryV2::DatasetAccessEntry object.
502
+ #
503
+ # @param [Hash<String,String>] dataset_hash Hash for a DatasetAccessEntry.
504
+ #
505
+ def self.dataset_access_entry_from_hash dataset_hash
506
+ params = {
507
+ dataset: Google::Apis::BigqueryV2::DatasetReference.new(**dataset_hash),
508
+ target_types: dataset_hash[:target_types]
509
+ }.delete_if { |_, v| v.nil? }
510
+ Google::Apis::BigqueryV2::DatasetAccessEntry.new(**params)
511
+ end
512
+
499
513
  def self.validate_table_ref table_ref
500
514
  [:project_id, :dataset_id, :table_id].each do |f|
501
515
  raise ArgumentError, "TableReference is missing #{f}" if table_ref.send(f).nil?
@@ -572,6 +586,14 @@ module Google
572
586
  raise Google::Cloud::Error.from_error e
573
587
  end
574
588
 
589
+ def table_metadata_view_type_for str
590
+ return nil if str.nil?
591
+ { "unspecified" => "TABLE_METADATA_VIEW_UNSPECIFIED",
592
+ "basic" => "BASIC",
593
+ "storage" => "STORAGE_STATS",
594
+ "full" => "FULL" }[str.to_s.downcase]
595
+ end
596
+
575
597
  class Backoff
576
598
  class << self
577
599
  attr_accessor :retries