google-cloud-bigquery 1.25.0 → 1.30.0

@@ -482,14 +482,14 @@ module Google
  # puts row[:word]
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit

  return enum_for :all, request_limit: request_limit unless block_given?

  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
  break if request_limit.negative?
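
The `&block` refactor above is behavior-preserving: it only changes how the block is forwarded to `each`. A minimal sketch of both calling styles, assuming a dataset's table list as in the surrounding docs:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Block form: items from every page are yielded, with at most
# 10 additional page requests made under the hood.
dataset.tables.all(request_limit: 10) { |table| puts table.table_id }

# No block given: an Enumerator is returned instead.
ids = dataset.tables.all.map(&:table_id)
```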
@@ -618,15 +618,17 @@ module Google
  end

  ##
- # Creates a new [view](https://cloud.google.com/bigquery/docs/views)
- # table, which is a virtual table defined by the given SQL query.
+ # Creates a new view, which is a virtual table defined by the given SQL query.
  #
- # BigQuery's views are logical views, not materialized views, which
- # means that the query that defines the view is re-executed every time
- # the view is queried. Queries are billed according to the total amount
+ # With BigQuery's logical views, the query that defines the view is re-executed
+ # every time the view is queried. Queries are billed according to the total amount
  # of data in all table fields referenced directly or indirectly by the
  # top-level query. (See {Table#view?} and {Table#query}.)
  #
+ # For materialized views, see {#create_materialized_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/views Creating views
+ #
  # @param [String] table_id The ID of the view table. The ID must contain
  # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
  # maximum length is 1,024 characters.
@@ -667,7 +669,7 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- # "SELECT name, age FROM proj.dataset.users"
+ # "SELECT name, age FROM proj.dataset.users"
  #
  # @example A name and description can be provided:
  # require "google/cloud/bigquery"
@@ -676,13 +678,18 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- # "SELECT name, age FROM proj.dataset.users",
- # name: "My View", description: "This is my view"
+ # "SELECT name, age FROM proj.dataset.users",
+ # name: "My View", description: "This is my view"
  #
  # @!group Table
  #
- def create_view table_id, query, name: nil, description: nil,
- standard_sql: nil, legacy_sql: nil, udfs: nil
+ def create_view table_id,
+ query,
+ name: nil,
+ description: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ udfs: nil
  use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
  new_view_opts = {
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
@@ -698,7 +705,81 @@ module Google
  user_defined_function_resources: udfs_gapi(udfs)
  )
  }.delete_if { |_, v| v.nil? }
- new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
+ new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+ gapi = service.insert_table dataset_id, new_view
+ Table.from_gapi gapi, service
+ end
+
+ ##
+ # Creates a new materialized view.
+ #
+ # Materialized views are precomputed views that periodically cache results of a query for increased performance
+ # and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+ # only delta changes from the base table to compute up-to-date results.
+ #
+ # Queries that use materialized views are generally faster and consume fewer resources than queries that
+ # retrieve the same data only from the base table. Materialized views can significantly boost the performance
+ # of workloads dominated by common, repeated queries.
+ #
+ # For logical views, see {#create_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+ #
+ # @param [String] table_id The ID of the materialized view table. The ID must contain only letters (a-z, A-Z),
+ # numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @param [String] query The query that BigQuery executes when the materialized view is referenced.
+ # @param [String] name A descriptive name for the table.
+ # @param [String] description A user-friendly description of the table.
+ # @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+ # updated. Optional. The default value is true.
+ # @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+ # will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+ #
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ # "SELECT name, age FROM proj.dataset.users"
+ #
+ # @example Automatic refresh can be disabled:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ # "SELECT name, age FROM proj.dataset.users",
+ # enable_refresh: false
+ #
+ # @!group Table
+ #
+ def create_materialized_view table_id,
+ query,
+ name: nil,
+ description: nil,
+ enable_refresh: nil,
+ refresh_interval_ms: nil
+ new_view_opts = {
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
+ project_id: project_id,
+ dataset_id: dataset_id,
+ table_id: table_id
+ ),
+ friendly_name: name,
+ description: description,
+ materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+ enable_refresh: enable_refresh,
+ query: query,
+ refresh_interval_ms: refresh_interval_ms
+ )
+ }.delete_if { |_, v| v.nil? }
+ new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)

  gapi = service.insert_table dataset_id, new_view
  Table.from_gapi gapi, service
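
Sketching the new API in use, based only on the parameters the method documents above (the aggregate query and the hourly interval are illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Create a materialized view that BigQuery refreshes at most once an hour.
view = dataset.create_materialized_view "my_materialized_view",
                                        "SELECT name, COUNT(*) AS cnt " \
                                        "FROM `proj.dataset.users` GROUP BY name",
                                        refresh_interval_ms: 3_600_000

view.table_id #=> "my_materialized_view"
```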
@@ -1059,35 +1140,37 @@ module Google
  #
  # Ruby types are mapped to BigQuery types as follows:
  #
- # | BigQuery    | Ruby                                 | Notes                                          |
- # |-------------|--------------------------------------|------------------------------------------------|
- # | `BOOL`      | `true`/`false`                       |                                                |
- # | `INT64`     | `Integer`                            |                                                |
- # | `FLOAT64`   | `Float`                              |                                                |
- # | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
- # | `STRING`    | `String`                             |                                                |
- # | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
- # | `DATE`      | `Date`                               |                                                |
- # | `TIMESTAMP` | `Time`                               |                                                |
- # | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
- # | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL`       | `true`/`false`                       |                                                    |
+ # | `INT64`      | `Integer`                            |                                                    |
+ # | `FLOAT64`    | `Float`                              |                                                    |
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+ # | `STRING`     | `String`                             |                                                    |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
  #
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
  # of each BigQuery data type, including allowed values.
- # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
- # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
- # type for these values.
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
  #
- # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
- # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
- # type codes from the following list:
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
  #
  # * `:BOOL`
  # * `:INT64`
  # * `:FLOAT64`
  # * `:NUMERIC`
+ # * `:BIGNUMERIC`
  # * `:STRING`
  # * `:DATETIME`
  # * `:DATE`
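
Since `BIGNUMERIC` has no dedicated Ruby type, the value travels as a `String` and its SQL type is declared in `types`. A minimal sketch of the pattern the table above describes:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# The String value is sent verbatim; `types` tells BigQuery to treat
# the named parameter as BIGNUMERIC rather than STRING.
data = dataset.query "SELECT @val AS big_val",
                     params: { val: "123456789.12345678901234567890123456789" },
                     types:  { val: :BIGNUMERIC }

data.each { |row| puts row[:big_val] }
```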
@@ -1400,35 +1483,37 @@ module Google
  #
  # Ruby types are mapped to BigQuery types as follows:
  #
- # | BigQuery    | Ruby                                 | Notes                                          |
- # |-------------|--------------------------------------|------------------------------------------------|
- # | `BOOL`      | `true`/`false`                       |                                                |
- # | `INT64`     | `Integer`                            |                                                |
- # | `FLOAT64`   | `Float`                              |                                                |
- # | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
- # | `STRING`    | `String`                             |                                                |
- # | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
- # | `DATE`      | `Date`                               |                                                |
- # | `TIMESTAMP` | `Time`                               |                                                |
- # | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
- # | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL`       | `true`/`false`                       |                                                    |
+ # | `INT64`      | `Integer`                            |                                                    |
+ # | `FLOAT64`    | `Float`                              |                                                    |
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+ # | `STRING`     | `String`                             |                                                    |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
  #
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
  # of each BigQuery data type, including allowed values.
- # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
- # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
- # type for these values.
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
  #
- # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
- # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
- # type codes from the following list:
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
  #
  # * `:BOOL`
  # * `:INT64`
  # * `:FLOAT64`
  # * `:NUMERIC`
+ # * `:BIGNUMERIC`
  # * `:STRING`
  # * `:DATETIME`
  # * `:DATE`
@@ -2327,6 +2412,21 @@ module Google
  # the need to complete a load operation before the data can appear in
  # query results.
  #
+ # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
+ # complex types:
+ #
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.     |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+ #
  # Because BigQuery's streaming API is designed for high insertion rates,
  # modifications to the underlying table metadata are eventually
  # consistent when interacting with the streaming system. In most cases
@@ -2341,7 +2441,10 @@ module Google
  #
  # @param [String] table_id The ID of the destination table.
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
- # containing the data. Required.
+ # containing the data. Required. `BigDecimal` values will be rounded to
+ # scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+ # rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+ # instead of `BigDecimal`.
  # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
  # detect duplicate insertion requests on a best-effort basis. For more information, see [data
  # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
@@ -2408,6 +2511,18 @@ module Google
  # t.schema.integer "age", mode: :required
  # end
  #
+ # @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # row = {
+ # "my_numeric" => BigDecimal("123456798.987654321"),
+ # "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
+ # }
+ # dataset.insert "my_table", row
+ #
  # @!group Data
  #
  def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
@@ -2500,11 +2615,9 @@ module Google
  create_table table_id do |tbl_updater|
  yield tbl_updater if block_given?
  end
- # rubocop:disable Lint/HandleExceptions
  rescue Google::Cloud::AlreadyExistsError
+ # Do nothing if it already exists
  end
- # rubocop:enable Lint/HandleExceptions
-
  sleep 60
  retry
  end
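
For reference, this retry path is what backs `insert` with `autocreate: true`: a creation race raising `AlreadyExistsError` is swallowed, and the insert is retried. Usage in the style of the documented example above:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# If "my_table" does not exist yet, it is created from the block's schema
# before the rows are streamed in.
dataset.insert "my_table", [{ first_name: "Alice", age: 21 }], autocreate: true do |t|
  t.schema.string  "first_name", mode: :required
  t.schema.integer "age",        mode: :required
end
```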
@@ -2547,7 +2660,7 @@ module Google
  return if attributes.empty?
  ensure_service!
  patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
- patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
  patch_gapi.etag = etag if etag
  @gapi = service.patch_dataset dataset_id, patch_gapi
  end
@@ -2676,12 +2789,11 @@ module Google

  def load_local_or_uri file, updater
  job_gapi = updater.to_gapi
- job = if local_file? file
- load_local file, job_gapi
- else
- load_storage file, job_gapi
- end
- job
+ if local_file? file
+ load_local file, job_gapi
+ else
+ load_storage file, job_gapi
+ end
  end

  def storage_url? files
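
The rewrite drops a redundant local variable; dispatch is unchanged. Both branches are reachable from `Dataset#load`, which accepts either a local file object or a Cloud Storage URL (a sketch with hypothetical bucket and file names):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# A gs:// URL takes the load_storage branch...
dataset.load "my_table", "gs://my-bucket/file.csv"

# ...while an open file object takes the load_local branch.
dataset.load "my_table", File.open("data.csv")
```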
@@ -2721,6 +2833,7 @@ module Google
  ##
  # @private Create an Updater object.
  def initialize gapi
+ super()
  @updates = []
  @gapi = gapi
  end
@@ -2756,6 +2869,12 @@ module Google
  raise "not implemented in #{self.class}"
  end

+ ##
+ # @raise [RuntimeError] not implemented
+ def create_materialized_view(*)
+ raise "not implemented in #{self.class}"
+ end
+
  ##
  # @raise [RuntimeError] not implemented
  def table(*)
@@ -1194,7 +1194,7 @@ module Google
  @rules.reject!(&find_by_scope_and_value(scope, value))
  # Add new rule for this role, scope, and value
  opts = { role: role, scope => value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -1204,7 +1204,7 @@ module Google
  @rules.reject!(&find_by_scope_and_resource_ref(:routine, value))
  # Add new rule for this role, scope, and value
  opts = { routine: value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -1215,7 +1215,7 @@ module Google
  @rules.reject!(&find_by_scope_and_resource_ref(:view, value))
  # Add new rule for this role, scope, and value
  opts = { view: value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
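
The recurring `Access.new(opts)` → `Access.new(**opts)` change (and the `Table.new`/`Dataset.new` changes above) tracks Ruby 2.7's separation of positional and keyword arguments: the generated `Google::Apis` constructors expect keywords, so the options hash must now be splatted explicitly. A self-contained illustration (the method below is hypothetical, not library code):

```ruby
# Ruby 2.7 warns, and Ruby 3.0 raises, when a Hash is passed where
# keyword arguments are expected; **opts splats the hash into keywords.
def access_rule role:, group_by_email: nil
  { role: role, group_by_email: group_by_email }.compact
end

opts = { role: "READER", group_by_email: "auditors@example.com" }

access_rule(**opts) #=> {:role=>"READER", :group_by_email=>"auditors@example.com"}
# access_rule(opts) # ArgumentError on Ruby 3.x: hash treated as positional
```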
@@ -120,12 +120,12 @@ module Google
  # puts dataset.name
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit
  return enum_for :all, request_limit: request_limit unless block_given?
  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
  break if request_limit.negative?
@@ -52,6 +52,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
  ##
  # @private New External from URLs and format
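
Beyond configuration, the hive-partitioned definition is used like any other external data source: pass it in the `external` option of a query so BigQuery reads the files directly (a sketch reusing the example's URIs; the table alias is hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_source_uri_prefix =
    "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
end

# No permanent table is created; the definition is attached to the query.
data = bigquery.query "SELECT * FROM my_ext_table LIMIT 10",
                      external: { my_ext_table: external_data }
data.each { |row| puts row }
```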
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
  val = {
- "csv" => "CSV", "avro" => "AVRO",
+ "csv" => "CSV",
+ "avro" => "AVRO",
  "json" => "NEWLINE_DELIMITED_JSON",
  "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
  "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
  "datastore" => "DATASTORE_BACKUP",
  "backup" => "DATASTORE_BACKUP",
  "datastore_backup" => "DATASTORE_BACKUP",
- "bigtable" => "BIGTABLE"
+ "bigtable" => "BIGTABLE",
+ "orc" => "ORC",
+ "parquet" => "PARQUET"
  }[format.to_s.downcase]
  return val unless val.nil?
  Array(urls).each do |url|
@@ -110,7 +131,7 @@ module Google
  when "GOOGLE_SHEETS" then External::SheetsSource
  when "BIGTABLE" then External::BigtableSource
  else
- # AVRO and DATASTORE_BACKUP
+ # AVRO, DATASTORE_BACKUP, PARQUET
  External::DataSource
  end
  end
@@ -148,6 +169,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
  ##
  # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
  @gapi.source_format == "BIGTABLE"
  end

+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "ORC"
+ # external_data.orc? #=> true
+ #
+ def orc?
+ @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "PARQUET"
+ # external_data.parquet? #=> true
+ #
+ def parquet?
+ @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
  @gapi.max_bad_records = new_max_bad_records
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+ !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+ @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+ return false unless hive_partitioning?
+ !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+ @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi