google-cloud-bigquery 1.25.0 → 1.30.0

This diff covers the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -482,14 +482,14 @@ module Google
  # puts row[:word]
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit

  return enum_for :all, request_limit: request_limit unless block_given?

  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
  break if request_limit.negative?
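
The `&block` refactor above is behavior-preserving: `results.each(&block)` forwards the caller's block exactly as the old `yield` loop did. For illustration, a minimal usage sketch (the query targets a public sample table; `request_limit` caps the number of extra page requests):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
data = bigquery.query "SELECT word FROM `bigquery-public-data.samples.shakespeare` LIMIT 1000"

# Iterate across page boundaries, making at most 10 additional API requests.
data.all(request_limit: 10) do |row|
  puts row[:word]
end

# Without a block, an Enumerator is returned (the enum_for branch above).
first_hundred = data.all.first 100
```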
@@ -618,15 +618,17 @@ module Google
  end

  ##
- # Creates a new [view](https://cloud.google.com/bigquery/docs/views)
- # table, which is a virtual table defined by the given SQL query.
+ # Creates a new view, which is a virtual table defined by the given SQL query.
  #
- # BigQuery's views are logical views, not materialized views, which
- # means that the query that defines the view is re-executed every time
- # the view is queried. Queries are billed according to the total amount
+ # With BigQuery's logical views, the query that defines the view is re-executed
+ # every time the view is queried. Queries are billed according to the total amount
  # of data in all table fields referenced directly or indirectly by the
  # top-level query. (See {Table#view?} and {Table#query}.)
  #
+ # For materialized views, see {#create_materialized_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/views Creating views
+ #
  # @param [String] table_id The ID of the view table. The ID must contain
  # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
  # maximum length is 1,024 characters.
@@ -667,7 +669,7 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- # "SELECT name, age FROM proj.dataset.users"
+ # "SELECT name, age FROM proj.dataset.users"
  #
  # @example A name and description can be provided:
  # require "google/cloud/bigquery"
@@ -676,13 +678,18 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- # "SELECT name, age FROM proj.dataset.users",
- # name: "My View", description: "This is my view"
+ # "SELECT name, age FROM proj.dataset.users",
+ # name: "My View", description: "This is my view"
  #
  # @!group Table
  #
- def create_view table_id, query, name: nil, description: nil,
- standard_sql: nil, legacy_sql: nil, udfs: nil
+ def create_view table_id,
+ query,
+ name: nil,
+ description: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ udfs: nil
  use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
  new_view_opts = {
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
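
The reflowed `create_view` signature is layout-only; the parameters and their defaults are unchanged. For reference, a call exercising the keyword options might look like this (project, dataset, and table IDs are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# All keyword arguments remain optional, exactly as before the reflow.
view = dataset.create_view "my_view",
                           "SELECT name, age FROM `my_project.my_dataset.users`",
                           name: "My View",
                           description: "This is my view",
                           standard_sql: true
```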
@@ -698,7 +705,81 @@ module Google
  user_defined_function_resources: udfs_gapi(udfs)
  )
  }.delete_if { |_, v| v.nil? }
- new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
+ new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+ gapi = service.insert_table dataset_id, new_view
+ Table.from_gapi gapi, service
+ end
+
+ ##
+ # Creates a new materialized view.
+ #
+ # Materialized views are precomputed views that periodically cache results of a query for increased performance
+ # and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+ # only delta changes from the base table to compute up-to-date results.
+ #
+ # Queries that use materialized views are generally faster and consume less resources than queries that retrieve
+ # the same data only from the base table. Materialized views are helpful to significantly boost performance of
+ # workloads that have the characteristic of common and repeated queries.
+ #
+ # For logical views, see {#create_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+ #
+ # @param [String] table_id The ID of the materialized view table. The ID must contain only letters (a-z, A-Z),
+ # numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @param [String] query The query that BigQuery executes when the materialized view is referenced.
+ # @param [String] name A descriptive name for the table.
+ # @param [String] description A user-friendly description of the table.
+ # @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+ # updated. Optional. The default value is true.
+ # @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+ # will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+ #
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ # "SELECT name, age FROM proj.dataset.users"
+ #
+ # @example Automatic refresh can be disabled:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ # "SELECT name, age FROM proj.dataset.users",
+ # enable_refresh: false
+ #
+ # @!group Table
+ #
+ def create_materialized_view table_id,
+ query,
+ name: nil,
+ description: nil,
+ enable_refresh: nil,
+ refresh_interval_ms: nil
+ new_view_opts = {
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
+ project_id: project_id,
+ dataset_id: dataset_id,
+ table_id: table_id
+ ),
+ friendly_name: name,
+ description: description,
+ materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+ enable_refresh: enable_refresh,
+ query: query,
+ refresh_interval_ms: refresh_interval_ms
+ )
+ }.delete_if { |_, v| v.nil? }
+ new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)

  gapi = service.insert_table dataset_id, new_view
  Table.from_gapi gapi, service
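
Combining the new keyword options, a materialized view that refreshes automatically but no more often than every 15 minutes could be created as follows (a sketch; IDs are placeholders, and `refresh_interval_ms` semantics are as documented above):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# enable_refresh defaults to true server-side; shown here for clarity.
materialized_view = dataset.create_materialized_view "my_materialized_view",
                                                     "SELECT name, age FROM `my_project.my_dataset.users`",
                                                     enable_refresh: true,
                                                     refresh_interval_ms: 900_000 # 15 minutes
```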
@@ -1059,35 +1140,37 @@ module Google
  #
  # Ruby types are mapped to BigQuery types as follows:
  #
- # | BigQuery    | Ruby                                 | Notes                                          |
- # |-------------|--------------------------------------|------------------------------------------------|
- # | `BOOL`      | `true`/`false`                       |                                                |
- # | `INT64`     | `Integer`                            |                                                |
- # | `FLOAT64`   | `Float`                              |                                                |
- # | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
- # | `STRING`    | `String`                             |                                                |
- # | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
- # | `DATE`      | `Date`                               |                                                |
- # | `TIMESTAMP` | `Time`                               |                                                |
- # | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
- # | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL`       | `true`/`false`                       |                                                    |
+ # | `INT64`      | `Integer`                            |                                                    |
+ # | `FLOAT64`    | `Float`                              |                                                    |
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+ # | `STRING`     | `String`                             |                                                    |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
  #
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
  # of each BigQuery data type, including allowed values.
- # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
- # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
- # type for these values.
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
  #
- # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
- # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
- # type codes from the following list:
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
  #
  # * `:BOOL`
  # * `:INT64`
  # * `:FLOAT64`
  # * `:NUMERIC`
+ # * `:BIGNUMERIC`
  # * `:STRING`
  # * `:DATETIME`
  # * `:DATE`
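
As the updated table notes, `BIGNUMERIC` has no dedicated Ruby type, so a query parameter carrying one must be tagged explicitly through `types`. A hedged sketch with a named parameter (identifiers are illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Pass the value as a String and tag it :BIGNUMERIC; a BigDecimal param
# would otherwise be bound as NUMERIC and rounded to scale 9.
data = bigquery.query "SELECT @amount AS amount",
                      params: { amount: "123456789.123456789012345678" },
                      types:  { amount: :BIGNUMERIC }

data.each { |row| puts row[:amount] }
```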
@@ -1400,35 +1483,37 @@ module Google
  #
  # Ruby types are mapped to BigQuery types as follows:
  #
- # | BigQuery    | Ruby                                 | Notes                                          |
- # |-------------|--------------------------------------|------------------------------------------------|
- # | `BOOL`      | `true`/`false`                       |                                                |
- # | `INT64`     | `Integer`                            |                                                |
- # | `FLOAT64`   | `Float`                              |                                                |
- # | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
- # | `STRING`    | `String`                             |                                                |
- # | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
- # | `DATE`      | `Date`                               |                                                |
- # | `TIMESTAMP` | `Time`                               |                                                |
- # | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
- # | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL`       | `true`/`false`                       |                                                    |
+ # | `INT64`      | `Integer`                            |                                                    |
+ # | `FLOAT64`    | `Float`                              |                                                    |
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+ # | `STRING`     | `String`                             |                                                    |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
  #
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
  # of each BigQuery data type, including allowed values.
- # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
- # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
- # type for these values.
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
  #
- # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
- # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
- # type codes from the following list:
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
  #
  # * `:BOOL`
  # * `:INT64`
  # * `:FLOAT64`
  # * `:NUMERIC`
+ # * `:BIGNUMERIC`
  # * `:STRING`
  # * `:DATETIME`
  # * `:DATE`
@@ -2327,6 +2412,21 @@ module Google
  # the need to complete a load operation before the data can appear in
  # query results.
  #
+ # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
+ # complex types:
+ #
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.     |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+ #
  # Because BigQuery's streaming API is designed for high insertion rates,
  # modifications to the underlying table metadata are eventually
  # consistent when interacting with the streaming system. In most cases
@@ -2341,7 +2441,10 @@ module Google
  #
  # @param [String] table_id The ID of the destination table.
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
- # containing the data. Required.
+ # containing the data. Required. `BigDecimal` values will be rounded to
+ # scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+ # rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+ # instead of `BigDecimal`.
  # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
  # detect duplicate insertion requests on a best-effort basis. For more information, see [data
  # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
@@ -2408,6 +2511,18 @@ module Google
  # t.schema.integer "age", mode: :required
  # end
  #
+ # @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # row = {
+ # "my_numeric" => BigDecimal("123456798.987654321"),
+ # "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
+ # }
+ # dataset.insert "my_table", row
+ #
  # @!group Data
  #
  def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
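
Beyond the `BIGNUMERIC` example above, the streaming conversion table also covers `ARRAY` and `STRUCT`; nested values stream the same way. A small sketch (dataset, table, and schema are assumed to exist):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

rows = [
  {
    "name"    => "Alice",
    "scores"  => [98, 87, 92],                        # ARRAY of INT64
    "address" => { street: "Main St", zip: "12345" }  # STRUCT; keys may be symbols
  }
]
dataset.insert "my_table", rows, skip_invalid: true
```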
@@ -2500,11 +2615,9 @@ module Google
  create_table table_id do |tbl_updater|
  yield tbl_updater if block_given?
  end
- # rubocop:disable Lint/HandleExceptions
  rescue Google::Cloud::AlreadyExistsError
+ # Do nothing if it already exists
  end
- # rubocop:enable Lint/HandleExceptions
-
  sleep 60
  retry
  end
@@ -2547,7 +2660,7 @@ module Google
  return if attributes.empty?
  ensure_service!
  patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
- patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
  patch_gapi.etag = etag if etag
  @gapi = service.patch_dataset dataset_id, patch_gapi
  end
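
The switch to `Dataset.new(**patch_args)` here, and to `Access.new(**opts)` in the hunks below, prepares the gem for Ruby 3.0, where a bare `Hash` positional argument is no longer implicitly converted to keyword arguments. A self-contained illustration of the difference (not library code):

```ruby
# Ruby 3.0 keyword-argument separation, in miniature.
def make(name: nil, description: nil)
  { name: name, description: description }
end

opts = { name: "My Dataset", description: "Stats" }

make(**opts)  # works on Ruby 2.x and 3.x: the Hash is explicitly splatted to keywords
# make(opts)  # ArgumentError on Ruby 3.0+: a positional Hash no longer becomes keywords
```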
@@ -2676,12 +2789,11 @@ module Google

  def load_local_or_uri file, updater
  job_gapi = updater.to_gapi
- job = if local_file? file
- load_local file, job_gapi
- else
- load_storage file, job_gapi
- end
- job
+ if local_file? file
+ load_local file, job_gapi
+ else
+ load_storage file, job_gapi
+ end
  end

  def storage_url? files
@@ -2721,6 +2833,7 @@ module Google
  ##
  # @private Create an Updater object.
  def initialize gapi
+ super()
  @updates = []
  @gapi = gapi
  end
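
The explicit `super()` is a small but deliberate detail: with empty parentheses it calls the parent initializer with no arguments, whereas a bare `super` would forward `gapi` up the chain. In miniature (classes here are illustrative, not the gem's):

```ruby
class Base
  def initialize
    @defaults_applied = true
  end
end

class Updater < Base
  def initialize gapi
    super()        # calls Base#initialize with NO arguments
    # bare `super` would forward `gapi` and raise ArgumentError here
    @gapi = gapi
  end
end
```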
@@ -2756,6 +2869,12 @@ module Google
  raise "not implemented in #{self.class}"
  end

+ ##
+ # @raise [RuntimeError] not implemented
+ def create_materialized_view(*)
+ raise "not implemented in #{self.class}"
+ end
+
  ##
  # @raise [RuntimeError] not implemented
  def table(*)
@@ -1194,7 +1194,7 @@ module Google
  @rules.reject!(&find_by_scope_and_value(scope, value))
  # Add new rule for this role, scope, and value
  opts = { role: role, scope => value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -1204,7 +1204,7 @@ module Google
  @rules.reject!(&find_by_scope_and_resource_ref(:routine, value))
  # Add new rule for this role, scope, and value
  opts = { routine: value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -1215,7 +1215,7 @@ module Google
  @rules.reject!(&find_by_scope_and_resource_ref(:view, value))
  # Add new rule for this role, scope, and value
  opts = { view: value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -120,12 +120,12 @@ module Google
  # puts dataset.name
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit
  return enum_for :all, request_limit: request_limit unless block_given?
  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
  break if request_limit.negative?
@@ -52,6 +52,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
  ##
  # @private New External from URLs and format
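
Once configured as in the new example, the external data source can be queried directly through the `external` option, without loading the data into BigQuery first (the table alias `my_ext_table` is illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_source_uri_prefix =
    "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
end

# Register the source under an alias and query it like a table.
data = bigquery.query "SELECT * FROM my_ext_table LIMIT 10",
                      external: { my_ext_table: external_data }
data.each { |row| puts row }
```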
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
  val = {
- "csv" => "CSV", "avro" => "AVRO",
+ "csv" => "CSV",
+ "avro" => "AVRO",
  "json" => "NEWLINE_DELIMITED_JSON",
  "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
  "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
  "datastore" => "DATASTORE_BACKUP",
  "backup" => "DATASTORE_BACKUP",
  "datastore_backup" => "DATASTORE_BACKUP",
- "bigtable" => "BIGTABLE"
+ "bigtable" => "BIGTABLE",
+ "orc" => "ORC",
+ "parquet" => "PARQUET"
  }[format.to_s.downcase]
  return val unless val.nil?
  Array(urls).each do |url|
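
With the two new map entries, `:orc` and `:parquet` (as string or symbol, any case) now resolve directly instead of falling back to URL-extension detection. For instance (the bucket path is a placeholder):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# The format keyword now accepts :orc and :parquet explicitly.
orc_data = bigquery.external "gs://my-bucket/path/*", format: :orc
orc_data.format #=> "ORC"
orc_data.orc?   #=> true (predicate added later in this diff)
```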
@@ -110,7 +131,7 @@ module Google
  when "GOOGLE_SHEETS" then External::SheetsSource
  when "BIGTABLE" then External::BigtableSource
  else
- # AVRO and DATASTORE_BACKUP
+ # AVRO, DATASTORE_BACKUP, PARQUET
  External::DataSource
  end
  end
@@ -148,6 +169,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
  ##
  # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
  @gapi.source_format == "BIGTABLE"
  end

+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "ORC"
+ # external_data.orc? #=> true
+ #
+ def orc?
+ @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "PARQUET"
+ # external_data.parquet? #=> true
+ #
+ def parquet?
+ @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
  @gapi.max_bad_records = new_max_bad_records
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+ !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+ @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+ return false unless hive_partitioning?
+ !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+ @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi
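
Taken together, the new accessors simply round-trip through a `HivePartitioningOptions` object on the underlying API resource. As a closing sketch, the same configuration can back a permanent external table via the pre-existing `Table#external=` mechanism (a hedged example: IDs are placeholders, and the partition key `dt` assumes the sample data's layout):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix =
    "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
end

# Back a permanent table with the external source.
dataset.create_table "my_ext_table" do |table|
  table.external = external_data
end

# With require_partition_filter set, queries must filter on a partition key.
bigquery.query "SELECT * FROM my_dataset.my_ext_table WHERE dt = '2020-11-15'"
```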