google-cloud-bigquery 1.25.0 → 1.30.0

This diff covers the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -482,14 +482,14 @@ module Google
  # puts row[:word]
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit

  return enum_for :all, request_limit: request_limit unless block_given?

  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
  break if request_limit.negative?
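
The `&block` refactor above is behavior-preserving: `results.each(&block)` forwards the caller's block exactly as the old `yield` loop did. For illustration, a minimal usage sketch (the query targets a public sample table; `request_limit` caps the number of extra page requests):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
data = bigquery.query "SELECT word FROM `bigquery-public-data.samples.shakespeare` LIMIT 1000"

# Iterate across page boundaries, making at most 10 additional API requests.
data.all(request_limit: 10) do |row|
  puts row[:word]
end

# Without a block, an Enumerator is returned (the enum_for branch above).
first_hundred = data.all.first 100
```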
@@ -618,15 +618,17 @@ module Google
  end

  ##
- # Creates a new [view](https://cloud.google.com/bigquery/docs/views)
- # table, which is a virtual table defined by the given SQL query.
+ # Creates a new view, which is a virtual table defined by the given SQL query.
  #
- # BigQuery's views are logical views, not materialized views, which
- # means that the query that defines the view is re-executed every time
- # the view is queried. Queries are billed according to the total amount
+ # With BigQuery's logical views, the query that defines the view is re-executed
+ # every time the view is queried. Queries are billed according to the total amount
  # of data in all table fields referenced directly or indirectly by the
  # top-level query. (See {Table#view?} and {Table#query}.)
  #
+ # For materialized views, see {#create_materialized_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/views Creating views
+ #
  # @param [String] table_id The ID of the view table. The ID must contain
  # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
  # maximum length is 1,024 characters.
@@ -667,7 +669,7 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- # "SELECT name, age FROM proj.dataset.users"
+ # "SELECT name, age FROM proj.dataset.users"
  #
  # @example A name and description can be provided:
  # require "google/cloud/bigquery"
@@ -676,13 +678,18 @@ module Google
  # dataset = bigquery.dataset "my_dataset"
  #
  # view = dataset.create_view "my_view",
- # "SELECT name, age FROM proj.dataset.users",
- # name: "My View", description: "This is my view"
+ # "SELECT name, age FROM proj.dataset.users",
+ # name: "My View", description: "This is my view"
  #
  # @!group Table
  #
- def create_view table_id, query, name: nil, description: nil,
- standard_sql: nil, legacy_sql: nil, udfs: nil
+ def create_view table_id,
+ query,
+ name: nil,
+ description: nil,
+ standard_sql: nil,
+ legacy_sql: nil,
+ udfs: nil
  use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
  new_view_opts = {
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
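
The reflowed `create_view` signature is layout-only; the parameters and their defaults are unchanged. For reference, a call exercising the keyword options might look like this (project, dataset, and table IDs are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# All keyword arguments remain optional, exactly as before the reflow.
view = dataset.create_view "my_view",
                           "SELECT name, age FROM `my_project.my_dataset.users`",
                           name: "My View",
                           description: "This is my view",
                           standard_sql: true
```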
@@ -698,7 +705,81 @@ module Google
  user_defined_function_resources: udfs_gapi(udfs)
  )
  }.delete_if { |_, v| v.nil? }
- new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
+ new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+ gapi = service.insert_table dataset_id, new_view
+ Table.from_gapi gapi, service
+ end
+
+ ##
+ # Creates a new materialized view.
+ #
+ # Materialized views are precomputed views that periodically cache results of a query for increased performance
+ # and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+ # only delta changes from the base table to compute up-to-date results.
+ #
+ # Queries that use materialized views are generally faster and consume less resources than queries that retrieve
+ # the same data only from the base table. Materialized views are helpful to significantly boost performance of
+ # workloads that have the characteristic of common and repeated queries.
+ #
+ # For logical views, see {#create_view}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+ #
+ # @param [String] table_id The ID of the materialized view table. The ID must contain only letters (a-z, A-Z),
+ # numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @param [String] query The query that BigQuery executes when the materialized view is referenced.
+ # @param [String] name A descriptive name for the table.
+ # @param [String] description A user-friendly description of the table.
+ # @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+ # updated. Optional. The default value is true.
+ # @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+ # will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+ #
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ # "SELECT name, age FROM proj.dataset.users"
+ #
+ # @example Automatic refresh can be disabled:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # materialized_view = dataset.create_materialized_view "my_materialized_view",
+ # "SELECT name, age FROM proj.dataset.users",
+ # enable_refresh: false
+ #
+ # @!group Table
+ #
+ def create_materialized_view table_id,
+ query,
+ name: nil,
+ description: nil,
+ enable_refresh: nil,
+ refresh_interval_ms: nil
+ new_view_opts = {
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
+ project_id: project_id,
+ dataset_id: dataset_id,
+ table_id: table_id
+ ),
+ friendly_name: name,
+ description: description,
+ materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+ enable_refresh: enable_refresh,
+ query: query,
+ refresh_interval_ms: refresh_interval_ms
+ )
+ }.delete_if { |_, v| v.nil? }
+ new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)

  gapi = service.insert_table dataset_id, new_view
  Table.from_gapi gapi, service
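
Combining the new keyword options, a materialized view that refreshes automatically but no more often than every 15 minutes could be created as follows (a sketch; IDs are placeholders, and `refresh_interval_ms` semantics are as documented above):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# enable_refresh defaults to true server-side; shown here for clarity.
materialized_view = dataset.create_materialized_view "my_materialized_view",
                                                     "SELECT name, age FROM `my_project.my_dataset.users`",
                                                     enable_refresh: true,
                                                     refresh_interval_ms: 900_000 # 15 minutes
```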
@@ -1059,35 +1140,37 @@ module Google
  #
  # Ruby types are mapped to BigQuery types as follows:
  #
- # | BigQuery    | Ruby                                 | Notes                                          |
- # |-------------|--------------------------------------|------------------------------------------------|
- # | `BOOL`      | `true`/`false`                       |                                                |
- # | `INT64`     | `Integer`                            |                                                |
- # | `FLOAT64`   | `Float`                              |                                                |
- # | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
- # | `STRING`    | `String`                             |                                                |
- # | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
- # | `DATE`      | `Date`                               |                                                |
- # | `TIMESTAMP` | `Time`                               |                                                |
- # | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
- # | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL`       | `true`/`false`                       |                                                    |
+ # | `INT64`      | `Integer`                            |                                                    |
+ # | `FLOAT64`    | `Float`                              |                                                    |
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+ # | `STRING`     | `String`                             |                                                    |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
  #
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
  # of each BigQuery data type, including allowed values.
- # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
- # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
- # type for these values.
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
  #
- # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
- # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
- # type codes from the following list:
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
  #
  # * `:BOOL`
  # * `:INT64`
  # * `:FLOAT64`
  # * `:NUMERIC`
+ # * `:BIGNUMERIC`
  # * `:STRING`
  # * `:DATETIME`
  # * `:DATE`
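
As the updated table notes, `BIGNUMERIC` has no dedicated Ruby type, so a query parameter carrying one must be tagged explicitly through `types`. A hedged sketch with a named parameter (identifiers are illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Pass the value as a String and tag it :BIGNUMERIC; a BigDecimal param
# would otherwise be bound as NUMERIC and rounded to scale 9.
data = bigquery.query "SELECT @amount AS amount",
                      params: { amount: "123456789.123456789012345678" },
                      types:  { amount: :BIGNUMERIC }

data.each { |row| puts row[:amount] }
```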
@@ -1400,35 +1483,37 @@ module Google
  #
  # Ruby types are mapped to BigQuery types as follows:
  #
- # | BigQuery    | Ruby                                 | Notes                                          |
- # |-------------|--------------------------------------|------------------------------------------------|
- # | `BOOL`      | `true`/`false`                       |                                                |
- # | `INT64`     | `Integer`                            |                                                |
- # | `FLOAT64`   | `Float`                              |                                                |
- # | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
- # | `STRING`    | `String`                             |                                                |
- # | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
- # | `DATE`      | `Date`                               |                                                |
- # | `TIMESTAMP` | `Time`                               |                                                |
- # | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
- # | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
- # | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
- # | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `BOOL`       | `true`/`false`                       |                                                    |
+ # | `INT64`      | `Integer`                            |                                                    |
+ # | `FLOAT64`    | `Float`                              |                                                    |
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+ # | `STRING`     | `String`                             |                                                    |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
  #
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
  # of each BigQuery data type, including allowed values.
- # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always to
- # infer the right SQL type from a value in `params`. In these cases, `types` must be used to specify the SQL
- # type for these values.
+ # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+ # possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+ # specify the SQL type for these values.
  #
- # Must match the value type passed to `params`. This must be an `Array` when the query uses positional query
- # parameters. This must be an `Hash` when the query uses named query parameters. The values should be BigQuery
- # type codes from the following list:
+ # Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+ # positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+ # should be BigQuery type codes from the following list:
  #
  # * `:BOOL`
  # * `:INT64`
  # * `:FLOAT64`
  # * `:NUMERIC`
+ # * `:BIGNUMERIC`
  # * `:STRING`
  # * `:DATETIME`
  # * `:DATE`
@@ -2327,6 +2412,21 @@ module Google
  # the need to complete a load operation before the data can appear in
  # query results.
  #
+ # Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
+ # complex types:
+ #
+ # | BigQuery     | Ruby                                 | Notes                                              |
+ # |--------------|--------------------------------------|----------------------------------------------------|
+ # | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+ # | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.     |
+ # | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+ # | `DATE`       | `Date`                               |                                                    |
+ # | `TIMESTAMP`  | `Time`                               |                                                    |
+ # | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+ # | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+ # | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+ # | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+ #
  # Because BigQuery's streaming API is designed for high insertion rates,
  # modifications to the underlying table metadata are eventually
  # consistent when interacting with the streaming system. In most cases
@@ -2341,7 +2441,10 @@ module Google
  #
  # @param [String] table_id The ID of the destination table.
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
- # containing the data. Required.
+ # containing the data. Required. `BigDecimal` values will be rounded to
+ # scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+ # rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+ # instead of `BigDecimal`.
  # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
  # detect duplicate insertion requests on a best-effort basis. For more information, see [data
  # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
@@ -2408,6 +2511,18 @@ module Google
  # t.schema.integer "age", mode: :required
  # end
  #
+ # @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # row = {
+ # "my_numeric" => BigDecimal("123456798.987654321"),
+ # "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
+ # }
+ # dataset.insert "my_table", row
+ #
  # @!group Data
  #
  def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
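
Beyond the `BIGNUMERIC` example above, the streaming conversion table also covers `ARRAY` and `STRUCT`; nested values stream the same way. A small sketch (dataset, table, and schema are assumed to exist):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

rows = [
  {
    "name"    => "Alice",
    "scores"  => [98, 87, 92],                        # ARRAY of INT64
    "address" => { street: "Main St", zip: "12345" }  # STRUCT; keys may be symbols
  }
]
dataset.insert "my_table", rows, skip_invalid: true
```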
@@ -2500,11 +2615,9 @@ module Google
  create_table table_id do |tbl_updater|
  yield tbl_updater if block_given?
  end
- # rubocop:disable Lint/HandleExceptions
  rescue Google::Cloud::AlreadyExistsError
+ # Do nothing if it already exists
  end
- # rubocop:enable Lint/HandleExceptions
-
  sleep 60
  retry
  end
@@ -2547,7 +2660,7 @@ module Google
  return if attributes.empty?
  ensure_service!
  patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
- patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
  patch_gapi.etag = etag if etag
  @gapi = service.patch_dataset dataset_id, patch_gapi
  end
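
The switch to `Dataset.new(**patch_args)` here, and to `Access.new(**opts)` in the hunks below, prepares the gem for Ruby 3.0, where a bare `Hash` positional argument is no longer implicitly converted to keyword arguments. A self-contained illustration of the difference (not library code):

```ruby
# Ruby 3.0 keyword-argument separation, in miniature.
def make(name: nil, description: nil)
  { name: name, description: description }
end

opts = { name: "My Dataset", description: "Stats" }

make(**opts)  # works on Ruby 2.x and 3.x: the Hash is explicitly splatted to keywords
# make(opts)  # ArgumentError on Ruby 3.0+: a positional Hash no longer becomes keywords
```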
@@ -2676,12 +2789,11 @@ module Google

  def load_local_or_uri file, updater
  job_gapi = updater.to_gapi
- job = if local_file? file
- load_local file, job_gapi
- else
- load_storage file, job_gapi
- end
- job
+ if local_file? file
+ load_local file, job_gapi
+ else
+ load_storage file, job_gapi
+ end
  end

  def storage_url? files
@@ -2721,6 +2833,7 @@ module Google
  ##
  # @private Create an Updater object.
  def initialize gapi
+ super()
  @updates = []
  @gapi = gapi
  end
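
The explicit `super()` is a small but deliberate detail: with empty parentheses it calls the parent initializer with no arguments, whereas a bare `super` would forward `gapi` up the chain. In miniature (classes here are illustrative, not the gem's):

```ruby
class Base
  def initialize
    @defaults_applied = true
  end
end

class Updater < Base
  def initialize gapi
    super()        # calls Base#initialize with NO arguments
    # bare `super` would forward `gapi` and raise ArgumentError here
    @gapi = gapi
  end
end
```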
@@ -2756,6 +2869,12 @@ module Google
  raise "not implemented in #{self.class}"
  end

+ ##
+ # @raise [RuntimeError] not implemented
+ def create_materialized_view(*)
+ raise "not implemented in #{self.class}"
+ end
+
  ##
  # @raise [RuntimeError] not implemented
  def table(*)
@@ -1194,7 +1194,7 @@ module Google
  @rules.reject!(&find_by_scope_and_value(scope, value))
  # Add new rule for this role, scope, and value
  opts = { role: role, scope => value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -1204,7 +1204,7 @@ module Google
  @rules.reject!(&find_by_scope_and_resource_ref(:routine, value))
  # Add new rule for this role, scope, and value
  opts = { routine: value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -1215,7 +1215,7 @@ module Google
  @rules.reject!(&find_by_scope_and_resource_ref(:view, value))
  # Add new rule for this role, scope, and value
  opts = { view: value }
- @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+ @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
  end

  # @private
@@ -120,12 +120,12 @@ module Google
  # puts dataset.name
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit
  return enum_for :all, request_limit: request_limit unless block_given?
  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
  break if request_limit.negative?
@@ -52,6 +52,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
  ##
  # @private New External from URLs and format
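
Once configured as in the new example, the external data source can be queried directly through the `external` option, without loading the data into BigQuery first (the table alias `my_ext_table` is illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_source_uri_prefix =
    "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
end

# Register the source under an alias and query it like a table.
data = bigquery.query "SELECT * FROM my_ext_table LIMIT 10",
                      external: { my_ext_table: external_data }
data.each { |row| puts row }
```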
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
  val = {
- "csv" => "CSV", "avro" => "AVRO",
+ "csv" => "CSV",
+ "avro" => "AVRO",
  "json" => "NEWLINE_DELIMITED_JSON",
  "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
  "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
  "datastore" => "DATASTORE_BACKUP",
  "backup" => "DATASTORE_BACKUP",
  "datastore_backup" => "DATASTORE_BACKUP",
- "bigtable" => "BIGTABLE"
+ "bigtable" => "BIGTABLE",
+ "orc" => "ORC",
+ "parquet" => "PARQUET"
  }[format.to_s.downcase]
  return val unless val.nil?
  Array(urls).each do |url|
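
With the two new map entries, `:orc` and `:parquet` (as string or symbol, any case) now resolve directly instead of falling back to URL-extension detection. For instance (the bucket path is a placeholder):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# The format keyword now accepts :orc and :parquet explicitly.
orc_data = bigquery.external "gs://my-bucket/path/*", format: :orc
orc_data.format #=> "ORC"
orc_data.orc?   #=> true (predicate added later in this diff)
```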
@@ -110,7 +131,7 @@ module Google
  when "GOOGLE_SHEETS" then External::SheetsSource
  when "BIGTABLE" then External::BigtableSource
  else
- # AVRO and DATASTORE_BACKUP
+ # AVRO, DATASTORE_BACKUP, PARQUET
  External::DataSource
  end
  end
@@ -148,6 +169,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
  ##
  # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
  @gapi.source_format == "BIGTABLE"
  end

+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "ORC"
+ # external_data.orc? #=> true
+ #
+ def orc?
+ @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "PARQUET"
+ # external_data.parquet? #=> true
+ #
+ def parquet?
+ @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
  @gapi.max_bad_records = new_max_bad_records
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+ !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+ @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+ return false unless hive_partitioning?
+ !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+ @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi
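
Taken together, the new accessors simply round-trip through a `HivePartitioningOptions` object on the underlying API resource. As a closing sketch, the same configuration can back a permanent external table via the pre-existing `Table#external=` mechanism (a hedged example: IDs are placeholders, and the partition key `dt` assumes the sample data's layout):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix =
    "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
end

# Back a permanent table with the external source.
dataset.create_table "my_ext_table" do |table|
  table.external = external_data
end

# With require_partition_filter set, queries must filter on a partition key.
bigquery.query "SELECT * FROM my_dataset.my_ext_table WHERE dt = '2020-11-15'"
```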