RubyGems - google-cloud-bigquery - Versions diffs - 1.21.1 → 1.27.0 - Mend

google-cloud-bigquery 1.21.1 → 1.27.0

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +72 -0
data/CONTRIBUTING.md +1 -1
data/lib/google-cloud-bigquery.rb +9 -2
data/lib/google/cloud/bigquery.rb +1 -1
data/lib/google/cloud/bigquery/convert.rb +3 -1
data/lib/google/cloud/bigquery/copy_job.rb +15 -6
data/lib/google/cloud/bigquery/data.rb +12 -0
data/lib/google/cloud/bigquery/dataset.rb +61 -20
data/lib/google/cloud/bigquery/dataset/access.rb +293 -16
data/lib/google/cloud/bigquery/external.rb +352 -3
data/lib/google/cloud/bigquery/extract_job.rb +154 -50
data/lib/google/cloud/bigquery/job.rb +35 -1
data/lib/google/cloud/bigquery/load_job.rb +197 -34
data/lib/google/cloud/bigquery/model.rb +164 -8
data/lib/google/cloud/bigquery/policy.rb +431 -0
data/lib/google/cloud/bigquery/project.rb +164 -68
data/lib/google/cloud/bigquery/query_job.rb +27 -12
data/lib/google/cloud/bigquery/routine.rb +127 -5
data/lib/google/cloud/bigquery/service.rb +50 -11
data/lib/google/cloud/bigquery/table.rb +181 -42
data/lib/google/cloud/bigquery/time.rb +6 -0
data/lib/google/cloud/bigquery/version.rb +1 -1
metadata +7 -6

data/lib/google/cloud/bigquery/extract_job.rb CHANGED Viewed

@@ -20,15 +20,17 @@ module Google
       # # ExtractJob
       #
       # A {Job} subclass representing an export operation that may be performed
-      # on a {Table}. A ExtractJob instance is created when you call
-      # {Table#extract_job}.
+      # on a {Table} or {Model}. An ExtractJob instance is returned when you call
+      # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
       #
       # @see https://cloud.google.com/bigquery/docs/exporting-data
-      #   Exporting Data From BigQuery
+      #   Exporting table data
+      # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+      #   Exporting models
       # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
       #   reference
       #
-      # @example
+      # @example Export table data
       #   require "google/cloud/bigquery"
       #
       #   bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
       #   extract_job.wait_until_done!
       #   extract_job.done? #=> true
       #
+      # @example Export a model
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   model = dataset.model "my_model"
+      #
+      #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+      #
+      #   extract_job.wait_until_done!
+      #   extract_job.done? #=> true
+      #
       class ExtractJob < Job
         ##
         # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,126 @@ module Google
         end
         ##
-        # The table from which the data is exported. This is the table upon
-        # which {Table#extract_job} was called.
+        # The table or model which is exported.
         #
-        # @return [Table] A table instance.
+        # @return [Table, Model, nil] A table or model instance, or `nil`.
         #
         def source
-          table = @gapi.configuration.extract.source_table
-          return nil unless table
-          retrieve_table table.project_id, table.dataset_id, table.table_id
+          if (table = @gapi.configuration.extract.source_table)
+            retrieve_table table.project_id, table.dataset_id, table.table_id
+          elsif (model = @gapi.configuration.extract.source_model)
+            retrieve_model model.project_id, model.dataset_id, model.model_id
+          end
         end
         ##
-        # Checks if the export operation compresses the data using gzip. The
-        # default is `false`.
+        # Whether the source of the export job is a table. See {#source}.
         #
-        # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+        # @return [Boolean] `true` when the source is a table, `false`
+        #   otherwise.
         #
-        def compression?
-          val = @gapi.configuration.extract.compression
-          val == "GZIP"
+        def table?
+          !@gapi.configuration.extract.source_table.nil?
         end
         ##
-        # Checks if the destination format for the data is [newline-delimited
-        # JSON](http://jsonlines.org/). The default is `false`.
+        # Whether the source of the export job is a model. See {#source}.
         #
-        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
+        # @return [Boolean] `true` when the source is a model, `false`
         #   otherwise.
         #
+        def model?
+          !@gapi.configuration.extract.source_model.nil?
+        end
+        ##
+        # Checks if the export operation compresses the data using gzip. The
+        # default is `false`. Not applicable when extracting models.
+        #
+        # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+        #   table extraction.
+        def compression?
+          return false unless table?
+          @gapi.configuration.extract.compression == "GZIP"
+        end
+        ##
+        # Checks if the destination format for the table data is [newline-delimited
+        # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+        # extracting models.
+        #
+        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+        #   `NEWLINE_DELIMITED_JSON` or not a table extraction.
+        #
         def json?
-          val = @gapi.configuration.extract.destination_format
-          val == "NEWLINE_DELIMITED_JSON"
+          return false unless table?
+          @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
         end
         ##
-        # Checks if the destination format for the data is CSV. Tables with
+        # Checks if the destination format for the table data is CSV. Tables with
         # nested or repeated fields cannot be exported as CSV. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
-        # @return [Boolean] `true` when `CSV`, `false` otherwise.
+        # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+        #   table extraction.
         #
         def csv?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           return true if val.nil?
           val == "CSV"
         end
         ##
-        # Checks if the destination format for the data is
-        # [Avro](http://avro.apache.org/). The default is `false`.
+        # Checks if the destination format for the table data is
+        # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+        # when extracting models.
         #
-        # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+        # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+        #   table extraction.
         #
         def avro?
+          return false unless table?
+          @gapi.configuration.extract.destination_format == "AVRO"
+        end
+        ##
+        # Checks if the destination format for the model is TensorFlow SavedModel.
+        # The default is `true` for models. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+        #   `ML_TF_SAVED_MODEL` or not a model extraction.
+        #
+        def ml_tf_saved_model?
+          return false unless model?
           val = @gapi.configuration.extract.destination_format
-          val == "AVRO"
+          return true if val.nil?
+          val == "ML_TF_SAVED_MODEL"
+        end
+        ##
+        # Checks if the destination format for the model is XGBoost. The default
+        # is `false`. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+        #   `ML_XGBOOST_BOOSTER` or not a model extraction.
+        #
+        def ml_xgboost_booster?
+          return false unless model?
+          @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
         end
         ##
         # The character or symbol the operation uses to delimit fields in the
-        # exported data. The default is a comma (,).
+        # exported data. The default is a comma (,) for tables. Not applicable
+        # when extracting models.
         #
-        # @return [String] A string containing the character, such as `","`.
+        # @return [String, nil] A string containing the character, such as `","`,
+        #   `nil` if not a table extraction.
         #
         def delimiter
+          return unless table?
           val = @gapi.configuration.extract.field_delimiter
           val = "," if val.nil?
           val
@@ -121,12 +190,13 @@ module Google
         ##
         # Checks if the exported data contains a header row. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
         # @return [Boolean] `true` when the print header configuration is
-        #   present or `nil`, `false` otherwise.
+        #   present or `nil`, `false` if disabled or not a table extraction.
         #
         def print_header?
+          return false unless table?
           val = @gapi.configuration.extract.print_header
           val = true if val.nil?
           val
@@ -159,12 +229,14 @@ module Google
         # whether to enable extracting applicable column types (such as
         # `TIMESTAMP`) to their corresponding AVRO logical types
         # (`timestamp-micros`), instead of only using their raw types
-        # (`avro-long`).
+        # (`avro-long`). Not applicable when extracting models.
         #
         # @return [Boolean] `true` when applicable column types will use their
-        #   corresponding AVRO logical types, `false` otherwise.
+        #   corresponding AVRO logical types, `false` if not enabled or not a
+        #   table extraction.
         #
         def use_avro_logical_types?
+          return false unless table?
           @gapi.configuration.extract.use_avro_logical_types
         end
@@ -182,19 +254,24 @@ module Google
           #
           # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
          #   configuration object for setting extract options.
-          def self.from_options service, table, storage_files, options
+          def self.from_options service, source, storage_files, options
             job_ref = service.job_ref_from options[:job_id], options[:prefix]
             storage_urls = Array(storage_files).map do |url|
               url.respond_to?(:to_gs_url) ? url.to_gs_url : url
             end
             options[:format] ||= Convert.derive_source_format storage_urls.first
+            extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+              destination_uris: Array(storage_urls)
+            )
+            if source.is_a? Google::Apis::BigqueryV2::TableReference
+              extract_config.source_table = source
+            elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+              extract_config.source_model = source
+            end
             job = Google::Apis::BigqueryV2::Job.new(
               job_reference: job_ref,
               configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
-                extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
-                  destination_uris: Array(storage_urls),
-                  source_table:     table
-                ),
+                extract: extract_config,
                 dry_run: options[:dryrun]
               )
             )
@@ -253,7 +330,7 @@ module Google
           end
           ##
-          # Sets the compression type.
+          # Sets the compression type. Not applicable when extracting models.
           #
           # @param [String] value The compression type to use for exported
           #   files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +342,7 @@ module Google
           end
           ##
-          # Sets the field delimiter.
+          # Sets the field delimiter. Not applicable when extracting models.
           #
           # @param [String] value Delimiter to use between fields in the
           #   exported data. Default is <code>,</code>.
@@ -276,14 +353,21 @@ module Google
           end
           ##
-          # Sets the destination file format. The default value is `csv`.
+          # Sets the destination file format. The default value for
+          # tables is `csv`. Tables with nested or repeated fields cannot be
+          # exported as CSV. The default value for models is `ml_tf_saved_model`.
           #
-          # The following values are supported:
+          # Supported values for tables:
           #
           # * `csv` - CSV
           # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
           # * `avro` - [Avro](http://avro.apache.org/)
           #
+          # Supported values for models:
+          #
+          # * `ml_tf_saved_model` - TensorFlow SavedModel
+          # * `ml_xgboost_booster` - XGBoost Booster
+          #
          # @param [String] new_format The new destination format.
           #
           # @!group Attributes
@@ -293,7 +377,8 @@ module Google
           end
           ##
-          # Print a header row in the exported file.
+          # Print a header row in the exported file. Not applicable when
+          # extracting models.
           #
           # @param [Boolean] value Whether to print out a header row in the
           #   results. Default is `true`.
@@ -307,12 +392,21 @@ module Google
           # Sets the labels to use for the job.
           #
           # @param [Hash] value A hash of user-provided labels associated with
-          #   the job. You can use these to organize and group your jobs. Label
-          #   keys and values can be no longer than 63 characters, can only
-          #   contain lowercase letters, numeric characters, underscores and
-          #   dashes. International characters are allowed. Label values are
-          #   optional. Label keys must start with a letter and each label in
-          #   the list must have a different key.
+          #   the job. You can use these to organize and group your jobs.
+          #
+          #   The labels applied to a resource must meet the following requirements:
+          #
+          #   * Each resource can have multiple labels, up to a maximum of 64.
+          #   * Each label must be a key-value pair.
+          #   * Keys have a minimum length of 1 character and a maximum length of
+          #     63 characters, and cannot be empty. Values can be empty, and have
+          #     a maximum length of 63 characters.
+          #   * Keys and values can contain only lowercase letters, numeric characters,
+          #     underscores, and dashes. All characters must use UTF-8 encoding, and
+          #     international characters are allowed.
+          #   * The key portion of a label must be unique. However, you can use the
+          #     same key with multiple resources.
+          #   * Keys must start with a lowercase letter or international character.
           #
           # @!group Attributes
           #
@@ -362,6 +456,16 @@ module Google
             @gapi
           end
         end
+        protected
+        def retrieve_model project_id, dataset_id, model_id
+          ensure_service!
+          gapi = service.get_project_model project_id, dataset_id, model_id
+          Model.from_gapi_json gapi, service
+        rescue Google::Cloud::NotFoundError
+          nil
+        end
       end
     end
   end

data/lib/google/cloud/bigquery/job.rb CHANGED Viewed

@@ -215,6 +215,17 @@ module Google
           @gapi.statistics.parent_job_id
         end
+        ##
+        # An array containing the job resource usage breakdown by reservation, if present. Reservation usage statistics
+        # are only reported for jobs that are executed within reservations. On-demand jobs do not report this data.
+        #
+        # @return [Array<Google::Cloud::Bigquery::Job::ReservationUsage>, nil] The reservation usage, if present.
+        #
+        def reservation_usage
+          return nil unless @gapi.statistics.reservation_usage
+          Array(@gapi.statistics.reservation_usage).map { |g| ReservationUsage.from_gapi g }
+        end
         ##
         # The statistics including stack frames for a child job of a script.
         #
@@ -489,6 +500,29 @@ module Google
           end
         end
+        ##
+        # Represents Job resource usage breakdown by reservation.
+        #
+        # @attr_reader [String] name The reservation name or "unreserved" for on-demand resource usage.
+        # @attr_reader [Integer] slot_ms The slot-milliseconds the job spent in the given reservation.
+        #
+        class ReservationUsage
+          attr_reader :name, :slot_ms
+          ##
+          # @private Creates a new ReservationUsage instance.
+          def initialize name, slot_ms
+            @name = name
+            @slot_ms = slot_ms
+          end
+          ##
+          # @private New ReservationUsage from a statistics.reservation_usage value.
+          def self.from_gapi gapi
+            new gapi.name, gapi.slot_ms
+          end
+        end
         ##
         # Represents statistics for a child job of a script.
         #
@@ -547,7 +581,7 @@ module Google
           end
           ##
-          # @private New ScriptStatistics from a statistics.script_statistics object.
+          # @private New ScriptStatistics from a statistics.script_statistics value.
           def self.from_gapi gapi
             frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
             new gapi.evaluation_kind, frames

data/lib/google/cloud/bigquery/load_job.rb CHANGED Viewed

@@ -37,8 +37,8 @@ module Google
       #   bigquery = Google::Cloud::Bigquery.new
       #   dataset = bigquery.dataset "my_dataset"
       #
-      #   gs_url = "gs://my-bucket/file-name.csv"
-      #   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+      #   gcs_uri = "gs://my-bucket/file-name.csv"
+      #   load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
       #     schema.string "first_name", mode: :required
       #     schema.record "cities_lived", mode: :repeated do |nested_schema|
       #       nested_schema.string "place", mode: :required
@@ -112,8 +112,7 @@ module Google
         #   `false` otherwise.
         #
         def iso8859_1?
-          val = @gapi.configuration.load.encoding
-          val == "ISO-8859-1"
+          @gapi.configuration.load.encoding == "ISO-8859-1"
         end
         ##
@@ -195,8 +194,7 @@ module Google
         #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
         #
         def json?
-          val = @gapi.configuration.load.source_format
-          val == "NEWLINE_DELIMITED_JSON"
+          @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
         end
         ##
@@ -218,8 +216,27 @@ module Google
         #   `false` otherwise.
         #
         def backup?
-          val = @gapi.configuration.load.source_format
-          val == "DATASTORE_BACKUP"
+          @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+        end
+        ##
+        # Checks if the source format is ORC.
+        #
+        # @return [Boolean] `true` when the source format is `ORC`,
+        #   `false` otherwise.
+        #
+        def orc?
+          @gapi.configuration.load.source_format == "ORC"
+        end
+        ##
+        # Checks if the source format is Parquet.
+        #
+        # @return [Boolean] `true` when the source format is `PARQUET`,
+        #   `false` otherwise.
+        #
+        def parquet?
+          @gapi.configuration.load.source_format == "PARQUET"
         end
         ##
@@ -347,6 +364,58 @@ module Google
           nil
         end
+        ###
+        # Checks if hive partitioning options are set.
+        #
+        # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+        #
+        # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+        #
+        # @!group Attributes
+        #
+        def hive_partitioning?
+          !@gapi.configuration.load.hive_partitioning_options.nil?
+        end
+        ###
+        # The mode of hive partitioning to use when reading data. The following modes are supported:
+        #
+        #   1. `AUTO`: automatically infer partition key name(s) and type(s).
+        #   2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+        #   3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+        #
+        # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+        #
+        # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+        #
+        # @!group Attributes
+        #
+        def hive_partitioning_mode
+          @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+        end
+        ###
+        # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+        # immediately before the partition key encoding begins. For example, consider files following this data layout:
+        #
+        # ```
+        # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+        # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+        # ```
+        #
+        # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+        # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+        #
+        # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+        #
+        # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+        #
+        # @!group Attributes
+        #
+        def hive_partitioning_source_uri_prefix
+          @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+        end
         ###
         # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
         # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@@ -428,8 +497,9 @@ module Google
         # The period for which the destination table will be time partitioned, if
         # any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
         #
-        # @return [String, nil] The time partition type. Currently the only supported
-        #   value is "DAY", or `nil` if not present.
+        # @return [String, nil] The time partition type. The supported types are `DAY`,
+        #   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+        #   hour, month, and year, respectively; or `nil` if not present.
         #
         # @!group Attributes
         #
@@ -1303,12 +1373,21 @@ module Google
           # Sets the labels to use for the load job.
           #
           # @param [Hash] val A hash of user-provided labels associated with
-          #   the job. You can use these to organize and group your jobs. Label
-          #   keys and values can be no longer than 63 characters, can only
-          #   contain lowercase letters, numeric characters, underscores and
-          #   dashes. International characters are allowed. Label values are
-          #   optional. Label keys must start with a letter and each label in
-          #   the list must have a different key.
+          #   the job. You can use these to organize and group your jobs.
+          #
+          #   The labels applied to a resource must meet the following requirements:
+          #
+          #   * Each resource can have multiple labels, up to a maximum of 64.
+          #   * Each label must be a key-value pair.
+          #   * Keys have a minimum length of 1 character and a maximum length of
+          #     63 characters, and cannot be empty. Values can be empty, and have
+          #     a maximum length of 63 characters.
+          #   * Keys and values can contain only lowercase letters, numeric characters,
+          #     underscores, and dashes. All characters must use UTF-8 encoding, and
+          #     international characters are allowed.
+          #   * The key portion of a label must be unique. However, you can use the
+          #     same key with multiple resources.
+          #   * Keys must start with a lowercase letter or international character.
           #
           # @!group Attributes
           #
@@ -1316,6 +1395,89 @@ module Google
             @gapi.configuration.update! labels: val
           end
+          ##
+          # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+          #
+          #   1. `auto`: automatically infer partition key name(s) and type(s).
+          #   2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+          #   3. `custom`: partition key schema is encoded in the source URI prefix.
+          #
+          # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+          # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+          #
+          # See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+          #
+          # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+          #
+          # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #   dataset = bigquery.dataset "my_dataset"
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+          #     job.format = :parquet
+          #     job.hive_partitioning_mode = :auto
+          #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   load_job.wait_until_done!
+          #   load_job.done? #=> true
+          #
+          # @!group Attributes
+          #
+          def hive_partitioning_mode= mode
+            @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+            @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+          end
+          ##
+          # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+          # immediately before the partition key encoding begins. For example, consider files following this data
+          # layout:
+          #
+          # ```
+          # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+          # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+          # ```
+          #
+          # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+          # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+          #
+          # See {#hive_partitioning_mode=}.
+          #
+          # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+          #
+          # @param [String] source_uri_prefix The common prefix for all source uris.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #   dataset = bigquery.dataset "my_dataset"
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+          #     job.format = :parquet
+          #     job.hive_partitioning_mode = :auto
+          #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   load_job.wait_until_done!
+          #   load_job.done? #=> true
+          #
+          # @!group Attributes
+          #
+          def hive_partitioning_source_uri_prefix= source_uri_prefix
+            @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+            @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+          end
           ##
           # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
           # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@@ -1335,8 +1497,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.schema do |schema|
           #       schema.integer "my_table_id", mode: :required
           #       schema.string "my_table_data", mode: :required
@@ -1376,8 +1538,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.schema do |schema|
           #       schema.integer "my_table_id", mode: :required
           #       schema.string "my_table_data", mode: :required
@@ -1417,8 +1579,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.schema do |schema|
           #       schema.integer "my_table_id", mode: :required
           #       schema.string "my_table_data", mode: :required
@@ -1458,8 +1620,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.schema do |schema|
           #       schema.integer "my_table_id", mode: :required
           #       schema.string "my_table_data", mode: :required
@@ -1490,8 +1652,9 @@ module Google
           # BigQuery does not allow you to change partitioning on an existing
           # table.
           #
-          # @param [String] type The time partition type. Currently the only
-          #   supported value is "DAY".
+          # @param [String] type The time partition type. The supported types are `DAY`,
+          #   `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
+          #   hour, month, and year, respectively.
           #
           # @example
           #   require "google/cloud/bigquery"
@@ -1499,8 +1662,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.time_partitioning_type = "DAY"
           #   end
           #
@@ -1538,8 +1701,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.time_partitioning_type  = "DAY"
           #     job.time_partitioning_field = "dob"
           #     job.schema do |schema|
@@ -1574,8 +1737,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.time_partitioning_type = "DAY"
           #     job.time_partitioning_expiration = 86_400
           #   end
@@ -1634,8 +1797,8 @@ module Google
           #   bigquery = Google::Cloud::Bigquery.new
           #   dataset = bigquery.dataset "my_dataset"
           #
-          #   gs_url = "gs://my-bucket/file-name.csv"
-          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+          #   gcs_uri = "gs://my-bucket/file-name.csv"
+          #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
           #     job.time_partitioning_type  = "DAY"
           #     job.time_partitioning_field = "dob"
           #     job.schema do |schema|