RubyGems - google-cloud-bigquery - Versions diffs - 1.24.0 → 1.29.0 - Mend

google-cloud-bigquery 1.24.0 → 1.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

checksums.yaml +4 -4
data/CHANGELOG.md +52 -0
data/CONTRIBUTING.md +2 -2
data/LOGGING.md +1 -1
data/lib/google/cloud/bigquery/convert.rb +0 -4
data/lib/google/cloud/bigquery/copy_job.rb +1 -0
data/lib/google/cloud/bigquery/data.rb +2 -2
data/lib/google/cloud/bigquery/dataset.rb +106 -21
data/lib/google/cloud/bigquery/dataset/access.rb +112 -14
data/lib/google/cloud/bigquery/dataset/list.rb +2 -2
data/lib/google/cloud/bigquery/external.rb +328 -3
data/lib/google/cloud/bigquery/extract_job.rb +8 -10
data/lib/google/cloud/bigquery/job.rb +43 -3
data/lib/google/cloud/bigquery/job/list.rb +4 -4
data/lib/google/cloud/bigquery/load_job.rb +177 -24
data/lib/google/cloud/bigquery/model/list.rb +2 -2
data/lib/google/cloud/bigquery/policy.rb +432 -0
data/lib/google/cloud/bigquery/project.rb +3 -3
data/lib/google/cloud/bigquery/project/list.rb +2 -2
data/lib/google/cloud/bigquery/query_job.rb +25 -14
data/lib/google/cloud/bigquery/routine.rb +128 -9
data/lib/google/cloud/bigquery/routine/list.rb +2 -2
data/lib/google/cloud/bigquery/service.rb +44 -13
data/lib/google/cloud/bigquery/standard_sql.rb +4 -3
data/lib/google/cloud/bigquery/table.rb +261 -45
data/lib/google/cloud/bigquery/table/async_inserter.rb +24 -15
data/lib/google/cloud/bigquery/table/list.rb +2 -2
data/lib/google/cloud/bigquery/version.rb +1 -1
metadata +16 -15

data/lib/google/cloud/bigquery/dataset/list.rb CHANGED Viewed

@@ -120,12 +120,12 @@ module Google
           #     puts dataset.name
           #   end
           #
-          def all request_limit: nil
+          def all request_limit: nil, &block
             request_limit = request_limit.to_i if request_limit
             return enum_for :all, request_limit: request_limit unless block_given?
             results = self
             loop do
-              results.each { |r| yield r }
+              results.each(&block)
               if request_limit
                 request_limit -= 1
                 break if request_limit.negative?

data/lib/google/cloud/bigquery/external.rb CHANGED Viewed

@@ -52,6 +52,24 @@ module Google
       #   # Retrieve the next page of results
       #   data = data.next if data.next?
       #
+      # @example Hive partitioning options:
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #
+      #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+      #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+      #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+      #     ext.hive_partitioning_mode = :auto
+      #     ext.hive_partitioning_require_partition_filter = true
+      #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+      #   end
+      #
+      #   external_data.hive_partitioning? #=> true
+      #   external_data.hive_partitioning_mode #=> "AUTO"
+      #   external_data.hive_partitioning_require_partition_filter? #=> true
+      #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+      #
       module External
         ##
         # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
         # @private Determine source_format from inputs
         def self.source_format_for urls, format
           val = {
-            "csv" => "CSV",          "avro" => "AVRO",
+            "csv"                    => "CSV",
+            "avro"                   => "AVRO",
             "json"                   => "NEWLINE_DELIMITED_JSON",
             "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
             "sheets"                 => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
             "datastore"              => "DATASTORE_BACKUP",
             "backup"                 => "DATASTORE_BACKUP",
             "datastore_backup"       => "DATASTORE_BACKUP",
-            "bigtable"               => "BIGTABLE"
+            "bigtable"               => "BIGTABLE",
+            "orc"                    => "ORC",
+            "parquet"                => "PARQUET"
           }[format.to_s.downcase]
           return val unless val.nil?
           Array(urls).each do |url|
@@ -110,7 +131,7 @@ module Google
           when "GOOGLE_SHEETS"          then External::SheetsSource
           when "BIGTABLE"               then External::BigtableSource
           else
-            # AVRO and DATASTORE_BACKUP
+            # AVRO, DATASTORE_BACKUP, ORC, PARQUET
             External::DataSource
           end
         end
@@ -148,6 +169,24 @@ module Google
         #   # Retrieve the next page of results
         #   data = data.next if data.next?
         #
+        # @example Hive partitioning options:
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+        #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+        #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+        #     ext.hive_partitioning_mode = :auto
+        #     ext.hive_partitioning_require_partition_filter = true
+        #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+        #   end
+        #
+        #   external_data.hive_partitioning? #=> true
+        #   external_data.hive_partitioning_mode #=> "AUTO"
+        #   external_data.hive_partitioning_require_partition_filter? #=> true
+        #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+        #
         class DataSource
           ##
           # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
             @gapi.source_format == "BIGTABLE"
           end
+          ##
+          # Whether the data format is "ORC".
+          #
+          # @return [Boolean]
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #   external_data.format #=> "ORC"
+          #   external_data.orc? #=> true
+          #
+          def orc?
+            @gapi.source_format == "ORC"
+          end
+          ##
+          # Whether the data format is "PARQUET".
+          #
+          # @return [Boolean]
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #   external_data.format #=> "PARQUET"
+          #   external_data.parquet? #=> true
+          #
+          def parquet?
+            @gapi.source_format == "PARQUET"
+          end
           ##
           # The fully-qualified URIs that point to your data in Google Cloud.
           # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
             @gapi.max_bad_records = new_max_bad_records
           end
+          ##
+          # Checks if hive partitioning options are set.
+          #
+          # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+          # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+          # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+          # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+          #
+          # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning?
+            !@gapi.hive_partitioning_options.nil?
+          end
+          ##
+          # The mode of hive partitioning to use when reading data. The following modes are supported:
+          #
+          #   1. `AUTO`: automatically infer partition key name(s) and type(s).
+          #   2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+          #   3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+          #
+          # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning_mode
+            @gapi.hive_partitioning_options.mode if hive_partitioning?
+          end
+          ##
+          # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+          #
+          #   1. `auto`: automatically infer partition key name(s) and type(s).
+          #   2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+          #   3. `custom`: partition key schema is encoded in the source URI prefix.
+          #
+          # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+          # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+          # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+          # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+          #
+          # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+          #
+          # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning_mode= mode
+            @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+            @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+          end
+          ##
+          # Whether queries over the table using this external data source require a partition filter that can be used
+          # for partition elimination to be specified. Note that this field should only be true when creating a
+          # permanent external table or querying a temporary external table.
+          #
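+          # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+          #   NOTE(review): the body only checks that the option is non-nil, so an explicit `false` also returns `true`.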
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning_require_partition_filter?
+            return false unless hive_partitioning?
+            !@gapi.hive_partitioning_options.require_partition_filter.nil?
+          end
+          ##
+          # Sets whether queries over the table using this external data source require a partition filter
+          # that can be used for partition elimination to be specified.
+          #
+          # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+          #
+          # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning_require_partition_filter= require_partition_filter
+            @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+            @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+          end
+          ##
+          # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+          # immediately before the partition key encoding begins. For example, consider files following this data
+          # layout:
+          #
+          # ```
+          # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+          # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+          # ```
+          #
+          # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+          # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+          #
+          # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning_source_uri_prefix
+            @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+          end
+          ##
+          # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+          # immediately before the partition key encoding begins. For example, consider files following this data
+          # layout:
+          #
+          # ```
+          # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+          # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+          # ```
+          #
+          # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+          # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+          #
+          # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+          #
+          # @param [String] source_uri_prefix The common prefix for all source uris.
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+          #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+          #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+          #     ext.hive_partitioning_mode = :auto
+          #     ext.hive_partitioning_require_partition_filter = true
+          #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+          #   end
+          #
+          #   external_data.hive_partitioning? #=> true
+          #   external_data.hive_partitioning_mode #=> "AUTO"
+          #   external_data.hive_partitioning_require_partition_filter? #=> true
+          #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+          #
+          def hive_partitioning_source_uri_prefix= source_uri_prefix
+            @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+            @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+          end
           ##
           # @private Google API Client object.
           def to_gapi

data/lib/google/cloud/bigquery/extract_job.rb CHANGED Viewed

@@ -103,8 +103,7 @@ module Google
         #   table extraction.
         def compression?
           return false unless table?
-          val = @gapi.configuration.extract.compression
-          val == "GZIP"
+          @gapi.configuration.extract.compression == "GZIP"
         end
         ##
@@ -117,8 +116,7 @@ module Google
         #
         def json?
           return false unless table?
-          val = @gapi.configuration.extract.destination_format
-          val == "NEWLINE_DELIMITED_JSON"
+          @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
         end
         ##
@@ -146,8 +144,7 @@ module Google
         #
         def avro?
           return false unless table?
-          val = @gapi.configuration.extract.destination_format
-          val == "AVRO"
+          @gapi.configuration.extract.destination_format == "AVRO"
         end
         ##
@@ -173,8 +170,7 @@ module Google
         #
         def ml_xgboost_booster?
           return false unless model?
-          val = @gapi.configuration.extract.destination_format
-          val == "ML_XGBOOST_BOOSTER"
+          @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
         end
         ##
@@ -250,6 +246,7 @@ module Google
           ##
           # @private Create an Updater object.
           def initialize gapi
+            super()
             @gapi = gapi
           end
@@ -267,9 +264,10 @@ module Google
             extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
               destination_uris: Array(storage_urls)
             )
-            if source.is_a? Google::Apis::BigqueryV2::TableReference
+            case source
+            when Google::Apis::BigqueryV2::TableReference
               extract_config.source_table = source
-            elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+            when Google::Apis::BigqueryV2::ModelReference
               extract_config.source_model = source
             end
             job = Google::Apis::BigqueryV2::Job.new(