RubyGems - google-cloud-bigquery - Versions diffs - 1.21.2 → 1.22.0 - Mend

google-cloud-bigquery 1.21.2 → 1.22.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +11 -0
data/lib/google/cloud/bigquery/convert.rb +3 -1
data/lib/google/cloud/bigquery/copy_job.rb +15 -6
data/lib/google/cloud/bigquery/dataset.rb +43 -20
data/lib/google/cloud/bigquery/extract_job.rb +153 -45
data/lib/google/cloud/bigquery/load_job.rb +15 -6
data/lib/google/cloud/bigquery/model.rb +164 -8
data/lib/google/cloud/bigquery/project.rb +137 -68
data/lib/google/cloud/bigquery/query_job.rb +15 -6
data/lib/google/cloud/bigquery/service.rb +12 -10
data/lib/google/cloud/bigquery/table.rb +63 -32
data/lib/google/cloud/bigquery/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f38543236358fc319ecfcc2058ffa499e24034027aa56644924d2cf496815550
-  data.tar.gz: 37196fa1c3db03e48df4cb0ae5ae5b9b1ee07b9d112063467ebc57d25b34551a
+  metadata.gz: 5fadedabb79035a0b983765731a3aee2af5ca5480671e4255e8644cfb6e8c68c
+  data.tar.gz: 9ae351ed3987f2a81e8d9046c88ab776e3ac39f37cac5c6961fa3c2daf33b8bb
 SHA512:
-  metadata.gz: a234359fb8a04b42f22725f5540cf0378b7e51b3d847ddd4f24ba41f2a54b4de85068bb9aee10089995c29794c0a64694223ac6bf88202eda6a646ce02c85275
-  data.tar.gz: 99c6c915df70afde18de06907802c60d18df253238e6f3ab752827ea3cfc0aee0ab9bef8dc0816f5980db26334dd2b15ce0a2ecf7db3b026e2f0caa101dcafc8
+  metadata.gz: eec1a19be0af9113a94183b9c0eeaa4edf1e08b177be10c449396541d8c6970e4ec666f3fbbb0613107989afc078799beb44655e085f360e3c97846e797bba15
+  data.tar.gz: 5f89e947e34384131026fdafd0bdd05f05ed742f6a52b4c18ae0da83f433da2f2f090a924ff82bdf480b79eac9267b923732144a0b32773e12e8b3b846b61a28

data/CHANGELOG.md CHANGED

@@ -1,5 +1,16 @@
 # Release History
+### 1.22.0 / 2020-09-10
+#### Features
+* Add support for ML model export
+  * Add model support to Project#extract and #extract_job
+  * Add ExtractJob#model?
+  * Add ExtractJob#ml_tf_saved_model?
+  * Add ExtractJob#ml_xgboost_booster?
+  * Add Model#extract and #extract_job
 ### 1.21.2 / 2020-07-21
 #### Documentation

data/lib/google/cloud/bigquery/convert.rb CHANGED

@@ -318,7 +318,9 @@ module Google
             "parquet"                => "PARQUET",
             "datastore"              => "DATASTORE_BACKUP",
             "backup"                 => "DATASTORE_BACKUP",
-            "datastore_backup"       => "DATASTORE_BACKUP"
+            "datastore_backup"       => "DATASTORE_BACKUP",
+            "ml_tf_saved_model"      => "ML_TF_SAVED_MODEL",
+            "ml_xgboost_booster"     => "ML_XGBOOST_BOOSTER"
           }[format.to_s.downcase]
           return val unless val.nil?
           format

data/lib/google/cloud/bigquery/copy_job.rb CHANGED

@@ -272,12 +272,21 @@ module Google
           # Sets the labels to use for the job.
           #
           # @param [Hash] value A hash of user-provided labels associated with
-          #   the job. You can use these to organize and group your jobs. Label
-          #   keys and values can be no longer than 63 characters, can only
-          #   contain lowercase letters, numeric characters, underscores and
-          #   dashes. International characters are allowed. Label values are
-          #   optional. Label keys must start with a letter and each label in
-          #   the list must have a different key.
+          #   the job. You can use these to organize and group your jobs.
+          #
+          #   The labels applied to a resource must meet the following requirements:
+          #
+          #   * Each resource can have multiple labels, up to a maximum of 64.
+          #   * Each label must be a key-value pair.
+          #   * Keys have a minimum length of 1 character and a maximum length of
+          #     63 characters, and cannot be empty. Values can be empty, and have
+          #     a maximum length of 63 characters.
+          #   * Keys and values can contain only lowercase letters, numeric characters,
+          #     underscores, and dashes. All characters must use UTF-8 encoding, and
+          #     international characters are allowed.
+          #   * The key portion of a label must be unique. However, you can use the
+          #     same key with multiple resources.
+          #   * Keys must start with a lowercase letter or international character.
           #
           # @!group Attributes
           def labels= value

data/lib/google/cloud/bigquery/dataset.rb CHANGED

@@ -313,12 +313,19 @@ module Google
         # @param [Hash<String, String>] labels A hash containing key/value
         #   pairs.
         #
-        #   * Label keys and values can be no longer than 63 characters.
-        #   * Label keys and values can contain only lowercase letters, numbers,
-        #     underscores, hyphens, and international characters.
-        #   * Label keys and values cannot exceed 128 bytes in size.
-        #   * Label keys must begin with a letter.
-        #   * Label keys must be unique within a dataset.
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         #
         # @example
         #   require "google/cloud/bigquery"
@@ -1171,13 +1178,21 @@ module Google
         #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
         #   be used.
         # @param [Hash] labels A hash of user-provided labels associated with
-        #   the job. You can use these to organize and group your jobs. Label
-        #   keys and values can be no longer than 63 characters, can only
-        #   contain lowercase letters, numeric characters, underscores and
-        #   dashes. International characters are allowed. Label values are
-        #   optional. Label keys must start with a letter and each label in the
-        #   list must have a different key. See [Requirements for
-        #   labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         # @param [Array<String>, String] udfs User-defined function resources
         #   used in a legacy SQL query. May be either a code resource to load from
         #   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
@@ -1792,13 +1807,21 @@ module Google
         #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
         #   be used.
         # @param [Hash] labels A hash of user-provided labels associated with
-        #   the job. You can use these to organize and group your jobs. Label
-        #   keys and values can be no longer than 63 characters, can only
-        #   contain lowercase letters, numeric characters, underscores and
-        #   dashes. International characters are allowed. Label values are
-        #   optional. Label keys must start with a letter and each label in the
-        #   list must have a different key. See [Requirements for
-        #   labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         # @yield [updater] A block for setting the schema and other
         #   options for the destination table. The schema can be omitted if the
         #   destination table already exists, or if you're loading data from a

data/lib/google/cloud/bigquery/extract_job.rb CHANGED

@@ -20,15 +20,17 @@ module Google
       # # ExtractJob
       #
       # A {Job} subclass representing an export operation that may be performed
-      # on a {Table}. A ExtractJob instance is created when you call
-      # {Table#extract_job}.
+      # on a {Table} or {Model}. A ExtractJob instance is returned when you call
+      # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
       #
       # @see https://cloud.google.com/bigquery/docs/exporting-data
-      #   Exporting Data From BigQuery
+      #   Exporting table data
+      # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+      #   Exporting models
       # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
       #   reference
       #
-      # @example
+      # @example Export table data
       #   require "google/cloud/bigquery"
       #
       #   bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
       #   extract_job.wait_until_done!
       #   extract_job.done? #=> true
       #
+      # @example Export a model
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   model = dataset.model "my_model"
+      #
+      #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+      #
+      #   extract_job.wait_until_done!
+      #   extract_job.done? #=> true
+      #
       class ExtractJob < Job
         ##
         # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
         end
         ##
-        # The table from which the data is exported. This is the table upon
-        # which {Table#extract_job} was called.
+        # The table or model which is exported.
         #
-        # @return [Table] A table instance.
+        # @return [Table, Model, nil] A table or model instance, or `nil`.
         #
         def source
-          table = @gapi.configuration.extract.source_table
-          return nil unless table
-          retrieve_table table.project_id, table.dataset_id, table.table_id
+          if (table = @gapi.configuration.extract.source_table)
+            retrieve_table table.project_id, table.dataset_id, table.table_id
+          elsif (model = @gapi.configuration.extract.source_model)
+            retrieve_model model.project_id, model.dataset_id, model.model_id
+          end
         end
         ##
-        # Checks if the export operation compresses the data using gzip. The
-        # default is `false`.
+        # Whether the source of the export job is a table. See {#source}.
         #
-        # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+        # @return [Boolean] `true` when the source is a table, `false`
+        #   otherwise.
+        #
+        def table?
+          !@gapi.configuration.extract.source_table.nil?
+        end
+        ##
+        # Whether the source of the export job is a model. See {#source}.
+        #
+        # @return [Boolean] `true` when the source is a model, `false`
+        #   otherwise.
+        #
+        def model?
+          !@gapi.configuration.extract.source_model.nil?
+        end
+        ##
+        # Checks if the export operation compresses the data using gzip. The
+        # default is `false`. Not applicable when extracting models.
         #
+        # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+        #   table extraction.
         def compression?
+          return false unless table?
           val = @gapi.configuration.extract.compression
           val == "GZIP"
         end
         ##
-        # Checks if the destination format for the data is [newline-delimited
-        # JSON](http://jsonlines.org/). The default is `false`.
+        # Checks if the destination format for the table data is [newline-delimited
+        # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+        # extracting models.
         #
-        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
-        #   otherwise.
+        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+        #   `NEWLINE_DELIMITED_JSON` or not a table extraction.
         #
         def json?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           val == "NEWLINE_DELIMITED_JSON"
         end
         ##
-        # Checks if the destination format for the data is CSV. Tables with
+        # Checks if the destination format for the table data is CSV. Tables with
         # nested or repeated fields cannot be exported as CSV. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
-        # @return [Boolean] `true` when `CSV`, `false` otherwise.
+        # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+        #   table extraction.
         #
         def csv?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           return true if val.nil?
           val == "CSV"
         end
         ##
-        # Checks if the destination format for the data is
-        # [Avro](http://avro.apache.org/). The default is `false`.
+        # Checks if the destination format for the table data is
+        # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+        # when extracting models.
         #
-        # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+        # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+        #   table extraction.
         #
         def avro?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           val == "AVRO"
         end
+        ##
+        # Checks if the destination format for the model is TensorFlow SavedModel.
+        # The default is `true` for models. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+        #   `ML_TF_SAVED_MODEL` or not a model extraction.
+        #
+        def ml_tf_saved_model?
+          return false unless model?
+          val = @gapi.configuration.extract.destination_format
+          return true if val.nil?
+          val == "ML_TF_SAVED_MODEL"
+        end
+        ##
+        # Checks if the destination format for the model is XGBoost. The default
+        # is `false`. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+        #   `ML_XGBOOST_BOOSTER` or not a model extraction.
+        #
+        def ml_xgboost_booster?
+          return false unless model?
+          val = @gapi.configuration.extract.destination_format
+          val == "ML_XGBOOST_BOOSTER"
+        end
         ##
         # The character or symbol the operation uses to delimit fields in the
-        # exported data. The default is a comma (,).
+        # exported data. The default is a comma (,) for tables. Not applicable
+        # when extracting models.
         #
-        # @return [String] A string containing the character, such as `","`.
+        # @return [String, nil] A string containing the character, such as `","`,
+        #   `nil` if not a table extraction.
         #
         def delimiter
+          return unless table?
           val = @gapi.configuration.extract.field_delimiter
           val = "," if val.nil?
           val
@@ -121,12 +194,13 @@ module Google
         ##
         # Checks if the exported data contains a header row. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
         # @return [Boolean] `true` when the print header configuration is
-        #   present or `nil`, `false` otherwise.
+        #   present or `nil`, `false` if disabled or not a table extraction.
         #
         def print_header?
+          return false unless table?
           val = @gapi.configuration.extract.print_header
           val = true if val.nil?
           val
@@ -159,12 +233,14 @@ module Google
         # whether to enable extracting applicable column types (such as
         # `TIMESTAMP`) to their corresponding AVRO logical types
         # (`timestamp-micros`), instead of only using their raw types
-        # (`avro-long`).
+        # (`avro-long`). Not applicable when extracting models.
         #
         # @return [Boolean] `true` when applicable column types will use their
-        #   corresponding AVRO logical types, `false` otherwise.
+        #   corresponding AVRO logical types, `false` if not enabled or not a
+        #   table extraction.
         #
         def use_avro_logical_types?
+          return false unless table?
           @gapi.configuration.extract.use_avro_logical_types
         end
@@ -182,19 +258,24 @@ module Google
           #
           # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
           #   configuration object for setting query options.
-          def self.from_options service, table, storage_files, options
+          def self.from_options service, source, storage_files, options
             job_ref = service.job_ref_from options[:job_id], options[:prefix]
             storage_urls = Array(storage_files).map do |url|
               url.respond_to?(:to_gs_url) ? url.to_gs_url : url
             end
             options[:format] ||= Convert.derive_source_format storage_urls.first
+            extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+              destination_uris: Array(storage_urls)
+            )
+            if source.is_a? Google::Apis::BigqueryV2::TableReference
+              extract_config.source_table = source
+            elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+              extract_config.source_model = source
+            end
             job = Google::Apis::BigqueryV2::Job.new(
               job_reference: job_ref,
               configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
-                extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
-                  destination_uris: Array(storage_urls),
-                  source_table:     table
-                ),
+                extract: extract_config,
                 dry_run: options[:dryrun]
               )
             )
@@ -253,7 +334,7 @@ module Google
           end
           ##
-          # Sets the compression type.
+          # Sets the compression type. Not applicable when extracting models.
           #
           # @param [String] value The compression type to use for exported
           #   files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
           end
           ##
-          # Sets the field delimiter.
+          # Sets the field delimiter. Not applicable when extracting models.
           #
           # @param [String] value Delimiter to use between fields in the
           #   exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
           end
           ##
-          # Sets the destination file format. The default value is `csv`.
+          # Sets the destination file format. The default value for
+          # tables is `csv`. Tables with nested or repeated fields cannot be
+          # exported as CSV. The default value for models is `ml_tf_saved_model`.
           #
-          # The following values are supported:
+          # Supported values for tables:
           #
           # * `csv` - CSV
           # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
           # * `avro` - [Avro](http://avro.apache.org/)
           #
+          # Supported values for models:
+          #
+          # * `ml_tf_saved_model` - TensorFlow SavedModel
+          # * `ml_xgboost_booster` - XGBoost Booster
+          #
           # @param [String] new_format The new source format.
           #
           # @!group Attributes
@@ -293,7 +381,8 @@ module Google
           end
           ##
-          # Print a header row in the exported file.
+          # Print a header row in the exported file. Not applicable when
+          # extracting models.
           #
           # @param [Boolean] value Whether to print out a header row in the
           #   results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
           # Sets the labels to use for the job.
           #
           # @param [Hash] value A hash of user-provided labels associated with
-          #   the job. You can use these to organize and group your jobs. Label
-          #   keys and values can be no longer than 63 characters, can only
-          #   contain lowercase letters, numeric characters, underscores and
-          #   dashes. International characters are allowed. Label values are
-          #   optional. Label keys must start with a letter and each label in
-          #   the list must have a different key.
+          #   the job. You can use these to organize and group your jobs.
+          #
+          #   The labels applied to a resource must meet the following requirements:
+          #
+          #   * Each resource can have multiple labels, up to a maximum of 64.
+          #   * Each label must be a key-value pair.
+          #   * Keys have a minimum length of 1 character and a maximum length of
+          #     63 characters, and cannot be empty. Values can be empty, and have
+          #     a maximum length of 63 characters.
+          #   * Keys and values can contain only lowercase letters, numeric characters,
+          #     underscores, and dashes. All characters must use UTF-8 encoding, and
+          #     international characters are allowed.
+          #   * The key portion of a label must be unique. However, you can use the
+          #     same key with multiple resources.
+          #   * Keys must start with a lowercase letter or international character.
           #
           # @!group Attributes
           #
@@ -362,6 +460,16 @@ module Google
             @gapi
           end
         end
+        protected
+        def retrieve_model project_id, dataset_id, model_id
+          ensure_service!
+          gapi = service.get_project_model project_id, dataset_id, model_id
+          Model.from_gapi_json gapi, service
+        rescue Google::Cloud::NotFoundError
+          nil
+        end
       end
     end
   end