RubyGems - google-cloud-bigquery - Versions diffs - 1.20.0 → 1.23.0 - Mend

google-cloud-bigquery 1.20.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

checksums.yaml +4 -4
data/CHANGELOG.md +39 -0
data/TROUBLESHOOTING.md +2 -8
data/lib/google-cloud-bigquery.rb +9 -2
data/lib/google/cloud/bigquery.rb +1 -1
data/lib/google/cloud/bigquery/convert.rb +3 -1
data/lib/google/cloud/bigquery/copy_job.rb +15 -6
data/lib/google/cloud/bigquery/data.rb +12 -0
data/lib/google/cloud/bigquery/dataset.rb +85 -28
data/lib/google/cloud/bigquery/external.rb +24 -0
data/lib/google/cloud/bigquery/extract_job.rb +153 -45
data/lib/google/cloud/bigquery/job.rb +198 -0
data/lib/google/cloud/bigquery/load_job.rb +15 -6
data/lib/google/cloud/bigquery/model.rb +164 -8
data/lib/google/cloud/bigquery/project.rb +242 -84
data/lib/google/cloud/bigquery/query_job.rb +56 -6
data/lib/google/cloud/bigquery/service.rb +19 -13
data/lib/google/cloud/bigquery/table.rb +82 -41
data/lib/google/cloud/bigquery/time.rb +6 -0
data/lib/google/cloud/bigquery/version.rb +1 -1
metadata +5 -5

data/lib/google/cloud/bigquery/extract_job.rb CHANGED

@@ -20,15 +20,17 @@ module Google
       # # ExtractJob
       #
       # A {Job} subclass representing an export operation that may be performed
-      # on a {Table}. A ExtractJob instance is created when you call
-      # {Table#extract_job}.
+      # on a {Table} or {Model}. A ExtractJob instance is returned when you call
+      # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
       #
       # @see https://cloud.google.com/bigquery/docs/exporting-data
-      #   Exporting Data From BigQuery
+      #   Exporting table data
+      # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+      #   Exporting models
       # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
       #   reference
       #
-      # @example
+      # @example Export table data
       #   require "google/cloud/bigquery"
       #
       #   bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
       #   extract_job.wait_until_done!
       #   extract_job.done? #=> true
       #
+      # @example Export a model
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   model = dataset.model "my_model"
+      #
+      #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+      #
+      #   extract_job.wait_until_done!
+      #   extract_job.done? #=> true
+      #
       class ExtractJob < Job
         ##
         # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
         end
         ##
-        # The table from which the data is exported. This is the table upon
-        # which {Table#extract_job} was called.
+        # The table or model which is exported.
         #
-        # @return [Table] A table instance.
+        # @return [Table, Model, nil] A table or model instance, or `nil`.
         #
         def source
-          table = @gapi.configuration.extract.source_table
-          return nil unless table
-          retrieve_table table.project_id, table.dataset_id, table.table_id
+          if (table = @gapi.configuration.extract.source_table)
+            retrieve_table table.project_id, table.dataset_id, table.table_id
+          elsif (model = @gapi.configuration.extract.source_model)
+            retrieve_model model.project_id, model.dataset_id, model.model_id
+          end
         end
         ##
-        # Checks if the export operation compresses the data using gzip. The
-        # default is `false`.
+        # Whether the source of the export job is a table. See {#source}.
         #
-        # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+        # @return [Boolean] `true` when the source is a table, `false`
+        #   otherwise.
+        #
+        def table?
+          !@gapi.configuration.extract.source_table.nil?
+        end
+        ##
+        # Whether the source of the export job is a model. See {#source}.
+        #
+        # @return [Boolean] `true` when the source is a model, `false`
+        #   otherwise.
+        #
+        def model?
+          !@gapi.configuration.extract.source_model.nil?
+        end
+        ##
+        # Checks if the export operation compresses the data using gzip. The
+        # default is `false`. Not applicable when extracting models.
         #
+        # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+        #   table extraction.
         def compression?
+          return false unless table?
           val = @gapi.configuration.extract.compression
           val == "GZIP"
         end
         ##
-        # Checks if the destination format for the data is [newline-delimited
-        # JSON](http://jsonlines.org/). The default is `false`.
+        # Checks if the destination format for the table data is [newline-delimited
+        # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+        # extracting models.
         #
-        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
-        #   otherwise.
+        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+        #   `NEWLINE_DELIMITED_JSON` or not a table extraction.
         #
         def json?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           val == "NEWLINE_DELIMITED_JSON"
         end
         ##
-        # Checks if the destination format for the data is CSV. Tables with
+        # Checks if the destination format for the table data is CSV. Tables with
         # nested or repeated fields cannot be exported as CSV. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
-        # @return [Boolean] `true` when `CSV`, `false` otherwise.
+        # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+        #   table extraction.
         #
         def csv?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           return true if val.nil?
           val == "CSV"
         end
         ##
-        # Checks if the destination format for the data is
-        # [Avro](http://avro.apache.org/). The default is `false`.
+        # Checks if the destination format for the table data is
+        # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+        # when extracting models.
         #
-        # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+        # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+        #   table extraction.
         #
         def avro?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           val == "AVRO"
         end
+        ##
+        # Checks if the destination format for the model is TensorFlow SavedModel.
+        # The default is `true` for models. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+        #   `ML_TF_SAVED_MODEL` or not a model extraction.
+        #
+        def ml_tf_saved_model?
+          return false unless model?
+          val = @gapi.configuration.extract.destination_format
+          return true if val.nil?
+          val == "ML_TF_SAVED_MODEL"
+        end
+        ##
+        # Checks if the destination format for the model is XGBoost. The default
+        # is `false`. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+        #   `ML_XGBOOST_BOOSTER` or not a model extraction.
+        #
+        def ml_xgboost_booster?
+          return false unless model?
+          val = @gapi.configuration.extract.destination_format
+          val == "ML_XGBOOST_BOOSTER"
+        end
         ##
         # The character or symbol the operation uses to delimit fields in the
-        # exported data. The default is a comma (,).
+        # exported data. The default is a comma (,) for tables. Not applicable
+        # when extracting models.
         #
-        # @return [String] A string containing the character, such as `","`.
+        # @return [String, nil] A string containing the character, such as `","`,
+        #   `nil` if not a table extraction.
         #
         def delimiter
+          return unless table?
           val = @gapi.configuration.extract.field_delimiter
           val = "," if val.nil?
           val
@@ -121,12 +194,13 @@ module Google
         ##
         # Checks if the exported data contains a header row. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
         # @return [Boolean] `true` when the print header configuration is
-        #   present or `nil`, `false` otherwise.
+        #   present or `nil`, `false` if disabled or not a table extraction.
         #
         def print_header?
+          return false unless table?
           val = @gapi.configuration.extract.print_header
           val = true if val.nil?
           val
@@ -159,12 +233,14 @@ module Google
         # whether to enable extracting applicable column types (such as
         # `TIMESTAMP`) to their corresponding AVRO logical types
         # (`timestamp-micros`), instead of only using their raw types
-        # (`avro-long`).
+        # (`avro-long`). Not applicable when extracting models.
         #
         # @return [Boolean] `true` when applicable column types will use their
-        #   corresponding AVRO logical types, `false` otherwise.
+        #   corresponding AVRO logical types, `false` if not enabled or not a
+        #   table extraction.
         #
         def use_avro_logical_types?
+          return false unless table?
           @gapi.configuration.extract.use_avro_logical_types
         end
@@ -182,19 +258,24 @@ module Google
           #
           # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
           #   configuration object for setting query options.
-          def self.from_options service, table, storage_files, options
+          def self.from_options service, source, storage_files, options
             job_ref = service.job_ref_from options[:job_id], options[:prefix]
             storage_urls = Array(storage_files).map do |url|
               url.respond_to?(:to_gs_url) ? url.to_gs_url : url
             end
             options[:format] ||= Convert.derive_source_format storage_urls.first
+            extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+              destination_uris: Array(storage_urls)
+            )
+            if source.is_a? Google::Apis::BigqueryV2::TableReference
+              extract_config.source_table = source
+            elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+              extract_config.source_model = source
+            end
             job = Google::Apis::BigqueryV2::Job.new(
               job_reference: job_ref,
               configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
-                extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
-                  destination_uris: Array(storage_urls),
-                  source_table:     table
-                ),
+                extract: extract_config,
                 dry_run: options[:dryrun]
               )
             )
@@ -253,7 +334,7 @@ module Google
           end
           ##
-          # Sets the compression type.
+          # Sets the compression type. Not applicable when extracting models.
           #
           # @param [String] value The compression type to use for exported
           #   files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
           end
           ##
-          # Sets the field delimiter.
+          # Sets the field delimiter. Not applicable when extracting models.
           #
           # @param [String] value Delimiter to use between fields in the
           #   exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
           end
           ##
-          # Sets the destination file format. The default value is `csv`.
+          # Sets the destination file format. The default value for
+          # tables is `csv`. Tables with nested or repeated fields cannot be
+          # exported as CSV. The default value for models is `ml_tf_saved_model`.
           #
-          # The following values are supported:
+          # Supported values for tables:
           #
           # * `csv` - CSV
           # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
           # * `avro` - [Avro](http://avro.apache.org/)
           #
+          # Supported values for models:
+          #
+          # * `ml_tf_saved_model` - TensorFlow SavedModel
+          # * `ml_xgboost_booster` - XGBoost Booster
+          #
           # @param [String] new_format The new source format.
           #
           # @!group Attributes
@@ -293,7 +381,8 @@ module Google
           end
           ##
-          # Print a header row in the exported file.
+          # Print a header row in the exported file. Not applicable when
+          # extracting models.
           #
           # @param [Boolean] value Whether to print out a header row in the
           #   results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
           # Sets the labels to use for the job.
           #
           # @param [Hash] value A hash of user-provided labels associated with
-          #   the job. You can use these to organize and group your jobs. Label
-          #   keys and values can be no longer than 63 characters, can only
-          #   contain lowercase letters, numeric characters, underscores and
-          #   dashes. International characters are allowed. Label values are
-          #   optional. Label keys must start with a letter and each label in
-          #   the list must have a different key.
+          #   the job. You can use these to organize and group your jobs.
+          #
+          #   The labels applied to a resource must meet the following requirements:
+          #
+          #   * Each resource can have multiple labels, up to a maximum of 64.
+          #   * Each label must be a key-value pair.
+          #   * Keys have a minimum length of 1 character and a maximum length of
+          #     63 characters, and cannot be empty. Values can be empty, and have
+          #     a maximum length of 63 characters.
+          #   * Keys and values can contain only lowercase letters, numeric characters,
+          #     underscores, and dashes. All characters must use UTF-8 encoding, and
+          #     international characters are allowed.
+          #   * The key portion of a label must be unique. However, you can use the
+          #     same key with multiple resources.
+          #   * Keys must start with a lowercase letter or international character.
           #
           # @!group Attributes
           #
@@ -362,6 +460,16 @@ module Google
             @gapi
           end
         end
+        protected
+        def retrieve_model project_id, dataset_id, model_id
+          ensure_service!
+          gapi = service.get_project_model project_id, dataset_id, model_id
+          Model.from_gapi_json gapi, service
+        rescue Google::Cloud::NotFoundError
+          nil
+        end
       end
     end
   end

data/lib/google/cloud/bigquery/job.rb CHANGED

@@ -197,6 +197,72 @@ module Google
           Convert.millis_to_time @gapi.statistics.end_time
         end
+        ##
+        # The number of child jobs executed.
+        #
+        # @return [Integer] The number of child jobs executed.
+        #
+        def num_child_jobs
+          @gapi.statistics.num_child_jobs || 0
+        end
+        ##
+        # If this is a child job, the id of the parent.
+        #
+        # @return [String, nil] The ID of the parent job, or `nil` if not a child job.
+        #
+        def parent_job_id
+          @gapi.statistics.parent_job_id
+        end
+        ##
+        # The statistics including stack frames for a child job of a script.
+        #
+        # @return [Google::Cloud::Bigquery::Job::ScriptStatistics, nil] The script statistics, or `nil` if the job is
+        #   not a child job.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   multi_statement_sql = <<~SQL
+        #     -- Declare a variable to hold names as an array.
+        #     DECLARE top_names ARRAY<STRING>;
+        #     -- Build an array of the top 100 names from the year 2017.
+        #     SET top_names = (
+        #     SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+        #     FROM `bigquery-public-data.usa_names.usa_1910_current`
+        #     WHERE year = 2017
+        #     );
+        #     -- Which names appear as words in Shakespeare's plays?
+        #     SELECT
+        #     name AS shakespeare_name
+        #     FROM UNNEST(top_names) AS name
+        #     WHERE name IN (
+        #     SELECT word
+        #     FROM `bigquery-public-data.samples.shakespeare`
+        #     );
+        #   SQL
+        #
+        #   job = bigquery.query_job multi_statement_sql
+        #
+        #   job.wait_until_done!
+        #
+        #   child_jobs = bigquery.jobs parent_job: job
+        #
+        #   child_jobs.each do |child_job|
+        #     script_statistics = child_job.script_statistics
+        #     puts script_statistics.evaluation_kind
+        #     script_statistics.stack_frames.each do |stack_frame|
+        #       puts stack_frame.text
+        #     end
+        #   end
+        #
+        def script_statistics
+          ScriptStatistics.from_gapi @gapi.statistics.script_statistics if @gapi.statistics.script_statistics
+        end
         ##
         # The configuration for the job. Returns a hash.
         #
@@ -423,6 +489,138 @@ module Google
           end
         end
+        ##
+        # Represents statistics for a child job of a script.
+        #
+        # @attr_reader [String] evaluation_kind Indicates the type of child job. Possible values include `STATEMENT` and
+        #   `EXPRESSION`.
+        # @attr_reader [Array<Google::Cloud::Bigquery::Job::ScriptStackFrame>] stack_frames Stack trace where the
+        #   current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where
+        #   the current evaluation happened. The leaf frame is first, the primary script is last.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   multi_statement_sql = <<~SQL
+        #     -- Declare a variable to hold names as an array.
+        #     DECLARE top_names ARRAY<STRING>;
+        #     -- Build an array of the top 100 names from the year 2017.
+        #     SET top_names = (
+        #     SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+        #     FROM `bigquery-public-data.usa_names.usa_1910_current`
+        #     WHERE year = 2017
+        #     );
+        #     -- Which names appear as words in Shakespeare's plays?
+        #     SELECT
+        #     name AS shakespeare_name
+        #     FROM UNNEST(top_names) AS name
+        #     WHERE name IN (
+        #     SELECT word
+        #     FROM `bigquery-public-data.samples.shakespeare`
+        #     );
+        #   SQL
+        #
+        #   job = bigquery.query_job multi_statement_sql
+        #
+        #   job.wait_until_done!
+        #
+        #   child_jobs = bigquery.jobs parent_job: job
+        #
+        #   child_jobs.each do |child_job|
+        #     script_statistics = child_job.script_statistics
+        #     puts script_statistics.evaluation_kind
+        #     script_statistics.stack_frames.each do |stack_frame|
+        #       puts stack_frame.text
+        #     end
+        #   end
+        #
+        class ScriptStatistics
+          attr_reader :evaluation_kind, :stack_frames
+          ##
+          # @private Creates a new ScriptStatistics instance.
+          def initialize evaluation_kind, stack_frames
+            @evaluation_kind = evaluation_kind
+            @stack_frames = stack_frames
+          end
+          ##
+          # @private New ScriptStatistics from a statistics.script_statistics object.
+          def self.from_gapi gapi
+            frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
+            new gapi.evaluation_kind, frames
+          end
+        end
+        ##
+        # Represents a stack frame showing the line/column/procedure name where the current evaluation happened.
+        #
+        # @attr_reader [Integer] start_line One-based start line.
+        # @attr_reader [Integer] start_column One-based start column.
+        # @attr_reader [Integer] end_line One-based end line.
+        # @attr_reader [Integer] end_column One-based end column.
+        # @attr_reader [String] text Text of the current statement/expression.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   multi_statement_sql = <<~SQL
+        #     -- Declare a variable to hold names as an array.
+        #     DECLARE top_names ARRAY<STRING>;
+        #     -- Build an array of the top 100 names from the year 2017.
+        #     SET top_names = (
+        #     SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
+        #     FROM `bigquery-public-data.usa_names.usa_1910_current`
+        #     WHERE year = 2017
+        #     );
+        #     -- Which names appear as words in Shakespeare's plays?
+        #     SELECT
+        #     name AS shakespeare_name
+        #     FROM UNNEST(top_names) AS name
+        #     WHERE name IN (
+        #     SELECT word
+        #     FROM `bigquery-public-data.samples.shakespeare`
+        #     );
+        #   SQL
+        #
+        #   job = bigquery.query_job multi_statement_sql
+        #
+        #   job.wait_until_done!
+        #
+        #   child_jobs = bigquery.jobs parent_job: job
+        #
+        #   child_jobs.each do |child_job|
+        #     script_statistics = child_job.script_statistics
+        #     puts script_statistics.evaluation_kind
+        #     script_statistics.stack_frames.each do |stack_frame|
+        #       puts stack_frame.text
+        #     end
+        #   end
+        #
+        class ScriptStackFrame
+          attr_reader :start_line, :start_column, :end_line, :end_column, :text
+          ##
+          # @private Creates a new ScriptStackFrame instance.
+          def initialize start_line, start_column, end_line, end_column, text
+            @start_line = start_line
+            @start_column = start_column
+            @end_line = end_line
+            @end_column = end_column
+            @text = text
+          end
+          ##
+          # @private New ScriptStackFrame from a statistics.script_statistics[].stack_frames element.
+          def self.from_gapi gapi
+            new gapi.start_line, gapi.start_column, gapi.end_line, gapi.end_column, gapi.text
+          end
+        end
         protected
         ##