RubyGems - google-cloud-bigquery - Versions diffs - 1.12.0 → 1.38.1 - Mend

google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50) hide show

checksums.yaml +4 -4
data/AUTHENTICATION.md +9 -28
data/CHANGELOG.md +372 -1
data/CONTRIBUTING.md +328 -116
data/LOGGING.md +2 -2
data/OVERVIEW.md +21 -20
data/TROUBLESHOOTING.md +2 -8
data/lib/google/cloud/bigquery/argument.rb +197 -0
data/lib/google/cloud/bigquery/convert.rb +154 -170
data/lib/google/cloud/bigquery/copy_job.rb +40 -23
data/lib/google/cloud/bigquery/credentials.rb +5 -12
data/lib/google/cloud/bigquery/data.rb +109 -18
data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
data/lib/google/cloud/bigquery/dataset.rb +960 -279
data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
data/lib/google/cloud/bigquery/external.rb +50 -2256
data/lib/google/cloud/bigquery/extract_job.rb +217 -58
data/lib/google/cloud/bigquery/insert_response.rb +1 -3
data/lib/google/cloud/bigquery/job/list.rb +13 -20
data/lib/google/cloud/bigquery/job.rb +286 -11
data/lib/google/cloud/bigquery/load_job.rb +801 -133
data/lib/google/cloud/bigquery/model/list.rb +5 -9
data/lib/google/cloud/bigquery/model.rb +247 -16
data/lib/google/cloud/bigquery/policy.rb +432 -0
data/lib/google/cloud/bigquery/project/list.rb +6 -11
data/lib/google/cloud/bigquery/project.rb +526 -243
data/lib/google/cloud/bigquery/query_job.rb +584 -125
data/lib/google/cloud/bigquery/routine/list.rb +165 -0
data/lib/google/cloud/bigquery/routine.rb +1227 -0
data/lib/google/cloud/bigquery/schema/field.rb +413 -63
data/lib/google/cloud/bigquery/schema.rb +221 -48
data/lib/google/cloud/bigquery/service.rb +186 -109
data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
data/lib/google/cloud/bigquery/table/list.rb +6 -11
data/lib/google/cloud/bigquery/table.rb +1188 -326
data/lib/google/cloud/bigquery/time.rb +6 -0
data/lib/google/cloud/bigquery/version.rb +1 -1
data/lib/google/cloud/bigquery.rb +18 -8
data/lib/google-cloud-bigquery.rb +15 -13
metadata +67 -40

data/lib/google/cloud/bigquery/extract_job.rb CHANGED Viewed

@@ -20,15 +20,17 @@ module Google
       # # ExtractJob
       #
       # A {Job} subclass representing an export operation that may be performed
-      # on a {Table}. A ExtractJob instance is created when you call
-      # {Table#extract_job}.
+      # on a {Table} or {Model}. An ExtractJob instance is returned when you call
+      # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
       #
       # @see https://cloud.google.com/bigquery/docs/exporting-data
-      #   Exporting Data From BigQuery
+      #   Exporting table data
+      # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+      #   Exporting models
       # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
       #   reference
       #
-      # @example
+      # @example Export table data
       #   require "google/cloud/bigquery"
       #
       #   bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
       #   extract_job.wait_until_done!
       #   extract_job.done? #=> true
       #
+      # @example Export a model
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   model = dataset.model "my_model"
+      #
+      #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+      #
+      #   extract_job.wait_until_done!
+      #   extract_job.done? #=> true
+      #
       class ExtractJob < Job
         ##
         # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,73 +63,126 @@ module Google
         end
         ##
-        # The table from which the data is exported. This is the table upon
-        # which {Table#extract_job} was called.
+        # The table or model which is exported.
         #
-        # @return [Table] A table instance.
+        # @return [Table, Model, nil] A table or model instance, or `nil`.
         #
         def source
-          table = @gapi.configuration.extract.source_table
-          return nil unless table
-          retrieve_table table.project_id,
-                         table.dataset_id,
-                         table.table_id
+          if (table = @gapi.configuration.extract.source_table)
+            retrieve_table table.project_id, table.dataset_id, table.table_id
+          elsif (model = @gapi.configuration.extract.source_model)
+            retrieve_model model.project_id, model.dataset_id, model.model_id
+          end
         end
         ##
-        # Checks if the export operation compresses the data using gzip. The
-        # default is `false`.
+        # Whether the source of the export job is a table. See {#source}.
         #
-        # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+        # @return [Boolean] `true` when the source is a table, `false`
+        #   otherwise.
         #
-        def compression?
-          val = @gapi.configuration.extract.compression
-          val == "GZIP"
+        def table?
+          !@gapi.configuration.extract.source_table.nil?
         end
         ##
-        # Checks if the destination format for the data is [newline-delimited
-        # JSON](http://jsonlines.org/). The default is `false`.
+        # Whether the source of the export job is a model. See {#source}.
         #
-        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
+        # @return [Boolean] `true` when the source is a model, `false`
         #   otherwise.
         #
+        def model?
+          !@gapi.configuration.extract.source_model.nil?
+        end
+        ##
+        # Checks if the export operation compresses the data using gzip. The
+        # default is `false`. Not applicable when extracting models.
+        #
+        # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+        #   table extraction.
+        def compression?
+          return false unless table?
+          @gapi.configuration.extract.compression == "GZIP"
+        end
+        ##
+        # Checks if the destination format for the table data is [newline-delimited
+        # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+        # extracting models.
+        #
+        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+        #   `NEWLINE_DELIMITED_JSON` or not a table extraction.
+        #
         def json?
-          val = @gapi.configuration.extract.destination_format
-          val == "NEWLINE_DELIMITED_JSON"
+          return false unless table?
+          @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
         end
         ##
-        # Checks if the destination format for the data is CSV. Tables with
+        # Checks if the destination format for the table data is CSV. Tables with
         # nested or repeated fields cannot be exported as CSV. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
-        # @return [Boolean] `true` when `CSV`, `false` otherwise.
+        # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+        #   table extraction.
         #
         def csv?
+          return false unless table?
           val = @gapi.configuration.extract.destination_format
           return true if val.nil?
           val == "CSV"
         end
         ##
-        # Checks if the destination format for the data is
-        # [Avro](http://avro.apache.org/). The default is `false`.
+        # Checks if the destination format for the table data is
+        # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+        # when extracting models.
         #
-        # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+        # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+        #   table extraction.
         #
         def avro?
+          return false unless table?
+          @gapi.configuration.extract.destination_format == "AVRO"
+        end
+        ##
+        # Checks if the destination format for the model is TensorFlow SavedModel.
+        # The default is `true` for models. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+        #   `ML_TF_SAVED_MODEL` or not a model extraction.
+        #
+        def ml_tf_saved_model?
+          return false unless model?
           val = @gapi.configuration.extract.destination_format
-          val == "AVRO"
+          return true if val.nil?
+          val == "ML_TF_SAVED_MODEL"
+        end
+        ##
+        # Checks if the destination format for the model is XGBoost. The default
+        # is `false`. Not applicable when extracting tables.
+        #
+        # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+        #   `ML_XGBOOST_BOOSTER` or not a model extraction.
+        #
+        def ml_xgboost_booster?
+          return false unless model?
+          @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
         end
         ##
         # The character or symbol the operation uses to delimit fields in the
-        # exported data. The default is a comma (,).
+        # exported data. The default is a comma (,) for tables. Not applicable
+        # when extracting models.
         #
-        # @return [String] A string containing the character, such as `","`.
+        # @return [String, nil] A string containing the character, such as `","`,
+        #   `nil` if not a table extraction.
         #
         def delimiter
+          return unless table?
           val = @gapi.configuration.extract.field_delimiter
           val = "," if val.nil?
           val
@@ -123,12 +190,13 @@ module Google
         ##
         # Checks if the exported data contains a header row. The default is
-        # `true`.
+        # `true` for tables. Not applicable when extracting models.
         #
         # @return [Boolean] `true` when the print header configuration is
-        #   present or `nil`, `false` otherwise.
+        #   enabled or unset (`nil`), `false` if disabled or not a table extraction.
         #
         def print_header?
+          return false unless table?
           val = @gapi.configuration.extract.print_header
           val = true if val.nil?
           val
@@ -156,12 +224,29 @@ module Google
           Hash[destinations.zip destinations_file_counts]
         end
+        ##
+        # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
+        # whether to enable extracting applicable column types (such as
+        # `TIMESTAMP`) to their corresponding AVRO logical types
+        # (`timestamp-micros`), instead of only using their raw types
+        # (`avro-long`). Not applicable when extracting models.
+        #
+        # @return [Boolean] `true` when applicable column types will use their
+        #   corresponding AVRO logical types, `false` if not enabled or not a
+        #   table extraction.
+        #
+        def use_avro_logical_types?
+          return false unless table?
+          @gapi.configuration.extract.use_avro_logical_types
+        end
         ##
         # Yielded to a block to accumulate changes for an API request.
         class Updater < ExtractJob
           ##
           # @private Create an Updater object.
           def initialize gapi
+            super()
             @gapi = gapi
           end
@@ -170,32 +255,47 @@ module Google
           #
           # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
           #   configuration object for setting extract options.
-          def self.from_options service, table, storage_files, options = {}
+          def self.from_options service, source, storage_files, options
             job_ref = service.job_ref_from options[:job_id], options[:prefix]
             storage_urls = Array(storage_files).map do |url|
               url.respond_to?(:to_gs_url) ? url.to_gs_url : url
             end
-            dest_format = options[:format]
-            if dest_format.nil?
-              dest_format = Convert.derive_source_format storage_urls.first
+            options[:format] ||= Convert.derive_source_format storage_urls.first
+            extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+              destination_uris: Array(storage_urls)
+            )
+            case source
+            when Google::Apis::BigqueryV2::TableReference
+              extract_config.source_table = source
+            when Google::Apis::BigqueryV2::ModelReference
+              extract_config.source_model = source
             end
-            req = Google::Apis::BigqueryV2::Job.new(
+            job = Google::Apis::BigqueryV2::Job.new(
               job_reference: job_ref,
               configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
-                extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
-                  destination_uris: Array(storage_urls),
-                  source_table:     table
-                ),
+                extract: extract_config,
                 dry_run: options[:dryrun]
               )
             )
-            updater = ExtractJob::Updater.new req
+            from_job_and_options job, options
+          end
+          ##
+          # @private Create an Updater from a Job and options hash.
+          #
+          # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
+          #   configuration object for setting extract options.
+          def self.from_job_and_options request, options
+            updater = ExtractJob::Updater.new request
             updater.compression = options[:compression]
             updater.delimiter = options[:delimiter]
-            updater.format = dest_format
+            updater.format = options[:format]
             updater.header = options[:header]
             updater.labels = options[:labels] if options[:labels]
+            unless options[:use_avro_logical_types].nil?
+              updater.use_avro_logical_types = options[:use_avro_logical_types]
+            end
             updater
           end
@@ -232,7 +332,7 @@ module Google
           end
           ##
-          # Sets the compression type.
+          # Sets the compression type. Not applicable when extracting models.
           #
           # @param [String] value The compression type to use for exported
           #   files. Possible values include `GZIP` and `NONE`. The default
@@ -244,7 +344,7 @@ module Google
           end
           ##
-          # Sets the field delimiter.
+          # Sets the field delimiter. Not applicable when extracting models.
           #
           # @param [String] value Delimiter to use between fields in the
           #   exported data. Default is <code>,</code>.
@@ -255,25 +355,32 @@ module Google
           end
           ##
-          # Sets the destination file format. The default value is `csv`.
+          # Sets the destination file format. The default value for
+          # tables is `csv`. Tables with nested or repeated fields cannot be
+          # exported as CSV. The default value for models is `ml_tf_saved_model`.
           #
-          # The following values are supported:
+          # Supported values for tables:
           #
           # * `csv` - CSV
           # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
           # * `avro` - [Avro](http://avro.apache.org/)
           #
+          # Supported values for models:
+          #
+          # * `ml_tf_saved_model` - TensorFlow SavedModel
+          # * `ml_xgboost_booster` - XGBoost Booster
+          #
           # @param [String] new_format The new destination format.
           #
           # @!group Attributes
           #
           def format= new_format
-            @gapi.configuration.extract.update! \
-              destination_format: Convert.source_format(new_format)
+            @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
           end
           ##
-          # Print a header row in the exported file.
+          # Print a header row in the exported file. Not applicable when
+          # extracting models.
           #
           # @param [Boolean] value Whether to print out a header row in the
           #   results. Default is `true`.
@@ -287,12 +394,21 @@ module Google
           # Sets the labels to use for the job.
           #
           # @param [Hash] value A hash of user-provided labels associated with
-          #   the job. You can use these to organize and group your jobs. Label
-          #   keys and values can be no longer than 63 characters, can only
-          #   contain lowercase letters, numeric characters, underscores and
-          #   dashes. International characters are allowed. Label values are
-          #   optional. Label keys must start with a letter and each label in
-          #   the list must have a different key.
+          #   the job. You can use these to organize and group your jobs.
+          #
+          #   The labels applied to a resource must meet the following requirements:
+          #
+          #   * Each resource can have multiple labels, up to a maximum of 64.
+          #   * Each label must be a key-value pair.
+          #   * Keys have a minimum length of 1 character and a maximum length of
+          #     63 characters, and cannot be empty. Values can be empty, and have
+          #     a maximum length of 63 characters.
+          #   * Keys and values can contain only lowercase letters, numeric characters,
+          #     underscores, and dashes. All characters must use UTF-8 encoding, and
+          #     international characters are allowed.
+          #   * The key portion of a label must be unique. However, you can use the
+          #     same key with multiple resources.
+          #   * Keys must start with a lowercase letter or international character.
           #
           # @!group Attributes
           #
@@ -300,6 +416,39 @@ module Google
             @gapi.configuration.update! labels: value
           end
+          ##
+          # Indicate whether to enable extracting applicable column types (such
+          # as `TIMESTAMP`) to their corresponding AVRO logical types
+          # (`timestamp-micros`), instead of only using their raw types
+          # (`avro-long`).
+          #
+          # Only used when `#format` is set to `"AVRO"` (`#avro?`).
+          #
+          # @param [Boolean] value Whether applicable column types will use
+          #   their corresponding AVRO logical types.
+          #
+          # @!group Attributes
+          def use_avro_logical_types= value
+            @gapi.configuration.extract.use_avro_logical_types = value
+          end
+          def cancel
+            raise "not implemented in #{self.class}"
+          end
+          def rerun!
+            raise "not implemented in #{self.class}"
+          end
+          def reload!
+            raise "not implemented in #{self.class}"
+          end
+          alias refresh! reload!
+          def wait_until_done!
+            raise "not implemented in #{self.class}"
+          end
           ##
           # @private Returns the Google API client library version of this job.
           #
@@ -309,6 +458,16 @@ module Google
             @gapi
           end
         end
+        protected
+        def retrieve_model project_id, dataset_id, model_id
+          ensure_service!
+          gapi = service.get_project_model project_id, dataset_id, model_id
+          Model.from_gapi_json gapi, service
+        rescue Google::Cloud::NotFoundError
+          nil
+        end
       end
     end
   end

data/lib/google/cloud/bigquery/insert_response.rb CHANGED Viewed

@@ -99,9 +99,7 @@ module Google
         #   data.
         #
         def error_rows
-          Array(@gapi.insert_errors).map do |ie|
-            @rows[ie.index]
-          end
+          Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
         end
         ##

data/lib/google/cloud/bigquery/job/list.rb CHANGED Viewed

@@ -71,9 +71,9 @@ module Google
           def next
             return nil unless next?
             ensure_service!
-            options = { all: @hidden, token: token, max: @max, filter: @filter }
-            gapi = @service.list_jobs options
-            self.class.from_gapi gapi, @service, @hidden, @max, @filter
+            next_kwargs = @kwargs.merge token: token
+            next_gapi = @service.list_jobs(**next_kwargs)
+            self.class.from_gapi next_gapi, @service, **next_kwargs
           end
           ##
@@ -121,17 +121,15 @@ module Google
           #     puts job.state
           #   end
           #
-          def all request_limit: nil
+          def all request_limit: nil, &block
             request_limit = request_limit.to_i if request_limit
-            unless block_given?
-              return enum_for :all, request_limit: request_limit
-            end
+            return enum_for :all, request_limit: request_limit unless block_given?
             results = self
             loop do
-              results.each { |r| yield r }
+              results.each(&block)
               if request_limit
                 request_limit -= 1
-                break if request_limit < 0
+                break if request_limit.negative?
               end
               break unless results.next?
               results = results.next
@@ -141,17 +139,12 @@ module Google
           ##
           # @private New Job::List from a Google API Client
           # Google::Apis::BigqueryV2::JobList object.
-          def self.from_gapi gapi_list, service, hidden = nil, max = nil,
-                             filter = nil
-            jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
-              Job.from_gapi gapi_object, service
-            end)
-            jobs.instance_variable_set :@token,   gapi_list.next_page_token
-            jobs.instance_variable_set :@etag,    gapi_list.etag
-            jobs.instance_variable_set :@service, service
-            jobs.instance_variable_set :@hidden,  hidden
-            jobs.instance_variable_set :@max,     max
-            jobs.instance_variable_set :@filter,  filter
+          def self.from_gapi gapi_list, service, **kwargs
+            jobs = List.new(Array(gapi_list.jobs).map { |gapi_object| Job.from_gapi gapi_object, service })
+            jobs.instance_variable_set :@token,    gapi_list.next_page_token
+            jobs.instance_variable_set :@etag,     gapi_list.etag
+            jobs.instance_variable_set :@service,  service
+            jobs.instance_variable_set :@kwargs,   kwargs
             jobs
           end