google-cloud-bigquery 1.14.0 → 1.42.0

Sign up to get free protection for your applications and access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,73 +63,132 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @param [String] view Specifies the view that determines which table information is returned.
69
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
70
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
71
+ # `:full`. For more information, see [Tables: get](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get#TableMetadataView).
72
+ # The default value is the `:unspecified` view type.
56
73
  #
57
- def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id,
61
- table.dataset_id,
62
- table.table_id
74
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
75
+ #
76
+ def source view: nil
77
+ if (table = @gapi.configuration.extract.source_table)
78
+ retrieve_table table.project_id, table.dataset_id, table.table_id, metadata_view: view
79
+ elsif (model = @gapi.configuration.extract.source_model)
80
+ retrieve_model model.project_id, model.dataset_id, model.model_id
81
+ end
63
82
  end
64
83
 
65
84
  ##
66
- # Checks if the export operation compresses the data using gzip. The
67
- # default is `false`.
85
+ # Whether the source of the export job is a table. See {#source}.
68
86
  #
69
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
87
+ # @return [Boolean] `true` when the source is a table, `false`
88
+ # otherwise.
70
89
  #
71
- def compression?
72
- val = @gapi.configuration.extract.compression
73
- val == "GZIP"
90
+ def table?
91
+ !@gapi.configuration.extract.source_table.nil?
74
92
  end
75
93
 
76
94
  ##
77
- # Checks if the destination format for the data is [newline-delimited
78
- # JSON](http://jsonlines.org/). The default is `false`.
95
+ # Whether the source of the export job is a model. See {#source}.
79
96
  #
80
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
97
+ # @return [Boolean] `true` when the source is a model, `false`
81
98
  # otherwise.
82
99
  #
100
+ def model?
101
+ !@gapi.configuration.extract.source_model.nil?
102
+ end
103
+
104
+ ##
105
+ # Checks if the export operation compresses the data using gzip. The
106
+ # default is `false`. Not applicable when extracting models.
107
+ #
108
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
109
+ # table extraction.
110
+ def compression?
111
+ return false unless table?
112
+ @gapi.configuration.extract.compression == "GZIP"
113
+ end
114
+
115
+ ##
116
+ # Checks if the destination format for the table data is [newline-delimited
117
+ # JSON](https://jsonlines.org/). The default is `false`. Not applicable when
118
+ # extracting models.
119
+ #
120
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
121
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
122
+ #
83
123
  def json?
84
- val = @gapi.configuration.extract.destination_format
85
- val == "NEWLINE_DELIMITED_JSON"
124
+ return false unless table?
125
+ @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
86
126
  end
87
127
 
88
128
  ##
89
- # Checks if the destination format for the data is CSV. Tables with
129
+ # Checks if the destination format for the table data is CSV. Tables with
90
130
  # nested or repeated fields cannot be exported as CSV. The default is
91
- # `true`.
131
+ # `true` for tables. Not applicable when extracting models.
92
132
  #
93
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
133
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
134
+ # table extraction.
94
135
  #
95
136
  def csv?
137
+ return false unless table?
96
138
  val = @gapi.configuration.extract.destination_format
97
139
  return true if val.nil?
98
140
  val == "CSV"
99
141
  end
100
142
 
101
143
  ##
102
- # Checks if the destination format for the data is
103
- # [Avro](http://avro.apache.org/). The default is `false`.
144
+ # Checks if the destination format for the table data is
145
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
146
+ # when extracting models.
104
147
  #
105
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
148
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
149
+ # table extraction.
106
150
  #
107
151
  def avro?
152
+ return false unless table?
153
+ @gapi.configuration.extract.destination_format == "AVRO"
154
+ end
155
+
156
+ ##
157
+ # Checks if the destination format for the model is TensorFlow SavedModel.
158
+ # The default is `true` for models. Not applicable when extracting tables.
159
+ #
160
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
161
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
162
+ #
163
+ def ml_tf_saved_model?
164
+ return false unless model?
108
165
  val = @gapi.configuration.extract.destination_format
109
- val == "AVRO"
166
+ return true if val.nil?
167
+ val == "ML_TF_SAVED_MODEL"
168
+ end
169
+
170
+ ##
171
+ # Checks if the destination format for the model is XGBoost. The default
172
+ # is `false`. Not applicable when extracting tables.
173
+ #
174
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
175
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
176
+ #
177
+ def ml_xgboost_booster?
178
+ return false unless model?
179
+ @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
110
180
  end
111
181
 
112
182
  ##
113
183
  # The character or symbol the operation uses to delimit fields in the
114
- # exported data. The default is a comma (,).
184
+ # exported data. The default is a comma (,) for tables. Not applicable
185
+ # when extracting models.
115
186
  #
116
- # @return [String] A string containing the character, such as `","`.
187
+ # @return [String, nil] A string containing the character, such as `","`,
188
+ # `nil` if not a table extraction.
117
189
  #
118
190
  def delimiter
191
+ return unless table?
119
192
  val = @gapi.configuration.extract.field_delimiter
120
193
  val = "," if val.nil?
121
194
  val
@@ -123,12 +196,13 @@ module Google
123
196
 
124
197
  ##
125
198
  # Checks if the exported data contains a header row. The default is
126
- # `true`.
199
+ # `true` for tables. Not applicable when extracting models.
127
200
  #
128
201
  # @return [Boolean] `true` when the print header configuration is
129
- # present or `nil`, `false` otherwise.
202
+ # present or `nil`, `false` if disabled or not a table extraction.
130
203
  #
131
204
  def print_header?
205
+ return false unless table?
132
206
  val = @gapi.configuration.extract.print_header
133
207
  val = true if val.nil?
134
208
  val
@@ -153,7 +227,23 @@ module Google
153
227
  # and the counts as values.
154
228
  #
155
229
  def destinations_counts
156
- Hash[destinations.zip destinations_file_counts]
230
+ destinations.zip(destinations_file_counts).to_h
231
+ end
232
+
233
+ ##
234
+ # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
235
+ # whether to enable extracting applicable column types (such as
236
+ # `TIMESTAMP`) to their corresponding AVRO logical types
237
+ # (`timestamp-micros`), instead of only using their raw types
238
+ # (`avro-long`). Not applicable when extracting models.
239
+ #
240
+ # @return [Boolean] `true` when applicable column types will use their
241
+ # corresponding AVRO logical types, `false` if not enabled or not a
242
+ # table extraction.
243
+ #
244
+ def use_avro_logical_types?
245
+ return false unless table?
246
+ @gapi.configuration.extract.use_avro_logical_types
157
247
  end
158
248
 
159
249
  ##
@@ -162,6 +252,7 @@ module Google
162
252
  ##
163
253
  # @private Create an Updater object.
164
254
  def initialize gapi
255
+ super()
165
256
  @gapi = gapi
166
257
  end
167
258
 
@@ -170,32 +261,47 @@ module Google
170
261
  #
171
262
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
172
263
  # configuration object for setting query options.
173
- def self.from_options service, table, storage_files, options = {}
264
+ def self.from_options service, source, storage_files, options
174
265
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
175
266
  storage_urls = Array(storage_files).map do |url|
176
267
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
177
268
  end
178
- dest_format = options[:format]
179
- if dest_format.nil?
180
- dest_format = Convert.derive_source_format storage_urls.first
269
+ options[:format] ||= Convert.derive_source_format storage_urls.first
270
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
271
+ destination_uris: Array(storage_urls)
272
+ )
273
+ case source
274
+ when Google::Apis::BigqueryV2::TableReference
275
+ extract_config.source_table = source
276
+ when Google::Apis::BigqueryV2::ModelReference
277
+ extract_config.source_model = source
181
278
  end
182
- req = Google::Apis::BigqueryV2::Job.new(
279
+ job = Google::Apis::BigqueryV2::Job.new(
183
280
  job_reference: job_ref,
184
281
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
185
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
186
- destination_uris: Array(storage_urls),
187
- source_table: table
188
- ),
282
+ extract: extract_config,
189
283
  dry_run: options[:dryrun]
190
284
  )
191
285
  )
192
286
 
193
- updater = ExtractJob::Updater.new req
287
+ from_job_and_options job, options
288
+ end
289
+
290
+ ##
291
+ # @private Create an Updater from a Job and options hash.
292
+ #
293
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
294
+ # configuration object for setting query options.
295
+ def self.from_job_and_options request, options
296
+ updater = ExtractJob::Updater.new request
194
297
  updater.compression = options[:compression]
195
298
  updater.delimiter = options[:delimiter]
196
- updater.format = dest_format
299
+ updater.format = options[:format]
197
300
  updater.header = options[:header]
198
301
  updater.labels = options[:labels] if options[:labels]
302
+ unless options[:use_avro_logical_types].nil?
303
+ updater.use_avro_logical_types = options[:use_avro_logical_types]
304
+ end
199
305
  updater
200
306
  end
201
307
 
@@ -232,7 +338,7 @@ module Google
232
338
  end
233
339
 
234
340
  ##
235
- # Sets the compression type.
341
+ # Sets the compression type. Not applicable when extracting models.
236
342
  #
237
343
  # @param [String] value The compression type to use for exported
238
344
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -244,7 +350,7 @@ module Google
244
350
  end
245
351
 
246
352
  ##
247
- # Sets the field delimiter.
353
+ # Sets the field delimiter. Not applicable when extracting models.
248
354
  #
249
355
  # @param [String] value Delimiter to use between fields in the
250
356
  # exported data. Default is <code>,</code>.
@@ -255,25 +361,32 @@ module Google
255
361
  end
256
362
 
257
363
  ##
258
- # Sets the destination file format. The default value is `csv`.
364
+ # Sets the destination file format. The default value for
365
+ # tables is `csv`. Tables with nested or repeated fields cannot be
366
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
259
367
  #
260
- # The following values are supported:
368
+ # Supported values for tables:
261
369
  #
262
370
  # * `csv` - CSV
263
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
371
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
264
372
  # * `avro` - [Avro](http://avro.apache.org/)
265
373
  #
374
+ # Supported values for models:
375
+ #
376
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
377
+ # * `ml_xgboost_booster` - XGBoost Booster
378
+ #
266
379
  # @param [String] new_format The new source format.
267
380
  #
268
381
  # @!group Attributes
269
382
  #
270
383
  def format= new_format
271
- @gapi.configuration.extract.update! \
272
- destination_format: Convert.source_format(new_format)
384
+ @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
273
385
  end
274
386
 
275
387
  ##
276
- # Print a header row in the exported file.
388
+ # Print a header row in the exported file. Not applicable when
389
+ # extracting models.
277
390
  #
278
391
  # @param [Boolean] value Whether to print out a header row in the
279
392
  # results. Default is `true`.
@@ -287,12 +400,21 @@ module Google
287
400
  # Sets the labels to use for the job.
288
401
  #
289
402
  # @param [Hash] value A hash of user-provided labels associated with
290
- # the job. You can use these to organize and group your jobs. Label
291
- # keys and values can be no longer than 63 characters, can only
292
- # contain lowercase letters, numeric characters, underscores and
293
- # dashes. International characters are allowed. Label values are
294
- # optional. Label keys must start with a letter and each label in
295
- # the list must have a different key.
403
+ # the job. You can use these to organize and group your jobs.
404
+ #
405
+ # The labels applied to a resource must meet the following requirements:
406
+ #
407
+ # * Each resource can have multiple labels, up to a maximum of 64.
408
+ # * Each label must be a key-value pair.
409
+ # * Keys have a minimum length of 1 character and a maximum length of
410
+ # 63 characters, and cannot be empty. Values can be empty, and have
411
+ # a maximum length of 63 characters.
412
+ # * Keys and values can contain only lowercase letters, numeric characters,
413
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
414
+ # international characters are allowed.
415
+ # * The key portion of a label must be unique. However, you can use the
416
+ # same key with multiple resources.
417
+ # * Keys must start with a lowercase letter or international character.
296
418
  #
297
419
  # @!group Attributes
298
420
  #
@@ -300,6 +422,39 @@ module Google
300
422
  @gapi.configuration.update! labels: value
301
423
  end
302
424
 
425
+ ##
426
+ # Indicate whether to enable extracting applicable column types (such
427
+ # as `TIMESTAMP`) to their corresponding AVRO logical types
428
+ # (`timestamp-micros`), instead of only using their raw types
429
+ # (`avro-long`).
430
+ #
431
+ # Only used when `#format` is set to `"AVRO"` (`#avro?`).
432
+ #
433
+ # @param [Boolean] value Whether applicable column types will use
434
+ # their corresponding AVRO logical types.
435
+ #
436
+ # @!group Attributes
437
+ def use_avro_logical_types= value
438
+ @gapi.configuration.extract.use_avro_logical_types = value
439
+ end
440
+
441
+ def cancel
442
+ raise "not implemented in #{self.class}"
443
+ end
444
+
445
+ def rerun!
446
+ raise "not implemented in #{self.class}"
447
+ end
448
+
449
+ def reload!
450
+ raise "not implemented in #{self.class}"
451
+ end
452
+ alias refresh! reload!
453
+
454
+ def wait_until_done!
455
+ raise "not implemented in #{self.class}"
456
+ end
457
+
303
458
  ##
304
459
  # @private Returns the Google API client library version of this job.
305
460
  #
@@ -309,6 +464,16 @@ module Google
309
464
  @gapi
310
465
  end
311
466
  end
467
+
468
+ protected
469
+
470
+ def retrieve_model project_id, dataset_id, model_id
471
+ ensure_service!
472
+ gapi = service.get_project_model project_id, dataset_id, model_id
473
+ Model.from_gapi_json gapi, service
474
+ rescue Google::Cloud::NotFoundError
475
+ nil
476
+ end
312
477
  end
313
478
  end
314
479
  end
@@ -99,9 +99,7 @@ module Google
99
99
  # data.
100
100
  #
101
101
  def error_rows
102
- Array(@gapi.insert_errors).map do |ie|
103
- @rows[ie.index]
104
- end
102
+ Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
105
103
  end
106
104
 
107
105
  ##
@@ -71,9 +71,9 @@ module Google
71
71
  def next
72
72
  return nil unless next?
73
73
  ensure_service!
74
- next_options = @options.merge token: token
75
- next_gapi = @service.list_jobs next_options
76
- self.class.from_gapi next_gapi, @service, next_options
74
+ next_kwargs = @kwargs.merge token: token
75
+ next_gapi = @service.list_jobs(**next_kwargs)
76
+ self.class.from_gapi next_gapi, @service, **next_kwargs
77
77
  end
78
78
 
79
79
  ##
@@ -121,17 +121,15 @@ module Google
121
121
  # puts job.state
122
122
  # end
123
123
  #
124
- def all request_limit: nil
124
+ def all request_limit: nil, &block
125
125
  request_limit = request_limit.to_i if request_limit
126
- unless block_given?
127
- return enum_for :all, request_limit: request_limit
128
- end
126
+ return enum_for :all, request_limit: request_limit unless block_given?
129
127
  results = self
130
128
  loop do
131
- results.each { |r| yield r }
129
+ results.each(&block)
132
130
  if request_limit
133
131
  request_limit -= 1
134
- break if request_limit < 0
132
+ break if request_limit.negative?
135
133
  end
136
134
  break unless results.next?
137
135
  results = results.next
@@ -141,14 +139,12 @@ module Google
141
139
  ##
142
140
  # @private New Job::List from a Google API Client
143
141
  # Google::Apis::BigqueryV2::JobList object.
144
- def self.from_gapi gapi_list, service, options = {}
145
- jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
146
- Job.from_gapi gapi_object, service
147
- end)
142
+ def self.from_gapi gapi_list, service, **kwargs
143
+ jobs = List.new(Array(gapi_list.jobs).map { |gapi_object| Job.from_gapi gapi_object, service })
148
144
  jobs.instance_variable_set :@token, gapi_list.next_page_token
149
145
  jobs.instance_variable_set :@etag, gapi_list.etag
150
146
  jobs.instance_variable_set :@service, service
151
- jobs.instance_variable_set :@options, options
147
+ jobs.instance_variable_set :@kwargs, kwargs
152
148
  jobs
153
149
  end
154
150