google-cloud-bigquery 1.12.0 → 1.38.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
data/lib/google/cloud/bigquery/model/list.rb
@@ -124,17 +124,15 @@ module Google
  # puts model.model_id
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
    request_limit = request_limit.to_i if request_limit
-   unless block_given?
-     return enum_for :all, request_limit: request_limit
-   end
+   return enum_for :all, request_limit: request_limit unless block_given?
    results = self
    loop do
-     results.each { |r| yield r }
+     results.each(&block)
      if request_limit
        request_limit -= 1
-       break if request_limit < 0
+       break if request_limit.negative?
      end
      break unless results.next?
      results = results.next
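
The refactored `Model::List#all` keeps its public behavior: it yields every model when given a block and returns an `Enumerator` otherwise, with `request_limit` capping the number of additional page requests. A minimal usage sketch (not part of the diff), assuming a dataset named `my_dataset` that already contains models:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  dataset  = bigquery.dataset "my_dataset"

  # Block form: extra result pages are fetched as needed, at most 10 more requests.
  dataset.models.all(request_limit: 10) { |model| puts model.model_id }

  # Enumerator form, enabled by the `enum_for` early return shown above.
  model_ids = dataset.models.all.map(&:model_id)
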
@@ -144,9 +142,7 @@ module Google
  ##
  # @private New Model::List from a response object.
  def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
-   models = List.new(Array(gapi_list[:models]).map do |gapi_json|
-     Model.from_gapi_json gapi_json, service
-   end)
+   models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
    models.instance_variable_set :@token, gapi_list[:nextPageToken]
    models.instance_variable_set :@service, service
    models.instance_variable_set :@dataset_id, dataset_id
data/lib/google/cloud/bigquery/model.rb
@@ -87,8 +87,8 @@ module Google
  ##
  # A unique ID for this model.
  #
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -100,8 +100,8 @@ module Google
  ##
  # The ID of the `Dataset` containing this model.
  #
- # @return [String] The ID must contain only letters (a-z, A-Z), numbers
- # (0-9), or underscores (_). The maximum length is 1,024 characters.
+ # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+ # (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -341,14 +341,19 @@ module Google
  # the update to comply with ETag-based optimistic concurrency control.
  #
  # @param [Hash<String, String>] new_labels A hash containing key/value
- # pairs.
- #
- # * Label keys and values can be no longer than 63 characters.
- # * Label keys and values can contain only lowercase letters, numbers,
- # underscores, hyphens, and international characters.
- # * Label keys and values cannot exceed 128 bytes in size.
- # * Label keys must begin with a letter.
- # * Label keys must be unique within a model.
+ # pairs. The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -366,6 +371,79 @@ module Google
    patch_gapi! labels: new_labels
  end

+ ##
+ # The {EncryptionConfiguration} object that represents the custom
+ # encryption method used to protect this model. If not set,
+ # {Dataset#default_encryption} is used.
+ #
+ # Present only if this model is using custom encryption.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ # Protecting Data with Cloud KMS Keys
+ #
+ # @return [EncryptionConfiguration, nil] The encryption configuration.
+ #
+ # @!group Attributes
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # encrypt_config = model.encryption
+ #
+ # @!group Attributes
+ #
+ def encryption
+   return nil if reference?
+   return nil if @gapi_json[:encryptionConfiguration].nil?
+   # We have to create a gapic object from the hash because that is what
+   # EncryptionConfiguration is expecting.
+   json_cmek = @gapi_json[:encryptionConfiguration].to_json
+   gapi_cmek = Google::Apis::BigqueryV2::EncryptionConfiguration.from_json json_cmek
+   EncryptionConfiguration.from_gapi(gapi_cmek).freeze
+ end
+
+ ##
+ # Set the {EncryptionConfiguration} object that represents the custom
+ # encryption method used to protect this model. If not set,
+ # {Dataset#default_encryption} is used.
+ #
+ # Present only if this model is using custom encryption.
+ #
+ # If the model is not a full resource representation (see
+ # {#resource_full?}), the full representation will be retrieved before
+ # the update to comply with ETag-based optimistic concurrency control.
+ #
+ # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+ # Protecting Data with Cloud KMS Keys
+ #
+ # @param [EncryptionConfiguration] value The new encryption config.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
+ # encrypt_config = bigquery.encryption kms_key: key_name
+ #
+ # model.encryption = encrypt_config
+ #
+ # @!group Attributes
+ #
+ def encryption= value
+   ensure_full_data!
+   # We have to create a hash from the gapic object's JSON because that
+   # is what Model is expecting.
+   json_cmek = JSON.parse value.to_gapi.to_json, symbolize_names: true
+   patch_gapi! encryptionConfiguration: json_cmek
+ end
+
  ##
  # The input feature columns that were used to train this model.
  #
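
A short sketch of the new CMEK accessors added above, not part of the diff. The key path is the same placeholder used in the YARD example, and `bigquery.encryption` / `EncryptionConfiguration#kms_key` are the existing project-level helpers:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  if model.encryption.nil?
    # No custom key yet; the dataset's default encryption (if any) applies.
    key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
    model.encryption = bigquery.encryption(kms_key: key_name)
  else
    puts model.encryption.kms_key
  end
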
@@ -376,7 +454,8 @@ module Google
  def feature_columns
    ensure_full_data!
    Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
-     StandardSql::Field.from_gapi_json field_gapi_json
+     field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
+     StandardSql::Field.from_gapi field_gapi
    end
  end

@@ -391,7 +470,8 @@ module Google
  def label_columns
    ensure_full_data!
    Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
-     field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
-     StandardSql::Field.from_gapi field_gapi
+     field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
+     StandardSql::Field.from_gapi field_gapi
    end
  end

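
Both methods now round-trip through the gapic `StandardSqlField` class before building `StandardSql::Field` values. A usage sketch (not part of the diff) that prints the training schema, assuming `StandardSql::Field#name` and `#type.type_kind` behave as in the released API:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  model.feature_columns.each { |f| puts "feature #{f.name}: #{f.type.type_kind}" }
  model.label_columns.each   { |f| puts "label   #{f.name}: #{f.type.type_kind}" }
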
@@ -407,6 +487,146 @@ module Google
    Array @gapi_json[:trainingRuns]
  end

+ ##
+ # Exports the model to Google Cloud Storage asynchronously, immediately
+ # returning an {ExtractJob} that can be used to track the progress of the
+ # export job. The caller may poll the service by repeatedly calling
+ # {Job#reload!} and {Job#done?} to detect when the job is done, or
+ # simply block until the job is done by calling #{Job#wait_until_done!}.
+ # See also {#extract}.
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
+ # the model is a full resource representation (see {#resource_full?}),
+ # the location of the job will automatically be set to the location of
+ # the model.
+ #
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
+ #
+ # @param [String] extract_url The Google Storage URI to which BigQuery
+ # should extract the model. This value should end in an object name
+ # prefix, since multiple objects will be exported.
+ # @param [String] format The exported file format. The default value is
+ # `ml_tf_saved_model`.
+ #
+ # The following values are supported:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ # @param [String] job_id A user-defined ID for the extract job. The ID
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+ # `job_id` is provided, then `prefix` will not be used.
+ #
+ # See [Generating a job
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ # prepended to a generated value to produce a unique job ID. For
+ # example, the prefix `daily_import_job_` can be given to generate a
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ # be used.
+ # @param [Hash] labels A hash of user-provided labels associated with
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
+ #
+ # @yield [job] a job configuration object
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+ # configuration object for setting additional options.
+ #
+ # @return [Google::Cloud::Bigquery::ExtractJob]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+ #
+ # extract_job.wait_until_done!
+ # extract_job.done? #=> true
+ #
+ # @!group Data
+ #
+ def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
+   ensure_service!
+   options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
+   updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
+   updater.location = location if location # may be model reference
+
+   yield updater if block_given?
+
+   job_gapi = updater.to_gapi
+   gapi = service.extract_table job_gapi
+   Job.from_gapi gapi, service
+ end
+
+ ##
+ # Exports the model to Google Cloud Storage using a synchronous method
+ # that blocks for a response. Timeouts and transient errors are generally
+ # handled as needed to complete the job. See also {#extract_job}.
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {ExtractJob::Updater#location=} in a block passed to this method. If
+ # the model is a full resource representation (see {#resource_full?}),
+ # the location of the job will automatically be set to the location of
+ # the model.
+ #
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
+ #
+ # @param [String] extract_url The Google Storage URI to which BigQuery
+ # should extract the model. This value should end in an object name
+ # prefix, since multiple objects will be exported.
+ # @param [String] format The exported file format. The default value is
+ # `ml_tf_saved_model`.
+ #
+ # The following values are supported:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ # @yield [job] a job configuration object
+ # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+ # configuration object for setting additional options.
+ #
+ # @return [Boolean] Returns `true` if the extract operation succeeded.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # model.extract "gs://my-bucket/#{model.model_id}"
+ #
+ # @!group Data
+ #
+ def extract extract_url, format: nil, &block
+   job = extract_job extract_url, format: format, &block
+   job.wait_until_done!
+   ensure_job_succeeded! job
+   true
+ end
+
  ##
  # Permanently deletes the model.
  #
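
The docs above describe two ways to wait on the export: polling with `Job#reload!`/`Job#done?`, or blocking with `Job#wait_until_done!`. A polling sketch (not part of the diff), with the bucket name as a placeholder and the explicit `location=` only relevant when the model is a bare reference:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  extract_job = model.extract_job "gs://my-bucket/#{model.model_id}" do |job|
    job.location = "EU" # only needed when the model's location is unknown
  end

  # Manual polling; model.extract or extract_job.wait_until_done! blocks instead.
  until extract_job.done?
    sleep 5
    extract_job.reload!
  end
  warn extract_job.error["message"] if extract_job.failed?
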
@@ -481,7 +701,7 @@ module Google
  # model = dataset.model "my_model", skip_lookup: true
  # model.exists? #=> true
  #
- def exists? force: nil
+ def exists? force: false
    return resource_exists? if force
    # If we have a value, return it
    return @exists unless @exists.nil?
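
The new `force: false` default makes the lazy behavior explicit. A brief sketch (not part of the diff) contrasting the two modes for a `skip_lookup` reference:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model", skip_lookup: true

  model.exists?              # lazy: answers from cached state when possible
  model.exists? force: true  # always verifies against the service
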
@@ -595,7 +815,7 @@ module Google
  end

  ##
- # @private New lazy Model object without making an HTTP request.
+ # @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
  def self.new_reference project_id, dataset_id, model_id, service
    raise ArgumentError, "project_id is required" unless project_id
    raise ArgumentError, "dataset_id is required" unless dataset_id
@@ -659,6 +879,17 @@ module Google
  def ensure_full_data!
    reload! unless resource_full?
  end
+
+ def ensure_job_succeeded! job
+   return unless job.failed?
+   begin
+     # raise to activate ruby exception cause handling
+     raise job.gapi_error
+   rescue StandardError => e
+     # wrap Google::Apis::Error with Google::Cloud::Error
+     raise Google::Cloud::Error.from_error(e)
+   end
+ end
  end
  end
  end
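
The raise/rescue pattern in `ensure_job_succeeded!` exists so that the wrapping `Google::Cloud::Error` keeps the underlying `Google::Apis::Error` as its Ruby exception cause. A hedged sketch of what a caller of the new `Model#extract` might see on failure:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  model = bigquery.dataset("my_dataset").model "my_model"

  begin
    model.extract "gs://my-bucket/#{model.model_id}"
  rescue Google::Cloud::Error => e
    # e.cause is the original error raised inside ensure_job_succeeded!
    warn "export failed: #{e.message} (#{e.cause.class})"
  end
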