RubyGems - google-cloud-bigquery - Versions diffs - 1.14.0 → 1.42.0 - Mend

google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

checksums.yaml +4 -4
data/AUTHENTICATION.md +17 -54
data/CHANGELOG.md +377 -0
data/CONTRIBUTING.md +328 -116
data/LOGGING.md +1 -1
data/OVERVIEW.md +21 -20
data/TROUBLESHOOTING.md +2 -8
data/lib/google/cloud/bigquery/argument.rb +197 -0
data/lib/google/cloud/bigquery/convert.rb +155 -173
data/lib/google/cloud/bigquery/copy_job.rb +74 -26
data/lib/google/cloud/bigquery/credentials.rb +5 -12
data/lib/google/cloud/bigquery/data.rb +109 -18
data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
data/lib/google/cloud/bigquery/dataset.rb +1044 -287
data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
data/lib/google/cloud/bigquery/external.rb +50 -2256
data/lib/google/cloud/bigquery/extract_job.rb +226 -61
data/lib/google/cloud/bigquery/insert_response.rb +1 -3
data/lib/google/cloud/bigquery/job/list.rb +10 -14
data/lib/google/cloud/bigquery/job.rb +289 -14
data/lib/google/cloud/bigquery/load_job.rb +810 -136
data/lib/google/cloud/bigquery/model/list.rb +5 -9
data/lib/google/cloud/bigquery/model.rb +247 -16
data/lib/google/cloud/bigquery/policy.rb +432 -0
data/lib/google/cloud/bigquery/project/list.rb +6 -11
data/lib/google/cloud/bigquery/project.rb +509 -250
data/lib/google/cloud/bigquery/query_job.rb +594 -128
data/lib/google/cloud/bigquery/routine/list.rb +165 -0
data/lib/google/cloud/bigquery/routine.rb +1227 -0
data/lib/google/cloud/bigquery/schema/field.rb +413 -63
data/lib/google/cloud/bigquery/schema.rb +221 -48
data/lib/google/cloud/bigquery/service.rb +204 -112
data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
data/lib/google/cloud/bigquery/table/list.rb +6 -11
data/lib/google/cloud/bigquery/table.rb +1470 -377
data/lib/google/cloud/bigquery/time.rb +6 -0
data/lib/google/cloud/bigquery/version.rb +1 -1
data/lib/google/cloud/bigquery.rb +4 -6
data/lib/google-cloud-bigquery.rb +14 -13
metadata +66 -38

data/lib/google/cloud/bigquery/model/list.rb CHANGED Viewed

@@ -124,17 +124,15 @@ module Google
           #     puts model.model_id
           #   end
           #
-          def all request_limit: nil
+          def all request_limit: nil, &block
             request_limit = request_limit.to_i if request_limit
-            unless block_given?
-              return enum_for :all, request_limit: request_limit
-            end
+            return enum_for :all, request_limit: request_limit unless block_given?
             results = self
             loop do
-              results.each { |r| yield r }
+              results.each(&block)
               if request_limit
                 request_limit -= 1
-                break if request_limit < 0
+                break if request_limit.negative?
               end
               break unless results.next?
               results = results.next
@@ -144,9 +142,7 @@ module Google
           ##
           # @private New Model::List from a response object.
           def self.from_gapi gapi_list, service, dataset_id = nil, max = nil
-            models = List.new(Array(gapi_list[:models]).map do |gapi_json|
-              Model.from_gapi_json gapi_json, service
-            end)
+            models = List.new(Array(gapi_list[:models]).map { |gapi_json| Model.from_gapi_json gapi_json, service })
             models.instance_variable_set :@token,      gapi_list[:nextPageToken]
             models.instance_variable_set :@service,    service
             models.instance_variable_set :@dataset_id, dataset_id

data/lib/google/cloud/bigquery/model.rb CHANGED Viewed

@@ -87,8 +87,8 @@ module Google
         ##
         # A unique ID for this model.
         #
-        # @return [String] The ID must contain only letters (a-z, A-Z), numbers
-        #   (0-9), or underscores (_). The maximum length is 1,024 characters.
+        # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+        #   (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
         #
         # @!group Attributes
         #
@@ -100,8 +100,8 @@ module Google
         ##
         # The ID of the `Dataset` containing this model.
         #
-        # @return [String] The ID must contain only letters (a-z, A-Z), numbers
-        #   (0-9), or underscores (_). The maximum length is 1,024 characters.
+        # @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
+        #   (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
         #
         # @!group Attributes
         #
@@ -341,14 +341,19 @@ module Google
         # the update to comply with ETag-based optimistic concurrency control.
         #
         # @param [Hash<String, String>] new_labels A hash containing key/value
-        #   pairs.
-        #
-        #   * Label keys and values can be no longer than 63 characters.
-        #   * Label keys and values can contain only lowercase letters, numbers,
-        #     underscores, hyphens, and international characters.
-        #   * Label keys and values cannot exceed 128 bytes in size.
-        #   * Label keys must begin with a letter.
-        #   * Label keys must be unique within a model.
+        #   pairs. The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
         #
         # @example
         #   require "google/cloud/bigquery"
@@ -366,6 +371,79 @@ module Google
           patch_gapi! labels: new_labels
         end
+        ##
+        # The {EncryptionConfiguration} object that represents the custom
+        # encryption method used to protect this model. If not set,
+        # {Dataset#default_encryption} is used.
+        #
+        # Present only if this model is using custom encryption.
+        #
+        # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+        #   Protecting Data with Cloud KMS Keys
+        #
+        # @return [EncryptionConfiguration, nil] The encryption configuration.
+        #
+        #   @!group Attributes
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   model = dataset.model "my_model"
+        #
+        #   encrypt_config = model.encryption
+        #
+        # @!group Attributes
+        #
+        def encryption
+          return nil if reference?
+          return nil if @gapi_json[:encryptionConfiguration].nil?
+          # We have to create a gapic object from the hash because that is what
+          # EncryptionConfiguration is expecting.
+          json_cmek = @gapi_json[:encryptionConfiguration].to_json
+          gapi_cmek = Google::Apis::BigqueryV2::EncryptionConfiguration.from_json json_cmek
+          EncryptionConfiguration.from_gapi(gapi_cmek).freeze
+        end
+        ##
+        # Set the {EncryptionConfiguration} object that represents the custom
+        # encryption method used to protect this model. If not set,
+        # {Dataset#default_encryption} is used.
+        #
+        # Present only if this model is using custom encryption.
+        #
+        # If the model is not a full resource representation (see
+        # {#resource_full?}), the full representation will be retrieved before
+        # the update to comply with ETag-based optimistic concurrency control.
+        #
+        # @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
+        #   Protecting Data with Cloud KMS Keys
+        #
+        # @param [EncryptionConfiguration] value The new encryption config.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   model = dataset.model "my_model"
+        #
+        #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
+        #   encrypt_config = bigquery.encryption kms_key: key_name
+        #
+        #   model.encryption = encrypt_config
+        #
+        # @!group Attributes
+        #
+        def encryption= value
+          ensure_full_data!
+          # We have to create a hash from the gapic object's JSON because that
+          # is what Model is expecting.
+          json_cmek = JSON.parse value.to_gapi.to_json, symbolize_names: true
+          patch_gapi! encryptionConfiguration: json_cmek
+        end
         ##
         # The input feature columns that were used to train this model.
         #
@@ -376,7 +454,8 @@ module Google
         def feature_columns
           ensure_full_data!
           Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
-            StandardSql::Field.from_gapi_json field_gapi_json
+            field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
+            StandardSql::Field.from_gapi field_gapi
           end
         end
@@ -391,7 +470,8 @@ module Google
         def label_columns
           ensure_full_data!
           Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
-            StandardSql::Field.from_gapi_json field_gapi_json
+            field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
+            StandardSql::Field.from_gapi field_gapi
           end
         end
@@ -407,6 +487,146 @@ module Google
           Array @gapi_json[:trainingRuns]
         end
+        ##
+        # Exports the model to Google Cloud Storage asynchronously, immediately
+        # returning an {ExtractJob} that can be used to track the progress of the
+        # export job. The caller may poll the service by repeatedly calling
+        # {Job#reload!} and {Job#done?} to detect when the job is done, or
+        # simply block until the job is done by calling {Job#wait_until_done!}.
+        # See also {#extract}.
+        #
+        # The geographic location for the job ("US", "EU", etc.) can be set via
+        # {ExtractJob::Updater#location=} in a block passed to this method. If
+        # the model is a full resource representation (see {#resource_full?}),
+        # the location of the job will automatically be set to the location of
+        # the model.
+        #
+        # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+        #   Exporting models
+        #
+        # @param [String] extract_url The Google Storage URI to which BigQuery
+        #   should extract the model. This value should end in an object name
+        #   prefix, since multiple objects will be exported.
+        # @param [String] format The exported file format. The default value is
+        #   `ml_tf_saved_model`.
+        #
+        #   The following values are supported:
+        #
+        #   * `ml_tf_saved_model` - TensorFlow SavedModel
+        #   * `ml_xgboost_booster` - XGBoost Booster
+        # @param [String] job_id A user-defined ID for the extract job. The ID
+        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+        #   `job_id` is provided, then `prefix` will not be used.
+        #
+        #   See [Generating a job
+        #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+        # @param [String] prefix A string, usually human-readable, that will be
+        #   prepended to a generated value to produce a unique job ID. For
+        #   example, the prefix `daily_import_job_` can be given to generate a
+        #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+        #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+        #   be used.
+        # @param [Hash] labels A hash of user-provided labels associated with
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
+        #
+        # @yield [job] a job configuration object
+        # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+        #   configuration object for setting additional options.
+        #
+        # @return [Google::Cloud::Bigquery::ExtractJob]
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   model = dataset.model "my_model"
+        #
+        #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+        #
+        #   extract_job.wait_until_done!
+        #   extract_job.done? #=> true
+        #
+        # @!group Data
+        #
+        def extract_job extract_url, format: nil, job_id: nil, prefix: nil, labels: nil
+          ensure_service!
+          options = { format: format, job_id: job_id, prefix: prefix, labels: labels }
+          updater = ExtractJob::Updater.from_options service, model_ref, extract_url, options
+          updater.location = location if location # may be model reference
+          yield updater if block_given?
+          job_gapi = updater.to_gapi
+          gapi = service.extract_table job_gapi
+          Job.from_gapi gapi, service
+        end
+        ##
+        # Exports the model to Google Cloud Storage using a synchronous method
+        # that blocks for a response. Timeouts and transient errors are generally
+        # handled as needed to complete the job. See also {#extract_job}.
+        #
+        # The geographic location for the job ("US", "EU", etc.) can be set via
+        # {ExtractJob::Updater#location=} in a block passed to this method. If
+        # the model is a full resource representation (see {#resource_full?}),
+        # the location of the job will automatically be set to the location of
+        # the model.
+        #
+        # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+        #   Exporting models
+        #
+        # @param [String] extract_url The Google Storage URI to which BigQuery
+        #   should extract the model. This value should end in an object name
+        #   prefix, since multiple objects will be exported.
+        # @param [String] format The exported file format. The default value is
+        #   `ml_tf_saved_model`.
+        #
+        #   The following values are supported:
+        #
+        #   * `ml_tf_saved_model` - TensorFlow SavedModel
+        #   * `ml_xgboost_booster` - XGBoost Booster
+        # @yield [job] a job configuration object
+        # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
+        #   configuration object for setting additional options.
+        #
+        # @return [Boolean] Returns `true` if the extract operation succeeded.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   model = dataset.model "my_model"
+        #
+        #   model.extract "gs://my-bucket/#{model.model_id}"
+        #
+        # @!group Data
+        #
+        def extract extract_url, format: nil, &block
+          job = extract_job extract_url, format: format, &block
+          job.wait_until_done!
+          ensure_job_succeeded! job
+          true
+        end
         ##
         # Permanently deletes the model.
         #
@@ -481,7 +701,7 @@ module Google
         #   model = dataset.model "my_model", skip_lookup: true
         #   model.exists? #=> true
         #
-        def exists? force: nil
+        def exists? force: false
           return resource_exists? if force
           # If we have a value, return it
           return @exists unless @exists.nil?
@@ -595,7 +815,7 @@ module Google
         end
         ##
-        # @private New lazy Model object without making an HTTP request.
+        # @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
         def self.new_reference project_id, dataset_id, model_id, service
           raise ArgumentError, "project_id is required" unless project_id
           raise ArgumentError, "dataset_id is required" unless dataset_id
@@ -659,6 +879,17 @@ module Google
         def ensure_full_data!
           reload! unless resource_full?
         end
+        def ensure_job_succeeded! job
+          return unless job.failed?
+          begin
+            # raise to activate ruby exception cause handling
+            raise job.gapi_error
+          rescue StandardError => e
+            # wrap Google::Apis::Error with Google::Cloud::Error
+            raise Google::Cloud::Error.from_error(e)
+          end
+        end
       end
     end
   end