google-cloud-bigquery 1.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
@@ -0,0 +1,368 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Bigquery
19
+ ##
20
+ # # ExtractJob
21
+ #
22
+ # A {Job} subclass representing an export operation that may be performed
23
+ # on a {Table}. An ExtractJob instance is created when you call
24
+ # {Table#extract_job}.
25
+ #
26
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
27
+ # Exporting Data From BigQuery
28
+ # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
+ # reference
30
+ #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ # dataset = bigquery.dataset "my_dataset"
36
+ # table = dataset.table "my_table"
37
+ #
38
+ # extract_job = table.extract_job "gs://my-bucket/file-name.json",
39
+ # format: "json"
40
+ # extract_job.wait_until_done!
41
+ # extract_job.done? #=> true
42
+ #
43
+ class ExtractJob < Job
44
+ ##
45
+ # The URI or URIs representing the Google Cloud Storage files to which
46
+ # the data is exported.
47
+ def destinations
48
+ Array @gapi.configuration.extract.destination_uris
49
+ end
50
+
51
+ ##
52
+ # The table from which the data is exported. This is the table upon
53
+ # which {Table#extract_job} was called.
54
+ #
55
+ # @return [Table] A table instance.
56
+ #
57
+ def source
58
+ table = @gapi.configuration.extract.source_table
59
+ return nil unless table
60
+ retrieve_table table.project_id, table.dataset_id, table.table_id
61
+ end
62
+
63
+ ##
64
+ # Checks if the export operation compresses the data using gzip. The
65
+ # default is `false`.
66
+ #
67
+ # @return [Boolean] `true` when `GZIP`, `false` otherwise.
68
+ #
69
+ def compression?
70
+ val = @gapi.configuration.extract.compression
71
+ val == "GZIP"
72
+ end
73
+
74
+ ##
75
+ # Checks if the destination format for the data is [newline-delimited
76
+ # JSON](http://jsonlines.org/). The default is `false`.
77
+ #
78
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
+ # otherwise.
80
+ #
81
+ def json?
82
+ val = @gapi.configuration.extract.destination_format
83
+ val == "NEWLINE_DELIMITED_JSON"
84
+ end
85
+
86
+ ##
87
+ # Checks if the destination format for the data is CSV. Tables with
88
+ # nested or repeated fields cannot be exported as CSV. The default is
89
+ # `true`.
90
+ #
91
+ # @return [Boolean] `true` when `CSV`, `false` otherwise.
92
+ #
93
+ def csv?
94
+ val = @gapi.configuration.extract.destination_format
95
+ return true if val.nil?
96
+ val == "CSV"
97
+ end
98
+
99
+ ##
100
+ # Checks if the destination format for the data is
101
+ # [Avro](http://avro.apache.org/). The default is `false`.
102
+ #
103
+ # @return [Boolean] `true` when `AVRO`, `false` otherwise.
104
+ #
105
+ def avro?
106
+ val = @gapi.configuration.extract.destination_format
107
+ val == "AVRO"
108
+ end
109
+
110
+ ##
111
+ # The character or symbol the operation uses to delimit fields in the
112
+ # exported data. The default is a comma (,).
113
+ #
114
+ # @return [String] A string containing the character, such as `","`.
115
+ #
116
+ def delimiter
117
+ val = @gapi.configuration.extract.field_delimiter
118
+ val = "," if val.nil?
119
+ val
120
+ end
121
+
122
+ ##
123
+ # Checks if the exported data contains a header row. The default is
124
+ # `true`.
125
+ #
126
+ # @return [Boolean] `true` when the print header configuration is
127
+ # present or `nil`, `false` otherwise.
128
+ #
129
+ def print_header?
130
+ val = @gapi.configuration.extract.print_header
131
+ val = true if val.nil?
132
+ val
133
+ end
134
+
135
+ ##
136
+ # The number of files per destination URI or URI pattern specified in
137
+ # {#destinations}.
138
+ #
139
+ # @return [Array<Integer>] An array of values in the same order as the
140
+ # URI patterns.
141
+ #
142
+ def destinations_file_counts
143
+ Array @gapi.statistics.extract.destination_uri_file_counts
144
+ end
145
+
146
+ ##
147
+ # A hash containing the URI or URI pattern specified in
148
+ # {#destinations} mapped to the counts of files per destination.
149
+ #
150
+ # @return [Hash<String, Integer>] A Hash with the URI patterns as keys
151
+ # and the counts as values.
152
+ #
153
+ def destinations_counts
154
+ Hash[destinations.zip destinations_file_counts]
155
+ end
156
+
157
+ ##
158
+ # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
159
+ # whether to enable extracting applicable column types (such as
160
+ # `TIMESTAMP`) to their corresponding AVRO logical types
161
+ # (`timestamp-micros`), instead of only using their raw types
162
+ # (`avro-long`).
163
+ #
164
+ # @return [Boolean] `true` when applicable column types will use their
165
+ # corresponding AVRO logical types, `false` otherwise.
166
+ #
167
+ def use_avro_logical_types?
168
+ @gapi.configuration.extract.use_avro_logical_types
169
+ end
170
+
171
+ ##
172
+ # Yielded to a block to accumulate changes for an API request.
173
+ class Updater < ExtractJob
174
+ ##
175
+ # @private Create an Updater object.
176
+ def initialize gapi
177
+ @gapi = gapi
178
+ end
179
+
180
+ ##
181
+ # @private Create an Updater from an options hash.
182
+ #
183
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
+ # configuration object for setting query options.
185
+ def self.from_options service, table, storage_files, options
186
+ job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
+ storage_urls = Array(storage_files).map do |url|
188
+ url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
+ end
190
+ options[:format] ||= Convert.derive_source_format storage_urls.first
191
+ job = Google::Apis::BigqueryV2::Job.new(
192
+ job_reference: job_ref,
193
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
+ extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
+ destination_uris: Array(storage_urls),
196
+ source_table: table
197
+ ),
198
+ dry_run: options[:dryrun]
199
+ )
200
+ )
201
+
202
+ from_job_and_options job, options
203
+ end
204
+
205
+ ##
206
+ # @private Create an Updater from a Job and options hash.
207
+ #
208
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
209
+ # configuration object for setting query options.
210
+ def self.from_job_and_options request, options
211
+ updater = ExtractJob::Updater.new request
212
+ updater.compression = options[:compression]
213
+ updater.delimiter = options[:delimiter]
214
+ updater.format = options[:format]
215
+ updater.header = options[:header]
216
+ updater.labels = options[:labels] if options[:labels]
217
+ unless options[:use_avro_logical_types].nil?
218
+ updater.use_avro_logical_types = options[:use_avro_logical_types]
219
+ end
220
+ updater
221
+ end
222
+
223
+ ##
224
+ # Sets the geographic location where the job should run. Required
225
+ # except for US and EU.
226
+ #
227
+ # @param [String] value A geographic location, such as "US", "EU" or
228
+ # "asia-northeast1". Required except for US and EU.
229
+ #
230
+ # @example
231
+ # require "google/cloud/bigquery"
232
+ #
233
+ # bigquery = Google::Cloud::Bigquery.new
234
+ # dataset = bigquery.dataset "my_dataset"
235
+ # table = dataset.table "my_table"
236
+ #
237
+ # destination = "gs://my-bucket/file-name.csv"
238
+ # extract_job = table.extract_job destination do |j|
239
+ # j.location = "EU"
240
+ # end
241
+ #
242
+ # extract_job.wait_until_done!
243
+ # extract_job.done? #=> true
244
+ #
245
+ # @!group Attributes
246
+ def location= value
247
+ @gapi.job_reference.location = value
248
+ return unless value.nil?
249
+
250
+ # Treat assigning value of nil the same as unsetting the value.
251
+ unset = @gapi.job_reference.instance_variables.include? :@location
252
+ @gapi.job_reference.remove_instance_variable :@location if unset
253
+ end
254
+
255
+ ##
256
+ # Sets the compression type.
257
+ #
258
+ # @param [String] value The compression type to use for exported
259
+ # files. Possible values include `GZIP` and `NONE`. The default
260
+ # value is `NONE`.
261
+ #
262
+ # @!group Attributes
263
+ def compression= value
264
+ @gapi.configuration.extract.compression = value
265
+ end
266
+
267
+ ##
268
+ # Sets the field delimiter.
269
+ #
270
+ # @param [String] value Delimiter to use between fields in the
271
+ # exported data. Default is <code>,</code>.
272
+ #
273
+ # @!group Attributes
274
+ def delimiter= value
275
+ @gapi.configuration.extract.field_delimiter = value
276
+ end
277
+
278
+ ##
279
+ # Sets the destination file format. The default value is `csv`.
280
+ #
281
+ # The following values are supported:
282
+ #
283
+ # * `csv` - CSV
284
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
+ # * `avro` - [Avro](http://avro.apache.org/)
286
+ #
287
+ # @param [String] new_format The new source format.
288
+ #
289
+ # @!group Attributes
290
+ #
291
+ def format= new_format
292
+ @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
293
+ end
294
+
295
+ ##
296
+ # Print a header row in the exported file.
297
+ #
298
+ # @param [Boolean] value Whether to print out a header row in the
299
+ # results. Default is `true`.
300
+ #
301
+ # @!group Attributes
302
+ def header= value
303
+ @gapi.configuration.extract.print_header = value
304
+ end
305
+
306
+ ##
307
+ # Sets the labels to use for the job.
308
+ #
309
+ # @param [Hash] value A hash of user-provided labels associated with
310
+ # the job. You can use these to organize and group your jobs. Label
311
+ # keys and values can be no longer than 63 characters, can only
312
+ # contain lowercase letters, numeric characters, underscores and
313
+ # dashes. International characters are allowed. Label values are
314
+ # optional. Label keys must start with a letter and each label in
315
+ # the list must have a different key.
316
+ #
317
+ # @!group Attributes
318
+ #
319
+ def labels= value
320
+ @gapi.configuration.update! labels: value
321
+ end
322
+
323
+ ##
324
+ # Indicate whether to enable extracting applicable column types (such
325
+ # as `TIMESTAMP`) to their corresponding AVRO logical types
326
+ # (`timestamp-micros`), instead of only using their raw types
327
+ # (`avro-long`).
328
+ #
329
+ # Only used when `#format` is set to `"AVRO"` (`#avro?`).
330
+ #
331
+ # @param [Boolean] value Whether applicable column types will use
332
+ # their corresponding AVRO logical types.
333
+ #
334
+ # @!group Attributes
335
+ def use_avro_logical_types= value
336
+ @gapi.configuration.extract.use_avro_logical_types = value
337
+ end
338
+
339
+ def cancel
340
+ raise "not implemented in #{self.class}"
341
+ end
342
+
343
+ def rerun!
344
+ raise "not implemented in #{self.class}"
345
+ end
346
+
347
+ def reload!
348
+ raise "not implemented in #{self.class}"
349
+ end
350
+ alias refresh! reload!
351
+
352
+ def wait_until_done!
353
+ raise "not implemented in #{self.class}"
354
+ end
355
+
356
+ ##
357
+ # @private Returns the Google API client library version of this job.
358
+ #
359
+ # @return [<Google::Apis::BigqueryV2::Job>] (See
360
+ # {Google::Apis::BigqueryV2::Job})
361
+ def to_gapi
362
+ @gapi
363
+ end
364
+ end
365
+ end
366
+ end
367
+ end
368
+ end
@@ -0,0 +1,180 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "json"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ ##
22
+ # InsertResponse
23
+ #
24
+ # Represents the response from BigQuery when data is inserted into a table
25
+ # for near-immediate querying, without the need to complete a load
26
+ # operation before the data can appear in query results. See
27
+ # {Dataset#insert} and {Table#insert}.
28
+ #
29
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
30
+ # Streaming Data Into BigQuery
31
+ #
32
+ # @example
33
+ # require "google/cloud/bigquery"
34
+ #
35
+ # bigquery = Google::Cloud::Bigquery.new
36
+ # dataset = bigquery.dataset "my_dataset"
37
+ #
38
+ # rows = [
39
+ # { "first_name" => "Alice", "age" => 21 },
40
+ # { "first_name" => "Bob", "age" => 22 }
41
+ # ]
42
+ #
43
+ # insert_response = dataset.insert "my_table", rows
44
+ #
45
+ class InsertResponse
46
+ # @private
47
+ def initialize rows, gapi
48
+ @rows = rows
49
+ @gapi = gapi
50
+ end
51
+
52
+ ##
53
+ # Checks if the error count is zero, meaning that all of the rows were
54
+ # inserted. Use {#insert_errors} to access the errors.
55
+ #
56
+ # @return [Boolean] `true` when the error count is zero, `false`
57
+ # otherwise.
58
+ #
59
+ def success?
60
+ error_count.zero?
61
+ end
62
+
63
+ ##
64
+ # The count of rows in the response, minus the count of errors for rows
65
+ # that were not inserted.
66
+ #
67
+ # @return [Integer] The number of rows inserted.
68
+ #
69
+ def insert_count
70
+ @rows.count - error_count
71
+ end
72
+
73
+ ##
74
+ # The count of errors for rows that were not inserted.
75
+ #
76
+ # @return [Integer] The number of errors.
77
+ #
78
+ def error_count
79
+ Array(@gapi.insert_errors).count
80
+ end
81
+
82
+ ##
83
+ # The error objects for rows that were not inserted.
84
+ #
85
+ # @return [Array<InsertError>] An array containing error objects.
86
+ #
87
+ def insert_errors
88
+ Array(@gapi.insert_errors).map do |ie|
89
+ row = @rows[ie.index]
90
+ errors = ie.errors.map { |e| JSON.parse e.to_json }
91
+ InsertError.new ie.index, row, errors
92
+ end
93
+ end
94
+
95
+ ##
96
+ # The rows that were not inserted.
97
+ #
98
+ # @return [Array<Hash>] An array of hash objects containing the row
99
+ # data.
100
+ #
101
+ def error_rows
102
+ Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
103
+ end
104
+
105
+ ##
106
+ # Returns the error object for a row that was not inserted.
107
+ #
108
+ # @param [Hash] row A hash containing the data for a row.
109
+ #
110
+ # @return [InsertError, nil] An error object, or `nil` if no error is
111
+ # found in the response for the row.
112
+ #
113
+ def insert_error_for row
114
+ insert_errors.detect { |e| e.row == row }
115
+ end
116
+
117
+ ##
118
+ # Returns the error hashes for a row that was not inserted. Each error
119
+ # hash contains the following keys: `reason`, `location`, `debugInfo`,
120
+ # and `message`.
121
+ #
122
+ # @param [Hash] row A hash containing the data for a row.
123
+ #
124
+ # @return [Array<Hash>] An array of error hashes; an empty array if no
125
+ # errors are found in the response for the row.
126
+ #
127
+ def errors_for row
128
+ ie = insert_error_for row
129
+ return ie.errors if ie
130
+ []
131
+ end
132
+
133
+ ##
134
+ # Returns the index for a row that was not inserted.
135
+ #
136
+ # @param [Hash] row A hash containing the data for a row.
137
+ #
138
+ # @return [Integer, nil] The index of the row, or `nil` if no error is
139
+ # found in the response for the row.
140
+ #
141
+ def index_for row
142
+ ie = insert_error_for row
143
+ return ie.index if ie
144
+ nil
145
+ end
146
+
147
+ # @private New InsertResponse from the inserted rows and a
148
+ # Google::Apis::BigqueryV2::InsertAllTableDataResponse object.
149
+ def self.from_gapi rows, gapi
150
+ new rows, gapi
151
+ end
152
+
153
+ ##
154
+ # InsertError
155
+ #
156
+ # Represents the errors for a row that was not inserted.
157
+ #
158
+ # @attr_reader [Integer] index The index of the row that the error applies
159
+ # to.
160
+ # @attr_reader [Hash] row The row that the error applies to.
161
+ # @attr_reader [Hash] errors Error information for the row indicated by
162
+ # the index property, with the following keys: `reason`, `location`,
163
+ # `debugInfo`, and `message`.
164
+ #
165
+ class InsertError
166
+ attr_reader :index
167
+ attr_reader :row
168
+ attr_reader :errors
169
+
170
+ # @private
171
+ def initialize index, row, errors
172
+ @index = index
173
+ @row = row
174
+ @errors = errors
175
+ end
176
+ end
177
+ end
178
+ end
179
+ end
180
+ end