google-cloud-bigquery 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,171 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "delegate"
17
+
18
module Google
  module Cloud
    module Bigquery
      class Dataset
        ##
        # Dataset::List is a special case Array with additional values.
        class List < DelegateClass(::Array)
          ##
          # If not empty, indicates that there are more records that match
          # the request and this value should be passed to continue.
          attr_accessor :token

          # A hash of this page of results.
          attr_accessor :etag

          ##
          # @private Create a new Dataset::List with an array of datasets.
          def initialize arr = []
            super arr
          end

          ##
          # Whether there is a next page of datasets.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   datasets = bigquery.datasets
          #   if datasets.next?
          #     next_datasets = datasets.next
          #   end
          def next?
            !token.nil?
          end

          ##
          # Retrieves the next page of datasets, or `nil` when {#next?}
          # is `false`.
          #
          # @return [Dataset::List]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   datasets = bigquery.datasets
          #   if datasets.next?
          #     next_datasets = datasets.next
          #   end
          def next
            return nil unless next?
            ensure_service!
            next_options = { all: @hidden, token: token, max: @max }
            next_gapi = @service.list_datasets next_options
            self.class.from_gapi next_gapi, @service, @hidden, @max
          end

          ##
          # Retrieves all datasets by repeatedly loading {#next} until
          # {#next?} returns `false`. Calls the given block once for each
          # dataset, which is passed as the parameter. An Enumerator is
          # returned if no block is given.
          #
          # This method may make several API calls until all datasets are
          # retrieved. Be sure to use as narrow a search criteria as
          # possible. Please use with caution.
          #
          # @param [Integer] request_limit The upper limit of API requests to
          #   make to load all datasets. Default is no limit.
          # @yield [dataset] The block for accessing each dataset.
          # @yieldparam [Dataset] dataset The dataset object.
          #
          # @return [Enumerator]
          #
          # @example Iterating each result by passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.datasets.all do |dataset|
          #     puts dataset.name
          #   end
          #
          # @example Using the enumerator by not passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   all_names = bigquery.datasets.all.map do |dataset|
          #     dataset.name
          #   end
          #
          # @example Limit the number of API calls made:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.datasets.all(request_limit: 10) do |dataset|
          #     puts dataset.name
          #   end
          #
          def all request_limit: nil
            request_limit = request_limit.to_i if request_limit
            unless block_given?
              return enum_for(:all, request_limit: request_limit)
            end
            page = self
            loop do
              page.each { |dataset| yield dataset }
              if request_limit
                request_limit -= 1
                # Stop once the allowed number of list calls is spent.
                break if request_limit < 0
              end
              break unless page.next?
              page = page.next
            end
          end

          ##
          # @private New Dataset::List from a response object.
          def self.from_gapi gapi_list, service, hidden = nil, max = nil
            items = Array(gapi_list.datasets).map do |gapi_object|
              Dataset.from_gapi gapi_object, service
            end
            List.new(items).tap do |list|
              list.instance_variable_set :@token, gapi_list.next_page_token
              list.instance_variable_set :@etag, gapi_list.etag
              list.instance_variable_set :@service, service
              list.instance_variable_set :@hidden, hidden
              list.instance_variable_set :@max, max
            end
          end

          protected

          ##
          # Raise an error unless an active service is available.
          def ensure_service!
            fail "Must have active connection" unless @service
          end
        end
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
module Google
  module Cloud
    module Bigquery
      ##
      # # ExtractJob
      #
      # A {Job} subclass representing an export operation that may be
      # performed on a {Table}. An ExtractJob instance is created when you
      # call {Table#extract}.
      #
      # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
      #   Exporting Data From BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class ExtractJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files to
        # which the data is exported.
        def destinations
          Array extract_config.destination_uris
        end

        ##
        # The table from which the data is exported. This is the table upon
        # which {Table#extract} was called. Returns a {Table} instance.
        def source
          table_ref = extract_config.source_table
          return nil if table_ref.nil?
          retrieve_table table_ref.project_id,
                         table_ref.dataset_id,
                         table_ref.table_id
        end

        ##
        # Checks if the export operation compresses the data using gzip. The
        # default is `false`.
        def compression?
          extract_config.compression == "GZIP"
        end

        ##
        # Checks if the destination format for the data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        def json?
          extract_config.destination_format == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the destination format for the data is CSV. Tables with
        # nested or repeated fields cannot be exported as CSV. The default is
        # `true`.
        def csv?
          format = extract_config.destination_format
          # An unset destination format means CSV, the service default.
          format.nil? || format == "CSV"
        end

        ##
        # Checks if the destination format for the data is
        # [Avro](http://avro.apache.org/). The default is `false`.
        def avro?
          extract_config.destination_format == "AVRO"
        end

        ##
        # The symbol the operation uses to delimit fields in the exported
        # data. The default is a comma (,).
        def delimiter
          separator = extract_config.field_delimiter
          separator.nil? ? "," : separator
        end

        ##
        # Checks if the exported data contains a header row. The default is
        # `true`.
        def print_header?
          header = extract_config.print_header
          header.nil? ? true : header
        end

        ##
        # The count of files per destination URI or URI pattern specified in
        # {#destinations}. Returns an Array of values in the same order as
        # the URI patterns.
        def destinations_file_counts
          Array @gapi.statistics.extract.destination_uri_file_counts
        end

        ##
        # The count of files per destination URI or URI pattern specified in
        # {#destinations}. Returns a Hash with the URI patterns as keys and
        # the counts as values.
        def destinations_counts
          destinations.zip(destinations_file_counts).to_h
        end

        protected

        ##
        # @private The extract portion of the job configuration.
        def extract_config
          @gapi.configuration.extract
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "json"
17
+
18
module Google
  module Cloud
    module Bigquery
      ##
      # InsertResponse
      #
      # Represents the result of inserting rows into a table, pairing the
      # submitted rows with any per-row errors returned by the service.
      class InsertResponse
        # @private
        def initialize rows, gapi
          @rows = rows
          @gapi = gapi
        end

        ##
        # Checks if every row was inserted without error.
        def success?
          error_count.zero?
        end

        ##
        # The count of rows that were inserted successfully.
        def insert_count
          @rows.count - error_count
        end

        ##
        # The count of rows that failed to insert.
        def error_count
          gapi_errors.count
        end

        ##
        # The failed rows paired with their errors. Returns an Array of
        # {InsertError} objects.
        def insert_errors
          gapi_errors.map do |gapi_error|
            failed_row = @rows[gapi_error.index]
            row_errors = gapi_error.errors.map { |e| JSON.parse e.to_json }
            InsertError.new failed_row, row_errors
          end
        end

        ##
        # The rows that failed to insert.
        def error_rows
          gapi_errors.map { |gapi_error| @rows[gapi_error.index] }
        end

        ##
        # The error hashes for the given row, or an empty Array when the row
        # has no errors.
        def errors_for row
          found = insert_errors.detect { |ie| ie.row == row }
          found ? found.errors : []
        end

        # @private New InsertResponse from the inserted rows and a
        # Google::Apis::BigqueryV2::InsertAllTableDataResponse object.
        def self.from_gapi rows, gapi
          new rows, gapi
        end

        ##
        # InsertError
        #
        # Pairs a row that failed to insert with its error hashes.
        class InsertError
          attr_reader :row
          attr_reader :errors

          # @private
          def initialize row, errors
            @row = row
            @errors = errors
          end
        end

        protected

        ##
        # @private The raw insert errors from the API response, as an Array.
        def gapi_errors
          Array @gapi.insert_errors
        end
      end
    end
  end
end
@@ -0,0 +1,301 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/bigquery/service"
18
+ require "google/cloud/bigquery/query_data"
19
+ require "google/cloud/bigquery/job/list"
20
+ require "json"
21
+
22
module Google
  module Cloud
    module Bigquery
      ##
      # # Job
      #
      # Represents a generic Job that may be performed on a {Table}.
      #
      # The subclasses of Job represent the specific BigQuery job types:
      # {CopyJob}, {ExtractJob}, {LoadJob}, and {QueryJob}.
      #
      # A job instance is created when you call {Project#query_job},
      # {Dataset#query_job}, {Table#copy}, {Table#extract}, {Table#load}, or
      # {View#data}.
      #
      # @see https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects
      #   Managing Jobs, Datasets, and Projects
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #
      #   q = "SELECT COUNT(word) as count FROM publicdata:samples.shakespeare"
      #   job = bigquery.query_job q
      #
      #   job.wait_until_done!
      #
      #   if job.failed?
      #     puts job.error
      #   else
      #     puts job.query_results.first
      #   end
      #
      class Job
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # @private The Google API Client object.
        attr_accessor :gapi

        ##
        # @private Create an empty Job object.
        def initialize
          @service = nil
          @gapi = {}
        end

        ##
        # The ID of the job.
        def job_id
          @gapi.job_reference.job_id
        end

        ##
        # The ID of the project containing the job.
        def project_id
          @gapi.job_reference.project_id
        end

        ##
        # The current state of the job. The possible values are `PENDING`,
        # `RUNNING`, and `DONE`. A `DONE` state does not mean that the job
        # completed successfully. Use {#failed?} to discover if an error
        # occurred or if the job was successful.
        def state
          @gapi.status.state unless @gapi.status.nil?
        end

        ##
        # Checks if the job's state is `RUNNING`.
        def running?
          state_is? "running"
        end

        ##
        # Checks if the job's state is `PENDING`.
        def pending?
          state_is? "pending"
        end

        ##
        # Checks if the job's state is `DONE`. When `true`, the job has
        # stopped running. However, a `DONE` state does not mean that the job
        # completed successfully. Use {#failed?} to detect if an error
        # occurred or if the job was successful.
        def done?
          state_is? "done"
        end

        ##
        # Checks if an error is present.
        def failed?
          !error.nil?
        end

        ##
        # The time when the job was created.
        def created_at
          timestamp_of :creation_time
        end

        ##
        # The time when the job was started.
        # This field is present after the job's state changes from `PENDING`
        # to either `RUNNING` or `DONE`.
        def started_at
          timestamp_of :start_time
        end

        ##
        # The time when the job ended.
        # This field is present when the job's state is `DONE`.
        def ended_at
          timestamp_of :end_time
        end

        ##
        # The configuration for the job. Returns a hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs
        #   API reference
        def configuration
          JSON.parse @gapi.configuration.to_json
        end
        alias_method :config, :configuration

        ##
        # The statistics for the job. Returns a hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs
        #   API reference
        def statistics
          JSON.parse @gapi.statistics.to_json
        end
        alias_method :stats, :statistics

        ##
        # The job's status. Returns a hash. The values contained in the hash
        # are also exposed by {#state}, {#error}, and {#errors}.
        def status
          JSON.parse @gapi.status.to_json
        end

        ##
        # The last error for the job, if any errors have occurred. Returns a
        # hash.
        #
        # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs
        #   API reference
        #
        # @return [Hash] Returns a hash containing `reason` and `message`
        #   keys:
        #
        #     {
        #       "reason"=>"notFound",
        #       "message"=>"Not found: Table publicdata:samples.BAD_ID"
        #     }
        #
        def error
          status_gapi = @gapi.status
          return nil if status_gapi.nil? || status_gapi.error_result.nil?
          JSON.parse status_gapi.error_result.to_json
        end

        ##
        # The errors for the job, if any errors have occurred. Returns an
        # array of hash objects. See {#error}.
        def errors
          status_gapi = @gapi.status
          return [] if status_gapi.nil?
          Array(status_gapi.errors).map { |e| JSON.parse e.to_json }
        end

        ##
        # Created a new job with the current configuration.
        def rerun!
          ensure_service!
          new_gapi = service.insert_job @gapi.configuration
          Job.from_gapi new_gapi, service
        end

        ##
        # Reloads the job with current data from the BigQuery service.
        def reload!
          ensure_service!
          @gapi = service.get_job job_id
        end
        alias_method :refresh!, :reload!

        ##
        # Refreshes the job until the job is `DONE`.
        # The delay between refreshes will incrementally increase.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #   dataset = bigquery.dataset "my_dataset"
        #   table = dataset.table "my_table"
        #
        #   extract_job = table.extract "gs://my-bucket/file-name.json",
        #                               format: "json"
        #   extract_job.wait_until_done!
        #   extract_job.done? #=> true
        def wait_until_done!
          attempts = 0
          until done?
            # Incremental backoff: 5s, 7s, 9s, ...
            sleep 2 * attempts + 5
            attempts += 1
            reload!
          end
        end

        ##
        # @private New Job from a Google API Client object.
        def self.from_gapi gapi, conn
          klass_for(gapi).new.tap do |job|
            job.gapi = gapi
            job.service = conn
          end
        end

        ##
        # @private Get the Job subclass matching the job type in the API
        # object; plain Job when no specific type is configured.
        def self.klass_for gapi
          config = gapi.configuration
          return CopyJob if config.copy
          return ExtractJob if config.extract
          return LoadJob if config.load
          return QueryJob if config.query
          Job
        end

        protected

        ##
        # Raise an error unless an active connection is available.
        def ensure_service!
          fail "Must have active connection" unless service
        end

        ##
        # @private Convert a millisecond statistics field to a Time, or nil
        # when the field is missing or malformed.
        def timestamp_of field
          Time.at Integer(@gapi.statistics.send(field)) / 1000.0
        rescue StandardError
          nil
        end

        ##
        # @private Case-insensitive check of {#state} against the given
        # value; false when the state is unset.
        def state_is? target
          return false if state.nil?
          target.casecmp(state).zero?
        end

        def retrieve_table project_id, dataset_id, table_id
          ensure_service!
          table_gapi = service.get_project_table project_id, dataset_id,
                                                 table_id
          Table.from_gapi table_gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end
      end
    end
  end
end
296
+
297
+ # We need Job to be defined before loading these.
298
+ require "google/cloud/bigquery/copy_job"
299
+ require "google/cloud/bigquery/extract_job"
300
+ require "google/cloud/bigquery/load_job"
301
+ require "google/cloud/bigquery/query_job"