RubyGems - google-cloud-bigquery - Versions diffs - 0.28.0 → 0.29.0 - Mend

google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

checksums.yaml +4 -4
data/README.md +1 -1
data/lib/google-cloud-bigquery.rb +2 -2
data/lib/google/cloud/bigquery.rb +10 -12
data/lib/google/cloud/bigquery/copy_job.rb +42 -6
data/lib/google/cloud/bigquery/data.rb +129 -23
data/lib/google/cloud/bigquery/dataset.rb +708 -66
data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
data/lib/google/cloud/bigquery/external.rb +2353 -0
data/lib/google/cloud/bigquery/extract_job.rb +52 -11
data/lib/google/cloud/bigquery/insert_response.rb +90 -2
data/lib/google/cloud/bigquery/job.rb +160 -21
data/lib/google/cloud/bigquery/load_job.rb +128 -11
data/lib/google/cloud/bigquery/project.rb +187 -44
data/lib/google/cloud/bigquery/query_job.rb +323 -13
data/lib/google/cloud/bigquery/schema.rb +57 -1
data/lib/google/cloud/bigquery/schema/field.rb +118 -17
data/lib/google/cloud/bigquery/service.rb +196 -43
data/lib/google/cloud/bigquery/table.rb +739 -49
data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
data/lib/google/cloud/bigquery/version.rb +1 -1
data/lib/google/cloud/bigquery/view.rb +306 -69
metadata +18 -3
data/lib/google/cloud/bigquery/query_data.rb +0 -234

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 41cd0596408cd35451c4db0c52fe0b7901dff7ea
-  data.tar.gz: fb36dac2c6d09940b4335980b808fc6d9564b84d
+  metadata.gz: 58e73b43f4053457d2df061703e3314483552dbb
+  data.tar.gz: 927a49cb45ff1a1c2aac5fa319e19dc0b422af23
 SHA512:
-  metadata.gz: cf8190937417f431a6221c408b911c958c2c83be1b4cce3688a130e2f9f44a5645c68e81f16368a3b3e524e9c5ae87ea095e4b3791bbd8970d93dc5afafb4d32
-  data.tar.gz: 2ad979a3b853e4e2fc6813f5e98279181a39d082b843079752d3d4fd63c1732f37e261584b5b77c3aa0f47f6b3a45386bd1ea5e3ac97cb4acaf35d84d3d83ff7
+  metadata.gz: 8d86f37859a1a6cf2b682afd8b4e3f77b178fb8333e47606673e02e0a5e6ba76e61c6ee3c9bfbd5bd8d4763eff03f9c54bc067c10f39883772a6c730c0d8b443
+  data.tar.gz: d9d5afd3aac9c23e523909d78b7ae5fc886ce783e5b3cad0068aff21a815841a564e9baf6f35b2235adf0ed743df55d41c33b570f9b99a5d5a96ea50768882d0

data/README.md CHANGED

@@ -36,7 +36,7 @@ table = dataset.create_table "todos",
 # Load data into the table
 file = File.open "/archive/todos/completed-todos.csv"
-load_job = table.load file
+table.load file
 # Run a query for the number of completed todos by owner
 count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"

data/lib/google-cloud-bigquery.rb CHANGED

@@ -39,7 +39,7 @@ module Google
     #
     #   * `https://www.googleapis.com/auth/bigquery`
     # @param [Integer] retries Number of times to retry requests on server
-    #   error. The default value is `3`. Optional.
+    #   error. The default value is `5`. Optional.
     # @param [Integer] timeout Default request timeout in seconds. Optional.
     #
     # @return [Google::Cloud::Bigquery::Project]
@@ -88,7 +88,7 @@ module Google
     #
     #   * `https://www.googleapis.com/auth/bigquery`
     # @param [Integer] retries Number of times to retry requests on server
-    #   error. The default value is `3`. Optional.
+    #   error. The default value is `5`. Optional.
     # @param [Integer] timeout Default timeout to use in requests. Optional.
     #
     # @return [Google::Cloud::Bigquery::Project]

data/lib/google/cloud/bigquery.rb CHANGED

@@ -232,7 +232,7 @@ module Google
     # BigQuery API provides facilities for managing longer-running jobs. With
     # the asynchronous approach to running a query, an instance of
     # {Google::Cloud::Bigquery::QueryJob} is returned, rather than an instance
-    # of {Google::Cloud::Bigquery::QueryData}.
+    # of {Google::Cloud::Bigquery::Data}.
     #
     # ```ruby
     # require "google/cloud/bigquery"
@@ -246,17 +246,17 @@ module Google
     #
     # job.wait_until_done!
     # if !job.failed?
-    #   job.query_results.first
+    #   job.data.first
     #   #=> {:title=>[{:value=>"hamlet", :count=>5318}, ...}
     # end
     # ```
     #
     # Once you have determined that the job is done and has not failed, you can
-    # obtain an instance of {Google::Cloud::Bigquery::QueryData} by calling
-    # `query_results` on the job instance. The query results for both of the
-    # above examples are stored in temporary tables with a lifetime of about 24
-    # hours. See the final example below for a demonstration of how to store
-    # query results in a permanent table.
+    # obtain an instance of {Google::Cloud::Bigquery::Data} by calling `data` on
+    # the job instance. The query results for both of the above examples are
+    # stored in temporary tables with a lifetime of about 24 hours. See the
+    # final example below for a demonstration of how to store query results in a
+    # permanent table.
     #
     # ## Creating Datasets and Tables
     #
@@ -370,7 +370,7 @@ module Google
     # end
     #
     # file = File.open "names/yob2014.txt"
-    # load_job = table.load file, format: "csv"
+    # table.load file, format: "csv"
     # ```
     #
     # Because the names data, although formatted as CSV, is distributed in files
@@ -411,9 +411,7 @@ module Google
     #   bucket = storage.create_bucket bucket_id
     #   extract_url = "gs://#{bucket.id}/baby-names.csv"
     #
-    #   extract_job = result_table.extract extract_url
-    #
-    #   extract_job.wait_until_done!
+    #   result_table.extract extract_url
     #
     #   # Download to local filesystem
     #   bucket.files.first.download "baby-names.csv"
@@ -470,7 +468,7 @@ module Google
       #
       #   * `https://www.googleapis.com/auth/bigquery`
       # @param [Integer] retries Number of times to retry requests on server
-      #   error. The default value is `3`. Optional.
+      #   error. The default value is `5`. Optional.
       # @param [Integer] timeout Default timeout to use in requests. Optional.
       #
       # @return [Google::Cloud::Bigquery::Project]

data/lib/google/cloud/bigquery/copy_job.rb CHANGED

@@ -20,17 +20,33 @@ module Google
       # # CopyJob
       #
       # A {Job} subclass representing a copy operation that may be performed on
-      # a {Table}. A CopyJob instance is created when you call {Table#copy}.
+      # a {Table}. A CopyJob instance is created when you call {Table#copy_job}.
       #
-      # @see https://cloud.google.com/bigquery/docs/tables#copyingtable Copying
+      # @see https://cloud.google.com/bigquery/docs/tables#copy-table Copying
       #   an Existing Table
       # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
       #   reference
       #
+      # @example
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   table = dataset.table "my_table"
+      #   destination_table = dataset.table "my_destination_table"
+      #
+      #   copy_job = table.copy_job destination_table
+      #
+      #   copy_job.wait_until_done!
+      #   copy_job.done? #=> true
+      #
       class CopyJob < Job
         ##
         # The table from which data is copied. This is the table on
-        # which {Table#copy} was called. Returns a {Table} instance.
+        # which {Table#copy_job} was called.
+        #
+        # @return [Table] A table instance.
+        #
         def source
           table = @gapi.configuration.copy.source_table
           return nil unless table
@@ -40,7 +56,10 @@ module Google
         end
         ##
-        # The table to which data is copied. Returns a {Table} instance.
+        # The table to which data is copied.
+        #
+        # @return [Table] A table instance.
+        #
         def destination
           table = @gapi.configuration.copy.destination_table
           return nil unless table
@@ -52,7 +71,11 @@ module Google
         ##
         # Checks if the create disposition for the job is `CREATE_IF_NEEDED`,
         # which provides the following behavior: If the table does not exist,
-        # the copy operation creates the table. This is the default.
+        # the copy operation creates the table. This is the default create
+        # disposition for copy jobs.
+        #
+        # @return [Boolean] `true` when `CREATE_IF_NEEDED`, `false` otherwise.
+        #
         def create_if_needed?
           disp = @gapi.configuration.copy.create_disposition
           disp == "CREATE_IF_NEEDED"
@@ -62,6 +85,9 @@ module Google
         # Checks if the create disposition for the job is `CREATE_NEVER`, which
         # provides the following behavior: The table must already exist; if it
         # does not, an error is returned in the job result.
+        #
+        # @return [Boolean] `true` when `CREATE_NEVER`, `false` otherwise.
+        #
         def create_never?
           disp = @gapi.configuration.copy.create_disposition
           disp == "CREATE_NEVER"
@@ -71,6 +97,9 @@ module Google
         # Checks if the write disposition for the job is `WRITE_TRUNCATE`, which
         # provides the following behavior: If the table already exists, the copy
         # operation overwrites the table data.
+        #
+        # @return [Boolean] `true` when `WRITE_TRUNCATE`, `false` otherwise.
+        #
         def write_truncate?
           disp = @gapi.configuration.copy.write_disposition
           disp == "WRITE_TRUNCATE"
@@ -80,6 +109,9 @@ module Google
         # Checks if the write disposition for the job is `WRITE_APPEND`, which
         # provides the following behavior: If the table already exists, the copy
         # operation appends the data to the table.
+        #
+        # @return [Boolean] `true` when `WRITE_APPEND`, `false` otherwise.
+        #
         def write_append?
           disp = @gapi.configuration.copy.write_disposition
           disp == "WRITE_APPEND"
@@ -88,7 +120,11 @@ module Google
         ##
         # Checks if the write disposition for the job is `WRITE_EMPTY`, which
         # provides the following behavior: If the table already exists and
-        # contains data, the job will have an error. This is the default.
+        # contains data, the job will have an error. This is the default write
+        # disposition for copy jobs.
+        #
+        # @return [Boolean] `true` when `WRITE_EMPTY`, `false` otherwise.
+        #
         def write_empty?
           disp = @gapi.configuration.copy.write_disposition
           disp == "WRITE_EMPTY"

data/lib/google/cloud/bigquery/data.rb CHANGED

@@ -22,12 +22,31 @@ module Google
       ##
       # # Data
       #
-      # Represents {Table} Data as a list of name/value pairs.
-      # Also contains metadata such as `etag` and `total`.
+      # Represents {Table} Data as a list of name/value pairs (hashes.)
+      # Also contains metadata such as `etag` and `total`, and provides access
+      # to the schema of the table from which the data was read.
+      #
+      # @example
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #   dataset = bigquery.dataset "my_dataset"
+      #   table = dataset.table "my_table"
+      #
+      #   data = table.data
+      #   puts "#{data.count} of #{data.total}"
+      #   if data.next?
+      #     next_data = data.next
+      #   end
+      #
       class Data < DelegateClass(::Array)
+        ##
+        # @private The Service object.
+        attr_accessor :service
         ##
         # @private The {Table} object the data belongs to.
-        attr_accessor :table
+        attr_accessor :table_gapi
         ##
         # @private The Google API Client object.
@@ -35,30 +54,58 @@ module Google
         # @private
         def initialize arr = []
-          @table = nil
-          @gapi = {}
+          @service = nil
+          @table_gapi = nil
+          @gapi = nil
           super arr
         end
         ##
         # The resource type of the API response.
+        #
+        # @return [String] The resource type.
+        #
         def kind
           @gapi.kind
         end
         ##
-        # The etag.
+        # An ETag hash for the page of results represented by the data instance.
+        #
+        # @return [String] The ETag hash.
+        #
         def etag
           @gapi.etag
         end
         ##
-        # A token used for paging results.
+        # A token used for paging results. Used by the data instance to retrieve
+        # subsequent pages. See {#next}.
+        #
+        # @return [String] The pagination token.
+        #
         def token
           @gapi.page_token
         end
+        ##
         # The total number of rows in the complete table.
+        #
+        # @return [Integer] The number of rows.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   table = dataset.table "my_table"
+        #
+        #   data = table.data
+        #   puts "#{data.count} of #{data.total}"
+        #   if data.next?
+        #     next_data = data.next
+        #   end
+        #
         def total
           Integer @gapi.total_rows
         rescue
@@ -66,19 +113,72 @@ module Google
         end
         ##
-        # The schema of the data.
+        # The schema of the table from which the data was read.
+        #
+        # The returned object is frozen and changes are not allowed. Use
+        # {Table#schema} to update the schema.
+        #
+        # @return [Schema] A schema object.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   table = dataset.table "my_table"
+        #
+        #   data = table.data
+        #
+        #   schema = data.schema
+        #   field = schema.field "name"
+        #   field.required? #=> true
+        #
         def schema
-          table.schema
+          Schema.from_gapi(@table_gapi.schema).freeze
         end
         ##
-        # The fields of the data.
+        # The fields of the data, obtained from the schema of the table from
+        # which the data was read.
+        #
+        # @return [Array<Schema::Field>] An array of field objects.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   table = dataset.table "my_table"
+        #
+        #   data = table.data
+        #
+        #   data.fields.each do |field|
+        #     puts field.name
+        #   end
+        #
         def fields
           schema.fields
         end
         ##
-        # The name of the columns in the data.
+        # The names of the columns in the data, obtained from the schema of the
+        # table from which the data was read.
+        #
+        # @return [Array<Symbol>] An array of column names.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #   dataset = bigquery.dataset "my_dataset"
+        #   table = dataset.table "my_table"
+        #
+        #   data = table.data
+        #
+        #   data.headers.each do |header|
+        #     puts header
+        #   end
+        #
         def headers
           schema.headers
         end
@@ -86,7 +186,7 @@ module Google
         ##
         # Whether there is a next page of data.
         #
-        # @return [Boolean]
+        # @return [Boolean] `true` when there is a next page, `false` otherwise.
         #
         # @example
         #   require "google/cloud/bigquery"
@@ -105,9 +205,9 @@ module Google
         end
         ##
-        # Retrieve the next page of data.
+        # Retrieves the next page of data.
         #
-        # @return [Data]
+        # @return [Data] A new instance providing the next page of data.
         #
         # @example
         #   require "google/cloud/bigquery"
@@ -123,8 +223,12 @@ module Google
         #
         def next
           return nil unless next?
-          ensure_table!
-          table.data token: token
+          ensure_service!
+          data_gapi = service.list_tabledata \
+            @table_gapi.table_reference.dataset_id,
+            @table_gapi.table_reference.table_id,
+            token: token
+          self.class.from_gapi data_gapi, @table_gapi, @service
         end
         ##
@@ -132,7 +236,7 @@ module Google
         # returns `false`. Calls the given block once for each row, which is
         # passed as the parameter.
         #
-        # An Enumerator is returned if no block is given.
+        # An enumerator is returned if no block is given.
         #
         # This method may make several API calls until all rows are retrieved.
         # Be sure to use as narrow a search criteria as possible. Please use
@@ -143,7 +247,8 @@ module Google
         # @yield [row] The block for accessing each row of data.
         # @yieldparam [Hash] row The row object.
         #
-        # @return [Enumerator]
+        # @return [Enumerator] An enumerator providing access to all of the
+        #   data.
         #
         # @example Iterating each rows by passing a block:
         #   require "google/cloud/bigquery"
@@ -197,13 +302,14 @@ module Google
         ##
         # @private New Data from a response object.
-        def self.from_gapi gapi, table
+        def self.from_gapi gapi, table_gapi, service
           formatted_rows = Convert.format_rows(gapi.rows,
-                                               table.gapi.schema.fields)
+                                               table_gapi.schema.fields)
           data = new formatted_rows
-          data.table = table
+          data.table_gapi = table_gapi
           data.gapi = gapi
+          data.service = service
           data
         end
@@ -211,8 +317,8 @@ module Google
         ##
         # Raise an error unless an active service is available.
-        def ensure_table!
-          fail "Must have active connection" unless table
+        def ensure_service!
+          fail "Must have active connection" unless service
         end
       end
     end