RubyGems - veritable - Versions diffs - 0.1.3.31 → 0.1.4.33 - Mend

veritable 0.1.3.31 → 0.1.4.33

Files changed (4) hide show

data/CHANGELOG.txt CHANGED Viewed

@@ -1,3 +1,8 @@
+veritable-ruby 0.1.4 - July 16, 2012
+    * analysis.batch_predict returns an Enumerator over Prediction objects instead of a list
+    * analysis.similar_to returns an Enumerator over row entries instead of a list
+    * table.batch_upload_rows and table.batch_delete_rows accept Enumerators as arguments
 veritable-ruby 0.1.3 - July 12, 2012
     * Support for similar API call to retrieve rows similar to a target row
     * Implemented count-batching for predictions

data/lib/veritable/api.rb CHANGED Viewed

@@ -1,11 +1,12 @@
+require 'backports/1.9.1'
 require 'veritable/cursor'
 require 'veritable/datatypes'
 require 'veritable/errors'
 require 'veritable/resource'
 require 'veritable/util'
-module Veritable
+module Veritable
   # Represents the resources available to a user of the Veritable API.
   #
   # Users should not initialize directly; use Veritable.connect as the entry point.
@@ -232,7 +233,7 @@ module Veritable
     # Batch uploads multiple rows to the table
     #
     # ==== Arguments
-    # * +rows+ -- an Array of Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
+    # * +rows+ -- an Enumerator over row data Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
     # * +per_page+ -- optionally controls the number of rows to upload in each batch. Defaults to +100+.
     #
     # ==== Returns
@@ -255,7 +256,7 @@ module Veritable
     # Batch deletes a list of rows from the table
     #
     # ==== Arguments
-    # * +rows+ -- an Array of Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table. Any other keys will be ignored.
+    # * +rows+ -- an Enumerator over row data Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table. Any other keys will be ignored.
     # * +per_page+ -- optionally controls the number of rows to delete in each batch. Defaults to +100+.
     #
     # ==== Returns
@@ -400,28 +401,18 @@ module Veritable
       if not per_page.is_a? Fixnum or not per_page > 0
         raise VeritableError.new("Batch upload or delete must have integer page size greater than 0.")
       end
-      rows = rows.collect {|row|
+      batch = []
+      rows.each do |row|
         Util.check_row(row)
-        row
-      }
-      if (not rows.is_a? Array) and (not rows.is_a? Veritable::Cursor)
-        raise VeritableError.new("Must pass an array of row hashes or a cursor of rows to batch upload or delete.")
+        batch.push(row)
+        if batch.size == per_page
+            post(link('rows'), {'action' => action, 'rows' => batch})
+            batch = []
+        end
+      end
+      if batch.size > 0
+        post(link('rows'), {'action' => action, 'rows' => batch})
       end
-      ct = (1..per_page).to_a.cycle
-      batch = Array.new()
-      ct.each { |ct|
-        if rows.empty?
-          if batch.size > 0
-            post(link('rows'), {'action' => action, 'rows' => batch})
-          end
-          break
-        end
-        batch.push rows.shift
-        if ct == per_page
-          post(link('rows'), {'action' => action, 'rows' => batch})
-          batch = Array.new()
-        end
-      }
     end
   end
@@ -446,6 +437,7 @@ module Veritable
   # * +wait+ -- blocks until the analysis succeeds or fails
   # * +predict+ -- makes new predictions based on the analysis
   # * +related_to+ -- calculates column relatedness based on the analysis
+  # * +similar_to+ -- calculates row relatedness based on the analysis
   #
   # See also: https://dev.priorknowledge.com/docs/client/ruby
   class Analysis
@@ -521,33 +513,22 @@ module Veritable
       if not row.is_a? Hash
         raise VeritableError.new("Predict -- Must provide a row hash to make predictions.")
       end
-      return raw_predict([row], count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'])[0]
+      raw_predict([row].to_enum, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'], false).next
     end
     # Makes predictions based on the analysis for multiple rows at a time
     #
     # ==== Arguments
-    # * +rows+ -- an Array of Hashes, each of which represents a row whose missing values are to be predicted. Keys must be valid String ids of columns contained in the underlying table, and values must be either fixed (conditioning) values of an appropriate type for each column, or +nil+ for values to be predicted. Each row Hash must also have a '_request_id' key with a unique string value.
+    # * +rows+ -- an Enumerator over prediction request Hashes, each of which represents a row whose missing values are to be predicted. Keys must be valid String ids of columns contained in the underlying table, and values must be either fixed (conditioning) values of an appropriate type for each column, or +nil+ for values to be predicted. Each prediction request Hash must also have a '_request_id' key with a unique string value.
     # * +count+ -- optionally specify the number of samples from the predictive distribution to return. Defaults to +100+.
     #
     # ==== Returns
-    # An Array of Veritable::Prediction objects
+    # An Enumerator over Veritable::Prediction objects
     #
     # See also: https://dev.priorknowledge.com/docs/client/ruby
     def batch_predict(rows, count=100)
-      if not rows.is_a? Array
-        raise VeritableError.new("Predict -- Must provide an array of row hashes to make predictions.")
-      end
-      rows.each {|row|
-        if not row.is_a? Hash
-            raise VeritableError.new("Predict -- Invalid row for predictions: #{row}")
-        end
-        if not row['_request_id'].is_a? String
-            raise VeritableError.new("Predict -- Rows for batch predictions must contain a string '_request_id' field: #{row}")
-        end
-      }
-      return raw_predict(rows, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'])
+      return raw_predict(rows.to_enum, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'], true)
     end
@@ -588,7 +569,7 @@ module Veritable
     # * +return_data+ -- if +true+, the full row content will be returned. If +false+, only the '_id' field for each row will be returned. Default is +true+.
     #
     # ==== Returns
-    # An array of row entries ordered from most similar to least similar.
+    # An Enumerator over row entries ordered from most similar to least similar.
     # Each row entry is an array with the first element being the row and
     # the second element being a relatedness score between 0 to 1.
     #
@@ -604,7 +585,7 @@ module Veritable
       if succeeded?
         doc = post(link('similar'), {:data => row, :column => column_id,
                                      :max_rows => 10, :return_data => true}.update(opts))
-        return doc['data']
+        return doc['data'].to_enum
       elsif running?
         raise VeritableError.new("Similar -- Analysis with id #{_id} is still running and not yet ready to calculate similar.")
       elsif failed?
@@ -614,9 +595,6 @@ module Veritable
       end
     end
     # Returns a string representation of the analysis resource
     def inspect; to_s; end
@@ -656,51 +634,24 @@ module Veritable
     def description; @doc['description']; end
     private
-    def execute_batch(batch, count, preds, maxcells)
-        if batch.size == 0
-            return
-        end
-        if batch.size == 1
-            data = batch[0]
-            ncols = (data.values.select {|v| v.nil?}).size
-            max_batch_count = (ncols == 0) ? count : (maxcells/ncols).to_i
-            res = []
-            while res.size < count do
-                batch_count = [max_batch_count, count - res.size].min
-                res = res + post(link('predict'), {'data' => data, 'count' => batch_count, 'return_fixed' => false})
+    def raw_predict(rows, count, maxcells, maxcols, requires_id=true)
+      return Enumerator.new { |y|
+        update if running?
+        if running?
+          raise VeritableError.new("Predict -- Analysis with id #{_id} is still running and not yet ready to predict.")
+        elsif failed?
+          raise VeritableError.new("Predict -- Analysis with id #{_id} has failed and cannot predict.")
+        elsif succeeded?
+          ncells = 0
+          batch = []
+          rows.each { |row|
+            if not row.is_a? Hash
+              raise VeritableError.new("Predict -- Invalid row for predictions: #{row}")
+            end
+            if requires_id and not row['_request_id'].is_a? String
+              raise VeritableError.new("Predict -- Rows for batch predictions must contain a string '_request_id' field: #{row}")
             end
-        else
-            res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
-        end
-        if not res.is_a? Array
-          begin
-            res.to_s
-          rescue
-            raise VeritableError.new("Predict -- Error making predictions.")
-          else
-            raise VeritableError.new("Predict -- Error making predictions: #{res}")
-          end
-        end
-        (0...batch.size).each {|i|
-            request = batch[i].clone
-            request_id = request['_request_id']
-            distribution = res[(i * count)...((i + 1) * count)]
-            preds.push Prediction.new(request, distribution, schema, request_id)
-        }
-    end
-    def raw_predict(rows, count, maxcells, maxcols)
-      update if running?
-      if running?
-        raise VeritableError.new("Predict -- Analysis with id #{_id} is still running and not yet ready to predict.")
-      elsif failed?
-        raise VeritableError.new("Predict -- Analysis with id #{_id} has failed and cannot predict.")
-      elsif succeeded?
-        preds = []
-        ncells = 0
-        batch = []
-        rows.each {|row|
             ncols = (row.values.select {|v| v.nil?}).size
             tcols = (row.keys.select {|k| k != '_request_id'}).size
             if tcols > maxcols
@@ -709,28 +660,56 @@ module Veritable
             if ncols > maxcells
                 raise VeritableError.new("Predict -- Cannot predict for row #{row['_request_id']} with #{ncols} missing values: exceeds predicted cell limit of #{maxcells}.")
             end
-        }
-        rows.each {|row|
-            ncols = (row.values.select {|v| v.nil?}).size
             n = ncols * count
             if (ncells + n) > maxcells
-                execute_batch(batch, count, preds, maxcells)
+                execute_batch(batch, count, maxcells).each {|x| y << x}
                 ncells = n
                 batch = [row]
             else
                 batch.push row
                 ncells = ncells + n
             end
-        }
-        execute_batch(batch, count, preds, maxcells)
-        return preds
+          }
+          execute_batch(batch, count, maxcells).each {|x| y << x}
+        end
+      }
+    end
+    def execute_batch(batch, count, maxcells)
+      if batch.size == 0
+          return []
+      end
+      if batch.size == 1
+          data = batch[0]
+          ncols = (data.values.select {|v| v.nil?}).size
+          max_batch_count = (ncols == 0) ? count : (maxcells/ncols).to_i
+          res = []
+          while res.size < count do
+              batch_count = [max_batch_count, count - res.size].min
+              res = res + post(link('predict'), {'data' => data, 'count' => batch_count, 'return_fixed' => false})
+          end
       else
-        raise VeritableError.new("Predict -- Shouldn't be here -- please let us know at support@priorknowledge.com.")
+          res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
       end
-     end
-end
+      if not res.is_a? Array
+        begin
+          res.to_s
+        rescue
+          raise VeritableError.new("Predict -- Error making predictions.")
+        else
+          raise VeritableError.new("Predict -- Error making predictions: #{res}")
+        end
+      end
+      preds = []
+      (0...batch.size).each {|i|
+          request = batch[i].clone
+          request_id = request['_request_id']
+          distribution = res[(i * count)...((i + 1) * count)]
+          preds.push Prediction.new(request, distribution, schema, request_id)
+      }
+      return preds
+    end
+  end
   # Represents a schema for a Veritable analysis
   #
@@ -905,8 +884,6 @@ end
           @uncertainty[k] = 0.0
         end
       }
     end
     # Calculates the probability a column's value lies within a range.

data/lib/veritable/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Veritable
   # The current version of veritable-ruby
-  VERSION = "0.1.3.31"
+  VERSION = "0.1.4.33"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: veritable
 version: !ruby/object:Gem::Version
-  version: 0.1.3.31
+  version: 0.1.4.33
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-07-13 00:00:00.000000000 Z
+date: 2012-07-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rest-client
@@ -59,6 +59,22 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: backports
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: test-unit
   requirement: !ruby/object:Gem::Requirement
@@ -173,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 4376636797931241119
+      hash: -1563856652623917274
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -182,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 4376636797931241119
+      hash: -1563856652623917274
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.24