RubyGems - veritable - Versions diffs - 0.1.2.30 → 0.1.3.31 - Mend

veritable 0.1.2.30 → 0.1.3.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

data/CHANGELOG.txt CHANGED Viewed

@@ -1,3 +1,7 @@
+veritable-ruby 0.1.3 - July 12, 2012
+    * Support for the 'similar' API call, which retrieves rows similar to a target row
+    * Implemented count-batching for predictions
 veritable-ruby 0.1.2 - July 9, 2012
     * Added check for maximum count value (100000)

data/lib/veritable/api.rb CHANGED Viewed

@@ -554,8 +554,8 @@ module Veritable
     # Scores how related columns are to a column of interest
     #
     # ==== Arguments
-    # * +column_id+ -- the id of the column of interest
-    # * +start+ -- the column id from which to start the cursor. Columns with related scores greater than or equal to the score of column +start+ will be returned by the cursor. Default is +nil+, in which case all columns in the table will be returned by the cursor.
+    # * +column_id+ -- the name of the column of interest
+    # * +start+ -- the column name from which to start the cursor. Columns with related scores greater than or equal to the score of column +start+ will be returned by the cursor. Default is +nil+, in which case all columns in the table will be returned by the cursor.
     # * +limit+ -- optionally limits the number of columns returned by the cursor. Default is +nil+, in which case the number of columns returned will not be limited.
     #
     # ==== Returns
@@ -578,6 +578,45 @@ module Veritable
       end
     end
+    # Returns rows which are similar to a target row in the context
+    # of a particular column of interest.
+    #
+    # ==== Arguments
+    # * +row+ -- either a row '_id' string or a row hash corresponding to the target row. If a row hash is provided, it must contain an '_id' key whose value is the '_id' of a row present in the table at the time of the analysis.
+    # * +column_id+ -- the name of the column of interest.
+    # * +max_rows+ -- the maximum number of similar rows to return. Default is +10+. The actual number of similar rows returned will be less than or equal to max_rows.
+    # * +return_data+ -- if +true+, the full row content will be returned. If +false+, only the '_id' field for each row will be returned. Default is +true+.
+    #
+    # ==== Returns
+    # An array of row entries ordered from most similar to least similar.
+    # Each row entry is an array with the first element being the row and
+    # the second element being a relatedness score between 0 and 1.
+    #
+    # See also: https://dev.priorknowledge.com/docs/client/ruby
+    def similar_to(row, column_id, opts={:max_rows => 10, :return_data => true})
+      if row.is_a? String # a bare '_id' string is wrapped into a minimal row hash
+        row = {'_id' => row}
+      end
+      if not row.is_a? Hash # anything that is neither a String nor a Hash is rejected
+        raise VeritableError.new("Similar -- Must provide an existing row to get similar!")
+      end
+      update if running? # NOTE(review): presumably refreshes the analysis state before the checks below -- confirm against update
+      if succeeded?
+        doc = post(link('similar'), {:data => row, :column => column_id, # request payload: target row and column of interest
+                                     :max_rows => 10, :return_data => true}.update(opts)) # defaults are repeated here so a partial opts hash still gets them; opts entries win
+        return doc['data'] # only the 'data' entry of the response is handed back
+      elsif running? # still running after the update call above
+        raise VeritableError.new("Similar -- Analysis with id #{_id} is still running and not yet ready to calculate similar.")
+      elsif failed?
+        raise VeritableError.new("Similar -- Analysis with id #{_id} has failed and cannot calculate similar.")
+      else # none of succeeded?, running? or failed? held
+        raise VeritableError.new("Similar -- Shouldn't be here -- please let us know at support@priorknowledge.com.")
+      end
+    end
     # Returns a string representation of the analysis resource
     def inspect; to_s; end
@@ -618,16 +657,22 @@ module Veritable
     private
-    def execute_batch(batch, count, preds)
+    def execute_batch(batch, count, preds, maxcells)
         if batch.size == 0
             return
         end
         if batch.size == 1
             data = batch[0]
+            ncols = (data.values.select {|v| v.nil?}).size
+            max_batch_count = (ncols == 0) ? count : (maxcells/ncols).to_i
+            res = []
+            while res.size < count do
+                batch_count = [max_batch_count, count - res.size].min
+                res = res + post(link('predict'), {'data' => data, 'count' => batch_count, 'return_fixed' => false})
+            end
         else
-            data = batch
+            res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
         end
-        res = post(link('predict'), {'data' => data, 'count' => count, 'return_fixed' => false})
         if not res.is_a? Array
           begin
             res.to_s
@@ -661,16 +706,15 @@ module Veritable
             if tcols > maxcols
                 raise VeritableError.new("Predict -- Cannot predict for row #{row['_request_id']} with more than #{maxcols} combined fixed and predicted values.")
             end
-            n = ncols * count
-            if n > maxcells
-                raise VeritableError.new("Predict -- Cannot predict for row #{row['_request_id']} with #{ncols} missing values and count #{count}: exceeds predicted cell limit of #{maxcells}.")
+            if ncols > maxcells
+                raise VeritableError.new("Predict -- Cannot predict for row #{row['_request_id']} with #{ncols} missing values: exceeds predicted cell limit of #{maxcells}.")
             end
         }
         rows.each {|row|
             ncols = (row.values.select {|v| v.nil?}).size
             n = ncols * count
             if (ncells + n) > maxcells
-                execute_batch(batch, count, preds)
+                execute_batch(batch, count, preds, maxcells)
                 ncells = n
                 batch = [row]
             else
@@ -678,7 +722,7 @@ module Veritable
                 ncells = ncells + n
             end
         }
-        execute_batch(batch, count, preds)
+        execute_batch(batch, count, preds, maxcells)
         return preds
       else
         raise VeritableError.new("Predict -- Shouldn't be here -- please let us know at support@priorknowledge.com.")

data/lib/veritable/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Veritable
   # The current version of veritable-ruby
-  VERSION = "0.1.2.30"
+  VERSION = "0.1.3.31"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: veritable
 version: !ruby/object:Gem::Version
-  version: 0.1.2.30
+  version: 0.1.3.31
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-07-09 00:00:00.000000000 Z
+date: 2012-07-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rest-client
@@ -173,7 +173,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 810707972233102190
+      hash: 4376636797931241119
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -182,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 810707972233102190
+      hash: 4376636797931241119
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.24