veritable 0.1.3.31 → 0.1.4.33

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.txt CHANGED
@@ -1,3 +1,8 @@
1
+ veritable-ruby 0.1.4 - July 16, 2012
2
+ * analysis.batch_predict returns an Enumerator over Prediction objects instead of a list
3
+ * analysis.similar_to returns an Enumerator over row entries instead of a list
4
+ * table.batch_upload_rows and table.batch_delete_rows accept Enumerators as arguments
5
+
1
6
  veritable-ruby 0.1.3 - July 12, 2012
2
7
  * Support for similar API call to retrieve rows similar to a target row
3
8
  * Implemented count-batching for predictions
data/lib/veritable/api.rb CHANGED
@@ -1,11 +1,12 @@
1
+ require 'backports/1.9.1'
2
+
1
3
  require 'veritable/cursor'
2
4
  require 'veritable/datatypes'
3
5
  require 'veritable/errors'
4
6
  require 'veritable/resource'
5
7
  require 'veritable/util'
6
8
 
7
- module Veritable
8
-
9
+ module Veritable
9
10
  # Represents the resources available to a user of the Veritable API.
10
11
  #
11
12
  # Users should not initialize directly; use Veritable.connect as the entry point.
@@ -232,7 +233,7 @@ module Veritable
232
233
  # Batch uploads multiple rows to the table
233
234
  #
234
235
  # ==== Arguments
235
- # * +rows+ -- an Array of Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
236
+ # * +rows+ -- an Enumerator over row data Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
236
237
  # * +per_page+ -- optionally controls the number of rows to upload in each batch. Defaults to +100+.
237
238
  #
238
239
  # ==== Returns
@@ -255,7 +256,7 @@ module Veritable
255
256
  # Batch deletes a list of rows from the table
256
257
  #
257
258
  # ==== Arguments
258
- # * +rows+ -- an Array of Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table. Any other keys will be ignored.
259
+ # * +rows+ -- an Enumerator over row data Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table. Any other keys will be ignored.
259
260
  # * +per_page+ -- optionally controls the number of rows to delete in each batch. Defaults to +100+.
260
261
  #
261
262
  # ==== Returns
@@ -400,28 +401,18 @@ module Veritable
400
401
  if not per_page.is_a? Fixnum or not per_page > 0
401
402
  raise VeritableError.new("Batch upload or delete must have integer page size greater than 0.")
402
403
  end
403
- rows = rows.collect {|row|
404
+ batch = []
405
+ rows.each do |row|
404
406
  Util.check_row(row)
405
- row
406
- }
407
- if (not rows.is_a? Array) and (not rows.is_a? Veritable::Cursor)
408
- raise VeritableError.new("Must pass an array of row hashes or a cursor of rows to batch upload or delete.")
407
+ batch.push(row)
408
+ if batch.size == per_page
409
+ post(link('rows'), {'action' => action, 'rows' => batch})
410
+ batch = []
411
+ end
412
+ end
413
+ if batch.size > 0
414
+ post(link('rows'), {'action' => action, 'rows' => batch})
409
415
  end
410
- ct = (1..per_page).to_a.cycle
411
- batch = Array.new()
412
- ct.each { |ct|
413
- if rows.empty?
414
- if batch.size > 0
415
- post(link('rows'), {'action' => action, 'rows' => batch})
416
- end
417
- break
418
- end
419
- batch.push rows.shift
420
- if ct == per_page
421
- post(link('rows'), {'action' => action, 'rows' => batch})
422
- batch = Array.new()
423
- end
424
- }
425
416
  end
426
417
  end
427
418
 
@@ -446,6 +437,7 @@ module Veritable
446
437
  # * +wait+ -- blocks until the analysis succeeds or fails
447
438
  # * +predict+ -- makes new predictions based on the analysis
448
439
  # * +related_to+ -- calculates column relatedness based on the analysis
440
+ # * +similar_to+ -- calculates row relatedness based on the analysis
449
441
  #
450
442
  # See also: https://dev.priorknowledge.com/docs/client/ruby
451
443
  class Analysis
@@ -521,33 +513,22 @@ module Veritable
521
513
  if not row.is_a? Hash
522
514
  raise VeritableError.new("Predict -- Must provide a row hash to make predictions.")
523
515
  end
524
- return raw_predict([row], count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'])[0]
516
+ raw_predict([row].to_enum, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'], false).next
525
517
  end
526
518
 
527
519
 
528
520
  # Makes predictions based on the analysis for multiple rows at a time
529
521
  #
530
522
  # ==== Arguments
531
- # * +rows+ -- an Array of Hashes, each of which represents a row whose missing values are to be predicted. Keys must be valid String ids of columns contained in the underlying table, and values must be either fixed (conditioning) values of an appropriate type for each column, or +nil+ for values to be predicted. Each row Hash must also have a '_request_id' key with a unique string value.
523
+ # * +rows+ -- an Enumerator over prediction request Hashes, each of which represents a row whose missing values are to be predicted. Keys must be valid String ids of columns contained in the underlying table, and values must be either fixed (conditioning) values of an appropriate type for each column, or +nil+ for values to be predicted. Each prediction request Hash must also have a '_request_id' key with a unique string value.
532
524
  # * +count+ -- optionally specify the number of samples from the predictive distribution to return. Defaults to +100+.
533
525
  #
534
526
  # ==== Returns
535
- # An Array of Veritable::Prediction objects
527
+ # An Enumerator over Veritable::Prediction objects
536
528
  #
537
529
  # See also: https://dev.priorknowledge.com/docs/client/ruby
538
530
  def batch_predict(rows, count=100)
539
- if not rows.is_a? Array
540
- raise VeritableError.new("Predict -- Must provide an array of row hashes to make predictions.")
541
- end
542
- rows.each {|row|
543
- if not row.is_a? Hash
544
- raise VeritableError.new("Predict -- Invalid row for predictions: #{row}")
545
- end
546
- if not row['_request_id'].is_a? String
547
- raise VeritableError.new("Predict -- Rows for batch predictions must contain a string '_request_id' field: #{row}")
548
- end
549
- }
550
- return raw_predict(rows, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'])
531
+ return raw_predict(rows.to_enum, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'], true)
551
532
  end
552
533
 
553
534
 
@@ -588,7 +569,7 @@ module Veritable
588
569
  # * +return_data+ -- if +true+, the full row content will be returned. If +false+, only the '_id' field for each row will be returned. Default is +true+.
589
570
  #
590
571
  # ==== Returns
591
- # An array of row entries ordered from most similar to least similar.
572
+ # An Enumerator over row entries ordered from most similar to least similar.
592
573
  # Each row entry is an array with the first element being the row and
593
574
  # the second element being a relatedness score between 0 and 1.
594
575
  #
@@ -604,7 +585,7 @@ module Veritable
604
585
  if succeeded?
605
586
  doc = post(link('similar'), {:data => row, :column => column_id,
606
587
  :max_rows => 10, :return_data => true}.update(opts))
607
- return doc['data']
588
+ return doc['data'].to_enum
608
589
  elsif running?
609
590
  raise VeritableError.new("Similar -- Analysis with id #{_id} is still running and not yet ready to calculate similar.")
610
591
  elsif failed?
@@ -614,9 +595,6 @@ module Veritable
614
595
  end
615
596
  end
616
597
 
617
-
618
-
619
-
620
598
  # Returns a string representation of the analysis resource
621
599
  def inspect; to_s; end
622
600
 
@@ -656,51 +634,24 @@ module Veritable
656
634
  def description; @doc['description']; end
657
635
 
658
636
  private
659
-
660
- def execute_batch(batch, count, preds, maxcells)
661
- if batch.size == 0
662
- return
663
- end
664
- if batch.size == 1
665
- data = batch[0]
666
- ncols = (data.values.select {|v| v.nil?}).size
667
- max_batch_count = (ncols == 0) ? count : (maxcells/ncols).to_i
668
- res = []
669
- while res.size < count do
670
- batch_count = [max_batch_count, count - res.size].min
671
- res = res + post(link('predict'), {'data' => data, 'count' => batch_count, 'return_fixed' => false})
637
+
638
+ def raw_predict(rows, count, maxcells, maxcols, requires_id=true)
639
+ return Enumerator.new { |y|
640
+ update if running?
641
+ if running?
642
+ raise VeritableError.new("Predict -- Analysis with id #{_id} is still running and not yet ready to predict.")
643
+ elsif failed?
644
+ raise VeritableError.new("Predict -- Analysis with id #{_id} has failed and cannot predict.")
645
+ elsif succeeded?
646
+ ncells = 0
647
+ batch = []
648
+ rows.each { |row|
649
+ if not row.is_a? Hash
650
+ raise VeritableError.new("Predict -- Invalid row for predictions: #{row}")
651
+ end
652
+ if requires_id and not row['_request_id'].is_a? String
653
+ raise VeritableError.new("Predict -- Rows for batch predictions must contain a string '_request_id' field: #{row}")
672
654
  end
673
- else
674
- res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
675
- end
676
- if not res.is_a? Array
677
- begin
678
- res.to_s
679
- rescue
680
- raise VeritableError.new("Predict -- Error making predictions.")
681
- else
682
- raise VeritableError.new("Predict -- Error making predictions: #{res}")
683
- end
684
- end
685
- (0...batch.size).each {|i|
686
- request = batch[i].clone
687
- request_id = request['_request_id']
688
- distribution = res[(i * count)...((i + 1) * count)]
689
- preds.push Prediction.new(request, distribution, schema, request_id)
690
- }
691
- end
692
-
693
- def raw_predict(rows, count, maxcells, maxcols)
694
- update if running?
695
- if running?
696
- raise VeritableError.new("Predict -- Analysis with id #{_id} is still running and not yet ready to predict.")
697
- elsif failed?
698
- raise VeritableError.new("Predict -- Analysis with id #{_id} has failed and cannot predict.")
699
- elsif succeeded?
700
- preds = []
701
- ncells = 0
702
- batch = []
703
- rows.each {|row|
704
655
  ncols = (row.values.select {|v| v.nil?}).size
705
656
  tcols = (row.keys.select {|k| k != '_request_id'}).size
706
657
  if tcols > maxcols
@@ -709,28 +660,56 @@ module Veritable
709
660
  if ncols > maxcells
710
661
  raise VeritableError.new("Predict -- Cannot predict for row #{row['_request_id']} with #{ncols} missing values: exceeds predicted cell limit of #{maxcells}.")
711
662
  end
712
- }
713
- rows.each {|row|
714
- ncols = (row.values.select {|v| v.nil?}).size
715
663
  n = ncols * count
716
664
  if (ncells + n) > maxcells
717
- execute_batch(batch, count, preds, maxcells)
665
+ execute_batch(batch, count, maxcells).each {|x| y << x}
718
666
  ncells = n
719
667
  batch = [row]
720
668
  else
721
669
  batch.push row
722
670
  ncells = ncells + n
723
671
  end
724
- }
725
- execute_batch(batch, count, preds, maxcells)
726
- return preds
672
+ }
673
+ execute_batch(batch, count, maxcells).each {|x| y << x}
674
+ end
675
+ }
676
+ end
677
+
678
+ def execute_batch(batch, count, maxcells)
679
+ if batch.size == 0
680
+ return []
681
+ end
682
+ if batch.size == 1
683
+ data = batch[0]
684
+ ncols = (data.values.select {|v| v.nil?}).size
685
+ max_batch_count = (ncols == 0) ? count : (maxcells/ncols).to_i
686
+ res = []
687
+ while res.size < count do
688
+ batch_count = [max_batch_count, count - res.size].min
689
+ res = res + post(link('predict'), {'data' => data, 'count' => batch_count, 'return_fixed' => false})
690
+ end
727
691
  else
728
- raise VeritableError.new("Predict -- Shouldn't be here -- please let us know at support@priorknowledge.com.")
692
+ res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
729
693
  end
730
- end
731
-
732
-
733
- end
694
+ if not res.is_a? Array
695
+ begin
696
+ res.to_s
697
+ rescue
698
+ raise VeritableError.new("Predict -- Error making predictions.")
699
+ else
700
+ raise VeritableError.new("Predict -- Error making predictions: #{res}")
701
+ end
702
+ end
703
+ preds = []
704
+ (0...batch.size).each {|i|
705
+ request = batch[i].clone
706
+ request_id = request['_request_id']
707
+ distribution = res[(i * count)...((i + 1) * count)]
708
+ preds.push Prediction.new(request, distribution, schema, request_id)
709
+ }
710
+ return preds
711
+ end
712
+ end
734
713
 
735
714
  # Represents a schema for a Veritable analysis
736
715
  #
@@ -905,8 +884,6 @@ end
905
884
  @uncertainty[k] = 0.0
906
885
  end
907
886
  }
908
-
909
-
910
887
  end
911
888
 
912
889
  # Calculates the probability a column's value lies within a range.
@@ -1,5 +1,5 @@
1
1
  module Veritable
2
2
 
3
3
  # The current version of veritable-ruby
4
- VERSION = "0.1.3.31"
4
+ VERSION = "0.1.4.33"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: veritable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3.31
4
+ version: 0.1.4.33
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-13 00:00:00.000000000 Z
12
+ date: 2012-07-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rest-client
@@ -59,6 +59,22 @@ dependencies:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: backports
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
62
78
  - !ruby/object:Gem::Dependency
63
79
  name: test-unit
64
80
  requirement: !ruby/object:Gem::Requirement
@@ -173,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
173
189
  version: '0'
174
190
  segments:
175
191
  - 0
176
- hash: 4376636797931241119
192
+ hash: -1563856652623917274
177
193
  required_rubygems_version: !ruby/object:Gem::Requirement
178
194
  none: false
179
195
  requirements:
@@ -182,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
198
  version: '0'
183
199
  segments:
184
200
  - 0
185
- hash: 4376636797931241119
201
+ hash: -1563856652623917274
186
202
  requirements: []
187
203
  rubyforge_project:
188
204
  rubygems_version: 1.8.24