veritable 0.1.3.31 → 0.1.4.33
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +5 -0
- data/lib/veritable/api.rb +79 -102
- data/lib/veritable/version.rb +1 -1
- metadata +20 -4
data/CHANGELOG.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
veritable-ruby 0.1.4 - July 16, 2012
|
2
|
+
* analysis.batch_predict returns an Enumerator over Prediction objects instead of a list
|
3
|
+
* analysis.similar_to returns an Enumerator over row entries instead of a list
|
4
|
+
* table.batch_upload_rows and table.batch_delete_rows accept Enumerators as arguments
|
5
|
+
|
1
6
|
veritable-ruby 0.1.3 - July 12, 2012
|
2
7
|
* Support for similar API call to retrieve rows similar to a target row
|
3
8
|
* Implemented count-batching for predictions
|
data/lib/veritable/api.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
+
require 'backports/1.9.1'
|
2
|
+
|
1
3
|
require 'veritable/cursor'
|
2
4
|
require 'veritable/datatypes'
|
3
5
|
require 'veritable/errors'
|
4
6
|
require 'veritable/resource'
|
5
7
|
require 'veritable/util'
|
6
8
|
|
7
|
-
module Veritable
|
8
|
-
|
9
|
+
module Veritable
|
9
10
|
# Represents the resources available to a user of the Veritable API.
|
10
11
|
#
|
11
12
|
# Users should not initialize directly; use Veritable.connect as the entry point.
|
@@ -232,7 +233,7 @@ module Veritable
|
|
232
233
|
# Batch uploads multiple rows to the table
|
233
234
|
#
|
234
235
|
# ==== Arguments
|
235
|
-
# * +rows+ -- an
|
236
|
+
# * +rows+ -- an Enumerator over row data Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
|
236
237
|
# * +per_page+ -- optionally controls the number of rows to upload in each batch. Defaults to +100+.
|
237
238
|
#
|
238
239
|
# ==== Returns
|
@@ -255,7 +256,7 @@ module Veritable
|
|
255
256
|
# Batch deletes a list of rows from the table
|
256
257
|
#
|
257
258
|
# ==== Arguments
|
258
|
-
# * +rows+ -- an
|
259
|
+
# * +rows+ -- an Enumerator over row data Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table. Any other keys will be ignored.
|
259
260
|
# * +per_page+ -- optionally controls the number of rows to delete in each batch. Defaults to +100+.
|
260
261
|
#
|
261
262
|
# ==== Returns
|
@@ -400,28 +401,18 @@ module Veritable
|
|
400
401
|
if not per_page.is_a? Fixnum or not per_page > 0
|
401
402
|
raise VeritableError.new("Batch upload or delete must have integer page size greater than 0.")
|
402
403
|
end
|
403
|
-
|
404
|
+
batch = []
|
405
|
+
rows.each do |row|
|
404
406
|
Util.check_row(row)
|
405
|
-
row
|
406
|
-
|
407
|
-
|
408
|
-
|
407
|
+
batch.push(row)
|
408
|
+
if batch.size == per_page
|
409
|
+
post(link('rows'), {'action' => action, 'rows' => batch})
|
410
|
+
batch = []
|
411
|
+
end
|
412
|
+
end
|
413
|
+
if batch.size > 0
|
414
|
+
post(link('rows'), {'action' => action, 'rows' => batch})
|
409
415
|
end
|
410
|
-
ct = (1..per_page).to_a.cycle
|
411
|
-
batch = Array.new()
|
412
|
-
ct.each { |ct|
|
413
|
-
if rows.empty?
|
414
|
-
if batch.size > 0
|
415
|
-
post(link('rows'), {'action' => action, 'rows' => batch})
|
416
|
-
end
|
417
|
-
break
|
418
|
-
end
|
419
|
-
batch.push rows.shift
|
420
|
-
if ct == per_page
|
421
|
-
post(link('rows'), {'action' => action, 'rows' => batch})
|
422
|
-
batch = Array.new()
|
423
|
-
end
|
424
|
-
}
|
425
416
|
end
|
426
417
|
end
|
427
418
|
|
@@ -446,6 +437,7 @@ module Veritable
|
|
446
437
|
# * +wait+ -- blocks until the analysis succeeds or fails
|
447
438
|
# * +predict+ -- makes new predictions based on the analysis
|
448
439
|
# * +related_to+ -- calculates column relatedness based on the analysis
|
440
|
+
# * +similar_to+ -- calculates row relatedness based on the analysis
|
449
441
|
#
|
450
442
|
# See also: https://dev.priorknowledge.com/docs/client/ruby
|
451
443
|
class Analysis
|
@@ -521,33 +513,22 @@ module Veritable
|
|
521
513
|
if not row.is_a? Hash
|
522
514
|
raise VeritableError.new("Predict -- Must provide a row hash to make predictions.")
|
523
515
|
end
|
524
|
-
|
516
|
+
raw_predict([row].to_enum, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'], false).next
|
525
517
|
end
|
526
518
|
|
527
519
|
|
528
520
|
# Makes predictions based on the analysis for multiple rows at a time
|
529
521
|
#
|
530
522
|
# ==== Arguments
|
531
|
-
# * +rows+ -- an
|
523
|
+
# * +rows+ -- an Enumerator over prediction request Hashes, each of which represents a row whose missing values are to be predicted. Keys must be valid String ids of columns contained in the underlying table, and values must be either fixed (conditioning) values of an appropriate type for each column, or +nil+ for values to be predicted. Each prediction request Hash must also have a '_request_id' key with a unique string value.
|
532
524
|
# * +count+ -- optionally specify the number of samples from the predictive distribution to return. Defaults to +100+.
|
533
525
|
#
|
534
526
|
# ==== Returns
|
535
|
-
# An
|
527
|
+
# An Enumerator over Veritable::Prediction objects
|
536
528
|
#
|
537
529
|
# See also: https://dev.priorknowledge.com/docs/client/ruby
|
538
530
|
def batch_predict(rows, count=100)
|
539
|
-
|
540
|
-
raise VeritableError.new("Predict -- Must provide an array of row hashes to make predictions.")
|
541
|
-
end
|
542
|
-
rows.each {|row|
|
543
|
-
if not row.is_a? Hash
|
544
|
-
raise VeritableError.new("Predict -- Invalid row for predictions: #{row}")
|
545
|
-
end
|
546
|
-
if not row['_request_id'].is_a? String
|
547
|
-
raise VeritableError.new("Predict -- Rows for batch predictions must contain a string '_request_id' field: #{row}")
|
548
|
-
end
|
549
|
-
}
|
550
|
-
return raw_predict(rows, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'])
|
531
|
+
return raw_predict(rows.to_enum, count, api_limits['predictions_max_response_cells'], api_limits['predictions_max_cols'], true)
|
551
532
|
end
|
552
533
|
|
553
534
|
|
@@ -588,7 +569,7 @@ module Veritable
|
|
588
569
|
# * +return_data+ -- if +true+, the full row content will be returned. If +false+, only the '_id' field for each row will be returned. Default is +true+.
|
589
570
|
#
|
590
571
|
# ==== Returns
|
591
|
-
# An
|
572
|
+
# An Enumerator over row entries ordered from most similar to least similar.
|
592
573
|
# Each row entry is an array with the first element being the row and
|
593
574
|
# the second element being a relatedness score between 0 and 1.
|
594
575
|
#
|
@@ -604,7 +585,7 @@ module Veritable
|
|
604
585
|
if succeeded?
|
605
586
|
doc = post(link('similar'), {:data => row, :column => column_id,
|
606
587
|
:max_rows => 10, :return_data => true}.update(opts))
|
607
|
-
return doc['data']
|
588
|
+
return doc['data'].to_enum
|
608
589
|
elsif running?
|
609
590
|
raise VeritableError.new("Similar -- Analysis with id #{_id} is still running and not yet ready to calculate similar.")
|
610
591
|
elsif failed?
|
@@ -614,9 +595,6 @@ module Veritable
|
|
614
595
|
end
|
615
596
|
end
|
616
597
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
598
|
# Returns a string representation of the analysis resource
|
621
599
|
def inspect; to_s; end
|
622
600
|
|
@@ -656,51 +634,24 @@ module Veritable
|
|
656
634
|
def description; @doc['description']; end
|
657
635
|
|
658
636
|
private
|
659
|
-
|
660
|
-
def
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
637
|
+
|
638
|
+
def raw_predict(rows, count, maxcells, maxcols, requires_id=true)
|
639
|
+
return Enumerator.new { |y|
|
640
|
+
update if running?
|
641
|
+
if running?
|
642
|
+
raise VeritableError.new("Predict -- Analysis with id #{_id} is still running and not yet ready to predict.")
|
643
|
+
elsif failed?
|
644
|
+
raise VeritableError.new("Predict -- Analysis with id #{_id} has failed and cannot predict.")
|
645
|
+
elsif succeeded?
|
646
|
+
ncells = 0
|
647
|
+
batch = []
|
648
|
+
rows.each { |row|
|
649
|
+
if not row.is_a? Hash
|
650
|
+
raise VeritableError.new("Predict -- Invalid row for predictions: #{row}")
|
651
|
+
end
|
652
|
+
if requires_id and not row['_request_id'].is_a? String
|
653
|
+
raise VeritableError.new("Predict -- Rows for batch predictions must contain a string '_request_id' field: #{row}")
|
672
654
|
end
|
673
|
-
else
|
674
|
-
res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
|
675
|
-
end
|
676
|
-
if not res.is_a? Array
|
677
|
-
begin
|
678
|
-
res.to_s
|
679
|
-
rescue
|
680
|
-
raise VeritableError.new("Predict -- Error making predictions.")
|
681
|
-
else
|
682
|
-
raise VeritableError.new("Predict -- Error making predictions: #{res}")
|
683
|
-
end
|
684
|
-
end
|
685
|
-
(0...batch.size).each {|i|
|
686
|
-
request = batch[i].clone
|
687
|
-
request_id = request['_request_id']
|
688
|
-
distribution = res[(i * count)...((i + 1) * count)]
|
689
|
-
preds.push Prediction.new(request, distribution, schema, request_id)
|
690
|
-
}
|
691
|
-
end
|
692
|
-
|
693
|
-
def raw_predict(rows, count, maxcells, maxcols)
|
694
|
-
update if running?
|
695
|
-
if running?
|
696
|
-
raise VeritableError.new("Predict -- Analysis with id #{_id} is still running and not yet ready to predict.")
|
697
|
-
elsif failed?
|
698
|
-
raise VeritableError.new("Predict -- Analysis with id #{_id} has failed and cannot predict.")
|
699
|
-
elsif succeeded?
|
700
|
-
preds = []
|
701
|
-
ncells = 0
|
702
|
-
batch = []
|
703
|
-
rows.each {|row|
|
704
655
|
ncols = (row.values.select {|v| v.nil?}).size
|
705
656
|
tcols = (row.keys.select {|k| k != '_request_id'}).size
|
706
657
|
if tcols > maxcols
|
@@ -709,28 +660,56 @@ module Veritable
|
|
709
660
|
if ncols > maxcells
|
710
661
|
raise VeritableError.new("Predict -- Cannot predict for row #{row['_request_id']} with #{ncols} missing values: exceeds predicted cell limit of #{maxcells}.")
|
711
662
|
end
|
712
|
-
}
|
713
|
-
rows.each {|row|
|
714
|
-
ncols = (row.values.select {|v| v.nil?}).size
|
715
663
|
n = ncols * count
|
716
664
|
if (ncells + n) > maxcells
|
717
|
-
execute_batch(batch, count,
|
665
|
+
execute_batch(batch, count, maxcells).each {|x| y << x}
|
718
666
|
ncells = n
|
719
667
|
batch = [row]
|
720
668
|
else
|
721
669
|
batch.push row
|
722
670
|
ncells = ncells + n
|
723
671
|
end
|
724
|
-
|
725
|
-
|
726
|
-
|
672
|
+
}
|
673
|
+
execute_batch(batch, count, maxcells).each {|x| y << x}
|
674
|
+
end
|
675
|
+
}
|
676
|
+
end
|
677
|
+
|
678
|
+
def execute_batch(batch, count, maxcells)
|
679
|
+
if batch.size == 0
|
680
|
+
return []
|
681
|
+
end
|
682
|
+
if batch.size == 1
|
683
|
+
data = batch[0]
|
684
|
+
ncols = (data.values.select {|v| v.nil?}).size
|
685
|
+
max_batch_count = (ncols == 0) ? count : (maxcells/ncols).to_i
|
686
|
+
res = []
|
687
|
+
while res.size < count do
|
688
|
+
batch_count = [max_batch_count, count - res.size].min
|
689
|
+
res = res + post(link('predict'), {'data' => data, 'count' => batch_count, 'return_fixed' => false})
|
690
|
+
end
|
727
691
|
else
|
728
|
-
|
692
|
+
res = post(link('predict'), {'data' => batch, 'count' => count, 'return_fixed' => false})
|
729
693
|
end
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
694
|
+
if not res.is_a? Array
|
695
|
+
begin
|
696
|
+
res.to_s
|
697
|
+
rescue
|
698
|
+
raise VeritableError.new("Predict -- Error making predictions.")
|
699
|
+
else
|
700
|
+
raise VeritableError.new("Predict -- Error making predictions: #{res}")
|
701
|
+
end
|
702
|
+
end
|
703
|
+
preds = []
|
704
|
+
(0...batch.size).each {|i|
|
705
|
+
request = batch[i].clone
|
706
|
+
request_id = request['_request_id']
|
707
|
+
distribution = res[(i * count)...((i + 1) * count)]
|
708
|
+
preds.push Prediction.new(request, distribution, schema, request_id)
|
709
|
+
}
|
710
|
+
return preds
|
711
|
+
end
|
712
|
+
end
|
734
713
|
|
735
714
|
# Represents a schema for a Veritable analysis
|
736
715
|
#
|
@@ -905,8 +884,6 @@ end
|
|
905
884
|
@uncertainty[k] = 0.0
|
906
885
|
end
|
907
886
|
}
|
908
|
-
|
909
|
-
|
910
887
|
end
|
911
888
|
|
912
889
|
# Calculates the probability a column's value lies within a range.
|
data/lib/veritable/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: veritable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3.31
|
4
|
+
version: 0.1.4.33
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: backports
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
- !ruby/object:Gem::Dependency
|
63
79
|
name: test-unit
|
64
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -173,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
173
189
|
version: '0'
|
174
190
|
segments:
|
175
191
|
- 0
|
176
|
-
hash:
|
192
|
+
hash: -1563856652623917274
|
177
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
194
|
none: false
|
179
195
|
requirements:
|
@@ -182,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
182
198
|
version: '0'
|
183
199
|
segments:
|
184
200
|
- 0
|
185
|
-
hash:
|
201
|
+
hash: -1563856652623917274
|
186
202
|
requirements: []
|
187
203
|
rubyforge_project:
|
188
204
|
rubygems_version: 1.8.24
|