pest 0.0.0 → 0.1.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/README.md +49 -47
- data/VERSION +1 -1
- data/lib/pest.rb +0 -1
- data/lib/pest/data_set.rb +45 -5
- data/lib/pest/data_set/hash.rb +70 -36
- data/lib/pest/data_set/narray.rb +104 -53
- data/lib/pest/estimator.rb +8 -28
- data/lib/pest/estimator/frequency.rb +9 -7
- data/lib/pest/function.rb +0 -7
- data/lib/pest/function/entropy.rb +5 -3
- data/lib/pest/function/probability.rb +45 -10
- data/pest.gemspec +15 -5
- data/spec/pest/data_set/hash_spec.rb +78 -63
- data/spec/pest/data_set/narray_spec.rb +107 -47
- data/spec/pest/data_set_spec.rb +36 -1
- data/spec/pest/estimator/frequency_spec.rb +31 -32
- data/spec/pest/estimator_spec.rb +13 -11
- data/spec/pest/function/entropy_spec.rb +11 -14
- data/spec/pest/function/probability_spec.rb +97 -28
- metadata +68 -6
- data/lib/pest/variable.rb +0 -34
- data/spec/pest/variable_spec.rb +0 -73
data/lib/pest/estimator.rb
CHANGED
@@ -13,17 +13,6 @@ module Pest::Estimator
|
|
13
13
|
@distributions ||= DistributionList.new(self)
|
14
14
|
end
|
15
15
|
|
16
|
-
def to_variable(arg)
|
17
|
-
variable = case arg.class.name
|
18
|
-
when 'Pest::Variable'
|
19
|
-
arg
|
20
|
-
when 'String', 'Symbol'
|
21
|
-
variables[arg] || Pest::Variable.new(:name => arg)
|
22
|
-
end
|
23
|
-
raise ArgumentError unless variables.values.include?(variable)
|
24
|
-
variable
|
25
|
-
end
|
26
|
-
|
27
16
|
module Distribution
|
28
17
|
attr_reader :variables
|
29
18
|
|
@@ -36,7 +25,11 @@ module Pest::Estimator
|
|
36
25
|
variables.to_a.sort
|
37
26
|
end
|
38
27
|
|
39
|
-
def
|
28
|
+
def batch_probability(*args)
|
29
|
+
raise NotImplementedError
|
30
|
+
end
|
31
|
+
|
32
|
+
def probability(*args)
|
40
33
|
raise NotImplementedError
|
41
34
|
end
|
42
35
|
end
|
@@ -46,23 +39,10 @@ module Pest::Estimator
|
|
46
39
|
@estimator = estimator
|
47
40
|
end
|
48
41
|
|
49
|
-
def parse_args(args)
|
50
|
-
set = if args.kind_of? Array
|
51
|
-
if args.any? {|arg| arg.kind_of?(::Set)}
|
52
|
-
args.inject(::Set.new) {|set, el| set + el.to_set}
|
53
|
-
else
|
54
|
-
args.flatten.to_set
|
55
|
-
end
|
56
|
-
elsif args.kind_of? ::Set
|
57
|
-
args
|
58
|
-
else
|
59
|
-
Array(args).to_set
|
60
|
-
end
|
61
|
-
set.map! {|arg| @estimator.to_variable(arg) }
|
62
|
-
end
|
63
|
-
|
64
42
|
def [](*args)
|
65
|
-
set =
|
43
|
+
set = args.to_set
|
44
|
+
raise ArgumentError unless (set - @estimator.variables).empty?
|
45
|
+
|
66
46
|
unless has_key? set
|
67
47
|
self[set] = @estimator.distribution_class.new(@estimator, set)
|
68
48
|
end
|
@@ -11,15 +11,16 @@ class Pest::Estimator::Frequency
|
|
11
11
|
|
12
12
|
class Distribution
|
13
13
|
include Pest::Estimator::Distribution
|
14
|
-
|
14
|
+
|
15
|
+
OFFSET = 0
|
15
16
|
attr_reader :frequencies, :checksum
|
16
17
|
|
17
18
|
def cache_model
|
18
19
|
if @frequencies.nil?
|
19
20
|
@frequencies = Hash.new(0)
|
20
|
-
@estimator.data.
|
21
|
+
@estimator.data.pick(*variable_array).each do |vector|
|
21
22
|
# Make sure this vector is consistently ordered
|
22
|
-
@frequencies[vector] += 1
|
23
|
+
@frequencies[Array(vector)] += 1
|
23
24
|
end
|
24
25
|
end
|
25
26
|
end
|
@@ -27,9 +28,10 @@ class Pest::Estimator::Frequency
|
|
27
28
|
def probability(data)
|
28
29
|
cache_model
|
29
30
|
|
30
|
-
NArray[ data.
|
31
|
-
@frequencies[vector].to_f
|
32
|
-
end ]
|
31
|
+
array = NArray[ data.pick(*variable_array).map do |vector|
|
32
|
+
@frequencies[Array(vector)].to_f
|
33
|
+
end ]
|
34
|
+
(OFFSET + array.reshape!(data.length)) / (OFFSET + @estimator.data.length)
|
33
35
|
end
|
34
36
|
|
35
37
|
def entropy
|
@@ -43,7 +45,7 @@ class Pest::Estimator::Frequency
|
|
43
45
|
private
|
44
46
|
|
45
47
|
def unique_event_dataset
|
46
|
-
vectors =
|
48
|
+
vectors = NMatrix[@frequencies.keys]
|
47
49
|
hash = {}
|
48
50
|
variable_array.each_index do |i|
|
49
51
|
# Extract a single variable from the array of vectors
|
data/lib/pest/function.rb
CHANGED
@@ -12,12 +12,14 @@ module Pest::Function
|
|
12
12
|
|
13
13
|
def initialize(estimator, variables)
|
14
14
|
@estimator = estimator
|
15
|
-
@event =
|
16
|
-
@givens =
|
15
|
+
@event = variables.to_set
|
16
|
+
@givens = Set.new
|
17
|
+
raise ArgumentError unless (@event - @estimator.variables).empty?
|
17
18
|
end
|
18
19
|
|
19
20
|
def given(*variables)
|
20
|
-
@givens.merge
|
21
|
+
@givens.merge variables.to_set
|
22
|
+
raise ArgumentError unless (@givens - @estimator.variables).empty?
|
21
23
|
self
|
22
24
|
end
|
23
25
|
|
@@ -1,24 +1,30 @@
|
|
1
1
|
module Pest::Function
|
2
2
|
module Probability
|
3
|
-
def
|
4
|
-
|
3
|
+
def batch_probability(*variables)
|
4
|
+
BatchBuilder.new(self, variables)
|
5
|
+
end
|
6
|
+
alias :batch_p :batch_probability
|
7
|
+
|
8
|
+
def probability(event={})
|
9
|
+
Builder.new(self, event)
|
5
10
|
end
|
6
11
|
alias :p :probability
|
7
12
|
|
8
|
-
class
|
13
|
+
class BatchBuilder
|
9
14
|
include Pest::Function::Builder
|
10
15
|
|
11
16
|
attr_reader :estimator, :data_source, :event, :givens
|
12
17
|
|
13
18
|
def initialize(estimator, variables)
|
14
19
|
@estimator = estimator
|
15
|
-
@
|
16
|
-
@event = parse(variables)
|
20
|
+
@event = variables.to_set
|
17
21
|
@givens = [].to_set
|
22
|
+
raise ArgumentError unless (@event - @estimator.variables).empty?
|
18
23
|
end
|
19
24
|
|
20
25
|
def given(*variables)
|
21
|
-
@givens
|
26
|
+
@givens += variables
|
27
|
+
raise ArgumentError unless (@givens - @estimator.variables).empty?
|
22
28
|
self
|
23
29
|
end
|
24
30
|
|
@@ -28,14 +34,43 @@ module Pest::Function
|
|
28
34
|
end
|
29
35
|
|
30
36
|
def evaluate
|
31
|
-
joint = estimator.distributions[event].probability(data_source)
|
32
37
|
if givens.empty?
|
33
|
-
|
38
|
+
estimator.distributions[*event].probability(data_source).to_a
|
34
39
|
else
|
35
|
-
|
36
|
-
|
40
|
+
joint = estimator.distributions[*(event + givens)].probability(data_source)
|
41
|
+
conditional = estimator.distributions[*givens].probability(data_source)
|
42
|
+
|
43
|
+
(joint / conditional).to_a
|
37
44
|
end
|
38
45
|
end
|
39
46
|
end
|
47
|
+
|
48
|
+
class Builder
|
49
|
+
include Pest::Function::Builder
|
50
|
+
|
51
|
+
attr_accessor :estimator, :event, :givens
|
52
|
+
|
53
|
+
def initialize(estimator, event)
|
54
|
+
@estimator = estimator
|
55
|
+
@event = event
|
56
|
+
@givens = Hash.new
|
57
|
+
end
|
58
|
+
|
59
|
+
def given(given)
|
60
|
+
givens.merge! given
|
61
|
+
raise ArgumentError unless (given.keys.to_set - @estimator.variables).empty?
|
62
|
+
self
|
63
|
+
end
|
64
|
+
|
65
|
+
def evaluate
|
66
|
+
data_hash = event.merge(givens)
|
67
|
+
data_hash.each_key {|key| data_hash[key] = Array(data_hash[key])}
|
68
|
+
|
69
|
+
data = Pest::DataSet::Hash.from_hash(data_hash)
|
70
|
+
BatchBuilder.new(estimator, event.keys).
|
71
|
+
given(*givens.keys).in(data).
|
72
|
+
evaluate.first
|
73
|
+
end
|
74
|
+
end
|
40
75
|
end
|
41
76
|
end
|
data/pest.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "pest"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ryan Michael"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-07-05"
|
13
13
|
s.description = "Wrappers to facilitate different classes of probability estimators"
|
14
14
|
s.email = "kerinin@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -30,7 +30,6 @@ Gem::Specification.new do |s|
|
|
30
30
|
"lib/pest/function.rb",
|
31
31
|
"lib/pest/function/entropy.rb",
|
32
32
|
"lib/pest/function/probability.rb",
|
33
|
-
"lib/pest/variable.rb",
|
34
33
|
"lib/pest/version.rb",
|
35
34
|
"pest.gemspec",
|
36
35
|
"spec/pest/data_set/hash_spec.rb",
|
@@ -45,14 +44,13 @@ Gem::Specification.new do |s|
|
|
45
44
|
"spec/pest/estimator_spec.rb",
|
46
45
|
"spec/pest/function/entropy_spec.rb",
|
47
46
|
"spec/pest/function/probability_spec.rb",
|
48
|
-
"spec/pest/variable_spec.rb",
|
49
47
|
"spec/pest_spec.rb",
|
50
48
|
"spec/spec_helper.rb"
|
51
49
|
]
|
52
50
|
s.homepage = "http://github.com/kerinin/pest"
|
53
51
|
s.licenses = ["MIT"]
|
54
52
|
s.require_paths = ["lib"]
|
55
|
-
s.rubygems_version = "1.8.
|
53
|
+
s.rubygems_version = "1.8.23"
|
56
54
|
s.summary = "Probability Estimation"
|
57
55
|
|
58
56
|
if s.respond_to? :specification_version then
|
@@ -74,6 +72,10 @@ Gem::Specification.new do |s|
|
|
74
72
|
s.add_development_dependency(%q<rake>, [">= 0"])
|
75
73
|
s.add_development_dependency(%q<pry>, [">= 0"])
|
76
74
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
75
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
76
|
+
s.add_development_dependency(%q<rake>, [">= 0"])
|
77
|
+
s.add_development_dependency(%q<pry>, [">= 0"])
|
78
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
77
79
|
else
|
78
80
|
s.add_dependency(%q<pest>, [">= 0"])
|
79
81
|
s.add_dependency(%q<narray>, [">= 0"])
|
@@ -90,6 +92,10 @@ Gem::Specification.new do |s|
|
|
90
92
|
s.add_dependency(%q<rake>, [">= 0"])
|
91
93
|
s.add_dependency(%q<pry>, [">= 0"])
|
92
94
|
s.add_dependency(%q<rspec>, [">= 0"])
|
95
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
96
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
97
|
+
s.add_dependency(%q<pry>, [">= 0"])
|
98
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
93
99
|
end
|
94
100
|
else
|
95
101
|
s.add_dependency(%q<pest>, [">= 0"])
|
@@ -107,6 +113,10 @@ Gem::Specification.new do |s|
|
|
107
113
|
s.add_dependency(%q<rake>, [">= 0"])
|
108
114
|
s.add_dependency(%q<pry>, [">= 0"])
|
109
115
|
s.add_dependency(%q<rspec>, [">= 0"])
|
116
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
117
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
118
|
+
s.add_dependency(%q<pry>, [">= 0"])
|
119
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
110
120
|
end
|
111
121
|
end
|
112
122
|
|
@@ -5,55 +5,30 @@ require 'spec_helper'
|
|
5
5
|
# conflated
|
6
6
|
|
7
7
|
describe Pest::DataSet::Hash do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
end
|
12
|
-
|
13
|
-
describe "::translators" do
|
14
|
-
it "maps File => from_file" do
|
15
|
-
@class.translators[File].should == :from_file
|
16
|
-
end
|
17
|
-
|
18
|
-
it "maps String => from_file" do
|
19
|
-
@class.translators[String].should == :from_file
|
20
|
-
end
|
8
|
+
before(:each) do
|
9
|
+
@class = Pest::DataSet::Hash
|
10
|
+
end
|
21
11
|
|
22
|
-
|
23
|
-
|
24
|
-
|
12
|
+
describe "::translators" do
|
13
|
+
it "maps String => from_file" do
|
14
|
+
@class.translators[String].should == :from_file
|
25
15
|
end
|
26
16
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
Marshal.stub(:restore).with(file).and_return({:foo => 1})
|
32
|
-
end
|
33
|
-
|
34
|
-
it "looks for file if passed string" do
|
35
|
-
File.should_receive(:open).with('foo', 'r')
|
36
|
-
@class.from_file('foo')
|
37
|
-
end
|
38
|
-
|
39
|
-
it "unmarshals" do
|
40
|
-
Marshal.should_receive(:restore)
|
41
|
-
@class.from_file('foo')
|
42
|
-
end
|
43
|
-
|
44
|
-
it "sets variables" do
|
45
|
-
@class.from_file('foo').variables.length.should == 1
|
46
|
-
end
|
17
|
+
it "maps Symbol => from_file" do
|
18
|
+
@class.translators[Symbol].should == :from_file
|
19
|
+
end
|
20
|
+
end
|
47
21
|
|
48
|
-
|
49
|
-
|
50
|
-
|
22
|
+
describe "::from_hash" do
|
23
|
+
it "gets variables from keys" do
|
24
|
+
@instance = Pest::DataSet::Hash.from_hash(:foo => [1,2,3], :bar => [3,4,5])
|
25
|
+
@instance.variables.should == [:foo, :bar].to_set
|
51
26
|
end
|
52
27
|
end
|
53
28
|
|
54
29
|
before(:each) do
|
55
30
|
@data = {:foo => [1,2,3], :bar => [3,4,5]}
|
56
|
-
@instance = Pest::DataSet::Hash.
|
31
|
+
@instance = Pest::DataSet::Hash.from_hash(@data)
|
57
32
|
end
|
58
33
|
|
59
34
|
describe "#to_hash" do
|
@@ -62,47 +37,87 @@ describe Pest::DataSet::Hash do
|
|
62
37
|
end
|
63
38
|
end
|
64
39
|
|
65
|
-
describe "#
|
66
|
-
|
67
|
-
@instance.
|
40
|
+
describe "#pick" do
|
41
|
+
before(:each) do
|
42
|
+
@instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
|
43
|
+
end
|
44
|
+
|
45
|
+
it "accepts a single symbol string" do
|
46
|
+
@instance.pick(:foo).data.to_a.first.should == [1,2,3]
|
68
47
|
end
|
69
48
|
|
70
|
-
it "
|
71
|
-
@instance.
|
49
|
+
it "accepts a single variable" do
|
50
|
+
@instance.pick(:foo).data.to_a.first.should == [1,2,3]
|
72
51
|
end
|
52
|
+
|
53
|
+
it "accepts multiple variables" do
|
54
|
+
@instance.pick(:bar, :foo).data.to_a.should == [[4,5,6],[1,2,3]]
|
55
|
+
end
|
73
56
|
end
|
74
57
|
|
75
|
-
describe "#
|
58
|
+
describe "#length" do
|
76
59
|
before(:each) do
|
77
|
-
@
|
78
|
-
|
79
|
-
Marshal.stub(:dump)
|
60
|
+
@data = {:foo => [1,2,3], :bar => [3,4,5]}
|
61
|
+
@instance = Pest::DataSet::Hash.from_hash(@data)
|
80
62
|
end
|
81
63
|
|
82
|
-
it "
|
83
|
-
|
84
|
-
@instance.save('foo')
|
64
|
+
it "delegates to hash" do
|
65
|
+
@instance.length.should == 3
|
85
66
|
end
|
67
|
+
end
|
86
68
|
|
87
|
-
|
88
|
-
|
89
|
-
@instance.
|
69
|
+
describe "#each" do
|
70
|
+
before(:each) do
|
71
|
+
@instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
|
90
72
|
end
|
91
73
|
|
92
|
-
it "
|
93
|
-
|
94
|
-
|
74
|
+
it "yields vectors" do
|
75
|
+
block = double("block")
|
76
|
+
block.should_receive(:yielding).with([1,4])
|
77
|
+
block.should_receive(:yielding).with([2,5])
|
78
|
+
block.should_receive(:yielding).with([3,6])
|
79
|
+
@instance.each {|i| block.yielding(i)}
|
95
80
|
end
|
96
81
|
end
|
97
82
|
|
98
|
-
describe "#
|
83
|
+
describe "#map" do
|
99
84
|
before(:each) do
|
100
|
-
@
|
101
|
-
@instance = Pest::DataSet::Hash.new(@data)
|
85
|
+
@instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
|
102
86
|
end
|
103
87
|
|
104
|
-
it "
|
105
|
-
@instance.
|
88
|
+
it "works" do
|
89
|
+
@instance.map {|i| i}.should == [[1,4],[2,5],[3,6]]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "#merge" do
|
94
|
+
before(:each) do
|
95
|
+
@other = @class.from_hash :foo => [10,11,12,13], :baz => [1,2,3,4]
|
96
|
+
@instance = @class.from_hash :foo => [1,2,3,4], :bar => [5,6,7,8]
|
97
|
+
end
|
98
|
+
|
99
|
+
it "accepts a dataset and returns dataset" do
|
100
|
+
@instance.merge(@other).should be_a(@class)
|
101
|
+
end
|
102
|
+
|
103
|
+
it "accepts a hash and returns dataset" do
|
104
|
+
@instance.merge(:foo => [10,11,12,13], :baz => [1,2,3,4]).should be_a(@class)
|
105
|
+
end
|
106
|
+
|
107
|
+
it "requires the dataset to have the same length" do
|
108
|
+
expect { @instance.merge(:foo => [1,2,3,4,5]) }.to raise_error(ArgumentError)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "adds the passed variable to self" do
|
112
|
+
@instance.merge(@other).variables.should include(:baz)
|
113
|
+
end
|
114
|
+
|
115
|
+
it "adds the passed data to self" do
|
116
|
+
@instance.merge(@other).pick(:baz).to_a.flatten.should == [1,2,3,4]
|
117
|
+
end
|
118
|
+
|
119
|
+
it "over-writes variables in self with variables in other" do
|
120
|
+
@instance.merge(@other).pick(:foo).to_a.flatten.should == [10,11,12,13]
|
106
121
|
end
|
107
122
|
end
|
108
123
|
end
|