pest 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +49 -47
- data/VERSION +1 -1
- data/lib/pest.rb +0 -1
- data/lib/pest/data_set.rb +45 -5
- data/lib/pest/data_set/hash.rb +70 -36
- data/lib/pest/data_set/narray.rb +104 -53
- data/lib/pest/estimator.rb +8 -28
- data/lib/pest/estimator/frequency.rb +9 -7
- data/lib/pest/function.rb +0 -7
- data/lib/pest/function/entropy.rb +5 -3
- data/lib/pest/function/probability.rb +45 -10
- data/pest.gemspec +15 -5
- data/spec/pest/data_set/hash_spec.rb +78 -63
- data/spec/pest/data_set/narray_spec.rb +107 -47
- data/spec/pest/data_set_spec.rb +36 -1
- data/spec/pest/estimator/frequency_spec.rb +31 -32
- data/spec/pest/estimator_spec.rb +13 -11
- data/spec/pest/function/entropy_spec.rb +11 -14
- data/spec/pest/function/probability_spec.rb +97 -28
- metadata +68 -6
- data/lib/pest/variable.rb +0 -34
- data/spec/pest/variable_spec.rb +0 -73
data/lib/pest/estimator.rb
CHANGED
@@ -13,17 +13,6 @@ module Pest::Estimator
|
|
13
13
|
@distributions ||= DistributionList.new(self)
|
14
14
|
end
|
15
15
|
|
16
|
-
def to_variable(arg)
|
17
|
-
variable = case arg.class.name
|
18
|
-
when 'Pest::Variable'
|
19
|
-
arg
|
20
|
-
when 'String', 'Symbol'
|
21
|
-
variables[arg] || Pest::Variable.new(:name => arg)
|
22
|
-
end
|
23
|
-
raise ArgumentError unless variables.values.include?(variable)
|
24
|
-
variable
|
25
|
-
end
|
26
|
-
|
27
16
|
module Distribution
|
28
17
|
attr_reader :variables
|
29
18
|
|
@@ -36,7 +25,11 @@ module Pest::Estimator
|
|
36
25
|
variables.to_a.sort
|
37
26
|
end
|
38
27
|
|
39
|
-
def
|
28
|
+
def batch_probability(*args)
|
29
|
+
raise NotImplementedError
|
30
|
+
end
|
31
|
+
|
32
|
+
def probability(*args)
|
40
33
|
raise NotImplementedError
|
41
34
|
end
|
42
35
|
end
|
@@ -46,23 +39,10 @@ module Pest::Estimator
|
|
46
39
|
@estimator = estimator
|
47
40
|
end
|
48
41
|
|
49
|
-
def parse_args(args)
|
50
|
-
set = if args.kind_of? Array
|
51
|
-
if args.any? {|arg| arg.kind_of?(::Set)}
|
52
|
-
args.inject(::Set.new) {|set, el| set + el.to_set}
|
53
|
-
else
|
54
|
-
args.flatten.to_set
|
55
|
-
end
|
56
|
-
elsif args.kind_of? ::Set
|
57
|
-
args
|
58
|
-
else
|
59
|
-
Array(args).to_set
|
60
|
-
end
|
61
|
-
set.map! {|arg| @estimator.to_variable(arg) }
|
62
|
-
end
|
63
|
-
|
64
42
|
def [](*args)
|
65
|
-
set =
|
43
|
+
set = args.to_set
|
44
|
+
raise ArgumentError unless (set - @estimator.variables).empty?
|
45
|
+
|
66
46
|
unless has_key? set
|
67
47
|
self[set] = @estimator.distribution_class.new(@estimator, set)
|
68
48
|
end
|
@@ -11,15 +11,16 @@ class Pest::Estimator::Frequency
|
|
11
11
|
|
12
12
|
class Distribution
|
13
13
|
include Pest::Estimator::Distribution
|
14
|
-
|
14
|
+
|
15
|
+
OFFSET = 0
|
15
16
|
attr_reader :frequencies, :checksum
|
16
17
|
|
17
18
|
def cache_model
|
18
19
|
if @frequencies.nil?
|
19
20
|
@frequencies = Hash.new(0)
|
20
|
-
@estimator.data.
|
21
|
+
@estimator.data.pick(*variable_array).each do |vector|
|
21
22
|
# Make sure this vector is consistently ordered
|
22
|
-
@frequencies[vector] += 1
|
23
|
+
@frequencies[Array(vector)] += 1
|
23
24
|
end
|
24
25
|
end
|
25
26
|
end
|
@@ -27,9 +28,10 @@ class Pest::Estimator::Frequency
|
|
27
28
|
def probability(data)
|
28
29
|
cache_model
|
29
30
|
|
30
|
-
NArray[ data.
|
31
|
-
@frequencies[vector].to_f
|
32
|
-
end ]
|
31
|
+
array = NArray[ data.pick(*variable_array).map do |vector|
|
32
|
+
@frequencies[Array(vector)].to_f
|
33
|
+
end ]
|
34
|
+
(OFFSET + array.reshape!(data.length)) / (OFFSET + @estimator.data.length)
|
33
35
|
end
|
34
36
|
|
35
37
|
def entropy
|
@@ -43,7 +45,7 @@ class Pest::Estimator::Frequency
|
|
43
45
|
private
|
44
46
|
|
45
47
|
def unique_event_dataset
|
46
|
-
vectors =
|
48
|
+
vectors = NMatrix[@frequencies.keys]
|
47
49
|
hash = {}
|
48
50
|
variable_array.each_index do |i|
|
49
51
|
# Extract a single variable from the array of vectors
|
data/lib/pest/function.rb
CHANGED
@@ -12,12 +12,14 @@ module Pest::Function
|
|
12
12
|
|
13
13
|
def initialize(estimator, variables)
|
14
14
|
@estimator = estimator
|
15
|
-
@event =
|
16
|
-
@givens =
|
15
|
+
@event = variables.to_set
|
16
|
+
@givens = Set.new
|
17
|
+
raise ArgumentError unless (@event - @estimator.variables).empty?
|
17
18
|
end
|
18
19
|
|
19
20
|
def given(*variables)
|
20
|
-
@givens.merge
|
21
|
+
@givens.merge variables.to_set
|
22
|
+
raise ArgumentError unless (@givens - @estimator.variables).empty?
|
21
23
|
self
|
22
24
|
end
|
23
25
|
|
@@ -1,24 +1,30 @@
|
|
1
1
|
module Pest::Function
|
2
2
|
module Probability
|
3
|
-
def
|
4
|
-
|
3
|
+
def batch_probability(*variables)
|
4
|
+
BatchBuilder.new(self, variables)
|
5
|
+
end
|
6
|
+
alias :batch_p :batch_probability
|
7
|
+
|
8
|
+
def probability(event={})
|
9
|
+
Builder.new(self, event)
|
5
10
|
end
|
6
11
|
alias :p :probability
|
7
12
|
|
8
|
-
class
|
13
|
+
class BatchBuilder
|
9
14
|
include Pest::Function::Builder
|
10
15
|
|
11
16
|
attr_reader :estimator, :data_source, :event, :givens
|
12
17
|
|
13
18
|
def initialize(estimator, variables)
|
14
19
|
@estimator = estimator
|
15
|
-
@
|
16
|
-
@event = parse(variables)
|
20
|
+
@event = variables.to_set
|
17
21
|
@givens = [].to_set
|
22
|
+
raise ArgumentError unless (@event - @estimator.variables).empty?
|
18
23
|
end
|
19
24
|
|
20
25
|
def given(*variables)
|
21
|
-
@givens
|
26
|
+
@givens += variables
|
27
|
+
raise ArgumentError unless (@givens - @estimator.variables).empty?
|
22
28
|
self
|
23
29
|
end
|
24
30
|
|
@@ -28,14 +34,43 @@ module Pest::Function
|
|
28
34
|
end
|
29
35
|
|
30
36
|
def evaluate
|
31
|
-
joint = estimator.distributions[event].probability(data_source)
|
32
37
|
if givens.empty?
|
33
|
-
|
38
|
+
estimator.distributions[*event].probability(data_source).to_a
|
34
39
|
else
|
35
|
-
|
36
|
-
|
40
|
+
joint = estimator.distributions[*(event + givens)].probability(data_source)
|
41
|
+
conditional = estimator.distributions[*givens].probability(data_source)
|
42
|
+
|
43
|
+
(joint / conditional).to_a
|
37
44
|
end
|
38
45
|
end
|
39
46
|
end
|
47
|
+
|
48
|
+
class Builder
|
49
|
+
include Pest::Function::Builder
|
50
|
+
|
51
|
+
attr_accessor :estimator, :event, :givens
|
52
|
+
|
53
|
+
def initialize(estimator, event)
|
54
|
+
@estimator = estimator
|
55
|
+
@event = event
|
56
|
+
@givens = Hash.new
|
57
|
+
end
|
58
|
+
|
59
|
+
def given(given)
|
60
|
+
givens.merge! given
|
61
|
+
raise ArgumentError unless (given.keys.to_set - @estimator.variables).empty?
|
62
|
+
self
|
63
|
+
end
|
64
|
+
|
65
|
+
def evaluate
|
66
|
+
data_hash = event.merge(givens)
|
67
|
+
data_hash.each_key {|key| data_hash[key] = Array(data_hash[key])}
|
68
|
+
|
69
|
+
data = Pest::DataSet::Hash.from_hash(data_hash)
|
70
|
+
BatchBuilder.new(estimator, event.keys).
|
71
|
+
given(*givens.keys).in(data).
|
72
|
+
evaluate.first
|
73
|
+
end
|
74
|
+
end
|
40
75
|
end
|
41
76
|
end
|
data/pest.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "pest"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ryan Michael"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-07-05"
|
13
13
|
s.description = "Wrappers to facilitate different classes of probability estimators"
|
14
14
|
s.email = "kerinin@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -30,7 +30,6 @@ Gem::Specification.new do |s|
|
|
30
30
|
"lib/pest/function.rb",
|
31
31
|
"lib/pest/function/entropy.rb",
|
32
32
|
"lib/pest/function/probability.rb",
|
33
|
-
"lib/pest/variable.rb",
|
34
33
|
"lib/pest/version.rb",
|
35
34
|
"pest.gemspec",
|
36
35
|
"spec/pest/data_set/hash_spec.rb",
|
@@ -45,14 +44,13 @@ Gem::Specification.new do |s|
|
|
45
44
|
"spec/pest/estimator_spec.rb",
|
46
45
|
"spec/pest/function/entropy_spec.rb",
|
47
46
|
"spec/pest/function/probability_spec.rb",
|
48
|
-
"spec/pest/variable_spec.rb",
|
49
47
|
"spec/pest_spec.rb",
|
50
48
|
"spec/spec_helper.rb"
|
51
49
|
]
|
52
50
|
s.homepage = "http://github.com/kerinin/pest"
|
53
51
|
s.licenses = ["MIT"]
|
54
52
|
s.require_paths = ["lib"]
|
55
|
-
s.rubygems_version = "1.8.
|
53
|
+
s.rubygems_version = "1.8.23"
|
56
54
|
s.summary = "Probability Estimation"
|
57
55
|
|
58
56
|
if s.respond_to? :specification_version then
|
@@ -74,6 +72,10 @@ Gem::Specification.new do |s|
|
|
74
72
|
s.add_development_dependency(%q<rake>, [">= 0"])
|
75
73
|
s.add_development_dependency(%q<pry>, [">= 0"])
|
76
74
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
75
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
76
|
+
s.add_development_dependency(%q<rake>, [">= 0"])
|
77
|
+
s.add_development_dependency(%q<pry>, [">= 0"])
|
78
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
77
79
|
else
|
78
80
|
s.add_dependency(%q<pest>, [">= 0"])
|
79
81
|
s.add_dependency(%q<narray>, [">= 0"])
|
@@ -90,6 +92,10 @@ Gem::Specification.new do |s|
|
|
90
92
|
s.add_dependency(%q<rake>, [">= 0"])
|
91
93
|
s.add_dependency(%q<pry>, [">= 0"])
|
92
94
|
s.add_dependency(%q<rspec>, [">= 0"])
|
95
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
96
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
97
|
+
s.add_dependency(%q<pry>, [">= 0"])
|
98
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
93
99
|
end
|
94
100
|
else
|
95
101
|
s.add_dependency(%q<pest>, [">= 0"])
|
@@ -107,6 +113,10 @@ Gem::Specification.new do |s|
|
|
107
113
|
s.add_dependency(%q<rake>, [">= 0"])
|
108
114
|
s.add_dependency(%q<pry>, [">= 0"])
|
109
115
|
s.add_dependency(%q<rspec>, [">= 0"])
|
116
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
117
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
118
|
+
s.add_dependency(%q<pry>, [">= 0"])
|
119
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
110
120
|
end
|
111
121
|
end
|
112
122
|
|
@@ -5,55 +5,30 @@ require 'spec_helper'
|
|
5
5
|
# conflated
|
6
6
|
|
7
7
|
describe Pest::DataSet::Hash do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
end
|
12
|
-
|
13
|
-
describe "::translators" do
|
14
|
-
it "maps File => from_file" do
|
15
|
-
@class.translators[File].should == :from_file
|
16
|
-
end
|
17
|
-
|
18
|
-
it "maps String => from_file" do
|
19
|
-
@class.translators[String].should == :from_file
|
20
|
-
end
|
8
|
+
before(:each) do
|
9
|
+
@class = Pest::DataSet::Hash
|
10
|
+
end
|
21
11
|
|
22
|
-
|
23
|
-
|
24
|
-
|
12
|
+
describe "::translators" do
|
13
|
+
it "maps String => from_file" do
|
14
|
+
@class.translators[String].should == :from_file
|
25
15
|
end
|
26
16
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
Marshal.stub(:restore).with(file).and_return({:foo => 1})
|
32
|
-
end
|
33
|
-
|
34
|
-
it "looks for file if passed string" do
|
35
|
-
File.should_receive(:open).with('foo', 'r')
|
36
|
-
@class.from_file('foo')
|
37
|
-
end
|
38
|
-
|
39
|
-
it "unmarshals" do
|
40
|
-
Marshal.should_receive(:restore)
|
41
|
-
@class.from_file('foo')
|
42
|
-
end
|
43
|
-
|
44
|
-
it "sets variables" do
|
45
|
-
@class.from_file('foo').variables.length.should == 1
|
46
|
-
end
|
17
|
+
it "maps Symbol => from_file" do
|
18
|
+
@class.translators[Symbol].should == :from_file
|
19
|
+
end
|
20
|
+
end
|
47
21
|
|
48
|
-
|
49
|
-
|
50
|
-
|
22
|
+
describe "::from_hash" do
|
23
|
+
it "gets variables from keys" do
|
24
|
+
@instance = Pest::DataSet::Hash.from_hash(:foo => [1,2,3], :bar => [3,4,5])
|
25
|
+
@instance.variables.should == [:foo, :bar].to_set
|
51
26
|
end
|
52
27
|
end
|
53
28
|
|
54
29
|
before(:each) do
|
55
30
|
@data = {:foo => [1,2,3], :bar => [3,4,5]}
|
56
|
-
@instance = Pest::DataSet::Hash.
|
31
|
+
@instance = Pest::DataSet::Hash.from_hash(@data)
|
57
32
|
end
|
58
33
|
|
59
34
|
describe "#to_hash" do
|
@@ -62,47 +37,87 @@ describe Pest::DataSet::Hash do
|
|
62
37
|
end
|
63
38
|
end
|
64
39
|
|
65
|
-
describe "#
|
66
|
-
|
67
|
-
@instance.
|
40
|
+
describe "#pick" do
|
41
|
+
before(:each) do
|
42
|
+
@instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
|
43
|
+
end
|
44
|
+
|
45
|
+
it "accepts a single symbol string" do
|
46
|
+
@instance.pick(:foo).data.to_a.first.should == [1,2,3]
|
68
47
|
end
|
69
48
|
|
70
|
-
it "
|
71
|
-
@instance.
|
49
|
+
it "accepts a single variable" do
|
50
|
+
@instance.pick(:foo).data.to_a.first.should == [1,2,3]
|
72
51
|
end
|
52
|
+
|
53
|
+
it "accepts multiple variables" do
|
54
|
+
@instance.pick(:bar, :foo).data.to_a.should == [[4,5,6],[1,2,3]]
|
55
|
+
end
|
73
56
|
end
|
74
57
|
|
75
|
-
describe "#
|
58
|
+
describe "#length" do
|
76
59
|
before(:each) do
|
77
|
-
@
|
78
|
-
|
79
|
-
Marshal.stub(:dump)
|
60
|
+
@data = {:foo => [1,2,3], :bar => [3,4,5]}
|
61
|
+
@instance = Pest::DataSet::Hash.from_hash(@data)
|
80
62
|
end
|
81
63
|
|
82
|
-
it "
|
83
|
-
|
84
|
-
@instance.save('foo')
|
64
|
+
it "delegates to hash" do
|
65
|
+
@instance.length.should == 3
|
85
66
|
end
|
67
|
+
end
|
86
68
|
|
87
|
-
|
88
|
-
|
89
|
-
@instance.
|
69
|
+
describe "#each" do
|
70
|
+
before(:each) do
|
71
|
+
@instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
|
90
72
|
end
|
91
73
|
|
92
|
-
it "
|
93
|
-
|
94
|
-
|
74
|
+
it "yields vectors" do
|
75
|
+
block = double("block")
|
76
|
+
block.should_receive(:yielding).with([1,4])
|
77
|
+
block.should_receive(:yielding).with([2,5])
|
78
|
+
block.should_receive(:yielding).with([3,6])
|
79
|
+
@instance.each {|i| block.yielding(i)}
|
95
80
|
end
|
96
81
|
end
|
97
82
|
|
98
|
-
describe "#
|
83
|
+
describe "#map" do
|
99
84
|
before(:each) do
|
100
|
-
@
|
101
|
-
@instance = Pest::DataSet::Hash.new(@data)
|
85
|
+
@instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
|
102
86
|
end
|
103
87
|
|
104
|
-
it "
|
105
|
-
@instance.
|
88
|
+
it "works" do
|
89
|
+
@instance.map {|i| i}.should == [[1,4],[2,5],[3,6]]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "#merge" do
|
94
|
+
before(:each) do
|
95
|
+
@other = @class.from_hash :foo => [10,11,12,13], :baz => [1,2,3,4]
|
96
|
+
@instance = @class.from_hash :foo => [1,2,3,4], :bar => [5,6,7,8]
|
97
|
+
end
|
98
|
+
|
99
|
+
it "accepts a dataset and returns dataset" do
|
100
|
+
@instance.merge(@other).should be_a(@class)
|
101
|
+
end
|
102
|
+
|
103
|
+
it "accepts a hash and returns dataset" do
|
104
|
+
@instance.merge(:foo => [10,11,12,13], :baz => [1,2,3,4]).should be_a(@class)
|
105
|
+
end
|
106
|
+
|
107
|
+
it "requires the dataset to have the same length" do
|
108
|
+
expect { @instance.merge(:foo => [1,2,3,4,5]) }.to raise_error(ArgumentError)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "adds the passed variable to self" do
|
112
|
+
@instance.merge(@other).variables.should include(:baz)
|
113
|
+
end
|
114
|
+
|
115
|
+
it "adds the passed data to self" do
|
116
|
+
@instance.merge(@other).pick(:baz).to_a.flatten.should == [1,2,3,4]
|
117
|
+
end
|
118
|
+
|
119
|
+
it "over-writes variables in self with variables in other" do
|
120
|
+
@instance.merge(@other).pick(:foo).to_a.flatten.should == [10,11,12,13]
|
106
121
|
end
|
107
122
|
end
|
108
123
|
end
|