fathom 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.bundle/config +2 -0
- data/.document +5 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +30 -0
- data/LICENSE +20 -0
- data/README.md +176 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/autotest/discover.rb +1 -0
- data/lib/fathom.rb +68 -0
- data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
- data/lib/fathom/archive/n2.rb +198 -0
- data/lib/fathom/archive/n3.rb +119 -0
- data/lib/fathom/archive/node.rb +74 -0
- data/lib/fathom/archive/noodle.rb +136 -0
- data/lib/fathom/archive/scratch.rb +45 -0
- data/lib/fathom/basic_node.rb +8 -0
- data/lib/fathom/causal_graph.rb +12 -0
- data/lib/fathom/combined_plausibilities.rb +12 -0
- data/lib/fathom/concept.rb +83 -0
- data/lib/fathom/data_node.rb +51 -0
- data/lib/fathom/import.rb +68 -0
- data/lib/fathom/import/csv_import.rb +60 -0
- data/lib/fathom/import/yaml_import.rb +53 -0
- data/lib/fathom/inverter.rb +21 -0
- data/lib/fathom/knowledge_base.rb +23 -0
- data/lib/fathom/monte_carlo_set.rb +76 -0
- data/lib/fathom/node_utilities.rb +8 -0
- data/lib/fathom/plausible_range.rb +82 -0
- data/lib/fathom/value_aggregator.rb +11 -0
- data/lib/fathom/value_description.rb +79 -0
- data/lib/fathom/value_multiplier.rb +18 -0
- data/lib/options_hash.rb +186 -0
- data/spec/fathom/data_node_spec.rb +61 -0
- data/spec/fathom/import/csv_import_spec.rb +36 -0
- data/spec/fathom/import/yaml_import_spec.rb +40 -0
- data/spec/fathom/import_spec.rb +22 -0
- data/spec/fathom/knowledge_base_spec.rb +16 -0
- data/spec/fathom/monte_carlo_set_spec.rb +58 -0
- data/spec/fathom/plausible_range_spec.rb +130 -0
- data/spec/fathom/value_description_spec.rb +70 -0
- data/spec/fathom_spec.rb +8 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/support/demo.yml +17 -0
- metadata +135 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
=begin
|
4
|
+
Some noodling about what a node might contain in order to describe the joint probabilities.
|
5
|
+
=end
|
6
|
+
# A graph node: wraps a Variable and remembers the parents it was built with.
class Node

  attr_reader :variable, :parents

  # variable - a Variable, or anything Variable.infer can turn into one.
  # parents  - zero or more parent objects, stored in the order given.
  #
  # Raises ArgumentError when Variable.infer returns nil/false for +variable+.
  def initialize(variable, *parents)
    inferred = Variable.infer(variable)
    unless inferred
      raise ArgumentError, "A valid variable cannot be implied from #{variable}"
    end
    @variable = inferred
    @parents = parents
  end

  # The node is named after its variable.
  def name
    variable.name
  end

  # Short description: the node's name followed by the names of its parents.
  def inspect
    parent_names = parents.map(&:name)
    "Node: #{name} #{ parent_names.inspect }"
  end

  # Hands back +obj+ untouched when it already is a Node. For anything else
  # the result is nil; +parents+ is accepted but not used.
  def self.infer(obj, *parents)
    obj if obj.is_a?(Node)
  end
end
|
30
|
+
|
31
|
+
# A named discrete variable that counts how often each of its values was seen.
class Variable

  attr_reader :values, :name, :observations, :total

  # name   - identifier for the variable.
  # values - the possible values; defaults to true and false when none given.
  def initialize(name, *values)
    @name = name
    @values = values.empty? ? [true, false] : values
    @observations = Array.new(@values.size, 0)
    @total = 0
  end

  # Record one observation of +value+. A nil value counts as the first known
  # value; a value not seen before is appended to the list of values.
  # Returns the new total number of observations.
  def observe(value=nil)
    value = values.first if value.nil?
    slot = values.index(value)
    if slot.nil?
      values << value
      observations << 0
      slot = values.size - 1
    end
    observations[slot] += 1
    @total += 1
  end

  # How many times +value+ has been observed (0 for an unknown value).
  def observed(value)
    slot = values.index(value)
    slot ? observations[slot] : 0
  end

  def inspect
    "Variable: #{name} #{values.inspect}"
  end

  # Coerce +obj+ into a Variable: a Variable is returned as is, a Symbol or
  # String becomes the name of a new Variable built with +values+, and
  # anything else yields nil.
  def self.infer(obj, *values)
    case obj
    when Variable then obj
    when Symbol   then Variable.new(obj, *values)
    when String   then Variable.new(obj.to_sym, *values)
    end
  end
end
|
82
|
+
|
83
|
+
require 'rubygems'
|
84
|
+
require 'spec'
|
85
|
+
|
86
|
+
# Specs for Variable: construction, inference from other objects, and
# observation counting.
describe Variable do

  before do
    @v = Variable.new(:v1)
  end

  it "should require a name" do
    lambda { Variable.new }.should raise_error(ArgumentError)
    lambda { @v = Variable.new(:name) }.should_not raise_error
    @v.name.should eql(:name)
  end

  it "should default to true and false as parameter values" do
    variable = Variable.new(:v)
    variable.values.should eql([true, false])
  end

  it "should be able to take a variables parameters" do
    variable = Variable.new(:v, :red, :blue, :green)
    variable.values.should eql([:red, :blue, :green])
  end

  it "should be able to infer a variable from a variable" do
    variable = Variable.new(:v)
    Variable.infer(variable).should eql(variable)
  end

  it "should be able to infer a variable from a symbol" do
    inferred = Variable.infer(:v)
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should be able to infer a variable from a string" do
    inferred = Variable.infer('v')
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should be able to infer values from a list" do
    inferred = Variable.infer(:v, 1, 2)
    inferred.values.should eql([1,2])
  end

  it "should start with zero observations" do
    @v.total.should eql(0)
  end

  it "should increment observations" do
    # Each bare observe bumps the running total by one.
    [1, 2].each do |expected_total|
      @v.observe
      @v.total.should eql(expected_total)
    end
  end

  it "should record observations" do
    @v.observe(true)
    @v.total.should eql(1)
    @v.observed(true).should eql(1)
    @v.observed(false).should eql(0)

    @v.observe(false)
    @v.total.should eql(2)
    @v.observed(true).should eql(1)
    @v.observed(false).should eql(1)
  end

end
|
153
|
+
|
154
|
+
# Specs for Node, built around a small season/rain/sprinkler/wet/slippery graph.
describe Node do

  before do
    @season = Variable.new(:season, :spring, :summer, :fall, :winter)
    @x1 = Node.new(@season)
    @x2 = Node.new(:rain, @x1)
    @x3 = Node.new(:sprinkler, @x1)
    @x4 = Node.new(:wet, @x3, @x2)
    @x5 = Node.new(:slippery, @x4)
  end

  it "should infer a variable for the node" do
    # An existing Variable is used as is...
    given = Variable.new(:v)
    Node.new(given).variable.should eql(given)

    # ...while a symbol is turned into a Variable of that name.
    inferred = Node.new(:v).variable
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should raise an error when it cannot infer a variable for the node" do
    lambda { Node.new(1) }.should raise_error(ArgumentError, /A valid variable cannot be implied from/)
  end

  it "should be able to create a node with parents" do
    @x1.parents.should be_empty
    @x2.parents.should eql([@x1])
    @x3.parents.should eql([@x1])
    @x4.parents.should eql([@x3, @x2])
    @x5.parents.should eql([@x4])
  end

  # Disabled: Node.infer does not build nodes yet.
  # it "should be able to infer a node" do
  #   n = Node.infer(:v1, :v2)
  #   n.name.should eql(:v1)
  #   n.variable.name.should eql(:v1)
  #   n.variable.should be_a(Variable)
  #   n.parents.size.should eql(1)
  #   p = n.parents.first
  #   p.name.should eql(:v2)
  #   p.should be_a(Variable)
  # end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
class Array
  # Combine every row of the receiver with every element of +array+.
  #
  # An empty receiver turns each element of +array+ into a one-element row.
  # Otherwise each result row is [row, element].flatten, with the receiver's
  # rows varying fastest:
  #
  #   [[1], [2]].expand([:a, :b])  # => [[1, :a], [2, :a], [1, :b], [2, :b]]
  def expand(array)
    return array.map { |item| [item] } if empty?

    array.flat_map do |addition|
      map { |row| [row, addition].flatten }
    end
  end
end
|
12
|
+
|
13
|
+
# A named discrete variable that tallies observations of each of its values.
class Variable

  attr_reader :values, :name, :observations, :total

  # name   - identifier for the variable.
  # values - the possible values; true and false are used when none are given.
  def initialize(name, *values)
    @name = name
    @values = values.empty? ? [true, false] : values
    @observations = Array.new(@values.size, 0)
    @total = 0
  end

  # Count one observation of +value+. nil stands for the first known value,
  # and a previously unknown value is added to the list of values.
  # Returns the updated total.
  def observe(value=nil)
    value = values.first if value.nil?
    position = values.index(value)
    if position.nil?
      values << value
      observations << 0
      position = values.size - 1
    end
    observations[position] += 1
    @total += 1
  end

  # Number of recorded observations of +value+; 0 when it is not a known value.
  def observed(value)
    position = values.index(value)
    position ? observations[position] : 0
  end

  def inspect
    "Variable: #{name} #{values.inspect}"
  end

  # Turn +obj+ into a Variable. Variables pass through unchanged, a Symbol or
  # String names a new Variable created with +values+, anything else gives nil.
  def self.infer(obj, *values)
    case obj
    when Variable then obj
    when Symbol   then Variable.new(obj, *values)
    when String   then Variable.new(obj.to_sym, *values)
    end
  end
end
|
64
|
+
|
65
|
+
# Holds a set of variables together with a legend listing every combination
# of their values.
class Table

  attr_reader :columns, :variables, :legend

  # variables - objects responding to +name+ and +values+.
  #
  # columns is the list of variable names. legend is built by folding each
  # variable's values into the rows so far via Array#expand, which is defined
  # alongside this class.
  def initialize(*variables)
    @columns = variables.map { |v| v.name }
    @variables = variables
    # Expand over each variable's *values*: Array#expand iterates its
    # argument, and a variable object itself is not enumerable.
    @legend = variables.inject([]) do |rows, variable|
      rows.expand(variable.values)
    end
  end

  protected

  # Not implemented yet.
  def lookup
  end
end
|
80
|
+
|
81
|
+
# Placeholder node for this sketch: both methods are empty stubs.
class Node
  # Accepts a name and any number of parents; nothing is stored.
  def initialize(name, *parents); end

  # Intended to take an array, array of arrays, dictionary, hash, or OpenStruct.
  # Anything but an array can add a new parent to observe.
  # Not implemented: the body is empty, so the call returns nil.
  def observe(values); end
end
|
90
|
+
|
91
|
+
require 'rubygems'
|
92
|
+
require 'spec'
|
93
|
+
|
94
|
+
=begin
|
95
|
+
variables = [[:spring, :summer, :fall, :winter], [:true, :false], [:true, :false], [:true, :false]]
|
96
|
+
|
97
|
+
@all = []
|
98
|
+
(0...variables.size).each do |i|
|
99
|
+
@all << variables.inject([]) do |list, v|
|
100
|
+
list << (0...v.size).map do |j|
|
101
|
+
variables[i][j]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
@all
|
107
|
+
|
108
|
+
|
109
|
+
v1 = [:spring, :summer, :fall, :winter]
|
110
|
+
v2 = [:true, :false]
|
111
|
+
a = [v1, v2]
|
112
|
+
b = []
|
113
|
+
a.each do |e|
|
114
|
+
b << e
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
=end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'gsl'
|
3
|
+
|
4
|
+
include GSL
|
5
|
+
|
6
|
+
# A node holding labels, a probability vector and a likelihood vector, both
# built through Vector.ary_to_gv.
class Node

  attr_reader :name, :labels, :probabilities, :likelihood

  # The first argument is the node name (required). The remaining arguments
  # select the labels:
  #   * none          - labels :true and :false, uniform probabilities
  #   * a single Hash - keys are labels, values are weights (normalized)
  #   * a single Array, or any other list - the labels, uniform probabilities
  #
  # Raises ArgumentError when no name is given.
  def initialize(*args)
    @name = args.shift
    raise ArgumentError, "Must provide a node name" unless name

    only = args.first if args.length == 1
    case only
    when Hash  then extract_from_hash(only)
    when Array then extract_from_array(only)
    else            extract_from_array(args.empty? ? [:true, :false] : args)
    end
    assert_likelihood
  end

  def inspect
    "Node: #{labels.inspect} #{probabilities.inspect}"
  end

  # probabilities combined with likelihood through the vector's own * operator.
  def belief
    probabilities * likelihood
  end

  alias b belief
  alias l likelihood
  alias p probabilities

  protected

  # Start the likelihood as a vector of ones, one entry per probability.
  def assert_likelihood
    ones = Array.new(@probabilities.size, 1)
    @likelihood = Vector.ary_to_gv(ones)
  end

  # Use +array+ as the labels and give every label the same probability.
  def extract_from_array(array)
    @labels = array
    @probabilities = Vector.ary_to_gv(uniform_distribution(array.size))
  end

  # An array of +n+ entries, each 1/n as a float.
  def uniform_distribution(n)
    share = 1 / n.to_f
    Array.new(n, share)
  end

  # Keys become the labels, values the (then normalized) probabilities.
  def extract_from_hash(hash)
    @labels = hash.keys
    @probabilities = Vector.ary_to_gv(hash.values)
    normalize_probabilities!
  end

  # I don't like GSL::Vector#normalize!, it's not accurate, or has a different
  # idea of what a normalized vector looks like.
  # Divides every entry by the sum of all entries, in place.
  def normalize_probabilities!
    total = 0.0
    @probabilities.each { |value| total += value }
    @probabilities.map! { |value| value.to_f / total }
  end

end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
=begin
|
2
|
+
I want to noodle around a bit with basic probabilities, odds, that sort of thing. Here's the example:
|
3
|
+
|
4
|
+
Upon being awakened by the sound of a burglar alarm, what is your degree of belief that a burglary attempt took place?
|
5
|
+
|
6
|
+
Supporting information:
|
7
|
+
|
8
|
+
* There is a 95% chance that an attempted burglary will trigger the alarm system, P(alarm|burglary) = 95%
|
9
|
+
* There is a 1% chance that the alarms will be triggered by non-burglary attempts, P(alarm|no burglary) = 1%
|
10
|
+
* There is a 1/10_000 chance of a particular home being burglarized, generally, P(burglary) = 10^-4
|
11
|
+
|
12
|
+
O(burglary|alarm) = L(alarm|burglary)O(burglary)
|
13
|
+
|
14
|
+
P(burglary|alarm) = O(burglary|alarm) / (1 + O(burglary|alarm))
|
15
|
+
|
16
|
+
=end
|
17
|
+
|
18
|
+
require 'mathn'
|
19
|
+
|
20
|
+
# Divides +effect+ by +cause+ and returns the quotient.
def likelihood(effect, cause)
  quotient = effect / cause
  quotient
end
alias l likelihood
|
24
|
+
|
25
|
+
# With only +effect+: effect / (1 - effect).
# With a +cause+ as well: likelihood(cause, effect) multiplied by odds(effect).
def odds(effect, cause=nil)
  return effect / (1 - effect) unless cause

  likelihood(cause, effect) * odds(effect)
end
alias o odds
|
29
|
+
|
30
|
+
# Odds for a single hypothesis: e divided by its complement (1 - e).
# Same formula as the one-argument form of odds.
def prior_odds(e)
  complement = 1 - e
  e / complement
end
|
34
|
+
|
35
|
+
# Ratio of probability(e, h) to probability(e, 1 - h).
#
# Calls +probability+ by name rather than through the short +p+ alias:
# unless that alias has already been installed, a bare +p+ resolves to
# Kernel#p, which prints its arguments and returns them instead of computing
# anything.
def likelihood_ratio(e, h)
  probability(e, h) / probability(e, 1 - h)
end
|
38
|
+
|
39
|
+
# Multiplies +e+ by +h+, then divides the product by +h+ again.
def probability(e, h)
  joint = e * h
  joint / h
end
|
42
|
+
|
43
|
+
# Placeholder: accepts any number of arguments, does nothing and returns nil.
def product_rule(*e); end
|
45
|
+
alias :p :probability
|
46
|
+
|
47
|
+
class Array

  # Per-array store of named "given" lists, created on first use.
  def givens
    @givens ||= {}
  end

  # Store +array+ under +sym+ and return it.
  #
  # By default every element of the receiver gets an equal share 1/size. The
  # share is built with an explicit Rational so the result does not depend on
  # Integer#/ having been redefined to return fractions, and it is built per
  # slot so an empty receiver yields [] instead of dividing by zero.
  def give(sym, array=Array.new(size) { Rational(1, size) })
    givens[sym] = array
  end

  # The entry at +index+ of the list stored under +sym+.
  def given(sym, index)
    givens[sym][index]
  end
end
|
61
|
+
|
62
|
+
=begin
|
63
|
+
Since Rational is the automatic choice for probabilistic data,
|
64
|
+
and since I don't want to override how mathn infers numbers,
|
65
|
+
I am adding some baggage to Rational:
|
66
|
+
|
67
|
+
* It still reduces to the LCD
|
68
|
+
* It keeps track of all events,
|
69
|
+
so that I can keep a new event proportional to old ones
|
70
|
+
* It has an add_event (add) which takes a true or false value
|
71
|
+
true values, records that a condition was found
|
72
|
+
|
73
|
+
This really only works for binary data, but this is a noodle file.
|
74
|
+
|
75
|
+
=end
|
76
|
+
# Extends Rational with two event counters (positive and total) so a value
# can be updated one observation at a time.
class Rational < Numeric
  class << self
    alias orig_reduce reduce

    # Delegates to the original reduce, then records the unreduced +den+ and
    # +num+ on the result as its total and positive event counts.
    def reduce(num, den=1)
      reduced = orig_reduce(num, den)
      reduced.total_events = den
      reduced.positive_events = num
      reduced
    end
  end

  # Reader for the total event count (0 until set). Passing +val+ sets it first.
  def total_events(val=nil)
    if val
      @total_events = val
    else
      @total_events ||= 0
    end
  end
  alias events total_events
  alias total total_events

  def total_events=(val)
    total_events(val)
  end

  # Reader for the positive event count (0 until set). Passing +val+ sets it first.
  def positive_events(val=nil)
    if val
      @positive_events = val
    else
      @positive_events ||= 0
    end
  end
  alias positive positive_events

  def positive_events=(val)
    positive_events(val)
  end

  # Uses the Rational constructor to calculate the lowest common denominator.
  # Adds one event (counted as positive unless +positive+ is false/nil),
  # overwrites this value's numerator/denominator and counters, returns self.
  def add_event(positive=true)
    hits = positive_events
    hits += 1 if positive
    trials = total_events + 1
    lowest = Rational(hits, trials)
    @numerator = lowest.numerator
    @denominator = lowest.denominator
    @positive_events = hits
    @total_events = trials
    self
  end
  alias add add_event
end
|
120
|
+
|
121
|
+
|
122
|
+
# Scratch holder for class-level experiments with GSL random numbers.
class A
  class << self
    include GSL

    # Memoized result of Rng.alloc.
    def r
      @r ||= Rng.alloc
    end

    # Takes one gaussian sample from r, evaluates Ran.gaussian_pdf on it and
    # prints both values.
    def pdf
      sample = r.gaussian
      density = Ran.gaussian_pdf(sample)
      puts sample, density
    end
  end
end
|