fathom 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bundle/config +2 -0
- data/.document +5 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +30 -0
- data/LICENSE +20 -0
- data/README.md +176 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/autotest/discover.rb +1 -0
- data/lib/fathom.rb +68 -0
- data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
- data/lib/fathom/archive/n2.rb +198 -0
- data/lib/fathom/archive/n3.rb +119 -0
- data/lib/fathom/archive/node.rb +74 -0
- data/lib/fathom/archive/noodle.rb +136 -0
- data/lib/fathom/archive/scratch.rb +45 -0
- data/lib/fathom/basic_node.rb +8 -0
- data/lib/fathom/causal_graph.rb +12 -0
- data/lib/fathom/combined_plausibilities.rb +12 -0
- data/lib/fathom/concept.rb +83 -0
- data/lib/fathom/data_node.rb +51 -0
- data/lib/fathom/import.rb +68 -0
- data/lib/fathom/import/csv_import.rb +60 -0
- data/lib/fathom/import/yaml_import.rb +53 -0
- data/lib/fathom/inverter.rb +21 -0
- data/lib/fathom/knowledge_base.rb +23 -0
- data/lib/fathom/monte_carlo_set.rb +76 -0
- data/lib/fathom/node_utilities.rb +8 -0
- data/lib/fathom/plausible_range.rb +82 -0
- data/lib/fathom/value_aggregator.rb +11 -0
- data/lib/fathom/value_description.rb +79 -0
- data/lib/fathom/value_multiplier.rb +18 -0
- data/lib/options_hash.rb +186 -0
- data/spec/fathom/data_node_spec.rb +61 -0
- data/spec/fathom/import/csv_import_spec.rb +36 -0
- data/spec/fathom/import/yaml_import_spec.rb +40 -0
- data/spec/fathom/import_spec.rb +22 -0
- data/spec/fathom/knowledge_base_spec.rb +16 -0
- data/spec/fathom/monte_carlo_set_spec.rb +58 -0
- data/spec/fathom/plausible_range_spec.rb +130 -0
- data/spec/fathom/value_description_spec.rb +70 -0
- data/spec/fathom_spec.rb +8 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/support/demo.yml +17 -0
- metadata +135 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
=begin
|
4
|
+
Some noodling about what a node might contain in order to describe the joint probabilities.
|
5
|
+
=end
|
6
|
+
# A node in the graph: it wraps a Variable and remembers the parents it was
# constructed with.
class Node

  attr_reader :variable, :parents

  # variable - a Variable, or anything Variable.infer can turn into one.
  # parents  - zero or more parent objects, stored exactly as given.
  #
  # Raises ArgumentError when Variable.infer yields nothing for +variable+.
  def initialize(variable, *parents)
    inferred = Variable.infer(variable)
    unless inferred
      raise ArgumentError, "A valid variable cannot be implied from #{variable}"
    end
    @variable = inferred
    @parents = parents
  end

  # The node is named after the variable it wraps.
  def name
    variable.name
  end

  # Short description: the node name followed by the names of its parents.
  def inspect
    parent_names = parents.map { |parent| parent.name }
    "Node: #{name} #{ parent_names.inspect }"
  end

  # Returns +obj+ untouched when it already is a Node. Any other input
  # currently yields nil; the extra +parents+ arguments are not used.
  def self.infer(obj, *parents)
    obj if obj.is_a?(Node)
  end
end
|
30
|
+
|
31
|
+
# A named discrete variable that counts how often each of its values has been
# observed.
class Variable

  attr_reader :values, :name, :observations, :total

  # name   - identifier for the variable.
  # values - the possible values; defaults to true and false when omitted.
  def initialize(name, *values)
    @name = name
    @values = values.empty? ? [true, false] : values
    @observations = Array.new(@values.size, 0)
    @total = 0
  end

  # Record one observation. A nil observation counts towards the first value.
  # A value that has not been seen before is appended to the value list with
  # its own counter. Returns the new total number of observations.
  def observe(value=nil)
    value = values.first if value.nil?
    slot = values.index(value)
    if slot.nil?
      values << value
      observations << 0
      slot = values.size - 1
    end
    observations[slot] += 1
    @total += 1
  end

  # Number of times +value+ has been observed; 0 for unknown values.
  def observed(value)
    slot = values.index(value)
    slot ? observations[slot] : 0
  end

  def inspect
    "Variable: #{name} #{values.inspect}"
  end

  # Coerce +obj+ into a Variable: Variables pass through, Symbols and Strings
  # become the name of a new Variable (with the optional +values+), anything
  # else gives nil.
  def self.infer(obj, *values)
    if obj.is_a?(Variable)
      obj
    elsif Symbol === obj
      Variable.new(obj, *values)
    elsif String === obj
      Variable.new(obj.to_sym, *values)
    end
  end
end
|
82
|
+
|
83
|
+
require 'rubygems'
|
84
|
+
require 'spec'
|
85
|
+
|
86
|
+
# Specs for Variable: construction, inference from other objects, and the
# bookkeeping of observations.
describe Variable do

  before do
    @v = Variable.new(:v1)
  end

  it "should require a name" do
    lambda { Variable.new }.should raise_error(ArgumentError)
    lambda { @v = Variable.new(:name) }.should_not raise_error
    @v.name.should eql(:name)
  end

  it "should default to true and false as parameter values" do
    flag = Variable.new(:v)
    flag.values.should eql([true, false])
  end

  it "should be able to take a variables parameters" do
    colour = Variable.new(:v, :red, :blue, :green)
    colour.values.should eql([:red, :blue, :green])
  end

  it "should be able to infer a variable from a variable" do
    existing = Variable.new(:v)
    Variable.infer(existing).should eql(existing)
  end

  it "should be able to infer a variable from a symbol" do
    inferred = Variable.infer(:v)
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should be able to infer a variable from a string" do
    inferred = Variable.infer('v')
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should be able to infer values from a list" do
    inferred = Variable.infer(:v, 1, 2)
    inferred.values.should eql([1,2])
  end

  it "should start with zero observations" do
    @v.total.should eql(0)
  end

  it "should increment observations" do
    # Each bare observe call bumps the running total by one.
    @v.observe
    @v.total.should eql(1)
    @v.observe
    @v.total.should eql(2)
  end

  it "should record observations" do
    # First observation: only the counter for true moves.
    @v.observe(true)
    @v.total.should eql(1)
    @v.observed(true).should eql(1)
    @v.observed(false).should eql(0)

    # Second observation: the counter for false catches up.
    @v.observe(false)
    @v.total.should eql(2)
    @v.observed(true).should eql(1)
    @v.observed(false).should eql(1)
  end

end
|
153
|
+
|
154
|
+
# Specs for Node, built around a small season/rain/sprinkler/wet/slippery graph.
describe Node do

  before do
    @season = Variable.new(:season, :spring, :summer, :fall, :winter)
    @x1 = Node.new(@season)
    @x2 = Node.new(:rain, @x1)
    @x3 = Node.new(:sprinkler, @x1)
    @x4 = Node.new(:wet, @x3, @x2)
    @x5 = Node.new(:slippery, @x4)
  end

  it "should infer a variable for the node" do
    # An existing Variable is used as-is.
    given = Variable.new(:v)
    wrapping = Node.new(given)
    wrapping.variable.should eql(given)

    # A symbol is turned into a Variable of the same name.
    from_symbol = Node.new(:v)
    built = from_symbol.variable
    built.should be_a(Variable)
    built.name.should eql(:v)
  end

  it "should raise an error when it cannot infer a variable for the node" do
    lambda { Node.new(1) }.should raise_error(ArgumentError, /A valid variable cannot be implied from/)
  end

  it "should be able to create a node with parents" do
    @x1.parents.should be_empty
    @x2.parents.should eql([@x1])
    @x3.parents.should eql([@x1])
    @x4.parents.should eql([@x3, @x2])
    @x5.parents.should eql([@x4])
  end

  # Disabled example, kept as it was written:
  # it "should be able to infer a node" do
  #   n = Node.infer(:v1, :v2)
  #   n.name.should eql(:v1)
  #   n.variable.name.should eql(:v1)
  #   n.variable.should be_a(Variable)
  #   n.parents.size.should eql(1)
  #   p = n.parents.first
  #   p.name.should eql(:v2)
  #   p.should be_a(Variable)
  # end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
class Array
  # Grow a list of combinations by one more dimension.
  #
  # When the receiver is empty, every element of +array+ becomes its own
  # one-element row. Otherwise each receiver entry is paired with each element
  # of +array+ and the pair is flattened into a single row; rows are grouped by
  # the element of +array+, in order.
  def expand(array)
    return array.map { |item| [item] } if empty?
    array.inject([]) do |rows, addition|
      rows.concat(map { |row| [row, addition].flatten })
    end
  end
end
|
12
|
+
|
13
|
+
# A named discrete variable that tallies observations per value.
class Variable

  attr_reader :values, :name, :observations, :total

  # name   - identifier for the variable.
  # values - the possible values; true and false are used when none are given.
  def initialize(name, *values)
    @name = name
    @values = values.empty? ? [true, false] : values
    @observations = Array.new(@values.size, 0)
    @total = 0
  end

  # Count one observation of +value+. nil stands for the first value, and a
  # value not seen before is added to the list with a fresh counter.
  # Returns the updated total.
  def observe(value=nil)
    value = values.first if value.nil?
    position = values.index(value)
    if position.nil?
      values << value
      observations << 0
      position = values.size - 1
    end
    observations[position] += 1
    @total += 1
  end

  # How many times +value+ has been observed (0 when it is unknown).
  def observed(value)
    position = values.index(value)
    position ? observations[position] : 0
  end

  def inspect
    "Variable: #{name} #{values.inspect}"
  end

  # Variables are returned unchanged; a Symbol or String names a new Variable
  # carrying the optional +values+; every other input produces nil.
  def self.infer(obj, *values)
    if obj.is_a?(Variable)
      obj
    elsif Symbol === obj
      Variable.new(obj, *values)
    elsif String === obj
      Variable.new(obj.to_sym, *values)
    end
  end
end
|
64
|
+
|
65
|
+
# A table over a set of variables.
#
# columns   - the variable names, in the order given.
# variables - the variable objects themselves.
# legend    - every combination of the variables' values, one row per
#             combination, built with Array#expand (defined in this file).
class Table

  attr_reader :columns, :variables, :legend

  # variables - objects responding to +name+ and +values+.
  def initialize(*variables)
    @variables = variables
    @columns = variables.map { |v| v.name }
    # Expand over each variable's values. Passing the variable object itself
    # to Array#expand fails, because expand iterates its argument with +map+.
    @legend = variables.inject([]) { |rows, v| rows.expand(v.values) }
  end

  protected

  # Placeholder; no lookup behaviour has been written yet.
  def lookup()
  end
end
|
80
|
+
|
81
|
+
# Skeleton of a node; neither method has a body yet.
class Node

  # Accepts a name and any number of parents, and currently ignores them.
  def initialize(name, *parents); end

  # Take an array, array of arrays, dictionary, hash, or OpenStruct.
  # Anything but an array can add a new parent to observe.
  # (Not implemented: the method returns nil.)
  def observe(values); end
end
|
90
|
+
|
91
|
+
require 'rubygems'
|
92
|
+
require 'spec'
|
93
|
+
|
94
|
+
=begin
|
95
|
+
variables = [[:spring, :summer, :fall, :winter], [:true, :false], [:true, :false], [:true, :false]]
|
96
|
+
|
97
|
+
@all = []
|
98
|
+
(0...variables.size).each do |i|
|
99
|
+
@all << variables.inject([]) do |list, v|
|
100
|
+
list << (0...v.size).map do |j|
|
101
|
+
variables[i][j]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
@all
|
107
|
+
|
108
|
+
|
109
|
+
v1 = [:spring, :summer, :fall, :winter]
|
110
|
+
v2 = [:true, :false]
|
111
|
+
a = [v1, v2]
|
112
|
+
b = []
|
113
|
+
a.each do |e|
|
114
|
+
b << e
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
=end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'gsl'
|
3
|
+
|
4
|
+
include GSL
|
5
|
+
|
6
|
+
# A labelled discrete node holding a probability vector and a likelihood
# vector, both built through Vector.ary_to_gv.
class Node

  attr_reader :name, :labels, :probabilities, :likelihood

  # The first argument is the node name (required). The rest selects the
  # distribution:
  #   (nothing)       - labels :true / :false, uniform probabilities
  #   a single Hash   - keys are labels, values are weights (normalized)
  #   a single Array  - the labels, uniform probabilities
  #   anything else   - the remaining arguments are the labels, uniform
  #
  # Raises ArgumentError when no name is given.
  def initialize(*args)
    @name = args.shift
    raise ArgumentError, "Must provide a node name" unless name

    sole = args.length == 1 ? args.first : nil
    case sole
    when Hash
      extract_from_hash(sole)
    when Array
      extract_from_array(sole)
    else
      extract_from_array(args.empty? ? [:true, :false] : args)
    end
    assert_likelihood
  end

  def inspect
    "Node: #{labels.inspect} #{probabilities.inspect}"
  end

  # Product of the probability vector and the likelihood vector.
  def belief
    probabilities * likelihood
  end

  # Single-letter shorthands. Note that +p+ replaces Kernel#p on Node
  # instances.
  alias :b :belief
  alias :l :likelihood
  alias :p :probabilities

  protected

  # Start with a likelihood of 1 for every label.
  def assert_likelihood
    ones = Array.new(@probabilities.size, 1)
    @likelihood = Vector.ary_to_gv(ones)
  end

  # Labels come from the array; every label gets the same probability.
  def extract_from_array(array)
    @labels = array
    @probabilities = Vector.ary_to_gv(uniform_distribution(array.size))
  end

  # n copies of 1/n.
  def uniform_distribution(n)
    share = 1 / n.to_f
    Array.new(n, share)
  end

  # Labels are the hash keys; the values are weights scaled to sum to one.
  def extract_from_hash(hash)
    @labels = hash.keys
    @probabilities = Vector.ary_to_gv(hash.values)
    normalize_probabilities!
  end

  # I don't like GSL::Vector#normalize!, it's not accurate, or has a different
  # idea of what a normalized vector looks like.
  # Divides every entry by the sum of all entries, in place.
  def normalize_probabilities!
    total = 0.0
    @probabilities.each { |weight| total += weight }
    @probabilities.map! { |weight| weight.to_f / total }
  end

end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
=begin
|
2
|
+
I want to noodle around a bit with basic probabilities, odds, that sort of thing. Here's the example:
|
3
|
+
|
4
|
+
Upon being awakened by the sound of a burglar alarm, what is your degree of belief that a burglary attempt took place?
|
5
|
+
|
6
|
+
Supporting information:
|
7
|
+
|
8
|
+
* There is a 95% chance that an attempted burglary will trigger the alarm system, P(alarm|burglary) = 95%
|
9
|
+
* There is a 1% chance that the alarms will be triggered by non-burglary attempts, p(alarm|no burglary) = 1%
|
10
|
+
* There is a 1/10_000 chance of a particular home being burglarized, generally, P(burglary) = 10^-4
|
11
|
+
|
12
|
+
O(burglary|alarm) = L(alarm|burglary)O(burglary)
|
13
|
+
|
14
|
+
P(burglary|alarm) = O(burglary|alarm) / (1 + O(burglary|alarm))
|
15
|
+
|
16
|
+
=end
|
17
|
+
|
18
|
+
require 'mathn'
|
19
|
+
|
20
|
+
# Ratio of +effect+ to +cause+.
#
# Uses Numeric#quo so that two Integers give an exact Rational instead of a
# truncated quotient. Plain `/` is only exact here while mathn's global
# redefinition of Integer division is loaded.
#
# Raises ZeroDivisionError for an exact (Integer/Rational) zero +cause+.
def likelihood(effect, cause)
  effect.quo(cause)
end
alias :l :likelihood
|
24
|
+
|
25
|
+
# Odds of +effect+.
#
# With one argument: effect / (1 - effect).
# With +cause+:      likelihood(cause, effect) multiplied by those odds.
#
# The division goes through Numeric#quo so Integer arguments are not silently
# truncated when mathn's redefinition of Integer division is absent.
#
# Raises ZeroDivisionError when +effect+ is an exact 1.
def odds(effect, cause=nil)
  prior = effect.quo(1 - effect)
  cause ? likelihood(cause, effect) * prior : prior
end
alias :o :odds
|
29
|
+
|
30
|
+
# The same as the odds above, but only in the case of a single hypothesis:
# e / (1 - e).
#
# Divides with Numeric#quo so the result stays exact for Integer input even
# when mathn's Integer division is not loaded.
#
# Raises ZeroDivisionError when +e+ is an exact 1.
def prior_odds(e)
  e.quo(1 - e)
end
|
34
|
+
|
35
|
+
# probability(e, h) divided by probability(e, 1 - h).
#
# Calls +probability+ by name rather than through the top-level +p+ alias, so
# the method does not depend on Kernel#p having been replaced. The division
# uses Numeric#quo to stay exact for Integer results.
#
# NOTE(review): probability(e, h) is (e * h) / h, so for exact numbers both
# terms reduce to +e+ and the ratio is 1 whenever it is defined. Presumably a
# placeholder; confirm the intended formula.
def likelihood_ratio(e, h)
  probability(e, h).quo(probability(e, 1 - h))
end
|
38
|
+
|
39
|
+
# Multiplies +e+ by +h+ and then divides the product by +h+ again.
def probability(e, h)
  joint = e * h
  joint / h
end

# Placeholder: accepts any number of arguments and returns nil.
def product_rule(*e); end

# NOTE(review): defined at the top level, this alias takes the place of
# Kernel#p for every object, so the usual debugging `p value` stops working
# once this file is loaded.
alias :p :probability
|
46
|
+
|
47
|
+
class Array

  # Conditional tables attached to this array, keyed by symbol.
  def givens
    @givens ||= {}
  end

  # Attach +array+ under +sym+ and return it.
  #
  # When +array+ is omitted, a uniform table is used: one entry per element of
  # the receiver, each the exact fraction 1/size. Rational is used explicitly,
  # because `1/size` truncates to 0 unless mathn is loaded. An empty receiver
  # gets an empty table instead of a ZeroDivisionError.
  def give(sym, array=Array.new(self.size, self.empty? ? 0 : Rational(1, self.size)))
    givens[sym] = array
  end

  # The entry at +index+ of the table stored under +sym+.
  # Raises NoMethodError when nothing has been given for +sym+.
  def given(sym, index)
    givens[sym][index]
  end
end
|
61
|
+
|
62
|
+
=begin
|
63
|
+
Since Rational is the automatic choice for probabilistic data,
|
64
|
+
and since I don't want to override how mathn infers numbers,
|
65
|
+
I am adding some baggage to Rational:
|
66
|
+
|
67
|
+
* It still reduces to the LCD
|
68
|
+
* It keeps track of all events,
|
69
|
+
so that I can keep a new event proportional to old ones
|
70
|
+
* It has an add_event (add) which takes a true or false value
|
71
|
+
true values, records that a condition was found
|
72
|
+
|
73
|
+
This really only works for binary data, but this is a noodle file.
|
74
|
+
|
75
|
+
=end
|
76
|
+
# Reopens Rational so each value also remembers the raw event counts it was
# built from (positive events over total events).
#
# NOTE(review): this relies on a Rational that exposes a class-level +reduce+
# and stores @numerator/@denominator in assignable instance variables;
# confirm the target Ruby provides both.
class Rational < Numeric
  class << self
    alias :orig_reduce :reduce

    # Reduce as before, then stamp the unreduced counts on the result.
    def reduce(num, den=1)
      reduced = orig_reduce(num, den)
      reduced.total_events = den
      reduced.positive_events = num
      reduced
    end
  end

  # Reader that doubles as a writer: a truthy +val+ replaces the stored count.
  # The count defaults to 0.
  def total_events(val=nil)
    @total_events = val if val
    @total_events ||= 0
  end
  alias :events :total_events
  alias :total :total_events

  def total_events=(val)
    total_events(val)
  end

  # Same reader/writer arrangement for the number of positive events.
  def positive_events(val=nil)
    @positive_events = val if val
    @positive_events ||= 0
  end
  alias :positive :positive_events

  def positive_events=(val)
    positive_events(val)
  end

  # Uses the Rational constructor to calculate the lowest common denominator.
  # Records one more event (positive unless told otherwise), overwrites this
  # value's numerator and denominator with the re-reduced fraction, and
  # returns self.
  def add_event(positive=true)
    hits = positive ? positive_events + 1 : positive_events
    trials = total_events + 1
    fresh = Rational(hits, trials)
    reduced_num = fresh.numerator
    reduced_den = fresh.denominator
    @numerator = reduced_num
    @denominator = reduced_den
    @positive_events = hits
    @total_events = trials
    self
  end
  alias :add :add_event
end
|
120
|
+
|
121
|
+
|
122
|
+
# Scratch holder for a random number generator and a quick density printout.
class A
  class << self
    include GSL

    # Lazily allocated generator, reused across calls.
    def r
      @r ||= Rng.alloc
    end

    # Draws one gaussian sample, evaluates Ran.gaussian_pdf at it, and prints
    # the sample followed by that value.
    def pdf
      sample = self.r.gaussian
      density = Ran.gaussian_pdf(sample)
      puts sample, density
    end
  end
end
|