fathom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/.bundle/config +2 -0
  2. data/.document +5 -0
  3. data/.gitignore +5 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +5 -0
  6. data/Gemfile.lock +30 -0
  7. data/LICENSE +20 -0
  8. data/README.md +176 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/autotest/discover.rb +1 -0
  12. data/lib/fathom.rb +68 -0
  13. data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
  14. data/lib/fathom/archive/n2.rb +198 -0
  15. data/lib/fathom/archive/n3.rb +119 -0
  16. data/lib/fathom/archive/node.rb +74 -0
  17. data/lib/fathom/archive/noodle.rb +136 -0
  18. data/lib/fathom/archive/scratch.rb +45 -0
  19. data/lib/fathom/basic_node.rb +8 -0
  20. data/lib/fathom/causal_graph.rb +12 -0
  21. data/lib/fathom/combined_plausibilities.rb +12 -0
  22. data/lib/fathom/concept.rb +83 -0
  23. data/lib/fathom/data_node.rb +51 -0
  24. data/lib/fathom/import.rb +68 -0
  25. data/lib/fathom/import/csv_import.rb +60 -0
  26. data/lib/fathom/import/yaml_import.rb +53 -0
  27. data/lib/fathom/inverter.rb +21 -0
  28. data/lib/fathom/knowledge_base.rb +23 -0
  29. data/lib/fathom/monte_carlo_set.rb +76 -0
  30. data/lib/fathom/node_utilities.rb +8 -0
  31. data/lib/fathom/plausible_range.rb +82 -0
  32. data/lib/fathom/value_aggregator.rb +11 -0
  33. data/lib/fathom/value_description.rb +79 -0
  34. data/lib/fathom/value_multiplier.rb +18 -0
  35. data/lib/options_hash.rb +186 -0
  36. data/spec/fathom/data_node_spec.rb +61 -0
  37. data/spec/fathom/import/csv_import_spec.rb +36 -0
  38. data/spec/fathom/import/yaml_import_spec.rb +40 -0
  39. data/spec/fathom/import_spec.rb +22 -0
  40. data/spec/fathom/knowledge_base_spec.rb +16 -0
  41. data/spec/fathom/monte_carlo_set_spec.rb +58 -0
  42. data/spec/fathom/plausible_range_spec.rb +130 -0
  43. data/spec/fathom/value_description_spec.rb +70 -0
  44. data/spec/fathom_spec.rb +8 -0
  45. data/spec/spec_helper.rb +13 -0
  46. data/spec/support/demo.yml +17 -0
  47. metadata +135 -0
@@ -0,0 +1,198 @@
1
+
2
+
3
+ =begin
4
+ Some noodling about what a node might contain in order to describe the joint probabilities.
5
+ =end
6
# A node in a belief network: one Variable plus the parents it is
# conditioned on.
class Node

  attr_reader :variable, :parents

  # variable - a Variable, or anything Variable.infer can turn into one
  #            (a Symbol or a String).
  # parents  - zero or more parents, stored exactly as given.
  #
  # Raises ArgumentError when no Variable can be inferred from +variable+.
  def initialize(variable, *parents)
    @variable = Variable.infer(variable)
    raise ArgumentError, "A valid variable cannot be implied from #{variable}" unless @variable
    @parents = parents
  end

  # The name of the underlying variable.
  def name
    variable.name
  end

  def inspect
    "Node: #{name} #{parents.map { |parent| parent.name }.inspect}"
  end

  class << self
    # Coerces +obj+ into a Node.
    #
    # obj     - a Node (returned untouched), or anything Variable.infer
    #           accepts.
    # parents - parents for the new node.  Each one that Variable.infer can
    #           handle is replaced by the inferred Variable; anything else
    #           (for example an existing Node) is kept as given.
    #
    # Returns the Node, or nil when no Variable can be inferred from +obj+.
    def infer(obj, *parents)
      return obj if obj.is_a?(Node)
      variable = Variable.infer(obj)
      return nil unless variable
      inferred_parents = parents.map { |parent| Variable.infer(parent) || parent }
      new(variable, *inferred_parents)
    end
  end
end
30
+
31
# A discrete random variable: a name, the values it may take, and a running
# count of how often each value has been observed.
class Variable

  attr_reader :values, :name, :observations, :total

  # name   - identifier for the variable (required).
  # values - the possible values; defaults to true and false.  Repeated
  #          values are dropped: lookups always hit the first occurrence, so
  #          a duplicate would only add a counter that can never move.
  def initialize(name, *values)
    values = [true, false] if values.empty?
    @name = name
    @values = values.inject([]) { |seen, value| seen.include?(value) ? seen : seen << value }
    @observations = Array.new(@values.size, 0)
    @total = 0
  end

  # You can observe anything but nothing: we record any observation but nil.
  # If nil is set, we use the first value as the default.
  #
  # A value that has not been seen before is appended to +values+.
  # Returns the new total number of observations.
  def observe(value=nil)
    value = values.first if value.nil?
    index = values.index(value)
    unless index
      values << value
      observations << 0
      index = values.size - 1
    end
    observations[index] += 1
    @total += 1
  end

  # Lookup observations: how many times +value+ was observed (0 if unknown).
  def observed(value)
    index = values.index(value)
    index ? observations[index] : 0
  end

  def inspect
    "Variable: #{name} #{values.inspect}"
  end

  class << self
    # Coerces +obj+ into a Variable.
    #
    # Returns +obj+ itself when it already is a Variable, a new Variable
    # named after a Symbol or String (+values+ are passed on to the
    # constructor), and nil for anything else.
    def infer(obj, *values)
      return obj if obj.is_a?(Variable)
      case obj
      when Symbol
        Variable.new(obj, *values)
      when String
        Variable.new(obj.to_sym, *values)
      end
    end
  end
end
82
+
83
+ require 'rubygems'
84
+ require 'spec'
85
+
86
# Specs for Variable: construction, inference from other objects, and
# observation bookkeeping.
describe Variable do

  before(:each) do
    @v = Variable.new(:v1)
  end

  it "should require a name" do
    nameless = lambda { Variable.new }
    named = lambda { @v = Variable.new(:name) }
    nameless.should raise_error(ArgumentError)
    named.should_not raise_error
    @v.name.should eql(:name)
  end

  it "should default to true and false as parameter values" do
    Variable.new(:v).values.should eql([true, false])
  end

  it "should be able to take a variables parameters" do
    colours = [:red, :blue, :green]
    Variable.new(:v, *colours).values.should eql(colours)
  end

  it "should be able to infer a variable from a variable" do
    original = Variable.new(:v)
    Variable.infer(original).should eql(original)
  end

  it "should be able to infer a variable from a symbol" do
    inferred = Variable.infer(:v)
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should be able to infer a variable from a string" do
    inferred = Variable.infer('v')
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should be able to infer values from a list" do
    Variable.infer(:v, 1, 2).values.should eql([1, 2])
  end

  it "should start with zero observations" do
    @v.total.should eql(0)
  end

  it "should increment observations" do
    [1, 2].each do |expected_total|
      @v.observe
      @v.total.should eql(expected_total)
    end
  end

  it "should record observations" do
    # Each snapshot is [total, count of true, count of false].
    @v.observe(true)
    [@v.total, @v.observed(true), @v.observed(false)].should eql([1, 1, 0])
    @v.observe(false)
    [@v.total, @v.observed(true), @v.observed(false)].should eql([2, 1, 1])
  end

end
153
+
154
# Specs for Node: variable inference and parent bookkeeping, using a small
# season / rain / sprinkler / wet / slippery network as the fixture.
describe Node do

  before(:each) do
    @season = Variable.new(:season, :spring, :summer, :fall, :winter)
    @x1 = Node.new(@season)
    @x2 = Node.new(:rain, @x1)
    @x3 = Node.new(:sprinkler, @x1)
    @x4 = Node.new(:wet, @x3, @x2)
    @x5 = Node.new(:slippery, @x4)
  end

  it "should infer a variable for the node" do
    given = Variable.new(:v)
    Node.new(given).variable.should eql(given)

    inferred = Node.new(:v).variable
    inferred.should be_a(Variable)
    inferred.name.should eql(:v)
  end

  it "should raise an error when it cannot infer a variable for the node" do
    build = lambda { Node.new(1) }
    build.should raise_error(ArgumentError, /A valid variable cannot be implied from/)
  end

  it "should be able to create a node with parents" do
    @x1.parents.should be_empty
    expectations = [[@x2, [@x1]], [@x3, [@x1]], [@x4, [@x3, @x2]], [@x5, [@x4]]]
    expectations.each do |node, expected_parents|
      node.parents.should eql(expected_parents)
    end
  end

  # it "should be able to infer a node" do
  #   n = Node.infer(:v1, :v2)
  #   n.name.should eql(:v1)
  #   n.variable.name.should eql(:v1)
  #   n.variable.should be_a(Variable)
  #   n.parents.size.should eql(1)
  #   p = n.parents.first
  #   p.name.should eql(:v2)
  #   p.should be_a(Variable)
  # end
end
@@ -0,0 +1,119 @@
1
class Array
  # Adds one more column to a list of combination rows.
  #
  # The receiver is treated as a list of rows (a non-Array element counts as
  # a one-cell row); +array+ holds the values of the new column.  Every row
  # is paired with every new value, with the receiver's rows cycling fastest.
  # An empty receiver simply yields one single-cell row per value.
  #
  # Rows are extended by appending rather than by flattening, so values that
  # are themselves arrays survive intact, matching what the empty-receiver
  # branch already does.
  #
  # Returns a new Array of rows; the receiver is not modified.
  def expand(array)
    return array.map { |value| [value] } if empty?
    array.inject([]) do |rows, value|
      rows.concat(map { |row| (row.is_a?(Array) ? row : [row]) + [value] })
    end
  end
end
12
+
13
# A discrete random variable: a name, the values it may take, and a running
# count of how often each value has been observed.
class Variable

  attr_reader :values, :name, :observations, :total

  # name   - identifier for the variable (required).
  # values - the possible values; defaults to true and false.  Repeated
  #          values are dropped: lookups always hit the first occurrence, so
  #          a duplicate would only add a counter that can never move.
  def initialize(name, *values)
    values = [true, false] if values.empty?
    @name = name
    @values = values.inject([]) { |seen, value| seen.include?(value) ? seen : seen << value }
    @observations = Array.new(@values.size, 0)
    @total = 0
  end

  # You can observe anything but nothing: we record any observation but nil.
  # If nil is set, we use the first value as the default.
  #
  # A value that has not been seen before is appended to +values+.
  # Returns the new total number of observations.
  def observe(value=nil)
    value = values.first if value.nil?
    index = values.index(value)
    unless index
      values << value
      observations << 0
      index = values.size - 1
    end
    observations[index] += 1
    @total += 1
  end

  # Lookup observations: how many times +value+ was observed (0 if unknown).
  def observed(value)
    index = values.index(value)
    index ? observations[index] : 0
  end

  def inspect
    "Variable: #{name} #{values.inspect}"
  end

  class << self
    # Coerces +obj+ into a Variable.
    #
    # Returns +obj+ itself when it already is a Variable, a new Variable
    # named after a Symbol or String (+values+ are passed on to the
    # constructor), and nil for anything else.
    def infer(obj, *values)
      return obj if obj.is_a?(Variable)
      case obj
      when Symbol
        Variable.new(obj, *values)
      when String
        Variable.new(obj.to_sym, *values)
      end
    end
  end
end
64
+
65
# A table over a set of variables.  The legend lists every combination of
# the variables' values, one row per combination.
class Table

  attr_reader :columns, :variables, :legend

  # variables - objects responding to +name+ and +values+.
  #
  # +columns+ holds the variable names in the order given; +legend+ holds
  # one row per combination of values, with the first variable cycling
  # fastest.
  def initialize(*variables)
    @columns = variables.map { |variable| variable.name }
    @variables = variables
    # Combine the value lists, not the variable objects themselves: the
    # objects are not enumerable.
    @legend = build_legend(variables.map { |variable| variable.values })
  end

  protected

  # Placeholder; not implemented yet.
  def lookup
  end

  private

  # Cartesian product of +value_lists+, ordered so that the first list
  # cycles fastest.  Array#product cycles its last argument fastest, hence
  # the reverse on the way in and on the way out.
  def build_legend(value_lists)
    return [] if value_lists.empty?
    slowest_first = value_lists.reverse
    slowest_first.first.product(*slowest_first[1..-1]).map { |row| row.reverse }
  end
end
80
+
81
# Placeholder node: both methods are empty stubs.
class Node
  # Takes a name and any number of parents; nothing is stored.
  def initialize(name, *parents); end

  # Take an array, array of arrays, dictionary, hash, or OpenStruct.
  # Anything but an array can add a new parent to observe.
  #
  # Not implemented: always returns nil.
  def observe(values)
    nil
  end
end
90
+
91
+ require 'rubygems'
92
+ require 'spec'
93
+
94
+ =begin
95
+ variables = [[:spring, :summer, :fall, :winter], [:true, :false], [:true, :false], [:true, :false]]
96
+
97
+ @all = []
98
+ (0...variables.size).each do |i|
99
+ @all << variables.inject([]) do |list, v|
100
+ list << (0...v.size).map do |j|
101
+ variables[i][j]
102
+ end
103
+ end
104
+ end
105
+
106
+ @all
107
+
108
+
109
+ v1 = [:spring, :summer, :fall, :winter]
110
+ v2 = [:true, :false]
111
+ a = [v1, v2]
112
+ b = []
113
+ a.each do |e|
114
+ b << e
115
+ end
116
+
117
+
118
+
119
+ =end
@@ -0,0 +1,74 @@
1
+ require 'rubygems'
2
+ require 'gsl'
3
+
4
+ include GSL
5
+
6
# A named node holding a list of labels, one probability per label, and a
# likelihood vector of the same length.  Vectors are built with
# Vector.ary_to_gv.
class Node

  attr_reader :name, :labels, :probabilities, :likelihood

  # Accepted forms, after the mandatory name:
  #
  #   Node.new(:n)                       # labels :true / :false, uniform
  #   Node.new(:n, [:a, :b, :c])         # labels from an Array, uniform
  #   Node.new(:n, :a, :b, :c)           # labels from the arguments, uniform
  #   Node.new(:n, :a => 1, :b => 3)     # labels and weights from a Hash
  #
  # Hash weights are normalized to sum to 1.
  #
  # Raises ArgumentError when the name is missing, when no label is given,
  # or when the Hash weights do not add up to a positive number.
  def initialize(*args)
    @name = args.shift
    raise ArgumentError, "Must provide a node name" unless name
    if args.empty?
      extract_from_array([:true, :false])
    elsif args.length == 1 && args.first.is_a?(Hash)
      extract_from_hash(args.first)
    elsif args.length == 1 && args.first.is_a?(Array)
      extract_from_array(args.first)
    else
      extract_from_array(args)
    end
    assert_likelihood
  end

  def inspect
    "Node: #{labels.inspect} #{probabilities.inspect}"
  end

  # Product of the probability and likelihood vectors, as computed by the
  # vector class's own * operator.
  def belief
    probabilities * likelihood
  end

  alias :b :belief
  alias :l :likelihood
  # NOTE: inside Node instances this alias takes the place of Kernel#p.
  alias :p :probabilities

  protected

  # Starts the likelihood as a vector of ones, one entry per probability.
  def assert_likelihood
    @likelihood = Vector.ary_to_gv(Array.new(@probabilities.size, 1))
  end

  # Uses +array+ as the labels and gives every label the same probability.
  def extract_from_array(array)
    raise ArgumentError, "Must provide at least one label" if array.empty?
    @labels = array
    @probabilities = Vector.ary_to_gv(uniform_distribution(array.size))
  end

  def uniform_distribution(n)
    Array.new(n, 1 / n.to_f)
  end

  # Uses the keys of +hash+ as labels and its values as unnormalized
  # weights.
  def extract_from_hash(hash)
    raise ArgumentError, "Must provide at least one label" if hash.empty?
    @labels, weights = [], []
    hash.each do |label, weight|
      @labels << label
      weights << weight
    end
    @probabilities = Vector.ary_to_gv(weights)
    normalize_probabilities!
  end

  # I don't like GSL::Vector#normalize!, it's not accurate, or has a different
  # idea of what a normalized vector looks like.
  #
  # Divides every entry by the sum of all entries.  A sum that is not
  # positive is rejected up front: dividing by it would fill the vector
  # with NaN or flip the signs.
  def normalize_probabilities!
    sum = 0.0
    @probabilities.each { |value| sum += value }
    raise ArgumentError, "Probabilities must sum to a positive number" unless sum > 0
    @probabilities.map! { |value| value.to_f / sum }
  end

end
@@ -0,0 +1,136 @@
1
+ =begin
2
+ I want to noodle around a bit with basic probabilities, odds, that sort of thing. Here's the example:
3
+
4
+ Upon being awakened by the sound of a burglar alarm, what is your degree of belief that a burglary attempt took place?
5
+
6
+ Supporting information:
7
+
8
+ * There is a 95% chance that an attempted burglary will trigger the alarm system, P(alarm|burglary) = 95%
9
+ * There is a 1% chance that the alarm will be triggered by something other than a burglary attempt, P(alarm|no burglary) = 1%
10
+ * There is a 1/10_000 chance of a particular home being burglarized, generally, P(burglary) = 10^-4
11
+
12
+ O(burglary|alarm) = L(alarm|burglary)O(burglary)
13
+
14
+ P(burglary|alarm) = O(burglary|alarm) / (1 + O(burglary|alarm))
15
+
16
+ =end
17
+
18
+ require 'mathn'
19
+
20
# Ratio of +effect+ to +cause+.
#
# Uses Numeric#quo so that two integers give an exact Rational instead of a
# truncated integer when mathn is not loaded.  Floats still give a Float.
#
# Raises ZeroDivisionError when +cause+ is an exact zero.
def likelihood(effect, cause)
  effect.quo(cause)
end
alias :l :likelihood
24
+
25
# Odds of +effect+.
#
# With one argument: effect / (1 - effect).
# With two: likelihood(cause, effect) multiplied by odds(effect).
#
# The division goes through Numeric#quo so integer arguments stay exact
# even when mathn is not loaded.
#
# NOTE(review): the file header states O(h|e) = L(e|h) O(h); confirm that the
# argument order passed to likelihood here is the intended one.
def odds(effect, cause=nil)
  return likelihood(cause, effect) * odds(effect) if cause
  effect.quo(1 - effect)
end
alias :o :odds
29
+
30
# The same as the odds above, but only in the case of a single hypothesis:
# e / (1 - e).
#
# The division goes through Numeric#quo so an integer +e+ gives an exact
# Rational instead of a truncated integer when mathn is not loaded.
def prior_odds(e)
  e.quo(1 - e)
end
34
+
35
# Divides probability(e, h) by probability(e, 1 - h).
#
# NOTE(review): with probability defined as (e * h) / h, both terms reduce to
# e, so this looks like it evaluates to 1 for any usable input -- confirm the
# intended formula.
def likelihood_ratio(e, h)
  given_hypothesis = probability(e, h)
  given_complement = probability(e, 1 - h)
  given_hypothesis / given_complement
end
38
+
39
# Multiplies +e+ by +h+ and divides the product by +h+ again.
def probability(e, h)
  joint = e * h
  joint / h
end

# Placeholder: accepts any number of arguments and returns nil.
def product_rule(*e); end

# Short name for probability.  Defined at the top level, so it shadows
# Kernel#p.
alias :p :probability
46
+
47
class Array

  # Named weight lists attached to this array, keyed by symbol.  Created on
  # first use.
  def givens
    @givens ||= {}
  end

  # Stores +array+ under +sym+ and returns it.
  #
  # When +array+ is omitted, every element of the receiver gets the weight
  # 1/size.  The weight is computed with Integer#quo so it is an exact
  # Rational even when mathn is not loaded; an empty receiver gets an empty
  # list rather than a division by zero.
  def give(sym, array=Array.new(self.size, self.empty? ? 0 : 1.quo(self.size)))
    givens[sym] = array
  end

  # The entry at +index+ of the list stored under +sym+, or nil when nothing
  # has been stored under +sym+.
  def given(sym, index)
    weights = givens[sym]
    weights && weights[index]
  end
end
61
+
62
+ =begin
63
+ Since Rational is the automatic choice for probabilistic data,
64
+ and since I don't want to override how mathn infers numbers,
65
+ I am adding some baggage to Rational:
66
+
67
+ * It still reduces to the LCD
68
+ * It keeps track of all events,
69
+ so that I can keep a new event proportional to old ones
70
+ * It has an add_event (add) which takes a true or false value
71
+ true values, records that a condition was found
72
+
73
+ This really only works for binary data, but this is a noodle file.
74
+
75
+ =end
76
# Reopens Rational to carry two extra counters on each value: total_events
# and positive_events.
# NOTE(review): this relies on Rational having a class-level `reduce` method
# and on Rational instances accepting instance-variable assignment --
# presumably true only for the pure-Ruby Rational of older Rubies; confirm
# against the target interpreter.
+ class Rational < Numeric
77
+ class << self
78
# Keep the existing reduce reachable as orig_reduce, then wrap it so the
# value it returns also records the num/den it was built from.
+ alias :orig_reduce :reduce
79
+ def reduce(num, den=1)
80
+ val = orig_reduce(num, den)
81
+ val.total_events = den
82
+ val.positive_events = num
83
+ val
84
+ end
85
+ end
86
+
87
# Combined reader/writer: returns the stored count (0 until set) and, when
# called with a truthy val, stores val first.
+ def total_events(val=nil)
88
+ @total_events ||= 0
89
+ @total_events = val if val
90
+ @total_events
91
+ end
92
+ alias :events :total_events
93
+ alias :total :total_events
94
+
95
# Assignment form; delegates to the combined reader/writer above.
+ def total_events=(val)
96
+ total_events(val)
97
+ end
98
+
99
# Combined reader/writer for the positive count; same shape as total_events.
+ def positive_events(val=nil)
100
+ @positive_events ||= 0
101
+ @positive_events = val if val
102
+ @positive_events
103
+ end
104
+ alias :positive :positive_events
105
+
106
# Assignment form; delegates to the combined reader/writer above.
+ def positive_events=(val)
107
+ positive_events(val)
108
+ end
109
+
110
+ # Uses the Rational constructor to calculate the lowest common denominator
# Records one more event: the total always grows by one, the positive count
# only when `positive` is truthy.  Builds Rational(num, den) from the new
# counts, copies its numerator and denominator into @numerator and
# @denominator, stores the raw counts, and returns self.
111
+ def add_event(positive=true)
112
+ num = positive ? self.positive_events + 1 : self.positive_events
113
+ den = self.total_events + 1
114
+ other = Rational(num, den)
115
+ @numerator, @denominator, @positive_events, @total_events = other.numerator, other.denominator, num, den
116
+ self
117
+ end
118
+ alias :add :add_event
119
+ end
120
+
121
+
122
# Scratch holder for random-number experiments; everything lives on the
# class itself.
class A
  class << self
    include GSL

    # The shared generator, created with Rng.alloc on first use.
    def r
      @r = Rng.alloc unless @r
      @r
    end

    # Asks the generator for one gaussian value, passes it to
    # Ran.gaussian_pdf, and prints both numbers with puts.
    def pdf
      sample = r.gaussian
      density = Ran.gaussian_pdf(sample)
      puts sample, density
    end
  end
end