fathom 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +10 -0
- data/Gemfile +10 -2
- data/Gemfile.lock +8 -0
- data/TODO.md +12 -25
- data/VERSION +1 -1
- data/lib/{fathom/ext → ext}/array.rb +0 -0
- data/lib/{fathom/ext → ext}/faster_csv.rb +0 -0
- data/lib/{fathom/ext → ext}/open_struct.rb +0 -0
- data/lib/{fathom/ext → ext}/string.rb +0 -0
- data/lib/fathom.rb +16 -13
- data/lib/fathom/agent.rb +8 -9
- data/lib/fathom/{causal_graph.rb → archive/causal_graph.rb} +0 -0
- data/lib/fathom/{concept.rb → archive/concept.rb} +0 -0
- data/lib/fathom/archive/conditional_probability_matrix.rb +3 -0
- data/lib/fathom/{inverter.rb → archive/inverter.rb} +0 -0
- data/lib/fathom/archive/node.rb +24 -1
- data/lib/fathom/distributions/discrete_uniform.rb +11 -32
- data/lib/fathom/import.rb +37 -34
- data/lib/fathom/import/yaml_import.rb +22 -1
- data/lib/fathom/knowledge_base.rb +34 -23
- data/lib/fathom/knowledge_base/search.rb +19 -0
- data/lib/fathom/node.rb +32 -1
- data/lib/fathom/node/belief_node.rb +121 -0
- data/lib/fathom/node/cpm_node.rb +100 -0
- data/lib/fathom/node/data_collection.rb +97 -0
- data/lib/fathom/{data_node.rb → node/data_node.rb} +1 -1
- data/lib/fathom/{value_aggregator.rb → node/decision.rb} +5 -5
- data/lib/fathom/node/discrete_node.rb +41 -0
- data/lib/fathom/node/fact.rb +24 -0
- data/lib/fathom/{mc_node.rb → node/mc_node.rb} +1 -1
- data/lib/fathom/{enforced_name.rb → node/node_extensions/enforced_name.rb} +1 -1
- data/lib/fathom/{numeric_methods.rb → node/node_extensions/numeric_methods.rb} +19 -1
- data/lib/fathom/{plausible_range.rb → node/plausible_range.rb} +1 -1
- data/spec/ext/array_spec.rb +10 -0
- data/spec/ext/faster_csv_spec.rb +10 -0
- data/spec/ext/open_struct_spec.rb +20 -0
- data/spec/ext/string_spec.rb +7 -0
- data/spec/fathom/import/csv_import_spec.rb +11 -9
- data/spec/fathom/import/yaml_import_spec.rb +27 -7
- data/spec/fathom/knowledge_base_spec.rb +8 -4
- data/spec/fathom/node/belief_node_spec.rb +180 -0
- data/spec/fathom/node/cpm_node_spec.rb +144 -0
- data/spec/fathom/node/data_collection_spec.rb +26 -0
- data/spec/fathom/{data_node_spec.rb → node/data_node_spec.rb} +1 -1
- data/spec/fathom/node/decision_spec.rb +15 -0
- data/spec/fathom/node/discrete_node_spec.rb +56 -0
- data/spec/fathom/node/fact_spec.rb +33 -0
- data/spec/fathom/{mc_node_spec.rb → node/mc_node_spec.rb} +1 -1
- data/spec/fathom/{enforced_name_spec.rb → node/node_extensions/enforced_name_spec.rb} +1 -1
- data/spec/fathom/{numeric_methods_spec.rb → node/node_extensions/numeric_methods_spec.rb} +53 -11
- data/spec/fathom/{plausible_range_spec.rb → node/plausible_range_spec.rb} +1 -1
- data/spec/fathom/node_spec.rb +17 -0
- data/spec/fathom_spec.rb +40 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/support/fact.yml +11 -0
- metadata +57 -30
- data/lib/fathom/value_multiplier.rb +0 -18
@@ -1,31 +1,42 @@
|
|
1
|
-
require File.expand_path(
|
2
|
-
|
1
|
+
require File.expand_path("../../fathom", __FILE__)
|
2
|
+
|
3
|
+
require 'rdf'
|
4
|
+
require 'knowledge_base/search'
|
5
|
+
|
6
|
+
module Fathom
|
7
|
+
class KnowledgeBase
|
3
8
|
|
4
|
-
|
9
|
+
# =====================
|
10
|
+
# = Module Extensions =
|
11
|
+
# =====================
|
12
|
+
include Search
|
5
13
|
|
6
|
-
|
7
|
-
opts = OptionsHash.new(opts)
|
8
|
-
@data_store = OpenStruct.new
|
9
|
-
end
|
10
|
-
|
11
|
-
def []=(key, value)
|
12
|
-
@data_store.table[key] = value
|
13
|
-
end
|
14
|
+
# attr_reader :data_store
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
# This is temporary, but useful for now. After we have the persisted KnowledgeBase,
|
20
|
-
# we'll create explicit accessor methods or a find syntax.
|
21
|
-
def method_missing(sym, *args, &block)
|
22
|
-
if @data_store.table.keys.include?(sym)
|
23
|
-
@data_store.send(sym)
|
24
|
-
else
|
25
|
-
super
|
16
|
+
def initialize(opts={})
|
17
|
+
# opts = OptionsHash.new(opts)
|
18
|
+
# @data_store = OpenStruct.new
|
26
19
|
end
|
27
|
-
end
|
28
20
|
|
21
|
+
# def []=(key, value)
|
22
|
+
# @data_store.table[key] = value
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# def [](key)
|
26
|
+
# @data_store.table[key]
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# # This is temporary, but useful for now. After we have the persisted KnowledgeBase,
|
30
|
+
# # we'll create explicit accessor methods or a find syntax.
|
31
|
+
# def method_missing(sym, *args, &block)
|
32
|
+
# if @data_store.table.keys.include?(sym)
|
33
|
+
# @data_store.send(sym)
|
34
|
+
# else
|
35
|
+
# super
|
36
|
+
# end
|
37
|
+
# end
|
38
|
+
|
39
|
+
end
|
29
40
|
end
|
30
41
|
|
31
42
|
if __FILE__ == $0
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'sparql/algebra'
|
2
|
+
|
3
|
+
module Fathom
  # Search mixin. Including it extends the host class with ClassMethods and
  # mixes InstanceMethods into it.
  module Search

    # Hook invoked by Ruby when Search is included into +base+.
    def self.included(base)
      base.extend(ClassMethods)
      base.send(:include, InstanceMethods)
    end

    # Class-level API added to the including class.
    module ClassMethods
      # Placeholder finder: accepts an options hash, has no body yet and
      # therefore returns nil.
      def find(opts={})
      end
    end

    # Instance-level API added to the including class (currently empty).
    module InstanceMethods
    end

  end
end
|
data/lib/fathom/node.rb
CHANGED
@@ -1,9 +1,18 @@
|
|
1
1
|
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
# TODO: Move this into a proper configuration module
|
3
|
+
# require 'spira'
|
4
|
+
# @repository = RDF::Repository.new
|
5
|
+
# Spira.add_repository(:default, @repository)
|
6
|
+
|
2
7
|
class Fathom::Node
|
3
8
|
|
9
|
+
# See notes in the spec about this.
|
10
|
+
# include Spira::Resource
|
11
|
+
|
4
12
|
attr_reader :name, :distribution, :description, :values
|
5
13
|
|
6
14
|
def initialize(opts={})
|
15
|
+
symbolize_keys!(opts)
|
7
16
|
@name = opts[:name]
|
8
17
|
assert_distribution(opts)
|
9
18
|
@description = opts[:description]
|
@@ -56,8 +65,31 @@ class Fathom::Node
|
|
56
65
|
true
|
57
66
|
end
|
58
67
|
|
68
|
+
# Short label for this node: "name (ClassName)" when a name is set,
# otherwise just the class name.
def simple_inspect
  class_label = self.class.to_s
  return class_label unless name
  "#{name} (#{class_label})"
end
|
71
|
+
|
72
|
+
# Human-readable summary: class name, then name and description (each
# omitted when nil), then the simple_inspect labels of children and parents.
def inspect
  child_labels  = children.map { |node| node.simple_inspect }.inspect
  parent_labels = parents.map { |node| node.simple_inspect }.inspect
  details = [name, description, "children:", child_labels, "parents: ", parent_labels]
  "#{self.class.to_s}: " + details.compact.join(", ")
end
|
82
|
+
|
59
83
|
protected
|
60
84
|
|
85
|
+
# Quick and dirty extract from ActiveSupport's same method
|
86
|
+
# Converts the keys of +h+ to symbols in place and returns +h+.
# A key whose to_sym raises (or returns nil) is kept unchanged.
def symbolize_keys!(h)
  h.keys.each do |original_key|
    symbol_key = begin
      original_key.to_sym
    rescue StandardError
      original_key
    end
    h[symbol_key || original_key] = h.delete(original_key)
  end
  h
end
|
92
|
+
|
61
93
|
def add_accessor_for_node(node)
|
62
94
|
return false unless node.is_a?(Node) and node.name_sym
|
63
95
|
return false if self.respond_to?(node.name_sym)
|
@@ -77,7 +109,6 @@ class Fathom::Node
|
|
77
109
|
add_parent(parent)
|
78
110
|
end
|
79
111
|
|
80
|
-
|
81
112
|
found = opts[:children]
|
82
113
|
found ||= opts[:child]
|
83
114
|
found ||= []
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
class Fathom::BeliefNode < DiscreteNode
|
3
|
+
|
4
|
+
attr_reader :probabilities, :likelihoods, :precision_threshold
|
5
|
+
|
6
|
+
# Builds the node through the superclass, then sets probabilities and
# likelihoods from +opts+ and records the precision threshold
# (opts[:precision_threshold], defaulting to 0.00001).
def initialize(opts={})
  super(opts)
  assert_probabilities(opts)
  assert_liklihoods(opts)
  @precision_threshold = opts.fetch(:precision_threshold) { 0.00001 }
end
|
12
|
+
|
13
|
+
# Adds +child+ below this node. Anything that is not a BeliefNode is handled
# by the superclass. For a BeliefNode a CPMNode joining the two is created and
# exposed on both nodes through add_accessor_for_cpm; the CPM itself is not
# stored in +children+, only the child node is.
def add_child(child)
  return super(child) unless child.is_a?(BeliefNode)

  cpm = CPMNode.new(:parent => self, :child => child)
  add_accessor_for_cpm(cpm, child)
  add_accessor_for_node(child)
  children << child
  child.register_parent(self)
  child.add_accessor_for_cpm(cpm, self)
end
|
27
|
+
|
28
|
+
# Adds +parent+ above this node. Anything that is not a BeliefNode is handled
# by the superclass. For a BeliefNode a CPMNode joining the two is created and
# exposed on both nodes through add_accessor_for_cpm; the CPM itself is not
# stored in +parents+, only the parent node is.
def add_parent(parent)
  return super(parent) unless parent.is_a?(BeliefNode)

  cpm = CPMNode.new(:parent => parent, :child => self)
  add_accessor_for_cpm(cpm, parent)
  add_accessor_for_node(parent)
  parents << parent
  parent.register_child(self)
  parent.add_accessor_for_cpm(cpm, self)
end
|
42
|
+
|
43
|
+
# Human-readable summary like Node#inspect, except that any CPMNode found in
# +children+ / +parents+ is shown as the node on its far side (its child or
# its parent respectively).
def inspect
  child_labels = children.map do |entry|
    (entry.is_a?(CPMNode) ? entry.child : entry).simple_inspect
  end
  parent_labels = parents.map do |entry|
    (entry.is_a?(CPMNode) ? entry.parent : entry).simple_inspect
  end
  details = [name, description, "children:", child_labels.inspect, "parents: ", parent_labels.inspect]
  "#{self.class.to_s}: " + details.compact.join(", ")
end
|
53
|
+
|
54
|
+
# Defines a singleton reader named cpm_for_<node name> on this object that
# returns +cpm+. Returns false (and defines nothing) when +cpm+ is not a
# CPMNode, has no name_sym, or this object already responds to that name.
def add_accessor_for_cpm(cpm, node)
  return false unless cpm.is_a?(CPMNode) && cpm.name_sym

  method_name = "cpm_for_#{node.name_sym}".to_sym
  return false if respond_to?(method_name)

  # Define on the singleton class so only this instance gains the reader.
  metaclass = class << self; self; end
  metaclass.send(:define_method, method_name) { cpm }
end
|
64
|
+
|
65
|
+
# Placeholder: ignores +label+ and returns a new, empty OpenStruct.
# NOTE(review): looks unfinished — confirm intended behavior.
def likelihood(label)
  OpenStruct.new
end
|
68
|
+
|
69
|
+
protected
|
70
|
+
|
71
|
+
|
72
|
+
# Sets @probabilities from +opts+.
#
# When opts[:values] is a Hash, labels and probabilities are taken from it
# via assert_probabilities_and_labels_from_values_hash. Otherwise
# opts[:probabilities] (Array, GSL::Vector or a single value; defaults to
# equal weights) is converted to a vector and normalized to sum to 1.
#
# Raises ArgumentError when the vector length differs from +size+, or when
# the weights sum to zero (normalizing would otherwise yield NaN values).
def assert_probabilities(opts)
  values = opts[:values]
  return assert_probabilities_and_labels_from_values_hash(values) if values && values.is_a?(Hash)

  weights = opts.fetch(:probabilities) { Array.new(size, 1.0) }
  vector = case weights
           when Array       then GSL::Vector.ary_to_gv(weights)
           when GSL::Vector then weights
           else                  GSL::Vector[weights]
           end

  raise ArgumentError, "Probabilities must be #{size} items long" unless vector.size == size

  total = vector.sum
  raise ArgumentError, "Probabilities must not sum to zero" if total.zero?

  @probabilities = vector.map { |weight| weight / total }
end
|
92
|
+
|
93
|
+
# Splits +values+ (label => probability pairs) into @labels and
# @probabilities, keeping iteration order. The probabilities are stored as
# given; they are not normalized here.
def assert_probabilities_and_labels_from_values_hash(values)
  pairs = values.to_a
  @labels = pairs.map { |pair| pair.first }
  @probabilities = GSL::Vector.ary_to_gv(pairs.map { |pair| pair.last })
end
|
101
|
+
|
102
|
+
# Sets @likelihoods from opts[:likelihoods] (Array, GSL::Vector or a single
# value; defaults to a vector of 1.0 with +size+ entries).
#
# Raises ArgumentError when the resulting vector length differs from +size+.
# The length check is made on the converted vector, so a scalar option is
# measured as a one-element vector rather than by calling #size on the
# scalar itself.
#
# (The method name keeps its historical spelling because callers use it.)
def assert_liklihoods(opts)
  raw = opts.fetch(:likelihoods) { Array.new(size, 1.0) }
  @likelihoods = case raw
                 when Array       then GSL::Vector.ary_to_gv(raw)
                 when GSL::Vector then raw
                 else                  GSL::Vector[raw]
                 end
  raise ArgumentError, "Likelihoods must be #{size} items long" unless
    @likelihoods.size == size
end
|
115
|
+
end
|
116
|
+
|
117
|
+
if __FILE__ == $0
|
118
|
+
include Fathom
|
119
|
+
# TODO: Is there anything you want to do to run this file on its own?
|
120
|
+
# BeliefNode.new
|
121
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
|
3
|
+
# Conditional Probability Matrix to join two nodes
|
4
|
+
class Fathom::CPMNode < Node
|
5
|
+
|
6
|
+
# Validates that opts[:parent] and opts[:child] are BeliefNodes before the
# superclass initializer runs, then fills in the default name, the default
# description and the matrix (in that order).
def initialize(opts={})
  ensure_belief_nodes(opts)
  super(opts)
  [:assert_name, :assert_description, :assert_cpm].each { |step| send(step) }
end
|
13
|
+
|
14
|
+
# The parent side of this matrix: the first entry of +parents+.
def parent
  parents.first
end
|
17
|
+
|
18
|
+
# The child side of this matrix: the first entry of +children+.
def child
  children.first
end
|
21
|
+
|
22
|
+
alias :matrix :values
|
23
|
+
|
24
|
+
# Filter values from the matrix. Allows us to grab multiple rows and columns if desired.
|
25
|
+
# If the rows or columns aren't filtered, all values are assumed to be desired.
|
26
|
+
#
|
27
|
+
# @cpm.probability :child_node_name => [:desired, :values], :parent_node_name => :value
|
28
|
+
# This filters both the child columns and the parent rows
|
29
|
+
#
|
30
|
+
# @cpm.probability :child_node_name => [:desired, :values]
|
31
|
+
# This only filters the child columns
|
32
|
+
# Sums the matrix cells selected by +opts+.
#
# opts may hold, keyed by the parent's and the child's name_sym, a single
# label or an array of labels to keep; a missing key means "all labels" of
# that node. Parent labels select matrix rows, child labels select columns.
# opts[:describe], when truthy, makes the result a one-entry Hash of
# "P(child labels | parent labels)" => value instead of the bare value.
#
# The caller's hash is not modified.
#
# Raises ArgumentError for a key that is neither node's name_sym, and for a
# label that is not among the corresponding node's labels.
def probability(opts={})
  # Work on a copy so deleting :describe never leaks back to the caller.
  filters = opts.dup
  describe = filters.delete(:describe) || false

  # Is something unknown being asked for?
  allowed = [parent.name_sym, child.name_sym]
  unknown_keys = filters.reject { |key, _| allowed.include?(key) }
  raise ArgumentError, "Unknown node: #{unknown_keys.inspect}" unless unknown_keys.empty?

  # Labels requested for each side (everything when unfiltered)
  child_values = Array(filters[child.name_sym] || child.labels)
  parent_values = Array(filters[parent.name_sym] || parent.labels)

  # Translate labels to matrix indices, rejecting labels the node lacks
  # instead of handing a nil index to the matrix.
  index_of = lambda do |node, label|
    node.labels.index(label) ||
      raise(ArgumentError, "Unknown value #{label.inspect} for #{node.name_sym.inspect}")
  end
  child_indices = child_values.map { |label| index_of.call(child, label) }
  parent_indices = parent_values.map { |label| index_of.call(parent, label) }

  # Collect the filtered values from the matrix
  value = parent_indices.inject(0.0) do |total, row|
    total + child_indices.inject(0.0) { |row_total, col| row_total + matrix.get(row, col) }
  end

  return value unless describe

  label = "P(" +
    child_values.map(&:to_s).join(" or ") +
    " | " +
    parent_values.map(&:to_s).join(" or ") +
    ")"
  { label => value }
end
|
67
|
+
alias :p :probability
|
68
|
+
|
69
|
+
# Odds for the cells selected by +opts+: probability / (1 - probability).
#
# +opts+ takes the same filters as #probability. A :describe entry is
# ignored here, because a described (Hash) probability cannot be divided.
def odds(opts={})
  filters = opts.reject { |key, _| key == :describe }
  chance = probability(filters)
  chance / (1 - chance)
end
|
73
|
+
alias :o :odds
|
74
|
+
|
75
|
+
# Returns a vector of likelihoods for each parent value, given the child value
|
76
|
+
# Builds a GSL::Vector with one entry per parent label, each being
# probability(parent label, child +value+), in the order of parent.labels.
def likelihood(value)
  per_parent = parent.labels.map do |parent_label|
    probability(parent.name_sym => parent_label, child.name_sym => value)
  end
  GSL::Vector.alloc(*per_parent)
end
|
81
|
+
alias :l :likelihood
|
82
|
+
|
83
|
+
protected
|
84
|
+
# Defaults @name to :cpm when no name has been set.
def assert_name
  @name ||= :cpm
end
|
87
|
+
|
88
|
+
# Defaults @description to a sentence naming the parent and child nodes
# when no description has been set.
def assert_description
  @description ||= "Conditional Probability Matrix from #{parent.name.to_s} to #{child.name.to_s}."
end
|
91
|
+
|
92
|
+
# Raises ArgumentError unless both opts[:child] and opts[:parent] are
# BeliefNodes. The child is checked first.
def ensure_belief_nodes(opts)
  child_ok = opts[:child].is_a?(BeliefNode)
  raise ArgumentError, "The child must be a BeliefNode" unless child_ok
  parent_ok = opts[:parent].is_a?(BeliefNode)
  raise ArgumentError, "The parent must be a BeliefNode" unless parent_ok
end
|
96
|
+
|
97
|
+
# Stores in @values the product of the parent's probabilities (taken via
# #col) and the child's probabilities.
# NOTE(review): presumably an outer product giving parent rows by child
# columns, matching how #probability indexes the matrix — confirm with GSL.
def assert_cpm
  parent_column = parent.probabilities.col
  child_row = child.probabilities
  @values = parent_column * child_row
end
|
100
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
|
3
|
+
=begin
|
4
|
+
This class uses SQLite for in-memory set operations. It is based on a discrete variable
|
5
|
+
which can be translated into fields in a table. For now, I am just using float data
|
6
|
+
types for the fields. This will evolve as needs drive it into a more robust data set.
|
7
|
+
|
8
|
+
It turns out that SQLite set operations are quite fast and speed things up pretty well.
|
9
|
+
So, to use this class, you'll need to have sqlite3-ruby SQLite3 bindings installed.
|
10
|
+
|
11
|
+
This also uses uuid to enforce a node name, an additional dependency.
|
12
|
+
=end
|
13
|
+
|
14
|
+
require 'uuid'
|
15
|
+
|
16
|
+
class Fathom::DataCollection < DiscreteNode
|
17
|
+
|
18
|
+
# Fills in labels via extract_labels, gives the node a generated UUID name
# when opts[:name] is not set, then hands the options to the superclass.
def initialize(opts={})
  prepared = extract_labels(opts)
  prepared[:name] = UUID.generate unless prepared[:name]
  super(prepared)
end
|
23
|
+
|
24
|
+
|
25
|
+
protected
|
26
|
+
|
27
|
+
# Looking for labels.
|
28
|
+
# Using :labels, then :parents, then :parent, looking for the first node with labels defined
|
29
|
+
# Ensures +opts+ carries :labels when they can be found, and returns +opts+.
#
# Lookup order: an existing opts[:labels] wins; otherwise opts[:parents],
# then opts[:parent], is searched (a single node is treated as a one-item
# list) and the labels of the first entry that responds to #labels are
# copied into opts[:labels]. When no parents are given, or none of them
# has labels, +opts+ is returned unchanged.
def extract_labels(opts)
  return opts if opts[:labels]

  candidates = opts[:parents] || opts[:parent]
  # No parents at all is a valid call: there is simply nothing to inherit.
  return opts if candidates.nil?

  candidates = [candidates] unless candidates.is_a?(Array)
  source = candidates.find { |node| node.respond_to?(:labels) }
  opts[:labels] = source.labels if source
  opts
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
if __FILE__ == $0
|
46
|
+
include Fathom
|
47
|
+
# TODO: Is there anything you want to do to run this file on its own?
|
48
|
+
# DataCollection.new
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
# J2: Some bare-minimum sqlite3 stuff
|
53
|
+
# See: http://sqlite-ruby.rubyforge.org/
|
54
|
+
# require 'rubygems'
|
55
|
+
# require 'sqlite3'
|
56
|
+
#
|
57
|
+
# def prepare_database(table_name)
|
58
|
+
# @db = SQLite3::Database.new(":memory:")
|
59
|
+
# # @db = SQLite3::Database.new("/tmp/j2.db")
|
60
|
+
#
|
61
|
+
# create_sql = <<-SQL
|
62
|
+
#
|
63
|
+
# CREATE TABLE "#{table_name}" (
|
64
|
+
# "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
65
|
+
# "field1" FLOAT);
|
66
|
+
# SQL
|
67
|
+
# @db.execute_batch(create_sql)
|
68
|
+
#
|
69
|
+
# insert_sql = <<-SQL
|
70
|
+
# INSERT INTO #{table_name}
|
71
|
+
# ("field1")
|
72
|
+
# VALUES
|
73
|
+
# (:field1);
|
74
|
+
# SQL
|
75
|
+
# @insert_record = @db.prepare(insert_sql)
|
76
|
+
#
|
77
|
+
# # select_sql = <<-SQL
|
78
|
+
# # "select * from 'asdf';"
|
79
|
+
# # SQL
|
80
|
+
# # @select_record = @db.prepare(select_sql)
|
81
|
+
# @select_record = @db.prepare( "select * from asdf" )
|
82
|
+
# end
|
83
|
+
#
|
84
|
+
# def insert_record(opts)
|
85
|
+
# @insert_record.bind_params(:field1 => opts[:field1])
|
86
|
+
# @insert_record.execute
|
87
|
+
# end
|
88
|
+
#
|
89
|
+
# def select_record(opts={})
|
90
|
+
# # @db.execute( "select * from 'asdf'" )
|
91
|
+
# @select_record.execute.entries
|
92
|
+
# end
|
93
|
+
#
|
94
|
+
# prepare_database('asdf')
|
95
|
+
# insert_record :field1 => 123.1
|
96
|
+
# insert_record :field1 => 122.2
|
97
|
+
# @a = select_record
|