fathom 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +10 -0
- data/Gemfile +10 -2
- data/Gemfile.lock +8 -0
- data/TODO.md +12 -25
- data/VERSION +1 -1
- data/lib/{fathom/ext → ext}/array.rb +0 -0
- data/lib/{fathom/ext → ext}/faster_csv.rb +0 -0
- data/lib/{fathom/ext → ext}/open_struct.rb +0 -0
- data/lib/{fathom/ext → ext}/string.rb +0 -0
- data/lib/fathom.rb +16 -13
- data/lib/fathom/agent.rb +8 -9
- data/lib/fathom/{causal_graph.rb → archive/causal_graph.rb} +0 -0
- data/lib/fathom/{concept.rb → archive/concept.rb} +0 -0
- data/lib/fathom/archive/conditional_probability_matrix.rb +3 -0
- data/lib/fathom/{inverter.rb → archive/inverter.rb} +0 -0
- data/lib/fathom/archive/node.rb +24 -1
- data/lib/fathom/distributions/discrete_uniform.rb +11 -32
- data/lib/fathom/import.rb +37 -34
- data/lib/fathom/import/yaml_import.rb +22 -1
- data/lib/fathom/knowledge_base.rb +34 -23
- data/lib/fathom/knowledge_base/search.rb +19 -0
- data/lib/fathom/node.rb +32 -1
- data/lib/fathom/node/belief_node.rb +121 -0
- data/lib/fathom/node/cpm_node.rb +100 -0
- data/lib/fathom/node/data_collection.rb +97 -0
- data/lib/fathom/{data_node.rb → node/data_node.rb} +1 -1
- data/lib/fathom/{value_aggregator.rb → node/decision.rb} +5 -5
- data/lib/fathom/node/discrete_node.rb +41 -0
- data/lib/fathom/node/fact.rb +24 -0
- data/lib/fathom/{mc_node.rb → node/mc_node.rb} +1 -1
- data/lib/fathom/{enforced_name.rb → node/node_extensions/enforced_name.rb} +1 -1
- data/lib/fathom/{numeric_methods.rb → node/node_extensions/numeric_methods.rb} +19 -1
- data/lib/fathom/{plausible_range.rb → node/plausible_range.rb} +1 -1
- data/spec/ext/array_spec.rb +10 -0
- data/spec/ext/faster_csv_spec.rb +10 -0
- data/spec/ext/open_struct_spec.rb +20 -0
- data/spec/ext/string_spec.rb +7 -0
- data/spec/fathom/import/csv_import_spec.rb +11 -9
- data/spec/fathom/import/yaml_import_spec.rb +27 -7
- data/spec/fathom/knowledge_base_spec.rb +8 -4
- data/spec/fathom/node/belief_node_spec.rb +180 -0
- data/spec/fathom/node/cpm_node_spec.rb +144 -0
- data/spec/fathom/node/data_collection_spec.rb +26 -0
- data/spec/fathom/{data_node_spec.rb → node/data_node_spec.rb} +1 -1
- data/spec/fathom/node/decision_spec.rb +15 -0
- data/spec/fathom/node/discrete_node_spec.rb +56 -0
- data/spec/fathom/node/fact_spec.rb +33 -0
- data/spec/fathom/{mc_node_spec.rb → node/mc_node_spec.rb} +1 -1
- data/spec/fathom/{enforced_name_spec.rb → node/node_extensions/enforced_name_spec.rb} +1 -1
- data/spec/fathom/{numeric_methods_spec.rb → node/node_extensions/numeric_methods_spec.rb} +53 -11
- data/spec/fathom/{plausible_range_spec.rb → node/plausible_range_spec.rb} +1 -1
- data/spec/fathom/node_spec.rb +17 -0
- data/spec/fathom_spec.rb +40 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/support/fact.yml +11 -0
- metadata +57 -30
- data/lib/fathom/value_multiplier.rb +0 -18
@@ -1,31 +1,42 @@
|
|
1
|
-
require File.expand_path(
|
2
|
-
|
1
|
+
require File.expand_path("../../fathom", __FILE__)
|
2
|
+
|
3
|
+
require 'rdf'
|
4
|
+
require 'knowledge_base/search'
|
5
|
+
|
6
|
+
module Fathom
|
7
|
+
class KnowledgeBase
|
3
8
|
|
4
|
-
|
9
|
+
# =====================
|
10
|
+
# = Module Extensions =
|
11
|
+
# =====================
|
12
|
+
include Search
|
5
13
|
|
6
|
-
|
7
|
-
opts = OptionsHash.new(opts)
|
8
|
-
@data_store = OpenStruct.new
|
9
|
-
end
|
10
|
-
|
11
|
-
def []=(key, value)
|
12
|
-
@data_store.table[key] = value
|
13
|
-
end
|
14
|
+
# attr_reader :data_store
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
# This is temporary, but useful for now. After we have the persisted KnowledgeBase,
|
20
|
-
# we'll create explicit accessor methods or a find syntax.
|
21
|
-
def method_missing(sym, *args, &block)
|
22
|
-
if @data_store.table.keys.include?(sym)
|
23
|
-
@data_store.send(sym)
|
24
|
-
else
|
25
|
-
super
|
16
|
+
def initialize(opts={})
|
17
|
+
# opts = OptionsHash.new(opts)
|
18
|
+
# @data_store = OpenStruct.new
|
26
19
|
end
|
27
|
-
end
|
28
20
|
|
21
|
+
# def []=(key, value)
|
22
|
+
# @data_store.table[key] = value
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# def [](key)
|
26
|
+
# @data_store.table[key]
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# # This is temporary, but useful for now. After we have the persisted KnowledgeBase,
|
30
|
+
# # we'll create explicit accessor methods or a find syntax.
|
31
|
+
# def method_missing(sym, *args, &block)
|
32
|
+
# if @data_store.table.keys.include?(sym)
|
33
|
+
# @data_store.send(sym)
|
34
|
+
# else
|
35
|
+
# super
|
36
|
+
# end
|
37
|
+
# end
|
38
|
+
|
39
|
+
end
|
29
40
|
end
|
30
41
|
|
31
42
|
if __FILE__ == $0
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'sparql/algebra'
|
2
|
+
|
3
|
+
module Fathom
  # Mixin that attaches search behaviour to a host class. Including it
  # extends the host with ClassMethods and mixes InstanceMethods into it.
  module Search

    # Class-level API gained by the including class.
    module ClassMethods
      # Placeholder finder: the body is empty, so it accepts an options hash
      # and returns nil.
      def find(opts={}); end
    end

    # Instance-level API gained by the including class (currently empty).
    module InstanceMethods; end

    # Hook run by Ruby when the module is included.
    def self.included(host)
      host.extend(ClassMethods)
      # include is sent rather than called directly, as in the original hook.
      host.send(:include, InstanceMethods)
    end
  end
end
|
data/lib/fathom/node.rb
CHANGED
@@ -1,9 +1,18 @@
|
|
1
1
|
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
# TODO: Move this into a proper configuration module
|
3
|
+
# require 'spira'
|
4
|
+
# @repository = RDF::Repository.new
|
5
|
+
# Spira.add_repository(:default, @repository)
|
6
|
+
|
2
7
|
class Fathom::Node
|
3
8
|
|
9
|
+
# See notes in the spec about this.
|
10
|
+
# include Spira::Resource
|
11
|
+
|
4
12
|
attr_reader :name, :distribution, :description, :values
|
5
13
|
|
6
14
|
def initialize(opts={})
|
15
|
+
symbolize_keys!(opts)
|
7
16
|
@name = opts[:name]
|
8
17
|
assert_distribution(opts)
|
9
18
|
@description = opts[:description]
|
@@ -56,8 +65,31 @@ class Fathom::Node
|
|
56
65
|
true
|
57
66
|
end
|
58
67
|
|
68
|
+
# Short label for this node: "name (ClassName)" when a name is set,
# otherwise just the class name.
def simple_inspect
  class_label = self.class.to_s
  return class_label unless name
  "#{name} (#{class_label})"
end
|
71
|
+
|
72
|
+
# One-line summary: class name, then name and description (each omitted when
# nil), followed by the simple_inspect labels of the children and parents.
def inspect
  child_labels = children.map { |node| node.simple_inspect }.inspect
  parent_labels = parents.map { |node| node.simple_inspect }.inspect
  parts = [name, description, "children:", child_labels, "parents: ", parent_labels]
  "#{self.class.to_s}: " + parts.compact.join(", ")
end
|
82
|
+
|
59
83
|
protected
|
60
84
|
|
85
|
+
# Quick and dirty extract from ActiveSupport's same method
|
86
|
+
# Converts the keys of +h+ to symbols in place and returns +h+.
# A key whose to_sym raises, or returns nil/false, is kept unchanged.
def symbolize_keys!(h)
  h.keys.each do |original_key|
    symbol_key = begin
      original_key.to_sym
    rescue StandardError
      original_key
    end
    # Resolve the target key first, then move the value across.
    h[symbol_key || original_key] = h.delete(original_key)
  end
  h
end
|
92
|
+
|
61
93
|
def add_accessor_for_node(node)
|
62
94
|
return false unless node.is_a?(Node) and node.name_sym
|
63
95
|
return false if self.respond_to?(node.name_sym)
|
@@ -77,7 +109,6 @@ class Fathom::Node
|
|
77
109
|
add_parent(parent)
|
78
110
|
end
|
79
111
|
|
80
|
-
|
81
112
|
found = opts[:children]
|
82
113
|
found ||= opts[:child]
|
83
114
|
found ||= []
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
# A discrete node that keeps a probability and a likelihood value per label.
# Linking two BeliefNodes also builds a CPMNode between them and exposes it on
# both ends through a generated +cpm_for_<node name>+ reader.
class Fathom::BeliefNode < DiscreteNode

  attr_reader :probabilities, :likelihoods, :precision_threshold

  # Options read here, in addition to whatever the superclass reads:
  #   :values              - Hash of label => probability; when given it
  #                          supplies both the labels and the probabilities.
  #   :probabilities       - Array, GSL::Vector or scalar; normalized so the
  #                          entries sum to 1. Defaults to equal weights.
  #   :likelihoods         - Array, GSL::Vector or scalar. Defaults to all 1.0.
  #   :precision_threshold - stored as given; defaults to 0.00001.
  #
  # Raises ArgumentError when :probabilities or :likelihoods does not hold
  # exactly +size+ entries.
  def initialize(opts={})
    super(opts)
    assert_probabilities(opts)
    assert_liklihoods(opts)
    @precision_threshold = opts.fetch(:precision_threshold, 0.00001)
  end

  # Adds +child+ beneath this node. For a BeliefNode child a CPMNode joining
  # the pair is created and made reachable from both nodes; any other kind of
  # child is handed to the superclass.
  def add_child(child)
    return super(child) unless child.is_a?(BeliefNode)

    cpm = CPMNode.new(:parent => self, :child => child)
    # self.children << cpm
    add_accessor_for_cpm(cpm, child)
    add_accessor_for_node(child)
    # cpm.register_parent(self)
    children << child
    child.register_parent(self)
    child.add_accessor_for_cpm(cpm, self)
  end

  # Mirror image of add_child: adds +parent+ above this node, building the
  # joining CPMNode when the parent is a BeliefNode.
  def add_parent(parent)
    return super(parent) unless parent.is_a?(BeliefNode)

    cpm = CPMNode.new(:parent => parent, :child => self)
    # self.parents << cpm
    add_accessor_for_cpm(cpm, parent)
    add_accessor_for_node(parent)
    # cpm.register_child(self)
    parents << parent
    parent.register_child(self)
    parent.add_accessor_for_cpm(cpm, self)
  end

  # Same layout as a plain node summary, except that a CPMNode found among the
  # children or parents is shown as the node on its far side.
  def inspect
    child_labels = children.map do |e|
      e.is_a?(CPMNode) ? e.child.simple_inspect : e.simple_inspect
    end
    parent_labels = parents.map do |e|
      e.is_a?(CPMNode) ? e.parent.simple_inspect : e.simple_inspect
    end

    "#{self.class.to_s}: " + [
      name,
      description,
      "children:",
      child_labels.inspect,
      "parents: ",
      parent_labels.inspect,
    ].compact.join(", ")
  end

  # Defines a singleton reader +cpm_for_<node.name_sym>+ on this object that
  # returns +cpm+. Returns false without defining anything when +cpm+ is not a
  # named CPMNode or when this object already responds to that method name.
  def add_accessor_for_cpm(cpm, node)
    return false unless cpm.is_a?(CPMNode) && cpm.name_sym
    method_name = ("cpm_for_" + node.name_sym.to_s).to_sym
    return false if respond_to?(method_name)
    (class << self; self; end).module_eval do
      define_method(method_name) { cpm }
    end
  end

  # Placeholder: ignores +label+ and returns an empty OpenStruct.
  def likelihood(label)
    OpenStruct.new
  end

  protected

  # Sets @probabilities. A Hash under :values takes precedence; otherwise
  # :probabilities (default: equal weights) is coerced to a GSL::Vector,
  # length-checked against +size+ and divided by its sum.
  def assert_probabilities(opts)
    values = opts[:values]
    return assert_probabilities_and_labels_from_values_hash(values) if values.is_a?(Hash)

    unnormalized_vector = coerce_to_vector(opts.fetch(:probabilities, Array.new(self.size, 1.0)))

    raise ArgumentError, "Probabilities must be #{self.size} items long" unless
      unnormalized_vector.size == self.size

    sum = unnormalized_vector.sum
    @probabilities = unnormalized_vector.map {|e| e / sum }
  end

  # Takes the labels from the keys of +values+ and the probabilities from its
  # values.
  # NOTE(review): unlike the :probabilities path, these numbers are stored as
  # given, with no normalization and no length check. Confirm that is intended.
  def assert_probabilities_and_labels_from_values_hash(values)
    @labels = values.keys
    @probabilities = GSL::Vector.ary_to_gv(values.values)
  end

  # Sets @likelihoods from :likelihoods (default: all 1.0).
  #
  # The length check runs on the coerced vector. Checking the raw option broke
  # on scalars: a Float has no #size, and Integer#size is its byte width.
  def assert_liklihoods(opts)
    vector = coerce_to_vector(opts.fetch(:likelihoods, Array.new(self.size, 1.0)))
    raise ArgumentError, "Likelihoods must be #{self.size} items long" unless
      vector.size == self.size
    @likelihoods = vector
  end

  # Turns an Array, an existing GSL::Vector or a single value into a GSL::Vector.
  def coerce_to_vector(obj)
    case obj
    when Array then GSL::Vector.ary_to_gv(obj)
    when GSL::Vector then obj
    else GSL::Vector[obj]
    end
  end
end
|
116
|
+
|
117
|
+
# Script entry point: only runs when this file is executed directly.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # BeliefNode.new
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
|
3
|
+
# Conditional Probability Matrix to join two nodes.
#
# The matrix is read with parent labels as rows and child labels as columns
# (see #probability, which calls matrix.get(parent_index, child_index)).
class Fathom::CPMNode < Node

  # Requires :parent and :child options that are both BeliefNodes; raises
  # ArgumentError otherwise. Name and description get defaults when the
  # options did not provide them.
  # NOTE(review): the BeliefNode check runs before super, so it reads the
  # options exactly as passed (symbol keys).
  def initialize(opts={})
    ensure_belief_nodes(opts)
    super(opts)
    assert_name
    assert_description
    assert_cpm
  end

  # The first registered parent.
  def parent
    parents.first
  end

  # The first registered child.
  def child
    children.first
  end

  alias :matrix :values

  # Filter values from the matrix. Allows us to grab multiple rows and columns if desired.
  # If the rows or columns aren't filtered, all values are assumed to be desired.
  #
  #   @cpm.probability :child_node_name => [:desired, :values], :parent_node_name => :value
  #   This filters both the child columns and the parent rows
  #
  #   @cpm.probability :child_node_name => [:desired, :values]
  #   This only filters the child columns
  #
  # Returns the sum of the selected cells, or a one-entry Hash of
  # { "P(child values | parent values)" => sum } when :describe is truthy.
  #
  # Raises ArgumentError for a key that names neither node, and for a
  # requested value that is not one of the node's labels.
  def probability(opts={})
    # Work on a copy so the caller's hash keeps its :describe entry.
    query = opts.dup

    # Are we using long descriptions for the return value?
    # If so, we'll use a hash to describe it without having to parse the value out of a string later.
    describe = query.delete(:describe) || false

    # Is something unknown being asked for?
    allowed = [parent.name_sym, child.name_sym]
    unknown_keys = query.reject {|k, v| allowed.include?(k)}
    raise ArgumentError, "Unknown node: #{unknown_keys.inspect}" unless unknown_keys.empty?

    # Values for the desired child and parent values
    child_values = Array(query[child.name_sym] || child.labels)
    parent_values = Array(query[parent.name_sym] || parent.labels)

    # Indices in the matrix for the desired values
    child_indices = label_indices(child, child_values)
    parent_indices = label_indices(parent, parent_values)

    # Collect the filtered values from the matrix
    value = parent_indices.inject(0.0) do |sum, row|
      sum + child_indices.inject(0.0) {|s, col| s + matrix.get(row, col) }
    end

    return value unless describe

    label = "P(" +
      child_values.map(&:to_s).join(" or ") +
      " | " +
      parent_values.map(&:to_s).join(" or ") +
      ")"
    { label => value }
  end
  alias :p :probability

  # Odds p / (1 - p) for the same selection that #probability accepts.
  # :describe is ignored here so the arithmetic always gets a number;
  # forwarding it made #probability return a Hash and the division fail.
  def odds(opts={})
    numeric_query = opts.reject {|k, v| k == :describe }
    p = probability(numeric_query)
    p / (1 - p)
  end
  alias :o :odds

  # Returns a vector of likelihoods for each parent value, given the child value
  def likelihood(value)
    GSL::Vector.alloc(
      *parent.labels.map {|parent_label| probability(parent.name_sym => parent_label, child.name_sym => value)}
    )
  end
  alias :l :likelihood

  protected

  # Default name when none was given.
  def assert_name
    @name ||= :cpm
  end

  # Default description when none was given.
  def assert_description
    @description ||= "Conditional Probability Matrix from #{parent.name.to_s} to #{child.name.to_s}."
  end

  def ensure_belief_nodes(opts)
    raise ArgumentError, "The child must be a BeliefNode" unless opts[:child].is_a?(BeliefNode)
    raise ArgumentError, "The parent must be a BeliefNode" unless opts[:parent].is_a?(BeliefNode)
  end

  # Builds the matrix from the two nodes' probability vectors.
  # NOTE(review): presumably column-vector * row-vector gives the outer
  # product in rb-gsl, so each cell is parent probability * child probability.
  # Confirm against the GSL bindings.
  def assert_cpm
    @values = parent.probabilities.col * child.probabilities
  end

  # Maps each wanted label to its position in node.labels, rejecting labels
  # the node does not have instead of passing a nil index on to the matrix.
  def label_indices(node, wanted)
    wanted.map do |label|
      index = node.labels.index(label)
      if index.nil?
        raise ArgumentError, "Unknown value #{label.inspect} for node #{node.name_sym.inspect}"
      end
      index
    end
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
|
3
|
+
=begin
|
4
|
+
This class is intended to use SQLite for in-memory set operations (the SQLite code is currently commented out at the end of this file). It is based on a discrete variable
|
5
|
+
which can be translated into fields in a table. For now, I am just using float data
|
6
|
+
types for the fields. This will evolve as needs drive it into a more robust data set.
|
7
|
+
|
8
|
+
It turns out that SQLite set operations are quite fast and speed things up pretty well.
|
9
|
+
So, to use this class, you'll need to have sqlite3-ruby SQLite3 bindings installed.
|
10
|
+
|
11
|
+
This also uses uuid to enforce a node name, an additional dependency.
|
12
|
+
=end
|
13
|
+
|
14
|
+
require 'uuid'
|
15
|
+
|
16
|
+
# A discrete node whose labels may be borrowed from a parent node and whose
# name defaults to a generated UUID.
class Fathom::DataCollection < DiscreteNode

  # Fills in :labels (see extract_labels) and :name (a fresh UUID when none is
  # given) before handing the options to the superclass.
  # Note that the passed hash itself is updated with those defaults.
  def initialize(opts={})
    opts = extract_labels(opts)
    opts[:name] ||= UUID.generate
    super(opts)
  end


  protected

  # Looking for labels.
  # Using :labels, then :parents, then :parent, looking for the first node with labels defined
  #
  # Returns +opts+, with :labels copied from that node when one is found.
  # Without any of the three options, +opts+ is returned unchanged. This
  # case used to raise NoMethodError from calling #each on nil.
  def extract_labels(opts)
    return opts if opts[:labels]

    candidates = opts[:parents] || opts[:parent]
    return opts unless candidates

    # A single parent may be passed bare; treat it as a one-element list.
    candidates = [candidates] unless candidates.is_a?(Array)

    donor = candidates.find {|candidate| candidate.respond_to?(:labels) }
    opts[:labels] = donor.labels if donor
    opts
  end

end
|
44
|
+
|
45
|
+
# Script entry point: only runs when this file is executed directly.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # DataCollection.new
end
|
50
|
+
|
51
|
+
|
52
|
+
# J2: Some bare-minimum sqlite3 stuff
|
53
|
+
# See: http://sqlite-ruby.rubyforge.org/
|
54
|
+
# require 'rubygems'
|
55
|
+
# require 'sqlite3'
|
56
|
+
#
|
57
|
+
# def prepare_database(table_name)
|
58
|
+
# @db = SQLite3::Database.new(":memory:")
|
59
|
+
# # @db = SQLite3::Database.new("/tmp/j2.db")
|
60
|
+
#
|
61
|
+
# create_sql = <<-SQL
|
62
|
+
#
|
63
|
+
# CREATE TABLE "#{table_name}" (
|
64
|
+
# "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
65
|
+
# "field1" FLOAT);
|
66
|
+
# SQL
|
67
|
+
# @db.execute_batch(create_sql)
|
68
|
+
#
|
69
|
+
# insert_sql = <<-SQL
|
70
|
+
# INSERT INTO #{table_name}
|
71
|
+
# ("field1")
|
72
|
+
# VALUES
|
73
|
+
# (:field1);
|
74
|
+
# SQL
|
75
|
+
# @insert_record = @db.prepare(insert_sql)
|
76
|
+
#
|
77
|
+
# # select_sql = <<-SQL
|
78
|
+
# # "select * from 'asdf';"
|
79
|
+
# # SQL
|
80
|
+
# # @select_record = @db.prepare(select_sql)
|
81
|
+
# @select_record = @db.prepare( "select * from asdf" )
|
82
|
+
# end
|
83
|
+
#
|
84
|
+
# def insert_record(opts)
|
85
|
+
# @insert_record.bind_params(:field1 => opts[:field1])
|
86
|
+
# @insert_record.execute
|
87
|
+
# end
|
88
|
+
#
|
89
|
+
# def select_record(opts={})
|
90
|
+
# # @db.execute( "select * from 'asdf'" )
|
91
|
+
# @select_record.execute.entries
|
92
|
+
# end
|
93
|
+
#
|
94
|
+
# prepare_database('asdf')
|
95
|
+
# insert_record :field1 => 123.1
|
96
|
+
# insert_record :field1 => 122.2
|
97
|
+
# @a = select_record
|