fathom 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/.autotest +10 -0
  2. data/Gemfile +10 -2
  3. data/Gemfile.lock +8 -0
  4. data/TODO.md +12 -25
  5. data/VERSION +1 -1
  6. data/lib/{fathom/ext → ext}/array.rb +0 -0
  7. data/lib/{fathom/ext → ext}/faster_csv.rb +0 -0
  8. data/lib/{fathom/ext → ext}/open_struct.rb +0 -0
  9. data/lib/{fathom/ext → ext}/string.rb +0 -0
  10. data/lib/fathom.rb +16 -13
  11. data/lib/fathom/agent.rb +8 -9
  12. data/lib/fathom/{causal_graph.rb → archive/causal_graph.rb} +0 -0
  13. data/lib/fathom/{concept.rb → archive/concept.rb} +0 -0
  14. data/lib/fathom/archive/conditional_probability_matrix.rb +3 -0
  15. data/lib/fathom/{inverter.rb → archive/inverter.rb} +0 -0
  16. data/lib/fathom/archive/node.rb +24 -1
  17. data/lib/fathom/distributions/discrete_uniform.rb +11 -32
  18. data/lib/fathom/import.rb +37 -34
  19. data/lib/fathom/import/yaml_import.rb +22 -1
  20. data/lib/fathom/knowledge_base.rb +34 -23
  21. data/lib/fathom/knowledge_base/search.rb +19 -0
  22. data/lib/fathom/node.rb +32 -1
  23. data/lib/fathom/node/belief_node.rb +121 -0
  24. data/lib/fathom/node/cpm_node.rb +100 -0
  25. data/lib/fathom/node/data_collection.rb +97 -0
  26. data/lib/fathom/{data_node.rb → node/data_node.rb} +1 -1
  27. data/lib/fathom/{value_aggregator.rb → node/decision.rb} +5 -5
  28. data/lib/fathom/node/discrete_node.rb +41 -0
  29. data/lib/fathom/node/fact.rb +24 -0
  30. data/lib/fathom/{mc_node.rb → node/mc_node.rb} +1 -1
  31. data/lib/fathom/{enforced_name.rb → node/node_extensions/enforced_name.rb} +1 -1
  32. data/lib/fathom/{numeric_methods.rb → node/node_extensions/numeric_methods.rb} +19 -1
  33. data/lib/fathom/{plausible_range.rb → node/plausible_range.rb} +1 -1
  34. data/spec/ext/array_spec.rb +10 -0
  35. data/spec/ext/faster_csv_spec.rb +10 -0
  36. data/spec/ext/open_struct_spec.rb +20 -0
  37. data/spec/ext/string_spec.rb +7 -0
  38. data/spec/fathom/import/csv_import_spec.rb +11 -9
  39. data/spec/fathom/import/yaml_import_spec.rb +27 -7
  40. data/spec/fathom/knowledge_base_spec.rb +8 -4
  41. data/spec/fathom/node/belief_node_spec.rb +180 -0
  42. data/spec/fathom/node/cpm_node_spec.rb +144 -0
  43. data/spec/fathom/node/data_collection_spec.rb +26 -0
  44. data/spec/fathom/{data_node_spec.rb → node/data_node_spec.rb} +1 -1
  45. data/spec/fathom/node/decision_spec.rb +15 -0
  46. data/spec/fathom/node/discrete_node_spec.rb +56 -0
  47. data/spec/fathom/node/fact_spec.rb +33 -0
  48. data/spec/fathom/{mc_node_spec.rb → node/mc_node_spec.rb} +1 -1
  49. data/spec/fathom/{enforced_name_spec.rb → node/node_extensions/enforced_name_spec.rb} +1 -1
  50. data/spec/fathom/{numeric_methods_spec.rb → node/node_extensions/numeric_methods_spec.rb} +53 -11
  51. data/spec/fathom/{plausible_range_spec.rb → node/plausible_range_spec.rb} +1 -1
  52. data/spec/fathom/node_spec.rb +17 -0
  53. data/spec/fathom_spec.rb +40 -0
  54. data/spec/spec_helper.rb +3 -0
  55. data/spec/support/fact.yml +11 -0
  56. metadata +57 -30
  57. data/lib/fathom/value_multiplier.rb +0 -18
@@ -1,31 +1,42 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
- class Fathom::KnowledgeBase
1
+ require File.expand_path("../../fathom", __FILE__)
2
+
3
+ require 'rdf'
4
+ require 'knowledge_base/search'
5
+
6
+ module Fathom
7
+ class KnowledgeBase
3
8
 
4
- attr_reader :data_store
9
+ # =====================
10
+ # = Module Extensions =
11
+ # =====================
12
+ include Search
5
13
 
6
- def initialize(opts={})
7
- opts = OptionsHash.new(opts)
8
- @data_store = OpenStruct.new
9
- end
10
-
11
- def []=(key, value)
12
- @data_store.table[key] = value
13
- end
14
+ # attr_reader :data_store
14
15
 
15
- def [](key)
16
- @data_store.table[key]
17
- end
18
-
19
- # This is temporary, but useful for now. After we have the persisted KnowledgeBase,
20
- # we'll create explicit accessor methods or a find syntax.
21
- def method_missing(sym, *args, &block)
22
- if @data_store.table.keys.include?(sym)
23
- @data_store.send(sym)
24
- else
25
- super
16
+ def initialize(opts={})
17
+ # opts = OptionsHash.new(opts)
18
+ # @data_store = OpenStruct.new
26
19
  end
27
- end
28
20
 
21
+ # def []=(key, value)
22
+ # @data_store.table[key] = value
23
+ # end
24
+ #
25
+ # def [](key)
26
+ # @data_store.table[key]
27
+ # end
28
+ #
29
+ # # This is temporary, but useful for now. After we have the persisted KnowledgeBase,
30
+ # # we'll create explicit accessor methods or a find syntax.
31
+ # def method_missing(sym, *args, &block)
32
+ # if @data_store.table.keys.include?(sym)
33
+ # @data_store.send(sym)
34
+ # else
35
+ # super
36
+ # end
37
+ # end
38
+
39
+ end
29
40
  end
30
41
 
31
42
  if __FILE__ == $0
@@ -0,0 +1,19 @@
1
+ require 'sparql/algebra'
2
+
3
module Fathom
  # Mixin that wires a search API into any class that includes it: the
  # including class is extended with ClassMethods and its instances receive
  # InstanceMethods.
  module Search

    # Class-level search API.
    module ClassMethods
      # Placeholder finder. Accepts an options hash but has no body yet, so it
      # always returns nil.
      def find(opts={})
      end
    end

    # Instance-level search API; currently empty.
    module InstanceMethods
    end

    # Hook invoked by Ruby when Search is included into +base+.
    def self.included(base)
      base.extend(ClassMethods)
      base.send(:include, InstanceMethods)
    end
  end
end
@@ -1,9 +1,18 @@
1
1
  require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ # TODO: Move this into a proper configuration module
3
+ # require 'spira'
4
+ # @repository = RDF::Repository.new
5
+ # Spira.add_repository(:default, @repository)
6
+
2
7
  class Fathom::Node
3
8
 
9
+ # See notes in the spec about this.
10
+ # include Spira::Resource
11
+
4
12
  attr_reader :name, :distribution, :description, :values
5
13
 
6
14
  def initialize(opts={})
15
+ symbolize_keys!(opts)
7
16
  @name = opts[:name]
8
17
  assert_distribution(opts)
9
18
  @description = opts[:description]
@@ -56,8 +65,31 @@ class Fathom::Node
56
65
  true
57
66
  end
58
67
 
68
# Short one-line label for this node: "name (Class)" when the node has a
# name, otherwise just the class name.
def simple_inspect
  return self.class.to_s unless self.name
  "#{self.name} (#{self.class.to_s})"
end
71
+
72
# Human-readable summary: the class name followed by the node's name,
# description and the short labels (see #simple_inspect) of its children and
# parents. A nil name or description is dropped from the output.
def inspect
  prefix = "#{self.class.to_s}: "
  fields = [self.name, self.description]
  fields << "children:" << self.children.map { |node| node.simple_inspect }.inspect
  fields << "parents: " << self.parents.map { |node| node.simple_inspect }.inspect
  prefix + fields.compact.join(", ")
end
82
+
59
83
  protected
60
84
 
85
# Quick and dirty extract from ActiveSupport's same method.
#
# Rewrites +h+ in place so that every key that can be turned into a Symbol
# is one. Keys whose +to_sym+ raises (or returns nil/false) are kept as
# they are. Returns the same hash object.
def symbolize_keys!(h)
  h.keys.each do |key|
    symbol = begin
      key.to_sym
    rescue StandardError
      key
    end
    h[symbol || key] = h.delete(key)
  end
  h
end
92
+
61
93
  def add_accessor_for_node(node)
62
94
  return false unless node.is_a?(Node) and node.name_sym
63
95
  return false if self.respond_to?(node.name_sym)
@@ -77,7 +109,6 @@ class Fathom::Node
77
109
  add_parent(parent)
78
110
  end
79
111
 
80
-
81
112
  found = opts[:children]
82
113
  found ||= opts[:child]
83
114
  found ||= []
@@ -0,0 +1,121 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
# A discrete node that carries two GSL vectors: +probabilities+ (normalized
# when given as a list) and +likelihoods+. When two BeliefNodes are linked as
# parent and child, a CPMNode is created for the pair and exposed on both
# nodes through a generated +cpm_for_<other node name>+ accessor.
#
# NOTE(review): +size+, +labels+, +children+, +parents+, +register_parent+,
# +register_child+ and +add_accessor_for_node+ come from the superclasses,
# which are not visible here; +size+ is presumably the number of labels.
class Fathom::BeliefNode < DiscreteNode

  attr_reader :probabilities, :likelihoods, :precision_threshold

  # Options read here (in addition to whatever the superclass reads):
  #   :values              - a Hash of label => probability (takes precedence)
  #   :probabilities       - Array, GSL::Vector or scalar; defaults to all 1.0
  #   :likelihoods         - Array, GSL::Vector or scalar; defaults to all 1.0
  #   :precision_threshold - stored as-is, defaults to 0.00001 (not used
  #                          anywhere inside this class)
  #
  # Raises ArgumentError when probabilities or likelihoods have the wrong length.
  def initialize(opts={})
    super(opts)
    assert_probabilities(opts)
    assert_likelihoods(opts)
    @precision_threshold = opts.fetch(:precision_threshold, 0.00001)
  end

  # Links +child+ below this node. For a BeliefNode child a CPMNode joining
  # the two is built and registered on both sides; any other child is handed
  # to the superclass.
  def add_child(child)
    if child.is_a?(BeliefNode)
      cpm = CPMNode.new(:parent => self, :child => child)
      # The CPM itself is deliberately not stored in children; only the
      # child node is.
      self.add_accessor_for_cpm(cpm, child)
      self.add_accessor_for_node(child)
      self.children << child
      child.register_parent(self)
      child.add_accessor_for_cpm(cpm, self)
    else
      super(child)
    end
  end

  # Links +parent+ above this node. Mirror image of #add_child.
  def add_parent(parent)
    if parent.is_a?(BeliefNode)
      cpm = CPMNode.new(:parent => parent, :child => self)
      # The CPM itself is deliberately not stored in parents; only the
      # parent node is.
      self.add_accessor_for_cpm(cpm, parent)
      self.add_accessor_for_node(parent)
      self.parents << parent
      parent.register_child(self)
      parent.add_accessor_for_cpm(cpm, self)
    else
      super(parent)
    end
  end

  # Summary string like the superclass version, except that a CPMNode found
  # among children/parents is shown as the node on its far side.
  def inspect
    "#{self.class.to_s}: " + [
      self.name,
      self.description,
      "children:",
      self.children.map {|e| e.is_a?(CPMNode) ? e.child.simple_inspect : e.simple_inspect }.inspect,
      "parents: ",
      self.parents.map {|e| e.is_a?(CPMNode) ? e.parent.simple_inspect : e.simple_inspect }.inspect,
    ].compact.join(", ")
  end

  # Defines a singleton method +cpm_for_<node name>+ on this object that
  # returns +cpm+. Returns false (and defines nothing) when +cpm+ is not a
  # named CPMNode or when this object already responds to that method name.
  def add_accessor_for_cpm(cpm, node)
    return false unless cpm.is_a?(CPMNode) and cpm.name_sym
    method_name = ("cpm_for_" + node.name_sym.to_s).to_sym
    return false if self.respond_to?(method_name)
    (class << self; self; end).module_eval do
      define_method method_name do
        cpm
      end
    end
  end

  # Placeholder: ignores +label+ and returns a new, empty OpenStruct.
  def likelihood(label)
    OpenStruct.new
  end

  protected

  # Sets @probabilities. A Hash under :values is delegated to
  # #assert_probabilities_and_labels_from_values_hash; otherwise
  # :probabilities (default: 1.0 per entry) is coerced to a GSL::Vector,
  # length-checked against +size+ and divided by its sum.
  def assert_probabilities(opts)
    return assert_probabilities_and_labels_from_values_hash(opts[:values]) if
      opts[:values].is_a?(Hash)

    unnormalized_obj = opts.fetch(:probabilities, Array.new(self.size, 1.0))
    unnormalized_vector = case unnormalized_obj
    when Array
      GSL::Vector.ary_to_gv(unnormalized_obj)
    when GSL::Vector
      unnormalized_obj
    else
      GSL::Vector[unnormalized_obj]
    end

    raise ArgumentError, "Probabilities must be #{self.size} items long" unless
      unnormalized_vector.size == self.size

    sum = unnormalized_vector.sum
    @probabilities = unnormalized_vector.map {|e| e / sum }
  end

  # Splits a label => probability Hash into @labels (the keys) and
  # @probabilities (the values, as a GSL::Vector).
  # NOTE(review): unlike #assert_probabilities the values are stored without
  # normalization or a length check - confirm that is intended.
  def assert_probabilities_and_labels_from_values_hash(values)
    @labels, probabilities = values.inject([[], []]) do |list, e|
      list.first << e.first
      list.last << e.last
      list
    end
    @probabilities = GSL::Vector.ary_to_gv(probabilities)
  end

  # Sets @likelihoods from :likelihoods (default: 1.0 per entry), coercing an
  # Array or scalar to a GSL::Vector.
  #
  # The length check runs against the coerced vector, so a scalar input is
  # validated like any other instead of failing with NoMethodError on a
  # missing #size.
  def assert_likelihoods(opts)
    raw = opts.fetch(:likelihoods, Array.new(self.size, 1.0))
    @likelihoods = case raw
    when Array
      GSL::Vector.ary_to_gv(raw)
    when GSL::Vector
      raw
    else
      GSL::Vector[raw]
    end
    raise ArgumentError, "Likelihoods must be #{self.size} items long" unless
      @likelihoods.size == self.size
  end
  # Historical (misspelled) name, kept so existing subclasses keep working.
  alias_method :assert_liklihoods, :assert_likelihoods
end
116
+
117
+ if __FILE__ == $0
118
+ include Fathom
119
+ # TODO: Is there anything you want to do to run this file on its own?
120
+ # BeliefNode.new
121
+ end
@@ -0,0 +1,100 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+
3
# Conditional Probability Matrix to join two nodes.
#
# A CPMNode sits between exactly one parent and one child, both of which must
# be BeliefNodes. Its +values+ (aliased as +matrix+) are indexed as
# (parent label index, child label index).
class Fathom::CPMNode < Node

  # Requires :parent and :child options holding BeliefNodes; raises
  # ArgumentError otherwise. Name and description get defaults when the
  # superclass did not set them.
  def initialize(opts={})
    ensure_belief_nodes(opts)
    super(opts)
    assert_name
    assert_description
    assert_cpm
  end

  # The single parent node joined by this matrix.
  def parent
    parents.first
  end

  # The single child node joined by this matrix.
  def child
    children.first
  end

  alias :matrix :values

  # Filter values from the matrix. Allows us to grab multiple rows and columns if desired.
  # If the rows or columns aren't filtered, all values are assumed to be desired.
  #
  # @cpm.probability :child_node_name => [:desired, :values], :parent_node_name => :value
  # This filters both the child columns and the parent rows
  #
  # @cpm.probability :child_node_name => [:desired, :values]
  # This only filters the child columns
  #
  # Pass :describe => true to get a one-entry Hash of "P(child | parent)"
  # label => value instead of the bare number. The caller's hash is not
  # modified.
  #
  # Raises ArgumentError for a key that names neither node and for a value
  # that is not one of the addressed node's labels.
  def probability(opts={})
    # Work on a copy so that removing :describe does not leak to the caller.
    opts = opts.dup

    # Are we using long descriptions for the return value?
    # If so, we'll use a hash to describe it without having to parse the value out of a string later.
    describe = opts.delete(:describe) || false

    # Is something unknown being asked for?
    allowed = [parent.name_sym, child.name_sym]
    unknown_keys = opts.reject {|k, v| allowed.include?(k)}
    raise ArgumentError, "Unknown node: #{unknown_keys.inspect}" unless unknown_keys.empty?

    # Values for the desired child and parent values
    child_values = Array(opts[self.child.name_sym] || self.child.labels)
    parent_values = Array(opts[self.parent.name_sym] || self.parent.labels)

    # Indices in the matrix for the desired values
    child_indices = label_indices(self.child, child_values)
    parent_indices = label_indices(self.parent, parent_values)

    # Collect the filtered values from the matrix
    value = parent_indices.inject(0.0) do |sum, row|
      sum + child_indices.inject(0.0) {|s, col| s + matrix.get(row, col) }
    end

    return value unless describe

    label = "P(" +
      child_values.map(&:to_s).join(" or ") +
      " | " +
      parent_values.map(&:to_s).join(" or ") +
      ")"
    { label => value }
  end
  alias :p :probability

  # Odds (p / (1 - p)) for the same filter options #probability takes.
  # :describe is ignored here, because the ratio needs the bare number.
  def odds(opts={})
    prob = probability(opts.reject {|k, v| k == :describe })
    prob / (1 - prob)
  end
  alias :o :odds

  # Returns a vector of likelihoods for each parent value, given the child value
  def likelihood(value)
    GSL::Vector.alloc(
      *parent.labels.map {|parent_label| probability(parent.name_sym => parent_label, child.name_sym => value)}
    )
  end
  alias :l :likelihood

  protected

  # Default the node name to :cpm when none was given.
  def assert_name
    @name ||= :cpm
  end

  # Default the description to a sentence naming both joined nodes.
  def assert_description
    @description ||= "Conditional Probability Matrix from #{parent.name.to_s} to #{child.name.to_s}."
  end

  # Both ends of the matrix must be BeliefNodes.
  def ensure_belief_nodes(opts)
    raise ArgumentError, "The child must be a BeliefNode" unless opts[:child].is_a?(BeliefNode)
    raise ArgumentError, "The parent must be a BeliefNode" unless opts[:parent].is_a?(BeliefNode)
  end

  # Seeds the matrix from the two probability vectors.
  # NOTE(review): presumably a column vector times a row vector, i.e. the
  # outer product of the parent and child probabilities - confirm against
  # the GSL bindings.
  def assert_cpm
    @values = self.parent.probabilities.col * self.child.probabilities
  end

  # Maps each requested label to its position in +node.labels+, failing
  # loudly instead of passing a nil index on to the matrix lookup.
  def label_indices(node, wanted)
    wanted.map do |label|
      index = node.labels.index(label)
      raise ArgumentError, "Unknown value #{label.inspect} for node #{node.name_sym.inspect}" if index.nil?
      index
    end
  end
end
+ end
@@ -0,0 +1,97 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+
3
+ =begin
4
+ This class uses SQLite for in-memory set operations. It is based on a discrete variable
5
+ which can be translated into fields in a table. For now, I am just using float data
6
+ types for the fields. This will evolve as needs drive it into a more robust data set.
7
+
8
+ It turns out that SQLite set operations are quite fast and speed things up pretty well.
9
+ So, to use this class, you'll need to have sqlite3-ruby SQLite3 bindings installed.
10
+
11
+ This also uses uuid to enforce a node name, an additional dependency.
12
+ =end
13
+
14
+ require 'uuid'
15
+
16
# A discrete node that always has a name (a generated UUID when none is
# given) and that can borrow its labels from a parent node.
class Fathom::DataCollection < DiscreteNode

  # Options read here before handing off to the superclass:
  #   :labels            - used as given when present
  #   :parents / :parent - searched for labels when :labels is absent
  #   :name              - defaults to UUID.generate
  # Note that the passed hash is updated in place.
  def initialize(opts={})
    opts = extract_labels(opts)
    opts[:name] ||= UUID.generate
    super(opts)
  end

  protected

  # Looking for labels.
  # Using :labels, then :parents, then :parent, looking for the first node with labels defined
  #
  # Returns +opts+, with :labels filled in from the first candidate that
  # responds to #labels. When no parents are given at all the options are
  # returned untouched, rather than calling #each on nil.
  def extract_labels(opts)
    return opts if opts[:labels]

    candidates = opts[:parents] || opts[:parent]
    # A single parent may be passed bare; treat it as a one-element list.
    candidates = [candidates] if candidates and not candidates.is_a?(Array)

    labelled = (candidates || []).find {|parent| parent.respond_to?(:labels) }
    opts[:labels] = labelled.labels if labelled
    opts
  end

end
44
+
45
+ if __FILE__ == $0
46
+ include Fathom
47
+ # TODO: Is there anything you want to do to run this file on its own?
48
+ # DataCollection.new
49
+ end
50
+
51
+
52
+ # J2: Some bare-minimum sqlite3 stuff
53
+ # See: http://sqlite-ruby.rubyforge.org/
54
+ # require 'rubygems'
55
+ # require 'sqlite3'
56
+ #
57
+ # def prepare_database(table_name)
58
+ # @db = SQLite3::Database.new(":memory:")
59
+ # # @db = SQLite3::Database.new("/tmp/j2.db")
60
+ #
61
+ # create_sql = <<-SQL
62
+ #
63
+ # CREATE TABLE "#{table_name}" (
64
+ # "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
65
+ # "field1" FLOAT);
66
+ # SQL
67
+ # @db.execute_batch(create_sql)
68
+ #
69
+ # insert_sql = <<-SQL
70
+ # INSERT INTO #{table_name}
71
+ # ("field1")
72
+ # VALUES
73
+ # (:field1);
74
+ # SQL
75
+ # @insert_record = @db.prepare(insert_sql)
76
+ #
77
+ # # select_sql = <<-SQL
78
+ # # "select * from 'asdf';"
79
+ # # SQL
80
+ # # @select_record = @db.prepare(select_sql)
81
+ # @select_record = @db.prepare( "select * from asdf" )
82
+ # end
83
+ #
84
+ # def insert_record(opts)
85
+ # @insert_record.bind_params(:field1 => opts[:field1])
86
+ # @insert_record.execute
87
+ # end
88
+ #
89
+ # def select_record(opts={})
90
+ # # @db.execute( "select * from 'asdf'" )
91
+ # @select_record.execute.entries
92
+ # end
93
+ #
94
+ # prepare_database('asdf')
95
+ # insert_record :field1 => 123.1
96
+ # insert_record :field1 => 122.2
97
+ # @a = select_record