fathom 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/.autotest +10 -0
  2. data/Gemfile +10 -2
  3. data/Gemfile.lock +8 -0
  4. data/TODO.md +12 -25
  5. data/VERSION +1 -1
  6. data/lib/{fathom/ext → ext}/array.rb +0 -0
  7. data/lib/{fathom/ext → ext}/faster_csv.rb +0 -0
  8. data/lib/{fathom/ext → ext}/open_struct.rb +0 -0
  9. data/lib/{fathom/ext → ext}/string.rb +0 -0
  10. data/lib/fathom.rb +16 -13
  11. data/lib/fathom/agent.rb +8 -9
  12. data/lib/fathom/{causal_graph.rb → archive/causal_graph.rb} +0 -0
  13. data/lib/fathom/{concept.rb → archive/concept.rb} +0 -0
  14. data/lib/fathom/archive/conditional_probability_matrix.rb +3 -0
  15. data/lib/fathom/{inverter.rb → archive/inverter.rb} +0 -0
  16. data/lib/fathom/archive/node.rb +24 -1
  17. data/lib/fathom/distributions/discrete_uniform.rb +11 -32
  18. data/lib/fathom/import.rb +37 -34
  19. data/lib/fathom/import/yaml_import.rb +22 -1
  20. data/lib/fathom/knowledge_base.rb +34 -23
  21. data/lib/fathom/knowledge_base/search.rb +19 -0
  22. data/lib/fathom/node.rb +32 -1
  23. data/lib/fathom/node/belief_node.rb +121 -0
  24. data/lib/fathom/node/cpm_node.rb +100 -0
  25. data/lib/fathom/node/data_collection.rb +97 -0
  26. data/lib/fathom/{data_node.rb → node/data_node.rb} +1 -1
  27. data/lib/fathom/{value_aggregator.rb → node/decision.rb} +5 -5
  28. data/lib/fathom/node/discrete_node.rb +41 -0
  29. data/lib/fathom/node/fact.rb +24 -0
  30. data/lib/fathom/{mc_node.rb → node/mc_node.rb} +1 -1
  31. data/lib/fathom/{enforced_name.rb → node/node_extensions/enforced_name.rb} +1 -1
  32. data/lib/fathom/{numeric_methods.rb → node/node_extensions/numeric_methods.rb} +19 -1
  33. data/lib/fathom/{plausible_range.rb → node/plausible_range.rb} +1 -1
  34. data/spec/ext/array_spec.rb +10 -0
  35. data/spec/ext/faster_csv_spec.rb +10 -0
  36. data/spec/ext/open_struct_spec.rb +20 -0
  37. data/spec/ext/string_spec.rb +7 -0
  38. data/spec/fathom/import/csv_import_spec.rb +11 -9
  39. data/spec/fathom/import/yaml_import_spec.rb +27 -7
  40. data/spec/fathom/knowledge_base_spec.rb +8 -4
  41. data/spec/fathom/node/belief_node_spec.rb +180 -0
  42. data/spec/fathom/node/cpm_node_spec.rb +144 -0
  43. data/spec/fathom/node/data_collection_spec.rb +26 -0
  44. data/spec/fathom/{data_node_spec.rb → node/data_node_spec.rb} +1 -1
  45. data/spec/fathom/node/decision_spec.rb +15 -0
  46. data/spec/fathom/node/discrete_node_spec.rb +56 -0
  47. data/spec/fathom/node/fact_spec.rb +33 -0
  48. data/spec/fathom/{mc_node_spec.rb → node/mc_node_spec.rb} +1 -1
  49. data/spec/fathom/{enforced_name_spec.rb → node/node_extensions/enforced_name_spec.rb} +1 -1
  50. data/spec/fathom/{numeric_methods_spec.rb → node/node_extensions/numeric_methods_spec.rb} +53 -11
  51. data/spec/fathom/{plausible_range_spec.rb → node/plausible_range_spec.rb} +1 -1
  52. data/spec/fathom/node_spec.rb +17 -0
  53. data/spec/fathom_spec.rb +40 -0
  54. data/spec/spec_helper.rb +3 -0
  55. data/spec/support/fact.yml +11 -0
  56. metadata +57 -30
  57. data/lib/fathom/value_multiplier.rb +0 -18
@@ -1,31 +1,42 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
- class Fathom::KnowledgeBase
1
+ require File.expand_path("../../fathom", __FILE__)
2
+
3
+ require 'rdf'
4
+ require 'knowledge_base/search'
5
+
6
+ module Fathom
7
+ class KnowledgeBase
3
8
 
4
- attr_reader :data_store
9
+ # =====================
10
+ # = Module Extensions =
11
+ # =====================
12
+ include Search
5
13
 
6
- def initialize(opts={})
7
- opts = OptionsHash.new(opts)
8
- @data_store = OpenStruct.new
9
- end
10
-
11
- def []=(key, value)
12
- @data_store.table[key] = value
13
- end
14
+ # attr_reader :data_store
14
15
 
15
- def [](key)
16
- @data_store.table[key]
17
- end
18
-
19
- # This is temporary, but useful for now. After we have the persisted KnowledgeBase,
20
- # we'll create explicit accessor methods or a find syntax.
21
- def method_missing(sym, *args, &block)
22
- if @data_store.table.keys.include?(sym)
23
- @data_store.send(sym)
24
- else
25
- super
16
+ def initialize(opts={})
17
+ # opts = OptionsHash.new(opts)
18
+ # @data_store = OpenStruct.new
26
19
  end
27
- end
28
20
 
21
+ # def []=(key, value)
22
+ # @data_store.table[key] = value
23
+ # end
24
+ #
25
+ # def [](key)
26
+ # @data_store.table[key]
27
+ # end
28
+ #
29
+ # # This is temporary, but useful for now. After we have the persisted KnowledgeBase,
30
+ # # we'll create explicit accessor methods or a find syntax.
31
+ # def method_missing(sym, *args, &block)
32
+ # if @data_store.table.keys.include?(sym)
33
+ # @data_store.send(sym)
34
+ # else
35
+ # super
36
+ # end
37
+ # end
38
+
39
+ end
29
40
  end
30
41
 
31
42
  if __FILE__ == $0
@@ -0,0 +1,19 @@
1
+ require 'sparql/algebra'
2
+
3
+ module Fathom
4
+ module Search
5
+
6
+ def self.included(base)
7
+ base.send(:extend, ClassMethods)
8
+ base.send(:include, InstanceMethods)
9
+ end
10
+
11
+ module ClassMethods
12
+ def find(opts={})
13
+ end
14
+ end
15
+
16
+ module InstanceMethods
17
+ end
18
+ end
19
+ end
@@ -1,9 +1,18 @@
1
1
  require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ # TODO: Move this into a proper configuration module
3
+ # require 'spira'
4
+ # @repository = RDF::Repository.new
5
+ # Spira.add_repository(:default, @repository)
6
+
2
7
  class Fathom::Node
3
8
 
9
+ # See notes in the spec about this.
10
+ # include Spira::Resource
11
+
4
12
  attr_reader :name, :distribution, :description, :values
5
13
 
6
14
  def initialize(opts={})
15
+ symbolize_keys!(opts)
7
16
  @name = opts[:name]
8
17
  assert_distribution(opts)
9
18
  @description = opts[:description]
@@ -56,8 +65,31 @@ class Fathom::Node
56
65
  true
57
66
  end
58
67
 
68
+ def simple_inspect
69
+ self.name ? "#{self.name} (#{self.class.to_s})" : self.class.to_s
70
+ end
71
+
72
+ def inspect
73
+ "#{self.class.to_s}: " + [
74
+ self.name,
75
+ self.description,
76
+ "children:",
77
+ self.children.map {|e| e.simple_inspect }.inspect,
78
+ "parents: ",
79
+ self.parents.map {|e| e.simple_inspect }.inspect,
80
+ ].compact.join(", ")
81
+ end
82
+
59
83
  protected
60
84
 
85
+ # Quick-and-dirty version of ActiveSupport's method of the same name (symbolize_keys!)
86
+ def symbolize_keys!(h)
87
+ h.keys.each do |key|
88
+ h[(key.to_sym rescue key) || key] = h.delete(key)
89
+ end
90
+ h
91
+ end
92
+
61
93
  def add_accessor_for_node(node)
62
94
  return false unless node.is_a?(Node) and node.name_sym
63
95
  return false if self.respond_to?(node.name_sym)
@@ -77,7 +109,6 @@ class Fathom::Node
77
109
  add_parent(parent)
78
110
  end
79
111
 
80
-
81
112
  found = opts[:children]
82
113
  found ||= opts[:child]
83
114
  found ||= []
@@ -0,0 +1,121 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+ class Fathom::BeliefNode < DiscreteNode
3
+
4
+ attr_reader :probabilities, :likelihoods, :precision_threshold
5
+
6
+ def initialize(opts={})
7
+ super(opts)
8
+ assert_probabilities(opts)
9
+ assert_liklihoods(opts)
10
+ @precision_threshold = opts.fetch(:precision_threshold, 0.00001)
11
+ end
12
+
13
+ def add_child(child)
14
+ if child.is_a?(BeliefNode)
15
+ cpm = CPMNode.new(:parent => self, :child => child)
16
+ # self.children << cpm
17
+ self.add_accessor_for_cpm(cpm, child)
18
+ self.add_accessor_for_node(child)
19
+ # cpm.register_parent(self)
20
+ self.children << child
21
+ child.register_parent(self)
22
+ child.add_accessor_for_cpm(cpm, self)
23
+ else
24
+ super(child)
25
+ end
26
+ end
27
+
28
+ def add_parent(parent)
29
+ if parent.is_a?(BeliefNode)
30
+ cpm = CPMNode.new(:parent => parent, :child => self)
31
+ # self.parents << cpm
32
+ self.add_accessor_for_cpm(cpm, parent)
33
+ self.add_accessor_for_node(parent)
34
+ # cpm.register_child(self)
35
+ self.parents << parent
36
+ parent.register_child(self)
37
+ parent.add_accessor_for_cpm(cpm, self)
38
+ else
39
+ super(parent)
40
+ end
41
+ end
42
+
43
+ def inspect
44
+ "#{self.class.to_s}: " + [
45
+ self.name,
46
+ self.description,
47
+ "children:",
48
+ self.children.map {|e| e.is_a?(CPMNode) ? e.child.simple_inspect : e.simple_inspect }.inspect,
49
+ "parents: ",
50
+ self.parents.map {|e| e.is_a?(CPMNode) ? e.parent.simple_inspect : e.simple_inspect }.inspect,
51
+ ].compact.join(", ")
52
+ end
53
+
54
+ def add_accessor_for_cpm(cpm, node)
55
+ return false unless cpm.is_a?(CPMNode) and cpm.name_sym
56
+ method_name = ("cpm_for_" + node.name_sym.to_s).to_sym
57
+ return false if self.respond_to?(method_name)
58
+ (class << self; self; end).module_eval do
59
+ define_method method_name do
60
+ cpm
61
+ end
62
+ end
63
+ end
64
+
65
+ def likelihood(label)
66
+ OpenStruct.new
67
+ end
68
+
69
+ protected
70
+
71
+
72
+ def assert_probabilities(opts)
73
+ return assert_probabilities_and_labels_from_values_hash(opts[:values]) if
74
+ opts[:values] and opts[:values].is_a?(Hash)
75
+
76
+ unnormalized_obj = opts.fetch(:probabilities, Array.new(self.size, 1.0))
77
+ unnormalized_vector = case unnormalized_obj
78
+ when Array
79
+ GSL::Vector.ary_to_gv(unnormalized_obj)
80
+ when GSL::Vector
81
+ unnormalized_obj
82
+ else
83
+ GSL::Vector[unnormalized_obj]
84
+ end
85
+
86
+ raise ArgumentError, "Probabilities must be #{self.size} items long" unless
87
+ unnormalized_vector.size == self.size
88
+
89
+ sum = unnormalized_vector.sum
90
+ @probabilities = unnormalized_vector.map {|e| e / sum }
91
+ end
92
+
93
+ def assert_probabilities_and_labels_from_values_hash(values)
94
+ @labels, probabilities = values.inject([[], []]) do |list, e|
95
+ list.first << e.first
96
+ list.last << e.last
97
+ list
98
+ end
99
+ @probabilities = GSL::Vector.ary_to_gv(probabilities)
100
+ end
101
+
102
+ def assert_liklihoods(opts)
103
+ likelihoods = opts.fetch(:likelihoods, Array.new(self.size, 1.0))
104
+ @likelihoods = case likelihoods
105
+ when Array
106
+ GSL::Vector.ary_to_gv(likelihoods)
107
+ when GSL::Vector
108
+ likelihoods
109
+ else
110
+ GSL::Vector[likelihoods]
111
+ end
112
+ raise ArgumentError, "Likelihoods must be #{self.size} items long" unless
113
+ likelihoods.size == self.size
114
+ end
115
+ end
116
+
117
+ if __FILE__ == $0
118
+ include Fathom
119
+ # TODO: Is there anything you want to do to run this file on its own?
120
+ # BeliefNode.new
121
+ end
@@ -0,0 +1,100 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+
3
+ # Conditional Probability Matrix (CPM) node that joins a parent BeliefNode to a child BeliefNode
4
+ class Fathom::CPMNode < Node
5
+
6
+ def initialize(opts={})
7
+ ensure_belief_nodes(opts)
8
+ super(opts)
9
+ assert_name
10
+ assert_description
11
+ assert_cpm
12
+ end
13
+
14
+ def parent
15
+ parents.first
16
+ end
17
+
18
+ def child
19
+ children.first
20
+ end
21
+
22
+ alias :matrix :values
23
+
24
+ # Sums the values selected from the matrix. Allows us to grab multiple rows and columns if desired.
25
+ # If the rows or columns aren't filtered, all values are assumed to be desired.
26
+ #
27
+ # @cpm.probability :child_node_name => [:desired, :values], :parent_node_name => :value
28
+ # This filters both the child columns and the parent rows
29
+ #
30
+ # @cpm.probability :child_node_name => [:desired, :values]
31
+ # This only filters the child columns
32
+ def probability(opts={})
33
+ # Are we using long descriptions for the return value?
34
+ # If so, we'll use a hash to describe it without having to parse the value out of a string later.
35
+ describe = opts.delete(:describe) || false
36
+
37
+ # Is something unknown being asked for?
38
+ allowed = [parent.name_sym, child.name_sym]
39
+ unknown_keys = opts.reject {|k, v| allowed.include?(k)}
40
+ raise ArgumentError, "Unknown node: #{unknown_keys.inspect}" unless unknown_keys.empty?
41
+
42
+ # Values for the desired child and parent values
43
+ child_values = Array(opts[self.child.name_sym] || self.child.labels)
44
+ parent_values = Array(opts[self.parent.name_sym] || self.parent.labels)
45
+
46
+ # Indices in the matrix for the desired values
47
+ child_indices = child_values.map {|c| self.child.labels.index(c)}
48
+ parent_indices = parent_values.map {|c| self.parent.labels.index(c)}
49
+
50
+ # Collect the filtered values from the matrix
51
+ value = parent_indices.inject(0.0) do |sum, row|
52
+ sum += child_indices.inject(0.0) do |s, col|
53
+ s += matrix.get(row, col)
54
+ end
55
+ end
56
+
57
+ return value unless describe
58
+
59
+ label = "P(" +
60
+ child_values.map(&:to_s).join(" or ") +
61
+ " | " +
62
+ parent_values.map(&:to_s).join(" or ") +
63
+ ")"
64
+ { label => value }
65
+
66
+ end
67
+ alias :p :probability
68
+
69
+ def odds(opts={})
70
+ p = probability(opts)
71
+ return p / (1 - p)
72
+ end
73
+ alias :o :odds
74
+
75
+ # Returns a vector of likelihoods for each parent value, given the child value
76
+ def likelihood(value)
77
+ GSL::Vector.alloc(
78
+ *parent.labels.map {|parent_label| probability(parent.name_sym => parent_label, child.name_sym => value)}
79
+ )
80
+ end
81
+ alias :l :likelihood
82
+
83
+ protected
84
+ def assert_name
85
+ @name ||= :cpm
86
+ end
87
+
88
+ def assert_description
89
+ @description ||= "Conditional Probability Matrix from #{parent.name.to_s} to #{child.name.to_s}."
90
+ end
91
+
92
+ def ensure_belief_nodes(opts)
93
+ raise ArgumentError, "The child must be a BeliefNode" unless opts[:child].is_a?(BeliefNode)
94
+ raise ArgumentError, "The parent must be a BeliefNode" unless opts[:parent].is_a?(BeliefNode)
95
+ end
96
+
97
+ def assert_cpm
98
+ @values = self.parent.probabilities.col * self.child.probabilities
99
+ end
100
+ end
@@ -0,0 +1,97 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+
3
+ =begin
4
+ This class uses SQLite for in-memory set operations. It is based on a discrete variable
5
+ which can be translated into fields in a table. For now, I am just using float data
6
+ types for the fields. This will evolve as needs drive it into a more robust data set.
7
+
8
+ It turns out that SQLite set operations are quite fast and speed things up pretty well.
9
+ So, to use this class, you'll need the sqlite3-ruby gem (the SQLite3 bindings) installed.
10
+
11
+ This also uses uuid, an additional dependency, to generate a node name when none is given.
12
+ =end
13
+
14
+ require 'uuid'
15
+
16
+ class Fathom::DataCollection < DiscreteNode
17
+
18
+ def initialize(opts={})
19
+ opts = extract_labels(opts)
20
+ opts[:name] ||= UUID.generate
21
+ super(opts)
22
+ end
23
+
24
+
25
+ protected
26
+
27
+ # Looks for labels in the options.
28
+ # Checks :labels first, then :parents, then :parent, taking the labels of the first node that responds to :labels.
29
+ def extract_labels(opts)
30
+ return opts if opts[:labels]
31
+ parents = opts[:parents]
32
+ parents ||= opts[:parent]
33
+ parents = Array[parents] if parents and not parents.is_a?(Array)
34
+ parents.each do |parent|
35
+ if parent.respond_to?(:labels)
36
+ opts[:labels] = parent.labels
37
+ return opts
38
+ end
39
+ end
40
+ opts
41
+ end
42
+
43
+ end
44
+
45
+ if __FILE__ == $0
46
+ include Fathom
47
+ # TODO: Is there anything you want to do to run this file on its own?
48
+ # DataCollection.new
49
+ end
50
+
51
+
52
+ # J2: Some bare-minimum sqlite3 stuff
53
+ # See: http://sqlite-ruby.rubyforge.org/
54
+ # require 'rubygems'
55
+ # require 'sqlite3'
56
+ #
57
+ # def prepare_database(table_name)
58
+ # @db = SQLite3::Database.new(":memory:")
59
+ # # @db = SQLite3::Database.new("/tmp/j2.db")
60
+ #
61
+ # create_sql = <<-SQL
62
+ #
63
+ # CREATE TABLE "#{table_name}" (
64
+ # "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
65
+ # "field1" FLOAT);
66
+ # SQL
67
+ # @db.execute_batch(create_sql)
68
+ #
69
+ # insert_sql = <<-SQL
70
+ # INSERT INTO #{table_name}
71
+ # ("field1")
72
+ # VALUES
73
+ # (:field1);
74
+ # SQL
75
+ # @insert_record = @db.prepare(insert_sql)
76
+ #
77
+ # # select_sql = <<-SQL
78
+ # # "select * from 'asdf';"
79
+ # # SQL
80
+ # # @select_record = @db.prepare(select_sql)
81
+ # @select_record = @db.prepare( "select * from asdf" )
82
+ # end
83
+ #
84
+ # def insert_record(opts)
85
+ # @insert_record.bind_params(:field1 => opts[:field1])
86
+ # @insert_record.execute
87
+ # end
88
+ #
89
+ # def select_record(opts={})
90
+ # # @db.execute( "select * from 'asdf'" )
91
+ # @select_record.execute.entries
92
+ # end
93
+ #
94
+ # prepare_database('asdf')
95
+ # insert_record :field1 => 123.1
96
+ # insert_record :field1 => 122.2
97
+ # @a = select_record