bayesnet 0.1.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9a746d994d25c279f3246613b9a918fb84720c7c9c78f85ce1ffdc5fbd6bcf9c
4
- data.tar.gz: 3b8ee59eab90bf75172239601ddef479926f5a27db475688f1191c71298ca757
3
+ metadata.gz: 7591665046345784f55275c06d1129fd91ee3f098f3800b2c03b6f9bbfd8e172
4
+ data.tar.gz: ec9009ab90593d42fa2506a230e5900d5a39bebb1a7fbd874953d6c86022b2eb
5
5
  SHA512:
6
- metadata.gz: 72985a24e9d529b04e8d275a46cc6eadbd55aa4380f2aed73a41d2e3c3c7e7528419aa99fa0004d4d3aab1692484021c3b577e1f9262ee9ef4d89664523d8335
7
- data.tar.gz: 8be39618f74ccd85750569a74e18a0a384aa3a12c4c52a35315958d8d4ad4045abbe8bee83d510459e3fd6b29de68bab4120e3540cee0cadc9a024f1b2389ffb
6
+ metadata.gz: 5e668b431f55f9239ad3ae06cdc020098ff4a8b68f7934283d6f77a3969a014aa17f68df12b23013d1681e79fdecd8f4c8e4da105e9430a16d2ab8075bbcca7b
7
+ data.tar.gz: 75eceac300152cfa8d0ce736b16939e779f127dc844ea0c8ce8e1d0f363b04048107533f64c744420b2631fd3e8678d6812e15c6b770e2b37a598ae290af773a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.0] - 2022-06-26
4
+ - Using the variable elimination algorithm to build a distribution
5
+
3
6
  ## [0.5.0] - 2022-02-26
4
7
 
5
8
  - Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
@@ -8,7 +11,7 @@
8
11
 
9
12
  ## [0.0.3] - 2021-12-29
10
13
 
11
- - Fixing terminoloty used in Factor class
14
+ - Fixing terminology used in Factor class
12
15
 
13
16
  ## [0.0.2] - 2021-12-28
14
17
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bayesnet (0.1.0)
4
+ bayesnet (0.6.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -1,29 +1,68 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bayesnet
4
- # Factor if a function of sevaral variables (A, B, ...) each defined on values from finite set
4
+ # Factor is a function of several variables (A, B, ...), where
5
+ # every variable could take values from some finite set
5
6
  class Factor
7
+ # +++ Factor DSL +++
8
+ #
9
+ # Factor DSL entry point:
6
10
  def self.build(&block)
7
11
  factor = new
8
12
  factor.instance_eval(&block)
9
13
  factor
10
14
  end
11
15
 
12
- # Specifies variable name together with its values
13
- def scope(var_name_to_values)
14
- @scope.merge!(var_name_to_values)
16
+ # Factor DSL
17
+ # Defining variable with list of its possible values looks like:
18
+ # ```
19
+ # Bayesnet::Factor.build do
20
+ # scope weather: %i[sunny cloudy]
21
+ # scope mood: %i[bad good]
22
+ # ...
23
+ # ```
24
+ # ^ this code defines two variables `weather` and `mood`, where
25
+ # `weather` could be :sunny or :cloudy, and
26
+ # `mood` could be :bad or :good
27
+ def scope(var_name_to_values = nil)
28
+ if var_name_to_values
29
+ @scope.merge!(var_name_to_values)
30
+ else
31
+ @scope
32
+ end
15
33
  end
16
34
 
17
- # Specifies value for a scope context. Value is the last element in `context_and_val`
35
+ # Factor DSL
36
+ # Specifies factor value for some set of variable values, i.e.
37
+ # ```
38
+ # Bayesnet::Factor.build do
39
+ # scope weather: %i[sunny cloudy]
40
+ # scope mood: %i[bad good]
41
+ # val :sunny, :bad, 0.1
42
+ # ...
43
+ # ```
44
+ # ^ this code says the value of factor for [weather == :sunny, mood == :bad] is 0.1
18
45
  def val(*context_and_val)
19
46
  context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
20
47
  @vals[context_and_val[0..-2]] = context_and_val[-1]
21
48
  end
49
+ # --- Factor DSL ---
22
50
 
51
+ # List of variable names
23
52
  def var_names
24
53
  @scope.keys
25
54
  end
26
55
 
56
+ # accesses a factor value by its context, e.g.
57
+ # ```
58
+ # factor = Bayesnet::Factor.build do
59
+ # scope weather: %i[sunny cloudy]
60
+ # scope mood: %i[bad good]
61
+ # val :sunny, :bad, 0.1
62
+ # ...
63
+ # end
64
+ # factor[:sunny, :bad] # 0.1
65
+ # ```
27
66
  def [](*context)
28
67
  key = if context.size == 1 && context[0].is_a?(Hash)
29
68
  context[0].slice(*var_names).values
@@ -33,20 +72,19 @@ module Bayesnet
33
72
  @vals[key]
34
73
  end
35
74
 
36
- def self.from_distribution(var_distribution)
37
- self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
38
- end
39
-
75
+ # returns all combinations of values of `var_names`
40
76
  def contextes(*var_names)
41
77
  return [] if var_names.empty?
42
78
 
43
79
  @scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
44
80
  end
45
81
 
82
+ # returns the factor's values (one per defined context)
46
83
  def values
47
84
  @vals.values
48
85
  end
49
86
 
87
+ # returns new normalized factor, i.e. where sum of all values is 1.0
50
88
  def normalize
51
89
  vals = @vals.clone
52
90
  norm_factor = vals.map(&:last).sum * 1.0
@@ -54,26 +92,29 @@ module Bayesnet
54
92
  self.class.new(@scope.clone, vals)
55
93
  end
56
94
 
95
+ # Returns factor built as follows:
96
+ # 1. Original factor is filtered down to the entries whose variable values are compatible with `context`
97
+ # 2. Returned factor does not have any variables from `context` (because they have
98
+ # same values, after step 1)
99
+ # The `context` argument is supposed to be evidence, something like
100
+ # `{weather: :sunny}`
57
101
  def reduce_to(context)
58
- # TODO: use Hash#except when Ruby 2.6 support no longer needed
59
- context_keys_set = context.keys.to_set
60
- scope = @scope.reject { |k, _| context_keys_set.include?(k) }
102
+ limited_context = context.slice(*scope.keys)
103
+ return self.class.new(@scope, @vals) if limited_context.empty?
104
+ limited_scope = @scope.slice(*(@scope.keys - limited_context.keys))
61
105
 
62
- context_vals = context.values
63
- indices = context.keys.map { |k| index_by_var_name[k] }
106
+ context_vals = limited_context.values
107
+ indices = limited_context.keys.map { |k| index_by_var_name[k] }
64
108
  vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
65
109
  vals.transform_keys! { |k| delete_by_indices(k, indices) }
66
110
 
67
- self.class.new(scope, vals)
68
- end
69
-
70
- def delete_by_indices(array, indices)
71
- result = array.dup
72
- indices.map { |i| result[i] = nil }
73
- result.compact
111
+ self.class.new(limited_scope, vals)
74
112
  end
75
113
 
76
- # groups by `var_names` having same context and sum out values.
114
+ # Returns new factor defined over `var_names`, all other variables
115
+ # get eliminated. For every combination of `var_names`'s values
116
+ # the value of new factor is defined by summing up values in original factor
117
+ # having compatible value
77
118
  def marginalize(var_names)
78
119
  scope = @scope.slice(*var_names)
79
120
 
@@ -84,8 +125,52 @@ module Bayesnet
84
125
  self.class.new(scope, vals)
85
126
  end
86
127
 
128
+ def eliminate(var_name)
129
+ keep_var_names = var_names
130
+ keep_var_names.delete(var_name)
131
+ marginalize(keep_var_names)
132
+ end
133
+
134
+ def select(subcontext)
135
+ @vals.select do |context, _|
136
+ var_names.zip(context).slice(subcontext.keys) == subcontext
137
+ end
138
+ end
139
+
140
+ def *(other)
141
+ common_scope = @scope.keys & other.scope.keys
142
+ new_scope = scope.merge(other.scope)
143
+ new_vals = {}
144
+ group1 = group_by_scope_values(common_scope)
145
+ group2 = other.group_by_scope_values(common_scope)
146
+ group1.each do |scope, vals1|
147
+ combo = vals1.product(group2[scope])
148
+ combo.each do |(val1, val2)|
149
+ # values in scope must match variables order in new_scope, i.e.
150
+ # they must match `new_scope.var_names`
151
+ # The code below ensures it by merging two hashes in the same
152
+ # way as `new_scope` is constructed above
153
+ val_by_name1 = var_names.zip(val1.first).to_h
154
+ val_by_name2 = other.var_names.zip(val2.first).to_h
155
+ new_vals[val_by_name1.merge(val_by_name2).values] = val1.last*val2.last
156
+ end
157
+ end
158
+ Factor.new(new_scope, new_vals)
159
+ end
160
+
161
+ def group_by_scope_values(scope_keys)
162
+ indices = scope_keys.map { |k| index_by_var_name[k] }
163
+ @vals.group_by { |context, _val| indices.map { |i| context[i] } }
164
+ end
165
+
87
166
  private
88
167
 
168
+ def delete_by_indices(array, indices)
169
+ result = array.dup
170
+ indices.map { |i| result[i] = nil }
171
+ result.compact
172
+ end
173
+
89
174
  def initialize(scope = {}, vals = {})
90
175
  @scope = scope
91
176
  @vals = vals
@@ -5,16 +5,15 @@ require "bayesnet/node"
5
5
  module Bayesnet
6
6
  # Acyclic graph
7
7
  class Graph
8
+ include Bayesnet::Logging
9
+
8
10
  attr_reader :nodes
9
11
 
10
12
  def initialize
11
13
  @nodes = {}
12
14
  end
13
15
 
14
- def var_names
15
- nodes.keys
16
- end
17
-
16
+ # +++ Graph DSL +++
18
17
  def node(name, parents: [], &block)
19
18
  raise Error, "DSL error, #node requires a &block" unless block
20
19
 
@@ -22,21 +21,100 @@ module Bayesnet
22
21
  node.instance_eval(&block)
23
22
  @nodes[name] = node
24
23
  end
24
+ # --- Graph DSL ---
25
25
 
26
- def resolve_factors
27
- @nodes.values.each do |node|
28
- node.resolve_factor(@nodes.slice(*node.parent_nodes))
26
+ # returns names of all nodes
27
+ def var_names
28
+ nodes.keys
29
+ end
30
+
31
+ # returns normalized distribution reduced to `evidence`
32
+ # and marginalized over `over`
33
+ def distribution(over: [], evidence: {}, algorithm: :variables_elimination)
34
+ case algorithm
35
+ when :brute_force
36
+ joint_distribution
37
+ .reduce_to(evidence)
38
+ .marginalize(over)
39
+ .normalize
40
+ when :variables_elimination
41
+ reduced_factors = nodes.values.map(&:factor).map { |f| f.reduce_to(evidence) }
42
+ not_include_in_order = evidence.keys.to_set + over.to_set
43
+ variables_order = elimination_order.reject { |v| not_include_in_order.include?(v) }
44
+ distribution = eliminate_variables(variables_order, reduced_factors)
45
+ distribution.normalize
46
+ else
47
+ raise "Uknown algorithm #{algorithm}"
29
48
  end
30
49
  end
31
50
 
32
- def distribution(over: [], evidence: {})
33
- joint_distribution
34
- .reduce_to(evidence)
35
- .marginalize(over)
36
- .normalize
51
+ def elimination_order
52
+ return @order if @order
53
+ @order = []
54
+ edges = Set.new
55
+ @nodes.each do |name, node|
56
+ parents = node.parent_nodes.keys
57
+ parents.each { |p| edges.add([name, p].to_set) }
58
+ parents.combination(2) { |p1, p2| edges.add([p1, p2].to_set) }
59
+ end
60
+ # edges now are moralized graph of `self`, just represented differently as
61
+ # set of edges
62
+
63
+ remaining_nodes = nodes.keys.to_set
64
+ until remaining_nodes.empty?
65
+ best_node = find_min_neighbor(remaining_nodes, edges)
66
+ remaining_nodes.delete(best_node)
67
+ @order.push(best_node)
68
+ clique = edges.select { |e| e.include?(best_node) }
69
+ edges -= clique
70
+ if edges.empty? #i.e. clique is the last edge
71
+ @order += remaining_nodes.to_a
72
+ remaining_nodes = Set.new
73
+ end
74
+ clique.
75
+ map { |e| e.delete(best_node) }.
76
+ map(&:first).
77
+ combination(2) { |p1, p2| edges.add([p1,p2].to_set) }
78
+ end
79
+ @order
80
+ end
81
+
82
+ def find_min_neighbor(remaining_nodes, edges)
83
+ result = nil
84
+ min_neighbors = nil
85
+ remaining_nodes.each do |name, _|
86
+ neighbors = edges.count { |e| e.include?(name) }
87
+ if min_neighbors.nil? || neighbors < min_neighbors
88
+ min_neighbors = neighbors
89
+ result = name
90
+ end
91
+ end
92
+ result
93
+ end
94
+
95
+ def eliminate_variables(variables_order, factors)
96
+ logger.debug "Eliminating variables #{variables_order} from #{factors.size} factors #{factors.map(&:var_names)}"
97
+ remaining_factors = factors.to_set
98
+ variables_order.each do |var_name|
99
+ logger.debug "Eliminating '#{var_name}'..."
100
+ grouped_factors = remaining_factors.select { |f| f.var_names.include?(var_name) }
101
+ remaining_factors -= grouped_factors
102
+ logger.debug "Building new factor out of #{grouped_factors.size} factors having '#{var_name}' - #{grouped_factors.map(&:var_names)}"
103
+ product_factor = grouped_factors.reduce(&:*)
104
+ logger.debug "Removing variable from new factor"
105
+ new_factor = product_factor.eliminate(var_name)
106
+ logger.debug "New factor variables are #{new_factor.var_names}"
107
+ remaining_factors.add(new_factor)
108
+ logger.debug "The variable '#{var_name}' is elminated"
109
+ end
110
+ logger.debug "Non-eliminated variables are #{remaining_factors.map(&:var_names).flatten.uniq}"
111
+ result = remaining_factors.reduce(&:*)
112
+ logger.debug "Eliminating is done"
113
+ result
37
114
  end
38
115
 
39
116
  # This is MAP query, i.e. Maximum a Posteriori
117
+ # returns value of `var_name` having maximum posterior probability, when `evidence` is observed
40
118
  def most_likely_value(var_name, evidence:)
41
119
  posterior_distribution = distribution(over: [var_name], evidence: evidence)
42
120
  mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
@@ -49,6 +127,7 @@ module Bayesnet
49
127
  posterior_distribution[*over_vars.values]
50
128
  end
51
129
 
130
+ # Essentially it builds the product of all nodes' factors
52
131
  def joint_distribution
53
132
  return @joint_distribution if @joint_distribution
54
133
 
@@ -75,5 +154,11 @@ module Bayesnet
75
154
  def parameters
76
155
  nodes.values.map(&:parameters).sum
77
156
  end
157
+
158
+ def resolve_factors
159
+ @nodes.values.each do |node|
160
+ node.resolve_factor(@nodes.slice(*node.parent_nodes))
161
+ end
162
+ end
78
163
  end
79
164
  end
@@ -0,0 +1,13 @@
1
+ # lib/logging.rb
2
+
3
+ module Bayesnet
4
+ def self.logger
5
+ @logger ||= Logger.new(STDOUT).tap { |l| l.level = :debug }
6
+ end
7
+
8
+ module Logging
9
+ def logger
10
+ Bayesnet.logger
11
+ end
12
+ end
13
+ end
data/lib/bayesnet/node.rb CHANGED
@@ -6,8 +6,10 @@ module Bayesnet
6
6
  @name = name
7
7
  @parent_nodes = parent_nodes
8
8
  @values = []
9
+ @factor = Factor.new
9
10
  end
10
11
 
12
+ # +++ Node DSL +++
11
13
  def values(hash_or_array = nil, &block)
12
14
  case hash_or_array
13
15
  when NilClass
@@ -28,6 +30,21 @@ module Bayesnet
28
30
  end
29
31
  end
30
32
 
33
+ def distributions(&block)
34
+ instance_eval(&block)
35
+ end
36
+ # --- Node DSL ---
37
+
38
+ def parameters
39
+ (values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
40
+ end
41
+
42
+ def as(distribution, given:)
43
+ @values.zip(distribution).each do |value, probability|
44
+ @factor.val [value] + given + [probability]
45
+ end
46
+ end
47
+
31
48
  def resolve_factor(parent_nodes)
32
49
  @parent_nodes = parent_nodes
33
50
  if @factor.is_a?(Proc)
@@ -43,18 +60,5 @@ module Bayesnet
43
60
  end
44
61
  end
45
62
 
46
- def distributions(&block)
47
- instance_eval(&block)
48
- end
49
-
50
- def parameters
51
- (values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
52
- end
53
-
54
- def as(distribution, given:)
55
- @values.zip(distribution).each do |value, probability|
56
- @factor.val [value] + given + [probability]
57
- end
58
- end
59
63
  end
60
64
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bayesnet
4
- VERSION = "0.1.0"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/bayesnet.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "set"
4
+ require "logger"
4
5
 
5
6
  # net
7
+ require_relative "bayesnet/logging"
6
8
  require_relative "bayesnet/dsl"
7
9
  require_relative "bayesnet/error"
8
10
  require_relative "bayesnet/factor"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bayesnet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Furmanov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-26 00:00:00.000000000 Z
11
+ date: 2022-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: m
@@ -77,6 +77,7 @@ files:
77
77
  - lib/bayesnet/error.rb
78
78
  - lib/bayesnet/factor.rb
79
79
  - lib/bayesnet/graph.rb
80
+ - lib/bayesnet/logging.rb
80
81
  - lib/bayesnet/node.rb
81
82
  - lib/bayesnet/parsers/bif.rb
82
83
  - lib/bayesnet/parsers/bif.treetop