bayesnet 0.1.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9a746d994d25c279f3246613b9a918fb84720c7c9c78f85ce1ffdc5fbd6bcf9c
4
- data.tar.gz: 3b8ee59eab90bf75172239601ddef479926f5a27db475688f1191c71298ca757
3
+ metadata.gz: 7591665046345784f55275c06d1129fd91ee3f098f3800b2c03b6f9bbfd8e172
4
+ data.tar.gz: ec9009ab90593d42fa2506a230e5900d5a39bebb1a7fbd874953d6c86022b2eb
5
5
  SHA512:
6
- metadata.gz: 72985a24e9d529b04e8d275a46cc6eadbd55aa4380f2aed73a41d2e3c3c7e7528419aa99fa0004d4d3aab1692484021c3b577e1f9262ee9ef4d89664523d8335
7
- data.tar.gz: 8be39618f74ccd85750569a74e18a0a384aa3a12c4c52a35315958d8d4ad4045abbe8bee83d510459e3fd6b29de68bab4120e3540cee0cadc9a024f1b2389ffb
6
+ metadata.gz: 5e668b431f55f9239ad3ae06cdc020098ff4a8b68f7934283d6f77a3969a014aa17f68df12b23013d1681e79fdecd8f4c8e4da105e9430a16d2ab8075bbcca7b
7
+ data.tar.gz: 75eceac300152cfa8d0ce736b16939e779f127dc844ea0c8ce8e1d0f363b04048107533f64c744420b2631fd3e8678d6812e15c6b770e2b37a598ae290af773a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.0] - 2022-06-26
4
+ - Using variables elimination algorithm to build a distribution
5
+
3
6
  ## [0.5.0] - 2022-02-26
4
7
 
5
8
  - Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
@@ -8,7 +11,7 @@
8
11
 
9
12
  ## [0.0.3] - 2021-12-29
10
13
 
11
- - Fixing terminoloty used in Factor class
14
+ - Fixing terminology used in Factor class
12
15
 
13
16
  ## [0.0.2] - 2021-12-28
14
17
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bayesnet (0.1.0)
4
+ bayesnet (0.6.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -1,29 +1,68 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bayesnet
4
- # Factor if a function of sevaral variables (A, B, ...) each defined on values from finite set
4
+ # Factor is a function of several variables (A, B, ...), where
5
+ # every variable could take values from some finite set
5
6
  class Factor
7
+ # +++ Factor DSL +++
8
+ #
9
+ # Factor DSL entry point:
6
10
  def self.build(&block)
7
11
  factor = new
8
12
  factor.instance_eval(&block)
9
13
  factor
10
14
  end
11
15
 
12
- # Specifies variable name together with its values
13
- def scope(var_name_to_values)
14
- @scope.merge!(var_name_to_values)
16
+ # Factor DSL
17
+ # Defining variable with list of its possible values looks like:
18
+ # ```
19
+ # Bayesnet::Factor.build do
20
+ # scope weather: %i[sunny cloudy]
21
+ # scope mood: %i[bad good]
22
+ # ...
23
+ # ```
24
+ # ^ this code defines two variables `weather` and `mood`, where
25
+ # `weather` could be :sunny or :cloudy, and
26
+ # `mood` could be :bad or :good
27
+ def scope(var_name_to_values = nil)
28
+ if var_name_to_values
29
+ @scope.merge!(var_name_to_values)
30
+ else
31
+ @scope
32
+ end
15
33
  end
16
34
 
17
- # Specifies value for a scope context. Value is the last element in `context_and_val`
35
+ # Factor DSL
36
+ # Specifies factor value for some set of variable values, i.e.
37
+ # ```
38
+ # Bayesnet::Factor.build do
39
+ # scope weather: %i[sunny cloudy]
40
+ # scope mood: %i[bad good]
41
+ # val :sunny, :bad, 0.1
42
+ # ...
43
+ # ```
44
+ # ^ this code says the value of factor for [weather == :sunny, mood == :bad] is 0.1
18
45
  def val(*context_and_val)
19
46
  context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
20
47
  @vals[context_and_val[0..-2]] = context_and_val[-1]
21
48
  end
49
+ # --- Factor DSL ---
22
50
 
51
+ # List of variable names
23
52
  def var_names
24
53
  @scope.keys
25
54
  end
26
55
 
56
+ # accessor for a factor value, e.g.
57
+ # ```
58
+ # factor = Bayesnet::Factor.build do
59
+ # scope weather: %i[sunny cloudy]
60
+ # scope mood: %i[bad good]
61
+ # val :sunny, :bad, 0.1
62
+ # ...
63
+ # end
64
+ # factor[:sunny, :bad] # 0.1
65
+ # ```
27
66
  def [](*context)
28
67
  key = if context.size == 1 && context[0].is_a?(Hash)
29
68
  context[0].slice(*var_names).values
@@ -33,20 +72,19 @@ module Bayesnet
33
72
  @vals[key]
34
73
  end
35
74
 
36
- def self.from_distribution(var_distribution)
37
- self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
38
- end
39
-
75
+ # returns all combinations of values of `var_names`
40
76
  def contextes(*var_names)
41
77
  return [] if var_names.empty?
42
78
 
43
79
  @scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
44
80
  end
45
81
 
82
+ # returns the factor values stored for all contexts
46
83
  def values
47
84
  @vals.values
48
85
  end
49
86
 
87
+ # returns new normalized factor, i.e. where sum of all values is 1.0
50
88
  def normalize
51
89
  vals = @vals.clone
52
90
  norm_factor = vals.map(&:last).sum * 1.0
@@ -54,26 +92,29 @@ module Bayesnet
54
92
  self.class.new(@scope.clone, vals)
55
93
  end
56
94
 
95
+ # Returns factor built as follows:
96
+ # 1. Original factor gets filtered out by variables having values compatible with `context`
97
+ # 2. Returned factor does not have any variables from `context` (because they have
98
+ # same values, after step 1)
99
+ # The `context` argument is supposed to be evidence, something like
100
+ # `{weather: :sunny}`
57
101
  def reduce_to(context)
58
- # TODO: use Hash#except when Ruby 2.6 support no longer needed
59
- context_keys_set = context.keys.to_set
60
- scope = @scope.reject { |k, _| context_keys_set.include?(k) }
102
+ limited_context = context.slice(*scope.keys)
103
+ return self.class.new(@scope, @vals) if limited_context.empty?
104
+ limited_scope = @scope.slice(*(@scope.keys - limited_context.keys))
61
105
 
62
- context_vals = context.values
63
- indices = context.keys.map { |k| index_by_var_name[k] }
106
+ context_vals = limited_context.values
107
+ indices = limited_context.keys.map { |k| index_by_var_name[k] }
64
108
  vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
65
109
  vals.transform_keys! { |k| delete_by_indices(k, indices) }
66
110
 
67
- self.class.new(scope, vals)
68
- end
69
-
70
- def delete_by_indices(array, indices)
71
- result = array.dup
72
- indices.map { |i| result[i] = nil }
73
- result.compact
111
+ self.class.new(limited_scope, vals)
74
112
  end
75
113
 
76
- # groups by `var_names` having same context and sum out values.
114
+ # Returns new factor defined over `var_names`, all other variables
115
+ # get eliminated. For every combination of `var_names`'s values
116
+ # the value of new factor is defined by summing up values in original factor
117
+ # having compatible value
77
118
  def marginalize(var_names)
78
119
  scope = @scope.slice(*var_names)
79
120
 
@@ -84,8 +125,52 @@ module Bayesnet
84
125
  self.class.new(scope, vals)
85
126
  end
86
127
 
128
+ def eliminate(var_name)
129
+ keep_var_names = var_names
130
+ keep_var_names.delete(var_name)
131
+ marginalize(keep_var_names)
132
+ end
133
+
134
+ def select(subcontext)
135
+ @vals.select do |context, _|
136
+ var_names.zip(context).slice(subcontext.keys) == subcontext
137
+ end
138
+ end
139
+
140
+ def *(other)
141
+ common_scope = @scope.keys & other.scope.keys
142
+ new_scope = scope.merge(other.scope)
143
+ new_vals = {}
144
+ group1 = group_by_scope_values(common_scope)
145
+ group2 = other.group_by_scope_values(common_scope)
146
+ group1.each do |scope, vals1|
147
+ combo = vals1.product(group2[scope])
148
+ combo.each do |(val1, val2)|
149
+ # values in scope must match variables order in new_scope, i.e.
150
+ # they must match `new_scope.var_names`
151
+ # The code below ensures it by merging two hashes in the same
152
+ # way as `new_scope` is constructed above
153
+ val_by_name1 = var_names.zip(val1.first).to_h
154
+ val_by_name2 = other.var_names.zip(val2.first).to_h
155
+ new_vals[val_by_name1.merge(val_by_name2).values] = val1.last*val2.last
156
+ end
157
+ end
158
+ Factor.new(new_scope, new_vals)
159
+ end
160
+
161
+ def group_by_scope_values(scope_keys)
162
+ indices = scope_keys.map { |k| index_by_var_name[k] }
163
+ @vals.group_by { |context, _val| indices.map { |i| context[i] } }
164
+ end
165
+
87
166
  private
88
167
 
168
+ def delete_by_indices(array, indices)
169
+ result = array.dup
170
+ indices.map { |i| result[i] = nil }
171
+ result.compact
172
+ end
173
+
89
174
  def initialize(scope = {}, vals = {})
90
175
  @scope = scope
91
176
  @vals = vals
@@ -5,16 +5,15 @@ require "bayesnet/node"
5
5
  module Bayesnet
6
6
  # Acyclic graph
7
7
  class Graph
8
+ include Bayesnet::Logging
9
+
8
10
  attr_reader :nodes
9
11
 
10
12
  def initialize
11
13
  @nodes = {}
12
14
  end
13
15
 
14
- def var_names
15
- nodes.keys
16
- end
17
-
16
+ # +++ Graph DSL +++
18
17
  def node(name, parents: [], &block)
19
18
  raise Error, "DSL error, #node requires a &block" unless block
20
19
 
@@ -22,21 +21,100 @@ module Bayesnet
22
21
  node.instance_eval(&block)
23
22
  @nodes[name] = node
24
23
  end
24
+ # --- Graph DSL ---
25
25
 
26
- def resolve_factors
27
- @nodes.values.each do |node|
28
- node.resolve_factor(@nodes.slice(*node.parent_nodes))
26
+ # returns names of all nodes
27
+ def var_names
28
+ nodes.keys
29
+ end
30
+
31
+ # returns normalized distribution reduced to `evidence`
32
+ # and marginalized over `over`
33
+ def distribution(over: [], evidence: {}, algorithm: :variables_elimination)
34
+ case algorithm
35
+ when :brute_force
36
+ joint_distribution
37
+ .reduce_to(evidence)
38
+ .marginalize(over)
39
+ .normalize
40
+ when :variables_elimination
41
+ reduced_factors = nodes.values.map(&:factor).map { |f| f.reduce_to(evidence) }
42
+ not_include_in_order = evidence.keys.to_set + over.to_set
43
+ variables_order = elimination_order.reject { |v| not_include_in_order.include?(v) }
44
+ distribution = eliminate_variables(variables_order, reduced_factors)
45
+ distribution.normalize
46
+ else
47
+ raise "Uknown algorithm #{algorithm}"
29
48
  end
30
49
  end
31
50
 
32
- def distribution(over: [], evidence: {})
33
- joint_distribution
34
- .reduce_to(evidence)
35
- .marginalize(over)
36
- .normalize
51
+ def elimination_order
52
+ return @order if @order
53
+ @order = []
54
+ edges = Set.new
55
+ @nodes.each do |name, node|
56
+ parents = node.parent_nodes.keys
57
+ parents.each { |p| edges.add([name, p].to_set) }
58
+ parents.combination(2) { |p1, p2| edges.add([p1, p2].to_set) }
59
+ end
60
+ # edges now are moralized graph of `self`, just represented differently as
61
+ # set of edges
62
+
63
+ remaining_nodes = nodes.keys.to_set
64
+ until remaining_nodes.empty?
65
+ best_node = find_min_neighbor(remaining_nodes, edges)
66
+ remaining_nodes.delete(best_node)
67
+ @order.push(best_node)
68
+ clique = edges.select { |e| e.include?(best_node) }
69
+ edges -= clique
70
+ if edges.empty? #i.e. clique is the last edge
71
+ @order += remaining_nodes.to_a
72
+ remaining_nodes = Set.new
73
+ end
74
+ clique.
75
+ map { |e| e.delete(best_node) }.
76
+ map(&:first).
77
+ combination(2) { |p1, p2| edges.add([p1,p2].to_set) }
78
+ end
79
+ @order
80
+ end
81
+
82
+ def find_min_neighbor(remaining_nodes, edges)
83
+ result = nil
84
+ min_neighbors = nil
85
+ remaining_nodes.each do |name, _|
86
+ neighbors = edges.count { |e| e.include?(name) }
87
+ if min_neighbors.nil? || neighbors < min_neighbors
88
+ min_neighbors = neighbors
89
+ result = name
90
+ end
91
+ end
92
+ result
93
+ end
94
+
95
+ def eliminate_variables(variables_order, factors)
96
+ logger.debug "Eliminating variables #{variables_order} from #{factors.size} factors #{factors.map(&:var_names)}"
97
+ remaining_factors = factors.to_set
98
+ variables_order.each do |var_name|
99
+ logger.debug "Eliminating '#{var_name}'..."
100
+ grouped_factors = remaining_factors.select { |f| f.var_names.include?(var_name) }
101
+ remaining_factors -= grouped_factors
102
+ logger.debug "Building new factor out of #{grouped_factors.size} factors having '#{var_name}' - #{grouped_factors.map(&:var_names)}"
103
+ product_factor = grouped_factors.reduce(&:*)
104
+ logger.debug "Removing variable from new factor"
105
+ new_factor = product_factor.eliminate(var_name)
106
+ logger.debug "New factor variables are #{new_factor.var_names}"
107
+ remaining_factors.add(new_factor)
108
+ logger.debug "The variable '#{var_name}' is elminated"
109
+ end
110
+ logger.debug "Non-eliminated variables are #{remaining_factors.map(&:var_names).flatten.uniq}"
111
+ result = remaining_factors.reduce(&:*)
112
+ logger.debug "Eliminating is done"
113
+ result
37
114
  end
38
115
 
39
116
  # This is MAP query, i.e. Maximum a Posteriori
117
+ # returns value of `var_name` having maximum posterior probability, given that `evidence` is observed
40
118
  def most_likely_value(var_name, evidence:)
41
119
  posterior_distribution = distribution(over: [var_name], evidence: evidence)
42
120
  mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
@@ -49,6 +127,7 @@ module Bayesnet
49
127
  posterior_distribution[*over_vars.values]
50
128
  end
51
129
 
130
+ # Essentially it builds product of all node's factors
52
131
  def joint_distribution
53
132
  return @joint_distribution if @joint_distribution
54
133
 
@@ -75,5 +154,11 @@ module Bayesnet
75
154
  def parameters
76
155
  nodes.values.map(&:parameters).sum
77
156
  end
157
+
158
+ def resolve_factors
159
+ @nodes.values.each do |node|
160
+ node.resolve_factor(@nodes.slice(*node.parent_nodes))
161
+ end
162
+ end
78
163
  end
79
164
  end
@@ -0,0 +1,13 @@
1
+ # lib/bayesnet/logging.rb
2
+
3
+ module Bayesnet
4
+ def self.logger
5
+ @logger ||= Logger.new(STDOUT).tap { |l| l.level = :debug }
6
+ end
7
+
8
+ module Logging
9
+ def logger
10
+ Bayesnet.logger
11
+ end
12
+ end
13
+ end
data/lib/bayesnet/node.rb CHANGED
@@ -6,8 +6,10 @@ module Bayesnet
6
6
  @name = name
7
7
  @parent_nodes = parent_nodes
8
8
  @values = []
9
+ @factor = Factor.new
9
10
  end
10
11
 
12
+ # +++ Node DSL +++
11
13
  def values(hash_or_array = nil, &block)
12
14
  case hash_or_array
13
15
  when NilClass
@@ -28,6 +30,21 @@ module Bayesnet
28
30
  end
29
31
  end
30
32
 
33
+ def distributions(&block)
34
+ instance_eval(&block)
35
+ end
36
+ # --- Node DSL ---
37
+
38
+ def parameters
39
+ (values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
40
+ end
41
+
42
+ def as(distribution, given:)
43
+ @values.zip(distribution).each do |value, probability|
44
+ @factor.val [value] + given + [probability]
45
+ end
46
+ end
47
+
31
48
  def resolve_factor(parent_nodes)
32
49
  @parent_nodes = parent_nodes
33
50
  if @factor.is_a?(Proc)
@@ -43,18 +60,5 @@ module Bayesnet
43
60
  end
44
61
  end
45
62
 
46
- def distributions(&block)
47
- instance_eval(&block)
48
- end
49
-
50
- def parameters
51
- (values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
52
- end
53
-
54
- def as(distribution, given:)
55
- @values.zip(distribution).each do |value, probability|
56
- @factor.val [value] + given + [probability]
57
- end
58
- end
59
63
  end
60
64
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bayesnet
4
- VERSION = "0.1.0"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/bayesnet.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "set"
4
+ require "logger"
4
5
 
5
6
  # net
7
+ require_relative "bayesnet/logging"
6
8
  require_relative "bayesnet/dsl"
7
9
  require_relative "bayesnet/error"
8
10
  require_relative "bayesnet/factor"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bayesnet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Furmanov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-26 00:00:00.000000000 Z
11
+ date: 2022-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: m
@@ -77,6 +77,7 @@ files:
77
77
  - lib/bayesnet/error.rb
78
78
  - lib/bayesnet/factor.rb
79
79
  - lib/bayesnet/graph.rb
80
+ - lib/bayesnet/logging.rb
80
81
  - lib/bayesnet/node.rb
81
82
  - lib/bayesnet/parsers/bif.rb
82
83
  - lib/bayesnet/parsers/bif.treetop