bayesnet 0.1.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -1
- data/Gemfile.lock +1 -1
- data/lib/bayesnet/factor.rb +107 -22
- data/lib/bayesnet/graph.rb +97 -12
- data/lib/bayesnet/logging.rb +13 -0
- data/lib/bayesnet/node.rb +17 -13
- data/lib/bayesnet/version.rb +1 -1
- data/lib/bayesnet.rb +2 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7591665046345784f55275c06d1129fd91ee3f098f3800b2c03b6f9bbfd8e172
|
4
|
+
data.tar.gz: ec9009ab90593d42fa2506a230e5900d5a39bebb1a7fbd874953d6c86022b2eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e668b431f55f9239ad3ae06cdc020098ff4a8b68f7934283d6f77a3969a014aa17f68df12b23013d1681e79fdecd8f4c8e4da105e9430a16d2ab8075bbcca7b
|
7
|
+
data.tar.gz: 75eceac300152cfa8d0ce736b16939e779f127dc844ea0c8ce8e1d0f363b04048107533f64c744420b2631fd3e8678d6812e15c6b770e2b37a598ae290af773a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.6.0] - 2022-06-26
|
4
|
+
- Using variables elimination algorithm to build a distribution
|
5
|
+
|
3
6
|
## [0.5.0] - 2022-02-26
|
4
7
|
|
5
8
|
- Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
|
@@ -8,7 +11,7 @@
|
|
8
11
|
|
9
12
|
## [0.0.3] - 2021-12-29
|
10
13
|
|
11
|
-
- Fixing
|
14
|
+
- Fixing terminology used in Factor class
|
12
15
|
|
13
16
|
## [0.0.2] - 2021-12-28
|
14
17
|
|
data/Gemfile.lock
CHANGED
data/lib/bayesnet/factor.rb
CHANGED
@@ -1,29 +1,68 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bayesnet
|
4
|
-
# Factor if a function of
|
4
|
+
# Factor is a function of several variables (A, B, ...), where
|
5
|
+
# every variable could take values from some finite set
|
5
6
|
class Factor
|
7
|
+
# +++ Factor DSL +++
|
8
|
+
#
|
9
|
+
# Factor DSL entry point:
|
6
10
|
def self.build(&block)
|
7
11
|
factor = new
|
8
12
|
factor.instance_eval(&block)
|
9
13
|
factor
|
10
14
|
end
|
11
15
|
|
12
|
-
#
|
13
|
-
|
14
|
-
|
16
|
+
# Factor DSL
|
17
|
+
# Defining variable with list of its possible values looks like:
|
18
|
+
# ```
|
19
|
+
# Bayesnet::Factor.build do
|
20
|
+
# scope weather: %i[sunny cloudy]
|
21
|
+
# scope mood: %i[bad good]
|
22
|
+
# ...
|
23
|
+
# ```
|
24
|
+
# ^ this code defines two variables `weather` and `mood`, where
|
25
|
+
# `weather` could be :sunny or :cloudy, and
|
26
|
+
# `mood` could be :bad or :good
|
27
|
+
def scope(var_name_to_values = nil)
|
28
|
+
if var_name_to_values
|
29
|
+
@scope.merge!(var_name_to_values)
|
30
|
+
else
|
31
|
+
@scope
|
32
|
+
end
|
15
33
|
end
|
16
34
|
|
17
|
-
#
|
35
|
+
# Factor DSL
|
36
|
+
# Specifies factor value for some set of variable values, i.e.
|
37
|
+
# ```
|
38
|
+
# Bayesnet::Factor.build do
|
39
|
+
# scope weather: %i[sunny cloudy]
|
40
|
+
# scope mood: %i[bad good]
|
41
|
+
# val :sunny, :bad, 0.1
|
42
|
+
# ...
|
43
|
+
# ```
|
44
|
+
# ^ this code says the value of factor for [weather == :sunny, mood == :bad] is 0.1
|
18
45
|
def val(*context_and_val)
|
19
46
|
context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
|
20
47
|
@vals[context_and_val[0..-2]] = context_and_val[-1]
|
21
48
|
end
|
49
|
+
# --- Factor DSL ---
|
22
50
|
|
51
|
+
# List of variable names
|
23
52
|
def var_names
|
24
53
|
@scope.keys
|
25
54
|
end
|
26
55
|
|
56
|
+
# accessor for a factor value, e.g.
|
57
|
+
# ```
|
58
|
+
# factor = Bayesnet::Factor.build do
|
59
|
+
# scope weather: %i[sunny cloudy]
|
60
|
+
# scope mood: %i[bad good]
|
61
|
+
# val :sunny, :bad, 0.1
|
62
|
+
# ...
|
63
|
+
# end
|
64
|
+
# factor[:sunny, :bad] # 0.1
|
65
|
+
# ```
|
27
66
|
def [](*context)
|
28
67
|
key = if context.size == 1 && context[0].is_a?(Hash)
|
29
68
|
context[0].slice(*var_names).values
|
@@ -33,20 +72,19 @@ module Bayesnet
|
|
33
72
|
@vals[key]
|
34
73
|
end
|
35
74
|
|
36
|
-
|
37
|
-
self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
|
38
|
-
end
|
39
|
-
|
75
|
+
# returns all combinations of values of `var_names`
|
40
76
|
def contextes(*var_names)
|
41
77
|
return [] if var_names.empty?
|
42
78
|
|
43
79
|
@scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
|
44
80
|
end
|
45
81
|
|
82
|
+
# returns all possible values
|
46
83
|
def values
|
47
84
|
@vals.values
|
48
85
|
end
|
49
86
|
|
87
|
+
# returns new normalized factor, i.e. where sum of all values is 1.0
|
50
88
|
def normalize
|
51
89
|
vals = @vals.clone
|
52
90
|
norm_factor = vals.map(&:last).sum * 1.0
|
@@ -54,26 +92,29 @@ module Bayesnet
|
|
54
92
|
self.class.new(@scope.clone, vals)
|
55
93
|
end
|
56
94
|
|
95
|
+
# Returns factor built as follows:
|
96
|
+
# 1. Original factor gets filtered out by variables having values compatible with `context`
|
97
|
+
# 2. Returned factor does not have any variables from `context` (because they have
|
98
|
+
# same values, after step 1)
|
99
|
+
# The `context` argument is supposed to be evidence, something like
|
100
|
+
# `{weather: :sunny}`
|
57
101
|
def reduce_to(context)
|
58
|
-
|
59
|
-
|
60
|
-
|
102
|
+
limited_context = context.slice(*scope.keys)
|
103
|
+
return self.class.new(@scope, @vals) if limited_context.empty?
|
104
|
+
limited_scope = @scope.slice(*(@scope.keys - limited_context.keys))
|
61
105
|
|
62
|
-
context_vals =
|
63
|
-
indices =
|
106
|
+
context_vals = limited_context.values
|
107
|
+
indices = limited_context.keys.map { |k| index_by_var_name[k] }
|
64
108
|
vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
|
65
109
|
vals.transform_keys! { |k| delete_by_indices(k, indices) }
|
66
110
|
|
67
|
-
self.class.new(
|
68
|
-
end
|
69
|
-
|
70
|
-
def delete_by_indices(array, indices)
|
71
|
-
result = array.dup
|
72
|
-
indices.map { |i| result[i] = nil }
|
73
|
-
result.compact
|
111
|
+
self.class.new(limited_scope, vals)
|
74
112
|
end
|
75
113
|
|
76
|
-
#
|
114
|
+
# Returns new factor defined over `var_names`, all other variables
|
115
|
+
# get eliminated. For every combination of `var_names`'s values
|
116
|
+
# the value of new factor is defined by summing up values in original factor
|
117
|
+
# having compatible value
|
77
118
|
def marginalize(var_names)
|
78
119
|
scope = @scope.slice(*var_names)
|
79
120
|
|
@@ -84,8 +125,52 @@ module Bayesnet
|
|
84
125
|
self.class.new(scope, vals)
|
85
126
|
end
|
86
127
|
|
128
|
+
def eliminate(var_name)
|
129
|
+
keep_var_names = var_names
|
130
|
+
keep_var_names.delete(var_name)
|
131
|
+
marginalize(keep_var_names)
|
132
|
+
end
|
133
|
+
|
134
|
+
def select(subcontext)
|
135
|
+
@vals.select do |context, _|
|
136
|
+
var_names.zip(context).slice(subcontext.keys) == subcontext
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def *(other)
|
141
|
+
common_scope = @scope.keys & other.scope.keys
|
142
|
+
new_scope = scope.merge(other.scope)
|
143
|
+
new_vals = {}
|
144
|
+
group1 = group_by_scope_values(common_scope)
|
145
|
+
group2 = other.group_by_scope_values(common_scope)
|
146
|
+
group1.each do |scope, vals1|
|
147
|
+
combo = vals1.product(group2[scope])
|
148
|
+
combo.each do |(val1, val2)|
|
149
|
+
# values in scope must match variables order in new_scope, i.e.
|
150
|
+
# they must match `new_scope.var_names`
|
151
|
+
# The code below ensures it by merging two hashes in the same
|
152
|
+
# way as `new_scope` is constructed above
|
153
|
+
val_by_name1 = var_names.zip(val1.first).to_h
|
154
|
+
val_by_name2 = other.var_names.zip(val2.first).to_h
|
155
|
+
new_vals[val_by_name1.merge(val_by_name2).values] = val1.last*val2.last
|
156
|
+
end
|
157
|
+
end
|
158
|
+
Factor.new(new_scope, new_vals)
|
159
|
+
end
|
160
|
+
|
161
|
+
def group_by_scope_values(scope_keys)
|
162
|
+
indices = scope_keys.map { |k| index_by_var_name[k] }
|
163
|
+
@vals.group_by { |context, _val| indices.map { |i| context[i] } }
|
164
|
+
end
|
165
|
+
|
87
166
|
private
|
88
167
|
|
168
|
+
def delete_by_indices(array, indices)
|
169
|
+
result = array.dup
|
170
|
+
indices.map { |i| result[i] = nil }
|
171
|
+
result.compact
|
172
|
+
end
|
173
|
+
|
89
174
|
def initialize(scope = {}, vals = {})
|
90
175
|
@scope = scope
|
91
176
|
@vals = vals
|
data/lib/bayesnet/graph.rb
CHANGED
@@ -5,16 +5,15 @@ require "bayesnet/node"
|
|
5
5
|
module Bayesnet
|
6
6
|
# Acyclic graph
|
7
7
|
class Graph
|
8
|
+
include Bayesnet::Logging
|
9
|
+
|
8
10
|
attr_reader :nodes
|
9
11
|
|
10
12
|
def initialize
|
11
13
|
@nodes = {}
|
12
14
|
end
|
13
15
|
|
14
|
-
|
15
|
-
nodes.keys
|
16
|
-
end
|
17
|
-
|
16
|
+
# +++ Graph DSL +++
|
18
17
|
def node(name, parents: [], &block)
|
19
18
|
raise Error, "DSL error, #node requires a &block" unless block
|
20
19
|
|
@@ -22,21 +21,100 @@ module Bayesnet
|
|
22
21
|
node.instance_eval(&block)
|
23
22
|
@nodes[name] = node
|
24
23
|
end
|
24
|
+
# --- Graph DSL ---
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
# returns names of all nodes
|
27
|
+
def var_names
|
28
|
+
nodes.keys
|
29
|
+
end
|
30
|
+
|
31
|
+
# returns normalized distribution reduced to `evidence`
|
32
|
+
# and marginalized over `over`
|
33
|
+
def distribution(over: [], evidence: {}, algorithm: :variables_elimination)
|
34
|
+
case algorithm
|
35
|
+
when :brute_force
|
36
|
+
joint_distribution
|
37
|
+
.reduce_to(evidence)
|
38
|
+
.marginalize(over)
|
39
|
+
.normalize
|
40
|
+
when :variables_elimination
|
41
|
+
reduced_factors = nodes.values.map(&:factor).map { |f| f.reduce_to(evidence) }
|
42
|
+
not_include_in_order = evidence.keys.to_set + over.to_set
|
43
|
+
variables_order = elimination_order.reject { |v| not_include_in_order.include?(v) }
|
44
|
+
distribution = eliminate_variables(variables_order, reduced_factors)
|
45
|
+
distribution.normalize
|
46
|
+
else
|
47
|
+
raise "Uknown algorithm #{algorithm}"
|
29
48
|
end
|
30
49
|
end
|
31
50
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
51
|
+
def elimination_order
|
52
|
+
return @order if @order
|
53
|
+
@order = []
|
54
|
+
edges = Set.new
|
55
|
+
@nodes.each do |name, node|
|
56
|
+
parents = node.parent_nodes.keys
|
57
|
+
parents.each { |p| edges.add([name, p].to_set) }
|
58
|
+
parents.combination(2) { |p1, p2| edges.add([p1, p2].to_set) }
|
59
|
+
end
|
60
|
+
# edges now are moralized graph of `self`, just represented differently as
|
61
|
+
# set of edges
|
62
|
+
|
63
|
+
remaining_nodes = nodes.keys.to_set
|
64
|
+
until remaining_nodes.empty?
|
65
|
+
best_node = find_min_neighbor(remaining_nodes, edges)
|
66
|
+
remaining_nodes.delete(best_node)
|
67
|
+
@order.push(best_node)
|
68
|
+
clique = edges.select { |e| e.include?(best_node) }
|
69
|
+
edges -= clique
|
70
|
+
if edges.empty? #i.e. clique is the last edge
|
71
|
+
@order += remaining_nodes.to_a
|
72
|
+
remaining_nodes = Set.new
|
73
|
+
end
|
74
|
+
clique.
|
75
|
+
map { |e| e.delete(best_node) }.
|
76
|
+
map(&:first).
|
77
|
+
combination(2) { |p1, p2| edges.add([p1,p2].to_set) }
|
78
|
+
end
|
79
|
+
@order
|
80
|
+
end
|
81
|
+
|
82
|
+
def find_min_neighbor(remaining_nodes, edges)
|
83
|
+
result = nil
|
84
|
+
min_neighbors = nil
|
85
|
+
remaining_nodes.each do |name, _|
|
86
|
+
neighbors = edges.count { |e| e.include?(name) }
|
87
|
+
if min_neighbors.nil? || neighbors < min_neighbors
|
88
|
+
min_neighbors = neighbors
|
89
|
+
result = name
|
90
|
+
end
|
91
|
+
end
|
92
|
+
result
|
93
|
+
end
|
94
|
+
|
95
|
+
def eliminate_variables(variables_order, factors)
|
96
|
+
logger.debug "Eliminating variables #{variables_order} from #{factors.size} factors #{factors.map(&:var_names)}"
|
97
|
+
remaining_factors = factors.to_set
|
98
|
+
variables_order.each do |var_name|
|
99
|
+
logger.debug "Eliminating '#{var_name}'..."
|
100
|
+
grouped_factors = remaining_factors.select { |f| f.var_names.include?(var_name) }
|
101
|
+
remaining_factors -= grouped_factors
|
102
|
+
logger.debug "Building new factor out of #{grouped_factors.size} factors having '#{var_name}' - #{grouped_factors.map(&:var_names)}"
|
103
|
+
product_factor = grouped_factors.reduce(&:*)
|
104
|
+
logger.debug "Removing variable from new factor"
|
105
|
+
new_factor = product_factor.eliminate(var_name)
|
106
|
+
logger.debug "New factor variables are #{new_factor.var_names}"
|
107
|
+
remaining_factors.add(new_factor)
|
108
|
+
logger.debug "The variable '#{var_name}' is elminated"
|
109
|
+
end
|
110
|
+
logger.debug "Non-eliminated variables are #{remaining_factors.map(&:var_names).flatten.uniq}"
|
111
|
+
result = remaining_factors.reduce(&:*)
|
112
|
+
logger.debug "Eliminating is done"
|
113
|
+
result
|
37
114
|
end
|
38
115
|
|
39
116
|
# This is MAP query, i.e. Maximum a Posteriori
|
117
|
+
# returns value of `var_name` having maximum likelihood, when `evidence` is observed
|
40
118
|
def most_likely_value(var_name, evidence:)
|
41
119
|
posterior_distribution = distribution(over: [var_name], evidence: evidence)
|
42
120
|
mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
|
@@ -49,6 +127,7 @@ module Bayesnet
|
|
49
127
|
posterior_distribution[*over_vars.values]
|
50
128
|
end
|
51
129
|
|
130
|
+
# Essentially it builds product of all nodes' factors
|
52
131
|
def joint_distribution
|
53
132
|
return @joint_distribution if @joint_distribution
|
54
133
|
|
@@ -75,5 +154,11 @@ module Bayesnet
|
|
75
154
|
def parameters
|
76
155
|
nodes.values.map(&:parameters).sum
|
77
156
|
end
|
157
|
+
|
158
|
+
def resolve_factors
|
159
|
+
@nodes.values.each do |node|
|
160
|
+
node.resolve_factor(@nodes.slice(*node.parent_nodes))
|
161
|
+
end
|
162
|
+
end
|
78
163
|
end
|
79
164
|
end
|
data/lib/bayesnet/node.rb
CHANGED
@@ -6,8 +6,10 @@ module Bayesnet
|
|
6
6
|
@name = name
|
7
7
|
@parent_nodes = parent_nodes
|
8
8
|
@values = []
|
9
|
+
@factor = Factor.new
|
9
10
|
end
|
10
11
|
|
12
|
+
# +++ Node DSL +++
|
11
13
|
def values(hash_or_array = nil, &block)
|
12
14
|
case hash_or_array
|
13
15
|
when NilClass
|
@@ -28,6 +30,21 @@ module Bayesnet
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
33
|
+
def distributions(&block)
|
34
|
+
instance_eval(&block)
|
35
|
+
end
|
36
|
+
# --- Node DSL ---
|
37
|
+
|
38
|
+
def parameters
|
39
|
+
(values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
|
40
|
+
end
|
41
|
+
|
42
|
+
def as(distribution, given:)
|
43
|
+
@values.zip(distribution).each do |value, probability|
|
44
|
+
@factor.val [value] + given + [probability]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
31
48
|
def resolve_factor(parent_nodes)
|
32
49
|
@parent_nodes = parent_nodes
|
33
50
|
if @factor.is_a?(Proc)
|
@@ -43,18 +60,5 @@ module Bayesnet
|
|
43
60
|
end
|
44
61
|
end
|
45
62
|
|
46
|
-
def distributions(&block)
|
47
|
-
instance_eval(&block)
|
48
|
-
end
|
49
|
-
|
50
|
-
def parameters
|
51
|
-
(values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
|
52
|
-
end
|
53
|
-
|
54
|
-
def as(distribution, given:)
|
55
|
-
@values.zip(distribution).each do |value, probability|
|
56
|
-
@factor.val [value] + given + [probability]
|
57
|
-
end
|
58
|
-
end
|
59
63
|
end
|
60
64
|
end
|
data/lib/bayesnet/version.rb
CHANGED
data/lib/bayesnet.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bayesnet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Furmanov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: m
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- lib/bayesnet/error.rb
|
78
78
|
- lib/bayesnet/factor.rb
|
79
79
|
- lib/bayesnet/graph.rb
|
80
|
+
- lib/bayesnet/logging.rb
|
80
81
|
- lib/bayesnet/node.rb
|
81
82
|
- lib/bayesnet/parsers/bif.rb
|
82
83
|
- lib/bayesnet/parsers/bif.treetop
|