bayesnet 0.1.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -1
- data/Gemfile.lock +1 -1
- data/lib/bayesnet/factor.rb +107 -22
- data/lib/bayesnet/graph.rb +97 -12
- data/lib/bayesnet/logging.rb +13 -0
- data/lib/bayesnet/node.rb +17 -13
- data/lib/bayesnet/version.rb +1 -1
- data/lib/bayesnet.rb +2 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7591665046345784f55275c06d1129fd91ee3f098f3800b2c03b6f9bbfd8e172
|
4
|
+
data.tar.gz: ec9009ab90593d42fa2506a230e5900d5a39bebb1a7fbd874953d6c86022b2eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e668b431f55f9239ad3ae06cdc020098ff4a8b68f7934283d6f77a3969a014aa17f68df12b23013d1681e79fdecd8f4c8e4da105e9430a16d2ab8075bbcca7b
|
7
|
+
data.tar.gz: 75eceac300152cfa8d0ce736b16939e779f127dc844ea0c8ce8e1d0f363b04048107533f64c744420b2631fd3e8678d6812e15c6b770e2b37a598ae290af773a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.6.0] - 2022-06-26
|
4
|
+
- Using the variable elimination algorithm to build a distribution
|
5
|
+
|
3
6
|
## [0.5.0] - 2022-02-26
|
4
7
|
|
5
8
|
- Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
|
@@ -8,7 +11,7 @@
|
|
8
11
|
|
9
12
|
## [0.0.3] - 2021-12-29
|
10
13
|
|
11
|
-
- Fixing
|
14
|
+
- Fixing terminology used in Factor class
|
12
15
|
|
13
16
|
## [0.0.2] - 2021-12-28
|
14
17
|
|
data/Gemfile.lock
CHANGED
data/lib/bayesnet/factor.rb
CHANGED
@@ -1,29 +1,68 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bayesnet
|
4
|
-
# Factor if a function of
|
4
|
+
# Factor is a function of several variables (A, B, ...), where
|
5
|
+
# every variable could take values from some finite set
|
5
6
|
class Factor
|
7
|
+
# +++ Factor DSL +++
|
8
|
+
#
|
9
|
+
# Factor DSL entry point:
|
6
10
|
def self.build(&block)
|
7
11
|
factor = new
|
8
12
|
factor.instance_eval(&block)
|
9
13
|
factor
|
10
14
|
end
|
11
15
|
|
12
|
-
#
|
13
|
-
|
14
|
-
|
16
|
+
# Factor DSL
|
17
|
+
# Defining variable with list of its possible values looks like:
|
18
|
+
# ```
|
19
|
+
# Bayesnet::Factor.build do
|
20
|
+
# scope weather: %i[sunny cloudy]
|
21
|
+
# scope mood: %i[bad good]
|
22
|
+
# ...
|
23
|
+
# ```
|
24
|
+
# ^ this code defines two variables `weather` and `mood`, where
|
25
|
+
# `weather` could be :sunny or :cloudy, and
|
26
|
+
# `mood` could be :bad or :good
|
27
|
+
def scope(var_name_to_values = nil)
|
28
|
+
if var_name_to_values
|
29
|
+
@scope.merge!(var_name_to_values)
|
30
|
+
else
|
31
|
+
@scope
|
32
|
+
end
|
15
33
|
end
|
16
34
|
|
17
|
-
#
|
35
|
+
# Factor DSL
|
36
|
+
# Specifies factor value for some set of variable values, i.e.
|
37
|
+
# ```
|
38
|
+
# Bayesnet::Factor.build do
|
39
|
+
# scope weather: %i[sunny cloudy]
|
40
|
+
# scope mood: %i[bad good]
|
41
|
+
# val :sunny, :bad, 0.1
|
42
|
+
# ...
|
43
|
+
# ```
|
44
|
+
# ^ this code says the value of factor for [weather == :sunny, mood == :bad] is 0.1
|
18
45
|
def val(*context_and_val)
|
19
46
|
context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
|
20
47
|
@vals[context_and_val[0..-2]] = context_and_val[-1]
|
21
48
|
end
|
49
|
+
# --- Factor DSL ---
|
22
50
|
|
51
|
+
# List of variable names
|
23
52
|
def var_names
|
24
53
|
@scope.keys
|
25
54
|
end
|
26
55
|
|
56
|
+
# accesses factor value, i.e.
|
57
|
+
# ```
|
58
|
+
# factor = Bayesnet::Factor.build do
|
59
|
+
# scope weather: %i[sunny cloudy]
|
60
|
+
# scope mood: %i[bad good]
|
61
|
+
# val :sunny, :bad, 0.1
|
62
|
+
# ...
|
63
|
+
# end
|
64
|
+
# factor[:sunny, :bad] # 0.1
|
65
|
+
# ```
|
27
66
|
def [](*context)
|
28
67
|
key = if context.size == 1 && context[0].is_a?(Hash)
|
29
68
|
context[0].slice(*var_names).values
|
@@ -33,20 +72,19 @@ module Bayesnet
|
|
33
72
|
@vals[key]
|
34
73
|
end
|
35
74
|
|
36
|
-
|
37
|
-
self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
|
38
|
-
end
|
39
|
-
|
75
|
+
# returns all combinations of values of `var_names`
|
40
76
|
def contextes(*var_names)
|
41
77
|
return [] if var_names.empty?
|
42
78
|
|
43
79
|
@scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
|
44
80
|
end
|
45
81
|
|
82
|
+
# returns all possible values
|
46
83
|
def values
|
47
84
|
@vals.values
|
48
85
|
end
|
49
86
|
|
87
|
+
# returns new normalized factor, i.e. where sum of all values is 1.0
|
50
88
|
def normalize
|
51
89
|
vals = @vals.clone
|
52
90
|
norm_factor = vals.map(&:last).sum * 1.0
|
@@ -54,26 +92,29 @@ module Bayesnet
|
|
54
92
|
self.class.new(@scope.clone, vals)
|
55
93
|
end
|
56
94
|
|
95
|
+
# Returns factor built as follows:
|
96
|
+
# 1. Original factor gets filtered out by variables having values compatible with `context`
|
97
|
+
# 2. Returned factor does not have any variables from `context` (because they have
|
98
|
+
# same values, after step 1)
|
99
|
+
# The `context` argument is supposed to be evidence, something like
|
100
|
+
# `{weather: :sunny}`
|
57
101
|
def reduce_to(context)
|
58
|
-
|
59
|
-
|
60
|
-
|
102
|
+
limited_context = context.slice(*scope.keys)
|
103
|
+
return self.class.new(@scope, @vals) if limited_context.empty?
|
104
|
+
limited_scope = @scope.slice(*(@scope.keys - limited_context.keys))
|
61
105
|
|
62
|
-
context_vals =
|
63
|
-
indices =
|
106
|
+
context_vals = limited_context.values
|
107
|
+
indices = limited_context.keys.map { |k| index_by_var_name[k] }
|
64
108
|
vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
|
65
109
|
vals.transform_keys! { |k| delete_by_indices(k, indices) }
|
66
110
|
|
67
|
-
self.class.new(
|
68
|
-
end
|
69
|
-
|
70
|
-
def delete_by_indices(array, indices)
|
71
|
-
result = array.dup
|
72
|
-
indices.map { |i| result[i] = nil }
|
73
|
-
result.compact
|
111
|
+
self.class.new(limited_scope, vals)
|
74
112
|
end
|
75
113
|
|
76
|
-
#
|
114
|
+
# Returns new factor defined over `var_names`, all other variables
|
115
|
+
# get eliminated. For every combination of `var_names`'s values
|
116
|
+
# the value of new factor is defined by summing up values in original factor
|
117
|
+
# having compatible value
|
77
118
|
def marginalize(var_names)
|
78
119
|
scope = @scope.slice(*var_names)
|
79
120
|
|
@@ -84,8 +125,52 @@ module Bayesnet
|
|
84
125
|
self.class.new(scope, vals)
|
85
126
|
end
|
86
127
|
|
128
|
+
def eliminate(var_name)
|
129
|
+
keep_var_names = var_names
|
130
|
+
keep_var_names.delete(var_name)
|
131
|
+
marginalize(keep_var_names)
|
132
|
+
end
|
133
|
+
|
134
|
+
def select(subcontext)
|
135
|
+
@vals.select do |context, _|
|
136
|
+
var_names.zip(context).slice(subcontext.keys) == subcontext
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def *(other)
|
141
|
+
common_scope = @scope.keys & other.scope.keys
|
142
|
+
new_scope = scope.merge(other.scope)
|
143
|
+
new_vals = {}
|
144
|
+
group1 = group_by_scope_values(common_scope)
|
145
|
+
group2 = other.group_by_scope_values(common_scope)
|
146
|
+
group1.each do |scope, vals1|
|
147
|
+
combo = vals1.product(group2[scope])
|
148
|
+
combo.each do |(val1, val2)|
|
149
|
+
# values in scope must match variables order in new_scope, i.e.
|
150
|
+
# they must match `new_scope.var_names`
|
151
|
+
# The code below ensures it by merging two hashes in the same
|
152
|
+
# way as `new_scope` is constructed above
|
153
|
+
val_by_name1 = var_names.zip(val1.first).to_h
|
154
|
+
val_by_name2 = other.var_names.zip(val2.first).to_h
|
155
|
+
new_vals[val_by_name1.merge(val_by_name2).values] = val1.last*val2.last
|
156
|
+
end
|
157
|
+
end
|
158
|
+
Factor.new(new_scope, new_vals)
|
159
|
+
end
|
160
|
+
|
161
|
+
def group_by_scope_values(scope_keys)
|
162
|
+
indices = scope_keys.map { |k| index_by_var_name[k] }
|
163
|
+
@vals.group_by { |context, _val| indices.map { |i| context[i] } }
|
164
|
+
end
|
165
|
+
|
87
166
|
private
|
88
167
|
|
168
|
+
def delete_by_indices(array, indices)
|
169
|
+
result = array.dup
|
170
|
+
indices.map { |i| result[i] = nil }
|
171
|
+
result.compact
|
172
|
+
end
|
173
|
+
|
89
174
|
def initialize(scope = {}, vals = {})
|
90
175
|
@scope = scope
|
91
176
|
@vals = vals
|
data/lib/bayesnet/graph.rb
CHANGED
@@ -5,16 +5,15 @@ require "bayesnet/node"
|
|
5
5
|
module Bayesnet
|
6
6
|
# Acyclic graph
|
7
7
|
class Graph
|
8
|
+
include Bayesnet::Logging
|
9
|
+
|
8
10
|
attr_reader :nodes
|
9
11
|
|
10
12
|
def initialize
|
11
13
|
@nodes = {}
|
12
14
|
end
|
13
15
|
|
14
|
-
|
15
|
-
nodes.keys
|
16
|
-
end
|
17
|
-
|
16
|
+
# +++ Graph DSL +++
|
18
17
|
def node(name, parents: [], &block)
|
19
18
|
raise Error, "DSL error, #node requires a &block" unless block
|
20
19
|
|
@@ -22,21 +21,100 @@ module Bayesnet
|
|
22
21
|
node.instance_eval(&block)
|
23
22
|
@nodes[name] = node
|
24
23
|
end
|
24
|
+
# --- Graph DSL ---
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
# returns names of all nodes
|
27
|
+
def var_names
|
28
|
+
nodes.keys
|
29
|
+
end
|
30
|
+
|
31
|
+
# returns normalized distribution reduced to `evidence`
|
32
|
+
# and marginalized over `over`
|
33
|
+
def distribution(over: [], evidence: {}, algorithm: :variables_elimination)
|
34
|
+
case algorithm
|
35
|
+
when :brute_force
|
36
|
+
joint_distribution
|
37
|
+
.reduce_to(evidence)
|
38
|
+
.marginalize(over)
|
39
|
+
.normalize
|
40
|
+
when :variables_elimination
|
41
|
+
reduced_factors = nodes.values.map(&:factor).map { |f| f.reduce_to(evidence) }
|
42
|
+
not_include_in_order = evidence.keys.to_set + over.to_set
|
43
|
+
variables_order = elimination_order.reject { |v| not_include_in_order.include?(v) }
|
44
|
+
distribution = eliminate_variables(variables_order, reduced_factors)
|
45
|
+
distribution.normalize
|
46
|
+
else
|
47
|
+
raise "Uknown algorithm #{algorithm}"
|
29
48
|
end
|
30
49
|
end
|
31
50
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
51
|
+
def elimination_order
|
52
|
+
return @order if @order
|
53
|
+
@order = []
|
54
|
+
edges = Set.new
|
55
|
+
@nodes.each do |name, node|
|
56
|
+
parents = node.parent_nodes.keys
|
57
|
+
parents.each { |p| edges.add([name, p].to_set) }
|
58
|
+
parents.combination(2) { |p1, p2| edges.add([p1, p2].to_set) }
|
59
|
+
end
|
60
|
+
# edges now are moralized graph of `self`, just represented differently as
|
61
|
+
# set of edges
|
62
|
+
|
63
|
+
remaining_nodes = nodes.keys.to_set
|
64
|
+
until remaining_nodes.empty?
|
65
|
+
best_node = find_min_neighbor(remaining_nodes, edges)
|
66
|
+
remaining_nodes.delete(best_node)
|
67
|
+
@order.push(best_node)
|
68
|
+
clique = edges.select { |e| e.include?(best_node) }
|
69
|
+
edges -= clique
|
70
|
+
if edges.empty? #i.e. clique is the last edge
|
71
|
+
@order += remaining_nodes.to_a
|
72
|
+
remaining_nodes = Set.new
|
73
|
+
end
|
74
|
+
clique.
|
75
|
+
map { |e| e.delete(best_node) }.
|
76
|
+
map(&:first).
|
77
|
+
combination(2) { |p1, p2| edges.add([p1,p2].to_set) }
|
78
|
+
end
|
79
|
+
@order
|
80
|
+
end
|
81
|
+
|
82
|
+
def find_min_neighbor(remaining_nodes, edges)
|
83
|
+
result = nil
|
84
|
+
min_neighbors = nil
|
85
|
+
remaining_nodes.each do |name, _|
|
86
|
+
neighbors = edges.count { |e| e.include?(name) }
|
87
|
+
if min_neighbors.nil? || neighbors < min_neighbors
|
88
|
+
min_neighbors = neighbors
|
89
|
+
result = name
|
90
|
+
end
|
91
|
+
end
|
92
|
+
result
|
93
|
+
end
|
94
|
+
|
95
|
+
def eliminate_variables(variables_order, factors)
|
96
|
+
logger.debug "Eliminating variables #{variables_order} from #{factors.size} factors #{factors.map(&:var_names)}"
|
97
|
+
remaining_factors = factors.to_set
|
98
|
+
variables_order.each do |var_name|
|
99
|
+
logger.debug "Eliminating '#{var_name}'..."
|
100
|
+
grouped_factors = remaining_factors.select { |f| f.var_names.include?(var_name) }
|
101
|
+
remaining_factors -= grouped_factors
|
102
|
+
logger.debug "Building new factor out of #{grouped_factors.size} factors having '#{var_name}' - #{grouped_factors.map(&:var_names)}"
|
103
|
+
product_factor = grouped_factors.reduce(&:*)
|
104
|
+
logger.debug "Removing variable from new factor"
|
105
|
+
new_factor = product_factor.eliminate(var_name)
|
106
|
+
logger.debug "New factor variables are #{new_factor.var_names}"
|
107
|
+
remaining_factors.add(new_factor)
|
108
|
+
logger.debug "The variable '#{var_name}' is elminated"
|
109
|
+
end
|
110
|
+
logger.debug "Non-eliminated variables are #{remaining_factors.map(&:var_names).flatten.uniq}"
|
111
|
+
result = remaining_factors.reduce(&:*)
|
112
|
+
logger.debug "Eliminating is done"
|
113
|
+
result
|
37
114
|
end
|
38
115
|
|
39
116
|
# This is MAP query, i.e. Maximum a Posteriori
|
117
|
+
# returns value of `var_name` having maximum likelihood, when `evidence` is observed
|
40
118
|
def most_likely_value(var_name, evidence:)
|
41
119
|
posterior_distribution = distribution(over: [var_name], evidence: evidence)
|
42
120
|
mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
|
@@ -49,6 +127,7 @@ module Bayesnet
|
|
49
127
|
posterior_distribution[*over_vars.values]
|
50
128
|
end
|
51
129
|
|
130
|
+
# Essentially it builds the product of all nodes' factors
|
52
131
|
def joint_distribution
|
53
132
|
return @joint_distribution if @joint_distribution
|
54
133
|
|
@@ -75,5 +154,11 @@ module Bayesnet
|
|
75
154
|
def parameters
|
76
155
|
nodes.values.map(&:parameters).sum
|
77
156
|
end
|
157
|
+
|
158
|
+
def resolve_factors
|
159
|
+
@nodes.values.each do |node|
|
160
|
+
node.resolve_factor(@nodes.slice(*node.parent_nodes))
|
161
|
+
end
|
162
|
+
end
|
78
163
|
end
|
79
164
|
end
|
data/lib/bayesnet/node.rb
CHANGED
@@ -6,8 +6,10 @@ module Bayesnet
|
|
6
6
|
@name = name
|
7
7
|
@parent_nodes = parent_nodes
|
8
8
|
@values = []
|
9
|
+
@factor = Factor.new
|
9
10
|
end
|
10
11
|
|
12
|
+
# +++ Node DSL +++
|
11
13
|
def values(hash_or_array = nil, &block)
|
12
14
|
case hash_or_array
|
13
15
|
when NilClass
|
@@ -28,6 +30,21 @@ module Bayesnet
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
33
|
+
def distributions(&block)
|
34
|
+
instance_eval(&block)
|
35
|
+
end
|
36
|
+
# --- Node DSL ---
|
37
|
+
|
38
|
+
def parameters
|
39
|
+
(values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
|
40
|
+
end
|
41
|
+
|
42
|
+
def as(distribution, given:)
|
43
|
+
@values.zip(distribution).each do |value, probability|
|
44
|
+
@factor.val [value] + given + [probability]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
31
48
|
def resolve_factor(parent_nodes)
|
32
49
|
@parent_nodes = parent_nodes
|
33
50
|
if @factor.is_a?(Proc)
|
@@ -43,18 +60,5 @@ module Bayesnet
|
|
43
60
|
end
|
44
61
|
end
|
45
62
|
|
46
|
-
def distributions(&block)
|
47
|
-
instance_eval(&block)
|
48
|
-
end
|
49
|
-
|
50
|
-
def parameters
|
51
|
-
(values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
|
52
|
-
end
|
53
|
-
|
54
|
-
def as(distribution, given:)
|
55
|
-
@values.zip(distribution).each do |value, probability|
|
56
|
-
@factor.val [value] + given + [probability]
|
57
|
-
end
|
58
|
-
end
|
59
63
|
end
|
60
64
|
end
|
data/lib/bayesnet/version.rb
CHANGED
data/lib/bayesnet.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bayesnet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksandr Furmanov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: m
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- lib/bayesnet/error.rb
|
78
78
|
- lib/bayesnet/factor.rb
|
79
79
|
- lib/bayesnet/graph.rb
|
80
|
+
- lib/bayesnet/logging.rb
|
80
81
|
- lib/bayesnet/node.rb
|
81
82
|
- lib/bayesnet/parsers/bif.rb
|
82
83
|
- lib/bayesnet/parsers/bif.treetop
|