bayesnet 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +16 -1
- data/Gemfile +1 -1
- data/Gemfile.lock +6 -29
- data/README.md +68 -6
- data/Rakefile +5 -2
- data/bayesnet.gemspec +1 -2
- data/doc/morning-mood-model.png +0 -0
- data/lib/bayesnet/dsl.rb +4 -0
- data/lib/bayesnet/error.rb +2 -0
- data/lib/bayesnet/factor.rb +47 -33
- data/lib/bayesnet/graph.rb +24 -8
- data/lib/bayesnet/node.rb +16 -4
- data/lib/bayesnet/parsers/bif.rb +2484 -0
- data/lib/bayesnet/parsers/bif.treetop +250 -0
- data/lib/bayesnet/parsers/builder.rb +37 -0
- data/lib/bayesnet/version.rb +1 -1
- data/lib/bayesnet.rb +5 -0
- metadata +8 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a746d994d25c279f3246613b9a918fb84720c7c9c78f85ce1ffdc5fbd6bcf9c
|
4
|
+
data.tar.gz: 3b8ee59eab90bf75172239601ddef479926f5a27db475688f1191c71298ca757
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72985a24e9d529b04e8d275a46cc6eadbd55aa4380f2aed73a41d2e3c3c7e7528419aa99fa0004d4d3aab1692484021c3b577e1f9262ee9ef4d89664523d8335
|
7
|
+
data.tar.gz: 8be39618f74ccd85750569a74e18a0a384aa3a12c4c52a35315958d8d4ad4045abbe8bee83d510459e3fd6b29de68bab4120e3540cee0cadc9a024f1b2389ffb
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0
|
1
|
+
3.1.0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
-
## [0.
|
3
|
+
## [0.5.0] - 2022-02-26
|
4
|
+
|
5
|
+
- Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
|
6
|
+
- Fixing inference bug
|
7
|
+
- Network children nodes could be specified **before** their parents
|
8
|
+
|
9
|
+
## [0.0.3] - 2021-12-29
|
10
|
+
|
11
|
+
- Fixing terminology used in Factor class
|
12
|
+
|
13
|
+
## [0.0.2] - 2021-12-28
|
14
|
+
|
15
|
+
- README, CI/CD for Ruby 2.6, 2.7, 3.1 added
|
16
|
+
|
17
|
+
## [0.0.1] - 2021-12-28
|
4
18
|
|
5
19
|
- Initial release
|
20
|
+
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bayesnet (0.0
|
4
|
+
bayesnet (0.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
ast (2.4.2)
|
10
9
|
byebug (11.1.3)
|
11
10
|
coderay (1.1.3)
|
12
11
|
m (1.5.1)
|
@@ -14,38 +13,16 @@ GEM
|
|
14
13
|
rake (>= 0.9.2.2)
|
15
14
|
method_source (1.0.0)
|
16
15
|
minitest (5.15.0)
|
17
|
-
|
18
|
-
parser (3.0.3.2)
|
19
|
-
ast (~> 2.4.1)
|
16
|
+
polyglot (0.3.5)
|
20
17
|
pry (0.13.1)
|
21
18
|
coderay (~> 1.1)
|
22
19
|
method_source (~> 1.0)
|
23
20
|
pry-byebug (3.9.0)
|
24
21
|
byebug (~> 11.0)
|
25
22
|
pry (~> 0.13.0)
|
26
|
-
rainbow (3.0.0)
|
27
23
|
rake (13.0.6)
|
28
|
-
|
29
|
-
|
30
|
-
rubocop (1.23.0)
|
31
|
-
parallel (~> 1.10)
|
32
|
-
parser (>= 3.0.0.0)
|
33
|
-
rainbow (>= 2.2.2, < 4.0)
|
34
|
-
regexp_parser (>= 1.8, < 3.0)
|
35
|
-
rexml
|
36
|
-
rubocop-ast (>= 1.12.0, < 2.0)
|
37
|
-
ruby-progressbar (~> 1.7)
|
38
|
-
unicode-display_width (>= 1.4.0, < 3.0)
|
39
|
-
rubocop-ast (1.15.0)
|
40
|
-
parser (>= 3.0.1.1)
|
41
|
-
rubocop-performance (1.12.0)
|
42
|
-
rubocop (>= 1.7.0, < 2.0)
|
43
|
-
rubocop-ast (>= 0.4.0)
|
44
|
-
ruby-progressbar (1.11.0)
|
45
|
-
standard (1.5.0)
|
46
|
-
rubocop (= 1.23.0)
|
47
|
-
rubocop-performance (= 1.12.0)
|
48
|
-
unicode-display_width (2.1.0)
|
24
|
+
treetop (1.6.11)
|
25
|
+
polyglot (~> 0.3)
|
49
26
|
|
50
27
|
PLATFORMS
|
51
28
|
x86_64-darwin-19
|
@@ -57,7 +34,7 @@ DEPENDENCIES
|
|
57
34
|
minitest (~> 5.0)
|
58
35
|
pry-byebug (~> 3.9.0)
|
59
36
|
rake (~> 13.0)
|
60
|
-
|
37
|
+
treetop (~> 1.6)
|
61
38
|
|
62
39
|
BUNDLED WITH
|
63
|
-
2.
|
40
|
+
2.3.3
|
data/README.md
CHANGED
@@ -1,15 +1,80 @@
|
|
1
1
|
# Bayesnet
|
2
2
|
|
3
|
-
|
3
|
+
This gem provides a DSL for constructing Bayesian networks and lets you execute basic inference queries. It is also capable of parsing the .BIF format ([The Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)).
|
4
4
|
|
5
|
-
|
5
|
+
|
6
|
+
### Example:
|
7
|
+
|
8
|
+
Suppose someone decides to study how hours of sleep, followed by a cup of coffee, affect morning mood.
|
9
|
+
Here is the graphical model:
|
10
|
+
|
11
|
+
![model](./doc/morning-mood-model.png "Morning Mood Model")
|
12
|
+
|
13
|
+
`Sleep hours` could have values `:six, :seven, :eight`. `Mood` could be `:bad, :moderate, :good` and variable
|
14
|
+
`Coffee` could take values `:yes, :no`. Here is how accumulated statistics could be compiled in a Bayesian network and then defined in the code:
|
15
|
+
|
16
|
+
```
|
17
|
+
net = Bayesnet.define do
|
18
|
+
node :coffee do
|
19
|
+
values yes: 0.7, no: 0.3
|
20
|
+
end
|
21
|
+
|
22
|
+
node :sleep_hours do
|
23
|
+
values six: 0.1, seven: 0.3, eight: 0.6
|
24
|
+
end
|
25
|
+
|
26
|
+
node :mood, parents: [:coffee, :sleep_hours] do
|
27
|
+
values [:good, :moderate, :bad] do
|
28
|
+
distributions do
|
29
|
+
as [0.8, 0.1, 0.1], given: [:yes, :eight]
|
30
|
+
as [0.6, 0.2, 0.2], given: [:yes, :seven]
|
31
|
+
as [0.4, 0.4, 0.2], given: [:yes, :six]
|
32
|
+
as [0.7, 0.2, 0.1], given: [:no, :eight]
|
33
|
+
as [0.5, 0.3, 0.2], given: [:no, :seven]
|
34
|
+
as [0.3, 0.4, 0.3], given: [:no, :six]
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
```
|
40
|
+
|
41
|
+
The above should be read as:
|
42
|
+
someone could be observed having morning coffee in 70% of all cases and when that person has a morning coffee after sleeping 8 hours, chances to find her in
|
43
|
+
- `:good` mood - 80%
|
44
|
+
- `:moderate` mood - 10%
|
45
|
+
- `:bad` mood - 10%
|
46
|
+
|
47
|
+
i.e. `[0.8, 0.1, 0.1]` is a conditional distribution.
|
48
|
+
|
49
|
+
Given the model above the following queries could be executed:
|
50
|
+
|
51
|
+
#### Given sleep time is six hours and mood is good, what are the chances that a cup of coffee has been consumed?
|
52
|
+
```
|
53
|
+
net.chances({coffee: :yes}, evidence: {mood: :good, sleep_hours: :six}) # 0.757
|
54
|
+
```
|
55
|
+
|
56
|
+
#### Given sleep time is six hours and mood is good, what is the most likely value for a `:coffee` variable?
|
57
|
+
```
|
58
|
+
net.most_likely_value(:coffee, evidence: {mood: :good, sleep_hours: :six}) # :yes
|
59
|
+
```
|
60
|
+
|
61
|
+
#### A broader question than the two above: Given sleep time is six hours and mood is good, what is the distribution for a `:coffee` variable?
|
62
|
+
```
|
63
|
+
net.distribution(over: [:coffee], evidence: {mood: :good, sleep_hours: :six}) # [:yes, 0.757], [:no, 0.243]
|
64
|
+
```
|
65
|
+
|
66
|
+
The inference is based on summing over the joint distribution, i.e. it is the simplest and
|
67
|
+
most expensive way to calculate it. No optimization is implemented in this version; the code
|
68
|
+
is more of a proof of concept for the API.
|
69
|
+
|
70
|
+
### [Another example](https://afurmanov.com/reducing-anxiety-with-bayesian-network) of using this gem
|
6
71
|
|
7
72
|
## Installation
|
8
73
|
|
9
74
|
Add this line to your application's Gemfile:
|
10
75
|
|
11
76
|
```ruby
|
12
|
-
|
77
|
+
gem 'bayesnet'
|
13
78
|
```
|
14
79
|
|
15
80
|
And then execute:
|
@@ -20,9 +85,6 @@ Or install it yourself as:
|
|
20
85
|
|
21
86
|
$ gem install bayesnet
|
22
87
|
|
23
|
-
## Usage
|
24
|
-
|
25
|
-
TODO: Write usage instructions here
|
26
88
|
|
27
89
|
## Development
|
28
90
|
|
data/Rakefile
CHANGED
@@ -9,6 +9,9 @@ Rake::TestTask.new(:test) do |t|
|
|
9
9
|
t.test_files = FileList["test/**/*_test.rb"]
|
10
10
|
end
|
11
11
|
|
12
|
-
|
12
|
+
Rake::TestTask.new("regen-bif") do |t|
|
13
|
+
`rm ./lib/bayesnet/parsers/bif.rb`
|
14
|
+
`tt ./lib/bayesnet/parsers/bif.treetop`
|
15
|
+
end
|
13
16
|
|
14
|
-
task default: %i[test
|
17
|
+
task default: %i[test]
|
data/bayesnet.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
|
16
16
|
spec.metadata["homepage_uri"] = spec.homepage
|
17
17
|
spec.metadata["source_code_uri"] = spec.homepage
|
18
|
-
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/
|
18
|
+
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"
|
19
19
|
|
20
20
|
# Specify which files should be added to the gem when it is released.
|
21
21
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
@@ -34,7 +34,6 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.add_development_dependency "m", "~> 1.5.0"
|
35
35
|
spec.add_development_dependency "minitest", "~> 5.0"
|
36
36
|
spec.add_development_dependency "pry-byebug", "~> 3.9.0"
|
37
|
-
spec.add_development_dependency "standard", "~> 1.3"
|
38
37
|
|
39
38
|
# For more information and examples about making a new gem, checkout our
|
40
39
|
# guide at: https://bundler.io/guides/creating_gem.html
|
Binary file
|
data/lib/bayesnet/dsl.rb
CHANGED
data/lib/bayesnet/error.rb
CHANGED
data/lib/bayesnet/factor.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Bayesnet
|
2
4
|
# Factor is a function of several variables (A, B, ...), each defined on values from a finite set
|
3
5
|
class Factor
|
@@ -8,26 +10,26 @@ module Bayesnet
|
|
8
10
|
end
|
9
11
|
|
10
12
|
# Specifies variable name together with its values
|
11
|
-
def
|
12
|
-
@
|
13
|
+
def scope(var_name_to_values)
|
14
|
+
@scope.merge!(var_name_to_values)
|
13
15
|
end
|
14
16
|
|
15
|
-
# Specifies
|
16
|
-
def val(*
|
17
|
-
|
18
|
-
@vals[
|
17
|
+
# Specifies value for a scope context. Value is the last element in `context_and_val`
|
18
|
+
def val(*context_and_val)
|
19
|
+
context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
|
20
|
+
@vals[context_and_val[0..-2]] = context_and_val[-1]
|
19
21
|
end
|
20
22
|
|
21
23
|
def var_names
|
22
|
-
@
|
24
|
+
@scope.keys
|
23
25
|
end
|
24
26
|
|
25
|
-
def [](*
|
26
|
-
key = if
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
def [](*context)
|
28
|
+
key = if context.size == 1 && context[0].is_a?(Hash)
|
29
|
+
context[0].slice(*var_names).values
|
30
|
+
else
|
31
|
+
context
|
32
|
+
end
|
31
33
|
@vals[key]
|
32
34
|
end
|
33
35
|
|
@@ -35,9 +37,10 @@ module Bayesnet
|
|
35
37
|
self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
|
36
38
|
end
|
37
39
|
|
38
|
-
def
|
40
|
+
def contextes(*var_names)
|
39
41
|
return [] if var_names.empty?
|
40
|
-
|
42
|
+
|
43
|
+
@scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
|
41
44
|
end
|
42
45
|
|
43
46
|
def values
|
@@ -47,41 +50,52 @@ module Bayesnet
|
|
47
50
|
def normalize
|
48
51
|
vals = @vals.clone
|
49
52
|
norm_factor = vals.map(&:last).sum * 1.0
|
50
|
-
vals.each { |k,
|
51
|
-
self.class.new(@
|
53
|
+
vals.each { |k, _v| vals[k] /= norm_factor }
|
54
|
+
self.class.new(@scope.clone, vals)
|
52
55
|
end
|
53
56
|
|
54
|
-
def
|
55
|
-
|
57
|
+
def reduce_to(context)
|
58
|
+
# TODO: use Hash#except when Ruby 2.6 support no longer needed
|
59
|
+
context_keys_set = context.keys.to_set
|
60
|
+
scope = @scope.reject { |k, _| context_keys_set.include?(k) }
|
56
61
|
|
57
|
-
|
58
|
-
indices =
|
59
|
-
vals = @vals.select { |k,
|
60
|
-
vals.transform_keys! { |k| k
|
62
|
+
context_vals = context.values
|
63
|
+
indices = context.keys.map { |k| index_by_var_name[k] }
|
64
|
+
vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
|
65
|
+
vals.transform_keys! { |k| delete_by_indices(k, indices) }
|
66
|
+
|
67
|
+
self.class.new(scope, vals)
|
68
|
+
end
|
61
69
|
|
62
|
-
|
70
|
+
def delete_by_indices(array, indices)
|
71
|
+
result = array.dup
|
72
|
+
indices.map { |i| result[i] = nil }
|
73
|
+
result.compact
|
63
74
|
end
|
64
75
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
76
|
+
# Groups values by their context over `var_names` and sums out the remaining variables.
|
77
|
+
def marginalize(var_names)
|
78
|
+
scope = @scope.slice(*var_names)
|
79
|
+
|
80
|
+
indices = scope.keys.map { |k| index_by_var_name[k] }
|
81
|
+
vals = @vals.group_by { |context, _val| indices.map { |i| context[i] } }
|
69
82
|
vals.transform_values! { |v| v.map(&:last).sum }
|
70
|
-
|
71
|
-
|
83
|
+
|
84
|
+
self.class.new(scope, vals)
|
72
85
|
end
|
73
86
|
|
74
87
|
private
|
75
88
|
|
76
|
-
def initialize(
|
77
|
-
@
|
89
|
+
def initialize(scope = {}, vals = {})
|
90
|
+
@scope = scope
|
78
91
|
@vals = vals
|
79
92
|
end
|
80
93
|
|
81
94
|
def index_by_var_name
|
82
95
|
return @index_by_var_name if @index_by_var_name
|
96
|
+
|
83
97
|
@index_by_var_name = {}
|
84
|
-
@
|
98
|
+
@scope.each_with_index { |(k, _v), i| @index_by_var_name[k] = i }
|
85
99
|
@index_by_var_name
|
86
100
|
end
|
87
101
|
end
|
data/lib/bayesnet/graph.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "bayesnet/node"
|
2
4
|
|
3
5
|
module Bayesnet
|
6
|
+
# Acyclic graph
|
4
7
|
class Graph
|
5
8
|
attr_reader :nodes
|
6
9
|
|
@@ -14,20 +17,29 @@ module Bayesnet
|
|
14
17
|
|
15
18
|
def node(name, parents: [], &block)
|
16
19
|
raise Error, "DSL error, #node requires a &block" unless block
|
17
|
-
|
20
|
+
|
21
|
+
node = Node.new(name, parents)
|
18
22
|
node.instance_eval(&block)
|
19
23
|
@nodes[name] = node
|
20
24
|
end
|
21
25
|
|
26
|
+
def resolve_factors
|
27
|
+
@nodes.values.each do |node|
|
28
|
+
node.resolve_factor(@nodes.slice(*node.parent_nodes))
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
22
32
|
def distribution(over: [], evidence: {})
|
23
|
-
|
24
|
-
|
33
|
+
joint_distribution
|
34
|
+
.reduce_to(evidence)
|
35
|
+
.marginalize(over)
|
36
|
+
.normalize
|
25
37
|
end
|
26
38
|
|
27
39
|
# This is a MAP query, i.e. Maximum a Posteriori
|
28
40
|
def most_likely_value(var_name, evidence:)
|
29
41
|
posterior_distribution = distribution(over: [var_name], evidence: evidence)
|
30
|
-
mode = posterior_distribution.
|
42
|
+
mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
|
31
43
|
mode.first.first
|
32
44
|
end
|
33
45
|
|
@@ -47,17 +59,21 @@ module Bayesnet
|
|
47
59
|
|
48
60
|
factor = Factor.new
|
49
61
|
@nodes.each do |node_name, node|
|
50
|
-
factor.
|
62
|
+
factor.scope node_name => node.values
|
51
63
|
end
|
52
64
|
|
53
|
-
factor.
|
54
|
-
val_by_name = var_names.zip(
|
65
|
+
factor.contextes(*var_names).each do |context|
|
66
|
+
val_by_name = var_names.zip(context).to_h
|
55
67
|
val = nodes.values.reduce(1.0) do |prob, node|
|
56
68
|
prob * node.factor[val_by_name]
|
57
69
|
end
|
58
|
-
factor.val
|
70
|
+
factor.val context + [val]
|
59
71
|
end
|
60
72
|
@joint_distribution = factor.normalize
|
61
73
|
end
|
74
|
+
|
75
|
+
def parameters
|
76
|
+
nodes.values.map(&:parameters).sum
|
77
|
+
end
|
62
78
|
end
|
63
79
|
end
|
data/lib/bayesnet/node.rb
CHANGED
@@ -16,7 +16,7 @@ module Bayesnet
|
|
16
16
|
@values = hash_or_array.keys
|
17
17
|
node = self
|
18
18
|
@factor = Factor.build do
|
19
|
-
|
19
|
+
scope node.name => node.values
|
20
20
|
hash_or_array.each do |value, probability|
|
21
21
|
val [value, probability]
|
22
22
|
end
|
@@ -24,14 +24,22 @@ module Bayesnet
|
|
24
24
|
when Array
|
25
25
|
raise Error, "DSL error, #values requires a &block when first argument is an Array" unless block
|
26
26
|
@values = hash_or_array
|
27
|
+
@factor = block
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def resolve_factor(parent_nodes)
|
32
|
+
@parent_nodes = parent_nodes
|
33
|
+
if @factor.is_a?(Proc)
|
34
|
+
proc = @factor
|
27
35
|
node = self
|
28
36
|
@factor = Factor.build do
|
29
|
-
|
37
|
+
scope node.name => node.values
|
30
38
|
node.parent_nodes.each do |parent_node_name, parent_node|
|
31
|
-
|
39
|
+
scope parent_node_name => parent_node.values
|
32
40
|
end
|
33
41
|
end
|
34
|
-
instance_eval(&
|
42
|
+
instance_eval(&proc)
|
35
43
|
end
|
36
44
|
end
|
37
45
|
|
@@ -39,6 +47,10 @@ module Bayesnet
|
|
39
47
|
instance_eval(&block)
|
40
48
|
end
|
41
49
|
|
50
|
+
def parameters
|
51
|
+
(values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
|
52
|
+
end
|
53
|
+
|
42
54
|
def as(distribution, given:)
|
43
55
|
@values.zip(distribution).each do |value, probability|
|
44
56
|
@factor.val [value] + given + [probability]
|