bayesnet 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +16 -1
- data/Gemfile +1 -1
- data/Gemfile.lock +6 -29
- data/README.md +68 -6
- data/Rakefile +5 -2
- data/bayesnet.gemspec +1 -2
- data/doc/morning-mood-model.png +0 -0
- data/lib/bayesnet/dsl.rb +4 -0
- data/lib/bayesnet/error.rb +2 -0
- data/lib/bayesnet/factor.rb +47 -33
- data/lib/bayesnet/graph.rb +24 -8
- data/lib/bayesnet/node.rb +16 -4
- data/lib/bayesnet/parsers/bif.rb +2484 -0
- data/lib/bayesnet/parsers/bif.treetop +250 -0
- data/lib/bayesnet/parsers/builder.rb +37 -0
- data/lib/bayesnet/version.rb +1 -1
- data/lib/bayesnet.rb +5 -0
- metadata +8 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a746d994d25c279f3246613b9a918fb84720c7c9c78f85ce1ffdc5fbd6bcf9c
|
4
|
+
data.tar.gz: 3b8ee59eab90bf75172239601ddef479926f5a27db475688f1191c71298ca757
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72985a24e9d529b04e8d275a46cc6eadbd55aa4380f2aed73a41d2e3c3c7e7528419aa99fa0004d4d3aab1692484021c3b577e1f9262ee9ef4d89664523d8335
|
7
|
+
data.tar.gz: 8be39618f74ccd85750569a74e18a0a384aa3a12c4c52a35315958d8d4ad4045abbe8bee83d510459e3fd6b29de68bab4120e3540cee0cadc9a024f1b2389ffb
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0
|
1
|
+
3.1.0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
-
## [0.
|
3
|
+
## [0.5.0] - 2022-02-26
|
4
|
+
|
5
|
+
- Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
|
6
|
+
- Fixing inference bug
|
7
|
+
- Network children nodes could be specified **before** their parents
|
8
|
+
|
9
|
+
## [0.0.3] - 2021-12-29
|
10
|
+
|
11
|
+
- Fixing terminology used in Factor class
|
12
|
+
|
13
|
+
## [0.0.2] - 2021-12-28
|
14
|
+
|
15
|
+
- README, CI/CD for Ruby 2.6, 2.7, 3.1 added
|
16
|
+
|
17
|
+
## [0.0.1] - 2021-12-28
|
4
18
|
|
5
19
|
- Initial release
|
20
|
+
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bayesnet (0.0
|
4
|
+
bayesnet (0.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
ast (2.4.2)
|
10
9
|
byebug (11.1.3)
|
11
10
|
coderay (1.1.3)
|
12
11
|
m (1.5.1)
|
@@ -14,38 +13,16 @@ GEM
|
|
14
13
|
rake (>= 0.9.2.2)
|
15
14
|
method_source (1.0.0)
|
16
15
|
minitest (5.15.0)
|
17
|
-
|
18
|
-
parser (3.0.3.2)
|
19
|
-
ast (~> 2.4.1)
|
16
|
+
polyglot (0.3.5)
|
20
17
|
pry (0.13.1)
|
21
18
|
coderay (~> 1.1)
|
22
19
|
method_source (~> 1.0)
|
23
20
|
pry-byebug (3.9.0)
|
24
21
|
byebug (~> 11.0)
|
25
22
|
pry (~> 0.13.0)
|
26
|
-
rainbow (3.0.0)
|
27
23
|
rake (13.0.6)
|
28
|
-
|
29
|
-
|
30
|
-
rubocop (1.23.0)
|
31
|
-
parallel (~> 1.10)
|
32
|
-
parser (>= 3.0.0.0)
|
33
|
-
rainbow (>= 2.2.2, < 4.0)
|
34
|
-
regexp_parser (>= 1.8, < 3.0)
|
35
|
-
rexml
|
36
|
-
rubocop-ast (>= 1.12.0, < 2.0)
|
37
|
-
ruby-progressbar (~> 1.7)
|
38
|
-
unicode-display_width (>= 1.4.0, < 3.0)
|
39
|
-
rubocop-ast (1.15.0)
|
40
|
-
parser (>= 3.0.1.1)
|
41
|
-
rubocop-performance (1.12.0)
|
42
|
-
rubocop (>= 1.7.0, < 2.0)
|
43
|
-
rubocop-ast (>= 0.4.0)
|
44
|
-
ruby-progressbar (1.11.0)
|
45
|
-
standard (1.5.0)
|
46
|
-
rubocop (= 1.23.0)
|
47
|
-
rubocop-performance (= 1.12.0)
|
48
|
-
unicode-display_width (2.1.0)
|
24
|
+
treetop (1.6.11)
|
25
|
+
polyglot (~> 0.3)
|
49
26
|
|
50
27
|
PLATFORMS
|
51
28
|
x86_64-darwin-19
|
@@ -57,7 +34,7 @@ DEPENDENCIES
|
|
57
34
|
minitest (~> 5.0)
|
58
35
|
pry-byebug (~> 3.9.0)
|
59
36
|
rake (~> 13.0)
|
60
|
-
|
37
|
+
treetop (~> 1.6)
|
61
38
|
|
62
39
|
BUNDLED WITH
|
63
|
-
2.
|
40
|
+
2.3.3
|
data/README.md
CHANGED
@@ -1,15 +1,80 @@
|
|
1
1
|
# Bayesnet
|
2
2
|
|
3
|
-
|
3
|
+
This gem provides a DSL for constructing Bayesian networks and lets you execute basic inference queries. It is also capable of parsing the .BIF format ([The Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)).
|
4
4
|
|
5
|
-
|
5
|
+
|
6
|
+
### Example:
|
7
|
+
|
8
|
+
Someone has decided to study how the number of hours slept, followed by a cup of coffee, affects morning mood.
|
9
|
+
Here is the graphical model:
|
10
|
+
|
11
|
+

|
12
|
+
|
13
|
+
`Sleep hours` could have values `:six, :seven, :eight`. `Mood` could be `:bad, :moderate, :good` and variable
|
14
|
+
`Coffee` could take values `:yes, :no`. Here is how accumulated statistics could be compiled in a Bayesian network and then defined in the code:
|
15
|
+
|
16
|
+
```
|
17
|
+
net = Bayesnet.define do
|
18
|
+
node :coffee do
|
19
|
+
values yes: 0.7, no: 0.3
|
20
|
+
end
|
21
|
+
|
22
|
+
node :sleep_hours do
|
23
|
+
values six: 0.1, seven: 0.3, eight: 0.6
|
24
|
+
end
|
25
|
+
|
26
|
+
node :mood, parents: [:coffee, :sleep_hours] do
|
27
|
+
values [:good, :moderate, :bad] do
|
28
|
+
distributions do
|
29
|
+
as [0.8, 0.1, 0.1], given: [:yes, :eight]
|
30
|
+
as [0.6, 0.2, 0.2], given: [:yes, :seven]
|
31
|
+
as [0.4, 0.4, 0.2], given: [:yes, :six]
|
32
|
+
as [0.7, 0.2, 0.1], given: [:no, :eight]
|
33
|
+
as [0.5, 0.3, 0.2], given: [:no, :seven]
|
34
|
+
as [0.3, 0.4, 0.3], given: [:no, :six]
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
```
|
40
|
+
|
41
|
+
The above must be read as:
|
42
|
+
someone could be observed having morning coffee in 70% of all cases and when that person has a morning coffee after sleeping 8 hours, chances to find her in
|
43
|
+
- `:good` mood - 80%
|
44
|
+
- `:moderate` mood - 10%
|
45
|
+
- `:bad` mood - 10%
|
46
|
+
|
47
|
+
i.e. `[0.8, 0.1, 0.1]` is a conditional distribution.
|
48
|
+
|
49
|
+
Given the model above the following queries could be executed:
|
50
|
+
|
51
|
+
#### Given sleep time is six hours and mood is good, what are the chances that a cup of coffee has been consumed?
|
52
|
+
```
|
53
|
+
net.chances({coffee: :yes}, evidence: {mood: :good, sleep_hours: :six}) # 0.757
|
54
|
+
```
|
55
|
+
|
56
|
+
#### Given sleep time is six hours and mood is good, what is the most likely value for a `:coffee` variable?
|
57
|
+
```
|
58
|
+
net.most_likely_value(:coffee, evidence: {mood: :good, sleep_hours: :six}) # :yes
|
59
|
+
```
|
60
|
+
|
61
|
+
#### A broader question than the two above: Given sleep time is six hours and mood is good, what is the distribution for a `:coffee` variable?
|
62
|
+
```
|
63
|
+
net.distribution(over: [:coffee], evidence: {mood: :good, sleep_hours: :six}) # [:yes, 0.757], [:no, 0.243]
|
64
|
+
```
|
65
|
+
|
66
|
+
The inference is based on summing over joint distribution, i.e. it is the simplest and
|
67
|
+
most expensive way to calculate it. No optimization is implemented in this version; the code
|
68
|
+
is more of a proof of concept for the API.
|
69
|
+
|
70
|
+
### [Another example](https://afurmanov.com/reducing-anxiety-with-bayesian-network) of using this gem
|
6
71
|
|
7
72
|
## Installation
|
8
73
|
|
9
74
|
Add this line to your application's Gemfile:
|
10
75
|
|
11
76
|
```ruby
|
12
|
-
|
77
|
+
gem 'bayesnet'
|
13
78
|
```
|
14
79
|
|
15
80
|
And then execute:
|
@@ -20,9 +85,6 @@ Or install it yourself as:
|
|
20
85
|
|
21
86
|
$ gem install bayesnet
|
22
87
|
|
23
|
-
## Usage
|
24
|
-
|
25
|
-
TODO: Write usage instructions here
|
26
88
|
|
27
89
|
## Development
|
28
90
|
|
data/Rakefile
CHANGED
@@ -9,6 +9,9 @@ Rake::TestTask.new(:test) do |t|
|
|
9
9
|
t.test_files = FileList["test/**/*_test.rb"]
|
10
10
|
end
|
11
11
|
|
12
|
-
|
12
|
+
Rake::TestTask.new("regen-bif") do |t|
|
13
|
+
`rm ./lib/bayesnet/parsers/bif.rb`
|
14
|
+
`tt ./lib/bayesnet/parsers/bif.treetop`
|
15
|
+
end
|
13
16
|
|
14
|
-
task default: %i[test
|
17
|
+
task default: %i[test]
|
data/bayesnet.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
|
16
16
|
spec.metadata["homepage_uri"] = spec.homepage
|
17
17
|
spec.metadata["source_code_uri"] = spec.homepage
|
18
|
-
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/
|
18
|
+
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"
|
19
19
|
|
20
20
|
# Specify which files should be added to the gem when it is released.
|
21
21
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
@@ -34,7 +34,6 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.add_development_dependency "m", "~> 1.5.0"
|
35
35
|
spec.add_development_dependency "minitest", "~> 5.0"
|
36
36
|
spec.add_development_dependency "pry-byebug", "~> 3.9.0"
|
37
|
-
spec.add_development_dependency "standard", "~> 1.3"
|
38
37
|
|
39
38
|
# For more information and examples about making a new gem, checkout our
|
40
39
|
# guide at: https://bundler.io/guides/creating_gem.html
|
Binary file
|
data/lib/bayesnet/dsl.rb
CHANGED
data/lib/bayesnet/error.rb
CHANGED
data/lib/bayesnet/factor.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Bayesnet
|
2
4
|
# Factor is a function of several variables (A, B, ...), each defined on values from a finite set
|
3
5
|
class Factor
|
@@ -8,26 +10,26 @@ module Bayesnet
|
|
8
10
|
end
|
9
11
|
|
10
12
|
# Specifies variable name together with its values
|
11
|
-
def
|
12
|
-
@
|
13
|
+
def scope(var_name_to_values)
|
14
|
+
@scope.merge!(var_name_to_values)
|
13
15
|
end
|
14
16
|
|
15
|
-
# Specifies
|
16
|
-
def val(*
|
17
|
-
|
18
|
-
@vals[
|
17
|
+
# Specifies value for a scope context. Value is the last element in `context_and_val`
|
18
|
+
def val(*context_and_val)
|
19
|
+
context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
|
20
|
+
@vals[context_and_val[0..-2]] = context_and_val[-1]
|
19
21
|
end
|
20
22
|
|
21
23
|
def var_names
|
22
|
-
@
|
24
|
+
@scope.keys
|
23
25
|
end
|
24
26
|
|
25
|
-
def [](*
|
26
|
-
key = if
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
def [](*context)
|
28
|
+
key = if context.size == 1 && context[0].is_a?(Hash)
|
29
|
+
context[0].slice(*var_names).values
|
30
|
+
else
|
31
|
+
context
|
32
|
+
end
|
31
33
|
@vals[key]
|
32
34
|
end
|
33
35
|
|
@@ -35,9 +37,10 @@ module Bayesnet
|
|
35
37
|
self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
|
36
38
|
end
|
37
39
|
|
38
|
-
def
|
40
|
+
def contextes(*var_names)
|
39
41
|
return [] if var_names.empty?
|
40
|
-
|
42
|
+
|
43
|
+
@scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
|
41
44
|
end
|
42
45
|
|
43
46
|
def values
|
@@ -47,41 +50,52 @@ module Bayesnet
|
|
47
50
|
def normalize
|
48
51
|
vals = @vals.clone
|
49
52
|
norm_factor = vals.map(&:last).sum * 1.0
|
50
|
-
vals.each { |k,
|
51
|
-
self.class.new(@
|
53
|
+
vals.each { |k, _v| vals[k] /= norm_factor }
|
54
|
+
self.class.new(@scope.clone, vals)
|
52
55
|
end
|
53
56
|
|
54
|
-
def
|
55
|
-
|
57
|
+
def reduce_to(context)
|
58
|
+
# TODO: use Hash#except when Ruby 2.6 support no longer needed
|
59
|
+
context_keys_set = context.keys.to_set
|
60
|
+
scope = @scope.reject { |k, _| context_keys_set.include?(k) }
|
56
61
|
|
57
|
-
|
58
|
-
indices =
|
59
|
-
vals = @vals.select { |k,
|
60
|
-
vals.transform_keys! { |k| k
|
62
|
+
context_vals = context.values
|
63
|
+
indices = context.keys.map { |k| index_by_var_name[k] }
|
64
|
+
vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
|
65
|
+
vals.transform_keys! { |k| delete_by_indices(k, indices) }
|
66
|
+
|
67
|
+
self.class.new(scope, vals)
|
68
|
+
end
|
61
69
|
|
62
|
-
|
70
|
+
def delete_by_indices(array, indices)
|
71
|
+
result = array.dup
|
72
|
+
indices.map { |i| result[i] = nil }
|
73
|
+
result.compact
|
63
74
|
end
|
64
75
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
76
|
+
# Groups entries that share the same context over `var_names` and sums their values.
|
77
|
+
def marginalize(var_names)
|
78
|
+
scope = @scope.slice(*var_names)
|
79
|
+
|
80
|
+
indices = scope.keys.map { |k| index_by_var_name[k] }
|
81
|
+
vals = @vals.group_by { |context, _val| indices.map { |i| context[i] } }
|
69
82
|
vals.transform_values! { |v| v.map(&:last).sum }
|
70
|
-
|
71
|
-
|
83
|
+
|
84
|
+
self.class.new(scope, vals)
|
72
85
|
end
|
73
86
|
|
74
87
|
private
|
75
88
|
|
76
|
-
def initialize(
|
77
|
-
@
|
89
|
+
def initialize(scope = {}, vals = {})
|
90
|
+
@scope = scope
|
78
91
|
@vals = vals
|
79
92
|
end
|
80
93
|
|
81
94
|
def index_by_var_name
|
82
95
|
return @index_by_var_name if @index_by_var_name
|
96
|
+
|
83
97
|
@index_by_var_name = {}
|
84
|
-
@
|
98
|
+
@scope.each_with_index { |(k, _v), i| @index_by_var_name[k] = i }
|
85
99
|
@index_by_var_name
|
86
100
|
end
|
87
101
|
end
|
data/lib/bayesnet/graph.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "bayesnet/node"
|
2
4
|
|
3
5
|
module Bayesnet
|
6
|
+
# Acyclic graph
|
4
7
|
class Graph
|
5
8
|
attr_reader :nodes
|
6
9
|
|
@@ -14,20 +17,29 @@ module Bayesnet
|
|
14
17
|
|
15
18
|
def node(name, parents: [], &block)
|
16
19
|
raise Error, "DSL error, #node requires a &block" unless block
|
17
|
-
|
20
|
+
|
21
|
+
node = Node.new(name, parents)
|
18
22
|
node.instance_eval(&block)
|
19
23
|
@nodes[name] = node
|
20
24
|
end
|
21
25
|
|
26
|
+
def resolve_factors
|
27
|
+
@nodes.values.each do |node|
|
28
|
+
node.resolve_factor(@nodes.slice(*node.parent_nodes))
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
22
32
|
def distribution(over: [], evidence: {})
|
23
|
-
|
24
|
-
|
33
|
+
joint_distribution
|
34
|
+
.reduce_to(evidence)
|
35
|
+
.marginalize(over)
|
36
|
+
.normalize
|
25
37
|
end
|
26
38
|
|
27
39
|
# This is a MAP query, i.e. Maximum a Posteriori
|
28
40
|
def most_likely_value(var_name, evidence:)
|
29
41
|
posterior_distribution = distribution(over: [var_name], evidence: evidence)
|
30
|
-
mode = posterior_distribution.
|
42
|
+
mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
|
31
43
|
mode.first.first
|
32
44
|
end
|
33
45
|
|
@@ -47,17 +59,21 @@ module Bayesnet
|
|
47
59
|
|
48
60
|
factor = Factor.new
|
49
61
|
@nodes.each do |node_name, node|
|
50
|
-
factor.
|
62
|
+
factor.scope node_name => node.values
|
51
63
|
end
|
52
64
|
|
53
|
-
factor.
|
54
|
-
val_by_name = var_names.zip(
|
65
|
+
factor.contextes(*var_names).each do |context|
|
66
|
+
val_by_name = var_names.zip(context).to_h
|
55
67
|
val = nodes.values.reduce(1.0) do |prob, node|
|
56
68
|
prob * node.factor[val_by_name]
|
57
69
|
end
|
58
|
-
factor.val
|
70
|
+
factor.val context + [val]
|
59
71
|
end
|
60
72
|
@joint_distribution = factor.normalize
|
61
73
|
end
|
74
|
+
|
75
|
+
def parameters
|
76
|
+
nodes.values.map(&:parameters).sum
|
77
|
+
end
|
62
78
|
end
|
63
79
|
end
|
data/lib/bayesnet/node.rb
CHANGED
@@ -16,7 +16,7 @@ module Bayesnet
|
|
16
16
|
@values = hash_or_array.keys
|
17
17
|
node = self
|
18
18
|
@factor = Factor.build do
|
19
|
-
|
19
|
+
scope node.name => node.values
|
20
20
|
hash_or_array.each do |value, probability|
|
21
21
|
val [value, probability]
|
22
22
|
end
|
@@ -24,14 +24,22 @@ module Bayesnet
|
|
24
24
|
when Array
|
25
25
|
raise Error, "DSL error, #values requires a &block when first argument is an Array" unless block
|
26
26
|
@values = hash_or_array
|
27
|
+
@factor = block
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def resolve_factor(parent_nodes)
|
32
|
+
@parent_nodes = parent_nodes
|
33
|
+
if @factor.is_a?(Proc)
|
34
|
+
proc = @factor
|
27
35
|
node = self
|
28
36
|
@factor = Factor.build do
|
29
|
-
|
37
|
+
scope node.name => node.values
|
30
38
|
node.parent_nodes.each do |parent_node_name, parent_node|
|
31
|
-
|
39
|
+
scope parent_node_name => parent_node.values
|
32
40
|
end
|
33
41
|
end
|
34
|
-
instance_eval(&
|
42
|
+
instance_eval(&proc)
|
35
43
|
end
|
36
44
|
end
|
37
45
|
|
@@ -39,6 +47,10 @@ module Bayesnet
|
|
39
47
|
instance_eval(&block)
|
40
48
|
end
|
41
49
|
|
50
|
+
def parameters
|
51
|
+
(values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
|
52
|
+
end
|
53
|
+
|
42
54
|
def as(distribution, given:)
|
43
55
|
@values.zip(distribution).each do |value, probability|
|
44
56
|
@factor.val [value] + given + [probability]
|