glymour 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/README +5 -0
- data/Rakefile +1 -0
- data/glymour.gemspec +28 -0
- data/lib/glymour/version.rb +3 -0
- data/lib/glymour.rb +241 -0
- data/lib/scratch.rb +23 -0
- data/lib/stats_module.rb +116 -0
- data/required_r_packages/colorspace_1.1-0.tgz +0 -0
- data/required_r_packages/vcd_1.2-11.tgz +0 -0
- data/spec/.rspec +2 -0
- data/spec/spec_helper.rb +62 -0
- data/spec/statistics_spec.rb +39 -0
- data/spec/structure_learning_spec.rb +82 -0
- metadata +166 -0
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/README
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/glymour.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "glymour/version"
|
4
|
+
|
5
|
+
# Gem specification for glymour.
#
# File lists are taken from `git ls-files`, so the gem must be built from
# inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "glymour"
  s.version     = Glymour::VERSION
  s.authors     = ["Brian Stanwyck"]
  s.email       = ["brian.stanwyck@ncf.edu"]
  s.homepage    = ""
  s.summary     = %q{A gem for supervised Bayesian net structure learning}
  s.description = %q{Implements supervised Bayesian structure learning, as well as extra tools to help train a Bayesian net using ActiveRecord data}

  s.add_development_dependency "rspec"
  s.add_development_dependency "ruby-debug"

  s.add_dependency 'rgl'
  s.add_dependency 'sbn'
  s.add_dependency 'rinruby'
  # lib/glymour.rb does `require "pry"` unconditionally at load time, so pry
  # has to be installed wherever the gem is used, not only in development.
  s.add_dependency 'pry'

  s.rubyforge_project = "glymour"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/lib/glymour.rb
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
require "glymour"
|
2
|
+
require "pry"
|
3
|
+
require "rinruby"
|
4
|
+
require "rgl/adjacency"
|
5
|
+
require "rgl/topsort"
|
6
|
+
require "stats_module"
|
7
|
+
|
8
|
+
# Builds the complete graph as an RGL::ImplicitGraph.
#
# When +n+ is an Integer the vertex set is 1..n; otherwise +n+ itself is used
# as the vertex collection (anything responding to #each). Every vertex is
# adjacent to every other vertex, but never to itself.
def complete_graph(n)
  members = n.is_a?(Integer) ? (1..n) : n

  RGL::ImplicitGraph.new do |graph|
    graph.vertex_iterator { |visit| members.each(&visit) }

    graph.adjacent_iterator do |vertex, visit|
      members.each do |other|
        next if vertex == other
        visit.call(other)
      end
    end
  end
end
|
19
|
+
|
20
|
+
# Returns a new RGL::ImplicitGraph with the same vertices as +orig+ but
# without the edge +e+.
#
# The graph is treated as undirected here: both e.source -> e.target and
# e.target -> e.source are dropped. All other adjacencies are left intact.
# (The previous implementation removed every adjacency pointing at either
# endpoint, i.e. it cut far more than the requested edge.)
#
# The result is lazy: adjacency is looked up in +orig+ on each query.
#
# orig - graph responding to #vertices and #adjacent_vertices
# e    - edge responding to #source and #target
def remove_edge(orig, e)
  RGL::ImplicitGraph.new do |g|
    g.vertex_iterator { |b| orig.vertices.each(&b) }
    g.adjacent_iterator do |x, b|
      orig.adjacent_vertices(x).each do |v|
        # Skip only the pair joined by e, in either direction.
        next if (x == e.source && v == e.target) || (x == e.target && v == e.source)
        b.call(v)
      end
    end
  end
end
|
30
|
+
|
31
|
+
# Builds an RGL::DirectedAdjacencyGraph.
#
# vertices       - collection of vertices; each one is added to the graph
# directed_edges - source => [targets] pairs; one edge is added per target
#
# Returns the populated graph.
def make_directed(vertices, directed_edges)
  RGL::DirectedAdjacencyGraph.new.tap do |dag|
    vertices.each { |vertex| dag.add_vertex(vertex) }

    directed_edges.each do |from, successors|
      successors.each { |to| dag.add_edge(from, to) }
    end
  end
end
|
43
|
+
|
44
|
+
# Takes a list of vertices and a hash of source => [targets] pairs and
# generates an RGL::ImplicitGraph.
#
# Adjacency is stored exactly as given: x is adjacent to y only when
# edges[x] lists y. Vertices without an entry in +edges+ have no adjacent
# vertices. Adjacent vertices are reported in the order of +vertices+.
#
# The +edges+ hash is only read; its default value is left untouched (the
# previous implementation overwrote the caller's Hash#default with []).
def make_implicit(vertices, edges)
  RGL::ImplicitGraph.new do |g|
    g.vertex_iterator { |b| vertices.each(&b) }
    g.adjacent_iterator do |x, b|
      targets = edges.fetch(x, [])
      vertices.each { |y| b.call(y) if targets.include?(y) }
    end
  end
end
|
54
|
+
|
55
|
+
# Cartesian product of any number of collections.
#
# Each argument must respond to #each. The result is an array of arrays,
# ordered with the first argument varying slowest. With no arguments the
# result is [[]]; if any argument is empty the result is [].
#
#   cartprod([1, 2], [3, 4]) # => [[1, 3], [1, 4], [2, 3], [2, 4]]
def cartprod(*args)
  args.inject([[]]) do |partials, factor|
    extended = []
    partials.each do |prefix|
      factor.each { |item| extended << prefix + [item] }
    end
    extended
  end
end
|
68
|
+
|
69
|
+
module Glymour
|
70
|
+
# Provides graph structures and algorithms for determining edge structure of a Bayesian net
|
71
|
+
module StructureLearning
|
72
|
+
# Mixin for Array-like objects that adds power-set computation.
#
# Subsets are returned as plain arrays. For [a, b, c] the order is
# [], [c], [b], [b, c], [a], [a, c], [a, b], [a, b, c].
module PowerSet
  # Destructive variant: returns the array of all subarrays and leaves the
  # receiver empty (all of its elements are consumed).
  def power_set!
    subsets = power_set
    clear unless empty?
    subsets
  end

  # Returns the array of all subarrays without modifying the receiver.
  # Works on frozen receivers as well, since nothing is mutated.
  def power_set
    reverse.inject([[]]) do |subsets, element|
      subsets + subsets.map { |subset| [element] + subset }
    end
  end
end
|
86
|
+
|
87
|
+
# Graph helpers used by the structure-learning code.
#
# The host object must provide #vertices, #adjacent_vertices(v) and #edges
# (RGL graphs do). Adjacency is treated as undirected throughout: two vertices
# count as adjacent when either one lists the other.
module GraphAlgorithms
  # True if +e+ is one of the graph's edges.
  def has_edge?(e)
    edges.include?(e)
  end

  # Returns a (unique) list of vertices adjacent to vertex a or b.
  # This is denoted "Aab" in Spirtes-Glymour's paper.
  def adjacent_either(a, b)
    (adjacent_undirected(a) + adjacent_undirected(b)).uniq
  end

  # Returns the unique vertices joined to +vertex+ by an edge in either
  # direction. Without the uniq, a graph that stores both directions of each
  # edge would list every neighbour twice, which doubles the branching of
  # the path search below at every level.
  def adjacent_undirected(vertex)
    adjacent_sources = vertices.select { |w| adjacent_vertices(w).include?(vertex) }
    (adjacent_vertices(vertex) + adjacent_sources).uniq
  end

  # Returns an array of all vertices on undirected simple paths between
  # current_vertex and t. This is denoted "Uab" in Spirtes-Glymour's paper.
  #
  # Depth-first search that carries the current path along; every time t is
  # reached the path is appended to +paths+ (which is mutated). The final
  # flatten is one level deep so that array-valued vertices stay intact.
  def verts_on_paths(current_vertex, t, current_path = [], paths = [])
    collect_simple_paths(current_vertex, t, current_path, paths)
    paths.flatten(1).uniq
  end

  # Returns a list of _ordered_ 3-tuples (a, b, c) of vertices such that
  # (a, b) are adjacent and (b, c) are adjacent, but (a, c) are not
  # (and a != c). Triples appear in the order of vertices x vertices x vertices.
  def non_transitive
    verts = vertices

    # Adjacency does not change while we scan, so compute it once per vertex
    # instead of once per candidate triple.
    neighbours = {}
    verts.each { |v| neighbours[v] = adjacent_undirected(v) }

    triples = []
    verts.each do |a|
      verts.each do |b|
        next unless neighbours[a].include?(b)

        verts.each do |c|
          next unless neighbours[b].include?(c)
          next if a == c || neighbours[a].include?(c)

          triples << [a, b, c]
        end
      end
    end
    triples
  end

  private

  # Appends to +paths+ every simple path from +vertex+ to +target+ that
  # extends +path+. A vertex already on +path+ is never revisited.
  def collect_simple_paths(vertex, target, path, paths)
    if vertex == target
      paths << path + [vertex]
    elsif !path.include?(vertex)
      extended = path + [vertex]
      adjacent_undirected(vertex).each do |v|
        collect_simple_paths(v, target, extended, paths)
      end
    end
  end
end
|
133
|
+
|
134
|
+
# Learns the edge structure of a Bayesian net over the variables of a
# variable container, starting from the complete graph and removing edges
# between variables found to be (conditionally) independent.
class LearningNet
  include Glymour::Statistics
  attr_accessor :net, :directed_edges, :n
  attr_reader :p_value

  # variable_container - object whose #variables become the graph's vertices
  # p_value            - significance level handed to coindependent?
  def initialize(variable_container, p_value = 0.05)
    @net = complete_graph(variable_container.variables).extend(GraphAlgorithms)
    @directed_edges = {}
    # Shared default: it must only ever be read, never mutated in place.
    @directed_edges.default = []
    @n = -1
    @p_value = p_value
  end

  # Perform one step of the PC algorithm: for every edge (a, b) whose
  # candidate set Aab ^ Uab has more than n members, test whether a and b are
  # independent given some subset of cardinality n+1 (excluding a and b), and
  # remove the edge if so. Increments n afterwards.
  #
  # Returns true if at least one edge was removed.
  def step
    any_independent = false

    net.edges.each do |e|
      a, b = e.source, e.target
      intersect = (@net.adjacent_either(a, b) & @net.verts_on_paths(a, b)).extend(PowerSet)

      # Is |Aab ^ Uab| > n?
      next if intersect.length <= n

      # Are a and b independent conditioned on any subsets of Aab ^ Uab of cardinality n+1?
      sized_subsets = intersect.power_set.select { |s| s.length == n + 1 }
      valid_intersects = sized_subsets.reject { |subset| subset.include?(a) || subset.include?(b) }

      separated = valid_intersects.any? do |subset|
        print "Testing independence between #{a.name} and #{b.name}, conditioning on #{(subset.any? ? subset.map(&:name).join(', ') : 'nothing') + '...'}"
        # Run the (expensive) test once and reuse the answer for both the
        # progress output and the decision.
        independent = coindependent?(p_value, a, b, *subset)
        print(independent ? "[+]\n" : "[-]\n")
        independent
      end
      next unless separated

      # remove_edge returns a plain graph, so re-attach the algorithms that
      # the next iteration calls on @net.
      @net = remove_edge(net, e).extend(GraphAlgorithms)
      net.edges.each do |remaining|
        puts "#{remaining.source.name} => #{remaining.target.name}"
      end
      any_independent = true
    end

    @n += 1
    any_independent
  end

  # Perform the PC algorithm in full: run step while n < 1 (i.e. with
  # conditioning sets of size 0 and then 1 when starting from n = -1), then
  # direct the remaining edges.
  #
  # NOTE(review): the original also saved the graph from before each step and
  # ended with `net = final_net`. That only assigned a local variable and
  # never touched @net, so it had no effect; it is dropped here and @net
  # keeps the state produced by the last step, exactly as before.
  def learn_structure
    puts "Learning undirected net structure..."

    loop do
      puts "n = #{n}"
      step
      break unless n < 1
    end

    direct_edges
  end

  # Direct remaining edges in @net as much as possible.
  #
  # For every non-transitive triple (a, b, c): if a and c are not independent
  # given any subset of Aac ^ Uac that contains b, record a => b and c => b
  # in @directed_edges.
  def direct_edges
    puts "Directing edges where possible..."

    net.non_transitive.each do |triple|
      a, b, c = *triple

      intersect = (net.adjacent_either(a, c) & net.verts_on_paths(a, c)).extend(PowerSet)
      subsets_with_b = intersect.power_set.select { |s| s.include? b }
      next if subsets_with_b.any? { |subset| coindependent?(p_value, a, c, *subset) }

      [a, c].each do |parent|
        puts "Adding directed edge #{parent.name} => #{b.name}..."
        # Build a fresh array per key; appending to @directed_edges[parent]
        # with << would mutate the hash's shared default array and leak
        # targets into every other key.
        @directed_edges[parent] = (@directed_edges.fetch(parent, []) + [b]).uniq
      end
    end
  end

  # Gives a list of all orientations of @net compatible with @directed_edges
  # (i.e., all directed acyclic graphs with edge structure given partially by @directed_edges)
  def compatible_orientations
    compat_list = []
    edges = net.edges.extend(PowerSet)

    # Every orientation of net corresponds to a subset of its edges
    edges.power_set.each do |subset|
      # Orient edges in subset as source => target, outside of it as target => source
      # Any edges conflicting with directed_edges will be cyclic and therefore not counted
      current_orientation = make_directed(net.vertices, @directed_edges)

      edges.each do |e|
        if subset.include? e
          current_orientation.add_edge(e.source, e.target)
        else
          current_orientation.add_edge(e.target, e.source)
        end
      end

      compat_list << current_orientation if current_orientation.acyclic?
    end

    compat_list
  end
end
|
240
|
+
end
|
241
|
+
end
|
data/lib/scratch.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Wraps an Sbn::Net built from a learned DAG.
class WeightNet
  attr_accessor :net

  # Use a DAG generated by StructureLearning to create an unweighted Bayes net.
  #
  # dag   - graph responding to #vertices and #edges; vertices respond to
  #         #name, edges to #source and #target
  # title - passed to Sbn::Net.new
  #         (NOTE(review): the empty-string default is assumed to be
  #         acceptable to Sbn::Net -- confirm against the sbn gem)
  #
  # The original body referenced bare `title`, `vertices` and `edges`, none of
  # which exist on WeightNet, so construction always raised NameError.
  def initialize(dag, title = '')
    @net = Sbn::Net.new(title)

    vars = {}
    dag.vertices.each do |v|
      vars[v] = Sbn::Variable.new(@net, v.name.to_sym)
    end

    dag.edges.each do |e|
      vars[e.source].add_child(vars[e.target])
    end
  end
end
|
18
|
+
|
19
|
+
R.eval <<-EOF
|
20
|
+
partial_table <- t[,,#{value.join(',')}]
|
21
|
+
chisq <- chisq.test(partial_table)
|
22
|
+
s <- chisq$statistic
|
23
|
+
EOF
|
data/lib/stats_module.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
module Glymour
|
2
|
+
module Statistics
|
3
|
+
# Holds a data table together with the variables defined over it.
#
# On construction every variable is pointed back at this container, has its
# intervals computed if it declares num_classes, and is given a generated
# name ("unnamed_variable1", "unnamed_variable2", ...) if it has none.
class VariableContainer
  attr_reader :table, :number_unnamed
  attr_accessor :variables

  # table     - the rows the variables' blocks are applied to
  # variables - list of variables to attach to this container
  def initialize(table, variables = [])
    # Stored in an instance variable so that the number_unnamed reader
    # reports how many names were generated (a plain local left it nil).
    @number_unnamed = 0
    @table = table
    @variables = variables
    @variables.each do |var|
      var.variable_container = self
      var.set_intervals if var.num_classes
      var.name ||= "unnamed_variable#{@number_unnamed += 1}"
    end
  end
end
|
18
|
+
|
19
|
+
# A variable over the rows of a container's table. The block given at
# construction extracts the variable's raw value from a row; when intervals
# are set, raw values are replaced by the index of the interval they fall in.
class Variable
  attr_accessor :intervals, :variable_container, :name, :num_classes

  # name        - optional name; whitespace runs are replaced by "_"
  # num_classes - optional number of classes used by set_intervals
  # block       - required; maps a table row to this variable's raw value
  #
  # Raises ArgumentError if no block is given.
  def initialize(name = nil, num_classes = nil, &block)
    raise ArgumentError, 'a block extracting the value from a row is required' unless block

    @block = block
    @num_classes = num_classes
    # No container is attached yet, so intervals cannot be computed here;
    # VariableContainer#initialize calls set_intervals once it is.
    @intervals = nil

    # names are used as variable names in R, so make sure there's no whitespace
    @name = name.gsub(/\s+/, '_') if name
  end

  # Apply @block to each row of the table, and store/return a list of evenly
  # spaced cut points [x1, x2, ..., x(num_classes)] so that x1 is the minimum
  # raw value and the last one is the maximum.
  #
  # Always computed from the raw block values, so calling it again after
  # intervals exist gives the same result (using #values here would feed the
  # already-binned indices back in). With a single class the only cut point
  # is the maximum, which avoids a division by zero.
  def set_intervals
    raw = variable_container.table.map(&@block)
    low = raw.min
    high = raw.max
    return @intervals = [high] if num_classes == 1

    step = (high - low) / (num_classes - 1).to_f
    @intervals = (0..(num_classes - 1)).map { |k| low + k * step }
  end

  # Value of this variable for a single row: the interval index when
  # intervals are set, otherwise the raw block value.
  def value_at(row)
    intervals ? location_in_interval(row) : @block.call(row)
  end

  # Gives an array of all variable values in table
  def values
    if intervals
      variable_container.table.map { |row| location_in_interval(row) }
    else
      variable_container.table.map(&@block)
    end
  end

  # Gives the location of a row's value within the interval cut points, i.e.
  # the index of the first cut point that is >= the value (the discrete state
  # after classing a continuous variable). Returns -1 if the value is above
  # every cut point.
  def location_in_interval(row)
    value = @block.call(row)
    intervals.index { |upper| value <= upper } || -1
  end
end
|
59
|
+
|
60
|
+
# Takes two or more Variables.
# Returns true if the first two variables are coindependent given the rest,
# i.e. if the observed p-value of the chi-squared test is above p_val.
#
# With exactly two variables this is a plain chi-squared test on their
# contingency table. With conditioning variables, the chi-squared statistic
# and degrees of freedom are computed for every combination of conditioning
# states and summed, and the p-value of the summed statistic is used.
#
# All computation happens in R through the global R object; the variables'
# values are assigned to R variables named after each variable's #name.
def coindependent?(p_val, *variables)
  #TODO: Raise an exception if variables have different tables?
  R.echo(false)

  # Push variable data into R
  variables.each do |var|
    # Rinruby can't handle true and false values, so use 1 and 0 resp. instead
    sanitized_values = var.values.map do |value|
      case value
      when true then 1
      when false then 0
      else value
      end
    end

    R.assign var.name, sanitized_values
  end

  R.eval <<-EOF
    cond_data <- data.frame(#{variables.map(&:name).join(', ')})
    t <-table(cond_data)
  EOF

  cond_vars = variables.drop(2)

  # If no conditioning variables are given, just return the chi square test for the first two
  if cond_vars.empty?
    R.eval "chisq <- chisq.test(t)"
    observed_p = R.pull "chisq$p.value"
    return observed_p > p_val
  end

  # One list of 1-based state indices per conditioning variable, then every
  # combination of them. Array#product always yields arrays, so a single
  # conditioning variable gives [[1], [2], ...]. (The original called the
  # non-existent Array#inject! here and discarded the result.)
  state_indices = cond_vars.map { |var| (1..var.values.uniq.length).to_a }
  first_states, *other_states = state_indices
  cond_values = first_states.product(*other_states)

  # Find the chi-squared statistic for every state of the conditioning variables and sum them
  chisq_sum = 0
  df = 0
  cond_values.each do |value|
    # Columns are selected with a logical mask: the former
    # `[, -(which(colSums(...) == 0))]` selects *no* columns at all when
    # there is no all-zero column, because -integer(0) is an empty index.
    R.eval <<-EOF
      partial_table <- t[,,#{value.join(',')}]
      table_without_zero_columns <- partial_table[, colSums(partial_table) > 0, drop = FALSE]
      chisq <- chisq.test(table_without_zero_columns)
      s <- chisq$statistic
    EOF

    observed_s = R.pull("s").to_f
    chisq_sum += observed_s
    df += R.pull("chisq$parameter").to_i
  end

  # Compute the p-value of the sum of statistics
  observed_p = 1 - R.pull("pchisq(#{chisq_sum}, #{df})").to_f
  observed_p > p_val
end
|
115
|
+
end
|
116
|
+
end
|
Binary file
|
Binary file
|
data/spec/.rspec
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require "glymour"
|
2
|
+
|
3
|
+
# Minimal host class that mixes in Glymour::Statistics, so the statistics spec
# can call the module's instance methods on an object (Stats = StatsDummy.new).
class StatsDummy
|
4
|
+
include Glymour::Statistics
|
5
|
+
end
|
6
|
+
|
7
|
+
# Bernoulli draw: true with probability p, false otherwise.
# Draws one number from Kernel#rand per call.
def prob(p)
  p > rand
end
|
11
|
+
|
12
|
+
# Builds the "alarm" fixture: 100000 random rows for earthquake, burglary,
# alarm, John calling, Mary calling and an unrelated 0...50 integer column,
# then wraps them in Variables, a VariableContainer and a LearningNet.
#
# Sets @alarm_data, @e, @b, @a, @j, @m, @ac and @alarm_net.
def alarm_init
  @alarm_data = Array.new(100000) do
    earthquake = prob(0.01)
    burglary = prob(0.007)

    # Chance of the alarm going off depends on both causes.
    alarm_chance =
      if burglary
        earthquake ? 0.95 : 0.94
      else
        earthquake ? 0.29 : 0.001
      end
    alarm = prob(alarm_chance)

    john_calls = prob(alarm ? 0.90 : 0.05)
    mary_calls = prob(alarm ? 0.70 : 0.01)

    alarm_continuous = rand(50)

    { :e => earthquake, :b => burglary, :a => alarm, :j => john_calls, :m => mary_calls, :ac => alarm_continuous }
  end

  variable = Glymour::Statistics::Variable
  @e = variable.new("Earthquake") { |r| r[:e] }
  @b = variable.new("Burglary") { |r| r[:b] }
  @a = variable.new("Alarm") { |r| r[:a] }
  @j = variable.new("John Calls") { |r| r[:j] }
  @m = variable.new("Mary Calls") { |r| r[:m] }

  # Continuous column split into 10 classes
  @ac = variable.new("Alarm Continuous", 10) { |r| r[:ac] }

  alarm_container = Glymour::Statistics::VariableContainer.new(@alarm_data, [@e, @b, @a, @j, @m, @ac])
  @alarm_net = Glymour::StructureLearning::LearningNet.new(alarm_container)
end
|
44
|
+
|
45
|
+
# Builds the "coin" fixture: a highly simplified test net in which the only
# edges should be @h pointing to @red and @blue.
#
# Sets @coin_data (10000 random rows), @h, @red, @blue and @coin_net.
def coin_init
  @coin_data = Array.new(10000) do
    h = prob(0.5)
    red = prob(h ? 0.2 : 0.7)
    blue = prob(h ? 0.4 : 0.9)
    { :h => h, :red => red, :blue => blue }
  end

  variable = Glymour::Statistics::Variable
  @h = variable.new("Heads") { |r| r[:h] }
  @red = variable.new("Red") { |r| r[:red] }
  @blue = variable.new("Blue") { |r| r[:blue] }

  coin_container = Glymour::Statistics::VariableContainer.new(@coin_data, [@h, @red, @blue])
  @coin_net = Glymour::StructureLearning::LearningNet.new(coin_container)
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'glymour'
|
2
|
+
require 'rgl/implicit'
|
3
|
+
require 'rgl/dot'
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
# Specs for the chi-squared independence tests and for VariableContainer's
# automatic naming of unnamed variables.
describe Glymour::Statistics do
  before(:all) do
    R.echo(true)
    Stats = StatsDummy.new

    alarm_init
    coin_init
  end

  it 'should give chi square independence data for two variables' do
    Stats.coindependent?(0.05, @h, @red).should be_false
    Stats.coindependent?(0.05, @e, @b).should be_true
  end

  it 'should give conditional independence data for several variables' do
    Stats.coindependent?(0.05, @red, @blue, @h).should be_true
    Stats.coindependent?(0.05, @j, @m, @a).should be_true
    Stats.coindependent?(0.05, @b, @ac, @a).should be_true
  end

  # The nested group needs its own do ... end; without it the describe call
  # created an empty group and the examples below were not nested under it.
  describe Glymour::Statistics::VariableContainer do
    it 'should set variable name when nil' do
      var = Glymour::Statistics::Variable.new {|r| r}
      container = Glymour::Statistics::VariableContainer.new([], [var])
      var.name.should_not be_nil
    end

    it 'should create unique names for variables' do
      var1 = Glymour::Statistics::Variable.new {|r| r}
      var2 = Glymour::Statistics::Variable.new {|r| r}
      container = Glymour::Statistics::VariableContainer.new([], [var1, var2])
      var1.name.should_not eq var2.name
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'glymour'
|
2
|
+
require 'rgl/implicit'
|
3
|
+
require 'rgl/dot'
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
# Specs for the power-set helper, the graph algorithms and LearningNet.
describe Glymour::StructureLearning do
  before(:each) do
    extend Glymour::StructureLearning
  end

  it 'should compute the power set of an array' do
    ary = [1, :two, "three"].extend(Glymour::StructureLearning::PowerSet)
    result = ary.power_set

    [[], [1, :two], [:two, "three"], ary].each do |set|
      result.should include set
    end
  end

  describe 'Within GraphAlgorithms' do
    before(:each) do
      # Mixes the algorithms into every implicit graph for the rest of the run.
      class RGL::ImplicitGraph
        include Glymour::StructureLearning::GraphAlgorithms
      end

      # Create a graph for graph algorithm tests
      # (Unfortunately we need something a little complicated for some tests)
      # to_a gives a real Array; collect without a block only returns an Enumerator.
      vertices = (1..8).to_a
      edges = {1 => [2], 2 => [3], 3 => [1, 7], 4 => [3], 7 => [5, 8], 5 => [6], 6 => [7]}

      @g = make_implicit(vertices, edges)
    end

    it 'should remove an edge from a graph' do
      g = complete_graph(4)
      orig_edge_count = g.edges.length
      remove_edge(g, g.edges.first).edges.length.should_not eq orig_edge_count
    end

    it 'should compute the vertices on all paths between two vertices' do
      path_verts = @g.verts_on_paths(4, 6)
      [4, 3, 7, 5, 6].each { |v| path_verts.should include v }
    end

    it 'should compute non-transitive vertices of a graph' do
      [[4, 3, 1], [4, 3, 7], [3, 7, 8]].each do |triple|
        @g.non_transitive.should include triple
      end
    end

    it 'should compute a complete graph on any vertex set' do
      vert_set = [1, :two, "three", [5]]
      complete = complete_graph vert_set

      vert_set.each do |v|
        complete.adjacent_vertices(v).should eq vert_set - [v]
      end
    end
  end

  describe Glymour::StructureLearning::LearningNet do
    before(:all) do
      alarm_init
    end

    it 'should perform the structure learning algorithm' do
      prev_n_edges = @alarm_net.net.edges.length

      @alarm_net.learn_structure

      @alarm_net.net.edges.length.should be < prev_n_edges

      @alarm_net.net.edges.each do |e|
        puts "#{e.source.name} => #{e.target.name}"
      end
    end

    it 'should produce orientations compatible with learn_structure output' do
      # compatible_orientations builds and returns a list; assert at least
      # that, so the example checks something instead of passing vacuously.
      orientations = @alarm_net.compatible_orientations
      orientations.should be_an(Array)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: glymour
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Brian Stanwyck
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-09-28 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: pry
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 3
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: ruby-debug
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
type: :development
|
61
|
+
version_requirements: *id003
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rgl
|
64
|
+
prerelease: false
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :runtime
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: sbn
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
type: :runtime
|
89
|
+
version_requirements: *id005
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: rinruby
|
92
|
+
prerelease: false
|
93
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
hash: 3
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
type: :runtime
|
103
|
+
version_requirements: *id006
|
104
|
+
description: Implements supervised Bayesian structure learning, as well as extra tools to help train a Bayesian net using ActiveRecord data
|
105
|
+
email:
|
106
|
+
- brian.stanwyck@ncf.edu
|
107
|
+
executables: []
|
108
|
+
|
109
|
+
extensions: []
|
110
|
+
|
111
|
+
extra_rdoc_files: []
|
112
|
+
|
113
|
+
files:
|
114
|
+
- .gitignore
|
115
|
+
- .rspec
|
116
|
+
- Gemfile
|
117
|
+
- README
|
118
|
+
- Rakefile
|
119
|
+
- glymour.gemspec
|
120
|
+
- lib/glymour.rb
|
121
|
+
- lib/glymour/version.rb
|
122
|
+
- lib/scratch.rb
|
123
|
+
- lib/stats_module.rb
|
124
|
+
- required_r_packages/colorspace_1.1-0.tgz
|
125
|
+
- required_r_packages/vcd_1.2-11.tgz
|
126
|
+
- spec/.rspec
|
127
|
+
- spec/spec_helper.rb
|
128
|
+
- spec/statistics_spec.rb
|
129
|
+
- spec/structure_learning_spec.rb
|
130
|
+
homepage: ""
|
131
|
+
licenses: []
|
132
|
+
|
133
|
+
post_install_message:
|
134
|
+
rdoc_options: []
|
135
|
+
|
136
|
+
require_paths:
|
137
|
+
- lib
|
138
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
hash: 3
|
144
|
+
segments:
|
145
|
+
- 0
|
146
|
+
version: "0"
|
147
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
148
|
+
none: false
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
hash: 3
|
153
|
+
segments:
|
154
|
+
- 0
|
155
|
+
version: "0"
|
156
|
+
requirements: []
|
157
|
+
|
158
|
+
rubyforge_project: glymour
|
159
|
+
rubygems_version: 1.8.7
|
160
|
+
signing_key:
|
161
|
+
specification_version: 3
|
162
|
+
summary: A gem for supervised Bayesian net structure learning
|
163
|
+
test_files:
|
164
|
+
- spec/spec_helper.rb
|
165
|
+
- spec/statistics_spec.rb
|
166
|
+
- spec/structure_learning_spec.rb
|