baobab 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/baobab/dataset.rb +66 -63
- data/lib/baobab/node.rb +190 -187
- data/lib/baobab/shannon.rb +12 -9
- data/lib/baobab/tree.rb +57 -53
- metadata +7 -18
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bc9a6568b4998bd54bb07030d90bbebcc6979d72
|
4
|
+
data.tar.gz: a392e91b05f89b6dc055249e9b5256e3fc98a93b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cddc12fb7be6d7b5a73f3e659f22df27b52bdd1f75f53e9bddc9da2aa85a0d7499111f4e1b61459d071e489b68017e06306a5287b7aae3a6c84741dafaff8b34
|
7
|
+
data.tar.gz: f6d28f686b71791eed288ccdd6c6e9d6cfba3632ddbaabc0bd162fc5672f613d77c05150814c9d5a1b53854c04f7ab1b52ebc484536a111b8b7d7da78af20021
|
data/lib/baobab/dataset.rb
CHANGED
@@ -1,74 +1,77 @@
|
|
1
|
+
module Baobab
|
1
2
|
|
2
|
-
# Represents a dataset or subset thereof.
|
3
|
-
# Is an array of hashes where all hashes contain the same keys.
|
4
|
-
class Dataset < Array
|
3
|
+
# Represents a dataset or subset thereof.
|
4
|
+
# Is an array of hashes where all hashes contain the same keys.
|
5
|
+
class Dataset < Array
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# Receives an array of hashes. All hashes must contain the same keys.
|
8
|
+
def initialize data
|
9
|
+
data.each do |row|
|
10
|
+
self << row
|
11
|
+
end
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
def self.from_json filename
|
15
|
+
text = File.read(filename)
|
16
|
+
self.new JSON.parse(text)
|
17
|
+
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
19
|
+
def attribute_names class_var
|
20
|
+
self.column_names.reject{|name| name == class_var}
|
21
|
+
end
|
21
22
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
# Returns an array of the attribute names in the dataset
|
24
|
+
# Careful: it's empty on an empty set.
|
25
|
+
def column_names
|
26
|
+
self[0].keys
|
27
|
+
end
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
# Returns an array of the values of an attribute in the dataset.
|
30
|
+
# Careful: it's empty on an empty set.
|
31
|
+
def column_values attribute
|
32
|
+
self.map{|row| row[attribute]}.to_a.uniq
|
33
|
+
end
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
35
|
+
# Gets a subset with given conditions. Keys must be of the same type as
|
36
|
+
# in the dataset (be careful with symbols).
|
37
|
+
def subset conditions
|
38
|
+
rows = self.select do |row|
|
39
|
+
conditions.reduce(true) do |memo, (var, val)|
|
40
|
+
memo and row[var] == val
|
41
|
+
end
|
42
|
+
end
|
43
|
+
Dataset.new rows
|
44
|
+
end
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
46
|
+
def entropy class_var
|
47
|
+
class_vals = self.column_values(class_var)
|
48
|
+
probabilities = class_vals.map do |class_val|
|
49
|
+
self.probability(class_var, class_val)
|
50
|
+
end
|
51
|
+
Shannon::entropy *probabilities
|
52
|
+
end
|
52
53
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
54
|
+
# Evaluates the probability that var be val in this dataset.
|
55
|
+
# Can also be used for subsets.
|
56
|
+
def probability var, val
|
57
|
+
unless self.count.zero?
|
58
|
+
self.count{|r| r[var] == val}.fdiv(self.count)
|
59
|
+
else
|
60
|
+
0
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def validate
|
65
|
+
raise 'Dataset is empty' if self.empty?
|
66
|
+
self.reduce(self[0].keys) do |memo, row|
|
67
|
+
if memo == row.keys then
|
68
|
+
memo
|
69
|
+
else
|
70
|
+
raise 'Dataset is inconsistent'
|
71
|
+
end
|
72
|
+
end
|
73
|
+
return nil
|
74
|
+
end
|
75
|
+
end
|
62
76
|
|
63
|
-
def validate
|
64
|
-
raise 'Dataset is empty' if self.empty?
|
65
|
-
self.reduce(self[0].keys) do |memo, row|
|
66
|
-
if memo == row.keys then
|
67
|
-
memo
|
68
|
-
else
|
69
|
-
raise 'Dataset is inconsistent'
|
70
|
-
end
|
71
|
-
end
|
72
|
-
return nil
|
73
|
-
end
|
74
77
|
end
|
data/lib/baobab/node.rb
CHANGED
@@ -1,189 +1,192 @@
|
|
1
|
+
module Baobab
|
2
|
+
|
3
|
+
class DecisionTreeNode
|
4
|
+
|
5
|
+
# A list of nodes
|
6
|
+
attr_accessor :children
|
7
|
+
|
8
|
+
# The variable on which this operates, a string
|
9
|
+
attr_reader :variable
|
10
|
+
|
11
|
+
# The value for the variable, a string
|
12
|
+
attr_reader :value
|
13
|
+
|
14
|
+
# The decision tree
|
15
|
+
attr_reader :tree
|
16
|
+
|
17
|
+
attr_reader :entropy
|
18
|
+
|
19
|
+
attr_accessor :next_attribute
|
20
|
+
|
21
|
+
# Accumulates the conditions down the tree
|
22
|
+
attr_reader :conditions
|
23
|
+
|
24
|
+
# The conditional probability that variable=value
|
25
|
+
attr_reader :probability
|
26
|
+
|
27
|
+
def initialize tree, parent, variable, value, entropy, conditions=nil
|
28
|
+
@tree = tree
|
29
|
+
@parent = parent
|
30
|
+
@variable = variable
|
31
|
+
@value = value
|
32
|
+
@entropy = entropy
|
33
|
+
@conditions = conditions ? conditions : {}
|
34
|
+
@children = Array.new
|
35
|
+
@subset = nil
|
36
|
+
end
|
37
|
+
|
38
|
+
def to_s
|
39
|
+
s = @variable ? "#{@variable} => #{@value}" : "ROOT"
|
40
|
+
s += " (#{@entropy.round(3)})"
|
41
|
+
end
|
42
|
+
|
43
|
+
def subset
|
44
|
+
unless @subset.nil?
|
45
|
+
@subset
|
46
|
+
else
|
47
|
+
@subset = @tree.dataset.subset(self.full_conditions)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def clear
|
52
|
+
@subset = nil
|
53
|
+
end
|
54
|
+
|
55
|
+
def pending_attrs
|
56
|
+
@tree.dataset.column_names.reject do |name|
|
57
|
+
@tree.class_var == name or @conditions.include? name
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Returns a subset selected on all the parent node's conditions plus this
|
62
|
+
# node's attribute and its value.
|
63
|
+
def full_conditions
|
64
|
+
if @variable
|
65
|
+
conditions.merge({@variable => @value})
|
66
|
+
else
|
67
|
+
conditions
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def build_subtree recursive=true
|
72
|
+
if self.try_finish
|
73
|
+
return
|
74
|
+
end
|
75
|
+
subset = self.subset
|
76
|
+
subset_count = self.subset.count
|
77
|
+
entropies = self.entropies
|
78
|
+
inf_gain = entropies.each.with_object({}) do |(a, e), o|
|
79
|
+
o[a] = @entropy - e
|
80
|
+
end
|
81
|
+
max_attr, max_gain = inf_gain.max_by{|v, g| g}
|
82
|
+
self.next_attribute = max_attr
|
83
|
+
@children += self.tree.dataset.column_values(max_attr).map do |value|
|
84
|
+
conditions = self.full_conditions
|
85
|
+
DecisionTreeNode.new(
|
86
|
+
tree=@tree, parent=self,
|
87
|
+
attribute=max_attr, value=value,
|
88
|
+
entropy=entropies[max_attr], conditions=conditions
|
89
|
+
)
|
90
|
+
end
|
91
|
+
if recursive
|
92
|
+
@children.each do |c|
|
93
|
+
c.build_subtree
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# Checks whether any of three is true:
|
99
|
+
# - All elements in the subset belong to the same class value: a leaf with
|
100
|
+
# this value is created.
|
101
|
+
# - There are no more attributes to be selected: a leaf with the most common
|
102
|
+
# class value is selected
|
103
|
+
# - There are no more rows in the dataset: a leaf with the most common class
|
104
|
+
# value in the parent node is created.
|
105
|
+
def try_finish
|
106
|
+
var = self.tree.class_var
|
107
|
+
val = (
|
108
|
+
self.try_finish_single_value_class or
|
109
|
+
self.try_finish_empty_subset or
|
110
|
+
self.try_finish_no_more_attributes
|
111
|
+
)
|
112
|
+
if val
|
113
|
+
@next_attribute = @tree.class_var
|
114
|
+
self.children << DecisionTreeNode.new(
|
115
|
+
tree=@tree, parent=self,
|
116
|
+
variable=@tree.class_var, value=val,
|
117
|
+
entropy=0, conditions=self.full_conditions
|
118
|
+
)
|
119
|
+
else
|
120
|
+
false
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
# If all class values are the same, returns that value; else, nil.
|
125
|
+
def try_finish_single_value_class
|
126
|
+
if self.subset.any?
|
127
|
+
v0 = self.subset[0][@tree.class_var]
|
128
|
+
self.subset.slice(1...-1).reduce(v0) do |memo, row|
|
129
|
+
if memo != row[@tree.class_var]
|
130
|
+
return nil
|
131
|
+
else
|
132
|
+
memo
|
133
|
+
end
|
134
|
+
end
|
135
|
+
else
|
136
|
+
nil
|
137
|
+
end
|
138
|
+
|
139
|
+
end
|
140
|
+
|
141
|
+
# If there are not more attributes, returns the most common class value;
|
142
|
+
# else, nil.
|
143
|
+
def try_finish_no_more_attributes
|
144
|
+
if self.pending_attrs.empty? then
|
145
|
+
self.most_common_value
|
146
|
+
else
|
147
|
+
nil
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
# If the subset is empty, returns the most common value in the parent
|
152
|
+
# node's subset.
|
153
|
+
def try_finish_empty_subset
|
154
|
+
if self.subset.empty?
|
155
|
+
@parent.most_common_value
|
156
|
+
else
|
157
|
+
nil
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
# Returns the most common class value in the dataset.
|
162
|
+
def most_common_value
|
163
|
+
class_var = @tree.class_var
|
164
|
+
class_values = @tree.dataset.column_values(class_var)
|
165
|
+
count = class_values.each.with_object({}) do |val, o|
|
166
|
+
o[val] = 0
|
167
|
+
end
|
168
|
+
self.subset.each.with_object(count) do |row, o|
|
169
|
+
count[row[class_var]] += 1
|
170
|
+
end
|
171
|
+
count.max_by{|v, c| c}[0]
|
172
|
+
end
|
173
|
+
|
174
|
+
# Returns a hash of {attribute, entropy} given that we divide the dataset
|
175
|
+
# on attribute.
|
176
|
+
def entropies
|
177
|
+
self.pending_attrs.each.with_object({}) do |a, o|
|
178
|
+
values = @tree.dataset.column_values(a)
|
179
|
+
val_probabilities = values.each.with_object({}) do |v, o|
|
180
|
+
o[v] = subset.probability a, v
|
181
|
+
end
|
182
|
+
val_entropies = values.each.with_object({}) do |v, o|
|
183
|
+
o[v] = subset.subset({a => v}).entropy(self.tree.class_var)
|
184
|
+
end
|
185
|
+
o[a] = values.reduce(0) do |memo, v|
|
186
|
+
memo + val_entropies[v] * val_probabilities[v]
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
1
191
|
|
2
|
-
class DecisionTreeNode
|
3
|
-
|
4
|
-
# A list of nodes
|
5
|
-
attr_accessor :children
|
6
|
-
|
7
|
-
# The variable on which this operates, a string
|
8
|
-
attr_reader :variable
|
9
|
-
|
10
|
-
# The value for the variable, a string
|
11
|
-
attr_reader :value
|
12
|
-
|
13
|
-
# The decision tree
|
14
|
-
attr_reader :tree
|
15
|
-
|
16
|
-
attr_reader :entropy
|
17
|
-
|
18
|
-
attr_accessor :next_attribute
|
19
|
-
|
20
|
-
# Accumulates the conditions down the tree
|
21
|
-
attr_reader :conditions
|
22
|
-
|
23
|
-
# The conditional probability that variable=value
|
24
|
-
attr_reader :probability
|
25
|
-
|
26
|
-
def initialize tree, parent, variable, value, entropy, conditions=nil
|
27
|
-
@tree = tree
|
28
|
-
@parent = parent
|
29
|
-
@variable = variable
|
30
|
-
@value = value
|
31
|
-
@entropy = entropy
|
32
|
-
@conditions = conditions ? conditions : {}
|
33
|
-
@children = Array.new
|
34
|
-
@subset = nil
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_s
|
38
|
-
s = @variable ? "#{@variable} => #{@value}" : "ROOT"
|
39
|
-
s += " (#{@entropy.round(3)})"
|
40
|
-
end
|
41
|
-
|
42
|
-
def subset
|
43
|
-
unless @subset.nil?
|
44
|
-
@subset
|
45
|
-
else
|
46
|
-
@subset = @tree.dataset.subset(self.full_conditions)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def clear
|
51
|
-
@subset = nil
|
52
|
-
end
|
53
|
-
|
54
|
-
def pending_attrs
|
55
|
-
@tree.dataset.column_names.reject do |name|
|
56
|
-
@tree.class_var == name or @conditions.include? name
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
# Returns a subset selected on all the parent node's conditions plus this
|
61
|
-
# node's attribute and its value.
|
62
|
-
def full_conditions
|
63
|
-
if @variable
|
64
|
-
conditions.merge({@variable => @value})
|
65
|
-
else
|
66
|
-
conditions
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def build_subtree recursive=true
|
71
|
-
if self.try_finish
|
72
|
-
return
|
73
|
-
end
|
74
|
-
subset = self.subset
|
75
|
-
subset_count = self.subset.count
|
76
|
-
entropies = self.entropies
|
77
|
-
inf_gain = entropies.each.with_object({}) do |(a, e), o|
|
78
|
-
o[a] = @entropy - e
|
79
|
-
end
|
80
|
-
max_attr, max_gain = inf_gain.max_by{|v, g| g}
|
81
|
-
self.next_attribute = max_attr
|
82
|
-
@children += self.tree.dataset.column_values(max_attr).map do |value|
|
83
|
-
conditions = self.full_conditions
|
84
|
-
DecisionTreeNode.new(
|
85
|
-
tree=@tree, parent=self,
|
86
|
-
attribute=max_attr, value=value,
|
87
|
-
entropy=entropies[max_attr], conditions=conditions
|
88
|
-
)
|
89
|
-
end
|
90
|
-
if recursive
|
91
|
-
@children.each do |c|
|
92
|
-
c.build_subtree
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
# Checks whether any of three is true:
|
98
|
-
# - All elements in the subset belong to the same class value: a leaf with
|
99
|
-
# this value is created.
|
100
|
-
# - There are no more attributes to be selected: a leaf with the most common
|
101
|
-
# class value is selected
|
102
|
-
# - There are no more rows in the dataset: a leaf with the most common class
|
103
|
-
# value in the parent node is created.
|
104
|
-
def try_finish
|
105
|
-
var = self.tree.class_var
|
106
|
-
val = (
|
107
|
-
self.try_finish_single_value_class or
|
108
|
-
self.try_finish_empty_subset or
|
109
|
-
self.try_finish_no_more_attributes
|
110
|
-
)
|
111
|
-
if val
|
112
|
-
@next_attribute = @tree.class_var
|
113
|
-
self.children << DecisionTreeNode.new(
|
114
|
-
tree=@tree, parent=self,
|
115
|
-
variable=@tree.class_var, value=val,
|
116
|
-
entropy=0, conditions=self.full_conditions
|
117
|
-
)
|
118
|
-
else
|
119
|
-
false
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
# If all class values are the same, returns that value; else, nil.
|
124
|
-
def try_finish_single_value_class
|
125
|
-
if self.subset.any?
|
126
|
-
v0 = self.subset[0][@tree.class_var]
|
127
|
-
self.subset.slice(1...-1).reduce(v0) do |memo, row|
|
128
|
-
if memo != row[@tree.class_var]
|
129
|
-
return nil
|
130
|
-
else
|
131
|
-
memo
|
132
|
-
end
|
133
|
-
end
|
134
|
-
else
|
135
|
-
nil
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
|
140
|
-
# If there are not more attributes, returns the most common class value;
|
141
|
-
# else, nil.
|
142
|
-
def try_finish_no_more_attributes
|
143
|
-
if self.pending_attrs.empty? then
|
144
|
-
self.most_common_value
|
145
|
-
else
|
146
|
-
nil
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
# If the subset is empty, returns the most common value in the parent
|
151
|
-
# node's subset.
|
152
|
-
def try_finish_empty_subset
|
153
|
-
if self.subset.empty?
|
154
|
-
@parent.most_common_value
|
155
|
-
else
|
156
|
-
nil
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
# Returns the most common class value in the dataset.
|
161
|
-
def most_common_value
|
162
|
-
class_var = @tree.class_var
|
163
|
-
class_values = @tree.dataset.column_values(class_var)
|
164
|
-
count = class_values.each.with_object({}) do |val, o|
|
165
|
-
o[val] = 0
|
166
|
-
end
|
167
|
-
self.subset.each.with_object(count) do |row, o|
|
168
|
-
count[row[class_var]] += 1
|
169
|
-
end
|
170
|
-
count.max_by{|v, c| c}[0]
|
171
|
-
end
|
172
|
-
|
173
|
-
# Returns a hash of {attribute, entropy} given that we divide the dataset
|
174
|
-
# on attribute.
|
175
|
-
def entropies
|
176
|
-
self.pending_attrs.each.with_object({}) do |a, o|
|
177
|
-
values = @tree.dataset.column_values(a)
|
178
|
-
val_probabilities = values.each.with_object({}) do |v, o|
|
179
|
-
o[v] = subset.probability a, v
|
180
|
-
end
|
181
|
-
val_entropies = values.each.with_object({}) do |v, o|
|
182
|
-
o[v] = subset.subset({a => v}).entropy(self.tree.class_var)
|
183
|
-
end
|
184
|
-
o[a] = values.reduce(0) do |memo, v|
|
185
|
-
memo + val_entropies[v] * val_probabilities[v]
|
186
|
-
end
|
187
|
-
end
|
188
|
-
end
|
189
192
|
end
|
data/lib/baobab/shannon.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
|
+
module Baobab
|
1
2
|
|
2
|
-
module Shannon
|
3
|
+
module Shannon
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def self.entropy *probabilities
|
6
|
+
probabilities.reduce(0) do |memo, p|
|
7
|
+
if p.zero? then 0 else memo + self.entropy_term(p) end
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.entropy_term probability
|
12
|
+
- probability * Math::log2(probability)
|
13
|
+
end
|
14
|
+
end
|
9
15
|
|
10
|
-
def self.entropy_term probability
|
11
|
-
- probability * Math::log2(probability)
|
12
|
-
end
|
13
16
|
end
|
data/lib/baobab/tree.rb
CHANGED
@@ -1,64 +1,68 @@
|
|
1
1
|
require 'json'
|
2
2
|
require 'set'
|
3
3
|
|
4
|
-
|
4
|
+
module Baobab
|
5
5
|
|
6
|
-
|
7
|
-
attr_reader :root
|
6
|
+
class DecisionTree
|
8
7
|
|
9
|
-
|
10
|
-
|
8
|
+
# The first decision tree node
|
9
|
+
attr_reader :root
|
11
10
|
|
12
|
-
|
13
|
-
|
11
|
+
# The class variable of interest in this decision tree.
|
12
|
+
attr_reader :class_var
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
@class_var = class_var
|
18
|
-
entropy = dataset.entropy class_var
|
19
|
-
@root = DecisionTreeNode.new(
|
20
|
-
self, parent=nil,
|
21
|
-
attribute=nil, value=nil,
|
22
|
-
entropy
|
23
|
-
)
|
24
|
-
@root.build_subtree
|
25
|
-
end
|
14
|
+
# The underlying dataset
|
15
|
+
attr_reader :dataset
|
26
16
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
end
|
39
|
-
return s
|
40
|
-
end
|
17
|
+
def initialize dataset, class_var
|
18
|
+
@dataset = dataset
|
19
|
+
@class_var = class_var
|
20
|
+
entropy = dataset.entropy class_var
|
21
|
+
@root = DecisionTreeNode.new(
|
22
|
+
self, parent=nil,
|
23
|
+
attribute=nil, value=nil,
|
24
|
+
entropy
|
25
|
+
)
|
26
|
+
@root.build_subtree
|
27
|
+
end
|
41
28
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
29
|
+
# Prints the decision depth-first with the respective entropy values.
|
30
|
+
def to_s
|
31
|
+
s = ""
|
32
|
+
nodes = [[0, @root]]
|
33
|
+
while nodes.any?
|
34
|
+
l, n = nodes.last
|
35
|
+
nodes = nodes.slice(0...-1)
|
36
|
+
n.children.each do |c|
|
37
|
+
nodes << [l + 1, c]
|
38
|
+
end
|
39
|
+
s = s + "#{' ' * (l * 2)}#{n.to_s}\n"
|
40
|
+
end
|
41
|
+
return s
|
42
|
+
end
|
43
|
+
|
44
|
+
# Receives attributes and their values (they must be all defined).
|
45
|
+
# Returns the value of the predicted class value.
|
46
|
+
def query values
|
47
|
+
if values.keys != @dataset.attribute_names(@class_var)
|
48
|
+
raise "Query does not fit all variables"
|
49
|
+
end
|
50
|
+
node = @root
|
51
|
+
while node.variable != @class_var
|
52
|
+
if node.next_attribute
|
53
|
+
if node.children.count > 1
|
54
|
+
val = values[node.next_attribute]
|
55
|
+
node = node.children.select do |child|
|
56
|
+
child.value == val
|
57
|
+
end[0]
|
58
|
+
else
|
59
|
+
node = node.children[0]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
node.value
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
63
67
|
|
64
68
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baobab
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Johnny E. Lee Othon
|
@@ -11,16 +10,7 @@ bindir: bin
|
|
11
10
|
cert_chain: []
|
12
11
|
date: 2015-03-22 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
|
-
description:
|
15
|
-
in Ruby\n\n## How to run the tests\n\n```\nrake tests\n```\n\n## Coming soon\n\nI
|
16
|
-
promise I'll make this an installable gem. One of these days.\n\n## Sources of the
|
17
|
-
datasets\n\nThe weather dataset has been adapted from the `weather.nominal.arff`
|
18
|
-
that comes shipped with [Weka](http://www.cs.waikato.ac.nz/ml/weka/).\n\nThe transportation
|
19
|
-
dataset was taken from the example data in [https://www.youtube.com/watch?v=wL9aogTuZw8](https://www.youtube.com/watch?v=wL9aogTuZw8).\n\nThe
|
20
|
-
breast cancer dataset is adapted from the `breast-cancer.arff` file that comes shipped
|
21
|
-
with Weka. It should be attributed to:\n\nMatjaz Zwitter & Milan Soklic (physicians)\nInstitute
|
22
|
-
of Oncology \nUniversity Medical Center\nLjubljana, Yugoslavia\nDonors: Ming Tan
|
23
|
-
and Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)\nDate: 11 July 1988\n"
|
13
|
+
description:
|
24
14
|
email: jleeothon@gmail.com
|
25
15
|
executables: []
|
26
16
|
extensions: []
|
@@ -34,26 +24,25 @@ files:
|
|
34
24
|
homepage: https://github.com/jleeothon/baobab
|
35
25
|
licenses:
|
36
26
|
- MIT
|
27
|
+
metadata: {}
|
37
28
|
post_install_message:
|
38
29
|
rdoc_options: []
|
39
30
|
require_paths:
|
40
31
|
- lib
|
41
32
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
33
|
requirements:
|
44
|
-
- -
|
34
|
+
- - ">="
|
45
35
|
- !ruby/object:Gem::Version
|
46
36
|
version: '0'
|
47
37
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
-
none: false
|
49
38
|
requirements:
|
50
|
-
- -
|
39
|
+
- - ">="
|
51
40
|
- !ruby/object:Gem::Version
|
52
41
|
version: '0'
|
53
42
|
requirements: []
|
54
43
|
rubyforge_project:
|
55
|
-
rubygems_version:
|
44
|
+
rubygems_version: 2.4.6
|
56
45
|
signing_key:
|
57
|
-
specification_version:
|
46
|
+
specification_version: 4
|
58
47
|
summary: ID3 decision trees for machine learning in Ruby
|
59
48
|
test_files: []
|