decision-tree 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/decision-tree.rb +84 -20
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 618f5b362f73985a29b4c14122ee3f8759819e37
|
4
|
+
data.tar.gz: 6ce2d965ce54258bce09533ca4504a547d3d45d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbe8ce578b1b98da49f7407cda609826d55216011d4634c0ea6782be63cd230a0eef4786b5497d2ca8aa71737bdc65a244308eaef348dcb28e0cd254c7e7b5ff
|
7
|
+
data.tar.gz: 06cc3773d1add266888dde7a37df1a5523342c4500811004e2d1a4f73617e53c6e817e3d4a4a05d5f670c67b20d535038c5cd0733da33ce273f6ae10657f6a79
|
data/lib/decision-tree.rb
CHANGED
@@ -74,20 +74,36 @@ module DecisionTree
|
|
74
74
|
|
75
75
|
@path << choose_best_feature(entries)
|
76
76
|
|
77
|
-
|
77
|
+
if @algorithm == 'id3'
|
78
|
+
build_child_nodes(entries)
|
79
|
+
elsif algorithm=='c45'
|
80
|
+
if feature_type=='num'
|
81
|
+
build_child_nodes_with_continuous_value(entries)
|
82
|
+
else
|
83
|
+
build_child_nodes(entries)
|
84
|
+
end
|
85
|
+
end
|
78
86
|
end
|
79
87
|
|
88
|
+
|
80
89
|
def feature_index
|
81
90
|
@path[-1]
|
82
91
|
end
|
83
92
|
|
84
93
|
|
85
94
|
def feature_name
|
86
|
-
@columns[ @path[-1] ]
|
95
|
+
@columns[ @path[-1] ].split(':')[0]
|
87
96
|
end
|
88
97
|
|
89
|
-
|
90
|
-
|
98
|
+
|
99
|
+
def feature_type
|
100
|
+
t = @columns[ @path[-1] ].split(':')[1]
|
101
|
+
t || 'string'
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
def to_pseudo_code(buff=nil,indent="")
|
106
|
+
buff = Array.new if buff.nil?
|
91
107
|
|
92
108
|
if @child_nodes.size==0
|
93
109
|
result = @labels.to_set.to_a
|
@@ -96,14 +112,27 @@ module DecisionTree
|
|
96
112
|
else
|
97
113
|
buff << "#{indent}return #{@labels}"
|
98
114
|
end
|
115
|
+
return buff
|
99
116
|
end
|
100
117
|
|
101
|
-
@
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
118
|
+
if @algorithm=='c45' and feature_type=='num'
|
119
|
+
sorted_nodes = @child_nodes.sort_by{|k,v| k.to_f }
|
120
|
+
sorted_nodes[1..-1].to_a.each do |feature_value,child_node|
|
121
|
+
buff << "#{indent}if(#{feature_name} >= #{feature_value}){"
|
122
|
+
child_node.to_pseudo_code(buff, indent+" " )
|
123
|
+
buff << "#{indent}}"
|
124
|
+
end
|
125
|
+
buff << "#{indent}else{"
|
126
|
+
sorted_nodes[0][1].to_pseudo_code(buff, indent+" " )
|
127
|
+
buff << "#{indent}}"
|
128
|
+
else
|
129
|
+
@child_nodes.each do |feature_value,child_node|
|
130
|
+
buff << "#{indent}if(#{feature_name} == #{feature_value}){"
|
131
|
+
child_node.to_pseudo_code(buff, indent+" " )
|
132
|
+
buff << "#{indent}}"
|
133
|
+
end
|
106
134
|
end
|
135
|
+
|
107
136
|
return buff
|
108
137
|
end
|
109
138
|
|
@@ -113,11 +142,24 @@ module DecisionTree
|
|
113
142
|
probability = Hash.new(0)
|
114
143
|
@labels.each{|k| probability[k] += 1 }
|
115
144
|
probability.each{|k,v| probability[k] = v / @labels.size.to_f }
|
116
|
-
return probability
|
145
|
+
return probability
|
117
146
|
else
|
118
|
-
|
119
|
-
|
120
|
-
|
147
|
+
if @algorithm=='c45' and feature_type=='num'
|
148
|
+
curr_value = vector[feature_index]
|
149
|
+
|
150
|
+
sorted_nodes = @child_nodes.sort_by{|k,v| k.to_f }
|
151
|
+
last_node = sorted_nodes[0][1]
|
152
|
+
sorted_nodes[1..-1].to_a.each do |feature_value,child_node|
|
153
|
+
break if curr_value.to_f < feature_value.to_f
|
154
|
+
last_node = child_node
|
155
|
+
end
|
156
|
+
|
157
|
+
return last_node.predict(vector,default)
|
158
|
+
else
|
159
|
+
feature_value = vector[feature_index]
|
160
|
+
return default if not @child_nodes.has_key?(feature_value)
|
161
|
+
return @child_nodes[feature_value].predict(vector,default)
|
162
|
+
end
|
121
163
|
end
|
122
164
|
end
|
123
165
|
|
@@ -130,21 +172,16 @@ module DecisionTree
|
|
130
172
|
@dimension.times do |i|
|
131
173
|
next if @path.include?(i)
|
132
174
|
child_entropy = entries.map{|x| x[:features][i]}.concitional_entropy_with(labels)
|
133
|
-
|
175
|
+
|
134
176
|
ig = if @algorithm=='id3'
|
135
177
|
@entropy - child_entropy
|
136
178
|
else# c45
|
137
|
-
a = (@entropy - child_entropy)
|
138
|
-
b = entries.map{|x| x[:features][i]}.entropy
|
139
|
-
# puts "@path=#{@path}"
|
140
|
-
# puts "i=#{i} @entropy=#{@entropy} child_entropy=#{child_entropy} a=#{a} b=#{b}"
|
141
179
|
gain = (@entropy - child_entropy) / entries.map{|x| x[:features][i]}.entropy
|
142
180
|
gain = 0 if gain.nan?
|
143
181
|
gain
|
144
182
|
end
|
145
183
|
|
146
184
|
max_ig = {index: i, ig: ig} if ig > max_ig[:ig]
|
147
|
-
# puts "max_ig=#{max_ig} ig=#{ig}"
|
148
185
|
end
|
149
186
|
return max_ig[:index]
|
150
187
|
end
|
@@ -162,5 +199,32 @@ module DecisionTree
|
|
162
199
|
@child_nodes[feature_value] = Node.new(child_entries, @columns, @algorithm, @dimension, self, feature_value, @path.dup)
|
163
200
|
end
|
164
201
|
end
|
202
|
+
|
203
|
+
|
204
|
+
def build_child_nodes_with_continuous_value(entries)
|
205
|
+
|
206
|
+
buff = Hash.new{|h,feature_value| h[feature_value] = Array.new}
|
207
|
+
sorted_entries = entries.sort_by{|e| e[:features][feature_index].to_f }
|
208
|
+
|
209
|
+
last_label = nil #sorted_entries[0][:label].to_s
|
210
|
+
last_value = nil # sorted_entries[0][:features][feature_index]
|
211
|
+
|
212
|
+
sorted_entries.each_with_index do |e, i|
|
213
|
+
|
214
|
+
feature_value = e[:features][feature_index]
|
215
|
+
|
216
|
+
if last_label != e[:label].to_s
|
217
|
+
last_value = feature_value.to_s
|
218
|
+
last_label = e[:label].to_s
|
219
|
+
end
|
220
|
+
|
221
|
+
buff[last_value] << e
|
222
|
+
end
|
223
|
+
|
224
|
+
buff.each do |feature_value,child_entries|
|
225
|
+
@child_nodes[feature_value] = Node.new(child_entries, @columns, @algorithm, @dimension, self, feature_value, @path.dup)
|
226
|
+
end
|
227
|
+
|
228
|
+
end
|
165
229
|
end
|
166
|
-
end
|
230
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: decision-tree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ireullin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'A decision tree library which implemented ID3 & C4.5 of algorithms '
|
14
14
|
email:
|