decision-tree 0.0.2 → 0.0.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/decision-tree.rb +84 -20
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f70b8957300f4487b14eec4a2ba6461395899c8
4
- data.tar.gz: 62425aa46fa2199efafee60cfcb13f860fa76c54
3
+ metadata.gz: 618f5b362f73985a29b4c14122ee3f8759819e37
4
+ data.tar.gz: 6ce2d965ce54258bce09533ca4504a547d3d45d2
5
5
  SHA512:
6
- metadata.gz: e6c52758675d22c3d91493cdaaef808e791d53ad73097cbc582a869e68c977f4a8f5bef47c218ab581fed4f422c3e1605853597cf2e1c9fdeaceda6e5b65a7a2
7
- data.tar.gz: a1bafd5b22b0d54215ef74edfc5fbd82bc2c3c75ce78543e1ada9367ce504e711aa0510487fe08e31e1f3e369ce3eb8b2c7f6b20d8c3a4a3c7eed35873f392a0
6
+ metadata.gz: cbe8ce578b1b98da49f7407cda609826d55216011d4634c0ea6782be63cd230a0eef4786b5497d2ca8aa71737bdc65a244308eaef348dcb28e0cd254c7e7b5ff
7
+ data.tar.gz: 06cc3773d1add266888dde7a37df1a5523342c4500811004e2d1a4f73617e53c6e817e3d4a4a05d5f670c67b20d535038c5cd0733da33ce273f6ae10657f6a79
@@ -74,20 +74,36 @@ module DecisionTree
74
74
 
75
75
  @path << choose_best_feature(entries)
76
76
 
77
- build_child_nodes(entries)
77
+ if @algorithm == 'id3'
78
+ build_child_nodes(entries)
79
+ elsif algorithm=='c45'
80
+ if feature_type=='num'
81
+ build_child_nodes_with_continuous_value(entries)
82
+ else
83
+ build_child_nodes(entries)
84
+ end
85
+ end
78
86
  end
79
87
 
88
+
80
89
  def feature_index
81
90
  @path[-1]
82
91
  end
83
92
 
84
93
 
85
94
  def feature_name
86
- @columns[ @path[-1] ]
95
+ @columns[ @path[-1] ].split(':')[0]
87
96
  end
88
97
 
89
- def to_pseudo_code(buff=nil,indent="")
90
- buff = Array.new if buff.nil?
98
+
99
+ def feature_type
100
+ t = @columns[ @path[-1] ].split(':')[1]
101
+ t || 'string'
102
+ end
103
+
104
+
105
+ def to_pseudo_code(buff=nil,indent="")
106
+ buff = Array.new if buff.nil?
91
107
 
92
108
  if @child_nodes.size==0
93
109
  result = @labels.to_set.to_a
@@ -96,14 +112,27 @@ module DecisionTree
96
112
  else
97
113
  buff << "#{indent}return #{@labels}"
98
114
  end
115
+ return buff
99
116
  end
100
117
 
101
- @child_nodes.each do |feature_value,child_node|
102
- buff << "#{indent}if(#{feature_name} == #{feature_value}){"
103
- # buff << "#{indent}if(#{feature_index} == #{feature_value}){"
104
- child_node.to_pseudo_code(buff, indent+" " )
105
- buff << "#{indent}}"
118
+ if @algorithm=='c45' and feature_type=='num'
119
+ sorted_nodes = @child_nodes.sort_by{|k,v| k.to_f }
120
+ sorted_nodes[1..-1].to_a.each do |feature_value,child_node|
121
+ buff << "#{indent}if(#{feature_name} >= #{feature_value}){"
122
+ child_node.to_pseudo_code(buff, indent+" " )
123
+ buff << "#{indent}}"
124
+ end
125
+ buff << "#{indent}else{"
126
+ sorted_nodes[0][1].to_pseudo_code(buff, indent+" " )
127
+ buff << "#{indent}}"
128
+ else
129
+ @child_nodes.each do |feature_value,child_node|
130
+ buff << "#{indent}if(#{feature_name} == #{feature_value}){"
131
+ child_node.to_pseudo_code(buff, indent+" " )
132
+ buff << "#{indent}}"
133
+ end
106
134
  end
135
+
107
136
  return buff
108
137
  end
109
138
 
@@ -113,11 +142,24 @@ module DecisionTree
113
142
  probability = Hash.new(0)
114
143
  @labels.each{|k| probability[k] += 1 }
115
144
  probability.each{|k,v| probability[k] = v / @labels.size.to_f }
116
- return probability.to_json
145
+ return probability
117
146
  else
118
- feature_value = vector[feature_index]
119
- return default if not @child_nodes.has_key?(feature_value)
120
- return @child_nodes[feature_value].predict(vector)
147
+ if @algorithm=='c45' and feature_type=='num'
148
+ curr_value = vector[feature_index]
149
+
150
+ sorted_nodes = @child_nodes.sort_by{|k,v| k.to_f }
151
+ last_node = sorted_nodes[0][1]
152
+ sorted_nodes[1..-1].to_a.each do |feature_value,child_node|
153
+ break if curr_value.to_f < feature_value.to_f
154
+ last_node = child_node
155
+ end
156
+
157
+ return last_node.predict(vector,default)
158
+ else
159
+ feature_value = vector[feature_index]
160
+ return default if not @child_nodes.has_key?(feature_value)
161
+ return @child_nodes[feature_value].predict(vector,default)
162
+ end
121
163
  end
122
164
  end
123
165
 
@@ -130,21 +172,16 @@ module DecisionTree
130
172
  @dimension.times do |i|
131
173
  next if @path.include?(i)
132
174
  child_entropy = entries.map{|x| x[:features][i]}.concitional_entropy_with(labels)
133
-
175
+
134
176
  ig = if @algorithm=='id3'
135
177
  @entropy - child_entropy
136
178
  else# c45
137
- a = (@entropy - child_entropy)
138
- b = entries.map{|x| x[:features][i]}.entropy
139
- # puts "@path=#{@path}"
140
- # puts "i=#{i} @entropy=#{@entropy} child_entropy=#{child_entropy} a=#{a} b=#{b}"
141
179
  gain = (@entropy - child_entropy) / entries.map{|x| x[:features][i]}.entropy
142
180
  gain = 0 if gain.nan?
143
181
  gain
144
182
  end
145
183
 
146
184
  max_ig = {index: i, ig: ig} if ig > max_ig[:ig]
147
- # puts "max_ig=#{max_ig} ig=#{ig}"
148
185
  end
149
186
  return max_ig[:index]
150
187
  end
@@ -162,5 +199,32 @@ module DecisionTree
162
199
  @child_nodes[feature_value] = Node.new(child_entries, @columns, @algorithm, @dimension, self, feature_value, @path.dup)
163
200
  end
164
201
  end
202
+
203
+
204
+ def build_child_nodes_with_continuous_value(entries)
205
+
206
+ buff = Hash.new{|h,feature_value| h[feature_value] = Array.new}
207
+ sorted_entries = entries.sort_by{|e| e[:features][feature_index].to_f }
208
+
209
+ last_label = nil #sorted_entries[0][:label].to_s
210
+ last_value = nil # sorted_entries[0][:features][feature_index]
211
+
212
+ sorted_entries.each_with_index do |e, i|
213
+
214
+ feature_value = e[:features][feature_index]
215
+
216
+ if last_label != e[:label].to_s
217
+ last_value = feature_value.to_s
218
+ last_label = e[:label].to_s
219
+ end
220
+
221
+ buff[last_value] << e
222
+ end
223
+
224
+ buff.each do |feature_value,child_entries|
225
+ @child_nodes[feature_value] = Node.new(child_entries, @columns, @algorithm, @dimension, self, feature_value, @path.dup)
226
+ end
227
+
228
+ end
165
229
  end
166
- end
230
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: decision-tree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - ireullin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-17 00:00:00.000000000 Z
11
+ date: 2017-04-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'A decision tree library which implemented ID3 & C4.5 of algorithms '
14
14
  email: