decisiontree 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +3 -0
- data/History.txt +0 -0
- data/Manifest.txt +18 -0
- data/README.txt +15 -0
- data/Rakefile +53 -0
- data/examples/continuous-id3.rb +33 -0
- data/examples/data/continuous-test.txt +13 -0
- data/examples/data/continuous-training.txt +133 -0
- data/examples/data/discrete-test.txt +4 -0
- data/examples/data/discrete-training.txt +21 -0
- data/examples/discrete-id3.rb +34 -0
- data/lib/decisiontree.rb +1 -0
- data/lib/decisiontree/id3_tree.rb +132 -0
- data/lib/decisiontree/version.rb +9 -0
- data/setup.rb +1585 -0
- data/test/test_decisiontree.rb +26 -0
- data/test/test_helper.rb +2 -0
- metadata +62 -0
data/CHANGELOG.txt
ADDED
data/History.txt
ADDED
File without changes
|
data/Manifest.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
CHANGELOG.txt
|
2
|
+
History.txt
|
3
|
+
Manifest.txt
|
4
|
+
README.txt
|
5
|
+
Rakefile
|
6
|
+
|
7
|
+
lib/decisiontree.rb
|
8
|
+
lib/decisiontree/version.rb
|
9
|
+
lib/decisiontree/id3_tree.rb
|
10
|
+
examples/continuous-id3.rb
|
11
|
+
examples/discrete-id3.rb
|
12
|
+
examples/data/continuous-test.txt
|
13
|
+
examples/data/discrete-test.txt
|
14
|
+
examples/data/continuous-training.txt
|
15
|
+
examples/data/discrete-training.txt
|
16
|
+
setup.rb
|
17
|
+
test/test_helper.rb
|
18
|
+
test/test_decisiontree.rb
|
data/README.txt
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
README for decision_tree
|
2
|
+
========================
|
3
|
+
|
4
|
+
A Ruby library which implements the ID3 (information gain) algorithm for decision tree learning. Currently, both continuous and discrete datasets can be learned.
|
5
|
+
|
6
|
+
- Discrete assumes unique labels, can be graphed and converted into a png for visual analysis
|
7
|
+
- Continuous looks at all possible values for a variable and iteratively chooses the best threshold between all possible assignments. This results in a binary tree which is partitioned by the threshold at every step. (e.g. temperature > 20C)
|
8
|
+
|
9
|
+
Currently, graphing works properly only for discrete cases due to a limitation in graphviz code.
|
10
|
+
|
11
|
+
Graphviz dependency: http://rockit.sourceforge.net/subprojects/graphr/
|
12
|
+
|
13
|
+
Enjoy.
|
14
|
+
|
15
|
+
Ilya Grigorik (ilya <at> fortehost DOT com)
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'hoe'
|
11
|
+
include FileUtils
|
12
|
+
require File.join(File.dirname(__FILE__), 'lib', 'decisiontree', 'version')
|
13
|
+
|
14
|
+
AUTHOR = "Ilya Grigorik"
|
15
|
+
EMAIL = "ilya <at> fortehost.com"
|
16
|
+
DESCRIPTION = "ID3-based implementation of the M.L. Decision Tree algorithm"
|
17
|
+
GEM_NAME = "decisiontree" # what ppl will type to install your gem
|
18
|
+
RUBYFORGE_PROJECT = "decisiontree" # The unix name for your project
|
19
|
+
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
|
20
|
+
|
21
|
+
NAME = "decisiontree"
|
22
|
+
REV = nil # UNCOMMENT IF REQUIRED: File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
|
23
|
+
VERS = ENV['VERSION'] || (DecisionTree::VERSION::STRING + (REV ? ".#{REV}" : ""))
|
24
|
+
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
|
25
|
+
RDOC_OPTS = ['--quiet', '--title', "decisiontree documentation",
|
26
|
+
"--opname", "index.html",
|
27
|
+
"--line-numbers",
|
28
|
+
"--main", "README",
|
29
|
+
"--inline-source"]
|
30
|
+
|
31
|
+
# Reopens Hoe to override the extra_deps reader: it returns @extra_deps with
# every entry whose first element is 'hoe' removed.
# NOTE(review): presumably this keeps hoe itself out of the generated gem's
# dependency list -- confirm against the Hoe version in use.
class Hoe
|
32
|
+
def extra_deps
|
33
|
+
@extra_deps.reject { |x| Array(x).first == 'hoe' }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Generate all the Rake tasks
|
38
|
+
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
39
|
+
hoe = Hoe.new(GEM_NAME, VERS) do |p|
|
40
|
+
p.author = AUTHOR
|
41
|
+
p.description = DESCRIPTION
|
42
|
+
p.email = EMAIL
|
43
|
+
p.summary = DESCRIPTION
|
44
|
+
p.url = HOMEPATH
|
45
|
+
p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
|
46
|
+
# The gem's test files are named test_*.rb (test/test_decisiontree.rb,
# test/test_helper.rb), so the glob must match on the "test_" prefix;
# the previous "*_test.rb" pattern matched no files and ran no tests.
p.test_globs = ["test/**/test_*.rb"]
|
47
|
+
p.clean_globs = CLEAN #An array of file patterns to delete on clean.
|
48
|
+
|
49
|
+
# == Optional
|
50
|
+
#p.changes - A description of the release's latest changes.
|
51
|
+
#p.extra_deps - An array of rubygem dependencies.
|
52
|
+
#p.spec_extras - A hash of extra values to set in the gemspec.
|
53
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'decisiontree'
|
3
|
+
include DecisionTree
|
4
|
+
|
5
|
+
# ---Continuous-----------------------------------------------------------------------------------------
|
6
|
+
|
7
|
+
# Read in the training data
|
8
|
+
# Splits a raw line into its comma-separated fields, dropping surrounding
# whitespace and the trailing '.' that terminates each record.
split_record = lambda { |line| line.strip.chomp('.').split(',') }

# Converts the fields of one record into values: the class label becomes
# 1 ('healthy') or 0 ('colic'); every other field is converted to a Float.
parse_record = lambda do |fields|
  fields.map do |v|
    case v
    when 'healthy' then 1
    when 'colic' then 0
    else v.to_f
    end
  end
end

training, attributes = [], nil
# File.foreach closes the file once iteration finishes; the earlier
# File.open(...).each_line form never closed its handle.
File.foreach('data/continuous-training.txt') do |line|
  fields = split_record.call(line)
  next if fields.empty? # ignore blank lines (e.g. a trailing newline)

  if attributes.nil?
    # The first row holds the attribute names and is not a training example
    attributes = fields
  else
    training.push(parse_record.call(fields))
  end
end

# Instantiate the tree, and train it based on the data (set default to '1')
dec_tree = ID3Tree.new(attributes, training, 1, :continuous)
dec_tree.train

#---- Test the tree....

# Read in the test cases
# Note: the test file has no attribute line; the labels are known from the training data
test = []
File.foreach('data/continuous-test.txt') do |line|
  fields = split_record.call(line)
  next if fields.empty? # ignore blank lines

  test.push(parse_record.call(fields))
end

# Let the tree predict the output and compare it to the true specified value
test.each do |t|
  predict = dec_tree.predict(t)
  puts "Predict: #{predict} ... True: #{t.last}"
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
4.60000,139.00000,101.00000,28.80000,7.64000,13.80000,265.06000,1.50000,0.60000,60.00000,12.00000,40.00000,40.00000,3.52393,0.20000,17.61965,healthy.
|
2
|
+
4.30000,139.00000,101.00000,26.20000,3.61000,16.10000,518.74103,1.90000,0.01000,68.00000,12.00000,38.00000,36.00000,5.70834,0.20000,28.54170,healthy.
|
3
|
+
4.20000,139.00000,101.00000,29.20000,4.96000,13.00000,265.06000,2.10000,0.50000,62.00000,12.00000,39.00000,44.00000,3.44906,0.20000,17.24530,healthy.
|
4
|
+
4.40000,141.00000,103.00000,28.30000,12.65000,14.10000,197.60699,2.20000,0.10000,66.00000,12.00000,32.00000,44.00000,3.30135,0.20000,16.50675,healthy.
|
5
|
+
4.50000,136.00000,101.00000,26.10000,3.27000,13.40000,300.61499,1.40000,0.01000,68.00000,16.00000,33.00000,50.00000,6.94524,0.70000,9.92177,healthy.
|
6
|
+
4.30000,151.00000,112.00000,21.90000,42.66000,21.40000,613.52301,11.50000,172.89999,68.00000,26.00000,63.00000,92.00000,2.69917,0.50000,5.39834,colic.
|
7
|
+
3.00000,145.00000,103.00000,22.30000,83.93000,22.70000,476.97101,43.40000,139.50000,86.00000,60.00000,67.00000,68.00000,2.73668,0.20000,13.68340,colic.
|
8
|
+
3.40000,134.00000,98.00000,25.90000,90.15000,13.50000,265.06000,2.10000,1.30000,66.00000,20.00000,40.00000,52.00000,3.13565,0.50000,6.27130,colic.
|
9
|
+
2.90000,136.00000,92.00000,34.70000,5.81000,12.20000,243.71800,4.20000,22.80000,61.00000,20.00000,41.00000,48.00000,3.20928,0.20000,16.04640,colic.
|
10
|
+
3.80000,140.00000,99.00000,28.20000,88.92000,16.60000,695.82800,7.00000,2.60000,60.00000,28.00000,49.00000,80.00000,1.67106,0.50000,3.34212,colic.
|
11
|
+
3.70000,143.00000,105.00000,21.60000,93.67000,20.10000,265.06000,4.60000,38.80000,68.00000,16.00000,43.00000,48.00000,3.51757,0.50000,7.03514,colic.
|
12
|
+
3.70000,142.00000,103.00000,27.00000,100.24000,15.70000,386.71301,2.30000,0.01000,85.00000,40.00000,45.00000,48.00000,2.81077,0.50000,5.62154,colic.
|
13
|
+
3.20000,138.00000,99.00000,29.80000,80.77000,12.40000,224.11301,2.30000,3.90000,61.00000,24.00000,37.00000,40.00000,3.32568,0.50000,6.65136,colic.
|
@@ -0,0 +1,133 @@
|
|
1
|
+
K,Na,CL,HCO,Endotoxin,Aniongap,PLA2,SDH,GLDH,TPP,Breath rate,PCV,Pulse rate,Fibrinogen,Dimer,FibPerDim
|
2
|
+
4.60000,138.00000,102.00000,27.50000,3.45000,13.10000,420.62299,4.00000,1.00000,56.00000,10.00000,38.00000,48.00000,3.78216,0.20000,18.91080,healthy.
|
3
|
+
4.50000,141.00000,103.00000,26.50000,7.64000,16.00000,695.82800,0.70000,1.00000,72.00000,16.00000,37.00000,36.00000,4.86282,0.20000,24.31410,healthy.
|
4
|
+
4.60000,143.00000,104.00000,25.30000,3.04000,18.30000,243.71800,3.10000,0.40000,68.00000,20.00000,46.00000,52.00000,4.14486,0.20000,20.72430,healthy.
|
5
|
+
4.70000,140.00000,102.00000,27.60000,3.75000,15.10000,243.71800,3.10000,1.50000,66.00000,20.00000,32.00000,40.00000,4.11386,0.20000,20.56930,healthy.
|
6
|
+
4.50000,140.00000,101.00000,23.90000,4.12000,19.60000,233.71001,3.60000,6.90000,60.00000,12.00000,52.00000,48.00000,3.47588,0.20000,17.37940,healthy.
|
7
|
+
4.00000,139.00000,101.00000,29.30000,4.05000,12.70000,153.64301,1.60000,0.01000,55.00000,16.00000,41.00000,44.00000,3.63289,0.20000,18.16445,healthy.
|
8
|
+
3.20000,139.00000,98.00000,30.70000,101.18000,13.50000,564.12097,6.80000,16.40000,66.00000,56.00000,53.00000,80.00000,5.83544,1.00000,5.83544,colic.
|
9
|
+
3.20000,144.00000,105.00000,24.40000,51.15000,17.80000,386.71301,43.60000,471.60001,58.00000,20.00000,35.00000,48.00000,2.65903,0.50000,5.31806,colic.
|
10
|
+
3.90000,144.00000,99.00000,20.30000,94.45000,28.60000,1305.69495,16.60000,58.60000,64.00000,48.00000,75.00000,88.00000,1.86868,0.20000,9.34340,colic.
|
11
|
+
3.60000,134.00000,96.00000,26.30000,79.33000,15.30000,386.71301,4.50000,2.80000,48.00000,28.00000,35.00000,100.00000,3.86725,0.50000,7.73450,colic.
|
12
|
+
3.80000,148.00000,111.00000,23.90000,45.27000,16.90000,895.03497,1.60000,10.10000,84.00000,16.00000,55.00000,60.00000,4.58211,0.20000,22.91055,colic.
|
13
|
+
3.30000,140.00000,102.00000,20.90000,68.33000,20.40000,326.93799,2.00000,1.70000,84.00000,20.00000,46.00000,56.00000,3.57136,0.50000,7.14272,colic.
|
14
|
+
3.50000,140.00000,99.00000,25.10000,97.40000,19.40000,420.53101,5.40000,8.80000,94.00000,16.00000,53.00000,80.00000,4.02566,0.70000,5.75094,colic.
|
15
|
+
3.30000,137.00000,98.00000,30.80000,74.87000,11.50000,789.14801,168.60001,465.10001,60.00000,36.00000,40.00000,48.00000,5.79638,0.70000,8.28054,colic.
|
16
|
+
3.10000,126.00000,88.00000,27.90000,9.31000,13.20000,206.06100,2.10000,0.01000,70.00000,36.00000,37.00000,52.00000,5.55303,0.50000,11.10606,colic.
|
17
|
+
3.10000,138.00000,94.00000,39.80000,57.39000,7.30000,420.53101,3.80000,10.50000,68.00000,20.00000,46.00000,68.00000,2.45303,0.20000,12.26515,colic.
|
18
|
+
5.00000,136.00000,100.00000,31.40000,12.28000,9.60000,276.43900,4.90000,0.01000,58.00000,16.00000,40.00000,48.00000,4.00226,0.20000,20.01130,healthy.
|
19
|
+
3.60000,139.00000,100.00000,29.20000,7.25000,13.40000,288.27600,1.10000,1.10000,65.00000,12.00000,38.00000,48.00000,2.85107,0.20000,14.25535,healthy.
|
20
|
+
4.30000,142.00000,102.00000,29.90000,3.80000,14.40000,243.71800,3.00000,0.30000,67.00000,12.00000,44.00000,44.00000,3.87469,0.20000,19.37345,healthy.
|
21
|
+
4.60000,139.00000,100.00000,29.40000,2.40000,14.20000,288.27600,2.40000,2.10000,65.00000,16.00000,43.00000,52.00000,4.84979,0.20000,24.24895,healthy.
|
22
|
+
4.10000,136.00000,98.00000,28.40000,2.97000,13.70000,300.61499,2.00000,1.10000,62.00000,12.00000,43.00000,48.00000,5.19111,0.50000,10.38222,healthy.
|
23
|
+
4.20000,136.00000,98.00000,25.30000,2.93000,16.90000,224.11301,9.90000,0.70000,64.00000,16.00000,36.00000,52.00000,3.91034,0.20000,19.55170,healthy.
|
24
|
+
3.00000,132.00000,89.00000,29.40000,88.25000,16.60000,162.05200,3.40000,0.01000,52.00000,28.00000,45.00000,76.00000,1.64083,0.50000,3.28166,colic.
|
25
|
+
3.30000,139.00000,99.00000,25.70000,49.80000,17.60000,174.25400,0.90000,0.30000,62.00000,16.00000,38.00000,60.00000,3.20091,1.50000,2.13394,colic.
|
26
|
+
2.90000,138.00000,92.00000,24.80000,94.45000,24.10000,355.59201,9.20000,4.00000,51.00000,45.00000,44.00000,42.00000,2.42420,1.50000,1.61613,colic.
|
27
|
+
2.60000,131.00000,89.00000,26.50000,6.54000,18.10000,725.62500,4.70000,11.00000,80.00000,48.00000,43.00000,52.00000,4.10642,0.50000,8.21284,colic.
|
28
|
+
3.60000,135.00000,95.00000,26.70000,65.86000,16.90000,243.71800,4.80000,1.60000,58.00000,38.00000,50.00000,88.00000,2.92609,0.20000,14.63045,colic.
|
29
|
+
3.30000,147.00000,105.00000,28.00000,61.56000,17.30000,313.50201,3.70000,2.60000,75.00000,40.00000,48.00000,88.00000,3.60096,1.50000,2.40064,colic.
|
30
|
+
3.20000,142.00000,100.00000,26.70000,78.69000,18.50000,370.81000,42.90000,333.79999,80.00000,24.00000,55.00000,100.00000,4.53422,2.00000,2.26711,colic.
|
31
|
+
3.70000,136.00000,86.00000,25.30000,65.54000,28.40000,1103.97498,6.40000,4.80000,100.00000,20.00000,55.00000,132.00000,7.76240,1.00000,7.76240,colic.
|
32
|
+
3.30000,142.00000,99.00000,29.50000,82.42000,16.80000,420.53101,6.80000,40.70000,71.00000,28.00000,48.00000,72.00000,3.29344,0.50000,6.58688,colic.
|
33
|
+
3.30000,141.00000,99.00000,32.40000,87.43000,12.90000,326.93799,3.00000,1.50000,47.00000,36.00000,48.00000,48.00000,3.24353,0.20000,16.21765,colic.
|
34
|
+
3.10000,146.00000,103.00000,26.10000,79.08000,20.00000,476.97101,3.50000,1.20000,78.00000,24.00000,54.00000,80.00000,3.76666,0.50000,7.53332,colic.
|
35
|
+
4.10000,138.00000,101.00000,27.30000,8.01000,13.80000,147.29100,6.30000,5.20000,67.00000,10.00000,43.00000,40.00000,3.68016,0.20000,18.40080,healthy.
|
36
|
+
4.10000,136.00000,98.00000,28.50000,6.15000,13.60000,174.25400,2.10000,1.30000,60.00000,8.00000,35.00000,40.00000,1.94448,0.20000,9.72240,healthy.
|
37
|
+
4.50000,136.00000,99.00000,26.80000,5.08000,14.70000,189.47200,2.00000,0.60000,55.00000,12.00000,35.00000,44.00000,3.67257,0.20000,18.36285,healthy.
|
38
|
+
3.50000,142.00000,105.00000,22.20000,6.77000,18.30000,276.43900,3.40000,1.20000,64.00000,10.00000,39.00000,48.00000,3.45945,0.20000,17.29725,healthy.
|
39
|
+
3.90000,140.00000,101.00000,28.50000,3.61000,14.40000,340.96799,0.20000,0.01000,61.00000,12.00000,37.00000,48.00000,2.51116,0.20000,12.55580,healthy.
|
40
|
+
3.60000,145.00000,106.00000,27.50000,89.65000,15.10000,224.11301,2.80000,1.20000,78.00000,60.00000,48.00000,80.00000,2.42001,0.20000,12.10005,colic.
|
41
|
+
3.50000,136.00000,98.00000,25.40000,22.39000,16.10000,1420.03601,3.60000,0.80000,60.00000,20.00000,21.00000,56.00000,9.81956,4.00000,2.45489,colic.
|
42
|
+
3.60000,140.00000,98.00000,19.50000,99.57000,26.10000,789.14801,36.10000,293.20001,73.00000,48.00000,64.00000,100.00000,2.24781,2.00000,1.12390,colic.
|
43
|
+
3.60000,131.00000,92.00000,22.60000,76.04000,20.00000,564.12097,3.70000,4.70000,48.00000,56.00000,38.00000,120.00000,3.33932,0.50000,6.67864,colic.
|
44
|
+
3.50000,144.00000,104.00000,18.90000,64.19000,24.60000,1149.99500,4.80000,3.10000,60.00000,28.00000,40.00000,80.00000,4.12378,0.70000,5.89111,colic.
|
45
|
+
2.90000,142.00000,100.00000,30.00000,49.20000,14.90000,497.39899,2.50000,0.01000,74.00000,40.00000,52.00000,64.00000,3.21284,0.50000,6.42568,colic.
|
46
|
+
3.60000,138.00000,99.00000,24.40000,50.32000,18.20000,1610.51404,14.20000,1.30000,66.00000,20.00000,37.00000,60.00000,6.60548,2.00000,3.30274,colic.
|
47
|
+
3.40000,137.00000,93.00000,24.40000,6.29000,23.00000,4227.66113,43.60000,3.00000,71.00000,36.00000,60.00000,72.00000,5.17514,6.00000,0.86252,colic.
|
48
|
+
3.50000,144.00000,100.00000,32.50000,51.49000,15.00000,129.87900,7.90000,83.00000,61.00000,36.00000,44.00000,84.00000,3.42922,0.20000,17.14610,colic.
|
49
|
+
3.10000,136.00000,98.00000,23.40000,5.97000,17.70000,243.71800,2.10000,2.70000,66.00000,28.00000,45.00000,52.00000,2.84968,0.20000,14.24840,colic.
|
50
|
+
4.50000,137.00000,100.00000,27.20000,11.48000,14.30000,181.70300,2.00000,3.60000,62.00000,8.00000,38.00000,52.00000,4.01342,0.20000,20.06710,healthy.
|
51
|
+
4.20000,141.00000,103.00000,29.10000,3.77000,13.10000,288.27600,6.70000,5.60000,64.00000,8.00000,42.00000,40.00000,4.20329,0.20000,21.01645,healthy.
|
52
|
+
4.20000,138.00000,101.00000,28.30000,6.22000,12.90000,288.27600,5.40000,2.10000,65.00000,12.00000,43.00000,44.00000,5.08152,0.20000,25.40760,healthy.
|
53
|
+
4.50000,137.00000,101.00000,27.40000,6.68000,13.10000,167.07899,2.10000,1.10000,60.00000,16.00000,38.00000,48.00000,3.25795,0.20000,16.28975,healthy.
|
54
|
+
4.00000,141.00000,102.00000,27.20000,12.44000,15.80000,338.17999,3.40000,3.10000,72.00000,12.00000,33.00000,48.00000,4.98961,0.20000,24.94805,healthy.
|
55
|
+
4.20000,138.00000,96.00000,23.70000,51.83000,22.50000,355.59201,2.70000,4.20000,60.00000,20.00000,39.00000,100.00000,3.61817,0.50000,7.23634,colic.
|
56
|
+
3.60000,141.00000,101.00000,28.60000,97.70000,15.00000,667.21997,5.00000,3.70000,70.00000,12.00000,48.00000,60.00000,3.13410,1.00000,3.13410,colic.
|
57
|
+
3.20000,137.00000,100.00000,24.40000,71.53000,15.80000,224.11301,2.40000,2.20000,79.00000,28.00000,42.00000,60.00000,3.92367,1.00000,3.92367,colic.
|
58
|
+
3.50000,141.00000,102.00000,27.40000,51.93000,15.10000,1015.08801,3.10000,0.80000,62.00000,72.00000,54.00000,88.00000,2.50883,0.20000,12.54415,colic.
|
59
|
+
4.20000,143.00000,106.00000,24.00000,5.31000,17.20000,265.06000,8.00000,32.90000,77.00000,16.00000,38.00000,40.00000,3.98583,1.00000,3.98583,colic.
|
60
|
+
3.20000,138.00000,97.00000,25.00000,8.76000,19.20000,288.27600,5.40000,3.10000,70.00000,12.00000,47.00000,88.00000,5.01596,1.00000,5.01596,colic.
|
61
|
+
4.10000,132.00000,91.00000,28.60000,19.74000,16.50000,639.79999,6.70000,0.01000,78.00000,24.00000,38.00000,112.00000,8.94970,6.00000,1.49162,colic.
|
62
|
+
6.00000,140.00000,97.00000,32.20000,48.15000,16.80000,153.64301,17.00000,52.60000,48.00000,40.00000,67.00000,80.00000,2.18364,1.50000,1.45576,colic.
|
63
|
+
3.10000,138.00000,95.00000,29.30000,10.98000,16.80000,822.96600,3.90000,0.60000,58.00000,36.00000,36.00000,48.00000,2.52015,0.50000,5.04030,colic.
|
64
|
+
3.70000,144.00000,107.00000,25.40000,85.30000,15.30000,457.36600,3.10000,1.10000,66.00000,24.00000,48.00000,60.00000,2.81775,0.50000,5.63550,colic.
|
65
|
+
4.20000,139.00000,100.00000,29.40000,2.33000,13.80000,233.71001,3.40000,0.90000,64.00000,12.00000,40.00000,44.00000,3.78293,0.20000,18.91465,healthy.
|
66
|
+
4.20000,144.00000,107.00000,23.90000,7.87000,17.30000,300.61499,5.90000,16.40000,68.00000,20.00000,48.00000,48.00000,4.42355,0.20000,22.11775,healthy.
|
67
|
+
4.10000,139.00000,100.00000,28.60000,4.12000,14.50000,170.78101,0.70000,0.01000,60.00000,10.00000,43.00000,32.00000,3.22927,0.20000,16.14635,healthy.
|
68
|
+
4.70000,136.00000,99.00000,28.60000,10.43000,13.10000,288.27600,1.70000,0.20000,62.00000,8.00000,35.00000,40.00000,4.18454,0.20000,20.92270,healthy.
|
69
|
+
3.70000,140.00000,102.00000,28.20000,6.57000,13.50000,174.25400,3.20000,2.10000,60.00000,10.00000,39.00000,44.00000,3.40799,0.20000,17.03995,healthy.
|
70
|
+
3.70000,142.00000,101.00000,30.60000,94.68000,14.10000,300.61499,1.90000,0.10000,58.00000,32.00000,40.00000,80.00000,2.66538,0.20000,13.32690,colic.
|
71
|
+
3.00000,135.00000,95.00000,27.30000,8.19000,15.70000,265.06000,2.30000,0.01000,60.00000,40.00000,37.00000,48.00000,2.96841,0.20000,14.84205,colic.
|
72
|
+
2.70000,143.00000,96.00000,24.60000,83.61000,25.10000,386.71301,6.50000,3.80000,62.00000,28.00000,33.00000,52.00000,3.44921,0.50000,6.89842,colic.
|
73
|
+
4.00000,140.00000,103.00000,20.30000,99.16000,20.70000,300.61499,3.50000,1.70000,64.00000,24.00000,44.00000,64.00000,3.75317,0.20000,18.76585,colic.
|
74
|
+
3.50000,130.00000,93.00000,29.90000,4.35000,10.60000,265.06000,1.90000,0.70000,70.00000,20.00000,42.00000,52.00000,5.66107,0.50000,11.32214,colic.
|
75
|
+
3.10000,139.00000,96.00000,30.80000,20.02000,15.30000,167.07899,3.30000,1.80000,58.00000,20.00000,44.00000,72.00000,3.30615,0.20000,16.53075,colic.
|
76
|
+
3.00000,137.00000,91.00000,14.80000,7.32000,34.20000,181.70300,20.10000,1.70000,61.00000,16.00000,59.00000,72.00000,4.94729,0.50000,9.89458,colic.
|
77
|
+
3.70000,138.00000,99.00000,29.10000,97.72000,13.60000,214.92700,1.50000,0.01000,58.00000,20.00000,35.00000,56.00000,2.61113,0.20000,13.05565,colic.
|
78
|
+
4.00000,137.00000,98.00000,27.50000,56.43000,15.50000,243.71800,3.70000,0.90000,62.00000,16.00000,38.00000,60.00000,4.75695,0.50000,9.51390,colic.
|
79
|
+
3.20000,139.00000,98.00000,30.00000,76.75000,14.20000,276.43900,2.40000,0.01000,61.00000,60.00000,47.00000,72.00000,2.74397,0.20000,13.71985,colic.
|
80
|
+
4.50000,141.00000,103.00000,27.40000,9.08000,15.10000,457.36600,4.60000,5.50000,70.00000,8.00000,39.00000,32.00000,3.92956,0.20000,19.64780,healthy.
|
81
|
+
3.90000,134.00000,98.00000,25.10000,5.35000,14.80000,695.82800,1.90000,0.01000,72.00000,16.00000,33.00000,48.00000,8.01149,0.70000,11.44499,healthy.
|
82
|
+
3.90000,138.00000,102.00000,25.90000,4.05000,14.00000,564.12097,5.70000,5.50000,70.00000,10.00000,41.00000,40.00000,5.33758,0.20000,26.68790,healthy.
|
83
|
+
3.90000,141.00000,103.00000,25.20000,7.55000,16.70000,153.64301,2.90000,7.90000,70.00000,16.00000,34.00000,48.00000,3.46906,0.50000,6.93812,healthy.
|
84
|
+
4.60000,137.00000,101.00000,24.70000,3.18000,15.90000,206.06100,1.40000,1.10000,70.00000,10.00000,38.00000,40.00000,5.13267,0.20000,25.66335,healthy.
|
85
|
+
3.50000,131.00000,92.00000,30.70000,14.41000,11.80000,420.53101,3.30000,1.10000,64.00000,16.00000,41.00000,48.00000,2.23278,0.20000,11.16390,colic.
|
86
|
+
3.80000,141.00000,100.00000,29.20000,82.01000,15.60000,233.71001,2.20000,0.70000,62.00000,14.00000,33.00000,52.00000,4.07480,0.50000,8.14960,colic.
|
87
|
+
4.40000,140.00000,98.00000,24.10000,82.76000,22.30000,403.25699,2.80000,2.00000,60.00000,32.00000,62.00000,112.00000,2.15636,0.50000,4.31272,colic.
|
88
|
+
3.60000,144.00000,97.00000,19.90000,38.61000,30.70000,822.96600,10.60000,6.20000,80.00000,24.00000,62.00000,64.00000,3.64002,1.00000,3.64002,colic.
|
89
|
+
3.30000,144.00000,101.00000,28.90000,61.44000,17.40000,476.97101,28.90000,138.60001,89.00000,16.00000,54.00000,80.00000,5.20165,1.00000,5.20165,colic.
|
90
|
+
3.80000,136.00000,98.00000,23.90000,87.61000,17.90000,318.07199,6.10000,7.70000,100.00000,28.00000,54.00000,92.00000,3.27562,1.00000,3.27562,colic.
|
91
|
+
4.00000,139.00000,99.00000,26.00000,46.76000,18.00000,476.97101,5.30000,6.50000,73.00000,36.00000,37.00000,82.00000,3.37621,0.50000,6.75242,colic.
|
92
|
+
3.00000,141.00000,99.00000,32.10000,97.13000,12.90000,420.53101,2.90000,1.80000,73.00000,12.00000,28.00000,80.00000,3.37575,0.70000,4.82250,colic.
|
93
|
+
3.50000,145.00000,93.00000,20.00000,86.12000,35.50000,895.03497,5.70000,5.60000,80.00000,34.00000,65.00000,88.00000,2.57734,0.50000,5.15468,colic.
|
94
|
+
4.00000,137.00000,99.00000,29.70000,4.71000,12.30000,403.25699,2.40000,1.20000,56.00000,12.00000,37.00000,44.00000,3.37110,0.20000,16.85550,healthy.
|
95
|
+
4.20000,140.00000,103.00000,25.60000,4.80000,15.60000,386.71301,2.60000,3.50000,54.00000,12.00000,33.00000,40.00000,2.99693,0.20000,14.98465,healthy.
|
96
|
+
4.70000,139.00000,101.00000,27.40000,6.95000,15.30000,197.60699,1.30000,0.30000,58.00000,12.00000,37.00000,44.00000,2.50155,0.50000,5.00310,healthy.
|
97
|
+
5.20000,138.00000,99.00000,28.00000,4.46000,16.20000,340.96799,3.10000,2.70000,55.00000,12.00000,35.00000,56.00000,4.22825,0.20000,21.14125,healthy.
|
98
|
+
4.50000,137.00000,98.00000,26.40000,2.49000,17.10000,197.60699,14.10000,9.00000,54.00000,12.00000,42.00000,56.00000,3.47526,0.20000,17.37630,healthy.
|
99
|
+
4.40000,138.00000,101.00000,20.10000,65.74000,21.30000,476.97101,14.00000,88.60000,72.00000,14.00000,43.00000,82.00000,2.78303,0.50000,5.56606,colic.
|
100
|
+
3.80000,143.00000,101.00000,29.20000,100.22000,16.60000,313.50201,4.30000,26.50000,67.00000,20.00000,63.00000,80.00000,3.35963,1.00000,3.35963,colic.
|
101
|
+
3.50000,142.00000,101.00000,29.10000,73.95000,15.40000,386.71301,5.10000,4.30000,65.00000,28.00000,41.00000,56.00000,4.12300,0.20000,20.61500,colic.
|
102
|
+
4.30000,141.00000,104.00000,23.10000,82.72000,18.20000,386.71301,4.90000,1.60000,72.00000,36.00000,45.00000,92.00000,3.47479,0.50000,6.94958,colic.
|
103
|
+
3.60000,135.00000,98.00000,30.10000,83.79000,10.50000,254.18300,1.50000,0.01000,58.00000,20.00000,41.00000,48.00000,2.64120,0.50000,5.28240,colic.
|
104
|
+
2.80000,140.00000,101.00000,26.90000,31.25000,14.90000,463.62701,4.30000,3.80000,46.00000,28.00000,48.00000,64.00000,4.19771,0.50000,8.39542,colic.
|
105
|
+
3.30000,140.00000,99.00000,32.70000,97.22000,11.60000,300.61499,3.70000,3.40000,58.00000,24.00000,34.00000,44.00000,2.04600,0.70000,2.92286,colic.
|
106
|
+
3.10000,146.00000,103.00000,21.60000,83.65000,24.50000,288.27600,4.30000,3.50000,82.00000,32.00000,46.00000,64.00000,3.65040,0.50000,7.30080,colic.
|
107
|
+
4.10000,139.00000,102.00000,24.20000,88.23000,16.90000,214.92700,1.80000,0.01000,63.00000,12.00000,40.00000,42.00000,2.97430,0.20000,14.87150,colic.
|
108
|
+
4.50000,139.00000,100.00000,29.20000,6.04000,14.30000,210.72301,2.00000,0.20000,68.00000,10.00000,40.00000,40.00000,3.52393,0.50000,7.04786,healthy.
|
109
|
+
4.20000,130.00000,102.00000,27.90000,6.68000,4.30000,386.71301,1.90000,1.10000,56.00000,14.00000,37.00000,48.00000,4.05697,0.20000,20.28485,healthy.
|
110
|
+
5.30000,137.00000,99.00000,25.80000,4.35000,17.50000,276.43900,1.90000,0.60000,62.00000,16.00000,40.00000,52.00000,5.01906,0.50000,10.03812,healthy.
|
111
|
+
4.40000,135.00000,100.00000,25.10000,2.77000,14.30000,197.60699,0.60000,1.60000,60.00000,16.00000,36.00000,36.00000,3.56702,0.20000,17.83510,healthy.
|
112
|
+
2.90000,129.00000,86.00000,27.30000,82.85000,18.60000,756.74597,5.40000,29.50000,79.00000,16.00000,43.00000,84.00000,2.38374,1.00000,2.38374,colic.
|
113
|
+
3.40000,139.00000,98.00000,29.80000,54.42000,14.60000,695.82800,5.50000,30.00000,52.00000,24.00000,35.00000,52.00000,1.95393,0.50000,3.90786,colic.
|
114
|
+
3.30000,137.00000,96.00000,30.50000,53.76000,13.80000,233.71001,7.20000,28.90000,55.00000,24.00000,30.00000,100.00000,2.11327,0.20000,10.56635,colic.
|
115
|
+
2.50000,127.00000,88.00000,17.80000,88.37000,23.70000,588.29602,3.90000,3.20000,70.00000,24.00000,54.00000,88.00000,3.32398,2.00000,1.66199,colic.
|
116
|
+
3.30000,146.00000,97.00000,23.10000,70.02000,29.20000,1420.03601,42.70000,327.50000,70.00000,28.00000,68.00000,68.00000,2.19294,3.00000,0.73098,colic.
|
117
|
+
3.80000,140.00000,100.00000,26.70000,92.83000,17.10000,457.36600,4.60000,2.10000,61.00000,32.00000,38.00000,76.00000,2.07359,1.50000,1.38239,colic.
|
118
|
+
3.30000,134.00000,95.00000,31.60000,73.63000,10.70000,224.11301,3.30000,1.70000,62.00000,20.00000,37.00000,56.00000,3.68947,0.50000,7.37894,colic.
|
119
|
+
3.30000,140.00000,99.00000,29.60000,88.66000,14.70000,233.71001,1.60000,2.40000,74.00000,40.00000,38.00000,52.00000,2.76427,1.00000,2.76427,colic.
|
120
|
+
2.80000,145.00000,101.00000,35.40000,31.96000,11.40000,243.71800,0.40000,0.70000,70.00000,20.00000,47.00000,84.00000,3.82587,0.20000,19.12935,colic.
|
121
|
+
4.40000,136.00000,98.00000,28.50000,8.69000,13.90000,725.62500,1.90000,1.50000,60.00000,16.00000,40.00000,52.00000,3.41419,0.20000,17.07095,healthy.
|
122
|
+
3.70000,140.00000,100.00000,29.80000,5.15000,13.90000,189.47200,2.30000,0.70000,78.00000,12.00000,42.00000,48.00000,3.33607,0.20000,16.68035,healthy.
|
123
|
+
4.60000,138.00000,100.00000,28.60000,9.79000,14.00000,224.11301,1.60000,2.00000,61.00000,16.00000,35.00000,40.00000,3.58624,0.20000,17.93120,healthy.
|
124
|
+
4.00000,138.00000,102.00000,25.90000,90.54000,14.10000,326.93799,0.40000,1.70000,70.00000,20.00000,48.00000,79.00000,3.34645,0.20000,16.73225,colic.
|
125
|
+
2.70000,132.00000,93.00000,29.30000,52.57000,12.40000,1058.59497,5.00000,8.00000,78.00000,28.00000,48.00000,76.00000,4.77013,0.50000,9.54026,colic.
|
126
|
+
3.40000,133.00000,95.00000,28.50000,64.71000,12.90000,276.43900,8.70000,43.70000,76.00000,16.00000,47.00000,76.00000,4.15168,0.20000,20.75840,colic.
|
127
|
+
3.00000,139.00000,93.00000,33.30000,96.88000,15.70000,224.11301,6.90000,3.30000,48.00000,80.00000,43.00000,56.00000,2.32748,0.20000,11.63740,colic.
|
128
|
+
2.80000,139.00000,101.00000,25.90000,71.32000,14.90000,676.35999,2.30000,0.30000,71.00000,16.00000,46.00000,52.00000,2.50558,0.20000,12.52790,colic.
|
129
|
+
2.80000,142.00000,97.00000,29.80000,53.21000,18.00000,160.22400,4.70000,5.10000,50.00000,60.00000,44.00000,88.00000,2.31710,0.70000,3.31014,colic.
|
130
|
+
3.50000,140.00000,102.00000,23.00000,87.86000,18.50000,189.47200,2.20000,0.90000,73.00000,24.00000,47.00000,96.00000,3.73721,0.50000,7.47442,colic.
|
131
|
+
3.00000,142.00000,100.00000,22.60000,93.17000,22.40000,355.59201,16.30000,124.10000,80.00000,24.00000,45.00000,68.00000,2.75668,0.70000,3.93811,colic.
|
132
|
+
3.30000,149.00000,110.00000,19.20000,96.46000,23.10000,667.21997,5.70000,0.20000,59.00000,16.00000,41.00000,54.00000,3.18324,0.20000,15.91620,colic.
|
133
|
+
3.50000,141.00000,96.00000,31.20000,11.00000,17.30000,214.92700,3.80000,1.70000,53.00000,48.00000,39.00000,64.00000,2.89664,0.70000,4.13806,colic.
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Age,Education,Income,Marital Status
|
2
|
+
36 - 55,masters,high,single,will buy
|
3
|
+
18 - 35,high school,low,single,won't buy
|
4
|
+
36 - 55,masters,low,single,will buy
|
5
|
+
18 - 35,bachelors,high,single,won't buy
|
6
|
+
< 18,high school,low,single,will buy
|
7
|
+
18 - 35,bachelors,high,married,won't buy
|
8
|
+
36 - 55,bachelors,low,married,won't buy
|
9
|
+
> 55,bachelors,high,single,will buy
|
10
|
+
36 - 55,masters,low,married,won't buy
|
11
|
+
> 55,masters,low,married,will buy
|
12
|
+
36 - 55,masters,high,single,will buy
|
13
|
+
> 55,masters,high,single,will buy
|
14
|
+
< 18,high school,high,single,won't buy
|
15
|
+
36 - 55,masters,low,single,will buy
|
16
|
+
36 - 55,high school,low,single,will buy
|
17
|
+
< 18,high school,low,married,will buy
|
18
|
+
18 - 35,bachelors,high,married,won't buy
|
19
|
+
> 55,high school,high,married,will buy
|
20
|
+
> 55,bachelors,low,single,will buy
|
21
|
+
36 - 55,high school,high,married,won't buy
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'decisiontree'
|
3
|
+
|
4
|
+
# ---Discrete-----------------------------------------------------------------------------------------
|
5
|
+
|
6
|
+
# Read in the training data
|
7
|
+
# Converts the fields of one record into values: the class label becomes
# 1 ('will buy') or 0 ("won't buy"); every other field is kept as a String.
parse_record = lambda do |fields|
  fields.map do |v|
    case v
    when 'will buy' then 1
    when "won't buy" then 0
    else v
    end
  end
end

training, attributes = [], nil
# File.foreach closes the file once iteration finishes; the earlier
# File.open(...).each_line form never closed its handle.
File.foreach('data/discrete-training.txt') do |line|
  fields = line.strip.split(',')
  next if fields.empty? # ignore blank lines (e.g. a trailing newline)

  if attributes.nil?
    # The first row holds the attribute names and is not a training example
    attributes = fields
  else
    training.push(parse_record.call(fields))
  end
end

# Instantiate the tree, and train it based on the data (set default to '1')
dec_tree = DecisionTree::ID3Tree.new(attributes, training, 1, :discrete)
dec_tree.train

#---- Test the tree....

# Read in the test cases
# Note: the test file has no attribute line; the labels are known from the training data
test = []
File.foreach('data/discrete-test.txt') do |line|
  fields = line.strip.split(',')
  next if fields.empty? # ignore blank lines

  test.push(parse_record.call(fields))
end

# Let the tree predict the output and compare it to the true specified value
test.each do |t|
  predict = dec_tree.predict(t)
  puts "Predict: #{predict} ... True: #{t.last}"
end

# Graph the tree, save to 'discrete.png'
dec_tree.graph("discrete")
|
data/lib/decisiontree.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file found beneath the decisiontree/ directory next to
# this file, in sorted order so the load sequence is deterministic.
Dir.glob(File.join(File.dirname(__FILE__), 'decisiontree', '**', '*.rb')).sort.each do |lib|
  require lib
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#The MIT License
|
2
|
+
|
3
|
+
###Copyright (c) 2007 Ilya Grigorik <ilya AT fortehost DOT com>
|
4
|
+
|
5
|
+
begin;
|
6
|
+
require 'graph/graphviz_dot'
|
7
|
+
rescue LoadError
|
8
|
+
STDERR.puts "graph/graphviz_dot not installed, graphing functionality not included."
|
9
|
+
end
|
10
|
+
|
11
|
+
# Helpers for a data set held as an Array of rows, where the last element of
# each row is that row's class label.
class Array
  # Returns the class label (last element) of every row, in row order.
  def classification
    collect { |row| row.last }
  end

  # Number of rows whose class label is 1.
  # Uses count with a block so no intermediate array is built just to be sized.
  def count_p
    count { |row| row.last == 1 }
  end

  # Number of rows whose class label is 0.
  def count_n
    count { |row| row.last == 0 }
  end
end
|
16
|
+
|
17
|
+
module DecisionTree
  # ID3 (information gain) decision tree.
  #
  # Training data is an Array of rows. Each row lists one value per attribute,
  # in the same order as the attribute names, followed by the class label as
  # its last element. Information gain is computed from the number of rows
  # labelled 1 and 0.
  #
  # A trained tree is either a bare class label (a leaf) or a one-entry Hash
  # of the form { Node => { branch_key => subtree } }. Branch keys are the
  # attribute values for :discrete trees and the strings 'gt' / 'lt' for
  # :continuous trees.
  class ID3Tree
    # attribute: attribute name the node splits on.
    # threshold: split threshold (:continuous) or the attribute's column index (:discrete).
    # gain:      information gain of the split.
    Node = Struct.new(:attribute, :threshold, :gain)

    # Probabilities below this value are treated as zero when computing entropy.
    EPSILON = 0.00000001

    # attributes: attribute names, one per data column (class label excluded).
    # data:       training rows (see the class comment).
    # default:    label returned when there is nothing to learn from or predict with.
    # type:       :discrete or :continuous.
    def initialize(attributes, data, default, type)
      @tree, @type = {}, type
      @data, @attributes, @default = data, attributes, default
    end

    # Builds the tree and stores it for #predict and #graph.
    #
    # Returns the trained tree. The result is always stored, including when it
    # is a bare leaf (empty data, or all rows sharing one label), so #predict
    # works afterwards in those cases too.
    # Raises ArgumentError if a split is needed and the type is not supported.
    def train(data=@data, attributes=@attributes, default=@default)
      @tree = grow(data, attributes, default)
    end

    # ID3 for binary classification of continuous variables (e.g. healthy / sick based on temperature thresholds)
    #
    # Candidate thresholds are the midpoints between adjacent distinct values of
    # the attribute. Rows are split into "value > threshold" and the rest.
    # Returns [gain, threshold] for the best threshold, or nil for empty data.
    def id3_continuous(data, attributes, attribute)
      column = attributes.index(attribute)
      values = data.collect { |row| row[column] }.uniq.sort

      thresholds = values.each_cons(2).collect { |low, high| (low + high).to_f / 2 }
      # With a single distinct value there is no midpoint; fall back to the value itself (gain 0).
      thresholds = [(values.last + values.last).to_f / 2] if thresholds.empty? && !values.empty?

      base = entropy_num(*class_counts(data))
      scored = thresholds.collect do |threshold|
        above, below = data.partition { |row| row[column] > threshold }
        above_weight = above.size.to_f / data.size
        below_weight = below.size.to_f / data.size

        gain = base - above_weight * entropy_num(*class_counts(above)) -
               below_weight * entropy_num(*class_counts(below))
        [gain, threshold]
      end

      scored.max { |a, b| a[0] <=> b[0] }
    end

    # ID3 for discrete label cases
    #
    # Returns [gain, column index of the attribute].
    def id3_discrete(data, attributes, attribute)
      column = attributes.index(attribute)
      values = data.collect { |row| row[column] }.uniq.sort

      remainder = values.inject(0) do |sum, value|
        subset = data.select { |row| row[column] == value }
        sum + (subset.size.to_f / data.size) * entropy_num(*class_counts(subset))
      end

      [entropy_num(*class_counts(data)) - remainder, column]
    end

    # calculate information based on number of positive and negative classifications
    def entropy_num(p,n); entropy(p.to_f/(p+n),n.to_f/(p+n)); end

    # calculate Information based on probabilities
    #
    # p and n are the two class weights; NaN (from 0/0) counts as 0, and a
    # weight below EPSILON contributes nothing. Integers are accepted.
    def entropy(p, n)
      p, n = p.to_f, n.to_f
      p = 0.0 if p.nan?
      n = 0.0 if n.nan?
      total = p + n

      [p, n].inject(0.0) do |bits, part|
        if part < EPSILON
          bits
        else
          share = part / total
          bits - share * Math.log(share) / Math.log(2.0)
        end
      end
    end

    # Classifies one row (attribute values in attribute order; a trailing label is ignored).
    # Returns the default label if the tree has not been trained, or if a
    # discrete row carries a value that has no branch in the tree.
    def predict(test)
      @type == :discrete ? descend_discrete(@tree, test) : descend_continuous(@tree, test)
    end

    # Writes the tree to "<filename>.png" using DotGraphPrinter, which comes
    # from the optional graph/graphviz_dot library.
    def graph(filename)
      dgp = DotGraphPrinter.new(build_tree)
      dgp.write_to_file("#{filename}.png", "png")
    end

    private

    # Recursive tree construction behind #train.
    def grow(data, attributes, default)
      return default if data.empty?

      # return classification if all examples have the same classification
      labels = data.collect { |row| row.last }
      return labels.first if labels.uniq.size == 1

      majority = most_common(labels)

      # Only attributes with at least two distinct values can separate the rows;
      # skipping the others is what guarantees that every split shrinks the data.
      candidates = attributes.select do |attribute|
        column = attributes.index(attribute)
        data.collect { |row| row[column] }.uniq.size > 1
      end

      # Choose best attribute: the first one with the highest gain
      best = nil
      candidates.each do |attribute|
        gain, threshold = score(data, attributes, attribute)
        next if gain.nil?
        best = Node.new(attribute, threshold, gain) if best.nil? || gain > best.gain
      end
      # Rows that no attribute can tell apart (conflicting examples) become a majority leaf.
      return majority if best.nil?

      column = attributes.index(best.attribute)
      branches =
        if @type == :continuous
          above, below = data.partition { |row| row[column] > best.threshold }
          [['gt', above], ['lt', below]]
        else
          values = data.collect { |row| row[column] }.uniq.sort
          values.collect { |value| [value, data.select { |row| row[column] == value }] }
        end

      # Safety net: never recurse on a "split" that leaves all rows in one branch.
      return majority if branches.any? { |_key, rows| rows.size == data.size }

      children = {}
      branches.each { |key, rows| children[key] = grow(rows, attributes, majority) }
      { best => children }
    end

    # Fitness ([gain, threshold]) of splitting on attribute, according to the tree type.
    def score(data, attributes, attribute)
      case @type
      when :discrete then id3_discrete(data, attributes, attribute)
      when :continuous then id3_continuous(data, attributes, attribute)
      else raise ArgumentError, "unsupported tree type: #{@type.inspect}"
      end
    end

    # [rows labelled 1, rows labelled 0]
    def class_counts(rows)
      [rows.count { |row| row.last == 1 }, rows.count { |row| row.last == 0 }]
    end

    # Most frequent label; ties go to the label that appears first.
    def most_common(labels)
      counts = Hash.new(0)
      labels.each { |label| counts[label] += 1 }
      labels.uniq.max_by { |label| counts[label] }
    end

    # Follows 'gt' when the row's value is strictly greater than the node's
    # threshold and 'lt' otherwise, the same rule used to partition the
    # training rows.
    def descend_continuous(tree, test)
      return tree unless tree.is_a?(Hash)
      return @default if tree.empty?

      node, children = tree.to_a.first
      value = test[@attributes.index(node.attribute)]
      descend_continuous(children[value > node.threshold ? 'gt' : 'lt'], test)
    end

    # Follows the branch keyed by the row's value for the node's attribute.
    def descend_discrete(tree,test)
      return tree unless tree.is_a?(Hash)
      return @default if tree.empty?

      node, children = tree.to_a.first
      value = test[@attributes.index(node.attribute)]
      return @default unless children.has_key?(value)

      descend_discrete(children[value], test)
    end

    # Flattens the tree into [from, to] link pairs for DotGraphPrinter.
    def build_tree(tree = @tree, root = nil)
      unless tree.is_a?(Hash)
        label = tree.to_s
        label = (tree == 1 ? 'true' : 'false') if tree.is_a?(Integer)
        # The object id of a fresh string is appended to the leaf label.
        return [[root, "#{label} \n (#{String.new(tree.to_s).object_id})"]]
      end
      return [] if tree.empty?

      node, children = tree.to_a.first
      links = root.nil? ? [] : [[root, node.attribute]]
      links += children.keys.collect { |key| [node.attribute, key] }
      children.keys.each { |key| links += build_tree(children[key], key) }

      links
    end
  end
end
|