decisiontree_n 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in the gem's .gemspec file
4
+ gemspec
@@ -0,0 +1,67 @@
1
+ # Decision Tree
2
+
3
+ A Ruby library which implements the ID3 (information gain) algorithm for decision tree learning. Currently, continuous and discrete datasets can be learned.
4
+
5
+ - Discrete model assumes unique labels & can be graphed and converted into a png for visual analysis
6
+ - Continuous looks at all possible values for a variable and iteratively chooses the best threshold between all possible assignments. This results in a binary tree which is partitioned by the threshold at every step (e.g. temperature > 20C).
7
+
8
+ ## Features
9
+ - ID3 algorithms for continuous and discrete cases, with support for inconsistent datasets.
10
+ - Graphviz component to visualize the learned tree (http://rockit.sourceforge.net/subprojects/graphr/)
11
+ - Support for multiple and symbolic outputs, and graphing of continuous trees.
12
+ - Returns default value when no branches are suitable for input
13
+
14
+ ## Implementation
15
+
16
+ - Ruleset is a class that trains an ID3Tree with 2/3 of the training data, converts it into a set of rules and prunes the rules with the remaining 1/3 of the training data (in a C4.5 way).
17
+ - Bagging is a bagging-based trainer (quite obvious), which trains 10 Ruleset trainers and when predicting chooses the best output based on voting.
18
+
19
+ Blog post with explanation & examples: http://www.igvita.com/2007/04/16/decision-tree-learning-in-ruby/
20
+
21
+ ## Example
22
+
23
+ ```ruby
24
+ require 'decisiontree'
25
+
26
+ attributes = ['Temperature']
27
+ training = [
28
+ [36.6, 'healthy'],
29
+ [37, 'sick'],
30
+ [38, 'sick'],
31
+ [36.7, 'healthy'],
32
+ [40, 'sick'],
33
+ [50, 'really sick'],
34
+ ]
35
+
36
+ # Instantiate the tree, and train it based on the data (set default to 'sick')
37
+ dec_tree = DecisionTree::ID3Tree.new(attributes, training, 'sick', :continuous)
38
+ dec_tree.train
39
+
40
+ test = [37, 'sick']
+ decision = dec_tree.predict(test)
41
+ puts "Predicted: #{decision} ... True decision: #{test.last}";
42
+
43
+ # => Predicted: sick ... True decision: sick
44
+
45
+ # Specify type ("discrete" or "continuous") in the training data
46
+ labels = ["hunger", "color"]
47
+ training = [
48
+ [8, "red", "angry"],
49
+ [6, "red", "angry"],
50
+ [7, "red", "angry"],
51
+ [7, "blue", "not angry"],
52
+ [2, "red", "not angry"],
53
+ [3, "blue", "not angry"],
54
+ [2, "blue", "not angry"],
55
+ [1, "red", "not angry"]
56
+ ]
57
+
58
+ dec_tree = DecisionTree::ID3Tree.new(labels, training, "not angry", color: :discrete, hunger: :continuous)
59
+ dec_tree.train
60
+
61
+ test = [7, "red", "angry"]
+ decision = dec_tree.predict(test)
62
+ puts "Predicted: #{decision} ... True decision: #{test.last}";
63
+ ```
64
+
65
+ ## License
66
+
67
+ The MIT License - Copyright (c) 2006 Ilya Grigorik
@@ -0,0 +1,7 @@
1
# Rake tasks for the gem: Bundler's gem helper tasks plus an RSpec task.
require 'bundler'
require 'rspec/core/rake_task'

# Install the tasks provided by Bundler::GemHelper.
Bundler::GemHelper.install_tasks

# Define the RSpec rake task with its default settings.
RSpec::Core::RakeTask.new

# A bare `rake` invocation depends on (and therefore runs) the :spec task.
task default: :spec
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for decisiontree_n.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |spec|
  # Identity and metadata.
  spec.name        = "decisiontree_n"
  spec.version     = "0.4.1"
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Ilya Grigorik"]
  spec.email       = ["ilya@igvita.com"]
  spec.homepage    = "https://github.com/igrigorik/decisiontree"
  spec.summary     = "ID3-based implementation of the M.L. Decision Tree algorithm"
  # The description simply reuses the summary text.
  spec.description = spec.summary

  spec.rubyforge_project = "decisiontree"

  # Development-only dependencies (graphing, specs, debugging).
  %w[graphr rspec rspec-given pry].each do |gem_name|
    spec.add_development_dependency gem_name
  end

  # File lists are taken from what git tracks in the working tree.
  spec.files         = %x(git ls-files).split("\n")
  spec.test_files    = %x(git ls-files -- {test,spec,features}/*).split("\n")
  spec.executables   = %x(git ls-files -- bin/*).split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,33 @@
1
require 'rubygems'
require 'decisiontree'

# ---Continuous-----------------------------------------------------------------------------------------
#
# Example script: trains a continuous ID3 tree on data/continuous-training.txt
# and prints a prediction for every row of data/continuous-test.txt.

# Numeric codes used for the two class labels found in the data files.
label_codes = { 'healthy' => 1, 'colic' => 0 }

# Reads a file and splits every line into its comma-separated fields,
# dropping surrounding whitespace and the trailing '.' that ends each row.
# File.foreach closes the file once all lines are consumed (the previous
# File.open(...).each_line form left the handle open).
read_fields = lambda do |path|
  File.foreach(path).map { |line| line.strip.chomp('.').split(',') }
end

# Converts one row of string fields: class labels become 1/0, everything
# else is parsed as a float.
encode = lambda do |fields|
  fields.map { |value| label_codes.fetch(value) { value.to_f } }
end

# Read in the training data; the first row holds the attribute names and is
# kept out of the training samples.
attributes, *samples = read_fields.call('data/continuous-training.txt')
training = samples.map(&encode)

# Instantiate the tree, and train it based on the data (set default to '1')
dec_tree = DecisionTree::ID3Tree.new(attributes, training, 1, :continuous)
dec_tree.train

#---- Test the tree....

# Read in the test cases
# Note: omit the attribute line (first line), we know the labels from the training data
test = read_fields.call('data/continuous-test.txt').map(&encode)

# Let the tree predict the output and compare it to the true specified value
test.each do |sample|
  predicted = dec_tree.predict(sample)
  puts "Predict: #{predicted} ... True: #{sample.last}"
end
@@ -0,0 +1,13 @@
1
+ 4.60000,139.00000,101.00000,28.80000,7.64000,13.80000,265.06000,1.50000,0.60000,60.00000,12.00000,40.00000,40.00000,3.52393,0.20000,17.61965,healthy.
2
+ 4.30000,139.00000,101.00000,26.20000,3.61000,16.10000,518.74103,1.90000,0.01000,68.00000,12.00000,38.00000,36.00000,5.70834,0.20000,28.54170,healthy.
3
+ 4.20000,139.00000,101.00000,29.20000,4.96000,13.00000,265.06000,2.10000,0.50000,62.00000,12.00000,39.00000,44.00000,3.44906,0.20000,17.24530,healthy.
4
+ 4.40000,141.00000,103.00000,28.30000,12.65000,14.10000,197.60699,2.20000,0.10000,66.00000,12.00000,32.00000,44.00000,3.30135,0.20000,16.50675,healthy.
5
+ 4.50000,136.00000,101.00000,26.10000,3.27000,13.40000,300.61499,1.40000,0.01000,68.00000,16.00000,33.00000,50.00000,6.94524,0.70000,9.92177,healthy.
6
+ 4.30000,151.00000,112.00000,21.90000,42.66000,21.40000,613.52301,11.50000,172.89999,68.00000,26.00000,63.00000,92.00000,2.69917,0.50000,5.39834,colic.
7
+ 3.00000,145.00000,103.00000,22.30000,83.93000,22.70000,476.97101,43.40000,139.50000,86.00000,60.00000,67.00000,68.00000,2.73668,0.20000,13.68340,colic.
8
+ 3.40000,134.00000,98.00000,25.90000,90.15000,13.50000,265.06000,2.10000,1.30000,66.00000,20.00000,40.00000,52.00000,3.13565,0.50000,6.27130,colic.
9
+ 2.90000,136.00000,92.00000,34.70000,5.81000,12.20000,243.71800,4.20000,22.80000,61.00000,20.00000,41.00000,48.00000,3.20928,0.20000,16.04640,colic.
10
+ 3.80000,140.00000,99.00000,28.20000,88.92000,16.60000,695.82800,7.00000,2.60000,60.00000,28.00000,49.00000,80.00000,1.67106,0.50000,3.34212,colic.
11
+ 3.70000,143.00000,105.00000,21.60000,93.67000,20.10000,265.06000,4.60000,38.80000,68.00000,16.00000,43.00000,48.00000,3.51757,0.50000,7.03514,colic.
12
+ 3.70000,142.00000,103.00000,27.00000,100.24000,15.70000,386.71301,2.30000,0.01000,85.00000,40.00000,45.00000,48.00000,2.81077,0.50000,5.62154,colic.
13
+ 3.20000,138.00000,99.00000,29.80000,80.77000,12.40000,224.11301,2.30000,3.90000,61.00000,24.00000,37.00000,40.00000,3.32568,0.50000,6.65136,colic.
@@ -0,0 +1,133 @@
1
+ K,Na,CL,HCO,Endotoxin,Aniongap,PLA2,SDH,GLDH,TPP,Breath rate,PCV,Pulse rate,Fibrinogen,Dimer,FibPerDim
2
+ 4.60000,138.00000,102.00000,27.50000,3.45000,13.10000,420.62299,4.00000,1.00000,56.00000,10.00000,38.00000,48.00000,3.78216,0.20000,18.91080,healthy.
3
+ 4.50000,141.00000,103.00000,26.50000,7.64000,16.00000,695.82800,0.70000,1.00000,72.00000,16.00000,37.00000,36.00000,4.86282,0.20000,24.31410,healthy.
4
+ 4.60000,143.00000,104.00000,25.30000,3.04000,18.30000,243.71800,3.10000,0.40000,68.00000,20.00000,46.00000,52.00000,4.14486,0.20000,20.72430,healthy.
5
+ 4.70000,140.00000,102.00000,27.60000,3.75000,15.10000,243.71800,3.10000,1.50000,66.00000,20.00000,32.00000,40.00000,4.11386,0.20000,20.56930,healthy.
6
+ 4.50000,140.00000,101.00000,23.90000,4.12000,19.60000,233.71001,3.60000,6.90000,60.00000,12.00000,52.00000,48.00000,3.47588,0.20000,17.37940,healthy.
7
+ 4.00000,139.00000,101.00000,29.30000,4.05000,12.70000,153.64301,1.60000,0.01000,55.00000,16.00000,41.00000,44.00000,3.63289,0.20000,18.16445,healthy.
8
+ 3.20000,139.00000,98.00000,30.70000,101.18000,13.50000,564.12097,6.80000,16.40000,66.00000,56.00000,53.00000,80.00000,5.83544,1.00000,5.83544,colic.
9
+ 3.20000,144.00000,105.00000,24.40000,51.15000,17.80000,386.71301,43.60000,471.60001,58.00000,20.00000,35.00000,48.00000,2.65903,0.50000,5.31806,colic.
10
+ 3.90000,144.00000,99.00000,20.30000,94.45000,28.60000,1305.69495,16.60000,58.60000,64.00000,48.00000,75.00000,88.00000,1.86868,0.20000,9.34340,colic.
11
+ 3.60000,134.00000,96.00000,26.30000,79.33000,15.30000,386.71301,4.50000,2.80000,48.00000,28.00000,35.00000,100.00000,3.86725,0.50000,7.73450,colic.
12
+ 3.80000,148.00000,111.00000,23.90000,45.27000,16.90000,895.03497,1.60000,10.10000,84.00000,16.00000,55.00000,60.00000,4.58211,0.20000,22.91055,colic.
13
+ 3.30000,140.00000,102.00000,20.90000,68.33000,20.40000,326.93799,2.00000,1.70000,84.00000,20.00000,46.00000,56.00000,3.57136,0.50000,7.14272,colic.
14
+ 3.50000,140.00000,99.00000,25.10000,97.40000,19.40000,420.53101,5.40000,8.80000,94.00000,16.00000,53.00000,80.00000,4.02566,0.70000,5.75094,colic.
15
+ 3.30000,137.00000,98.00000,30.80000,74.87000,11.50000,789.14801,168.60001,465.10001,60.00000,36.00000,40.00000,48.00000,5.79638,0.70000,8.28054,colic.
16
+ 3.10000,126.00000,88.00000,27.90000,9.31000,13.20000,206.06100,2.10000,0.01000,70.00000,36.00000,37.00000,52.00000,5.55303,0.50000,11.10606,colic.
17
+ 3.10000,138.00000,94.00000,39.80000,57.39000,7.30000,420.53101,3.80000,10.50000,68.00000,20.00000,46.00000,68.00000,2.45303,0.20000,12.26515,colic.
18
+ 5.00000,136.00000,100.00000,31.40000,12.28000,9.60000,276.43900,4.90000,0.01000,58.00000,16.00000,40.00000,48.00000,4.00226,0.20000,20.01130,healthy.
19
+ 3.60000,139.00000,100.00000,29.20000,7.25000,13.40000,288.27600,1.10000,1.10000,65.00000,12.00000,38.00000,48.00000,2.85107,0.20000,14.25535,healthy.
20
+ 4.30000,142.00000,102.00000,29.90000,3.80000,14.40000,243.71800,3.00000,0.30000,67.00000,12.00000,44.00000,44.00000,3.87469,0.20000,19.37345,healthy.
21
+ 4.60000,139.00000,100.00000,29.40000,2.40000,14.20000,288.27600,2.40000,2.10000,65.00000,16.00000,43.00000,52.00000,4.84979,0.20000,24.24895,healthy.
22
+ 4.10000,136.00000,98.00000,28.40000,2.97000,13.70000,300.61499,2.00000,1.10000,62.00000,12.00000,43.00000,48.00000,5.19111,0.50000,10.38222,healthy.
23
+ 4.20000,136.00000,98.00000,25.30000,2.93000,16.90000,224.11301,9.90000,0.70000,64.00000,16.00000,36.00000,52.00000,3.91034,0.20000,19.55170,healthy.
24
+ 3.00000,132.00000,89.00000,29.40000,88.25000,16.60000,162.05200,3.40000,0.01000,52.00000,28.00000,45.00000,76.00000,1.64083,0.50000,3.28166,colic.
25
+ 3.30000,139.00000,99.00000,25.70000,49.80000,17.60000,174.25400,0.90000,0.30000,62.00000,16.00000,38.00000,60.00000,3.20091,1.50000,2.13394,colic.
26
+ 2.90000,138.00000,92.00000,24.80000,94.45000,24.10000,355.59201,9.20000,4.00000,51.00000,45.00000,44.00000,42.00000,2.42420,1.50000,1.61613,colic.
27
+ 2.60000,131.00000,89.00000,26.50000,6.54000,18.10000,725.62500,4.70000,11.00000,80.00000,48.00000,43.00000,52.00000,4.10642,0.50000,8.21284,colic.
28
+ 3.60000,135.00000,95.00000,26.70000,65.86000,16.90000,243.71800,4.80000,1.60000,58.00000,38.00000,50.00000,88.00000,2.92609,0.20000,14.63045,colic.
29
+ 3.30000,147.00000,105.00000,28.00000,61.56000,17.30000,313.50201,3.70000,2.60000,75.00000,40.00000,48.00000,88.00000,3.60096,1.50000,2.40064,colic.
30
+ 3.20000,142.00000,100.00000,26.70000,78.69000,18.50000,370.81000,42.90000,333.79999,80.00000,24.00000,55.00000,100.00000,4.53422,2.00000,2.26711,colic.
31
+ 3.70000,136.00000,86.00000,25.30000,65.54000,28.40000,1103.97498,6.40000,4.80000,100.00000,20.00000,55.00000,132.00000,7.76240,1.00000,7.76240,colic.
32
+ 3.30000,142.00000,99.00000,29.50000,82.42000,16.80000,420.53101,6.80000,40.70000,71.00000,28.00000,48.00000,72.00000,3.29344,0.50000,6.58688,colic.
33
+ 3.30000,141.00000,99.00000,32.40000,87.43000,12.90000,326.93799,3.00000,1.50000,47.00000,36.00000,48.00000,48.00000,3.24353,0.20000,16.21765,colic.
34
+ 3.10000,146.00000,103.00000,26.10000,79.08000,20.00000,476.97101,3.50000,1.20000,78.00000,24.00000,54.00000,80.00000,3.76666,0.50000,7.53332,colic.
35
+ 4.10000,138.00000,101.00000,27.30000,8.01000,13.80000,147.29100,6.30000,5.20000,67.00000,10.00000,43.00000,40.00000,3.68016,0.20000,18.40080,healthy.
36
+ 4.10000,136.00000,98.00000,28.50000,6.15000,13.60000,174.25400,2.10000,1.30000,60.00000,8.00000,35.00000,40.00000,1.94448,0.20000,9.72240,healthy.
37
+ 4.50000,136.00000,99.00000,26.80000,5.08000,14.70000,189.47200,2.00000,0.60000,55.00000,12.00000,35.00000,44.00000,3.67257,0.20000,18.36285,healthy.
38
+ 3.50000,142.00000,105.00000,22.20000,6.77000,18.30000,276.43900,3.40000,1.20000,64.00000,10.00000,39.00000,48.00000,3.45945,0.20000,17.29725,healthy.
39
+ 3.90000,140.00000,101.00000,28.50000,3.61000,14.40000,340.96799,0.20000,0.01000,61.00000,12.00000,37.00000,48.00000,2.51116,0.20000,12.55580,healthy.
40
+ 3.60000,145.00000,106.00000,27.50000,89.65000,15.10000,224.11301,2.80000,1.20000,78.00000,60.00000,48.00000,80.00000,2.42001,0.20000,12.10005,colic.
41
+ 3.50000,136.00000,98.00000,25.40000,22.39000,16.10000,1420.03601,3.60000,0.80000,60.00000,20.00000,21.00000,56.00000,9.81956,4.00000,2.45489,colic.
42
+ 3.60000,140.00000,98.00000,19.50000,99.57000,26.10000,789.14801,36.10000,293.20001,73.00000,48.00000,64.00000,100.00000,2.24781,2.00000,1.12390,colic.
43
+ 3.60000,131.00000,92.00000,22.60000,76.04000,20.00000,564.12097,3.70000,4.70000,48.00000,56.00000,38.00000,120.00000,3.33932,0.50000,6.67864,colic.
44
+ 3.50000,144.00000,104.00000,18.90000,64.19000,24.60000,1149.99500,4.80000,3.10000,60.00000,28.00000,40.00000,80.00000,4.12378,0.70000,5.89111,colic.
45
+ 2.90000,142.00000,100.00000,30.00000,49.20000,14.90000,497.39899,2.50000,0.01000,74.00000,40.00000,52.00000,64.00000,3.21284,0.50000,6.42568,colic.
46
+ 3.60000,138.00000,99.00000,24.40000,50.32000,18.20000,1610.51404,14.20000,1.30000,66.00000,20.00000,37.00000,60.00000,6.60548,2.00000,3.30274,colic.
47
+ 3.40000,137.00000,93.00000,24.40000,6.29000,23.00000,4227.66113,43.60000,3.00000,71.00000,36.00000,60.00000,72.00000,5.17514,6.00000,0.86252,colic.
48
+ 3.50000,144.00000,100.00000,32.50000,51.49000,15.00000,129.87900,7.90000,83.00000,61.00000,36.00000,44.00000,84.00000,3.42922,0.20000,17.14610,colic.
49
+ 3.10000,136.00000,98.00000,23.40000,5.97000,17.70000,243.71800,2.10000,2.70000,66.00000,28.00000,45.00000,52.00000,2.84968,0.20000,14.24840,colic.
50
+ 4.50000,137.00000,100.00000,27.20000,11.48000,14.30000,181.70300,2.00000,3.60000,62.00000,8.00000,38.00000,52.00000,4.01342,0.20000,20.06710,healthy.
51
+ 4.20000,141.00000,103.00000,29.10000,3.77000,13.10000,288.27600,6.70000,5.60000,64.00000,8.00000,42.00000,40.00000,4.20329,0.20000,21.01645,healthy.
52
+ 4.20000,138.00000,101.00000,28.30000,6.22000,12.90000,288.27600,5.40000,2.10000,65.00000,12.00000,43.00000,44.00000,5.08152,0.20000,25.40760,healthy.
53
+ 4.50000,137.00000,101.00000,27.40000,6.68000,13.10000,167.07899,2.10000,1.10000,60.00000,16.00000,38.00000,48.00000,3.25795,0.20000,16.28975,healthy.
54
+ 4.00000,141.00000,102.00000,27.20000,12.44000,15.80000,338.17999,3.40000,3.10000,72.00000,12.00000,33.00000,48.00000,4.98961,0.20000,24.94805,healthy.
55
+ 4.20000,138.00000,96.00000,23.70000,51.83000,22.50000,355.59201,2.70000,4.20000,60.00000,20.00000,39.00000,100.00000,3.61817,0.50000,7.23634,colic.
56
+ 3.60000,141.00000,101.00000,28.60000,97.70000,15.00000,667.21997,5.00000,3.70000,70.00000,12.00000,48.00000,60.00000,3.13410,1.00000,3.13410,colic.
57
+ 3.20000,137.00000,100.00000,24.40000,71.53000,15.80000,224.11301,2.40000,2.20000,79.00000,28.00000,42.00000,60.00000,3.92367,1.00000,3.92367,colic.
58
+ 3.50000,141.00000,102.00000,27.40000,51.93000,15.10000,1015.08801,3.10000,0.80000,62.00000,72.00000,54.00000,88.00000,2.50883,0.20000,12.54415,colic.
59
+ 4.20000,143.00000,106.00000,24.00000,5.31000,17.20000,265.06000,8.00000,32.90000,77.00000,16.00000,38.00000,40.00000,3.98583,1.00000,3.98583,colic.
60
+ 3.20000,138.00000,97.00000,25.00000,8.76000,19.20000,288.27600,5.40000,3.10000,70.00000,12.00000,47.00000,88.00000,5.01596,1.00000,5.01596,colic.
61
+ 4.10000,132.00000,91.00000,28.60000,19.74000,16.50000,639.79999,6.70000,0.01000,78.00000,24.00000,38.00000,112.00000,8.94970,6.00000,1.49162,colic.
62
+ 6.00000,140.00000,97.00000,32.20000,48.15000,16.80000,153.64301,17.00000,52.60000,48.00000,40.00000,67.00000,80.00000,2.18364,1.50000,1.45576,colic.
63
+ 3.10000,138.00000,95.00000,29.30000,10.98000,16.80000,822.96600,3.90000,0.60000,58.00000,36.00000,36.00000,48.00000,2.52015,0.50000,5.04030,colic.
64
+ 3.70000,144.00000,107.00000,25.40000,85.30000,15.30000,457.36600,3.10000,1.10000,66.00000,24.00000,48.00000,60.00000,2.81775,0.50000,5.63550,colic.
65
+ 4.20000,139.00000,100.00000,29.40000,2.33000,13.80000,233.71001,3.40000,0.90000,64.00000,12.00000,40.00000,44.00000,3.78293,0.20000,18.91465,healthy.
66
+ 4.20000,144.00000,107.00000,23.90000,7.87000,17.30000,300.61499,5.90000,16.40000,68.00000,20.00000,48.00000,48.00000,4.42355,0.20000,22.11775,healthy.
67
+ 4.10000,139.00000,100.00000,28.60000,4.12000,14.50000,170.78101,0.70000,0.01000,60.00000,10.00000,43.00000,32.00000,3.22927,0.20000,16.14635,healthy.
68
+ 4.70000,136.00000,99.00000,28.60000,10.43000,13.10000,288.27600,1.70000,0.20000,62.00000,8.00000,35.00000,40.00000,4.18454,0.20000,20.92270,healthy.
69
+ 3.70000,140.00000,102.00000,28.20000,6.57000,13.50000,174.25400,3.20000,2.10000,60.00000,10.00000,39.00000,44.00000,3.40799,0.20000,17.03995,healthy.
70
+ 3.70000,142.00000,101.00000,30.60000,94.68000,14.10000,300.61499,1.90000,0.10000,58.00000,32.00000,40.00000,80.00000,2.66538,0.20000,13.32690,colic.
71
+ 3.00000,135.00000,95.00000,27.30000,8.19000,15.70000,265.06000,2.30000,0.01000,60.00000,40.00000,37.00000,48.00000,2.96841,0.20000,14.84205,colic.
72
+ 2.70000,143.00000,96.00000,24.60000,83.61000,25.10000,386.71301,6.50000,3.80000,62.00000,28.00000,33.00000,52.00000,3.44921,0.50000,6.89842,colic.
73
+ 4.00000,140.00000,103.00000,20.30000,99.16000,20.70000,300.61499,3.50000,1.70000,64.00000,24.00000,44.00000,64.00000,3.75317,0.20000,18.76585,colic.
74
+ 3.50000,130.00000,93.00000,29.90000,4.35000,10.60000,265.06000,1.90000,0.70000,70.00000,20.00000,42.00000,52.00000,5.66107,0.50000,11.32214,colic.
75
+ 3.10000,139.00000,96.00000,30.80000,20.02000,15.30000,167.07899,3.30000,1.80000,58.00000,20.00000,44.00000,72.00000,3.30615,0.20000,16.53075,colic.
76
+ 3.00000,137.00000,91.00000,14.80000,7.32000,34.20000,181.70300,20.10000,1.70000,61.00000,16.00000,59.00000,72.00000,4.94729,0.50000,9.89458,colic.
77
+ 3.70000,138.00000,99.00000,29.10000,97.72000,13.60000,214.92700,1.50000,0.01000,58.00000,20.00000,35.00000,56.00000,2.61113,0.20000,13.05565,colic.
78
+ 4.00000,137.00000,98.00000,27.50000,56.43000,15.50000,243.71800,3.70000,0.90000,62.00000,16.00000,38.00000,60.00000,4.75695,0.50000,9.51390,colic.
79
+ 3.20000,139.00000,98.00000,30.00000,76.75000,14.20000,276.43900,2.40000,0.01000,61.00000,60.00000,47.00000,72.00000,2.74397,0.20000,13.71985,colic.
80
+ 4.50000,141.00000,103.00000,27.40000,9.08000,15.10000,457.36600,4.60000,5.50000,70.00000,8.00000,39.00000,32.00000,3.92956,0.20000,19.64780,healthy.
81
+ 3.90000,134.00000,98.00000,25.10000,5.35000,14.80000,695.82800,1.90000,0.01000,72.00000,16.00000,33.00000,48.00000,8.01149,0.70000,11.44499,healthy.
82
+ 3.90000,138.00000,102.00000,25.90000,4.05000,14.00000,564.12097,5.70000,5.50000,70.00000,10.00000,41.00000,40.00000,5.33758,0.20000,26.68790,healthy.
83
+ 3.90000,141.00000,103.00000,25.20000,7.55000,16.70000,153.64301,2.90000,7.90000,70.00000,16.00000,34.00000,48.00000,3.46906,0.50000,6.93812,healthy.
84
+ 4.60000,137.00000,101.00000,24.70000,3.18000,15.90000,206.06100,1.40000,1.10000,70.00000,10.00000,38.00000,40.00000,5.13267,0.20000,25.66335,healthy.
85
+ 3.50000,131.00000,92.00000,30.70000,14.41000,11.80000,420.53101,3.30000,1.10000,64.00000,16.00000,41.00000,48.00000,2.23278,0.20000,11.16390,colic.
86
+ 3.80000,141.00000,100.00000,29.20000,82.01000,15.60000,233.71001,2.20000,0.70000,62.00000,14.00000,33.00000,52.00000,4.07480,0.50000,8.14960,colic.
87
+ 4.40000,140.00000,98.00000,24.10000,82.76000,22.30000,403.25699,2.80000,2.00000,60.00000,32.00000,62.00000,112.00000,2.15636,0.50000,4.31272,colic.
88
+ 3.60000,144.00000,97.00000,19.90000,38.61000,30.70000,822.96600,10.60000,6.20000,80.00000,24.00000,62.00000,64.00000,3.64002,1.00000,3.64002,colic.
89
+ 3.30000,144.00000,101.00000,28.90000,61.44000,17.40000,476.97101,28.90000,138.60001,89.00000,16.00000,54.00000,80.00000,5.20165,1.00000,5.20165,colic.
90
+ 3.80000,136.00000,98.00000,23.90000,87.61000,17.90000,318.07199,6.10000,7.70000,100.00000,28.00000,54.00000,92.00000,3.27562,1.00000,3.27562,colic.
91
+ 4.00000,139.00000,99.00000,26.00000,46.76000,18.00000,476.97101,5.30000,6.50000,73.00000,36.00000,37.00000,82.00000,3.37621,0.50000,6.75242,colic.
92
+ 3.00000,141.00000,99.00000,32.10000,97.13000,12.90000,420.53101,2.90000,1.80000,73.00000,12.00000,28.00000,80.00000,3.37575,0.70000,4.82250,colic.
93
+ 3.50000,145.00000,93.00000,20.00000,86.12000,35.50000,895.03497,5.70000,5.60000,80.00000,34.00000,65.00000,88.00000,2.57734,0.50000,5.15468,colic.
94
+ 4.00000,137.00000,99.00000,29.70000,4.71000,12.30000,403.25699,2.40000,1.20000,56.00000,12.00000,37.00000,44.00000,3.37110,0.20000,16.85550,healthy.
95
+ 4.20000,140.00000,103.00000,25.60000,4.80000,15.60000,386.71301,2.60000,3.50000,54.00000,12.00000,33.00000,40.00000,2.99693,0.20000,14.98465,healthy.
96
+ 4.70000,139.00000,101.00000,27.40000,6.95000,15.30000,197.60699,1.30000,0.30000,58.00000,12.00000,37.00000,44.00000,2.50155,0.50000,5.00310,healthy.
97
+ 5.20000,138.00000,99.00000,28.00000,4.46000,16.20000,340.96799,3.10000,2.70000,55.00000,12.00000,35.00000,56.00000,4.22825,0.20000,21.14125,healthy.
98
+ 4.50000,137.00000,98.00000,26.40000,2.49000,17.10000,197.60699,14.10000,9.00000,54.00000,12.00000,42.00000,56.00000,3.47526,0.20000,17.37630,healthy.
99
+ 4.40000,138.00000,101.00000,20.10000,65.74000,21.30000,476.97101,14.00000,88.60000,72.00000,14.00000,43.00000,82.00000,2.78303,0.50000,5.56606,colic.
100
+ 3.80000,143.00000,101.00000,29.20000,100.22000,16.60000,313.50201,4.30000,26.50000,67.00000,20.00000,63.00000,80.00000,3.35963,1.00000,3.35963,colic.
101
+ 3.50000,142.00000,101.00000,29.10000,73.95000,15.40000,386.71301,5.10000,4.30000,65.00000,28.00000,41.00000,56.00000,4.12300,0.20000,20.61500,colic.
102
+ 4.30000,141.00000,104.00000,23.10000,82.72000,18.20000,386.71301,4.90000,1.60000,72.00000,36.00000,45.00000,92.00000,3.47479,0.50000,6.94958,colic.
103
+ 3.60000,135.00000,98.00000,30.10000,83.79000,10.50000,254.18300,1.50000,0.01000,58.00000,20.00000,41.00000,48.00000,2.64120,0.50000,5.28240,colic.
104
+ 2.80000,140.00000,101.00000,26.90000,31.25000,14.90000,463.62701,4.30000,3.80000,46.00000,28.00000,48.00000,64.00000,4.19771,0.50000,8.39542,colic.
105
+ 3.30000,140.00000,99.00000,32.70000,97.22000,11.60000,300.61499,3.70000,3.40000,58.00000,24.00000,34.00000,44.00000,2.04600,0.70000,2.92286,colic.
106
+ 3.10000,146.00000,103.00000,21.60000,83.65000,24.50000,288.27600,4.30000,3.50000,82.00000,32.00000,46.00000,64.00000,3.65040,0.50000,7.30080,colic.
107
+ 4.10000,139.00000,102.00000,24.20000,88.23000,16.90000,214.92700,1.80000,0.01000,63.00000,12.00000,40.00000,42.00000,2.97430,0.20000,14.87150,colic.
108
+ 4.50000,139.00000,100.00000,29.20000,6.04000,14.30000,210.72301,2.00000,0.20000,68.00000,10.00000,40.00000,40.00000,3.52393,0.50000,7.04786,healthy.
109
+ 4.20000,130.00000,102.00000,27.90000,6.68000,4.30000,386.71301,1.90000,1.10000,56.00000,14.00000,37.00000,48.00000,4.05697,0.20000,20.28485,healthy.
110
+ 5.30000,137.00000,99.00000,25.80000,4.35000,17.50000,276.43900,1.90000,0.60000,62.00000,16.00000,40.00000,52.00000,5.01906,0.50000,10.03812,healthy.
111
+ 4.40000,135.00000,100.00000,25.10000,2.77000,14.30000,197.60699,0.60000,1.60000,60.00000,16.00000,36.00000,36.00000,3.56702,0.20000,17.83510,healthy.
112
+ 2.90000,129.00000,86.00000,27.30000,82.85000,18.60000,756.74597,5.40000,29.50000,79.00000,16.00000,43.00000,84.00000,2.38374,1.00000,2.38374,colic.
113
+ 3.40000,139.00000,98.00000,29.80000,54.42000,14.60000,695.82800,5.50000,30.00000,52.00000,24.00000,35.00000,52.00000,1.95393,0.50000,3.90786,colic.
114
+ 3.30000,137.00000,96.00000,30.50000,53.76000,13.80000,233.71001,7.20000,28.90000,55.00000,24.00000,30.00000,100.00000,2.11327,0.20000,10.56635,colic.
115
+ 2.50000,127.00000,88.00000,17.80000,88.37000,23.70000,588.29602,3.90000,3.20000,70.00000,24.00000,54.00000,88.00000,3.32398,2.00000,1.66199,colic.
116
+ 3.30000,146.00000,97.00000,23.10000,70.02000,29.20000,1420.03601,42.70000,327.50000,70.00000,28.00000,68.00000,68.00000,2.19294,3.00000,0.73098,colic.
117
+ 3.80000,140.00000,100.00000,26.70000,92.83000,17.10000,457.36600,4.60000,2.10000,61.00000,32.00000,38.00000,76.00000,2.07359,1.50000,1.38239,colic.
118
+ 3.30000,134.00000,95.00000,31.60000,73.63000,10.70000,224.11301,3.30000,1.70000,62.00000,20.00000,37.00000,56.00000,3.68947,0.50000,7.37894,colic.
119
+ 3.30000,140.00000,99.00000,29.60000,88.66000,14.70000,233.71001,1.60000,2.40000,74.00000,40.00000,38.00000,52.00000,2.76427,1.00000,2.76427,colic.
120
+ 2.80000,145.00000,101.00000,35.40000,31.96000,11.40000,243.71800,0.40000,0.70000,70.00000,20.00000,47.00000,84.00000,3.82587,0.20000,19.12935,colic.
121
+ 4.40000,136.00000,98.00000,28.50000,8.69000,13.90000,725.62500,1.90000,1.50000,60.00000,16.00000,40.00000,52.00000,3.41419,0.20000,17.07095,healthy.
122
+ 3.70000,140.00000,100.00000,29.80000,5.15000,13.90000,189.47200,2.30000,0.70000,78.00000,12.00000,42.00000,48.00000,3.33607,0.20000,16.68035,healthy.
123
+ 4.60000,138.00000,100.00000,28.60000,9.79000,14.00000,224.11301,1.60000,2.00000,61.00000,16.00000,35.00000,40.00000,3.58624,0.20000,17.93120,healthy.
124
+ 4.00000,138.00000,102.00000,25.90000,90.54000,14.10000,326.93799,0.40000,1.70000,70.00000,20.00000,48.00000,79.00000,3.34645,0.20000,16.73225,colic.
125
+ 2.70000,132.00000,93.00000,29.30000,52.57000,12.40000,1058.59497,5.00000,8.00000,78.00000,28.00000,48.00000,76.00000,4.77013,0.50000,9.54026,colic.
126
+ 3.40000,133.00000,95.00000,28.50000,64.71000,12.90000,276.43900,8.70000,43.70000,76.00000,16.00000,47.00000,76.00000,4.15168,0.20000,20.75840,colic.
127
+ 3.00000,139.00000,93.00000,33.30000,96.88000,15.70000,224.11301,6.90000,3.30000,48.00000,80.00000,43.00000,56.00000,2.32748,0.20000,11.63740,colic.
128
+ 2.80000,139.00000,101.00000,25.90000,71.32000,14.90000,676.35999,2.30000,0.30000,71.00000,16.00000,46.00000,52.00000,2.50558,0.20000,12.52790,colic.
129
+ 2.80000,142.00000,97.00000,29.80000,53.21000,18.00000,160.22400,4.70000,5.10000,50.00000,60.00000,44.00000,88.00000,2.31710,0.70000,3.31014,colic.
130
+ 3.50000,140.00000,102.00000,23.00000,87.86000,18.50000,189.47200,2.20000,0.90000,73.00000,24.00000,47.00000,96.00000,3.73721,0.50000,7.47442,colic.
131
+ 3.00000,142.00000,100.00000,22.60000,93.17000,22.40000,355.59201,16.30000,124.10000,80.00000,24.00000,45.00000,68.00000,2.75668,0.70000,3.93811,colic.
132
+ 3.30000,149.00000,110.00000,19.20000,96.46000,23.10000,667.21997,5.70000,0.20000,59.00000,16.00000,41.00000,54.00000,3.18324,0.20000,15.91620,colic.
133
+ 3.50000,141.00000,96.00000,31.20000,11.00000,17.30000,214.92700,3.80000,1.70000,53.00000,48.00000,39.00000,64.00000,2.89664,0.70000,4.13806,colic.
@@ -0,0 +1,4 @@
1
+ 36 - 55,masters,high,single,will buy
2
+ 18 - 35,high school,low,single,won't buy
3
+ 18 - 35,masters,high,single,won't buy
4
+ 36 - 55,high school,low,single,will buy
@@ -0,0 +1,21 @@
1
+ Age,Education,Income,Marital Status
2
+ 36 - 55,masters,high,single,will buy
3
+ 18 - 35,high school,low,single,won't buy
4
+ 36 - 55,masters,low,single,will buy
5
+ 18 - 35,bachelors,high,single,won't buy
6
+ < 18,high school,low,single,will buy
7
+ 18 - 35,bachelors,high,married,won't buy
8
+ 36 - 55,bachelors,low,married,won't buy
9
+ > 55,bachelors,high,single,will buy
10
+ 36 - 55,masters,low,married,won't buy
11
+ > 55,masters,low,married,will buy
12
+ 36 - 55,masters,high,single,will buy
13
+ > 55,masters,high,single,will buy
14
+ < 18,high school,high,single,won't buy
15
+ 36 - 55,masters,low,single,will buy
16
+ 36 - 55,high school,low,single,will buy
17
+ < 18,high school,low,married,will buy
18
+ 18 - 35,bachelors,high,married,won't buy
19
+ > 55,high school,high,married,will buy
20
+ > 55,bachelors,low,single,will buy
21
+ 36 - 55,high school,high,married,won't buy
@@ -0,0 +1,34 @@
1
require 'rubygems'
require 'decisiontree'

# ---Discrete-----------------------------------------------------------------------------------------
#
# Example script: trains a discrete ID3 tree on data/discrete-training.txt,
# prints a prediction for every row of data/discrete-test.txt and graphs the
# resulting tree.

# Numeric codes used for the two outcome labels found in the data files.
outcome_codes = { 'will buy' => 1, "won't buy" => 0 }

# Reads a file and splits every line into its comma-separated fields.
# File.foreach closes the file once all lines are consumed (the previous
# File.open(...).each_line form left the handle open).
read_fields = lambda do |path|
  File.foreach(path).map { |line| line.strip.split(',') }
end

# Converts one row: outcome labels become 1/0, every other field is kept as
# the original string.
encode = lambda do |fields|
  fields.map { |value| outcome_codes.fetch(value, value) }
end

# Read in the training data; the first row holds the attribute names and is
# kept out of the training samples.
attributes, *samples = read_fields.call('data/discrete-training.txt')
training = samples.map(&encode)

# Instantiate the tree, and train it based on the data (set default to '1')
dec_tree = DecisionTree::ID3Tree.new(attributes, training, 1, :discrete)
dec_tree.train

#---- Test the tree....

# Read in the test cases
# Note: omit the attribute line (first line), we know the labels from the training data
test = read_fields.call('data/discrete-test.txt').map(&encode)

# Let the tree predict the output and compare it to the true specified value
test.each do |sample|
  predicted = dec_tree.predict(sample)
  puts "Predict: #{predicted} ... True: #{sample.last}"
end

# Graph the tree, save to 'discrete.png'
dec_tree.graph("discrete")
@@ -0,0 +1,28 @@
1
#!/usr/bin/ruby

require 'rubygems'
require 'decisiontree'

# Minimal example: one continuous attribute (body temperature) and three
# possible classifications.
attributes = ['Temperature']
training = [
  [36.6, 'healthy'],
  [37, 'sick'],
  [38, 'sick'],
  [36.7, 'healthy'],
  [40, 'sick'],
  [50, 'really sick'],
]

# Instantiate the tree, and train it based on the data (set default to 'sick')
dec_tree = DecisionTree::ID3Tree.new(attributes, training, 'sick', :continuous)
dec_tree.train

# A single sample: the attribute value followed by its known classification.
test = [37, 'sick']

decision = dec_tree.predict(test)
puts "Predicted: #{decision} ... True decision: #{test.last}"

# Graph the tree, save to 'tree.png'
dec_tree.graph("tree")
27
+
28
+
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
@@ -0,0 +1,325 @@
1
+ # The MIT License
2
+ #
3
+ ### Copyright (c) 2007 Ilya Grigorik <ilya AT igvita DOT com>
4
+ ### Modified in 2007 by José Ignacio Fernández <joseignacio.fernandez AT gmail DOT com>
5
+
6
# Marshal-based persistence helpers added to every object.
class Object
  # Serializes the receiver with Marshal and writes it to +filename+,
  # truncating any existing file.
  #
  # The file is opened in binary mode because Marshal output is a byte stream;
  # text mode may apply newline/encoding translation and corrupt the dump.
  #
  # @param filename [String] path of the file to write
  # @return [File] the (already closed) file object
  # @raise [TypeError] if the receiver cannot be marshalled
  def save_to_file(filename)
    File.open(filename, 'wb') { |f| f << Marshal.dump(self) }
  end

  # Reads +filename+ and rebuilds the object stored in it by #save_to_file.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects. Only load files
  # that come from a trusted source.
  #
  # @param filename [String] path of a file holding a Marshal dump
  # @return [Object] the deserialized object
  def self.load_from_file(filename)
    Marshal.load(File.binread(filename))
  end
end
15
+
16
# Helpers used by the decision-tree code on arrays of samples / labels.
class Array
  # Treats the receiver as a list of rows and returns the last element of
  # every row (the row's classification).
  #
  # @return [Array] one label per row
  def classification
    collect { |row| row.last }
  end

  # Calculates the information entropy (in bits) of the receiver's elements,
  # treating each distinct element as a symbol.
  #
  # @return [Numeric] 0 for an empty array, otherwise a Float
  def entropy
    return 0 if empty?

    # Occurrences of every distinct symbol, in first-seen order.
    counts = Hash.new(0)
    each { |symbol| counts[symbol] += 1 }

    total = size
    counts.values.inject(0) do |bits, count|
      share = count.to_f / total
      # -p * log2(p), written with natural logs exactly as before so results
      # stay bit-for-bit identical.
      bits + (-share * Math.log(share) / Math.log(2.0))
    end
  end
end
34
+
35
+ module DecisionTree
36
+ Node = Struct.new(:attribute, :threshold, :gain)
37
+
38
+ class ID3Tree
39
+ def initialize(attributes, data, default, type)
40
+ @used, @tree, @type = {}, {}, type
41
+ @data, @attributes, @default = data, attributes, default
42
+ end
43
+
44
+ def train(data=@data, attributes=@attributes, default=@default)
45
+ initialize(attributes, data, default, @type)
46
+
47
+ # Remove samples with same attributes leaving most common classification
48
+ data2 = data.inject({}) {|hash, d| hash[d.slice(0..-2)] ||= Hash.new(0); hash[d.slice(0..-2)][d.last] += 1; hash }.map{|key,val| key + [val.sort_by{ |k, v| v }.last.first]}
49
+
50
+ @tree = id3_train(data2, attributes, default)
51
+ end
52
+
53
+ def type(attribute)
54
+ @type.is_a?(Hash) ? @type[attribute.to_sym] : @type
55
+ end
56
+
57
+ def fitness_for(attribute)
58
+ case type(attribute)
59
+ when :discrete; fitness = proc{|a,b,c| id3_discrete(a,b,c)}
60
+ when :continuous; fitness = proc{|a,b,c| id3_continuous(a,b,c)}
61
+ end
62
+ end
63
+
64
+ def id3_train(data, attributes, default, used={})
65
+ return default if data.empty?
66
+
67
+ # return classification if all examples have the same classification
68
+ return data.first.last if data.classification.uniq.size == 1
69
+
70
+ # Choose best attribute:
71
+ # 1. enumerate all attributes
72
+ # 2. Pick best attribute
73
+ # 3. If attributes all score the same, then pick a random one to avoid infinite recursion.
74
+ performance = attributes.collect { |attribute| fitness_for(attribute).call(data, attributes, attribute) }
75
+ max = performance.max { |a,b| a[0] <=> b[0] }
76
+ min = performance.min { |a,b| a[0] <=> b[0] }
77
+ max = performance.shuffle.first if max[0] == min[0]
78
+ best = Node.new(attributes[performance.index(max)], max[1], max[0])
79
+ best.threshold = nil if @type == :discrete
80
+ @used.has_key?(best.attribute) ? @used[best.attribute] += [best.threshold] : @used[best.attribute] = [best.threshold]
81
+ tree, l = {best => {}}, ['>=', '<']
82
+
83
+ fitness = fitness_for(best.attribute)
84
+ case type(best.attribute)
85
+ when :continuous
86
+ data.partition { |d| d[attributes.index(best.attribute)] >= best.threshold }.each_with_index { |examples, i|
87
+ tree[best][String.new(l[i])] = id3_train(examples, attributes, (data.classification.mode rescue 0), &fitness)
88
+ }
89
+ when :discrete
90
+ values = data.collect { |d| d[attributes.index(best.attribute)] }.uniq.sort
91
+ partitions = values.collect { |val| data.select { |d| d[attributes.index(best.attribute)] == val } }
92
+ partitions.each_with_index { |examples, i|
93
+ tree[best][values[i]] = id3_train(examples, attributes-[values[i]], (data.classification.mode rescue 0), &fitness)
94
+ }
95
+ end
96
+
97
+ tree
98
+ end
99
+
100
+ # ID3 for binary classification of continuous variables (e.g. healthy / sick based on temperature thresholds)
101
+ def id3_continuous(data, attributes, attribute)
102
+ values, thresholds = data.collect { |d| d[attributes.index(attribute)] }.uniq.sort, []
103
+ return [-1, -1] if values.size == 1
104
+ values.each_index { |i| thresholds.push((values[i]+(values[i+1].nil? ? values[i] : values[i+1])).to_f / 2) }
105
+ thresholds.pop
106
+ #thresholds -= used[attribute] if used.has_key? attribute
107
+
108
+ gain = thresholds.collect { |threshold|
109
+ sp = data.partition { |d| d[attributes.index(attribute)] >= threshold }
110
+ pos = (sp[0].size).to_f / data.size
111
+ neg = (sp[1].size).to_f / data.size
112
+
113
+ [data.classification.entropy - pos*sp[0].classification.entropy - neg*sp[1].classification.entropy, threshold]
114
+ }.max { |a,b| a[0] <=> b[0] }
115
+
116
+ return [-1, -1] if gain.size == 0
117
+ gain
118
+ end
119
+
120
+ # ID3 for discrete label cases
121
+ def id3_discrete(data, attributes, attribute)
122
+ values = data.collect { |d| d[attributes.index(attribute)] }.uniq.sort
123
+ partitions = values.collect { |val| data.select { |d| d[attributes.index(attribute)] == val } }
124
+ remainder = partitions.collect {|p| (p.size.to_f / data.size) * p.classification.entropy}.inject(0) {|i,s| s+=i }
125
+
126
+ [data.classification.entropy - remainder, attributes.index(attribute)]
127
+ end
128
+
129
+ def predict(test)
130
+ descend(@tree, test)
131
+ end
132
+
133
+ def graph(filename)
134
+ dgp = DotGraphPrinter.new(build_tree)
135
+ dgp.write_to_file("#{filename}.png", "png")
136
+ end
137
+
138
+ def ruleset
139
+ rs = Ruleset.new(@attributes, @data, @default, @type)
140
+ rs.rules = build_rules
141
+ rs
142
+ end
143
+
144
+ def build_rules(tree=@tree)
145
+ attr = tree.to_a.first
146
+ cases = attr[1].to_a
147
+ rules = []
148
+ cases.each do |c,child|
149
+ if child.is_a?(Hash) then
150
+ build_rules(child).each do |r|
151
+ r2 = r.clone
152
+ r2.premises.unshift([attr.first, c])
153
+ rules << r2
154
+ end
155
+ else
156
+ rules << Rule.new(@attributes, [[attr.first, c]], child)
157
+ end
158
+ end
159
+ rules
160
+ end
161
+
162
+ private
163
+ def descend(tree, test)
164
+ attr = tree.to_a.first
165
+ return @default if !attr
166
+ if type(attr.first.attribute) == :continuous
167
+ return attr[1]['>='] if !attr[1]['>='].is_a?(Hash) and test[@attributes.index(attr.first.attribute)] >= attr.first.threshold
168
+ return attr[1]['<'] if !attr[1]['<'].is_a?(Hash) and test[@attributes.index(attr.first.attribute)] < attr.first.threshold
169
+ return descend(attr[1]['>='],test) if test[@attributes.index(attr.first.attribute)] >= attr.first.threshold
170
+ return descend(attr[1]['<'],test) if test[@attributes.index(attr.first.attribute)] < attr.first.threshold
171
+ else
172
+ return attr[1][test[@attributes.index(attr[0].attribute)]] if !attr[1][test[@attributes.index(attr[0].attribute)]].is_a?(Hash)
173
+ return descend(attr[1][test[@attributes.index(attr[0].attribute)]],test)
174
+ end
175
+ end
176
+
177
+ def build_tree(tree = @tree)
178
+ return [] unless tree.is_a?(Hash)
179
+ return [["Always", @default]] if tree.empty?
180
+
181
+ attr = tree.to_a.first
182
+
183
+ links = attr[1].keys.collect do |key|
184
+ parent_text = "#{attr[0].attribute}\n(#{attr[0].object_id})"
185
+ if attr[1][key].is_a?(Hash) then
186
+ child = attr[1][key].to_a.first[0]
187
+ child_text = "#{child.attribute}\n(#{child.object_id})"
188
+ else
189
+ child = attr[1][key]
190
+ child_text = "#{child}\n(#{child.to_s.clone.object_id})"
191
+ end
192
+ label_text = "#{key} #{type(attr[0].attribute) == :continuous ? attr[0].threshold : ""}"
193
+
194
+ [parent_text, child_text, label_text]
195
+ end
196
+ attr[1].keys.each { |key| links += build_tree(attr[1][key]) }
197
+
198
+ return links
199
+ end
200
+ end
201
+
202
+ class Rule
203
+ attr_accessor :premises
204
+ attr_accessor :conclusion
205
+ attr_accessor :attributes
206
+
207
+ def initialize(attributes,premises=[],conclusion=nil)
208
+ @attributes, @premises, @conclusion = attributes, premises, conclusion
209
+ end
210
+
211
+ def to_s
212
+ str = ''
213
+ @premises.each do |p|
214
+ str += "#{p.first.attribute} #{p.last} #{p.first.threshold}" if p.first.threshold
215
+ str += "#{p.first.attribute} = #{p.last}" if !p.first.threshold
216
+ str += "\n"
217
+ end
218
+ str += "=> #{@conclusion} (#{accuracy})"
219
+ end
220
+
221
+ def predict(test)
222
+ verifies = true;
223
+ @premises.each do |p|
224
+ if p.first.threshold then # Continuous
225
+ if !(p.last == '>=' && test[@attributes.index(p.first.attribute)] >= p.first.threshold) && !(p.last == '<' && test[@attributes.index(p.first.attribute)] < p.first.threshold) then
226
+ verifies = false; break
227
+ end
228
+ else # Discrete
229
+ if test[@attributes.index(p.first.attribute)] != p.last then
230
+ verifies = false; break
231
+ end
232
+ end
233
+ end
234
+ return @conclusion if verifies
235
+ return nil
236
+ end
237
+
238
+ def get_accuracy(data)
239
+ correct = 0; total = 0
240
+ data.each do |d|
241
+ prediction = predict(d)
242
+ correct += 1 if d.last == prediction
243
+ total += 1 if !prediction.nil?
244
+ end
245
+ (correct.to_f + 1) / (total.to_f + 2)
246
+ end
247
+
248
+ def accuracy(data=nil)
249
+ data.nil? ? @accuracy : @accuracy = get_accuracy(data)
250
+ end
251
+ end
252
+
253
+ class Ruleset
254
+ attr_accessor :rules
255
+
256
+ def initialize(attributes, data, default, type)
257
+ @attributes, @default, @type = attributes, default, type
258
+ mixed_data = data.sort_by {rand}
259
+ cut = (mixed_data.size.to_f * 0.67).to_i
260
+ @train_data = mixed_data.slice(0..cut-1)
261
+ @prune_data = mixed_data.slice(cut..-1)
262
+ end
263
+
264
+ def train(train_data=@train_data, attributes=@attributes, default=@default)
265
+ dec_tree = DecisionTree::ID3Tree.new(attributes, train_data, default, @type)
266
+ dec_tree.train
267
+ @rules = dec_tree.build_rules
268
+ @rules.each { |r| r.accuracy(train_data) } # Calculate accuracy
269
+ prune
270
+ end
271
+
272
+ def prune(data=@prune_data)
273
+ @rules.each do |r|
274
+ (1..r.premises.size).each do
275
+ acc1 = r.accuracy(data)
276
+ p = r.premises.pop
277
+ if acc1 > r.get_accuracy(data) then
278
+ r.premises.push(p); break
279
+ end
280
+ end
281
+ end
282
+ @rules = @rules.sort_by{|r| -r.accuracy(data)}
283
+ end
284
+
285
+ def to_s
286
+ str = ''; @rules.each { |rule| str += "#{rule}\n\n" }
287
+ str
288
+ end
289
+
290
+ def predict(test)
291
+ @rules.each do |r|
292
+ prediction = r.predict(test)
293
+ return prediction, r.accuracy unless prediction.nil?
294
+ end
295
+ return @default, 0.0
296
+ end
297
+ end
298
+
299
+ class Bagging
300
+ attr_accessor :classifiers
301
+ def initialize(attributes, data, default, type)
302
+ @classifiers, @type = [], type
303
+ @data, @attributes, @default = data, attributes, default
304
+ end
305
+
306
+ def train(data=@data, attributes=@attributes, default=@default)
307
+ @classifiers = []
308
+ 10.times { @classifiers << Ruleset.new(attributes, data, default, @type) }
309
+ @classifiers.each do |c|
310
+ c.train(data, attributes, default)
311
+ end
312
+ end
313
+
314
+ def predict(test)
315
+ predictions = Hash.new(0)
316
+ @classifiers.each do |c|
317
+ p, accuracy = c.predict(test)
318
+ predictions[p] += accuracy unless p.nil?
319
+ end
320
+ return @default, 0.0 if predictions.empty?
321
+ winner = predictions.sort_by {|k,v| -v}.first
322
+ return winner[0], winner[1].to_f / @classifiers.size.to_f
323
+ end
324
+ end
325
+ end
@@ -0,0 +1,92 @@
1
+ require 'spec_helper'
2
+
3
# Behavioural specs for DecisionTree::ID3Tree (rspec-given style: Given sets
# up lazily evaluated values, When runs the action, Then asserts).
describe DecisionTree::ID3Tree do

  describe "simple discrete case" do
    Given(:labels) { ["sun", "rain"] }
    Given(:data) do
      [
        [1, 0, 1],
        [0, 1, 0]
      ]
    end
    Given(:tree) { DecisionTree::ID3Tree.new(labels, data, 1, :discrete) }
    When { tree.train }
    Then { tree.predict([1, 0]).should == 1 }
    Then { tree.predict([0, 1]).should == 0 }
  end

  describe "discrete attributes" do
    Given(:labels) { ["hungry", "color"] }
    Given(:data) do
      [
        ["yes", "red", "angry"],
        ["no", "blue", "not angry"],
        ["yes", "blue", "not angry"],
        ["no", "red", "not angry"]
      ]
    end
    Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", :discrete) }
    When { tree.train }
    Then { tree.predict(["yes", "red"]).should == "angry" }
    Then { tree.predict(["no", "red"]).should == "not angry" }
  end

  # Every attribute here is numeric and the tree is built in :continuous mode.
  describe "continuous attributes" do
    Given(:labels) { ["hunger", "happiness"] }
    Given(:data) do
      [
        [8, 7, "angry"],
        [6, 7, "angry"],
        [7, 9, "angry"],
        [7, 1, "not angry"],
        [2, 9, "not angry"],
        [3, 2, "not angry"],
        [2, 3, "not angry"],
        [1, 4, "not angry"]
      ]
    end
    Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", :continuous) }
    When { tree.train }
    Then { tree.graph("continuous") }
    Then { tree.predict([7, 7]).should == "angry" }
    Then { tree.predict([2, 3]).should == "not angry" }
  end

  # Attribute types are given per attribute: one continuous, one discrete.
  describe "a mixture" do
    Given(:labels) { ["hunger", "color"] }
    Given(:data) do
      [
        [8, "red", "angry"],
        [6, "red", "angry"],
        [7, "red", "angry"],
        [7, "blue", "not angry"],
        [2, "red", "not angry"],
        [3, "blue", "not angry"],
        [2, "blue", "not angry"],
        [1, "red", "not angry"]
      ]
    end
    Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", color: :discrete, hunger: :continuous) }
    When { tree.train }
    Then { tree.graph("continuous") }
    Then { tree.predict([7, "red"]).should == "angry" }
    Then { tree.predict([2, "blue"]).should == "not angry" }
  end

  # Attribute :a has a single value and the remaining attributes score the
  # same on the full data set, so training must still terminate.
  describe "infinite recursion case" do
    Given(:labels) { [:a, :b, :c] }
    Given(:data) do
      [
        ["a1", "b0", "c0", "RED"],
        ["a1", "b1", "c1", "RED"],
        ["a1", "b1", "c0", "BLUE"],
        ["a1", "b0", "c1", "BLUE"]
      ]
    end
    Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "RED", :discrete) }
    When { tree.train }
    Then { tree.predict(["a1", "b0", "c0"]).should == "RED" }
  end

end
@@ -0,0 +1,3 @@
1
+ require 'rspec/given'
2
+ require 'decisiontree'
3
+ require 'pry'
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: decisiontree_n
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ilya Grigorik
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: graphr
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec-given
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: pry
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: ID3-based implementation of the M.L. Decision Tree algorithm
79
+ email:
80
+ - ilya@igvita.com
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - .gitignore
86
+ - Gemfile
87
+ - README.md
88
+ - Rakefile
89
+ - decisiontree.gemspec
90
+ - examples/continuous-id3.rb
91
+ - examples/data/continuous-test.txt
92
+ - examples/data/continuous-training.txt
93
+ - examples/data/discrete-test.txt
94
+ - examples/data/discrete-training.txt
95
+ - examples/discrete-id3.rb
96
+ - examples/simple.rb
97
+ - lib/decisiontree.rb
98
+ - lib/decisiontree/id3_tree.rb
99
+ - spec/id3_spec.rb
100
+ - spec/spec_helper.rb
101
+ homepage: https://github.com/igrigorik/decisiontree
102
+ licenses: []
103
+ post_install_message:
104
+ rdoc_options: []
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ! '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ! '>='
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ requirements: []
120
+ rubyforge_project: decisiontree
121
+ rubygems_version: 1.8.25
122
+ signing_key:
123
+ specification_version: 3
124
+ summary: ID3-based implementation of the M.L. Decision Tree algorithm
125
+ test_files: []