decisiontree 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +19 -0
- data/decisiontree.gemspec +2 -1
- data/lib/decisiontree.rb +1 -1
- data/lib/decisiontree/id3_tree.rb +10 -5
- data/spec/id3_spec.rb +50 -2
- data/spec/spec_helper.rb +2 -0
- metadata +17 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: dc0e08692fc5531e6d19f5b974777cd48170b454
|
4
|
+
data.tar.gz: 127248c9a9ecc65a689d1fc617db6d1aab7fdec4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0b245fb099112983f9d88ddfe8256a36b2b247a3ea66a2f150fcbdf68ac306b09c808a54d1dea67be19904b302cd455454080fd1ae453633aff131779e04ccdd
|
7
|
+
data.tar.gz: 0a389c5fdf6a9b2ef16e3727a9fe2bd10161142baf3ee46d0aba49be132233b7479c7856d8a8b09ebd5391421001f79dde2dc63172666cf7bcd00d069b7a3518
|
data/README.md
CHANGED
@@ -41,6 +41,25 @@ decision = dec_tree.predict([37, 'sick'])
|
|
41
41
|
puts "Predicted: #{decision} ... True decision: #{test.last}";
|
42
42
|
|
43
43
|
# => Predicted: sick ... True decision: sick
|
44
|
+
|
45
|
+
# Specify type ("discrete" or "continuous") in the training data
|
46
|
+
labels = ["hunger", "color"]
|
47
|
+
training = [
|
48
|
+
[8, "red", "angry"],
|
49
|
+
[6, "red", "angry"],
|
50
|
+
[7, "red", "angry"],
|
51
|
+
[7, "blue", "not angry"],
|
52
|
+
[2, "red", "not angry"],
|
53
|
+
[3, "blue", "not angry"],
|
54
|
+
[2, "blue", "not angry"],
|
55
|
+
[1, "red", "not angry"]
|
56
|
+
]
|
57
|
+
|
58
|
+
dec_tree = DecisionTree::ID3Tree.new(labels, data, "not angry", color: :discrete, hunger: :continuous)
|
59
|
+
dec_tree.train
|
60
|
+
|
61
|
+
decision = dec_tree.predict([7, "red"])
|
62
|
+
puts "Predicted: #{decision} ... True decision: #{test.last}";
|
44
63
|
```
|
45
64
|
|
46
65
|
## License
|
data/decisiontree.gemspec
CHANGED
@@ -3,13 +3,14 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "decisiontree"
|
6
|
-
s.version = "0.4.0"
|
6
|
+
s.version = "0.5.0"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Ilya Grigorik"]
|
9
9
|
s.email = ["ilya@igvita.com"]
|
10
10
|
s.homepage = "https://github.com/igrigorik/decisiontree"
|
11
11
|
s.summary = %q{ID3-based implementation of the M.L. Decision Tree algorithm}
|
12
12
|
s.description = s.summary
|
13
|
+
s.license = "MIT"
|
13
14
|
|
14
15
|
s.rubyforge_project = "decisiontree"
|
15
16
|
|
data/lib/decisiontree.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
|
1
|
+
require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
|
@@ -3,8 +3,6 @@
|
|
3
3
|
### Copyright (c) 2007 Ilya Grigorik <ilya AT igvita DOT com>
|
4
4
|
### Modifed at 2007 by José Ignacio Fernández <joseignacio.fernandez AT gmail DOT com>
|
5
5
|
|
6
|
-
require 'graphr'
|
7
|
-
|
8
6
|
class Object
|
9
7
|
def save_to_file(filename)
|
10
8
|
File.open(filename, 'w+' ) { |f| f << Marshal.dump(self) }
|
@@ -44,6 +42,7 @@ module DecisionTree
|
|
44
42
|
end
|
45
43
|
|
46
44
|
def train(data=@data, attributes=@attributes, default=@default)
|
45
|
+
attributes = attributes.map {|e| e.to_s}
|
47
46
|
initialize(attributes, data, default, @type)
|
48
47
|
|
49
48
|
# Remove samples with same attributes leaving most common classification
|
@@ -69,9 +68,14 @@ module DecisionTree
|
|
69
68
|
# return classification if all examples have the same classification
|
70
69
|
return data.first.last if data.classification.uniq.size == 1
|
71
70
|
|
72
|
-
# Choose best attribute
|
71
|
+
# Choose best attribute:
|
72
|
+
# 1. enumerate all attributes
|
73
|
+
# 2. Pick best attribute
|
74
|
+
# 3. If attributes all score the same, then pick a random one to avoid infinite recursion.
|
73
75
|
performance = attributes.collect { |attribute| fitness_for(attribute).call(data, attributes, attribute) }
|
74
76
|
max = performance.max { |a,b| a[0] <=> b[0] }
|
77
|
+
min = performance.min { |a,b| a[0] <=> b[0] }
|
78
|
+
max = performance.shuffle.first if max[0] == min[0]
|
75
79
|
best = Node.new(attributes[performance.index(max)], max[1], max[0])
|
76
80
|
best.threshold = nil if @type == :discrete
|
77
81
|
@used.has_key?(best.attribute) ? @used[best.attribute] += [best.threshold] : @used[best.attribute] = [best.threshold]
|
@@ -127,9 +131,10 @@ module DecisionTree
|
|
127
131
|
descend(@tree, test)
|
128
132
|
end
|
129
133
|
|
130
|
-
def graph(filename)
|
134
|
+
def graph(filename, file_type = "png")
|
135
|
+
require 'graphr'
|
131
136
|
dgp = DotGraphPrinter.new(build_tree)
|
132
|
-
dgp.write_to_file("#{filename}.png", "png")
|
137
|
+
dgp.write_to_file("#{filename}.#{file_type}", file_type)
|
133
138
|
end
|
134
139
|
|
135
140
|
def ruleset
|
data/spec/id3_spec.rb
CHANGED
@@ -48,7 +48,6 @@ describe describe DecisionTree::ID3Tree do
|
|
48
48
|
end
|
49
49
|
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", :continuous) }
|
50
50
|
When { tree.train }
|
51
|
-
Then { tree.graph("continuous") }
|
52
51
|
Then { tree.predict([7, 7]).should == "angry" }
|
53
52
|
Then { tree.predict([2, 3]).should == "not angry" }
|
54
53
|
end
|
@@ -69,9 +68,58 @@ describe describe DecisionTree::ID3Tree do
|
|
69
68
|
end
|
70
69
|
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", color: :discrete, hunger: :continuous) }
|
71
70
|
When { tree.train }
|
72
|
-
Then { tree.graph("continuous") }
|
73
71
|
Then { tree.predict([7, "red"]).should == "angry" }
|
74
72
|
Then { tree.predict([2, "blue"]).should == "not angry" }
|
75
73
|
end
|
76
74
|
|
75
|
+
describe "infinite recursion case" do
|
76
|
+
Given(:labels) { [:a, :b, :c] }
|
77
|
+
Given(:data) do
|
78
|
+
[
|
79
|
+
["a1", "b0", "c0", "RED"],
|
80
|
+
["a1", "b1", "c1", "RED"],
|
81
|
+
["a1", "b1", "c0", "BLUE"],
|
82
|
+
["a1", "b0", "c1", "BLUE"]
|
83
|
+
]
|
84
|
+
end
|
85
|
+
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "RED", :discrete) }
|
86
|
+
When { tree.train }
|
87
|
+
Then { tree.predict(["a1","b0","c0"]).should == "RED" }
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "numerical labels case" do
|
91
|
+
Given(:labels) { [1, 2] }
|
92
|
+
Given(:data) do
|
93
|
+
[
|
94
|
+
[1, 1, true],
|
95
|
+
[1, 2, false],
|
96
|
+
[2, 1, false],
|
97
|
+
[2, 2, true]
|
98
|
+
]
|
99
|
+
end
|
100
|
+
Given(:tree) { DecisionTree::ID3Tree.new labels, data, nil, :discrete }
|
101
|
+
When { tree.train }
|
102
|
+
Then {
|
103
|
+
lambda { tree.predict([1, 1]) }.should_not raise_error
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "create a figure" do
|
108
|
+
after(:all) do
|
109
|
+
File.delete("#{FIGURE_FILENAME}.png") if File.file?("#{FIGURE_FILENAME}.png")
|
110
|
+
end
|
111
|
+
|
112
|
+
Given(:labels) { ["sun", "rain"]}
|
113
|
+
Given(:data) do
|
114
|
+
[
|
115
|
+
[1,0,1],
|
116
|
+
[0,1,0]
|
117
|
+
]
|
118
|
+
end
|
119
|
+
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, 1, :discrete) }
|
120
|
+
When { tree.train }
|
121
|
+
When(:result) { tree.graph(FIGURE_FILENAME) }
|
122
|
+
Then { expect(result).to_not have_failed }
|
123
|
+
And { File.file?("#{FIGURE_FILENAME}.png") }
|
124
|
+
end
|
77
125
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,78 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: decisiontree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.5.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ilya Grigorik
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: graphr
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rspec
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: rspec-given
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - '>='
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - '>='
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: pry
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - '>='
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - '>='
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
description: ID3-based implementation of the M.L. Decision Tree algorithm
|
@@ -99,30 +90,29 @@ files:
|
|
99
90
|
- spec/id3_spec.rb
|
100
91
|
- spec/spec_helper.rb
|
101
92
|
homepage: https://github.com/igrigorik/decisiontree
|
102
|
-
licenses:
|
93
|
+
licenses:
|
94
|
+
- MIT
|
95
|
+
metadata: {}
|
103
96
|
post_install_message:
|
104
97
|
rdoc_options: []
|
105
98
|
require_paths:
|
106
99
|
- lib
|
107
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
-
none: false
|
109
101
|
requirements:
|
110
|
-
- -
|
102
|
+
- - '>='
|
111
103
|
- !ruby/object:Gem::Version
|
112
104
|
version: '0'
|
113
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
-
none: false
|
115
106
|
requirements:
|
116
|
-
- -
|
107
|
+
- - '>='
|
117
108
|
- !ruby/object:Gem::Version
|
118
109
|
version: '0'
|
119
110
|
requirements: []
|
120
111
|
rubyforge_project: decisiontree
|
121
|
-
rubygems_version:
|
112
|
+
rubygems_version: 2.0.14
|
122
113
|
signing_key:
|
123
|
-
specification_version:
|
114
|
+
specification_version: 4
|
124
115
|
summary: ID3-based implementation of the M.L. Decision Tree algorithm
|
125
116
|
test_files:
|
126
117
|
- spec/id3_spec.rb
|
127
118
|
- spec/spec_helper.rb
|
128
|
-
has_rdoc:
|