decisiontree 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +19 -0
- data/decisiontree.gemspec +2 -1
- data/lib/decisiontree.rb +1 -1
- data/lib/decisiontree/id3_tree.rb +10 -5
- data/spec/id3_spec.rb +50 -2
- data/spec/spec_helper.rb +2 -0
- metadata +17 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: dc0e08692fc5531e6d19f5b974777cd48170b454
|
4
|
+
data.tar.gz: 127248c9a9ecc65a689d1fc617db6d1aab7fdec4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0b245fb099112983f9d88ddfe8256a36b2b247a3ea66a2f150fcbdf68ac306b09c808a54d1dea67be19904b302cd455454080fd1ae453633aff131779e04ccdd
|
7
|
+
data.tar.gz: 0a389c5fdf6a9b2ef16e3727a9fe2bd10161142baf3ee46d0aba49be132233b7479c7856d8a8b09ebd5391421001f79dde2dc63172666cf7bcd00d069b7a3518
|
data/README.md
CHANGED
@@ -41,6 +41,25 @@ decision = dec_tree.predict([37, 'sick'])
|
|
41
41
|
puts "Predicted: #{decision} ... True decision: #{test.last}";
|
42
42
|
|
43
43
|
# => Predicted: sick ... True decision: sick
|
44
|
+
|
45
|
+
# Specify type ("discrete" or "continuous") in the training data
|
46
|
+
labels = ["hunger", "color"]
|
47
|
+
training = [
|
48
|
+
[8, "red", "angry"],
|
49
|
+
[6, "red", "angry"],
|
50
|
+
[7, "red", "angry"],
|
51
|
+
[7, "blue", "not angry"],
|
52
|
+
[2, "red", "not angry"],
|
53
|
+
[3, "blue", "not angry"],
|
54
|
+
[2, "blue", "not angry"],
|
55
|
+
[1, "red", "not angry"]
|
56
|
+
]
|
57
|
+
|
58
|
+
dec_tree = DecisionTree::ID3Tree.new(labels, training, "not angry", color: :discrete, hunger: :continuous)
|
59
|
+
dec_tree.train
|
60
|
+
|
61
|
+
decision = dec_tree.predict([7, "red"])
|
62
|
+
puts "Predicted: #{decision} ... True decision: #{test.last}";
|
44
63
|
```
|
45
64
|
|
46
65
|
## License
|
data/decisiontree.gemspec
CHANGED
@@ -3,13 +3,14 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "decisiontree"
|
6
|
-
s.version = "0.4.0"
|
6
|
+
s.version = "0.5.0"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Ilya Grigorik"]
|
9
9
|
s.email = ["ilya@igvita.com"]
|
10
10
|
s.homepage = "https://github.com/igrigorik/decisiontree"
|
11
11
|
s.summary = %q{ID3-based implementation of the M.L. Decision Tree algorithm}
|
12
12
|
s.description = s.summary
|
13
|
+
s.license = "MIT"
|
13
14
|
|
14
15
|
s.rubyforge_project = "decisiontree"
|
15
16
|
|
data/lib/decisiontree.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
|
1
|
+
require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
|
@@ -3,8 +3,6 @@
|
|
3
3
|
### Copyright (c) 2007 Ilya Grigorik <ilya AT igvita DOT com>
|
4
4
|
### Modified at 2007 by José Ignacio Fernández <joseignacio.fernandez AT gmail DOT com>
|
5
5
|
|
6
|
-
require 'graphr'
|
7
|
-
|
8
6
|
class Object
|
9
7
|
def save_to_file(filename)
|
10
8
|
File.open(filename, 'w+' ) { |f| f << Marshal.dump(self) }
|
@@ -44,6 +42,7 @@ module DecisionTree
|
|
44
42
|
end
|
45
43
|
|
46
44
|
def train(data=@data, attributes=@attributes, default=@default)
|
45
|
+
attributes = attributes.map {|e| e.to_s}
|
47
46
|
initialize(attributes, data, default, @type)
|
48
47
|
|
49
48
|
# Remove samples with same attributes leaving most common classification
|
@@ -69,9 +68,14 @@ module DecisionTree
|
|
69
68
|
# return classification if all examples have the same classification
|
70
69
|
return data.first.last if data.classification.uniq.size == 1
|
71
70
|
|
72
|
-
# Choose best attribute
|
71
|
+
# Choose best attribute:
|
72
|
+
# 1. enumerate all attributes
|
73
|
+
# 2. Pick best attribute
|
74
|
+
# 3. If attributes all score the same, then pick a random one to avoid infinite recursion.
|
73
75
|
performance = attributes.collect { |attribute| fitness_for(attribute).call(data, attributes, attribute) }
|
74
76
|
max = performance.max { |a,b| a[0] <=> b[0] }
|
77
|
+
min = performance.min { |a,b| a[0] <=> b[0] }
|
78
|
+
max = performance.shuffle.first if max[0] == min[0]
|
75
79
|
best = Node.new(attributes[performance.index(max)], max[1], max[0])
|
76
80
|
best.threshold = nil if @type == :discrete
|
77
81
|
@used.has_key?(best.attribute) ? @used[best.attribute] += [best.threshold] : @used[best.attribute] = [best.threshold]
|
@@ -127,9 +131,10 @@ module DecisionTree
|
|
127
131
|
descend(@tree, test)
|
128
132
|
end
|
129
133
|
|
130
|
-
def graph(filename)
|
134
|
+
def graph(filename, file_type = "png")
|
135
|
+
require 'graphr'
|
131
136
|
dgp = DotGraphPrinter.new(build_tree)
|
132
|
-
dgp.write_to_file("#{filename}.png", "png")
|
137
|
+
dgp.write_to_file("#{filename}.#{file_type}", file_type)
|
133
138
|
end
|
134
139
|
|
135
140
|
def ruleset
|
data/spec/id3_spec.rb
CHANGED
@@ -48,7 +48,6 @@ describe describe DecisionTree::ID3Tree do
|
|
48
48
|
end
|
49
49
|
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", :continuous) }
|
50
50
|
When { tree.train }
|
51
|
-
Then { tree.graph("continuous") }
|
52
51
|
Then { tree.predict([7, 7]).should == "angry" }
|
53
52
|
Then { tree.predict([2, 3]).should == "not angry" }
|
54
53
|
end
|
@@ -69,9 +68,58 @@ describe describe DecisionTree::ID3Tree do
|
|
69
68
|
end
|
70
69
|
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", color: :discrete, hunger: :continuous) }
|
71
70
|
When { tree.train }
|
72
|
-
Then { tree.graph("continuous") }
|
73
71
|
Then { tree.predict([7, "red"]).should == "angry" }
|
74
72
|
Then { tree.predict([2, "blue"]).should == "not angry" }
|
75
73
|
end
|
76
74
|
|
75
|
+
describe "infinite recursion case" do
|
76
|
+
Given(:labels) { [:a, :b, :c] }
|
77
|
+
Given(:data) do
|
78
|
+
[
|
79
|
+
["a1", "b0", "c0", "RED"],
|
80
|
+
["a1", "b1", "c1", "RED"],
|
81
|
+
["a1", "b1", "c0", "BLUE"],
|
82
|
+
["a1", "b0", "c1", "BLUE"]
|
83
|
+
]
|
84
|
+
end
|
85
|
+
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "RED", :discrete) }
|
86
|
+
When { tree.train }
|
87
|
+
Then { tree.predict(["a1","b0","c0"]).should == "RED" }
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "numerical labels case" do
|
91
|
+
Given(:labels) { [1, 2] }
|
92
|
+
Given(:data) do
|
93
|
+
[
|
94
|
+
[1, 1, true],
|
95
|
+
[1, 2, false],
|
96
|
+
[2, 1, false],
|
97
|
+
[2, 2, true]
|
98
|
+
]
|
99
|
+
end
|
100
|
+
Given(:tree) { DecisionTree::ID3Tree.new labels, data, nil, :discrete }
|
101
|
+
When { tree.train }
|
102
|
+
Then {
|
103
|
+
lambda { tree.predict([1, 1]) }.should_not raise_error
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "create a figure" do
|
108
|
+
after(:all) do
|
109
|
+
File.delete("#{FIGURE_FILENAME}.png") if File.file?("#{FIGURE_FILENAME}.png")
|
110
|
+
end
|
111
|
+
|
112
|
+
Given(:labels) { ["sun", "rain"]}
|
113
|
+
Given(:data) do
|
114
|
+
[
|
115
|
+
[1,0,1],
|
116
|
+
[0,1,0]
|
117
|
+
]
|
118
|
+
end
|
119
|
+
Given(:tree) { DecisionTree::ID3Tree.new(labels, data, 1, :discrete) }
|
120
|
+
When { tree.train }
|
121
|
+
When(:result) { tree.graph(FIGURE_FILENAME) }
|
122
|
+
Then { expect(result).to_not have_failed }
|
123
|
+
And { File.file?("#{FIGURE_FILENAME}.png") }
|
124
|
+
end
|
77
125
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,78 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: decisiontree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.5.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ilya Grigorik
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: graphr
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rspec
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: rspec-given
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - '>='
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - '>='
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: pry
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - '>='
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - '>='
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
description: ID3-based implementation of the M.L. Decision Tree algorithm
|
@@ -99,30 +90,29 @@ files:
|
|
99
90
|
- spec/id3_spec.rb
|
100
91
|
- spec/spec_helper.rb
|
101
92
|
homepage: https://github.com/igrigorik/decisiontree
|
102
|
-
licenses:
|
93
|
+
licenses:
|
94
|
+
- MIT
|
95
|
+
metadata: {}
|
103
96
|
post_install_message:
|
104
97
|
rdoc_options: []
|
105
98
|
require_paths:
|
106
99
|
- lib
|
107
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
-
none: false
|
109
101
|
requirements:
|
110
|
-
- -
|
102
|
+
- - '>='
|
111
103
|
- !ruby/object:Gem::Version
|
112
104
|
version: '0'
|
113
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
-
none: false
|
115
106
|
requirements:
|
116
|
-
- -
|
107
|
+
- - '>='
|
117
108
|
- !ruby/object:Gem::Version
|
118
109
|
version: '0'
|
119
110
|
requirements: []
|
120
111
|
rubyforge_project: decisiontree
|
121
|
-
rubygems_version:
|
112
|
+
rubygems_version: 2.0.14
|
122
113
|
signing_key:
|
123
|
-
specification_version:
|
114
|
+
specification_version: 4
|
124
115
|
summary: ID3-based implementation of the M.L. Decision Tree algorithm
|
125
116
|
test_files:
|
126
117
|
- spec/id3_spec.rb
|
127
118
|
- spec/spec_helper.rb
|
128
|
-
has_rdoc:
|