decisiontree 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dc0e08692fc5531e6d19f5b974777cd48170b454
4
+ data.tar.gz: 127248c9a9ecc65a689d1fc617db6d1aab7fdec4
5
+ SHA512:
6
+ metadata.gz: 0b245fb099112983f9d88ddfe8256a36b2b247a3ea66a2f150fcbdf68ac306b09c808a54d1dea67be19904b302cd455454080fd1ae453633aff131779e04ccdd
7
+ data.tar.gz: 0a389c5fdf6a9b2ef16e3727a9fe2bd10161142baf3ee46d0aba49be132233b7479c7856d8a8b09ebd5391421001f79dde2dc63172666cf7bcd00d069b7a3518
data/README.md CHANGED
@@ -41,6 +41,25 @@ decision = dec_tree.predict([37, 'sick'])
41
41
  puts "Predicted: #{decision} ... True decision: #{test.last}";
42
42
 
43
43
  # => Predicted: sick ... True decision: sick
44
+
45
+ # Specify each attribute's type (:discrete or :continuous) when constructing the tree
46
+ labels = ["hunger", "color"]
47
+ training = [
48
+ [8, "red", "angry"],
49
+ [6, "red", "angry"],
50
+ [7, "red", "angry"],
51
+ [7, "blue", "not angry"],
52
+ [2, "red", "not angry"],
53
+ [3, "blue", "not angry"],
54
+ [2, "blue", "not angry"],
55
+ [1, "red", "not angry"]
56
+ ]
57
+
58
+ dec_tree = DecisionTree::ID3Tree.new(labels, training, "not angry", color: :discrete, hunger: :continuous)
59
+ dec_tree.train
60
+
61
+ decision = dec_tree.predict([7, "red"])
62
+ puts "Predicted: #{decision} ... True decision: angry";
44
63
  ```
45
64
 
46
65
  ## License
@@ -3,13 +3,14 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "decisiontree"
6
- s.version = "0.4.0"
6
+ s.version = "0.5.0"
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ["Ilya Grigorik"]
9
9
  s.email = ["ilya@igvita.com"]
10
10
  s.homepage = "https://github.com/igrigorik/decisiontree"
11
11
  s.summary = %q{ID3-based implementation of the M.L. Decision Tree algorithm}
12
12
  s.description = s.summary
13
+ s.license = "MIT"
13
14
 
14
15
  s.rubyforge_project = "decisiontree"
15
16
 
@@ -1 +1 @@
1
- require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
1
+ require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
@@ -3,8 +3,6 @@
3
3
  ### Copyright (c) 2007 Ilya Grigorik <ilya AT igvita DOT com>
4
4
  ### Modified in 2007 by José Ignacio Fernández <joseignacio.fernandez AT gmail DOT com>
5
5
 
6
- require 'graphr'
7
-
8
6
  class Object
9
7
  def save_to_file(filename)
10
8
  File.open(filename, 'w+' ) { |f| f << Marshal.dump(self) }
@@ -44,6 +42,7 @@ module DecisionTree
44
42
  end
45
43
 
46
44
  def train(data=@data, attributes=@attributes, default=@default)
45
+ attributes = attributes.map {|e| e.to_s}
47
46
  initialize(attributes, data, default, @type)
48
47
 
49
48
  # Remove samples with same attributes leaving most common classification
@@ -69,9 +68,14 @@ module DecisionTree
69
68
  # return classification if all examples have the same classification
70
69
  return data.first.last if data.classification.uniq.size == 1
71
70
 
72
- # Choose best attribute (1. enumerate all attributes / 2. Pick best attribute)
71
+ # Choose best attribute:
72
+ # 1. enumerate all attributes
73
+ # 2. Pick best attribute
74
+ # 3. If attributes all score the same, then pick a random one to avoid infinite recursion.
73
75
  performance = attributes.collect { |attribute| fitness_for(attribute).call(data, attributes, attribute) }
74
76
  max = performance.max { |a,b| a[0] <=> b[0] }
77
+ min = performance.min { |a,b| a[0] <=> b[0] }
78
+ max = performance.shuffle.first if max[0] == min[0]
75
79
  best = Node.new(attributes[performance.index(max)], max[1], max[0])
76
80
  best.threshold = nil if @type == :discrete
77
81
  @used.has_key?(best.attribute) ? @used[best.attribute] += [best.threshold] : @used[best.attribute] = [best.threshold]
@@ -127,9 +131,10 @@ module DecisionTree
127
131
  descend(@tree, test)
128
132
  end
129
133
 
130
- def graph(filename)
134
+ def graph(filename, file_type = "png")
135
+ require 'graphr'
131
136
  dgp = DotGraphPrinter.new(build_tree)
132
- dgp.write_to_file("#{filename}.png", "png")
137
+ dgp.write_to_file("#{filename}.#{file_type}", file_type)
133
138
  end
134
139
 
135
140
  def ruleset
@@ -48,7 +48,6 @@ describe describe DecisionTree::ID3Tree do
48
48
  end
49
49
  Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", :continuous) }
50
50
  When { tree.train }
51
- Then { tree.graph("continuous") }
52
51
  Then { tree.predict([7, 7]).should == "angry" }
53
52
  Then { tree.predict([2, 3]).should == "not angry" }
54
53
  end
@@ -69,9 +68,58 @@ describe describe DecisionTree::ID3Tree do
69
68
  end
70
69
  Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "not angry", color: :discrete, hunger: :continuous) }
71
70
  When { tree.train }
72
- Then { tree.graph("continuous") }
73
71
  Then { tree.predict([7, "red"]).should == "angry" }
74
72
  Then { tree.predict([2, "blue"]).should == "not angry" }
75
73
  end
76
74
 
75
+ describe "infinite recursion case" do
76
+ Given(:labels) { [:a, :b, :c] }
77
+ Given(:data) do
78
+ [
79
+ ["a1", "b0", "c0", "RED"],
80
+ ["a1", "b1", "c1", "RED"],
81
+ ["a1", "b1", "c0", "BLUE"],
82
+ ["a1", "b0", "c1", "BLUE"]
83
+ ]
84
+ end
85
+ Given(:tree) { DecisionTree::ID3Tree.new(labels, data, "RED", :discrete) }
86
+ When { tree.train }
87
+ Then { tree.predict(["a1","b0","c0"]).should == "RED" }
88
+ end
89
+
90
+ describe "numerical labels case" do
91
+ Given(:labels) { [1, 2] }
92
+ Given(:data) do
93
+ [
94
+ [1, 1, true],
95
+ [1, 2, false],
96
+ [2, 1, false],
97
+ [2, 2, true]
98
+ ]
99
+ end
100
+ Given(:tree) { DecisionTree::ID3Tree.new labels, data, nil, :discrete }
101
+ When { tree.train }
102
+ Then {
103
+ lambda { tree.predict([1, 1]) }.should_not raise_error
104
+ }
105
+ end
106
+
107
+ describe "create a figure" do
108
+ after(:all) do
109
+ File.delete("#{FIGURE_FILENAME}.png") if File.file?("#{FIGURE_FILENAME}.png")
110
+ end
111
+
112
+ Given(:labels) { ["sun", "rain"]}
113
+ Given(:data) do
114
+ [
115
+ [1,0,1],
116
+ [0,1,0]
117
+ ]
118
+ end
119
+ Given(:tree) { DecisionTree::ID3Tree.new(labels, data, 1, :discrete) }
120
+ When { tree.train }
121
+ When(:result) { tree.graph(FIGURE_FILENAME) }
122
+ Then { expect(result).to_not have_failed }
123
+ And { File.file?("#{FIGURE_FILENAME}.png") }
124
+ end
77
125
  end
@@ -1,3 +1,5 @@
1
1
  require 'rspec/given'
2
2
  require 'decisiontree'
3
3
  require 'pry'
4
+
5
+ FIGURE_FILENAME = "just_a_spec"
metadata CHANGED
@@ -1,78 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: decisiontree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
5
- prerelease:
4
+ version: 0.5.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Ilya Grigorik
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-10-28 00:00:00.000000000 Z
11
+ date: 2014-09-19 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: graphr
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '>='
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rspec
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - '>='
36
32
  - !ruby/object:Gem::Version
37
33
  version: '0'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - '>='
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rspec-given
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - '>='
52
46
  - !ruby/object:Gem::Version
53
47
  version: '0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ! '>='
52
+ - - '>='
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: pry
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ! '>='
59
+ - - '>='
68
60
  - !ruby/object:Gem::Version
69
61
  version: '0'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ! '>='
66
+ - - '>='
76
67
  - !ruby/object:Gem::Version
77
68
  version: '0'
78
69
  description: ID3-based implementation of the M.L. Decision Tree algorithm
@@ -99,30 +90,29 @@ files:
99
90
  - spec/id3_spec.rb
100
91
  - spec/spec_helper.rb
101
92
  homepage: https://github.com/igrigorik/decisiontree
102
- licenses: []
93
+ licenses:
94
+ - MIT
95
+ metadata: {}
103
96
  post_install_message:
104
97
  rdoc_options: []
105
98
  require_paths:
106
99
  - lib
107
100
  required_ruby_version: !ruby/object:Gem::Requirement
108
- none: false
109
101
  requirements:
110
- - - ! '>='
102
+ - - '>='
111
103
  - !ruby/object:Gem::Version
112
104
  version: '0'
113
105
  required_rubygems_version: !ruby/object:Gem::Requirement
114
- none: false
115
106
  requirements:
116
- - - ! '>='
107
+ - - '>='
117
108
  - !ruby/object:Gem::Version
118
109
  version: '0'
119
110
  requirements: []
120
111
  rubyforge_project: decisiontree
121
- rubygems_version: 1.8.24
112
+ rubygems_version: 2.0.14
122
113
  signing_key:
123
- specification_version: 3
114
+ specification_version: 4
124
115
  summary: ID3-based implementation of the M.L. Decision Tree algorithm
125
116
  test_files:
126
117
  - spec/id3_spec.rb
127
118
  - spec/spec_helper.rb
128
- has_rdoc: