ai4r 1.2 → 1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +12 -25
- data/examples/decision_trees/id3_example.rb +6 -9
- data/examples/decision_trees/results.txt +2 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +11 -13
- data/examples/neural_network/xor_example.rb +25 -0
- data/lib/ai4r.rb +10 -0
- data/lib/ai4r/classifiers/classifier.rb +46 -0
- data/lib/ai4r/classifiers/id3.rb +27 -58
- data/lib/ai4r/classifiers/one_r.rb +19 -58
- data/lib/ai4r/classifiers/prism.rb +21 -57
- data/lib/ai4r/classifiers/zero_r.rb +16 -48
- data/lib/ai4r/clusterers/bisecting_k_means.rb +115 -0
- data/lib/ai4r/clusterers/clusterer.rb +55 -0
- data/lib/ai4r/clusterers/k_means.rb +164 -0
- data/lib/ai4r/data/data_set.rb +250 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +19 -19
- data/lib/ai4r/neural_network/backpropagation.rb +23 -24
- data/site/build/site/en/broken-links.xml +2 -0
- data/site/build/site/en/downloads.html +200 -0
- data/site/build/site/en/downloads.pdf +151 -0
- data/site/build/site/en/forum.html +197 -0
- data/site/build/site/en/forum.pdf +151 -0
- data/site/build/site/en/geneticAlgorithms.html +591 -0
- data/site/build/site/en/geneticAlgorithms.pdf +934 -0
- data/site/build/site/en/images/ai4r-logo.png +0 -0
- data/site/build/site/en/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/images/c.png +0 -0
- data/site/build/site/en/images/c_wbn.png +0 -0
- data/site/build/site/en/images/c_wn.png +0 -0
- data/site/build/site/en/images/ero.gif +0 -0
- data/site/build/site/en/images/europe2.png +0 -0
- data/site/build/site/en/images/europe3.png +0 -0
- data/site/build/site/en/images/fitness.png +0 -0
- data/site/build/site/en/images/genetic_algorithms_example.png +0 -0
- data/site/build/site/en/images/instruction_arrow.png +0 -0
- data/site/build/site/en/images/jadeferret.png +0 -0
- data/site/build/site/en/images/my_email.png +0 -0
- data/site/build/site/en/images/neural_network_example.png +0 -0
- data/site/build/site/en/images/rubyforge.png +0 -0
- data/site/build/site/en/images/s.png +0 -0
- data/site/build/site/en/images/s_wbn.png +0 -0
- data/site/build/site/en/images/s_wn.png +0 -0
- data/site/build/site/en/images/sigmoid.png +0 -0
- data/site/build/site/en/images/t.png +0 -0
- data/site/build/site/en/images/t_wbn.png +0 -0
- data/site/build/site/en/images/t_wn.png +0 -0
- data/site/build/site/en/index.html +336 -0
- data/site/build/site/en/index.pdf +508 -0
- data/site/build/site/en/linkmap.html +263 -0
- data/site/build/site/en/linkmap.pdf +94 -0
- data/site/build/site/en/locationmap.xml +72 -0
- data/site/build/site/en/machineLearning.html +339 -0
- data/site/build/site/en/machineLearning.pdf +337 -0
- data/site/build/site/en/neuralNetworks.html +484 -0
- data/site/build/site/en/neuralNetworks.pdf +604 -0
- data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
- data/site/build/site/en/skin/basic.css +166 -0
- data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
- data/site/build/site/en/skin/breadcrumbs.js +237 -0
- data/site/build/site/en/skin/fontsize.js +166 -0
- data/site/build/site/en/skin/getBlank.js +40 -0
- data/site/build/site/en/skin/getMenu.js +45 -0
- data/site/build/site/en/skin/images/README.txt +1 -0
- data/site/build/site/en/skin/images/add.jpg +0 -0
- data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/skin/images/chapter.gif +0 -0
- data/site/build/site/en/skin/images/chapter_open.gif +0 -0
- data/site/build/site/en/skin/images/current.gif +0 -0
- data/site/build/site/en/skin/images/error.png +0 -0
- data/site/build/site/en/skin/images/external-link.gif +0 -0
- data/site/build/site/en/skin/images/fix.jpg +0 -0
- data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
- data/site/build/site/en/skin/images/hack.jpg +0 -0
- data/site/build/site/en/skin/images/header_white_line.gif +0 -0
- data/site/build/site/en/skin/images/info.png +0 -0
- data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
- data/site/build/site/en/skin/images/label.gif +0 -0
- data/site/build/site/en/skin/images/page.gif +0 -0
- data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
- data/site/build/site/en/skin/images/poddoc.png +0 -0
- data/site/build/site/en/skin/images/printer.gif +0 -0
- data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/remove.jpg +0 -0
- data/site/build/site/en/skin/images/rss.png +0 -0
- data/site/build/site/en/skin/images/spacer.gif +0 -0
- data/site/build/site/en/skin/images/success.png +0 -0
- data/site/build/site/en/skin/images/txtdoc.png +0 -0
- data/site/build/site/en/skin/images/update.jpg +0 -0
- data/site/build/site/en/skin/images/valid-html401.png +0 -0
- data/site/build/site/en/skin/images/vcss.png +0 -0
- data/site/build/site/en/skin/images/warning.png +0 -0
- data/site/build/site/en/skin/images/xmldoc.gif +0 -0
- data/site/build/site/en/skin/menu.js +48 -0
- data/site/build/site/en/skin/note.txt +50 -0
- data/site/build/site/en/skin/print.css +54 -0
- data/site/build/site/en/skin/profile.css +163 -0
- data/site/build/site/en/skin/prototype.js +1257 -0
- data/site/build/site/en/skin/screen.css +587 -0
- data/site/build/site/en/svn.html +252 -0
- data/site/build/site/en/svn.pdf +306 -0
- data/site/build/site/en/wholesite.pdf +1915 -0
- data/site/build/tmp/brokenlinks.xml +2 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/locationmap.xml +14 -14
- data/site/build/tmp/output.xmap +23 -23
- data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
- data/site/build/tmp/projfilters.properties +41 -41
- data/site/build/webapp/WEB-INF/logs/core.log +593 -679
- data/site/build/webapp/WEB-INF/logs/error.log +362 -279
- data/site/build/webapp/WEB-INF/logs/sitemap.log +368 -1015
- data/site/src/documentation/content/xdocs/index.xml +18 -10
- data/site/src/documentation/content/xdocs/machineLearning.xml +4 -3
- data/site/src/documentation/content/xdocs/site.xml +2 -1
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/test/classifiers/id3_test.rb +45 -44
- data/test/classifiers/one_r_test.rb +19 -17
- data/test/classifiers/prism_test.rb +22 -20
- data/test/classifiers/zero_r_test.rb +15 -12
- data/test/clusterers/bisecting_k_means_test.rb +59 -0
- data/test/clusterers/k_means_test.rb +93 -0
- data/test/data/data_set_test.rb +92 -0
- metadata +252 -128
- data/lib/ai4r/classifiers/classifier_helper.rb +0 -54
- data/site/src/documentation/content/xdocs/forum.html +0 -9
- data/site/src/documentation/resources/images/Thumbs.db +0 -0
- data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
data/README.rdoc
CHANGED
@@ -1,26 +1,12 @@
|
|
1
1
|
= Introduction
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
* Genetic algorithms (AI4R::GeneticAlgorithm::GeneticSearch)
|
7
|
-
|
8
|
-
* Neural networks (AI4R::NeuralNetwork::Backpropagation)
|
9
|
-
|
10
|
-
* ID3 Decision Trees (AI4R::Classifiers::ID3)
|
11
|
-
|
12
|
-
* PRISM (J. Cendrowska, 1987) (AI4R::Classifiers::Prism)
|
13
|
-
|
14
|
-
* OneR (AKA One Attribute Rule, 1R) (AI4R::Classifiers::OneR)
|
15
|
-
|
16
|
-
* ZeroR (AI4R::Classifiers::ZeroR)
|
3
|
+
This project aims to produce ruby implementations of
|
4
|
+
algorithms covering several artificial intelligence fields.
|
17
5
|
|
18
6
|
= Where can I find the latest code and info on this project?
|
19
7
|
|
20
8
|
http://ai4r.rubyforge.org
|
21
9
|
|
22
|
-
http://ai4r.jadeferret.com
|
23
|
-
|
24
10
|
= How to install
|
25
11
|
|
26
12
|
1. Install the gem:
|
@@ -29,18 +15,19 @@ http://ai4r.jadeferret.com
|
|
29
15
|
|
30
16
|
2. Include require statements in your code:
|
31
17
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
18
|
+
require "rubygems"
|
19
|
+
require "ai4r/classifiers/id3"
|
20
|
+
require "ai4r/classifiers/prism"
|
21
|
+
require "ai4r/classifiers/one_r"
|
22
|
+
require "ai4r/classifiers/zero_r"
|
23
|
+
require "ai4r/neural_network/backpropagation"
|
24
|
+
require "ai4r/genetic_algorithm/genetic_algorithm"
|
39
25
|
|
40
26
|
= Feedback
|
41
27
|
|
42
|
-
If you have
|
43
|
-
to
|
28
|
+
If you have questions or constructive comments about this project,
|
29
|
+
please post them in the forum. If you do not want to make it public,
|
30
|
+
send it to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
|
44
31
|
|
45
32
|
= Warranty
|
46
33
|
|
@@ -7,25 +7,22 @@
|
|
7
7
|
# the Mozilla Public License version 1.1 as published by the
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
|
-
#require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
|
11
10
|
require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
|
12
|
-
require 'csv'
|
13
11
|
|
14
12
|
# Load data from data_set.csv
|
15
|
-
|
16
|
-
|
17
|
-
data_set << row
|
18
|
-
end
|
19
|
-
data_labels = data_set.shift
|
13
|
+
data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
|
14
|
+
data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
|
20
15
|
|
21
16
|
# Build ID3 tree
|
22
|
-
id3 = Ai4r::Classifiers::ID3.new.build(data_set
|
17
|
+
id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
23
18
|
|
24
19
|
# Show rules
|
25
20
|
puts "Discovered rules are:"
|
26
|
-
puts id3.
|
21
|
+
puts id3.get_rules
|
22
|
+
puts
|
27
23
|
|
28
24
|
# Try to predict some values
|
25
|
+
puts "Prediction samples:"
|
29
26
|
puts "['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => " + id3.eval(['Moron Sur (GBA)','4','[86 m2 - 100 m2]'])
|
30
27
|
puts "['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => " + id3.eval(['Moron Sur (GBA)','3','[101 m2 - 125 m2]'])
|
31
28
|
puts "['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => " + id3.eval(['Recoleta (CABA)','3','[86 m2 - 100 m2]',])
|
@@ -23,6 +23,8 @@ elsif size=='[126 m2 - 160 m2]' and zone=='Moron Sur (GBA)' then price='[56K-75K
|
|
23
23
|
elsif size=='[126 m2 - 160 m2]' and zone=='Recoleta (CABA)' then price='[200K-275K]'
|
24
24
|
elsif size=='[126 m2 - 160 m2]' and zone=='Tigre (GBA)' then price='>275K'
|
25
25
|
else raise 'There was not enough information during training to do a proper induction for this data element' end
|
26
|
+
|
27
|
+
Prediction samples:
|
26
28
|
['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => [46K-55K]
|
27
29
|
['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => [76K-100K]
|
28
30
|
['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => [126K-150K]
|
@@ -8,32 +8,30 @@
|
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
10
|
require File.dirname(__FILE__) + '/../../lib/ai4r/genetic_algorithm/genetic_algorithm'
|
11
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/data/data_set'
|
11
12
|
require 'csv'
|
12
13
|
|
13
14
|
# Load data from data_set.csv
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
data_labels = data_set.shift
|
19
|
-
data_set.collect! do |column|
|
20
|
-
column.collect { |element| element.to_f}
|
21
|
-
end
|
15
|
+
data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
|
16
|
+
data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
|
17
|
+
data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
|
22
18
|
|
23
19
|
Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
|
24
20
|
|
25
21
|
puts "Some random selected tours costs: "
|
26
22
|
3.times do
|
27
23
|
c = Ai4r::GeneticAlgorithm::Chromosome.seed
|
28
|
-
puts "COST #{-1 * c.fitness} TOUR:
|
24
|
+
puts "COST #{-1 * c.fitness} TOUR: "+
|
25
|
+
"#{c.data.collect{|c| data_set.data_labels[c]} * ', '}"
|
29
26
|
end
|
30
27
|
|
31
28
|
puts "Beginning genetic search, please wait... "
|
32
29
|
search = Ai4r::GeneticAlgorithm::GeneticSearch.new(800, 100)
|
33
30
|
result = search.run
|
34
|
-
puts "
|
31
|
+
puts "COST #{-1 * result.fitness} TOUR: "+
|
32
|
+
"#{result.data.collect{|c| data_set.data_labels[c]} * ', '}"
|
35
33
|
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
# $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
|
35
|
+
# $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
|
36
|
+
# $7596.74 TOUR: Moscow, Kiev, Warsaw, Berlin, Hamburg, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
|
39
37
|
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/training_patterns'
|
2
|
+
require File.dirname(__FILE__) + '/patterns_with_noise'
|
3
|
+
require File.dirname(__FILE__) + '/patterns_with_base_noise'
|
4
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
|
5
|
+
|
6
|
+
examples = [
|
7
|
+
[[0, 0], [0, 1]],
|
8
|
+
[[0, 1], [1, 0]],
|
9
|
+
[[1, 0], [1, 0]],
|
10
|
+
[[1, 1], [0, 0]]
|
11
|
+
]
|
12
|
+
|
13
|
+
net = Ai4r::NeuralNetwork::Backpropagation.new([2, 1, 2, 1])
|
14
|
+
|
15
|
+
i=0
|
16
|
+
200.times {
|
17
|
+
examples.each do |ex|
|
18
|
+
2000.times {net.train(ex[0], [ex[1].first])}
|
19
|
+
end
|
20
|
+
puts(i=i+1)
|
21
|
+
}
|
22
|
+
|
23
|
+
examples.each do |ex|
|
24
|
+
print ex[0], ' => ', net.eval(ex[0]).inspect, ', should be ', ex[1].first, "\n"
|
25
|
+
end
|
data/lib/ai4r.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "ai4r/clusterers/clusterer"
|
2
|
+
require "ai4r/clusterers/k_means"
|
3
|
+
require "ai4r/clusterers/bisecting_k_means"
|
4
|
+
require "ai4r/classifiers/classifier"
|
5
|
+
require "ai4r/classifiers/id3"
|
6
|
+
require "ai4r/classifiers/prism"
|
7
|
+
require "ai4r/classifiers/one_r"
|
8
|
+
require "ai4r/classifiers/zero_r"
|
9
|
+
require "ai4r/neural_network/backpropagation"
|
10
|
+
require "ai4r/genetic_algorithm/genetic_algorithm"
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Classifiers
|
12
|
+
|
13
|
+
# The only purpose of this class is to define a common API for classifiers.
|
14
|
+
# All methods in this class must be implemented in subclasses.
|
15
|
+
class Classifier
|
16
|
+
|
17
|
+
# Build a new classifier, using data examples found in data_set.
|
18
|
+
def build(data_set)
|
19
|
+
raise NotImplementedError
|
20
|
+
end
|
21
|
+
|
22
|
+
# You can evaluate new data, predicting its class.
|
23
|
+
# e.g.
|
24
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
25
|
+
def eval(data)
|
26
|
+
raise NotImplementedError
|
27
|
+
end
|
28
|
+
|
29
|
+
# This method returns the generated rules in ruby code.
|
30
|
+
# e.g.
|
31
|
+
#
|
32
|
+
# classifier.get_rules
|
33
|
+
# # => marketing_target='Y'
|
34
|
+
#
|
35
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
36
|
+
# marketing_target = nil
|
37
|
+
# eval classifier.get_rules
|
38
|
+
# puts marketing_target
|
39
|
+
# # => 'Y'
|
40
|
+
def get_rules
|
41
|
+
raise NotImplementedError
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/ai4r/classifiers/id3.rb
CHANGED
@@ -8,7 +8,8 @@
|
|
8
8
|
# the Mozilla Public License version 1.1 as published by the
|
9
9
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
10
10
|
|
11
|
-
require File.dirname(__FILE__) + '/
|
11
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
12
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
12
13
|
|
13
14
|
module Ai4r
|
14
15
|
|
@@ -44,9 +45,9 @@ module Ai4r
|
|
44
45
|
# ['Chicago', '>80', 'F', 'Y']
|
45
46
|
# ]
|
46
47
|
#
|
47
|
-
# id3 =
|
48
|
+
# id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
|
48
49
|
#
|
49
|
-
# id3.
|
50
|
+
# id3.get_rules
|
50
51
|
# # => if age_range=='<30' then marketing_target='Y'
|
51
52
|
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
52
53
|
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
@@ -69,14 +70,14 @@ module Ai4r
|
|
69
70
|
# end
|
70
71
|
# data_labels = data_set.shift
|
71
72
|
#
|
72
|
-
# id3 =
|
73
|
+
# id3 = Ai4r::Classifiers::ID3.new(data_set, data_labels)
|
73
74
|
#
|
74
75
|
# = A nice tip for data evaluation
|
75
76
|
#
|
76
|
-
# id3 =
|
77
|
+
# id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
|
77
78
|
# age_range = '<30'
|
78
79
|
# marketing_target = nil
|
79
|
-
# eval id3.
|
80
|
+
# eval id3.get_rules
|
80
81
|
# puts marketing_target
|
81
82
|
# # => 'Y'
|
82
83
|
# = More about ID3 and decision trees
|
@@ -87,50 +88,17 @@ module Ai4r
|
|
87
88
|
# = About the project
|
88
89
|
# Author:: Sergio Fierens
|
89
90
|
# License:: MPL 1.1
|
90
|
-
|
91
|
-
class ID3
|
91
|
+
# Url:: http://ai4r.rubyforge.org/
|
92
|
+
class ID3 < Classifier
|
92
93
|
|
93
|
-
attr_reader :
|
94
|
-
include ClassifierHelper
|
94
|
+
attr_reader :data_set
|
95
95
|
|
96
|
-
# Create a new
|
97
|
-
#
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
# [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
|
103
|
-
# ]
|
104
|
-
#
|
105
|
-
# e.g.
|
106
|
-
# [ ['New York', '<30', 'M', 'Y'],
|
107
|
-
# ['Chicago', '<30', 'M', 'Y'],
|
108
|
-
# ['Chicago', '<30', 'F', 'Y'],
|
109
|
-
# ['New York', '<30', 'M', 'Y'],
|
110
|
-
# ['New York', '<30', 'M', 'Y'],
|
111
|
-
# ['Chicago', '[30-50)', 'M', 'Y'],
|
112
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
113
|
-
# ['Chicago', '[30-50)', 'F', 'Y'],
|
114
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
115
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
116
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
117
|
-
# ['New York', '[50-80]', 'M', 'N'],
|
118
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
119
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
120
|
-
# ['Chicago', '>80', 'F', 'Y']
|
121
|
-
# ]
|
122
|
-
#
|
123
|
-
# Data labels must have the following format:
|
124
|
-
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
125
|
-
#
|
126
|
-
# If you do not provide labels for you data, the following labels will
|
127
|
-
# be created by default:
|
128
|
-
# [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
|
129
|
-
#
|
130
|
-
def build(data_examples, data_labels=nil)
|
131
|
-
check_data_examples(data_examples)
|
132
|
-
@data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
|
133
|
-
preprocess_data(data_examples)
|
96
|
+
# Create a new ID3 classifier. You must provide a DataSet instance
|
97
|
+
# as parameter.
|
98
|
+
def build(data_set)
|
99
|
+
data_set.check_not_empty
|
100
|
+
@data_set = data_set
|
101
|
+
preprocess_data(@data_set.data_items)
|
134
102
|
return self
|
135
103
|
end
|
136
104
|
|
@@ -144,7 +112,7 @@ module Ai4r
|
|
144
112
|
# This method returns the generated rules in ruby code.
|
145
113
|
# e.g.
|
146
114
|
#
|
147
|
-
# id3.
|
115
|
+
# id3.get_rules
|
148
116
|
# # => if age_range=='<30' then marketing_target='Y'
|
149
117
|
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
150
118
|
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
@@ -155,10 +123,11 @@ module Ai4r
|
|
155
123
|
# It is a nice way to inspect induction results, and also to execute them:
|
156
124
|
# age_range = '<30'
|
157
125
|
# marketing_target = nil
|
158
|
-
# eval id3.
|
126
|
+
# eval id3.get_rules
|
159
127
|
# puts marketing_target
|
160
128
|
# # => 'Y'
|
161
|
-
def
|
129
|
+
def get_rules
|
130
|
+
#return "Empty ID3 tree" if !@tree
|
162
131
|
rules = @tree.get_rules
|
163
132
|
rules = rules.collect do |rule|
|
164
133
|
"#{rule[0..-2].join(' and ')} then #{rule.last}"
|
@@ -175,15 +144,15 @@ module Ai4r
|
|
175
144
|
def build_node(data_examples, flag_att = [])
|
176
145
|
return ErrorNode.new if data_examples.length == 0
|
177
146
|
domain = domain(data_examples)
|
178
|
-
return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
|
147
|
+
return CategoryNode.new(@data_set.data_labels.last, domain.last[0]) if domain.last.length == 1
|
179
148
|
min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
|
180
149
|
flag_att << min_entropy_index
|
181
150
|
split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
|
182
|
-
return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
|
151
|
+
return CategoryNode.new(@data_set.data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
|
183
152
|
nodes = split_data_examples.collect do |partial_data_examples|
|
184
153
|
build_node(partial_data_examples, flag_att)
|
185
154
|
end
|
186
|
-
return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
|
155
|
+
return EvaluationNode.new(@data_set.data_labels, min_entropy_index, domain[min_entropy_index], nodes)
|
187
156
|
end
|
188
157
|
|
189
158
|
private
|
@@ -247,7 +216,7 @@ module Ai4r
|
|
247
216
|
def domain(data_examples)
|
248
217
|
#return build_domains(data_examples)
|
249
218
|
domain = []
|
250
|
-
@data_labels.length.times { domain << [] }
|
219
|
+
@data_set.data_labels.length.times { domain << [] }
|
251
220
|
data_examples.each do |data|
|
252
221
|
data.each_index do |i|
|
253
222
|
domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
|
@@ -297,7 +266,7 @@ module Ai4r
|
|
297
266
|
LOG2 = Math.log(2)
|
298
267
|
end
|
299
268
|
|
300
|
-
class EvaluationNode
|
269
|
+
class EvaluationNode #:nodoc: all
|
301
270
|
|
302
271
|
attr_reader :index, :values, :nodes
|
303
272
|
|
@@ -330,7 +299,7 @@ module Ai4r
|
|
330
299
|
|
331
300
|
end
|
332
301
|
|
333
|
-
class CategoryNode
|
302
|
+
class CategoryNode #:nodoc: all
|
334
303
|
def initialize(label, value)
|
335
304
|
@label = label
|
336
305
|
@value = value
|
@@ -343,7 +312,7 @@ module Ai4r
|
|
343
312
|
end
|
344
313
|
end
|
345
314
|
|
346
|
-
class ErrorNode
|
315
|
+
class ErrorNode #:nodoc: all
|
347
316
|
def value(data)
|
348
317
|
raise "There was not enough information during training to do a proper induction for this data element."
|
349
318
|
end
|
@@ -8,7 +8,8 @@
|
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
10
|
require 'set'
|
11
|
-
require File.dirname(__FILE__) + '/
|
11
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
12
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
12
13
|
|
13
14
|
module Ai4r
|
14
15
|
module Classifiers
|
@@ -19,58 +20,25 @@ module Ai4r
|
|
19
20
|
# attribute to use to classify data that makes
|
20
21
|
# fewest prediction errors.
|
21
22
|
# It generates rules based on a single attribute.
|
22
|
-
class OneR
|
23
|
+
class OneR < Classifier
|
23
24
|
|
24
|
-
|
25
|
-
include ClassifierHelper
|
25
|
+
attr_reader :data_set, :rule
|
26
26
|
|
27
|
-
# Build a new OneR classifier.
|
28
|
-
#
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
# ]
|
35
|
-
#
|
36
|
-
# e.g.
|
37
|
-
# [ ['New York', '<30', 'M', 'Y'],
|
38
|
-
# ['Chicago', '<30', 'M', 'Y'],
|
39
|
-
# ['Chicago', '<30', 'F', 'Y'],
|
40
|
-
# ['New York', '<30', 'M', 'Y'],
|
41
|
-
# ['New York', '<30', 'M', 'Y'],
|
42
|
-
# ['Chicago', '[30-50)', 'M', 'Y'],
|
43
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
44
|
-
# ['Chicago', '[30-50)', 'F', 'Y'],
|
45
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
46
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
47
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
48
|
-
# ['New York', '[50-80]', 'M', 'N'],
|
49
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
50
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
51
|
-
# ['Chicago', '>80', 'F', 'Y']
|
52
|
-
# ]
|
53
|
-
#
|
54
|
-
# Data labels must have the following format:
|
55
|
-
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
56
|
-
#
|
57
|
-
# If you do not provide labels for you data, the following labels will
|
58
|
-
# be created by default:
|
59
|
-
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
60
|
-
#
|
61
|
-
def build(data_examples, data_labels = nil)
|
62
|
-
check_data_examples(data_examples)
|
63
|
-
@data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
|
64
|
-
if (num_attributes(data_examples) == 1)
|
65
|
-
@zero_r = ZeroR.new.build(data_examples, data_labels)
|
27
|
+
# Build a new OneR classifier. You must provide a DataSet instance
|
28
|
+
# as parameter.
|
29
|
+
def build(data_set)
|
30
|
+
data_set.check_not_empty
|
31
|
+
@data_set = data_set
|
32
|
+
if (data_set.num_attributes == 1)
|
33
|
+
@zero_r = ZeroR.new.build(data_set)
|
66
34
|
return self;
|
67
35
|
else
|
68
36
|
@zero_r = nil;
|
69
37
|
end
|
70
|
-
domains = build_domains
|
38
|
+
domains = @data_set.build_domains
|
71
39
|
@rule = nil
|
72
40
|
domains[1...-1].each_index do |attr_index|
|
73
|
-
rule = build_rule(
|
41
|
+
rule = build_rule(@data_set.data_items, attr_index, domains)
|
74
42
|
@rule = rule if !@rule || rule[:correct] > @rule[:correct]
|
75
43
|
end
|
76
44
|
return self
|
@@ -88,7 +56,7 @@ module Ai4r
|
|
88
56
|
# This method returns the generated rules in ruby code.
|
89
57
|
# e.g.
|
90
58
|
#
|
91
|
-
# classifier.
|
59
|
+
# classifier.get_rules
|
92
60
|
# # => if age_range == '<30' then marketing_target = 'Y'
|
93
61
|
# elsif age_range == '[30-50)' then marketing_target = 'N'
|
94
62
|
# elsif age_range == '[50-80]' then marketing_target = 'N'
|
@@ -96,14 +64,14 @@ module Ai4r
|
|
96
64
|
#
|
97
65
|
# It is a nice way to inspect induction results, and also to execute them:
|
98
66
|
# marketing_target = nil
|
99
|
-
# eval classifier.
|
67
|
+
# eval classifier.get_rules
|
100
68
|
# puts marketing_target
|
101
69
|
# # => 'Y'
|
102
|
-
def
|
103
|
-
return @zero_r.
|
70
|
+
def get_rules
|
71
|
+
return @zero_r.get_rules if @zero_r
|
104
72
|
sentences = []
|
105
|
-
attr_label = @data_labels[@rule[:attr_index]]
|
106
|
-
class_label = @data_labels.last
|
73
|
+
attr_label = @data_set.data_labels[@rule[:attr_index]]
|
74
|
+
class_label = @data_set.data_labels.last
|
107
75
|
@rule[:rule].each_pair do |attr_value, class_value|
|
108
76
|
sentences << "#{attr_label} == '#{attr_value}' then #{class_label} = '#{class_value}'"
|
109
77
|
end
|
@@ -111,13 +79,6 @@ module Ai4r
|
|
111
79
|
end
|
112
80
|
|
113
81
|
protected
|
114
|
-
def build_domains(data_examples)
|
115
|
-
domains = Array.new(num_attributes(data_examples)) { Set.new }
|
116
|
-
data_examples.each do |data|
|
117
|
-
data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
|
118
|
-
end
|
119
|
-
return domains
|
120
|
-
end
|
121
82
|
|
122
83
|
def build_rule(data_examples, attr_index, domains)
|
123
84
|
domain = domains[attr_index]
|