ai4r 1.2 → 1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +12 -25
- data/examples/decision_trees/id3_example.rb +6 -9
- data/examples/decision_trees/results.txt +2 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +11 -13
- data/examples/neural_network/xor_example.rb +25 -0
- data/lib/ai4r.rb +10 -0
- data/lib/ai4r/classifiers/classifier.rb +46 -0
- data/lib/ai4r/classifiers/id3.rb +27 -58
- data/lib/ai4r/classifiers/one_r.rb +19 -58
- data/lib/ai4r/classifiers/prism.rb +21 -57
- data/lib/ai4r/classifiers/zero_r.rb +16 -48
- data/lib/ai4r/clusterers/bisecting_k_means.rb +115 -0
- data/lib/ai4r/clusterers/clusterer.rb +55 -0
- data/lib/ai4r/clusterers/k_means.rb +164 -0
- data/lib/ai4r/data/data_set.rb +250 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +19 -19
- data/lib/ai4r/neural_network/backpropagation.rb +23 -24
- data/site/build/site/en/broken-links.xml +2 -0
- data/site/build/site/en/downloads.html +200 -0
- data/site/build/site/en/downloads.pdf +151 -0
- data/site/build/site/en/forum.html +197 -0
- data/site/build/site/en/forum.pdf +151 -0
- data/site/build/site/en/geneticAlgorithms.html +591 -0
- data/site/build/site/en/geneticAlgorithms.pdf +934 -0
- data/site/build/site/en/images/ai4r-logo.png +0 -0
- data/site/build/site/en/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/images/c.png +0 -0
- data/site/build/site/en/images/c_wbn.png +0 -0
- data/site/build/site/en/images/c_wn.png +0 -0
- data/site/build/site/en/images/ero.gif +0 -0
- data/site/build/site/en/images/europe2.png +0 -0
- data/site/build/site/en/images/europe3.png +0 -0
- data/site/build/site/en/images/fitness.png +0 -0
- data/site/build/site/en/images/genetic_algorithms_example.png +0 -0
- data/site/build/site/en/images/instruction_arrow.png +0 -0
- data/site/build/site/en/images/jadeferret.png +0 -0
- data/site/build/site/en/images/my_email.png +0 -0
- data/site/build/site/en/images/neural_network_example.png +0 -0
- data/site/build/site/en/images/rubyforge.png +0 -0
- data/site/build/site/en/images/s.png +0 -0
- data/site/build/site/en/images/s_wbn.png +0 -0
- data/site/build/site/en/images/s_wn.png +0 -0
- data/site/build/site/en/images/sigmoid.png +0 -0
- data/site/build/site/en/images/t.png +0 -0
- data/site/build/site/en/images/t_wbn.png +0 -0
- data/site/build/site/en/images/t_wn.png +0 -0
- data/site/build/site/en/index.html +336 -0
- data/site/build/site/en/index.pdf +508 -0
- data/site/build/site/en/linkmap.html +263 -0
- data/site/build/site/en/linkmap.pdf +94 -0
- data/site/build/site/en/locationmap.xml +72 -0
- data/site/build/site/en/machineLearning.html +339 -0
- data/site/build/site/en/machineLearning.pdf +337 -0
- data/site/build/site/en/neuralNetworks.html +484 -0
- data/site/build/site/en/neuralNetworks.pdf +604 -0
- data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
- data/site/build/site/en/skin/basic.css +166 -0
- data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
- data/site/build/site/en/skin/breadcrumbs.js +237 -0
- data/site/build/site/en/skin/fontsize.js +166 -0
- data/site/build/site/en/skin/getBlank.js +40 -0
- data/site/build/site/en/skin/getMenu.js +45 -0
- data/site/build/site/en/skin/images/README.txt +1 -0
- data/site/build/site/en/skin/images/add.jpg +0 -0
- data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/skin/images/chapter.gif +0 -0
- data/site/build/site/en/skin/images/chapter_open.gif +0 -0
- data/site/build/site/en/skin/images/current.gif +0 -0
- data/site/build/site/en/skin/images/error.png +0 -0
- data/site/build/site/en/skin/images/external-link.gif +0 -0
- data/site/build/site/en/skin/images/fix.jpg +0 -0
- data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
- data/site/build/site/en/skin/images/hack.jpg +0 -0
- data/site/build/site/en/skin/images/header_white_line.gif +0 -0
- data/site/build/site/en/skin/images/info.png +0 -0
- data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
- data/site/build/site/en/skin/images/label.gif +0 -0
- data/site/build/site/en/skin/images/page.gif +0 -0
- data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
- data/site/build/site/en/skin/images/poddoc.png +0 -0
- data/site/build/site/en/skin/images/printer.gif +0 -0
- data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/remove.jpg +0 -0
- data/site/build/site/en/skin/images/rss.png +0 -0
- data/site/build/site/en/skin/images/spacer.gif +0 -0
- data/site/build/site/en/skin/images/success.png +0 -0
- data/site/build/site/en/skin/images/txtdoc.png +0 -0
- data/site/build/site/en/skin/images/update.jpg +0 -0
- data/site/build/site/en/skin/images/valid-html401.png +0 -0
- data/site/build/site/en/skin/images/vcss.png +0 -0
- data/site/build/site/en/skin/images/warning.png +0 -0
- data/site/build/site/en/skin/images/xmldoc.gif +0 -0
- data/site/build/site/en/skin/menu.js +48 -0
- data/site/build/site/en/skin/note.txt +50 -0
- data/site/build/site/en/skin/print.css +54 -0
- data/site/build/site/en/skin/profile.css +163 -0
- data/site/build/site/en/skin/prototype.js +1257 -0
- data/site/build/site/en/skin/screen.css +587 -0
- data/site/build/site/en/svn.html +252 -0
- data/site/build/site/en/svn.pdf +306 -0
- data/site/build/site/en/wholesite.pdf +1915 -0
- data/site/build/tmp/brokenlinks.xml +2 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/locationmap.xml +14 -14
- data/site/build/tmp/output.xmap +23 -23
- data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
- data/site/build/tmp/projfilters.properties +41 -41
- data/site/build/webapp/WEB-INF/logs/core.log +593 -679
- data/site/build/webapp/WEB-INF/logs/error.log +362 -279
- data/site/build/webapp/WEB-INF/logs/sitemap.log +368 -1015
- data/site/src/documentation/content/xdocs/index.xml +18 -10
- data/site/src/documentation/content/xdocs/machineLearning.xml +4 -3
- data/site/src/documentation/content/xdocs/site.xml +2 -1
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/test/classifiers/id3_test.rb +45 -44
- data/test/classifiers/one_r_test.rb +19 -17
- data/test/classifiers/prism_test.rb +22 -20
- data/test/classifiers/zero_r_test.rb +15 -12
- data/test/clusterers/bisecting_k_means_test.rb +59 -0
- data/test/clusterers/k_means_test.rb +93 -0
- data/test/data/data_set_test.rb +92 -0
- metadata +252 -128
- data/lib/ai4r/classifiers/classifier_helper.rb +0 -54
- data/site/src/documentation/content/xdocs/forum.html +0 -9
- data/site/src/documentation/resources/images/Thumbs.db +0 -0
- data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
data/README.rdoc
CHANGED
@@ -1,26 +1,12 @@
|
|
1
1
|
= Introduction
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
* Genetic algorithms (AI4R::GeneticAlgorithm::GeneticSearch)
|
7
|
-
|
8
|
-
* Neural networks (AI4R::NeuralNetwork::Backpropagation)
|
9
|
-
|
10
|
-
* ID3 Decision Trees (AI4R::Classifiers::ID3)
|
11
|
-
|
12
|
-
* PRISM (J. Cendrowska, 1987) (AI4R::Classifiers::Prism)
|
13
|
-
|
14
|
-
* OneR (AKA One Attribute Rule, 1R) (AI4R::Classifiers::OneR)
|
15
|
-
|
16
|
-
* ZeroR (AI4R::Classifiers::ZeroR)
|
3
|
+
This project aims to produce ruby implementations of
|
4
|
+
algorithms covering several artificial intelligence fields.
|
17
5
|
|
18
6
|
= Where can I find the latest code and info on this project?
|
19
7
|
|
20
8
|
http://ai4r.rubyforge.org
|
21
9
|
|
22
|
-
http://ai4r.jadeferret.com
|
23
|
-
|
24
10
|
= How to install
|
25
11
|
|
26
12
|
1. Install the gem:
|
@@ -29,18 +15,19 @@ http://ai4r.jadeferret.com
|
|
29
15
|
|
30
16
|
2. Include require statements in your code:
|
31
17
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
18
|
+
require "rubygems"
|
19
|
+
require "ai4r/classifiers/id3"
|
20
|
+
require "ai4r/classifiers/prism"
|
21
|
+
require "ai4r/classifiers/one_r"
|
22
|
+
require "ai4r/classifiers/zero_r"
|
23
|
+
require "ai4r/neural_network/backpropagation"
|
24
|
+
require "ai4r/genetic_algorithm/genetic_algorithm"
|
39
25
|
|
40
26
|
= Feedback
|
41
27
|
|
42
|
-
If you have
|
43
|
-
to
|
28
|
+
If you have questions or constructive comments about this project,
|
29
|
+
please post them in the forum. If you do not want to make it public,
|
30
|
+
send it to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
|
44
31
|
|
45
32
|
= Warranty
|
46
33
|
|
@@ -7,25 +7,22 @@
|
|
7
7
|
# the Mozilla Public License version 1.1 as published by the
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
|
-
#require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
|
11
10
|
require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
|
12
|
-
require 'csv'
|
13
11
|
|
14
12
|
# Load data from data_set.csv
|
15
|
-
|
16
|
-
|
17
|
-
data_set << row
|
18
|
-
end
|
19
|
-
data_labels = data_set.shift
|
13
|
+
data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
|
14
|
+
data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
|
20
15
|
|
21
16
|
# Build ID3 tree
|
22
|
-
id3 = Ai4r::Classifiers::ID3.new.build(data_set
|
17
|
+
id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
23
18
|
|
24
19
|
# Show rules
|
25
20
|
puts "Discovered rules are:"
|
26
|
-
puts id3.
|
21
|
+
puts id3.get_rules
|
22
|
+
puts
|
27
23
|
|
28
24
|
# Try to predict some values
|
25
|
+
puts "Prediction samples:"
|
29
26
|
puts "['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => " + id3.eval(['Moron Sur (GBA)','4','[86 m2 - 100 m2]'])
|
30
27
|
puts "['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => " + id3.eval(['Moron Sur (GBA)','3','[101 m2 - 125 m2]'])
|
31
28
|
puts "['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => " + id3.eval(['Recoleta (CABA)','3','[86 m2 - 100 m2]',])
|
@@ -23,6 +23,8 @@ elsif size=='[126 m2 - 160 m2]' and zone=='Moron Sur (GBA)' then price='[56K-75K
|
|
23
23
|
elsif size=='[126 m2 - 160 m2]' and zone=='Recoleta (CABA)' then price='[200K-275K]'
|
24
24
|
elsif size=='[126 m2 - 160 m2]' and zone=='Tigre (GBA)' then price='>275K'
|
25
25
|
else raise 'There was not enough information during training to do a proper induction for this data element' end
|
26
|
+
|
27
|
+
Prediction samples:
|
26
28
|
['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => [46K-55K]
|
27
29
|
['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => [76K-100K]
|
28
30
|
['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => [126K-150K]
|
@@ -8,32 +8,30 @@
|
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
10
|
require File.dirname(__FILE__) + '/../../lib/ai4r/genetic_algorithm/genetic_algorithm'
|
11
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/data/data_set'
|
11
12
|
require 'csv'
|
12
13
|
|
13
14
|
# Load data from data_set.csv
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
data_labels = data_set.shift
|
19
|
-
data_set.collect! do |column|
|
20
|
-
column.collect { |element| element.to_f}
|
21
|
-
end
|
15
|
+
data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
|
16
|
+
data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
|
17
|
+
data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
|
22
18
|
|
23
19
|
Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
|
24
20
|
|
25
21
|
puts "Some random selected tours costs: "
|
26
22
|
3.times do
|
27
23
|
c = Ai4r::GeneticAlgorithm::Chromosome.seed
|
28
|
-
puts "COST #{-1 * c.fitness} TOUR:
|
24
|
+
puts "COST #{-1 * c.fitness} TOUR: "+
|
25
|
+
"#{c.data.collect{|c| data_set.data_labels[c]} * ', '}"
|
29
26
|
end
|
30
27
|
|
31
28
|
puts "Beginning genetic search, please wait... "
|
32
29
|
search = Ai4r::GeneticAlgorithm::GeneticSearch.new(800, 100)
|
33
30
|
result = search.run
|
34
|
-
puts "
|
31
|
+
puts "COST #{-1 * result.fitness} TOUR: "+
|
32
|
+
"#{result.data.collect{|c| data_set.data_labels[c]} * ', '}"
|
35
33
|
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
# $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
|
35
|
+
# $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
|
36
|
+
# $7596.74 TOUR: Moscow, Kiev, Warsaw, Berlin, Hamburg, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
|
39
37
|
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/training_patterns'
|
2
|
+
require File.dirname(__FILE__) + '/patterns_with_noise'
|
3
|
+
require File.dirname(__FILE__) + '/patterns_with_base_noise'
|
4
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
|
5
|
+
|
6
|
+
examples = [
|
7
|
+
[[0, 0], [0, 1]],
|
8
|
+
[[0, 1], [1, 0]],
|
9
|
+
[[1, 0], [1, 0]],
|
10
|
+
[[1, 1], [0, 0]]
|
11
|
+
]
|
12
|
+
|
13
|
+
net = Ai4r::NeuralNetwork::Backpropagation.new([2, 1, 2, 1])
|
14
|
+
|
15
|
+
i=0
|
16
|
+
200.times {
|
17
|
+
examples.each do |ex|
|
18
|
+
2000.times {net.train(ex[0], [ex[1].first])}
|
19
|
+
end
|
20
|
+
puts(i=i+1)
|
21
|
+
}
|
22
|
+
|
23
|
+
examples.each do |ex|
|
24
|
+
print ex[0], ' => ', net.eval(ex[0]).inspect, ', should be ', ex[1].first, "\n"
|
25
|
+
end
|
data/lib/ai4r.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "ai4r/clusterers/clusterer"
|
2
|
+
require "ai4r/clusterers/k_means"
|
3
|
+
require "ai4r/clusterers/bisecting_k_means"
|
4
|
+
require "ai4r/classifiers/classifier"
|
5
|
+
require "ai4r/classifiers/id3"
|
6
|
+
require "ai4r/classifiers/prism"
|
7
|
+
require "ai4r/classifiers/one_r"
|
8
|
+
require "ai4r/classifiers/zero_r"
|
9
|
+
require "ai4r/neural_network/backpropagation"
|
10
|
+
require "ai4r/genetic_algorithm/genetic_algorithm"
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Classifiers
|
12
|
+
|
13
|
+
# The only purpose of this class is to define a common API for classifiers.
|
14
|
+
# All methods in this class must be implemented in subclasses.
|
15
|
+
class Classifier
|
16
|
+
|
17
|
+
# Build a new classifier, using data examples found in data_set.
|
18
|
+
def build(data_set)
|
19
|
+
raise NotImplementedError
|
20
|
+
end
|
21
|
+
|
22
|
+
# You can evaluate new data, predicting its class.
|
23
|
+
# e.g.
|
24
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
25
|
+
def eval(data)
|
26
|
+
raise NotImplementedError
|
27
|
+
end
|
28
|
+
|
29
|
+
# This method returns the generated rules in ruby code.
|
30
|
+
# e.g.
|
31
|
+
#
|
32
|
+
# classifier.get_rules
|
33
|
+
# # => marketing_target='Y'
|
34
|
+
#
|
35
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
36
|
+
# marketing_target = nil
|
37
|
+
# eval classifier.get_rules
|
38
|
+
# puts marketing_target
|
39
|
+
# # => 'Y'
|
40
|
+
def get_rules
|
41
|
+
raise NotImplementedError
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/ai4r/classifiers/id3.rb
CHANGED
@@ -8,7 +8,8 @@
|
|
8
8
|
# the Mozilla Public License version 1.1 as published by the
|
9
9
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
10
10
|
|
11
|
-
require File.dirname(__FILE__) + '/
|
11
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
12
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
12
13
|
|
13
14
|
module Ai4r
|
14
15
|
|
@@ -44,9 +45,9 @@ module Ai4r
|
|
44
45
|
# ['Chicago', '>80', 'F', 'Y']
|
45
46
|
# ]
|
46
47
|
#
|
47
|
-
# id3 =
|
48
|
+
# id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
|
48
49
|
#
|
49
|
-
# id3.
|
50
|
+
# id3.get_rules
|
50
51
|
# # => if age_range=='<30' then marketing_target='Y'
|
51
52
|
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
52
53
|
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
@@ -69,14 +70,14 @@ module Ai4r
|
|
69
70
|
# end
|
70
71
|
# data_labels = data_set.shift
|
71
72
|
#
|
72
|
-
# id3 =
|
73
|
+
# id3 = Ai4r::Classifiers::ID3.new(data_set, data_labels)
|
73
74
|
#
|
74
75
|
# = A nice tip for data evaluation
|
75
76
|
#
|
76
|
-
# id3 =
|
77
|
+
# id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
|
77
78
|
# age_range = '<30'
|
78
79
|
# marketing_target = nil
|
79
|
-
# eval id3.
|
80
|
+
# eval id3.get_rules
|
80
81
|
# puts marketing_target
|
81
82
|
# # => 'Y'
|
82
83
|
# = More about ID3 and decision trees
|
@@ -87,50 +88,17 @@ module Ai4r
|
|
87
88
|
# = About the project
|
88
89
|
# Author:: Sergio Fierens
|
89
90
|
# License:: MPL 1.1
|
90
|
-
|
91
|
-
class ID3
|
91
|
+
# Url:: http://ai4r.rubyforge.org/
|
92
|
+
class ID3 < Classifier
|
92
93
|
|
93
|
-
attr_reader :
|
94
|
-
include ClassifierHelper
|
94
|
+
attr_reader :data_set
|
95
95
|
|
96
|
-
# Create a new
|
97
|
-
#
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
# [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
|
103
|
-
# ]
|
104
|
-
#
|
105
|
-
# e.g.
|
106
|
-
# [ ['New York', '<30', 'M', 'Y'],
|
107
|
-
# ['Chicago', '<30', 'M', 'Y'],
|
108
|
-
# ['Chicago', '<30', 'F', 'Y'],
|
109
|
-
# ['New York', '<30', 'M', 'Y'],
|
110
|
-
# ['New York', '<30', 'M', 'Y'],
|
111
|
-
# ['Chicago', '[30-50)', 'M', 'Y'],
|
112
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
113
|
-
# ['Chicago', '[30-50)', 'F', 'Y'],
|
114
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
115
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
116
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
117
|
-
# ['New York', '[50-80]', 'M', 'N'],
|
118
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
119
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
120
|
-
# ['Chicago', '>80', 'F', 'Y']
|
121
|
-
# ]
|
122
|
-
#
|
123
|
-
# Data labels must have the following format:
|
124
|
-
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
125
|
-
#
|
126
|
-
# If you do not provide labels for you data, the following labels will
|
127
|
-
# be created by default:
|
128
|
-
# [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
|
129
|
-
#
|
130
|
-
def build(data_examples, data_labels=nil)
|
131
|
-
check_data_examples(data_examples)
|
132
|
-
@data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
|
133
|
-
preprocess_data(data_examples)
|
96
|
+
# Create a new ID3 classifier. You must provide a DataSet instance
|
97
|
+
# as parameter.
|
98
|
+
def build(data_set)
|
99
|
+
data_set.check_not_empty
|
100
|
+
@data_set = data_set
|
101
|
+
preprocess_data(@data_set.data_items)
|
134
102
|
return self
|
135
103
|
end
|
136
104
|
|
@@ -144,7 +112,7 @@ module Ai4r
|
|
144
112
|
# This method returns the generated rules in ruby code.
|
145
113
|
# e.g.
|
146
114
|
#
|
147
|
-
# id3.
|
115
|
+
# id3.get_rules
|
148
116
|
# # => if age_range=='<30' then marketing_target='Y'
|
149
117
|
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
150
118
|
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
@@ -155,10 +123,11 @@ module Ai4r
|
|
155
123
|
# It is a nice way to inspect induction results, and also to execute them:
|
156
124
|
# age_range = '<30'
|
157
125
|
# marketing_target = nil
|
158
|
-
# eval id3.
|
126
|
+
# eval id3.get_rules
|
159
127
|
# puts marketing_target
|
160
128
|
# # => 'Y'
|
161
|
-
def
|
129
|
+
def get_rules
|
130
|
+
#return "Empty ID3 tree" if !@tree
|
162
131
|
rules = @tree.get_rules
|
163
132
|
rules = rules.collect do |rule|
|
164
133
|
"#{rule[0..-2].join(' and ')} then #{rule.last}"
|
@@ -175,15 +144,15 @@ module Ai4r
|
|
175
144
|
def build_node(data_examples, flag_att = [])
|
176
145
|
return ErrorNode.new if data_examples.length == 0
|
177
146
|
domain = domain(data_examples)
|
178
|
-
return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
|
147
|
+
return CategoryNode.new(@data_set.data_labels.last, domain.last[0]) if domain.last.length == 1
|
179
148
|
min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
|
180
149
|
flag_att << min_entropy_index
|
181
150
|
split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
|
182
|
-
return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
|
151
|
+
return CategoryNode.new(@data_set.data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
|
183
152
|
nodes = split_data_examples.collect do |partial_data_examples|
|
184
153
|
build_node(partial_data_examples, flag_att)
|
185
154
|
end
|
186
|
-
return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
|
155
|
+
return EvaluationNode.new(@data_set.data_labels, min_entropy_index, domain[min_entropy_index], nodes)
|
187
156
|
end
|
188
157
|
|
189
158
|
private
|
@@ -247,7 +216,7 @@ module Ai4r
|
|
247
216
|
def domain(data_examples)
|
248
217
|
#return build_domains(data_examples)
|
249
218
|
domain = []
|
250
|
-
@data_labels.length.times { domain << [] }
|
219
|
+
@data_set.data_labels.length.times { domain << [] }
|
251
220
|
data_examples.each do |data|
|
252
221
|
data.each_index do |i|
|
253
222
|
domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
|
@@ -297,7 +266,7 @@ module Ai4r
|
|
297
266
|
LOG2 = Math.log(2)
|
298
267
|
end
|
299
268
|
|
300
|
-
class EvaluationNode
|
269
|
+
class EvaluationNode #:nodoc: all
|
301
270
|
|
302
271
|
attr_reader :index, :values, :nodes
|
303
272
|
|
@@ -330,7 +299,7 @@ module Ai4r
|
|
330
299
|
|
331
300
|
end
|
332
301
|
|
333
|
-
class CategoryNode
|
302
|
+
class CategoryNode #:nodoc: all
|
334
303
|
def initialize(label, value)
|
335
304
|
@label = label
|
336
305
|
@value = value
|
@@ -343,7 +312,7 @@ module Ai4r
|
|
343
312
|
end
|
344
313
|
end
|
345
314
|
|
346
|
-
class ErrorNode
|
315
|
+
class ErrorNode #:nodoc: all
|
347
316
|
def value(data)
|
348
317
|
raise "There was not enough information during training to do a proper induction for this data element."
|
349
318
|
end
|
@@ -8,7 +8,8 @@
|
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
10
|
require 'set'
|
11
|
-
require File.dirname(__FILE__) + '/
|
11
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
12
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
12
13
|
|
13
14
|
module Ai4r
|
14
15
|
module Classifiers
|
@@ -19,58 +20,25 @@ module Ai4r
|
|
19
20
|
# attribute to use to classify data that makes
|
20
21
|
# fewest prediction errors.
|
21
22
|
# It generates rules based on a single attribute.
|
22
|
-
class OneR
|
23
|
+
class OneR < Classifier
|
23
24
|
|
24
|
-
|
25
|
-
include ClassifierHelper
|
25
|
+
attr_reader :data_set, :rule
|
26
26
|
|
27
|
-
# Build a new OneR classifier.
|
28
|
-
#
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
# ]
|
35
|
-
#
|
36
|
-
# e.g.
|
37
|
-
# [ ['New York', '<30', 'M', 'Y'],
|
38
|
-
# ['Chicago', '<30', 'M', 'Y'],
|
39
|
-
# ['Chicago', '<30', 'F', 'Y'],
|
40
|
-
# ['New York', '<30', 'M', 'Y'],
|
41
|
-
# ['New York', '<30', 'M', 'Y'],
|
42
|
-
# ['Chicago', '[30-50)', 'M', 'Y'],
|
43
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
44
|
-
# ['Chicago', '[30-50)', 'F', 'Y'],
|
45
|
-
# ['New York', '[30-50)', 'F', 'N'],
|
46
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
47
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
48
|
-
# ['New York', '[50-80]', 'M', 'N'],
|
49
|
-
# ['Chicago', '[50-80]', 'M', 'N'],
|
50
|
-
# ['New York', '[50-80]', 'F', 'N'],
|
51
|
-
# ['Chicago', '>80', 'F', 'Y']
|
52
|
-
# ]
|
53
|
-
#
|
54
|
-
# Data labels must have the following format:
|
55
|
-
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
56
|
-
#
|
57
|
-
# If you do not provide labels for you data, the following labels will
|
58
|
-
# be created by default:
|
59
|
-
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
60
|
-
#
|
61
|
-
def build(data_examples, data_labels = nil)
|
62
|
-
check_data_examples(data_examples)
|
63
|
-
@data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
|
64
|
-
if (num_attributes(data_examples) == 1)
|
65
|
-
@zero_r = ZeroR.new.build(data_examples, data_labels)
|
27
|
+
# Build a new OneR classifier. You must provide a DataSet instance
|
28
|
+
# as parameter.
|
29
|
+
def build(data_set)
|
30
|
+
data_set.check_not_empty
|
31
|
+
@data_set = data_set
|
32
|
+
if (data_set.num_attributes == 1)
|
33
|
+
@zero_r = ZeroR.new.build(data_set)
|
66
34
|
return self;
|
67
35
|
else
|
68
36
|
@zero_r = nil;
|
69
37
|
end
|
70
|
-
domains = build_domains
|
38
|
+
domains = @data_set.build_domains
|
71
39
|
@rule = nil
|
72
40
|
domains[1...-1].each_index do |attr_index|
|
73
|
-
rule = build_rule(
|
41
|
+
rule = build_rule(@data_set.data_items, attr_index, domains)
|
74
42
|
@rule = rule if !@rule || rule[:correct] > @rule[:correct]
|
75
43
|
end
|
76
44
|
return self
|
@@ -88,7 +56,7 @@ module Ai4r
|
|
88
56
|
# This method returns the generated rules in ruby code.
|
89
57
|
# e.g.
|
90
58
|
#
|
91
|
-
# classifier.
|
59
|
+
# classifier.get_rules
|
92
60
|
# # => if age_range == '<30' then marketing_target = 'Y'
|
93
61
|
# elsif age_range == '[30-50)' then marketing_target = 'N'
|
94
62
|
# elsif age_range == '[50-80]' then marketing_target = 'N'
|
@@ -96,14 +64,14 @@ module Ai4r
|
|
96
64
|
#
|
97
65
|
# It is a nice way to inspect induction results, and also to execute them:
|
98
66
|
# marketing_target = nil
|
99
|
-
# eval classifier.
|
67
|
+
# eval classifier.get_rules
|
100
68
|
# puts marketing_target
|
101
69
|
# # => 'Y'
|
102
|
-
def
|
103
|
-
return @zero_r.
|
70
|
+
def get_rules
|
71
|
+
return @zero_r.get_rules if @zero_r
|
104
72
|
sentences = []
|
105
|
-
attr_label = @data_labels[@rule[:attr_index]]
|
106
|
-
class_label = @data_labels.last
|
73
|
+
attr_label = @data_set.data_labels[@rule[:attr_index]]
|
74
|
+
class_label = @data_set.data_labels.last
|
107
75
|
@rule[:rule].each_pair do |attr_value, class_value|
|
108
76
|
sentences << "#{attr_label} == '#{attr_value}' then #{class_label} = '#{class_value}'"
|
109
77
|
end
|
@@ -111,13 +79,6 @@ module Ai4r
|
|
111
79
|
end
|
112
80
|
|
113
81
|
protected
|
114
|
-
def build_domains(data_examples)
|
115
|
-
domains = Array.new(num_attributes(data_examples)) { Set.new }
|
116
|
-
data_examples.each do |data|
|
117
|
-
data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
|
118
|
-
end
|
119
|
-
return domains
|
120
|
-
end
|
121
82
|
|
122
83
|
def build_rule(data_examples, attr_index, domains)
|
123
84
|
domain = domains[attr_index]
|