ai4r 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +48 -0
- data/examples/decision_trees/data_set.csv +121 -0
- data/examples/decision_trees/id3_example.rb +31 -0
- data/examples/decision_trees/results.txt +29 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +39 -0
- data/examples/genetic_algorithm/travel_cost.csv +16 -0
- data/examples/neural_network/backpropagation_example.rb +65 -0
- data/examples/neural_network/patterns_with_base_noise.rb +68 -0
- data/examples/neural_network/patterns_with_noise.rb +66 -0
- data/examples/neural_network/training_patterns.rb +68 -0
- data/lib/decision_tree/id3.rb +354 -0
- data/lib/genetic_algorithm/genetic_algorithm.rb +268 -0
- data/lib/neural_network/backpropagation.rb +259 -0
- data/site/build/site/en/broken-links.xml +2 -0
- data/site/build/site/en/downloads.html +187 -0
- data/site/build/site/en/downloads.pdf +151 -0
- data/site/build/site/en/geneticAlgorithms.html +564 -0
- data/site/build/site/en/geneticAlgorithms.pdf +911 -0
- data/site/build/site/en/images/ai4r-logo.png +0 -0
- data/site/build/site/en/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/images/c.png +0 -0
- data/site/build/site/en/images/c_wbn.png +0 -0
- data/site/build/site/en/images/c_wn.png +0 -0
- data/site/build/site/en/images/ero.gif +0 -0
- data/site/build/site/en/images/europe2.png +0 -0
- data/site/build/site/en/images/europe3.png +0 -0
- data/site/build/site/en/images/fitness.png +0 -0
- data/site/build/site/en/images/instruction_arrow.png +0 -0
- data/site/build/site/en/images/my_email.png +0 -0
- data/site/build/site/en/images/rubyforge.png +0 -0
- data/site/build/site/en/images/s.png +0 -0
- data/site/build/site/en/images/s_wbn.png +0 -0
- data/site/build/site/en/images/s_wn.png +0 -0
- data/site/build/site/en/images/sigmoid.png +0 -0
- data/site/build/site/en/images/t.png +0 -0
- data/site/build/site/en/images/t_wbn.png +0 -0
- data/site/build/site/en/images/t_wn.png +0 -0
- data/site/build/site/en/index.html +258 -0
- data/site/build/site/en/index.pdf +306 -0
- data/site/build/site/en/linkmap.html +231 -0
- data/site/build/site/en/linkmap.pdf +94 -0
- data/site/build/site/en/locationmap.xml +72 -0
- data/site/build/site/en/machineLearning.html +325 -0
- data/site/build/site/en/machineLearning.pdf +337 -0
- data/site/build/site/en/neuralNetworks.html +446 -0
- data/site/build/site/en/neuralNetworks.pdf +604 -0
- data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
- data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
- data/site/build/site/en/skin/basic.css +166 -0
- data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
- data/site/build/site/en/skin/breadcrumbs.js +237 -0
- data/site/build/site/en/skin/fontsize.js +166 -0
- data/site/build/site/en/skin/getBlank.js +40 -0
- data/site/build/site/en/skin/getMenu.js +45 -0
- data/site/build/site/en/skin/images/README.txt +1 -0
- data/site/build/site/en/skin/images/add.jpg +0 -0
- data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/skin/images/chapter.gif +0 -0
- data/site/build/site/en/skin/images/chapter_open.gif +0 -0
- data/site/build/site/en/skin/images/current.gif +0 -0
- data/site/build/site/en/skin/images/error.png +0 -0
- data/site/build/site/en/skin/images/external-link.gif +0 -0
- data/site/build/site/en/skin/images/fix.jpg +0 -0
- data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
- data/site/build/site/en/skin/images/hack.jpg +0 -0
- data/site/build/site/en/skin/images/header_white_line.gif +0 -0
- data/site/build/site/en/skin/images/info.png +0 -0
- data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
- data/site/build/site/en/skin/images/label.gif +0 -0
- data/site/build/site/en/skin/images/page.gif +0 -0
- data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
- data/site/build/site/en/skin/images/poddoc.png +0 -0
- data/site/build/site/en/skin/images/printer.gif +0 -0
- data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/remove.jpg +0 -0
- data/site/build/site/en/skin/images/rss.png +0 -0
- data/site/build/site/en/skin/images/spacer.gif +0 -0
- data/site/build/site/en/skin/images/success.png +0 -0
- data/site/build/site/en/skin/images/txtdoc.png +0 -0
- data/site/build/site/en/skin/images/update.jpg +0 -0
- data/site/build/site/en/skin/images/valid-html401.png +0 -0
- data/site/build/site/en/skin/images/vcss.png +0 -0
- data/site/build/site/en/skin/images/warning.png +0 -0
- data/site/build/site/en/skin/images/xmldoc.gif +0 -0
- data/site/build/site/en/skin/menu.js +48 -0
- data/site/build/site/en/skin/note.txt +50 -0
- data/site/build/site/en/skin/print.css +54 -0
- data/site/build/site/en/skin/profile.css +163 -0
- data/site/build/site/en/skin/prototype.js +1257 -0
- data/site/build/site/en/skin/screen.css +587 -0
- data/site/build/site/en/svn.html +223 -0
- data/site/build/site/en/svn.pdf +239 -0
- data/site/build/site/en/wholesite.pdf +1686 -0
- data/site/build/tmp/brokenlinks.xml +2 -0
- data/site/build/tmp/build-info.xml +5 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/input.xmap +32 -0
- data/site/build/tmp/internal.xmap +32 -0
- data/site/build/tmp/locationmap.xml +29 -0
- data/site/build/tmp/output.xmap +38 -0
- data/site/build/tmp/pluginlist2fetchbuild.xml +144 -0
- data/site/build/tmp/plugins-1.xml +212 -0
- data/site/build/tmp/plugins-2.xml +347 -0
- data/site/build/tmp/projfilters.properties +41 -0
- data/site/build/tmp/resources.xmap +32 -0
- data/site/build/webapp/WEB-INF/logs/access.log +0 -0
- data/site/build/webapp/WEB-INF/logs/core.log +788 -0
- data/site/build/webapp/WEB-INF/logs/debug.log +0 -0
- data/site/build/webapp/WEB-INF/logs/error.log +248 -0
- data/site/build/webapp/WEB-INF/logs/flow.log +0 -0
- data/site/build/webapp/WEB-INF/logs/idgen.log +0 -0
- data/site/build/webapp/WEB-INF/logs/linkrewriter.log +0 -0
- data/site/build/webapp/WEB-INF/logs/locationmap.log +0 -0
- data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -0
- data/site/build/webapp/WEB-INF/logs/xmlform.log +0 -0
- data/site/forrest.properties +152 -0
- data/site/forrest.properties.dispatcher.properties +25 -0
- data/site/forrest.properties.xml +29 -0
- data/site/src/documentation/README.txt +7 -0
- data/site/src/documentation/classes/CatalogManager.properties +62 -0
- data/site/src/documentation/content/locationmap.xml +72 -0
- data/site/src/documentation/content/xdocs/downloads.html +9 -0
- data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +280 -0
- data/site/src/documentation/content/xdocs/index.xml +73 -0
- data/site/src/documentation/content/xdocs/machineLearning.xml +129 -0
- data/site/src/documentation/content/xdocs/neuralNetworks.xml +218 -0
- data/site/src/documentation/content/xdocs/site.xml +51 -0
- data/site/src/documentation/content/xdocs/svn.xml +31 -0
- data/site/src/documentation/content/xdocs/tabs.xml +35 -0
- data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
- data/site/src/documentation/resources/images/c.png +0 -0
- data/site/src/documentation/resources/images/c_wbn.png +0 -0
- data/site/src/documentation/resources/images/c_wn.png +0 -0
- data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
- data/site/src/documentation/resources/images/ero.gif +0 -0
- data/site/src/documentation/resources/images/europe2.png +0 -0
- data/site/src/documentation/resources/images/europe3.png +0 -0
- data/site/src/documentation/resources/images/fitness.png +0 -0
- data/site/src/documentation/resources/images/icon-a.png +0 -0
- data/site/src/documentation/resources/images/icon-b.png +0 -0
- data/site/src/documentation/resources/images/icon.png +0 -0
- data/site/src/documentation/resources/images/my_email.png +0 -0
- data/site/src/documentation/resources/images/project-logo.png +0 -0
- data/site/src/documentation/resources/images/rubyforge.png +0 -0
- data/site/src/documentation/resources/images/s.png +0 -0
- data/site/src/documentation/resources/images/s_wbn.png +0 -0
- data/site/src/documentation/resources/images/s_wn.png +0 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
- data/site/src/documentation/resources/images/t.png +0 -0
- data/site/src/documentation/resources/images/t_wbn.png +0 -0
- data/site/src/documentation/resources/images/t_wn.png +0 -0
- data/site/src/documentation/resources/schema/catalog.xcat +29 -0
- data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
- data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
- data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
- data/site/src/documentation/sitemap.xmap +66 -0
- data/site/src/documentation/skinconf.xml +418 -0
- data/site/src/documentation/translations/langcode.xml +29 -0
- data/site/src/documentation/translations/languages_de.xml +24 -0
- data/site/src/documentation/translations/languages_en.xml +24 -0
- data/site/src/documentation/translations/languages_es.xml +22 -0
- data/site/src/documentation/translations/languages_fr.xml +24 -0
- data/site/src/documentation/translations/languages_nl.xml +24 -0
- data/site/src/documentation/translations/menu.xml +33 -0
- data/site/src/documentation/translations/menu_af.xml +33 -0
- data/site/src/documentation/translations/menu_de.xml +33 -0
- data/site/src/documentation/translations/menu_es.xml +33 -0
- data/site/src/documentation/translations/menu_fr.xml +33 -0
- data/site/src/documentation/translations/menu_it.xml +33 -0
- data/site/src/documentation/translations/menu_nl.xml +33 -0
- data/site/src/documentation/translations/menu_no.xml +33 -0
- data/site/src/documentation/translations/menu_ru.xml +33 -0
- data/site/src/documentation/translations/menu_sk.xml +33 -0
- data/site/src/documentation/translations/tabs.xml +22 -0
- data/site/src/documentation/translations/tabs_de.xml +22 -0
- data/site/src/documentation/translations/tabs_es.xml +22 -0
- data/site/src/documentation/translations/tabs_fr.xml +22 -0
- data/site/src/documentation/translations/tabs_nl.xml +22 -0
- data/test/decision_tree/id3_test.rb +209 -0
- data/test/genetic_algorithm/chromosome_test.rb +55 -0
- data/test/genetic_algorithm/genetic_algorithm_test.rb +78 -0
- data/test/neural_network/backpropagation_test.rb +44 -0
- metadata +274 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Author:: Sergio Fierens
|
|
2
|
+
# License:: MPL 1.1
|
|
3
|
+
# Project:: ai4r
|
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
5
|
+
#
|
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
TRIANGLE = [
|
|
12
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
13
|
+
[ 0, 0, 0, 0, 0, 0, 1, 9, 9, 1, 0, 0, 0, 0, 0, 0],
|
|
14
|
+
[ 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0],
|
|
15
|
+
[ 0, 0, 0, 0, 0, 1, 9, 1, 1, 9, 1, 0, 0, 0, 0, 0],
|
|
16
|
+
[ 0, 0, 0, 0, 0, 5, 5, 0, 0, 5, 5, 0, 0, 0, 0, 0],
|
|
17
|
+
[ 0, 0, 0, 0, 1, 9, 1, 0, 0, 1, 9, 1, 0, 0, 0, 0],
|
|
18
|
+
[ 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0],
|
|
19
|
+
[ 0, 0, 0, 1, 9, 1, 0, 0, 0, 0, 1, 9, 1, 0, 0, 0],
|
|
20
|
+
[ 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0],
|
|
21
|
+
[ 0, 0, 1, 9, 1, 0, 0, 0, 0, 0, 0, 1, 9, 1, 0, 0],
|
|
22
|
+
[ 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0],
|
|
23
|
+
[ 0, 1, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 1, 0],
|
|
24
|
+
[ 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0],
|
|
25
|
+
[ 1, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 1],
|
|
26
|
+
[ 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5],
|
|
27
|
+
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
SQUARE = [
|
|
31
|
+
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
|
|
32
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
33
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
34
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
35
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
36
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
37
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
38
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
39
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
40
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
41
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
42
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
43
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
44
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
45
|
+
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
|
|
46
|
+
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
|
|
47
|
+
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
CROSS = [
|
|
51
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
52
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
53
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
54
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
55
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
56
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
57
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
58
|
+
[ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
|
|
59
|
+
[ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
|
|
60
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
61
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
62
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
63
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
64
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
65
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
|
|
66
|
+
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0]
|
|
67
|
+
]
|
|
68
|
+
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
|
|
2
|
+
# Decision tree learning, used in data mining and machine learning,
|
|
3
|
+
# uses a decision tree as a predictive model which maps observations about
|
|
4
|
+
# an item to conclusions about the item's target value.
|
|
5
|
+
#
|
|
6
|
+
# In this module you will find an implementation of the ID3 algorithm (Quinlan)
|
|
7
|
+
#
|
|
8
|
+
# * http://en.wikipedia.org/wiki/Decision_tree
|
|
9
|
+
# * http://en.wikipedia.org/wiki/ID3_algorithm
|
|
10
|
+
#
|
|
11
|
+
# Author:: Sergio Fierens
|
|
12
|
+
# License:: MPL 1.1
|
|
13
|
+
# Project:: ai4r
|
|
14
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
15
|
+
#
|
|
16
|
+
# You can redistribute it and/or modify it under the terms of
|
|
17
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
18
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
19
|
+
|
|
20
|
+
module DecisionTree
|
|
21
|
+
|
|
22
|
+
# = Introduction
|
|
23
|
+
# This is an implementation of the ID3 algorithm (Quinlan)
|
|
24
|
+
# Given a set of preclassified examples, it builds a top-down
|
|
25
|
+
# induction of decision tree, biased by the information gain and
|
|
26
|
+
# entropy measure.
|
|
27
|
+
#
|
|
28
|
+
# = How to use it
|
|
29
|
+
#
|
|
30
|
+
# DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
|
31
|
+
#
|
|
32
|
+
# DATA_SET = [ ['New York', '<30', 'M', 'Y'],
|
|
33
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
|
34
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
|
35
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
36
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
37
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
|
38
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
39
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
|
40
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
41
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
42
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
43
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
|
44
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
45
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
46
|
+
# ['Chicago', '>80', 'F', 'Y']
|
|
47
|
+
# ]
|
|
48
|
+
#
|
|
49
|
+
# id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
|
|
50
|
+
#
|
|
51
|
+
# id3.to_s
|
|
52
|
+
# # => if age_range=='<30' then marketing_target='Y'
|
|
53
|
+
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
|
54
|
+
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
|
55
|
+
# elsif age_range=='[50-80]' then marketing_target='N'
|
|
56
|
+
# elsif age_range=='>80' then marketing_target='Y'
|
|
57
|
+
# else raise 'There was not enough information during training to do a proper induction for this data element' end
|
|
58
|
+
#
|
|
59
|
+
# id3.eval(['New York', '<30', 'M'])
|
|
60
|
+
# # => 'Y'
|
|
61
|
+
#
|
|
62
|
+
# = A better way to load the data
|
|
63
|
+
#
|
|
64
|
+
# In real life you will use a lot more training examples, with more
|
|
65
|
+
# attributes. Consider moving your data to an external CSV (comma-separated
|
|
66
|
+
# values) file.
|
|
67
|
+
#
|
|
68
|
+
# data_set = []
|
|
69
|
+
# CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
|
|
70
|
+
# data_set << row
|
|
71
|
+
# end
|
|
72
|
+
# data_labels = data_set.shift
|
|
73
|
+
#
|
|
74
|
+
# id3 = DecisionTree::ID3.new(data_set, data_labels)
|
|
75
|
+
#
|
|
76
|
+
# = A nice tip for data evaluation
|
|
77
|
+
#
|
|
78
|
+
# id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
|
|
79
|
+
# age_range = '<30'
|
|
80
|
+
# marketing_target = nil
|
|
81
|
+
# eval id3.to_s
|
|
82
|
+
# puts marketing_target
|
|
83
|
+
# # => 'Y'
|
|
84
|
+
# = More about ID3 and decision trees
|
|
85
|
+
#
|
|
86
|
+
# * http://en.wikipedia.org/wiki/Decision_tree
|
|
87
|
+
# * http://en.wikipedia.org/wiki/ID3_algorithm
|
|
88
|
+
#
|
|
89
|
+
# = About the project
|
|
90
|
+
# Author:: Sergio Fierens
|
|
91
|
+
# License:: MPL 1.1
|
|
92
|
+
|
|
93
|
+
class ID3
  attr_reader :data_labels

  # Natural logarithm of 2; ID3.log2 divides by it to obtain base-2 logarithms.
  LOG2 = Math.log(2)

  # Create a new decision tree. If your data is classified with N attributes
  # and M examples, then your data examples must have the following format:
  #
  #   [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
  #     [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
  #     ...
  #     [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
  #   ]
  #
  # e.g.
  #   [ ['New York', '<30', 'M', 'Y'],
  #     ['Chicago', '<30', 'M', 'Y'],
  #     ['Chicago', '<30', 'F', 'Y'],
  #     ['New York', '<30', 'M', 'Y'],
  #     ['New York', '<30', 'M', 'Y'],
  #     ['Chicago', '[30-50)', 'M', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago', '[30-50)', 'F', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago', '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['New York', '[50-80]', 'M', 'N'],
  #     ['Chicago', '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['Chicago', '>80', 'F', 'Y']
  #   ]
  #
  # Data labels must have the following format:
  #   [ 'city', 'age_range', 'gender', 'marketing_target' ]
  #
  # If you do not provide labels for your data, the following labels will
  # be created by default:
  #   [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
  #
  # Raises a RuntimeError when data_examples is nil or empty.
  def initialize(data_examples, data_labels=nil)
    raise "Examples data set must not be empty." if !data_examples || data_examples.empty?
    unless data_labels
      # One generated label per attribute column of the first example
      # (every column except the last), followed by the category label.
      attribute_count = data_examples[0][0..-2].length
      data_labels = Array.new(attribute_count) { |i| "ATTRIBUTE_#{i + 1}" }
      data_labels << "CATEGORY"
    end
    @data_labels = data_labels
    preprocess_data(data_examples)
  end

  # You can evaluate new data, predicting its category.
  # e.g.
  #   id3.eval(['New York', '<30', 'F'])  # => 'Y'
  def eval(data)
    @tree.value(data)
  end

  # This method returns the generated rules in ruby code.
  # e.g.
  #
  #   id3.to_s
  #     # =>  if age_range=='<30' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
  #           elsif age_range=='[50-80]' then marketing_target='N'
  #           elsif age_range=='>80' then marketing_target='Y'
  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #     age_range = '<30'
  #     marketing_target = nil
  #     eval id3.to_s
  #     puts marketing_target
  #       # =>  'Y'
  #
  # NOTE(review): the rule fragments returned by the tree's get_rules are
  # joined verbatim here, with no escaping. Only eval the result when the
  # training data and labels are trusted.
  def to_s
    rules = @tree.get_rules.collect do |rule|
      # Every element but the last is a condition; the last is the conclusion.
      "#{rule[0..-2].join(' and ')} then #{rule.last}"
    end
    "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
  end

  # Sum of all elements of values (0 for an empty collection).
  def self.sum(values)
    values.inject( 0 ) { |sum,x| sum+x }
  end

  # Base-2 logarithm of z. Returns 0.0 for z == 0 so that a zero
  # proportion contributes nothing to an entropy sum.
  def self.log2(z)
    return 0.0 if z == 0
    Math.log(z)/LOG2
  end

  private

  # Builds the tree from the training examples and stores its root in @tree.
  def preprocess_data(data_examples)
    @tree = build_node(data_examples)
  end

  # Recursively builds the node for data_examples.
  #
  # flag_att lists the attribute indexes already used on the path from the
  # root to this node. It is never mutated: each child receives a new array
  # extended with the attribute chosen here, so a choice made in one branch
  # does not hide that attribute from sibling branches.
  #
  # Returns an ErrorNode for an empty example set, a CategoryNode when all
  # examples share one category or cannot be split further, and an
  # EvaluationNode otherwise.
  def build_node(data_examples, flag_att = [])
    return ErrorNode.new if data_examples.empty?
    attribute_values = domain(data_examples)
    categories = attribute_values.last
    return CategoryNode.new(@data_labels.last, categories[0]) if categories.length == 1
    split_index = min_entropy_index(data_examples, attribute_values, flag_att)
    partitions = split_data_examples(data_examples, attribute_values, split_index)
    # A single partition means the chosen attribute cannot separate the
    # examples any further: settle for the most frequent category.
    return CategoryNode.new(@data_labels.last, most_freq(data_examples, attribute_values)) if partitions.length == 1
    used_attributes = flag_att + [split_index]
    children = partitions.collect do |partial_data_examples|
      build_node(partial_data_examples, used_attributes)
    end
    EvaluationNode.new(@data_labels, split_index, attribute_values[split_index], children)
  end

  # Returns the category (last column) that occurs most often in examples.
  # On a tie, the category that appears first in domain.last wins.
  def most_freq(examples, domain)
    categories = domain.last
    freqs = Array.new(categories.length, 0)
    examples.each do |example|
      freqs[categories.index(example.last)] += 1
    end
    categories[freqs.index(freqs.max)]
  end

  # Partitions data_examples by their value at att_index. The partition for
  # a value is stored at the position that value has in domain[att_index].
  def split_data_examples(data_examples, domain, att_index)
    examples_by_value = {}
    data_examples.each do |example|
      (examples_by_value[example[att_index]] ||= []) << example
    end
    partitions = []
    examples_by_value.each_pair do |att_value, example_set|
      partitions[domain[att_index].index(att_value)] = example_set
    end
    partitions
  end

  # Returns the index of the attribute with the lowest entropy among those
  # not listed in flag_att. On a tie the lowest index wins. Returns 0 when
  # every attribute is flagged.
  def min_entropy_index(data_examples, domain, flag_att=[])
    min_entropy = nil
    min_index = 0
    domain[0..-2].each_index do |index|
      next if flag_att.include?(index)
      grid = freq_grid(index, data_examples, domain)
      attribute_entropy = entropy(grid, data_examples.length)
      if !min_entropy || attribute_entropy < min_entropy
        min_entropy = attribute_entropy
        min_index = index
      end
    end
    min_index
  end

  # Returns, for each column named in @data_labels, the distinct values
  # found in data_examples, in order of first appearance. Columns beyond
  # the number of labels are ignored.
  def domain(data_examples)
    values_by_column = Array.new(@data_labels.length) { [] }
    data_examples.each do |data|
      data.each_index do |i|
        values_by_column[i] << data[i] if i < values_by_column.length && !values_by_column[i].include?(data[i])
      end
    end
    values_by_column
  end

  # Builds a frequency grid for the attribute at att_index:
  # grid[value_position][category_position] is the number of examples that
  # have that attribute value and that category.
  def freq_grid(att_index, data_examples, domain)
    # Initialize empty grid; every row gets its own array of counters.
    grid = Array.new(domain[att_index].length) { Array.new(domain.last.length, 0) }
    # Fill grid with frequencies
    data_examples.each do |example|
      att_val_index = domain[att_index].index(example[att_index])
      category_index = domain.last.index(example.last)
      grid[att_val_index][category_index] += 1
    end
    grid
  end

  # Entropy of a split described by freq_grid: for each attribute value, the
  # entropy of its category distribution, weighted by the fraction of
  # total_examples that have that value.
  def entropy(freq_grid, total_examples)
    entropy = 0
    freq_grid.each do |att_freq|
      att_total_freq = ID3.sum(att_freq)
      partial_entropy = 0
      if att_total_freq != 0
        att_freq.each do |freq|
          prop = freq.to_f/att_total_freq
          partial_entropy += (-1*prop*ID3.log2(prop))
        end
      end
      entropy += (att_total_freq.to_f/total_examples) * partial_entropy
    end
    entropy
  end
end
|
|
303
|
+
|
|
304
|
+
# Internal tree node that tests one attribute and delegates to the child
# node associated with the value found.
class EvaluationNode
  attr_reader :index, :values, :nodes

  # data_labels:: labels of every column; used to name the attribute in rules
  # index::       position of the attribute this node tests
  # values::      attribute values seen at this node during training
  # nodes::       child nodes; nodes[i] handles values[i]
  def initialize(data_labels, index, values, nodes)
    @index = index
    @values = values
    @nodes = nodes
    @data_labels = data_labels
  end

  # Returns the value produced by the child node that matches data[@index].
  # Raises a RuntimeError when that attribute value is not in @values.
  def value(data)
    position = @values.index(data[@index])
    return rule_not_found if position.nil?
    @nodes[position].value(data)
  end

  # Returns one rule per leaf reachable from this node. Each rule is an
  # array of strings: this node's condition followed by the child's rule.
  # The arrays returned by the child nodes are not modified.
  def get_rules
    rule_set = []
    @nodes.each_index do |child_node_index|
      my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
      @nodes[child_node_index].get_rules.each do |child_rule|
        rule_set << ([my_rule] + child_rule)
      end
    end
    rule_set
  end

  private

  # Signals that the evaluated data holds an attribute value this node
  # has no child for.
  def rule_not_found
    raise "There was not enough information during training to do a proper induction for this data element."
  end
end
|
|
331
|
+
|
|
332
|
+
# Leaf node holding a fixed category value.
class CategoryNode
  # label:: name of the category column, used when rendering rules
  # value:: category this leaf stands for
  def initialize(label, value)
    @label, @value = label, value
  end

  # Returns the stored category; the evaluated data is not consulted.
  def value(data)
    @value
  end

  # Returns a single rule containing only the assignment of the category.
  def get_rules
    assignment = "#{@label}='#{@value}'"
    [[assignment]]
  end
end
|
|
344
|
+
|
|
345
|
+
# Node used where no training examples were available.
class ErrorNode
  # Always raises a RuntimeError: nothing was learned for this branch.
  def value(data)
    raise(RuntimeError, "There was not enough information during training to do a proper induction for this data element.")
  end

  # This node contributes no rules.
  def get_rules
    Array.new
  end
end
|
|
353
|
+
|
|
354
|
+
end
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
#
|
|
2
|
+
# The GeneticAlgorithm module implements the GeneticSearch and Chromosome
|
|
3
|
+
# classes. The GeneticSearch is a generic class, and can be used to solve
|
|
4
|
+
# any kind of problem. The GeneticSearch class performs a stochastic search
|
|
5
|
+
# of the solution of a given problem.
|
|
6
|
+
#
|
|
7
|
+
# The Chromosome is "problem specific". Ai4r built-in Chromosome class was
|
|
8
|
+
# designed to model the Travelling salesman problem. If you want to solve another
|
|
9
|
+
# type of problem, you will have to modify the Chromosome class, by overwriting
|
|
10
|
+
# its fitness, reproduce, and mutate functions, to model your specific problem.
|
|
11
|
+
#
|
|
12
|
+
# Author:: Sergio Fierens
|
|
13
|
+
# License:: MPL 1.1
|
|
14
|
+
# Project:: ai4r
|
|
15
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
16
|
+
#
|
|
17
|
+
# You can redistribute it and/or modify it under the terms of
|
|
18
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
19
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
20
|
+
|
|
21
|
+
module GeneticAlgorithm
|
|
22
|
+
|
|
23
|
+
# This class is used to automatically:
|
|
24
|
+
#
|
|
25
|
+
# 1. Choose initial population
|
|
26
|
+
# 2. Evaluate the fitness of each individual in the population
|
|
27
|
+
# 3. Repeat
|
|
28
|
+
# 1. Select best-ranking individuals to reproduce
|
|
29
|
+
# 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
|
|
30
|
+
# 3. Evaluate the individual fitnesses of the offspring
|
|
31
|
+
# 4. Replace worst ranked part of population with offspring
|
|
32
|
+
# 4. Until termination
|
|
33
|
+
#
|
|
34
|
+
# If you want to customize the algorithm, you must modify any of the following classes:
|
|
35
|
+
# - Chromosome
|
|
36
|
+
# - Population
|
|
37
|
+
class GeneticSearch
|
|
38
|
+
|
|
39
|
+
attr_accessor :population
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Stores the population size and the number of generations to run, and
# resets the generation counter to zero.
def initialize(initial_population_size, generations)
  @generation = 0
  @max_generation = generations
  @population_size = initial_population_size
end
|
|
47
|
+
|
|
48
|
+
# 1. Choose initial population
|
|
49
|
+
# 2. Evaluate the fitness of each individual in the population
|
|
50
|
+
# 3. Repeat
|
|
51
|
+
# 1. Select best-ranking individuals to reproduce
|
|
52
|
+
# 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
|
|
53
|
+
# 3. Evaluate the individual fitnesses of the offspring
|
|
54
|
+
# 4. Replace worst ranked part of population with offspring
|
|
55
|
+
# 4. Until termination
|
|
56
|
+
# 5. Return the best chromosome
|
|
57
|
+
def run
|
|
58
|
+
generate_initial_population #Generate initial population
|
|
59
|
+
@max_generation.times do
|
|
60
|
+
selected_to_breed = selection #Evaluates current population
|
|
61
|
+
offsprings = reproduction selected_to_breed #Generate the population for this new generation
|
|
62
|
+
replace_worst_ranked offsprings
|
|
63
|
+
end
|
|
64
|
+
return best_chromosome
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def generate_initial_population
|
|
69
|
+
@population = []
|
|
70
|
+
@population_size.times do
|
|
71
|
+
population << Chromosome.seed
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Select best-ranking individuals to reproduce
|
|
76
|
+
#
|
|
77
|
+
# Selection is the stage of a genetic algorithm in which individual
|
|
78
|
+
# genomes are chosen from a population for later breeding.
|
|
79
|
+
# There are several generic selection algorithms, such as
|
|
80
|
+
# tournament selection and roulette wheel selection. We implemented the
|
|
81
|
+
# latest.
|
|
82
|
+
#
|
|
83
|
+
# Steps:
|
|
84
|
+
#
|
|
85
|
+
# 1. The fitness function is evaluated for each individual, providing fitness values
|
|
86
|
+
# 2. The population is sorted by descending fitness values.
|
|
87
|
+
# 3. The fitness values ar then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
|
|
88
|
+
# 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added togheter).
|
|
89
|
+
# 5. The selected individual is the first one whose accumulated normalized value (its is normalized value plus the normalized values of the chromosomes prior it) greater than R.
|
|
90
|
+
# 6. We repeat steps 4 and 5, 2/3 times the population size.
|
|
91
|
+
def selection
|
|
92
|
+
@population.sort! { |a, b| b.fitness <=> a.fitness}
|
|
93
|
+
best_fitness = @population[0].fitness
|
|
94
|
+
worst_fitness = @population.last.fitness
|
|
95
|
+
acum_fitness = 0
|
|
96
|
+
if best_fitness-worst_fitness > 0
|
|
97
|
+
@population.each do |chromosome|
|
|
98
|
+
chromosome.normalized_fitness = (chromosome.fitness - worst_fitness)/(best_fitness-worst_fitness)
|
|
99
|
+
acum_fitness += chromosome.normalized_fitness
|
|
100
|
+
end
|
|
101
|
+
else
|
|
102
|
+
@population.each { |chromosome| chromosome.normalized_fitness = 1}
|
|
103
|
+
end
|
|
104
|
+
selected_to_breed = []
|
|
105
|
+
((2*@population_size)/3).times do
|
|
106
|
+
selected_to_breed << select_random_individual(acum_fitness)
|
|
107
|
+
end
|
|
108
|
+
selected_to_breed
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# We combine each pair of selected chromosome using the method
|
|
112
|
+
# Chromosome.reproduce
|
|
113
|
+
#
|
|
114
|
+
# The reproduction will also call the Chromosome.mutate method with
|
|
115
|
+
# each member of the population. You should implement Chromosome.mutate
|
|
116
|
+
# to only change (mutate) randomly. E.g. You could effectivly change the
|
|
117
|
+
# chromosome only if
|
|
118
|
+
# rand < ((1 - chromosome.normalized_fitness) * 0.4)
|
|
119
|
+
def reproduction(selected_to_breed)
|
|
120
|
+
offsprings = []
|
|
121
|
+
0.upto(selected_to_breed.length/2-1) do |i|
|
|
122
|
+
offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
|
|
123
|
+
end
|
|
124
|
+
@population.each do |individual|
|
|
125
|
+
Chromosome.mutate(individual)
|
|
126
|
+
end
|
|
127
|
+
return offsprings
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Replace worst ranked part of population with offspring
|
|
131
|
+
def replace_worst_ranked(offsprings)
|
|
132
|
+
size = offsprings.length
|
|
133
|
+
@population = @population [0..((-1*size)-1)] + offsprings
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Select the best chromosome in the population
|
|
137
|
+
def best_chromosome
|
|
138
|
+
the_best = @population[0]
|
|
139
|
+
@population.each do |chromosome|
|
|
140
|
+
the_best = chromosome if chromosome.fitness > the_best.fitness
|
|
141
|
+
end
|
|
142
|
+
return the_best
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
private
|
|
146
|
+
def select_random_individual(acum_fitness)
|
|
147
|
+
select_random_target = acum_fitness * rand
|
|
148
|
+
local_acum = 0
|
|
149
|
+
@population.each do |chromosome|
|
|
150
|
+
local_acum += chromosome.normalized_fitness
|
|
151
|
+
return chromosome if local_acum >= select_random_target
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# A Chromosome is a representation of an individual solution for a specific
|
|
158
|
+
# problem. You will have to redefine your Chromosome representation for each
|
|
159
|
+
# particular problem, along with its fitness, mutate, reproduce, and seed
|
|
160
|
+
# functions.
|
|
161
|
+
class Chromosome

  # Ordered list of node indexes (a route through the cost matrix).
  attr_reader :data
  # Fitness scaled by the search during selection; nil until it is assigned.
  attr_accessor :normalized_fitness

  def initialize(data)
    @data = data
  end

  # Replaces the chromosome data and discards the cached fitness, so that
  # the next call to #fitness reflects the new data.
  def data=(new_data)
    @data = new_data
    @fitness = nil
  end

  # The fitness function quantifies the optimality of a solution
  # (that is, a chromosome) in a genetic algorithm so that that particular
  # chromosome may be ranked against all the other chromosomes.
  #
  # Here the fitness is the negated total cost of travelling through the
  # nodes in order, according to the matrix given to set_cost_matrix.
  # The value is cached until the data is reassigned.
  def fitness
    return @fitness if @fitness
    cost = 0
    # each_cons walks every consecutive (from, to) pair; it yields nothing
    # for routes with fewer than 2 nodes, whose cost is therefore 0.
    @data.each_cons(2) { |from, to| cost += @@costs[from][to] }
    @fitness = -1 * cost
  end

  # Mutation is a function used to maintain genetic diversity from one
  # generation of a population of chromosomes to the next. It is analogous
  # to biological mutation.
  #
  # The purpose of mutation in GAs is to allow the algorithm to avoid
  # local minima by preventing the population of chromosomes from becoming
  # too similar to each other, thus slowing or even stopping evolution.
  #
  # This implementation swaps 2 random consecutive nodes
  # (e.g. from [0, 1, 2, 4] to [0, 2, 1, 4]) if the chromosome has a
  # normalized fitness and:
  #     rand < ((1 - chromosome.normalized_fitness) * 0.3)
  #
  # Chromosomes with fewer than 2 nodes are left untouched.
  def self.mutate(chromosome)
    return unless chromosome.normalized_fitness
    return unless rand < ((1 - chromosome.normalized_fitness) * 0.3)
    data = chromosome.data
    return if data.length < 2
    index = rand(data.length - 1)
    data[index], data[index + 1] = data[index + 1], data[index]
    # Assigning through the writer invalidates the cached fitness.
    chromosome.data = data
  end

  # Reproduction is used to vary the programming of a chromosome or
  # chromosomes from one generation to the next. There are several ways to
  # combine two chromosomes: One-point crossover, Two-point crossover,
  # "Cut and splice", edge recombination, and more.
  #
  # The method is usually dependent on the problem domain. This
  # implementation builds a new route starting at the first node of a.
  # At each step it appends the node that follows the current one in b if
  # it is still unused, otherwise the one that follows it in a, otherwise
  # a random unused node. After each step the parents swap roles with
  # probability 0.4.
  #
  # Returns a new Chromosome.
  def self.reproduce(a, b)
    data_size = @@costs[0].length
    available = (0...data_size).to_a
    token = a.data[0]
    offspring = [token]
    available.delete(token)
    until available.empty?
      token = next_in_parent(b, token, available) ||
              next_in_parent(a, token, available) ||
              available[rand(available.length)]
      available.delete(token)
      offspring << token
      a, b = b, a if rand < 0.4
    end
    Chromosome.new(offspring)
  end

  # Initializes an individual solution (chromosome) for the initial
  # population. Usually the chromosome is generated randomly, but you can
  # use some problem domain knowledge to generate better initial solutions.
  #
  # This implementation returns a random permutation of all the node
  # indexes of the cost matrix.
  def self.seed
    data_size = @@costs[0].length
    Chromosome.new((0...data_size).to_a.shuffle)
  end

  # Sets the cost matrix used by fitness, reproduce and seed:
  # costs[i][j] is the cost of going from node i to node j.
  def self.set_cost_matrix(costs)
    @@costs = costs
  end

  # Returns the node that follows token in parent's data when that node is
  # still available, or nil otherwise (token is the last node, is missing
  # from parent, or its successor was already used).
  def self.next_in_parent(parent, token, available)
    return nil if token == parent.data.last
    position = parent.data.index(token)
    return nil if position.nil?
    candidate = parent.data[position + 1]
    available.include?(candidate) ? candidate : nil
  end
  private_class_method :next_in_parent

end
|
|
267
|
+
|
|
268
|
+
end
|