RubyGems - ai4r - Versions diffs - 1.0 - Mend

ai4r 1.0

Files changed (196) hide show

data/README.rdoc +48 -0
data/examples/decision_trees/data_set.csv +121 -0
data/examples/decision_trees/id3_example.rb +31 -0
data/examples/decision_trees/results.txt +29 -0
data/examples/genetic_algorithm/genetic_algorithm_example.rb +39 -0
data/examples/genetic_algorithm/travel_cost.csv +16 -0
data/examples/neural_network/backpropagation_example.rb +65 -0
data/examples/neural_network/patterns_with_base_noise.rb +68 -0
data/examples/neural_network/patterns_with_noise.rb +66 -0
data/examples/neural_network/training_patterns.rb +68 -0
data/lib/decision_tree/id3.rb +354 -0
data/lib/genetic_algorithm/genetic_algorithm.rb +268 -0
data/lib/neural_network/backpropagation.rb +259 -0
data/site/build/site/en/broken-links.xml +2 -0
data/site/build/site/en/downloads.html +187 -0
data/site/build/site/en/downloads.pdf +151 -0
data/site/build/site/en/geneticAlgorithms.html +564 -0
data/site/build/site/en/geneticAlgorithms.pdf +911 -0
data/site/build/site/en/images/ai4r-logo.png +0 -0
data/site/build/site/en/images/built-with-forrest-button.png +0 -0
data/site/build/site/en/images/c.png +0 -0
data/site/build/site/en/images/c_wbn.png +0 -0
data/site/build/site/en/images/c_wn.png +0 -0
data/site/build/site/en/images/ero.gif +0 -0
data/site/build/site/en/images/europe2.png +0 -0
data/site/build/site/en/images/europe3.png +0 -0
data/site/build/site/en/images/fitness.png +0 -0
data/site/build/site/en/images/instruction_arrow.png +0 -0
data/site/build/site/en/images/my_email.png +0 -0
data/site/build/site/en/images/rubyforge.png +0 -0
data/site/build/site/en/images/s.png +0 -0
data/site/build/site/en/images/s_wbn.png +0 -0
data/site/build/site/en/images/s_wn.png +0 -0
data/site/build/site/en/images/sigmoid.png +0 -0
data/site/build/site/en/images/t.png +0 -0
data/site/build/site/en/images/t_wbn.png +0 -0
data/site/build/site/en/images/t_wn.png +0 -0
data/site/build/site/en/index.html +258 -0
data/site/build/site/en/index.pdf +306 -0
data/site/build/site/en/linkmap.html +231 -0
data/site/build/site/en/linkmap.pdf +94 -0
data/site/build/site/en/locationmap.xml +72 -0
data/site/build/site/en/machineLearning.html +325 -0
data/site/build/site/en/machineLearning.pdf +337 -0
data/site/build/site/en/neuralNetworks.html +446 -0
data/site/build/site/en/neuralNetworks.pdf +604 -0
data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
data/site/build/site/en/skin/basic.css +166 -0
data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
data/site/build/site/en/skin/breadcrumbs.js +237 -0
data/site/build/site/en/skin/fontsize.js +166 -0
data/site/build/site/en/skin/getBlank.js +40 -0
data/site/build/site/en/skin/getMenu.js +45 -0
data/site/build/site/en/skin/images/README.txt +1 -0
data/site/build/site/en/skin/images/add.jpg +0 -0
data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
data/site/build/site/en/skin/images/chapter.gif +0 -0
data/site/build/site/en/skin/images/chapter_open.gif +0 -0
data/site/build/site/en/skin/images/current.gif +0 -0
data/site/build/site/en/skin/images/error.png +0 -0
data/site/build/site/en/skin/images/external-link.gif +0 -0
data/site/build/site/en/skin/images/fix.jpg +0 -0
data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
data/site/build/site/en/skin/images/hack.jpg +0 -0
data/site/build/site/en/skin/images/header_white_line.gif +0 -0
data/site/build/site/en/skin/images/info.png +0 -0
data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
data/site/build/site/en/skin/images/label.gif +0 -0
data/site/build/site/en/skin/images/page.gif +0 -0
data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
data/site/build/site/en/skin/images/poddoc.png +0 -0
data/site/build/site/en/skin/images/printer.gif +0 -0
data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
data/site/build/site/en/skin/images/remove.jpg +0 -0
data/site/build/site/en/skin/images/rss.png +0 -0
data/site/build/site/en/skin/images/spacer.gif +0 -0
data/site/build/site/en/skin/images/success.png +0 -0
data/site/build/site/en/skin/images/txtdoc.png +0 -0
data/site/build/site/en/skin/images/update.jpg +0 -0
data/site/build/site/en/skin/images/valid-html401.png +0 -0
data/site/build/site/en/skin/images/vcss.png +0 -0
data/site/build/site/en/skin/images/warning.png +0 -0
data/site/build/site/en/skin/images/xmldoc.gif +0 -0
data/site/build/site/en/skin/menu.js +48 -0
data/site/build/site/en/skin/note.txt +50 -0
data/site/build/site/en/skin/print.css +54 -0
data/site/build/site/en/skin/profile.css +163 -0
data/site/build/site/en/skin/prototype.js +1257 -0
data/site/build/site/en/skin/screen.css +587 -0
data/site/build/site/en/svn.html +223 -0
data/site/build/site/en/svn.pdf +239 -0
data/site/build/site/en/wholesite.pdf +1686 -0
data/site/build/tmp/brokenlinks.xml +2 -0
data/site/build/tmp/build-info.xml +5 -0
data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
data/site/build/tmp/input.xmap +32 -0
data/site/build/tmp/internal.xmap +32 -0
data/site/build/tmp/locationmap.xml +29 -0
data/site/build/tmp/output.xmap +38 -0
data/site/build/tmp/pluginlist2fetchbuild.xml +144 -0
data/site/build/tmp/plugins-1.xml +212 -0
data/site/build/tmp/plugins-2.xml +347 -0
data/site/build/tmp/projfilters.properties +41 -0
data/site/build/tmp/resources.xmap +32 -0
data/site/build/webapp/WEB-INF/logs/access.log +0 -0
data/site/build/webapp/WEB-INF/logs/core.log +788 -0
data/site/build/webapp/WEB-INF/logs/debug.log +0 -0
data/site/build/webapp/WEB-INF/logs/error.log +248 -0
data/site/build/webapp/WEB-INF/logs/flow.log +0 -0
data/site/build/webapp/WEB-INF/logs/idgen.log +0 -0
data/site/build/webapp/WEB-INF/logs/linkrewriter.log +0 -0
data/site/build/webapp/WEB-INF/logs/locationmap.log +0 -0
data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -0
data/site/build/webapp/WEB-INF/logs/xmlform.log +0 -0
data/site/forrest.properties +152 -0
data/site/forrest.properties.dispatcher.properties +25 -0
data/site/forrest.properties.xml +29 -0
data/site/src/documentation/README.txt +7 -0
data/site/src/documentation/classes/CatalogManager.properties +62 -0
data/site/src/documentation/content/locationmap.xml +72 -0
data/site/src/documentation/content/xdocs/downloads.html +9 -0
data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +280 -0
data/site/src/documentation/content/xdocs/index.xml +73 -0
data/site/src/documentation/content/xdocs/machineLearning.xml +129 -0
data/site/src/documentation/content/xdocs/neuralNetworks.xml +218 -0
data/site/src/documentation/content/xdocs/site.xml +51 -0
data/site/src/documentation/content/xdocs/svn.xml +31 -0
data/site/src/documentation/content/xdocs/tabs.xml +35 -0
data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
data/site/src/documentation/resources/images/c.png +0 -0
data/site/src/documentation/resources/images/c_wbn.png +0 -0
data/site/src/documentation/resources/images/c_wn.png +0 -0
data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
data/site/src/documentation/resources/images/ero.gif +0 -0
data/site/src/documentation/resources/images/europe2.png +0 -0
data/site/src/documentation/resources/images/europe3.png +0 -0
data/site/src/documentation/resources/images/fitness.png +0 -0
data/site/src/documentation/resources/images/icon-a.png +0 -0
data/site/src/documentation/resources/images/icon-b.png +0 -0
data/site/src/documentation/resources/images/icon.png +0 -0
data/site/src/documentation/resources/images/my_email.png +0 -0
data/site/src/documentation/resources/images/project-logo.png +0 -0
data/site/src/documentation/resources/images/rubyforge.png +0 -0
data/site/src/documentation/resources/images/s.png +0 -0
data/site/src/documentation/resources/images/s_wbn.png +0 -0
data/site/src/documentation/resources/images/s_wn.png +0 -0
data/site/src/documentation/resources/images/sigmoid.png +0 -0
data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
data/site/src/documentation/resources/images/t.png +0 -0
data/site/src/documentation/resources/images/t_wbn.png +0 -0
data/site/src/documentation/resources/images/t_wn.png +0 -0
data/site/src/documentation/resources/schema/catalog.xcat +29 -0
data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
data/site/src/documentation/sitemap.xmap +66 -0
data/site/src/documentation/skinconf.xml +418 -0
data/site/src/documentation/translations/langcode.xml +29 -0
data/site/src/documentation/translations/languages_de.xml +24 -0
data/site/src/documentation/translations/languages_en.xml +24 -0
data/site/src/documentation/translations/languages_es.xml +22 -0
data/site/src/documentation/translations/languages_fr.xml +24 -0
data/site/src/documentation/translations/languages_nl.xml +24 -0
data/site/src/documentation/translations/menu.xml +33 -0
data/site/src/documentation/translations/menu_af.xml +33 -0
data/site/src/documentation/translations/menu_de.xml +33 -0
data/site/src/documentation/translations/menu_es.xml +33 -0
data/site/src/documentation/translations/menu_fr.xml +33 -0
data/site/src/documentation/translations/menu_it.xml +33 -0
data/site/src/documentation/translations/menu_nl.xml +33 -0
data/site/src/documentation/translations/menu_no.xml +33 -0
data/site/src/documentation/translations/menu_ru.xml +33 -0
data/site/src/documentation/translations/menu_sk.xml +33 -0
data/site/src/documentation/translations/tabs.xml +22 -0
data/site/src/documentation/translations/tabs_de.xml +22 -0
data/site/src/documentation/translations/tabs_es.xml +22 -0
data/site/src/documentation/translations/tabs_fr.xml +22 -0
data/site/src/documentation/translations/tabs_nl.xml +22 -0
data/test/decision_tree/id3_test.rb +209 -0
data/test/genetic_algorithm/chromosome_test.rb +55 -0
data/test/genetic_algorithm/genetic_algorithm_test.rb +78 -0
data/test/neural_network/backpropagation_test.rb +44 -0
metadata +274 -0

data/examples/neural_network/training_patterns.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# Author::    Sergio Fierens
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+# Training pattern: a 16x16 grid of integer cell values (0..10 in this grid).
+# The non-zero cells trace two diagonal edges that meet at the top centre,
+# closed by a solid bottom row of 10s, i.e. the outline of a triangle.
+# NOTE(review): the values look like grey-level intensities -- confirm
+# against the neural network examples that consume this file.
+TRIANGLE = [
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  1,  9,  9,  1,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  5,  5,  5,  5,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  1,  9,  1,  1,  9,  1,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  5,  5,  0,  0,  5,  5,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  1,  9,  1,  0,  0,  1,  9,  1,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0],
+  [ 0,  0,  0,  1,  9,  1,  0,  0,  0,  0,  1,  9,  1,  0,  0,  0],
+  [ 0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0],
+  [ 0,  0,  1,  9,  1,  0,  0,  0,  0,  0,  0,  1,  9,  1,  0,  0],
+  [ 0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0],
+  [ 0,  1,  9,  1,  0,  0,  0,  0,  0,  0,  0,  0,  1,  9,  1,  0],
+  [ 0,  5,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  5,  0],
+  [ 1,  9,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  9,  1],
+  [ 5,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  5],
+  [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
+]
+# Training pattern: a 16x16 grid of integer cell values. The outermost rows
+# and columns are filled with 10s and every interior cell is 0, i.e. the
+# outline of a square.
+SQUARE = [
+  [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10],
+  [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
+]
+# Training pattern: a 16x16 grid of integer cell values. The two middle
+# columns and the two middle rows are filled with 5s and every other cell
+# is 0, i.e. a cross made of a vertical and a horizontal bar.
+CROSS = [
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5],
+  [ 5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0],
+  [ 0,  0,  0,  0,  0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0]
+]

data/lib/decision_tree/id3.rb ADDED Viewed

@@ -0,0 +1,354 @@
+# Decision tree learning, used in data mining and machine learning,
+# uses a decision tree as a predictive model which maps observations about
+# an item to conclusions about the item's target value.
+#
+# In this module you will find an implementation of the ID3 algorithm (Quinlan)
+#
+# * http://en.wikipedia.org/wiki/Decision_tree
+# * http://en.wikipedia.org/wiki/ID3_algorithm
+#
+# Author::    Sergio Fierens
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+module DecisionTree
+  # = Introduction
+  # This is an implementation of the ID3 algorithm (Quinlan).
+  # Given a set of preclassified examples, it builds a decision tree by
+  # top-down induction, biased by the information gain and entropy
+  # measures.
+  #
+  # = How to use it
+  #
+  #   DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target'  ]
+  #
+  #   DATA_SET = [  ['New York',  '<30',      'M', 'Y'],
+  #            ['Chicago',     '<30',      'M', 'Y'],
+  #            ['Chicago',     '<30',      'F', 'Y'],
+  #            ['New York',  '<30',      'M', 'Y'],
+  #            ['New York',  '<30',      'M', 'Y'],
+  #            ['Chicago',     '[30-50)',  'M', 'Y'],
+  #            ['New York',  '[30-50)',  'F', 'N'],
+  #            ['Chicago',     '[30-50)',  'F', 'Y'],
+  #            ['New York',  '[30-50)',  'F', 'N'],
+  #            ['Chicago',     '[50-80]', 'M', 'N'],
+  #            ['New York',  '[50-80]', 'F', 'N'],
+  #            ['New York',  '[50-80]', 'M', 'N'],
+  #            ['Chicago',     '[50-80]', 'M', 'N'],
+  #            ['New York',  '[50-80]', 'F', 'N'],
+  #            ['Chicago',     '>80',      'F', 'Y']
+  #          ]
+  #
+  #   id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
+  #
+  #   id3.to_s
+  #     # =>  if age_range=='<30' then marketing_target='Y'
+  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
+  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
+  #           elsif age_range=='[50-80]' then marketing_target='N'
+  #           elsif age_range=='>80' then marketing_target='Y'
+  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
+  #
+  #   id3.eval(['New York', '<30', 'M'])
+  #     # =>  'Y'
+  #
+  # = A better way to load the data
+  #
+  # In real life you will use many more training examples, with more
+  # attributes. Consider moving your data to an external CSV (comma-separated
+  # values) file.
+  #
+  #   data_set = []
+  #   CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
+  #     data_set << row
+  #   end
+  #   data_labels = data_set.shift
+  #
+  #   id3 = DecisionTree::ID3.new(data_set, data_labels)
+  #
+  # = A nice tip for data evaluation
+  #
+  #   id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
+  #     age_range = '<30'
+  #     marketing_target = nil
+  #     eval id3.to_s
+  #     puts marketing_target
+  #       # =>  'Y'
+  # = More about ID3 and decision trees
+  #
+  # * http://en.wikipedia.org/wiki/Decision_tree
+  # * http://en.wikipedia.org/wiki/ID3_algorithm
+  #
+  # = About the project
+  # Author::    Sergio Fierens
+  # License::   MPL 1.1
+  class ID3
+    # Natural logarithm of 2; ID3.log2 divides by it to change the base of
+    # Math.log from e to 2.
+    LOG2 = Math.log(2)
+    attr_reader :data_labels
+    # Create a new decision tree. If your data is classified with N attributes
+    # and M examples, then your data examples must have the following format:
+    #
+    #     [   [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1,  CATEGORY_VAL1],
+    #         [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2,  CATEGORY_VAL2],
+    #         ...
+    #         [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM,  CATEGORY_VALM],
+    #     ]
+    #
+    # e.g.
+    #     [   ['New York',  '<30',      'M', 'Y'],
+    #          ['Chicago',     '<30',      'M', 'Y'],
+    #          ['Chicago',     '<30',      'F', 'Y'],
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['Chicago',     '[30-50)',  'M', 'Y'],
+    #          ['New York',  '[30-50)',  'F', 'N'],
+    #          ['Chicago',     '[30-50)',  'F', 'Y'],
+    #          ['New York',  '[30-50)',  'F', 'N'],
+    #          ['Chicago',     '[50-80]', 'M', 'N'],
+    #          ['New York',  '[50-80]', 'F', 'N'],
+    #          ['New York',  '[50-80]', 'M', 'N'],
+    #          ['Chicago',     '[50-80]', 'M', 'N'],
+    #          ['New York',  '[50-80]', 'F', 'N'],
+    #          ['Chicago',     '>80',      'F', 'Y']
+    #        ]
+    #
+    # Data labels must have the following format:
+    #     [ 'city', 'age_range', 'gender', 'marketing_target'  ]
+    #
+    # If you do not provide labels for your data, the following labels will
+    # be created by default (one ATTRIBUTE_i per column of the first example
+    # except the last, which becomes CATEGORY):
+    #     [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY'  ]
+    #
+    # Raises a RuntimeError if data_examples is nil or empty.
+    def initialize(data_examples, data_labels=nil)
+      raise "Examples data set must not be empty." if !data_examples || data_examples.empty?
+      unless data_labels
+        attribute_count = data_examples[0].length - 1
+        data_labels = (1..attribute_count).collect { |i| "ATTRIBUTE_#{i}" }
+        data_labels << "CATEGORY"
+      end
+      @data_labels = data_labels
+      preprocess_data(data_examples)
+    end
+    # You can evaluate new data, predicting its category.
+    # e.g.
+    #   id3.eval(['New York',  '<30', 'F'])  # => 'Y'
+    def eval(data)
+      @tree.value(data)
+    end
+    # This method returns the generated rules in ruby code.
+    # e.g.
+    #
+    #   id3.to_s
+    #     # =>  if age_range=='<30' then marketing_target='Y'
+    #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
+    #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
+    #           elsif age_range=='[50-80]' then marketing_target='N'
+    #           elsif age_range=='>80' then marketing_target='Y'
+    #           else raise 'There was not enough information during training to do a proper induction for this data element' end
+    #
+    # It is a nice way to inspect induction results, and also to execute them:
+    #     age_range = '<30'
+    #     marketing_target = nil
+    #     eval id3.to_s
+    #     puts marketing_target
+    #       # =>  'Y'
+    def to_s
+      rules = @tree.get_rules.collect do |rule|
+        conditions = rule[0..-2]
+        # A tree that is a single category node yields a rule without any
+        # condition; emit an always-true test so the output stays valid ruby.
+        condition = conditions.empty? ? 'true' : conditions.join(' and ')
+        "#{condition} then #{rule.last}"
+      end
+      "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
+    end
+    # Sum of the given numeric values (0 for an empty list).
+    # Class-level helper: the "private" keyword does not apply to methods
+    # defined with "def self.", so this has always been publicly callable.
+    def self.sum(values)
+      values.inject(0) { |total, x| total + x }
+    end
+    # Base 2 logarithm of z. Returns 0.0 for z == 0 so that a zero
+    # proportion contributes nothing when the entropy terms are added up.
+    def self.log2(z)
+      return 0.0 if z == 0
+      Math.log(z)/LOG2
+    end
+    private
+    # Builds the decision tree for the given examples and stores it in @tree.
+    def preprocess_data(data_examples)
+      @tree = build_node(data_examples)
+    end
+    # Recursively builds the (sub)tree for data_examples.
+    # flag_att holds the indexes of the attributes already used on the path
+    # from the root to this node. It is never mutated: each child receives
+    # a new array, so attributes chosen in one branch stay available to
+    # sibling branches.
+    def build_node(data_examples, flag_att = [])
+      return ErrorNode.new if data_examples.length == 0
+      att_domain = domain(data_examples)
+      # Only one category left: this branch is a leaf.
+      return CategoryNode.new(@data_labels.last, att_domain.last[0]) if att_domain.last.length == 1
+      best_index = min_entropy_index(data_examples, att_domain, flag_att)
+      subsets = split_data_examples(data_examples, att_domain, best_index)
+      # The chosen attribute does not separate the examples any further:
+      # settle for the most frequent category.
+      return CategoryNode.new(@data_labels.last, most_freq(data_examples, att_domain)) if subsets.length == 1
+      used_attributes = flag_att + [best_index]
+      nodes = subsets.collect do |partial_data_examples|
+        build_node(partial_data_examples, used_attributes)
+      end
+      return EvaluationNode.new(@data_labels, best_index, att_domain[best_index], nodes)
+    end
+    # Returns the category that appears most often in examples. On a tie the
+    # category listed first in domain.last wins.
+    def most_freq(examples, domain)
+      categories = domain.last
+      freqs = Array.new(categories.length, 0)
+      examples.each do |example|
+        freqs[categories.index(example.last)] += 1
+      end
+      categories[freqs.index(freqs.max)]
+    end
+    # Groups the examples by their value for the attribute at att_index.
+    # The returned array is ordered like domain[att_index]: the element at
+    # position i holds the examples whose value is domain[att_index][i].
+    def split_data_examples(data_examples, domain, att_index)
+      data_examples_array = []
+      data_examples.each do |example|
+        att_value_index = domain[att_index].index(example[att_index])
+        (data_examples_array[att_value_index] ||= []) << example
+      end
+      return data_examples_array
+    end
+    # Returns the index of the attribute (not listed in flag_att) whose split
+    # has the lowest entropy; the first one wins on a tie. Returns 0 when
+    # every attribute is listed in flag_att.
+    def min_entropy_index(data_examples, domain, flag_att=[])
+      min_entropy = nil
+      min_index = 0
+      domain[0..-2].each_index do |index|
+        next if flag_att.include?(index)
+        att_entropy = entropy(freq_grid(index, data_examples, domain), data_examples.length)
+        if !min_entropy || att_entropy < min_entropy
+          min_entropy = att_entropy
+          min_index = index
+        end
+      end
+      return min_index
+    end
+    # Returns one array per data label holding the distinct values found in
+    # that column of data_examples, in order of first appearance. Columns
+    # beyond the number of labels are ignored.
+    def domain(data_examples)
+      domain = Array.new(@data_labels.length) { [] }
+      data_examples.each do |data|
+        data.each_index do |i|
+          domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
+        end
+      end
+      return domain
+    end
+    # Returns a grid of counters: grid[v][c] is the number of examples whose
+    # attribute att_index has the v-th value of its domain and whose category
+    # is the c-th value of the category domain.
+    def freq_grid(att_index, data_examples, domain)
+      # Initialize empty grid
+      grid = Array.new(domain[att_index].length) { Array.new(domain.last.length, 0) }
+      # Fill grid with frequencies
+      data_examples.each do |example|
+        att_val_index = domain[att_index].index(example[att_index])
+        category_index = domain.last.index(example.last)
+        grid[att_val_index][category_index] += 1
+      end
+      return grid
+    end
+    # Entropy of a split described by freq_grid: the category entropy of each
+    # attribute value, weighted by the share of the total_examples having
+    # that value.
+    def entropy(freq_grid, total_examples)
+      entropy = 0
+      freq_grid.each do |att_freq|
+        att_total_freq = ID3.sum(att_freq)
+        partial_entropy = 0
+        if att_total_freq != 0
+          att_freq.each do |freq|
+            prop = freq.to_f/att_total_freq
+            partial_entropy += (-1*prop*ID3.log2(prop))
+          end
+        end
+        entropy += (att_total_freq.to_f/total_examples) * partial_entropy
+      end
+      return entropy
+    end
+  end
+  # Inner node of the decision tree: looks at one attribute of the data and
+  # delegates to the child node matching the value of that attribute.
+  class EvaluationNode
+    attr_reader :index, :values, :nodes
+    # data_labels:: labels of all the columns (used to print rules)
+    # index::       position of the attribute this node evaluates
+    # values::      the attribute values known to this node
+    # nodes::       child nodes, in the same order as values
+    def initialize(data_labels, index, values, nodes)
+      @index = index
+      @values = values
+      @nodes = nodes
+      @data_labels = data_labels
+    end
+    # Returns the category predicted for data by the matching child node.
+    # When the value of the evaluated attribute is not one of the known
+    # values, the call is delegated to an ErrorNode, which raises the "not
+    # enough information" error (it used to fail with a NameError caused by
+    # an undefined rule_not_found helper).
+    def value(data)
+      position = @values.index(data[@index])
+      return ErrorNode.new.value(data) unless position
+      @nodes[position].value(data)
+    end
+    # Returns the rules of every child node, each one prefixed with the
+    # condition on this node's attribute that leads to that child.
+    def get_rules
+      rule_set = []
+      @nodes.each_with_index do |child_node, child_node_index|
+        my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
+        child_node.get_rules.each do |child_rule|
+          rule_set << child_rule.unshift(my_rule)
+        end
+      end
+      return rule_set
+    end
+  end
+  # Leaf of the decision tree: always answers with one fixed category.
+  class CategoryNode
+    # label:: name of the category column (used to print rules)
+    # value:: the category this leaf stands for
+    def initialize(label, value)
+      @label, @value = label, value
+    end
+    # Returns the category of this leaf; data is not inspected.
+    def value(data)
+      @value
+    end
+    # Returns a single rule holding the assignment of the category.
+    def get_rules
+      assignment = "#{@label}='#{@value}'"
+      [[assignment]]
+    end
+  end
+  # Node used where no category could be induced: evaluating it always fails.
+  class ErrorNode
+    # Always raises a RuntimeError; data is not inspected.
+    def value(data)
+      message = "There was not enough information during training to do a proper induction for this data element."
+      raise message
+    end
+    # An error node contributes no rules.
+    def get_rules
+      []
+    end
+  end
+end

data/lib/genetic_algorithm/genetic_algorithm.rb ADDED Viewed

@@ -0,0 +1,268 @@
+#
+# The GeneticAlgorithm module implements the GeneticSearch and Chromosome
+# classes. GeneticSearch is a generic class and can be used to solve
+# any kind of problem. It performs a stochastic search
+# for the solution of a given problem.
+#
+# The Chromosome is "problem specific". The built-in Ai4r Chromosome class was
+# designed to model the Travelling salesman problem. If you want to solve another
+# type of problem, you will have to modify the Chromosome class, by overwriting
+# its fitness, reproduce, and mutate functions, to model your specific problem.
+#
+# Author::    Sergio Fierens
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+module GeneticAlgorithm
+  #   This class is used to automatically:
+  #
+  #     1. Choose initial population
+  #     2. Evaluate the fitness of each individual in the population
+  #     3. Repeat
+  #           1. Select best-ranking individuals to reproduce
+  #           2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
+  #           3. Evaluate the individual fitnesses of the offspring
+  #           4. Replace worst ranked part of population with offspring
+  #     4. Until termination
+  #
+  #   If you want to customize the algorithm, you must modify any of the following classes:
+  #     - Chromosome
+  #     - Population
+  class GeneticSearch
+    attr_accessor :population
+    def initialize(initial_population_size, generations)
+      @population_size = initial_population_size
+      @max_generation = generations
+      @generation = 0
+    end
+    #     1. Choose initial population
+    #     2. Evaluate the fitness of each individual in the population
+    #     3. Repeat
+    #           1. Select best-ranking individuals to reproduce
+    #           2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
+    #           3. Evaluate the individual fitnesses of the offspring
+    #           4. Replace worst ranked part of population with offspring
+    #     4. Until termination
+    #     5. Return the best chromosome
+    def run
+      generate_initial_population                    #Generate initial population
+      @max_generation.times do
+        selected_to_breed = selection                #Evaluates current population
+        offsprings = reproduction selected_to_breed  #Generate the population for this new generation
+        replace_worst_ranked offsprings
+      end
+      return best_chromosome
+    end
+    def generate_initial_population
+     @population = []
+     @population_size.times do
+       population << Chromosome.seed
+     end
+    end
+    # Select best-ranking individuals to reproduce
+    #
+    # Selection is the stage of a genetic algorithm in which individual
+    # genomes are chosen from a population for later breeding.
+    # There are several generic selection algorithms, such as
+    # tournament selection and roulette wheel selection. We implemented the
+    # latest.
+    #
+    # Steps:
+    #
+    # 1. The fitness function is evaluated for each individual, providing fitness values
+    # 2. The population is sorted by descending fitness values.
+    # 3. The fitness values ar then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
+    # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added togheter).
+    # 5. The selected individual is the first one whose accumulated normalized value (its is normalized value plus the normalized values of the chromosomes prior it) greater than R.
+    # 6. We repeat steps 4 and 5, 2/3 times the population size.
+    def selection
+      @population.sort! { |a, b| b.fitness <=> a.fitness}
+      best_fitness = @population[0].fitness
+      worst_fitness = @population.last.fitness
+      acum_fitness = 0
+      if best_fitness-worst_fitness > 0
+      @population.each do |chromosome|
+        chromosome.normalized_fitness = (chromosome.fitness - worst_fitness)/(best_fitness-worst_fitness)
+        acum_fitness += chromosome.normalized_fitness
+      end
+      else
+        @population.each { |chromosome| chromosome.normalized_fitness = 1}
+      end
+      selected_to_breed = []
+      ((2*@population_size)/3).times do
+        selected_to_breed << select_random_individual(acum_fitness)
+      end
+      selected_to_breed
+    end
+    # We combine each pair of selected chromosome using the method
+    # Chromosome.reproduce
+    #
+    # The reproduction will also call the Chromosome.mutate method with
+    # each member of the population. You should implement Chromosome.mutate
+    # to only change (mutate) randomly. E.g. You could effectivly change the
+    # chromosome only if
+    #     rand < ((1 - chromosome.normalized_fitness) * 0.4)
+    def reproduction(selected_to_breed)
+      offsprings = []
+      0.upto(selected_to_breed.length/2-1) do |i|
+        offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
+      end
+      @population.each do |individual|
+        Chromosome.mutate(individual)
+      end
+      return offsprings
+    end
+    # Replace worst ranked part of population with offspring
+    def replace_worst_ranked(offsprings)
+      size = offsprings.length
+      @population = @population [0..((-1*size)-1)] + offsprings
+    end
+    # Select the best chromosome in the population
+    def best_chromosome
+      the_best = @population[0]
+      @population.each do |chromosome|
+        the_best = chromosome if chromosome.fitness > the_best.fitness
+      end
+      return the_best
+    end
+    private
+    def select_random_individual(acum_fitness)
+      select_random_target = acum_fitness * rand
+      local_acum = 0
+      @population.each do |chromosome|
+        local_acum += chromosome.normalized_fitness
+        return chromosome if local_acum >= select_random_target
+      end
+    end
+  end
+  # A Chromosome is a representation of an individual solution for a specific
+  # problem. You will have to redefine your Chromosome representation for each
+  # particular problem, along with its fitness, mutate, reproduce, and seed
+  # functions.
+  class Chromosome
+    attr_accessor :data
+    attr_accessor :normalized_fitness
+    def initialize(data)
+      @data = data
+    end
+    # The fitness function quantifies the optimality of a solution
+    # (that is, a chromosome) in a genetic algorithm so that that particular
+    # chromosome may be ranked against all the other chromosomes.
+    #
+    # Optimal chromosomes, or at least chromosomes which are more optimal,
+    # are allowed to breed and mix their datasets by any of several techniques,
+    # producing a new generation that will (hopefully) be even better.
+    def fitness
+      return @fitness if @fitness
+      last_token = @data[0]
+      cost = 0
+      @data[1..-1].each do |token|
+        cost += @@costs[last_token][token]
+        last_token = token
+      end
+      @fitness = -1 * cost
+      return @fitness
+    end
+    # mutation is a function used to maintain genetic diversity from one
+    # generation of a population of chromosomes to the next. It is analogous
+    # to biological mutation.
+    #
+    # The purpose of mutation in GAs is to allow the
+    # algorithm to avoid local minima by preventing the population of
+    # chromosomes from becoming too similar to each other, thus slowing or even
+    # stopping evolution.
+    #
+    # Calling the mutate function will "probably" slightly change a chromosome
+    # randomly.
+    #
+    # This implementation of "mutation" will (probably) reverse the
+    # order of 2 consecutive randome nodes
+    # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
+    #     ((1 - chromosome.normalized_fitness) * 0.4)
+    def self.mutate(chromosome)
+      if chromosome.normalized_fitness && rand < ((1 - chromosome.normalized_fitness) * 0.3)
+        data = chromosome.data
+        index = rand(data.length-1)
+        data[index], data[index+1] = data[index+1], data[index]
+        chromosome.data = data
+        @fitness = nil
+      end
+    end
+    # Reproduction is used to vary the programming of a chromosome or
+    # chromosomes from one generation to the next. There are several ways to
+    # combine two chromosomes: One-point crossover, Two-point crossover,
+    # "Cut and splice", edge recombination, and more.
+    #
+    # The method is usually dependant of the problem domain.
+    # In this case, we have implemented edge recombination, wich is the
+    # most used reproduction algorithm for the Travelling salesman problem.
+    def self.reproduce(a, b)
+      data_size = @@costs[0].length
+      available = []
+      0.upto(data_size-1) { |n| available << n }
+      token = a.data[0]
+      spawn = [token]
+      available.delete(token)
+      while available.length > 0 do
+        #Select next
+        if token != b.data.last && available.include?(b.data[b.data.index(token)+1])
+          next_token = b.data[b.data.index(token)+1]
+        elsif token != a.data.last && available.include?(a.data[a.data.index(token)+1])
+          next_token = a.data[a.data.index(token)+1]
+        else
+          next_token = available[rand(available.length)]
+        end
+        #Add to spawn
+        token = next_token
+        available.delete(token)
+        spawn << next_token
+        a, b = b, a if rand < 0.4
+      end
+      return Chromosome.new(spawn)
+    end
+    # Initializes an individual solution (chromosome) for the initial
+    # population. Usually the chromosome is generated randomly, but you can
+    # use some problem domain knowledge, to generate better initial solutions.
+    def self.seed
+      data_size = @@costs[0].length
+      available = []
+      0.upto(data_size-1) { |n| available << n }
+      seed = []
+      while available.length > 0 do
+        index = rand(available.length)
+        seed << available.delete_at(index)
+      end
+      return Chromosome.new(seed)
+    end
+    def self.set_cost_matrix(costs)
+      @@costs = costs
+    end
+  end
+end