RubyGems - nirvdrum-ai4r - Versions diffs - 1.9.1 - Mend

nirvdrum-ai4r 1.9.1

Files changed (150) hide show

data/.gitignore +1 -0
data/.rakeTasks +7 -0
data/README.rdoc +56 -0
data/Rakefile.rb +42 -0
data/VERSION +1 -0
data/ai4r.gemspec +221 -0
data/change_log +49 -0
data/examples/classifiers/id3_data.csv +121 -0
data/examples/classifiers/id3_example.rb +29 -0
data/examples/classifiers/naive_bayes_data.csv +11 -0
data/examples/classifiers/naive_bayes_example.rb +16 -0
data/examples/classifiers/results.txt +31 -0
data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
data/examples/genetic_algorithm/travel_cost.csv +16 -0
data/examples/neural_network/backpropagation_example.rb +67 -0
data/examples/neural_network/patterns_with_base_noise.rb +68 -0
data/examples/neural_network/patterns_with_noise.rb +66 -0
data/examples/neural_network/training_patterns.rb +68 -0
data/examples/neural_network/xor_example.rb +35 -0
data/examples/som/som_data.rb +156 -0
data/examples/som/som_multi_node_example.rb +22 -0
data/examples/som/som_single_example.rb +24 -0
data/lib/ai4r.rb +32 -0
data/lib/ai4r/classifiers/classifier.rb +59 -0
data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
data/lib/ai4r/classifiers/id3.rb +326 -0
data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
data/lib/ai4r/classifiers/one_r.rb +110 -0
data/lib/ai4r/classifiers/prism.rb +197 -0
data/lib/ai4r/classifiers/zero_r.rb +73 -0
data/lib/ai4r/clusterers/average_linkage.rb +59 -0
data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
data/lib/ai4r/clusterers/clusterer.rb +61 -0
data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
data/lib/ai4r/clusterers/diana.rb +139 -0
data/lib/ai4r/clusterers/k_means.rb +126 -0
data/lib/ai4r/clusterers/median_linkage.rb +61 -0
data/lib/ai4r/clusterers/single_linkage.rb +194 -0
data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
data/lib/ai4r/data/data_set.rb +266 -0
data/lib/ai4r/data/parameterizable.rb +64 -0
data/lib/ai4r/data/proximity.rb +100 -0
data/lib/ai4r/data/statistics.rb +77 -0
data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
data/lib/ai4r/neural_network/backpropagation.rb +293 -0
data/lib/ai4r/neural_network/hopfield.rb +149 -0
data/lib/ai4r/som/layer.rb +68 -0
data/lib/ai4r/som/node.rb +96 -0
data/lib/ai4r/som/som.rb +155 -0
data/lib/ai4r/som/two_phase_layer.rb +90 -0
data/site/forrest.properties +152 -0
data/site/forrest.properties.dispatcher.properties +25 -0
data/site/forrest.properties.xml +29 -0
data/site/src/documentation/README.txt +7 -0
data/site/src/documentation/classes/CatalogManager.properties +62 -0
data/site/src/documentation/content/locationmap.xml +72 -0
data/site/src/documentation/content/xdocs/downloads.html +9 -0
data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
data/site/src/documentation/content/xdocs/index.xml +155 -0
data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
data/site/src/documentation/content/xdocs/site.xml +54 -0
data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
data/site/src/documentation/content/xdocs/tabs.xml +35 -0
data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
data/site/src/documentation/resources/images/c.png +0 -0
data/site/src/documentation/resources/images/c_wbn.png +0 -0
data/site/src/documentation/resources/images/c_wn.png +0 -0
data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
data/site/src/documentation/resources/images/ero.gif +0 -0
data/site/src/documentation/resources/images/europe2.png +0 -0
data/site/src/documentation/resources/images/europe3.png +0 -0
data/site/src/documentation/resources/images/fitness.png +0 -0
data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
data/site/src/documentation/resources/images/icon-a.png +0 -0
data/site/src/documentation/resources/images/icon-b.png +0 -0
data/site/src/documentation/resources/images/icon.png +0 -0
data/site/src/documentation/resources/images/jadeferret.png +0 -0
data/site/src/documentation/resources/images/my_email.png +0 -0
data/site/src/documentation/resources/images/neural_network_example.png +0 -0
data/site/src/documentation/resources/images/project-logo.png +0 -0
data/site/src/documentation/resources/images/rubyforge.png +0 -0
data/site/src/documentation/resources/images/s.png +0 -0
data/site/src/documentation/resources/images/s_wbn.png +0 -0
data/site/src/documentation/resources/images/s_wn.png +0 -0
data/site/src/documentation/resources/images/sigmoid.png +0 -0
data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
data/site/src/documentation/resources/images/t.png +0 -0
data/site/src/documentation/resources/images/t_wbn.png +0 -0
data/site/src/documentation/resources/images/t_wn.png +0 -0
data/site/src/documentation/resources/schema/catalog.xcat +29 -0
data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
data/site/src/documentation/sitemap.xmap +66 -0
data/site/src/documentation/skinconf.xml +418 -0
data/site/src/documentation/translations/langcode.xml +29 -0
data/site/src/documentation/translations/languages_de.xml +24 -0
data/site/src/documentation/translations/languages_en.xml +24 -0
data/site/src/documentation/translations/languages_es.xml +22 -0
data/site/src/documentation/translations/languages_fr.xml +24 -0
data/site/src/documentation/translations/languages_nl.xml +24 -0
data/site/src/documentation/translations/menu.xml +33 -0
data/site/src/documentation/translations/menu_af.xml +33 -0
data/site/src/documentation/translations/menu_de.xml +33 -0
data/site/src/documentation/translations/menu_es.xml +33 -0
data/site/src/documentation/translations/menu_fr.xml +33 -0
data/site/src/documentation/translations/menu_it.xml +33 -0
data/site/src/documentation/translations/menu_nl.xml +33 -0
data/site/src/documentation/translations/menu_no.xml +33 -0
data/site/src/documentation/translations/menu_ru.xml +33 -0
data/site/src/documentation/translations/menu_sk.xml +33 -0
data/site/src/documentation/translations/tabs.xml +22 -0
data/site/src/documentation/translations/tabs_de.xml +22 -0
data/site/src/documentation/translations/tabs_es.xml +22 -0
data/site/src/documentation/translations/tabs_fr.xml +22 -0
data/site/src/documentation/translations/tabs_nl.xml +22 -0
data/test/classifiers/hyperpipes_test.rb +84 -0
data/test/classifiers/id3_test.rb +208 -0
data/test/classifiers/multilayer_perceptron_test.rb +79 -0
data/test/classifiers/naive_bayes_test.rb +43 -0
data/test/classifiers/one_r_test.rb +62 -0
data/test/classifiers/prism_test.rb +85 -0
data/test/classifiers/zero_r_test.rb +50 -0
data/test/clusterers/average_linkage_test.rb +51 -0
data/test/clusterers/bisecting_k_means_test.rb +66 -0
data/test/clusterers/centroid_linkage_test.rb +53 -0
data/test/clusterers/complete_linkage_test.rb +57 -0
data/test/clusterers/diana_test.rb +69 -0
data/test/clusterers/k_means_test.rb +100 -0
data/test/clusterers/median_linkage_test.rb +53 -0
data/test/clusterers/single_linkage_test.rb +122 -0
data/test/clusterers/ward_linkage_test.rb +53 -0
data/test/clusterers/weighted_average_linkage_test.rb +53 -0
data/test/data/data_set.csv +121 -0
data/test/data/data_set_test.rb +96 -0
data/test/data/proximity_test.rb +81 -0
data/test/data/statistics_data_set.csv +5 -0
data/test/data/statistics_test.rb +65 -0
data/test/experiment/classifier_evaluator_test.rb +76 -0
data/test/genetic_algorithm/chromosome_test.rb +58 -0
data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
data/test/neural_network/backpropagation_test.rb +69 -0
data/test/neural_network/hopfield_test.rb +72 -0
data/test/som/som_test.rb +97 -0
metadata +238 -0

@@ -0,0 +1,156 @@
+# Data comes from the Iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris).
+# It is the full dataset with the last column removed.
+# The website provides additional information on the dataset itself (attributes, class distribution, etc.).
+SOM_DATA = [
+        [5.1, 3.5, 1.4, 0.2],
+        [4.9, 3.0, 1.4, 0.2],
+        [4.7, 3.2, 1.3, 0.2],
+        [4.6, 3.1, 1.5, 0.2],
+        [5.0, 3.6, 1.4, 0.2],
+        [5.4, 3.9, 1.7, 0.4],
+        [4.6, 3.4, 1.4, 0.3],
+        [5.0, 3.4, 1.5, 0.2],
+        [4.4, 2.9, 1.4, 0.2],
+        [4.9, 3.1, 1.5, 0.1],
+        [5.4, 3.7, 1.5, 0.2],
+        [4.8, 3.4, 1.6, 0.2],
+        [4.8, 3.0, 1.4, 0.1],
+        [4.3, 3.0, 1.1, 0.1],
+        [5.8, 4.0, 1.2, 0.2],
+        [5.7, 4.4, 1.5, 0.4],
+        [5.4, 3.9, 1.3, 0.4],
+        [5.1, 3.5, 1.4, 0.3],
+        [5.7, 3.8, 1.7, 0.3],
+        [5.1, 3.8, 1.5, 0.3],
+        [5.4, 3.4, 1.7, 0.2],
+        [5.1, 3.7, 1.5, 0.4],
+        [4.6, 3.6, 1.0, 0.2],
+        [5.1, 3.3, 1.7, 0.5],
+        [4.8, 3.4, 1.9, 0.2],
+        [5.0, 3.0, 1.6, 0.2],
+        [5.0, 3.4, 1.6, 0.4],
+        [5.2, 3.5, 1.5, 0.2],
+        [5.2, 3.4, 1.4, 0.2],
+        [4.7, 3.2, 1.6, 0.2],
+        [4.8, 3.1, 1.6, 0.2],
+        [5.4, 3.4, 1.5, 0.4],
+        [5.2, 4.1, 1.5, 0.1],
+        [5.5, 4.2, 1.4, 0.2],
+        [4.9, 3.1, 1.5, 0.1],
+        [5.0, 3.2, 1.2, 0.2],
+        [5.5, 3.5, 1.3, 0.2],
+        [4.9, 3.1, 1.5, 0.1],
+        [4.4, 3.0, 1.3, 0.2],
+        [5.1, 3.4, 1.5, 0.2],
+        [5.0, 3.5, 1.3, 0.3],
+        [4.5, 2.3, 1.3, 0.3],
+        [4.4, 3.2, 1.3, 0.2],
+        [5.0, 3.5, 1.6, 0.6],
+        [5.1, 3.8, 1.9, 0.4],
+        [4.8, 3.0, 1.4, 0.3],
+        [5.1, 3.8, 1.6, 0.2],
+        [4.6, 3.2, 1.4, 0.2],
+        [5.3, 3.7, 1.5, 0.2],
+        [5.0, 3.3, 1.4, 0.2],
+        [7.0, 3.2, 4.7, 1.4],
+        [6.4, 3.2, 4.5, 1.5],
+        [6.9, 3.1, 4.9, 1.5],
+        [5.5, 2.3, 4.0, 1.3],
+        [6.5, 2.8, 4.6, 1.5],
+        [5.7, 2.8, 4.5, 1.3],
+        [6.3, 3.3, 4.7, 1.6],
+        [4.9, 2.4, 3.3, 1.0],
+        [6.6, 2.9, 4.6, 1.3],
+        [5.2, 2.7, 3.9, 1.4],
+        [5.0, 2.0, 3.5, 1.0],
+        [5.9, 3.0, 4.2, 1.5],
+        [6.0, 2.2, 4.0, 1.0],
+        [6.1, 2.9, 4.7, 1.4],
+        [5.6, 2.9, 3.6, 1.3],
+        [6.7, 3.1, 4.4, 1.4],
+        [5.6, 3.0, 4.5, 1.5],
+        [5.8, 2.7, 4.1, 1.0],
+        [6.2, 2.2, 4.5, 1.5],
+        [5.6, 2.5, 3.9, 1.1],
+        [5.9, 3.2, 4.8, 1.8],
+        [6.1, 2.8, 4.0, 1.3],
+        [6.3, 2.5, 4.9, 1.5],
+        [6.1, 2.8, 4.7, 1.2],
+        [6.4, 2.9, 4.3, 1.3],
+        [6.6, 3.0, 4.4, 1.4],
+        [6.8, 2.8, 4.8, 1.4],
+        [6.7, 3.0, 5.0, 1.7],
+        [6.0, 2.9, 4.5, 1.5],
+        [5.7, 2.6, 3.5, 1.0],
+        [5.5, 2.4, 3.8, 1.1],
+        [5.5, 2.4, 3.7, 1.0],
+        [5.8, 2.7, 3.9, 1.2],
+        [6.0, 2.7, 5.1, 1.6],
+        [5.4, 3.0, 4.5, 1.5],
+        [6.0, 3.4, 4.5, 1.6],
+        [6.7, 3.1, 4.7, 1.5],
+        [6.3, 2.3, 4.4, 1.3],
+        [5.6, 3.0, 4.1, 1.3],
+        [5.5, 2.5, 4.0, 1.3],
+        [5.5, 2.6, 4.4, 1.2],
+        [6.1, 3.0, 4.6, 1.4],
+        [5.8, 2.6, 4.0, 1.2],
+        [5.0, 2.3, 3.3, 1.0],
+        [5.6, 2.7, 4.2, 1.3],
+        [5.7, 3.0, 4.2, 1.2],
+        [5.7, 2.9, 4.2, 1.3],
+        [6.2, 2.9, 4.3, 1.3],
+        [5.1, 2.5, 3.0, 1.1],
+        [5.7, 2.8, 4.1, 1.3],
+        [6.3, 3.3, 6.0, 2.5],
+        [5.8, 2.7, 5.1, 1.9],
+        [7.1, 3.0, 5.9, 2.1],
+        [6.3, 2.9, 5.6, 1.8],
+        [6.5, 3.0, 5.8, 2.2],
+        [7.6, 3.0, 6.6, 2.1],
+        [4.9, 2.5, 4.5, 1.7],
+        [7.3, 2.9, 6.3, 1.8],
+        [6.7, 2.5, 5.8, 1.8],
+        [7.2, 3.6, 6.1, 2.5],
+        [6.5, 3.2, 5.1, 2.0],
+        [6.4, 2.7, 5.3, 1.9],
+        [6.8, 3.0, 5.5, 2.1],
+        [5.7, 2.5, 5.0, 2.0],
+        [5.8, 2.8, 5.1, 2.4],
+        [6.4, 3.2, 5.3, 2.3],
+        [6.5, 3.0, 5.5, 1.8],
+        [7.7, 3.8, 6.7, 2.2],
+        [7.7, 2.6, 6.9, 2.3],
+        [6.0, 2.2, 5.0, 1.5],
+        [6.9, 3.2, 5.7, 2.3],
+        [5.6, 2.8, 4.9, 2.0],
+        [7.7, 2.8, 6.7, 2.0],
+        [6.3, 2.7, 4.9, 1.8],
+        [6.7, 3.3, 5.7, 2.1],
+        [7.2, 3.2, 6.0, 1.8],
+        [6.2, 2.8, 4.8, 1.8],
+        [6.1, 3.0, 4.9, 1.8],
+        [6.4, 2.8, 5.6, 2.1],
+        [7.2, 3.0, 5.8, 1.6],
+        [7.4, 2.8, 6.1, 1.9],
+        [7.9, 3.8, 6.4, 2.0],
+        [6.4, 2.8, 5.6, 2.2],
+        [6.3, 2.8, 5.1, 1.5],
+        [6.1, 2.6, 5.6, 1.4],
+        [7.7, 3.0, 6.1, 2.3],
+        [6.3, 3.4, 5.6, 2.4],
+        [6.4, 3.1, 5.5, 1.8],
+        [6.0, 3.0, 4.8, 1.8],
+        [6.9, 3.1, 5.4, 2.1],
+        [6.7, 3.1, 5.6, 2.4],
+        [6.9, 3.1, 5.1, 2.3],
+        [5.8, 2.7, 5.1, 1.9],
+        [6.8, 3.2, 5.9, 2.3],
+        [6.7, 3.3, 5.7, 2.5],
+        [6.7, 3.0, 5.2, 2.3],
+        [6.3, 2.5, 5.0, 1.9],
+        [6.5, 3.0, 5.2, 2.0],
+        [6.2, 3.4, 5.4, 2.3],
+        [5.9, 3.0, 5.1, 1.8],
+]

data/examples/som/som_multi_node_example.rb ADDED

@@ -0,0 +1,22 @@
+# this example shows the impact of the size of a som on the global error distance
+require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
+require File.dirname(__FILE__) + '/som_data'
+require 'benchmark'
+# Build and train one SOM per layer size, from 3 nodes (the minimum) up to 12,
+# printing the global error distance before and after training together with
+# the time spent in training.
+3.upto(12) do |node_count|
+  puts "Nodes: #{node_count}"
+  layer = Ai4r::Som::TwoPhaseLayer.new(node_count)
+  som = Ai4r::Som::Som.new(4, 8, layer)
+  som.initiate_map
+  puts "global error distance: #{som.global_error(SOM_DATA)}"
+  puts "\ntraining the som\n"
+  elapsed = Benchmark.measure { som.train(SOM_DATA) }
+  puts "Elapsed time for training: #{elapsed}"
+  puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
+end

data/examples/som/som_single_example.rb ADDED

@@ -0,0 +1,24 @@
+require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
+require File.dirname(__FILE__) + '/som_data'
+require 'benchmark'
+# Build a single SOM on a 10-node two-phase layer, dump the node weights and
+# the global error distance, train on SOM_DATA, then dump both again along
+# with the time spent in training.
+layer = Ai4r::Som::TwoPhaseLayer.new(10)
+som = Ai4r::Som::Som.new(4, 8, layer)
+som.initiate_map
+# Node weights before training.
+som.nodes.each { |node| p(node.weights) }
+puts "global error distance: #{som.global_error(SOM_DATA)}"
+puts "\ntraining the som\n"
+elapsed = Benchmark.measure { som.train(SOM_DATA) }
+# Node weights after training.
+som.nodes.each { |node| p(node.weights) }
+puts "Elapsed time for training: #{elapsed}"
+puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"

data/lib/ai4r.rb ADDED

@@ -0,0 +1,32 @@
+# Data
+require File.dirname(__FILE__) +  "/ai4r/data/data_set"
+require File.dirname(__FILE__) +  "/ai4r/data/statistics"
+require File.dirname(__FILE__) +  "/ai4r/data/proximity"
+require File.dirname(__FILE__) +  "/ai4r/data/parameterizable"
+# Clusterers
+require File.dirname(__FILE__) +  "/ai4r/clusterers/clusterer"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/k_means"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/bisecting_k_means"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/single_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/complete_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/average_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/weighted_average_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/centroid_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/median_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/ward_linkage"
+require File.dirname(__FILE__) +  "/ai4r/clusterers/diana"
+# Classifiers
+require File.dirname(__FILE__) +  "/ai4r/classifiers/classifier"
+require File.dirname(__FILE__) +  "/ai4r/classifiers/id3"
+require File.dirname(__FILE__) +  "/ai4r/classifiers/prism"
+require File.dirname(__FILE__) +  "/ai4r/classifiers/one_r"
+require File.dirname(__FILE__) +  "/ai4r/classifiers/zero_r"
+require File.dirname(__FILE__) +  "/ai4r/classifiers/hyperpipes"
+require File.dirname(__FILE__) +  "/ai4r/classifiers/naive_bayes"
+# Neural networks
+require File.dirname(__FILE__) +  "/ai4r/neural_network/backpropagation"
+require File.dirname(__FILE__) +  "/ai4r/neural_network/hopfield"
+# Genetic Algorithms
+require File.dirname(__FILE__) +  "/ai4r/genetic_algorithm/genetic_algorithm"
+# SOM
+require File.dirname(__FILE__) +  "/ai4r/som/som"

data/lib/ai4r/classifiers/classifier.rb ADDED

@@ -0,0 +1,59 @@
+# Author::    Sergio Fierens
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/parameterizable'
+module Ai4r
+  module Classifiers
+    # This class defines a common API for classifiers.
+    # All methods in this class must be implemented in subclasses.
+    class Classifier
+      include Ai4r::Data::Parameterizable
+      # Build a new classifier from the data examples found in data_set; the last
+      # attribute of each item is considered as the item class. This base version
+      # only raises NotImplementedError, so every subclass has to override it.
+      def build(data_set)
+        raise NotImplementedError
+      end
+      # Predict the class of a new data item. Raises NotImplementedError here; subclasses must override it.
+      # e.g.
+      #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
+      def eval(data)
+        raise NotImplementedError
+      end
+      # Return the generated rules as Ruby code. Raises NotImplementedError here; subclasses must override it.
+      # e.g.
+      #
+      #   classifier.get_rules
+      #     # =>  if age_range=='<30' then marketing_target='Y'
+      #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
+      #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
+      #           elsif age_range=='[50-80]' then marketing_target='N'
+      #           elsif age_range=='>80' then marketing_target='Y'
+      #           else raise 'There was not enough information during training to do a proper induction for this data element' end
+      #
+      # The returned code is a convenient way to inspect induction results, and it can also be executed:
+      #     age_range = '<30'
+      #     city='New York'
+      #     marketing_target = nil
+      #     eval classifier.get_rules
+      #     puts marketing_target
+      #       # =>  'Y'
+      def get_rules
+        raise NotImplementedError
+      end
+    end
+  end
+end

data/lib/ai4r/classifiers/hyperpipes.rb ADDED

@@ -0,0 +1,118 @@
+# Author::    Sergio Fierens (Implementation only)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require 'set'
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../classifiers/classifier'
+module Ai4r
+  module Classifiers
+    include Ai4r::Data
+    # = Introduction
+    #
+    # A fast classifier algorithm, created by Lucio de Souza Coelho
+    # and Len Trigg.
+    class Hyperpipes < Classifier
+      attr_reader :data_set, :pipes
+      # Build a new Hyperpipes classifier. You must provide a DataSet instance
+      # as parameter. The last attribute of each item is considered as
+      # the item class.
+      def build(data_set)
+        data_set.check_not_empty
+        @data_set = data_set
+        @domains = data_set.build_domains
+        @pipes = {}
+        @domains.last.each {|cat| @pipes[cat] = build_pipe(@data_set)}
+        @data_set.data_items.each {|item| update_pipe(@pipes[item.last], item) }
+        return self
+      end
+      # You can evaluate new data, predicting its class.
+      # e.g.
+      #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
+      def eval(data)
+        votes = Hash.new {0}
+        @pipes.each do |category, pipe|
+          pipe.each_with_index do |bounds, i|
+            if data[i].is_a? Numeric
+              votes[category]+=1 if data[i]>=bounds[:min] && data[i]<=bounds[:max]
+            else
+              votes[category]+=1 if bounds[data[i]]
+            end
+          end
+        end
+        return votes.to_a.max {|x, y| x.last <=> y.last}.first
+      end
+      # This method returns the generated rules in ruby code.
+      # e.g.
+      #
+      #   classifier.get_rules
+      #     # =>  if age_range == '<30' then marketing_target = 'Y'
+      #           elsif age_range == '[30-50)' then marketing_target = 'N'
+      #           elsif age_range == '[50-80]' then marketing_target = 'N'
+      #           end
+      #
+      # It is a nice way to inspect induction results, and also to execute them:
+      #     marketing_target = nil
+      #     eval classifier.get_rules
+      #     puts marketing_target
+      #       # =>  'Y'
+      def get_rules
+        rules = []
+        rules << "votes = Hash.new {0}"
+        data = @data_set.data_items.first
+        labels = @data_set.data_labels.collect {|l| l.to_s}
+        @pipes.each do |category, pipe|
+          pipe.each_with_index do |bounds, i|
+            rule = "votes['#{category}'] += 1 "
+            if data[i].is_a? Numeric
+              rule += "if #{labels[i]} >= #{bounds[:min]} && #{labels[i]} <= #{bounds[:max]}"
+            else
+              rule += "if #{bounds.inspect}[#{labels[i]}]"
+            end
+            rules << rule
+          end
+        end
+        rules << "#{labels.last} = votes.to_a.max {|x, y| x.last <=> y.last}.first"
+        return rules.join("\n")
+      end
+      protected
+      def build_pipe(data_set)
+        data_set.data_items.first[0...-1].collect do |att|
+          if att.is_a? Numeric
+            {:min=>1.0/0, :max=>-1.0/0}
+          else
+            Hash.new(false)
+          end
+        end
+      end
+      def update_pipe(pipe, data_item)
+        data_item[0...-1].each_with_index do |att, i|
+          if att.is_a? Numeric
+            pipe[i][:min] = att if att < pipe[i][:min]
+            pipe[i][:max] = att if att > pipe[i][:max]
+          else
+            pipe[i][att] = true
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ai4r/classifiers/id3.rb ADDED

@@ -0,0 +1,326 @@
+# Author::    Sergio Fierens (Implementation, Quinlan is
+# the creator of the algorithm)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../classifiers/classifier'
+module Ai4r
+  module Classifiers
+    # = Introduction
+    # This is an implementation of the ID3 algorithm (Quinlan)
+    # Given a set of preclassified examples, it builds a top-down
+    # induction of decision tree, biased by the information gain and
+    # entropy measure.
+    #
+    # * http://en.wikipedia.org/wiki/Decision_tree
+    # * http://en.wikipedia.org/wiki/ID3_algorithm
+    #
+    # = How to use it
+    #
+    #   DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target'  ]
+    #
+    #   DATA_ITEMS = [
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['Chicago',     '<30',      'M', 'Y'],
+    #          ['Chicago',     '<30',      'F', 'Y'],
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['Chicago',     '[30-50)',  'M', 'Y'],
+    #          ['New York',  '[30-50)',  'F', 'N'],
+    #          ['Chicago',     '[30-50)',  'F', 'Y'],
+    #          ['New York',  '[30-50)',  'F', 'N'],
+    #          ['Chicago',     '[50-80]', 'M', 'N'],
+    #          ['New York',  '[50-80]', 'F', 'N'],
+    #          ['New York',  '[50-80]', 'M', 'N'],
+    #          ['Chicago',     '[50-80]', 'M', 'N'],
+    #          ['New York',  '[50-80]', 'F', 'N'],
+    #          ['Chicago',     '>80',      'F', 'Y']
+    #        ]
+    #
+    #   data_set = DataSet.new(:data_items=>DATA_ITEMS, :data_labels=>DATA_LABELS)
+    #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
+    #
+    #   id3.get_rules
+    #     # =>  if age_range=='<30' then marketing_target='Y'
+    #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
+    #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
+    #           elsif age_range=='[50-80]' then marketing_target='N'
+    #           elsif age_range=='>80' then marketing_target='Y'
+    #           else raise 'There was not enough information during training to do a proper induction for this data element' end
+    #
+    #   id3.eval(['New York', '<30', 'M'])
+    #     # =>  'Y'
+    #
+    # = A better way to load the data
+    #
+    # In real life you will use many more training examples, with more
+    # attributes. Consider moving your data to an external CSV (comma-separated
+    # values) file.
+    #
+    #   data_file = "#{File.dirname(__FILE__)}/data_set.csv"
+    #   data_set = DataSet.load_csv_with_labels data_file
+    #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
+    #
+    # = A nice tip for data evaluation
+    #
+    #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
+    #
+    #   age_range = '<30'
+    #   marketing_target = nil
+    #   eval id3.get_rules
+    #   puts marketing_target
+    #     # =>  'Y'
+    #
+    # = More about ID3 and decision trees
+    #
+    # * http://en.wikipedia.org/wiki/Decision_tree
+    # * http://en.wikipedia.org/wiki/ID3_algorithm
+    #
+    # = About the project
+    # Author::    Sergio Fierens
+    # License::   MPL 1.1
+    # Url::       http://ai4r.rubyforge.org/
+    class ID3 < Classifier
+      attr_reader :data_set
+      # Create a new ID3 classifier. You must provide a DataSet instance
+      # as parameter. The last attribute of each item is considered as the
+      # item class.
+      def build(data_set)
+        data_set.check_not_empty
+        @data_set = data_set
+        preprocess_data(@data_set.data_items)
+        return self
+      end
+      # You can evaluate new data, predicting its category.
+      # e.g.
+      #   id3.eval(['New York',  '<30', 'F'])  # => 'Y'
+      def eval(data)
+        @tree.value(data) if @tree
+      end
+      # This method returns the generated rules in ruby code.
+      # e.g.
+      #
+      #   id3.get_rules
+      #     # =>  if age_range=='<30' then marketing_target='Y'
+      #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
+      #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
+      #           elsif age_range=='[50-80]' then marketing_target='N'
+      #           elsif age_range=='>80' then marketing_target='Y'
+      #           else raise 'There was not enough information during training to do a proper induction for this data element' end
+      #
+      # It is a nice way to inspect induction results, and also to execute them:
+      #     age_range = '<30'
+      #     marketing_target = nil
+      #     eval id3.get_rules
+      #     puts marketing_target
+      #       # =>  'Y'
+      def get_rules
+        #return "Empty ID3 tree" if !@tree
+        rules = @tree.get_rules
+        rules = rules.collect do |rule|
+            "#{rule[0..-2].join(' and ')} then #{rule.last}"
+        end
+        return "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
+      end
+      private
+      def preprocess_data(data_examples)
+        @tree = build_node(data_examples)
+      end
+      private
+      def build_node(data_examples, flag_att = [])
+        return ErrorNode.new if data_examples.length == 0
+        domain = domain(data_examples)
+        return CategoryNode.new(@data_set.data_labels.last, domain.last[0]) if domain.last.length == 1
+        min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
+        flag_att << min_entropy_index
+        split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
+        return CategoryNode.new(@data_set.data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
+        nodes = split_data_examples.collect do |partial_data_examples|
+          build_node(partial_data_examples, flag_att)
+        end
+        return EvaluationNode.new(@data_set.data_labels, min_entropy_index, domain[min_entropy_index], nodes)
+      end
+      private
+      def self.sum(values)
+        values.inject( 0 ) { |sum,x| sum+x }
+      end
+      private
+      def self.log2(z)
+        return 0.0 if z == 0
+        Math.log(z)/LOG2
+      end
+      private
+      def most_freq(examples, domain)
+        freqs = []
+        domain.last.length.times { freqs << 0}
+        examples.each do |example|
+          cat_index = domain.last.index(example.last)
+          freq = freqs[cat_index] + 1
+          freqs[cat_index] = freq
+        end
+        max_freq = freqs.max
+        max_freq_index = freqs.index(max_freq)
+        domain.last[max_freq_index]
+      end
+      private
+      def split_data_examples(data_examples, domain, att_index)
+        data_examples_array = []
+        att_value_examples = {}
+        data_examples.each do |example|
+          example_set = att_value_examples[example[att_index]]
+          example_set = [] if !example_set
+          example_set << example
+          att_value_examples.store(example[att_index], example_set)
+        end
+        att_value_examples.each_pair do |att_value, example_set|
+           att_value_index = domain[att_index].index(att_value)
+           data_examples_array[att_value_index] = example_set
+        end
+        return data_examples_array
+      end
+      private
+      def min_entropy_index(data_examples, domain, flag_att=[])
+        min_entropy = nil
+        min_index = 0
+        domain[0..-2].each_index do |index|
+          freq_grid = freq_grid(index, data_examples, domain)
+          entropy = entropy(freq_grid, data_examples.length)
+          if (!min_entropy || entropy < min_entropy) && !flag_att.include?(index)
+            min_entropy = entropy
+            min_index = index
+          end
+        end
+        return min_index
+      end
+      private
+      def domain(data_examples)
+        #return build_domains(data_examples)
+        domain = []
+        @data_set.data_labels.length.times { domain << [] }
+        data_examples.each do |data|
+          data.each_index do |i|
+            domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
+          end
+        end
+        return domain
+      end
+      private
+      def freq_grid(att_index, data_examples, domain)
+        #Initialize empty grid
+        grid_element = []
+        domain.last.length.times { grid_element << 0}
+        grid = []
+        domain[att_index].length.times { grid << grid_element.clone }
+        #Fill frecuency with grid
+        data_examples.each do |example|
+          att_val = example[att_index]
+          att_val_index = domain[att_index].index(att_val)
+          category = example.last
+          category_index = domain.last.index(category)
+          freq = grid[att_val_index][category_index] + 1
+          grid[att_val_index][category_index] = freq
+        end
+        return grid
+      end
+      private
+      def entropy(freq_grid, total_examples)
+        #Calc entropy of each element
+        entropy = 0
+        freq_grid.each do |att_freq|
+          att_total_freq = ID3.sum(att_freq)
+          partial_entropy = 0
+          if att_total_freq != 0
+            att_freq.each do |freq|
+              prop = freq.to_f/att_total_freq
+              partial_entropy += (-1*prop*ID3.log2(prop))
+            end
+          end
+          entropy += (att_total_freq.to_f/total_examples) * partial_entropy
+        end
+        return entropy
+      end
+      private
+      LOG2 = Math.log(2)
+    end
+    class EvaluationNode #:nodoc: all
+      attr_reader :index, :values, :nodes
+      def initialize(data_labels, index, values, nodes)
+        @index = index
+        @values = values
+        @nodes = nodes
+        @data_labels = data_labels
+      end
+      def value(data)
+        value = data[@index]
+        return rule_not_found if !@values.include?(value)
+        return nodes[@values.index(value)].value(data)
+      end
+      def get_rules
+        rule_set = []
+        @nodes.each_index do |child_node_index|
+          my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
+          child_node = @nodes[child_node_index]
+          child_node_rules = child_node.get_rules
+          child_node_rules.each do |child_rule|
+            child_rule.unshift(my_rule)
+          end
+          rule_set += child_node_rules
+        end
+        return rule_set
+      end
+    end
+    class CategoryNode #:nodoc: all
+      def initialize(label, value)
+        @label = label
+        @value = value
+      end
+      def value(data)
+        return @value
+      end
+      def get_rules
+        return [["#{@label}='#{@value}'"]]
+      end
+    end
+    class ErrorNode #:nodoc: all
+      def value(data)
+        raise "There was not enough information during training to do a proper induction for this data element."
+      end
+      def get_rules
+        return []
+      end
+    end
+  end
+end