RubyGems - ai4r - Versions diffs - 1.4 → 1.5 - Mend

ai4r 1.4 → 1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

data/README.rdoc +24 -3
data/examples/decision_trees/id3_example.rb +1 -1
data/examples/genetic_algorithm/genetic_algorithm_example.rb +1 -1
data/lib/ai4r.rb +11 -0
data/lib/ai4r/classifiers/classifier.rb +2 -0
data/lib/ai4r/classifiers/id3.rb +3 -2
data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
data/lib/ai4r/classifiers/one_r.rb +2 -1
data/lib/ai4r/classifiers/prism.rb +2 -1
data/lib/ai4r/classifiers/zero_r.rb +2 -1
data/lib/ai4r/clusterers/average_linkage.rb +60 -0
data/lib/ai4r/clusterers/bisecting_k_means.rb +17 -39
data/lib/ai4r/clusterers/clusterer.rb +25 -0
data/lib/ai4r/clusterers/complete_linkage.rb +62 -0
data/lib/ai4r/clusterers/k_means.rb +18 -25
data/lib/ai4r/clusterers/single_linkage.rb +179 -0
data/lib/ai4r/data/data_set.rb +33 -41
data/lib/ai4r/data/proximity.rb +82 -0
data/lib/ai4r/data/statistics.rb +77 -0
data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +2 -4
data/site/build/site/en/build/tmp/build-info.xml +5 -0
data/site/build/site/en/build/tmp/plugins-1.xml +212 -0
data/site/build/site/en/build/tmp/plugins-2.xml +252 -0
data/site/build/site/en/build/tmp/projfilters.properties +41 -0
data/site/build/site/en/downloads.html +1 -1
data/site/build/site/en/geneticAlgorithms.html +1 -1
data/site/build/site/en/index.html +44 -7
data/site/build/site/en/index.pdf +278 -155
data/site/build/site/en/linkmap.html +2 -2
data/site/build/site/en/linkmap.pdf +12 -12
data/site/build/site/en/machineLearning.html +1 -1
data/site/build/site/en/neuralNetworks.html +1 -1
data/site/build/site/en/sourceCode.html +244 -0
data/site/build/site/en/sourceCode.pdf +278 -0
data/site/build/site/en/svn.html +34 -42
data/site/build/site/en/svn.pdf +86 -114
data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
data/site/build/tmp/projfilters.properties +1 -1
data/site/build/webapp/WEB-INF/logs/core.log +628 -629
data/site/build/webapp/WEB-INF/logs/error.log +213 -213
data/site/src/documentation/content/xdocs/index.xml +20 -1
data/site/src/documentation/content/xdocs/site.xml +1 -1
data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
data/site/src/documentation/resources/images/sigmoid.png +0 -0
data/test/classifiers/id3_test.rb +0 -1
data/test/classifiers/multilayer_perceptron_test.rb +79 -0
data/test/classifiers/one_r_test.rb +0 -2
data/test/classifiers/prism_test.rb +0 -2
data/test/classifiers/zero_r_test.rb +0 -2
data/test/clusterers/average_linkage_test.rb +45 -0
data/test/clusterers/bisecting_k_means_test.rb +0 -2
data/test/clusterers/complete_linkage_test.rb +45 -0
data/test/clusterers/k_means_test.rb +0 -2
data/test/clusterers/single_linkage_test.rb +113 -0
data/test/data/data_set_test.rb +3 -15
data/test/data/proximity_test.rb +71 -0
data/test/data/statistics_test.rb +65 -0
data/test/experiment/classifier_evaluator_test.rb +76 -0
metadata +27 -6
data/site/src/documentation/content/xdocs/svn.xml +0 -41

data/README.rdoc CHANGED

@@ -20,10 +20,31 @@ http://ai4r.rubyforge.org
 = More Info
- * AI4R wiki: http://wiki.jadeferret.com/Category:AI4R
- * AI4R Project site: http://ai4r.rubyforge.org
+* AI4R wiki: http://wiki.jadeferret.com/Category:AI4R
+* AI4R Project site: http://ai4r.rubyforge.org
-= Warranty
+= Contact
+If you have questions or constructive comments about this project,
+please post them in the forum (http://forum.jadeferret.com/viewforum.php?f=3).
+I get an email notification when you post, and I do my best to answer as soon as possible.
+If you do not want to make it public, send it to me: Sergio Fierens, email address: (sergio (at) jadeferret (dot) com). But please, try to post them in the forum. I get tons of emails and it would be great to make them public to help everyone.
+= Roadmap
+AI4R is an active project. If you are interested in what we are working on,
+check out the development roadmap: http://wiki.jadeferret.com/AI4R_RoadMap
+= Disclaimer
+In plain English:
+This project was created by Sergio Fierens, but the AI algorithms were created by other
+people who are actually much more clever than Sergio. He does his best implementing
+them, but he cannot guarantee that these implementations are accurate.
+In legalese:
 This software is provided "as is" and without any express or implied warranties,
 including, without limitation, the implied warranties of merchantability and

data/examples/decision_trees/id3_example.rb CHANGED

@@ -11,7 +11,7 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
 # Load data from data_set.csv
 data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
-data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
+data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
 # Build ID3 tree
 id3 = Ai4r::Classifiers::ID3.new.build(data_set)

data/examples/genetic_algorithm/genetic_algorithm_example.rb CHANGED

@@ -13,7 +13,7 @@ require 'csv'
 # Load data from data_set.csv
 data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
-data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
+data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
 data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
 Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)

data/lib/ai4r.rb CHANGED

@@ -1,10 +1,21 @@
+# Data
+require "ai4r/data/data_set"
+require "ai4r/data/statistics"
+require "ai4r/data/parameterizable"
+# Clusterers
 require "ai4r/clusterers/clusterer"
 require "ai4r/clusterers/k_means"
 require "ai4r/clusterers/bisecting_k_means"
+require "ai4r/clusterers/single_linkage"
+require "ai4r/clusterers/complete_linkage"
+require "ai4r/clusterers/average_linkage"
+# Classifiers
 require "ai4r/classifiers/classifier"
 require "ai4r/classifiers/id3"
 require "ai4r/classifiers/prism"
 require "ai4r/classifiers/one_r"
 require "ai4r/classifiers/zero_r"
+# Neural networks
 require "ai4r/neural_network/backpropagation"
+# Genetic Algorithms
 require "ai4r/genetic_algorithm/genetic_algorithm"

data/lib/ai4r/classifiers/classifier.rb CHANGED

@@ -19,6 +19,8 @@ module Ai4r
       include Ai4r::Data::Parameterizable
       # Build a new classifier, using data examples found in data_set.
+      # The last attribute of each item is considered as the
+      # item class.
       def build(data_set)
         raise NotImplementedError
       end

data/lib/ai4r/classifiers/id3.rb CHANGED

@@ -67,7 +67,7 @@ module Ai4r
     # values) file.
     #
     #   data_file = "#{File.dirname(__FILE__)}/data_set.csv"
-    #   data_set = DataSet.load_data_and_labels_from_csv data_file
+    #   data_set = DataSet.new.load_csv_with_labels data_file
     #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
     #
     # = A nice tip for data evaluation
@@ -94,7 +94,8 @@ module Ai4r
       attr_reader :data_set
       # Create a new ID3 classifier. You must provide a DataSet instance
-      # as parameter.
+      # as parameter. The last attribute of each item is considered as the
+      # item class.
       def build(data_set)
         data_set.check_not_empty
         @data_set = data_set

data/lib/ai4r/classifiers/multilayer_perceptron.rb ADDED

@@ -0,0 +1,135 @@
+# Author::    Sergio Fierens (Implementation only)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set.rb'
+require File.dirname(__FILE__) + '/../classifiers/classifier'
+require File.dirname(__FILE__) + '/../neural_network/backpropagation'
+module Ai4r
+  module Classifiers
+    # = Introduction
+    #
+    # The idea behind the MultilayerPerceptron classifier is to
+    # train a Multilayer Perceptron neural network with the provided examples,
+    # and predict the class for new data items.
+    #
+    # = Parameters
+    #
+    # Use class method get_parameters_info to obtain details on the algorithm
+    # parameters. Use set_parameters to set values for this parameters.
+    # See Parameterizable module documentation.
+    #
+    # * :network_class => Neural network implementation class.
+    #   By default: Ai4r::NeuralNetwork::Backpropagation.
+    # * :network_parameters => Parameters to be forwarded to the back end
+    #   neural ntework.
+    # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
+    #   2 hidden layers with 8 and 6 neurons each. By default []
+    # * :training_iterations => How many times the training should be repeated.
+    #   By default: 1000.
+    # :active_node_value => Default: 1
+    # :inactive_node_value => Default: 1
+    class MultilayerPerceptron < Classifier
+      attr_reader :data_set, :class_value, :network, :domains
+      parameters_info :network_class => "Neural network implementation class."+
+          "By default: Ai4r::NeuralNetwork::Backpropagation.",
+        :network_parameters => "parameters to be forwarded to the back end " +
+          "neural network.",
+        :hidden_layers => "Hidden layer structure. E.g. [8, 6] will generate " +
+          "2 hidden layers with 8 and 6 neurons each. By default []",
+        :training_iterations => "How many times the training should be " +
+          "repeated. By default: 1000",
+        :active_node_value => "Default: 1",
+        :inactive_node_value => "Default: 0"
+      def initialize
+        @network_class = Ai4r::NeuralNetwork::Backpropagation
+        @hidden_layers = []
+        @training_iterations = 500
+        @network_parameters = {}
+        @active_node_value = 1
+        @inactive_node_value = 0
+      end
+      # Build a new MultilayerPerceptron classifier. You must provide a DataSet
+      # instance as parameter. The last attribute of each item is considered as
+      # the item class.
+      def build(data_set)
+        data_set.check_not_empty
+        @data_set = data_set
+        @domains = @data_set.build_domains.collect {|domain| domain.to_a}
+        @outputs = @domains.last.length
+        @inputs = 0
+        @domains[0...-1].each {|domain| @inputs += domain.length}
+        @structure = [@inputs] + @hidden_layers + [@outputs]
+        @network = @network_class.new @structure
+        @training_iterations.times do
+          data_set.data_items.each do |data_item|
+            input_values = data_to_input(data_item[0...-1])
+            output_values = data_to_output(data_item.last)
+            @network.train(input_values, output_values)
+          end
+        end
+        return self
+      end
+      # You can evaluate new data, predicting its class.
+      # e.g.
+      #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
+      def eval(data)
+        input_values = data_to_input(data)
+        output_values = @network.eval(input_values)
+        return @domains.last[get_max_index(output_values)]
+      end
+      # Multilayer Perceptron Classifiers cannot generate
+      # human-readable rules.
+      def get_rules
+        return "raise 'Neural networks classifiers do not generate human-readable rules.'"
+      end
+      protected
+      def data_to_input(data_item)
+        input_values = Array.new(@inputs, @inactive_node_value)
+        accum_index = 0
+        data_item.each_index do |att_index|
+          att_value = data_item[att_index]
+          domain_index = @domains[att_index].index(att_value)
+          input_values[domain_index + accum_index] = @active_node_value
+          accum_index = @domains[att_index].length
+        end
+        return input_values
+      end
+      def data_to_output(data_item)
+        output_values = Array.new(@outputs, @inactive_node_value)
+        output_values[@domains.last.index(data_item)] = @active_node_value
+        return output_values
+      end
+      def get_max_index(output_values)
+        max_value = @inactive_node_value
+        max_index = 0
+        output_values.each_index do |output_index|
+          if max_value < output_values[output_index]
+            max_value = output_values[output_index]
+            max_index = output_index
+          end
+        end
+        return max_index
+      end
+    end
+  end
+end

data/lib/ai4r/classifiers/one_r.rb CHANGED

@@ -25,7 +25,8 @@ module Ai4r
       attr_reader :data_set, :rule
       # Build a new OneR classifier. You must provide a DataSet instance
-      # as parameter.
+      # as parameter. The last attribute of each item is considered as
+      # the item class.
       def build(data_set)
         data_set.check_not_empty
         @data_set = data_set

data/lib/ai4r/classifiers/prism.rb CHANGED

@@ -29,7 +29,8 @@ module Ai4r
       attr_reader :data_set, :rules
       # Build a new Prism classifier. You must provide a DataSet instance
-      # as parameter.
+      # as parameter. The last attribute of each item is considered as
+      # the item class.
       def build(data_set)
         data_set.check_not_empty
         @data_set = data_set

data/lib/ai4r/classifiers/zero_r.rb CHANGED

@@ -25,7 +25,8 @@ module Ai4r
       attr_reader :data_set, :class_value
       # Build a new ZeroR classifier. You must provide a DataSet instance
-      # as parameter.
+      # as parameter. The last attribute of each item is considered as
+      # the item class.
       def build(data_set)
         data_set.check_not_empty
         @data_set = data_set

data/lib/ai4r/clusterers/average_linkage.rb ADDED

@@ -0,0 +1,60 @@
+# Author::    Sergio Fierens (implementation)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../clusterers/single_linkage'
+module Ai4r
+  module Clusterers
+    # Implementation of a Hierarchical clusterer with complete linkage.
+    # Hierarchical clusteres create one cluster per element, and then
+    # progressively merge clusters, until the required number of clusters
+    # is reached.
+    # With average linkage, the distance between two clusters is computed as
+    # the average distance between elements of each cluster.
+    class AverageLinkage < SingleLinkage
+      # Build a new clusterer, using data examples found in data_set.
+      # Items will be clustered in "number_of_clusters" different
+      # clusters.
+      def build(data_set, number_of_clusters)
+        super
+      end
+      # Classifies the given data item, returning the cluster index it belongs
+      # to (0-based).
+      def eval(data_item)
+        super
+      end
+      protected
+      # Calculate cluster distance using the average linkage method
+      def calc_index_clusters_distance(cluster_a, cluster_b)
+        dist_sum = 0.0
+        cluster_a.each do |index_a|
+          cluster_b.each do |index_b|
+            dist_sum += read_distance_matrix(index_a, index_b)
+            end
+        end
+        return dist_sum/(cluster_a.length*cluster_b.length)
+      end
+      def distance_between_item_and_cluster(data_item, cluster)
+        dist_sum = 0.0
+        cluster.data_items.each do |another_item|
+          dist_sum += distance(data_item, another_item)
+        end
+        return dist_sum/cluster.data_items.length
+      end
+    end
+  end
+end

data/lib/ai4r/clusterers/bisecting_k_means.rb CHANGED

@@ -7,7 +7,6 @@
 # the Mozilla Public License version 1.1  as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-require "set"
 require File.dirname(__FILE__) + '/../data/data_set'
 require File.dirname(__FILE__) + '/../clusterers/k_means'
@@ -25,6 +24,23 @@ module Ai4r
       attr_reader :data_set, :number_of_clusters, :clusters, :centroids
       attr_accessor :max_iterations, :distance_function, :refine
+      parameters_info :max_iterations => "Maximum number of iterations to " +
+        "build the clusterer. By default it is uncapped.",
+        :distance_function => "Custom implementation of distance function. " +
+          "It must be a closure receiving two data items and return the " +
+          "distance bewteen them. By default, this algorithm uses " +
+          "ecuclidean distance of numeric attributes to the power of 2.",
+        :centroid_function => "Custom implementation to calculate the " +
+          "centroid of a cluster. It must be a closure receiving an array of " +
+          "data sets, and return an array of data items, representing the " +
+          "centroids of for each data set. " +
+          "By default, this algorithm returns a data items using the mode "+
+          "or mean of each attribute on each data set.",
+        :refine => "Boolean value. True by default. It will run the " +
+            "classic K Means algorithm, using as initial centroids the " +
+            "result of the bisecting approach."
       # Create the clusterer with refinement enabled by default, as
       # documented for the :refine parameter. The method used to be
       # misspelled ("intialize"), so it was never invoked on construction
       # and @refine was left unset. The parent initializer is chained so
       # any defaults it sets are kept.
       def initialize
         super
         @refine = true
       end
@@ -54,44 +70,6 @@ module Ai4r
         return self
       end
-      # Get info on what can be parameterized on this clusterer algorithm.
-      # It returns a hash with the following format:
-      # { :param_name => "Info on the parameter" }
-      def get_parameters_info
-        { :max_iterations => "Maximum number of iterations used to bisect a " +
-          "cluster. By default it is uncapped.",
-          :distance_function => "Custom implementation of distance function. " +
-            "It must be a closure receiving two data items and return the " +
-            "distance bewteen them. By default, this algorithm uses " +
-            "ecuclidean distance of numeric attributes to the power of 2.",
-          :refine => "Boolean value. True by default. It will run the " +
-            "classic K Means algorithm, using as initial centroids the " +
-            "result of the bisecting approach."
-          }
-      end
-      # Set parameters on this clusterer instance.
-      # You must provide a hash with the folowing format:
-      # { :param_name => parameter_value }
-      #
-      # Use get_parameters_info to know what parameters are accepted.
-      def set_parameters(parameters)
-        super
-        if parameters.has_key?(:refine)
-          @refine = parameters[:refine]
-        end
-        return self
-      end
-      # Get parameter values on this clusterer instance.
-      # Returns a hash with the folowing format:
-      # { :param_name => parameter_value }
-      def get_parameters
-        params = super
-        params[:refine] = @refine
-        return params
-      end
       protected
       def calc_initial_centroids
         @centroids # Use existing centroids

data/lib/ai4r/clusterers/clusterer.rb CHANGED

@@ -31,6 +31,31 @@ module Ai4r
         raise NotImplementedError
       end
+      protected
+      # Usefull as a defult distance function for clustering algorithms
+      def euclidean_distance(a, b)
+        dist = 0.0
+        a.each_index do |index|
+          if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
+            dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
+          end
+        end
+        return dist
+      end
+      def get_min_index(array)
+        min = array.first
+        index = 0
+        array.each_index do |i|
+          x = array[i]
+          if x < min
+            min = x
+            index = i
+          end
+        end
+        return index
+      end
     end
   end
 end

data/lib/ai4r/clusterers/complete_linkage.rb ADDED

@@ -0,0 +1,62 @@
+# Author::    Sergio Fierens (implementation)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../clusterers/single_linkage'
+module Ai4r
+  module Clusterers
+    # Implementation of a Hierarchical clusterer with complete linkage.
+    # Hierarchical clusteres create one cluster per element, and then
+    # progressively merge clusters, until the required number of clusters
+    # is reached.
+    # With complete linkage, the distance between two clusters is computed as
+    # the maximum distance between elements of each cluster.
+    class CompleteLinkage < SingleLinkage
+      # Build a new clusterer, using data examples found in data_set.
+      # Items will be clustered in "number_of_clusters" different
+      # clusters.
+      def build(data_set, number_of_clusters)
+        super
+      end
+      # Classifies the given data item, returning the cluster index it belongs
+      # to (0-based).
+      def eval(data_item)
+        super
+      end
+      protected
+      # Calculate cluster distance using the complete linkage method
+      def calc_index_clusters_distance(cluster_a, cluster_b)
+        max_dist = 0
+        cluster_a.each do |index_a|
+          cluster_b.each do |index_b|
+            dist = read_distance_matrix(index_a, index_b)
+            max_dist = dist if dist > max_dist
+          end
+        end
+        return max_dist
+      end
+      def distance_between_item_and_cluster(data_item, cluster)
+        max_dist = 0
+        cluster.data_items.each do |another_item|
+          dist = distance(data_item, another_item)
+          max_dist = dist if dist > max_dist
+        end
+        return max_dist
+      end
+    end
+  end
+end