RubyGems - nirvdrum-ai4r - Versions diffs - 1.9.1 - Mend

nirvdrum-ai4r 1.9.1

Files changed (150) hide show

data/.gitignore +1 -0
data/.rakeTasks +7 -0
data/README.rdoc +56 -0
data/Rakefile.rb +42 -0
data/VERSION +1 -0
data/ai4r.gemspec +221 -0
data/change_log +49 -0
data/examples/classifiers/id3_data.csv +121 -0
data/examples/classifiers/id3_example.rb +29 -0
data/examples/classifiers/naive_bayes_data.csv +11 -0
data/examples/classifiers/naive_bayes_example.rb +16 -0
data/examples/classifiers/results.txt +31 -0
data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
data/examples/genetic_algorithm/travel_cost.csv +16 -0
data/examples/neural_network/backpropagation_example.rb +67 -0
data/examples/neural_network/patterns_with_base_noise.rb +68 -0
data/examples/neural_network/patterns_with_noise.rb +66 -0
data/examples/neural_network/training_patterns.rb +68 -0
data/examples/neural_network/xor_example.rb +35 -0
data/examples/som/som_data.rb +156 -0
data/examples/som/som_multi_node_example.rb +22 -0
data/examples/som/som_single_example.rb +24 -0
data/lib/ai4r.rb +32 -0
data/lib/ai4r/classifiers/classifier.rb +59 -0
data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
data/lib/ai4r/classifiers/id3.rb +326 -0
data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
data/lib/ai4r/classifiers/one_r.rb +110 -0
data/lib/ai4r/classifiers/prism.rb +197 -0
data/lib/ai4r/classifiers/zero_r.rb +73 -0
data/lib/ai4r/clusterers/average_linkage.rb +59 -0
data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
data/lib/ai4r/clusterers/clusterer.rb +61 -0
data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
data/lib/ai4r/clusterers/diana.rb +139 -0
data/lib/ai4r/clusterers/k_means.rb +126 -0
data/lib/ai4r/clusterers/median_linkage.rb +61 -0
data/lib/ai4r/clusterers/single_linkage.rb +194 -0
data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
data/lib/ai4r/data/data_set.rb +266 -0
data/lib/ai4r/data/parameterizable.rb +64 -0
data/lib/ai4r/data/proximity.rb +100 -0
data/lib/ai4r/data/statistics.rb +77 -0
data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
data/lib/ai4r/neural_network/backpropagation.rb +293 -0
data/lib/ai4r/neural_network/hopfield.rb +149 -0
data/lib/ai4r/som/layer.rb +68 -0
data/lib/ai4r/som/node.rb +96 -0
data/lib/ai4r/som/som.rb +155 -0
data/lib/ai4r/som/two_phase_layer.rb +90 -0
data/site/forrest.properties +152 -0
data/site/forrest.properties.dispatcher.properties +25 -0
data/site/forrest.properties.xml +29 -0
data/site/src/documentation/README.txt +7 -0
data/site/src/documentation/classes/CatalogManager.properties +62 -0
data/site/src/documentation/content/locationmap.xml +72 -0
data/site/src/documentation/content/xdocs/downloads.html +9 -0
data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
data/site/src/documentation/content/xdocs/index.xml +155 -0
data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
data/site/src/documentation/content/xdocs/site.xml +54 -0
data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
data/site/src/documentation/content/xdocs/tabs.xml +35 -0
data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
data/site/src/documentation/resources/images/c.png +0 -0
data/site/src/documentation/resources/images/c_wbn.png +0 -0
data/site/src/documentation/resources/images/c_wn.png +0 -0
data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
data/site/src/documentation/resources/images/ero.gif +0 -0
data/site/src/documentation/resources/images/europe2.png +0 -0
data/site/src/documentation/resources/images/europe3.png +0 -0
data/site/src/documentation/resources/images/fitness.png +0 -0
data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
data/site/src/documentation/resources/images/icon-a.png +0 -0
data/site/src/documentation/resources/images/icon-b.png +0 -0
data/site/src/documentation/resources/images/icon.png +0 -0
data/site/src/documentation/resources/images/jadeferret.png +0 -0
data/site/src/documentation/resources/images/my_email.png +0 -0
data/site/src/documentation/resources/images/neural_network_example.png +0 -0
data/site/src/documentation/resources/images/project-logo.png +0 -0
data/site/src/documentation/resources/images/rubyforge.png +0 -0
data/site/src/documentation/resources/images/s.png +0 -0
data/site/src/documentation/resources/images/s_wbn.png +0 -0
data/site/src/documentation/resources/images/s_wn.png +0 -0
data/site/src/documentation/resources/images/sigmoid.png +0 -0
data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
data/site/src/documentation/resources/images/t.png +0 -0
data/site/src/documentation/resources/images/t_wbn.png +0 -0
data/site/src/documentation/resources/images/t_wn.png +0 -0
data/site/src/documentation/resources/schema/catalog.xcat +29 -0
data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
data/site/src/documentation/sitemap.xmap +66 -0
data/site/src/documentation/skinconf.xml +418 -0
data/site/src/documentation/translations/langcode.xml +29 -0
data/site/src/documentation/translations/languages_de.xml +24 -0
data/site/src/documentation/translations/languages_en.xml +24 -0
data/site/src/documentation/translations/languages_es.xml +22 -0
data/site/src/documentation/translations/languages_fr.xml +24 -0
data/site/src/documentation/translations/languages_nl.xml +24 -0
data/site/src/documentation/translations/menu.xml +33 -0
data/site/src/documentation/translations/menu_af.xml +33 -0
data/site/src/documentation/translations/menu_de.xml +33 -0
data/site/src/documentation/translations/menu_es.xml +33 -0
data/site/src/documentation/translations/menu_fr.xml +33 -0
data/site/src/documentation/translations/menu_it.xml +33 -0
data/site/src/documentation/translations/menu_nl.xml +33 -0
data/site/src/documentation/translations/menu_no.xml +33 -0
data/site/src/documentation/translations/menu_ru.xml +33 -0
data/site/src/documentation/translations/menu_sk.xml +33 -0
data/site/src/documentation/translations/tabs.xml +22 -0
data/site/src/documentation/translations/tabs_de.xml +22 -0
data/site/src/documentation/translations/tabs_es.xml +22 -0
data/site/src/documentation/translations/tabs_fr.xml +22 -0
data/site/src/documentation/translations/tabs_nl.xml +22 -0
data/test/classifiers/hyperpipes_test.rb +84 -0
data/test/classifiers/id3_test.rb +208 -0
data/test/classifiers/multilayer_perceptron_test.rb +79 -0
data/test/classifiers/naive_bayes_test.rb +43 -0
data/test/classifiers/one_r_test.rb +62 -0
data/test/classifiers/prism_test.rb +85 -0
data/test/classifiers/zero_r_test.rb +50 -0
data/test/clusterers/average_linkage_test.rb +51 -0
data/test/clusterers/bisecting_k_means_test.rb +66 -0
data/test/clusterers/centroid_linkage_test.rb +53 -0
data/test/clusterers/complete_linkage_test.rb +57 -0
data/test/clusterers/diana_test.rb +69 -0
data/test/clusterers/k_means_test.rb +100 -0
data/test/clusterers/median_linkage_test.rb +53 -0
data/test/clusterers/single_linkage_test.rb +122 -0
data/test/clusterers/ward_linkage_test.rb +53 -0
data/test/clusterers/weighted_average_linkage_test.rb +53 -0
data/test/data/data_set.csv +121 -0
data/test/data/data_set_test.rb +96 -0
data/test/data/proximity_test.rb +81 -0
data/test/data/statistics_data_set.csv +5 -0
data/test/data/statistics_test.rb +65 -0
data/test/experiment/classifier_evaluator_test.rb +76 -0
data/test/genetic_algorithm/chromosome_test.rb +58 -0
data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
data/test/neural_network/backpropagation_test.rb +69 -0
data/test/neural_network/hopfield_test.rb +72 -0
data/test/som/som_test.rb +97 -0
metadata +238 -0

data/lib/ai4r/classifiers/prism.rb ADDED

@@ -0,0 +1,197 @@
+# Author::    Sergio Fierens (Implementation only, Cendrowska is
+# the creator of the algorithm)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+#
+# J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
+# International Journal of Man-Machine Studies. 27(4):349-370.
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../classifiers/classifier'
+module Ai4r
+  module Classifiers
+    # = Introduction
+    # This is an implementation of the PRISM algorithm (Cendrowska, 1987)
+    # Given a set of preclassified examples, it builds a set of rules
+    # to predict the class of other instances.
+    #
+    # J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
+    # International Journal of Man-Machine Studies. 27(4):349-370.
+    class Prism < Classifier
+      attr_reader :data_set, :rules
+      # Build a new Prism classifier. You must provide a DataSet instance
+      # as parameter. The last attribute of each item is considered as
+      # the item class.
+      def build(data_set)
+        data_set.check_not_empty
+        @data_set = data_set
+        domains = @data_set.build_domains
+        instances = @data_set.data_items.collect {|data| data }
+        @rules = []
+        domains.last.each do |class_value|
+          while(has_class_value(instances, class_value))
+            rule = build_rule(class_value, instances)
+            @rules << rule
+            instances = instances.select {|data| !matches_conditions(data, rule[:conditions])}
+          end
+        end
+        return self
+      end
+      # You can evaluate new data, predicting its class.
+      # e.g.
+      #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
+      def eval(instace)
+        @rules.each do |rule|
+          return rule[:class_value] if matches_conditions(instace, rule[:conditions])
+        end
+        return nil
+      end
+      # This method returns the generated rules in ruby code.
+      # e.g.
+      #
+      #   classifier.get_rules
+      #     # => if age_range == '<30' then marketing_target = 'Y'
+      #    elsif age_range == '>80' then marketing_target = 'Y'
+      #    elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
+      #    else marketing_target = 'N'
+      #    end
+      #
+      # It is a nice way to inspect induction results, and also to execute them:
+      #        age_range = '[30-50)'
+      #        city = 'New York'
+      #        eval(classifier.get_rules)
+      #        puts marketing_target
+      #         'Y'
+      def get_rules
+        out = "if #{join_terms(@rules.first)} then #{then_clause(@rules.first)}"
+        @rules[1...-1].each do |rule|
+          out += "\nelsif #{join_terms(rule)} then #{then_clause(rule)}"
+        end
+        out += "\nelse #{then_clause(@rules.last)}" if @rules.size > 1
+        out += "\nend"
+        return out
+      end
+      protected
+      def get_attr_value(data, attr)
+        data[@data_set.get_index(attr)]
+      end
+      def has_class_value(instances, class_value)
+        instances.each { |data| return true if data.last == class_value}
+        return false
+      end
+      def is_perfect(instances, rule)
+        class_value = rule[:class_value]
+        instances.each do |data|
+          return false if data.last != class_value and matches_conditions(data, rule[:conditions])
+        end
+        return true
+      end
+      def matches_conditions(data, conditions)
+        conditions.each_pair do |attr_label, attr_value|
+          return false if get_attr_value(data, attr_label) != attr_value
+        end
+        return true
+      end
+      def build_rule(class_value, instances)
+        rule = {:class_value => class_value, :conditions => {}}
+        rule_instances = instances.collect {|data| data }
+        attributes = @data_set.data_labels[0...-1].collect {|label| label }
+        until(is_perfect(instances, rule) || attributes.empty?)
+          freq_table = build_freq_table(rule_instances, attributes, class_value)
+          condition = get_condition(freq_table)
+          rule[:conditions].merge!(condition)
+          rule_instances = rule_instances.select do |data|
+            matches_conditions(data, condition)
+          end
+        end
+        return rule
+      end
+      # Returns a structure with the folloring format:
+      # => {attr1_label => { :attr1_value1 => [p, t], attr1_value2 => [p, t], ... },
+      #     attr2_label => { :attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
+      #     ...
+      #     }
+      # where p is the number of instances classified as class_value
+      # with that attribute value, and t is the total number of instances with
+      # that attribute value
+      def build_freq_table(rule_instances, attributes, class_value)
+        freq_table = Hash.new()
+        rule_instances.each do |data|
+          attributes.each do |attr_label|
+            attr_freqs = freq_table[attr_label] || Hash.new([0, 0])
+            pt = attr_freqs[get_attr_value(data, attr_label)]
+            pt = [(data.last == class_value) ? pt[0]+1 : pt[0], pt[1]+1]
+            attr_freqs[get_attr_value(data, attr_label)] = pt
+            freq_table[attr_label] = attr_freqs
+          end
+        end
+        return freq_table
+      end
+      # returns a single conditional term: {attrN_label => attrN_valueM}
+      # selecting the attribute with higher pt ratio
+      # (occurrences of attribute value classified as class_value /
+      #  occurrences of attribute value)
+      def get_condition(freq_table)
+        best_pt = [0, 0]
+        condition = nil
+        freq_table.each do |attr_label, attr_freqs|
+          attr_freqs.each do |attr_value, pt|
+            if(better_pt(pt, best_pt))
+              condition = { attr_label => attr_value }
+              best_pt = pt
+            end
+          end
+        end
+        return condition
+      end
+      # pt = [p, t]
+      # p = occurrences of attribute value with instance classified as class_value
+      # t = occurrences of attribute value
+      # a pt is better if:
+      #   1- its ratio is higher
+      #   2- its ratio is equal, and has a higher p
+      def better_pt(pt, best_pt)
+        return false if pt[1] == 0
+        return true if best_pt[1] == 0
+        a = pt[0]*best_pt[1]
+        b = best_pt[0]*pt[1]
+        return true if a>b || (a==b && pt[0]>best_pt[0])
+        return false
+      end
+      def join_terms(rule)
+        terms = []
+        rule[:conditions].each do |attr_label, attr_value|
+            terms << "#{attr_label} == '#{attr_value}'"
+        end
+        "#{terms.join(" and ")}"
+      end
+      def then_clause(rule)
+        "#{@data_set.data_labels.last} = '#{rule[:class_value]}'"
+      end
+    end
+  end
+end

data/lib/ai4r/classifiers/zero_r.rb ADDED

@@ -0,0 +1,73 @@
+# Author::    Sergio Fierens (Implementation only)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set.rb'
+require File.dirname(__FILE__) + '/../classifiers/classifier'
+module Ai4r
+  module Classifiers
+    # = Introduction
+    #
+    # The idea behind the ZeroR classifier is to identify
+    # the most common class value in the training set.
+    # It always returns that value when evaluating an instance.
+    # It is frequently used as a baseline for evaluating other machine learning
+    # algorithms.
+    class ZeroR < Classifier
+      attr_reader :data_set, :class_value
+      # Build a new ZeroR classifier. You must provide a DataSet instance
+      # as parameter. The last attribute of each item is considered as
+      # the item class.
+      def build(data_set)
+        data_set.check_not_empty
+        @data_set = data_set
+        frequencies = {}
+        max_freq = 0
+        @class_value = nil
+        @data_set.data_items.each do |example|
+          class_value = example.last
+          frequencies[class_value] = frequencies[class_value].nil? ? 1 : frequencies[class_value] + 1
+          class_frequency = frequencies[class_value]
+          if max_freq < class_frequency
+            max_freq = class_frequency
+            @class_value = class_value
+          end
+        end
+        return self
+      end
+      # You can evaluate new data, predicting its class.
+      # e.g.
+      #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
+      def eval(data)
+        @class_value
+      end
+      # This method returns the generated rules in ruby code.
+      # e.g.
+      #
+      #   classifier.get_rules
+      #     # =>  marketing_target='Y'
+      #
+      # It is a nice way to inspect induction results, and also to execute them:
+      #     marketing_target = nil
+      #     eval classifier.get_rules
+      #     puts marketing_target
+      #       # =>  'Y'
+      def get_rules
+        return "#{@data_set.data_labels.last} = '#{@class_value}'"
+      end
+    end
+  end
+end

data/lib/ai4r/clusterers/average_linkage.rb ADDED

@@ -0,0 +1,59 @@
+# Author::    Sergio Fierens (implementation)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../clusterers/single_linkage'
+module Ai4r
+  module Clusterers
+    # Implementation of a Hierarchical clusterer with group average
+    # linkage, AKA unweighted pair group method average or UPGMA (Everitt
+    # et al., 2001 ; Jain and Dubes, 1988 ; Sokal and Michener, 1958).
+    # Hierarchical clusterers create one cluster per element, and then
+    # progressively merge clusters, until the required number of clusters
+    # is reached.
+    # With average linkage, the distance between a cluster cx and
+    # cluster (ci U cj) is the average of the distance between cx and ci,
+    # and the distance between cx and cj.
+    #
+    #   D(cx, (ci U cj)) = (D(cx, ci) + D(cx, cj)) / 2
+    class AverageLinkage < SingleLinkage
+      parameters_info :distance_function =>
+          "Custom implementation of distance function. " +
+          "It must be a closure receiving two data items and return the " +
+          "distance bewteen them. By default, this algorithm uses " +
+          "ecuclidean distance of numeric attributes to the power of 2."
+      # Build a new clusterer, using data examples found in data_set.
+      # Items will be clustered in "number_of_clusters" different
+      # clusters.
+      def build(data_set, number_of_clusters)
+        super
+      end
+      # This algorithms does not allow classification of new data items
+      # once it has been built. Rebuild the cluster including you data element.
+      def eval(data_item)
+        Raise "Eval of new data is not supported by this algorithm."
+      end
+      protected
+      # return distance between cluster cx and cluster (ci U cj),
+      # using average linkage
+      def linkage_distance(cx, ci, cj)
+        (read_distance_matrix(cx, ci)+
+          read_distance_matrix(cx, cj))/2
+      end
+    end
+  end
+end

data/lib/ai4r/clusterers/bisecting_k_means.rb ADDED

@@ -0,0 +1,93 @@
+# Author::    Sergio Fierens (implementation)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../clusterers/k_means'
+module Ai4r
+  module Clusterers
+    # The Bisecting k-means algorithm is a variation of the "k-means" algorithm,
+    # somewhat less sensitive to the initial selection of centroids than the
+    # original.
+    #
+    # More about K Means algorithm:
+    # http://en.wikipedia.org/wiki/K-means_algorithm
+    class BisectingKMeans < KMeans
+      attr_reader :data_set, :number_of_clusters, :clusters, :centroids
+      attr_accessor :max_iterations, :distance_function, :refine
+      parameters_info :max_iterations => "Maximum number of iterations to " +
+        "build the clusterer. By default it is uncapped.",
+        :distance_function => "Custom implementation of distance function. " +
+          "It must be a closure receiving two data items and return the " +
+          "distance bewteen them. By default, this algorithm uses " +
+          "ecuclidean distance of numeric attributes to the power of 2.",
+        :centroid_function => "Custom implementation to calculate the " +
+          "centroid of a cluster. It must be a closure receiving an array of " +
+          "data sets, and return an array of data items, representing the " +
+          "centroids of for each data set. " +
+          "By default, this algorithm returns a data items using the mode "+
+          "or mean of each attribute on each data set.",
+        :refine => "Boolean value. True by default. It will run the " +
+            "classic K Means algorithm, using as initial centroids the " +
+            "result of the bisecting approach."
+      def intialize
+        @refine = true
+      end
+      # Build a new clusterer, using data examples found in data_set.
+      # Items will be clustered in "number_of_clusters" different
+      # clusters.
+      def build(data_set, number_of_clusters)
+        @data_set = data_set
+        @number_of_clusters = number_of_clusters
+        @clusters = [@data_set]
+        @centroids = [@data_set.get_mean_or_mode]
+        while @clusters.length < @number_of_clusters
+          biggest_cluster_index = find_biggest_cluster_index(@clusters)
+          clusterer = KMeans.new.
+            set_parameters(get_parameters).
+            build(@clusters[biggest_cluster_index], 2)
+          @clusters.delete_at(biggest_cluster_index)
+          @centroids.delete_at(biggest_cluster_index)
+          @clusters.concat(clusterer.clusters)
+          @centroids.concat(clusterer.centroids)
+        end
+        super if @refine
+        return self
+      end
+      protected
+      def calc_initial_centroids
+        @centroids # Use existing centroids
+      end
+      def find_biggest_cluster_index(clusters)
+        max_index = 0
+        max_length = 0
+        clusters.each_index do |cluster_index|
+          cluster = clusters[cluster_index]
+          if max_length < cluster.data_items.length
+            max_length = cluster.data_items.length
+            max_index = cluster_index
+          end
+        end
+        return max_index
+      end
+    end
+  end
+end

data/lib/ai4r/clusterers/centroid_linkage.rb ADDED

@@ -0,0 +1,66 @@
+# Author::    Sergio Fierens (implementation)
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/data_set'
+require File.dirname(__FILE__) + '/../clusterers/single_linkage'
+module Ai4r
+  module Clusterers
+    # Implementation of an Agglomerative Hierarchical clusterer with
+    # centroid linkage algorithm, aka unweighted pair group method
+    # centroid (UPGMC) (Everitt et al., 2001 ; Jain and Dubes, 1988 ;
+    # Sokal and Michener, 1958 )
+    # Hierarchical clusterers create one cluster per element, and then
+    # progressively merge clusters, until the required number of clusters
+    # is reached.
+    # The distance between clusters is the squared euclidean distance
+    # between their centroids.
+    #
+    #   D(cx, (ci U cj)) = | mx - mij |^2
+    #   D(cx, (ci U cj)) =  (ni/(ni+nj))*D(cx, ci) +
+    #                       (nj/(ni+nj))*D(cx, cj) -
+    #                       (ni*nj/(ni+nj)^2)*D(ci, cj)
+    class CentroidLinkage < SingleLinkage
+    parameters_info :distance_function =>
+          "Custom implementation of distance function. " +
+          "It must be a closure receiving two data items and return the " +
+          "distance bewteen them. By default, this algorithm uses " +
+          "ecuclidean distance of numeric attributes to the power of 2."
+      # Build a new clusterer, using data examples found in data_set.
+      # Items will be clustered in "number_of_clusters" different
+      # clusters.
+      def build(data_set, number_of_clusters)
+        super
+      end
+      # This algorithms does not allow classification of new data items
+      # once it has been built. Rebuild the cluster including you data element.
+      def eval(data_item)
+        Raise "Eval of new data is not supported by this algorithm."
+      end
+      protected
+      # return distance between cluster cx and cluster (ci U cj),
+      # using centroid linkage
+      def linkage_distance(cx, ci, cj)
+        ni = @index_clusters[ci].length
+        nj = @index_clusters[cj].length
+        ( ni * read_distance_matrix(cx, ci) +
+          nj * read_distance_matrix(cx, cj) -
+         1.0 * ni * nj * read_distance_matrix(ci, cj) / (ni+nj)) / (ni+nj)
+      end
+    end
+  end
+end