RubyGems - ai4r - Versions diffs - 1.12 → 1.13 - Mend

ai4r 1.12 → 1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

data/README.rdoc +7 -12
data/examples/classifiers/simple_linear_regression_example.csv +159 -0
data/examples/classifiers/simple_linear_regression_example.rb +15 -0
data/examples/clusterers/clusterer_example.rb +56 -0
data/examples/neural_network/backpropagation_example.rb +2 -1
data/lib/ai4r.rb +3 -1
data/lib/ai4r/classifiers/id3.rb +6 -2
data/lib/ai4r/classifiers/multilayer_perceptron.rb +1 -1
data/lib/ai4r/classifiers/naive_bayes.rb +24 -21
data/lib/ai4r/classifiers/simple_linear_regression.rb +118 -0
data/lib/ai4r/clusterers/average_linkage.rb +3 -3
data/lib/ai4r/clusterers/bisecting_k_means.rb +2 -2
data/lib/ai4r/clusterers/centroid_linkage.rb +3 -3
data/lib/ai4r/clusterers/clusterer.rb +0 -11
data/lib/ai4r/clusterers/complete_linkage.rb +3 -3
data/lib/ai4r/clusterers/diana.rb +2 -2
data/lib/ai4r/clusterers/k_means.rb +123 -21
data/lib/ai4r/clusterers/median_linkage.rb +3 -3
data/lib/ai4r/clusterers/single_linkage.rb +4 -4
data/lib/ai4r/clusterers/ward_linkage.rb +4 -4
data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +48 -0
data/lib/ai4r/clusterers/weighted_average_linkage.rb +3 -3
data/lib/ai4r/data/data_set.rb +12 -3
data/lib/ai4r/data/proximity.rb +22 -0
data/lib/ai4r/neural_network/backpropagation.rb +26 -15
data/test/classifiers/id3_test.rb +12 -0
data/test/classifiers/multilayer_perceptron_test.rb +1 -1
data/test/classifiers/naive_bayes_test.rb +18 -18
data/test/classifiers/simple_linear_regression_test.rb +37 -0
data/test/clusterers/k_means_test.rb +75 -8
data/test/clusterers/ward_linkage_hierarchical_test.rb +81 -0
data/test/data/data_set_test.rb +8 -0
data/test/data/proximity_test.rb +7 -1
metadata +96 -55

data/lib/ai4r/clusterers/single_linkage.rb CHANGED

@@ -16,7 +16,7 @@ module Ai4r
     # Implementation of a Hierarchical clusterer with single linkage (Everitt et
     # al., 2001 ; Johnson, 1967 ; Jain and Dubes, 1988 ; Sneath, 1957 )
-    # Hierarchical clusteres create one cluster per element, and then
+    # Hierarchical clusterers create one cluster per element, and then
     # progressively merge clusters, until the required number of clusters
     # is reached.
     # With single linkage, the distance between two clusters is computed as the
@@ -30,8 +30,8 @@ module Ai4r
       parameters_info :distance_function =>
           "Custom implementation of distance function. " +
           "It must be a closure receiving two data items and return the " +
-          "distance bewteen them. By default, this algorithm uses " +
-          "ecuclidean distance of numeric attributes to the power of 2."
+          "distance between them. By default, this algorithm uses " +
+          "euclidean distance of numeric attributes to the power of 2."
       def initialize
         @distance_function = lambda do |a,b|
@@ -105,7 +105,7 @@ module Ai4r
       end
       # ci and cj are the indexes of the clusters that are going to
-      # be merged. We need to remove distances from/to ci and ci,
+      # be merged. We need to remove distances from/to ci and cj,
       # and add distances from/to new cluster (ci U cj)
       def update_distance_matrix(ci, cj)
         ci, cj = cj, ci if cj > ci

data/lib/ai4r/clusterers/ward_linkage.rb CHANGED

@@ -16,10 +16,10 @@ module Ai4r
     # Implementation of an Agglomerative Hierarchical clusterer with
     # Ward's method linkage algorithm, aka the minimum variance method (Everitt
     # et al., 2001 ; Jain and Dubes, 1988 ; Ward, 1963 ).
-    # Hierarchical clusteres create one cluster per element, and then
+    # Hierarchical clusterers create one cluster per element, and then
     # progressively merge clusters, until the required number of clusters
     # is reached.
-    # The objective of this method is to minime the variance.
+    # The objective of this method is to minimize the variance.
     #
     #   D(cx, (ci U cj)) =  (ni/(ni+nj+nx))*D(cx, ci) +
     #                       (nj/(ni+nj+nx))*D(cx, cj) -
@@ -29,8 +29,8 @@ module Ai4r
     parameters_info :distance_function =>
           "Custom implementation of distance function. " +
           "It must be a closure receiving two data items and return the " +
-          "distance bewteen them. By default, this algorithm uses " +
-          "ecuclidean distance of numeric attributes to the power of 2."
+          "distance between them. By default, this algorithm uses " +
+          "euclidean distance of numeric attributes to the power of 2."
       # Build a new clusterer, using data examples found in data_set.
       # Items will be clustered in "number_of_clusters" different

data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb ADDED

@@ -0,0 +1,48 @@
+# Author::    Peter Lubell-Doughtie
+# License::   BSD 3 Clause
+# Project::   ai4r
+# Url::       http://peet.ldee.org
+require File.dirname(__FILE__) + '/../clusterers/ward_linkage'
+module Ai4r
+  module Clusterers
+    # Hierarchical version of WardLinkage that stores the clusters as merges occur.
+    class WardLinkageHierarchical < WardLinkage
+      attr_reader :cluster_tree
+      def initialize(depth = nil)
+        @cluster_tree = []
+        @depth = depth
+        @merges_so_far = 0
+        super()
+      end
+      def build(data_set, number_of_clusters)
+        data_len = data_set.data_items.length
+        @total_merges = data_len - number_of_clusters
+        super
+        @cluster_tree << self.clusters
+        @cluster_tree.reverse!
+        return self
+      end
+      protected
+      def merge_clusters(index_a, index_b, index_clusters)
+        # only store if no depth was given, or once merges_so_far exceeds total_merges - depth
+        if @depth.nil? or @merges_so_far > @total_merges - @depth
+          # store current clusters
+          stored_distance_matrix = @distance_matrix.dup
+          @cluster_tree << build_clusters_from_index_clusters(index_clusters)
+          @distance_matrix = stored_distance_matrix
+        end
+        @merges_so_far += 1
+        super
+      end
+    end
+  end
+end

data/lib/ai4r/clusterers/weighted_average_linkage.rb CHANGED

@@ -16,7 +16,7 @@ module Ai4r
     # Implementation of an Agglomerative Hierarchical clusterer with
     # weighted average linkage algorithm, aka weighted pair group method
     # average or WPGMA (Jain and Dubes, 1988 ; McQuitty, 1966 )
-    # Hierarchical clusteres create one cluster per element, and then
+    # Hierarchical clusterers create one cluster per element, and then
     # progressively merge clusters, until the required number of clusters
     # is reached.
     # Similar to AverageLinkage, but the distances between clusters are
@@ -28,8 +28,8 @@ module Ai4r
     parameters_info :distance_function =>
           "Custom implementation of distance function. " +
           "It must be a closure receiving two data items and return the " +
-          "distance bewteen them. By default, this algorithm uses " +
-          "ecuclidean distance of numeric attributes to the power of 2."
+          "distance between them. By default, this algorithm uses " +
+          "euclidean distance of numeric attributes to the power of 2."
       # Build a new clusterer, using data examples found in data_set.
       # Items will be clustered in "number_of_clusters" different

data/lib/ai4r/data/data_set.rb CHANGED

@@ -20,8 +20,6 @@ module Ai4r
     # the data_labels property.
     class DataSet
-      @@number_regex = /(((\b[0-9]+)?\.)?\b[0-9]+([eE][-+]?[0-9]+)?\b)/
       attr_reader :data_labels, :data_items
       # Create a new DataSet. By default, empty.
@@ -82,11 +80,18 @@ module Ai4r
       def parse_csv(filepath)
         items = []
         open_csv_file(filepath) do |row|
-          items << row.collect{|x| (x.match(@@number_regex)) ? x.to_f : x.data }
+          items << row.collect{|x| is_number?(x) ? Float(x) : x }
         end
         set_data_items(items)
       end
+      # Same as load_csv_with_labels, but it will try to convert cell contents to numbers.
+      def parse_csv_with_labels(filepath)
+        parse_csv(filepath)
+        @data_labels = @data_items.shift
+        return self
+      end
       # Set data labels.
       # Data labels must have the following format:
       #     [ 'city', 'age_range', 'gender', 'marketing_target'  ]
@@ -224,6 +229,10 @@ module Ai4r
       protected
+      def is_number?(x)
+        true if Float(x) rescue false
+      end
       def check_data_items(data_items)
         if !data_items || data_items.empty?
           raise ArgumentError, "Examples data set must not be empty."

data/lib/ai4r/data/proximity.rb CHANGED

@@ -92,6 +92,28 @@ module Ai4r
         return 1.0/similarity - 1
       end
+      # Cosine similarity is a measure of similarity between two vectors
+      # of an inner product space that measures the cosine of the
+      # angle between them (http://en.wikipedia.org/wiki/Cosine_similarity).
+      #
+      # Parameters a and b are vectors with continuous attributes.
+      #
+      # D = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]**2)) * sqrt(sum(b[i]**2)))
+      def self.cosine_distance(a,b)
+        dot_product = 0.0
+        norm_a = 0.0
+        norm_b = 0.0
+        magnitude = 0.0
+        a.each_index do |i|
+          dot_product += a[i] * b[i]
+          norm_a += a[i] ** 2
+          norm_b += b[i] ** 2
+        end
+        magnitude = Math.sqrt(norm_a) * Math.sqrt(norm_b)
+        return 1 - (dot_product / magnitude)
+      end
     end
   end

data/lib/ai4r/neural_network/backpropagation.rb CHANGED

@@ -44,7 +44,7 @@ module Ai4r
     # Use class method get_parameters_info to obtain details on the algorithm
     # parameters. Use set_parameters to set values for these parameters.
     #
-    # * :disable_bias => If true, the alforithm will not use bias nodes.
+    # * :disable_bias => If true, the algorithm will not use bias nodes.
     #   False by default.
     # * :initial_weight_function => f(n, i, j) must return the initial
     #   weight for the connection between the node i in layer n, and node j in
@@ -86,7 +86,7 @@ module Ai4r
       include Ai4r::Data::Parameterizable
-      parameters_info :disable_bias => "If true, the alforithm will not use "+
+      parameters_info :disable_bias => "If true, the algorithm will not use "+
             "bias nodes. False by default.",
         :initial_weight_function => "f(n, i, j) must return the initial "+
             "weight for the conection between the node i in layer n, and "+
@@ -136,6 +136,17 @@ module Ai4r
         return @activation_nodes.last.clone
       end
+      # Evaluates the input and returns the index of the most active output node
+      # E.g.
+      #     net = Backpropagation.new([4, 3, 2])
+      #     net.eval_result([25, 32.3, 12.8, 1.5])
+      #         # eval gives [0.83, 0.03]
+      #         # =>  0
+      def eval_result(input_values)
+        result = eval(input_values)
+        result.index(result.max)
+      end
       # This method trains the network using the backpropagation algorithm.
       #
       # input: Networks input
@@ -178,20 +189,20 @@ module Ai4r
           @last_changes,
           @activation_nodes
         ]
-     end
+      end
-     def marshal_load(ary)
-       @structure,
-          @disable_bias,
-          @learning_rate,
-          @momentum,
-          @weights,
-          @last_changes,
-          @activation_nodes = ary
-       @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
-       @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
-       @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
-     end
+      def marshal_load(ary)
+        @structure,
+           @disable_bias,
+           @learning_rate,
+           @momentum,
+           @weights,
+           @last_changes,
+           @activation_nodes = ary
+        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
+        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
+        @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+      end
       # Propagate error backwards

data/test/classifiers/id3_test.rb CHANGED

@@ -203,6 +203,18 @@ class ID3Test < Test::Unit::TestCase
     eval id3.get_rules
     assert_equal 'N', marketing_target
   end
+  def test_model_failure
+    bad_data_items = [  ['a', 'Y'],
+                        ['b', 'N'],
+            ]
+    bad_data_labels = ['bogus', 'target']
+    id3 = ID3.new.build(DataSet.new(:data_items =>bad_data_items, :data_labels => bad_data_labels))
+    assert_raise ModelFailureError do
+      id3.eval(['c'])
+    end
+    assert_equal true, true
+  end
 end

data/test/classifiers/multilayer_perceptron_test.rb CHANGED

@@ -23,7 +23,7 @@ class MultilayerPerceptronTest < Test::Unit::TestCase
                 ['Chicago',     '[50-80]', 'M', 'N'],
               ])
-   def test_initialize
+  def test_initialize
     classifier = MultilayerPerceptron.new
     assert_equal 1, classifier.active_node_value
     assert_equal 0, classifier.inactive_node_value

data/test/classifiers/naive_bayes_test.rb CHANGED

@@ -7,37 +7,37 @@ include Ai4r::Data
 class NaiveBayesTest < Test::Unit::TestCase
-  @@data_labels = [ "Color","Type","Origin","Stolen?" ]
+  @@data_labels = %w(Color Type Origin Stolen?)
   @@data_items = [
-              ["Red",   "Sports", "Domestic", "Yes"],
-              ["Red",   "Sports", "Domestic", "No"],
-              ["Red",   "Sports", "Domestic", "Yes"],
-              ["Yellow","Sports", "Domestic", "No"],
-              ["Yellow","Sports", "Imported", "Yes"],
-              ["Yellow","SUV",    "Imported", "No"],
-              ["Yellow","SUV",    "Imported", "Yes"],
-              ["Yellow","Sports", "Domestic", "No"],
-              ["Red",   "SUV",    "Imported", "No"],
-              ["Red",   "Sports", "Imported", "Yes"]
-            ]
+    %w(Red    Sports  Domestic Yes),
+    %w(Red    Sports  Domestic No),
+    %w(Red    Sports  Domestic Yes),
+    %w(Yellow Sports  Domestic No),
+    %w(Yellow Sports  Imported Yes),
+    %w(Yellow SUV     Imported No),
+    %w(Yellow SUV     Imported Yes),
+    %w(Yellow Sports  Domestic No),
+    %w(Red    SUV     Imported No),
+    %w(Red    Sports  Imported Yes)
+  ]
   def setup
     @data_set = DataSet.new
     @data_set = DataSet.new(:data_items => @@data_items, :data_labels => @@data_labels)
-    @b = NaiveBayes.new.set_parameters({:m=>3}).build @data_set
+    @b = NaiveBayes.new.set_parameters({:m => 3}).build @data_set
   end
   def test_eval
-    result = @b.eval(["Red", "SUV", "Domestic"])
-    assert_equal "No", result
+    result = @b.eval(%w(Red SUV Domestic))
+    assert_equal 'No', result
   end
   def test_get_probability_map
-    map = @b.get_probability_map(["Red", "SUV", "Domestic"])
+    map = @b.get_probability_map(%w(Red SUV Domestic))
     assert_equal 2, map.keys.length
-    assert_in_delta 0.42, map["Yes"], 0.1
-    assert_in_delta 0.58, map["No"], 0.1
+    assert_in_delta 0.42, map['Yes'], 0.1
+    assert_in_delta 0.58, map['No'], 0.1
   end
 end

data/test/classifiers/simple_linear_regression_test.rb ADDED

@@ -0,0 +1,37 @@
+require 'ai4r/classifiers/simple_linear_regression'
+require 'ai4r/data/data_set'
+require 'test/unit'
+include Ai4r::Classifiers
+include Ai4r::Data
+class SimpleLinearRegressionTest < Test::Unit::TestCase
+  @@data_labels = ["symboling", "normalized-losses", "wheel-base", "length", "width", "height", "curb-weight",
+                   "engine-size", "bore" , "stroke", "compression-ratio", "horsepower", "peak-rpm", "city-mpg",
+                   "highway-mpg", "class"]
+  @@data_items = [
+      [2,164,99.8,176.6,66.2,54.3,2337,109,3.19,3.4,10,102,5500,24,30,13950],
+      [2,164,99.4,176.6,66.4,54.3,2824,136,3.19,3.4,8,115,5500,18,22,17450],
+      [1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25,17710],
+      [1,158,105.8,192.7,71.4,55.9,3086,131,3.13,3.4,8.3,140,5500,17,20,23875],
+      [2,192,101.2,176.8,64.8,54.3,2395,108,3.5,2.8,8.8,101,5800,23,29,16430],
+      [0,192,101.2,176.8,64.8,54.3,2395,108,3.5,2.8,8.8,101,5800,23,29,16925],
+      [0,188,101.2,176.8,64.8,54.3,2710,164,3.31,3.19,9,121,4250,21,28,20970],
+      [0,188,101.2,176.8,64.8,54.3,2765,164,3.31,3.19,9,121,4250,21,28,21105],
+      [2,121,88.4,141.1,60.3,53.2,1488,61,2.91,3.03,9.5,48,5100,47,53,5151],
+  ]
+  def setup
+    @data_set = DataSet.new
+    @data_set = DataSet.new(:data_items => @@data_items, :data_labels => @@data_labels)
+    @c = SimpleLinearRegression.new.build @data_set
+  end
+  def test_eval
+    result = @c.eval([-1,95,109.1,188.8,68.9,55.5,3062,141,3.78,3.15,9.5,114,5400,19,25])
+    assert_equal 17218.444444444445, result
+  end
+end

data/test/clusterers/k_means_test.rb CHANGED

@@ -17,7 +17,11 @@ class KMeansTest < Test::Unit::TestCase
   @@data = [  [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
               [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
+  # k-means will generate an empty cluster with this data and initial centroid assignment
+  @@empty_cluster_data = [[-0.1, 0], [0, 0], [0.1, 0], [-0.1, 10], [0.1, 10], [0.2, 10]]
+  @@empty_centroid_indices = [0,1,2]
   def test_build
     data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
     clusterer = KMeans.new.build(data_set, 4)
@@ -25,21 +29,45 @@ class KMeansTest < Test::Unit::TestCase
     # Verify that all 4 clusters are created
     assert_equal 4, clusterer.clusters.length
     assert_equal 4, clusterer.centroids.length
-    # The addition of all instances of every cluster must be equal than
+    # The addition of all instances of every cluster must be equal to
     # the number of data points
     total_length = 0
     clusterer.clusters.each do |cluster|
       total_length += cluster.data_items.length
     end
     assert_equal @@data.length, total_length
-    # Data inside clusters must be the same as orifinal data
+    # Data inside clusters must be the same as original data
     clusterer.clusters.each do |cluster|
      cluster.data_items.each do |data_item|
        assert @@data.include?(data_item)
      end
     end
   end
+  def test_build_and_eliminate_empty_clusters
+    data_set = DataSet.new(:data_items => @@empty_cluster_data, :data_labels => ["X", "Y"])
+    # :eliminate is the :on_empty default, so we don't need to pass it as a parameter
+    clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices}).build(data_set, @@empty_centroid_indices.size)
+    # Verify that one cluster was eliminated
+    assert_equal @@empty_centroid_indices.size - 1, clusterer.clusters.length
+    assert_equal @@empty_centroid_indices.size - 1, clusterer.centroids.length
+    # The addition of all instances of every cluster must be equal to
+    # the number of data points
+    total_length = 0
+    clusterer.clusters.each do |cluster|
+      total_length += cluster.data_items.length
+    end
+    assert_equal @@empty_cluster_data.length, total_length
+    # Data inside clusters must be the same as original data
+    clusterer.clusters.each do |cluster|
+     cluster.data_items.each do |data_item|
+       assert @@empty_cluster_data.include?(data_item)
+     end
+    end
+  end
   def test_eval
     data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
     clusterer = KMeans.new.build(data_set, 4)
@@ -54,13 +82,18 @@ class KMeansTest < Test::Unit::TestCase
       assert clusterer.distance(centroid, item) >= min_distance
     end
   end
   def test_distance
     clusterer = KMeans.new
-    # By default, distance returns the eucledian distance to the power of 2
+    # By default, distance returns the euclidean distance to the power of 2
     assert_equal 2385, clusterer.distance(
       [1, 10, "Chicago", 2],
       [10, 10, "London", 50])
+    # Ensure default distance raises error for nil argument
+    exception = assert_raise(TypeError) {clusterer.distance([1, 10], [nil, nil])}
+    assert_equal("nil can't be coerced into Fixnum", exception.message)
     # Test new distance definition
     manhattan_distance = lambda do |a, b|
         dist = 0.0
@@ -84,7 +117,42 @@ class KMeansTest < Test::Unit::TestCase
       build(data_set, 4)
     assert_equal 1, clusterer.iterations
   end
+  def test_centroid_indices
+    data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
+    # centroid_indices need not be specified:
+    KMeans.new.build(data_set, 4)
+    # centroid_indices can be specified:
+    KMeans.new.set_parameters({:centroid_indices=>[0,1,2,3]}).build(data_set, 4)
+    # raises exception if number of clusters differs from length of centroid_indices:
+    exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>[0,1,2,3]}).build(data_set, 2)}
+    assert_equal('Length of centroid indices array differs from the specified number of clusters', exception.message)
+    # raises exception for bad centroid index:
+    exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>[0,1,2,@@data.size+10]}).build(data_set, 4)}
+    assert_equal("Invalid centroid index #{@@data.size+10}", exception.message)
+  end
+  def test_on_empty
+    data_set = DataSet.new(:data_items => @@empty_cluster_data, :data_labels => ["X", "Y"])
+    clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices}).build(data_set, @@empty_centroid_indices.size)
+    # Verify that one cluster was eliminated
+    assert_equal @@empty_centroid_indices.size - 1, clusterer.clusters.length
+    # Verify that eliminate is the on_empty default
+    assert_equal 'eliminate', clusterer.on_empty
+    # Verify that invalid on_empty option throws an argument error
+    exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'ldkfje'}).build(data_set, @@empty_centroid_indices.size)}
+    assert_equal("Invalid value for on_empty", exception.message)
+    # Verify that on_empty option 'terminate' raises an error when an empty cluster arises
+    exception = assert_raise(TypeError) {KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'terminate'}).build(data_set, @@empty_centroid_indices.size)}
+    assert_equal("nil can't be coerced into Float", exception.message)
+    clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'random'}).build(data_set, @@empty_centroid_indices.size)
+    # Verify that cluster was not eliminated
+    assert_equal @@empty_centroid_indices.size, clusterer.clusters.length
+    clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'outlier'}).build(data_set, @@empty_centroid_indices.size)
+    # Verify that cluster was not eliminated
+    assert_equal @@empty_centroid_indices.size, clusterer.clusters.length
+  end
   private
   def draw_map(clusterer)
     map = Array.new(11) {Array.new(11, 0)}
@@ -95,6 +163,5 @@ class KMeansTest < Test::Unit::TestCase
     end
     map.each { |row| puts row.inspect}
   end
 end