ai4r 1.12 → 1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +7 -12
- data/examples/classifiers/simple_linear_regression_example.csv +159 -0
- data/examples/classifiers/simple_linear_regression_example.rb +15 -0
- data/examples/clusterers/clusterer_example.rb +56 -0
- data/examples/neural_network/backpropagation_example.rb +2 -1
- data/lib/ai4r.rb +3 -1
- data/lib/ai4r/classifiers/id3.rb +6 -2
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +1 -1
- data/lib/ai4r/classifiers/naive_bayes.rb +24 -21
- data/lib/ai4r/classifiers/simple_linear_regression.rb +118 -0
- data/lib/ai4r/clusterers/average_linkage.rb +3 -3
- data/lib/ai4r/clusterers/bisecting_k_means.rb +2 -2
- data/lib/ai4r/clusterers/centroid_linkage.rb +3 -3
- data/lib/ai4r/clusterers/clusterer.rb +0 -11
- data/lib/ai4r/clusterers/complete_linkage.rb +3 -3
- data/lib/ai4r/clusterers/diana.rb +2 -2
- data/lib/ai4r/clusterers/k_means.rb +123 -21
- data/lib/ai4r/clusterers/median_linkage.rb +3 -3
- data/lib/ai4r/clusterers/single_linkage.rb +4 -4
- data/lib/ai4r/clusterers/ward_linkage.rb +4 -4
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +48 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +3 -3
- data/lib/ai4r/data/data_set.rb +12 -3
- data/lib/ai4r/data/proximity.rb +22 -0
- data/lib/ai4r/neural_network/backpropagation.rb +26 -15
- data/test/classifiers/id3_test.rb +12 -0
- data/test/classifiers/multilayer_perceptron_test.rb +1 -1
- data/test/classifiers/naive_bayes_test.rb +18 -18
- data/test/classifiers/simple_linear_regression_test.rb +37 -0
- data/test/clusterers/k_means_test.rb +75 -8
- data/test/clusterers/ward_linkage_hierarchical_test.rb +81 -0
- data/test/data/data_set_test.rb +8 -0
- data/test/data/proximity_test.rb +7 -1
- metadata +96 -55
@@ -0,0 +1,118 @@
|
|
1
|
+
# Author:: Malav Bhavsar
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/classifier'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Classifiers
|
15
|
+
|
16
|
+
|
17
|
+
# = Introduction
|
18
|
+
#
|
19
|
+
# This is an implementation of a Simple Linear Regression Classifier.
|
20
|
+
#
|
21
|
+
# For further details regarding Simple Linear Regression have a look at these links:
|
22
|
+
# http://en.wikipedia.org/wiki/Simple_linear_regression
|
23
|
+
# http://en.wikipedia.org/wiki/Linear_regression
|
24
|
+
#
|
25
|
+
#
|
26
|
+
# = How to use it
|
27
|
+
#
|
28
|
+
# data = DataSet.new.parse_csv_with_labels "autoPrice.csv"
|
29
|
+
# c = SimpleLinearRegression.new.
|
30
|
+
# build data
|
31
|
+
# c.eval([1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25])
|
32
|
+
#
|
33
|
+
|
34
|
+
class SimpleLinearRegression < Classifier
|
35
|
+
|
36
|
+
attr_reader :attribute, :attribute_index, :slope, :intercept
|
37
|
+
|
38
|
+
# Sets up an untrained model: no attribute chosen, attribute index 0,
# slope and intercept both 0. Call build to fit the model to a data set.
def initialize
|
39
|
+
@attribute = nil
|
40
|
+
@attribute_index = 0
|
41
|
+
@slope = 0
|
42
|
+
@intercept = 0
|
43
|
+
end
|
44
|
+
|
45
|
+
# You can evaluate new data, predicting its numeric value.
|
46
|
+
# e.g.
|
47
|
+
# c.eval([1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25])
|
48
|
+
# => 11876.96774193548
|
49
|
+
def eval(data)
|
50
|
+
# Linear prediction: intercept + slope * value of the attribute at
# @attribute_index. Only that one element of data is read.
@intercept + @slope * data[@attribute_index]
|
51
|
+
end
|
52
|
+
|
53
|
+
# Gets the best attribute and does Linear Regression using it to find out the
|
54
|
+
# slope and intercept.
|
55
|
+
# Parameter data has to be an instance of DataSet
|
56
|
+
# Fits a single-attribute linear model to +data+.
#
# The last attribute of the data set is the target (y). Every other
# attribute is tried as the predictor (x): an ordinary least squares line
# is fitted and the attribute with the smallest residual sum of squares
# is kept. The result is stored in @attribute, @attribute_index, @slope
# and @intercept.
#
# Parameter data has to be an instance of DataSet.
#
# Raises RuntimeError if data is not a DataSet, if it has no data items,
# or if every candidate attribute is constant (no line can be fitted).
#
# Returns self, so calls can be chained: SimpleLinearRegression.new.build(data)
def build(data)
  raise "Error instance must be passed" unless data.is_a?(DataSet)
  raise "Data should not be empty" if data.data_items.length == 0

  class_index = data.num_attributes - 1
  # Computed once; the means do not change while attributes are scanned.
  means = data.get_mean_or_mode
  y_mean = means[class_index]

  # Choose best attribute
  min_msq = Float::MAX
  chosen = -1
  chosen_slope = 0.0 / 0.0 # Float::NAN
  chosen_intercept = 0.0 / 0.0 # Float::NAN

  data.data_labels.each do |attr_name|
    attr_index = data.get_index attr_name
    # The target column is never a predictor.
    next if attr_index == class_index

    x_mean = means[attr_index]
    # Float accumulators so the division below is never integer division.
    sum_xy_diff = 0.0
    sum_x_diff_squared = 0.0
    sum_y_diff_squared = 0.0
    data.data_items.each do |instance|
      x_diff = instance[attr_index] - x_mean
      # y must come from the target column, not from the predictor column;
      # otherwise the fitted slope is always 1.
      y_diff = instance[class_index] - y_mean
      sum_xy_diff += x_diff * y_diff
      sum_x_diff_squared += x_diff * x_diff
      sum_y_diff_squared += y_diff * y_diff
    end

    # A constant attribute carries no information (and would divide by 0).
    next if sum_x_diff_squared == 0

    slope = sum_xy_diff / sum_x_diff_squared
    intercept = y_mean - slope * x_mean
    # Residual sum of squares of the fitted line.
    msq = sum_y_diff_squared - slope * sum_xy_diff

    if msq < min_msq
      min_msq = msq
      chosen = attr_index
      chosen_slope = slope
      chosen_intercept = intercept
    end
  end

  raise "no useful attribute found" if chosen == -1

  @attribute = data.data_labels[chosen]
  @attribute_index = chosen
  @slope = chosen_slope
  @intercept = chosen_intercept
  self
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -16,7 +16,7 @@ module Ai4r
|
|
16
16
|
# Implementation of a Hierarchical clusterer with group average
|
17
17
|
# linkage, AKA unweighted pair group method average or UPGMA (Everitt
|
18
18
|
# et al., 2001 ; Jain and Dubes, 1988 ; Sokal and Michener, 1958).
|
19
|
-
# Hierarchical
|
19
|
+
# Hierarchical clusterers create one cluster per element, and then
|
20
20
|
# progressively merge clusters, until the required number of clusters
|
21
21
|
# is reached.
|
22
22
|
# With average linkage, the distance between a clusters cx and
|
@@ -29,8 +29,8 @@ module Ai4r
|
|
29
29
|
parameters_info :distance_function =>
|
30
30
|
"Custom implementation of distance function. " +
|
31
31
|
"It must be a closure receiving two data items and return the " +
|
32
|
-
"distance
|
33
|
-
"
|
32
|
+
"distance between them. By default, this algorithm uses " +
|
33
|
+
"euclidean distance of numeric attributes to the power of 2."
|
34
34
|
|
35
35
|
# Build a new clusterer, using data examples found in data_set.
|
36
36
|
# Items will be clustered in "number_of_clusters" different
|
@@ -28,8 +28,8 @@ module Ai4r
|
|
28
28
|
"build the clusterer. By default it is uncapped.",
|
29
29
|
:distance_function => "Custom implementation of distance function. " +
|
30
30
|
"It must be a closure receiving two data items and return the " +
|
31
|
-
"distance
|
32
|
-
"
|
31
|
+
"distance between them. By default, this algorithm uses " +
|
32
|
+
"euclidean distance of numeric attributes to the power of 2.",
|
33
33
|
:centroid_function => "Custom implementation to calculate the " +
|
34
34
|
"centroid of a cluster. It must be a closure receiving an array of " +
|
35
35
|
"data sets, and return an array of data items, representing the " +
|
@@ -17,7 +17,7 @@ module Ai4r
|
|
17
17
|
# centroid linkage algorithm, aka unweighted pair group method
|
18
18
|
# centroid (UPGMC) (Everitt et al., 2001 ; Jain and Dubes, 1988 ;
|
19
19
|
# Sokal and Michener, 1958 )
|
20
|
-
# Hierarchical
|
20
|
+
# Hierarchical clusterers create one cluster per element, and then
|
21
21
|
# progressively merge clusters, until the required number of clusters
|
22
22
|
# is reached.
|
23
23
|
# The distance between clusters is the squared euclidean distance
|
@@ -32,8 +32,8 @@ module Ai4r
|
|
32
32
|
parameters_info :distance_function =>
|
33
33
|
"Custom implementation of distance function. " +
|
34
34
|
"It must be a closure receiving two data items and return the " +
|
35
|
-
"distance
|
36
|
-
"
|
35
|
+
"distance between them. By default, this algorithm uses " +
|
36
|
+
"euclidean distance of numeric attributes to the power of 2."
|
37
37
|
|
38
38
|
# Build a new clusterer, using data examples found in data_set.
|
39
39
|
# Items will be clustered in "number_of_clusters" different
|
@@ -32,17 +32,6 @@ module Ai4r
|
|
32
32
|
end
|
33
33
|
|
34
34
|
protected
|
35
|
-
# Usefull as a defult distance function for clustering algorithms
|
36
|
-
def euclidean_distance(a, b)
|
37
|
-
dist = 0.0
|
38
|
-
a.each_index do |index|
|
39
|
-
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
40
|
-
dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
|
41
|
-
end
|
42
|
-
end
|
43
|
-
return dist
|
44
|
-
end
|
45
|
-
|
46
35
|
def get_min_index(array)
|
47
36
|
min = array.first
|
48
37
|
index = 0
|
@@ -15,7 +15,7 @@ module Ai4r
|
|
15
15
|
|
16
16
|
# Implementation of a Hierarchical clusterer with complete linkage (Everitt
|
17
17
|
# et al., 2001 ; Jain and Dubes, 1988 ; Sorensen, 1948 ).
|
18
|
-
# Hierarchical
|
18
|
+
# Hierarchical clusterers create one cluster per element, and then
|
19
19
|
# progressively merge clusters, until the required number of clusters
|
20
20
|
# is reached.
|
21
21
|
# With complete linkage, the distance between two clusters is computed as
|
@@ -27,8 +27,8 @@ module Ai4r
|
|
27
27
|
parameters_info :distance_function =>
|
28
28
|
"Custom implementation of distance function. " +
|
29
29
|
"It must be a closure receiving two data items and return the " +
|
30
|
-
"distance
|
31
|
-
"
|
30
|
+
"distance between them. By default, this algorithm uses " +
|
31
|
+
"euclidean distance of numeric attributes to the power of 2."
|
32
32
|
|
33
33
|
|
34
34
|
# Build a new clusterer, using data examples found in data_set.
|
@@ -25,8 +25,8 @@ module Ai4r
|
|
25
25
|
parameters_info :distance_function =>
|
26
26
|
"Custom implementation of distance function. " +
|
27
27
|
"It must be a closure receiving two data items and return the " +
|
28
|
-
"distance
|
29
|
-
"
|
28
|
+
"distance between them. By default, this algorithm uses " +
|
29
|
+
"euclidean distance of numeric attributes to the power of 2."
|
30
30
|
|
31
31
|
def initialize
|
32
32
|
@distance_function = lambda do |a,b|
|
@@ -8,6 +8,7 @@
|
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
10
|
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/../data/proximity'
|
11
12
|
require File.dirname(__FILE__) + '/../clusterers/clusterer'
|
12
13
|
|
13
14
|
module Ai4r
|
@@ -27,22 +28,31 @@ module Ai4r
|
|
27
28
|
"build the clusterer. By default it is uncapped.",
|
28
29
|
:distance_function => "Custom implementation of distance function. " +
|
29
30
|
"It must be a closure receiving two data items and return the " +
|
30
|
-
"distance
|
31
|
-
"
|
31
|
+
"distance between them. By default, this algorithm uses " +
|
32
|
+
"euclidean distance of numeric attributes to the power of 2.",
|
32
33
|
:centroid_function => "Custom implementation to calculate the " +
|
33
34
|
"centroid of a cluster. It must be a closure receiving an array of " +
|
34
35
|
"data sets, and return an array of data items, representing the " +
|
35
36
|
"centroids of for each data set. " +
|
36
37
|
"By default, this algorithm returns a data items using the mode "+
|
37
|
-
"or mean of each attribute on each data set."
|
38
|
+
"or mean of each attribute on each data set.",
|
39
|
+
:centroid_indices => "Indices of data items (indexed from 0) to be " +
|
40
|
+
"the initial centroids. Otherwise, the initial centroids will be " +
|
41
|
+
"assigned randomly from the data set.",
|
42
|
+
:on_empty => "Action to take if a cluster becomes empty, with values " +
|
43
|
+
"'eliminate' (the default action, eliminate the empty cluster), " +
|
44
|
+
"'terminate' (terminate with error), 'random' (relocate the " +
|
45
|
+
"empty cluster to a random point), 'outlier' (relocate the " +
|
46
|
+
"empty cluster to the point furthest from its centroid)."
|
38
47
|
|
39
48
|
def initialize
|
40
49
|
@distance_function = nil
|
41
50
|
@max_iterations = nil
|
42
|
-
@old_centroids = nil
|
43
51
|
@centroid_function = lambda do |data_sets|
|
44
52
|
data_sets.collect{ |data_set| data_set.get_mean_or_mode}
|
45
53
|
end
|
54
|
+
@centroid_indices = []
|
55
|
+
@on_empty = 'eliminate' # default if none specified
|
46
56
|
end
|
47
57
|
|
48
58
|
|
@@ -52,6 +62,8 @@ module Ai4r
|
|
52
62
|
def build(data_set, number_of_clusters)
|
53
63
|
@data_set = data_set
|
54
64
|
@number_of_clusters = number_of_clusters
|
65
|
+
raise ArgumentError, 'Length of centroid indices array differs from the specified number of clusters' unless @centroid_indices.empty? || @centroid_indices.length == @number_of_clusters
|
66
|
+
raise ArgumentError, 'Invalid value for on_empty' unless @on_empty == 'eliminate' || @on_empty == 'terminate' || @on_empty == 'random' || @on_empty == 'outlier'
|
55
67
|
@iterations = 0
|
56
68
|
|
57
69
|
calc_initial_centroids
|
@@ -73,32 +85,27 @@ module Ai4r
|
|
73
85
|
# This function calculates the distance between 2 different
|
74
86
|
# instances. By default, it returns the euclidean distance to the
|
75
87
|
# power of 2.
|
76
|
-
# You can provide a more
|
88
|
+
# You can provide a more convenient distance implementation:
|
77
89
|
#
|
78
90
|
# 1- Overwriting this method
|
79
91
|
#
|
80
92
|
# 2- Providing a closure to the :distance_function parameter
|
81
93
|
def distance(a, b)
|
82
94
|
return @distance_function.call(a, b) if @distance_function
|
83
|
-
return
|
95
|
+
return Ai4r::Data::Proximity.squared_euclidean_distance(
|
96
|
+
a.select {|att_a| att_a.is_a? Numeric} ,
|
97
|
+
b.select {|att_b| att_b.is_a? Numeric})
|
84
98
|
end
|
85
99
|
|
86
100
|
protected
|
87
101
|
|
88
102
|
def calc_initial_centroids
|
89
|
-
@centroids = []
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if !tried_indexes.include?(random_index)
|
95
|
-
tried_indexes << random_index
|
96
|
-
if !@centroids.include? @data_set.data_items[random_index]
|
97
|
-
@centroids << @data_set.data_items[random_index]
|
98
|
-
end
|
99
|
-
end
|
103
|
+
@centroids, @old_centroids = [], nil
|
104
|
+
if @centroid_indices.empty?
|
105
|
+
populate_centroids('random')
|
106
|
+
else
|
107
|
+
populate_centroids('indices')
|
100
108
|
end
|
101
|
-
@number_of_clusters = @centroids.length
|
102
109
|
end
|
103
110
|
|
104
111
|
def stop_criteria_met
|
@@ -110,9 +117,14 @@ module Ai4r
|
|
110
117
|
@clusters = Array.new(@number_of_clusters) do
|
111
118
|
Ai4r::Data::DataSet.new :data_labels => @data_set.data_labels
|
112
119
|
end
|
113
|
-
@
|
114
|
-
|
120
|
+
@cluster_indices = Array.new(@number_of_clusters) {[]}
|
121
|
+
|
122
|
+
@data_set.data_items.each_with_index do |data_item, data_index|
|
123
|
+
c = eval(data_item)
|
124
|
+
@clusters[c] << data_item
|
125
|
+
@cluster_indices[c] << data_index if @on_empty == 'outlier'
|
115
126
|
end
|
127
|
+
manage_empty_clusters if has_empty_cluster?
|
116
128
|
end
|
117
129
|
|
118
130
|
def recompute_centroids
|
@@ -120,7 +132,97 @@ module Ai4r
|
|
120
132
|
@iterations += 1
|
121
133
|
@centroids = @centroid_function.call(@clusters)
|
122
134
|
end
|
123
|
-
|
135
|
+
|
136
|
+
# Appends data items to @centroids until there are number_of_clusters of
# them or every data item index has been tried, then sets
# @number_of_clusters to the resulting number of centroids.
#
# populate_method selects where candidate indices come from:
# 'random'::  uniformly random indices. Used for the initial assignment
#             (without the :centroid_indices option) and for reassignment
#             of empty cluster centroids (:on_empty option 'random').
# 'indices':: the indices listed in @centroid_indices, in order. Used for
#             the initial assignment only. Raises ArgumentError for an
#             index that is not an Integer within the data set bounds.
# 'outlier':: indices ordered from the furthest to the nearest point to
#             its centroid. Used for reassignment of empty cluster
#             centroids only (:on_empty option 'outlier').
#
# An index is never used twice, and a data item equal to an existing
# centroid is skipped, so fewer centroids than requested may result.
def populate_centroids(populate_method, number_of_clusters=@number_of_clusters)
  tried_indexes = []
  item_count = @data_set.data_items.length
  case populate_method
  when 'random'
    while @centroids.length < number_of_clusters &&
        tried_indexes.length < item_count
      add_centroid_candidate(rand(item_count), tried_indexes)
    end
  when 'indices'
    @centroid_indices.each do |index|
      raise ArgumentError, "Invalid centroid index #{index}" unless (index.is_a? Integer) && index >= 0 && index < item_count
      add_centroid_candidate(index, tried_indexes)
    end
  when 'outlier'
    sorted_data_indices = sort_data_indices_by_dist_to_centroid
    i = sorted_data_indices.length - 1 # the last item is the furthest from its centroid
    while @centroids.length < number_of_clusters &&
        tried_indexes.length < item_count
      add_centroid_candidate(sorted_data_indices[i], tried_indexes)
      break unless i > 0
      i -= 1
    end
  end
  @number_of_clusters = @centroids.length
end

# Records index as tried and, unless it was already tried or its data item
# is already a centroid, appends that data item to @centroids.
def add_centroid_candidate(index, tried_indexes)
  return if tried_indexes.include?(index)
  tried_indexes << index
  candidate = @data_set.data_items[index]
  @centroids << candidate unless @centroids.include?(candidate)
end
|
177
|
+
|
178
|
+
# Sort cluster points by distance to assigned centroid. Utilizes @cluster_indices.
|
179
|
+
# Returns indices, sorted in order from the nearest to furthest.
|
180
|
+
# Computes, for every clustered data item, the distance to the centroid of
# the cluster it belongs to, and returns the items' data set indices
# (taken from @cluster_indices) ordered from the nearest to the furthest.
def sort_data_indices_by_dist_to_centroid
  dist_by_index = {}
  @clusters.each_with_index do |cluster, c|
    centroid = @centroids[c]
    cluster.data_items.each_with_index do |data_item, i|
      dist_to_centroid = distance(data_item, centroid)
      # @cluster_indices[c][i] is the data set index of the i-th item of cluster c
      dist_by_index[@cluster_indices[c][i]] = dist_to_centroid
    end
  end
  # sort {index => dist to centroid} by distance (ascending), keep only the indices
  dist_by_index.sort_by { |_index, dist| dist }.collect { |index, _dist| index }
end
|
194
|
+
|
195
|
+
# Returns true if any of the first @number_of_clusters entries of
# @clusters has no data items, false otherwise. Stops at the first
# empty cluster found.
def has_empty_cluster?
  (0...@number_of_clusters).any? { |c| @clusters[c].data_items.empty? }
end
|
202
|
+
|
203
|
+
# Applies the on_empty policy. 'terminate' leaves everything untouched,
# 'eliminate' only drops the empty clusters, and any other value is passed
# to populate_centroids as the method for refilling the centroid list
# before cluster membership is calculated again.
def manage_empty_clusters
|
204
|
+
return if self.on_empty == 'terminate' # Do nothing to terminate with error. (The empty cluster will be assigned a nil centroid, and then calculating the distance from this centroid to another point will raise an exception.)
|
205
|
+
|
206
|
+
# Remember the count before eliminate_empty_clusters lowers @number_of_clusters.
initial_number_of_clusters = @number_of_clusters
|
207
|
+
eliminate_empty_clusters
|
208
|
+
return if self.on_empty == 'eliminate'
|
209
|
+
populate_centroids(self.on_empty, initial_number_of_clusters) # Add centroids until there are initial_number_of_clusters of them again
|
210
|
+
# Reassign every data item to the updated set of centroids.
calculate_membership_clusters
|
211
|
+
end
|
212
|
+
|
213
|
+
# Rebuilds @clusters, @centroids and @cluster_indices keeping only the
# positions whose cluster has at least one data item, then sets
# @number_of_clusters to the number of centroids kept.
def eliminate_empty_clusters
|
214
|
+
old_clusters, old_centroids, old_cluster_indices = @clusters, @centroids, @cluster_indices
|
215
|
+
@clusters, @centroids, @cluster_indices = [], [], []
|
216
|
+
@number_of_clusters.times do |i|
|
217
|
+
# Copy the three parallel entries together so they stay aligned by position.
if !old_clusters[i].data_items.empty?
|
218
|
+
@clusters << old_clusters[i]
|
219
|
+
@cluster_indices << old_cluster_indices[i]
|
220
|
+
@centroids << old_centroids[i]
|
221
|
+
end
|
222
|
+
end
|
223
|
+
@number_of_clusters = @centroids.length
|
224
|
+
end
|
225
|
+
|
124
226
|
end
|
125
227
|
end
|
126
228
|
end
|
@@ -16,7 +16,7 @@ module Ai4r
|
|
16
16
|
# Implementation of an Agglomerative Hierarchical clusterer with
|
17
17
|
# median linkage algorithm, aka weighted pair group method centroid
|
18
18
|
# or WPGMC (Everitt et al., 2001 ; Gower, 1967 ; Jain and Dubes, 1988 ).
|
19
|
-
# Hierarchical
|
19
|
+
# Hierarchical clusterers create one cluster per element, and then
|
20
20
|
# progressively merge clusters, until the required number of clusters
|
21
21
|
# is reached.
|
22
22
|
# Similar to centroid linkages, but using fix weight:
|
@@ -29,8 +29,8 @@ module Ai4r
|
|
29
29
|
parameters_info :distance_function =>
|
30
30
|
"Custom implementation of distance function. " +
|
31
31
|
"It must be a closure receiving two data items and return the " +
|
32
|
-
"distance
|
33
|
-
"
|
32
|
+
"distance between them. By default, this algorithm uses " +
|
33
|
+
"euclidean distance of numeric attributes to the power of 2."
|
34
34
|
|
35
35
|
# Build a new clusterer, using data examples found in data_set.
|
36
36
|
# Items will be clustered in "number_of_clusters" different
|