ai4r 1.12 → 1.13
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +7 -12
- data/examples/classifiers/simple_linear_regression_example.csv +159 -0
- data/examples/classifiers/simple_linear_regression_example.rb +15 -0
- data/examples/clusterers/clusterer_example.rb +56 -0
- data/examples/neural_network/backpropagation_example.rb +2 -1
- data/lib/ai4r.rb +3 -1
- data/lib/ai4r/classifiers/id3.rb +6 -2
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +1 -1
- data/lib/ai4r/classifiers/naive_bayes.rb +24 -21
- data/lib/ai4r/classifiers/simple_linear_regression.rb +118 -0
- data/lib/ai4r/clusterers/average_linkage.rb +3 -3
- data/lib/ai4r/clusterers/bisecting_k_means.rb +2 -2
- data/lib/ai4r/clusterers/centroid_linkage.rb +3 -3
- data/lib/ai4r/clusterers/clusterer.rb +0 -11
- data/lib/ai4r/clusterers/complete_linkage.rb +3 -3
- data/lib/ai4r/clusterers/diana.rb +2 -2
- data/lib/ai4r/clusterers/k_means.rb +123 -21
- data/lib/ai4r/clusterers/median_linkage.rb +3 -3
- data/lib/ai4r/clusterers/single_linkage.rb +4 -4
- data/lib/ai4r/clusterers/ward_linkage.rb +4 -4
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +48 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +3 -3
- data/lib/ai4r/data/data_set.rb +12 -3
- data/lib/ai4r/data/proximity.rb +22 -0
- data/lib/ai4r/neural_network/backpropagation.rb +26 -15
- data/test/classifiers/id3_test.rb +12 -0
- data/test/classifiers/multilayer_perceptron_test.rb +1 -1
- data/test/classifiers/naive_bayes_test.rb +18 -18
- data/test/classifiers/simple_linear_regression_test.rb +37 -0
- data/test/clusterers/k_means_test.rb +75 -8
- data/test/clusterers/ward_linkage_hierarchical_test.rb +81 -0
- data/test/data/data_set_test.rb +8 -0
- data/test/data/proximity_test.rb +7 -1
- metadata +96 -55
@@ -0,0 +1,118 @@
|
|
1
|
+
# Author:: Malav Bhavsar
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/classifier'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Classifiers
|
15
|
+
|
16
|
+
|
17
|
+
# = Introduction
#
# This is an implementation of a Simple Linear Regression Classifier.
#
# For every input attribute it fits a least-squares line that predicts the
# last attribute of the data set (the target), and it keeps the single
# attribute whose line leaves the smallest sum of squared errors.
#
# For further details regarding simple linear regression have a look at this link:
# http://en.wikipedia.org/wiki/Simple_linear_regression
#
#
# = How to use it
#
#   data = DataSet.new.parse_csv_with_labels "autoPrice.csv"
#   c = SimpleLinearRegression.new.
#     build data
#   c.eval([1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25])
#
class SimpleLinearRegression < Classifier

  # attribute::       label of the attribute selected by #build (nil before #build)
  # attribute_index:: position of that attribute inside a data item
  # slope::           slope of the fitted line
  # intercept::       intercept of the fitted line
  attr_reader :attribute, :attribute_index, :slope, :intercept

  # Creates an untrained model: no attribute selected, slope and intercept 0.
  def initialize
    @attribute = nil
    @attribute_index = 0
    @slope = 0
    @intercept = 0
  end

  # You can evaluate new data, predicting its target value.
  # Parameter data is indexed by attribute position; only the entry at
  # attribute_index is read.
  # e.g.
  #   c.eval([1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25])
  #   # => intercept + slope * (value of the selected attribute)
  def eval(data)
    @intercept + @slope * data[@attribute_index]
  end

  # Gets the best attribute and does Linear Regression using it to find out the
  # slope and intercept.
  # Parameter data has to be an instance of DataSet. The last attribute of the
  # data set is treated as the value to predict.
  # NOTE(review): the arithmetic below assumes every attribute is numeric -
  # confirm against DataSet#get_mean_or_mode for non-numeric columns.
  #
  # Raises a RuntimeError when data is not a DataSet, when it has no items, or
  # when every input attribute is constant (no line can be fitted).
  # Returns self, so the call can be chained after new.
  def build(data)
    raise "Error instance must be passed" unless data.is_a?(DataSet)
    raise "Data should not be empty" if data.data_items.length == 0

    target_index = data.num_attributes - 1
    # The means do not change while fitting, so compute them only once.
    means = data.get_mean_or_mode
    y_mean = means[target_index]

    # Choose best attribute
    min_msq = Float::MAX
    chosen = -1
    chosen_slope = nil
    chosen_intercept = nil

    data.data_labels.each do |attr_name|
      attr_index = data.get_index attr_name
      # The target itself is not a candidate predictor.
      next if attr_index == target_index

      # Compute slope and intercept
      x_mean = means[attr_index]
      # Float accumulators: the division below must never be integer division.
      sum_xy_diff = 0.0
      sum_x_diff_squared = 0.0
      sum_y_diff_squared = 0.0
      data.data_items.each do |instance|
        x_diff = instance[attr_index] - x_mean
        # The y deviation comes from the target column, not from the
        # candidate attribute.
        y_diff = instance[target_index] - y_mean
        sum_xy_diff += x_diff * y_diff
        sum_x_diff_squared += x_diff * x_diff
        sum_y_diff_squared += y_diff * y_diff
      end

      # A constant attribute has no spread, so no slope can be computed.
      next if sum_x_diff_squared == 0

      slope = sum_xy_diff / sum_x_diff_squared
      intercept = y_mean - slope * x_mean
      # Residual sum of squares of the fitted line.
      msq = sum_y_diff_squared - slope * sum_xy_diff

      if msq < min_msq
        min_msq = msq
        chosen = attr_index
        chosen_slope = slope
        chosen_intercept = intercept
      end
    end

    raise "no useful attribute found" if chosen == -1

    @attribute = data.data_labels[chosen]
    @attribute_index = chosen
    @slope = chosen_slope
    @intercept = chosen_intercept
    self
  end
end
|
117
|
+
end
|
118
|
+
end
|
@@ -16,7 +16,7 @@ module Ai4r
|
|
16
16
|
# Implementation of a Hierarchical clusterer with group average
|
17
17
|
# linkage, AKA unweighted pair group method average or UPGMA (Everitt
|
18
18
|
# et al., 2001 ; Jain and Dubes, 1988 ; Sokal and Michener, 1958).
|
19
|
-
# Hierarchical
|
19
|
+
# Hierarchical clusterers create one cluster per element, and then
|
20
20
|
# progressively merge clusters, until the required number of clusters
|
21
21
|
# is reached.
|
22
22
|
# With average linkage, the distance between a clusters cx and
|
@@ -29,8 +29,8 @@ module Ai4r
|
|
29
29
|
parameters_info :distance_function =>
|
30
30
|
"Custom implementation of distance function. " +
|
31
31
|
"It must be a closure receiving two data items and return the " +
|
32
|
-
"distance
|
33
|
-
"
|
32
|
+
"distance between them. By default, this algorithm uses " +
|
33
|
+
"euclidean distance of numeric attributes to the power of 2."
|
34
34
|
|
35
35
|
# Build a new clusterer, using data examples found in data_set.
|
36
36
|
# Items will be clustered in "number_of_clusters" different
|
@@ -28,8 +28,8 @@ module Ai4r
|
|
28
28
|
"build the clusterer. By default it is uncapped.",
|
29
29
|
:distance_function => "Custom implementation of distance function. " +
|
30
30
|
"It must be a closure receiving two data items and return the " +
|
31
|
-
"distance
|
32
|
-
"
|
31
|
+
"distance between them. By default, this algorithm uses " +
|
32
|
+
"euclidean distance of numeric attributes to the power of 2.",
|
33
33
|
:centroid_function => "Custom implementation to calculate the " +
|
34
34
|
"centroid of a cluster. It must be a closure receiving an array of " +
|
35
35
|
"data sets, and return an array of data items, representing the " +
|
@@ -17,7 +17,7 @@ module Ai4r
|
|
17
17
|
# centroid linkage algorithm, aka unweighted pair group method
|
18
18
|
# centroid (UPGMC) (Everitt et al., 2001 ; Jain and Dubes, 1988 ;
|
19
19
|
# Sokal and Michener, 1958 )
|
20
|
-
# Hierarchical
|
20
|
+
# Hierarchical clusterers create one cluster per element, and then
|
21
21
|
# progressively merge clusters, until the required number of clusters
|
22
22
|
# is reached.
|
23
23
|
# The distance between clusters is the squared euclidean distance
|
@@ -32,8 +32,8 @@ module Ai4r
|
|
32
32
|
parameters_info :distance_function =>
|
33
33
|
"Custom implementation of distance function. " +
|
34
34
|
"It must be a closure receiving two data items and return the " +
|
35
|
-
"distance
|
36
|
-
"
|
35
|
+
"distance between them. By default, this algorithm uses " +
|
36
|
+
"euclidean distance of numeric attributes to the power of 2."
|
37
37
|
|
38
38
|
# Build a new clusterer, using data examples found in data_set.
|
39
39
|
# Items will be clustered in "number_of_clusters" different
|
@@ -32,17 +32,6 @@ module Ai4r
|
|
32
32
|
end
|
33
33
|
|
34
34
|
protected
|
35
|
-
# Usefull as a defult distance function for clustering algorithms
|
36
|
-
def euclidean_distance(a, b)
|
37
|
-
dist = 0.0
|
38
|
-
a.each_index do |index|
|
39
|
-
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
40
|
-
dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
|
41
|
-
end
|
42
|
-
end
|
43
|
-
return dist
|
44
|
-
end
|
45
|
-
|
46
35
|
def get_min_index(array)
|
47
36
|
min = array.first
|
48
37
|
index = 0
|
@@ -15,7 +15,7 @@ module Ai4r
|
|
15
15
|
|
16
16
|
# Implementation of a Hierarchical clusterer with complete linkage (Everitt
|
17
17
|
# et al., 2001 ; Jain and Dubes, 1988 ; Sorensen, 1948 ).
|
18
|
-
# Hierarchical
|
18
|
+
# Hierarchical clusterers create one cluster per element, and then
|
19
19
|
# progressively merge clusters, until the required number of clusters
|
20
20
|
# is reached.
|
21
21
|
# With complete linkage, the distance between two clusters is computed as
|
@@ -27,8 +27,8 @@ module Ai4r
|
|
27
27
|
parameters_info :distance_function =>
|
28
28
|
"Custom implementation of distance function. " +
|
29
29
|
"It must be a closure receiving two data items and return the " +
|
30
|
-
"distance
|
31
|
-
"
|
30
|
+
"distance between them. By default, this algorithm uses " +
|
31
|
+
"euclidean distance of numeric attributes to the power of 2."
|
32
32
|
|
33
33
|
|
34
34
|
# Build a new clusterer, using data examples found in data_set.
|
@@ -25,8 +25,8 @@ module Ai4r
|
|
25
25
|
parameters_info :distance_function =>
|
26
26
|
"Custom implementation of distance function. " +
|
27
27
|
"It must be a closure receiving two data items and return the " +
|
28
|
-
"distance
|
29
|
-
"
|
28
|
+
"distance between them. By default, this algorithm uses " +
|
29
|
+
"euclidean distance of numeric attributes to the power of 2."
|
30
30
|
|
31
31
|
def initialize
|
32
32
|
@distance_function = lambda do |a,b|
|
@@ -8,6 +8,7 @@
|
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
10
|
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/../data/proximity'
|
11
12
|
require File.dirname(__FILE__) + '/../clusterers/clusterer'
|
12
13
|
|
13
14
|
module Ai4r
|
@@ -27,22 +28,31 @@ module Ai4r
|
|
27
28
|
"build the clusterer. By default it is uncapped.",
|
28
29
|
:distance_function => "Custom implementation of distance function. " +
|
29
30
|
"It must be a closure receiving two data items and return the " +
|
30
|
-
"distance
|
31
|
-
"
|
31
|
+
"distance between them. By default, this algorithm uses " +
|
32
|
+
"euclidean distance of numeric attributes to the power of 2.",
|
32
33
|
:centroid_function => "Custom implementation to calculate the " +
|
33
34
|
"centroid of a cluster. It must be a closure receiving an array of " +
|
34
35
|
"data sets, and return an array of data items, representing the " +
|
35
36
|
"centroids of for each data set. " +
|
36
37
|
"By default, this algorithm returns a data items using the mode "+
|
37
|
-
"or mean of each attribute on each data set."
|
38
|
+
"or mean of each attribute on each data set.",
|
39
|
+
:centroid_indices => "Indices of data items (indexed from 0) to be " +
|
40
|
+
"the initial centroids. Otherwise, the initial centroids will be " +
|
41
|
+
"assigned randomly from the data set.",
|
42
|
+
:on_empty => "Action to take if a cluster becomes empty, with values " +
|
43
|
+
"'eliminate' (the default action, eliminate the empty cluster), " +
|
44
|
+
"'terminate' (terminate with error), 'random' (relocate the " +
|
45
|
+
"empty cluster to a random point), 'outlier' (relocate the " +
|
46
|
+
"empty cluster to the point furthest from its centroid)."
|
38
47
|
|
39
48
|
def initialize
|
40
49
|
@distance_function = nil
|
41
50
|
@max_iterations = nil
|
42
|
-
@old_centroids = nil
|
43
51
|
@centroid_function = lambda do |data_sets|
|
44
52
|
data_sets.collect{ |data_set| data_set.get_mean_or_mode}
|
45
53
|
end
|
54
|
+
@centroid_indices = []
|
55
|
+
@on_empty = 'eliminate' # default if none specified
|
46
56
|
end
|
47
57
|
|
48
58
|
|
@@ -52,6 +62,8 @@ module Ai4r
|
|
52
62
|
def build(data_set, number_of_clusters)
|
53
63
|
@data_set = data_set
|
54
64
|
@number_of_clusters = number_of_clusters
|
65
|
+
raise ArgumentError, 'Length of centroid indices array differs from the specified number of clusters' unless @centroid_indices.empty? || @centroid_indices.length == @number_of_clusters
|
66
|
+
raise ArgumentError, 'Invalid value for on_empty' unless @on_empty == 'eliminate' || @on_empty == 'terminate' || @on_empty == 'random' || @on_empty == 'outlier'
|
55
67
|
@iterations = 0
|
56
68
|
|
57
69
|
calc_initial_centroids
|
@@ -73,32 +85,27 @@ module Ai4r
|
|
73
85
|
# This function calculates the distance between 2 different
|
74
86
|
# instances. By default, it returns the euclidean distance to the
|
75
87
|
# power of 2.
|
76
|
-
# You can provide a more
|
88
|
+
# You can provide a more convenient distance implementation:
|
77
89
|
#
|
78
90
|
# 1- Overwriting this method
|
79
91
|
#
|
80
92
|
# 2- Providing a closure to the :distance_function parameter
|
81
93
|
def distance(a, b)
|
82
94
|
return @distance_function.call(a, b) if @distance_function
|
83
|
-
return
|
95
|
+
return Ai4r::Data::Proximity.squared_euclidean_distance(
|
96
|
+
a.select {|att_a| att_a.is_a? Numeric} ,
|
97
|
+
b.select {|att_b| att_b.is_a? Numeric})
|
84
98
|
end
|
85
99
|
|
86
100
|
protected
|
87
101
|
|
88
102
|
def calc_initial_centroids
|
89
|
-
@centroids = []
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if !tried_indexes.include?(random_index)
|
95
|
-
tried_indexes << random_index
|
96
|
-
if !@centroids.include? @data_set.data_items[random_index]
|
97
|
-
@centroids << @data_set.data_items[random_index]
|
98
|
-
end
|
99
|
-
end
|
103
|
+
@centroids, @old_centroids = [], nil
|
104
|
+
if @centroid_indices.empty?
|
105
|
+
populate_centroids('random')
|
106
|
+
else
|
107
|
+
populate_centroids('indices')
|
100
108
|
end
|
101
|
-
@number_of_clusters = @centroids.length
|
102
109
|
end
|
103
110
|
|
104
111
|
def stop_criteria_met
|
@@ -110,9 +117,14 @@ module Ai4r
|
|
110
117
|
@clusters = Array.new(@number_of_clusters) do
|
111
118
|
Ai4r::Data::DataSet.new :data_labels => @data_set.data_labels
|
112
119
|
end
|
113
|
-
@
|
114
|
-
|
120
|
+
@cluster_indices = Array.new(@number_of_clusters) {[]}
|
121
|
+
|
122
|
+
@data_set.data_items.each_with_index do |data_item, data_index|
|
123
|
+
c = eval(data_item)
|
124
|
+
@clusters[c] << data_item
|
125
|
+
@cluster_indices[c] << data_index if @on_empty == 'outlier'
|
115
126
|
end
|
127
|
+
manage_empty_clusters if has_empty_cluster?
|
116
128
|
end
|
117
129
|
|
118
130
|
def recompute_centroids
|
@@ -120,7 +132,97 @@ module Ai4r
|
|
120
132
|
@iterations += 1
|
121
133
|
@centroids = @centroid_function.call(@clusters)
|
122
134
|
end
|
123
|
-
|
135
|
+
|
136
|
+
# Appends data items to @centroids until there are number_of_clusters of
# them or the candidates run out, then sets @number_of_clusters to the
# number of centroids actually held. A data item is never added twice,
# whether it is reached through the same index or through an equal item
# at another index.
#
# populate_method selects where the candidates come from:
# 'random'::  randomly drawn data items; used for the initial assignment
#             (without the :centroid_indices option) and for relocating
#             empty clusters (:on_empty option 'random')
# 'indices':: the data items named by @centroid_indices; used for the
#             initial assignment only. Raises ArgumentError for an index
#             that is not an Integer inside the data set.
# 'outlier':: the data items furthest from their assigned centroid, furthest
#             first; used for relocating empty clusters only
#             (:on_empty option 'outlier')
# Any other value adds nothing.
def populate_centroids(populate_method, number_of_clusters=@number_of_clusters)
  tried_indexes = []
  item_count = @data_set.data_items.length
  case populate_method
  when 'random'
    while @centroids.length < number_of_clusters &&
        tried_indexes.length < item_count
      add_centroid_candidate(rand(item_count), tried_indexes)
    end
  when 'indices'
    @centroid_indices.each do |index|
      raise ArgumentError, "Invalid centroid index #{index}" unless (index.is_a? Integer) && index >= 0 && index < item_count
      add_centroid_candidate(index, tried_indexes)
    end
  when 'outlier'
    # The list is sorted nearest first, so walk it backwards. An empty list
    # simply yields no candidates.
    sort_data_indices_by_dist_to_centroid.reverse_each do |outlier_index|
      break unless @centroids.length < number_of_clusters
      add_centroid_candidate(outlier_index, tried_indexes)
    end
  end
  @number_of_clusters = @centroids.length
end

# Records index as tried and appends the data item at that index to
# @centroids, unless the index was already tried or an equal item is
# already a centroid.
def add_centroid_candidate(index, tried_indexes)
  return if tried_indexes.include?(index)
  tried_indexes << index
  candidate = @data_set.data_items[index]
  @centroids << candidate unless @centroids.include?(candidate)
end
|
177
|
+
|
178
|
+
# Sort cluster points by distance to assigned centroid. Utilizes @cluster_indices,
# which maps each position inside a cluster back to an index in the data set.
# Returns the data set indices, sorted in order from the nearest to furthest.
def sort_data_indices_by_dist_to_centroid
  dist_by_data_index = {}
  @clusters.each_with_index do |cluster, c|
    centroid = @centroids[c]
    cluster.data_items.each_with_index do |data_item, i|
      dist_to_centroid = distance(data_item, centroid)
      data_index = @cluster_indices[c][i]
      dist_by_data_index[data_index] = dist_to_centroid
    end
  end
  # Order the {index => distance} pairs by distance (ascending) and keep only the indices.
  dist_by_data_index.sort_by { |_data_index, dist| dist }.map(&:first)
end
|
194
|
+
|
195
|
+
# Returns true when at least one of the first @number_of_clusters entries
# of @clusters holds no data items, false otherwise. Stops at the first
# empty cluster found.
def has_empty_cluster?
  (0...@number_of_clusters).any? { |c| @clusters[c].data_items.empty? }
end
|
202
|
+
|
203
|
+
# Applies the on_empty policy after an assignment pass left a cluster empty.
#
# 'terminate':: nothing is done here. The empty cluster is left in place,
#               which is expected to end the run with an error later on
#               (a nil centroid for it makes the next distance computation
#               raise).
# 'eliminate':: the empty clusters are dropped.
# otherwise::   the empty clusters are dropped, replacement centroids are
#               populated with on_empty as the method, up to the number of
#               clusters there were before dropping, and cluster membership
#               is computed again.
def manage_empty_clusters
  unless self.on_empty == 'terminate'
    clusters_before = @number_of_clusters
    eliminate_empty_clusters
    unless self.on_empty == 'eliminate'
      populate_centroids(self.on_empty, clusters_before)
      calculate_membership_clusters
    end
  end
end
|
212
|
+
|
213
|
+
# Drops every cluster that holds no data items, together with its centroid
# and its list of data indices, keeping the three arrays aligned and in
# their original order. Afterwards @number_of_clusters is the number of
# clusters that remain.
def eliminate_empty_clusters
  prior_clusters = @clusters
  prior_centroids = @centroids
  prior_cluster_indices = @cluster_indices
  @clusters = []
  @centroids = []
  @cluster_indices = []
  0.upto(@number_of_clusters - 1) do |slot|
    next if prior_clusters[slot].data_items.empty?
    @clusters << prior_clusters[slot]
    @cluster_indices << prior_cluster_indices[slot]
    @centroids << prior_centroids[slot]
  end
  @number_of_clusters = @centroids.length
end
|
225
|
+
|
124
226
|
end
|
125
227
|
end
|
126
228
|
end
|
@@ -16,7 +16,7 @@ module Ai4r
|
|
16
16
|
# Implementation of an Agglomerative Hierarchical clusterer with
|
17
17
|
# median linkage algorithm, aka weighted pair group method centroid
|
18
18
|
# or WPGMC (Everitt et al., 2001 ; Gower, 1967 ; Jain and Dubes, 1988 ).
|
19
|
-
# Hierarchical
|
19
|
+
# Hierarchical clusterers create one cluster per element, and then
|
20
20
|
# progressively merge clusters, until the required number of clusters
|
21
21
|
# is reached.
|
22
22
|
# Similar to centroid linkages, but using fix weight:
|
@@ -29,8 +29,8 @@ module Ai4r
|
|
29
29
|
parameters_info :distance_function =>
|
30
30
|
"Custom implementation of distance function. " +
|
31
31
|
"It must be a closure receiving two data items and return the " +
|
32
|
-
"distance
|
33
|
-
"
|
32
|
+
"distance between them. By default, this algorithm uses " +
|
33
|
+
"euclidean distance of numeric attributes to the power of 2."
|
34
34
|
|
35
35
|
# Build a new clusterer, using data examples found in data_set.
|
36
36
|
# Items will be clustered in "number_of_clusters" different
|