ai4r 1.4 → 1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -3
- data/examples/decision_trees/id3_example.rb +1 -1
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +1 -1
- data/lib/ai4r.rb +11 -0
- data/lib/ai4r/classifiers/classifier.rb +2 -0
- data/lib/ai4r/classifiers/id3.rb +3 -2
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
- data/lib/ai4r/classifiers/one_r.rb +2 -1
- data/lib/ai4r/classifiers/prism.rb +2 -1
- data/lib/ai4r/classifiers/zero_r.rb +2 -1
- data/lib/ai4r/clusterers/average_linkage.rb +60 -0
- data/lib/ai4r/clusterers/bisecting_k_means.rb +17 -39
- data/lib/ai4r/clusterers/clusterer.rb +25 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +62 -0
- data/lib/ai4r/clusterers/k_means.rb +18 -25
- data/lib/ai4r/clusterers/single_linkage.rb +179 -0
- data/lib/ai4r/data/data_set.rb +33 -41
- data/lib/ai4r/data/proximity.rb +82 -0
- data/lib/ai4r/data/statistics.rb +77 -0
- data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +2 -4
- data/site/build/site/en/build/tmp/build-info.xml +5 -0
- data/site/build/site/en/build/tmp/plugins-1.xml +212 -0
- data/site/build/site/en/build/tmp/plugins-2.xml +252 -0
- data/site/build/site/en/build/tmp/projfilters.properties +41 -0
- data/site/build/site/en/downloads.html +1 -1
- data/site/build/site/en/geneticAlgorithms.html +1 -1
- data/site/build/site/en/index.html +44 -7
- data/site/build/site/en/index.pdf +278 -155
- data/site/build/site/en/linkmap.html +2 -2
- data/site/build/site/en/linkmap.pdf +12 -12
- data/site/build/site/en/machineLearning.html +1 -1
- data/site/build/site/en/neuralNetworks.html +1 -1
- data/site/build/site/en/sourceCode.html +244 -0
- data/site/build/site/en/sourceCode.pdf +278 -0
- data/site/build/site/en/svn.html +34 -42
- data/site/build/site/en/svn.pdf +86 -114
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/projfilters.properties +1 -1
- data/site/build/webapp/WEB-INF/logs/core.log +628 -629
- data/site/build/webapp/WEB-INF/logs/error.log +213 -213
- data/site/src/documentation/content/xdocs/index.xml +20 -1
- data/site/src/documentation/content/xdocs/site.xml +1 -1
- data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/test/classifiers/id3_test.rb +0 -1
- data/test/classifiers/multilayer_perceptron_test.rb +79 -0
- data/test/classifiers/one_r_test.rb +0 -2
- data/test/classifiers/prism_test.rb +0 -2
- data/test/classifiers/zero_r_test.rb +0 -2
- data/test/clusterers/average_linkage_test.rb +45 -0
- data/test/clusterers/bisecting_k_means_test.rb +0 -2
- data/test/clusterers/complete_linkage_test.rb +45 -0
- data/test/clusterers/k_means_test.rb +0 -2
- data/test/clusterers/single_linkage_test.rb +113 -0
- data/test/data/data_set_test.rb +3 -15
- data/test/data/proximity_test.rb +71 -0
- data/test/data/statistics_test.rb +65 -0
- data/test/experiment/classifier_evaluator_test.rb +76 -0
- metadata +27 -6
- data/site/src/documentation/content/xdocs/svn.xml +0 -41
@@ -7,7 +7,6 @@
|
|
7
7
|
# the Mozilla Public License version 1.1 as published by the
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
|
-
require "set"
|
11
10
|
require File.dirname(__FILE__) + '/../data/data_set'
|
12
11
|
require File.dirname(__FILE__) + '/../clusterers/clusterer'
|
13
12
|
|
@@ -29,7 +28,23 @@ module Ai4r
|
|
29
28
|
:distance_function => "Custom implementation of distance function. " +
|
30
29
|
"It must be a closure receiving two data items and return the " +
|
31
30
|
"distance bewteen them. By default, this algorithm uses " +
|
32
|
-
"ecuclidean distance of numeric attributes to the power of 2."
|
31
|
+
"ecuclidean distance of numeric attributes to the power of 2.",
|
32
|
+
:centroid_function => "Custom implementation to calculate the " +
|
33
|
+
"centroid of a cluster. It must be a closure receiving an array of " +
|
34
|
+
"data sets, and return an array of data items, representing the " +
|
35
|
+
"centroids of for each data set. " +
|
36
|
+
"By default, this algorithm returns a data items using the mode "+
|
37
|
+
"or mean of each attribute on each data set."
|
38
|
+
|
39
|
+
def initialize
|
40
|
+
@distance_function = nil
|
41
|
+
@max_iterations = nil
|
42
|
+
@old_centroids = nil
|
43
|
+
@centroid_function = lambda do |data_sets|
|
44
|
+
data_sets.collect{ |data_set| data_set.get_mean_or_mode}
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
33
48
|
|
34
49
|
# Build a new clusterer, using data examples found in data_set.
|
35
50
|
# Items will be clustered in "number_of_clusters" different
|
@@ -69,15 +84,6 @@ module Ai4r
|
|
69
84
|
end
|
70
85
|
|
71
86
|
protected
|
72
|
-
def euclidean_distance(a, b)
|
73
|
-
dist = 0.0
|
74
|
-
a.each_index do |index|
|
75
|
-
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
76
|
-
dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
|
77
|
-
end
|
78
|
-
end
|
79
|
-
return dist
|
80
|
-
end
|
81
87
|
|
82
88
|
def calc_initial_centroids
|
83
89
|
@centroids = []
|
@@ -111,21 +117,8 @@ module Ai4r
|
|
111
117
|
|
112
118
|
def recompute_centroids
|
113
119
|
@old_centroids = @centroids
|
114
|
-
@centroids = @clusters.collect { |cluster| cluster.get_mean_or_mode }
|
115
120
|
@iterations += 1
|
116
|
-
|
117
|
-
|
118
|
-
def get_min_index(array)
|
119
|
-
min = array.first
|
120
|
-
index = 0
|
121
|
-
array.each_index do |i|
|
122
|
-
x = array[i]
|
123
|
-
if x < min
|
124
|
-
min = x
|
125
|
-
index = i
|
126
|
-
end
|
127
|
-
end
|
128
|
-
return index
|
121
|
+
@centroids = @centroid_function.call(@clusters)
|
129
122
|
end
|
130
123
|
|
131
124
|
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# Author:: Sergio Fierens (implementation)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/../clusterers/clusterer'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Clusterers
|
15
|
+
|
16
|
+
# Implementation of a Hierarchical clusterer with single linkage.
|
17
|
+
# Hierarchical clusterers create one cluster per element, and then
|
18
|
+
# progressively merge clusters, until the required number of clusters
|
19
|
+
# is reached.
|
20
|
+
# With single linkage, the distance between two clusters is computed as the
|
21
|
+
# distance between the two closest elements in the two clusters.
|
22
|
+
class SingleLinkage < Clusterer
|
23
|
+
|
24
|
+
attr_reader :data_set, :number_of_clusters, :clusters
|
25
|
+
|
26
|
+
parameters_info :distance_function =>
|
27
|
+
"Custom implementation of distance function. " +
|
28
|
+
"It must be a closure receiving two data items and return the " +
|
29
|
+
"distance bewteen them. By default, this algorithm uses " +
|
30
|
+
"ecuclidean distance of numeric attributes to the power of 2."
|
31
|
+
|
32
|
+
def initialize
|
33
|
+
@distance_function = nil
|
34
|
+
end
|
35
|
+
|
36
|
+
# Build a new clusterer, using data examples found in data_set.
|
37
|
+
# Items will be clustered in "number_of_clusters" different
|
38
|
+
# clusters.
|
39
|
+
def build(data_set, number_of_clusters)
|
40
|
+
@data_set = data_set
|
41
|
+
@number_of_clusters = number_of_clusters
|
42
|
+
|
43
|
+
index_clusters = create_initial_index_clusters
|
44
|
+
create_distance_matrix(data_set)
|
45
|
+
while index_clusters.length > @number_of_clusters
|
46
|
+
clusters_to_merge = get_closest_clusters(index_clusters)
|
47
|
+
index_clusters = merge_clusters(clusters_to_merge, index_clusters)
|
48
|
+
end
|
49
|
+
@clusters = build_clusters_from_index_clusters index_clusters
|
50
|
+
|
51
|
+
return self
|
52
|
+
end
|
53
|
+
|
54
|
+
# Classifies the given data item, returning the cluster index it belongs
|
55
|
+
# to (0-based).
|
56
|
+
def eval(data_item)
|
57
|
+
get_min_index(@clusters.collect {|cluster|
|
58
|
+
distance_between_item_and_cluster(data_item, cluster)})
|
59
|
+
end
|
60
|
+
|
61
|
+
# This function calculates the distance between 2 different
|
62
|
+
# instances. By default, it returns the euclidean distance to the
|
63
|
+
# power of 2.
|
64
|
+
# You can provide a more convenient distance implementation:
|
65
|
+
#
|
66
|
+
# 1- Overwriting this method
|
67
|
+
#
|
68
|
+
# 2- Providing a closure to the :distance_function parameter
|
69
|
+
def distance(a, b)
|
70
|
+
return @distance_function.call(a, b) if @distance_function
|
71
|
+
return euclidean_distance(a, b)
|
72
|
+
end
|
73
|
+
|
74
|
+
protected
|
75
|
+
|
76
|
+
# returns [ [0], [1], [2], ... , [n-1] ]
|
77
|
+
# where n is the number of data items in the data set
|
78
|
+
def create_initial_index_clusters
|
79
|
+
index_clusters = []
|
80
|
+
@data_set.data_items.length.times {|i| index_clusters << [i]}
|
81
|
+
return index_clusters
|
82
|
+
end
|
83
|
+
|
84
|
+
# Create a partial distance matrix:
|
85
|
+
# [
|
86
|
+
# [d(1,0)],
|
87
|
+
# [d(2,0), d(2,1)],
|
88
|
+
# [d(3,0), d(3,1), d(3,2)],
|
89
|
+
# ...
|
90
|
+
# [d(n-1,0), d(n-1,1), d(n-1,2), ... , d(n-1,n-2)]
|
91
|
+
# ]
|
92
|
+
# where n is the number of data items in the data set
|
93
|
+
def create_distance_matrix(data_set)
|
94
|
+
@distance_matrix = Array.new(data_set.data_items.length-1) {|index| Array.new(index+1)}
|
95
|
+
data_set.data_items.each_with_index do |a, i|
|
96
|
+
i.times do |j|
|
97
|
+
b = data_set.data_items[j]
|
98
|
+
@distance_matrix[i-1][j] = distance(a, b)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
# Returns the distance between element data_item[index_a] and
|
104
|
+
# data_item[index_b] using the distance matrix
|
105
|
+
def read_distance_matrix(index_a, index_b)
|
106
|
+
return 0 if index_a == index_b
|
107
|
+
index_a, index_b = index_b, index_a if index_b > index_a
|
108
|
+
return @distance_matrix[index_a-1][index_b]
|
109
|
+
end
|
110
|
+
|
111
|
+
# clusters_to_merge = [index_cluster_a, index_cluster_b].
|
112
|
+
# cluster_a and cluster_b are removed from index_clusters,
|
113
|
+
# and a new cluster with all members of cluster_a and cluster_b
|
114
|
+
# is added.
|
115
|
+
# It returns the new clusters array.
|
116
|
+
def merge_clusters(clusters_to_merge, index_clusters)
|
117
|
+
index_a = clusters_to_merge.first
|
118
|
+
index_b = clusters_to_merge.last
|
119
|
+
index_a, index_b = index_b, index_a if index_b > index_a
|
120
|
+
new_index_cluster = index_clusters[index_a] +
|
121
|
+
index_clusters[index_b]
|
122
|
+
index_clusters.delete_at index_a
|
123
|
+
index_clusters.delete_at index_b
|
124
|
+
index_clusters << new_index_cluster
|
125
|
+
return index_clusters
|
126
|
+
end
|
127
|
+
|
128
|
+
# Given an array with clusters of data_items indexes,
|
129
|
+
# it returns an array of data_items clusters
|
130
|
+
def build_clusters_from_index_clusters(index_clusters)
|
131
|
+
@distance_matrix = nil
|
132
|
+
return index_clusters.collect do |index_cluster|
|
133
|
+
Ai4r::Data::DataSet.new(:data_labels => @data_set.data_labels,
|
134
|
+
:data_items => index_cluster.collect {|i| @data_set.data_items[i]})
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# Returns an array with the indexes of the two closest
|
139
|
+
# clusters => [index_cluster_a, index_cluster_b]
|
140
|
+
def get_closest_clusters(index_clusters)
|
141
|
+
min_distance = 1.0/0
|
142
|
+
closest_clusters = [1, 0]
|
143
|
+
index_clusters.each_with_index do |cluster_a, index_a|
|
144
|
+
index_a.times do |index_b|
|
145
|
+
cluster_b = index_clusters[index_b]
|
146
|
+
cluster_distance = calc_index_clusters_distance(cluster_a, cluster_b)
|
147
|
+
if cluster_distance < min_distance
|
148
|
+
closest_clusters = [index_a, index_b]
|
149
|
+
min_distance = cluster_distance
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
return closest_clusters
|
154
|
+
end
|
155
|
+
|
156
|
+
# Calculate cluster distance using the single linkage method
|
157
|
+
def calc_index_clusters_distance(cluster_a, cluster_b)
|
158
|
+
min_dist = 1.0/0
|
159
|
+
cluster_a.each do |index_a|
|
160
|
+
cluster_b.each do |index_b|
|
161
|
+
dist = read_distance_matrix(index_a, index_b)
|
162
|
+
min_dist = dist if dist < min_dist
|
163
|
+
end
|
164
|
+
end
|
165
|
+
return min_dist
|
166
|
+
end
|
167
|
+
|
168
|
+
def distance_between_item_and_cluster(data_item, cluster)
|
169
|
+
min_dist = 1.0/0
|
170
|
+
cluster.data_items.each do |another_item|
|
171
|
+
dist = distance(data_item, another_item)
|
172
|
+
min_dist = dist if dist < min_dist
|
173
|
+
end
|
174
|
+
return min_dist
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
data/lib/ai4r/data/data_set.rb
CHANGED
@@ -9,11 +9,19 @@
|
|
9
9
|
|
10
10
|
require 'csv'
|
11
11
|
require 'set'
|
12
|
+
require File.dirname(__FILE__) + '/statistics'
|
12
13
|
|
13
14
|
module Ai4r
|
14
15
|
module Data
|
16
|
+
|
17
|
+
# A data set is a collection of N data items. Each data item is
|
18
|
+
# described by a set of attributes, represented as an array.
|
19
|
+
# Optionally, you can assign a label to the attributes, using
|
20
|
+
# the data_labels property.
|
15
21
|
class DataSet
|
16
22
|
|
23
|
+
@@number_regex = /(((\b[0-9]+)?\.)?\b[0-9]+([eE][-+]?[0-9]+)?\b)/
|
24
|
+
|
17
25
|
attr_reader :data_labels, :data_items
|
18
26
|
|
19
27
|
# Create a new DataSet. By default, empty.
|
@@ -24,7 +32,7 @@ module Ai4r
|
|
24
32
|
# If you provide data items, but no data labels, the data set will
|
25
33
|
# use the default data label values (see set_data_labels)
|
26
34
|
def initialize(options = {})
|
27
|
-
@data_labels =
|
35
|
+
@data_labels = []
|
28
36
|
@data_items = options[:data_items] || []
|
29
37
|
set_data_labels(options[:data_labels]) if options[:data_labels]
|
30
38
|
set_data_items(options[:data_items]) if options[:data_items]
|
@@ -38,7 +46,7 @@ module Ai4r
|
|
38
46
|
end
|
39
47
|
|
40
48
|
# Load data items from csv file
|
41
|
-
def
|
49
|
+
def load_csv(filepath)
|
42
50
|
items = []
|
43
51
|
CSV::Reader.parse(File.open(filepath, 'r')) do |row|
|
44
52
|
items << row
|
@@ -47,12 +55,21 @@ module Ai4r
|
|
47
55
|
end
|
48
56
|
|
49
57
|
# Load data items from csv file. The first row is used as data labels.
|
50
|
-
def
|
51
|
-
|
58
|
+
def load_csv_with_labels(filepath)
|
59
|
+
load_csv(filepath)
|
52
60
|
@data_labels = @data_items.shift
|
53
61
|
return self
|
54
62
|
end
|
55
63
|
|
64
|
+
# Same as load_csv, but it will try to convert cell contents to numbers.
|
65
|
+
def parse_csv(filepath)
|
66
|
+
items = []
|
67
|
+
CSV::Reader.parse(File.open(filepath, 'r')) do |row|
|
68
|
+
items << row.collect{|x| (x.match(@@number_regex)) ? x.to_f : x.data }
|
69
|
+
end
|
70
|
+
set_data_items(items)
|
71
|
+
end
|
72
|
+
|
56
73
|
# Set data labels.
|
57
74
|
# Data labels must have the following format:
|
58
75
|
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
@@ -144,7 +161,7 @@ module Ai4r
|
|
144
161
|
# get_index("gender")
|
145
162
|
# => 2
|
146
163
|
def get_index(attr)
|
147
|
-
return (attr.is_a?(
|
164
|
+
return (attr.is_a?(Fixnum) || attr.is_a?(Range)) ? attr : @data_labels.index(attr)
|
148
165
|
end
|
149
166
|
|
150
167
|
# Raise an exception if there is no data item.
|
@@ -168,44 +185,19 @@ module Ai4r
|
|
168
185
|
@data_items << data_item
|
169
186
|
end
|
170
187
|
end
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
mean = 0.0
|
175
|
-
@data_items.each { |data_item| mean += data_item[index] }
|
176
|
-
mean /= @data_items.length
|
177
|
-
return mean
|
178
|
-
end
|
179
|
-
|
180
|
-
def get_attribute_mode(attribute)
|
181
|
-
index = get_index(attribute)
|
182
|
-
domain = build_domain(attribute)
|
183
|
-
count = {}
|
184
|
-
domain.each {|value| count[value]=0}
|
185
|
-
@data_items.each { |data_item| count[data_item[index]] += 1 }
|
186
|
-
max_count = 0
|
187
|
-
mode = nil
|
188
|
-
count.each_pair do |value, value_count|
|
189
|
-
if value_count > max_count
|
190
|
-
mode = value
|
191
|
-
max_count = value_count
|
192
|
-
end
|
193
|
-
end
|
194
|
-
return mode
|
195
|
-
end
|
196
|
-
|
197
|
-
def get_attribute_mean_or_mode(attribute)
|
198
|
-
index = get_index(attribute)
|
199
|
-
if @data_items.first[index].is_a?(Numeric)
|
200
|
-
return get_attribute_mean(attribute)
|
201
|
-
else
|
202
|
-
return get_attribute_mode(attribute)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
188
|
+
|
189
|
+
# Returns an array with the mean value of numeric attributes, and
|
190
|
+
# the most frequent value of non numeric attributes
|
206
191
|
def get_mean_or_mode
|
207
192
|
mean = []
|
208
|
-
num_attributes.times
|
193
|
+
num_attributes.times do |i|
|
194
|
+
mean[i] =
|
195
|
+
if @data_items.first[i].is_a?(Numeric)
|
196
|
+
Statistics.mean(self, i)
|
197
|
+
else
|
198
|
+
Statistics.mode(self, i)
|
199
|
+
end
|
200
|
+
end
|
209
201
|
return mean
|
210
202
|
end
|
211
203
|
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Data
|
12
|
+
|
13
|
+
# This module provides classical distance functions
|
14
|
+
module Proximity
|
15
|
+
|
16
|
+
# This is a faster computational replacement for Euclidean distance.
|
17
|
+
# Parameters a and b are vectors with continuous attributes.
|
18
|
+
def self.squared_euclidean_distance(a, b)
|
19
|
+
sum = 0.0
|
20
|
+
a.each_with_index do |item_a, i|
|
21
|
+
item_b = b[i]
|
22
|
+
sum += (item_a - item_b)**2
|
23
|
+
end
|
24
|
+
return sum
|
25
|
+
end
|
26
|
+
|
27
|
+
# Euclidean distance, or L2 norm.
|
28
|
+
# Parameters a and b are vectors with continuous attributes.
|
29
|
+
# Euclidean distance tends to form hyperspherical
|
30
|
+
# clusters (Clustering, Xu and Wunsch, 2009).
|
31
|
+
# Translations and rotations do not cause a
|
32
|
+
# distortion in distance relation (Duda et al, 2001)
|
33
|
+
# If attributes are measured with different units,
|
34
|
+
# attributes with larger values and variance will
|
35
|
+
# dominate the metric.
|
36
|
+
def self.euclidean_distance(a, b)
|
37
|
+
Math.sqrt(squared_euclidean_distance(a, b))
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
# city block, Manhattan distance, or L1 norm.
|
42
|
+
# Parameters a and b are vectors with continuous attributes.
|
43
|
+
def self.manhattan_distance(a, b)
|
44
|
+
sum = 0.0
|
45
|
+
a.each_with_index do |item_a, i|
|
46
|
+
item_b = b[i]
|
47
|
+
sum += (item_a - item_b).abs
|
48
|
+
end
|
49
|
+
return sum
|
50
|
+
end
|
51
|
+
|
52
|
+
# Sup distance, or L-infinity norm
|
53
|
+
# Parameters a and b are vectors with continuous attributes.
|
54
|
+
def self.sup_distance(a, b)
|
55
|
+
distance = 0.0
|
56
|
+
a.each_with_index do |item_a, i|
|
57
|
+
item_b = b[i]
|
58
|
+
diff = (item_a - item_b).abs
|
59
|
+
distance = diff if diff > distance
|
60
|
+
end
|
61
|
+
return distance
|
62
|
+
end
|
63
|
+
|
64
|
+
# The Hamming distance between two attribute vectors of equal
|
65
|
+
# length is the number of attributes for which the corresponding
|
66
|
+
# values are different.
|
67
|
+
# This distance function is frequently used with binary attributes,
|
68
|
+
# though it can be used with other discrete attributes.
|
69
|
+
def self.hamming_distance(a,b)
|
70
|
+
count = 0
|
71
|
+
a.each_index do |i|
|
72
|
+
count += 1 if a[i] != b[i]
|
73
|
+
end
|
74
|
+
return count
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|