ai4r 1.4 → 1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -3
- data/examples/decision_trees/id3_example.rb +1 -1
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +1 -1
- data/lib/ai4r.rb +11 -0
- data/lib/ai4r/classifiers/classifier.rb +2 -0
- data/lib/ai4r/classifiers/id3.rb +3 -2
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
- data/lib/ai4r/classifiers/one_r.rb +2 -1
- data/lib/ai4r/classifiers/prism.rb +2 -1
- data/lib/ai4r/classifiers/zero_r.rb +2 -1
- data/lib/ai4r/clusterers/average_linkage.rb +60 -0
- data/lib/ai4r/clusterers/bisecting_k_means.rb +17 -39
- data/lib/ai4r/clusterers/clusterer.rb +25 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +62 -0
- data/lib/ai4r/clusterers/k_means.rb +18 -25
- data/lib/ai4r/clusterers/single_linkage.rb +179 -0
- data/lib/ai4r/data/data_set.rb +33 -41
- data/lib/ai4r/data/proximity.rb +82 -0
- data/lib/ai4r/data/statistics.rb +77 -0
- data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +2 -4
- data/site/build/site/en/build/tmp/build-info.xml +5 -0
- data/site/build/site/en/build/tmp/plugins-1.xml +212 -0
- data/site/build/site/en/build/tmp/plugins-2.xml +252 -0
- data/site/build/site/en/build/tmp/projfilters.properties +41 -0
- data/site/build/site/en/downloads.html +1 -1
- data/site/build/site/en/geneticAlgorithms.html +1 -1
- data/site/build/site/en/index.html +44 -7
- data/site/build/site/en/index.pdf +278 -155
- data/site/build/site/en/linkmap.html +2 -2
- data/site/build/site/en/linkmap.pdf +12 -12
- data/site/build/site/en/machineLearning.html +1 -1
- data/site/build/site/en/neuralNetworks.html +1 -1
- data/site/build/site/en/sourceCode.html +244 -0
- data/site/build/site/en/sourceCode.pdf +278 -0
- data/site/build/site/en/svn.html +34 -42
- data/site/build/site/en/svn.pdf +86 -114
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/projfilters.properties +1 -1
- data/site/build/webapp/WEB-INF/logs/core.log +628 -629
- data/site/build/webapp/WEB-INF/logs/error.log +213 -213
- data/site/src/documentation/content/xdocs/index.xml +20 -1
- data/site/src/documentation/content/xdocs/site.xml +1 -1
- data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/test/classifiers/id3_test.rb +0 -1
- data/test/classifiers/multilayer_perceptron_test.rb +79 -0
- data/test/classifiers/one_r_test.rb +0 -2
- data/test/classifiers/prism_test.rb +0 -2
- data/test/classifiers/zero_r_test.rb +0 -2
- data/test/clusterers/average_linkage_test.rb +45 -0
- data/test/clusterers/bisecting_k_means_test.rb +0 -2
- data/test/clusterers/complete_linkage_test.rb +45 -0
- data/test/clusterers/k_means_test.rb +0 -2
- data/test/clusterers/single_linkage_test.rb +113 -0
- data/test/data/data_set_test.rb +3 -15
- data/test/data/proximity_test.rb +71 -0
- data/test/data/statistics_test.rb +65 -0
- data/test/experiment/classifier_evaluator_test.rb +76 -0
- metadata +27 -6
- data/site/src/documentation/content/xdocs/svn.xml +0 -41
@@ -7,7 +7,6 @@
|
|
7
7
|
# the Mozilla Public License version 1.1 as published by the
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
|
-
require "set"
|
11
10
|
require File.dirname(__FILE__) + '/../data/data_set'
|
12
11
|
require File.dirname(__FILE__) + '/../clusterers/clusterer'
|
13
12
|
|
@@ -29,7 +28,23 @@ module Ai4r
|
|
29
28
|
:distance_function => "Custom implementation of distance function. " +
|
30
29
|
"It must be a closure receiving two data items and return the " +
|
31
30
|
"distance bewteen them. By default, this algorithm uses " +
|
32
|
-
"ecuclidean distance of numeric attributes to the power of 2."
|
31
|
+
"ecuclidean distance of numeric attributes to the power of 2.",
|
32
|
+
:centroid_function => "Custom implementation to calculate the " +
|
33
|
+
"centroid of a cluster. It must be a closure receiving an array of " +
|
34
|
+
"data sets, and return an array of data items, representing the " +
|
35
|
+
"centroids of for each data set. " +
|
36
|
+
"By default, this algorithm returns a data items using the mode "+
|
37
|
+
"or mean of each attribute on each data set."
|
38
|
+
|
39
|
+
def initialize
|
40
|
+
@distance_function = nil
|
41
|
+
@max_iterations = nil
|
42
|
+
@old_centroids = nil
|
43
|
+
@centroid_function = lambda do |data_sets|
|
44
|
+
data_sets.collect{ |data_set| data_set.get_mean_or_mode}
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
33
48
|
|
34
49
|
# Build a new clusterer, using data examples found in data_set.
|
35
50
|
# Items will be clustered in "number_of_clusters" different
|
@@ -69,15 +84,6 @@ module Ai4r
|
|
69
84
|
end
|
70
85
|
|
71
86
|
protected
|
72
|
-
def euclidean_distance(a, b)
|
73
|
-
dist = 0.0
|
74
|
-
a.each_index do |index|
|
75
|
-
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
76
|
-
dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
|
77
|
-
end
|
78
|
-
end
|
79
|
-
return dist
|
80
|
-
end
|
81
87
|
|
82
88
|
def calc_initial_centroids
|
83
89
|
@centroids = []
|
@@ -111,21 +117,8 @@ module Ai4r
|
|
111
117
|
|
112
118
|
def recompute_centroids
|
113
119
|
@old_centroids = @centroids
|
114
|
-
@centroids = @clusters.collect { |cluster| cluster.get_mean_or_mode }
|
115
120
|
@iterations += 1
|
116
|
-
|
117
|
-
|
118
|
-
def get_min_index(array)
|
119
|
-
min = array.first
|
120
|
-
index = 0
|
121
|
-
array.each_index do |i|
|
122
|
-
x = array[i]
|
123
|
-
if x < min
|
124
|
-
min = x
|
125
|
-
index = i
|
126
|
-
end
|
127
|
-
end
|
128
|
-
return index
|
121
|
+
@centroids = @centroid_function.call(@clusters)
|
129
122
|
end
|
130
123
|
|
131
124
|
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# Author:: Sergio Fierens (implementation)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/../clusterers/clusterer'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Clusterers
|
15
|
+
|
16
|
+
# Implementation of a Hierarchical clusterer with single linkage.
|
17
|
+
# Hierarchical clusterers create one cluster per element, and then
|
18
|
+
# progressively merge clusters, until the required number of clusters
|
19
|
+
# is reached.
|
20
|
+
# With single linkage, the distance between two clusters is computed as the
|
21
|
+
# distance between the two closest elements in the two clusters.
|
22
|
+
class SingleLinkage < Clusterer
|
23
|
+
|
24
|
+
attr_reader :data_set, :number_of_clusters, :clusters
|
25
|
+
|
26
|
+
parameters_info :distance_function =>
|
27
|
+
"Custom implementation of distance function. " +
|
28
|
+
"It must be a closure receiving two data items and return the " +
|
29
|
+
"distance bewteen them. By default, this algorithm uses " +
|
30
|
+
"ecuclidean distance of numeric attributes to the power of 2."
|
31
|
+
|
32
|
+
def initialize
|
33
|
+
@distance_function = nil
|
34
|
+
end
|
35
|
+
|
36
|
+
# Build a new clusterer, using data examples found in data_set.
|
37
|
+
# Items will be clustered in "number_of_clusters" different
|
38
|
+
# clusters.
|
39
|
+
def build(data_set, number_of_clusters)
|
40
|
+
@data_set = data_set
|
41
|
+
@number_of_clusters = number_of_clusters
|
42
|
+
|
43
|
+
index_clusters = create_initial_index_clusters
|
44
|
+
create_distance_matrix(data_set)
|
45
|
+
while index_clusters.length > @number_of_clusters
|
46
|
+
clusters_to_merge = get_closest_clusters(index_clusters)
|
47
|
+
index_clusters = merge_clusters(clusters_to_merge, index_clusters)
|
48
|
+
end
|
49
|
+
@clusters = build_clusters_from_index_clusters index_clusters
|
50
|
+
|
51
|
+
return self
|
52
|
+
end
|
53
|
+
|
54
|
+
# Classifies the given data item, returning the cluster index it belongs
|
55
|
+
# to (0-based).
|
56
|
+
def eval(data_item)
|
57
|
+
get_min_index(@clusters.collect {|cluster|
|
58
|
+
distance_between_item_and_cluster(data_item, cluster)})
|
59
|
+
end
|
60
|
+
|
61
|
+
# This function calculates the distance between 2 different
|
62
|
+
# instances. By default, it returns the euclidean distance to the
|
63
|
+
# power of 2.
|
64
|
+
# You can provide a more convenient distance implementation:
|
65
|
+
#
|
66
|
+
# 1- Overwriting this method
|
67
|
+
#
|
68
|
+
# 2- Providing a closure to the :distance_function parameter
|
69
|
+
def distance(a, b)
|
70
|
+
return @distance_function.call(a, b) if @distance_function
|
71
|
+
return euclidean_distance(a, b)
|
72
|
+
end
|
73
|
+
|
74
|
+
protected
|
75
|
+
|
76
|
+
# returns [ [0], [1], [2], ... , [n-1] ]
|
77
|
+
# where n is the number of data items in the data set
|
78
|
+
def create_initial_index_clusters
|
79
|
+
index_clusters = []
|
80
|
+
@data_set.data_items.length.times {|i| index_clusters << [i]}
|
81
|
+
return index_clusters
|
82
|
+
end
|
83
|
+
|
84
|
+
# Create a partial distance matrix:
|
85
|
+
# [
|
86
|
+
# [d(1,0)],
|
87
|
+
# [d(2,0)], [d(2,1)],
|
88
|
+
# [d(3,0)], [d(3,1)], [d(3,2)],
|
89
|
+
# ...
|
90
|
+
# [d(n-1,0)], [d(n-1,1)], [d(n-1,2)], ... , [d(n-1,n-2)]
|
91
|
+
# ]
|
92
|
+
# where n is the number of data items in the data set
|
93
|
+
def create_distance_matrix(data_set)
|
94
|
+
@distance_matrix = Array.new(data_set.data_items.length-1) {|index| Array.new(index+1)}
|
95
|
+
data_set.data_items.each_with_index do |a, i|
|
96
|
+
i.times do |j|
|
97
|
+
b = data_set.data_items[j]
|
98
|
+
@distance_matrix[i-1][j] = distance(a, b)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
# Returns the distance between element data_item[index_a] and
|
104
|
+
# data_item[index_b] using the distance matrix
|
105
|
+
def read_distance_matrix(index_a, index_b)
|
106
|
+
return 0 if index_a == index_b
|
107
|
+
index_a, index_b = index_b, index_a if index_b > index_a
|
108
|
+
return @distance_matrix[index_a-1][index_b]
|
109
|
+
end
|
110
|
+
|
111
|
+
# clusters_to_merge = [index_cluster_a, index_cluster_b].
|
112
|
+
# cluster_a and cluster_b are removed from index_clusters,
|
113
|
+
# and a new cluster with all members of cluster_a and cluster_b
|
114
|
+
# is added.
|
115
|
+
# It returns the new clusters array.
|
116
|
+
def merge_clusters(clusters_to_merge, index_clusters)
|
117
|
+
index_a = clusters_to_merge.first
|
118
|
+
index_b = clusters_to_merge.last
|
119
|
+
index_a, index_b = index_b, index_a if index_b > index_a
|
120
|
+
new_index_cluster = index_clusters[index_a] +
|
121
|
+
index_clusters[index_b]
|
122
|
+
index_clusters.delete_at index_a
|
123
|
+
index_clusters.delete_at index_b
|
124
|
+
index_clusters << new_index_cluster
|
125
|
+
return index_clusters
|
126
|
+
end
|
127
|
+
|
128
|
+
# Given an array with clusters of data_items indexes,
|
129
|
+
# it returns an array of data_items clusters
|
130
|
+
def build_clusters_from_index_clusters(index_clusters)
|
131
|
+
@distance_matrix = nil
|
132
|
+
return index_clusters.collect do |index_cluster|
|
133
|
+
Ai4r::Data::DataSet.new(:data_labels => @data_set.data_labels,
|
134
|
+
:data_items => index_cluster.collect {|i| @data_set.data_items[i]})
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# Returns an array with the indexes of the two closest
|
139
|
+
# clusters => [index_cluster_a, index_cluster_b]
|
140
|
+
def get_closest_clusters(index_clusters)
|
141
|
+
min_distance = 1.0/0
|
142
|
+
closest_clusters = [1, 0]
|
143
|
+
index_clusters.each_with_index do |cluster_a, index_a|
|
144
|
+
index_a.times do |index_b|
|
145
|
+
cluster_b = index_clusters[index_b]
|
146
|
+
cluster_distance = calc_index_clusters_distance(cluster_a, cluster_b)
|
147
|
+
if cluster_distance < min_distance
|
148
|
+
closest_clusters = [index_a, index_b]
|
149
|
+
min_distance = cluster_distance
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
return closest_clusters
|
154
|
+
end
|
155
|
+
|
156
|
+
# Calculate cluster distance using the single linkage method
|
157
|
+
def calc_index_clusters_distance(cluster_a, cluster_b)
|
158
|
+
min_dist = 1.0/0
|
159
|
+
cluster_a.each do |index_a|
|
160
|
+
cluster_b.each do |index_b|
|
161
|
+
dist = read_distance_matrix(index_a, index_b)
|
162
|
+
min_dist = dist if dist < min_dist
|
163
|
+
end
|
164
|
+
end
|
165
|
+
return min_dist
|
166
|
+
end
|
167
|
+
|
168
|
+
def distance_between_item_and_cluster(data_item, cluster)
|
169
|
+
min_dist = 1.0/0
|
170
|
+
cluster.data_items.each do |another_item|
|
171
|
+
dist = distance(data_item, another_item)
|
172
|
+
min_dist = dist if dist < min_dist
|
173
|
+
end
|
174
|
+
return min_dist
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
data/lib/ai4r/data/data_set.rb
CHANGED
@@ -9,11 +9,19 @@
|
|
9
9
|
|
10
10
|
require 'csv'
|
11
11
|
require 'set'
|
12
|
+
require File.dirname(__FILE__) + '/statistics'
|
12
13
|
|
13
14
|
module Ai4r
|
14
15
|
module Data
|
16
|
+
|
17
|
+
# A data set is a collection of N data items. Each data item is
|
18
|
+
# described by a set of attributes, represented as an array.
|
19
|
+
# Optionally, you can assign a label to the attributes, using
|
20
|
+
# the data_labels property.
|
15
21
|
class DataSet
|
16
22
|
|
23
|
+
@@number_regex = /(((\b[0-9]+)?\.)?\b[0-9]+([eE][-+]?[0-9]+)?\b)/
|
24
|
+
|
17
25
|
attr_reader :data_labels, :data_items
|
18
26
|
|
19
27
|
# Create a new DataSet. By default, empty.
|
@@ -24,7 +32,7 @@ module Ai4r
|
|
24
32
|
# If you provide data items, but no data labels, the data set will
|
25
33
|
# use the default data label values (see set_data_labels)
|
26
34
|
def initialize(options = {})
|
27
|
-
@data_labels =
|
35
|
+
@data_labels = []
|
28
36
|
@data_items = options[:data_items] || []
|
29
37
|
set_data_labels(options[:data_labels]) if options[:data_labels]
|
30
38
|
set_data_items(options[:data_items]) if options[:data_items]
|
@@ -38,7 +46,7 @@ module Ai4r
|
|
38
46
|
end
|
39
47
|
|
40
48
|
# Load data items from csv file
|
41
|
-
def
|
49
|
+
def load_csv(filepath)
|
42
50
|
items = []
|
43
51
|
CSV::Reader.parse(File.open(filepath, 'r')) do |row|
|
44
52
|
items << row
|
@@ -47,12 +55,21 @@ module Ai4r
|
|
47
55
|
end
|
48
56
|
|
49
57
|
# Load data items from csv file. The first row is used as data labels.
|
50
|
-
def
|
51
|
-
|
58
|
+
def load_csv_with_labels(filepath)
|
59
|
+
load_csv(filepath)
|
52
60
|
@data_labels = @data_items.shift
|
53
61
|
return self
|
54
62
|
end
|
55
63
|
|
64
|
+
# Same as load_csv, but it will try to convert cell contents to numbers.
|
65
|
+
def parse_csv(filepath)
|
66
|
+
items = []
|
67
|
+
CSV::Reader.parse(File.open(filepath, 'r')) do |row|
|
68
|
+
items << row.collect{|x| (x.match(@@number_regex)) ? x.to_f : x.data }
|
69
|
+
end
|
70
|
+
set_data_items(items)
|
71
|
+
end
|
72
|
+
|
56
73
|
# Set data labels.
|
57
74
|
# Data labels must have the following format:
|
58
75
|
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
@@ -144,7 +161,7 @@ module Ai4r
|
|
144
161
|
# get_index("gender")
|
145
162
|
# => 2
|
146
163
|
def get_index(attr)
|
147
|
-
return (attr.is_a?(
|
164
|
+
return (attr.is_a?(Fixnum) || attr.is_a?(Range)) ? attr : @data_labels.index(attr)
|
148
165
|
end
|
149
166
|
|
150
167
|
# Raise an exception if there is no data item.
|
@@ -168,44 +185,19 @@ module Ai4r
|
|
168
185
|
@data_items << data_item
|
169
186
|
end
|
170
187
|
end
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
mean = 0.0
|
175
|
-
@data_items.each { |data_item| mean += data_item[index] }
|
176
|
-
mean /= @data_items.length
|
177
|
-
return mean
|
178
|
-
end
|
179
|
-
|
180
|
-
def get_attribute_mode(attribute)
|
181
|
-
index = get_index(attribute)
|
182
|
-
domain = build_domain(attribute)
|
183
|
-
count = {}
|
184
|
-
domain.each {|value| count[value]=0}
|
185
|
-
@data_items.each { |data_item| count[data_item[index]] += 1 }
|
186
|
-
max_count = 0
|
187
|
-
mode = nil
|
188
|
-
count.each_pair do |value, value_count|
|
189
|
-
if value_count > max_count
|
190
|
-
mode = value
|
191
|
-
max_count = value_count
|
192
|
-
end
|
193
|
-
end
|
194
|
-
return mode
|
195
|
-
end
|
196
|
-
|
197
|
-
def get_attribute_mean_or_mode(attribute)
|
198
|
-
index = get_index(attribute)
|
199
|
-
if @data_items.first[index].is_a?(Numeric)
|
200
|
-
return get_attribute_mean(attribute)
|
201
|
-
else
|
202
|
-
return get_attribute_mode(attribute)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
188
|
+
|
189
|
+
# Returns an array with the mean value of numeric attributes, and
|
190
|
+
# the most frequent value of non numeric attributes
|
206
191
|
def get_mean_or_mode
|
207
192
|
mean = []
|
208
|
-
num_attributes.times
|
193
|
+
num_attributes.times do |i|
|
194
|
+
mean[i] =
|
195
|
+
if @data_items.first[i].is_a?(Numeric)
|
196
|
+
Statistics.mean(self, i)
|
197
|
+
else
|
198
|
+
Statistics.mode(self, i)
|
199
|
+
end
|
200
|
+
end
|
209
201
|
return mean
|
210
202
|
end
|
211
203
|
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Data
|
12
|
+
|
13
|
+
# This module provides classical distance functions
|
14
|
+
module Proximity
|
15
|
+
|
16
|
+
# This is a faster computational replacement for euclidean distance.
|
17
|
+
# Parameters a and b are vectors with continuous attributes.
|
18
|
+
def self.squared_euclidean_distance(a, b)
|
19
|
+
sum = 0.0
|
20
|
+
a.each_with_index do |item_a, i|
|
21
|
+
item_b = b[i]
|
22
|
+
sum += (item_a - item_b)**2
|
23
|
+
end
|
24
|
+
return sum
|
25
|
+
end
|
26
|
+
|
27
|
+
# Euclidean distance, or L2 norm.
|
28
|
+
# Parameters a and b are vectors with continuous attributes.
|
29
|
+
# Euclidean distance tends to form hyperspherical
|
30
|
+
# clusters (Clustering, Xu and Wunsch, 2009).
|
31
|
+
# Translations and rotations do not cause a
|
32
|
+
# distortion in distance relation (Duda et al, 2001)
|
33
|
+
# If attributes are measured with different units,
|
34
|
+
# attributes with larger values and variance will
|
35
|
+
# dominate the metric.
|
36
|
+
def self.euclidean_distance(a, b)
|
37
|
+
Math.sqrt(squared_euclidean_distance(a, b))
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
# city block, Manhattan distance, or L1 norm.
|
42
|
+
# Parameters a and b are vectors with continuous attributes.
|
43
|
+
def self.manhattan_distance(a, b)
|
44
|
+
sum = 0.0
|
45
|
+
a.each_with_index do |item_a, i|
|
46
|
+
item_b = b[i]
|
47
|
+
sum += (item_a - item_b).abs
|
48
|
+
end
|
49
|
+
return sum
|
50
|
+
end
|
51
|
+
|
52
|
+
# Sup distance, or L-infinity norm
|
53
|
+
# Parameters a and b are vectors with continuous attributes.
|
54
|
+
def self.sup_distance(a, b)
|
55
|
+
distance = 0.0
|
56
|
+
a.each_with_index do |item_a, i|
|
57
|
+
item_b = b[i]
|
58
|
+
diff = (item_a - item_b).abs
|
59
|
+
distance = diff if diff > distance
|
60
|
+
end
|
61
|
+
return distance
|
62
|
+
end
|
63
|
+
|
64
|
+
# The Hamming distance between two attributes vectors of equal
|
65
|
+
# length is the number of attributes for which the corresponding
|
66
|
+
# values are different
|
67
|
+
# This distance function is frequently used with binary attributes,
|
68
|
+
# though it can be used with other discrete attributes.
|
69
|
+
def self.hamming_distance(a,b)
|
70
|
+
count = 0
|
71
|
+
a.each_index do |i|
|
72
|
+
count += 1 if a[i] != b[i]
|
73
|
+
end
|
74
|
+
return count
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|