ai4r 1.4 → 1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -3
- data/examples/decision_trees/id3_example.rb +1 -1
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +1 -1
- data/lib/ai4r.rb +11 -0
- data/lib/ai4r/classifiers/classifier.rb +2 -0
- data/lib/ai4r/classifiers/id3.rb +3 -2
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
- data/lib/ai4r/classifiers/one_r.rb +2 -1
- data/lib/ai4r/classifiers/prism.rb +2 -1
- data/lib/ai4r/classifiers/zero_r.rb +2 -1
- data/lib/ai4r/clusterers/average_linkage.rb +60 -0
- data/lib/ai4r/clusterers/bisecting_k_means.rb +17 -39
- data/lib/ai4r/clusterers/clusterer.rb +25 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +62 -0
- data/lib/ai4r/clusterers/k_means.rb +18 -25
- data/lib/ai4r/clusterers/single_linkage.rb +179 -0
- data/lib/ai4r/data/data_set.rb +33 -41
- data/lib/ai4r/data/proximity.rb +82 -0
- data/lib/ai4r/data/statistics.rb +77 -0
- data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +2 -4
- data/site/build/site/en/build/tmp/build-info.xml +5 -0
- data/site/build/site/en/build/tmp/plugins-1.xml +212 -0
- data/site/build/site/en/build/tmp/plugins-2.xml +252 -0
- data/site/build/site/en/build/tmp/projfilters.properties +41 -0
- data/site/build/site/en/downloads.html +1 -1
- data/site/build/site/en/geneticAlgorithms.html +1 -1
- data/site/build/site/en/index.html +44 -7
- data/site/build/site/en/index.pdf +278 -155
- data/site/build/site/en/linkmap.html +2 -2
- data/site/build/site/en/linkmap.pdf +12 -12
- data/site/build/site/en/machineLearning.html +1 -1
- data/site/build/site/en/neuralNetworks.html +1 -1
- data/site/build/site/en/sourceCode.html +244 -0
- data/site/build/site/en/sourceCode.pdf +278 -0
- data/site/build/site/en/svn.html +34 -42
- data/site/build/site/en/svn.pdf +86 -114
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/projfilters.properties +1 -1
- data/site/build/webapp/WEB-INF/logs/core.log +628 -629
- data/site/build/webapp/WEB-INF/logs/error.log +213 -213
- data/site/src/documentation/content/xdocs/index.xml +20 -1
- data/site/src/documentation/content/xdocs/site.xml +1 -1
- data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/test/classifiers/id3_test.rb +0 -1
- data/test/classifiers/multilayer_perceptron_test.rb +79 -0
- data/test/classifiers/one_r_test.rb +0 -2
- data/test/classifiers/prism_test.rb +0 -2
- data/test/classifiers/zero_r_test.rb +0 -2
- data/test/clusterers/average_linkage_test.rb +45 -0
- data/test/clusterers/bisecting_k_means_test.rb +0 -2
- data/test/clusterers/complete_linkage_test.rb +45 -0
- data/test/clusterers/k_means_test.rb +0 -2
- data/test/clusterers/single_linkage_test.rb +113 -0
- data/test/data/data_set_test.rb +3 -15
- data/test/data/proximity_test.rb +71 -0
- data/test/data/statistics_test.rb +65 -0
- data/test/experiment/classifier_evaluator_test.rb +76 -0
- metadata +27 -6
- data/site/src/documentation/content/xdocs/svn.xml +0 -41
data/README.rdoc
CHANGED
@@ -20,10 +20,31 @@ http://ai4r.rubyforge.org
|
|
20
20
|
|
21
21
|
= More Info
|
22
22
|
|
23
|
-
|
24
|
-
|
23
|
+
* AI4R wiki: http://wiki.jadeferret.com/Category:AI4R
|
24
|
+
* AI4R Project site: http://ai4r.rubyforge.org
|
25
25
|
|
26
|
-
=
|
26
|
+
= Contact
|
27
|
+
|
28
|
+
If you have questions or constructive comments about this project,
|
29
|
+
please post them in the forum (http://forum.jadeferret.com/viewforum.php?f=3).
|
30
|
+
I get an email notification when you post, and I do my best to answer as soon as possible.
|
31
|
+
|
32
|
+
If you do not want to make it public, send it to me: Sergio Fierens, email address: (sergio (at) jadeferret (dot) com). But please, try to post them in the forum. I get tons of emails and it would be great to make them public to help everyone.
|
33
|
+
|
34
|
+
= Roadmap
|
35
|
+
|
36
|
+
AI4R is an active project. If you are interested in what we are working on,
|
37
|
+
check out the development roadmap: http://wiki.jadeferret.com/AI4R_RoadMap
|
38
|
+
|
39
|
+
= Disclaimer
|
40
|
+
|
41
|
+
In plain English:
|
42
|
+
|
43
|
+
This project was created by Sergio Fierens, but the AI algorithms were created by other
|
44
|
+
people who are actually much more clever than Sergio. He does his best implementing
|
45
|
+
them, but he cannot guarantee that these implementations are accurate.
|
46
|
+
|
47
|
+
In legalese:
|
27
48
|
|
28
49
|
This software is provided "as is" and without any express or implied warranties,
|
29
50
|
including, without limitation, the implied warranties of merchantability and
|
@@ -11,7 +11,7 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
|
|
11
11
|
|
12
12
|
# Load data from data_set.csv
|
13
13
|
data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
|
14
|
-
data_set = Ai4r::Data::DataSet.new.
|
14
|
+
data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
|
15
15
|
|
16
16
|
# Build ID3 tree
|
17
17
|
id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
@@ -13,7 +13,7 @@ require 'csv'
|
|
13
13
|
|
14
14
|
# Load data from travel_cost.csv
|
15
15
|
data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
|
16
|
-
data_set = Ai4r::Data::DataSet.new.
|
16
|
+
data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
|
17
17
|
data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
|
18
18
|
|
19
19
|
Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
|
data/lib/ai4r.rb
CHANGED
@@ -1,10 +1,21 @@
|
|
1
|
+
# Data
|
2
|
+
require "ai4r/data/data_set"
|
3
|
+
require "ai4r/data/statistics"
|
4
|
+
require "ai4r/data/parameterizable"
|
5
|
+
# Clusterers
|
1
6
|
require "ai4r/clusterers/clusterer"
|
2
7
|
require "ai4r/clusterers/k_means"
|
3
8
|
require "ai4r/clusterers/bisecting_k_means"
|
9
|
+
require "ai4r/clusterers/single_linkage"
|
10
|
+
require "ai4r/clusterers/complete_linkage"
|
11
|
+
require "ai4r/clusterers/average_linkage"
|
12
|
+
# Classifiers
|
4
13
|
require "ai4r/classifiers/classifier"
|
5
14
|
require "ai4r/classifiers/id3"
|
6
15
|
require "ai4r/classifiers/prism"
|
7
16
|
require "ai4r/classifiers/one_r"
|
8
17
|
require "ai4r/classifiers/zero_r"
|
18
|
+
# Neural networks
|
9
19
|
require "ai4r/neural_network/backpropagation"
|
20
|
+
# Genetic Algorithms
|
10
21
|
require "ai4r/genetic_algorithm/genetic_algorithm"
|
data/lib/ai4r/classifiers/id3.rb
CHANGED
@@ -67,7 +67,7 @@ module Ai4r
|
|
67
67
|
# values) file.
|
68
68
|
#
|
69
69
|
# data_file = "#{File.dirname(__FILE__)}/data_set.csv"
|
70
|
-
# data_set = DataSet.
|
70
|
+
# data_set = DataSet.load_csv_with_labels data_file
|
71
71
|
# id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
72
72
|
#
|
73
73
|
# = A nice tip for data evaluation
|
@@ -94,7 +94,8 @@ module Ai4r
|
|
94
94
|
attr_reader :data_set
|
95
95
|
|
96
96
|
# Create a new ID3 classifier. You must provide a DataSet instance
|
97
|
-
# as parameter.
|
97
|
+
# as parameter. The last attribute of each item is considered as the
|
98
|
+
# item class.
|
98
99
|
def build(data_set)
|
99
100
|
data_set.check_not_empty
|
100
101
|
@data_set = data_set
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set.rb'
|
11
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
12
|
+
require File.dirname(__FILE__) + '/../neural_network/backpropagation'
|
13
|
+
|
14
|
+
module Ai4r
|
15
|
+
module Classifiers
|
16
|
+
|
17
|
+
# = Introduction
|
18
|
+
#
|
19
|
+
# The idea behind the MultilayerPerceptron classifier is to
|
20
|
+
# train a Multilayer Perceptron neural network with the provided examples,
|
21
|
+
# and predict the class for new data items.
|
22
|
+
#
|
23
|
+
# = Parameters
|
24
|
+
#
|
25
|
+
# Use class method get_parameters_info to obtain details on the algorithm
|
26
|
+
# parameters. Use set_parameters to set values for these parameters.
|
27
|
+
# See Parameterizable module documentation.
|
28
|
+
#
|
29
|
+
# * :network_class => Neural network implementation class.
|
30
|
+
# By default: Ai4r::NeuralNetwork::Backpropagation.
|
31
|
+
# * :network_parameters => Parameters to be forwarded to the back end
|
32
|
+
# neural network.
|
33
|
+
# * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
|
34
|
+
# 2 hidden layers with 8 and 6 neurons each. By default []
|
35
|
+
# * :training_iterations => How many times the training should be repeated.
|
36
|
+
# By default: 1000.
|
37
|
+
# :active_node_value => Default: 1
|
38
|
+
# :inactive_node_value => Default: 0
|
39
|
+
class MultilayerPerceptron < Classifier
|
40
|
+
|
41
|
+
attr_reader :data_set, :class_value, :network, :domains
|
42
|
+
|
43
|
+
parameters_info :network_class => "Neural network implementation class."+
|
44
|
+
"By default: Ai4r::NeuralNetwork::Backpropagation.",
|
45
|
+
:network_parameters => "parameters to be forwarded to the back end " +
|
46
|
+
"neural network.",
|
47
|
+
:hidden_layers => "Hidden layer structure. E.g. [8, 6] will generate " +
|
48
|
+
"2 hidden layers with 8 and 6 neurons each. By default []",
|
49
|
+
:training_iterations => "How many times the training should be " +
|
50
|
+
"repeated. By default: 1000",
|
51
|
+
:active_node_value => "Default: 1",
|
52
|
+
:inactive_node_value => "Default: 0"
|
53
|
+
|
54
|
+
def initialize
|
55
|
+
@network_class = Ai4r::NeuralNetwork::Backpropagation
|
56
|
+
@hidden_layers = []
|
57
|
+
@training_iterations = 500
|
58
|
+
@network_parameters = {}
|
59
|
+
@active_node_value = 1
|
60
|
+
@inactive_node_value = 0
|
61
|
+
end
|
62
|
+
|
63
|
+
# Build a new MultilayerPerceptron classifier. You must provide a DataSet
|
64
|
+
# instance as parameter. The last attribute of each item is considered as
|
65
|
+
# the item class.
|
66
|
+
def build(data_set)
|
67
|
+
data_set.check_not_empty
|
68
|
+
@data_set = data_set
|
69
|
+
@domains = @data_set.build_domains.collect {|domain| domain.to_a}
|
70
|
+
@outputs = @domains.last.length
|
71
|
+
@inputs = 0
|
72
|
+
@domains[0...-1].each {|domain| @inputs += domain.length}
|
73
|
+
@structure = [@inputs] + @hidden_layers + [@outputs]
|
74
|
+
@network = @network_class.new @structure
|
75
|
+
@training_iterations.times do
|
76
|
+
data_set.data_items.each do |data_item|
|
77
|
+
input_values = data_to_input(data_item[0...-1])
|
78
|
+
output_values = data_to_output(data_item.last)
|
79
|
+
@network.train(input_values, output_values)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
return self
|
83
|
+
end
|
84
|
+
|
85
|
+
# You can evaluate new data, predicting its class.
|
86
|
+
# e.g.
|
87
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
88
|
+
def eval(data)
|
89
|
+
input_values = data_to_input(data)
|
90
|
+
output_values = @network.eval(input_values)
|
91
|
+
return @domains.last[get_max_index(output_values)]
|
92
|
+
end
|
93
|
+
|
94
|
+
# Multilayer Perceptron Classifiers cannot generate
|
95
|
+
# human-readable rules.
|
96
|
+
def get_rules
|
97
|
+
return "raise 'Neural networks classifiers do not generate human-readable rules.'"
|
98
|
+
end
|
99
|
+
|
100
|
+
protected
|
101
|
+
|
102
|
+
def data_to_input(data_item)
|
103
|
+
input_values = Array.new(@inputs, @inactive_node_value)
|
104
|
+
accum_index = 0
|
105
|
+
data_item.each_index do |att_index|
|
106
|
+
att_value = data_item[att_index]
|
107
|
+
domain_index = @domains[att_index].index(att_value)
|
108
|
+
input_values[domain_index + accum_index] = @active_node_value
|
109
|
+
accum_index = @domains[att_index].length
|
110
|
+
end
|
111
|
+
return input_values
|
112
|
+
end
|
113
|
+
|
114
|
+
def data_to_output(data_item)
|
115
|
+
output_values = Array.new(@outputs, @inactive_node_value)
|
116
|
+
output_values[@domains.last.index(data_item)] = @active_node_value
|
117
|
+
return output_values
|
118
|
+
end
|
119
|
+
|
120
|
+
def get_max_index(output_values)
|
121
|
+
max_value = @inactive_node_value
|
122
|
+
max_index = 0
|
123
|
+
output_values.each_index do |output_index|
|
124
|
+
if max_value < output_values[output_index]
|
125
|
+
max_value = output_values[output_index]
|
126
|
+
max_index = output_index
|
127
|
+
end
|
128
|
+
end
|
129
|
+
return max_index
|
130
|
+
end
|
131
|
+
|
132
|
+
end
|
133
|
+
|
134
|
+
end
|
135
|
+
end
|
@@ -25,7 +25,8 @@ module Ai4r
|
|
25
25
|
attr_reader :data_set, :rule
|
26
26
|
|
27
27
|
# Build a new OneR classifier. You must provide a DataSet instance
|
28
|
-
# as parameter.
|
28
|
+
# as parameter. The last attribute of each item is considered as
|
29
|
+
# the item class.
|
29
30
|
def build(data_set)
|
30
31
|
data_set.check_not_empty
|
31
32
|
@data_set = data_set
|
@@ -29,7 +29,8 @@ module Ai4r
|
|
29
29
|
attr_reader :data_set, :rules
|
30
30
|
|
31
31
|
# Build a new Prism classifier. You must provide a DataSet instance
|
32
|
-
# as parameter.
|
32
|
+
# as parameter. The last attribute of each item is considered as
|
33
|
+
# the item class.
|
33
34
|
def build(data_set)
|
34
35
|
data_set.check_not_empty
|
35
36
|
@data_set = data_set
|
@@ -25,7 +25,8 @@ module Ai4r
|
|
25
25
|
attr_reader :data_set, :class_value
|
26
26
|
|
27
27
|
# Build a new ZeroR classifier. You must provide a DataSet instance
|
28
|
-
# as parameter.
|
28
|
+
# as parameter. The last attribute of each item is considered as
|
29
|
+
# the item class.
|
29
30
|
def build(data_set)
|
30
31
|
data_set.check_not_empty
|
31
32
|
@data_set = data_set
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Author:: Sergio Fierens (implementation)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/../clusterers/single_linkage'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Clusterers
|
15
|
+
|
16
|
+
# Implementation of a Hierarchical clusterer with average linkage.
|
17
|
+
# Hierarchical clusterers create one cluster per element, and then
|
18
|
+
# progressively merge clusters, until the required number of clusters
|
19
|
+
# is reached.
|
20
|
+
# With average linkage, the distance between two clusters is computed as
|
21
|
+
# the average distance between elements of each cluster.
|
22
|
+
class AverageLinkage < SingleLinkage
|
23
|
+
|
24
|
+
# Build a new clusterer, using data examples found in data_set.
|
25
|
+
# Items will be clustered in "number_of_clusters" different
|
26
|
+
# clusters.
|
27
|
+
def build(data_set, number_of_clusters)
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
# Classifies the given data item, returning the cluster index it belongs
|
32
|
+
# to (0-based).
|
33
|
+
def eval(data_item)
|
34
|
+
super
|
35
|
+
end
|
36
|
+
|
37
|
+
protected
|
38
|
+
|
39
|
+
# Calculate cluster distance using the average linkage method
|
40
|
+
def calc_index_clusters_distance(cluster_a, cluster_b)
|
41
|
+
dist_sum = 0.0
|
42
|
+
cluster_a.each do |index_a|
|
43
|
+
cluster_b.each do |index_b|
|
44
|
+
dist_sum += read_distance_matrix(index_a, index_b)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
return dist_sum/(cluster_a.length*cluster_b.length)
|
48
|
+
end
|
49
|
+
|
50
|
+
def distance_between_item_and_cluster(data_item, cluster)
|
51
|
+
dist_sum = 0.0
|
52
|
+
cluster.data_items.each do |another_item|
|
53
|
+
dist_sum += distance(data_item, another_item)
|
54
|
+
end
|
55
|
+
return dist_sum/cluster.data_items.length
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -7,7 +7,6 @@
|
|
7
7
|
# the Mozilla Public License version 1.1 as published by the
|
8
8
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
9
|
|
10
|
-
require "set"
|
11
10
|
require File.dirname(__FILE__) + '/../data/data_set'
|
12
11
|
require File.dirname(__FILE__) + '/../clusterers/k_means'
|
13
12
|
|
@@ -25,6 +24,23 @@ module Ai4r
|
|
25
24
|
attr_reader :data_set, :number_of_clusters, :clusters, :centroids
|
26
25
|
attr_accessor :max_iterations, :distance_function, :refine
|
27
26
|
|
27
|
+
parameters_info :max_iterations => "Maximum number of iterations to " +
|
28
|
+
"build the clusterer. By default it is uncapped.",
|
29
|
+
:distance_function => "Custom implementation of distance function. " +
|
30
|
+
"It must be a closure receiving two data items and return the " +
|
31
|
+
"distance bewteen them. By default, this algorithm uses " +
|
32
|
+
"ecuclidean distance of numeric attributes to the power of 2.",
|
33
|
+
:centroid_function => "Custom implementation to calculate the " +
|
34
|
+
"centroid of a cluster. It must be a closure receiving an array of " +
|
35
|
+
"data sets, and return an array of data items, representing the " +
|
36
|
+
"centroids of for each data set. " +
|
37
|
+
"By default, this algorithm returns a data items using the mode "+
|
38
|
+
"or mean of each attribute on each data set.",
|
39
|
+
:refine => "Boolean value. True by default. It will run the " +
|
40
|
+
"classic K Means algorithm, using as initial centroids the " +
|
41
|
+
"result of the bisecting approach."
|
42
|
+
|
43
|
+
|
28
44
|
def intialize
|
29
45
|
@refine = true
|
30
46
|
end
|
@@ -54,44 +70,6 @@ module Ai4r
|
|
54
70
|
return self
|
55
71
|
end
|
56
72
|
|
57
|
-
# Get info on what can be parameterized on this clusterer algorithm.
|
58
|
-
# It returns a hash with the following format:
|
59
|
-
# { :param_name => "Info on the parameter" }
|
60
|
-
def get_parameters_info
|
61
|
-
{ :max_iterations => "Maximum number of iterations used to bisect a " +
|
62
|
-
"cluster. By default it is uncapped.",
|
63
|
-
:distance_function => "Custom implementation of distance function. " +
|
64
|
-
"It must be a closure receiving two data items and return the " +
|
65
|
-
"distance bewteen them. By default, this algorithm uses " +
|
66
|
-
"ecuclidean distance of numeric attributes to the power of 2.",
|
67
|
-
:refine => "Boolean value. True by default. It will run the " +
|
68
|
-
"classic K Means algorithm, using as initial centroids the " +
|
69
|
-
"result of the bisecting approach."
|
70
|
-
}
|
71
|
-
end
|
72
|
-
|
73
|
-
# Set parameters on this clusterer instance.
|
74
|
-
# You must provide a hash with the folowing format:
|
75
|
-
# { :param_name => parameter_value }
|
76
|
-
#
|
77
|
-
# Use get_parameters_info to know what parameters are accepted.
|
78
|
-
def set_parameters(parameters)
|
79
|
-
super
|
80
|
-
if parameters.has_key?(:refine)
|
81
|
-
@refine = parameters[:refine]
|
82
|
-
end
|
83
|
-
return self
|
84
|
-
end
|
85
|
-
|
86
|
-
# Get parameter values on this clusterer instance.
|
87
|
-
# Returns a hash with the folowing format:
|
88
|
-
# { :param_name => parameter_value }
|
89
|
-
def get_parameters
|
90
|
-
params = super
|
91
|
-
params[:refine] = @refine
|
92
|
-
return params
|
93
|
-
end
|
94
|
-
|
95
73
|
protected
|
96
74
|
def calc_initial_centroids
|
97
75
|
@centroids # Use existing centroids
|
@@ -31,6 +31,31 @@ module Ai4r
|
|
31
31
|
raise NotImplementedError
|
32
32
|
end
|
33
33
|
|
34
|
+
protected
|
35
|
+
# Useful as a default distance function for clustering algorithms
|
36
|
+
def euclidean_distance(a, b)
|
37
|
+
dist = 0.0
|
38
|
+
a.each_index do |index|
|
39
|
+
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
40
|
+
dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
return dist
|
44
|
+
end
|
45
|
+
|
46
|
+
def get_min_index(array)
|
47
|
+
min = array.first
|
48
|
+
index = 0
|
49
|
+
array.each_index do |i|
|
50
|
+
x = array[i]
|
51
|
+
if x < min
|
52
|
+
min = x
|
53
|
+
index = i
|
54
|
+
end
|
55
|
+
end
|
56
|
+
return index
|
57
|
+
end
|
58
|
+
|
34
59
|
end
|
35
60
|
end
|
36
61
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Author:: Sergio Fierens (implementation)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
11
|
+
require File.dirname(__FILE__) + '/../clusterers/single_linkage'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Clusterers
|
15
|
+
|
16
|
+
# Implementation of a Hierarchical clusterer with complete linkage.
|
17
|
+
# Hierarchical clusterers create one cluster per element, and then
|
18
|
+
# progressively merge clusters, until the required number of clusters
|
19
|
+
# is reached.
|
20
|
+
# With complete linkage, the distance between two clusters is computed as
|
21
|
+
# the maximum distance between elements of each cluster.
|
22
|
+
class CompleteLinkage < SingleLinkage
|
23
|
+
|
24
|
+
# Build a new clusterer, using data examples found in data_set.
|
25
|
+
# Items will be clustered in "number_of_clusters" different
|
26
|
+
# clusters.
|
27
|
+
def build(data_set, number_of_clusters)
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
# Classifies the given data item, returning the cluster index it belongs
|
32
|
+
# to (0-based).
|
33
|
+
def eval(data_item)
|
34
|
+
super
|
35
|
+
end
|
36
|
+
|
37
|
+
protected
|
38
|
+
|
39
|
+
# Calculate cluster distance using the complete linkage method
|
40
|
+
def calc_index_clusters_distance(cluster_a, cluster_b)
|
41
|
+
max_dist = 0
|
42
|
+
cluster_a.each do |index_a|
|
43
|
+
cluster_b.each do |index_b|
|
44
|
+
dist = read_distance_matrix(index_a, index_b)
|
45
|
+
max_dist = dist if dist > max_dist
|
46
|
+
end
|
47
|
+
end
|
48
|
+
return max_dist
|
49
|
+
end
|
50
|
+
|
51
|
+
def distance_between_item_and_cluster(data_item, cluster)
|
52
|
+
max_dist = 0
|
53
|
+
cluster.data_items.each do |another_item|
|
54
|
+
dist = distance(data_item, another_item)
|
55
|
+
max_dist = dist if dist > max_dist
|
56
|
+
end
|
57
|
+
return max_dist
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|