nirvdrum-ai4r 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (150) hide show
  1. data/.gitignore +1 -0
  2. data/.rakeTasks +7 -0
  3. data/README.rdoc +56 -0
  4. data/Rakefile.rb +42 -0
  5. data/VERSION +1 -0
  6. data/ai4r.gemspec +221 -0
  7. data/change_log +49 -0
  8. data/examples/classifiers/id3_data.csv +121 -0
  9. data/examples/classifiers/id3_example.rb +29 -0
  10. data/examples/classifiers/naive_bayes_data.csv +11 -0
  11. data/examples/classifiers/naive_bayes_example.rb +16 -0
  12. data/examples/classifiers/results.txt +31 -0
  13. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  14. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  15. data/examples/neural_network/backpropagation_example.rb +67 -0
  16. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  17. data/examples/neural_network/patterns_with_noise.rb +66 -0
  18. data/examples/neural_network/training_patterns.rb +68 -0
  19. data/examples/neural_network/xor_example.rb +35 -0
  20. data/examples/som/som_data.rb +156 -0
  21. data/examples/som/som_multi_node_example.rb +22 -0
  22. data/examples/som/som_single_example.rb +24 -0
  23. data/lib/ai4r.rb +32 -0
  24. data/lib/ai4r/classifiers/classifier.rb +59 -0
  25. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  26. data/lib/ai4r/classifiers/id3.rb +326 -0
  27. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  28. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  29. data/lib/ai4r/classifiers/one_r.rb +110 -0
  30. data/lib/ai4r/classifiers/prism.rb +197 -0
  31. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  32. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  33. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  34. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  35. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  36. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  37. data/lib/ai4r/clusterers/diana.rb +139 -0
  38. data/lib/ai4r/clusterers/k_means.rb +126 -0
  39. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  40. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  41. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  42. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  43. data/lib/ai4r/data/data_set.rb +266 -0
  44. data/lib/ai4r/data/parameterizable.rb +64 -0
  45. data/lib/ai4r/data/proximity.rb +100 -0
  46. data/lib/ai4r/data/statistics.rb +77 -0
  47. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  48. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  49. data/lib/ai4r/neural_network/backpropagation.rb +293 -0
  50. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  51. data/lib/ai4r/som/layer.rb +68 -0
  52. data/lib/ai4r/som/node.rb +96 -0
  53. data/lib/ai4r/som/som.rb +155 -0
  54. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  55. data/site/forrest.properties +152 -0
  56. data/site/forrest.properties.dispatcher.properties +25 -0
  57. data/site/forrest.properties.xml +29 -0
  58. data/site/src/documentation/README.txt +7 -0
  59. data/site/src/documentation/classes/CatalogManager.properties +62 -0
  60. data/site/src/documentation/content/locationmap.xml +72 -0
  61. data/site/src/documentation/content/xdocs/downloads.html +9 -0
  62. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
  63. data/site/src/documentation/content/xdocs/index.xml +155 -0
  64. data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
  65. data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
  66. data/site/src/documentation/content/xdocs/site.xml +54 -0
  67. data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
  68. data/site/src/documentation/content/xdocs/tabs.xml +35 -0
  69. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  70. data/site/src/documentation/resources/images/c.png +0 -0
  71. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  72. data/site/src/documentation/resources/images/c_wn.png +0 -0
  73. data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
  74. data/site/src/documentation/resources/images/ero.gif +0 -0
  75. data/site/src/documentation/resources/images/europe2.png +0 -0
  76. data/site/src/documentation/resources/images/europe3.png +0 -0
  77. data/site/src/documentation/resources/images/fitness.png +0 -0
  78. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  79. data/site/src/documentation/resources/images/icon-a.png +0 -0
  80. data/site/src/documentation/resources/images/icon-b.png +0 -0
  81. data/site/src/documentation/resources/images/icon.png +0 -0
  82. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  83. data/site/src/documentation/resources/images/my_email.png +0 -0
  84. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  85. data/site/src/documentation/resources/images/project-logo.png +0 -0
  86. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  87. data/site/src/documentation/resources/images/s.png +0 -0
  88. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  89. data/site/src/documentation/resources/images/s_wn.png +0 -0
  90. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  91. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  92. data/site/src/documentation/resources/images/t.png +0 -0
  93. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  94. data/site/src/documentation/resources/images/t_wn.png +0 -0
  95. data/site/src/documentation/resources/schema/catalog.xcat +29 -0
  96. data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
  97. data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
  98. data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
  99. data/site/src/documentation/sitemap.xmap +66 -0
  100. data/site/src/documentation/skinconf.xml +418 -0
  101. data/site/src/documentation/translations/langcode.xml +29 -0
  102. data/site/src/documentation/translations/languages_de.xml +24 -0
  103. data/site/src/documentation/translations/languages_en.xml +24 -0
  104. data/site/src/documentation/translations/languages_es.xml +22 -0
  105. data/site/src/documentation/translations/languages_fr.xml +24 -0
  106. data/site/src/documentation/translations/languages_nl.xml +24 -0
  107. data/site/src/documentation/translations/menu.xml +33 -0
  108. data/site/src/documentation/translations/menu_af.xml +33 -0
  109. data/site/src/documentation/translations/menu_de.xml +33 -0
  110. data/site/src/documentation/translations/menu_es.xml +33 -0
  111. data/site/src/documentation/translations/menu_fr.xml +33 -0
  112. data/site/src/documentation/translations/menu_it.xml +33 -0
  113. data/site/src/documentation/translations/menu_nl.xml +33 -0
  114. data/site/src/documentation/translations/menu_no.xml +33 -0
  115. data/site/src/documentation/translations/menu_ru.xml +33 -0
  116. data/site/src/documentation/translations/menu_sk.xml +33 -0
  117. data/site/src/documentation/translations/tabs.xml +22 -0
  118. data/site/src/documentation/translations/tabs_de.xml +22 -0
  119. data/site/src/documentation/translations/tabs_es.xml +22 -0
  120. data/site/src/documentation/translations/tabs_fr.xml +22 -0
  121. data/site/src/documentation/translations/tabs_nl.xml +22 -0
  122. data/test/classifiers/hyperpipes_test.rb +84 -0
  123. data/test/classifiers/id3_test.rb +208 -0
  124. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  125. data/test/classifiers/naive_bayes_test.rb +43 -0
  126. data/test/classifiers/one_r_test.rb +62 -0
  127. data/test/classifiers/prism_test.rb +85 -0
  128. data/test/classifiers/zero_r_test.rb +50 -0
  129. data/test/clusterers/average_linkage_test.rb +51 -0
  130. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  131. data/test/clusterers/centroid_linkage_test.rb +53 -0
  132. data/test/clusterers/complete_linkage_test.rb +57 -0
  133. data/test/clusterers/diana_test.rb +69 -0
  134. data/test/clusterers/k_means_test.rb +100 -0
  135. data/test/clusterers/median_linkage_test.rb +53 -0
  136. data/test/clusterers/single_linkage_test.rb +122 -0
  137. data/test/clusterers/ward_linkage_test.rb +53 -0
  138. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  139. data/test/data/data_set.csv +121 -0
  140. data/test/data/data_set_test.rb +96 -0
  141. data/test/data/proximity_test.rb +81 -0
  142. data/test/data/statistics_data_set.csv +5 -0
  143. data/test/data/statistics_test.rb +65 -0
  144. data/test/experiment/classifier_evaluator_test.rb +76 -0
  145. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  146. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  147. data/test/neural_network/backpropagation_test.rb +69 -0
  148. data/test/neural_network/hopfield_test.rb +72 -0
  149. data/test/som/som_test.rb +97 -0
  150. metadata +238 -0
@@ -0,0 +1,61 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/parameterizable'
11
+
module Ai4r
  module Clusterers

    # Base class defining the common API shared by all clusterers.
    # Both +build+ and +eval+ only raise NotImplementedError here, so
    # concrete clusterers must override them. Two protected helpers are
    # provided for subclasses.
    class Clusterer

      include Ai4r::Data::Parameterizable

      # Build a new clusterer from the examples found in data_set,
      # grouping the items into "number_of_clusters" clusters.
      # This base version always raises NotImplementedError.
      def build(data_set, number_of_clusters)
        raise NotImplementedError
      end

      # Classify the given data item, returning the cluster it belongs to.
      # This base version always raises NotImplementedError.
      def eval(data_item)
        raise NotImplementedError
      end

      protected

      # Useful as a default distance function for clustering algorithms.
      # Walks the indexes of +a+ and accumulates the squared difference of
      # every position where both +a+ and +b+ hold a Numeric; any other
      # position is skipped.
      # Note that no square root is taken: the value returned is the sum
      # of squares (starting from 0.0).
      def euclidean_distance(a, b)
        sum_of_squares = 0.0
        a.each_index do |i|
          next unless a[i].is_a?(Numeric) && b[i].is_a?(Numeric)
          delta = a[i] - b[i]
          sum_of_squares += delta * delta
        end
        sum_of_squares
      end

      # Return the index of the smallest element of +array+ (compared
      # with <). On ties the first occurrence wins; an empty array
      # yields 0.
      def get_min_index(array)
        best_index = 0
        smallest = array.first
        array.each_with_index do |value, i|
          next unless value < smallest
          smallest = value
          best_index = i
        end
        best_index
      end

    end
  end
end
@@ -0,0 +1,67 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
module Ai4r
  module Clusterers

    # Implementation of a Hierarchical clusterer with complete linkage (Everitt
    # et al., 2001 ; Jain and Dubes, 1988 ; Sorensen, 1948 ).
    # Hierarchical clusterers create one cluster per element, and then
    # progressively merge clusters, until the required number of clusters
    # is reached.
    # With complete linkage, the distance between two clusters is computed as
    # the maximum distance between elements of each cluster.
    #
    #   D(cx, (ci U cj)) = max(D(cx, ci), D(cx, cj))
    class CompleteLinkage < SingleLinkage

      parameters_info :distance_function =>
          "Custom implementation of distance function. " +
          "It must be a closure receiving two data items and return the " +
          "distance bewteen them. By default, this algorithm uses " +
          "ecuclidean distance of numeric attributes to the power of 2."

      # Build a new clusterer, using data examples found in data_set.
      # Items will be clustered in "number_of_clusters" different
      # clusters. Delegates entirely to the superclass implementation.
      def build(data_set, number_of_clusters)
        super
      end

      # Classify the given data item. Delegates entirely to the
      # superclass implementation and returns whatever it returns.
      def eval(data_item)
        super
      end

      protected

      # Distance between cluster cx and the merged cluster (ci U cj)
      # under complete linkage: the larger of the two distances read
      # from the distance matrix.
      def linkage_distance(cx, ci, cj)
        to_ci = read_distance_matrix(cx, ci)
        to_cj = read_distance_matrix(cx, cj)
        [to_ci, to_cj].max
      end

      # Largest distance (according to @distance_function) between
      # data_item and any item of the cluster. Returns 0 when no
      # distance exceeds 0, including when the cluster has no items.
      def distance_between_item_and_cluster(data_item, cluster)
        cluster.data_items.inject(0) do |farthest, member|
          candidate = @distance_function.call(data_item, member)
          candidate > farthest ? candidate : farthest
        end
      end

    end
  end
end
@@ -0,0 +1,139 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../data/proximity'
12
+ require File.dirname(__FILE__) + '/../clusterers/clusterer'
13
+
module Ai4r
  module Clusterers

    # DIANA (Divisive ANAlysis) (Kaufman and Rousseeuw, 1990;
    # Macnaughton - Smith et al. 1964) is a Divisive Hierarchical
    # Clusterer. It begins with only one cluster with all data items,
    # and divides the clusters until the desired number of clusters
    # is reached.
    class Diana < Clusterer

      attr_reader :data_set, :number_of_clusters, :clusters

      parameters_info :distance_function =>
          "Custom implementation of distance function. " +
          "It must be a closure receiving two data items and return the " +
          "distance bewteen them. By default, this algorithm uses " +
          "ecuclidean distance of numeric attributes to the power of 2."

      # Installs the default distance function: the squared euclidean
      # distance (from Ai4r::Data::Proximity) computed over the Numeric
      # attributes of each item only.
      def initialize
        @distance_function = lambda do |a, b|
          numeric_a = a.select { |value| value.is_a? Numeric }
          numeric_b = b.select { |value| value.is_a? Numeric }
          Ai4r::Data::Proximity.squared_euclidean_distance(numeric_a, numeric_b)
        end
      end

      # Build a new clusterer, using divisive analysis (DIANA algorithm).
      # Starts from a single cluster (the 0..-1 slice of data_set) and,
      # while there are fewer clusters than requested, splits the cluster
      # with the largest diameter. Returns self.
      def build(data_set, number_of_clusters)
        @data_set = data_set
        @number_of_clusters = number_of_clusters
        @clusters = [@data_set[0..-1]]

        while @clusters.length < @number_of_clusters
          parent = @clusters[max_diameter_cluster(@clusters)]
          splinter = init_splinter_cluster(parent)

          # Keep moving the best candidate into the splinter group until
          # the best available difference becomes negative.
          gain, position = max_distance_difference(parent, splinter)
          until gain < 0
            splinter << parent.data_items[position]
            parent.data_items.delete_at(position)
            gain, position = max_distance_difference(parent, splinter)
          end

          @clusters << splinter
        end

        self
      end

      # Classifies the given data item, returning the index (0-based) of
      # the cluster whose items have the lowest average distance to it.
      def eval(data_item)
        mean_distances = @clusters.map do |cluster|
          distance_sum(data_item, cluster) / cluster.data_items.length
        end
        get_min_index(mean_distances)
      end

      protected

      # Return the index of the cluster with the largest diameter.
      # Index 0 is returned when no diameter is greater than 0.
      def max_diameter_cluster(clusters)
        widest_index = 0
        widest = 0
        clusters.each_with_index do |cluster, i|
          span = cluster_diameter(cluster)
          next unless span > widest
          widest_index = i
          widest = span
        end
        widest_index
      end

      # Max distance between any 2 items in a cluster. Each unordered
      # pair is visited once (every item against the items before it).
      def cluster_diameter(cluster)
        widest = 0
        cluster.data_items.each_with_index do |item_a, pos_a|
          pos_a.times do |pos_b|
            span = @distance_function.call(item_a, cluster.data_items[pos_b])
            widest = span if span > widest
          end
        end
        widest
      end

      # Create a cluster holding the item with the max summed distance
      # to the items of cluster_to_split.
      # That item is removed from the initial cluster.
      def init_splinter_cluster(cluster_to_split)
        farthest_sum = 0.0
        farthest_index = 0
        cluster_to_split.data_items.each_with_index do |item, i|
          total = distance_sum(item, cluster_to_split)
          if total > farthest_sum
            farthest_sum = total
            farthest_index = i
          end
        end
        seed = cluster_to_split[farthest_index]
        cluster_to_split.data_items.delete_at(farthest_index)
        seed
      end

      # For every item of cluster_to_split, compute its average distance
      # to the rest of that cluster minus its average distance to the
      # items of splinter_cluster. Returns the largest such difference
      # together with the index of the item that produced it.
      # A positive value means that the item is closer to the
      # splinter group than to its current cluster.
      # When no item qualifies, the pair [-Infinity, 0] is returned.
      def max_distance_difference(cluster_to_split, splinter_cluster)
        best_diff = -1.0/0
        best_index = 0
        cluster_to_split.data_items.each_with_index do |item, i|
          mean_own = distance_sum(item, cluster_to_split) / (cluster_to_split.data_items.length-1)
          mean_splinter = distance_sum(item, splinter_cluster) / (splinter_cluster.data_items.length)
          diff = mean_own - mean_splinter
          next unless diff > best_diff
          best_diff = diff
          best_index = i
        end
        [best_diff, best_index]
      end

      # Sum up the distance between an item and all the items in a cluster
      # (0.0 for a cluster without items).
      def distance_sum(item_a, cluster)
        total = 0.0
        cluster.data_items.each do |item_b|
          total = total + @distance_function.call(item_a, item_b)
        end
        total
      end

    end
  end
end
@@ -0,0 +1,126 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/clusterer'
12
+
module Ai4r
  module Clusterers

    # The k-means algorithm is an algorithm to cluster n objects
    # based on attributes into k partitions, with k < n.
    #
    # More about K Means algorithm:
    # http://en.wikipedia.org/wiki/K-means_algorithm
    class KMeans < Clusterer

      attr_reader :data_set, :number_of_clusters
      attr_reader :clusters, :centroids, :iterations

      parameters_info :max_iterations => "Maximum number of iterations to " +
          "build the clusterer. By default it is uncapped.",
        :distance_function => "Custom implementation of distance function. " +
          "It must be a closure receiving two data items and return the " +
          "distance bewteen them. By default, this algorithm uses " +
          "ecuclidean distance of numeric attributes to the power of 2.",
        :centroid_function => "Custom implementation to calculate the " +
          "centroid of a cluster. It must be a closure receiving an array of " +
          "data sets, and return an array of data items, representing the " +
          "centroids of for each data set. " +
          "By default, this algorithm returns a data items using the mode "+
          "or mean of each attribute on each data set."

      # Sets the defaults: no custom distance function, no iteration cap,
      # and a centroid function that maps every data set to its
      # get_mean_or_mode result.
      def initialize
        @distance_function = nil
        @max_iterations = nil
        @old_centroids = nil
        @centroid_function = lambda do |data_sets|
          data_sets.collect { |data_set| data_set.get_mean_or_mode }
        end
      end

      # Build a new clusterer, using data examples found in data_set.
      # Items will be clustered in "number_of_clusters" different
      # clusters. Membership and centroids are recomputed until the
      # stop criteria is met. Returns self.
      def build(data_set, number_of_clusters)
        @data_set = data_set
        @number_of_clusters = number_of_clusters
        @iterations = 0
        # Forget the centroids left over from a previous build on this
        # instance; otherwise the first stop check could compare the new
        # initial centroids against stale state and end prematurely.
        @old_centroids = nil

        calc_initial_centroids
        until stop_criteria_met
          calculate_membership_clusters
          recompute_centroids
        end

        self
      end

      # Classifies the given data item, returning the index (0-based) of
      # the centroid with the smallest distance to it.
      def eval(data_item)
        distances = @centroids.collect { |centroid| distance(data_item, centroid) }
        get_min_index(distances)
      end

      # This function calculates the distance between 2 different
      # instances. By default, it returns the euclidean distance to the
      # power of 2.
      # You can provide a more convenient distance implementation:
      #
      # 1- Overwriting this method
      #
      # 2- Providing a closure to the :distance_function parameter
      def distance(a, b)
        @distance_function ? @distance_function.call(a, b) : euclidean_distance(a, b)
      end

      protected

      # Pick the initial centroids by drawing random data items. An index
      # is never tried twice, and an item equal to an already chosen
      # centroid is not added again. Stops once enough centroids were
      # found or every index was tried; @number_of_clusters is then
      # adjusted to the number of centroids actually found.
      def calc_initial_centroids
        @centroids = []
        tried_indexes = []
        while @centroids.length < @number_of_clusters &&
            tried_indexes.length < @data_set.data_items.length
          random_index = rand(@data_set.data_items.length)
          next if tried_indexes.include?(random_index)
          tried_indexes << random_index
          candidate = @data_set.data_items[random_index]
          @centroids << candidate unless @centroids.include?(candidate)
        end
        @number_of_clusters = @centroids.length
      end

      # Truthy when the centroids did not change in the last iteration,
      # or when a max_iterations cap is set and has been reached.
      def stop_criteria_met
        @old_centroids == @centroids ||
          (@max_iterations && (@max_iterations <= @iterations))
      end

      # Rebuild @clusters as one empty DataSet per cluster (sharing the
      # labels of the source data set) and append every data item to the
      # cluster selected by eval.
      def calculate_membership_clusters
        @clusters = Array.new(@number_of_clusters) do
          Ai4r::Data::DataSet.new :data_labels => @data_set.data_labels
        end
        @data_set.data_items.each do |data_item|
          @clusters[eval(data_item)] << data_item
        end
      end

      # Remember the current centroids, count the iteration, and compute
      # the new centroids from the current clusters.
      def recompute_centroids
        @old_centroids = @centroids
        @iterations += 1
        @centroids = @centroid_function.call(@clusters)
      end

    end
  end
end
@@ -0,0 +1,61 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
module Ai4r
  module Clusterers

    # Implementation of an Agglomerative Hierarchical clusterer with
    # median linkage algorithm, aka weighted pair group method centroid
    # or WPGMC (Everitt et al., 2001 ; Gower, 1967 ; Jain and Dubes, 1988 ).
    # Hierarchical clusterers create one cluster per element, and then
    # progressively merge clusters, until the required number of clusters
    # is reached.
    # Similar to centroid linkage, but using fixed weights:
    #
    #   D(cx, (ci U cj)) =  (1/2)*D(cx, ci) +
    #                       (1/2)*D(cx, cj) -
    #                       (1/4)*D(ci, cj)
    class MedianLinkage < SingleLinkage

      parameters_info :distance_function =>
          "Custom implementation of distance function. " +
          "It must be a closure receiving two data items and return the " +
          "distance bewteen them. By default, this algorithm uses " +
          "ecuclidean distance of numeric attributes to the power of 2."

      # Build a new clusterer, using data examples found in data_set.
      # Items will be clustered in "number_of_clusters" different
      # clusters. Delegates entirely to the superclass implementation.
      def build(data_set, number_of_clusters)
        super
      end

      # This algorithm does not allow classification of new data items
      # once it has been built. Rebuild the clusterer including your data
      # element instead.
      # Always raises a RuntimeError carrying that explanation.
      def eval(data_item)
        # Kernel#raise is lowercase; the capitalized form is an undefined
        # method and would surface as NoMethodError without this message.
        raise "Eval of new data is not supported by this algorithm."
      end

      protected

      # Return the distance between cluster cx and cluster (ci U cj),
      # using median linkage over the values read from the distance
      # matrix.
      def linkage_distance(cx, ci, cj)
        ( 0.5  * read_distance_matrix(cx, ci) +
          0.5  * read_distance_matrix(cx, cj) -
          0.25 * read_distance_matrix(ci, cj))
      end

    end
  end
end
61
+