ai4r 1.5 → 1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. data/examples/clusterers/simple_website_clustering.rb +47 -0
  2. data/lib/ai4r.rb +7 -0
  3. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  4. data/lib/ai4r/clusterers/average_linkage.rb +22 -23
  5. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  6. data/lib/ai4r/clusterers/complete_linkage.rb +17 -12
  7. data/lib/ai4r/clusterers/diana.rb +139 -0
  8. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  9. data/lib/ai4r/clusterers/single_linkage.rb +57 -42
  10. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  11. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  12. data/lib/ai4r/data/constants.rb +18 -0
  13. data/lib/ai4r/data/data_set.rb +5 -3
  14. data/lib/ai4r/data/proximity.rb +18 -0
  15. data/test/clusterers/average_linkage_test.rb +14 -11
  16. data/test/clusterers/bisecting_k_means_test.rb +9 -0
  17. data/test/clusterers/centroid_linkage_test.rb +50 -0
  18. data/test/clusterers/complete_linkage_test.rb +14 -5
  19. data/test/clusterers/diana_test.rb +69 -0
  20. data/test/clusterers/k_means_test.rb +9 -0
  21. data/test/clusterers/median_linkage_test.rb +50 -0
  22. data/test/clusterers/single_linkage_test.rb +15 -6
  23. data/test/clusterers/ward_linkage_test.rb +50 -0
  24. data/test/clusterers/weighted_average_linkage_test.rb +50 -0
  25. data/test/data/data_set_test.rb +14 -0
  26. data/test/data/proximity_test.rb +10 -0
  27. metadata +87 -298
  28. data/site/build/site/en/broken-links.xml +0 -2
  29. data/site/build/site/en/build/tmp/build-info.xml +0 -5
  30. data/site/build/site/en/build/tmp/plugins-1.xml +0 -212
  31. data/site/build/site/en/build/tmp/plugins-2.xml +0 -252
  32. data/site/build/site/en/build/tmp/projfilters.properties +0 -41
  33. data/site/build/site/en/downloads.html +0 -200
  34. data/site/build/site/en/downloads.pdf +0 -151
  35. data/site/build/site/en/geneticAlgorithms.html +0 -591
  36. data/site/build/site/en/geneticAlgorithms.pdf +0 -934
  37. data/site/build/site/en/images/ai4r-logo.png +0 -0
  38. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  39. data/site/build/site/en/images/c.png +0 -0
  40. data/site/build/site/en/images/c_wbn.png +0 -0
  41. data/site/build/site/en/images/c_wn.png +0 -0
  42. data/site/build/site/en/images/ero.gif +0 -0
  43. data/site/build/site/en/images/europe2.png +0 -0
  44. data/site/build/site/en/images/europe3.png +0 -0
  45. data/site/build/site/en/images/fitness.png +0 -0
  46. data/site/build/site/en/images/genetic_algorithms_example.png +0 -0
  47. data/site/build/site/en/images/instruction_arrow.png +0 -0
  48. data/site/build/site/en/images/jadeferret.png +0 -0
  49. data/site/build/site/en/images/my_email.png +0 -0
  50. data/site/build/site/en/images/neural_network_example.png +0 -0
  51. data/site/build/site/en/images/rubyforge.png +0 -0
  52. data/site/build/site/en/images/s.png +0 -0
  53. data/site/build/site/en/images/s_wbn.png +0 -0
  54. data/site/build/site/en/images/s_wn.png +0 -0
  55. data/site/build/site/en/images/sigmoid.png +0 -0
  56. data/site/build/site/en/images/t.png +0 -0
  57. data/site/build/site/en/images/t_wbn.png +0 -0
  58. data/site/build/site/en/images/t_wn.png +0 -0
  59. data/site/build/site/en/index.html +0 -390
  60. data/site/build/site/en/index.pdf +0 -657
  61. data/site/build/site/en/linkmap.html +0 -261
  62. data/site/build/site/en/linkmap.pdf +0 -94
  63. data/site/build/site/en/locationmap.xml +0 -72
  64. data/site/build/site/en/machineLearning.html +0 -340
  65. data/site/build/site/en/machineLearning.pdf +0 -337
  66. data/site/build/site/en/neuralNetworks.html +0 -521
  67. data/site/build/site/en/neuralNetworks.pdf +0 -671
  68. data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
  69. data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
  70. data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
  71. data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
  72. data/site/build/site/en/skin/basic.css +0 -166
  73. data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
  74. data/site/build/site/en/skin/breadcrumbs.js +0 -237
  75. data/site/build/site/en/skin/fontsize.js +0 -166
  76. data/site/build/site/en/skin/getBlank.js +0 -40
  77. data/site/build/site/en/skin/getMenu.js +0 -45
  78. data/site/build/site/en/skin/images/README.txt +0 -1
  79. data/site/build/site/en/skin/images/add.jpg +0 -0
  80. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  81. data/site/build/site/en/skin/images/chapter.gif +0 -0
  82. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  83. data/site/build/site/en/skin/images/current.gif +0 -0
  84. data/site/build/site/en/skin/images/error.png +0 -0
  85. data/site/build/site/en/skin/images/external-link.gif +0 -0
  86. data/site/build/site/en/skin/images/fix.jpg +0 -0
  87. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  88. data/site/build/site/en/skin/images/hack.jpg +0 -0
  89. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  90. data/site/build/site/en/skin/images/info.png +0 -0
  91. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  92. data/site/build/site/en/skin/images/label.gif +0 -0
  93. data/site/build/site/en/skin/images/page.gif +0 -0
  94. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  95. data/site/build/site/en/skin/images/poddoc.png +0 -0
  96. data/site/build/site/en/skin/images/printer.gif +0 -0
  97. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  98. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  99. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  100. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  101. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  102. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  103. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  104. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  105. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  106. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  107. data/site/build/site/en/skin/images/remove.jpg +0 -0
  108. data/site/build/site/en/skin/images/rss.png +0 -0
  109. data/site/build/site/en/skin/images/spacer.gif +0 -0
  110. data/site/build/site/en/skin/images/success.png +0 -0
  111. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  112. data/site/build/site/en/skin/images/update.jpg +0 -0
  113. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  114. data/site/build/site/en/skin/images/vcss.png +0 -0
  115. data/site/build/site/en/skin/images/warning.png +0 -0
  116. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  117. data/site/build/site/en/skin/menu.js +0 -48
  118. data/site/build/site/en/skin/note.txt +0 -50
  119. data/site/build/site/en/skin/print.css +0 -54
  120. data/site/build/site/en/skin/profile.css +0 -163
  121. data/site/build/site/en/skin/prototype.js +0 -1257
  122. data/site/build/site/en/skin/screen.css +0 -587
  123. data/site/build/site/en/sourceCode.html +0 -244
  124. data/site/build/site/en/sourceCode.pdf +0 -278
  125. data/site/build/site/en/svn.html +0 -244
  126. data/site/build/site/en/svn.pdf +0 -278
  127. data/site/build/tmp/brokenlinks.xml +0 -2
  128. data/site/build/tmp/build-info.xml +0 -5
  129. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  130. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  131. data/site/build/tmp/input.xmap +0 -32
  132. data/site/build/tmp/internal.xmap +0 -32
  133. data/site/build/tmp/locationmap.xml +0 -29
  134. data/site/build/tmp/output.xmap +0 -38
  135. data/site/build/tmp/pluginlist2fetchbuild.xml +0 -144
  136. data/site/build/tmp/plugins-1.xml +0 -201
  137. data/site/build/tmp/plugins-2.xml +0 -401
  138. data/site/build/tmp/projfilters.properties +0 -41
  139. data/site/build/tmp/resources.xmap +0 -32
  140. data/site/build/webapp/WEB-INF/logs/access.log +0 -0
  141. data/site/build/webapp/WEB-INF/logs/core.log +0 -775
  142. data/site/build/webapp/WEB-INF/logs/debug.log +0 -0
  143. data/site/build/webapp/WEB-INF/logs/error.log +0 -213
  144. data/site/build/webapp/WEB-INF/logs/flow.log +0 -0
  145. data/site/build/webapp/WEB-INF/logs/idgen.log +0 -0
  146. data/site/build/webapp/WEB-INF/logs/linkrewriter.log +0 -0
  147. data/site/build/webapp/WEB-INF/logs/locationmap.log +0 -0
  148. data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -0
  149. data/site/build/webapp/WEB-INF/logs/xmlform.log +0 -0
  150. data/site/forrest.properties +0 -152
  151. data/site/forrest.properties.dispatcher.properties +0 -25
  152. data/site/forrest.properties.xml +0 -29
  153. data/site/src/documentation/README.txt +0 -7
  154. data/site/src/documentation/classes/CatalogManager.properties +0 -62
  155. data/site/src/documentation/content/locationmap.xml +0 -72
  156. data/site/src/documentation/content/xdocs/downloads.html +0 -9
  157. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +0 -294
  158. data/site/src/documentation/content/xdocs/index.xml +0 -129
  159. data/site/src/documentation/content/xdocs/machineLearning.xml +0 -131
  160. data/site/src/documentation/content/xdocs/neuralNetworks.xml +0 -270
  161. data/site/src/documentation/content/xdocs/site.xml +0 -54
  162. data/site/src/documentation/content/xdocs/sourceCode.xml +0 -43
  163. data/site/src/documentation/content/xdocs/tabs.xml +0 -35
  164. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  165. data/site/src/documentation/resources/images/c.png +0 -0
  166. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  167. data/site/src/documentation/resources/images/c_wn.png +0 -0
  168. data/site/src/documentation/resources/images/ellipse-2.svg +0 -30
  169. data/site/src/documentation/resources/images/ero.gif +0 -0
  170. data/site/src/documentation/resources/images/europe2.png +0 -0
  171. data/site/src/documentation/resources/images/europe3.png +0 -0
  172. data/site/src/documentation/resources/images/fitness.png +0 -0
  173. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  174. data/site/src/documentation/resources/images/icon-a.png +0 -0
  175. data/site/src/documentation/resources/images/icon-b.png +0 -0
  176. data/site/src/documentation/resources/images/icon.png +0 -0
  177. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  178. data/site/src/documentation/resources/images/my_email.png +0 -0
  179. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  180. data/site/src/documentation/resources/images/project-logo.png +0 -0
  181. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  182. data/site/src/documentation/resources/images/s.png +0 -0
  183. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  184. data/site/src/documentation/resources/images/s_wn.png +0 -0
  185. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  186. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  187. data/site/src/documentation/resources/images/t.png +0 -0
  188. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  189. data/site/src/documentation/resources/images/t_wn.png +0 -0
  190. data/site/src/documentation/resources/schema/catalog.xcat +0 -29
  191. data/site/src/documentation/resources/schema/hello-v10.dtd +0 -51
  192. data/site/src/documentation/resources/schema/symbols-project-v10.ent +0 -26
  193. data/site/src/documentation/resources/stylesheets/hello2document.xsl +0 -33
  194. data/site/src/documentation/sitemap.xmap +0 -66
  195. data/site/src/documentation/skinconf.xml +0 -418
  196. data/site/src/documentation/translations/langcode.xml +0 -29
  197. data/site/src/documentation/translations/languages_de.xml +0 -24
  198. data/site/src/documentation/translations/languages_en.xml +0 -24
  199. data/site/src/documentation/translations/languages_es.xml +0 -22
  200. data/site/src/documentation/translations/languages_fr.xml +0 -24
  201. data/site/src/documentation/translations/languages_nl.xml +0 -24
  202. data/site/src/documentation/translations/menu.xml +0 -33
  203. data/site/src/documentation/translations/menu_af.xml +0 -33
  204. data/site/src/documentation/translations/menu_de.xml +0 -33
  205. data/site/src/documentation/translations/menu_es.xml +0 -33
  206. data/site/src/documentation/translations/menu_fr.xml +0 -33
  207. data/site/src/documentation/translations/menu_it.xml +0 -33
  208. data/site/src/documentation/translations/menu_nl.xml +0 -33
  209. data/site/src/documentation/translations/menu_no.xml +0 -33
  210. data/site/src/documentation/translations/menu_ru.xml +0 -33
  211. data/site/src/documentation/translations/menu_sk.xml +0 -33
  212. data/site/src/documentation/translations/tabs.xml +0 -22
  213. data/site/src/documentation/translations/tabs_de.xml +0 -22
  214. data/site/src/documentation/translations/tabs_es.xml +0 -22
  215. data/site/src/documentation/translations/tabs_fr.xml +0 -22
  216. data/site/src/documentation/translations/tabs_nl.xml +0 -22
@@ -0,0 +1,61 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
13
+ module Ai4r
14
+ module Clusterers
15
+
16
+ # Implementation of an Agglomerative Hierarchical clusterer with
17
+ # median linkage algorithm, aka weighted pair group method centroid
18
+ # or WPGMC (Everitt et al., 2001 ; Gower, 1967 ; Jain and Dubes, 1988 ).
19
+ # Hierarchical clusterers create one cluster per element, and then
20
+ # progressively merge clusters, until the required number of clusters
21
+ # is reached.
22
+ # Similar to centroid linkage, but using fixed weights:
23
+ #
24
+ # D(cx, (ci U cj)) = (1/2)*D(cx, ci) +
25
+ # (1/2)*D(cx, cj) -
26
+ # (1/4)*D(ci, cj)
27
+ class MedianLinkage < SingleLinkage
28
+
29
+ parameters_info :distance_function =>
30
+ "Custom implementation of distance function. " +
31
+ "It must be a closure receiving two data items and return the " +
32
+ "distance bewteen them. By default, this algorithm uses " +
33
+ "ecuclidean distance of numeric attributes to the power of 2."
34
+
35
+ # Build a new clusterer, using data examples found in data_set.
36
+ # Items will be clustered in "number_of_clusters" different
37
+ # clusters.
38
+ def build(data_set, number_of_clusters)
39
+ super
40
+ end
41
+
42
+ # This algorithm does not allow classification of new data items
43
+ # once it has been built. Rebuild the cluster including your data element.
44
+ def eval(data_item)
45
+ Raise "Eval of new data is not supported by this algorithm."
46
+ end
47
+
48
+ protected
49
+
50
+ # return distance between cluster cx and cluster (ci U cj),
51
+ # using median linkage
52
+ def linkage_distance(cx, ci, cj)
53
+ ( 0.5 * read_distance_matrix(cx, ci) +
54
+ 0.5 * read_distance_matrix(cx, cj) -
55
+ 0.25 * read_distance_matrix(ci, cj))
56
+ end
57
+
58
+ end
59
+ end
60
+ end
61
+
@@ -8,17 +8,21 @@
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
10
  require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../data/proximity'
11
12
  require File.dirname(__FILE__) + '/../clusterers/clusterer'
12
13
 
13
14
  module Ai4r
14
15
  module Clusterers
15
16
 
16
- # Implementation of a Hierarchical clusterer with single linkage.
17
+ # Implementation of a Hierarchical clusterer with single linkage (Everitt et
18
+ # al., 2001 ; Johnson, 1967 ; Jain and Dubes, 1988 ; Sneath, 1957 )
17
19
  # Hierarchical clusteres create one cluster per element, and then
18
20
  # progressively merge clusters, until the required number of clusters
19
21
  # is reached.
20
22
  # With single linkage, the distance between two clusters is computed as the
21
23
  # distance between the two closest elements in the two clusters.
24
+ #
25
+ # D(cx, (ci U cj)) = min(D(cx, ci), D(cx, cj))
22
26
  class SingleLinkage < Clusterer
23
27
 
24
28
  attr_reader :data_set, :number_of_clusters, :clusters
@@ -30,7 +34,11 @@ module Ai4r
30
34
  "ecuclidean distance of numeric attributes to the power of 2."
31
35
 
32
36
  def initialize
33
- @distance_function = nil
37
+ @distance_function = lambda do |a,b|
38
+ Ai4r::Data::Proximity.squared_euclidean_distance(
39
+ a.select {|att_a| att_a.is_a? Numeric} ,
40
+ b.select {|att_b| att_b.is_a? Numeric})
41
+ end
34
42
  end
35
43
 
36
44
  # Build a new clusterer, using data examples found in data_set.
@@ -40,13 +48,14 @@ module Ai4r
40
48
  @data_set = data_set
41
49
  @number_of_clusters = number_of_clusters
42
50
 
43
- index_clusters = create_initial_index_clusters
51
+ @index_clusters = create_initial_index_clusters
44
52
  create_distance_matrix(data_set)
45
- while index_clusters.length > @number_of_clusters
46
- clusters_to_merge = get_closest_clusters(index_clusters)
47
- index_clusters = merge_clusters(clusters_to_merge, index_clusters)
53
+ while @index_clusters.length > @number_of_clusters
54
+ ci, cj = get_closest_clusters(@index_clusters)
55
+ update_distance_matrix(ci, cj)
56
+ merge_clusters(ci, cj, @index_clusters)
48
57
  end
49
- @clusters = build_clusters_from_index_clusters index_clusters
58
+ @clusters = build_clusters_from_index_clusters @index_clusters
50
59
 
51
60
  return self
52
61
  end
@@ -58,19 +67,6 @@ module Ai4r
58
67
  distance_between_item_and_cluster(data_item, cluster)})
59
68
  end
60
69
 
61
- # This function calculates the distance between 2 different
62
- # instances. By default, it returns the euclidean distance to the
63
- # power of 2.
64
- # You can provide a more convinient distance implementation:
65
- #
66
- # 1- Overwriting this method
67
- #
68
- # 2- Providing a closure to the :distance_function parameter
69
- def distance(a, b)
70
- return @distance_function.call(a, b) if @distance_function
71
- return euclidean_distance(a, b)
72
- end
73
-
74
70
  protected
75
71
 
76
72
  # returns [ [0], [1], [2], ... , [n-1] ]
@@ -95,7 +91,7 @@ module Ai4r
95
91
  data_set.data_items.each_with_index do |a, i|
96
92
  i.times do |j|
97
93
  b = data_set.data_items[j]
98
- @distance_matrix[i-1][j] = distance(a, b)
94
+ @distance_matrix[i-1][j] = @distance_function.call(a, b)
99
95
  end
100
96
  end
101
97
  end
@@ -108,14 +104,46 @@ module Ai4r
108
104
  return @distance_matrix[index_a-1][index_b]
109
105
  end
110
106
 
111
- # clusters_to_merge = [index_cluster_a, index_cluster_b].
107
+ # ci and cj are the indexes of the clusters that are going to
108
+ # be merged. We need to remove distances from/to ci and cj,
109
+ # and add distances from/to new cluster (ci U cj)
110
+ def update_distance_matrix(ci, cj)
111
+ ci, cj = cj, ci if cj > ci
112
+ distances_to_new_cluster = Array.new
113
+ (@distance_matrix.length+1).times do |cx|
114
+ if cx!= ci && cx!=cj
115
+ distances_to_new_cluster << linkage_distance(cx, ci, cj)
116
+ end
117
+ end
118
+ if cj==0 && ci==1
119
+ @distance_matrix.delete_at(1)
120
+ @distance_matrix.delete_at(0)
121
+ elsif cj==0
122
+ @distance_matrix.delete_at(ci-1)
123
+ @distance_matrix.delete_at(0)
124
+ else
125
+ @distance_matrix.delete_at(ci-1)
126
+ @distance_matrix.delete_at(cj-1)
127
+ end
128
+ @distance_matrix.each do |d|
129
+ d.delete_at(ci)
130
+ d.delete_at(cj)
131
+ end
132
+ @distance_matrix << distances_to_new_cluster
133
+ end
134
+
135
+ # return distance between cluster cx and new cluster (ci U cj),
136
+ # using single linkage
137
+ def linkage_distance(cx, ci, cj)
138
+ [read_distance_matrix(cx, ci),
139
+ read_distance_matrix(cx, cj)].min
140
+ end
141
+
112
142
  # cluster_a and cluster_b are removed from index_cluster,
113
143
  # and a new cluster with all members of cluster_a and cluster_b
114
144
  # is added.
115
- # It returns the new clusters array.
116
- def merge_clusters(clusters_to_merge, index_clusters)
117
- index_a = clusters_to_merge.first
118
- index_b = clusters_to_merge.last
145
+ # It modifies index clusters array.
146
+ def merge_clusters(index_a, index_b, index_clusters)
119
147
  index_a, index_b = index_b, index_a if index_b > index_a
120
148
  new_index_cluster = index_clusters[index_a] +
121
149
  index_clusters[index_b]
@@ -140,10 +168,9 @@ module Ai4r
140
168
  def get_closest_clusters(index_clusters)
141
169
  min_distance = 1.0/0
142
170
  closest_clusters = [1, 0]
143
- index_clusters.each_with_index do |cluster_a, index_a|
171
+ index_clusters.each_index do |index_a|
144
172
  index_a.times do |index_b|
145
- cluster_b = index_clusters[index_b]
146
- cluster_distance = calc_index_clusters_distance(cluster_a, cluster_b)
173
+ cluster_distance = read_distance_matrix(index_a, index_b)
147
174
  if cluster_distance < min_distance
148
175
  closest_clusters = [index_a, index_b]
149
176
  min_distance = cluster_distance
@@ -153,22 +180,10 @@ module Ai4r
153
180
  return closest_clusters
154
181
  end
155
182
 
156
- # Calculate cluster distance using the single linkage method
157
- def calc_index_clusters_distance(cluster_a, cluster_b)
158
- min_dist = 1.0/0
159
- cluster_a.each do |index_a|
160
- cluster_b.each do |index_b|
161
- dist = read_distance_matrix(index_a, index_b)
162
- min_dist = dist if dist < min_dist
163
- end
164
- end
165
- return min_dist
166
- end
167
-
168
183
  def distance_between_item_and_cluster(data_item, cluster)
169
184
  min_dist = 1.0/0
170
185
  cluster.data_items.each do |another_item|
171
- dist = distance(data_item, another_item)
186
+ dist = @distance_function.call(data_item, another_item)
172
187
  min_dist = dist if dist < min_dist
173
188
  end
174
189
  return min_dist
@@ -0,0 +1,64 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
13
+ module Ai4r
14
+ module Clusterers
15
+
16
+ # Implementation of an Agglomerative Hierarchical clusterer with
17
+ # Ward's method linkage algorithm, aka the minimum variance method (Everitt
18
+ # et al., 2001 ; Jain and Dubes, 1988 ; Ward, 1963 ).
19
+ # Hierarchical clusterers create one cluster per element, and then
20
+ # progressively merge clusters, until the required number of clusters
21
+ # is reached.
22
+ # The objective of this method is to minimize the variance.
23
+ #
24
+ # D(cx, (ci U cj)) = ((ni+nx)/(ni+nj+nx))*D(cx, ci) +
25
+ # ((nj+nx)/(ni+nj+nx))*D(cx, cj) -
26
+ # (nx/(ni+nj)^2)*D(ci, cj)
27
+ class WardLinkage < SingleLinkage
28
+
29
+ parameters_info :distance_function =>
30
+ "Custom implementation of distance function. " +
31
+ "It must be a closure receiving two data items and return the " +
32
+ "distance bewteen them. By default, this algorithm uses " +
33
+ "ecuclidean distance of numeric attributes to the power of 2."
34
+
35
+ # Build a new clusterer, using data examples found in data_set.
36
+ # Items will be clustered in "number_of_clusters" different
37
+ # clusters.
38
+ def build(data_set, number_of_clusters)
39
+ super
40
+ end
41
+
42
+ # This algorithm does not allow classification of new data items
43
+ # once it has been built. Rebuild the cluster including your data element.
44
+ def eval(data_item)
45
+ Raise "Eval of new data is not supported by this algorithm."
46
+ end
47
+
48
+ protected
49
+
50
+ # return distance between cluster cx and cluster (ci U cj),
51
+ # using ward's method linkage
52
+ def linkage_distance(cx, ci, cj)
53
+ ni = @index_clusters[ci].length
54
+ nj = @index_clusters[cj].length
55
+ nx = @index_clusters[cx].length
56
+ ( ( ( 1.0* (ni+nx) * read_distance_matrix(cx, ci) ) +
57
+ ( 1.0* (nj+nx) * read_distance_matrix(cx, cj) ) ) / (ni + nj + nx) -
58
+ ( 1.0 * nx * read_distance_matrix(ci, cj) / (ni+nj)**2 ) )
59
+ end
60
+
61
+ end
62
+ end
63
+ end
64
+
@@ -0,0 +1,61 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
13
+ module Ai4r
14
+ module Clusterers
15
+
16
+ # Implementation of an Agglomerative Hierarchical clusterer with
17
+ # weighted average linkage algorithm, aka weighted pair group method
18
+ # average or WPGMA (Jain and Dubes, 1988 ; McQuitty, 1966 )
19
+ # Hierarchical clusterers create one cluster per element, and then
20
+ # progressively merge clusters, until the required number of clusters
21
+ # is reached.
22
+ # Similar to AverageLinkage, but the distances between clusters are
23
+ # weighted based on the number of data items in each of them.
24
+ #
25
+ # D(cx, (ci U cj)) = ( ni * D(cx, ci) + nj * D(cx, cj)) / (ni + nj)
26
+ class WeightedAverageLinkage < SingleLinkage
27
+
28
+ parameters_info :distance_function =>
29
+ "Custom implementation of distance function. " +
30
+ "It must be a closure receiving two data items and return the " +
31
+ "distance bewteen them. By default, this algorithm uses " +
32
+ "ecuclidean distance of numeric attributes to the power of 2."
33
+
34
+ # Build a new clusterer, using data examples found in data_set.
35
+ # Items will be clustered in "number_of_clusters" different
36
+ # clusters.
37
+ def build(data_set, number_of_clusters)
38
+ super
39
+ end
40
+
41
+ # This algorithm does not allow classification of new data items
42
+ # once it has been built. Rebuild the cluster including your data element.
43
+ def eval(data_item)
44
+ Raise "Eval of new data is not supported by this algorithm."
45
+ end
46
+
47
+ protected
48
+
49
+ # return distance between cluster cx and cluster (ci U cj),
50
+ # using weighted average linkage
51
+ def linkage_distance(cx, ci, cj)
52
+ ni = @index_clusters[ci].length
53
+ nj = @index_clusters[cj].length
54
+ (1.0 * ni * read_distance_matrix(cx, ci)+
55
+ nj * read_distance_matrix(cx, cj))/(ni+nj)
56
+ end
57
+
58
+ end
59
+ end
60
+ end
61
+
@@ -0,0 +1,18 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ module Ai4r
11
+ module Data
12
+
13
+ POSITIVE_INFINTY = 1.0/0
14
+
15
+ NEGATIVE_INFINTY = -1.0/0
16
+
17
+ end
18
+ end
@@ -41,7 +41,9 @@ module Ai4r
41
41
  # Retrieve a new DataSet, with the item(s) selected by the provided
42
42
  # index. You can specify an index range, too.
43
43
  def [](index)
44
- return DataSet.new(:data_items=>@data_items[index],
44
+ selected_items = (index.is_a?(Fixnum)) ?
45
+ [@data_items[index]] : @data_items[index]
46
+ return DataSet.new(:data_items => selected_items,
45
47
  :data_labels =>@data_labels)
46
48
  end
47
49
 
@@ -173,7 +175,7 @@ module Ai4r
173
175
 
174
176
  # Add a data item to the data set
175
177
  def << data_item
176
- if data_item.nil? || !data_item.is_a?(Array) || data_item.empty?
178
+ if data_item.nil? || !data_item.is_a?(Enumerable) || data_item.empty?
177
179
  raise ArgumentError,"Data must not be an non empty array."
178
180
  elsif @data_items.empty?
179
181
  set_data_items([data_item])
@@ -205,7 +207,7 @@ module Ai4r
205
207
  def check_data_items(data_items)
206
208
  if !data_items || data_items.empty?
207
209
  raise ArgumentError,"Examples data set must not be empty."
208
- elsif !data_items.first.is_a?(Array)
210
+ elsif !data_items.first.is_a?(Enumerable)
209
211
  raise ArgumentError,"Unkown format for example data."
210
212
  end
211
213
  attributes_num = data_items.first.length
@@ -74,6 +74,24 @@ module Ai4r
74
74
  return count
75
75
  end
76
76
 
77
+ # The "Simple matching" distance between two attribute sets is given
78
+ # by the number of values present on both vectors.
79
+ # If sets a and b have lengths da and db then:
80
+ #
81
+ # S = 2/(da + db) * Number of values present on both sets
82
+ # D = 1.0/S - 1
83
+ #
84
+ # Some considerations:
85
+ # * a and b must not include repeated items
86
+ # * all attributes are treated equally
87
+ # * all attributes are treated equally
88
+ def self.simple_matching_distance(a,b)
89
+ similarity = 0.0
90
+ a.each {|item| similarity += 2 if b.include?(item)}
91
+ similarity /= (a.length + b.length)
92
+ return 1.0/similarity - 1
93
+ end
94
+
77
95
  end
78
96
 
79
97
  end
@@ -1,9 +1,18 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
1
10
  require 'test/unit'
2
11
  require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/average_linkage'
3
12
 
4
13
  class Ai4r::Clusterers::AverageLinkage < Ai4r::Clusterers::SingleLinkage
5
14
  attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
6
- public :calc_index_clusters_distance
15
+ public :linkage_distance
7
16
  public :distance_between_item_and_cluster
8
17
  end
9
18
 
@@ -28,18 +37,12 @@ class AverageLinkageTest < Test::Unit::TestCase
28
37
  [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
29
38
  [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
30
39
 
31
- def test_calc_index_clusters_distance
32
- clusterer = AverageLinkage.new
40
+ def test_linkage_distance
41
+ clusterer = Ai4r::Clusterers::AverageLinkage.new
33
42
  clusterer.distance_matrix = @@expected_distance_matrix
34
- assert_equal 98.0, clusterer.calc_index_clusters_distance([0], [1])
35
- assert_equal 43.0, clusterer.calc_index_clusters_distance([0, 1], [3, 4])
43
+ assert_equal 93.5, clusterer.linkage_distance(0,1,2)
44
+ assert_equal 37.5, clusterer.linkage_distance(4,2,5)
36
45
  end
37
46
 
38
- def test_distance_between_item_and_cluster
39
- clusterer = AverageLinkage.new
40
- assert_equal 20.0, clusterer.distance_between_item_and_cluster([1,2],
41
- DataSet.new(:data_items => [[3,4],[5,6]]))
42
- end
43
-
44
47
  end
45
48