ai4ruby 1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/README.rdoc +47 -0
  2. data/examples/classifiers/id3_data.csv +121 -0
  3. data/examples/classifiers/id3_example.rb +29 -0
  4. data/examples/classifiers/naive_bayes_data.csv +11 -0
  5. data/examples/classifiers/naive_bayes_example.rb +16 -0
  6. data/examples/classifiers/results.txt +31 -0
  7. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  8. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  9. data/examples/neural_network/backpropagation_example.rb +67 -0
  10. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  11. data/examples/neural_network/patterns_with_noise.rb +66 -0
  12. data/examples/neural_network/training_patterns.rb +68 -0
  13. data/examples/neural_network/xor_example.rb +35 -0
  14. data/examples/som/som_data.rb +156 -0
  15. data/examples/som/som_multi_node_example.rb +22 -0
  16. data/examples/som/som_single_example.rb +24 -0
  17. data/lib/ai4r.rb +33 -0
  18. data/lib/ai4r/classifiers/classifier.rb +62 -0
  19. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  20. data/lib/ai4r/classifiers/ib1.rb +121 -0
  21. data/lib/ai4r/classifiers/id3.rb +326 -0
  22. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  23. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  24. data/lib/ai4r/classifiers/one_r.rb +110 -0
  25. data/lib/ai4r/classifiers/prism.rb +197 -0
  26. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  27. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  28. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  29. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  30. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  31. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  32. data/lib/ai4r/clusterers/diana.rb +139 -0
  33. data/lib/ai4r/clusterers/k_means.rb +126 -0
  34. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  35. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  36. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  37. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +31 -0
  38. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  39. data/lib/ai4r/data/data_set.rb +266 -0
  40. data/lib/ai4r/data/parameterizable.rb +64 -0
  41. data/lib/ai4r/data/proximity.rb +100 -0
  42. data/lib/ai4r/data/statistics.rb +77 -0
  43. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  44. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  45. data/lib/ai4r/neural_network/backpropagation.rb +326 -0
  46. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  47. data/lib/ai4r/som/layer.rb +68 -0
  48. data/lib/ai4r/som/node.rb +96 -0
  49. data/lib/ai4r/som/som.rb +155 -0
  50. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  51. data/test/classifiers/hyperpipes_test.rb +84 -0
  52. data/test/classifiers/ib1_test.rb +78 -0
  53. data/test/classifiers/id3_test.rb +208 -0
  54. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  55. data/test/classifiers/naive_bayes_test.rb +43 -0
  56. data/test/classifiers/one_r_test.rb +62 -0
  57. data/test/classifiers/prism_test.rb +85 -0
  58. data/test/classifiers/zero_r_test.rb +49 -0
  59. data/test/clusterers/average_linkage_test.rb +51 -0
  60. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  61. data/test/clusterers/centroid_linkage_test.rb +53 -0
  62. data/test/clusterers/complete_linkage_test.rb +57 -0
  63. data/test/clusterers/diana_test.rb +69 -0
  64. data/test/clusterers/k_means_test.rb +100 -0
  65. data/test/clusterers/median_linkage_test.rb +53 -0
  66. data/test/clusterers/single_linkage_test.rb +122 -0
  67. data/test/clusterers/ward_linkage_hierarchical_test.rb +61 -0
  68. data/test/clusterers/ward_linkage_test.rb +53 -0
  69. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  70. data/test/data/data_set_test.rb +96 -0
  71. data/test/data/proximity_test.rb +81 -0
  72. data/test/data/statistics_test.rb +65 -0
  73. data/test/experiment/classifier_evaluator_test.rb +76 -0
  74. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  75. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  76. data/test/neural_network/backpropagation_test.rb +82 -0
  77. data/test/neural_network/hopfield_test.rb +72 -0
  78. data/test/som/som_test.rb +97 -0
  79. metadata +168 -0
@@ -0,0 +1,197 @@
1
# Author:: Sergio Fierens (Implementation only, Cendrowska is
# the creator of the algorithm)
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
#
# J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
# International Journal of Man-Machine Studies. 27(4):349-370.

require File.dirname(__FILE__) + '/../data/data_set'
require File.dirname(__FILE__) + '/../classifiers/classifier'

module Ai4r
  module Classifiers

    # = Introduction
    # Implementation of the PRISM algorithm (Cendrowska, 1987).
    # Given a set of preclassified examples, it induces an ordered list of
    # modular rules that can be used to predict the class of new instances.
    #
    # J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
    # International Journal of Man-Machine Studies. 27(4):349-370.
    class Prism < Classifier

      attr_reader :data_set, :rules

      # Build a new Prism classifier. You must provide a DataSet instance
      # as parameter. The last attribute of each item is considered as
      # the item class.
      #
      # For each class value, rules are induced until every remaining
      # training instance of that class is covered by some rule.
      def build(data_set)
        data_set.check_not_empty
        @data_set = data_set
        domains = @data_set.build_domains
        pending = @data_set.data_items.collect { |item| item }
        @rules = []
        domains.last.each do |class_value|
          while has_class_value(pending, class_value)
            rule = build_rule(class_value, pending)
            @rules << rule
            # Drop every instance already covered by the new rule.
            pending = pending.reject { |item| matches_conditions(item, rule[:conditions]) }
          end
        end
        return self
      end

      # You can evaluate new data, predicting its class.
      # Returns the class of the first rule whose conditions match, or nil
      # when no rule matches.
      # e.g.
      #   classifier.eval(['New York', '<30', 'F']) # => 'Y'
      def eval(instance)
        matched = @rules.find { |rule| matches_conditions(instance, rule[:conditions]) }
        matched ? matched[:class_value] : nil
      end

      # This method returns the generated rules in ruby code.
      # e.g.
      #
      #   classifier.get_rules
      #   # =>  if age_range == '<30' then marketing_target = 'Y'
      #       elsif age_range == '>80' then marketing_target = 'Y'
      #       elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
      #       else marketing_target = 'N'
      #       end
      #
      # Note that the last rule becomes the plain "else" branch: its
      # conditions are intentionally dropped so unmatched instances fall
      # through to its class value.
      #
      # It is a nice way to inspect induction results, and also to execute them:
      #   age_range = '[30-50)'
      #   city = 'New York'
      #   eval(classifier.get_rules)
      #   puts marketing_target
      #     'Y'
      def get_rules
        out = "if #{join_terms(@rules.first)} then #{then_clause(@rules.first)}"
        @rules[1...-1].each do |rule|
          out << "\nelsif #{join_terms(rule)} then #{then_clause(rule)}"
        end
        out << "\nelse #{then_clause(@rules.last)}" if @rules.size > 1
        out << "\nend"
        out
      end

      protected

      # Value of attribute +attr+ (by label) within a data item.
      def get_attr_value(data, attr)
        data[@data_set.get_index(attr)]
      end

      # True if at least one instance belongs to +class_value+.
      def has_class_value(instances, class_value)
        instances.any? { |data| data.last == class_value }
      end

      # A rule is perfect when it matches no instance of a different class.
      def is_perfect(instances, rule)
        class_value = rule[:class_value]
        instances.none? do |data|
          data.last != class_value && matches_conditions(data, rule[:conditions])
        end
      end

      # True when the data item satisfies every condition in the hash.
      def matches_conditions(data, conditions)
        conditions.all? do |attr_label, attr_value|
          get_attr_value(data, attr_label) == attr_value
        end
      end

      # Greedily add conditions (best attribute/value by p/t ratio) until the
      # rule is perfect or every attribute has been considered.
      def build_rule(class_value, instances)
        rule = { :class_value => class_value, :conditions => {} }
        covered = instances.collect { |item| item }
        candidate_attrs = @data_set.data_labels[0...-1].collect { |label| label }
        until is_perfect(instances, rule) || candidate_attrs.empty?
          freq_table = build_freq_table(covered, candidate_attrs, class_value)
          condition = get_condition(freq_table)
          rule[:conditions].merge!(condition)
          covered = covered.select { |item| matches_conditions(item, condition) }
        end
        rule
      end

      # Returns a structure with the following format:
      #   { attr1_label => { attr1_value1 => [p, t], attr1_value2 => [p, t], ... },
      #     attr2_label => { attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
      #     ... }
      # where p is the number of instances classified as class_value
      # with that attribute value, and t is the total number of instances with
      # that attribute value.
      def build_freq_table(rule_instances, attributes, class_value)
        rule_instances.each_with_object({}) do |data, table|
          attributes.each do |attr_label|
            value_counts = (table[attr_label] ||= Hash.new([0, 0]))
            attr_value = get_attr_value(data, attr_label)
            hits, total = value_counts[attr_value]
            hits += 1 if data.last == class_value
            value_counts[attr_value] = [hits, total + 1]
          end
        end
      end

      # Returns a single conditional term: {attrN_label => attrN_valueM},
      # selecting the attribute/value pair with the highest p/t ratio
      # (occurrences of attribute value classified as class_value /
      # occurrences of attribute value).
      def get_condition(freq_table)
        best_pt = [0, 0]
        best_condition = nil
        freq_table.each do |attr_label, value_counts|
          value_counts.each do |attr_value, pt|
            next unless better_pt(pt, best_pt)
            best_condition = { attr_label => attr_value }
            best_pt = pt
          end
        end
        best_condition
      end

      # pt = [p, t]
      # p = occurrences of attribute value with instance classified as class_value
      # t = occurrences of attribute value
      # A pt is better if:
      #   1- its ratio is higher
      #   2- its ratio is equal, and it has a higher p
      # (compared via cross-multiplication to avoid float division)
      def better_pt(pt, best_pt)
        return false if pt[1] == 0
        return true if best_pt[1] == 0
        lhs = pt[0] * best_pt[1]
        rhs = best_pt[0] * pt[1]
        lhs > rhs || (lhs == rhs && pt[0] > best_pt[0])
      end

      # Renders the rule's conditions as "attr == 'value' and ...".
      def join_terms(rule)
        terms = rule[:conditions].collect do |attr_label, attr_value|
          "#{attr_label} == '#{attr_value}'"
        end
        "#{terms.join(" and ")}"
      end

      # Renders the rule's conclusion as "class_label = 'value'".
      def then_clause(rule)
        "#{@data_set.data_labels.last} = '#{rule[:class_value]}'"
      end

    end
  end
end
@@ -0,0 +1,73 @@
1
# Author:: Sergio Fierens (Implementation only)
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

require File.dirname(__FILE__) + '/../data/data_set.rb'
require File.dirname(__FILE__) + '/../classifiers/classifier'

module Ai4r
  module Classifiers

    # = Introduction
    #
    # The idea behind the ZeroR classifier is to identify the
    # most common class value in the training set.
    # It always returns that value when evaluating an instance.
    # It is frequently used as a baseline for evaluating other machine learning
    # algorithms.
    class ZeroR < Classifier

      attr_reader :data_set, :class_value

      # Build a new ZeroR classifier. You must provide a DataSet instance
      # as parameter. The last attribute of each item is considered as
      # the item class.
      #
      # Counts the occurrences of each class value and stores the most
      # frequent one in @class_value.
      def build(data_set)
        data_set.check_not_empty
        @data_set = data_set
        frequence = {}
        max_freq = 0
        @class_value = nil
        @data_set.data_items.each do |example|
          class_value = example.last
          # BUGFIX: the incremented count must be written back to the hash.
          # Previously it was discarded, so every class always counted as 1
          # and @class_value ended up being the first example's class rather
          # than the most common one.
          class_frequency = (frequence[class_value] || 0) + 1
          frequence[class_value] = class_frequency
          if max_freq < class_frequency
            max_freq = class_frequency
            @class_value = class_value
          end
        end
        return self
      end

      # You can evaluate new data, predicting its class.
      # The input data item is ignored: ZeroR always returns the most
      # common class seen during build.
      # e.g.
      #   classifier.eval(['New York', '<30', 'F']) # => 'Y'
      def eval(data)
        @class_value
      end

      # This method returns the generated rules in ruby code.
      # e.g.
      #
      #   classifier.get_rules
      #   # =>  marketing_target='Y'
      #
      # It is a nice way to inspect induction results, and also to execute them:
      #   marketing_target = nil
      #   eval classifier.get_rules
      #   puts marketing_target
      #   # =>  'Y'
      def get_rules
        return "#{@data_set.data_labels.last} = '#{@class_value}'"
      end

    end

  end
end
@@ -0,0 +1,59 @@
1
# Author:: Sergio Fierens (implementation)
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

require File.dirname(__FILE__) + '/../data/data_set'
require File.dirname(__FILE__) + '/../clusterers/single_linkage'

module Ai4r
  module Clusterers

    # Implementation of a Hierarchical clusterer with group average
    # linkage, AKA unweighted pair group method average or UPGMA (Everitt
    # et al., 2001 ; Jain and Dubes, 1988 ; Sokal and Michener, 1958).
    # Hierarchical clusterers create one cluster per element, and then
    # progressively merge clusters, until the required number of clusters
    # is reached.
    # With average linkage, the distance between a cluster cx and
    # cluster (ci U cj) is the average of the distance between cx and ci,
    # and cx and cj:
    #
    #   D(cx, (ci U cj)) = (D(cx, ci) + D(cx, cj)) / 2
    class AverageLinkage < SingleLinkage

      parameters_info :distance_function =>
        "Custom implementation of distance function. " +
        "It must be a closure receiving two data items and return the " +
        "distance bewteen them. By default, this algorithm uses " +
        "ecuclidean distance of numeric attributes to the power of 2."

      # Build a new clusterer, using data examples found in data_set.
      # Items will be clustered in "number_of_clusters" different
      # clusters. Delegates entirely to SingleLinkage#build; only the
      # linkage_distance below differs.
      def build(data_set, number_of_clusters)
        super
      end

      # This algorithm does not allow classification of new data items
      # once it has been built. Rebuild the cluster including your data element.
      def eval(data_item)
        # BUGFIX: was "Raise ..." (capital R), an undefined method call that
        # produced a NameError instead of the intended RuntimeError message.
        raise "Eval of new data is not supported by this algorithm."
      end

      protected

      # Return distance between cluster cx and cluster (ci U cj),
      # using average linkage.
      def linkage_distance(cx, ci, cj)
        (read_distance_matrix(cx, ci)+
          read_distance_matrix(cx, cj))/2
      end

    end
  end
end
@@ -0,0 +1,93 @@
1
# Author:: Sergio Fierens (implementation)
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

require File.dirname(__FILE__) + '/../data/data_set'
require File.dirname(__FILE__) + '/../clusterers/k_means'

module Ai4r
  module Clusterers

    # The Bisecting k-means algorithm is a variation of the "k-means" algorithm,
    # somewhat less sensible to the initial election of centroids than the
    # original.
    #
    # More about K Means algorithm:
    # http://en.wikipedia.org/wiki/K-means_algorithm
    class BisectingKMeans < KMeans

      attr_reader :data_set, :number_of_clusters, :clusters, :centroids
      attr_accessor :max_iterations, :distance_function, :refine

      parameters_info :max_iterations => "Maximum number of iterations to " +
          "build the clusterer. By default it is uncapped.",
        :distance_function => "Custom implementation of distance function. " +
          "It must be a closure receiving two data items and return the " +
          "distance bewteen them. By default, this algorithm uses " +
          "ecuclidean distance of numeric attributes to the power of 2.",
        :centroid_function => "Custom implementation to calculate the " +
          "centroid of a cluster. It must be a closure receiving an array of " +
          "data sets, and return an array of data items, representing the " +
          "centroids of for each data set. " +
          "By default, this algorithm returns a data items using the mode "+
          "or mean of each attribute on each data set.",
        :refine => "Boolean value. True by default. It will run the " +
          "classic K Means algorithm, using as initial centroids the " +
          "result of the bisecting approach."


      # BUGFIX: this was misspelled "intialize", so the constructor never ran
      # and @refine stayed nil (falsy), silently disabling the documented
      # refine-by-default behavior. `super` keeps whatever initialization the
      # KMeans parent performs (if any).
      def initialize
        super
        @refine = true
      end

      # Build a new clusterer, using data examples found in data_set.
      # Items will be clustered in "number_of_clusters" different
      # clusters.
      #
      # Repeatedly bisects the biggest cluster with a 2-means run until the
      # requested number of clusters is reached; optionally refines the
      # result with a full K Means pass (see :refine parameter).
      def build(data_set, number_of_clusters)
        @data_set = data_set
        @number_of_clusters = number_of_clusters

        @clusters = [@data_set]
        @centroids = [@data_set.get_mean_or_mode]
        while @clusters.length < @number_of_clusters
          biggest_cluster_index = find_biggest_cluster_index(@clusters)
          clusterer = KMeans.new.
            set_parameters(get_parameters).
            build(@clusters[biggest_cluster_index], 2)
          @clusters.delete_at(biggest_cluster_index)
          @centroids.delete_at(biggest_cluster_index)
          @clusters.concat(clusterer.clusters)
          @centroids.concat(clusterer.centroids)
        end

        super if @refine

        return self
      end

      protected

      # KMeans hook: reuse the centroids produced by the bisecting phase
      # instead of picking fresh initial centroids.
      def calc_initial_centroids
        @centroids # Use existing centroids
      end

      # Index of the cluster holding the most data items.
      def find_biggest_cluster_index(clusters)
        max_index = 0
        max_length = 0
        clusters.each_index do |cluster_index|
          cluster = clusters[cluster_index]
          if max_length < cluster.data_items.length
            max_length = cluster.data_items.length
            max_index = cluster_index
          end
        end
        return max_index
      end

    end
  end
end
@@ -0,0 +1,66 @@
1
# Author:: Sergio Fierens (implementation)
# License:: MPL 1.1
# Project:: ai4r
# Url:: http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of
# the Mozilla Public License version 1.1 as published by the
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

require File.dirname(__FILE__) + '/../data/data_set'
require File.dirname(__FILE__) + '/../clusterers/single_linkage'

module Ai4r
  module Clusterers

    # Implementation of an Agglomerative Hierarchical clusterer with
    # centroid linkage algorithm, aka unweighted pair group method
    # centroid (UPGMC) (Everitt et al., 2001 ; Jain and Dubes, 1988 ;
    # Sokal and Michener, 1958 )
    # Hierarchical clusterers create one cluster per element, and then
    # progressively merge clusters, until the required number of clusters
    # is reached.
    # The distance between clusters is the squared euclidean distance
    # between their centroids.
    #
    #   D(cx, (ci U cj)) = | mx - mij |^2
    #   D(cx, (ci U cj)) = (ni/(ni+nj))*D(cx, ci) +
    #                      (nj/(ni+nj))*D(cx, cj) -
    #                      (ni*nj/(ni+nj)^2)*D(ci, cj)
    class CentroidLinkage < SingleLinkage

      parameters_info :distance_function =>
          "Custom implementation of distance function. " +
          "It must be a closure receiving two data items and return the " +
          "distance bewteen them. By default, this algorithm uses " +
          "ecuclidean distance of numeric attributes to the power of 2."

      # Build a new clusterer, using data examples found in data_set.
      # Items will be clustered in "number_of_clusters" different
      # clusters. Delegates entirely to SingleLinkage#build; only the
      # linkage_distance below differs.
      def build(data_set, number_of_clusters)
        super
      end

      # This algorithm does not allow classification of new data items
      # once it has been built. Rebuild the cluster including your data element.
      def eval(data_item)
        # BUGFIX: was "Raise ..." (capital R), an undefined method call that
        # produced a NameError instead of the intended RuntimeError message.
        raise "Eval of new data is not supported by this algorithm."
      end

      protected

      # Return distance between cluster cx and cluster (ci U cj),
      # using centroid linkage (implements the formula in the class docs).
      def linkage_distance(cx, ci, cj)
        ni = @index_clusters[ci].length
        nj = @index_clusters[cj].length
        ( ni * read_distance_matrix(cx, ci) +
          nj * read_distance_matrix(cx, cj) -
          1.0 * ni * nj * read_distance_matrix(ci, cj) / (ni+nj)) / (ni+nj)
      end

    end
  end
end