agglomerative_clustering 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6368eb116e76afdf46d5dd16ffeaed04a3d590e8
4
- data.tar.gz: 39447a07d51c37d5743dc910bb71ff011bfcd6d2
3
+ metadata.gz: 03755a284a11c8225365f9b22df38da433e07994
4
+ data.tar.gz: 6996369a3b16c5541cd694a8315c2c40606b91e3
5
5
  SHA512:
6
- metadata.gz: 198911398132736c1cf06117e48e4f0026c94271586e8f172bf72733828f8bf1b218e03659b148663c56b2f8a3ca793e45b335527e90bc51577663221ddb4202
7
- data.tar.gz: 5b04529399622524c8d7b36cd8b387c7dae73c83700e0e63455091e6357b39745bb78df0d01e6df0f8e29f0d264f2df89b23a79d9b744f2df393ceb10062249a
6
+ metadata.gz: 1061fb7aae2bc9c6cee7ba054593331a6e9db4c5388d895f821aad46b6d05d86721f61c947ce5377095ff7dc3adf8f93494ce9a3697f44357521d5ecc97e8523
7
+ data.tar.gz: f481ee1c97df283e3a354ed447319fbf9041d5507ac677269288e9c7802eb743510e016a6a0fca9a5232eb0a98420a1c393e1a08c9a52a0dfac91fcc21d91fd7
data/cluster.rb CHANGED
@@ -56,3 +56,7 @@ clusters.each_with_index do |cluster, index|
56
56
  end
57
57
  end
58
58
  end
59
+
60
+ puts 'Silhouette Coefficient of First Cluster: '
61
+ sc = AgglomerativeClustering::SilhouetteCoefficient.new(clusters[0])
62
+ puts sc.measure(clusters)
@@ -1,3 +1,4 @@
1
+ require 'matrix'
1
2
  module AgglomerativeClustering
2
3
  class DistanceMatrix
3
4
 
@@ -6,9 +7,7 @@ module AgglomerativeClustering
6
7
  end
7
8
 
8
9
  def matrix
9
- Matrix.build(matrix_array.size, matrix_array.first.size) do |row, column|
10
- matrix_array[row][column]
11
- end
10
+ @matrix ||= build_matrix
12
11
  end
13
12
 
14
13
  def print_matrix
@@ -18,20 +17,38 @@ module AgglomerativeClustering
18
17
  def remove_edge index
19
18
  matrix_array.delete_at(index)
20
19
  matrix_array.each { |row| row.delete_at(index) }
21
- Matrix.rows(matrix_array)
22
20
  end
23
21
 
24
22
  def add_edge weights
25
23
  matrix_array.each_with_index { |row, index| row << weights[index] }
26
24
  matrix_array << weights
27
- Matrix.rows(matrix_array)
25
+ @matrix = build_matrix
26
+ end
27
+
28
+ def shortest_distance
29
+ min_dist = 1.0/0
30
+ indexes = []
31
+ matrix.each_with_index do |index, row, column|
32
+ distance = matrix[row, column]
33
+ if distance < min_dist && (row != column)
34
+ min_dist = distance
35
+ indexes = [row, column]
36
+ end
37
+ end
38
+ indexes
28
39
  end
29
-
40
+
30
41
  private
31
42
 
32
43
  def matrix_array
33
44
  @matrix_array ||= []
34
45
  end
35
46
 
47
+ def build_matrix
48
+ Matrix.build(matrix_array.size, matrix_array.first.size) do |row, column|
49
+ matrix_array[row][column]
50
+ end
51
+ end
52
+
36
53
  end
37
54
  end
@@ -1,4 +1,3 @@
1
- require 'matrix'
2
1
  module AgglomerativeClustering
3
2
  class Set
4
3
  include EuclideanDistance
@@ -26,11 +25,27 @@ module AgglomerativeClustering
26
25
 
27
26
  def cluster total_clusters
28
27
  while clusters.size > total_clusters
29
- merge_clusters(shortest_distance)
28
+ merge_clusters(distance_matrix.shortest_distance)
30
29
  end
31
30
  clusters
32
31
  end
33
32
 
33
+ def outliers
34
+ set_outliers.uniq
35
+ end
36
+
37
+ def find_outliers percentage_of_clusters, distance
38
+ distance_matrix.matrix.each_with_index do |index, row, column|
39
+ count_hash[row] ||= 0
40
+ count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
41
+ set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
42
+ end
43
+ points.reject! { |point| outliers.include?(point) }
44
+ outliers
45
+ end
46
+
47
+ private
48
+
34
49
  def merge_clusters indexes
35
50
  index1, index2 = indexes
36
51
  new_cluster = clusters[index1].merge(clusters[index2])
@@ -48,22 +63,6 @@ module AgglomerativeClustering
48
63
  distance_matrix
49
64
  end
50
65
 
51
- def outliers
52
- set_outliers.uniq
53
- end
54
-
55
- def find_outliers percentage_of_clusters, distance
56
- distance_matrix.matrix.each_with_index do |index, row, column|
57
- count_hash[row] ||= 0
58
- count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
59
- set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
60
- end
61
- points.reject! { |point| outliers.include?(point) }
62
- outliers
63
- end
64
-
65
- private
66
-
67
66
  def add_cluster new_cluster
68
67
  clusters << new_cluster
69
68
  update_distance_matrix(clusters.size - 1)
@@ -75,19 +74,6 @@ module AgglomerativeClustering
75
74
  distance_matrix.remove_edge(index)
76
75
  end
77
76
 
78
- def shortest_distance
79
- min_cluster_dist = 1.0/0
80
- indexes = []
81
- distance_matrix.matrix.each_with_index do |index, row, column|
82
- distance = distance_matrix.matrix[row, column]
83
- if distance < min_cluster_dist && distance != 0
84
- min_cluster_dist = distance
85
- indexes = [row, column]
86
- end
87
- end
88
- indexes
89
- end
90
-
91
77
  def set_outliers
92
78
  @set_outliers ||= []
93
79
  end
@@ -0,0 +1,38 @@
1
+ module AgglomerativeClustering
2
+ class SilhouetteCoefficient
3
+ include EuclideanDistance
4
+ attr_reader :main_cluster
5
+
6
+ def initialize main_cluster
7
+ @main_cluster = main_cluster
8
+ end
9
+
10
+ def measure clusters
11
+ silhouettes = []
12
+ average_distances = []
13
+ main_cluster.points.each do |point1|
14
+ a1 = calculate_a1(point1)
15
+ (clusters - [main_cluster]).each do |cluster|
16
+ distances = []
17
+ cluster.points.each do |point2|
18
+ distances << euclidean_distance(point1, point2)
19
+ end
20
+ average_distances << distances.inject(:+)/distances.size
21
+ end
22
+ b1 = average_distances.min
23
+ s1 = (b1 - a1)/[a1,b1].max
24
+ silhouettes << s1
25
+ end
26
+ silhouettes.inject(:+) / silhouettes.size
27
+ end
28
+
29
+ def calculate_a1 point
30
+ distances = []
31
+ main_cluster.points.each do |point1|
32
+ distances << euclidean_distance(point, point1)
33
+ end
34
+ distances.inject(:+)/(distances.size - 1)
35
+ end
36
+
37
+ end
38
+ end
@@ -1,3 +1,3 @@
1
1
  module AgglomerativeClustering
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -9,3 +9,4 @@ require "agglomerative_clustering/linkage/center"
9
9
  require "agglomerative_clustering/point"
10
10
  require "agglomerative_clustering/cluster"
11
11
  require "agglomerative_clustering/set"
12
+ require "agglomerative_clustering/silhouette_coefficient"
@@ -5,7 +5,7 @@ describe AgglomerativeClustering::DistanceMatrix do
5
5
  matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.empty)
6
6
  matrix.add_edge([1,2])
7
7
  matrix.add_edge([2,2,3])
8
- expect(matrix.remove_edge(0)).to eql(Matrix[[2,3]])
8
+ expect(matrix.remove_edge(0)).to eql([[2,3]])
9
9
  end
10
10
  end
11
11
 
@@ -15,4 +15,11 @@ describe AgglomerativeClustering::DistanceMatrix do
15
15
  expect(matrix.add_edge([4,5,6,7])).to eql(Matrix[[1,2,3,4],[4,5,6,7]])
16
16
  end
17
17
  end
18
+
19
+ context '#shortest_distance' do
20
+ it 'will return the indexes of the shortest distances' do
21
+ matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.rows([[0,2,3],[2,0,3]]))
22
+ expect(matrix.shortest_distance).to eql([0,1])
23
+ end
24
+ end
18
25
  end
@@ -35,12 +35,6 @@ describe AgglomerativeClustering::Set do
35
35
  end
36
36
  end
37
37
 
38
- context '#merge_clusters' do
39
- it 'will merge two clusters into one and update the distance matrix' do
40
- expect(@set.merge_clusters([0,1]).points).to eql([@point1, @point2])
41
- end
42
- end
43
-
44
38
  context '#find_outliers' do
45
39
  it 'will return a list of outliers' do
46
40
  outlier1 = FactoryGirl.build(:point, x:100, y:200, z:300)
@@ -0,0 +1,24 @@
1
+ describe AgglomerativeClustering::SilhouetteCoefficient do
2
+
3
+ context '#measure' do
4
+ it 'will return the average silhoutte coefficient of a cluster' do
5
+ p1 = FactoryGirl.build(:point, x:1, y:1, z:1)
6
+ p2 = FactoryGirl.build(:point, x:3, y:3, z:3)
7
+ p3 = FactoryGirl.build(:point, x:17, y:17, z:17)
8
+ p4 = FactoryGirl.build(:point, x:16, y:16, z:16)
9
+ p5 = FactoryGirl.build(:point, x:18, y:18, z:18)
10
+ p6 = FactoryGirl.build(:point, x:2, y:2, z:2)
11
+ cluster1 = AgglomerativeClustering::Cluster.new(p1)
12
+ cluster2 = AgglomerativeClustering::Cluster.new(p2)
13
+ cluster3 = AgglomerativeClustering::Cluster.new(p3)
14
+ cluster4 = AgglomerativeClustering::Cluster.new(p4)
15
+ cluster5 = AgglomerativeClustering::Cluster.new(p5)
16
+ cluster6 = AgglomerativeClustering::Cluster.new(p6)
17
+ cluster1.merge(cluster2).merge(cluster3)
18
+ cluster4.merge(cluster5).merge(cluster6)
19
+ sc = AgglomerativeClustering::SilhouetteCoefficient.new(cluster1)
20
+ clusters = [cluster1, cluster4]
21
+ expect(sc.measure(clusters).round(4)).to eql(-0.0893)
22
+ end
23
+ end
24
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agglomerative_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Mulvihill
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-30 00:00:00.000000000 Z
11
+ date: 2014-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -79,6 +79,7 @@ files:
79
79
  - lib/agglomerative_clustering/linkage/single.rb
80
80
  - lib/agglomerative_clustering/point.rb
81
81
  - lib/agglomerative_clustering/set.rb
82
+ - lib/agglomerative_clustering/silhouette_coefficient.rb
82
83
  - lib/agglomerative_clustering/version.rb
83
84
  - spec/factories/lib/agglomerative_clustering/cluster.rb
84
85
  - spec/factories/lib/agglomerative_clustering/point.rb
@@ -91,6 +92,7 @@ files:
91
92
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
92
93
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
93
94
  - spec/lib/agglomerative_clustering/set_spec.rb
95
+ - spec/lib/agglomerative_clustering/silhouette_coefficient_spec.rb
94
96
  - spec/spec_helper.rb
95
97
  homepage: https://github.com/bmulvihill/agglomerative_clustering
96
98
  licenses:
@@ -128,4 +130,5 @@ test_files:
128
130
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
129
131
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
130
132
  - spec/lib/agglomerative_clustering/set_spec.rb
133
+ - spec/lib/agglomerative_clustering/silhouette_coefficient_spec.rb
131
134
  - spec/spec_helper.rb