agglomerative_clustering 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6368eb116e76afdf46d5dd16ffeaed04a3d590e8
4
- data.tar.gz: 39447a07d51c37d5743dc910bb71ff011bfcd6d2
3
+ metadata.gz: 03755a284a11c8225365f9b22df38da433e07994
4
+ data.tar.gz: 6996369a3b16c5541cd694a8315c2c40606b91e3
5
5
  SHA512:
6
- metadata.gz: 198911398132736c1cf06117e48e4f0026c94271586e8f172bf72733828f8bf1b218e03659b148663c56b2f8a3ca793e45b335527e90bc51577663221ddb4202
7
- data.tar.gz: 5b04529399622524c8d7b36cd8b387c7dae73c83700e0e63455091e6357b39745bb78df0d01e6df0f8e29f0d264f2df89b23a79d9b744f2df393ceb10062249a
6
+ metadata.gz: 1061fb7aae2bc9c6cee7ba054593331a6e9db4c5388d895f821aad46b6d05d86721f61c947ce5377095ff7dc3adf8f93494ce9a3697f44357521d5ecc97e8523
7
+ data.tar.gz: f481ee1c97df283e3a354ed447319fbf9041d5507ac677269288e9c7802eb743510e016a6a0fca9a5232eb0a98420a1c393e1a08c9a52a0dfac91fcc21d91fd7
data/cluster.rb CHANGED
@@ -56,3 +56,7 @@ clusters.each_with_index do |cluster, index|
56
56
  end
57
57
  end
58
58
  end
59
+
60
+ puts 'Silhouette Coefficient of First Cluster: '
61
+ sc = AgglomerativeClustering::SilhouetteCoefficient.new(clusters[0])
62
+ puts sc.measure(clusters)
@@ -1,3 +1,4 @@
1
+ require 'matrix'
1
2
  module AgglomerativeClustering
2
3
  class DistanceMatrix
3
4
 
@@ -6,9 +7,7 @@ module AgglomerativeClustering
6
7
  end
7
8
 
8
9
  def matrix
9
- Matrix.build(matrix_array.size, matrix_array.first.size) do |row, column|
10
- matrix_array[row][column]
11
- end
10
+ @matrix ||= build_matrix
12
11
  end
13
12
 
14
13
  def print_matrix
@@ -18,20 +17,38 @@ module AgglomerativeClustering
18
17
  def remove_edge index
19
18
  matrix_array.delete_at(index)
20
19
  matrix_array.each { |row| row.delete_at(index) }
21
- Matrix.rows(matrix_array)
22
20
  end
23
21
 
24
22
  def add_edge weights
25
23
  matrix_array.each_with_index { |row, index| row << weights[index] }
26
24
  matrix_array << weights
27
- Matrix.rows(matrix_array)
25
+ @matrix = build_matrix
26
+ end
27
+
28
+ def shortest_distance
29
+ min_dist = 1.0/0
30
+ indexes = []
31
+ matrix.each_with_index do |index, row, column|
32
+ distance = matrix[row, column]
33
+ if distance < min_dist && (row != column)
34
+ min_dist = distance
35
+ indexes = [row, column]
36
+ end
37
+ end
38
+ indexes
28
39
  end
29
-
40
+
30
41
  private
31
42
 
32
43
  def matrix_array
33
44
  @matrix_array ||= []
34
45
  end
35
46
 
47
+ def build_matrix
48
+ Matrix.build(matrix_array.size, matrix_array.first.size) do |row, column|
49
+ matrix_array[row][column]
50
+ end
51
+ end
52
+
36
53
  end
37
54
  end
@@ -1,4 +1,3 @@
1
- require 'matrix'
2
1
  module AgglomerativeClustering
3
2
  class Set
4
3
  include EuclideanDistance
@@ -26,11 +25,27 @@ module AgglomerativeClustering
26
25
 
27
26
  def cluster total_clusters
28
27
  while clusters.size > total_clusters
29
- merge_clusters(shortest_distance)
28
+ merge_clusters(distance_matrix.shortest_distance)
30
29
  end
31
30
  clusters
32
31
  end
33
32
 
33
+ def outliers
34
+ set_outliers.uniq
35
+ end
36
+
37
+ def find_outliers percentage_of_clusters, distance
38
+ distance_matrix.matrix.each_with_index do |index, row, column|
39
+ count_hash[row] ||= 0
40
+ count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
41
+ set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
42
+ end
43
+ points.reject! { |point| outliers.include?(point) }
44
+ outliers
45
+ end
46
+
47
+ private
48
+
34
49
  def merge_clusters indexes
35
50
  index1, index2 = indexes
36
51
  new_cluster = clusters[index1].merge(clusters[index2])
@@ -48,22 +63,6 @@ module AgglomerativeClustering
48
63
  distance_matrix
49
64
  end
50
65
 
51
- def outliers
52
- set_outliers.uniq
53
- end
54
-
55
- def find_outliers percentage_of_clusters, distance
56
- distance_matrix.matrix.each_with_index do |index, row, column|
57
- count_hash[row] ||= 0
58
- count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
59
- set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
60
- end
61
- points.reject! { |point| outliers.include?(point) }
62
- outliers
63
- end
64
-
65
- private
66
-
67
66
  def add_cluster new_cluster
68
67
  clusters << new_cluster
69
68
  update_distance_matrix(clusters.size - 1)
@@ -75,19 +74,6 @@ module AgglomerativeClustering
75
74
  distance_matrix.remove_edge(index)
76
75
  end
77
76
 
78
- def shortest_distance
79
- min_cluster_dist = 1.0/0
80
- indexes = []
81
- distance_matrix.matrix.each_with_index do |index, row, column|
82
- distance = distance_matrix.matrix[row, column]
83
- if distance < min_cluster_dist && distance != 0
84
- min_cluster_dist = distance
85
- indexes = [row, column]
86
- end
87
- end
88
- indexes
89
- end
90
-
91
77
  def set_outliers
92
78
  @set_outliers ||= []
93
79
  end
@@ -0,0 +1,38 @@
1
+ module AgglomerativeClustering
2
+ class SilhouetteCoefficient
3
+ include EuclideanDistance
4
+ attr_reader :main_cluster
5
+
6
+ def initialize main_cluster
7
+ @main_cluster = main_cluster
8
+ end
9
+
10
+ def measure clusters
11
+ silhouettes = []
12
+ average_distances = []
13
+ main_cluster.points.each do |point1|
14
+ a1 = calculate_a1(point1)
15
+ (clusters - [main_cluster]).each do |cluster|
16
+ distances = []
17
+ cluster.points.each do |point2|
18
+ distances << euclidean_distance(point1, point2)
19
+ end
20
+ average_distances << distances.inject(:+)/distances.size
21
+ end
22
+ b1 = average_distances.min
23
+ s1 = (b1 - a1)/[a1,b1].max
24
+ silhouettes << s1
25
+ end
26
+ silhouettes.inject(:+) / silhouettes.size
27
+ end
28
+
29
+ def calculate_a1 point
30
+ distances = []
31
+ main_cluster.points.each do |point1|
32
+ distances << euclidean_distance(point, point1)
33
+ end
34
+ distances.inject(:+)/(distances.size - 1)
35
+ end
36
+
37
+ end
38
+ end
@@ -1,3 +1,3 @@
1
1
  module AgglomerativeClustering
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -9,3 +9,4 @@ require "agglomerative_clustering/linkage/center"
9
9
  require "agglomerative_clustering/point"
10
10
  require "agglomerative_clustering/cluster"
11
11
  require "agglomerative_clustering/set"
12
+ require "agglomerative_clustering/silhouette_coefficient"
@@ -5,7 +5,7 @@ describe AgglomerativeClustering::DistanceMatrix do
5
5
  matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.empty)
6
6
  matrix.add_edge([1,2])
7
7
  matrix.add_edge([2,2,3])
8
- expect(matrix.remove_edge(0)).to eql(Matrix[[2,3]])
8
+ expect(matrix.remove_edge(0)).to eql([[2,3]])
9
9
  end
10
10
  end
11
11
 
@@ -15,4 +15,11 @@ describe AgglomerativeClustering::DistanceMatrix do
15
15
  expect(matrix.add_edge([4,5,6,7])).to eql(Matrix[[1,2,3,4],[4,5,6,7]])
16
16
  end
17
17
  end
18
+
19
+ context '#shortest_distance' do
20
+ it 'will return the indexes of the shortest distances' do
21
+ matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.rows([[0,2,3],[2,0,3]]))
22
+ expect(matrix.shortest_distance).to eql([0,1])
23
+ end
24
+ end
18
25
  end
@@ -35,12 +35,6 @@ describe AgglomerativeClustering::Set do
35
35
  end
36
36
  end
37
37
 
38
- context '#merge_clusters' do
39
- it 'will merge two clusters into one and update the distance matrix' do
40
- expect(@set.merge_clusters([0,1]).points).to eql([@point1, @point2])
41
- end
42
- end
43
-
44
38
  context '#find_outliers' do
45
39
  it 'will return a list of outliers' do
46
40
  outlier1 = FactoryGirl.build(:point, x:100, y:200, z:300)
@@ -0,0 +1,24 @@
1
+ describe AgglomerativeClustering::SilhouetteCoefficient do
2
+
3
+ context '#measure' do
4
+ it 'will return the average silhoutte coefficient of a cluster' do
5
+ p1 = FactoryGirl.build(:point, x:1, y:1, z:1)
6
+ p2 = FactoryGirl.build(:point, x:3, y:3, z:3)
7
+ p3 = FactoryGirl.build(:point, x:17, y:17, z:17)
8
+ p4 = FactoryGirl.build(:point, x:16, y:16, z:16)
9
+ p5 = FactoryGirl.build(:point, x:18, y:18, z:18)
10
+ p6 = FactoryGirl.build(:point, x:2, y:2, z:2)
11
+ cluster1 = AgglomerativeClustering::Cluster.new(p1)
12
+ cluster2 = AgglomerativeClustering::Cluster.new(p2)
13
+ cluster3 = AgglomerativeClustering::Cluster.new(p3)
14
+ cluster4 = AgglomerativeClustering::Cluster.new(p4)
15
+ cluster5 = AgglomerativeClustering::Cluster.new(p5)
16
+ cluster6 = AgglomerativeClustering::Cluster.new(p6)
17
+ cluster1.merge(cluster2).merge(cluster3)
18
+ cluster4.merge(cluster5).merge(cluster6)
19
+ sc = AgglomerativeClustering::SilhouetteCoefficient.new(cluster1)
20
+ clusters = [cluster1, cluster4]
21
+ expect(sc.measure(clusters).round(4)).to eql(-0.0893)
22
+ end
23
+ end
24
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agglomerative_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Mulvihill
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-30 00:00:00.000000000 Z
11
+ date: 2014-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -79,6 +79,7 @@ files:
79
79
  - lib/agglomerative_clustering/linkage/single.rb
80
80
  - lib/agglomerative_clustering/point.rb
81
81
  - lib/agglomerative_clustering/set.rb
82
+ - lib/agglomerative_clustering/silhouette_coefficient.rb
82
83
  - lib/agglomerative_clustering/version.rb
83
84
  - spec/factories/lib/agglomerative_clustering/cluster.rb
84
85
  - spec/factories/lib/agglomerative_clustering/point.rb
@@ -91,6 +92,7 @@ files:
91
92
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
92
93
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
93
94
  - spec/lib/agglomerative_clustering/set_spec.rb
95
+ - spec/lib/agglomerative_clustering/silhouette_coefficient_spec.rb
94
96
  - spec/spec_helper.rb
95
97
  homepage: https://github.com/bmulvihill/agglomerative_clustering
96
98
  licenses:
@@ -128,4 +130,5 @@ test_files:
128
130
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
129
131
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
130
132
  - spec/lib/agglomerative_clustering/set_spec.rb
133
+ - spec/lib/agglomerative_clustering/silhouette_coefficient_spec.rb
131
134
  - spec/spec_helper.rb