agglomerative_clustering 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d52ff4f84aef0fbd56aa453833ea2e5b9c9772e2
4
- data.tar.gz: 4abb19bb148f77989f43be805769e0676a7182f7
3
+ metadata.gz: 6368eb116e76afdf46d5dd16ffeaed04a3d590e8
4
+ data.tar.gz: 39447a07d51c37d5743dc910bb71ff011bfcd6d2
5
5
  SHA512:
6
- metadata.gz: b31e61ad6c08ecdce2326bdab94c412aaee32227775f662ad62361896d69124acbad3ed4c6f8a40f2298de30ac77d65c0fcabd0bfe58c16b6d11bd89e75a7dd5
7
- data.tar.gz: 8a2a08bda0f2975166536d1ec85cd0f2254044689d8b6f422b6f2170bd64301ae914c05f1264d14a81d7b4b2fb4deb9c4da0fa82abb9b94f57a462ba1689c11d
6
+ metadata.gz: 198911398132736c1cf06117e48e4f0026c94271586e8f172bf72733828f8bf1b218e03659b148663c56b2f8a3ca793e45b335527e90bc51577663221ddb4202
7
+ data.tar.gz: 5b04529399622524c8d7b36cd8b387c7dae73c83700e0e63455091e6357b39745bb78df0d01e6df0f8e29f0d264f2df89b23a79d9b744f2df393ceb10062249a
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
1
  source "https://rubygems.org"
2
2
  gemspec
3
- gem 'factory_girl'
3
+ group :development do
4
+ gem 'factory_girl'
5
+ end
data/README.md CHANGED
@@ -2,7 +2,12 @@
2
2
 
3
3
  Hierarchical Agglomerative Clustering Algorithm
4
4
 
5
- Input Set of 3 dimensional points, group into nearest k clusters
5
+ Given an input set of 3-dimensional points, groups them into the nearest k clusters based on Euclidean distance.
6
+ Currently the clustering algorithm supports 4 different types of linkage:
7
+ * Single Linkage (Distance between clusters is based on nearest points)
8
+ * Complete Linkage (Distance between clusters is based on farthest points)
9
+ * Average Linkage (Distance between clusters is based on average distance of points)
10
+ * Center Linkage (Distance between clusters is based on center of cluster)
6
11
 
7
12
  ## Installation
8
13
 
@@ -21,8 +26,8 @@ Or install it yourself as:
21
26
  $ gem install agglomerative_clustering
22
27
 
23
28
  ## Usage
29
+ Please see cluster.rb for a sample until I have a chance to write something up here.
24
30
 
25
- TODO: Write usage instructions here
26
31
 
27
32
  ## Contributing
28
33
 
@@ -7,9 +7,9 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "agglomerative_clustering"
8
8
  spec.version = AgglomerativeClustering::VERSION
9
9
  spec.authors = ["Bryan Mulvihill"]
10
- spec.email = ["bmulvihill@pinsonault.com"]
10
+ spec.email = ["mulvihill.bryan@gmail.com"]
11
11
  spec.summary = %q{Ruby Agglomerative Clustering Algorithm}
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/bmulvihill/agglomerative_clustering"
13
13
  spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files -z`.split("\x0")
@@ -1,5 +1,6 @@
1
1
  require "agglomerative_clustering/version"
2
2
  require "agglomerative_clustering/euclidean_distance"
3
+ require "agglomerative_clustering/distance_matrix"
3
4
  require "agglomerative_clustering/linkage/base"
4
5
  require "agglomerative_clustering/linkage/single"
5
6
  require "agglomerative_clustering/linkage/complete"
@@ -0,0 +1,37 @@
1
module AgglomerativeClustering
  # Mutable table of distances backed by an array of row arrays.
  #
  # Edges (one row plus one column) can be appended or removed in place, and
  # the current contents can be read back as a Matrix at any time.
  class DistanceMatrix

    # @param matrix [#to_a] initial contents; +to_a+ must yield an array of
    #   row arrays (a Matrix does). An empty matrix is allowed.
    def initialize(matrix)
      @matrix_array = matrix.to_a
    end

    # Builds a Matrix from the current rows.
    #
    # Uses Matrix.rows (like #add_edge and #remove_edge) so that an empty
    # table yields an empty Matrix instead of failing on a missing first row.
    #
    # @return [Matrix]
    def matrix
      Matrix.rows(matrix_array)
    end

    # Writes every row of the matrix to standard output, one row per line.
    def print_matrix
      puts matrix.to_a.map(&:inspect)
    end

    # Drops the row at +index+ and the entry at +index+ from every remaining row.
    #
    # @param index [Integer] position of the row/column to delete
    # @return [Matrix] the matrix after removal
    def remove_edge(index)
      matrix_array.delete_at(index)
      matrix_array.each { |row| row.delete_at(index) }
      Matrix.rows(matrix_array)
    end

    # Appends +weights+ as a new column (element i goes to existing row i)
    # and then as a new last row.
    #
    # @param weights [Array] one entry per existing row, plus the entries
    #   that complete the new row
    # @return [Matrix] the matrix after insertion
    def add_edge(weights)
      matrix_array.each_with_index { |row, index| row << weights[index] }
      # Store a copy: the stored row is mutated by later add_edge/remove_edge
      # calls, and that must not leak into the caller's array.
      matrix_array << weights.dup
      Matrix.rows(matrix_array)
    end

    private

    # Backing store; lazily initialised to an empty table.
    def matrix_array
      @matrix_array ||= []
    end

  end
end
@@ -12,9 +12,6 @@ module AgglomerativeClustering
12
12
  distances.inject(:+)/distances.size
13
13
  end
14
14
 
15
- def clusters_to_merge
16
- @clusters_to_merge ||= []
17
- end
18
15
  end
19
16
  end
20
17
  end
@@ -2,21 +2,6 @@ module AgglomerativeClustering
2
2
  module Linkage
3
3
  class Base
4
4
  include EuclideanDistance
5
-
6
- def cluster(clusters)
7
- min_cluster_dist = 1.0/0
8
- clusters.each_with_index do |cluster1, index|
9
- clusters[index + 1..clusters.size].each do |cluster2|
10
- distance = calculate_distance(cluster1, cluster2)
11
- if distance < min_cluster_dist
12
- min_cluster_dist = distance
13
- @clusters_to_merge = [cluster1, cluster2]
14
- end
15
- end
16
- end
17
- clusters_to_merge
18
- end
19
-
20
5
  end
21
6
  end
22
7
  end
@@ -10,11 +10,7 @@ module AgglomerativeClustering
10
10
  def center_point cluster
11
11
  cluster.points.first.zip(*cluster.points[1..cluster.points.size-1]).map { |a,b| (a + b)/cluster.points.size.to_f }
12
12
  end
13
-
14
- def clusters_to_merge
15
- @clusters_to_merge ||= []
16
- end
17
-
13
+
18
14
  end
19
15
  end
20
16
  end
@@ -13,10 +13,6 @@ module AgglomerativeClustering
13
13
  max_point_distance
14
14
  end
15
15
 
16
- def clusters_to_merge
17
- @clusters_to_merge ||= []
18
- end
19
-
20
16
  end
21
17
  end
22
18
  end
@@ -13,9 +13,6 @@ module AgglomerativeClustering
13
13
  min_point_distance
14
14
  end
15
15
 
16
- def clusters_to_merge
17
- @clusters_to_merge ||= []
18
- end
19
16
  end
20
17
  end
21
18
  end
@@ -1,3 +1 @@
1
- Point = Struct.new(:x, :y, :z) do
2
- attr_accessor :index
3
- end
1
+ Point = Struct.new(:x, :y, :z)
@@ -2,15 +2,17 @@ require 'matrix'
2
2
  module AgglomerativeClustering
3
3
  class Set
4
4
  include EuclideanDistance
5
- attr_reader :points
5
+ attr_reader :linkage
6
6
 
7
7
  def initialize(linkage)
8
8
  @linkage = linkage
9
- @points = []
9
+ end
10
+
11
+ def points
12
+ @points ||= []
10
13
  end
11
14
 
12
15
  def push point
13
- point.index = points.size
14
16
  points << point
15
17
  end
16
18
 
@@ -22,23 +24,28 @@ module AgglomerativeClustering
22
24
  @distance_matrix ||= build_distance_matrix
23
25
  end
24
26
 
25
- def print_distance_matrix
26
- puts distance_matrix.to_a.map(&:inspect)
27
- end
28
-
29
27
  def cluster total_clusters
30
- clusters_to_merge =[]
31
28
  while clusters.size > total_clusters
32
- clusters_to_merge = @linkage.cluster(clusters)
33
- merge_clusters(clusters_to_merge)
29
+ merge_clusters(shortest_distance)
34
30
  end
35
31
  clusters
36
32
  end
37
33
 
38
- def merge_clusters(min_clusters)
39
- min_clusters[0].merge(min_clusters[1])
40
- clusters.reject! { |cluster| cluster == min_clusters[1] }
41
- min_clusters[0]
34
# Merges the two clusters found at the given positions into one.
#
# The result of merging the first cluster with the second is handed to
# add_cluster, after both originals have been removed via remove_cluster.
#
# @param indexes [Array<Integer>] two distinct positions in +clusters+,
#   in either order
# @return whatever add_cluster returns for the merged cluster
def merge_clusters(indexes)
  index1, index2 = indexes
  new_cluster = clusters[index1].merge(clusters[index2])
  # Remove the higher position first: deleting it does not shift the lower
  # one, so both positions stay valid regardless of the order given.
  [index1, index2].sort.reverse_each { |index| remove_cluster(index) }
  add_cluster(new_cluster)
end
41
+
42
# Appends an edge to the distance matrix for the cluster at position
# +new_cluster+: one distance, computed by the linkage, from that cluster
# to every cluster currently in +clusters+ (itself included).
#
# @param new_cluster [Integer] position of the cluster in +clusters+
# @return the distance matrix object
def update_distance_matrix(new_cluster)
  added = clusters[new_cluster]
  weights = clusters.map { |other| linkage.calculate_distance(added, other) }
  distance_matrix.add_edge(weights)
  distance_matrix
end
43
50
 
44
51
  def outliers
@@ -46,10 +53,10 @@ module AgglomerativeClustering
46
53
  end
47
54
 
48
55
  def find_outliers percentage_of_clusters, distance
49
- distance_matrix.each_with_index do |index, row, column|
56
+ distance_matrix.matrix.each_with_index do |index, row, column|
50
57
  count_hash[row] ||= 0
51
- count_hash[row] += 1 if distance_matrix[row, column] > distance
52
- set_outliers << points[row] if count_hash[row]/(distance_matrix.row_count - 1) > percentage_of_clusters/100
58
+ count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
59
+ set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
53
60
  end
54
61
  points.reject! { |point| outliers.include?(point) }
55
62
  outliers
@@ -57,6 +64,30 @@ module AgglomerativeClustering
57
64
 
58
65
  private
59
66
 
67
# Appends +new_cluster+ to the cluster list and then extends the distance
# matrix for it (it is the last element at that point).
#
# @return the cluster that was passed in
def add_cluster(new_cluster)
  clusters.push(new_cluster)
  last_position = clusters.size - 1
  update_distance_matrix(last_position)
  new_cluster
end
72
+
73
# Deletes the cluster at +index+ and then the matching edge of the
# distance matrix, keeping the two in step.
#
# @param index [Integer] position of the cluster to drop
# @return whatever the distance matrix's remove_edge returns
def remove_cluster(index)
  clusters.delete_at(index)
  distance_matrix.remove_edge(index)
end
77
+
78
# Finds the pair of distinct clusters with the smallest distance between them.
#
# Only cells strictly above the diagonal are examined. This assumes the
# distance matrix is symmetric (TODO confirm for every linkage), in which
# case each unordered pair is visited exactly once and a cluster is never
# paired with itself, whatever value its diagonal cell holds. Pairs at
# distance zero (coincident clusters) are valid candidates.
#
# @return [Array<Integer>] +[row, column]+ with row < column; the first such
#   pair in row-major order on ties, or +[]+ when there are fewer than two
#   clusters
def shortest_distance
  closest = []
  smallest = Float::INFINITY
  # Build the Matrix once; each cell's value is yielded directly.
  distance_matrix.matrix.each_with_index(:strict_upper) do |distance, row, column|
    next unless distance < smallest

    smallest = distance
    closest = [row, column]
  end
  closest
end
90
+
60
91
  def set_outliers
61
92
  @set_outliers ||= []
62
93
  end
@@ -66,9 +97,10 @@ module AgglomerativeClustering
66
97
  end
67
98
 
68
99
  def build_distance_matrix
69
- Matrix.build(points.size, points.size) do |row, column|
100
+ m = Matrix.build(points.size, points.size) do |row, column|
70
101
  euclidean_distance(points[row], points[column]).round(2)
71
102
  end
103
+ DistanceMatrix.new(m)
72
104
  end
73
105
 
74
106
  end
@@ -1,3 +1,3 @@
1
1
  module AgglomerativeClustering
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,18 @@
1
+ describe AgglomerativeClustering::DistanceMatrix do
2
+
3
+ context '#remove_edge' do
4
+ it 'will remove edges from the distance matrix' do
5
+ matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.empty)
6
+ matrix.add_edge([1,2])
7
+ matrix.add_edge([2,2,3])
8
+ expect(matrix.remove_edge(0)).to eql(Matrix[[2,3]])
9
+ end
10
+ end
11
+
12
+ context '#add_edge' do
13
+ it 'will add an edges to the distance matrix' do
14
+ matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.rows([[1,2,3]]))
15
+ expect(matrix.add_edge([4,5,6,7])).to eql(Matrix[[1,2,3,4],[4,5,6,7]])
16
+ end
17
+ end
18
+ end
@@ -2,9 +2,9 @@ describe AgglomerativeClustering::Set do
2
2
 
3
3
  before do
4
4
  @set = FactoryGirl.build(:set)
5
- @point1 = FactoryGirl.build(:point, x:2, y:2, z:3)
6
- @point2 = FactoryGirl.build(:point, x:1, y:4, z:1)
7
- @point3 = FactoryGirl.build(:point, x:5, y:2, z:2)
5
+ @point1 = FactoryGirl.build(:point, x:1, y:2, z:3)
6
+ @point2 = FactoryGirl.build(:point, x:2, y:4, z:1)
7
+ @point3 = FactoryGirl.build(:point, x:4, y:2, z:2)
8
8
  @point4 = FactoryGirl.build(:point, x:5, y:2, z:3)
9
9
  @set.push(@point1)
10
10
  @set.push(@point2)
@@ -14,36 +14,30 @@ describe AgglomerativeClustering::Set do
14
14
 
15
15
  context '#cluster' do
16
16
  it 'will return clusters of points based on requested number of clusters' do
17
- expect(@set.cluster(3).size).to eql(3)
17
+ expect(@set.cluster(2).size).to eql(2)
18
18
  end
19
19
 
20
20
  it 'will cluster points that are closest to each other' do
21
- @point5 = FactoryGirl.build(:point, x:5, y:2, z:4)
22
- @point6 = FactoryGirl.build(:point, x:5, y:3, z:4)
21
+ @point5 = FactoryGirl.build(:point, x:6, y:2, z:4)
22
+ @point6 = FactoryGirl.build(:point, x:7, y:3, z:4)
23
23
  @point7 = FactoryGirl.build(:point, x:15, y:20, z:21)
24
- @point8 = FactoryGirl.build(:point, x:18, y:21, z:21)
25
- @point9 = FactoryGirl.build(:point, x:16, y:22, z:21)
24
+ @point8 = FactoryGirl.build(:point, x:16, y:21, z:21)
25
+ @point9 = FactoryGirl.build(:point, x:18, y:22, z:21)
26
26
  @set.push(@point5)
27
27
  @set.push(@point6)
28
28
  @set.push(@point7)
29
29
  @set.push(@point8)
30
30
  @set.push(@point9)
31
31
  clusters = @set.cluster(3)
32
- clusters[0].points.each do |point|
33
- expect([@point1, @point2].include?(point)).to be true
34
- end
35
- clusters[1].points.each do |point|
36
- expect([@point3, @point4, @point5, @point6].include?(point)).to be true
37
- end
38
- clusters[2].points.each do |point|
39
- expect([@point7, @point8, @point9].include?(point)).to be true
40
- end
32
+ points = clusters.map(&:points).each {|cluster| cluster.sort_by!(&:x) }
33
+ expect([[@point1, @point2],[@point3, @point4, @point5, @point6], [@point7, @point8, @point9]] - points).to eql([])
34
+
41
35
  end
42
36
  end
43
37
 
44
38
  context '#merge_clusters' do
45
39
  it 'will merge two clusters into one and update the distance matrix' do
46
- expect(@set.merge_clusters([@set.clusters[0],@set.clusters[1]]).points).to eql([@point1, @point2])
40
+ expect(@set.merge_clusters([0,1]).points).to eql([@point1, @point2])
47
41
  end
48
42
  end
49
43
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agglomerative_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Mulvihill
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  description:
56
56
  email:
57
- - bmulvihill@pinsonault.com
57
+ - mulvihill.bryan@gmail.com
58
58
  executables: []
59
59
  extensions: []
60
60
  extra_rdoc_files: []
@@ -63,7 +63,6 @@ files:
63
63
  - ".rspec"
64
64
  - Gemfile
65
65
  - Gemfile.lock
66
- - LICENSE
67
66
  - LICENSE.txt
68
67
  - README.md
69
68
  - Rakefile
@@ -71,6 +70,7 @@ files:
71
70
  - cluster.rb
72
71
  - lib/agglomerative_clustering.rb
73
72
  - lib/agglomerative_clustering/cluster.rb
73
+ - lib/agglomerative_clustering/distance_matrix.rb
74
74
  - lib/agglomerative_clustering/euclidean_distance.rb
75
75
  - lib/agglomerative_clustering/linkage/average.rb
76
76
  - lib/agglomerative_clustering/linkage/base.rb
@@ -80,21 +80,19 @@ files:
80
80
  - lib/agglomerative_clustering/point.rb
81
81
  - lib/agglomerative_clustering/set.rb
82
82
  - lib/agglomerative_clustering/version.rb
83
- - outliers.csv
84
- - points.csv
85
83
  - spec/factories/lib/agglomerative_clustering/cluster.rb
86
84
  - spec/factories/lib/agglomerative_clustering/point.rb
87
85
  - spec/factories/lib/agglomerative_clustering/set.rb
88
86
  - spec/lib/agglomerative_clustering/cluster_spec.rb
87
+ - spec/lib/agglomerative_clustering/distance_matrix_spec.rb
89
88
  - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
90
89
  - spec/lib/agglomerative_clustering/linkage/average_spec.rb
91
- - spec/lib/agglomerative_clustering/linkage/base_spec.rb
92
90
  - spec/lib/agglomerative_clustering/linkage/center_spec.rb
93
91
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
94
92
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
95
93
  - spec/lib/agglomerative_clustering/set_spec.rb
96
94
  - spec/spec_helper.rb
97
- homepage: ''
95
+ homepage: https://github.com/bmulvihill/agglomerative_clustering
98
96
  licenses:
99
97
  - MIT
100
98
  metadata: {}
@@ -123,9 +121,9 @@ test_files:
123
121
  - spec/factories/lib/agglomerative_clustering/point.rb
124
122
  - spec/factories/lib/agglomerative_clustering/set.rb
125
123
  - spec/lib/agglomerative_clustering/cluster_spec.rb
124
+ - spec/lib/agglomerative_clustering/distance_matrix_spec.rb
126
125
  - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
127
126
  - spec/lib/agglomerative_clustering/linkage/average_spec.rb
128
- - spec/lib/agglomerative_clustering/linkage/base_spec.rb
129
127
  - spec/lib/agglomerative_clustering/linkage/center_spec.rb
130
128
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
131
129
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
data/LICENSE DELETED
@@ -1,22 +0,0 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2014 Bryan Mulvihill
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
22
-
@@ -1,13 +0,0 @@
1
- describe AgglomerativeClustering::Linkage::Base do
2
-
3
- context '#cluster' do
4
- it 'will return the clusters where min distance is closest' do
5
- single_linkage = AgglomerativeClustering::Linkage::Single.new
6
- set = FactoryGirl.build(:set)
7
- set.push(FactoryGirl.build(:point))
8
- set.push(FactoryGirl.build(:point))
9
- expect(single_linkage.cluster(set.clusters)).to eql([set.clusters[0], set.clusters[1]])
10
- end
11
- end
12
-
13
- end