agglomerative_clustering 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d52ff4f84aef0fbd56aa453833ea2e5b9c9772e2
4
- data.tar.gz: 4abb19bb148f77989f43be805769e0676a7182f7
3
+ metadata.gz: 6368eb116e76afdf46d5dd16ffeaed04a3d590e8
4
+ data.tar.gz: 39447a07d51c37d5743dc910bb71ff011bfcd6d2
5
5
  SHA512:
6
- metadata.gz: b31e61ad6c08ecdce2326bdab94c412aaee32227775f662ad62361896d69124acbad3ed4c6f8a40f2298de30ac77d65c0fcabd0bfe58c16b6d11bd89e75a7dd5
7
- data.tar.gz: 8a2a08bda0f2975166536d1ec85cd0f2254044689d8b6f422b6f2170bd64301ae914c05f1264d14a81d7b4b2fb4deb9c4da0fa82abb9b94f57a462ba1689c11d
6
+ metadata.gz: 198911398132736c1cf06117e48e4f0026c94271586e8f172bf72733828f8bf1b218e03659b148663c56b2f8a3ca793e45b335527e90bc51577663221ddb4202
7
+ data.tar.gz: 5b04529399622524c8d7b36cd8b387c7dae73c83700e0e63455091e6357b39745bb78df0d01e6df0f8e29f0d264f2df89b23a79d9b744f2df393ceb10062249a
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
1
  source "https://rubygems.org"
2
2
  gemspec
3
- gem 'factory_girl'
3
+ group :development do
4
+ gem 'factory_girl'
5
+ end
data/README.md CHANGED
@@ -2,7 +2,12 @@
2
2
 
3
3
  Hierarchical Agglomerative Clustering Algorithm
4
4
 
5
- Input Set of 3 dimensional points, group into nearest k clusters
5
+ Input Set of 3 dimensional points, group into nearest k clusters based on Euclidean Distance.
6
+ Currently the Clustering Algorithm supports 4 different types of Linkage
7
+ * Single Linkage (Distance between clusters is based on nearest points)
8
+ * Complete Linkage (Distance between clusters is based on farthest points)
9
+ * Average Linkage (Distance between clusters is based on average distance of points)
10
+ * Center Linkage (Distance between clusters is based on center of cluster)
6
11
 
7
12
  ## Installation
8
13
 
@@ -21,8 +26,8 @@ Or install it yourself as:
21
26
  $ gem install agglomerative_clustering
22
27
 
23
28
  ## Usage
29
+ Please see cluster.rb for a sample until I have a chance to write something up here
24
30
 
25
- TODO: Write usage instructions here
26
31
 
27
32
  ## Contributing
28
33
 
@@ -7,9 +7,9 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "agglomerative_clustering"
8
8
  spec.version = AgglomerativeClustering::VERSION
9
9
  spec.authors = ["Bryan Mulvihill"]
10
- spec.email = ["bmulvihill@pinsonault.com"]
10
+ spec.email = ["mulvihill.bryan@gmail.com"]
11
11
  spec.summary = %q{Ruby Agglomerative Clustering Algorithm}
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/bmulvihill/agglomerative_clustering"
13
13
  spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files -z`.split("\x0")
@@ -1,5 +1,6 @@
1
1
  require "agglomerative_clustering/version"
2
2
  require "agglomerative_clustering/euclidean_distance"
3
+ require "agglomerative_clustering/distance_matrix"
3
4
  require "agglomerative_clustering/linkage/base"
4
5
  require "agglomerative_clustering/linkage/single"
5
6
  require "agglomerative_clustering/linkage/complete"
@@ -0,0 +1,37 @@
1
+ module AgglomerativeClustering
2
+ class DistanceMatrix
3
+
4
+ def initialize matrix
5
+ @matrix_array = matrix.to_a
6
+ end
7
+
8
+ def matrix
9
+ Matrix.build(matrix_array.size, matrix_array.first.size) do |row, column|
10
+ matrix_array[row][column]
11
+ end
12
+ end
13
+
14
+ def print_matrix
15
+ puts matrix.to_a.map(&:inspect)
16
+ end
17
+
18
+ def remove_edge index
19
+ matrix_array.delete_at(index)
20
+ matrix_array.each { |row| row.delete_at(index) }
21
+ Matrix.rows(matrix_array)
22
+ end
23
+
24
+ def add_edge weights
25
+ matrix_array.each_with_index { |row, index| row << weights[index] }
26
+ matrix_array << weights
27
+ Matrix.rows(matrix_array)
28
+ end
29
+
30
+ private
31
+
32
+ def matrix_array
33
+ @matrix_array ||= []
34
+ end
35
+
36
+ end
37
+ end
@@ -12,9 +12,6 @@ module AgglomerativeClustering
12
12
  distances.inject(:+)/distances.size
13
13
  end
14
14
 
15
- def clusters_to_merge
16
- @clusters_to_merge ||= []
17
- end
18
15
  end
19
16
  end
20
17
  end
@@ -2,21 +2,6 @@ module AgglomerativeClustering
2
2
  module Linkage
3
3
  class Base
4
4
  include EuclideanDistance
5
-
6
- def cluster(clusters)
7
- min_cluster_dist = 1.0/0
8
- clusters.each_with_index do |cluster1, index|
9
- clusters[index + 1..clusters.size].each do |cluster2|
10
- distance = calculate_distance(cluster1, cluster2)
11
- if distance < min_cluster_dist
12
- min_cluster_dist = distance
13
- @clusters_to_merge = [cluster1, cluster2]
14
- end
15
- end
16
- end
17
- clusters_to_merge
18
- end
19
-
20
5
  end
21
6
  end
22
7
  end
@@ -10,11 +10,7 @@ module AgglomerativeClustering
10
10
  def center_point cluster
11
11
  cluster.points.first.zip(*cluster.points[1..cluster.points.size-1]).map { |a,b| (a + b)/cluster.points.size.to_f }
12
12
  end
13
-
14
- def clusters_to_merge
15
- @clusters_to_merge ||= []
16
- end
17
-
13
+
18
14
  end
19
15
  end
20
16
  end
@@ -13,10 +13,6 @@ module AgglomerativeClustering
13
13
  max_point_distance
14
14
  end
15
15
 
16
- def clusters_to_merge
17
- @clusters_to_merge ||= []
18
- end
19
-
20
16
  end
21
17
  end
22
18
  end
@@ -13,9 +13,6 @@ module AgglomerativeClustering
13
13
  min_point_distance
14
14
  end
15
15
 
16
- def clusters_to_merge
17
- @clusters_to_merge ||= []
18
- end
19
16
  end
20
17
  end
21
18
  end
@@ -1,3 +1 @@
1
- Point = Struct.new(:x, :y, :z) do
2
- attr_accessor :index
3
- end
1
+ Point = Struct.new(:x, :y, :z)
@@ -2,15 +2,17 @@ require 'matrix'
2
2
  module AgglomerativeClustering
3
3
  class Set
4
4
  include EuclideanDistance
5
- attr_reader :points
5
+ attr_reader :linkage
6
6
 
7
7
  def initialize(linkage)
8
8
  @linkage = linkage
9
- @points = []
9
+ end
10
+
11
+ def points
12
+ @points ||= []
10
13
  end
11
14
 
12
15
  def push point
13
- point.index = points.size
14
16
  points << point
15
17
  end
16
18
 
@@ -22,23 +24,28 @@ module AgglomerativeClustering
22
24
  @distance_matrix ||= build_distance_matrix
23
25
  end
24
26
 
25
- def print_distance_matrix
26
- puts distance_matrix.to_a.map(&:inspect)
27
- end
28
-
29
27
  def cluster total_clusters
30
- clusters_to_merge =[]
31
28
  while clusters.size > total_clusters
32
- clusters_to_merge = @linkage.cluster(clusters)
33
- merge_clusters(clusters_to_merge)
29
+ merge_clusters(shortest_distance)
34
30
  end
35
31
  clusters
36
32
  end
37
33
 
38
- def merge_clusters(min_clusters)
39
- min_clusters[0].merge(min_clusters[1])
40
- clusters.reject! { |cluster| cluster == min_clusters[1] }
41
- min_clusters[0]
34
+ def merge_clusters indexes
35
+ index1, index2 = indexes
36
+ new_cluster = clusters[index1].merge(clusters[index2])
37
+ remove_cluster(index1)
38
+ remove_cluster(index2 - 1)
39
+ add_cluster(new_cluster)
40
+ end
41
+
42
+ def update_distance_matrix new_cluster
43
+ distances = []
44
+ clusters.each do |cluster|
45
+ distances << linkage.calculate_distance(clusters[new_cluster], cluster)
46
+ end
47
+ distance_matrix.add_edge(distances)
48
+ distance_matrix
42
49
  end
43
50
 
44
51
  def outliers
@@ -46,10 +53,10 @@ module AgglomerativeClustering
46
53
  end
47
54
 
48
55
  def find_outliers percentage_of_clusters, distance
49
- distance_matrix.each_with_index do |index, row, column|
56
+ distance_matrix.matrix.each_with_index do |index, row, column|
50
57
  count_hash[row] ||= 0
51
- count_hash[row] += 1 if distance_matrix[row, column] > distance
52
- set_outliers << points[row] if count_hash[row]/(distance_matrix.row_count - 1) > percentage_of_clusters/100
58
+ count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
59
+ set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
53
60
  end
54
61
  points.reject! { |point| outliers.include?(point) }
55
62
  outliers
@@ -57,6 +64,30 @@ module AgglomerativeClustering
57
64
 
58
65
  private
59
66
 
67
+ def add_cluster new_cluster
68
+ clusters << new_cluster
69
+ update_distance_matrix(clusters.size - 1)
70
+ new_cluster
71
+ end
72
+
73
+ def remove_cluster index
74
+ clusters.delete_at(index)
75
+ distance_matrix.remove_edge(index)
76
+ end
77
+
78
+ def shortest_distance
79
+ min_cluster_dist = 1.0/0
80
+ indexes = []
81
+ distance_matrix.matrix.each_with_index do |index, row, column|
82
+ distance = distance_matrix.matrix[row, column]
83
+ if distance < min_cluster_dist && distance != 0
84
+ min_cluster_dist = distance
85
+ indexes = [row, column]
86
+ end
87
+ end
88
+ indexes
89
+ end
90
+
60
91
  def set_outliers
61
92
  @set_outliers ||= []
62
93
  end
@@ -66,9 +97,10 @@ module AgglomerativeClustering
66
97
  end
67
98
 
68
99
  def build_distance_matrix
69
- Matrix.build(points.size, points.size) do |row, column|
100
+ m = Matrix.build(points.size, points.size) do |row, column|
70
101
  euclidean_distance(points[row], points[column]).round(2)
71
102
  end
103
+ DistanceMatrix.new(m)
72
104
  end
73
105
 
74
106
  end
@@ -1,3 +1,3 @@
1
1
  module AgglomerativeClustering
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,18 @@
1
+ describe AgglomerativeClustering::DistanceMatrix do
2
+
3
+ context '#remove_edge' do
4
+ it 'will remove edges from the distance matrix' do
5
+ matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.empty)
6
+ matrix.add_edge([1,2])
7
+ matrix.add_edge([2,2,3])
8
+ expect(matrix.remove_edge(0)).to eql(Matrix[[2,3]])
9
+ end
10
+ end
11
+
12
+ context '#add_edge' do
13
+ it 'will add an edges to the distance matrix' do
14
+ matrix = AgglomerativeClustering::DistanceMatrix.new(Matrix.rows([[1,2,3]]))
15
+ expect(matrix.add_edge([4,5,6,7])).to eql(Matrix[[1,2,3,4],[4,5,6,7]])
16
+ end
17
+ end
18
+ end
@@ -2,9 +2,9 @@ describe AgglomerativeClustering::Set do
2
2
 
3
3
  before do
4
4
  @set = FactoryGirl.build(:set)
5
- @point1 = FactoryGirl.build(:point, x:2, y:2, z:3)
6
- @point2 = FactoryGirl.build(:point, x:1, y:4, z:1)
7
- @point3 = FactoryGirl.build(:point, x:5, y:2, z:2)
5
+ @point1 = FactoryGirl.build(:point, x:1, y:2, z:3)
6
+ @point2 = FactoryGirl.build(:point, x:2, y:4, z:1)
7
+ @point3 = FactoryGirl.build(:point, x:4, y:2, z:2)
8
8
  @point4 = FactoryGirl.build(:point, x:5, y:2, z:3)
9
9
  @set.push(@point1)
10
10
  @set.push(@point2)
@@ -14,36 +14,30 @@ describe AgglomerativeClustering::Set do
14
14
 
15
15
  context '#cluster' do
16
16
  it 'will return clusters of points based on requested number of clusters' do
17
- expect(@set.cluster(3).size).to eql(3)
17
+ expect(@set.cluster(2).size).to eql(2)
18
18
  end
19
19
 
20
20
  it 'will cluster points that are closest to each other' do
21
- @point5 = FactoryGirl.build(:point, x:5, y:2, z:4)
22
- @point6 = FactoryGirl.build(:point, x:5, y:3, z:4)
21
+ @point5 = FactoryGirl.build(:point, x:6, y:2, z:4)
22
+ @point6 = FactoryGirl.build(:point, x:7, y:3, z:4)
23
23
  @point7 = FactoryGirl.build(:point, x:15, y:20, z:21)
24
- @point8 = FactoryGirl.build(:point, x:18, y:21, z:21)
25
- @point9 = FactoryGirl.build(:point, x:16, y:22, z:21)
24
+ @point8 = FactoryGirl.build(:point, x:16, y:21, z:21)
25
+ @point9 = FactoryGirl.build(:point, x:18, y:22, z:21)
26
26
  @set.push(@point5)
27
27
  @set.push(@point6)
28
28
  @set.push(@point7)
29
29
  @set.push(@point8)
30
30
  @set.push(@point9)
31
31
  clusters = @set.cluster(3)
32
- clusters[0].points.each do |point|
33
- expect([@point1, @point2].include?(point)).to be true
34
- end
35
- clusters[1].points.each do |point|
36
- expect([@point3, @point4, @point5, @point6].include?(point)).to be true
37
- end
38
- clusters[2].points.each do |point|
39
- expect([@point7, @point8, @point9].include?(point)).to be true
40
- end
32
+ points = clusters.map(&:points).each {|cluster| cluster.sort_by!(&:x) }
33
+ expect([[@point1, @point2],[@point3, @point4, @point5, @point6], [@point7, @point8, @point9]] - points).to eql([])
34
+
41
35
  end
42
36
  end
43
37
 
44
38
  context '#merge_clusters' do
45
39
  it 'will merge two clusters into one and update the distance matrix' do
46
- expect(@set.merge_clusters([@set.clusters[0],@set.clusters[1]]).points).to eql([@point1, @point2])
40
+ expect(@set.merge_clusters([0,1]).points).to eql([@point1, @point2])
47
41
  end
48
42
  end
49
43
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agglomerative_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Mulvihill
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  description:
56
56
  email:
57
- - bmulvihill@pinsonault.com
57
+ - mulvihill.bryan@gmail.com
58
58
  executables: []
59
59
  extensions: []
60
60
  extra_rdoc_files: []
@@ -63,7 +63,6 @@ files:
63
63
  - ".rspec"
64
64
  - Gemfile
65
65
  - Gemfile.lock
66
- - LICENSE
67
66
  - LICENSE.txt
68
67
  - README.md
69
68
  - Rakefile
@@ -71,6 +70,7 @@ files:
71
70
  - cluster.rb
72
71
  - lib/agglomerative_clustering.rb
73
72
  - lib/agglomerative_clustering/cluster.rb
73
+ - lib/agglomerative_clustering/distance_matrix.rb
74
74
  - lib/agglomerative_clustering/euclidean_distance.rb
75
75
  - lib/agglomerative_clustering/linkage/average.rb
76
76
  - lib/agglomerative_clustering/linkage/base.rb
@@ -80,21 +80,19 @@ files:
80
80
  - lib/agglomerative_clustering/point.rb
81
81
  - lib/agglomerative_clustering/set.rb
82
82
  - lib/agglomerative_clustering/version.rb
83
- - outliers.csv
84
- - points.csv
85
83
  - spec/factories/lib/agglomerative_clustering/cluster.rb
86
84
  - spec/factories/lib/agglomerative_clustering/point.rb
87
85
  - spec/factories/lib/agglomerative_clustering/set.rb
88
86
  - spec/lib/agglomerative_clustering/cluster_spec.rb
87
+ - spec/lib/agglomerative_clustering/distance_matrix_spec.rb
89
88
  - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
90
89
  - spec/lib/agglomerative_clustering/linkage/average_spec.rb
91
- - spec/lib/agglomerative_clustering/linkage/base_spec.rb
92
90
  - spec/lib/agglomerative_clustering/linkage/center_spec.rb
93
91
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
94
92
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
95
93
  - spec/lib/agglomerative_clustering/set_spec.rb
96
94
  - spec/spec_helper.rb
97
- homepage: ''
95
+ homepage: https://github.com/bmulvihill/agglomerative_clustering
98
96
  licenses:
99
97
  - MIT
100
98
  metadata: {}
@@ -123,9 +121,9 @@ test_files:
123
121
  - spec/factories/lib/agglomerative_clustering/point.rb
124
122
  - spec/factories/lib/agglomerative_clustering/set.rb
125
123
  - spec/lib/agglomerative_clustering/cluster_spec.rb
124
+ - spec/lib/agglomerative_clustering/distance_matrix_spec.rb
126
125
  - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
127
126
  - spec/lib/agglomerative_clustering/linkage/average_spec.rb
128
- - spec/lib/agglomerative_clustering/linkage/base_spec.rb
129
127
  - spec/lib/agglomerative_clustering/linkage/center_spec.rb
130
128
  - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
131
129
  - spec/lib/agglomerative_clustering/linkage/single_spec.rb
data/LICENSE DELETED
@@ -1,22 +0,0 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2014 Bryan Mulvihill
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
22
-
@@ -1,13 +0,0 @@
1
- describe AgglomerativeClustering::Linkage::Base do
2
-
3
- context '#cluster' do
4
- it 'will return the clusters where min distance is closest' do
5
- single_linkage = AgglomerativeClustering::Linkage::Single.new
6
- set = FactoryGirl.build(:set)
7
- set.push(FactoryGirl.build(:point))
8
- set.push(FactoryGirl.build(:point))
9
- expect(single_linkage.cluster(set.clusters)).to eql([set.clusters[0], set.clusters[1]])
10
- end
11
- end
12
-
13
- end