agglomerative_clustering 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +36 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +46 -0
  6. data/LICENSE +22 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +33 -0
  9. data/Rakefile +2 -0
  10. data/agglomerative_clustering.gemspec +23 -0
  11. data/cluster.rb +58 -0
  12. data/lib/agglomerative_clustering.rb +10 -0
  13. data/lib/agglomerative_clustering/cluster.rb +19 -0
  14. data/lib/agglomerative_clustering/euclidean_distance.rb +8 -0
  15. data/lib/agglomerative_clustering/linkage/average.rb +20 -0
  16. data/lib/agglomerative_clustering/linkage/base.rb +22 -0
  17. data/lib/agglomerative_clustering/linkage/center.rb +20 -0
  18. data/lib/agglomerative_clustering/linkage/complete.rb +22 -0
  19. data/lib/agglomerative_clustering/linkage/single.rb +21 -0
  20. data/lib/agglomerative_clustering/point.rb +3 -0
  21. data/lib/agglomerative_clustering/set.rb +75 -0
  22. data/lib/agglomerative_clustering/version.rb +3 -0
  23. data/spec/factories/lib/agglomerative_clustering/cluster.rb +5 -0
  24. data/spec/factories/lib/agglomerative_clustering/point.rb +7 -0
  25. data/spec/factories/lib/agglomerative_clustering/set.rb +5 -0
  26. data/spec/lib/agglomerative_clustering/cluster_spec.rb +11 -0
  27. data/spec/lib/agglomerative_clustering/euclidean_distance_spec.rb +15 -0
  28. data/spec/lib/agglomerative_clustering/linkage/average_spec.rb +25 -0
  29. data/spec/lib/agglomerative_clustering/linkage/base_spec.rb +13 -0
  30. data/spec/lib/agglomerative_clustering/linkage/center_spec.rb +23 -0
  31. data/spec/lib/agglomerative_clustering/linkage/complete_spec.rb +19 -0
  32. data/spec/lib/agglomerative_clustering/linkage/single_spec.rb +19 -0
  33. data/spec/lib/agglomerative_clustering/set_spec.rb +75 -0
  34. data/spec/spec_helper.rb +7 -0
  35. metadata +133 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d52ff4f84aef0fbd56aa453833ea2e5b9c9772e2
4
+ data.tar.gz: 4abb19bb148f77989f43be805769e0676a7182f7
5
+ SHA512:
6
+ metadata.gz: b31e61ad6c08ecdce2326bdab94c412aaee32227775f662ad62361896d69124acbad3ed4c6f8a40f2298de30ac77d65c0fcabd0bfe58c16b6d11bd89e75a7dd5
7
+ data.tar.gz: 8a2a08bda0f2975166536d1ec85cd0f2254044689d8b6f422b6f2170bd64301ae914c05f1264d14a81d7b4b2fb4deb9c4da0fa82abb9b94f57a462ba1689c11d
@@ -0,0 +1,36 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+ outliers.csv
36
+ points.csv
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+ gemspec
3
+ gem 'factory_girl'
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ agglomerative_clustering (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (4.1.6)
10
+ i18n (~> 0.6, >= 0.6.9)
11
+ json (~> 1.7, >= 1.7.7)
12
+ minitest (~> 5.1)
13
+ thread_safe (~> 0.1)
14
+ tzinfo (~> 1.1)
15
+ diff-lcs (1.2.5)
16
+ factory_girl (4.4.0)
17
+ activesupport (>= 3.0.0)
18
+ i18n (0.6.11)
19
+ json (1.8.1)
20
+ minitest (5.4.2)
21
+ rake (10.3.2)
22
+ rspec (3.1.0)
23
+ rspec-core (~> 3.1.0)
24
+ rspec-expectations (~> 3.1.0)
25
+ rspec-mocks (~> 3.1.0)
26
+ rspec-core (3.1.6)
27
+ rspec-support (~> 3.1.0)
28
+ rspec-expectations (3.1.2)
29
+ diff-lcs (>= 1.2.0, < 2.0)
30
+ rspec-support (~> 3.1.0)
31
+ rspec-mocks (3.1.3)
32
+ rspec-support (~> 3.1.0)
33
+ rspec-support (3.1.2)
34
+ thread_safe (0.3.4)
35
+ tzinfo (1.2.2)
36
+ thread_safe (~> 0.1)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ agglomerative_clustering!
43
+ bundler (~> 1.7)
44
+ factory_girl
45
+ rake (~> 10.0)
46
+ rspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Bryan Mulvihill
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Bryan Mulvihill
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ # AgglomerativeClustering
2
+
3
+ Hierarchical Agglomerative Clustering Algorithm
4
+
5
+ Given a set of 3-dimensional points, groups them into the k nearest clusters.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'agglomerative_clustering'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install agglomerative_clustering
22
+
23
+ ## Usage
24
+
25
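+ Build a set with a linkage strategy, push points into it, then ask for k clusters: `set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Single.new); set.push(Point.new(1, 2, 3)); clusters = set.cluster(3)`. Available linkages are `Single`, `Complete`, `Average` and `Center`.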
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it ( https://github.com/[my-github-username]/agglomerative_clustering/fork )
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create a new Pull Request
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'agglomerative_clustering/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "agglomerative_clustering"
8
+ spec.version = AgglomerativeClustering::VERSION
9
+ spec.authors = ["Bryan Mulvihill"]
10
+ spec.email = ["bmulvihill@pinsonault.com"]
11
+ spec.summary = %q{Ruby Agglomerative Clustering Algorithm}
12
+ spec.homepage = ""
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_development_dependency "bundler", "~> 1.7"
21
+ spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency 'rspec'
23
+ end
@@ -0,0 +1,58 @@
1
#!/usr/bin/env ruby
# Demo script: builds three groups of 100 random 3-D points, writes them to
# CSV, removes outliers, then writes each of the 3 resulting clusters to its
# own CSV file.
require 'agglomerative_clustering'

# Writes one "x,y,z" row per point to the file at +path+ (truncating it).
# File.open is used instead of Kernel#open so the argument is always treated
# as a plain file path.
def write_points_csv(path, points)
  File.open(path, 'w') do |f|
    points.each do |point|
      f << "#{point.x},#{point.y},#{point.z}\n"
    end
  end
end

set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Single.new)

# Three groups of 100 points; every coordinate of a point is drawn from the
# same range, and the ranges do not overlap between groups.
[0..100, 200..299, 400..499].each do |coordinate_range|
  100.times do
    x = Random.rand(coordinate_range)
    y = Random.rand(coordinate_range)
    z = Random.rand(coordinate_range)
    set.push(Point.new(x, y, z))
  end
end

percentage = 80
distance = 150

# points.csv is written before outlier removal, so it contains every point.
write_points_csv('points.csv', set.points)
write_points_csv('outliers.csv', set.find_outliers(percentage, distance))

if set.outliers.any?
  puts 'Outliers Removed from Set:'
  set.outliers.each do |outlier|
    puts outlier
  end
else
  puts "There are no outliers where #{percentage}% of the points lie at a distance greater than #{distance}"
end

clusters = set.cluster(3)
clusters.each_with_index do |cluster, index|
  write_points_csv("cluster#{index}.csv", cluster.points)
end
@@ -0,0 +1,10 @@
1
+ require "agglomerative_clustering/version"
2
+ require "agglomerative_clustering/euclidean_distance"
3
+ require "agglomerative_clustering/linkage/base"
4
+ require "agglomerative_clustering/linkage/single"
5
+ require "agglomerative_clustering/linkage/complete"
6
+ require "agglomerative_clustering/linkage/average"
7
+ require "agglomerative_clustering/linkage/center"
8
+ require "agglomerative_clustering/point"
9
+ require "agglomerative_clustering/cluster"
10
+ require "agglomerative_clustering/set"
@@ -0,0 +1,19 @@
1
module AgglomerativeClustering
  # A group of points. A cluster starts with a single point and grows by
  # absorbing the points of other clusters via #merge.
  class Cluster
    # @return [Array] the points currently held by this cluster
    attr_reader :points

    # @param point the first member of the cluster
    def initialize(point)
      @points = [point]
    end

    # Appends every point of +cluster+ to this cluster. The other cluster is
    # left untouched.
    #
    # Array#concat is used rather than pushing inside an +each+ loop so that
    # merging a cluster with itself terminates (appending to the array being
    # iterated would never reach the end).
    #
    # @param cluster [Cluster] the cluster whose points are absorbed
    # @return [Cluster] self, so calls can be chained
    def merge(cluster)
      points.concat(cluster.points)
      self
    end
  end
end
@@ -0,0 +1,8 @@
1
module AgglomerativeClustering
  # Mixin providing straight-line distance between two coordinate sequences.
  module EuclideanDistance
    # Pairs the coordinates of +point1+ with those of +point2+, squares each
    # difference, sums the squares and returns the square root.
    #
    # Thanks to https://blog.philipcunningham.org/posts/ruby-euclidean-distance
    #
    # @param point1 [#zip] first point (e.g. a Struct or Array of numbers)
    # @param point2 second point, with one coordinate per coordinate of +point1+
    # @return [Float] the Euclidean distance
    def euclidean_distance(point1, point2)
      squared_gaps = point1.zip(point2).map do |left, right|
        gap = left - right
        gap * gap
      end
      Math.sqrt(squared_gaps.reduce(:+))
    end
  end
end
@@ -0,0 +1,20 @@
1
module AgglomerativeClustering
  module Linkage
    # Average linkage: the distance between two clusters is the mean of the
    # distances between every point of one and every point of the other.
    class Average < Base
      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Float] mean of all cross-cluster point distances
      def calculate_distance(cluster1, cluster2)
        pairwise = cluster1.points.product(cluster2.points).map do |left, right|
          euclidean_distance(left, right)
        end
        pairwise.inject(:+) / pairwise.size
      end

      # @return [Array] the pair of clusters most recently selected for
      #   merging, or an empty array if none has been selected yet
      def clusters_to_merge
        @clusters_to_merge || (@clusters_to_merge = [])
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module AgglomerativeClustering
  module Linkage
    # Shared pair-selection logic for all linkage strategies. Subclasses
    # supply #calculate_distance, which defines how far apart two clusters are.
    class Base
      include EuclideanDistance

      # Finds the two clusters with the smallest linkage distance.
      #
      # Pairs are visited in the same order as a nested index loop
      # ((0,1), (0,2), ..., (1,2), ...) and the comparison is strict, so the
      # first pair that reaches the minimum wins ties.
      #
      # The selection is rebuilt from scratch on every call: when fewer than
      # two clusters are given, an empty array is returned instead of the pair
      # chosen by a previous call.
      #
      # @param clusters [Array<Cluster>] candidate clusters
      # @return [Array<Cluster>] the closest pair, or [] if there is no pair
      def cluster(clusters)
        closest_pair = []
        min_cluster_dist = Float::INFINITY
        clusters.combination(2) do |cluster1, cluster2|
          distance = calculate_distance(cluster1, cluster2)
          if distance < min_cluster_dist
            min_cluster_dist = distance
            closest_pair = [cluster1, cluster2]
          end
        end
        @clusters_to_merge = closest_pair
        clusters_to_merge
      end

      # Distance between two clusters under this linkage. Must be provided by
      # each concrete linkage class.
      #
      # @raise [NotImplementedError] always, in the base class
      def calculate_distance(_cluster1, _cluster2)
        raise NotImplementedError, "#{self.class} must implement calculate_distance"
      end

      # @return [Array<Cluster>] the pair chosen by the last call to #cluster,
      #   or an empty array if #cluster has not selected one
      def clusters_to_merge
        @clusters_to_merge ||= []
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module AgglomerativeClustering
  module Linkage
    # Center (centroid) linkage: the distance between two clusters is the
    # distance between their center points.
    class Center < Base
      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Float] distance between the two cluster centers
      def calculate_distance(cluster1, cluster2)
        point1, point2 = center_point(cluster1), center_point(cluster2)
        euclidean_distance(point1, point2)
      end

      # Computes the per-coordinate mean of all points in +cluster+.
      #
      # Every point is converted to an array and the arrays are transposed so
      # each coordinate axis can be averaged over the whole cluster. This
      # works for any number of points; pairing only the first two zipped
      # values would be correct solely for two-point clusters.
      #
      # @param cluster [Cluster] a cluster whose points respond to #to_a
      # @return [Array<Float>] the center, one value per coordinate
      def center_point(cluster)
        members = cluster.points
        count = members.size.to_f
        members.map(&:to_a).transpose.map { |axis_values| axis_values.inject(:+) / count }
      end

      # @return [Array] the pair of clusters most recently selected for
      #   merging, or an empty array if none has been selected yet
      def clusters_to_merge
        @clusters_to_merge ||= []
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module AgglomerativeClustering
  module Linkage
    # Complete linkage: the distance between two clusters is the largest
    # distance between any point of one and any point of the other.
    class Complete < Base
      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Numeric] the greatest cross-cluster point distance; stays at
      #   the starting value 0 when no distance exceeds it
      def calculate_distance(cluster1, cluster2)
        cluster1.points.product(cluster2.points).inject(0) do |farthest, (left, right)|
          gap = euclidean_distance(left, right)
          gap > farthest ? gap : farthest
        end
      end

      # @return [Array] the pair of clusters most recently selected for
      #   merging, or an empty array if none has been selected yet
      def clusters_to_merge
        @clusters_to_merge || (@clusters_to_merge = [])
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module AgglomerativeClustering
  module Linkage
    # Single linkage: the distance between two clusters is the smallest
    # distance between any point of one and any point of the other.
    class Single < Base
      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Float] the smallest cross-cluster point distance; infinity
      #   when there are no point pairs to compare
      def calculate_distance(cluster1, cluster2)
        cluster1.points.product(cluster2.points).inject(1.0 / 0) do |nearest, (left, right)|
          gap = euclidean_distance(left, right)
          gap < nearest ? gap : nearest
        end
      end

      # @return [Array] the pair of clusters most recently selected for
      #   merging, or an empty array if none has been selected yet
      def clusters_to_merge
        @clusters_to_merge || (@clusters_to_merge = [])
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# A 3-D point with x, y and z members. +index+ is an extra attribute that is
# not a Struct member, so it takes no part in #to_a, #== or #zip.
Point = Struct.new(:x, :y, :z) do
  attr_reader :index
  attr_writer :index
end
@@ -0,0 +1,75 @@
1
+ require 'matrix'
2
module AgglomerativeClustering
  # A collection of points that can be stripped of outliers and grouped into
  # clusters using the linkage strategy supplied at construction.
  class Set
    include EuclideanDistance

    # @return [Array] the points currently in the set
    attr_reader :points

    # @param linkage an object responding to +cluster(clusters)+ that returns
    #   the pair of clusters to merge next (e.g. a Linkage::Single instance)
    def initialize(linkage)
      @linkage = linkage
      @points = []
    end

    # Adds a point, recording its position in the set as +point.index+.
    # The memoized clusters and distance matrix are discarded so they are
    # rebuilt with the new point included.
    #
    # @return [Array] the updated list of points
    def push point
      point.index = points.size
      invalidate_caches
      points << point
    end

    # @return [Array<Cluster>] current clusters; initially one per point
    def clusters
      @clusters ||= points.map { |point| AgglomerativeClustering::Cluster.new(point) }
    end

    # @return [Matrix] pairwise point distances, rounded to 2 decimals
    def distance_matrix
      @distance_matrix ||= build_distance_matrix
    end

    # Prints the distance matrix, one row per line.
    def print_distance_matrix
      puts distance_matrix.to_a.map(&:inspect)
    end

    # Repeatedly merges the pair chosen by the linkage until at most
    # +total_clusters+ remain.
    #
    # Merging stops once a single cluster is left, because there is no pair
    # left to merge; without that guard a +total_clusters+ below 1 would
    # never terminate.
    #
    # @param total_clusters [Integer] desired number of clusters
    # @return [Array<Cluster>] the resulting clusters
    def cluster total_clusters
      while clusters.size > total_clusters && clusters.size > 1
        merge_clusters(@linkage.cluster(clusters))
      end
      clusters
    end

    # Merges the second cluster of the pair into the first and removes the
    # second from the cluster list.
    #
    # @param min_clusters [Array<Cluster>] a two-element pair
    # @return [Cluster] the first cluster, now holding both sets of points
    def merge_clusters(min_clusters)
      survivor, absorbed = min_clusters
      survivor.merge(absorbed)
      clusters.delete_if { |candidate| candidate.equal?(absorbed) }
      survivor
    end

    # @return [Array] every distinct outlier found so far by #find_outliers
    def outliers
      set_outliers.uniq
    end

    # Removes and returns outliers. A point is an outlier when more than
    # +percentage_of_clusters+ percent of the other points lie farther than
    # +distance+ from it.
    #
    # The comparison is done as `far * 100 > percentage * others` so the
    # percentage is honoured exactly; integer division on both sides would
    # truncate every fraction to 0 or 1. Counts are local to each call, and
    # the memoized matrix and clusters are discarded afterwards because they
    # would otherwise still describe the removed points.
    #
    # @param percentage_of_clusters [Numeric] threshold, 0-100
    # @param distance [Numeric] distance beyond which a neighbour is "far"
    # @return [Array] all outliers found so far (see #outliers)
    def find_outliers percentage_of_clusters, distance
      matrix = distance_matrix
      others = matrix.row_count - 1
      if others > 0
        points.each_with_index do |point, row|
          far = matrix.row(row).to_a.count { |gap| gap > distance }
          set_outliers << point if far * 100 > percentage_of_clusters * others
        end
      end
      removed = outliers
      points.reject! { |point| removed.include?(point) }
      invalidate_caches
      removed
    end

    private

    # Accumulates outliers across calls to #find_outliers.
    def set_outliers
      @set_outliers ||= []
    end

    # Drops memoized values derived from the current list of points.
    def invalidate_caches
      @clusters = nil
      @distance_matrix = nil
    end

    # Builds a square matrix whose [row, column] entry is the distance
    # between points[row] and points[column], rounded to 2 decimals.
    def build_distance_matrix
      Matrix.build(points.size, points.size) do |row, column|
        euclidean_distance(points[row], points[column]).round(2)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module AgglomerativeClustering
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,5 @@
1
+ FactoryGirl.define do
2
+ factory :cluster, class: AgglomerativeClustering::Cluster do
3
+ initialize_with { new(FactoryGirl.build(:point)) }
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ FactoryGirl.define do
2
+ factory :point, class: Point do
3
+ x { Random.rand(1000) }
4
+ y { Random.rand(1000) }
5
+ z { Random.rand(1000) }
6
+ end
7
+ end
@@ -0,0 +1,5 @@
1
+ FactoryGirl.define do
2
+ factory :set, class: AgglomerativeClustering::Set do
3
+ initialize_with { new(AgglomerativeClustering::Linkage::Single.new) }
4
+ end
5
+ end
@@ -0,0 +1,11 @@
1
+ describe AgglomerativeClustering::Cluster do
2
+
3
+ context '#merge' do
4
+ it 'will merge two clusters' do
5
+ cluster1 = FactoryGirl.build(:cluster)
6
+ cluster2 = FactoryGirl.build(:cluster)
7
+ points = cluster1.points + cluster2.points
8
+ expect(cluster1.merge(cluster2).points).to eql(points)
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,15 @@
1
+ describe AgglomerativeClustering::EuclideanDistance do
2
+ before do
3
+ class Dummy
4
+ include AgglomerativeClustering::EuclideanDistance
5
+ end
6
+ end
7
+
8
+ context '#distance' do
9
+ it 'will return the distance between two points' do
10
+ p1 = FactoryGirl.build(:point, x: -1, y: 2, z: 3)
11
+ p2 = FactoryGirl.build(:point, x: 4 ,y: 0, z: -3)
12
+ expect(Dummy.new.euclidean_distance(p1,p2).round(3)).to eql(8.062)
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,25 @@
1
+ describe AgglomerativeClustering::Linkage::Average do
2
+
3
+ context '#calculate_distance' do
4
+ it 'will calculate distance between clusters based on the average distnace between points' do
5
+ average_linkage = AgglomerativeClustering::Linkage::Average.new
6
+ point1 = FactoryGirl.build(:point, x: 1, y: 1, z: 1)
7
+ point2 = FactoryGirl.build(:point, x: 7, y: 7, z: 7)
8
+ point3 = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
9
+ point4 = FactoryGirl.build(:point, x: 5, y: 5, z: 5)
10
+ cluster1 = AgglomerativeClustering::Cluster.new(point1)
11
+ cluster2 = AgglomerativeClustering::Cluster.new(point2)
12
+ cluster3 = AgglomerativeClustering::Cluster.new(point3)
13
+ cluster4 = AgglomerativeClustering::Cluster.new(point4)
14
+ cluster1 = cluster1.merge(cluster2)
15
+ cluster3 = cluster3.merge(cluster4)
16
+ distance1 = average_linkage.euclidean_distance(point1, point3)
17
+ distance2 = average_linkage.euclidean_distance(point1, point4)
18
+ distance3 = average_linkage.euclidean_distance(point2, point3)
19
+ distance4 = average_linkage.euclidean_distance(point2, point4)
20
+ average_distance = (distance1 + distance2 + distance3 + distance4)/4
21
+ expect(average_linkage.calculate_distance(cluster1, cluster3)).to eql(average_distance)
22
+ end
23
+ end
24
+
25
+ end
@@ -0,0 +1,13 @@
1
+ describe AgglomerativeClustering::Linkage::Base do
2
+
3
+ context '#cluster' do
4
+ it 'will return the clusters where min distance is closest' do
5
+ single_linkage = AgglomerativeClustering::Linkage::Single.new
6
+ set = FactoryGirl.build(:set)
7
+ set.push(FactoryGirl.build(:point))
8
+ set.push(FactoryGirl.build(:point))
9
+ expect(single_linkage.cluster(set.clusters)).to eql([set.clusters[0], set.clusters[1]])
10
+ end
11
+ end
12
+
13
+ end
@@ -0,0 +1,23 @@
1
+ describe AgglomerativeClustering::Linkage::Center do
2
+
3
+ context '#calculate_distance' do
4
+ it 'will calculate distance between clusters based on the center distance between points' do
5
+ center_linkage = AgglomerativeClustering::Linkage::Center.new
6
+ point1 = FactoryGirl.build(:point, x: 1, y: 1, z: 1)
7
+ point2 = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
8
+ point3 = FactoryGirl.build(:point, x: 7, y: 7, z: 7)
9
+ point4 = FactoryGirl.build(:point, x: 5, y: 5, z: 5)
10
+ cluster1 = AgglomerativeClustering::Cluster.new(point1)
11
+ cluster2 = AgglomerativeClustering::Cluster.new(point2)
12
+ cluster3 = AgglomerativeClustering::Cluster.new(point3)
13
+ cluster4 = AgglomerativeClustering::Cluster.new(point4)
14
+ cluster1 = cluster1.merge(cluster2)
15
+ cluster3 = cluster3.merge(cluster4)
16
+ center_point1 = Point.new(1.5, 1.5, 1.5)
17
+ center_point2 = Point.new(6, 6, 6)
18
+ center_distance = center_linkage.euclidean_distance(center_point1, center_point2)
19
+ expect(center_linkage.calculate_distance(cluster1, cluster3)).to eql(center_distance)
20
+ end
21
+ end
22
+
23
+ end
@@ -0,0 +1,19 @@
1
+ describe AgglomerativeClustering::Linkage::Complete do
2
+
3
+ context '#calculate_distance' do
4
+ it 'will calculate distance between clusters based on the max distnace between points' do
5
+ complete_linkage = AgglomerativeClustering::Linkage::Complete.new
6
+ min_point = FactoryGirl.build(:point, x: 1, y: 1, z: 1)
7
+ max_point = FactoryGirl.build(:point, x: 7, y: 7, z: 7)
8
+ cluster1 = AgglomerativeClustering::Cluster.new(min_point)
9
+ cluster2 = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 2, y: 2, z: 2))
10
+ cluster3 = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 5, y: 5, z: 5))
11
+ cluster4 = AgglomerativeClustering::Cluster.new(max_point)
12
+ cluster1 = cluster1.merge(cluster2)
13
+ cluster3 = cluster3.merge(cluster4)
14
+ max_distance = complete_linkage.euclidean_distance(min_point, max_point)
15
+ expect(complete_linkage.calculate_distance(cluster1, cluster3)).to eql(max_distance)
16
+ end
17
+ end
18
+
19
+ end
@@ -0,0 +1,19 @@
1
+ describe AgglomerativeClustering::Linkage::Single do
2
+
3
+ context '#calculate_distance' do
4
+ it 'will calculate distance between clusters based on the min distnace between points' do
5
+ single_linkage = AgglomerativeClustering::Linkage::Single.new
6
+ min_point = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
7
+ max_point = FactoryGirl.build(:point, x: 5, y: 5, z: 5)
8
+ cluster1 = AgglomerativeClustering::Cluster.new(min_point)
9
+ cluster2 = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 1, y: 1, z: 1))
10
+ cluster3 = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 7, y: 7, z: 7))
11
+ cluster4 = AgglomerativeClustering::Cluster.new(max_point)
12
+ cluster1 = cluster1.merge(cluster2)
13
+ cluster3 = cluster3.merge(cluster4)
14
+ min_distance = single_linkage.euclidean_distance(min_point, max_point)
15
+ expect(single_linkage.calculate_distance(cluster1, cluster3)).to eql(min_distance)
16
+ end
17
+ end
18
+
19
+ end
@@ -0,0 +1,75 @@
1
+ describe AgglomerativeClustering::Set do
2
+
3
+ before do
4
+ @set = FactoryGirl.build(:set)
5
+ @point1 = FactoryGirl.build(:point, x:2, y:2, z:3)
6
+ @point2 = FactoryGirl.build(:point, x:1, y:4, z:1)
7
+ @point3 = FactoryGirl.build(:point, x:5, y:2, z:2)
8
+ @point4 = FactoryGirl.build(:point, x:5, y:2, z:3)
9
+ @set.push(@point1)
10
+ @set.push(@point2)
11
+ @set.push(@point3)
12
+ @set.push(@point4)
13
+ end
14
+
15
+ context '#cluster' do
16
+ it 'will return clusters of points based on requested number of clusters' do
17
+ expect(@set.cluster(3).size).to eql(3)
18
+ end
19
+
20
+ it 'will cluster points that are closest to each other' do
21
+ @point5 = FactoryGirl.build(:point, x:5, y:2, z:4)
22
+ @point6 = FactoryGirl.build(:point, x:5, y:3, z:4)
23
+ @point7 = FactoryGirl.build(:point, x:15, y:20, z:21)
24
+ @point8 = FactoryGirl.build(:point, x:18, y:21, z:21)
25
+ @point9 = FactoryGirl.build(:point, x:16, y:22, z:21)
26
+ @set.push(@point5)
27
+ @set.push(@point6)
28
+ @set.push(@point7)
29
+ @set.push(@point8)
30
+ @set.push(@point9)
31
+ clusters = @set.cluster(3)
32
+ clusters[0].points.each do |point|
33
+ expect([@point1, @point2].include?(point)).to be true
34
+ end
35
+ clusters[1].points.each do |point|
36
+ expect([@point3, @point4, @point5, @point6].include?(point)).to be true
37
+ end
38
+ clusters[2].points.each do |point|
39
+ expect([@point7, @point8, @point9].include?(point)).to be true
40
+ end
41
+ end
42
+ end
43
+
44
+ context '#merge_clusters' do
45
+ it 'will merge two clusters into one and update the distance matrix' do
46
+ expect(@set.merge_clusters([@set.clusters[0],@set.clusters[1]]).points).to eql([@point1, @point2])
47
+ end
48
+ end
49
+
50
+ context '#find_outliers' do
51
+ it 'will return a list of outliers' do
52
+ outlier1 = FactoryGirl.build(:point, x:100, y:200, z:300)
53
+ outlier2 = FactoryGirl.build(:point, x:-100, y:-200, z:-300)
54
+ @set.push(outlier1)
55
+ @set.push(outlier2)
56
+
57
+ percentage_of_points = 90
58
+ distance = 10
59
+ expect(@set.find_outliers(percentage_of_points, distance)).to eql([outlier1, outlier2])
60
+ end
61
+ end
62
+
63
+ context '#outliers' do
64
+ it 'will return the set of points without outliers' do
65
+ outlier1 = FactoryGirl.build(:point, x:100, y:200, z:300)
66
+ outlier2 = FactoryGirl.build(:point, x:-100, y:-200, z:-300)
67
+ @set.push(outlier1)
68
+ @set.push(outlier2)
69
+ percentage_of_points = 90
70
+ distance = 10
71
+ @set.find_outliers(percentage_of_points, distance)
72
+ expect(@set.outliers).to eql([outlier1,outlier2])
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,7 @@
1
+ require 'factory_girl'
2
+ require 'agglomerative_clustering'
3
+
4
+ RSpec.configure do |config|
5
+ config.include FactoryGirl::Syntax::Methods
6
+ FactoryGirl.find_definitions
7
+ end
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: agglomerative_clustering
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Bryan Mulvihill
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - bmulvihill@pinsonault.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - Gemfile
65
+ - Gemfile.lock
66
+ - LICENSE
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - agglomerative_clustering.gemspec
71
+ - cluster.rb
72
+ - lib/agglomerative_clustering.rb
73
+ - lib/agglomerative_clustering/cluster.rb
74
+ - lib/agglomerative_clustering/euclidean_distance.rb
75
+ - lib/agglomerative_clustering/linkage/average.rb
76
+ - lib/agglomerative_clustering/linkage/base.rb
77
+ - lib/agglomerative_clustering/linkage/center.rb
78
+ - lib/agglomerative_clustering/linkage/complete.rb
79
+ - lib/agglomerative_clustering/linkage/single.rb
80
+ - lib/agglomerative_clustering/point.rb
81
+ - lib/agglomerative_clustering/set.rb
82
+ - lib/agglomerative_clustering/version.rb
83
+ - outliers.csv
84
+ - points.csv
85
+ - spec/factories/lib/agglomerative_clustering/cluster.rb
86
+ - spec/factories/lib/agglomerative_clustering/point.rb
87
+ - spec/factories/lib/agglomerative_clustering/set.rb
88
+ - spec/lib/agglomerative_clustering/cluster_spec.rb
89
+ - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
90
+ - spec/lib/agglomerative_clustering/linkage/average_spec.rb
91
+ - spec/lib/agglomerative_clustering/linkage/base_spec.rb
92
+ - spec/lib/agglomerative_clustering/linkage/center_spec.rb
93
+ - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
94
+ - spec/lib/agglomerative_clustering/linkage/single_spec.rb
95
+ - spec/lib/agglomerative_clustering/set_spec.rb
96
+ - spec/spec_helper.rb
97
+ homepage: ''
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.2.2
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: Ruby Agglomerative Clustering Algorithm
121
+ test_files:
122
+ - spec/factories/lib/agglomerative_clustering/cluster.rb
123
+ - spec/factories/lib/agglomerative_clustering/point.rb
124
+ - spec/factories/lib/agglomerative_clustering/set.rb
125
+ - spec/lib/agglomerative_clustering/cluster_spec.rb
126
+ - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
127
+ - spec/lib/agglomerative_clustering/linkage/average_spec.rb
128
+ - spec/lib/agglomerative_clustering/linkage/base_spec.rb
129
+ - spec/lib/agglomerative_clustering/linkage/center_spec.rb
130
+ - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
131
+ - spec/lib/agglomerative_clustering/linkage/single_spec.rb
132
+ - spec/lib/agglomerative_clustering/set_spec.rb
133
+ - spec/spec_helper.rb