agglomerative_clustering 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +36 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +46 -0
  6. data/LICENSE +22 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +33 -0
  9. data/Rakefile +2 -0
  10. data/agglomerative_clustering.gemspec +23 -0
  11. data/cluster.rb +58 -0
  12. data/lib/agglomerative_clustering.rb +10 -0
  13. data/lib/agglomerative_clustering/cluster.rb +19 -0
  14. data/lib/agglomerative_clustering/euclidean_distance.rb +8 -0
  15. data/lib/agglomerative_clustering/linkage/average.rb +20 -0
  16. data/lib/agglomerative_clustering/linkage/base.rb +22 -0
  17. data/lib/agglomerative_clustering/linkage/center.rb +20 -0
  18. data/lib/agglomerative_clustering/linkage/complete.rb +22 -0
  19. data/lib/agglomerative_clustering/linkage/single.rb +21 -0
  20. data/lib/agglomerative_clustering/point.rb +3 -0
  21. data/lib/agglomerative_clustering/set.rb +75 -0
  22. data/lib/agglomerative_clustering/version.rb +3 -0
  23. data/spec/factories/lib/agglomerative_clustering/cluster.rb +5 -0
  24. data/spec/factories/lib/agglomerative_clustering/point.rb +7 -0
  25. data/spec/factories/lib/agglomerative_clustering/set.rb +5 -0
  26. data/spec/lib/agglomerative_clustering/cluster_spec.rb +11 -0
  27. data/spec/lib/agglomerative_clustering/euclidean_distance_spec.rb +15 -0
  28. data/spec/lib/agglomerative_clustering/linkage/average_spec.rb +25 -0
  29. data/spec/lib/agglomerative_clustering/linkage/base_spec.rb +13 -0
  30. data/spec/lib/agglomerative_clustering/linkage/center_spec.rb +23 -0
  31. data/spec/lib/agglomerative_clustering/linkage/complete_spec.rb +19 -0
  32. data/spec/lib/agglomerative_clustering/linkage/single_spec.rb +19 -0
  33. data/spec/lib/agglomerative_clustering/set_spec.rb +75 -0
  34. data/spec/spec_helper.rb +7 -0
  35. metadata +133 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d52ff4f84aef0fbd56aa453833ea2e5b9c9772e2
4
+ data.tar.gz: 4abb19bb148f77989f43be805769e0676a7182f7
5
+ SHA512:
6
+ metadata.gz: b31e61ad6c08ecdce2326bdab94c412aaee32227775f662ad62361896d69124acbad3ed4c6f8a40f2298de30ac77d65c0fcabd0bfe58c16b6d11bd89e75a7dd5
7
+ data.tar.gz: 8a2a08bda0f2975166536d1ec85cd0f2254044689d8b6f422b6f2170bd64301ae914c05f1264d14a81d7b4b2fb4deb9c4da0fa82abb9b94f57a462ba1689c11d
@@ -0,0 +1,36 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+ outliers.csv
36
+ points.csv
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+ gemspec
3
+ gem 'factory_girl'
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ agglomerative_clustering (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (4.1.6)
10
+ i18n (~> 0.6, >= 0.6.9)
11
+ json (~> 1.7, >= 1.7.7)
12
+ minitest (~> 5.1)
13
+ thread_safe (~> 0.1)
14
+ tzinfo (~> 1.1)
15
+ diff-lcs (1.2.5)
16
+ factory_girl (4.4.0)
17
+ activesupport (>= 3.0.0)
18
+ i18n (0.6.11)
19
+ json (1.8.1)
20
+ minitest (5.4.2)
21
+ rake (10.3.2)
22
+ rspec (3.1.0)
23
+ rspec-core (~> 3.1.0)
24
+ rspec-expectations (~> 3.1.0)
25
+ rspec-mocks (~> 3.1.0)
26
+ rspec-core (3.1.6)
27
+ rspec-support (~> 3.1.0)
28
+ rspec-expectations (3.1.2)
29
+ diff-lcs (>= 1.2.0, < 2.0)
30
+ rspec-support (~> 3.1.0)
31
+ rspec-mocks (3.1.3)
32
+ rspec-support (~> 3.1.0)
33
+ rspec-support (3.1.2)
34
+ thread_safe (0.3.4)
35
+ tzinfo (1.2.2)
36
+ thread_safe (~> 0.1)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ agglomerative_clustering!
43
+ bundler (~> 1.7)
44
+ factory_girl
45
+ rake (~> 10.0)
46
+ rspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Bryan Mulvihill
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Bryan Mulvihill
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ # AgglomerativeClustering
2
+
3
+ Hierarchical Agglomerative Clustering Algorithm
4
+
5
+ Given an input set of 3-dimensional points, groups them into the k nearest clusters.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'agglomerative_clustering'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install agglomerative_clustering
22
+
23
+ ## Usage
24
+
25
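+ Build a set with a linkage strategy, e.g. `AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Single.new)`, `push` `Point.new(x, y, z)` values into it, then call `set.cluster(k)` to obtain `k` clusters (see `cluster.rb` for a complete example).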
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it ( https://github.com/[my-github-username]/agglomerative_clustering/fork )
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create a new Pull Request
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,23 @@
# coding: utf-8
# Gem specification for agglomerative_clustering.
# Puts ./lib on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'agglomerative_clustering/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "agglomerative_clustering"
  spec.version  = AgglomerativeClustering::VERSION
  spec.authors  = ["Bryan Mulvihill"]
  spec.email    = ["bmulvihill@pinsonault.com"]
  spec.summary  = "Ruby Agglomerative Clustering Algorithm"
  spec.homepage = ""
  spec.license  = "MIT"

  # Packaged files: everything tracked by git (NUL-separated listing).
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency 'rspec'
end
@@ -0,0 +1,58 @@
#!/usr/bin/env ruby
# Demo script: generates three groups of 100 random 3-D points, writes them to
# points.csv, removes and reports outliers (outliers.csv), then clusters the
# remaining points into three clusters written to cluster<N>.csv.
require 'agglomerative_clustering'

set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Single.new)

# One coordinate range per group; every group contributes 100 points and each
# point draws x, y and z (in that order) from the group's range.
[0..100, 200..299, 400..499].each do |coordinate_range|
  100.times do
    x = Random.rand(coordinate_range)
    y = Random.rand(coordinate_range)
    z = Random.rand(coordinate_range)
    set.push(Point.new(x, y, z))
  end
end

percentage = 80
distance = 150

# Formats one point as a CSV row.
csv_row = lambda { |pt| "#{pt.x},#{pt.y},#{pt.z}\n" }

File.open('points.csv', 'w') do |file|
  set.points.each { |pt| file << csv_row.call(pt) }
end

# The outlier search runs while outliers.csv is open; it also removes the
# outliers from the set before clustering.
File.open('outliers.csv', 'w') do |file|
  set.find_outliers(percentage, distance).each { |pt| file << csv_row.call(pt) }
end

if set.outliers.empty?
  puts "There are no outliers where #{percentage}% of the points lie at a distance greater than #{distance}"
else
  puts 'Outliers Removed from Set:'
  set.outliers.each { |outlier| puts outlier }
end

set.cluster(3).each_with_index do |group, index|
  File.open("cluster#{index}.csv", 'w') do |file|
    group.points.each { |pt| file << csv_row.call(pt) }
  end
end
@@ -0,0 +1,10 @@
1
+ require "agglomerative_clustering/version"
2
+ require "agglomerative_clustering/euclidean_distance"
3
+ require "agglomerative_clustering/linkage/base"
4
+ require "agglomerative_clustering/linkage/single"
5
+ require "agglomerative_clustering/linkage/complete"
6
+ require "agglomerative_clustering/linkage/average"
7
+ require "agglomerative_clustering/linkage/center"
8
+ require "agglomerative_clustering/point"
9
+ require "agglomerative_clustering/cluster"
10
+ require "agglomerative_clustering/set"
@@ -0,0 +1,19 @@
module AgglomerativeClustering
  # A group of points. A cluster starts with a single point and grows by
  # absorbing the points of other clusters.
  class Cluster
    # @return [Array] the points currently held by this cluster
    attr_reader :points

    # @param point the first member of the cluster
    def initialize(point)
      @points = [point]
    end

    # Appends every point of +cluster+ to this cluster.
    #
    # The other cluster is left untouched. Array#concat is used instead of
    # appending while iterating, so merging a cluster with itself terminates.
    #
    # @param cluster [Cluster] the cluster whose points are absorbed
    # @return [Cluster] self, to allow chaining
    def merge(cluster)
      points.concat(cluster.points)
      self
    end
  end
end
@@ -0,0 +1,8 @@
module AgglomerativeClustering
  # Mixin providing the straight-line distance between two points.
  module EuclideanDistance
    # Computes the Euclidean distance between two coordinate sequences.
    #
    # Both arguments only need to support +zip+ / +each+, so Arrays and
    # Structs work alike. Coordinates are paired positionally.
    #
    # Thanks to https://blog.philipcunningham.org/posts/ruby-euclidean-distance
    #
    # @param point1 [#zip] first point's coordinates
    # @param point2 [#each] second point's coordinates
    # @return [Float] the distance; 0.0 when both points have no coordinates
    def euclidean_distance point1, point2
      # Seeding the fold with 0 keeps empty input from producing nil (which
      # Math.sqrt rejects) and sums the squares in a single pass.
      sum_of_squares = point1.zip(point2).inject(0) do |sum, (a, b)|
        delta = a - b
        sum + delta * delta
      end
      Math.sqrt(sum_of_squares)
    end
  end
end
@@ -0,0 +1,20 @@
module AgglomerativeClustering
  module Linkage
    # Average linkage: the distance between two clusters is the mean of the
    # distances between every cross-cluster pair of points.
    class Average < Base

      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Float] mean pairwise distance between the clusters' points
      def calculate_distance(cluster1, cluster2)
        # Pairs are visited cluster1-major, cluster2-minor.
        pairwise = cluster1.points.flat_map do |from|
          cluster2.points.map { |to| euclidean_distance(from, to) }
        end
        pairwise.inject(:+) / pairwise.size
      end

      # @return [Array] the pair selected by the most recent #cluster call
      def clusters_to_merge
        @clusters_to_merge ||= []
      end
    end
  end
end
@@ -0,0 +1,22 @@
module AgglomerativeClustering
  module Linkage
    # Shared search logic for all linkage strategies. Subclasses supply
    # +calculate_distance(cluster1, cluster2)+, which defines how far apart
    # two clusters are.
    class Base
      include EuclideanDistance

      # Finds the two clusters that are closest to each other.
      #
      # Every unordered pair is compared once; on ties the first pair
      # encountered wins because the comparison is strict.
      #
      # @param clusters [Array<Cluster>] candidate clusters
      # @return [Array<Cluster>] the closest pair, or an empty array when
      #   fewer than two clusters are given
      def cluster(clusters)
        # Reset first so a previous call's pair is never returned for an
        # input that contains no pair at all.
        @clusters_to_merge = []
        min_cluster_dist = Float::INFINITY
        clusters.combination(2).each do |cluster1, cluster2|
          distance = calculate_distance(cluster1, cluster2)
          next unless distance < min_cluster_dist
          min_cluster_dist = distance
          @clusters_to_merge = [cluster1, cluster2]
        end
        clusters_to_merge
      end

      # @return [Array] the pair selected by the most recent #cluster call
      def clusters_to_merge
        @clusters_to_merge ||= []
      end

    end
  end
end
@@ -0,0 +1,20 @@
module AgglomerativeClustering
  module Linkage
    # Centroid linkage: the distance between two clusters is the distance
    # between their center points.
    class Center < Base

      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Float] distance between the two clusters' centers
      def calculate_distance(cluster1, cluster2)
        point1, point2 = center_point(cluster1), center_point(cluster2)
        euclidean_distance(point1, point2)
      end

      # Computes the coordinate-wise mean of all points in a cluster.
      #
      # Works for any number of points. The previous zip-based version was
      # only correct for exactly two points: it raised on a single-point
      # cluster and ignored every point after the second.
      #
      # @param cluster [Cluster]
      # @return [Array<Float>] mean of each coordinate
      def center_point cluster
        members = cluster.points
        count = members.size.to_f
        # Transpose turns a list of points into one list per coordinate axis.
        members.map(&:to_a).transpose.map { |axis| axis.inject(:+) / count }
      end

      # @return [Array] the pair selected by the most recent #cluster call
      def clusters_to_merge
        @clusters_to_merge ||= []
      end

    end
  end
end
@@ -0,0 +1,22 @@
module AgglomerativeClustering
  module Linkage
    # Complete linkage: the distance between two clusters is the largest
    # distance between any cross-cluster pair of points.
    class Complete < Base

      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Numeric] the greatest pairwise distance (0 when no pair
      #   exceeds zero)
      def calculate_distance(cluster1, cluster2)
        pairwise = cluster1.points.flat_map do |from|
          cluster2.points.map { |to| euclidean_distance(from, to) }
        end
        # Fold with a strict comparison starting from 0.
        pairwise.inject(0) { |farthest, gap| gap > farthest ? gap : farthest }
      end

      # @return [Array] the pair selected by the most recent #cluster call
      def clusters_to_merge
        @clusters_to_merge ||= []
      end

    end
  end
end
@@ -0,0 +1,21 @@
module AgglomerativeClustering
  module Linkage
    # Single linkage: the distance between two clusters is the smallest
    # distance between any cross-cluster pair of points.
    class Single < Base

      # @param cluster1 [Cluster]
      # @param cluster2 [Cluster]
      # @return [Float] the smallest pairwise distance (infinity when there
      #   are no pairs)
      def calculate_distance(cluster1, cluster2)
        pairwise = cluster1.points.flat_map do |from|
          cluster2.points.map { |to| euclidean_distance(from, to) }
        end
        # Fold with a strict comparison starting from positive infinity.
        pairwise.inject(Float::INFINITY) { |nearest, gap| gap < nearest ? gap : nearest }
      end

      # @return [Array] the pair selected by the most recent #cluster call
      def clusters_to_merge
        @clusters_to_merge ||= []
      end
    end
  end
end
@@ -0,0 +1,3 @@
# A 3-D point. +index+ is a plain attribute rather than a struct member, so it
# takes no part in equality, #to_a or iteration over the coordinates.
Point = Struct.new(:x, :y, :z) do
  attr_reader :index
  attr_writer :index
end
@@ -0,0 +1,75 @@
require 'matrix'
module AgglomerativeClustering
  # A collection of points that can be stripped of outliers and grouped into
  # clusters using a pluggable linkage strategy.
  class Set
    include EuclideanDistance
    # @return [Array] the points currently in the set
    attr_reader :points

    # @param linkage [#cluster] strategy that, given the current clusters,
    #   returns the pair to merge next
    def initialize(linkage)
      @linkage = linkage
      @points = []
    end

    # Adds a point, recording its position in the set on the point itself.
    #
    # @param point [#index=] the point to add
    # @return [Array] the updated list of points
    def push point
      point.index = points.size
      # Cached clusters / distances no longer describe the set.
      invalidate_caches
      points << point
    end

    # @return [Array<Cluster>] the current clusters; initially one per point
    def clusters
      @clusters ||= points.map{ |point| AgglomerativeClustering::Cluster.new(point) }
    end

    # @return [Matrix] pairwise point distances rounded to two decimals
    def distance_matrix
      @distance_matrix ||= build_distance_matrix
    end

    # Prints the distance matrix, one row per line.
    def print_distance_matrix
      puts distance_matrix.to_a.map(&:inspect)
    end

    # Repeatedly merges the closest pair of clusters until at most
    # +total_clusters+ remain.
    #
    # @param total_clusters [Integer] desired number of clusters
    # @return [Array<Cluster>] the resulting clusters
    def cluster total_clusters
      # A single cluster cannot be merged any further, so stop there even if
      # fewer clusters were requested (previously this never terminated).
      while clusters.size > total_clusters && clusters.size > 1
        merge_clusters(@linkage.cluster(clusters))
      end
      clusters
    end

    # Merges the second cluster of the pair into the first and drops the
    # second from the set's clusters.
    #
    # @param min_clusters [Array<Cluster>] the pair to merge
    # @return [Cluster] the surviving, enlarged cluster
    def merge_clusters(min_clusters)
      survivor, absorbed = min_clusters
      survivor.merge(absorbed)
      clusters.reject! { |cluster| cluster == absorbed }
      survivor
    end

    # @return [Array] every distinct outlier found so far
    def outliers
      set_outliers.uniq
    end

    # Finds and removes outliers. A point is an outlier when the fraction of
    # the other points lying farther than +distance+ from it exceeds
    # +percentage_of_clusters+ percent.
    #
    # @param percentage_of_clusters [Numeric] threshold in percent (0-100)
    # @param distance [Numeric] distance beyond which another point counts
    #   as far away
    # @return [Array] every distinct outlier found so far
    def find_outliers percentage_of_clusters, distance
      others = points.size - 1
      if others > 0
        # Float arithmetic: integer division would truncate both ratios and
        # make the percentage threshold meaningless.
        threshold = percentage_of_clusters / 100.0
        matrix = distance_matrix
        found = points.each_index.select do |row|
          far_away = matrix.row(row).count { |gap| gap > distance }
          far_away / others.to_f > threshold
        end
        set_outliers.concat(found.map { |row| points[row] })
      end
      known = outliers
      points.reject! { |point| known.include?(point) }
      # The set may have shrunk; cached clusters / distances are stale.
      invalidate_caches
      known
    end

    private

    def set_outliers
      @set_outliers ||= []
    end

    # Drops memoised data derived from the current points.
    def invalidate_caches
      @clusters = nil
      @distance_matrix = nil
    end

    def build_distance_matrix
      Matrix.build(points.size, points.size) do |row, column|
        euclidean_distance(points[row], points[column]).round(2)
      end
    end

  end
end
@@ -0,0 +1,3 @@
module AgglomerativeClustering
  # Released gem version, read by the gemspec.
  VERSION = '0.0.1'
end
@@ -0,0 +1,5 @@
# Factory for a Cluster seeded with one freshly built point.
FactoryGirl.define do
  factory(:cluster, class: AgglomerativeClustering::Cluster) do
    # Cluster#initialize requires a point, so construct it explicitly.
    initialize_with do
      new(FactoryGirl.build(:point))
    end
  end
end
@@ -0,0 +1,7 @@
# Factory for a Point; each coordinate defaults to Random.rand(1000) and can
# be overridden per build.
FactoryGirl.define do
  factory(:point, class: Point) do
    x do
      Random.rand(1000)
    end
    y do
      Random.rand(1000)
    end
    z do
      Random.rand(1000)
    end
  end
end
@@ -0,0 +1,5 @@
# Factory for a Set that uses single linkage.
FactoryGirl.define do
  factory(:set, class: AgglomerativeClustering::Set) do
    # Set#initialize requires a linkage strategy, so construct it explicitly.
    initialize_with do
      new(AgglomerativeClustering::Linkage::Single.new)
    end
  end
end
@@ -0,0 +1,11 @@
describe AgglomerativeClustering::Cluster do

  context '#merge' do
    it 'will merge two clusters' do
      target = FactoryGirl.build(:cluster)
      other = FactoryGirl.build(:cluster)
      # Snapshot the expected membership before the merge mutates the target.
      expected_points = target.points + other.points
      merged = target.merge(other)
      expect(merged.points).to eql(expected_points)
    end
  end
end
@@ -0,0 +1,15 @@
describe AgglomerativeClustering::EuclideanDistance do
  # Anonymous host for the mixin; avoids defining a global constant from
  # inside a hook on every example run.
  let(:calculator) do
    Class.new { include AgglomerativeClustering::EuclideanDistance }.new
  end

  context '#euclidean_distance' do
    it 'will return the distance between two points' do
      p1 = FactoryGirl.build(:point, x: -1, y: 2, z: 3)
      p2 = FactoryGirl.build(:point, x: 4, y: 0, z: -3)
      # sqrt(5^2 + 2^2 + 6^2) = sqrt(65)
      expect(calculator.euclidean_distance(p1, p2).round(3)).to eql(8.062)
    end
  end
end
@@ -0,0 +1,25 @@
describe AgglomerativeClustering::Linkage::Average do

  context '#calculate_distance' do
    it 'will calculate distance between clusters based on the average distance between points' do
      average_linkage = AgglomerativeClustering::Linkage::Average.new
      point1 = FactoryGirl.build(:point, x: 1, y: 1, z: 1)
      point2 = FactoryGirl.build(:point, x: 7, y: 7, z: 7)
      point3 = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
      point4 = FactoryGirl.build(:point, x: 5, y: 5, z: 5)

      # Two clusters of two points each: {point1, point2} and {point3, point4}.
      first_cluster = AgglomerativeClustering::Cluster.new(point1)
        .merge(AgglomerativeClustering::Cluster.new(point2))
      second_cluster = AgglomerativeClustering::Cluster.new(point3)
        .merge(AgglomerativeClustering::Cluster.new(point4))

      # Summed in the same pair order the linkage uses, so the floating
      # point result matches exactly.
      distance1 = average_linkage.euclidean_distance(point1, point3)
      distance2 = average_linkage.euclidean_distance(point1, point4)
      distance3 = average_linkage.euclidean_distance(point2, point3)
      distance4 = average_linkage.euclidean_distance(point2, point4)
      average_distance = (distance1 + distance2 + distance3 + distance4)/4

      expect(average_linkage.calculate_distance(first_cluster, second_cluster)).to eql(average_distance)
    end
  end

end
@@ -0,0 +1,13 @@
describe AgglomerativeClustering::Linkage::Base do

  context '#cluster' do
    it 'will return the clusters where min distance is closest' do
      single_linkage = AgglomerativeClustering::Linkage::Single.new
      set = FactoryGirl.build(:set)
      # With exactly two points there is only one candidate pair.
      2.times { set.push(FactoryGirl.build(:point)) }
      closest_pair = single_linkage.cluster(set.clusters)
      expect(closest_pair).to eql([set.clusters[0], set.clusters[1]])
    end
  end

end
@@ -0,0 +1,23 @@
describe AgglomerativeClustering::Linkage::Center do

  context '#calculate_distance' do
    it 'will calculate distance between clusters based on the center distance between points' do
      center_linkage = AgglomerativeClustering::Linkage::Center.new
      point1 = FactoryGirl.build(:point, x: 1, y: 1, z: 1)
      point2 = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
      point3 = FactoryGirl.build(:point, x: 7, y: 7, z: 7)
      point4 = FactoryGirl.build(:point, x: 5, y: 5, z: 5)

      # Two clusters of two points each: {point1, point2} and {point3, point4}.
      near_cluster = AgglomerativeClustering::Cluster.new(point1)
        .merge(AgglomerativeClustering::Cluster.new(point2))
      far_cluster = AgglomerativeClustering::Cluster.new(point3)
        .merge(AgglomerativeClustering::Cluster.new(point4))

      # Hand-computed centers of the two clusters.
      center_distance = center_linkage.euclidean_distance(Point.new(1.5, 1.5, 1.5), Point.new(6, 6, 6))

      expect(center_linkage.calculate_distance(near_cluster, far_cluster)).to eql(center_distance)
    end
  end

end
@@ -0,0 +1,19 @@
describe AgglomerativeClustering::Linkage::Complete do

  context '#calculate_distance' do
    it 'will calculate distance between clusters based on the max distance between points' do
      complete_linkage = AgglomerativeClustering::Linkage::Complete.new
      min_point = FactoryGirl.build(:point, x: 1, y: 1, z: 1)
      max_point = FactoryGirl.build(:point, x: 7, y: 7, z: 7)

      # {min_point, (2,2,2)} versus {(5,5,5), max_point}: the farthest
      # cross-cluster pair is min_point / max_point.
      low_cluster = AgglomerativeClustering::Cluster.new(min_point)
        .merge(AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 2, y: 2, z: 2)))
      high_cluster = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 5, y: 5, z: 5))
        .merge(AgglomerativeClustering::Cluster.new(max_point))

      max_distance = complete_linkage.euclidean_distance(min_point, max_point)
      expect(complete_linkage.calculate_distance(low_cluster, high_cluster)).to eql(max_distance)
    end
  end

end
@@ -0,0 +1,19 @@
describe AgglomerativeClustering::Linkage::Single do

  context '#calculate_distance' do
    it 'will calculate distance between clusters based on the min distance between points' do
      single_linkage = AgglomerativeClustering::Linkage::Single.new
      min_point = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
      max_point = FactoryGirl.build(:point, x: 5, y: 5, z: 5)

      # {min_point, (1,1,1)} versus {(7,7,7), max_point}: the nearest
      # cross-cluster pair is min_point / max_point.
      low_cluster = AgglomerativeClustering::Cluster.new(min_point)
        .merge(AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 1, y: 1, z: 1)))
      high_cluster = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 7, y: 7, z: 7))
        .merge(AgglomerativeClustering::Cluster.new(max_point))

      min_distance = single_linkage.euclidean_distance(min_point, max_point)
      expect(single_linkage.calculate_distance(low_cluster, high_cluster)).to eql(min_distance)
    end
  end

end
@@ -0,0 +1,75 @@
describe AgglomerativeClustering::Set do

  # Builds a point with the given coordinates.
  def build_point(x, y, z)
    FactoryGirl.build(:point, x: x, y: y, z: z)
  end

  # Pushes two far-away points into the set and returns them in push order.
  def push_outliers
    far_points = [build_point(100, 200, 300), build_point(-100, -200, -300)]
    far_points.each { |point| @set.push(point) }
    far_points
  end

  before do
    @set = FactoryGirl.build(:set)
    @point1 = build_point(2, 2, 3)
    @point2 = build_point(1, 4, 1)
    @point3 = build_point(5, 2, 2)
    @point4 = build_point(5, 2, 3)
    [@point1, @point2, @point3, @point4].each { |point| @set.push(point) }
  end

  context '#cluster' do
    it 'will return clusters of points based on requested number of clusters' do
      expect(@set.cluster(3).size).to eql(3)
    end

    it 'will cluster points that are closest to each other' do
      @point5 = build_point(5, 2, 4)
      @point6 = build_point(5, 3, 4)
      @point7 = build_point(15, 20, 21)
      @point8 = build_point(18, 21, 21)
      @point9 = build_point(16, 22, 21)
      [@point5, @point6, @point7, @point8, @point9].each { |point| @set.push(point) }

      clusters = @set.cluster(3)
      expected_members = [
        [@point1, @point2],
        [@point3, @point4, @point5, @point6],
        [@point7, @point8, @point9]
      ]
      # Each resulting cluster may only contain points from its expected group.
      expected_members.each_with_index do |allowed, position|
        clusters[position].points.each do |point|
          expect(allowed.include?(point)).to be true
        end
      end
    end
  end

  context '#merge_clusters' do
    it 'will merge two clusters into one' do
      merged = @set.merge_clusters([@set.clusters[0], @set.clusters[1]])
      expect(merged.points).to eql([@point1, @point2])
    end
  end

  context '#find_outliers' do
    it 'will return a list of outliers' do
      outlier1, outlier2 = push_outliers

      percentage_of_points = 90
      distance = 10
      expect(@set.find_outliers(percentage_of_points, distance)).to eql([outlier1, outlier2])
    end
  end

  context '#outliers' do
    it 'will return the outliers found by #find_outliers' do
      outlier1, outlier2 = push_outliers

      percentage_of_points = 90
      distance = 10
      @set.find_outliers(percentage_of_points, distance)
      expect(@set.outliers).to eql([outlier1, outlier2])
    end
  end
end
@@ -0,0 +1,7 @@
# Shared RSpec setup: loads the gem and FactoryGirl, exposes the FactoryGirl
# syntax helpers to examples and loads the factory definitions.
require 'factory_girl'
require 'agglomerative_clustering'

RSpec.configure { |config| config.include FactoryGirl::Syntax::Methods }

FactoryGirl.find_definitions
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: agglomerative_clustering
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Bryan Mulvihill
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - bmulvihill@pinsonault.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - Gemfile
65
+ - Gemfile.lock
66
+ - LICENSE
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - agglomerative_clustering.gemspec
71
+ - cluster.rb
72
+ - lib/agglomerative_clustering.rb
73
+ - lib/agglomerative_clustering/cluster.rb
74
+ - lib/agglomerative_clustering/euclidean_distance.rb
75
+ - lib/agglomerative_clustering/linkage/average.rb
76
+ - lib/agglomerative_clustering/linkage/base.rb
77
+ - lib/agglomerative_clustering/linkage/center.rb
78
+ - lib/agglomerative_clustering/linkage/complete.rb
79
+ - lib/agglomerative_clustering/linkage/single.rb
80
+ - lib/agglomerative_clustering/point.rb
81
+ - lib/agglomerative_clustering/set.rb
82
+ - lib/agglomerative_clustering/version.rb
83
+ - outliers.csv
84
+ - points.csv
85
+ - spec/factories/lib/agglomerative_clustering/cluster.rb
86
+ - spec/factories/lib/agglomerative_clustering/point.rb
87
+ - spec/factories/lib/agglomerative_clustering/set.rb
88
+ - spec/lib/agglomerative_clustering/cluster_spec.rb
89
+ - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
90
+ - spec/lib/agglomerative_clustering/linkage/average_spec.rb
91
+ - spec/lib/agglomerative_clustering/linkage/base_spec.rb
92
+ - spec/lib/agglomerative_clustering/linkage/center_spec.rb
93
+ - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
94
+ - spec/lib/agglomerative_clustering/linkage/single_spec.rb
95
+ - spec/lib/agglomerative_clustering/set_spec.rb
96
+ - spec/spec_helper.rb
97
+ homepage: ''
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.2.2
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: Ruby Agglomerative Clustering Algorithm
121
+ test_files:
122
+ - spec/factories/lib/agglomerative_clustering/cluster.rb
123
+ - spec/factories/lib/agglomerative_clustering/point.rb
124
+ - spec/factories/lib/agglomerative_clustering/set.rb
125
+ - spec/lib/agglomerative_clustering/cluster_spec.rb
126
+ - spec/lib/agglomerative_clustering/euclidean_distance_spec.rb
127
+ - spec/lib/agglomerative_clustering/linkage/average_spec.rb
128
+ - spec/lib/agglomerative_clustering/linkage/base_spec.rb
129
+ - spec/lib/agglomerative_clustering/linkage/center_spec.rb
130
+ - spec/lib/agglomerative_clustering/linkage/complete_spec.rb
131
+ - spec/lib/agglomerative_clustering/linkage/single_spec.rb
132
+ - spec/lib/agglomerative_clustering/set_spec.rb
133
+ - spec/spec_helper.rb