agglomerative_clustering 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 03755a284a11c8225365f9b22df38da433e07994
4
- data.tar.gz: 6996369a3b16c5541cd694a8315c2c40606b91e3
3
+ metadata.gz: 3c8adc349d03e25a7a982a0479069f510eb6a97a
4
+ data.tar.gz: 91cfe58dd5b19999699d1cf331bcd12b0b2451db
5
5
  SHA512:
6
- metadata.gz: 1061fb7aae2bc9c6cee7ba054593331a6e9db4c5388d895f821aad46b6d05d86721f61c947ce5377095ff7dc3adf8f93494ce9a3697f44357521d5ecc97e8523
7
- data.tar.gz: f481ee1c97df283e3a354ed447319fbf9041d5507ac677269288e9c7802eb743510e016a6a0fca9a5232eb0a98420a1c393e1a08c9a52a0dfac91fcc21d91fd7
6
+ metadata.gz: 7ecefeb4ecb8f02f10da56b95cb883c6752631a28fc0212fb8e8f72f3b57d361c7597179ba9bf97c4a804f0540f7a81393a0c0263048994c3b14bca63bc6e364
7
+ data.tar.gz: 8b979ffcc0778884639e02ae3651792582802efd31e58ccd8d2964d1d56b30b94dd00953389a3df55654287412d4bd8ef64321a65ab3af3386570106fdce6969
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env ruby
2
+ require 'agglomerative_clustering'
3
+
4
+ puts "Enter a total number of points"
5
+ points = gets.chomp.to_i
6
+ puts "Please enter a P value (Percentage of points to determine outliers)"
7
+ percentage = gets.chomp.to_f
8
+ puts "Please enter a d value (Distance of points to determine outliers)"
9
+ distance = gets.chomp.to_f
10
+ puts "Please enter a number of clusters"
11
+ k = gets.chomp.to_i
12
+ puts "Enter y to enter a center point for each pseudo cluster (Or press enter for random)"
13
+ center_points = []
14
+ cp_input = gets.chomp
15
+ if cp_input == 'y'
16
+ for l in 0..k-1
17
+ center_points << gets.chomp.split(',').map(&:to_i)
18
+ end
19
+ else
20
+ for i in 0..k-1
21
+ center_points << [Random.rand(-100..100), Random.rand(-100..100), Random.rand(-100..100)]
22
+ end
23
+ end
24
+
25
+ puts "Processing.."
26
+ min_set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Single.new)
27
+ max_set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Complete.new)
28
+ average_set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Average.new)
29
+ center_set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Center.new)
30
+ sets = [min_set, max_set, average_set, center_set]
31
+
32
+ # Create the set of points from the provided parameters (points/k per cluster, scattered within `radius` of each center point)
33
+ radius = 25
34
+ for clusters in 0..k-1
35
+ cp = center_points[clusters]
36
+ for i in 0..(points/k-1)
37
+ x = cp[0] + (Random.rand(-radius..radius))
38
+ y = cp[1] + (Random.rand(-radius..radius))
39
+ z = cp[2] + (Random.rand(-radius..radius))
40
+ p = Point.new(x,y,z)
41
+ sets.each {|set| set.push(p)}
42
+ end
43
+ end
44
+
45
+ # Find and Remove Outliers in the Set
46
+ sets.each do |set|
47
+ set.find_outliers(percentage, distance)
48
+ end
49
+
50
+ # Output outliers to the console
51
+ if min_set.outliers.any?
52
+ puts 'Outliers Removed from Set:'
53
+ min_set.outliers.each do |outlier|
54
+ puts outlier
55
+ end
56
+ else
57
+ puts "There are no outliers where #{percentage}% of the points lie at a distance greater than #{distance}"
58
+ end
59
+
60
+ # Run the clustering algorithms
61
+ min_clusters = min_set.cluster(k)
62
+ max_clusters = max_set.cluster(k)
63
+ average_clusters = average_set.cluster(k)
64
+ center_clusters = center_set.cluster(k)
65
+
66
+ # Outputs points on each cluster
67
+ puts "=====Minimum Linkage Clusters====="
68
+ min_clusters.each_with_index{|c, i| puts "Cluster #{i + 1}:\n #{c.points}" }
69
+ puts "=====Maximum Linkage Clusters====="
70
+ max_clusters.each_with_index{|c, i| puts "Cluster #{i + 1}:\n #{c.points}" }
71
+ puts "=====Average Linkage Clusters====="
72
+ average_clusters.each_with_index{|c, i| puts "Cluster #{i + 1}:\n #{c.points}" }
73
+ puts "=====Center Linkage Clusters======"
74
+ center_clusters.each_with_index{|c, i| puts "Cluster #{i + 1}:\n #{c.points}" }
75
+
76
+ # Output Silhouette Coefficients for the clusters in each set
77
+ puts 'Minimum Linkage Silhouette Coefficients: '
78
+ s1 = min_clusters.map { |c| AgglomerativeClustering::SilhouetteCoefficient.new(c).measure(min_clusters) }
79
+ p s1
80
+ puts 'Maximum Linkage Silhouette Coefficients: '
81
+ s2 = max_clusters.map { |c| AgglomerativeClustering::SilhouetteCoefficient.new(c).measure(max_clusters) }
82
+ p s2
83
+ puts 'Average Linkage Silhouette Coefficients: '
84
+ s3 = average_clusters.map { |cluster| AgglomerativeClustering::SilhouetteCoefficient.new(cluster).measure(average_clusters) }
85
+ p s3
86
+ puts 'Center Linkage Silhouette Coefficients: '
87
+ s4 = center_clusters.map { |cluster| AgglomerativeClustering::SilhouetteCoefficient.new(cluster).measure(center_clusters) }
88
+ p s4
89
+
90
+ # Output the best average silhouette coefficient and the linkage type that produced it
91
+ type_hash = {0 => 'Minimum', 1 => 'Maximum', 2 => 'Average', 3 => 'Center'}
92
+ all = [s1,s2,s3,s4].map { |s| s.inject(:+)/s.size }
93
+ puts "The best Average Silhouette Coefficient is #{all.max} from #{type_hash[all.index(all.max)]} Linkage"
@@ -8,6 +8,8 @@ module AgglomerativeClustering
8
8
 
9
9
  def points
10
10
  @points ||= []
11
+ @points.sort_by!{|p| [p.x, p.y, p.z] } if @points.any?
12
+ @points
11
13
  end
12
14
 
13
15
  def merge(cluster)
@@ -8,9 +8,10 @@ module AgglomerativeClustering
8
8
  end
9
9
 
10
10
  def center_point cluster
11
- cluster.points.first.zip(*cluster.points[1..cluster.points.size-1]).map { |a,b| (a + b)/cluster.points.size.to_f }
11
+ return cluster.points.first if cluster.points.size == 1
12
+ cluster.points.first.zip(*cluster.points[1..cluster.points.size-1]).map { |a| a.inject(:+).to_f/a.size.to_f }
12
13
  end
13
-
14
+
14
15
  end
15
16
  end
16
17
  end
@@ -38,9 +38,12 @@ module AgglomerativeClustering
38
38
  distance_matrix.matrix.each_with_index do |index, row, column|
39
39
  count_hash[row] ||= 0
40
40
  count_hash[row] += 1 if distance_matrix.matrix[row, column] > distance
41
- set_outliers << points[row] if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
41
+ if count_hash[row]/(distance_matrix.matrix.row_count - 1) > percentage_of_clusters/100
42
+ set_outliers << points[row]
43
+ end
42
44
  end
43
45
  points.reject! { |point| outliers.include?(point) }
46
+ @distance_matrix = build_distance_matrix
44
47
  outliers
45
48
  end
46
49
 
@@ -7,6 +7,8 @@ module AgglomerativeClustering
7
7
  @main_cluster = main_cluster
8
8
  end
9
9
 
10
+ # Measures the silhouette coefficient of a cluster compared to all other clusters
11
+ # Returns the average silhouette coefficient of a cluster
10
12
  def measure clusters
11
13
  silhouettes = []
12
14
  average_distances = []
@@ -15,23 +17,25 @@ module AgglomerativeClustering
15
17
  (clusters - [main_cluster]).each do |cluster|
16
18
  distances = []
17
19
  cluster.points.each do |point2|
18
- distances << euclidean_distance(point1, point2)
20
+ distances << euclidean_distance(point1, point2).round(2)
19
21
  end
20
22
  average_distances << distances.inject(:+)/distances.size
21
23
  end
22
- b1 = average_distances.min
24
+ b1 = average_distances.min || 0
23
25
  s1 = (b1 - a1)/[a1,b1].max
24
26
  silhouettes << s1
25
27
  end
26
- silhouettes.inject(:+) / silhouettes.size
28
+ (silhouettes.inject(:+) / silhouettes.size).round(2)
27
29
  end
28
30
 
31
+ # Calculates the a1 value of a point: its average distance to the other points in the main cluster
29
32
  def calculate_a1 point
30
33
  distances = []
31
34
  main_cluster.points.each do |point1|
32
- distances << euclidean_distance(point, point1)
35
+ distances << euclidean_distance(point, point1).round(2)
33
36
  end
34
- distances.inject(:+)/(distances.size - 1)
37
+ return 0 if distances.size == 1
38
+ (distances.inject(:+)/(distances.size - 1)).round(2)
35
39
  end
36
40
 
37
41
  end
@@ -1,3 +1,3 @@
1
1
  module AgglomerativeClustering
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -5,7 +5,7 @@ describe AgglomerativeClustering::Cluster do
5
5
  cluster1 = FactoryGirl.build(:cluster)
6
6
  cluster2 = FactoryGirl.build(:cluster)
7
7
  points = cluster1.points + cluster2.points
8
- expect(cluster1.merge(cluster2).points).to eql(points)
8
+ expect(cluster1.merge(cluster2).points).to eql(points.sort_by{|p| [p.x, p.y, p.z]})
9
9
  end
10
10
  end
11
11
  end
@@ -3,17 +3,17 @@ describe AgglomerativeClustering::Linkage::Single do
3
3
  context '#calculate_distance' do
4
4
  it 'will calculate distance between clusters based on the min distnace between points' do
5
5
  single_linkage = AgglomerativeClustering::Linkage::Single.new
6
- min_point = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
7
- max_point = FactoryGirl.build(:point, x: 5, y: 5, z: 5)
8
- cluster1 = AgglomerativeClustering::Cluster.new(min_point)
6
+ min_point1 = FactoryGirl.build(:point, x: 2, y: 2, z: 2)
7
+ min_point2 = FactoryGirl.build(:point, x: 5, y: 5, z: 5)
8
+ cluster1 = AgglomerativeClustering::Cluster.new(min_point1)
9
9
  cluster2 = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 1, y: 1, z: 1))
10
10
  cluster3 = AgglomerativeClustering::Cluster.new(FactoryGirl.build(:point, x: 7, y: 7, z: 7))
11
- cluster4 = AgglomerativeClustering::Cluster.new(max_point)
11
+ cluster4 = AgglomerativeClustering::Cluster.new(min_point2)
12
12
  cluster1 = cluster1.merge(cluster2)
13
13
  cluster3 = cluster3.merge(cluster4)
14
- min_distance = single_linkage.euclidean_distance(min_point, max_point)
14
+ min_distance = single_linkage.euclidean_distance(min_point1, min_point2)
15
15
  expect(single_linkage.calculate_distance(cluster1, cluster3)).to eql(min_distance)
16
16
  end
17
17
  end
18
-
18
+
19
19
  end
@@ -18,7 +18,7 @@ describe AgglomerativeClustering::SilhouetteCoefficient do
18
18
  cluster4.merge(cluster5).merge(cluster6)
19
19
  sc = AgglomerativeClustering::SilhouetteCoefficient.new(cluster1)
20
20
  clusters = [cluster1, cluster4]
21
- expect(sc.measure(clusters).round(4)).to eql(-0.0893)
21
+ expect(sc.measure(clusters)).to eql(-0.09)
22
22
  end
23
23
  end
24
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agglomerative_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Mulvihill
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-02 00:00:00.000000000 Z
11
+ date: 2014-11-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -67,7 +67,7 @@ files:
67
67
  - README.md
68
68
  - Rakefile
69
69
  - agglomerative_clustering.gemspec
70
- - cluster.rb
70
+ - cluster_example.rb
71
71
  - lib/agglomerative_clustering.rb
72
72
  - lib/agglomerative_clustering/cluster.rb
73
73
  - lib/agglomerative_clustering/distance_matrix.rb
data/cluster.rb DELETED
@@ -1,62 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'agglomerative_clustering'
3
-
4
- set = AgglomerativeClustering::Set.new(AgglomerativeClustering::Linkage::Single.new)
5
- for i in 0..99
6
- x = Random.rand(0..100)
7
- y = Random.rand(0..100)
8
- z = Random.rand(0..100)
9
- p = Point.new(x,y,z)
10
- set.push(p)
11
- end
12
- for i in 100..199
13
- x = Random.rand(200..299)
14
- y = Random.rand(200..299)
15
- z = Random.rand(200..299)
16
- p = Point.new(x,y,z)
17
- set.push(p)
18
- end
19
- for i in 200..299
20
- x = Random.rand(400..499)
21
- y = Random.rand(400..499)
22
- z = Random.rand(400..499)
23
- p = Point.new(x,y,z)
24
- set.push(p)
25
- end
26
-
27
- percentage = 80
28
- distance = 150
29
-
30
- open('points.csv', 'w') do |f|
31
- set.points.each do |point|
32
- f << "#{point.x},#{point.y},#{point.z}\n"
33
- end
34
- end
35
-
36
- open('outliers.csv', 'w') do |f|
37
- set.find_outliers(percentage, distance).each do |point|
38
- f << "#{point.x},#{point.y},#{point.z}\n"
39
- end
40
- end
41
-
42
- if set.outliers.any?
43
- puts 'Outliers Removed from Set:'
44
- set.outliers.each do |outlier|
45
- puts outlier
46
- end
47
- else
48
- puts "There are no outliers where #{percentage}% of the points lie at a distance greater than #{distance}"
49
- end
50
-
51
- clusters = set.cluster(3)
52
- clusters.each_with_index do |cluster, index|
53
- open("cluster#{index}.csv", 'w') do |f|
54
- cluster.points.each do |point|
55
- f << "#{point.x},#{point.y},#{point.z}\n"
56
- end
57
- end
58
- end
59
-
60
- puts 'Silhouette Coefficient of First Cluster: '
61
- sc = AgglomerativeClustering::SilhouetteCoefficient.new(clusters[0])
62
- puts sc.measure(clusters)