k_means_pp 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 25114f424713579b656eddb1a275a59103a860c5
4
+ data.tar.gz: f3ef959b9a7044a048903c44b2c55a4c0bc11583
5
+ SHA512:
6
+ metadata.gz: ea81b5f48c62e0654cee9a635ffbc16e829c5129e1e26a02477217287a517416ce5adfe41a150199acc87dec26d335b9d89d738ea94bd31441ce419002d23625
7
+ data.tar.gz: 9587a9d3a4d0c7d6d3e14d945d92bab9c81e9779699ef4f53dc8e232ea56f073d2fae1f0baad6c96ea2af6fc0ae01bd709964491ed9333f6e379e530acaf6bc4
@@ -0,0 +1,16 @@
1
+ /*.gem
2
+ /examples/report-*.*
3
+ /.bundle/
4
+ /.yardoc
5
+ /Gemfile.lock
6
+ /_yardoc/
7
+ /coverage/
8
+ /doc/
9
+ /pkg/
10
+ /spec/reports/
11
+ /tmp/
12
+ *.bundle
13
+ *.so
14
+ *.o
15
+ *.a
16
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
@@ -0,0 +1,7 @@
1
+ --protected
2
+ --private
3
+ --embed-mixins
4
+ lib/**/*.rb
5
+ -
6
+ README.md
7
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in k_means_pp.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Oldrich Vetesnik
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,161 @@
1
+ # KMeansPP
2
+
3
+ ## What's this?
4
+
5
+ This is a Ruby implementation of the k-means++ algorithm for data clustering.
6
+ In other words: Grouping a bunch of X, Y points into K groups.
7
+ The code is a port of the Python version on [rosettacode.org][rosetta].
8
+
9
+ ### K-means++ (from [Wikipedia][kmeans++])
10
+
11
+ > In data mining, k-means++ is an algorithm for choosing the initial values (or
12
+ > "seeds") for the k-means clustering algorithm. It was proposed in 2007 by
13
+ > David Arthur and Sergei Vassilvitskii, as an approximation algorithm for the
14
+ > NP-hard k-means problem—a way of avoiding the sometimes poor clusterings found
15
+ > by the standard k-means algorithm.
16
+ >
17
+ > [...]
18
+ >
19
+ > The k-means problem is to find cluster centers that minimize the intra-class
20
+ > variance, i.e. the sum of squared distances from each data point being
21
+ > clustered to its cluster center (the center that is closest to it). Although
22
+ > finding an exact solution to the k-means problem for arbitrary input is
23
+ > NP-hard the standard approach to finding an approximate solution (often
24
+ > called [Lloyd's algorithm][lloyd] or the k-means algorithm) is used widely and
25
+ > frequently finds reasonable solutions quickly.
26
+
27
+ ### K-means (from [Wikipedia][kmeans])
28
+
29
+ > k-means clustering is a method of vector quantization, originally from signal
30
+ > processing, that is popular for cluster analysis in data mining. k-means
31
+ > clustering aims to partition n observations into k clusters in which each
32
+ > observation belongs to the cluster with the nearest mean, serving as a
33
+ > prototype of the cluster. This results in a partitioning of the data space
34
+ > into Voronoi cells.
35
+
36
+ ## Usage
37
+
38
+ See examples, too.
39
+
40
+ ```ruby
41
+ points = [
42
+ [0.3968, 1.9431],
43
+ [9.3348, 6.7843],
44
+ [9.2882, 8.1347],
45
+ [7.6768, 2.7362],
46
+ [3.4434, 4.1910],
47
+ [1.8097, 5.0884],
48
+ [7.0698, 3.9285],
49
+ [9.3820, 7.6790],
50
+ [8.6092, 0.9651],
51
+ [9.1981, 7.7493]
52
+ ]
53
+
54
+ clusters = KMeansPP.clusters(points, 3)
55
+
56
+ plot clusters
57
+ puts clusters
58
+ # Cluster (7.785266666666668, 2.5432666666666663): [
59
+ # [7.6768, 2.7362],
60
+ # [7.0698, 3.9285],
61
+ # [8.6092, 0.9651],
62
+ # ]
63
+ # Cluster (9.300774999999998, 7.586824999999999): [
64
+ # [9.3348, 6.7843],
65
+ # [9.2882, 8.1347],
66
+ # [9.382, 7.679],
67
+ # [9.1981, 7.7493],
68
+ # ]
69
+ # Cluster (1.8833, 3.7408333333333332): [
70
+ # [0.3968, 1.9431],
71
+ # [3.4434, 4.191],
72
+ # [1.8097, 5.0884],
73
+ # ]
74
+
75
+ cluster = clusters.first
76
+ p cluster.centroid.x # 7.785266666666668
77
+ p cluster.centroid.y # 2.5432666666666663
78
+ p cluster.points # [[7.6768, 2.7362], [7.0698, 3.9285], [8.6092, 0.9651]]
79
+ ```
80
+
81
+ Or with custom structure:
82
+
83
+ ```ruby
84
+ points = [
85
+ { x: 0.3968, y: 1.9431 },
86
+ { x: 9.3348, y: 6.7843 },
87
+ { x: 9.2882, y: 8.1347 },
88
+ { x: 7.6768, y: 2.7362 },
89
+ { x: 3.4434, y: 4.1910 },
90
+ { x: 1.8097, y: 5.0884 },
91
+ { x: 7.0698, y: 3.9285 },
92
+ { x: 9.3820, y: 7.6790 },
93
+ { x: 8.6092, y: 0.9651 },
94
+ { x: 9.1981, y: 7.7493 }
95
+ ]
96
+
97
+ clusters = KMeansPP.clusters(points, 3) do |point|
98
+ [point[:x], point[:y]]
99
+ end
100
+
101
+ puts clusters
102
+ # Cluster (9.300774999999998, 7.586824999999999): [
103
+ # {:x=>9.3348, :y=>6.7843},
104
+ # {:x=>9.2882, :y=>8.1347},
105
+ # {:x=>9.382, :y=>7.679},
106
+ # {:x=>9.1981, :y=>7.7493},
107
+ # ]
108
+ # Cluster (1.8833, 3.7408333333333332): [
109
+ # {:x=>0.3968, :y=>1.9431},
110
+ # {:x=>3.4434, :y=>4.191},
111
+ # {:x=>1.8097, :y=>5.0884},
112
+ # ]
113
+ # Cluster (7.785266666666668, 2.5432666666666663): [
114
+ # {:x=>7.6768, :y=>2.7362},
115
+ # {:x=>7.0698, :y=>3.9285},
116
+ # {:x=>8.6092, :y=>0.9651},
117
+ # ]
118
+ ```
119
+
120
+ ## Running examples
121
+
122
+ If you want to run the examples, you will need the `gnuplot` library and gem.
123
+ Don't forget to add the `--with-x` flag, otherwise it won't show anything.
124
+
125
+ $ brew install gnuplot --with-x # Assuming OS X
126
+ $ gem install gnuplot
127
+ $ cd examples
128
+ $ ruby example_simple.rb
129
+ $ ruby example_block.rb
130
+ $ ruby example_csv.rb
131
+ $ ruby example_huge.rb
132
+ $ ruby example_debug.rb # Generates profiler reports
133
+
134
+ ## Installation
135
+
136
+ Add this line to your application's Gemfile:
137
+
138
+ ```ruby
139
+ gem 'k_means_pp'
140
+ ```
141
+
142
+ And then execute:
143
+
144
+ $ bundle
145
+
146
+ Or install it yourself as:
147
+
148
+ $ gem install k_means_pp
149
+
150
+ ## Contributing
151
+
152
+ 1. Fork it (https://github.com/ollie/k_means_pp/fork)
153
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
154
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
155
+ 4. Push to the branch (`git push origin my-new-feature`)
156
+ 5. Create a new Pull Request
157
+
158
+ [rosetta]: http://rosettacode.org/wiki/K-means%2B%2B_clustering#Python
159
+ [kmeans++]: https://en.wikipedia.org/wiki/K-means%2B%2B
160
+ [kmeans]: https://en.wikipedia.org/wiki/K-means_clustering
161
+ [lloyd]: https://en.wikipedia.org/wiki/Lloyd%27s_algorithm
@@ -0,0 +1,18 @@
1
+ task default: :combo
2
+
3
+ desc 'Run tests, rubocop and generate documentation'
4
+ task :combo do
5
+ sh 'bundle exec rspec'
6
+ sh('bundle exec rubocop') {} # ignore status > 0
7
+ sh 'bundle exec yardoc'
8
+ end
9
+
10
+ desc 'Same as :combo but build a gem, too'
11
+ task mega_combo: :combo do
12
+ sh 'gem build k_means_pp.gemspec'
13
+ end
14
+
15
+ desc 'Start a console'
16
+ task :console do
17
+ sh 'bundle exec pry -I ./lib -r ./lib/k_means_pp.rb'
18
+ end
@@ -0,0 +1,34 @@
1
+ require 'gnuplot'
2
+
3
+ # Plot and display data on the screen.
4
+ #
5
+ # @param clusters [Array<Cluster>]
6
+ def plot(clusters)
7
+ # Graph output by running gnuplot pipe
8
+ Gnuplot.open do |gp|
9
+ # Start a new plot
10
+ Gnuplot::Plot.new(gp) do |plot|
11
+ # Plot each cluster's points
12
+ clusters.each do |cluster|
13
+ # Collect all x and y coords for this cluster
14
+ x = cluster.points.map { |p| p[0] }
15
+ y = cluster.points.map { |p| p[1] }
16
+
17
+ # Plot w/o a title (clutters things up)
18
+ plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
19
+ ds.notitle
20
+ end
21
+
22
+ # Centroid point as bigger black points
23
+ x = [cluster.centroid.x]
24
+ y = [cluster.centroid.y]
25
+
26
+ plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
27
+ ds.notitle
28
+ ds.linecolor = '000000'
29
+ ds.linewidth = 3
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,23 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+
6
+ points = [
7
+ { x: 0.3968, y: 1.9431 },
8
+ { x: 9.3348, y: 6.7843 },
9
+ { x: 9.2882, y: 8.1347 },
10
+ { x: 7.6768, y: 2.7362 },
11
+ { x: 3.4434, y: 4.1910 },
12
+ { x: 1.8097, y: 5.0884 },
13
+ { x: 7.0698, y: 3.9285 },
14
+ { x: 9.3820, y: 7.6790 },
15
+ { x: 8.6092, y: 0.9651 },
16
+ { x: 9.1981, y: 7.7493 }
17
+ ]
18
+
19
+ clusters = KMeansPP.clusters(points, 3) do |point|
20
+ [point[:x], point[:y]]
21
+ end
22
+
23
+ puts clusters
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ require './common'
6
+ require 'csv'
7
+
8
+ points = CSV.foreach('points.csv').map do |row|
9
+ [row[0].to_f, row[1].to_f]
10
+ end
11
+
12
+ clusters = KMeansPP.clusters(points, 3)
13
+
14
+ plot clusters
15
+ puts clusters
@@ -0,0 +1,47 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ # require './common'
6
+ require 'ruby-prof'
7
+
8
# Build a list of +n+ random points scattered around the origin.
#
# Every point is drawn in polar form (a random distance scaled by +radius+
# and a random angle) and then converted to cartesian coordinates.
#
# @param n      [Integer] How many points to build.
# @param radius [Numeric] Upper bound for the distance from the origin.
#
# @return [Array<Array<Float>>] List of +[x, y]+ pairs.
def generate_points(n, radius)
  n.times.each_with_object([]) do |_, generated|
    # Draw the distance first, then the angle.
    distance = rand * radius
    angle    = rand * 2 * Math::PI

    generated << [distance * Math.cos(angle), distance * Math.sin(angle)]
  end
end
24
+
25
# Holds the clustering result so it stays reachable after profiling
# (e.g. for the optional plot at the end of this script).
clusters = nil

# Profile only point generation and clustering.
result = RubyProf.profile do
  points   = generate_points(100, 10)
  clusters = KMeansPP.clusters(points, 5)
end

# Printer class => name of the report file it writes.
reports = {
  RubyProf::FlatPrinter      => 'report-flat.txt',
  RubyProf::GraphPrinter     => 'report-graph.txt',
  RubyProf::GraphHtmlPrinter => 'report-graph.html',
  RubyProf::DotPrinter       => 'report-dot.dot'
}

reports.each do |printer_class, file_name|
  # Block form closes (and therefore flushes) the file once the report is
  # written; the bare File.open calls used before never closed the handles.
  File.open(file_name, 'w') do |file|
    printer_class.new(result).print(file, min_percent: 2)
  end
end
43
+
44
+ # Then run:
45
+ # dot -Tpng report-dot.dot > report-graph.png
46
+
47
+ # plot(clusters)
@@ -0,0 +1,27 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ require './common'
6
+
7
# Build a list of +n+ random points scattered around the origin.
#
# Every point is drawn in polar form (a random distance scaled by +radius+
# and a random angle) and then converted to cartesian coordinates.
#
# @param n      [Integer] How many points to build.
# @param radius [Numeric] Upper bound for the distance from the origin.
#
# @return [Array<Array<Float>>] List of +[x, y]+ pairs.
def generate_points(n, radius)
  n.times.each_with_object([]) do |_, generated|
    # Draw the distance first, then the angle.
    distance = rand * radius
    angle    = rand * 2 * Math::PI

    generated << [distance * Math.cos(angle), distance * Math.sin(angle)]
  end
end
23
+
24
+ points = generate_points(30_000, 10)
25
+ clusters = KMeansPP.clusters(points, 7)
26
+
27
+ plot clusters
@@ -0,0 +1,28 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ require './common'
6
+
7
+ points = [
8
+ [0.3968, 1.9431],
9
+ [9.3348, 6.7843],
10
+ [9.2882, 8.1347],
11
+ [7.6768, 2.7362],
12
+ [3.4434, 4.1910],
13
+ [1.8097, 5.0884],
14
+ [7.0698, 3.9285],
15
+ [9.3820, 7.6790],
16
+ [8.6092, 0.9651],
17
+ [9.1981, 7.7493]
18
+ ]
19
+
20
+ clusters = KMeansPP.clusters(points, 3)
21
+
22
+ plot clusters
23
+ puts clusters
24
+
25
+ cluster = clusters.first
26
+ p cluster.centroid.x
27
+ p cluster.centroid.y
28
+ p cluster.points
@@ -0,0 +1,100 @@
1
+ 48.2641334571,86.4516903905
2
+ 0.114004262656,35.8368597414
3
+ 97.4319168245,92.8009240744
4
+ 24.4614031388,18.3292584382
5
+ 36.2367675367,32.8294024271
6
+ 75.5836860736,68.30729977
7
+ 38.6577034445,25.7701728584
8
+ 28.2607136287,64.4493377817
9
+ 61.5358486771,61.2195232194
10
+ 1.52352224798,38.5083779618
11
+ 11.6392182793,68.2369021579
12
+ 53.9486870607,53.9136556533
13
+ 14.6671651772,26.0132534731
14
+ 65.9506725878,82.5639317581
15
+ 58.3682872339,51.6414580337
16
+ 12.6918921252,2.28888447759
17
+ 31.7587852231,18.1368234166
18
+ 63.6631115204,24.933301389
19
+ 29.1652289905,34.456759171
20
+ 44.3830953085,70.4813875779
21
+ 47.0571691145,65.3507625811
22
+ 74.0584537502,98.2271944247
23
+ 55.8929146157,86.6196265477
24
+ 20.4744253473,12.0025149302
25
+ 14.2867767281,40.2850440995
26
+ 40.43551369,94.5410407116
27
+ 87.6178871195,12.4700151639
28
+ 47.2703048197,93.0636237124
29
+ 59.7895104175,69.2621288413
30
+ 80.8612333922,42.9183411179
31
+ 31.1271795535,55.6669044656
32
+ 78.9671049353,65.833739365
33
+ 39.8324533414,63.0343115139
34
+ 79.126343548,14.9128874133
35
+ 65.8152400306,77.5202358013
36
+ 75.2762752704,42.4858435609
37
+ 29.6475948493,61.2068411763
38
+ 67.421857106,54.8955604259
39
+ 10.4652931501,29.7954139372
40
+ 32.0272462745,99.5422900971
41
+ 80.1520927001,84.2710379142
42
+ 2.27240208403,41.2138854089
43
+ 44.4601509555,1.72563901513
44
+ 16.8676021068,35.3415636277
45
+ 58.1977544121,29.2752085455
46
+ 24.6119080085,39.9440735137
47
+ 63.0759798755,60.9841014448
48
+ 30.9289119657,95.0173219502
49
+ 8.54972950047,41.7384441737
50
+ 61.2606910793,4.06738902059
51
+ 83.2302091964,11.6373312879
52
+ 89.4443065362,42.5694882801
53
+ 24.5619318152,97.7947977804
54
+ 50.3134024475,40.6429336223
55
+ 58.1422402033,36.1112632557
56
+ 32.0668520827,29.9924151435
57
+ 89.6057447137,84.9532177777
58
+ 9.8876440816,18.2540486261
59
+ 17.9670383961,47.596032257
60
+ 50.2977668282,93.6851189223
61
+ 98.0700386253,86.5816924579
62
+ 10.8175290981,26.4344732252
63
+ 34.7463851288,24.4154447141
64
+ 92.5470100593,17.3595513748
65
+ 79.0426629356,4.59850018907
66
+ 89.9791366918,29.523946842
67
+ 3.89920214563,91.3650215111
68
+ 35.4669861576,62.1865368798
69
+ 2.78150918086,24.5280230552
70
+ 50.0390951889,57.0414421682
71
+ 64.4521660758,48.4962172448
72
+ 94.4915452316,56.6508179406
73
+ 47.1655534769,15.8292055671
74
+ 94.2027011374,45.6802385454
75
+ 30.5846324871,54.783635876
76
+ 57.7043252948,0.286661610381
77
+ 41.7908674949,14.7206014023
78
+ 59.6689465934,64.8849831965
79
+ 92.2553335495,55.9096460272
80
+ 48.493467262,69.4766837809
81
+ 23.1837859581,71.4406867443
82
+ 29.0737623652,66.9391416961
83
+ 95.7442323112,89.4677505059
84
+ 68.7707275828,40.9900140055
85
+ 84.5445737133,32.1707309618
86
+ 67.4126251988,56.6710579117
87
+ 10.688352016,28.1745892928
88
+ 56.7620324155,18.3034334207
89
+ 50.6751320678,86.6916908032
90
+ 74.6185482896,34.022483532
91
+ 20.7011996002,32.855295357
92
+ 11.479054664,1.59204297586
93
+ 51.6805387648,25.4063026358
94
+ 84.4109522357,47.237632645
95
+ 90.6395051745,57.7917166935
96
+ 58.6159601042,84.1226173848
97
+ 46.2184509277,28.559934585
98
+ 97.0302485783,41.3135022812
99
+ 31.3144587058,87.2459910122
100
+ 5.93357833962,95.6812831872
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'k_means_pp/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'k_means_pp'
8
+ spec.version = KMeansPP::VERSION
9
+ spec.authors = ['Oldrich Vetesnik']
10
+ spec.email = ['oldrich.vetesnik@gmail.com']
11
+ spec.summary = 'K-means++ Algorithm Implementation.'
12
+ spec.description = 'This is a Ruby implementation of the k-means++ ' \
13
+ 'algorithm for data clustering. In other words: ' \
14
+ 'Grouping a bunch of X, Y points into K groups.'
15
+ spec.homepage = 'https://github.com/ollie/k_means_pp'
16
+ spec.license = 'MIT'
17
+
18
+ spec.files = `git ls-files -z`.split("\x0")
19
+ spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
20
+ spec.test_files = spec.files.grep(/^(test|spec|features)\//)
21
+ spec.require_paths = ['lib']
22
+
23
+ # System
24
+ spec.add_development_dependency 'bundler', '~> 1.7'
25
+
26
+ # Test
27
+ spec.add_development_dependency 'rspec', '~> 3.1'
28
+ spec.add_development_dependency 'simplecov', '~> 0.9'
29
+
30
+ # Code style, debugging, docs
31
+ spec.add_development_dependency 'yard', '~> 0.8'
32
+ spec.add_development_dependency 'rake', '~> 10.3'
33
+ spec.add_development_dependency 'rubocop', '~> 0.26'
34
+ spec.add_development_dependency 'pry', '~> 0.10'
35
+ spec.add_development_dependency 'pry-byebug', '~> 2.0'
36
+ spec.add_development_dependency 'ruby-prof', '~> 0.15'
37
+ spec.add_development_dependency 'gnuplot', '~> 2.6'
38
+ end
@@ -0,0 +1,240 @@
1
+ require 'k_means_pp/version'
2
+ require 'k_means_pp/point'
3
+ require 'k_means_pp/cluster'
4
+
5
# Cluster data with the k-means++, k-means and Lloyd algorithm.
class KMeansPP
  # Source data set of points (wrapped in +Point+ objects).
  #
  # @return [Array<Point>]
  attr_accessor :points

  # Centroid points
  #
  # @return [Array<Centroid>]
  attr_accessor :centroids

  # Take an array of things and group them into K clusters.
  #
  # If no block was given, an array of arrays (of two numbers) is expected.
  # At the end an array of +Cluster+s is returned, each wrapping
  # an array of arrays (of two numbers).
  #
  # If a block was given, the +points+ is likely an array of other things
  # like hashes or objects. The block is expected to return an array of two
  # numbers. At the end an array of +Cluster+s is returned, each wrapping
  # an array of original objects.
  #
  # The +points+ array passed in is left untouched. An empty array is
  # returned when there are no points or when zero clusters are requested.
  #
  # @param points         [Array]   Source data set of points.
  # @param clusters_count [Integer] Number of clusters ("k").
  # @yieldreturn [Array<Numeric>]
  #
  # @return [Array<Cluster>]
  def self.clusters(points, clusters_count, &block)
    instance = new(points, clusters_count, &block)
    instance.group_points
    instance.centroids.map do |centroid|
      # Use the instance's wrapped points, not the caller's raw input.
      cluster_for_centroid(centroid, instance.points, &block)
    end
  end

  # Computed points are a flat structure so this collects the points that
  # belong to the given centroid and unwraps them: into the original objects
  # when a block was given, into +[x, y]+ pairs otherwise.
  #
  # @param centroid [Centroid]     Centroid of the cluster.
  # @param points   [Array<Point>] All (already grouped) points.
  #
  # @return [Cluster]
  def self.cluster_for_centroid(centroid, points, &block)
    members = points.select { |p| p.group == centroid }

    unwrapped =
      if block
        members.map(&:original)
      else
        members.map { |p| [p.x, p.y] }
      end

    Cluster.new(centroid, unwrapped)
  end

  # Find nearest centroid for a given point in given centroids.
  #
  # @param point     [Point]           Measure distance of this point
  # @param centroids [Array<Centroid>] to those cluster centers
  #
  # @return [Centroid]
  def self.find_nearest_centroid(point, centroids)
    find_nearest_centroid_and_distance(point, centroids)[0]
  end

  # Find distance to the nearest centroid for a given point in given centroids.
  #
  # @param point     [Point]           Measure distance of this point
  # @param centroids [Array<Centroid>] to those cluster centers
  #
  # @return [Float]
  def self.find_nearest_centroid_distance(point, centroids)
    find_nearest_centroid_and_distance(point, centroids)[1]
  end

  # Find the nearest centroid in given centroids together with the squared
  # distance to it.
  #
  # @param point     [Point]           Measure distance of this point
  # @param centroids [Array<Centroid>] to those cluster centers
  #
  # @return [Array] Pair of +[nearest_centroid, squared_distance]+.
  def self.find_nearest_centroid_and_distance(point, centroids)
    # Assume the current centroid is the closest.
    nearest_centroid = point.group
    nearest_distance = Float::INFINITY

    centroids.each do |centroid|
      distance = centroid.squared_distance_to(point)

      # Only a strictly smaller distance wins (ties keep the earlier
      # centroid). Written with `<` so that a NaN distance, which compares
      # false against everything, can never be picked as the nearest one.
      next unless distance < nearest_distance

      nearest_distance = distance
      nearest_centroid = centroid
    end

    [nearest_centroid, nearest_distance]
  end

  # Take an array of things and group them into K clusters.
  #
  # If no block was given, an array of arrays (of two numbers) is expected.
  # Internally we map them with +Point+ objects.
  #
  # If a block was given, the +points+ is likely an array of other things
  # like hashes or objects. In this case we will keep the original object
  # in a property and once we are done, we will swap those objects.
  # The block is expected to return an array of two numbers.
  #
  # The given array is not modified; the wrapped points live in a new array.
  #
  # @param points         [Array]   Source data set of points.
  # @param clusters_count [Integer] Number of clusters ("k").
  # @yieldreturn [Array<Numeric>]
  def initialize(points, clusters_count)
    custom = block_given?

    self.points = points.map do |point_obj|
      point_ary = custom ? yield(point_obj) : point_obj
      point = Point.new(point_ary[0], point_ary[1])
      point.original = point_obj if custom
      point
    end

    self.centroids = Array.new(clusters_count)
  end

  # Group points into clusters.
  #
  # With no points or no requested clusters there is nothing to group;
  # the centroid list is emptied so no placeholder entries leak out.
  def group_points
    if points.empty? || centroids.empty?
      centroids.clear
      return
    end

    define_initial_clusters
    fine_tune_clusters
  end

  protected

  # K-means++ algorithm.
  #
  # Find initial centroids and assign points to their nearest centroid,
  # forming cells.
  def define_initial_clusters
    # Randomly choose a point as the first centroid.
    centroids[0] = Centroid.new(points.sample)

    # Initialize an array of distances of every point.
    distances = Array.new(points.size, 0.0)

    # The first centroid is already picked, so start with the second slot.
    (1...centroids.size).each do |centroid_i|
      # Centroids chosen so far; the same for every point in this round.
      chosen = centroids[0...centroid_i]

      # Sum points' distances to their nearest centroid
      distances_sum = 0.0

      points.each_with_index do |point, point_i|
        distance = self.class.find_nearest_centroid_distance(point, chosen)
        distances[point_i] = distance
        distances_sum += distance
      end

      # Randomly cut it.
      distances_sum *= rand

      # Keep subtracting those distances until we hit a zero (or lower)
      # in which case we found a new centroid. Should rounding errors leave
      # a positive remainder after the last point, fall back to that point
      # so the slot is never left empty.
      picked_i = points.size - 1

      distances.each_with_index do |distance, point_i|
        distances_sum -= distance
        next if distances_sum > 0
        picked_i = point_i
        break
      end

      centroids[centroid_i] = Centroid.new(points[picked_i])
    end

    # Assign each point its nearest centroid.
    points.each do |point|
      point.group = self.class.find_nearest_centroid(point, centroids)
    end
  end

  # This is Lloyd's algorithm
  # https://en.wikipedia.org/wiki/Lloyd%27s_algorithm
  #
  # At this point we have our points already assigned into cells.
  #
  # 1. We calculate a new center for each cell.
  # 2. For each point find its nearest center and re-assign it if it changed.
  # 3. Repeat until a threshold has been reached.
  def fine_tune_clusters
    # When the number of changed points drops to this number (size / 1024),
    # we are done.
    changed_threshold = points.size >> 10

    loop do
      calculate_new_centroids
      changed = reassign_points

      # Stop when 99.9% of points are good
      break if changed <= changed_threshold
    end
  end

  # For each cell calculate its center.
  # This is done by averaging X and Y coordinates.
  def calculate_new_centroids
    # Clear centroids.
    centroids.each(&:reset)

    # Sum all X and Y coords into each point's centroid.
    points.each do |point|
      point.group.add(point)
    end

    # And then average it to find a center.
    centroids.each(&:average)
  end

  # Loop through all the points and find their nearest centroid.
  # If it's a different one than current, change it and take a note.
  #
  # @return [Integer] Number of changed points.
  def reassign_points
    changed = 0

    points.each do |point|
      centroid = self.class.find_nearest_centroid(point, centroids)
      next if centroid == point.group
      changed += 1
      point.group = centroid
    end

    changed
  end
end
@@ -0,0 +1,32 @@
1
class KMeansPP
  # Cluster has a centroid and a group of related points.
  class Cluster
    # Center of the data set ("centroid").
    #
    # @return [Centroid]
    attr_accessor :centroid

    # Points in this cluster. When built by +KMeansPP.clusters+ these are
    # +[x, y]+ pairs, or the original objects if a block was given.
    #
    # @return [Array]
    attr_accessor :points

    # Create a new cluster with a centroid and points.
    #
    # @param centroid [Centroid] Center point of the data set.
    # @param points   [Array]    Points in this cluster.
    def initialize(centroid, points = [])
      self.centroid = centroid
      self.points   = points
    end

    # A string representation of the cluster: a header line with the
    # centroid, one indented line per point and a closing bracket.
    #
    # Built with map/join instead of appending to a string literal, so it
    # keeps working when string literals are frozen.
    #
    # @return [String]
    def to_s
      point_lines = points.map { |point| "  #{ point },\n" }

      "Cluster #{ centroid }: [\n#{ point_lines.join }]\n"
    end
  end
end
@@ -0,0 +1,95 @@
1
class KMeansPP
  # Common methods for +Point+ and +Centroid+.
  class BasePoint
    # X coordinate of the point.
    #
    # @return [Float]
    attr_accessor :x

    # Y coordinate of the point.
    #
    # @return [Float]
    attr_accessor :y

    # Measure a 2D squared distance between two points.
    #
    # @param point [BasePoint]
    #
    # @return [Float]
    def squared_distance_to(point)
      distance_x = x - point.x
      distance_y = y - point.y

      distance_x**2 + distance_y**2
    end

    # A string representation of the point.
    #
    # @return [String]
    def to_s
      "(#{ x }, #{ y })"
    end
  end

  # Point of the data set.
  class Point < BasePoint
    # Group is a centroid point.
    #
    # @return [Centroid]
    attr_accessor :group

    # The original object (could be anything from Hash to an Object).
    #
    # @return [Object]
    attr_accessor :original

    # Create a new data set point.
    #
    # @param x     [Float]    X coordinate of the point.
    # @param y     [Float]    Y coordinate of the point.
    # @param group [Centroid] Group is a centroid point.
    def initialize(x = 0.0, y = 0.0, group = nil)
      self.x     = x
      self.y     = y
      self.group = group
    end
  end

  # Centroid of a cluster.
  class Centroid < BasePoint
    # How many points are in this cluster?
    #
    # @return [Integer]
    attr_accessor :counter

    # Create a new centroid point.
    #
    # @param point [Point] Copy point's X and Y coords.
    def initialize(point)
      self.x = point.x
      self.y = point.y
      # Start counting at zero so +add+ works even before the first +reset+.
      self.counter = 0
      # Position to fall back to if no points get added before +average+.
      @previous_x = x
      @previous_y = y
    end

    # Prepare centroid for a new iteration, zero-ing everything.
    # The current position is remembered so +average+ can restore it
    # should the cluster turn out to be empty.
    def reset
      @previous_x = x
      @previous_y = y

      self.x       = 0.0
      self.y       = 0.0
      self.counter = 0
    end

    # Add this point's X and Y coords into the sum (for later average).
    #
    # @param point [Point]
    def add(point)
      self.counter += 1
      self.x += point.x
      self.y += point.y
    end

    # At this point X and Y properties will contain sums of all the point
    # coords, counter will contain number of those points.
    # By averaging the coords we find a new center.
    #
    # A centroid without any points keeps its previous position: dividing
    # the zeroed sums by a zero counter would turn the coords into NaN.
    def average
      if counter.zero?
        self.x = @previous_x
        self.y = @previous_y
        return
      end

      self.x /= counter
      self.y /= counter
    end
  end
end
@@ -0,0 +1,4 @@
1
class KMeansPP
  # Version of the gem; read by the gemspec.
  # Frozen so the shared constant cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+ require 'csv'
3
+
4
+ RSpec.describe 'Superman' do
5
+ it 'does it again' do
6
+ data = CSV.foreach('./spec/resources/points.csv').map do |row|
7
+ [row[0].to_f, row[1].to_f]
8
+ end
9
+
10
+ clusters = KMeansPP.clusters(data, 3)
11
+
12
+ clusters.each do |cluster|
13
+ expect(cluster.points.size).to be > 0
14
+ expect(cluster.centroid.x).to_not eq(0)
15
+ expect(cluster.centroid.y).to_not eq(0)
16
+ expect(cluster.to_s).to_not be_empty
17
+ end
18
+
19
+ expect(clusters.size).to eq(3)
20
+ end
21
+
22
+ it 'array of arrays' do
23
+ data = [
24
+ [0.3968, 1.9431],
25
+ [9.3348, 6.7843],
26
+ [9.2882, 8.1347],
27
+ [7.6768, 2.7362],
28
+ [3.4434, 4.1910],
29
+ [1.8097, 5.0884],
30
+ [7.0698, 3.9285],
31
+ [9.3820, 7.6790],
32
+ [8.6092, 0.9651],
33
+ [9.1981, 7.7493]
34
+ ]
35
+
36
+ clusters = KMeansPP.clusters(data, 3)
37
+ expect(clusters.size).to eq(3)
38
+ expect(clusters.first.points.first).to be_a(Array)
39
+ end
40
+
41
+ it 'array of anything else with block' do
42
+ data = [
43
+ { x: 0.3968, y: 1.9431 },
44
+ { x: 9.3348, y: 6.7843 },
45
+ { x: 9.2882, y: 8.1347 },
46
+ { x: 7.6768, y: 2.7362 },
47
+ { x: 3.4434, y: 4.1910 },
48
+ { x: 1.8097, y: 5.0884 },
49
+ { x: 7.0698, y: 3.9285 },
50
+ { x: 9.3820, y: 7.6790 },
51
+ { x: 8.6092, y: 0.9651 },
52
+ { x: 9.1981, y: 7.7493 }
53
+ ]
54
+
55
+ clusters = KMeansPP.clusters(data, 3) do |point|
56
+ [point[:x], point[:y]]
57
+ end
58
+
59
+ expect(clusters.size).to eq(3)
60
+ expect(clusters.first.points.first).to be_a(Hash)
61
+ end
62
+ end
@@ -0,0 +1,100 @@
1
+ 48.2641334571,86.4516903905
2
+ 0.114004262656,35.8368597414
3
+ 97.4319168245,92.8009240744
4
+ 24.4614031388,18.3292584382
5
+ 36.2367675367,32.8294024271
6
+ 75.5836860736,68.30729977
7
+ 38.6577034445,25.7701728584
8
+ 28.2607136287,64.4493377817
9
+ 61.5358486771,61.2195232194
10
+ 1.52352224798,38.5083779618
11
+ 11.6392182793,68.2369021579
12
+ 53.9486870607,53.9136556533
13
+ 14.6671651772,26.0132534731
14
+ 65.9506725878,82.5639317581
15
+ 58.3682872339,51.6414580337
16
+ 12.6918921252,2.28888447759
17
+ 31.7587852231,18.1368234166
18
+ 63.6631115204,24.933301389
19
+ 29.1652289905,34.456759171
20
+ 44.3830953085,70.4813875779
21
+ 47.0571691145,65.3507625811
22
+ 74.0584537502,98.2271944247
23
+ 55.8929146157,86.6196265477
24
+ 20.4744253473,12.0025149302
25
+ 14.2867767281,40.2850440995
26
+ 40.43551369,94.5410407116
27
+ 87.6178871195,12.4700151639
28
+ 47.2703048197,93.0636237124
29
+ 59.7895104175,69.2621288413
30
+ 80.8612333922,42.9183411179
31
+ 31.1271795535,55.6669044656
32
+ 78.9671049353,65.833739365
33
+ 39.8324533414,63.0343115139
34
+ 79.126343548,14.9128874133
35
+ 65.8152400306,77.5202358013
36
+ 75.2762752704,42.4858435609
37
+ 29.6475948493,61.2068411763
38
+ 67.421857106,54.8955604259
39
+ 10.4652931501,29.7954139372
40
+ 32.0272462745,99.5422900971
41
+ 80.1520927001,84.2710379142
42
+ 2.27240208403,41.2138854089
43
+ 44.4601509555,1.72563901513
44
+ 16.8676021068,35.3415636277
45
+ 58.1977544121,29.2752085455
46
+ 24.6119080085,39.9440735137
47
+ 63.0759798755,60.9841014448
48
+ 30.9289119657,95.0173219502
49
+ 8.54972950047,41.7384441737
50
+ 61.2606910793,4.06738902059
51
+ 83.2302091964,11.6373312879
52
+ 89.4443065362,42.5694882801
53
+ 24.5619318152,97.7947977804
54
+ 50.3134024475,40.6429336223
55
+ 58.1422402033,36.1112632557
56
+ 32.0668520827,29.9924151435
57
+ 89.6057447137,84.9532177777
58
+ 9.8876440816,18.2540486261
59
+ 17.9670383961,47.596032257
60
+ 50.2977668282,93.6851189223
61
+ 98.0700386253,86.5816924579
62
+ 10.8175290981,26.4344732252
63
+ 34.7463851288,24.4154447141
64
+ 92.5470100593,17.3595513748
65
+ 79.0426629356,4.59850018907
66
+ 89.9791366918,29.523946842
67
+ 3.89920214563,91.3650215111
68
+ 35.4669861576,62.1865368798
69
+ 2.78150918086,24.5280230552
70
+ 50.0390951889,57.0414421682
71
+ 64.4521660758,48.4962172448
72
+ 94.4915452316,56.6508179406
73
+ 47.1655534769,15.8292055671
74
+ 94.2027011374,45.6802385454
75
+ 30.5846324871,54.783635876
76
+ 57.7043252948,0.286661610381
77
+ 41.7908674949,14.7206014023
78
+ 59.6689465934,64.8849831965
79
+ 92.2553335495,55.9096460272
80
+ 48.493467262,69.4766837809
81
+ 23.1837859581,71.4406867443
82
+ 29.0737623652,66.9391416961
83
+ 95.7442323112,89.4677505059
84
+ 68.7707275828,40.9900140055
85
+ 84.5445737133,32.1707309618
86
+ 67.4126251988,56.6710579117
87
+ 10.688352016,28.1745892928
88
+ 56.7620324155,18.3034334207
89
+ 50.6751320678,86.6916908032
90
+ 74.6185482896,34.022483532
91
+ 20.7011996002,32.855295357
92
+ 11.479054664,1.59204297586
93
+ 51.6805387648,25.4063026358
94
+ 84.4109522357,47.237632645
95
+ 90.6395051745,57.7917166935
96
+ 58.6159601042,84.1226173848
97
+ 46.2184509277,28.559934585
98
+ 97.0302485783,41.3135022812
99
+ 31.3144587058,87.2459910122
100
+ 5.93357833962,95.6812831872
@@ -0,0 +1,10 @@
1
+ require 'bundler/setup'
2
+
3
+ require 'simplecov'
4
+
5
+ # Coverage tool, needs to be started as soon as possible
6
+ SimpleCov.start do
7
+ add_filter '/spec/' # Ignore spec directory
8
+ end
9
+
10
+ require 'k_means_pp'
metadata ADDED
@@ -0,0 +1,211 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: k_means_pp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Oldrich Vetesnik
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: simplecov
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.9'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.9'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.8'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.26'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.26'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.10'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.10'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry-byebug
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: ruby-prof
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.15'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.15'
139
+ - !ruby/object:Gem::Dependency
140
+ name: gnuplot
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '2.6'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '2.6'
153
+ description: 'This is a Ruby implementation of the k-means++ algorithm for data clustering.
154
+ In other words: Grouping a bunch of X, Y points into K groups.'
155
+ email:
156
+ - oldrich.vetesnik@gmail.com
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - ".gitignore"
162
+ - ".rspec"
163
+ - ".yardopts"
164
+ - Gemfile
165
+ - LICENSE.txt
166
+ - README.md
167
+ - Rakefile
168
+ - examples/common.rb
169
+ - examples/example_block.rb
170
+ - examples/example_csv.rb
171
+ - examples/example_debug.rb
172
+ - examples/example_huge.rb
173
+ - examples/example_simple.rb
174
+ - examples/points.csv
175
+ - k_means_pp.gemspec
176
+ - lib/k_means_pp.rb
177
+ - lib/k_means_pp/cluster.rb
178
+ - lib/k_means_pp/point.rb
179
+ - lib/k_means_pp/version.rb
180
+ - spec/lib/k_means_pp_spec.rb
181
+ - spec/resources/points.csv
182
+ - spec/spec_helper.rb
183
+ homepage: https://github.com/ollie/k_means_pp
184
+ licenses:
185
+ - MIT
186
+ metadata: {}
187
+ post_install_message:
188
+ rdoc_options: []
189
+ require_paths:
190
+ - lib
191
+ required_ruby_version: !ruby/object:Gem::Requirement
192
+ requirements:
193
+ - - ">="
194
+ - !ruby/object:Gem::Version
195
+ version: '0'
196
+ required_rubygems_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ">="
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ requirements: []
202
+ rubyforge_project:
203
+ rubygems_version: 2.4.1
204
+ signing_key:
205
+ specification_version: 4
206
+ summary: K-means++ Algorithm Implementation.
207
+ test_files:
208
+ - spec/lib/k_means_pp_spec.rb
209
+ - spec/resources/points.csv
210
+ - spec/spec_helper.rb
211
+ has_rdoc: