k_means_pp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 25114f424713579b656eddb1a275a59103a860c5
4
+ data.tar.gz: f3ef959b9a7044a048903c44b2c55a4c0bc11583
5
+ SHA512:
6
+ metadata.gz: ea81b5f48c62e0654cee9a635ffbc16e829c5129e1e26a02477217287a517416ce5adfe41a150199acc87dec26d335b9d89d738ea94bd31441ce419002d23625
7
+ data.tar.gz: 9587a9d3a4d0c7d6d3e14d945d92bab9c81e9779699ef4f53dc8e232ea56f073d2fae1f0baad6c96ea2af6fc0ae01bd709964491ed9333f6e379e530acaf6bc4
@@ -0,0 +1,16 @@
1
+ /*.gem
2
+ /examples/report-*.*
3
+ /.bundle/
4
+ /.yardoc
5
+ /Gemfile.lock
6
+ /_yardoc/
7
+ /coverage/
8
+ /doc/
9
+ /pkg/
10
+ /spec/reports/
11
+ /tmp/
12
+ *.bundle
13
+ *.so
14
+ *.o
15
+ *.a
16
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
@@ -0,0 +1,7 @@
1
+ --protected
2
+ --private
3
+ --embed-mixins
4
+ lib/**/*.rb
5
+ -
6
+ README.md
7
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in k_means_pp.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Oldrich Vetesnik
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,161 @@
1
+ # KMeansPP
2
+
3
+ ## What's this?
4
+
5
+ This is a Ruby implementation of the k-means++ algorithm for data clustering.
6
+ In other words: Grouping a bunch of X, Y points into K groups.
7
+ The code is a port of the Python version on [rosettacode.org][rosetta].
8
+
9
+ ### K-means++ (from [Wikipedia][kmeans++])
10
+
11
+ > In data mining, k-means++ is an algorithm for choosing the initial values (or
12
+ > "seeds") for the k-means clustering algorithm. It was proposed in 2007 by
13
+ > David Arthur and Sergei Vassilvitskii, as an approximation algorithm for the
14
+ > NP-hard k-means problem—a way of avoiding the sometimes poor clusterings found
15
+ > by the standard k-means algorithm.
16
+ >
17
+ > [...]
18
+ >
19
+ > The k-means problem is to find cluster centers that minimize the intra-class
20
+ > variance, i.e. the sum of squared distances from each data point being
21
+ > clustered to its cluster center (the center that is closest to it). Although
22
+ > finding an exact solution to the k-means problem for arbitrary input is
23
+ > NP-hard the standard approach to finding an approximate solution (often
24
+ > called [Lloyd's algorithm][lloyd] or the k-means algorithm) is used widely and
25
+ > frequently finds reasonable solutions quickly.
26
+
27
+ ### K-means (from [Wikipedia][kmeans])
28
+
29
+ > k-means clustering is a method of vector quantization, originally from signal
30
+ > processing, that is popular for cluster analysis in data mining. k-means
31
+ > clustering aims to partition n observations into k clusters in which each
32
+ > observation belongs to the cluster with the nearest mean, serving as a
33
+ > prototype of the cluster. This results in a partitioning of the data space
34
+ > into Voronoi cells.
35
+
36
+ ## Usage
37
+
38
+ See examples, too.
39
+
40
+ ```ruby
41
+ points = [
42
+ [0.3968, 1.9431],
43
+ [9.3348, 6.7843],
44
+ [9.2882, 8.1347],
45
+ [7.6768, 2.7362],
46
+ [3.4434, 4.1910],
47
+ [1.8097, 5.0884],
48
+ [7.0698, 3.9285],
49
+ [9.3820, 7.6790],
50
+ [8.6092, 0.9651],
51
+ [9.1981, 7.7493]
52
+ ]
53
+
54
+ clusters = KMeansPP.clusters(points, 3)
55
+
56
+ plot clusters
57
+ puts clusters
58
+ # Cluster (7.785266666666668, 2.5432666666666663): [
59
+ # [7.6768, 2.7362],
60
+ # [7.0698, 3.9285],
61
+ # [8.6092, 0.9651],
62
+ # ]
63
+ # Cluster (9.300774999999998, 7.586824999999999): [
64
+ # [9.3348, 6.7843],
65
+ # [9.2882, 8.1347],
66
+ # [9.382, 7.679],
67
+ # [9.1981, 7.7493],
68
+ # ]
69
+ # Cluster (1.8833, 3.7408333333333332): [
70
+ # [0.3968, 1.9431],
71
+ # [3.4434, 4.191],
72
+ # [1.8097, 5.0884],
73
+ # ]
74
+
75
+ cluster = clusters.first
76
+ p cluster.centroid.x # 7.785266666666668
77
+ p cluster.centroid.y # 2.5432666666666663
78
+ p cluster.points # [[7.6768, 2.7362], [7.0698, 3.9285], [8.6092, 0.9651]]
79
+ ```
80
+
81
+ Or with custom structure:
82
+
83
+ ```ruby
84
+ points = [
85
+ { x: 0.3968, y: 1.9431 },
86
+ { x: 9.3348, y: 6.7843 },
87
+ { x: 9.2882, y: 8.1347 },
88
+ { x: 7.6768, y: 2.7362 },
89
+ { x: 3.4434, y: 4.1910 },
90
+ { x: 1.8097, y: 5.0884 },
91
+ { x: 7.0698, y: 3.9285 },
92
+ { x: 9.3820, y: 7.6790 },
93
+ { x: 8.6092, y: 0.9651 },
94
+ { x: 9.1981, y: 7.7493 }
95
+ ]
96
+
97
+ clusters = KMeansPP.clusters(points, 3) do |point|
98
+ [point[:x], point[:y]]
99
+ end
100
+
101
+ puts clusters
102
+ # Cluster (9.300774999999998, 7.586824999999999): [
103
+ # {:x=>9.3348, :y=>6.7843},
104
+ # {:x=>9.2882, :y=>8.1347},
105
+ # {:x=>9.382, :y=>7.679},
106
+ # {:x=>9.1981, :y=>7.7493},
107
+ # ]
108
+ # Cluster (1.8833, 3.7408333333333332): [
109
+ # {:x=>0.3968, :y=>1.9431},
110
+ # {:x=>3.4434, :y=>4.191},
111
+ # {:x=>1.8097, :y=>5.0884},
112
+ # ]
113
+ # Cluster (7.785266666666668, 2.5432666666666663): [
114
+ # {:x=>7.6768, :y=>2.7362},
115
+ # {:x=>7.0698, :y=>3.9285},
116
+ # {:x=>8.6092, :y=>0.9651},
117
+ # ]
118
+ ```
119
+
120
+ ## Running examples
121
+
122
+ If you want to run the examples, you will need the `gnuplot` library and gem.
123
+ Don't forget to add the `--with-x` flag, otherwise it won't show anything.
124
+
125
+ $ brew install gnuplot --with-x # Assuming OS X
126
+ $ gem install gnuplot
127
+ $ cd examples
128
+ $ ruby example_simple.rb
129
+ $ ruby example_block.rb
130
+ $ ruby example_csv.rb
131
+ $ ruby example_huge.rb
132
+ $ ruby example_debug.rb # Generates profiler reports
133
+
134
+ ## Installation
135
+
136
+ Add this line to your application's Gemfile:
137
+
138
+ ```ruby
139
+ gem 'k_means_pp'
140
+ ```
141
+
142
+ And then execute:
143
+
144
+ $ bundle
145
+
146
+ Or install it yourself as:
147
+
148
+ $ gem install k_means_pp
149
+
150
+ ## Contributing
151
+
152
+ 1. Fork it (https://github.com/ollie/k_means_pp/fork)
153
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
154
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
155
+ 4. Push to the branch (`git push origin my-new-feature`)
156
+ 5. Create a new Pull Request
157
+
158
+ [rosetta]: http://rosettacode.org/wiki/K-means%2B%2B_clustering#Python
159
+ [kmeans++]: https://en.wikipedia.org/wiki/K-means%2B%2B
160
+ [kmeans]: https://en.wikipedia.org/wiki/K-means_clustering
161
+ [lloyd]: https://en.wikipedia.org/wiki/Lloyd%27s_algorithm
@@ -0,0 +1,18 @@
1
+ task default: :combo
2
+
3
+ desc 'Run tests, rubocop and generate documentation'
4
+ task :combo do
5
+ sh 'bundle exec rspec'
6
+ sh('bundle exec rubocop') {} # ignore status > 0
7
+ sh 'bundle exec yardoc'
8
+ end
9
+
10
+ desc 'Same as :combo but build a gem, too'
11
+ task mega_combo: :combo do
12
+ sh 'gem build k_means_pp.gemspec'
13
+ end
14
+
15
+ desc 'Start a console'
16
+ task :console do
17
+ sh 'bundle exec pry -I ./lib -r ./lib/k_means_pp.rb'
18
+ end
@@ -0,0 +1,34 @@
1
+ require 'gnuplot'
2
+
3
+ # Plot and display data on the screen.
4
+ #
5
+ # @param clusters [Array<Cluster>]
6
+ def plot(clusters)
7
+ # Graph output by running gnuplot pipe
8
+ Gnuplot.open do |gp|
9
+ # Start a new plot
10
+ Gnuplot::Plot.new(gp) do |plot|
11
+ # Plot each cluster's points
12
+ clusters.each do |cluster|
13
+ # Collect all x and y coords for this cluster
14
+ x = cluster.points.map { |p| p[0] }
15
+ y = cluster.points.map { |p| p[1] }
16
+
17
+ # Plot w/o a title (clutters things up)
18
+ plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
19
+ ds.notitle
20
+ end
21
+
22
+ # Centroid point as bigger black points
23
+ x = [cluster.centroid.x]
24
+ y = [cluster.centroid.y]
25
+
26
+ plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
27
+ ds.notitle
28
+ ds.linecolor = '000000'
29
+ ds.linewidth = 3
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,23 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+
6
+ points = [
7
+ { x: 0.3968, y: 1.9431 },
8
+ { x: 9.3348, y: 6.7843 },
9
+ { x: 9.2882, y: 8.1347 },
10
+ { x: 7.6768, y: 2.7362 },
11
+ { x: 3.4434, y: 4.1910 },
12
+ { x: 1.8097, y: 5.0884 },
13
+ { x: 7.0698, y: 3.9285 },
14
+ { x: 9.3820, y: 7.6790 },
15
+ { x: 8.6092, y: 0.9651 },
16
+ { x: 9.1981, y: 7.7493 }
17
+ ]
18
+
19
+ clusters = KMeansPP.clusters(points, 3) do |point|
20
+ [point[:x], point[:y]]
21
+ end
22
+
23
+ puts clusters
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ require './common'
6
+ require 'csv'
7
+
8
+ points = CSV.foreach('points.csv').map do |row|
9
+ [row[0].to_f, row[1].to_f]
10
+ end
11
+
12
+ clusters = KMeansPP.clusters(points, 3)
13
+
14
+ plot clusters
15
+ puts clusters
@@ -0,0 +1,47 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ # require './common'
6
+ require 'ruby-prof'
7
+
8
+ # Generate an array of random n points around origin.
9
+ #
10
+ # @param n [Fixnum] Number of points to generate.
11
+ # @param radius [Fixnum] How far to go from origin.
12
+ #
13
+ # @return [Array<Array>]
14
+ def generate_points(n, radius)
15
+ n.times.map do
16
+ random_radius = rand * radius
17
+ random_angle = rand * 2 * Math::PI
18
+ x = random_radius * Math.cos(random_angle)
19
+ y = random_radius * Math.sin(random_angle)
20
+
21
+ [x, y]
22
+ end
23
+ end
24
+
25
+ clusters = nil
26
+
27
+ result = RubyProf.profile do
28
+ points = generate_points(100, 10)
29
+ clusters = KMeansPP.clusters(points, 5)
30
+ end
31
+
32
+ printer = RubyProf::FlatPrinter.new(result)
33
+ printer.print(File.open('report-flat.txt', 'w'), min_percent: 2)
34
+
35
+ printer = RubyProf::GraphPrinter.new(result)
36
+ printer.print(File.open('report-graph.txt', 'w'), min_percent: 2)
37
+
38
+ printer = RubyProf::GraphHtmlPrinter.new(result)
39
+ printer.print(File.open('report-graph.html', 'w'), min_percent: 2)
40
+
41
+ printer = RubyProf::DotPrinter.new(result)
42
+ printer.print(File.open('report-dot.dot', 'w'), min_percent: 2)
43
+
44
+ # Then run:
45
+ # dot -Tpng report-dot.dot > report-graph.png
46
+
47
+ # plot(clusters)
@@ -0,0 +1,27 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ require './common'
6
+
7
+ # Generate an array of random n points around origin.
8
+ #
9
+ # @param n [Fixnum] Number of points to generate.
10
+ # @param radius [Fixnum] How far to go from origin.
11
+ #
12
+ # @return [Array<Array>]
13
+ def generate_points(n, radius)
14
+ n.times.map do
15
+ random_radius = rand * radius
16
+ random_angle = rand * 2 * Math::PI
17
+ x = random_radius * Math.cos(random_angle)
18
+ y = random_radius * Math.sin(random_angle)
19
+
20
+ [x, y]
21
+ end
22
+ end
23
+
24
+ points = generate_points(30_000, 10)
25
+ clusters = KMeansPP.clusters(points, 7)
26
+
27
+ plot clusters
@@ -0,0 +1,28 @@
1
+ $LOAD_PATH.unshift('../lib')
2
+
3
+ require 'bundler/setup'
4
+ require 'k_means_pp'
5
+ require './common'
6
+
7
+ points = [
8
+ [0.3968, 1.9431],
9
+ [9.3348, 6.7843],
10
+ [9.2882, 8.1347],
11
+ [7.6768, 2.7362],
12
+ [3.4434, 4.1910],
13
+ [1.8097, 5.0884],
14
+ [7.0698, 3.9285],
15
+ [9.3820, 7.6790],
16
+ [8.6092, 0.9651],
17
+ [9.1981, 7.7493]
18
+ ]
19
+
20
+ clusters = KMeansPP.clusters(points, 3)
21
+
22
+ plot clusters
23
+ puts clusters
24
+
25
+ cluster = clusters.first
26
+ p cluster.centroid.x
27
+ p cluster.centroid.y
28
+ p cluster.points
@@ -0,0 +1,100 @@
1
+ 48.2641334571,86.4516903905
2
+ 0.114004262656,35.8368597414
3
+ 97.4319168245,92.8009240744
4
+ 24.4614031388,18.3292584382
5
+ 36.2367675367,32.8294024271
6
+ 75.5836860736,68.30729977
7
+ 38.6577034445,25.7701728584
8
+ 28.2607136287,64.4493377817
9
+ 61.5358486771,61.2195232194
10
+ 1.52352224798,38.5083779618
11
+ 11.6392182793,68.2369021579
12
+ 53.9486870607,53.9136556533
13
+ 14.6671651772,26.0132534731
14
+ 65.9506725878,82.5639317581
15
+ 58.3682872339,51.6414580337
16
+ 12.6918921252,2.28888447759
17
+ 31.7587852231,18.1368234166
18
+ 63.6631115204,24.933301389
19
+ 29.1652289905,34.456759171
20
+ 44.3830953085,70.4813875779
21
+ 47.0571691145,65.3507625811
22
+ 74.0584537502,98.2271944247
23
+ 55.8929146157,86.6196265477
24
+ 20.4744253473,12.0025149302
25
+ 14.2867767281,40.2850440995
26
+ 40.43551369,94.5410407116
27
+ 87.6178871195,12.4700151639
28
+ 47.2703048197,93.0636237124
29
+ 59.7895104175,69.2621288413
30
+ 80.8612333922,42.9183411179
31
+ 31.1271795535,55.6669044656
32
+ 78.9671049353,65.833739365
33
+ 39.8324533414,63.0343115139
34
+ 79.126343548,14.9128874133
35
+ 65.8152400306,77.5202358013
36
+ 75.2762752704,42.4858435609
37
+ 29.6475948493,61.2068411763
38
+ 67.421857106,54.8955604259
39
+ 10.4652931501,29.7954139372
40
+ 32.0272462745,99.5422900971
41
+ 80.1520927001,84.2710379142
42
+ 2.27240208403,41.2138854089
43
+ 44.4601509555,1.72563901513
44
+ 16.8676021068,35.3415636277
45
+ 58.1977544121,29.2752085455
46
+ 24.6119080085,39.9440735137
47
+ 63.0759798755,60.9841014448
48
+ 30.9289119657,95.0173219502
49
+ 8.54972950047,41.7384441737
50
+ 61.2606910793,4.06738902059
51
+ 83.2302091964,11.6373312879
52
+ 89.4443065362,42.5694882801
53
+ 24.5619318152,97.7947977804
54
+ 50.3134024475,40.6429336223
55
+ 58.1422402033,36.1112632557
56
+ 32.0668520827,29.9924151435
57
+ 89.6057447137,84.9532177777
58
+ 9.8876440816,18.2540486261
59
+ 17.9670383961,47.596032257
60
+ 50.2977668282,93.6851189223
61
+ 98.0700386253,86.5816924579
62
+ 10.8175290981,26.4344732252
63
+ 34.7463851288,24.4154447141
64
+ 92.5470100593,17.3595513748
65
+ 79.0426629356,4.59850018907
66
+ 89.9791366918,29.523946842
67
+ 3.89920214563,91.3650215111
68
+ 35.4669861576,62.1865368798
69
+ 2.78150918086,24.5280230552
70
+ 50.0390951889,57.0414421682
71
+ 64.4521660758,48.4962172448
72
+ 94.4915452316,56.6508179406
73
+ 47.1655534769,15.8292055671
74
+ 94.2027011374,45.6802385454
75
+ 30.5846324871,54.783635876
76
+ 57.7043252948,0.286661610381
77
+ 41.7908674949,14.7206014023
78
+ 59.6689465934,64.8849831965
79
+ 92.2553335495,55.9096460272
80
+ 48.493467262,69.4766837809
81
+ 23.1837859581,71.4406867443
82
+ 29.0737623652,66.9391416961
83
+ 95.7442323112,89.4677505059
84
+ 68.7707275828,40.9900140055
85
+ 84.5445737133,32.1707309618
86
+ 67.4126251988,56.6710579117
87
+ 10.688352016,28.1745892928
88
+ 56.7620324155,18.3034334207
89
+ 50.6751320678,86.6916908032
90
+ 74.6185482896,34.022483532
91
+ 20.7011996002,32.855295357
92
+ 11.479054664,1.59204297586
93
+ 51.6805387648,25.4063026358
94
+ 84.4109522357,47.237632645
95
+ 90.6395051745,57.7917166935
96
+ 58.6159601042,84.1226173848
97
+ 46.2184509277,28.559934585
98
+ 97.0302485783,41.3135022812
99
+ 31.3144587058,87.2459910122
100
+ 5.93357833962,95.6812831872
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'k_means_pp/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'k_means_pp'
8
+ spec.version = KMeansPP::VERSION
9
+ spec.authors = ['Oldrich Vetesnik']
10
+ spec.email = ['oldrich.vetesnik@gmail.com']
11
+ spec.summary = 'K-means++ Algorithm Implementation.'
12
+ spec.description = 'This is a Ruby implementation of the k-means++ ' \
13
+ 'algorithm for data clustering. In other words: ' \
14
+ 'Grouping a bunch of X, Y points into K groups.'
15
+ spec.homepage = 'https://github.com/ollie/k_means_pp'
16
+ spec.license = 'MIT'
17
+
18
+ spec.files = `git ls-files -z`.split("\x0")
19
+ spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
20
+ spec.test_files = spec.files.grep(/^(test|spec|features)\//)
21
+ spec.require_paths = ['lib']
22
+
23
+ # System
24
+ spec.add_development_dependency 'bundler', '~> 1.7'
25
+
26
+ # Test
27
+ spec.add_development_dependency 'rspec', '~> 3.1'
28
+ spec.add_development_dependency 'simplecov', '~> 0.9'
29
+
30
+ # Code style, debugging, docs
31
+ spec.add_development_dependency 'yard', '~> 0.8'
32
+ spec.add_development_dependency 'rake', '~> 10.3'
33
+ spec.add_development_dependency 'rubocop', '~> 0.26'
34
+ spec.add_development_dependency 'pry', '~> 0.10'
35
+ spec.add_development_dependency 'pry-byebug', '~> 2.0'
36
+ spec.add_development_dependency 'ruby-prof', '~> 0.15'
37
+ spec.add_development_dependency 'gnuplot', '~> 2.6'
38
+ end
@@ -0,0 +1,240 @@
1
+ require 'k_means_pp/version'
2
+ require 'k_means_pp/point'
3
+ require 'k_means_pp/cluster'
4
+
5
+ # Cluster data with the k-means++, k-means and Lloyd algorithm.
6
+ class KMeansPP
7
+ # Source data set of points.
8
+ #
9
+ # @return [Array<Point>]
10
+ attr_accessor :points
11
+
12
+ # Centroid points
13
+ #
14
+ # @return [Array<Centroid>]
15
+ attr_accessor :centroids
16
+
17
+ # Take an array of things and group them into K clusters.
18
+ #
19
+ # If no block was given, an array of arrays (of two numbers) is expected.
20
+ # At the end an array of +Cluster+s is returned, each wrapping
21
+   # an array of arrays (of two numbers).
22
+ #
23
+ # If a block was given, the +points+ is likely an array of other things
24
+ # like hashes or objects. The block is expected to return an array of two
25
+ # numbers. At the end an array of +Cluster+s is returned, each wrapping
26
+   # an array of original objects.
27
+ #
28
+ # @param points [Array] Source data set of points.
29
+ # @param clusters_count [Fixnum] Number of clusters ("k").
30
+ # @yieldreturn [Array<Numeric>]
31
+ #
32
+ # @return [Array<Cluster>]
33
+ def self.clusters(points, clusters_count, &block)
34
+ instance = new(points, clusters_count, &block)
35
+ instance.group_points
36
+ instance.centroids.map do |centroid|
37
+ cluster_for_centroid(centroid, points, &block)
38
+ end
39
+ end
40
+
41
+ # Computed points are a flat structure so this nests each point
42
+ # in an array.
43
+ #
44
+ # @param centroid [Centroid] Centroid of the cluster.
45
+ #
46
+ # @return [Cluster]
47
+ def self.cluster_for_centroid(centroid, points, &block)
48
+ cluster_points = points.select { |p| p.group == centroid }
49
+
50
+ if block
51
+ cluster_points.map!(&:original)
52
+ else
53
+ cluster_points.map! { |p| [p.x, p.y] }
54
+ end
55
+
56
+ Cluster.new(centroid, cluster_points)
57
+ end
58
+
59
+ # Find nearest centroid for a given point in given centroids.
60
+ #
61
+ # @param point [Point] Measure distance of this point
62
+ # @param centroids [Array<Centroid>] to those cluster centers
63
+ #
64
+ # @return [Centroid]
65
+ def self.find_nearest_centroid(point, centroids)
66
+ find_nearest_centroid_and_distance(point, centroids)[0]
67
+ end
68
+
69
+   # Find the squared distance to the nearest centroid for a given point in given centroids.
70
+ #
71
+ # @param point [Point] Measure distance of this point
72
+ # @param centroids [Array<Centroid>] to those cluster centers
73
+ #
74
+ # @return [Float]
75
+ def self.find_nearest_centroid_distance(point, centroids)
76
+ find_nearest_centroid_and_distance(point, centroids)[1]
77
+ end
78
+
79
+   # Find the nearest centroid in given centroids, together with the squared distance to it.
80
+ #
81
+ # @param point [Point] Measure distance of this point
82
+ # @param centroids [Array<Centroid>] to those cluster centers
83
+ #
84
+ # @return [Array]
85
+ def self.find_nearest_centroid_and_distance(point, centroids)
86
+ # Assume the current centroid is the closest.
87
+ nearest_centroid = point.group
88
+ nearest_distance = Float::INFINITY
89
+
90
+ centroids.each do |centroid|
91
+ distance = centroid.squared_distance_to(point)
92
+
93
+ next if distance >= nearest_distance
94
+
95
+ nearest_distance = distance
96
+ nearest_centroid = centroid
97
+ end
98
+
99
+ [nearest_centroid, nearest_distance]
100
+ end
101
+
102
+ # Take an array of things and group them into K clusters.
103
+ #
104
+ # If no block was given, an array of arrays (of two numbers) is expected.
105
+ # Internally we map them with +Point+ objects.
106
+ #
107
+ # If a block was given, the +points+ is likely an array of other things
108
+ # like hashes or objects. In this case we will keep the original object
109
+ # in a property and once we are done, we will swap those objects.
110
+   # The block is expected to return an array of two numbers.
111
+ #
112
+ # @param points [Array] Source data set of points.
113
+ # @param clusters_count [Fixnum] Number of clusters ("k").
114
+ # @yieldreturn [Array<Numeric>]
115
+ def initialize(points, clusters_count)
116
+ if block_given?
117
+ points.map! do |point_obj|
118
+ point_ary = yield(point_obj)
119
+ point = Point.new(point_ary[0], point_ary[1])
120
+ point.original = point_obj
121
+ point
122
+ end
123
+ else
124
+ points.map! do |point_ary|
125
+ Point.new(point_ary[0], point_ary[1])
126
+ end
127
+ end
128
+
129
+ self.points = points
130
+ self.centroids = Array.new(clusters_count)
131
+ end
132
+
133
+ # Group points into clusters.
134
+ def group_points
135
+ define_initial_clusters
136
+ fine_tune_clusters
137
+ end
138
+
139
+ protected
140
+
141
+ # K-means++ algorithm.
142
+ #
143
+ # Find initial centroids and assign points to their nearest centroid,
144
+ # forming cells.
145
+ def define_initial_clusters
146
+ # Randomly choose a point as the first centroid.
147
+ centroids[0] = Centroid.new(points.sample)
148
+
149
+ # Initialize an array of distances of every point.
150
+ distances = points.size.times.map { 0.0 }
151
+
152
+ centroids.each_with_index do |_, centroid_i|
153
+ # Skip the first centroid as it's already picked but keep the index.
154
+ next if centroid_i == 0
155
+
156
+ # Sum points' distances to their nearest centroid
157
+ distances_sum = 0.0
158
+
159
+ points.each_with_index do |point, point_i|
160
+ distance = self.class.find_nearest_centroid_distance(
161
+ point,
162
+ centroids[0...centroid_i]
163
+ )
164
+ distances[point_i] = distance
165
+ distances_sum += distance
166
+ end
167
+
168
+ # Randomly cut it.
169
+ distances_sum *= rand
170
+
171
+ # Keep subtracting those distances until we hit a zero (or lower)
172
+ # in which case we found a new centroid.
173
+ distances.each_with_index do |distance, point_i|
174
+ distances_sum -= distance
175
+ next if distances_sum > 0
176
+ centroids[centroid_i] = Centroid.new(points[point_i])
177
+ break
178
+ end
179
+ end
180
+
181
+ # Assign each point its nearest centroid.
182
+ points.each do |point|
183
+ point.group = self.class.find_nearest_centroid(point, centroids)
184
+ end
185
+ end
186
+
187
+ # This is Lloyd's algorithm
188
+ # https://en.wikipedia.org/wiki/Lloyd%27s_algorithm
189
+ #
190
+ # At this point we have our points already assigned into cells.
191
+ #
192
+ # 1. We calculate a new center for each cell.
193
+ # 2. For each point find its nearest center and re-assign it if it changed.
194
+ # 3. Repeat until a threshold has been reached.
195
+ def fine_tune_clusters
196
+     # When the number of changed points drops to this value or below, we are done.
197
+ changed_threshold = points.size >> 10
198
+
199
+ loop do
200
+ calculate_new_centroids
201
+ changed = reassign_points
202
+
203
+ # Stop when 99.9% of points are good
204
+ break if changed <= changed_threshold
205
+ end
206
+ end
207
+
208
+ # For each cell calculate its center.
209
+ # This is done by averaging X and Y coordinates.
210
+ def calculate_new_centroids
211
+ # Clear centroids.
212
+ centroids.each(&:reset)
213
+
214
+ # Sum all X and Y coords into each point's centroid.
215
+ points.each do |point|
216
+ centroid = point.group
217
+ centroid.add(point)
218
+ end
219
+
220
+ # And then average it to find a center.
221
+ centroids.each(&:average)
222
+ end
223
+
224
+ # Loop through all the points and find their nearest centroid.
225
+   # If it's a different one than current, change it and take a note.
226
+ #
227
+ # @return [Fixnum] Number of changed points.
228
+ def reassign_points
229
+ changed = 0
230
+
231
+ points.each do |point|
232
+ centroid = self.class.find_nearest_centroid(point, centroids)
233
+ next if centroid == point.group
234
+ changed += 1
235
+ point.group = centroid
236
+ end
237
+
238
+ changed
239
+ end
240
+ end
@@ -0,0 +1,32 @@
1
+ class KMeansPP
2
+ # Cluster has a centroid and a group of related points.
3
+ class Cluster
4
+ # Center of the data set ("centroid").
5
+ #
6
+ # @return [Centroid]
7
+ attr_accessor :centroid
8
+
9
+ # Points in this cluster.
10
+ #
11
+ # @return [Array<Point>]
12
+ attr_accessor :points
13
+
14
+ # Create a new cluster with a centroid and points.
15
+ #
16
+ # @param centroid [Centroid] Center point of the data set.
17
+ # @param points [Array<Point>] Points in this cluster.
18
+ def initialize(centroid, points = [])
19
+ self.centroid = centroid
20
+ self.points = points
21
+ end
22
+
23
+ # A string representation of the cluster.
24
+ def to_s
25
+ o = ''
26
+ o << "Cluster #{ centroid }: [\n"
27
+ points.each { |p| o << " #{ p },\n" }
28
+ o << "]\n"
29
+ o
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,95 @@
1
+ class KMeansPP
2
+ # Common methods for +Point+ and +Centroid+.
3
+ class BasePoint
4
+ # X coordinate of the point.
5
+ #
6
+ # @return [Float]
7
+ attr_accessor :x
8
+
9
+ # Y coordinate of the point.
10
+ #
11
+ # @return [Float]
12
+ attr_accessor :y
13
+
14
+ # Measure a 2D squared distance between two points.
15
+ #
16
+ # @param point [BasePoint]
17
+ #
18
+ # @return [Float]
19
+ def squared_distance_to(point)
20
+ distance_x = x - point.x
21
+ distance_y = y - point.y
22
+ squared_distance = distance_x**2 + distance_y**2
23
+ squared_distance
24
+ end
25
+
26
+ # A string representation of the point.
27
+ def to_s
28
+ "(#{ x }, #{ y })"
29
+ end
30
+ end
31
+
32
+ # Point of the data set.
33
+ class Point < BasePoint
34
+ # Group is a centroid point.
35
+ #
36
+ # @return [Centroid]
37
+ attr_accessor :group
38
+
39
+ # The original object (could be anything from Hash to an Object).
40
+ #
41
+ # @return [Object]
42
+ attr_accessor :original
43
+
44
+     # Create a new data set point.
45
+ #
46
+ # @param x [Float] X coordinate of the point.
47
+ # @param y [Float] Y coordinate of the point.
48
+ # @param group [Centroid] Group is a centroid point.
49
+ def initialize(x = 0.0, y = 0.0, group = nil)
50
+ self.x = x
51
+ self.y = y
52
+ self.group = group
53
+ end
54
+ end
55
+
56
+ # Centroid of a cluster.
57
+ class Centroid < BasePoint
58
+ # How many points are in this cluster?
59
+ #
60
+ # @return [Fixnum]
61
+ attr_accessor :counter
62
+
63
+ # Create a new centroid point.
64
+ #
65
+ # @param point [Point] Copy point's X and Y coords.
66
+ def initialize(point)
67
+ self.x = point.x
68
+ self.y = point.y
69
+ end
70
+
71
+ # Prepare centroid for a new iteration, zero-ing everything.
72
+ def reset
73
+ self.x = 0.0
74
+ self.y = 0.0
75
+ self.counter = 0
76
+ end
77
+
78
+ # Add this point's X and Y coords into the sum (for later average).
79
+ #
80
+ # @param point [Point]
81
+ def add(point)
82
+ self.counter += 1
83
+ self.x += point.x
84
+ self.y += point.y
85
+ end
86
+
87
+ # At this point X and Y properties will contain sums of all the point
88
+ # coords, counter will contain number of those points.
89
+ # By averaging the coords we find a new center.
90
+ def average
91
+ self.x /= counter
92
+ self.y /= counter
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,4 @@
1
+ class KMeansPP
2
+ # Version number, happy now?
3
+ VERSION = '0.0.1'
4
+ end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+ require 'csv'
3
+
4
+ RSpec.describe 'Superman' do
5
+ it 'does it again' do
6
+ data = CSV.foreach('./spec/resources/points.csv').map do |row|
7
+ [row[0].to_f, row[1].to_f]
8
+ end
9
+
10
+ clusters = KMeansPP.clusters(data, 3)
11
+
12
+ clusters.each do |cluster|
13
+ expect(cluster.points.size).to be > 0
14
+ expect(cluster.centroid.x).to_not eq(0)
15
+ expect(cluster.centroid.y).to_not eq(0)
16
+ expect(cluster.to_s).to_not be_empty
17
+ end
18
+
19
+ expect(clusters.size).to eq(3)
20
+ end
21
+
22
+ it 'array of arrays' do
23
+ data = [
24
+ [0.3968, 1.9431],
25
+ [9.3348, 6.7843],
26
+ [9.2882, 8.1347],
27
+ [7.6768, 2.7362],
28
+ [3.4434, 4.1910],
29
+ [1.8097, 5.0884],
30
+ [7.0698, 3.9285],
31
+ [9.3820, 7.6790],
32
+ [8.6092, 0.9651],
33
+ [9.1981, 7.7493]
34
+ ]
35
+
36
+ clusters = KMeansPP.clusters(data, 3)
37
+ expect(clusters.size).to eq(3)
38
+ expect(clusters.first.points.first).to be_a(Array)
39
+ end
40
+
41
+ it 'array of anything else with block' do
42
+ data = [
43
+ { x: 0.3968, y: 1.9431 },
44
+ { x: 9.3348, y: 6.7843 },
45
+ { x: 9.2882, y: 8.1347 },
46
+ { x: 7.6768, y: 2.7362 },
47
+ { x: 3.4434, y: 4.1910 },
48
+ { x: 1.8097, y: 5.0884 },
49
+ { x: 7.0698, y: 3.9285 },
50
+ { x: 9.3820, y: 7.6790 },
51
+ { x: 8.6092, y: 0.9651 },
52
+ { x: 9.1981, y: 7.7493 }
53
+ ]
54
+
55
+ clusters = KMeansPP.clusters(data, 3) do |point|
56
+ [point[:x], point[:y]]
57
+ end
58
+
59
+ expect(clusters.size).to eq(3)
60
+ expect(clusters.first.points.first).to be_a(Hash)
61
+ end
62
+ end
@@ -0,0 +1,100 @@
1
+ 48.2641334571,86.4516903905
2
+ 0.114004262656,35.8368597414
3
+ 97.4319168245,92.8009240744
4
+ 24.4614031388,18.3292584382
5
+ 36.2367675367,32.8294024271
6
+ 75.5836860736,68.30729977
7
+ 38.6577034445,25.7701728584
8
+ 28.2607136287,64.4493377817
9
+ 61.5358486771,61.2195232194
10
+ 1.52352224798,38.5083779618
11
+ 11.6392182793,68.2369021579
12
+ 53.9486870607,53.9136556533
13
+ 14.6671651772,26.0132534731
14
+ 65.9506725878,82.5639317581
15
+ 58.3682872339,51.6414580337
16
+ 12.6918921252,2.28888447759
17
+ 31.7587852231,18.1368234166
18
+ 63.6631115204,24.933301389
19
+ 29.1652289905,34.456759171
20
+ 44.3830953085,70.4813875779
21
+ 47.0571691145,65.3507625811
22
+ 74.0584537502,98.2271944247
23
+ 55.8929146157,86.6196265477
24
+ 20.4744253473,12.0025149302
25
+ 14.2867767281,40.2850440995
26
+ 40.43551369,94.5410407116
27
+ 87.6178871195,12.4700151639
28
+ 47.2703048197,93.0636237124
29
+ 59.7895104175,69.2621288413
30
+ 80.8612333922,42.9183411179
31
+ 31.1271795535,55.6669044656
32
+ 78.9671049353,65.833739365
33
+ 39.8324533414,63.0343115139
34
+ 79.126343548,14.9128874133
35
+ 65.8152400306,77.5202358013
36
+ 75.2762752704,42.4858435609
37
+ 29.6475948493,61.2068411763
38
+ 67.421857106,54.8955604259
39
+ 10.4652931501,29.7954139372
40
+ 32.0272462745,99.5422900971
41
+ 80.1520927001,84.2710379142
42
+ 2.27240208403,41.2138854089
43
+ 44.4601509555,1.72563901513
44
+ 16.8676021068,35.3415636277
45
+ 58.1977544121,29.2752085455
46
+ 24.6119080085,39.9440735137
47
+ 63.0759798755,60.9841014448
48
+ 30.9289119657,95.0173219502
49
+ 8.54972950047,41.7384441737
50
+ 61.2606910793,4.06738902059
51
+ 83.2302091964,11.6373312879
52
+ 89.4443065362,42.5694882801
53
+ 24.5619318152,97.7947977804
54
+ 50.3134024475,40.6429336223
55
+ 58.1422402033,36.1112632557
56
+ 32.0668520827,29.9924151435
57
+ 89.6057447137,84.9532177777
58
+ 9.8876440816,18.2540486261
59
+ 17.9670383961,47.596032257
60
+ 50.2977668282,93.6851189223
61
+ 98.0700386253,86.5816924579
62
+ 10.8175290981,26.4344732252
63
+ 34.7463851288,24.4154447141
64
+ 92.5470100593,17.3595513748
65
+ 79.0426629356,4.59850018907
66
+ 89.9791366918,29.523946842
67
+ 3.89920214563,91.3650215111
68
+ 35.4669861576,62.1865368798
69
+ 2.78150918086,24.5280230552
70
+ 50.0390951889,57.0414421682
71
+ 64.4521660758,48.4962172448
72
+ 94.4915452316,56.6508179406
73
+ 47.1655534769,15.8292055671
74
+ 94.2027011374,45.6802385454
75
+ 30.5846324871,54.783635876
76
+ 57.7043252948,0.286661610381
77
+ 41.7908674949,14.7206014023
78
+ 59.6689465934,64.8849831965
79
+ 92.2553335495,55.9096460272
80
+ 48.493467262,69.4766837809
81
+ 23.1837859581,71.4406867443
82
+ 29.0737623652,66.9391416961
83
+ 95.7442323112,89.4677505059
84
+ 68.7707275828,40.9900140055
85
+ 84.5445737133,32.1707309618
86
+ 67.4126251988,56.6710579117
87
+ 10.688352016,28.1745892928
88
+ 56.7620324155,18.3034334207
89
+ 50.6751320678,86.6916908032
90
+ 74.6185482896,34.022483532
91
+ 20.7011996002,32.855295357
92
+ 11.479054664,1.59204297586
93
+ 51.6805387648,25.4063026358
94
+ 84.4109522357,47.237632645
95
+ 90.6395051745,57.7917166935
96
+ 58.6159601042,84.1226173848
97
+ 46.2184509277,28.559934585
98
+ 97.0302485783,41.3135022812
99
+ 31.3144587058,87.2459910122
100
+ 5.93357833962,95.6812831872
@@ -0,0 +1,10 @@
1
+ require 'bundler/setup'
2
+
3
+ require 'simplecov'
4
+
5
+ # Coverage tool, needs to be started as soon as possible
6
+ SimpleCov.start do
7
+ add_filter '/spec/' # Ignore spec directory
8
+ end
9
+
10
+ require 'k_means_pp'
metadata ADDED
@@ -0,0 +1,211 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: k_means_pp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Oldrich Vetesnik
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: simplecov
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.9'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.9'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.8'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.26'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.26'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.10'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.10'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry-byebug
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: ruby-prof
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.15'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.15'
139
+ - !ruby/object:Gem::Dependency
140
+ name: gnuplot
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '2.6'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '2.6'
153
+ description: 'This is a Ruby implementation of the k-means++ algorithm for data clustering.
154
+ In other words: Grouping a bunch of X, Y points into K groups.'
155
+ email:
156
+ - oldrich.vetesnik@gmail.com
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - ".gitignore"
162
+ - ".rspec"
163
+ - ".yardopts"
164
+ - Gemfile
165
+ - LICENSE.txt
166
+ - README.md
167
+ - Rakefile
168
+ - examples/common.rb
169
+ - examples/example_block.rb
170
+ - examples/example_csv.rb
171
+ - examples/example_debug.rb
172
+ - examples/example_huge.rb
173
+ - examples/example_simple.rb
174
+ - examples/points.csv
175
+ - k_means_pp.gemspec
176
+ - lib/k_means_pp.rb
177
+ - lib/k_means_pp/cluster.rb
178
+ - lib/k_means_pp/point.rb
179
+ - lib/k_means_pp/version.rb
180
+ - spec/lib/k_means_pp_spec.rb
181
+ - spec/resources/points.csv
182
+ - spec/spec_helper.rb
183
+ homepage: https://github.com/ollie/k_means_pp
184
+ licenses:
185
+ - MIT
186
+ metadata: {}
187
+ post_install_message:
188
+ rdoc_options: []
189
+ require_paths:
190
+ - lib
191
+ required_ruby_version: !ruby/object:Gem::Requirement
192
+ requirements:
193
+ - - ">="
194
+ - !ruby/object:Gem::Version
195
+ version: '0'
196
+ required_rubygems_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ">="
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ requirements: []
202
+ rubyforge_project:
203
+ rubygems_version: 2.4.1
204
+ signing_key:
205
+ specification_version: 4
206
+ summary: K-means++ Algorithm Implementation.
207
+ test_files:
208
+ - spec/lib/k_means_pp_spec.rb
209
+ - spec/resources/points.csv
210
+ - spec/spec_helper.rb
211
+ has_rdoc: