k_means_pp 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.yardopts +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +161 -0
- data/Rakefile +18 -0
- data/examples/common.rb +34 -0
- data/examples/example_block.rb +23 -0
- data/examples/example_csv.rb +15 -0
- data/examples/example_debug.rb +47 -0
- data/examples/example_huge.rb +27 -0
- data/examples/example_simple.rb +28 -0
- data/examples/points.csv +100 -0
- data/k_means_pp.gemspec +38 -0
- data/lib/k_means_pp.rb +240 -0
- data/lib/k_means_pp/cluster.rb +32 -0
- data/lib/k_means_pp/point.rb +95 -0
- data/lib/k_means_pp/version.rb +4 -0
- data/spec/lib/k_means_pp_spec.rb +62 -0
- data/spec/resources/points.csv +100 -0
- data/spec/spec_helper.rb +10 -0
- metadata +211 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 25114f424713579b656eddb1a275a59103a860c5
|
4
|
+
data.tar.gz: f3ef959b9a7044a048903c44b2c55a4c0bc11583
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ea81b5f48c62e0654cee9a635ffbc16e829c5129e1e26a02477217287a517416ce5adfe41a150199acc87dec26d335b9d89d738ea94bd31441ce419002d23625
|
7
|
+
data.tar.gz: 9587a9d3a4d0c7d6d3e14d945d92bab9c81e9779699ef4f53dc8e232ea56f073d2fae1f0baad6c96ea2af6fc0ae01bd709964491ed9333f6e379e530acaf6bc4
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Oldrich Vetesnik
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
# KMeansPP
|
2
|
+
|
3
|
+
## What's this?
|
4
|
+
|
5
|
+
This is a Ruby implementation of the k-means++ algorithm for data clustering.
|
6
|
+
In other words: Grouping a bunch of X, Y points into K groups.
|
7
|
+
The code is a port of the Python version on [rosettacode.org][rosetta].
|
8
|
+
|
9
|
+
### K-means++ (from [Wikipedia][kmeans++])
|
10
|
+
|
11
|
+
> In data mining, k-means++ is an algorithm for choosing the initial values (or
|
12
|
+
> "seeds") for the k-means clustering algorithm. It was proposed in 2007 by
|
13
|
+
> David Arthur and Sergei Vassilvitskii, as an approximation algorithm for the
|
14
|
+
> NP-hard k-means problem—a way of avoiding the sometimes poor clusterings found
|
15
|
+
> by the standard k-means algorithm.
|
16
|
+
>
|
17
|
+
> [...]
|
18
|
+
>
|
19
|
+
> The k-means problem is to find cluster centers that minimize the intra-class
|
20
|
+
> variance, i.e. the sum of squared distances from each data point being
|
21
|
+
> clustered to its cluster center (the center that is closest to it). Although
|
22
|
+
> finding an exact solution to the k-means problem for arbitrary input is
|
23
|
+
> NP-hard the standard approach to finding an approximate solution (often
|
24
|
+
> called [Lloyd's algorithm][lloyd] or the k-means algorithm) is used widely and
|
25
|
+
> frequently finds reasonable solutions quickly.
|
26
|
+
|
27
|
+
### K-means (from [Wikipedia][kmeans])
|
28
|
+
|
29
|
+
> k-means clustering is a method of vector quantization, originally from signal
|
30
|
+
> processing, that is popular for cluster analysis in data mining. k-means
|
31
|
+
> clustering aims to partition n observations into k clusters in which each
|
32
|
+
> observation belongs to the cluster with the nearest mean, serving as a
|
33
|
+
> prototype of the cluster. This results in a partitioning of the data space
|
34
|
+
> into Voronoi cells.
|
35
|
+
|
36
|
+
## Usage
|
37
|
+
|
38
|
+
See examples, too.
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
points = [
|
42
|
+
[0.3968, 1.9431],
|
43
|
+
[9.3348, 6.7843],
|
44
|
+
[9.2882, 8.1347],
|
45
|
+
[7.6768, 2.7362],
|
46
|
+
[3.4434, 4.1910],
|
47
|
+
[1.8097, 5.0884],
|
48
|
+
[7.0698, 3.9285],
|
49
|
+
[9.3820, 7.6790],
|
50
|
+
[8.6092, 0.9651],
|
51
|
+
[9.1981, 7.7493]
|
52
|
+
]
|
53
|
+
|
54
|
+
clusters = KMeansPP.clusters(points, 3)
|
55
|
+
|
56
|
+
plot clusters
|
57
|
+
puts clusters
|
58
|
+
# Cluster (7.785266666666668, 2.5432666666666663): [
|
59
|
+
# [7.6768, 2.7362],
|
60
|
+
# [7.0698, 3.9285],
|
61
|
+
# [8.6092, 0.9651],
|
62
|
+
# ]
|
63
|
+
# Cluster (9.300774999999998, 7.586824999999999): [
|
64
|
+
# [9.3348, 6.7843],
|
65
|
+
# [9.2882, 8.1347],
|
66
|
+
# [9.382, 7.679],
|
67
|
+
# [9.1981, 7.7493],
|
68
|
+
# ]
|
69
|
+
# Cluster (1.8833, 3.7408333333333332): [
|
70
|
+
# [0.3968, 1.9431],
|
71
|
+
# [3.4434, 4.191],
|
72
|
+
# [1.8097, 5.0884],
|
73
|
+
# ]
|
74
|
+
|
75
|
+
cluster = clusters.first
|
76
|
+
p cluster.centroid.x # 7.785266666666668
|
77
|
+
p cluster.centroid.y # 2.5432666666666663
|
78
|
+
p cluster.points # [[7.6768, 2.7362], [7.0698, 3.9285], [8.6092, 0.9651]]
|
79
|
+
```
|
80
|
+
|
81
|
+
Or with custom structure:
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
points = [
|
85
|
+
{ x: 0.3968, y: 1.9431 },
|
86
|
+
{ x: 9.3348, y: 6.7843 },
|
87
|
+
{ x: 9.2882, y: 8.1347 },
|
88
|
+
{ x: 7.6768, y: 2.7362 },
|
89
|
+
{ x: 3.4434, y: 4.1910 },
|
90
|
+
{ x: 1.8097, y: 5.0884 },
|
91
|
+
{ x: 7.0698, y: 3.9285 },
|
92
|
+
{ x: 9.3820, y: 7.6790 },
|
93
|
+
{ x: 8.6092, y: 0.9651 },
|
94
|
+
{ x: 9.1981, y: 7.7493 }
|
95
|
+
]
|
96
|
+
|
97
|
+
clusters = KMeansPP.clusters(points, 3) do |point|
|
98
|
+
[point[:x], point[:y]]
|
99
|
+
end
|
100
|
+
|
101
|
+
puts clusters
|
102
|
+
# Cluster (9.300774999999998, 7.586824999999999): [
|
103
|
+
# {:x=>9.3348, :y=>6.7843},
|
104
|
+
# {:x=>9.2882, :y=>8.1347},
|
105
|
+
# {:x=>9.382, :y=>7.679},
|
106
|
+
# {:x=>9.1981, :y=>7.7493},
|
107
|
+
# ]
|
108
|
+
# Cluster (1.8833, 3.7408333333333332): [
|
109
|
+
# {:x=>0.3968, :y=>1.9431},
|
110
|
+
# {:x=>3.4434, :y=>4.191},
|
111
|
+
# {:x=>1.8097, :y=>5.0884},
|
112
|
+
# ]
|
113
|
+
# Cluster (7.785266666666668, 2.5432666666666663): [
|
114
|
+
# {:x=>7.6768, :y=>2.7362},
|
115
|
+
# {:x=>7.0698, :y=>3.9285},
|
116
|
+
# {:x=>8.6092, :y=>0.9651},
|
117
|
+
# ]
|
118
|
+
```
|
119
|
+
|
120
|
+
## Running examples
|
121
|
+
|
122
|
+
If you want to run the examples, you will need the `gnuplot` library and the `gnuplot` gem.
|
123
|
+
Don't forget to add the `--with-x` flag; otherwise it won't show anything.
|
124
|
+
|
125
|
+
$ brew install gnuplot --with-x # Assuming OS X
|
126
|
+
$ gem install gnuplot
|
127
|
+
$ cd examples
|
128
|
+
$ ruby example_simple.rb
|
129
|
+
$ ruby example_block.rb
|
130
|
+
$ ruby example_csv.rb
|
131
|
+
$ ruby example_huge.rb
|
132
|
+
$ ruby example_debug.rb # Generates profiler reports
|
133
|
+
|
134
|
+
## Installation
|
135
|
+
|
136
|
+
Add this line to your application's Gemfile:
|
137
|
+
|
138
|
+
```ruby
|
139
|
+
gem 'k_means_pp'
|
140
|
+
```
|
141
|
+
|
142
|
+
And then execute:
|
143
|
+
|
144
|
+
$ bundle
|
145
|
+
|
146
|
+
Or install it yourself as:
|
147
|
+
|
148
|
+
$ gem install k_means_pp
|
149
|
+
|
150
|
+
## Contributing
|
151
|
+
|
152
|
+
1. Fork it (https://github.com/ollie/k_means_pp/fork)
|
153
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
154
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
155
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
156
|
+
5. Create a new Pull Request
|
157
|
+
|
158
|
+
[rosetta]: http://rosettacode.org/wiki/K-means%2B%2B_clustering#Python
|
159
|
+
[kmeans++]: https://en.wikipedia.org/wiki/K-means%2B%2B
|
160
|
+
[kmeans]: https://en.wikipedia.org/wiki/K-means_clustering
|
161
|
+
[lloyd]: https://en.wikipedia.org/wiki/Lloyd%27s_algorithm
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Plain `rake` runs the whole quality pipeline.
task default: [:combo]

desc 'Run tests, rubocop and generate documentation'
task :combo do
  sh 'bundle exec rspec'

  # Giving `sh` a block hands the command's outcome to the block instead of
  # failing the task, so rubocop offences (exit status > 0) do not stop the
  # documentation build below.
  sh('bundle exec rubocop') { |_ok, _status| }

  sh 'bundle exec yardoc'
end

desc 'Same as :combo but build a gem, too'
task mega_combo: [:combo] do
  sh 'gem build k_means_pp.gemspec'
end

desc 'Start a console'
task :console do
  # Load the library straight from the working tree.
  sh 'bundle exec pry -I ./lib -r ./lib/k_means_pp.rb'
end
|
data/examples/common.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'gnuplot'
|
2
|
+
|
3
|
+
# Draw clusters on screen through a gnuplot pipe.
#
# Each cluster contributes two untitled data sets: its member points and,
# on top of them, its centroid in black with a thicker line.
#
# @param clusters [Array<Cluster>] Clusters whose points can be indexed
#   with +[0]+ (x) and +[1]+ (y).
def plot(clusters)
  Gnuplot.open do |pipe|
    Gnuplot::Plot.new(pipe) do |canvas|
      clusters.each do |cluster|
        # Member points: gnuplot wants one array per axis.
        xs = cluster.points.map { |member| member[0] }
        ys = cluster.points.map { |member| member[1] }

        # No title, it only clutters the legend.
        members = Gnuplot::DataSet.new([xs, ys]) { |ds| ds.notitle }
        canvas.data << members

        # The centroid is a single, more prominent black point.
        center = cluster.centroid
        marker = Gnuplot::DataSet.new([[center.x], [center.y]]) do |ds|
          ds.notitle
          ds.linecolor = '000000'
          ds.linewidth = 3
        end
        canvas.data << marker
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
$LOAD_PATH.unshift('../lib')
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'k_means_pp'
|
5
|
+
|
6
|
+
points = [
|
7
|
+
{ x: 0.3968, y: 1.9431 },
|
8
|
+
{ x: 9.3348, y: 6.7843 },
|
9
|
+
{ x: 9.2882, y: 8.1347 },
|
10
|
+
{ x: 7.6768, y: 2.7362 },
|
11
|
+
{ x: 3.4434, y: 4.1910 },
|
12
|
+
{ x: 1.8097, y: 5.0884 },
|
13
|
+
{ x: 7.0698, y: 3.9285 },
|
14
|
+
{ x: 9.3820, y: 7.6790 },
|
15
|
+
{ x: 8.6092, y: 0.9651 },
|
16
|
+
{ x: 9.1981, y: 7.7493 }
|
17
|
+
]
|
18
|
+
|
19
|
+
clusters = KMeansPP.clusters(points, 3) do |point|
|
20
|
+
[point[:x], point[:y]]
|
21
|
+
end
|
22
|
+
|
23
|
+
puts clusters
|
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift('../lib')
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'k_means_pp'
|
5
|
+
require './common'
|
6
|
+
require 'csv'
|
7
|
+
|
8
|
+
points = CSV.foreach('points.csv').map do |row|
|
9
|
+
[row[0].to_f, row[1].to_f]
|
10
|
+
end
|
11
|
+
|
12
|
+
clusters = KMeansPP.clusters(points, 3)
|
13
|
+
|
14
|
+
plot clusters
|
15
|
+
puts clusters
|
@@ -0,0 +1,47 @@
|
|
1
|
+
$LOAD_PATH.unshift('../lib')
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'k_means_pp'
|
5
|
+
# require './common'
|
6
|
+
require 'ruby-prof'
|
7
|
+
|
8
|
+
# Build +n+ random points scattered around the origin.
#
# Every point is drawn in polar form (random distance up to +radius+,
# random angle) and converted to cartesian coordinates.
#
# @param n [Integer] Number of points to generate.
# @param radius [Numeric] Maximum distance from the origin.
#
# @return [Array<Array>] +n+ pairs of +[x, y]+ floats.
def generate_points(n, radius)
  n.times.each_with_object([]) do |_, cloud|
    distance = rand * radius
    angle = rand * 2 * Math::PI

    cloud << [distance * Math.cos(angle), distance * Math.sin(angle)]
  end
end
|
24
|
+
|
25
|
+
# Declared outside the profiled block so the result stays available for the
# optional plot at the end of the script.
clusters = nil

# Profile point generation together with the clustering run.
result = RubyProf.profile do
  points = generate_points(100, 10)
  clusters = KMeansPP.clusters(points, 5)
end

# One report per printer. The block form of File.open closes (and thereby
# flushes) every report file as soon as it has been written instead of
# leaving the handles open until the interpreter exits.
{
  RubyProf::FlatPrinter      => 'report-flat.txt',
  RubyProf::GraphPrinter     => 'report-graph.txt',
  RubyProf::GraphHtmlPrinter => 'report-graph.html',
  RubyProf::DotPrinter       => 'report-dot.dot'
}.each do |printer_class, report_path|
  File.open(report_path, 'w') do |report|
    printer_class.new(result).print(report, min_percent: 2)
  end
end
|
43
|
+
|
44
|
+
# Then run:
|
45
|
+
# dot -Tpng report-dot.dot > report-graph.png
|
46
|
+
|
47
|
+
# plot(clusters)
|
@@ -0,0 +1,27 @@
|
|
1
|
+
$LOAD_PATH.unshift('../lib')
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'k_means_pp'
|
5
|
+
require './common'
|
6
|
+
|
7
|
+
# Produce +n+ random points spread around the origin.
#
# Points are first drawn as polar coordinates (distance below +radius+,
# angle anywhere on the circle) and then mapped to cartesian +[x, y]+ pairs.
#
# @param n [Integer] Number of points to generate.
# @param radius [Numeric] Maximum distance from the origin.
#
# @return [Array<Array>] +n+ pairs of +[x, y]+ floats.
def generate_points(n, radius)
  polar = n.times.map { [rand * radius, rand * 2 * Math::PI] }

  polar.map do |distance, angle|
    [distance * Math.cos(angle), distance * Math.sin(angle)]
  end
end
|
23
|
+
|
24
|
+
points = generate_points(30_000, 10)
|
25
|
+
clusters = KMeansPP.clusters(points, 7)
|
26
|
+
|
27
|
+
plot clusters
|
@@ -0,0 +1,28 @@
|
|
1
|
+
$LOAD_PATH.unshift('../lib')
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'k_means_pp'
|
5
|
+
require './common'
|
6
|
+
|
7
|
+
points = [
|
8
|
+
[0.3968, 1.9431],
|
9
|
+
[9.3348, 6.7843],
|
10
|
+
[9.2882, 8.1347],
|
11
|
+
[7.6768, 2.7362],
|
12
|
+
[3.4434, 4.1910],
|
13
|
+
[1.8097, 5.0884],
|
14
|
+
[7.0698, 3.9285],
|
15
|
+
[9.3820, 7.6790],
|
16
|
+
[8.6092, 0.9651],
|
17
|
+
[9.1981, 7.7493]
|
18
|
+
]
|
19
|
+
|
20
|
+
clusters = KMeansPP.clusters(points, 3)
|
21
|
+
|
22
|
+
plot clusters
|
23
|
+
puts clusters
|
24
|
+
|
25
|
+
cluster = clusters.first
|
26
|
+
p cluster.centroid.x
|
27
|
+
p cluster.centroid.y
|
28
|
+
p cluster.points
|
data/examples/points.csv
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
48.2641334571,86.4516903905
|
2
|
+
0.114004262656,35.8368597414
|
3
|
+
97.4319168245,92.8009240744
|
4
|
+
24.4614031388,18.3292584382
|
5
|
+
36.2367675367,32.8294024271
|
6
|
+
75.5836860736,68.30729977
|
7
|
+
38.6577034445,25.7701728584
|
8
|
+
28.2607136287,64.4493377817
|
9
|
+
61.5358486771,61.2195232194
|
10
|
+
1.52352224798,38.5083779618
|
11
|
+
11.6392182793,68.2369021579
|
12
|
+
53.9486870607,53.9136556533
|
13
|
+
14.6671651772,26.0132534731
|
14
|
+
65.9506725878,82.5639317581
|
15
|
+
58.3682872339,51.6414580337
|
16
|
+
12.6918921252,2.28888447759
|
17
|
+
31.7587852231,18.1368234166
|
18
|
+
63.6631115204,24.933301389
|
19
|
+
29.1652289905,34.456759171
|
20
|
+
44.3830953085,70.4813875779
|
21
|
+
47.0571691145,65.3507625811
|
22
|
+
74.0584537502,98.2271944247
|
23
|
+
55.8929146157,86.6196265477
|
24
|
+
20.4744253473,12.0025149302
|
25
|
+
14.2867767281,40.2850440995
|
26
|
+
40.43551369,94.5410407116
|
27
|
+
87.6178871195,12.4700151639
|
28
|
+
47.2703048197,93.0636237124
|
29
|
+
59.7895104175,69.2621288413
|
30
|
+
80.8612333922,42.9183411179
|
31
|
+
31.1271795535,55.6669044656
|
32
|
+
78.9671049353,65.833739365
|
33
|
+
39.8324533414,63.0343115139
|
34
|
+
79.126343548,14.9128874133
|
35
|
+
65.8152400306,77.5202358013
|
36
|
+
75.2762752704,42.4858435609
|
37
|
+
29.6475948493,61.2068411763
|
38
|
+
67.421857106,54.8955604259
|
39
|
+
10.4652931501,29.7954139372
|
40
|
+
32.0272462745,99.5422900971
|
41
|
+
80.1520927001,84.2710379142
|
42
|
+
2.27240208403,41.2138854089
|
43
|
+
44.4601509555,1.72563901513
|
44
|
+
16.8676021068,35.3415636277
|
45
|
+
58.1977544121,29.2752085455
|
46
|
+
24.6119080085,39.9440735137
|
47
|
+
63.0759798755,60.9841014448
|
48
|
+
30.9289119657,95.0173219502
|
49
|
+
8.54972950047,41.7384441737
|
50
|
+
61.2606910793,4.06738902059
|
51
|
+
83.2302091964,11.6373312879
|
52
|
+
89.4443065362,42.5694882801
|
53
|
+
24.5619318152,97.7947977804
|
54
|
+
50.3134024475,40.6429336223
|
55
|
+
58.1422402033,36.1112632557
|
56
|
+
32.0668520827,29.9924151435
|
57
|
+
89.6057447137,84.9532177777
|
58
|
+
9.8876440816,18.2540486261
|
59
|
+
17.9670383961,47.596032257
|
60
|
+
50.2977668282,93.6851189223
|
61
|
+
98.0700386253,86.5816924579
|
62
|
+
10.8175290981,26.4344732252
|
63
|
+
34.7463851288,24.4154447141
|
64
|
+
92.5470100593,17.3595513748
|
65
|
+
79.0426629356,4.59850018907
|
66
|
+
89.9791366918,29.523946842
|
67
|
+
3.89920214563,91.3650215111
|
68
|
+
35.4669861576,62.1865368798
|
69
|
+
2.78150918086,24.5280230552
|
70
|
+
50.0390951889,57.0414421682
|
71
|
+
64.4521660758,48.4962172448
|
72
|
+
94.4915452316,56.6508179406
|
73
|
+
47.1655534769,15.8292055671
|
74
|
+
94.2027011374,45.6802385454
|
75
|
+
30.5846324871,54.783635876
|
76
|
+
57.7043252948,0.286661610381
|
77
|
+
41.7908674949,14.7206014023
|
78
|
+
59.6689465934,64.8849831965
|
79
|
+
92.2553335495,55.9096460272
|
80
|
+
48.493467262,69.4766837809
|
81
|
+
23.1837859581,71.4406867443
|
82
|
+
29.0737623652,66.9391416961
|
83
|
+
95.7442323112,89.4677505059
|
84
|
+
68.7707275828,40.9900140055
|
85
|
+
84.5445737133,32.1707309618
|
86
|
+
67.4126251988,56.6710579117
|
87
|
+
10.688352016,28.1745892928
|
88
|
+
56.7620324155,18.3034334207
|
89
|
+
50.6751320678,86.6916908032
|
90
|
+
74.6185482896,34.022483532
|
91
|
+
20.7011996002,32.855295357
|
92
|
+
11.479054664,1.59204297586
|
93
|
+
51.6805387648,25.4063026358
|
94
|
+
84.4109522357,47.237632645
|
95
|
+
90.6395051745,57.7917166935
|
96
|
+
58.6159601042,84.1226173848
|
97
|
+
46.2184509277,28.559934585
|
98
|
+
97.0302485783,41.3135022812
|
99
|
+
31.3144587058,87.2459910122
|
100
|
+
5.93357833962,95.6812831872
|
data/k_means_pp.gemspec
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'k_means_pp/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = 'k_means_pp'
  spec.version     = KMeansPP::VERSION
  spec.authors     = ['Oldrich Vetesnik']
  spec.email       = ['oldrich.vetesnik@gmail.com']
  spec.summary     = 'K-means++ Algorithm Implementation.'
  spec.description = [
    'This is a Ruby implementation of the k-means++ ',
    'algorithm for data clustering. In other words: ',
    'Grouping a bunch of X, Y points into K groups.'
  ].join
  spec.homepage    = 'https://github.com/ollie/k_means_pp'
  spec.license     = 'MIT'

  # Package everything tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Development-only dependencies, in declaration order:
  # system, test, then code style / debugging / docs.
  {
    'bundler'    => '~> 1.7',
    'rspec'      => '~> 3.1',
    'simplecov'  => '~> 0.9',
    'yard'       => '~> 0.8',
    'rake'       => '~> 10.3',
    'rubocop'    => '~> 0.26',
    'pry'        => '~> 0.10',
    'pry-byebug' => '~> 2.0',
    'ruby-prof'  => '~> 0.15',
    'gnuplot'    => '~> 2.6'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
data/lib/k_means_pp.rb
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
require 'k_means_pp/version'
|
2
|
+
require 'k_means_pp/point'
|
3
|
+
require 'k_means_pp/cluster'
|
4
|
+
|
5
|
+
# Cluster data with the k-means++, k-means and Lloyd algorithm.
class KMeansPP
  # Data set wrapped in +Point+ objects.
  #
  # @return [Array<Point>]
  attr_accessor :points

  # Cluster centers, one slot per requested cluster.
  #
  # @return [Array<Centroid>]
  attr_accessor :centroids

  # Take an array of things and group them into K clusters.
  #
  # If no block was given, an array of arrays (of two numbers) is expected.
  # At the end an array of +Cluster+s is returned, each wrapping
  # an array of arrays (of two numbers).
  #
  # If a block was given, the +points+ is likely an array of other things
  # like hashes or objects. The block is expected to return an array of two
  # numbers. At the end an array of +Cluster+s is returned, each wrapping
  # an array of original objects.
  #
  # The +points+ array passed in is not modified.
  #
  # @param points [Array] Source data set of points.
  # @param clusters_count [Integer] Number of clusters ("k").
  # @yieldreturn [Array<Numeric>]
  #
  # @return [Array<Cluster>] Empty when +points+ is empty.
  def self.clusters(points, clusters_count, &block)
    # Without a single point there is nothing to seed a centroid from.
    return [] if points.empty?

    instance = new(points, clusters_count, &block)
    instance.group_points
    instance.centroids.map do |centroid|
      cluster_for_centroid(centroid, instance.points, &block)
    end
  end

  # Collect the points assigned to a centroid and unwrap them for the
  # caller: original objects when a block was used, +[x, y]+ pairs otherwise.
  #
  # @param centroid [Centroid] Centroid of the cluster.
  # @param points [Array<Point>] Grouped points to pick members from.
  #
  # @return [Cluster]
  def self.cluster_for_centroid(centroid, points, &block)
    members = points.select { |point| point.group == centroid }

    unwrapped =
      if block
        members.map(&:original)
      else
        members.map { |point| [point.x, point.y] }
      end

    Cluster.new(centroid, unwrapped)
  end

  # Find nearest centroid for a given point in given centroids.
  #
  # @param point [Point] Measure distance of this point
  # @param centroids [Array<Centroid>] to those cluster centers
  #
  # @return [Centroid]
  def self.find_nearest_centroid(point, centroids)
    find_nearest_centroid_and_distance(point, centroids)[0]
  end

  # Find distance to the nearest centroid for a given point in given centroids.
  #
  # @param point [Point] Measure distance of this point
  # @param centroids [Array<Centroid>] to those cluster centers
  #
  # @return [Float] Squared distance.
  def self.find_nearest_centroid_distance(point, centroids)
    find_nearest_centroid_and_distance(point, centroids)[1]
  end

  # Find the nearest centroid in given centroids together with the squared
  # distance to it. On ties the centroid that comes first wins.
  #
  # @param point [Point] Measure distance of this point
  # @param centroids [Array<Centroid>] to those cluster centers
  #
  # @return [Array] Pair of +[Centroid, Float]+. When +centroids+ is empty
  #   this is the point's current group and +Float::INFINITY+.
  def self.find_nearest_centroid_and_distance(point, centroids)
    # Assume the current centroid is the closest.
    nearest_centroid = point.group
    nearest_distance = Float::INFINITY

    centroids.each do |centroid|
      distance = centroid.squared_distance_to(point)

      # Accept strictly closer centroids only. Testing with `<` also rejects
      # NaN distances (every comparison with NaN is false), so a centroid
      # with undefined coordinates can never capture a point.
      next unless distance < nearest_distance

      nearest_distance = distance
      nearest_centroid = centroid
    end

    [nearest_centroid, nearest_distance]
  end

  # Take an array of things and prepare them for grouping into K clusters.
  #
  # If no block was given, an array of arrays (of two numbers) is expected.
  # Internally we map them to +Point+ objects.
  #
  # If a block was given, the +points+ is likely an array of other things
  # like hashes or objects. In this case we will keep the original object
  # in a property and once we are done, we will swap those objects.
  # The block is expected to return an array of two numbers.
  #
  # The wrapping happens on a copy, the caller's array is left untouched.
  #
  # @param points [Array] Source data set of points.
  # @param clusters_count [Integer] Number of clusters ("k").
  # @yieldreturn [Array<Numeric>]
  def initialize(points, clusters_count)
    self.points = points.map do |source|
      coords = block_given? ? yield(source) : source

      point = Point.new(coords[0], coords[1])
      point.original = source if block_given?
      point
    end

    self.centroids = Array.new(clusters_count)
  end

  # Group points into clusters.
  def group_points
    define_initial_clusters
    fine_tune_clusters
  end

  protected

  # K-means++ algorithm.
  #
  # Find initial centroids and assign points to their nearest centroid,
  # forming cells.
  def define_initial_clusters
    # Randomly choose a point as the first centroid.
    centroids[0] = Centroid.new(points.sample)

    # The first centroid is already picked, seed the remaining slots.
    (1...centroids.size).each do |centroid_i|
      chosen = centroids[0...centroid_i]

      # Squared distance of every point to its nearest chosen centroid.
      distances = points.map do |point|
        self.class.find_nearest_centroid_distance(point, chosen)
      end

      # Randomly cut the total distance.
      remaining = distances.inject(0.0, :+) * rand

      # Keep subtracting those distances until we hit a zero (or lower)
      # in which case we found a new centroid. Far-away points consume
      # more of the cut and are therefore more likely to be picked.
      picked = distances.each_index.find do |point_i|
        remaining -= distances[point_i]
        remaining <= 0
      end

      # Rounding may leave a sliver of the cut after the last point; use
      # the last point then so that no centroid slot stays empty.
      centroids[centroid_i] = Centroid.new(points[picked || points.size - 1])
    end

    # Assign each point its nearest centroid.
    points.each do |point|
      point.group = self.class.find_nearest_centroid(point, centroids)
    end
  end

  # This is Lloyd's algorithm
  # https://en.wikipedia.org/wiki/Lloyd%27s_algorithm
  #
  # At this point we have our points already assigned into cells.
  #
  # 1. We calculate a new center for each cell.
  # 2. For each point find its nearest center and re-assign it if it changed.
  # 3. Repeat until a threshold has been reached.
  def fine_tune_clusters
    # Once no more than this many points change groups in an iteration, we
    # are done (size / 1024, i.e. roughly 99.9% of points are stable).
    changed_threshold = points.size >> 10

    loop do
      calculate_new_centroids
      break if reassign_points <= changed_threshold
    end
  end

  # For each cell calculate its center.
  # This is done by averaging X and Y coordinates.
  def calculate_new_centroids
    # Clear centroids.
    centroids.each(&:reset)

    # Sum all X and Y coords into each point's centroid.
    points.each { |point| point.group.add(point) }

    # And then average it to find a center.
    centroids.each(&:average)
  end

  # Loop through all the points and find their nearest centroid.
  # If it's a different one than current, change it and take a note.
  #
  # @return [Integer] Number of changed points.
  def reassign_points
    points.count do |point|
      nearest = self.class.find_nearest_centroid(point, centroids)
      next false if nearest == point.group

      point.group = nearest
      true
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
class KMeansPP
  # A centroid together with the points grouped around it.
  class Cluster
    # Center of the data set ("centroid").
    #
    # @return [Centroid]
    attr_accessor :centroid

    # Members of this cluster, exactly as handed to the constructor.
    #
    # @return [Array]
    attr_accessor :points

    # Create a new cluster with a centroid and points.
    #
    # @param centroid [Centroid] Center point of the data set.
    # @param points [Array] Points in this cluster.
    def initialize(centroid, points = [])
      self.points = points
      self.centroid = centroid
    end

    # A string representation of the cluster: a header line with the
    # centroid, one line per point and a closing bracket.
    #
    # @return [String]
    def to_s
      rows = points.map { |point| " #{ point },\n" }.join

      "Cluster #{ centroid }: [\n#{ rows }]\n"
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
class KMeansPP
  # Common methods for +Point+ and +Centroid+.
  class BasePoint
    # X coordinate of the point.
    #
    # @return [Float]
    attr_accessor :x

    # Y coordinate of the point.
    #
    # @return [Float]
    attr_accessor :y

    # Measure a 2D squared distance between two points.
    #
    # The square root is deliberately not taken: squared distances are
    # returned as they are.
    #
    # @param point [BasePoint]
    #
    # @return [Float]
    def squared_distance_to(point)
      (x - point.x)**2 + (y - point.y)**2
    end

    # A string representation of the point.
    #
    # @return [String]
    def to_s
      "(#{ x }, #{ y })"
    end
  end

  # Point of the data set.
  class Point < BasePoint
    # Group is a centroid point.
    #
    # @return [Centroid]
    attr_accessor :group

    # The original object (could be anything from Hash to an Object).
    #
    # @return [Object]
    attr_accessor :original

    # Create a new data set point.
    #
    # @param x [Float] X coordinate of the point.
    # @param y [Float] Y coordinate of the point.
    # @param group [Centroid] Group is a centroid point.
    def initialize(x = 0.0, y = 0.0, group = nil)
      self.x = x
      self.y = y
      self.group = group
    end
  end

  # Centroid of a cluster.
  #
  # Besides being a position, a centroid doubles as an accumulator:
  # +reset+, +add+ and +average+ together compute the mean of the points
  # that were added.
  class Centroid < BasePoint
    # How many points are in this cluster?
    #
    # @return [Integer]
    attr_accessor :counter

    # Create a new centroid point.
    #
    # @param point [Point] Copy point's X and Y coords.
    def initialize(point)
      self.x = point.x
      self.y = point.y
      # Start counting from zero so +add+ works even before the first +reset+.
      self.counter = 0
      @previous = nil
    end

    # Prepare centroid for a new iteration, zero-ing everything.
    #
    # The position held so far is remembered so that +average+ can fall
    # back to it when no point gets added.
    def reset
      @previous = [x, y]

      self.x = 0.0
      self.y = 0.0
      self.counter = 0
    end

    # Add this point's X and Y coords into the sum (for later average).
    #
    # @param point [Point]
    def add(point)
      self.counter += 1
      self.x += point.x
      self.y += point.y
    end

    # At this point X and Y properties will contain sums of all the point
    # coords, counter will contain number of those points.
    # By averaging the coords we find a new center.
    #
    # When no point was added since the last +reset+ there is nothing to
    # average; dividing by the zero counter would produce NaN coordinates.
    # The centroid then returns to the position it had before the +reset+.
    def average
      if counter.zero?
        self.x, self.y = @previous if @previous
      else
        self.x /= counter
        self.y /= counter
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'csv'
|
3
|
+
|
4
|
+
RSpec.describe 'KMeansPP.clusters' do
  # Absolute path of the CSV fixture, resolved relative to this spec file so
  # the suite does not depend on the directory rspec is started from.
  let(:points_csv) do
    File.expand_path('../../resources/points.csv', __FILE__)
  end

  # Small in-memory data set shared by the array-based and block-based
  # examples.
  let(:coordinates) do
    [
      [0.3968, 1.9431],
      [9.3348, 6.7843],
      [9.2882, 8.1347],
      [7.6768, 2.7362],
      [3.4434, 4.1910],
      [1.8097, 5.0884],
      [7.0698, 3.9285],
      [9.3820, 7.6790],
      [8.6092, 0.9651],
      [9.1981, 7.7493]
    ]
  end

  it 'groups points loaded from a CSV file into non-empty clusters' do
    data = CSV.foreach(points_csv).map do |row|
      [row[0].to_f, row[1].to_f]
    end

    clusters = KMeansPP.clusters(data, 3)

    expect(clusters.size).to eq(3)

    clusters.each do |cluster|
      expect(cluster.points.size).to be > 0
      expect(cluster.centroid.x).to_not eq(0)
      expect(cluster.centroid.y).to_not eq(0)
      expect(cluster.to_s).to_not be_empty
    end
  end

  it 'returns the original arrays when given an array of arrays' do
    clusters = KMeansPP.clusters(coordinates, 3)

    expect(clusters.size).to eq(3)
    expect(clusters.first.points.first).to be_a(Array)
  end

  it 'returns the original objects when coordinates come from a block' do
    data = coordinates.map { |x, y| { x: x, y: y } }

    clusters = KMeansPP.clusters(data, 3) do |point|
      [point[:x], point[:y]]
    end

    expect(clusters.size).to eq(3)
    expect(clusters.first.points.first).to be_a(Hash)
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
48.2641334571,86.4516903905
|
2
|
+
0.114004262656,35.8368597414
|
3
|
+
97.4319168245,92.8009240744
|
4
|
+
24.4614031388,18.3292584382
|
5
|
+
36.2367675367,32.8294024271
|
6
|
+
75.5836860736,68.30729977
|
7
|
+
38.6577034445,25.7701728584
|
8
|
+
28.2607136287,64.4493377817
|
9
|
+
61.5358486771,61.2195232194
|
10
|
+
1.52352224798,38.5083779618
|
11
|
+
11.6392182793,68.2369021579
|
12
|
+
53.9486870607,53.9136556533
|
13
|
+
14.6671651772,26.0132534731
|
14
|
+
65.9506725878,82.5639317581
|
15
|
+
58.3682872339,51.6414580337
|
16
|
+
12.6918921252,2.28888447759
|
17
|
+
31.7587852231,18.1368234166
|
18
|
+
63.6631115204,24.933301389
|
19
|
+
29.1652289905,34.456759171
|
20
|
+
44.3830953085,70.4813875779
|
21
|
+
47.0571691145,65.3507625811
|
22
|
+
74.0584537502,98.2271944247
|
23
|
+
55.8929146157,86.6196265477
|
24
|
+
20.4744253473,12.0025149302
|
25
|
+
14.2867767281,40.2850440995
|
26
|
+
40.43551369,94.5410407116
|
27
|
+
87.6178871195,12.4700151639
|
28
|
+
47.2703048197,93.0636237124
|
29
|
+
59.7895104175,69.2621288413
|
30
|
+
80.8612333922,42.9183411179
|
31
|
+
31.1271795535,55.6669044656
|
32
|
+
78.9671049353,65.833739365
|
33
|
+
39.8324533414,63.0343115139
|
34
|
+
79.126343548,14.9128874133
|
35
|
+
65.8152400306,77.5202358013
|
36
|
+
75.2762752704,42.4858435609
|
37
|
+
29.6475948493,61.2068411763
|
38
|
+
67.421857106,54.8955604259
|
39
|
+
10.4652931501,29.7954139372
|
40
|
+
32.0272462745,99.5422900971
|
41
|
+
80.1520927001,84.2710379142
|
42
|
+
2.27240208403,41.2138854089
|
43
|
+
44.4601509555,1.72563901513
|
44
|
+
16.8676021068,35.3415636277
|
45
|
+
58.1977544121,29.2752085455
|
46
|
+
24.6119080085,39.9440735137
|
47
|
+
63.0759798755,60.9841014448
|
48
|
+
30.9289119657,95.0173219502
|
49
|
+
8.54972950047,41.7384441737
|
50
|
+
61.2606910793,4.06738902059
|
51
|
+
83.2302091964,11.6373312879
|
52
|
+
89.4443065362,42.5694882801
|
53
|
+
24.5619318152,97.7947977804
|
54
|
+
50.3134024475,40.6429336223
|
55
|
+
58.1422402033,36.1112632557
|
56
|
+
32.0668520827,29.9924151435
|
57
|
+
89.6057447137,84.9532177777
|
58
|
+
9.8876440816,18.2540486261
|
59
|
+
17.9670383961,47.596032257
|
60
|
+
50.2977668282,93.6851189223
|
61
|
+
98.0700386253,86.5816924579
|
62
|
+
10.8175290981,26.4344732252
|
63
|
+
34.7463851288,24.4154447141
|
64
|
+
92.5470100593,17.3595513748
|
65
|
+
79.0426629356,4.59850018907
|
66
|
+
89.9791366918,29.523946842
|
67
|
+
3.89920214563,91.3650215111
|
68
|
+
35.4669861576,62.1865368798
|
69
|
+
2.78150918086,24.5280230552
|
70
|
+
50.0390951889,57.0414421682
|
71
|
+
64.4521660758,48.4962172448
|
72
|
+
94.4915452316,56.6508179406
|
73
|
+
47.1655534769,15.8292055671
|
74
|
+
94.2027011374,45.6802385454
|
75
|
+
30.5846324871,54.783635876
|
76
|
+
57.7043252948,0.286661610381
|
77
|
+
41.7908674949,14.7206014023
|
78
|
+
59.6689465934,64.8849831965
|
79
|
+
92.2553335495,55.9096460272
|
80
|
+
48.493467262,69.4766837809
|
81
|
+
23.1837859581,71.4406867443
|
82
|
+
29.0737623652,66.9391416961
|
83
|
+
95.7442323112,89.4677505059
|
84
|
+
68.7707275828,40.9900140055
|
85
|
+
84.5445737133,32.1707309618
|
86
|
+
67.4126251988,56.6710579117
|
87
|
+
10.688352016,28.1745892928
|
88
|
+
56.7620324155,18.3034334207
|
89
|
+
50.6751320678,86.6916908032
|
90
|
+
74.6185482896,34.022483532
|
91
|
+
20.7011996002,32.855295357
|
92
|
+
11.479054664,1.59204297586
|
93
|
+
51.6805387648,25.4063026358
|
94
|
+
84.4109522357,47.237632645
|
95
|
+
90.6395051745,57.7917166935
|
96
|
+
58.6159601042,84.1226173848
|
97
|
+
46.2184509277,28.559934585
|
98
|
+
97.0302485783,41.3135022812
|
99
|
+
31.3144587058,87.2459910122
|
100
|
+
5.93357833962,95.6812831872
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: k_means_pp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Oldrich Vetesnik
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: simplecov
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.9'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.9'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: yard
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.8'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.8'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.3'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.3'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.26'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.26'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pry
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.10'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.10'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pry-byebug
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '2.0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '2.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: ruby-prof
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0.15'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0.15'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: gnuplot
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '2.6'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '2.6'
|
153
|
+
description: 'This is a Ruby implementation of the k-means++ algorithm for data clustering.
|
154
|
+
In other words: Grouping a bunch of X, Y points into K groups.'
|
155
|
+
email:
|
156
|
+
- oldrich.vetesnik@gmail.com
|
157
|
+
executables: []
|
158
|
+
extensions: []
|
159
|
+
extra_rdoc_files: []
|
160
|
+
files:
|
161
|
+
- ".gitignore"
|
162
|
+
- ".rspec"
|
163
|
+
- ".yardopts"
|
164
|
+
- Gemfile
|
165
|
+
- LICENSE.txt
|
166
|
+
- README.md
|
167
|
+
- Rakefile
|
168
|
+
- examples/common.rb
|
169
|
+
- examples/example_block.rb
|
170
|
+
- examples/example_csv.rb
|
171
|
+
- examples/example_debug.rb
|
172
|
+
- examples/example_huge.rb
|
173
|
+
- examples/example_simple.rb
|
174
|
+
- examples/points.csv
|
175
|
+
- k_means_pp.gemspec
|
176
|
+
- lib/k_means_pp.rb
|
177
|
+
- lib/k_means_pp/cluster.rb
|
178
|
+
- lib/k_means_pp/point.rb
|
179
|
+
- lib/k_means_pp/version.rb
|
180
|
+
- spec/lib/k_means_pp_spec.rb
|
181
|
+
- spec/resources/points.csv
|
182
|
+
- spec/spec_helper.rb
|
183
|
+
homepage: https://github.com/ollie/k_means_pp
|
184
|
+
licenses:
|
185
|
+
- MIT
|
186
|
+
metadata: {}
|
187
|
+
post_install_message:
|
188
|
+
rdoc_options: []
|
189
|
+
require_paths:
|
190
|
+
- lib
|
191
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
192
|
+
requirements:
|
193
|
+
- - ">="
|
194
|
+
- !ruby/object:Gem::Version
|
195
|
+
version: '0'
|
196
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
197
|
+
requirements:
|
198
|
+
- - ">="
|
199
|
+
- !ruby/object:Gem::Version
|
200
|
+
version: '0'
|
201
|
+
requirements: []
|
202
|
+
rubyforge_project:
|
203
|
+
rubygems_version: 2.4.1
|
204
|
+
signing_key:
|
205
|
+
specification_version: 4
|
206
|
+
summary: K-means++ Algorithm Implementation.
|
207
|
+
test_files:
|
208
|
+
- spec/lib/k_means_pp_spec.rb
|
209
|
+
- spec/resources/points.csv
|
210
|
+
- spec/spec_helper.rb
|
211
|
+
has_rdoc:
|