knnball 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +19 -0
- data/README.md +76 -0
- data/Rakefile +6 -0
- data/knnball.gemspec +42 -0
- data/lib/knnball/ball.rb +114 -0
- data/lib/knnball/kdtree.rb +90 -0
- data/lib/knnball/stat.rb +60 -0
- data/lib/knnball.rb +79 -0
- data/test/specs/ball_spec.rb +101 -0
- data/test/specs/data.json +1 -0
- data/test/specs/kdtree_spec.rb +68 -0
- data/test/specs/knnball_spec.rb +116 -0
- data/test/units/stat_test.rb +57 -0
- metadata +62 -0
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2011 Olivier Amblet
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
KnnBall Instruction
|
2
|
+
===================
|
3
|
+
|
4
|
+
KnnBall is a Ruby library that implements a *nearest neighbor query algorithm*.
|
5
|
+
This algorithm optimizes the search for the nearest point given another point as input.
|
6
|
+
|
7
|
+
It works with any number of dimensions, but the literature seems to agree
|
8
|
+
that with more than 10 dimensions, a brute force approach might give better results.
|
9
|
+
|
10
|
+
In this library, each point is associated to a value,
|
11
|
+
this way the library acts as an index for multidimensional data like
|
12
|
+
geolocation for example.
|
13
|
+
|
14
|
+
|
15
|
+
Usage
|
16
|
+
-----
|
17
|
+
|
18
|
+
require 'knnball'
|
19
|
+
|
20
|
+
data = [
|
21
|
+
{:id => 1, :coord => [6.3299934, 52.32444]},
|
22
|
+
{:id => 2, :coord => [3.34444, 53.23259]},
|
23
|
+
{:id => 3, :coord => [4.22452, 53.243982]},
|
24
|
+
{:id => 4, :coord => [4.2333424, 51.239994]},
|
25
|
+
# ...
|
26
|
+
]
|
27
|
+
|
28
|
+
index = KnnBall.build(data)
|
29
|
+
|
30
|
+
result = index.nearest([3.43353, 52.34355])
|
31
|
+
puts result # --> {:id=>2, :coord=>[3.34444, 53.23259]}
|
32
|
+
|
33
|
+
Some notes about the above:
|
34
|
+
|
35
|
+
*data* must be given as an array of hashes.
|
36
|
+
The only requirement for a Hash instance is
|
37
|
+
to have a :coord key containing an array of coordinates.
|
38
|
+
In the documentation, one of these Hash instances will be
|
39
|
+
called a *value* and the array of coordinates a *point*.
|
40
|
+
Sticking to built-in data-type will allow you to easily
|
41
|
+
use this tree without having to deal with homemade classes,
|
42
|
+
you might avoid a lot of conversion code this way. In the example
|
43
|
+
above, we added an :id key but you are not limited to that, you can
|
44
|
+
use any keys you want beyond the coord key. Keep in mind that the more
|
45
|
+
you put in this Hash, the more memory you will consume.
|
46
|
+
|
47
|
+
*index* is an instance of KnnBall::KDTree. The library relies on a k-dimensional
|
48
|
+
tree to store and retrieve the values. The nodes of the KDTree are Ball instances,
|
49
|
+
whose class name refers to the theory of having balls containing smaller balls and so
|
50
|
+
on. In practice, this class does not behave like a ball, but as a metaphor, it may help.
|
51
|
+
|
52
|
+
*KDTree#nearest* retrieves the *value* nearest to the given *point*.
|
53
|
+
|
54
|
+
|
55
|
+
Roadmap
|
56
|
+
-------
|
57
|
+
|
58
|
+
* Retrieve the k-nearest neighbors of a point instead of just one.
|
59
|
+
* Export and load using JSON
|
60
|
+
* Support the addition of new values
|
61
|
+
* Rebuild the tree
|
62
|
+
|
63
|
+
|
64
|
+
References
|
65
|
+
----------
|
66
|
+
|
67
|
+
This code was written with the help of the following resources:
|
68
|
+
|
69
|
+
* Algorithms in a Nutshell ; George T. Heineman, Gary Pollice & Stanley Selkow ; O'Reilly (chapter 4 and 9)
|
70
|
+
* Python SciPy kdnn module: http://scikit-learn.sourceforge.net/modules/neighbors.html
|
71
|
+
* Five Balltree Construction Algorithms, by Stephen M. Omohundro, http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.91.8209&rep=rep1&type=pdf
|
72
|
+
|
73
|
+
Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
74
|
+
|
75
|
+
knnball is freely distributable under the terms of an MIT license.
|
76
|
+
See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
data/Rakefile
ADDED
data/knnball.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.specification_version = 1 if s.respond_to? :specification_version=
|
3
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
4
|
+
s.rubygems_version = '1.3.5'
|
5
|
+
|
6
|
+
s.name = 'knnball'
|
7
|
+
s.version = '0.0.5'
|
8
|
+
s.date = '2011-05-23'
|
9
|
+
s.rubyforge_project = 'knnball'
|
10
|
+
|
11
|
+
s.summary = "K-Nearest Neighbor queries using a KDTree"
|
12
|
+
s.description = "Implements K-Nearest Neighbor algorithm using a KDTree in Ruby."
|
13
|
+
|
14
|
+
s.authors = ["Olivier Amblet"]
|
15
|
+
s.email = 'olivier@amblet.net'
|
16
|
+
s.homepage = 'http://github.com/oliamb/knnball'
|
17
|
+
|
18
|
+
s.require_paths = %w[lib]
|
19
|
+
|
20
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
21
|
+
s.extra_rdoc_files = %w[README.md LICENSE]
|
22
|
+
|
23
|
+
# = MANIFEST =
|
24
|
+
s.files = %w[
|
25
|
+
LICENSE
|
26
|
+
README.md
|
27
|
+
Rakefile
|
28
|
+
knnball.gemspec
|
29
|
+
lib/knnball.rb
|
30
|
+
lib/knnball/ball.rb
|
31
|
+
lib/knnball/stat.rb
|
32
|
+
lib/knnball/kdtree.rb
|
33
|
+
test/specs/ball_spec.rb
|
34
|
+
test/specs/data.json
|
35
|
+
test/specs/kdtree_spec.rb
|
36
|
+
test/specs/knnball_spec.rb
|
37
|
+
test/units/stat_test.rb
|
38
|
+
]
|
39
|
+
# = MANIFEST =
|
40
|
+
|
41
|
+
# Select the unit tests and specs from the manifest. The previous pattern used
# `\.*` (zero or more literal dots) where `.*` (any characters) was intended,
# so it never matched a real file name and test_files was always empty.
s.test_files = s.files.select { |path| path =~ %r{\Atest/(?:units/.*_test|specs/.*_spec)\.rb\z} }
|
42
|
+
end
|
data/lib/knnball/ball.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module KnnBall
  # A node of the tree.
  #
  # A ball wraps a value (any object responding to include? and [] that
  # stores its coordinates under the :coord key), the 1-based dimension used
  # to compare against its sub-balls, and optional left and right sub-balls.
  class Ball
    attr_accessor :left, :right, :value, :dimension

    # @param value the value associated to this ball; must hold a :coord entry
    # @param dimension the 1-based dimension used for sorting left and right tree
    # @param left the left sub-ball, or nil
    # @param right the right sub-ball, or nil
    # @raise [ArgumentError] if value does not respond to include? and [],
    #   or does not include :coord
    def initialize(value, dimension = 1, left = nil, right = nil)
      unless value.respond_to?(:include?) && value.respond_to?(:[])
        raise ArgumentError.new("Value must at least respond to methods include? and [].")
      end
      unless value.include?(:coord)
        # Not every object passing the duck-type check above has #keys
        # (an Array does not); fall back to the value itself so that the
        # caller still gets an ArgumentError instead of a NoMethodError.
        found = value.respond_to?(:keys) ? value.keys : value
        raise ArgumentError.new("value must contains :coord key but has only #{found.inspect}")
      end
      @value = value
      @dimension = dimension
      @left = left
      @right = right
    end

    # @return the coordinates stored under the :coord key of the value
    def center
      value[:coord]
    end

    # Search this ball and its sub-balls for a ball strictly closer to
    # target than the best distance known so far.
    #
    # @param target an array of coordinates
    # @param min a one-element array holding the best distance found so far;
    #   it is updated in place whenever a closer ball is found
    # @return the closest ball found below (and including) this one, or nil
    #   when none is strictly closer than min[0]
    def nearest(target, min)
      result = nil
      d = distance(target)
      if d < min[0]
        min[0] = d
        result = self
      end

      # Distance between the target and this ball along the splitting
      # dimension: when it is smaller than the best distance, a closer ball
      # may be on either side; otherwise only the target's side is visited.
      axis = dimension - 1
      candidates =
        if (center[axis] - target[axis]).abs < min[0]
          [left, right]
        elsif target[axis] < center[axis]
          [left]
        else
          [right]
        end

      candidates.each do |child|
        next if child.nil?
        found = child.nearest(target, min)
        result = found unless found.nil?
      end
      result
    end

    # Compute the euclidean distance between this ball's center and
    # the given coordinates.
    #
    # @param coordinates an array of coord or a Ball instance
    def distance(coordinates)
      coordinates = coordinates.center if coordinates.respond_to?(:center)
      Math.sqrt([center, coordinates].transpose.map { |a, b| (b - a)**2 }.reduce(:+))
    end

    # Retrieve true if this is a leaf ball.
    #
    # A leaf ball has no sub-balls.
    def leaf?
      @left.nil? && @right.nil?
    end

    # Generate an Array from this Ball.
    #
    # index 0 contains the value object,
    # index 1 contains the left ball as an Array or nil,
    # index 2 contains the right ball as an Array or nil.
    def to_a
      [@value, (@left.nil? ? nil : @left.to_a), (@right.nil? ? nil : @right.to_a)]
    end

    # Generate a Hash from this Ball instance.
    #
    # The generated instance contains keys :value, :left and :right,
    # where :left and :right are nil or the Hash of the sub-ball.
    def to_h
      {
        :value => @value,
        :left => (@left.nil? ? nil : @left.to_h),
        :right => (@right.nil? ? nil : @right.to_h)
      }
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module KnnBall
  # Tree wrapper around a root ball.
  #
  # Nodes are expected to respond to center, dimension, left, right, value,
  # distance and nearest (see how they are called below).
  class KDTree
    attr_accessor :root

    # @param root the root ball of the tree, or nil for an empty tree
    def initialize(root = nil)
      @root = root
    end

    # Retrieve the value of the ball nearest to the given coordinates.
    #
    # @param coord an array of coordinates
    # @return the value of the nearest ball, or nil when the tree is empty
    #   or coord is nil
    def nearest(coord, &cmp_block)
      return nil if root.nil?
      return nil if coord.nil?

      # First guess: the ball under which coord would be attached.
      first_guess = parent(coord)

      # Then search again from the root for anything strictly closer than
      # the first guess; the one-element array carries the best distance.
      best = [first_guess.distance(coord)]
      closer = root.nearest(coord, best)
      (closer || first_guess).value
    end

    # Retrieve the ball under which the given coordinates would be attached:
    # walk down from the root, going left when the coordinate on the node's
    # dimension is lesser than or equal to the node's, right otherwise,
    # until the side to follow is empty.
    def parent(coord)
      node = root
      loop do
        axis = node.dimension - 1
        child = coord[axis] <= node.center[axis] ? node.left : node.right
        return node if child.nil?
        node = child
      end
    end

    # @return true when the tree has no root
    def empty?
      root.nil?
    end

    # Delegates to the root's to_a.
    def to_a
      root.to_a
    end

    # Yield every ball of the tree: a node first, then its left sub-tree,
    # then its right sub-tree.
    #
    # @raise [RuntimeError] when the tree has no root
    def each(&proc)
      raise "tree is nil" if @root.nil?
      each_ball(@root, &proc)
    end

    # Collect the result of the given block for every ball of the tree.
    def map(&proc)
      collected = []
      each { |ball| collected.push(yield(ball)) }
      collected
    end

    private

    # Recursive pre-order traversal used by each; always returns nil.
    def each_ball(b, &proc)
      return if b.nil?

      yield(b)
      [b.left, b.right].each do |child|
        each_ball(child, &proc) unless child.nil?
      end
      nil
    end
  end
end
|
data/lib/knnball/stat.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module KnnBall
  # Selection helpers used to place a median without fully sorting an array.
  module Stat
    # Reorder an array in place with the goal of having the median at the
    # median index.
    #
    # Values on the left will be lesser than or equal to the median and
    # values on the right higher than or equal to the median.
    #
    # @param data an array of data that will be changed in place
    # @param cmp_block optional comparison block returning -1, 0 or 1
    #   (defaults to <=>)
    # @return the median value, or nil when data is empty
    def self.median!(data, &cmp_block)
      # An empty array has no median; without this guard pivot! would fail
      # with an IndexError on the negative median index.
      return nil if data.empty?

      midx = median_index(data)
      left = 0
      right = data.size - 1
      loop do
        # Always partition around the element currently at the median index,
        # narrowing the range to the side that still contains that index.
        pidx = pivot!(data, midx, left, right, &cmp_block)
        return data[midx] if pidx == midx

        if pidx < midx
          left = pidx + 1
        else
          right = pidx - 1
        end
      end
    end

    # Partition data[left..right] around the value found at index pivot.
    #
    # @param data an array of data that will be changed in place
    # @param pivot index of the pivot value in data
    # @param left first index of the range to partition
    # @param right last index of the range to partition
    # @param cmp_block optional comparison block (defaults to <=>)
    # @return the final index of the pivot
    def self.pivot!(data, pivot, left = 0, right = data.size - 1, &cmp_block)
      value = data[pivot]
      cmp_block ||= proc { |a, b| a <=> b }

      # push pivot value at the end of the range
      data[pivot], data[right] = data[right], data[pivot]

      # move every value lesser than or equal to the pivot to the front
      store = left
      (left...right).each do |i|
        next unless cmp_block.call(data[i], value) < 1
        data[store], data[i] = data[i], data[store]
        store += 1
      end

      # put the pivot value just after the last lesser-or-equal value
      data[store], data[right] = data[right], data[store]
      store
    end

    # @return the index of the median: the middle index for an odd size,
    #   the lower of the two middle indexes for an even size
    def self.median_index(data)
      (data.size - 1) / 2
    end
  end
end
|
data/lib/knnball.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
# This module is used as namespace for every elements of
|
11
|
+
# the knnball library.
|
12
|
+
module KnnBall
|
13
|
+
|
14
|
+
autoload :Ball, 'knnball/ball'
|
15
|
+
autoload :Stat, 'knnball/stat'
|
16
|
+
autoload :KDTree, 'knnball/kdtree'
|
17
|
+
|
18
|
+
# Retrieve a new KDTree given an array of input values.
#
# Each data entry in the array is a Hash containing at least
# the key :coord, an array of positions (one per dimension):
#   [ {:id => 1, :coord => [1.23, 2.34, -1.23, -22.3]},
#     {:id => 2, :coord => [-2.33, 4.2, 1.23, 332.2]} ]
#
# The number of dimensions is taken from the first entry.
#
# @param data a non-empty array of Hash containing a :coord key;
#   the array itself is left untouched
# @return a KDTree whose root is the generated tree
# @raise [ArgumentError] if data is nil or empty
#
# @see KnnBall::KDTree#initialize
def self.build(data)
  if data.nil? || data.empty?
    raise ArgumentError.new("data argument must be a not empty Array")
  end
  max_dimension = data.first[:coord].size
  # generate reorders the array it is given in place, so work on a shallow
  # copy to avoid silently reordering the caller's array.
  KDTree.new(generate(data.dup, max_dimension))
end
|
37
|
+
|
38
|
+
# Generate the KD-Tree hyperrectangle.
#
# The data is split around its median on the current dimension; the median
# becomes a ball tagged with that dimension, and both halves are generated
# recursively on the next dimension (cycling back to 1 after max_dimension).
#
# @param data the list of all points; it is reordered in place
# @param max_dimension the number of dimensions of each point
# @param actual_dimension the 1-based dimension to base comparison on
# @return the root Ball of the generated tree, or nil if data is nil or empty
def self.generate(data, max_dimension, actual_dimension = 1)
  return nil if data.nil? || data.empty?
  # Tag every ball with the dimension it was split on: Ball#nearest and
  # KDTree#parent read it to decide which coordinate to compare.
  return Ball.new(data.first, actual_dimension) if data.size == 1

  # Order the array such as the middle point is the median and
  # that every point on the left is lesser than or equal to the median
  # and every point on the right greater than or equal to it.
  # They are not more sorted than that.
  axis = actual_dimension - 1
  median_idx = Stat.median_index(data)
  value = Stat.median!(data) { |v1, v2| v1[:coord][axis] <=> v2[:coord][axis] }
  ball = Ball.new(value, actual_dimension)

  # Move on to the next dimension for the sub-trees. The previous code kept
  # the same dimension forever, so only the first coordinate was ever used.
  next_dimension = actual_dimension >= max_dimension ? 1 : actual_dimension + 1

  ball.left = generate(data[0...median_idx], max_dimension, next_dimension) if median_idx > 0
  ball.right = generate(data[(median_idx + 1)..-1], max_dimension, next_dimension) if median_idx + 1 < data.size
  ball
end
|
63
|
+
|
64
|
+
# Intended to produce a persistable string representation of the index.
#
# Currently a placeholder: the argument is ignored and an empty
# String is returned.
def self.marshall(ball_tree)
  ""
end
|
69
|
+
|
70
|
+
# Intended to rebuild a tree from previously marshalled content.
#
# Currently a placeholder: the content is ignored and a new KDTree
# created without arguments is returned.
def self.unmarshall(marshalled_content)
  KDTree.new
end
|
74
|
+
|
75
|
+
# Intended to retrieve the k nearest neighbors of the given position.
#
# Currently a placeholder: every argument is ignored and an empty
# Array is returned.
def self.find_knn(ball_tree, position, k = 1, options = Hash.new)
  []
end
|
79
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
|
13
|
+
|
14
|
+
module KnnBall
|
15
|
+
|
16
|
+
describe Ball do
|
17
|
+
|
18
|
+
describe "Leaf balls" do
|
19
|
+
before :each do
|
20
|
+
@value = {:id => 1, :coord => [1,2,3]}
|
21
|
+
@ball = Ball.new(@value)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "must be a leaf" do
|
25
|
+
@ball.leaf?.must_equal true
|
26
|
+
end
|
27
|
+
|
28
|
+
it "must have a center equals to the value location" do
|
29
|
+
@ball.center.must_equal @value[:coord]
|
30
|
+
end
|
31
|
+
|
32
|
+
it "must convert itself to an Array instance" do
|
33
|
+
@ball.to_a.must_equal [@value, nil, nil]
|
34
|
+
end
|
35
|
+
|
36
|
+
it "must convert itself to a Hash instance" do
|
37
|
+
@ball.to_h.must_equal({:value => @value, :left => nil, :right => nil})
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "Standard Balls" do
|
42
|
+
before :each do
|
43
|
+
@value = {:id => 1, :coord => [1,2,3]}
|
44
|
+
@ball = Ball.new(@value, 1, Ball.new({:id => 3, :coord => [-1, -2, -3]}), Ball.new({:id => 2, :coord => [2, 3, 4]}))
|
45
|
+
end
|
46
|
+
|
47
|
+
it "wont be a leaf" do
|
48
|
+
@ball.leaf?.must_equal false
|
49
|
+
end
|
50
|
+
|
51
|
+
it "must_be_centered_at_the_ball_value_location" do
|
52
|
+
@ball.center.must_equal @value[:coord]
|
53
|
+
end
|
54
|
+
|
55
|
+
it "must convert itself to an Array instance" do
|
56
|
+
@ball.to_a.must_equal([
|
57
|
+
@value,
|
58
|
+
[{:id => 3, :coord => [-1, -2, -3]}, nil, nil],
|
59
|
+
[{:id => 2, :coord => [2, 3, 4]}, nil, nil]
|
60
|
+
])
|
61
|
+
end
|
62
|
+
|
63
|
+
it "must convert itself to a Hash instance" do
|
64
|
+
@ball.to_h.must_equal({:value => @value,
|
65
|
+
:left => {:value => {:id => 3, :coord => [-1, -2, -3]}, :left => nil, :right => nil},
|
66
|
+
:right => {:value => {:id => 2, :coord => [2, 3, 4]}, :left => nil, :right => nil}
|
67
|
+
})
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "Ball with sub-balls" do
|
72
|
+
before :each do
|
73
|
+
@value = {:id => 1, :coord => [1,2,3]}
|
74
|
+
@leaf_1 = Ball.new(@value)
|
75
|
+
@leaf_2 = Ball.new({:id => 2, :coord => [2, 3, 4]})
|
76
|
+
@leaf_3 = Ball.new({:id => 3, :coord => [-1, -2 , -5]})
|
77
|
+
@leaf_4 = Ball.new({:id => 4, :coord => [-3, -2, -2]})
|
78
|
+
@sub_ball_1 = Ball.new({:id => 5, :coord => [1.4, 2, 2.5]}, 1, @leaf_1, @leaf_2)
|
79
|
+
@sub_ball_2 = Ball.new({:id => 6, :coord => [-2, -1.9, -3]}, 1, @leaf_3, @leaf_4)
|
80
|
+
@ball = Ball.new({:id => 7, :coord => [0, 0, 0]}, 1, @sub_ball_1, @sub_ball_2)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "must be centered at (0,0,0)" do
|
84
|
+
@ball.center.must_equal([0,0,0])
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
describe "distance from private method" do
|
89
|
+
before :each do
|
90
|
+
# Make private method public (white box testing)
|
91
|
+
Ball.send(:public, *Ball.private_instance_methods)
|
92
|
+
end
|
93
|
+
|
94
|
+
it "retrieve the correct distance" do
|
95
|
+
b1 = Ball.new({:id => 2, :coord => [2, 3, 4]})
|
96
|
+
b2 = Ball.new({:id => 3, :coord => [-1, -2 , -5]})
|
97
|
+
b1.distance(b2).must_equal(Math.sqrt(115))
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
[{"id":"Dr Achermann Romeo","coord":[47.0495221,8.3079993]},{"id":"Dresse Achermann-Bieri Ursula","coord":[47.0525778,8.3052475]},{"id":"M. Ackermann Christian","coord":[46.9406862,7.3991925]},{"id":"Dr Ackermann Roland","coord":[47.3538085,7.9035746]},{"id":"Dr Adank-Sailer Gabrielle","coord":[47.1359809,7.2447156]},{"id":"Dr Aebersold Christian","coord":[47.1262539,7.2763632]},{"id":"Dresse Aebersold Gaby","coord":[47.1262539,7.2763632]},{"id":"M. Aellen Jean-Marc","coord":[46.1980464,6.1483353]},{"id":"M. Aerni Christian","coord":[46.18612,6.118075]},{"id":"Dr Akermann Felix","coord":[47.1697675,9.4766358]},{"id":"Dr Albrecht Silvia","coord":[47.1655638,7.5936764]},{"id":"M. Alder Ernst","coord":[47.6833333,8.75]},{"id":"Dr Althaus Marc-André","coord":[46.4810917,6.4571243]},{"id":"Dr Amati Francesca","coord":[46.5230847,6.640415]},{"id":"Anklin Bernard","coord":[47.3920158,8.5392456]},{"id":"Dr Bachelin Pierre","coord":[46.6668682,6.5107242]},{"id":"Dr Backes Hans-Ulrich","coord":[47.4238477,9.3686547]},{"id":"Badorff Cornel","coord":[47.4977665,8.7270143]},{"id":"Dr Bagutti Carlo","coord":[46.516054,6.608998]},{"id":"Dr Ballmer Peter Matthias","coord":[46.7617201,7.6279035]},{"id":"Dr Bandi-Ott Elisabeth","coord":[47.3839285,8.54832]},{"id":"Dr Barandun Jürg","coord":[47.3519079,8.5762373]},{"id":"Dr Barras Bernard","coord":[46.2270208,7.3533351]},{"id":"Dr Beck Thomas","coord":[46.7571286,7.6305141]},{"id":"Dr Bedat Bernard","coord":[46.2785274,6.1684023]},{"id":"M. Bekkering Anton","coord":[47.4226784,9.3184235]},{"id":"Dresse Benz Gabrielle","coord":[46.2304522,7.363867]},{"id":"M. Benz Martin","coord":[46.2304522,7.363867]},{"id":"M. Berchten Anthony","coord":[46.3873762,6.2208354]},{"id":"M. 
Berdoz Jean-Marc","coord":[46.2124628,6.131694]},{"id":"Mme Berdoz Nicole","coord":[46.2124628,6.131694]},{"id":"Dr Berger Hanspeter","coord":[46.684936,7.8499151]},{"id":"Dr Berghoff Ueli","coord":[47.2217408,8.6722444]},{"id":"Dr Bernasconi Cristiano","coord":[46.3601201,8.971514]},{"id":"Dr Bernhart Felix","coord":[46.9286035,7.447995]},{"id":"Dr Beuing Markus","coord":[46.4965828,9.8382621]},{"id":"Dr Beyeler Jürg","coord":[47.3805257,8.5428647]},{"id":"Dr Bianchi Michele","coord":[46.0056877,8.9407073]},{"id":"Dr Bickel Andreas","coord":[47.2287339,8.8268529]},{"id":"Dr Biedert Roland","coord":[47.1333363,7.2612937]}]
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
|
13
|
+
|
14
|
+
module KnnBall
|
15
|
+
|
16
|
+
describe KDTree do
|
17
|
+
|
18
|
+
describe "building the tree" do
|
19
|
+
it "must be an empty tree without params" do
|
20
|
+
KDTree.new.must_be_empty
|
21
|
+
end
|
22
|
+
|
23
|
+
it "wont be an empty tree with data" do
|
24
|
+
KDTree.new(Ball.new({:id => 1, :coord => [1]})).wont_be_empty
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "find the nearest ball" do
|
29
|
+
before :each do
|
30
|
+
root = Ball.new({:id => 4, :coord => [5]}, 1,
|
31
|
+
Ball.new({:id => 2, :coord => [2]}, 1, Ball.new({:id => 1, :coord => [1]}), Ball.new({:id => 3, :coord => [3]})),
|
32
|
+
Ball.new({:id => 6, :coord => [13]}, 1, Ball.new({:id => 5, :coord => [8]}),
|
33
|
+
Ball.new({:id => 7, :coord => [21]}, 1, Ball.new({:id => 8, :coord => [34]})))
|
34
|
+
)
|
35
|
+
@ball_tree = KDTree.new(root)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "return a matching location" do
|
39
|
+
@ball_tree.nearest([3])[:id].must_equal(3)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "find the parent for coordinates" do
|
44
|
+
before :each do
|
45
|
+
@root = Ball.new({:id => 4, :coord => [5, 7]}, 1,
|
46
|
+
Ball.new({:id => 2, :coord => [3, 4]}, 1, Ball.new({:id => 1, :coord => [2, 2]}), Ball.new({:id => 3, :coord => [4, 8]})),
|
47
|
+
Ball.new({:id => 6, :coord => [13, 4]}, 1, Ball.new({:id => 5, :coord => [8, 1]}),
|
48
|
+
Ball.new({:id => 7, :coord => [21, 6]}, 1, Ball.new({:id => 8, :coord => [34, 5]})))
|
49
|
+
)
|
50
|
+
@ball_tree = KDTree.new(@root)
|
51
|
+
end
|
52
|
+
|
53
|
+
it "return the nearest parent" do
|
54
|
+
@ball_tree.parent([13.2, 4.5]).value[:id].must_equal(8)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe "tree to array" do
|
59
|
+
it "Should return an empty array if empty" do
|
60
|
+
KDTree.new().to_a.must_equal []
|
61
|
+
end
|
62
|
+
|
63
|
+
it "Should return a tree array if not nil" do
|
64
|
+
KDTree.new(Ball.new({:id => 1, :coord => [1, 2, 3]})).to_a.must_equal [{:id => 1, :coord => [1,2,3]}, nil, nil]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
require 'json'
|
13
|
+
|
14
|
+
describe KnnBall do
|
15
|
+
before do
|
16
|
+
@ball_tree = MiniTest::Mock.new
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "when asked to build the tree" do
|
20
|
+
it "must retrieve a KDTree instance" do
|
21
|
+
KnnBall.build([
|
22
|
+
{:id => 1, :coord => [1.0,1.0]},
|
23
|
+
{:id => 2, :coord => [2.0, 3.0]}
|
24
|
+
]).must_be :kind_of?, KnnBall::KDTree
|
25
|
+
end
|
26
|
+
|
27
|
+
it "must build a one dimension tree correctly" do
|
28
|
+
tree = KnnBall.build(
|
29
|
+
[{:id => 2, :coord => [2]},
|
30
|
+
{:id => 3, :coord => [3]},
|
31
|
+
{:id => 1, :coord => [1]}]
|
32
|
+
)
|
33
|
+
tree.root.value.must_equal({:id => 2, :coord => [2]})
|
34
|
+
tree.root.left.value.must_equal({:id => 1, :coord => [1]})
|
35
|
+
tree.root.left.left.must_be_nil
|
36
|
+
tree.root.left.right.must_be_nil
|
37
|
+
tree.root.right.wont_be_nil
|
38
|
+
tree.root.right.value.must_equal({:id => 3, :coord => [3]})
|
39
|
+
|
40
|
+
KnnBall.build([
|
41
|
+
{:id => 1, :coord => [1]},
|
42
|
+
{:id => 2, :coord => [2]},
|
43
|
+
{:id => 3, :coord => [3]},
|
44
|
+
{:id => 4, :coord => [5]},
|
45
|
+
{:id => 5, :coord => [8]},
|
46
|
+
{:id => 6, :coord => [13]},
|
47
|
+
{:id => 7, :coord => [21]},
|
48
|
+
{:id => 8, :coord => [34]}
|
49
|
+
]).root.value.must_equal({:id => 4, :coord => [5]})
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "when asked to serialize the index" do
|
54
|
+
it "must retrieve a string" do
|
55
|
+
KnnBall.marshall(@ball_tree).must_be :kind_of?, String
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "when asked to load an index" do
|
60
|
+
it "must retrieve a a BallTree instance" do
|
61
|
+
KnnBall.unmarshall("").must_be :kind_of?, KnnBall::KDTree
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "when asked to find the neareast location" do
|
66
|
+
it "retrieve the nearest location" do
|
67
|
+
result = KnnBall.find_knn(@ball_tree, [1, 1, 1, 1])
|
68
|
+
result.must_be :kind_of?, Array
|
69
|
+
end
|
70
|
+
|
71
|
+
it "retrieve the same results as a brute force approach" do
|
72
|
+
json = File.open(File.join(File.dirname(__FILE__), 'data.json'), 'r:utf-8').read
|
73
|
+
data = JSON.parse(json)
|
74
|
+
data = data.map do |l|
|
75
|
+
h = {}
|
76
|
+
l.each {|k,v| h[k.to_sym] = v}
|
77
|
+
h
|
78
|
+
end
|
79
|
+
|
80
|
+
tree = KnnBall.build(data)
|
81
|
+
errors = []
|
82
|
+
msgs = []
|
83
|
+
data.each do |p|
|
84
|
+
t0 = Time.now
|
85
|
+
res = data.map do |p2|
|
86
|
+
euc = Math.sqrt((p2[:coord][0] - p[:coord][0])**2.0 + (p2[:coord][1] - p[:coord][1])**2.0)
|
87
|
+
[p2, euc]
|
88
|
+
end
|
89
|
+
best = res.min {|a, b| a.last <=> b.last}
|
90
|
+
brute_force_result = best.first
|
91
|
+
t1 = Time.now
|
92
|
+
p[:coord].must_equal(brute_force_result[:coord])
|
93
|
+
t2 = Time.now
|
94
|
+
nn_result = tree.nearest(p[:coord])
|
95
|
+
t3 = Time.now
|
96
|
+
if(nn_result[:coord] != brute_force_result[:coord])
|
97
|
+
errors << [p, nn_result, brute_force_result]
|
98
|
+
end
|
99
|
+
if(t1-t0 < t3-t2)
|
100
|
+
msgs << "For #{p}, efficiency was before with bruteforce than with kdtree search."
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
msgs = errors.map do |e|
|
105
|
+
if(e[0] == e[1])
|
106
|
+
"For #{e[0]}, OK, but brute force retrieved #{e[2]}"
|
107
|
+
elsif(e[0] == e[2])
|
108
|
+
"For #{e[0]}, #{e[1]} retrieved instead of #{e[2]}"
|
109
|
+
else
|
110
|
+
"For #{e[0]}, both brute force and nn search are wrong"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
must_be_empty errors
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
|
13
|
+
module KnnBall
|
14
|
+
class StatTest < MiniTest::Unit::TestCase
|
15
|
+
def test_median_index
|
16
|
+
assert_equal(1, Stat.median_index([1] * 3))
|
17
|
+
assert_equal(1, Stat.median_index([1] * 4))
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_pivot
|
21
|
+
data = [2,3,4,23,1,342,6,34, 2.3,4,-5,-1,2]
|
22
|
+
assert_equal(8, Stat.pivot!(data, 2), data.inspect)
|
23
|
+
assert_equal(4, data[8])
|
24
|
+
data[0..8].each {|v| assert v <= 4}
|
25
|
+
data[9..-1].each {|v| assert v > 4}
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_pivot_with_comparison_block
|
29
|
+
data = [2,3,4,23,1,342,6,34, 2.3,4,-5,-1,2]
|
30
|
+
assert_equal(5, Stat.pivot!(data, 2){|a,b| b<=>a}, data.inspect)
|
31
|
+
assert_equal(4, data[5])
|
32
|
+
data[0..5].each {|v| assert v >= 4}
|
33
|
+
data[6..-1].each {|v| assert v < 4}
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_median_with_sorted_value
|
37
|
+
data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
|
38
|
+
assert_equal(5, Stat.median!(data), data.inspect)
|
39
|
+
assert_equal(5, data[4])
|
40
|
+
data[0..4].each {|v| assert v <= 5}
|
41
|
+
data[4..8].each {|v| assert v >= 5}
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_median_with_unsorted_values
|
45
|
+
data = [1, 5, 3, 2, 7, 3, 4, 5, 6]
|
46
|
+
assert_equal(4, Stat.median!(data), data.inspect)
|
47
|
+
assert_equal(4, data[4], data.inspect)
|
48
|
+
data[0..4].each {|v| assert v <= 4}
|
49
|
+
data[4..8].each {|v| assert v >= 4}
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_median_with_hash
|
53
|
+
data = [{:coord => [1]}, {:coord => [2]}, {:coord => [3]}]
|
54
|
+
assert_equal({:coord => [2]}, Stat.median!(data){|a,b| a[:coord] <=> b[:coord]}, data.inspect)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: knnball
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Olivier Amblet
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-05-23 00:00:00.000000000 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
description: Implements K-Nearest Neighbor algorithm using a KDTree in Ruby.
|
16
|
+
email: olivier@amblet.net
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files:
|
20
|
+
- README.md
|
21
|
+
- LICENSE
|
22
|
+
files:
|
23
|
+
- LICENSE
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- knnball.gemspec
|
27
|
+
- lib/knnball.rb
|
28
|
+
- lib/knnball/ball.rb
|
29
|
+
- lib/knnball/stat.rb
|
30
|
+
- lib/knnball/kdtree.rb
|
31
|
+
- test/specs/ball_spec.rb
|
32
|
+
- test/specs/data.json
|
33
|
+
- test/specs/kdtree_spec.rb
|
34
|
+
- test/specs/knnball_spec.rb
|
35
|
+
- test/units/stat_test.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: http://github.com/oliamb/knnball
|
38
|
+
licenses: []
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options:
|
41
|
+
- --charset=UTF-8
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
requirements: []
|
57
|
+
rubyforge_project: knnball
|
58
|
+
rubygems_version: 1.6.2
|
59
|
+
signing_key:
|
60
|
+
specification_version: 1
|
61
|
+
summary: K-Nearest Neighbor queries using a KDTree
|
62
|
+
test_files: []
|