knnball 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +19 -0
- data/README.md +76 -0
- data/Rakefile +6 -0
- data/knnball.gemspec +42 -0
- data/lib/knnball/ball.rb +114 -0
- data/lib/knnball/kdtree.rb +90 -0
- data/lib/knnball/stat.rb +60 -0
- data/lib/knnball.rb +79 -0
- data/test/specs/ball_spec.rb +101 -0
- data/test/specs/data.json +1 -0
- data/test/specs/kdtree_spec.rb +68 -0
- data/test/specs/knnball_spec.rb +116 -0
- data/test/units/stat_test.rb +57 -0
- metadata +62 -0
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2011 Olivier Amblet
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
KnnBall Instruction
|
2
|
+
===================
|
3
|
+
|
4
|
+
KnnBall is a Ruby library that implements a *nearest neighbor query algorithm*.
|
5
|
+
This algorithm optimizes the search for the nearest point given another point as input.
|
6
|
+
|
7
|
+
It works with any number of dimensions, but the literature seems to agree on the point
|
8
|
+
that with more than 10 dimensions, a brute force approach might give better results.
|
9
|
+
|
10
|
+
In this library, each point is associated to a value,
|
11
|
+
this way the library acts as an index for multidimensional data like
|
12
|
+
geolocation for example.
|
13
|
+
|
14
|
+
|
15
|
+
Usage
|
16
|
+
-----
|
17
|
+
|
18
|
+
require 'knnball'
|
19
|
+
|
20
|
+
data = [
|
21
|
+
{:id => 1, :coord => [6.3299934, 52.32444]},
|
22
|
+
{:id => 2, :coord => [3.34444, 53.23259]},
|
23
|
+
{:id => 3, :coord => [4.22452, 53.243982]},
|
24
|
+
{:id => 4, :coord => [4.2333424, 51.239994]},
|
25
|
+
# ...
|
26
|
+
]
|
27
|
+
|
28
|
+
index = KnnBall.build(data)
|
29
|
+
|
30
|
+
result = index.nearest([3.43353, 52.34355])
|
31
|
+
puts result # --> {:id=>2, :coord=>[3.34444, 53.23259]}
|
32
|
+
|
33
|
+
Some notes about the above:
|
34
|
+
|
35
|
+
*data* must be given as an array of hashes.
|
36
|
+
The only requirement of a Hash instance is
|
37
|
+
to have a :coord key containing an array of coordinates.
|
38
|
+
In the documentation, one of these Hash instances will be
|
39
|
+
called a *value* and the array of coordinates a *point*.
|
40
|
+
Sticking to built-in data-type will allow you to easily
|
41
|
+
use this tree without having to deal with homemade classes,
|
42
|
+
you might avoid a lot of conversion code this way. In the example
|
43
|
+
above, we added an :id key but you are not limited to that, you can
|
44
|
+
use any keys you want beyond the coord key. Keep in mind that the more
|
45
|
+
you put in this Hash, the more memory you will consume.
|
46
|
+
|
47
|
+
*index* is an instance of KnnBall::KDTree. The library relies on a k-dimensional
|
48
|
+
tree to store and retrieve the values. The nodes of the KDTree are Ball instances,
|
49
|
+
whose class name refers to the theory of having balls containing smaller balls and so
|
50
|
+
on. In practice, this class does not behave like a ball, but as a metaphor, it may help.
|
51
|
+
|
52
|
+
*KDTree#nearest* retrieves the nearest *value* to the given *point*.
|
53
|
+
|
54
|
+
|
55
|
+
Roadmap
|
56
|
+
-------
|
57
|
+
|
58
|
+
* Retrieve the k-nearest neighbors of a point instead of just one.
|
59
|
+
* Export and load using JSON
|
60
|
+
* Support the addition of new values
|
61
|
+
* Rebuild the tree
|
62
|
+
|
63
|
+
|
64
|
+
References
|
65
|
+
----------
|
66
|
+
|
67
|
+
This code was written with the help of the following resources:
|
68
|
+
|
69
|
+
* Algorithms in a Nutshell ; George T. Heineman, Gary Pollice & Stanley Selkow ; O'Reilly (chapter 4 and 9)
|
70
|
+
* Python SciPy kdnn module: http://scikit-learn.sourceforge.net/modules/neighbors.html
|
71
|
+
* Five Balltree Construction Algorithms, by Stephen M. Omohundro, http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.91.8209&rep=rep1&type=pdf
|
72
|
+
|
73
|
+
Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
74
|
+
|
75
|
+
knnball is freely distributable under the terms of an MIT license.
|
76
|
+
See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
data/Rakefile
ADDED
data/knnball.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Gem specification for knnball: metadata, file manifest and test file list.
Gem::Specification.new do |s|
  s.specification_version = 1 if s.respond_to? :specification_version=
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.rubygems_version = '1.3.5'

  s.name = 'knnball'
  s.version = '0.0.5'
  s.date = '2011-05-23'
  # rubyforge_project is deprecated in recent RubyGems, so only set it
  # when the writer is still available (same guard style as above).
  s.rubyforge_project = 'knnball' if s.respond_to? :rubyforge_project=

  s.summary = "K-Nearest Neighbor queries using a KDTree"
  s.description = "Implements K-Nearest Neighbor algorithm using a KDTree in Ruby."

  s.authors = ["Olivier Amblet"]
  s.email = 'olivier@amblet.net'
  s.homepage = 'http://github.com/oliamb/knnball'

  s.require_paths = %w[lib]

  s.rdoc_options = ["--charset=UTF-8"]
  s.extra_rdoc_files = %w[README.md LICENSE]

  # = MANIFEST =
  s.files = %w[
    LICENSE
    README.md
    Rakefile
    knnball.gemspec
    lib/knnball.rb
    lib/knnball/ball.rb
    lib/knnball/stat.rb
    lib/knnball/kdtree.rb
    test/specs/ball_spec.rb
    test/specs/data.json
    test/specs/kdtree_spec.rb
    test/specs/knnball_spec.rb
    test/units/stat_test.rb
  ]
  # = MANIFEST =

  # Select test/units/*_test.rb and test/specs/*_spec.rb from the manifest.
  # The previous pattern used `\.*` (zero or more literal dots) where `.*`
  # was intended, so it matched no file and test_files was always empty.
  s.test_files = s.files.select { |path| path =~ %r{\Atest/(?:units/.*_test|specs/.*_spec)\.rb\z} }
end
|
data/lib/knnball/ball.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module KnnBall
  # A node of the tree.
  #
  # A ball wraps a *value* (any object responding to include? and [] that
  # holds its coordinates under the :coord key), the 1-based dimension used
  # to choose between its left and right sub-balls, and those two sub-balls.
  class Ball
    attr_accessor :left, :right, :value, :dimension

    # @param value the value associated to this ball; must contain :coord
    # @param dimension the 1-based dimension used for sorting left and right tree
    # @param left the left sub-ball or nil
    # @param right the right sub-ball or nil
    # @raise [ArgumentError] if value does not respond to include? and [],
    #   or does not include :coord
    def initialize(value, dimension = 1, left = nil, right = nil)
      unless value.respond_to?(:include?) && value.respond_to?(:[])
        raise ArgumentError, "Value must at least respond to methods include? and []."
      end
      unless value.include?(:coord)
        # Only include? and [] are guaranteed at this point: fall back to
        # inspect when the value has no keys method (an Array for instance),
        # so that the caller gets the ArgumentError and not a NoMethodError.
        available = value.respond_to?(:keys) ? value.keys.inspect : value.inspect
        raise ArgumentError, "value must contains :coord key but has only #{available}"
      end
      @value = value
      @right = right
      @dimension = dimension
      @left = left
    end

    # The coordinates of this ball, i.e. value[:coord].
    def center
      value[:coord]
    end

    # Search this ball and its sub-balls for a ball strictly closer to
    # target than the best distance known so far.
    #
    # @param target an array of coordinates
    # @param min a one element array holding the best distance found so far;
    #   it is updated in place each time a closer ball is found
    # @return the closest ball found in this sub-tree, or nil when no ball
    #   is strictly closer than the initial min[0]
    def nearest(target, min)
      result = nil
      own_distance = distance(target)
      if own_distance < min[0]
        min[0] = own_distance
        result = self
      end

      # Distance between target and this ball along the splitting dimension:
      # when it is smaller than the best distance, a closer ball may sit on
      # either side, so both sub-trees are visited. Otherwise only the side
      # on which target falls is visited.
      axis = dimension - 1
      gap = (center[axis] - target[axis]).abs
      candidates =
        if gap < min[0]
          [left, right]
        elsif target[axis] < center[axis]
          [left]
        else
          [right]
        end

      candidates.each do |sub_ball|
        next if sub_ball.nil?
        found = sub_ball.nearest(target, min)
        # a non-nil answer is always strictly closer than what we had
        result = found unless found.nil?
      end
      result
    end

    # Compute euclidean distance.
    #
    # @param coordinates an array of coord or a Ball instance
    def distance(coordinates)
      coordinates = coordinates.center if coordinates.respond_to?(:center)
      squares = [center, coordinates].transpose.map { |a, b| (b - a)**2 }
      Math.sqrt(squares.reduce { |d1, d2| d1 + d2 })
    end

    # Retrieve true if this is a leaf ball.
    #
    # A leaf ball has no sub_balls.
    def leaf?
      @left.nil? && @right.nil?
    end

    # Generate an Array from this Ball.
    #
    # index 0 contains the value object,
    # index 1 contains the left ball as an Array or nil,
    # index 2 contains the right ball as an Array or nil.
    def to_a
      [@value, (@left.nil? ? nil : @left.to_a), (@right.nil? ? nil : @right.to_a)]
    end

    # Generate a Hash from this Ball instance.
    #
    # The generated instance contains keys :value, :left and :right
    def to_h
      {
        :value => @value,
        :left => (@left.nil? ? nil : @left.to_h),
        :right => (@right.nil? ? nil : @right.to_h)
      }
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module KnnBall

  # KD-Tree implementation
  #
  # The tree only holds a reference to its root ball; every query is
  # delegated to the balls themselves.
  class KDTree
    attr_accessor :root

    # @param root the root ball of the tree, or nil for an empty tree
    def initialize(root = nil)
      @root = root
    end

    # Retrieve the value of the ball nearest to the given coordinates.
    #
    # @param coord an array of coordinates
    # @return the value of the nearest ball, or nil when the tree is
    #   empty or coord is nil
    def nearest(coord, &cmp_block)
      return nil if root.nil? || coord.nil?

      # Find the parent to which this coord should belongs to
      # This will be our best first try
      candidate = parent(coord)
      best = [candidate.distance(coord)]

      # Starting back from the root, look for any ball strictly closer
      # than this first candidate.
      closer = root.nearest(coord, best)
      (closer || candidate).value
    end

    # Retrieve the parent to which this coord should belongs to
    #
    # Walks down from the root, going left when the coordinate on the
    # ball's dimension is lesser than or equal to the ball's one and right
    # otherwise, until the wanted side has no sub-ball.
    #
    # @return the last ball reached, or nil when the tree is empty
    def parent(coord)
      current = root
      return nil if current.nil?

      loop do
        idx = current.dimension - 1
        child = coord[idx] <= current.center[idx] ? current.left : current.right
        return current if child.nil?
        current = child
      end
    end

    # true when the tree has no root.
    def empty?
      root.nil?
    end

    # Array representation of the tree, an empty Array when the tree is empty.
    def to_a
      root.nil? ? [] : root.to_a
    end

    # Yield every ball of the tree: a ball first, then its left sub-tree,
    # then its right sub-tree.
    #
    # @raise [RuntimeError] when the tree is empty
    def each(&proc)
      raise "tree is nil" if @root.nil?
      each_ball(@root, &proc)
    end

    # Collect the result of the given block for every ball of the tree.
    def map(&proc)
      res = []
      each { |b| res << yield(b) }
      res
    end

    private

    # Depth-first walk starting at ball b.
    def each_ball(b, &proc)
      return if b.nil?

      yield(b)
      each_ball(b.left, &proc) unless b.left.nil?
      each_ball(b.right, &proc) unless b.right.nil?
      return
    end
  end
end
|
data/lib/knnball/stat.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module KnnBall
  # In-place selection helpers used to order an array around its median.
  module Stat

    # sort an array with the goal of having the median at the middle.
    #
    # Values on the left will be lesser or equal to the median and
    # values on the right higher than or equal to the median. Apart from
    # that, the array is not sorted.
    #
    # @param data an array of data that will be changed in place
    # @param cmp_block optional comparison block behaving like <=>
    # @return the median value, or nil when data is empty
    def self.median!(data, &cmp_block)
      # An empty array has no median; without this guard the partitioning
      # below would write at index -1 and raise an IndexError.
      return nil if data.empty?

      midx = median_index(data)
      left = 0
      right = data.size - 1
      pidx = pivot!(data, midx, left, right, &cmp_block)
      # Narrow the window around the median index until the value used as
      # pivot lands exactly on it.
      until pidx == midx
        if pidx < midx
          left = pidx + 1
        else
          right = pidx - 1
        end
        pidx = pivot!(data, midx, left, right, &cmp_block)
      end
      data[midx]
    end

    # Partition data[left..right] around the value found at index pivot.
    #
    # @param data an array of data that will be changed in place
    # @param pivot index of the pivot value in data
    # @param left first index of the range to partition
    # @param right last index of the range to partition
    # @param cmp_block optional comparison block behaving like <=>
    # @return the final index of the pivot
    def self.pivot!(data, pivot, left = 0, right = data.size-1, &cmp_block)
      value = data[pivot]
      cmp_block ||= lambda { |a, b| a <=> b }

      # push pivot value at the end of the range
      data[pivot], data[right] = data[right], data[pivot]

      # move every value <= pivot value to the front of the range
      store = left
      (left...right).each do |i|
        next unless cmp_block.call(data[i], value) < 1
        data[store], data[i] = data[i], data[store]
        store += 1
      end

      # put the pivot value just after the last moved value
      data[store], data[right] = data[right], data[store]
      store
    end

    # Index of the median in data: the middle index for an odd size,
    # the lower of the two middle indexes for an even size.
    def self.median_index(data)
      (data.size - 1) / 2
    end
  end
end
|
data/lib/knnball.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
# This module is used as namespace for every elements of
|
11
|
+
# the knnball library.
|
12
|
+
module KnnBall

  autoload :Ball, 'knnball/ball'
  autoload :Stat, 'knnball/stat'
  autoload :KDTree, 'knnball/kdtree'

  # Retrieve a new KDTree given an array of input values.
  #
  # Each data entry in the array is a Hash containing at least
  # a :coord key, an array of position (one per dimension)
  #    [ {:id => 1, :coord => [1.23, 2.34, -1.23, -22.3]},
  #      {:id => 2, :coord => [-2.33, 4.2, 1.23, 332.2]} ]
  #
  # The number of dimensions is taken from the first entry.
  # Note that the given array is reordered in place while the tree is built.
  #
  # @param data a not empty array of Hash containing a :coord key
  # @raise [ArgumentError] if data is nil or empty
  #
  # @see KnnBall::KDTree#initialize
  def self.build(data)
    if data.nil? || data.empty?
      raise ArgumentError, "data argument must be a not empty Array"
    end
    max_dimension = data.first[:coord].size
    KDTree.new(generate(data, max_dimension))
  end

  # Generate the KD-Tree hyperrectangle.
  #
  # The median of data on the actual dimension becomes a ball, values
  # before it go to its left sub-tree and values after it to its right
  # sub-tree, each built on the next dimension.
  #
  # @param data the list of all points, reordered in place
  # @param max_dimension the number of dimension of each points
  # @param actual_dimension the 1-based dimension to base comparison on
  # @return the root Ball of the generated tree, nil if data is nil or empty
  def self.generate(data, max_dimension, actual_dimension = 1)
    return nil if data.nil? || data.empty?
    # The ball must know the dimension it was split on: the search relies
    # on Ball#dimension to choose between left and right.
    return Ball.new(data.first, actual_dimension) if data.size == 1

    # Order the array such as the middle point is the median and
    # that every point on the left are of lesser or equal value than median
    # and that every point on the right are of greater or equal value
    # than the median. They are not more sorted than that.
    axis = actual_dimension - 1
    median_idx = Stat.median_index(data)
    value = Stat.median!(data) { |v1, v2| v1[:coord][axis] <=> v2[:coord][axis] }
    ball = Ball.new(value, actual_dimension)

    # Cycle through the dimensions: 1, 2, ..., max_dimension, 1, ...
    # (previously the dimension was never incremented, so every level of
    # the tree was split on the first dimension only).
    next_dimension = actual_dimension >= max_dimension ? 1 : actual_dimension + 1

    ball.left = generate(data[0...median_idx], max_dimension, next_dimension) if median_idx > 0
    ball.right = generate(data[(median_idx + 1)..-1], max_dimension, next_dimension) if median_idx < data.size - 1
    ball
  end

  # Retrieve an internal string representation of the index
  # that can then be persisted.
  #
  # Not implemented yet: always returns an empty String.
  def self.marshall(ball_tree)
    ""
  end

  # Retrieve a KDTree instance from a previously marshalled instance.
  #
  # Not implemented yet: always returns an empty KDTree.
  def self.unmarshall(marshalled_content)
    KDTree.new
  end

  # Retrieve the k nearest neighbor of the given position.
  #
  # Not implemented yet: always returns an empty Array.
  def self.find_knn(ball_tree, position, k = 1, options = Hash.new)
    []
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
|
13
|
+
|
14
|
+
module KnnBall

  # Specifications of KnnBall::Ball: leaf detection, center and the
  # Array / Hash conversions, plus the distance computation.
  describe Ball do

    describe "Leaf balls" do
      before :each do
        @value = {:id => 1, :coord => [1,2,3]}
        @ball = Ball.new(@value)
      end

      it "must be a leaf" do
        @ball.leaf?.must_equal true
      end

      it "must have a center equals to the value location" do
        @ball.center.must_equal @value[:coord]
      end

      it "must convert itself to an Array instance" do
        @ball.to_a.must_equal [@value, nil, nil]
      end

      it "must convert itself to a Hash instance" do
        @ball.to_h.must_equal({:value => @value, :left => nil, :right => nil})
      end
    end

    describe "Standard Balls" do
      before :each do
        @value = {:id => 1, :coord => [1,2,3]}
        @ball = Ball.new(@value, 1, Ball.new({:id => 3, :coord => [-1, -2, -3]}), Ball.new({:id => 2, :coord => [2, 3, 4]}))
      end

      it "wont be a leaf" do
        @ball.leaf?.must_equal false
      end

      it "must_be_centered_at_the_ball_value_location" do
        @ball.center.must_equal @value[:coord]
      end

      it "must convert itself to an Array instance" do
        @ball.to_a.must_equal([
          @value,
          [{:id => 3, :coord => [-1, -2, -3]}, nil, nil],
          [{:id => 2, :coord => [2, 3, 4]}, nil, nil]
        ])
      end

      it "must convert itself to a Hash instance" do
        @ball.to_h.must_equal({:value => @value,
          :left => {:value => {:id => 3, :coord => [-1, -2, -3]}, :left => nil, :right => nil},
          :right => {:value => {:id => 2, :coord => [2, 3, 4]}, :left => nil, :right => nil}
        })
      end
    end

    describe "Ball with sub-balls" do
      before :each do
        @value = {:id => 1, :coord => [1,2,3]}
        @leaf_1 = Ball.new(@value)
        @leaf_2 = Ball.new({:id => 2, :coord => [2, 3, 4]})
        @leaf_3 = Ball.new({:id => 3, :coord => [-1, -2 , -5]})
        @leaf_4 = Ball.new({:id => 4, :coord => [-3, -2, -2]})
        @sub_ball_1 = Ball.new({:id => 5, :coord => [1.4, 2, 2.5]}, 1, @leaf_1, @leaf_2)
        @sub_ball_2 = Ball.new({:id => 6, :coord => [-2, -1.9, -3]}, 1, @leaf_3, @leaf_4)
        @ball = Ball.new({:id => 7, :coord => [0, 0, 0]}, 1, @sub_ball_1, @sub_ball_2)
      end

      it "must be centered at (0,0,0)" do
        @ball.center.must_equal([0,0,0])
      end
    end

    # Ball#distance is a public method, so it is called directly. The
    # former before-hook made every private instance method of Ball public
    # for the whole test process, which was not needed and leaked into the
    # other specs.
    describe "distance between balls" do
      it "retrieve the correct distance" do
        b1 = Ball.new({:id => 2, :coord => [2, 3, 4]})
        b2 = Ball.new({:id => 3, :coord => [-1, -2 , -5]})
        b1.distance(b2).must_equal(Math.sqrt(115))
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
[{"id":"Dr Achermann Romeo","coord":[47.0495221,8.3079993]},{"id":"Dresse Achermann-Bieri Ursula","coord":[47.0525778,8.3052475]},{"id":"M. Ackermann Christian","coord":[46.9406862,7.3991925]},{"id":"Dr Ackermann Roland","coord":[47.3538085,7.9035746]},{"id":"Dr Adank-Sailer Gabrielle","coord":[47.1359809,7.2447156]},{"id":"Dr Aebersold Christian","coord":[47.1262539,7.2763632]},{"id":"Dresse Aebersold Gaby","coord":[47.1262539,7.2763632]},{"id":"M. Aellen Jean-Marc","coord":[46.1980464,6.1483353]},{"id":"M. Aerni Christian","coord":[46.18612,6.118075]},{"id":"Dr Akermann Felix","coord":[47.1697675,9.4766358]},{"id":"Dr Albrecht Silvia","coord":[47.1655638,7.5936764]},{"id":"M. Alder Ernst","coord":[47.6833333,8.75]},{"id":"Dr Althaus Marc-André","coord":[46.4810917,6.4571243]},{"id":"Dr Amati Francesca","coord":[46.5230847,6.640415]},{"id":"Anklin Bernard","coord":[47.3920158,8.5392456]},{"id":"Dr Bachelin Pierre","coord":[46.6668682,6.5107242]},{"id":"Dr Backes Hans-Ulrich","coord":[47.4238477,9.3686547]},{"id":"Badorff Cornel","coord":[47.4977665,8.7270143]},{"id":"Dr Bagutti Carlo","coord":[46.516054,6.608998]},{"id":"Dr Ballmer Peter Matthias","coord":[46.7617201,7.6279035]},{"id":"Dr Bandi-Ott Elisabeth","coord":[47.3839285,8.54832]},{"id":"Dr Barandun Jürg","coord":[47.3519079,8.5762373]},{"id":"Dr Barras Bernard","coord":[46.2270208,7.3533351]},{"id":"Dr Beck Thomas","coord":[46.7571286,7.6305141]},{"id":"Dr Bedat Bernard","coord":[46.2785274,6.1684023]},{"id":"M. Bekkering Anton","coord":[47.4226784,9.3184235]},{"id":"Dresse Benz Gabrielle","coord":[46.2304522,7.363867]},{"id":"M. Benz Martin","coord":[46.2304522,7.363867]},{"id":"M. Berchten Anthony","coord":[46.3873762,6.2208354]},{"id":"M. 
Berdoz Jean-Marc","coord":[46.2124628,6.131694]},{"id":"Mme Berdoz Nicole","coord":[46.2124628,6.131694]},{"id":"Dr Berger Hanspeter","coord":[46.684936,7.8499151]},{"id":"Dr Berghoff Ueli","coord":[47.2217408,8.6722444]},{"id":"Dr Bernasconi Cristiano","coord":[46.3601201,8.971514]},{"id":"Dr Bernhart Felix","coord":[46.9286035,7.447995]},{"id":"Dr Beuing Markus","coord":[46.4965828,9.8382621]},{"id":"Dr Beyeler Jürg","coord":[47.3805257,8.5428647]},{"id":"Dr Bianchi Michele","coord":[46.0056877,8.9407073]},{"id":"Dr Bickel Andreas","coord":[47.2287339,8.8268529]},{"id":"Dr Biedert Roland","coord":[47.1333363,7.2612937]}]
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
|
13
|
+
|
14
|
+
module KnnBall

  # Specifications of KnnBall::KDTree built from hand-made balls.
  #
  # In the fixtures below every ball splits on dimension 1: the left
  # sub-tree of a ball holds lesser or equal first coordinates and the
  # right sub-tree greater ones, as KnnBall.build would produce.
  describe KDTree do

    describe "building the tree" do
      it "must be an empty tree without params" do
        KDTree.new.must_be_empty
      end

      it "wont be an empty tree with data" do
        KDTree.new(Ball.new({:id => 1, :coord => [1]})).wont_be_empty
      end
    end

    describe "find the nearest ball" do
      before :each do
        # 34 is greater than 21, so it belongs to the right of ball 7
        # (it used to be given as its left sub-ball).
        root = Ball.new({:id => 4, :coord => [5]}, 1,
          Ball.new({:id => 2, :coord => [2]}, 1, Ball.new({:id => 1, :coord => [1]}), Ball.new({:id => 3, :coord => [3]})),
          Ball.new({:id => 6, :coord => [13]}, 1, Ball.new({:id => 5, :coord => [8]}),
            Ball.new({:id => 7, :coord => [21]}, 1, nil, Ball.new({:id => 8, :coord => [34]})))
        )
        @ball_tree = KDTree.new(root)
      end

      it "return a matching location" do
        @ball_tree.nearest([3])[:id].must_equal(3)
      end
    end

    describe "find the parent for coordinates" do
      before :each do
        # same ordering rule as above: [34, 5] goes to the right of [21, 6]
        @root = Ball.new({:id => 4, :coord => [5, 7]}, 1,
          Ball.new({:id => 2, :coord => [3, 4]}, 1, Ball.new({:id => 1, :coord => [2, 2]}), Ball.new({:id => 3, :coord => [4, 8]})),
          Ball.new({:id => 6, :coord => [13, 4]}, 1, Ball.new({:id => 5, :coord => [8, 1]}),
            Ball.new({:id => 7, :coord => [21, 6]}, 1, nil, Ball.new({:id => 8, :coord => [34, 5]})))
        )
        @ball_tree = KDTree.new(@root)
      end

      it "return the nearest parent" do
        # 13.2 > 5 and 13.2 > 13 lead to ball 7, whose left side is free
        @ball_tree.parent([13.2, 4.5]).value[:id].must_equal(7)
      end
    end

    describe "tree to array" do
      it "Should return an empty array if empty" do
        KDTree.new().to_a.must_equal []
      end

      it "Should return a tree array if not nil" do
        KDTree.new(Ball.new({:id => 1, :coord => [1, 2, 3]})).to_a.must_equal [{:id => 1, :coord => [1,2,3]}, nil, nil]
      end
    end
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
require 'json'
|
13
|
+
|
14
|
+
# Specifications of the KnnBall module entry points.
describe KnnBall do
  before do
    @ball_tree = MiniTest::Mock.new
  end

  describe "when asked to build the tree" do
    it "must retrieve a KDTree instance" do
      KnnBall.build([
        {:id => 1, :coord => [1.0,1.0]},
        {:id => 2, :coord => [2.0, 3.0]}
      ]).must_be :kind_of?, KnnBall::KDTree
    end

    it "must build a one dimension tree correctly" do
      tree = KnnBall.build(
        [{:id => 2, :coord => [2]},
        {:id => 3, :coord => [3]},
        {:id => 1, :coord => [1]}]
      )
      tree.root.value.must_equal({:id => 2, :coord => [2]})
      tree.root.left.value.must_equal({:id => 1, :coord => [1]})
      tree.root.left.left.must_be_nil
      tree.root.left.right.must_be_nil
      tree.root.right.wont_be_nil
      tree.root.right.value.must_equal({:id => 3, :coord => [3]})

      KnnBall.build([
        {:id => 1, :coord => [1]},
        {:id => 2, :coord => [2]},
        {:id => 3, :coord => [3]},
        {:id => 4, :coord => [5]},
        {:id => 5, :coord => [8]},
        {:id => 6, :coord => [13]},
        {:id => 7, :coord => [21]},
        {:id => 8, :coord => [34]}
      ]).root.value.must_equal({:id => 4, :coord => [5]})
    end
  end

  describe "when asked to serialize the index" do
    it "must retrieve a string" do
      KnnBall.marshall(@ball_tree).must_be :kind_of?, String
    end
  end

  describe "when asked to load an index" do
    it "must retrieve a a BallTree instance" do
      KnnBall.unmarshall("").must_be :kind_of?, KnnBall::KDTree
    end
  end

  describe "when asked to find the neareast location" do
    it "retrieve the nearest location" do
      result = KnnBall.find_knn(@ball_tree, [1, 1, 1, 1])
      result.must_be :kind_of?, Array
    end

    it "retrieve the same results as a brute force approach" do
      # block form so that the file is closed once read
      path = File.join(File.dirname(__FILE__), 'data.json')
      json = File.open(path, 'r:utf-8') { |f| f.read }
      # JSON gives String keys while the library expects the :coord Symbol
      data = JSON.parse(json).map do |entry|
        symbolized = {}
        entry.each { |k, v| symbolized[k.to_sym] = v }
        symbolized
      end

      tree = KnnBall.build(data)
      errors = []
      data.each do |point|
        res = data.map do |other|
          euc = Math.sqrt((other[:coord][0] - point[:coord][0])**2.0 + (other[:coord][1] - point[:coord][1])**2.0)
          [other, euc]
        end
        best = res.min { |a, b| a.last <=> b.last }
        brute_force_result = best.first
        # every point belongs to the data set, so its nearest location
        # must be at its own coordinates
        point[:coord].must_equal(brute_force_result[:coord])

        nn_result = tree.nearest(point[:coord])
        if nn_result[:coord] != brute_force_result[:coord]
          errors << [point, nn_result, brute_force_result]
        end
      end

      msgs = errors.map do |e|
        if e[0] == e[1]
          "For #{e[0]}, OK, but brute force retrieved #{e[2]}"
        elsif e[0] == e[2]
          "For #{e[0]}, #{e[1]} retrieved instead of #{e[2]}"
        else
          "For #{e[0]}, both brute force and nn search are wrong"
        end
      end
      # Assert on the collected errors themselves (the former
      # `must_be_empty errors` call targeted the spec instance, with
      # errors used as the failure message) and report every mismatch.
      assert_empty errors, msgs.join("\n")
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright (C) 2011 Olivier Amblet <http://olivier.amblet.net>
|
4
|
+
#
|
5
|
+
# knnball is freely distributable under the terms of an MIT license.
|
6
|
+
# See LICENSE or http://www.opensource.org/licenses/mit-license.php.
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
require 'minitest/autorun'
|
11
|
+
require 'knnball'
|
12
|
+
|
13
|
+
module KnnBall
  # Unit tests of the KnnBall::Stat selection helpers.
  class StatTest < MiniTest::Unit::TestCase
    # The median index is the middle one, or the lower middle one
    # for an even number of values.
    def test_median_index
      three_values = [1] * 3
      four_values = [1] * 4
      assert_equal(1, Stat.median_index(three_values))
      assert_equal(1, Stat.median_index(four_values))
    end

    # Pivoting around the value 4 (index 2) with the default ordering.
    def test_pivot
      data = [2,3,4,23,1,342,6,34, 2.3,4,-5,-1,2]
      final_index = Stat.pivot!(data, 2)
      assert_equal(8, final_index, data.inspect)
      assert_equal(4, data[8])
      assert(data[0..8].all? { |v| v <= 4 })
      assert(data[9..-1].all? { |v| v > 4 })
    end

    # Same pivot, with a block reversing the ordering.
    def test_pivot_with_comparison_block
      data = [2,3,4,23,1,342,6,34, 2.3,4,-5,-1,2]
      final_index = Stat.pivot!(data, 2) { |a, b| b <=> a }
      assert_equal(5, final_index, data.inspect)
      assert_equal(4, data[5])
      assert(data[0..5].all? { |v| v >= 4 })
      assert(data[6..-1].all? { |v| v < 4 })
    end

    def test_median_with_sorted_value
      data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
      median = Stat.median!(data)
      assert_equal(5, median, data.inspect)
      assert_equal(5, data[4])
      assert(data[0..4].all? { |v| v <= 5 })
      assert(data[4..8].all? { |v| v >= 5 })
    end

    def test_median_with_unsorted_values
      data = [1, 5, 3, 2, 7, 3, 4, 5, 6]
      median = Stat.median!(data)
      assert_equal(4, median, data.inspect)
      assert_equal(4, data[4], data.inspect)
      assert(data[0..4].all? { |v| v <= 4 })
      assert(data[4..8].all? { |v| v >= 4 })
    end

    # Hashes are compared through the block, here on their :coord array.
    def test_median_with_hash
      data = [{:coord => [1]}, {:coord => [2]}, {:coord => [3]}]
      median = Stat.median!(data) { |a, b| a[:coord] <=> b[:coord] }
      assert_equal({:coord => [2]}, median, data.inspect)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: knnball
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Olivier Amblet
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-05-23 00:00:00.000000000 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
description: Implements K-Nearest Neighbor algorithm using a KDTree in Ruby.
|
16
|
+
email: olivier@amblet.net
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files:
|
20
|
+
- README.md
|
21
|
+
- LICENSE
|
22
|
+
files:
|
23
|
+
- LICENSE
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- knnball.gemspec
|
27
|
+
- lib/knnball.rb
|
28
|
+
- lib/knnball/ball.rb
|
29
|
+
- lib/knnball/stat.rb
|
30
|
+
- lib/knnball/kdtree.rb
|
31
|
+
- test/specs/ball_spec.rb
|
32
|
+
- test/specs/data.json
|
33
|
+
- test/specs/kdtree_spec.rb
|
34
|
+
- test/specs/knnball_spec.rb
|
35
|
+
- test/units/stat_test.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: http://github.com/oliamb/knnball
|
38
|
+
licenses: []
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options:
|
41
|
+
- --charset=UTF-8
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
requirements: []
|
57
|
+
rubyforge_project: knnball
|
58
|
+
rubygems_version: 1.6.2
|
59
|
+
signing_key:
|
60
|
+
specification_version: 1
|
61
|
+
summary: K-Nearest Neighbor queries using a KDTree
|
62
|
+
test_files: []
|