bk 0.0.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -7
- data/README.md +58 -25
- data/lib/bk/version.rb +1 -1
- data/test/common.rb +4 -0
- data/test/test_building_tree.rb +1 -2
- data/test/test_import_and_export.rb +1 -2
- data/test/test_querying_tree.rb +2 -2
- metadata +9 -9
- data/test/test_all.rb +0 -3
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,57 +1,78 @@
|
|
1
1
|
# BK-Tree implementation in Ruby
|
2
2
|
|
3
|
-
|
3
|
+
BK-trees can be used to efficiently locate strings' best matches from within a
|
4
|
+
large set. If you don’t know what a BK-tree is, these links should provide a
|
5
|
+
good explanation and introduction.
|
4
6
|
|
5
7
|
* [Damn Cool Algorithms, Part 1: BK-Trees](http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees)
|
6
8
|
* [Fast Approximate String Matching in a Dictionary](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3317)
|
7
9
|
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
BK is available as a Ruby gem:
|
14
|
+
|
15
|
+
gem install bk
|
16
|
+
|
8
17
|
## Usage
|
9
18
|
|
10
|
-
|
11
|
-
|
19
|
+
```ruby
|
20
|
+
require "bk"
|
21
|
+
tree = BK::Tree.new # Use the default Levenshtein distance algorithm
|
22
|
+
```
|
12
23
|
|
13
24
|
Add items to the tree:
|
14
25
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
26
|
+
```ruby
|
27
|
+
tree.add "cat"
|
28
|
+
tree.add "dog"
|
29
|
+
tree.add "monkey"
|
30
|
+
tree.add "donkey"
|
31
|
+
```
|
19
32
|
|
20
33
|
Find all items within distance 1 of ‘munkey’:
|
21
34
|
|
22
|
-
|
23
|
-
|
35
|
+
```ruby
|
36
|
+
tree.query("munkey", 1)
|
37
|
+
# => {"monkey"=>1}
|
38
|
+
```
|
24
39
|
|
25
40
|
Find all items within distance 2 of ‘munkey’:
|
26
41
|
|
27
|
-
|
28
|
-
|
42
|
+
```ruby
|
43
|
+
tree.query("munkey", 2)
|
44
|
+
# => {"donkey"=>2, "monkey"=>1}
|
45
|
+
```
|
29
46
|
|
30
47
|
You can specify a custom distance algorithm by passing an object that responds
|
31
48
|
to `call(a, b)` with a number:
|
32
49
|
|
33
|
-
|
34
|
-
|
35
|
-
|
50
|
+
```ruby
|
51
|
+
custom_algorithm = lambda{ |a, b|
|
52
|
+
Text::Levenshtein.distance(a, b)
|
53
|
+
}
|
36
54
|
|
37
|
-
|
55
|
+
# or, more tersely:
|
56
|
+
|
57
|
+
custom_algorithm = Text::Levenshtein.public_method(:distance)
|
58
|
+
|
59
|
+
tree = BK::Tree.new(custom_algorithm)
|
60
|
+
```
|
38
61
|
|
39
62
|
Note that the distance function *must* satisfy the
|
40
63
|
_triangle inequality_, i.e. _d(x,z) ≤ d(x,y) + d(y,z)_.
|
41
64
|
|
42
65
|
The precomputed tree can be exported to an IO-like object and reimported from it later:
|
43
66
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
File.open("tree", "rb") do |f|
|
49
|
-
tree = BK::Tree.import(f)
|
50
|
-
end
|
67
|
+
```ruby
|
68
|
+
File.open("tree", "wb") do |f|
|
69
|
+
tree.export(f)
|
70
|
+
end
|
51
71
|
|
52
|
-
|
53
|
-
|
54
|
-
|
72
|
+
File.open("tree", "rb") do |f|
|
73
|
+
tree = BK::Tree.import(f)
|
74
|
+
end
|
75
|
+
```
|
55
76
|
|
56
77
|
## Performance
|
57
78
|
|
@@ -75,3 +96,15 @@ As the threshold increases, the benefit is reduced. At threshold 3:
|
|
75
96
|
|
76
97
|
* Memory usage: around 6 MB for a 20,000-word tree.
|
77
98
|
* Maximum tree depth is limited by the stack.
|
99
|
+
|
100
|
+
## Testing
|
101
|
+
|
102
|
+
rake test
|
103
|
+
|
104
|
+
...or, for specific tests:
|
105
|
+
|
106
|
+
ruby -Itest test/test_building_tree.rb
|
107
|
+
|
108
|
+
## Licensing
|
109
|
+
|
110
|
+
MIT (see COPYING.txt)
|
data/lib/bk/version.rb
CHANGED
data/test/common.rb
ADDED
data/test/test_building_tree.rb
CHANGED
data/test/test_querying_tree.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
1
|
+
require 'common'
|
3
2
|
require 'bk'
|
4
3
|
|
5
4
|
class BKTreeQueryAccuracyTest < Test::Unit::TestCase
|
@@ -33,6 +32,7 @@ class BKTreeSearchSpaceTest < Test::Unit::TestCase
|
|
33
32
|
def initialize
|
34
33
|
@history = []
|
35
34
|
@counting = false
|
35
|
+
@recording = false
|
36
36
|
end
|
37
37
|
|
38
38
|
def call(a, b)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-04 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: text
|
16
|
-
requirement: &
|
16
|
+
requirement: &2152922040 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152922040
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
27
|
-
requirement: &
|
26
|
+
name: rake
|
27
|
+
requirement: &2152921620 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152921620
|
36
36
|
description: Burkhard Keller Tree implementation in Ruby
|
37
37
|
email: pbattley@gmail.com
|
38
38
|
executables: []
|
@@ -47,7 +47,7 @@ files:
|
|
47
47
|
- lib/bk.rb
|
48
48
|
- samples/graph.rb
|
49
49
|
- samples/performance.rb
|
50
|
-
- test/
|
50
|
+
- test/common.rb
|
51
51
|
- test/test_building_tree.rb
|
52
52
|
- test/test_import_and_export.rb
|
53
53
|
- test/test_querying_tree.rb
|
@@ -76,7 +76,7 @@ signing_key:
|
|
76
76
|
specification_version: 3
|
77
77
|
summary: Burkhard Keller Tree implementation in Ruby
|
78
78
|
test_files:
|
79
|
-
- test/
|
79
|
+
- test/common.rb
|
80
80
|
- test/test_building_tree.rb
|
81
81
|
- test/test_import_and_export.rb
|
82
82
|
- test/test_querying_tree.rb
|
data/test/test_all.rb
DELETED