symspell 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -5
- data/Rakefile +2 -2
- data/lib/symspell.rb +18 -12
- data/symspell.gemspec +1 -1
- data/tests/symspell_test.rb +39 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c6abf1253f5e16d82ae589bc6e35a6f5d72bc36
|
4
|
+
data.tar.gz: 6c9344ec4d40597700099e119b423ccfb85d3582
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f71318a0ded808acf0f3eac41d8b569edd908ec0b81f126421fcfa7d206fce6969bc47e565fcdc324e1b4fb02f492e482e52e36b608559cf4364760a1847baf
|
7
|
+
data.tar.gz: 62215df1ac0378e26fe131ed8c730ebf3ad235107858bebf37802704bb8f8dd8a88c0c15f9a15da02518e7034cdeaa34802c24d03b38e31d8dc834c86254c03f
|
data/README.md
CHANGED
@@ -11,7 +11,7 @@ This is a straight port of SymSpell from C# to Ruby. I've started moving things
|
|
11
11
|
|
12
12
|
Original source with inline comments and README is here: https://github.com/wolfgarbe/symspell.
|
13
13
|
|
14
|
-
I've changed very little from the original source (apart from removing the commandline interface) but please note it has
|
14
|
+
I've changed very little from the original source (apart from removing the commandline interface) but please note it has only some very basic end to end tests at this time.
|
15
15
|
|
16
16
|
|
17
17
|
## Usage
|
@@ -20,13 +20,19 @@ I've changed very little from the original source (apart from removing the comma
|
|
20
20
|
|
21
21
|
require 'symspell'
|
22
22
|
|
23
|
-
speller = SymSpell.new <EDIT_DISTANCE_MAX>
|
24
|
-
speller.create_dictionary(
|
25
|
-
speller.lookup
|
23
|
+
speller = SymSpell.new <EDIT_DISTANCE_MAX>, <VERBOSE>
|
24
|
+
speller.create_dictionary %w(joe jo mark john peter mary andrew imogen)
|
25
|
+
speller.lookup 'jo'
|
26
26
|
|
27
|
-
|
27
|
+
### EDIT_DISTANCE_MAX
|
28
28
|
|
29
29
|
`EDIT_DISTANCE_MAX` is the maximum edit distance allowed for a suggestion, where edit distance is the number of operations needed to transform one string into another.
|
30
30
|
|
31
31
|
For example the edit distance between **CA** and **ABC** is 2 because **CA** => **AC** => **ABC**. Edit distances of 2-5 are normal. Note, however, increasing EDIT_DISTANCE_MAX exponentially increases the combinations and therefore the time it takes to create the dictionary.
|
32
32
|
|
33
|
+
### VERBOSE
|
34
|
+
|
35
|
+
* 0 - Return the top suggestion
|
36
|
+
* 1 - Return the suggestions with the lowest edit distance
|
37
|
+
* 2 - Return all suggestions
|
38
|
+
|
data/Rakefile
CHANGED
data/lib/symspell.rb
CHANGED
@@ -4,8 +4,9 @@ require 'set'
|
|
4
4
|
class SymSpell
|
5
5
|
MAX_INT = 2**30 - 1
|
6
6
|
|
7
|
-
def initialize(edit_distance_max)
|
7
|
+
def initialize(edit_distance_max, verbose)
|
8
8
|
@edit_distance_max = edit_distance_max
|
9
|
+
@verbose = verbose
|
9
10
|
@maxlength = 0
|
10
11
|
@dictionary = {}
|
11
12
|
@wordlist = []
|
@@ -14,7 +15,7 @@ class SymSpell
|
|
14
15
|
# Feeds every word of +corpus+ into the dictionary.
#
# Each word is stripped of surrounding whitespace and handed to
# create_dictionary_entry. The previous implementation also kept a running
# count of the entries that call reported as truthy, but the count was never
# read or returned, so it has been dropped.
#
# corpus - any object responding to #each that yields strings.
#
# Returns whatever corpus.each returns (the array itself for an Array).
def create_dictionary(corpus)
  corpus.each { |word| create_dictionary_entry(word.strip) }
end
|
@@ -35,7 +36,7 @@ class SymSpell
|
|
35
36
|
while (candidates.count > 0)
|
36
37
|
candidate = candidates.shift
|
37
38
|
|
38
|
-
return sort(suggestions) if
|
39
|
+
return sort(suggestions) if @verbose < 2 && suggestions.count > 0 && (input.size - candidate.size) > suggestions[0].distance
|
39
40
|
|
40
41
|
if valueo = @dictionary[candidate]
|
41
42
|
value = DictionaryItem.new
|
@@ -51,7 +52,7 @@ class SymSpell
|
|
51
52
|
si.count = value.count
|
52
53
|
si.distance = input.size - candidate.size
|
53
54
|
suggestions << si
|
54
|
-
return sort(suggestions) if input.size - candidate.size == 0
|
55
|
+
return sort(suggestions) if @verbose < 2 && input.size - candidate.size == 0
|
55
56
|
end
|
56
57
|
|
57
58
|
value2 = nil
|
@@ -85,12 +86,8 @@ class SymSpell
|
|
85
86
|
end
|
86
87
|
end
|
87
88
|
|
88
|
-
if suggestions.count > 0 && suggestions[0].distance > distance
|
89
|
-
|
90
|
-
end
|
91
|
-
if suggestions.count > 0 && distance > suggestions[0].distance
|
92
|
-
next
|
93
|
-
end
|
89
|
+
suggestions.clear if @verbose < 2 && suggestions.count > 0 && suggestions[0].distance > distance
|
90
|
+
next if @verbose < 2 && suggestions.count > 0 && distance > suggestions[0].distance
|
94
91
|
|
95
92
|
if (distance <= @edit_distance_max)
|
96
93
|
if value2 = @dictionary[suggestion]
|
@@ -200,9 +197,12 @@ class SymSpell
|
|
200
197
|
end
|
201
198
|
|
202
199
|
# Records +suggestionint+ on +item+, keeping only the shortest suggestions
# unless every suggestion is wanted (@verbose == 2).
#
# The original compared `stored.size - delete.size` against
# `suggestion.size - delete.size`; the `delete.size` term appears on both
# sides and cancels out, so the word lengths are compared directly.
#
# item          - object whose #suggestions is an array of indexes into @wordlist.
# suggestion    - the candidate word (String).
# suggestionint - index to append to item.suggestions.
# delete        - kept for interface compatibility; no longer read.
def add_lowest_distance(item, suggestion, suggestionint, delete)
  # A strictly shorter candidate replaces everything stored so far.
  if @verbose < 2 && !item.suggestions.empty? &&
     @wordlist[item.suggestions.first].size > suggestion.size
    item.suggestions.clear
  end

  # Append when collecting everything, when nothing is stored yet, or when
  # the candidate is no longer than the first stored suggestion.
  if @verbose == 2 || item.suggestions.empty? ||
     @wordlist[item.suggestions.first].size >= suggestion.size
    item.suggestions << suggestionint
  end
end
|
207
207
|
|
208
208
|
def edits(word, edit_distance, deletes)
|
@@ -221,7 +221,13 @@ class SymSpell
|
|
221
221
|
end
|
222
222
|
|
223
223
|
# Orders +suggestions+ in place and returns the part selected by @verbose.
#
# With @verbose < 2 the items are ordered by count, highest first. Otherwise
# they are ordered by distance, lowest first, with ties broken by count,
# highest first.
#
# The comparators are fully parenthesised on purpose. `-x.count <=> y.count`
# parses as `(-x.count) <=> y.count`, and the earlier two-key expression
# parsed as `(((2 * x.distance) <=> y.distance) - x.count) <=> y.count`.
# Neither is a valid ordering.
#
# suggestions - array of objects responding to #distance and #count.
#
# Returns a one-element (or empty) array when @verbose == 0, otherwise the
# sorted +suggestions+ array itself.
def sort(suggestions)
  if @verbose < 2
    suggestions.sort! { |x, y| -(x.count <=> y.count) }
  else
    # The distance comparison is weighted so it always dominates the count tie-break.
    suggestions.sort! { |x, y| 2 * (x.distance <=> y.distance) - (x.count <=> y.count) }
  end

  @verbose == 0 ? suggestions[0..0] : suggestions
end
|
226
232
|
|
227
233
|
def damerau_levenshtein_distance(source, target)
|
data/symspell.gemspec
CHANGED
@@ -2,7 +2,7 @@ require 'base64'
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'symspell'
|
5
|
-
s.version = '0.0.
|
5
|
+
s.version = '0.0.2'
|
6
6
|
s.authors = 'Phil Thompson'
|
7
7
|
s.email = Base64.decode64("cGhpbEBlbGVjdHJpY3Zpc2lvbnMuY29t\n")
|
8
8
|
s.summary = 'Ruby port of the symetric spell checking algorithm'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require_relative '../lib/symspell'
|
3
|
+
|
4
|
+
# End-to-end checks of SymSpell#lookup against a small fixed word list.
class SymSpellTest < Minitest::Test
  def setup
    @edit_distance_max = 2
    @verbose = 0
  end

  # Built lazily so a test can change @edit_distance_max / @verbose before
  # the speller is first used.
  def subject
    @subject ||= SymSpell.new(@edit_distance_max, @verbose).tap do |speller|
      speller.create_dictionary %w(joe mark john peter mary andrew imogen)
    end
  end

  def test_lookup_correctly_spelled_word
    assert_equal 'andrew', subject.lookup('andrew').first.term
  end

  def test_lookup_misspelt_word
    assert_equal 'andrew', subject.lookup('andre').first.term
  end

  def test_lookup_fails_to_find_match
    # assert_nil replaces `assert_equal nil, ...`, which Minitest deprecates.
    assert_nil subject.lookup('amigon').first
  end

  def test_lookup_finds_match_after_turning_up_edit_distance
    @edit_distance_max = 3
    assert_equal ['imogen'], subject.lookup('amigon').map(&:term)
  end

  def test_lookup_returns_multiple_suggestions
    @edit_distance_max = 2
    @verbose = 2
    assert_equal ['joe', 'john'], subject.lookup('jo').map(&:term)
  end
end
|
39
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: symspell
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Phil Thompson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: phil@electricvisions.com
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- Rakefile
|
21
21
|
- lib/symspell.rb
|
22
22
|
- symspell.gemspec
|
23
|
+
- tests/symspell_test.rb
|
23
24
|
homepage: https://github.com/PhilT/symspell
|
24
25
|
licenses: []
|
25
26
|
metadata: {}
|