symspell 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1460d117d70b607f1e3cd2a82e1775239dcf6409
4
- data.tar.gz: 077eec846704a48345ffe305d4491ec85c40da57
3
+ metadata.gz: 4c6abf1253f5e16d82ae589bc6e35a6f5d72bc36
4
+ data.tar.gz: 6c9344ec4d40597700099e119b423ccfb85d3582
5
5
  SHA512:
6
- metadata.gz: d60beee5581b45443ed882fce50add34f6a4b26cf6792996156d2e14b396d31246708a9a30ded49baf7f0f35b28e877fdd4409bbff7020b4006f15b97c84e372
7
- data.tar.gz: 89497ab96606792069f18ca01bebdaf8e4aaeb68cc555053b5e0352a3656ef70faf6443803806630d73926a1abda41d74ed640366ffbe469f3cd12704bfcb541
6
+ metadata.gz: 2f71318a0ded808acf0f3eac41d8b569edd908ec0b81f126421fcfa7d206fce6969bc47e565fcdc324e1b4fb02f492e482e52e36b608559cf4364760a1847baf
7
+ data.tar.gz: 62215df1ac0378e26fe131ed8c730ebf3ad235107858bebf37802704bb8f8dd8a88c0c15f9a15da02518e7034cdeaa34802c24d03b38e31d8dc834c86254c03f
data/README.md CHANGED
@@ -11,7 +11,7 @@ This is a straight port of SymSpell from C# to Ruby. I've started moving things
11
11
 
12
12
  Original source with inline comments and README is here: https://github.com/wolfgarbe/symspell.
13
13
 
14
- I've changed very little from the original source (apart from removing the commandline interface) but please note it has no test coverage at this time.
14
+ I've changed very little from the original source (apart from removing the command-line interface) but please note it has only some very basic end-to-end tests at this time.
15
15
 
16
16
 
17
17
  ## Usage
@@ -20,13 +20,19 @@ I've changed very little from the original source (apart from removing the comma
20
20
 
21
21
  require 'symspell'
22
22
 
23
- speller = SymSpell.new <EDIT_DISTANCE_MAX>
24
- speller.create_dictionary('words.txt')
25
- speller.lookup('something')
23
+ speller = SymSpell.new <EDIT_DISTANCE_MAX> <VERBOSE>
24
+ speller.create_dictionary %w(joe jo mark john peter mary andrew imogen)
25
+ speller.lookup 'jo'
26
26
 
27
- ## EDIT_DISTANCE_MAX
27
+ ### EDIT_DISTANCE_MAX
28
28
 
29
29
  `EDIT_DISTANCE_MAX` is the maximum edit distance to search, where the edit distance is the number of operations needed to transform one string into another.
30
30
 
31
31
  For example the edit distance between **CA** and **ABC** is 2 because **CA** => **AC** => **ABC**. Edit distances of 2-5 are normal. Note, however, increasing EDIT_DISTANCE_MAX exponentially increases the combinations and therefore the time it takes to create the dictionary.
32
32
 
33
+ ### VERBOSE
34
+
35
+ * 0 - Return the top suggestion
36
+ * 1 - Return the suggestions with the lowest edit distance
37
+ * 2 - Return all suggestions
38
+
data/Rakefile CHANGED
@@ -1,9 +1,9 @@
1
1
  require 'rake/testtask'
2
2
 
3
3
  desc 'Test, build and install the gem'
4
- task :default => [:spec, :install]
4
+ task :default => [:test]
5
5
 
6
- Rake::TestTask.new(:spec) do |t|
6
+ Rake::TestTask.new(:test) do |t|
7
7
  t.pattern = 'tests/*_test.rb'
8
8
  end
9
9
 
@@ -4,8 +4,9 @@ require 'set'
4
4
  class SymSpell
5
5
  MAX_INT = 2**30 - 1
6
6
 
7
- def initialize(edit_distance_max)
7
+ def initialize(edit_distance_max, verbose)
8
8
  @edit_distance_max = edit_distance_max
9
+ @verbose = verbose
9
10
  @maxlength = 0
10
11
  @dictionary = {}
11
12
  @wordlist = []
@@ -14,7 +15,7 @@ class SymSpell
14
15
  def create_dictionary(corpus)
15
16
  word_count = 0
16
17
 
17
- File.open(corpus, 'r').each_line do |word|
18
+ corpus.each do |word|
18
19
  word_count += 1 if create_dictionary_entry(word.strip)
19
20
  end
20
21
  end
@@ -35,7 +36,7 @@ class SymSpell
35
36
  while (candidates.count > 0)
36
37
  candidate = candidates.shift
37
38
 
38
- return sort(suggestions) if ((suggestions.count > 0) && (input.size - candidate.size > suggestions[0].distance))
39
+ return sort(suggestions) if @verbose < 2 && suggestions.count > 0 && (input.size - candidate.size) > suggestions[0].distance
39
40
 
40
41
  if valueo = @dictionary[candidate]
41
42
  value = DictionaryItem.new
@@ -51,7 +52,7 @@ class SymSpell
51
52
  si.count = value.count
52
53
  si.distance = input.size - candidate.size
53
54
  suggestions << si
54
- return sort(suggestions) if input.size - candidate.size == 0
55
+ return sort(suggestions) if @verbose < 2 && input.size - candidate.size == 0
55
56
  end
56
57
 
57
58
  value2 = nil
@@ -85,12 +86,8 @@ class SymSpell
85
86
  end
86
87
  end
87
88
 
88
- if suggestions.count > 0 && suggestions[0].distance > distance
89
- suggestions.clear
90
- end
91
- if suggestions.count > 0 && distance > suggestions[0].distance
92
- next
93
- end
89
+ suggestions.clear if @verbose < 2 && suggestions.count > 0 && suggestions[0].distance > distance
90
+ next if @verbose < 2 && suggestions.count > 0 && distance > suggestions[0].distance
94
91
 
95
92
  if (distance <= @edit_distance_max)
96
93
  if value2 = @dictionary[suggestion]
@@ -200,9 +197,12 @@ class SymSpell
200
197
  end
201
198
 
202
199
  def add_lowest_distance(item, suggestion, suggestionint, delete)
203
- if item.suggestions.count > 0 && @wordlist[item.suggestions[0]].size - delete.size > suggestion.size - delete.size
200
+ if @verbose < 2 && item.suggestions.count > 0 && @wordlist[item.suggestions[0]].size - delete.size > suggestion.size - delete.size
204
201
  item.suggestions.clear
205
202
  end
203
+ if @verbose == 2 || item.suggestions.size == 0 || (@wordlist[item.suggestions[0]].size - delete.size >= suggestion.size - delete.size)
204
+ item.suggestions << suggestionint
205
+ end
206
206
  end
207
207
 
208
208
  def edits(word, edit_distance, deletes)
@@ -221,7 +221,13 @@ class SymSpell
221
221
  end
222
222
 
223
223
  def sort(suggestions)
224
- suggestions.sort! {|x, y| -x.count <=> y.count}
224
+ if @verbose < 2
225
+ suggestions.sort! {|x, y| -x.count <=> y.count}
226
+ else
227
+ suggestions.sort! {|x, y| (2 * x.distance <=> y.distance) - x.count <=> y.count}
228
+ end
229
+
230
+ @verbose == 0 ? suggestions[0..0] : suggestions
225
231
  end
226
232
 
227
233
  def damerau_levenshtein_distance(source, target)
@@ -2,7 +2,7 @@ require 'base64'
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'symspell'
5
- s.version = '0.0.1'
5
+ s.version = '0.0.2'
6
6
  s.authors = 'Phil Thompson'
7
7
  s.email = Base64.decode64("cGhpbEBlbGVjdHJpY3Zpc2lvbnMuY29t\n")
8
8
  s.summary = 'Ruby port of the symetric spell checking algorithm'
@@ -0,0 +1,39 @@
1
+ require 'minitest/autorun'
2
+ require_relative '../lib/symspell'
3
+
4
+ class SymSpellTest < Minitest::Test
5
+ def setup
6
+ @edit_distance_max = 2
7
+ @verbose = 0
8
+ end
9
+
10
+ def subject
11
+ @subject ||= SymSpell.new(@edit_distance_max, @verbose).tap do |subject|
12
+ words = %w(joe mark john peter mary andrew imogen)
13
+ subject.create_dictionary words
14
+ end
15
+ end
16
+ def test_lookup_correctly_spelled_word
17
+ assert_equal 'andrew', subject.lookup('andrew').first.term
18
+ end
19
+
20
+ def test_lookup_misspelt_word
21
+ assert_equal 'andrew', subject.lookup('andre').first.term
22
+ end
23
+
24
+ def test_lookup_fails_to_find_match
25
+ assert_equal nil, subject.lookup('amigon').first
26
+ end
27
+
28
+ def test_lookup_finds_match_after_turning_up_edit_distance
29
+ @edit_distance_max = 3
30
+ assert_equal ['imogen'], subject.lookup('amigon').map(&:term)
31
+ end
32
+
33
+ def test_lookup_returns_multiple_suggestions
34
+ @edit_distance_max = 2
35
+ @verbose = 2
36
+ assert_equal ['joe', 'john'], subject.lookup('jo').map(&:term)
37
+ end
38
+ end
39
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: symspell
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Phil Thompson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-31 00:00:00.000000000 Z
11
+ date: 2015-08-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: phil@electricvisions.com
@@ -20,6 +20,7 @@ files:
20
20
  - Rakefile
21
21
  - lib/symspell.rb
22
22
  - symspell.gemspec
23
+ - tests/symspell_test.rb
23
24
  homepage: https://github.com/PhilT/symspell
24
25
  licenses: []
25
26
  metadata: {}