symspell 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -5
- data/Rakefile +2 -2
- data/lib/symspell.rb +18 -12
- data/symspell.gemspec +1 -1
- data/tests/symspell_test.rb +39 -0
- metadata +3 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 4c6abf1253f5e16d82ae589bc6e35a6f5d72bc36
         | 
| 4 | 
            +
              data.tar.gz: 6c9344ec4d40597700099e119b423ccfb85d3582
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 2f71318a0ded808acf0f3eac41d8b569edd908ec0b81f126421fcfa7d206fce6969bc47e565fcdc324e1b4fb02f492e482e52e36b608559cf4364760a1847baf
         | 
| 7 | 
            +
              data.tar.gz: 62215df1ac0378e26fe131ed8c730ebf3ad235107858bebf37802704bb8f8dd8a88c0c15f9a15da02518e7034cdeaa34802c24d03b38e31d8dc834c86254c03f
         | 
    
        data/README.md
    CHANGED
    
    | @@ -11,7 +11,7 @@ This is a straight port of SymSpell from C# to Ruby. I've started moving things | |
| 11 11 |  | 
| 12 12 | 
             
            Original source with inline comments and README is here: https://github.com/wolfgarbe/symspell.
         | 
| 13 13 |  | 
| 14 | 
            -
            I've changed very little from the original source (apart from removing the commandline interface) but please note it has  | 
| 14 | 
            +
            I've changed very little from the original source (apart from removing the commandline interface) but please note it has only some very basic end-to-end tests at this time.
         | 
| 15 15 |  | 
| 16 16 |  | 
| 17 17 | 
             
            ## Usage
         | 
| @@ -20,13 +20,19 @@ I've changed very little from the original source (apart from removing the comma | |
| 20 20 |  | 
| 21 21 | 
             
                require 'symspell'
         | 
| 22 22 |  | 
| 23 | 
            -
                speller = SymSpell.new <EDIT_DISTANCE_MAX>
         | 
| 24 | 
            -
                speller.create_dictionary( | 
| 25 | 
            -
                speller.lookup | 
| 23 | 
            +
                speller = SymSpell.new <EDIT_DISTANCE_MAX> <VERBOSE>
         | 
| 24 | 
            +
                speller.create_dictionary %w(joe jo mark john peter mary andrew imogen)
         | 
| 25 | 
            +
                speller.lookup 'jo'
         | 
| 26 26 |  | 
| 27 | 
            -
             | 
| 27 | 
            +
            ### EDIT_DISTANCE_MAX
         | 
| 28 28 |  | 
| 29 29 | 
             
            `EDIT_DISTANCE_MAX` is the number of operations needed to transform one string into another.
         | 
| 30 30 |  | 
| 31 31 | 
             
            For example the edit distance between **CA** and **ABC** is 2 because **CA** => **AC** => **ABC**. Edit distances of 2-5 are normal. Note, however, increasing EDIT_DISTANCE_MAX exponentially increases the combinations and therefore the time it takes to create the dictionary.
         | 
| 32 32 |  | 
| 33 | 
            +
            ### VERBOSE
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            * 0 - Return the top suggestion
         | 
| 36 | 
            +
            * 1 - Return the suggestions with the lowest edit distance
         | 
| 37 | 
            +
            * 2 - Return all suggestions
         | 
| 38 | 
            +
             | 
    
        data/Rakefile
    CHANGED
    
    
    
        data/lib/symspell.rb
    CHANGED
    
    | @@ -4,8 +4,9 @@ require 'set' | |
| 4 4 | 
             
            class SymSpell
         | 
| 5 5 | 
             
              MAX_INT = 2**30 - 1
         | 
| 6 6 |  | 
| 7 | 
            -
              def initialize(edit_distance_max)
         | 
| 7 | 
            +
              def initialize(edit_distance_max, verbose)
         | 
| 8 8 | 
             
                @edit_distance_max = edit_distance_max
         | 
| 9 | 
            +
                @verbose = verbose
         | 
| 9 10 | 
             
                @maxlength = 0
         | 
| 10 11 | 
             
                @dictionary = {}
         | 
| 11 12 | 
             
                @wordlist = []
         | 
| @@ -14,7 +15,7 @@ class SymSpell | |
| 14 15 | 
             
              def create_dictionary(corpus)
         | 
| 15 16 | 
             
                word_count = 0
         | 
| 16 17 |  | 
| 17 | 
            -
                 | 
| 18 | 
            +
                corpus.each do |word|
         | 
| 18 19 | 
             
                  word_count += 1 if create_dictionary_entry(word.strip)
         | 
| 19 20 | 
             
                end
         | 
| 20 21 | 
             
              end
         | 
| @@ -35,7 +36,7 @@ class SymSpell | |
| 35 36 | 
             
                while (candidates.count > 0)
         | 
| 36 37 | 
             
                  candidate = candidates.shift
         | 
| 37 38 |  | 
| 38 | 
            -
                  return sort(suggestions) if  | 
| 39 | 
            +
                  return sort(suggestions) if @verbose < 2 && suggestions.count > 0 && (input.size - candidate.size) > suggestions[0].distance
         | 
| 39 40 |  | 
| 40 41 | 
             
                  if valueo = @dictionary[candidate]
         | 
| 41 42 | 
             
                    value = DictionaryItem.new
         | 
| @@ -51,7 +52,7 @@ class SymSpell | |
| 51 52 | 
             
                      si.count = value.count
         | 
| 52 53 | 
             
                      si.distance = input.size - candidate.size
         | 
| 53 54 | 
             
                      suggestions << si
         | 
| 54 | 
            -
                      return sort(suggestions) if input.size - candidate.size == 0
         | 
| 55 | 
            +
                      return sort(suggestions) if @verbose < 2 && input.size - candidate.size == 0
         | 
| 55 56 | 
             
                    end
         | 
| 56 57 |  | 
| 57 58 | 
             
                    value2 = nil
         | 
| @@ -85,12 +86,8 @@ class SymSpell | |
| 85 86 | 
             
                          end
         | 
| 86 87 | 
             
                        end
         | 
| 87 88 |  | 
| 88 | 
            -
                        if suggestions.count > 0 && suggestions[0].distance > distance
         | 
| 89 | 
            -
             | 
| 90 | 
            -
                        end
         | 
| 91 | 
            -
                        if suggestions.count > 0 && distance > suggestions[0].distance
         | 
| 92 | 
            -
                          next
         | 
| 93 | 
            -
                        end
         | 
| 89 | 
            +
                        suggestions.clear if @verbose < 2  && suggestions.count > 0 && suggestions[0].distance > distance
         | 
| 90 | 
            +
                        next if @verbose < 2  && suggestions.count > 0 && distance > suggestions[0].distance
         | 
| 94 91 |  | 
| 95 92 | 
             
                        if (distance <= @edit_distance_max)
         | 
| 96 93 | 
             
                          if value2 = @dictionary[suggestion]
         | 
| @@ -200,9 +197,12 @@ class SymSpell | |
| 200 197 | 
             
              end
         | 
| 201 198 |  | 
| 202 199 | 
             
              def add_lowest_distance(item, suggestion, suggestionint, delete)
         | 
| 203 | 
            -
                if item.suggestions.count > 0 && @wordlist[item.suggestions[0]].size - delete.size > suggestion.size - delete.size
         | 
| 200 | 
            +
                if @verbose < 2 && item.suggestions.count > 0 && @wordlist[item.suggestions[0]].size - delete.size > suggestion.size - delete.size
         | 
| 204 201 | 
             
                  item.suggestions.clear
         | 
| 205 202 | 
             
                end
         | 
| 203 | 
            +
                if @verbose == 2 || item.suggestions.size == 0 || (@wordlist[item.suggestions[0]].size - delete.size >= suggestion.size - delete.size)
         | 
| 204 | 
            +
                  item.suggestions << suggestionint
         | 
| 205 | 
            +
                end
         | 
| 206 206 | 
             
              end
         | 
| 207 207 |  | 
| 208 208 | 
             
              def edits(word, edit_distance, deletes)
         | 
| @@ -221,7 +221,13 @@ class SymSpell | |
| 221 221 | 
             
              end
         | 
| 222 222 |  | 
| 223 223 | 
             
              def sort(suggestions)
         | 
| 224 | 
            -
                 | 
| 224 | 
            +
                if @verbose < 2
         | 
| 225 | 
            +
                  suggestions.sort! {|x, y| -x.count <=> y.count}
         | 
| 226 | 
            +
                else
         | 
| 227 | 
            +
                  suggestions.sort! {|x, y| (2 * x.distance <=> y.distance) - x.count <=> y.count}
         | 
| 228 | 
            +
                end
         | 
| 229 | 
            +
             | 
| 230 | 
            +
                @verbose == 0 ? suggestions[0..0] : suggestions
         | 
| 225 231 | 
             
              end
         | 
| 226 232 |  | 
| 227 233 | 
             
              def damerau_levenshtein_distance(source, target)
         | 
    
        data/symspell.gemspec
    CHANGED
    
    | @@ -2,7 +2,7 @@ require 'base64' | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |s|
         | 
| 4 4 | 
             
              s.name        = 'symspell'
         | 
| 5 | 
            -
              s.version     = '0.0. | 
| 5 | 
            +
              s.version     = '0.0.2'
         | 
| 6 6 | 
             
              s.authors     = 'Phil Thompson'
         | 
| 7 7 | 
             
              s.email       = Base64.decode64("cGhpbEBlbGVjdHJpY3Zpc2lvbnMuY29t\n")
         | 
| 8 8 | 
             
              s.summary     = 'Ruby port of the symetric spell checking algorithm'
         | 
| @@ -0,0 +1,39 @@ | |
| 1 | 
            +
            require 'minitest/autorun'
         | 
| 2 | 
            +
            require_relative '../lib/symspell'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            class SymSpellTest < Minitest::Test
         | 
| 5 | 
            +
              def setup
         | 
| 6 | 
            +
                @edit_distance_max = 2
         | 
| 7 | 
            +
                @verbose = 0
         | 
| 8 | 
            +
              end
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              def subject
         | 
| 11 | 
            +
                @subject ||= SymSpell.new(@edit_distance_max, @verbose).tap do |subject|
         | 
| 12 | 
            +
                  words = %w(joe mark john peter mary andrew imogen)
         | 
| 13 | 
            +
                  subject.create_dictionary words
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
              end
         | 
| 16 | 
            +
              def test_lookup_correctly_spelled_word
         | 
| 17 | 
            +
                assert_equal 'andrew', subject.lookup('andrew').first.term
         | 
| 18 | 
            +
              end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              def test_lookup_misspelt_word
         | 
| 21 | 
            +
                assert_equal 'andrew', subject.lookup('andre').first.term
         | 
| 22 | 
            +
              end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
              def test_lookup_fails_to_find_match
         | 
| 25 | 
            +
                assert_equal nil, subject.lookup('amigon').first
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              def test_lookup_finds_match_after_turning_up_edit_distance
         | 
| 29 | 
            +
                @edit_distance_max = 3
         | 
| 30 | 
            +
                assert_equal ['imogen'], subject.lookup('amigon').map(&:term)
         | 
| 31 | 
            +
              end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
              def test_lookup_returns_multiple_suggestions
         | 
| 34 | 
            +
                @edit_distance_max = 2
         | 
| 35 | 
            +
                @verbose = 2
         | 
| 36 | 
            +
                assert_equal ['joe', 'john'], subject.lookup('jo').map(&:term)
         | 
| 37 | 
            +
              end
         | 
| 38 | 
            +
            end
         | 
| 39 | 
            +
             | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: symspell
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Phil Thompson
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015- | 
| 11 | 
            +
            date: 2015-08-03 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: 
         | 
| 14 14 | 
             
            email: phil@electricvisions.com
         | 
| @@ -20,6 +20,7 @@ files: | |
| 20 20 | 
             
            - Rakefile
         | 
| 21 21 | 
             
            - lib/symspell.rb
         | 
| 22 22 | 
             
            - symspell.gemspec
         | 
| 23 | 
            +
            - tests/symspell_test.rb
         | 
| 23 24 | 
             
            homepage: https://github.com/PhilT/symspell
         | 
| 24 25 | 
             
            licenses: []
         | 
| 25 26 | 
             
            metadata: {}
         |