spell_check 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ require 'spell_check/version'
2
+ require 'spell_check/dictionary'
3
+
4
module SpellCheck

  # Checks whether the input word exists in the dictionary. Any letter case is accepted, and words containing
  # over-repeated characters (e.g. "sheeeep") are corrected to the closest dictionary entry.
  #
  # Input that cannot be converted to a String, that is empty (including nil), or that contains anything other
  # than the letters A-Z / a-z is reported as not found without consulting the dictionary.
  #
  # @param [Object] aWordToCheck the word to look up; converted with +to_s+
  # @return [String] the dictionary word that matched, or 'no correction found'
  def self.checkWord(aWordToCheck)
    not_found = 'no correction found'

    # Reject anything with non-alphabet characters (this also proves that +to_s+ works on the input)
    return not_found if has_non_alphabet_chars(aWordToCheck)

    # Work on a real String from here on so nil, Symbols etc. cannot break the String-only helpers
    word = adjust_case(aWordToCheck.to_s)
    return not_found if word.empty?

    # A fresh dictionary is built for every lookup, but only once the input is known to be worth searching.
    # NOTE(review): Dictionary is defined outside this file; if it is expensive to build, consider memoizing.
    @dict = Dictionary.new

    # Try an exact lookup first; fall back to the repetition-tolerant search only when that finds nothing
    corrected_word = @dict.find_word(word)
    corrected_word = correct_repetitions(word) if corrected_word.nil?

    corrected_word.nil? ? not_found : corrected_word
  end

  # The methods below are internal helpers. A bare `private` has no effect on methods defined with `def self.`,
  # so they have always been callable from outside the module; they are left public to stay backward compatible.

  # Returns true if the input contains any character outside the English alphabet, false otherwise. This is
  # also the primary check that the object can be converted to a String: if the conversion raises, the input is
  # treated as invalid and true is returned.
  # @param [Object] word
  # @return [Boolean]
  def self.has_non_alphabet_chars(word)
    !word.to_s.match(/[^A-Za-z]/).nil?
  rescue StandardError
    true
  end

  # Returns the word with every character except the first down-cased. The case of the first character is
  # retained so that words such as "god" and "God" can be told apart.
  # @param [String] word
  # @return [String] the input itself when it has one or fewer characters
  def self.adjust_case(word)
    return word if word.length <= 1

    word[0] + word[1..-1].downcase
  end

  # Main logic for correcting repetition spelling errors. Collapses every run of a repeated character into a
  # single character, builds a regular expression that allows each character to repeat, asks the dictionary for
  # all words matching that expression, and returns the match closest to the original input.
  # @param [String] word
  # @return [String, nil] the closest match, or nil when the dictionary has no match
  def self.correct_repetitions(word)
    # Remove all consecutive repetitions of characters in the word
    squeezed = word.downcase.squeeze

    # Collect the potential matches from the dictionary
    candidates = find_reg_ex_matches(squeezed)
    return nil if candidates.empty? # no corrections found

    # Compare the candidates against the original input and return the closest one
    get_best_match(word, candidates)
  end

  # Builds a Regexp for the word and matches it against the dictionary values.
  # @param [String] word
  # @return [Array] whatever the dictionary's +find_reg_ex_matches+ returns
  def self.find_reg_ex_matches(word)
    # Create the dictionary on demand so this helper also works when called before checkWord
    @dict ||= Dictionary.new
    @dict.find_reg_ex_matches(build_reg_ex(word))
  end

  # Creates a regular expression that allows one or more consecutive repetitions of each character in the
  # input string.
  #
  # The line anchors ^ and $ are kept on purpose (rather than \A and \z).
  # NOTE(review): presumably the dictionary scans a multi-line word list, which needs line anchors - confirm
  # against Dictionary before changing them.
  # @param [String] word
  # @return [Regexp]
  def self.build_reg_ex(word)
    # Each character sits in its own class followed by '+'; escaping keeps characters such as ^ ] \ - literal
    pattern = word.chars.map { |char| "[#{Regexp.escape(char)}]+" }.join
    Regexp.new("^#{pattern}$")
  end

  # Given a collection of words that are possible corrections for the input word, returns the closest matching
  # correction using Levenshtein distance. The distance is computed once per candidate.
  # @param [String] word
  # @param [Array] matches
  # @return [String, nil] the candidate with the fewest changes, or nil when there are no candidates
  def self.get_best_match(word, matches)
    # Lowest number of changes means closest match to the original input
    matches.to_a.min_by { |candidate| levenshtein_distance(candidate, word) }
  end

  # The Levenshtein Distance algorithm measures the number of single-character changes (insertions, deletions,
  # substitutions) required to turn one string into the other, which makes it a good measure of similarity.
  # This code was sourced at
  # http://stackoverflow.com/questions/16323571/measure-the-distance-between-two-strings-with-ruby
  # @param [String] s
  # @param [String] t
  # @return [Integer]
  def self.levenshtein_distance(s, t)
    m = s.length
    n = t.length
    return m if n == 0
    return n if m == 0

    # d[i][j] holds the distance between the first i characters of s and the first j characters of t
    d = Array.new(m + 1) { Array.new(n + 1) }
    (0..m).each { |i| d[i][0] = i }
    (0..n).each { |j| d[0][j] = j }

    (1..n).each do |j|
      (1..m).each do |i|
        d[i][j] = if s[i - 1] == t[j - 1] # adjust index into string
                    d[i - 1][j - 1] # no operation required
                  else
                    [
                      d[i - 1][j],     # deletion
                      d[i][j - 1],     # insertion
                      d[i - 1][j - 1]  # substitution
                    ].min + 1
                  end
      end
    end

    d[m][n]
  end

end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spell_check
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Solberg, Garrick L
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: ! "Checks if a word exists in text dictionary. If it does not, this
42
+ gem will attempt to correct\n the input by changing repeated
43
+ characters and case."
44
+ email:
45
+ - Garrick.Solberg@gmail.com
46
+ executables: []
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - lib/spell_check.rb
51
+ - lib/spell_check/dictionary.rb
52
+ - lib/spell_check/version.rb
53
+ - lib/spell_check/words.txt
54
+ homepage: http://www.linkedin.com/in/garricksolberg/
55
+ licenses:
56
+ - MIT
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project:
74
+ rubygems_version: 2.2.2
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Checks to see if a word is correctly spelled
78
+ test_files: []