yasc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/lib/.DS_Store +0 -0
- data/lib/yasc.rb +8 -0
- data/lib/yasc/.DS_Store +0 -0
- data/lib/yasc/big.txt +128457 -0
- data/lib/yasc/spelling_corrector.rb +48 -0
- data/lib/yasc/version.rb +3 -0
- data/yasc.gemspec +17 -0
- metadata +58 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
# require 'open-uri'
|
2
|
+
module Yasc
  # Spelling corrector following http://norvig.com/spell-correct.html.
  #
  # Everything is exposed as class-level methods. The word-frequency model
  # (NWORDS) is built once, when this file is loaded, from the big.txt corpus
  # that sits next to this file.
  class SpellingCorrector
    class << self
      # Splits text into lowercase words.
      #
      # @param text [String] arbitrary text
      # @return [Array<String>] every maximal run of the letters a-z, after
      #   the text has been downcased
      def words(text)
        text.downcase.scan(/[a-z]+/)
      end

      # Builds a frequency model from a list of words.
      #
      # The hash default is 1, so a word that was never seen reads as 1 and a
      # word seen k times is stored as k + 1.
      #
      # @param features [Enumerable<String>] the words to count
      # @return [Hash{String => Integer}] word => smoothed count
      def train(features)
        model = Hash.new(1)
        features.each { |feature| model[feature] += 1 }
        model
      end

      # Generates every string that is one edit away from +word+: one
      # character deleted, two adjacent characters swapped, one character
      # replaced by a letter of LETTERS, or one letter of LETTERS inserted.
      # The list may contain duplicates and the word itself.
      #
      # @param word [String]
      # @return [Array<String>, nil] the candidates, or nil if there are none
      def edits1(word)
        n = word.length

        deletion = (0...n).map { |i| word[0...i] + word[(i + 1)..-1] }

        transposition = (0...(n - 1)).map do |i|
          word[0...i] + word[i + 1, 1] + word[i, 1] + word[(i + 2)..-1]
        end

        alteration = []
        n.times do |i|
          LETTERS.each_char { |letter| alteration << word[0...i] + letter + word[(i + 1)..-1] }
        end

        insertion = []
        (n + 1).times do |i|
          LETTERS.each_char { |letter| insertion << word[0...i] + letter + word[i..-1] }
        end

        result = deletion + transposition + alteration + insertion
        result.empty? ? nil : result
      end

      # Collects the strings two edits away from +word+ that are keys of
      # NWORDS.
      #
      # @param word [String]
      # @return [Array<String>, nil] known words (possibly with duplicates),
      #   or nil when none is known
      def known_edits2(word)
        result = []
        edits1(word).each do |first_edit|
          edits1(first_edit).each do |second_edit|
            result << second_edit if NWORDS.key?(second_edit)
          end
        end
        result.empty? ? nil : result
      end

      # Keeps only the candidates that are keys of NWORDS.
      #
      # @param words [Array<String>] candidate words
      # @return [Array<String>, nil] the known candidates, or nil when none is
      #   known (nil lets callers chain alternatives with ||)
      def known(words)
        result = words.select { |candidate| NWORDS.key?(candidate) }
        result.empty? ? nil : result
      end

      # Returns the most frequent candidate correction for +word+.
      #
      # Candidate sets are tried in order and the first non-empty one wins:
      # the word itself if known, known words one edit away, known words two
      # edits away, and finally the unchanged word.
      #
      # @param word [String]
      # @return [String] the candidate with the highest NWORDS count
      def correct(word)
        candidates = known([word]) ||
                     known(edits1(word)) ||
                     known_edits2(word) ||
                     [word]
        candidates.max_by { |candidate| NWORDS[candidate] }
      end
    end

    # Alphabet used when generating replacement and insertion edits.
    LETTERS = ('a'..'z').to_a.join.freeze

    # Word-frequency model trained on big.txt (the corpus published at
    # http://norvig.com/big.txt). The path is resolved relative to this file
    # rather than the current working directory, so the library loads no
    # matter where the calling process was started, and File.read closes the
    # file once it has been read.
    NWORDS = train(words(File.read(File.expand_path('../big.txt', __FILE__))))
  end
end
|
data/lib/yasc/version.rb
ADDED
data/yasc.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for yasc. The version number lives in lib/yasc/version.rb
# and is loaded by a path relative to this file.
require File.expand_path('../lib/yasc/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "yasc"
  gem.version       = Yasc::VERSION
  gem.authors       = ["Yu Zhang"]
  gem.email         = ["ian7zy@gmail.com"]
  gem.description   = %q{Yet another spelling checker}
  gem.summary       = %q{This gem implements the spelling checker following this tutorial http://norvig.com/spell-correct.html}
  gem.homepage      = "https://github.com/ianzhang/yasc"

  # Package every file tracked by git. The list is read NUL-delimited so the
  # split does not depend on the global $\ (nil by default, which would split
  # on any whitespace and break paths containing spaces). macOS Finder
  # metadata (.DS_Store) is tracked in this repository but is not part of the
  # library, so it is left out of the package.
  gem.files         = `git ls-files -z`.split("\x0").reject { |f| File.basename(f) == ".DS_Store" }
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yasc
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Yu Zhang
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-13 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: Yet another spelling checker
|
15
|
+
email:
|
16
|
+
- ian7zy@gmail.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- .DS_Store
|
22
|
+
- .gitignore
|
23
|
+
- Gemfile
|
24
|
+
- LICENSE
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- lib/.DS_Store
|
28
|
+
- lib/yasc.rb
|
29
|
+
- lib/yasc/.DS_Store
|
30
|
+
- lib/yasc/big.txt
|
31
|
+
- lib/yasc/spelling_corrector.rb
|
32
|
+
- lib/yasc/version.rb
|
33
|
+
- yasc.gemspec
|
34
|
+
homepage: https://github.com/ianzhang/yasc
|
35
|
+
licenses: []
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.8.10
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: This gem implements the spelling checker following this tutorial http://norvig.com/spell-correct.html
|
58
|
+
test_files: []
|