yasc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,48 @@
1
+ # require 'open-uri'
2
module Yasc
  # Statistical spelling corrector.
  #
  # A word-frequency model (NWORDS) is built once, at load time, from the
  # bundled corpus file. Candidate corrections are generated by applying
  # one or two simple edits to the input word, and the candidate with the
  # highest corpus count wins.
  #
  # All behaviour is exposed as class-level methods, e.g.
  #   Yasc::SpellingCorrector.correct("speling")
  class SpellingCorrector
    # Alphabet used to generate alteration and insertion candidates.
    LETTERS = ("a".."z").to_a.join.freeze

    # Absolute path of the training corpus. Anchored to this file's
    # directory rather than the process working directory, so the library
    # loads no matter where the requiring program was started from.
    # NOTE(review): assumes big.txt sits next to this file (the gem's file
    # list shows lib/yasc/big.txt beside lib/yasc/spelling_corrector.rb) --
    # confirm this class lives in spelling_corrector.rb.
    CORPUS_PATH = File.expand_path('../big.txt', __FILE__)

    class << self
      # Splits text into lowercase words.
      #
      # @param text [String] arbitrary text
      # @return [Array<String>] every maximal run of a-z letters in the
      #   downcased text, in order of appearance
      def words(text)
        text.downcase.scan(/[a-z]+/)
      end

      # Builds a frequency model from a list of words.
      #
      # The hash default is 1, so a word that was never seen still reads
      # as count 1, and a word seen k times is stored as k + 1.
      #
      # @param features [Enumerable<String>] words to count
      # @return [Hash{String => Integer}] word => smoothed count
      def train(features)
        model = Hash.new(1)
        features.each { |feature| model[feature] += 1 }
        model
      end

      # Generates every string one simple edit away from +word+: single
      # character deletions, adjacent transpositions, single letter
      # alterations and single letter insertions (in that order).
      # The result may contain duplicates and +word+ itself.
      #
      # @param word [String]
      # @return [Array<String>, nil] the candidates, or nil if there are
      #   none (insertions always yield candidates, so this is a formality
      #   kept for parity with the other candidate generators)
      def edits1(word)
        n = word.length

        deletion = (0...n).map { |i| word[0...i] + word[(i + 1)..-1] }

        transposition = (0...(n - 1)).map do |i|
          word[0...i] + word[i + 1, 1] + word[i, 1] + word[(i + 2)..-1]
        end

        alteration = (0...n).flat_map do |i|
          LETTERS.each_char.map { |l| word[0...i] + l + word[(i + 1)..-1] }
        end

        # n + 1 insertion points: before each character and after the last.
        insertion = (0..n).flat_map do |i|
          LETTERS.each_char.map { |l| word[0...i] + l + word[i..-1] }
        end

        result = deletion + transposition + alteration + insertion
        result.empty? ? nil : result
      end

      # Collects the known words (keys of NWORDS) that are two edits away
      # from +word+. Duplicates are kept.
      #
      # @param word [String]
      # @return [Array<String>, nil] known candidates, or nil if none
      def known_edits2(word)
        result = edits1(word).flat_map do |e1|
          edits1(e1).select { |e2| NWORDS.key?(e2) }
        end
        result.empty? ? nil : result
      end

      # Filters a list down to the words present in NWORDS.
      #
      # @param words [Array<String>] candidate words
      # @return [Array<String>, nil] the known words, or nil if none; nil
      #   (rather than an empty array) lets callers chain with ||
      def known(words)
        result = words.select { |w| NWORDS.key?(w) }
        result.empty? ? nil : result
      end

      # Returns the most likely correction for +word+.
      #
      # Candidate sets are tried in order of increasing edit distance: the
      # word itself if known, then known words one edit away, then known
      # words two edits away, and finally the unchanged word. Within the
      # first non-empty set, the candidate with the highest NWORDS count
      # is returned.
      #
      # @param word [String]
      # @return [String]
      def correct(word)
        candidates = known([word]) ||
                     known(edits1(word)) ||
                     known_edits2(word) ||
                     [word]
        candidates.max { |a, b| NWORDS[a] <=> NWORDS[b] }
      end
    end

    # NWORDS = train(words(open('http://norvig.com/big.txt') {|f| f.read }))
    # Word-frequency model. Must be defined after the class-level methods
    # above because it calls them while the class body is evaluated.
    # File.read closes the file handle once the contents are loaded.
    NWORDS = train(words(File.read(CORPUS_PATH)))
  end
end
@@ -0,0 +1,3 @@
1
module Yasc
  # Version of the yasc gem; read by the gemspec when building the package.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/yasc/version', __FILE__)
3
+
4
# Package definition for the yasc gem.
Gem::Specification.new do |gem|
  gem.authors       = ["Yu Zhang"]
  gem.email         = ["ian7zy@gmail.com"]
  gem.description   = %q{Yet another spelling checker}
  gem.summary       = %q{This gem implements the spelling checker following this tutorial http://norvig.com/spell-correct.html}
  gem.homepage      = "https://github.com/ianzhang/yasc"

  # Split the git listing on newlines only. Passing `$\` (the output record
  # separator, nil unless explicitly set) makes String#split break on any
  # whitespace, which mangles paths that contain spaces.
  # Finder metadata files (.DS_Store) are tracked in the repository but are
  # not part of the library, so they are kept out of the package.
  gem.files         = `git ls-files`.split("\n").reject { |f| File.basename(f) == ".DS_Store" }
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "yasc"
  gem.require_paths = ["lib"]
  gem.version       = Yasc::VERSION
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yasc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Yu Zhang
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-13 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Yet another spelling checker
15
+ email:
16
+ - ian7zy@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .DS_Store
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.md
26
+ - Rakefile
27
+ - lib/.DS_Store
28
+ - lib/yasc.rb
29
+ - lib/yasc/.DS_Store
30
+ - lib/yasc/big.txt
31
+ - lib/yasc/spelling_corrector.rb
32
+ - lib/yasc/version.rb
33
+ - yasc.gemspec
34
+ homepage: https://github.com/ianzhang/yasc
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.10
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: This gem implements the spelling checker following this tutorial http://norvig.com/spell-correct.html
58
+ test_files: []