yasc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ # require 'open-uri'
2
module Yasc
  # Frequency-based spelling corrector.
  #
  # A word-count model (NWORDS) is built once, when this file is loaded, from
  # the corpus at CORPUS_PATH. Candidate corrections are generated by applying
  # one or two simple edits (delete, transpose, replace, insert) to the input
  # and keeping the candidates that occur in the model; the most frequent
  # candidate wins.
  #
  # Every method is a class-level method, e.g.
  #   Yasc::SpellingCorrector.correct("speling")
  class SpellingCorrector
    # Alphabet used to build replacement and insertion candidates.
    LETTERS = ("a".."z").to_a.join.freeze

    # Location of the training corpus. It is resolved relative to this source
    # file rather than the process's working directory, so requiring the
    # library works no matter where the program was started from.
    CORPUS_PATH = File.expand_path('../big.txt', __FILE__)

    class << self
      # Splits +text+ into lowercase words.
      #
      # @param text [String] arbitrary text
      # @return [Array<String>] every maximal run of the letters a-z, lowercased
      def words(text)
        text.downcase.scan(/[a-z]+/)
      end

      # Builds the frequency model.
      #
      # @param features [Enumerable<String>] words, repeated once per occurrence
      # @return [Hash{String=>Integer}] occurrence count plus one for every word
      #   seen. The hash's default value is 1, so looking up an unseen word
      #   yields 1 without adding a key (key? stays false for it).
      def train(features)
        features.each_with_object(Hash.new(1)) { |feature, model| model[feature] += 1 }
      end

      # Generates every string that is one edit away from +word+.
      #
      # The result is ordered: deletions, transpositions, replacements, then
      # insertions. It may contain duplicates and +word+ itself. It is never
      # empty, because even the empty string has one insertion per letter.
      #
      # @param word [String]
      # @return [Array<String>]
      def edits1(word)
        n = word.length
        deletion      = (0...n).map { |i| word[0...i] + word[i + 1..-1] }
        transposition = (0...n - 1).map { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
        alteration    = (0...n).flat_map { |i| LETTERS.each_char.map { |c| word[0...i] + c + word[i + 1..-1] } }
        insertion     = (0..n).flat_map { |i| LETTERS.each_char.map { |c| word[0...i] + c + word[i..-1] } }
        deletion + transposition + alteration + insertion
      end

      # Finds the known words that are two edits away from +word+.
      #
      # The same known word is usually reachable through many different edit
      # paths, so the result is de-duplicated; the order of first appearance is
      # kept.
      #
      # @param word [String]
      # @return [Array<String>, nil] the known words, or nil when there are none
      def known_edits2(word)
        matches = edits1(word).flat_map { |e1| edits1(e1).select { |e2| NWORDS.key?(e2) } }.uniq
        matches.empty? ? nil : matches
      end

      # Filters +words+ down to those present in the model.
      #
      # @param words [Array<String>] candidate words
      # @return [Array<String>, nil] the known candidates, or nil when there
      #   are none (nil lets callers chain alternatives with ||)
      def known(words)
        matches = words.select { |w| NWORDS.key?(w) }
        matches.empty? ? nil : matches
      end

      # Returns the most likely spelling of +word+.
      #
      # Candidates are tried in order of increasing edit distance: the word
      # itself, known words one edit away, known words two edits away. If
      # nothing is known, +word+ is returned unchanged. Among the candidates
      # of the first non-empty group, the one with the highest count wins.
      #
      # @param word [String] expected to be lowercase; it is not normalised here
      # @return [String]
      def correct(word)
        candidates = known([word]) || known(edits1(word)) || known_edits2(word) || [word]
        candidates.max_by { |candidate| NWORDS[candidate] }
      end
    end

    # NWORDS = train(words(open('http://norvig.com/big.txt') {|f| f.read }))
    # Word-frequency model used by every lookup. File.read closes the file
    # once its content has been read.
    NWORDS = train(words(File.read(CORPUS_PATH)))
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the yasc gem.
module Yasc
  # Version string of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/yasc/version', __FILE__)
3
+
4
# Gem specification for yasc. The package contents are taken from the files
# tracked by git, so this must be evaluated inside the git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Yu Zhang"]
  gem.email         = ["ian7zy@gmail.com"]
  gem.description   = %q{Yet another spelling checker}
  gem.summary       = %q{This gem implements the spelling checker following this tutorial http://norvig.com/spell-correct.html}
  gem.homepage      = "https://github.com/ianzhang/yasc"

  # `git ls-files` prints one path per line, so split on newlines explicitly.
  # ($\ is the *output* record separator and is nil by default, which makes
  # String#split fall back to splitting on any whitespace and breaks paths
  # that contain spaces.) macOS Finder metadata files are left out of the
  # package even when they have been committed by accident.
  gem.files         = `git ls-files`.split("\n").reject { |path| File.basename(path) == ".DS_Store" }
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "yasc"
  gem.require_paths = ["lib"]
  gem.version       = Yasc::VERSION
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yasc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Yu Zhang
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-13 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Yet another spelling checker
15
+ email:
16
+ - ian7zy@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .DS_Store
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.md
26
+ - Rakefile
27
+ - lib/.DS_Store
28
+ - lib/yasc.rb
29
+ - lib/yasc/.DS_Store
30
+ - lib/yasc/big.txt
31
+ - lib/yasc/spelling_corrector.rb
32
+ - lib/yasc/version.rb
33
+ - yasc.gemspec
34
+ homepage: https://github.com/ianzhang/yasc
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.10
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: This gem implements the spelling checker following this tutorial http://norvig.com/spell-correct.html
58
+ test_files: []