html_spellchecker 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/MIT-LICENSE +20 -0
- data/README.md +55 -0
- data/lib/html_spellchecker.rb +62 -0
- metadata +117 -0
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Bruno Michel
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
HTML Spellchecker
|
2
|
+
=================
|
3
|
+
|
4
|
+
Want to spellcheck an HTML string properly? This gem is for you.
|
5
|
+
It's powered by [Nokogiri](http://nokogiri.org/) and
|
6
|
+
[hunspell-ffi](https://github.com/ahaller/hunspell-ffi)!
|
7
|
+
|
8
|
+
|
9
|
+
How to use it
|
10
|
+
-------------
|
11
|
+
|
12
|
+
It's very simple. Install it with rubygems:
|
13
|
+
|
14
|
+
gem install html_spellchecker
|
15
|
+
|
16
|
+
Or, if you use bundler, add it to your `Gemfile`:
|
17
|
+
|
18
|
+
gem "html_spellchecker", :version => "~>0.1"
|
19
|
+
|
20
|
+
Then you can use it in your code:
|
21
|
+
|
22
|
+
require "html_spellchecker"
|
23
|
+
HTML_Spellchecker.english.spellcheck("<p>This is xzqwy.</p>")
|
24
|
+
# => "<p>This is <mark class="misspelled">xzqwy</mark>.</p>"
|
25
|
+
|
26
|
+
The HTML_Spellchecker class can be initialized by giving 2 paths:
|
27
|
+
the affix file and the dictionary for hunspell. There are helpers to
|
28
|
+
create a new instance for English and French dictionaries.
|
29
|
+
|
30
|
+
Then, you can use `spellcheck` method: you give it an HTML string
|
31
|
+
and it returns the same string with misspelled words
|
32
|
+
enclosed in `<mark>` tags (with the `misspelled` class).
|
33
|
+
|
34
|
+
HTML_Spellchecker can avoid checking the spelling of special tags
|
35
|
+
like `<code>`, by keeping a list of the tags to spellcheck in
|
36
|
+
`HTML_Spellchecker.spellcheckable_tags`.
|
37
|
+
|
38
|
+
|
39
|
+
Issues or Suggestions
|
40
|
+
---------------------
|
41
|
+
|
42
|
+
Found an issue or have a suggestion? Please report it on
|
43
|
+
[GitHub's issue tracker](http://github.com/nono/HTML-Spellchecker/issues).
|
44
|
+
|
45
|
+
If you want to make a pull request, please run the specs first:
|
46
|
+
|
47
|
+
rspec spec
|
48
|
+
|
49
|
+
|
50
|
+
Credits
|
51
|
+
-------
|
52
|
+
|
53
|
+
Thanks [Andreas Haller](https://github.com/ahaller) for the hunspell-ffi gem.
|
54
|
+
|
55
|
+
Copyright (c) 2011 Bruno Michel <bmichel@menfin.info>, released under the MIT license
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
3
|
+
require "hunspell-ffi"
|
4
|
+
require "nokogiri"
|
5
|
+
require "set"
|
6
|
+
|
7
|
+
|
8
|
+
# Spellchecks the text content of an HTML string with hunspell and wraps
# every misspelled word in <mark class="misspelled">...</mark>.
#
# Only the content of tags listed in HTML_Spellchecker.spellcheckable_tags is
# checked; that set is writable so callers can add or remove tag names.
class HTML_Spellchecker
  # Directory used by the locale helpers (.english, .french, .for_locale)
  # when HTML_Spellchecker.dictionary_dir has not been assigned.
  DEFAULT_DICTIONARY_DIR = "/usr/share/hunspell"

  class << self
    # Set of tag names (strings) whose children are spellchecked.
    attr_accessor :spellcheckable_tags

    # Override the directory in which the locale helpers look for
    # "<locale>.aff" / "<locale>.dic". Instances already memoized by
    # .english / .french are not rebuilt when this changes.
    attr_writer :dictionary_dir

    # Directory holding the hunspell files; falls back to
    # DEFAULT_DICTIONARY_DIR when nothing was assigned.
    def dictionary_dir
      @dictionary_dir || DEFAULT_DICTIONARY_DIR
    end

    # Memoized spellchecker for the en_US dictionary.
    def english
      @english ||= for_locale("en_US")
    end

    # Memoized spellchecker for the fr_FR dictionary.
    def french
      @french ||= for_locale("fr_FR")
    end

    # Builds a new (non-memoized) spellchecker for +locale+, e.g. "de_DE",
    # reading "<dir>/<locale>.aff" and "<dir>/<locale>.dic".
    def for_locale(locale, dir = dictionary_dir)
      new(File.join(dir, "#{locale}.aff"), File.join(dir, "#{locale}.dic"))
    end
  end

  # aff - path handed to Hunspell.new as its first argument (the .aff file)
  # dic - path handed to Hunspell.new as its second argument (the .dic file)
  def initialize(aff, dic)
    @dict = Hunspell.new(aff, dic)
  end

  # Parses +html+ as an HTML fragment and returns whatever the fragment's
  # #spellcheck produces for this instance's dictionary.
  def spellcheck(html)
    Nokogiri::HTML::DocumentFragment.parse(html).spellcheck(@dict)
  end

  self.spellcheckable_tags = Set.new(%w(p ol ul li div header article nav section footer aside dd dt dl
                                        span blockquote cite q mark ins del table td th tr tbody thead tfoot
                                        a b i s em small strong hgroup h1 h2 h3 h4 h5 h6))
end
|
32
|
+
|
33
|
+
# A document fragment has no tag name to look up in
# HTML_Spellchecker.spellcheckable_tags, so it answers the predicate itself.
class Nokogiri::HTML::DocumentFragment
  # Always true: the fragment's children are always handed to #spellcheck.
  def spellcheckable?; true; end
end
|
38
|
+
|
39
|
+
# Spellchecking support for every Nokogiri node.
class Nokogiri::XML::Node
  # Returns this node serialized as HTML (no indentation). When the node is
  # spellcheckable, its children are first replaced by their own spellchecked
  # markup, re-parsed as an HTML fragment; otherwise the node is serialized
  # untouched.
  #
  # dict - object passed down unchanged to each child's #spellcheck.
  def spellcheck(dict)
    return to_html(:indent => 0) unless spellcheckable?

    # Collect the children's markup before detaching them from the tree.
    checked_markup = children.map { |node| node.spellcheck(dict) }
    children.remove
    add_child Nokogiri::HTML::DocumentFragment.parse(checked_markup.join)

    to_html(:indent => 0)
  end

  # True when this node's name is listed in
  # HTML_Spellchecker.spellcheckable_tags.
  def spellcheckable?
    HTML_Spellchecker.spellcheckable_tags.include?(name)
  end
end
|
53
|
+
|
54
|
+
# Text nodes are where words are actually looked up in the dictionary.
class Nokogiri::XML::Text
  # A run of word characters; Ruby 1.8 gets the \w form, later versions \p{Word}.
  WORDS_REGEXP = RUBY_VERSION =~ /^1\.8/ ? /\w+/ : /\p{Word}+/

  # Either a character reference (&amp; &lt; &#13; &#x3c;) or a word.
  # The serialized text is scanned with this pattern so that an entity is
  # consumed as one token instead of having its name ("amp", "lt", ...)
  # matched as a word and wrapped in <mark>, which would corrupt the markup.
  ENTITY_OR_WORD_REGEXP = Regexp.union(/&#?\w+;/, WORDS_REGEXP)

  # Returns the node's serialized (escaped) text with every word rejected by
  # dict.check wrapped in <mark class="misspelled">. Character references
  # are passed through untouched.
  #
  # dict - object responding to #check(word) with a truthy value for
  #        correctly spelled words.
  def spellcheck(dict)
    to_xhtml(:encoding => 'UTF-8').gsub(ENTITY_OR_WORD_REGEXP) do |token|
      # token[0, 1] rather than start_with? keeps this usable on old 1.8.
      if token[0, 1] == "&" || dict.check(token)
        token
      else
        "<mark class=\"misspelled\">#{token}</mark>"
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: html_spellchecker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 2
|
10
|
+
version: 0.1.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Bruno Michel
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-03 00:00:00 +02:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: nokogiri
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 4
|
33
|
+
version: "1.4"
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: hunspell-ffi
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - "="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 592302929
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
- 1
|
48
|
+
- 3
|
49
|
+
- alpha
|
50
|
+
- 2
|
51
|
+
version: 0.1.3.alpha2
|
52
|
+
type: :runtime
|
53
|
+
version_requirements: *id002
|
54
|
+
- !ruby/object:Gem::Dependency
|
55
|
+
name: rspec
|
56
|
+
prerelease: false
|
57
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ~>
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
hash: 11
|
63
|
+
segments:
|
64
|
+
- 2
|
65
|
+
- 4
|
66
|
+
version: "2.4"
|
67
|
+
type: :development
|
68
|
+
version_requirements: *id003
|
69
|
+
description: Wants to spellcheck an HTML string properly? This gem is for you.
|
70
|
+
email: bmichel@menfin.info
|
71
|
+
executables: []
|
72
|
+
|
73
|
+
extensions: []
|
74
|
+
|
75
|
+
extra_rdoc_files:
|
76
|
+
- README.md
|
77
|
+
files:
|
78
|
+
- MIT-LICENSE
|
79
|
+
- README.md
|
80
|
+
- Gemfile
|
81
|
+
- lib/html_spellchecker.rb
|
82
|
+
has_rdoc: true
|
83
|
+
homepage: http://github.com/nono/HTML-Spellchecker
|
84
|
+
licenses: []
|
85
|
+
|
86
|
+
post_install_message:
|
87
|
+
rdoc_options: []
|
88
|
+
|
89
|
+
require_paths:
|
90
|
+
- lib
|
91
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
none: false
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
hash: 3
|
97
|
+
segments:
|
98
|
+
- 0
|
99
|
+
version: "0"
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 3
|
106
|
+
segments:
|
107
|
+
- 0
|
108
|
+
version: "0"
|
109
|
+
requirements: []
|
110
|
+
|
111
|
+
rubyforge_project:
|
112
|
+
rubygems_version: 1.5.2
|
113
|
+
signing_key:
|
114
|
+
specification_version: 3
|
115
|
+
summary: Wants to spellcheck an HTML string properly? This gem is for you.
|
116
|
+
test_files: []
|
117
|
+
|