html_spellchecker 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/Gemfile +2 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +55 -0
  4. data/lib/html_spellchecker.rb +62 -0
  5. metadata +117 -0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Fetch gems over HTTPS; the :rubygems symbol shortcut is deprecated by Bundler.
source "https://rubygems.org"

# Runtime and development dependencies are declared in the gemspec.
gemspec
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Bruno Michel
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ HTML Spellchecker
2
+ =================
3
+
4
+ Want to spellcheck an HTML string properly? This gem is for you.
5
+ It's powered by [Nokogiri](http://nokogiri.org/) and
6
+ [hunspell-ffi](https://github.com/ahaller/hunspell-ffi)!
7
+
8
+
9
+ How to use it
10
+ -------------
11
+
12
+ It's very simple. Install it with rubygems:
13
+
14
+ gem install html_spellchecker
15
+
16
+ Or, if you use bundler, add it to your `Gemfile`:
17
+
18
+ gem "html_spellchecker", "~> 0.1"
19
+
20
+ Then you can use it in your code:
21
+
22
+ require "html_spellchecker"
23
+ HTML_Spellchecker.english.spellcheck("<p>This is xzqwy.</p>")
24
+ # => "<p>This is <mark class=\"misspelled\">xzqwy</mark>.</p>"
25
+
26
+ The HTML_Spellchecker class can be initialized by giving 2 paths:
27
+ the affix file and the dictionary file for hunspell. There are helpers to
28
+ create a new instance for the English and French dictionaries.
29
+
30
+ Then, you can use the `spellcheck` method: you give it an HTML string
31
+ and it returns the same string with misspelled words
32
+ enclosed in `<mark>` tags (with the `misspelled` class).
33
+
34
+ HTML_Spellchecker can skip spellchecking special tags
35
+ like `<code>`: it keeps the list of tags to spellcheck in
36
+ `HTML_Spellchecker.spellcheckable_tags`.
37
+
38
+
39
+ Issues or Suggestions
40
+ ---------------------
41
+
42
+ Found an issue or have a suggestion? Please report it on
43
+ [GitHub's issue tracker](http://github.com/nono/HTML-Spellchecker/issues).
44
+
45
+ If you want to make a pull request, please run the specs first:
46
+
47
+ rspec spec
48
+
49
+
50
+ Credits
51
+ -------
52
+
53
+ Thanks to [Andreas Haller](https://github.com/ahaller) for the hunspell-ffi gem.
54
+
55
+ Copyright (c) 2011 Bruno Michel <bmichel@menfin.info>, released under the MIT license
@@ -0,0 +1,62 @@
1
+ # Encoding: UTF-8
2
+
3
+ require "hunspell-ffi"
4
+ require "nokogiri"
5
+ require "set"
6
+
7
+
8
# Spellchecks the text of an HTML string against a hunspell dictionary.
#
# An instance wraps one Hunspell dictionary; #spellcheck parses the HTML
# as a fragment and delegates to the fragment's own #spellcheck.
class HTML_Spellchecker
  class << self
    # Collection of tag names whose content is eligible for spellchecking.
    # It is read through +include?+ when deciding whether to descend into a node.
    attr_accessor :spellcheckable_tags

    # Memoized checker built from the en_US hunspell files under
    # /usr/share/hunspell.
    def english
      @english ||= new("/usr/share/hunspell/en_US.aff", "/usr/share/hunspell/en_US.dic")
    end

    # Memoized checker built from the fr_FR hunspell files under
    # /usr/share/hunspell.
    def french
      @french ||= new("/usr/share/hunspell/fr_FR.aff", "/usr/share/hunspell/fr_FR.dic")
    end
  end

  # Default list of spellcheckable tags (tags such as <code> are not in it).
  self.spellcheckable_tags = %w(
    p ol ul li div header article nav section footer aside dd dt dl
    span blockquote cite q mark ins del table td th tr tbody thead tfoot
    a b i s em small strong hgroup h1 h2 h3 h4 h5 h6
  ).to_set

  # aff - path to the hunspell affix file.
  # dic - path to the hunspell dictionary file.
  def initialize(aff, dic)
    @dict = Hunspell.new(aff, dic)
  end

  # html - the HTML string to check.
  #
  # Returns whatever the parsed fragment's #spellcheck returns for this
  # instance's dictionary.
  def spellcheck(html)
    fragment = Nokogiri::HTML::DocumentFragment.parse(html)
    fragment.spellcheck(@dict)
  end
end
32
+
33
class Nokogiri::HTML::DocumentFragment
  # A fragment always reports itself as spellcheckable; it does not
  # consult HTML_Spellchecker.spellcheckable_tags.
  def spellcheckable?; true; end
end
38
+
39
class Nokogiri::XML::Node
  # Serializes this node to HTML (no indentation).
  #
  # When the node is spellcheckable, its children are first replaced by the
  # re-parsed result of spellchecking each of them with +dict+; otherwise
  # the node is serialized untouched.
  def spellcheck(dict)
    return to_html(:indent => 0) unless spellcheckable?

    # Collect the checked markup of every child before detaching them.
    checked_children = children.map { |node| node.spellcheck(dict) }
    children.remove
    add_child(Nokogiri::HTML::DocumentFragment.parse(checked_children.join))
    to_html(:indent => 0)
  end

  # True when this node's name is listed in
  # HTML_Spellchecker.spellcheckable_tags.
  def spellcheckable?
    HTML_Spellchecker.spellcheckable_tags.include?(name)
  end
end
53
+
54
class Nokogiri::XML::Text
  # A run of word characters (ASCII-only \w on Ruby 1.8, \p{Word} otherwise).
  WORDS_REGEXP = RUBY_VERSION =~ /^1\.8/ ? /\w+/ : /\p{Word}+/

  # A named or numeric character reference as it appears in serialized
  # markup, e.g. "&lt;", "&#38;" or "&#x26;".
  ENTITY_REGEXP = /&(?:#x[0-9a-fA-F]+|#\d+|\w+);/

  # Entities are listed first so they are consumed as a single token and
  # their names ("lt", "gt", "nbsp", ...) are never treated as words.
  TOKENS_REGEXP = Regexp.union(ENTITY_REGEXP, WORDS_REGEXP)

  # Serializes this text node and wraps every word rejected by +dict+ in
  # <mark class="misspelled">.
  #
  # dict - object responding to +check(word)+, truthy for a correct word.
  #
  # The scan runs on the serialized text, where special characters appear
  # as entities. Entities are passed through untouched: marking the name
  # inside "&lt;" would split the entity and corrupt the markup.
  def spellcheck(dict)
    to_xhtml(:encoding => 'UTF-8').gsub(TOKENS_REGEXP) do |token|
      # token[0, 1] returns a String on every supported Ruby version
      # (token[0] is an Integer on 1.8).
      if token[0, 1] == "&" || dict.check(token)
        token
      else
        "<mark class=\"misspelled\">#{token}</mark>"
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: html_spellchecker
3
+ version: !ruby/object:Gem::Version
4
+ hash: 31
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 2
10
+ version: 0.1.2
11
+ platform: ruby
12
+ authors:
13
+ - Bruno Michel
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-03 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 1
32
+ - 4
33
+ version: "1.4"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: hunspell-ffi
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - "="
43
+ - !ruby/object:Gem::Version
44
+ hash: 592302929
45
+ segments:
46
+ - 0
47
+ - 1
48
+ - 3
49
+ - alpha
50
+ - 2
51
+ version: 0.1.3.alpha2
52
+ type: :runtime
53
+ version_requirements: *id002
54
+ - !ruby/object:Gem::Dependency
55
+ name: rspec
56
+ prerelease: false
57
+ requirement: &id003 !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ~>
61
+ - !ruby/object:Gem::Version
62
+ hash: 11
63
+ segments:
64
+ - 2
65
+ - 4
66
+ version: "2.4"
67
+ type: :development
68
+ version_requirements: *id003
69
+ description: Wants to spellcheck an HTML string properly? This gem is for you.
70
+ email: bmichel@menfin.info
71
+ executables: []
72
+
73
+ extensions: []
74
+
75
+ extra_rdoc_files:
76
+ - README.md
77
+ files:
78
+ - MIT-LICENSE
79
+ - README.md
80
+ - Gemfile
81
+ - lib/html_spellchecker.rb
82
+ has_rdoc: true
83
+ homepage: http://github.com/nono/HTML-Spellchecker
84
+ licenses: []
85
+
86
+ post_install_message:
87
+ rdoc_options: []
88
+
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ hash: 3
97
+ segments:
98
+ - 0
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ hash: 3
106
+ segments:
107
+ - 0
108
+ version: "0"
109
+ requirements: []
110
+
111
+ rubyforge_project:
112
+ rubygems_version: 1.5.2
113
+ signing_key:
114
+ specification_version: 3
115
+ summary: Wants to spellcheck an HTML string properly? This gem is for you.
116
+ test_files: []
117
+