html_spellchecker 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/Gemfile +2 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +55 -0
  4. data/lib/html_spellchecker.rb +62 -0
  5. metadata +117 -0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems from the official RubyGems index over HTTPS. The former
# `source :rubygems` symbol shorthand is deprecated by Bundler because it
# pointed at the plain-HTTP endpoint.
source "https://rubygems.org"

# Take the dependency list from this gem's .gemspec file.
gemspec
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Bruno Michel
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ HTML Spellchecker
2
+ =================
3
+
4
+ Want to spellcheck an HTML string properly? This gem is for you.
5
+ It's powered by [Nokogiri](http://nokogiri.org/) and
6
+ [hunspell-ffi](https://github.com/ahaller/hunspell-ffi)!
7
+
8
+
9
+ How to use it
10
+ -------------
11
+
12
+ It's very simple. Install it with rubygems:
13
+
14
+ gem install html_spellchecker
15
+
16
+ Or, if you use bundler, add it to your `Gemfile`:
17
+
18
+ gem "html_spellchecker", "~> 0.1"
19
+
20
+ Then you can use it in your code:
21
+
22
+ require "html_spellchecker"
23
+ HTML_Spellchecker.english.spellcheck("<p>This is xzqwy.</p>")
24
+ # => "<p>This is <mark class=\"misspelled\">xzqwy</mark>.</p>"
25
+
26
+ The HTML_Spellchecker class can be initialized by giving 2 paths:
27
+ the affix file and the dictionary file for hunspell. There are helpers to
28
+ create a new instance for the English and French dictionaries.
29
+
30
+ Then, you can use `spellcheck` method: you give it an HTML string
31
+ and it returns the same string with misspelled words
32
+ enclosed in `<mark>` tags (with the `misspelled` class).
33
+
34
+ HTML_Spellchecker can skip spellchecking the content of special tags
35
+ like `<code>`, by keeping a list of the tags to spellcheck in
36
+ `HTML_Spellchecker.spellcheckable_tags`.
37
+
38
+
39
+ Issues or Suggestions
40
+ ---------------------
41
+
42
+ Found an issue or have a suggestion? Please report it on
43
+ [GitHub's issue tracker](http://github.com/nono/HTML-Spellchecker/issues).
44
+
45
+ If you want to make a pull request, please run the specs first:
46
+
47
+ rspec spec
48
+
49
+
50
+ Credits
51
+ -------
52
+
53
+ Thanks to [Andreas Haller](https://github.com/ahaller) for the hunspell-ffi gem.
54
+
55
+ Copyright (c) 2011 Bruno Michel <bmichel@menfin.info>, released under the MIT license
@@ -0,0 +1,62 @@
1
+ # Encoding: UTF-8
2
+
3
+ require "hunspell-ffi"
4
+ require "nokogiri"
5
+ require "set"
6
+
7
+
8
# Spellchecks the text content of an HTML string with a Hunspell dictionary.
#
#   HTML_Spellchecker.english.spellcheck("<p>This is xzqwy.</p>")
#
# The per-node work is done by the `spellcheck` methods this file adds to
# the Nokogiri node classes; this class only holds the dictionary and the
# configuration (spellcheckable tags, dictionary directory).
class HTML_Spellchecker
  # Directory searched for dictionaries unless `dictionary_dir=` is used.
  DEFAULT_DICTIONARY_DIR = "/usr/share/hunspell"

  class << self
    # Collection of tag names whose children get spellchecked. Any tag not
    # listed here is serialized untouched.
    attr_accessor :spellcheckable_tags

    # Overrides the directory in which `english` and `french` look for
    # their .aff/.dic files. Must be set before the first call to those
    # helpers, because their instances are memoized.
    attr_writer :dictionary_dir

    # Directory holding the Hunspell files; falls back to the default
    # when nothing (or nil) was assigned.
    def dictionary_dir
      @dictionary_dir || DEFAULT_DICTIONARY_DIR
    end

    # Returns [affix_path, dictionary_path] for a locale such as "en_US".
    def dictionary_paths(locale)
      %w(aff dic).map { |ext| File.join(dictionary_dir, "#{locale}.#{ext}") }
    end

    # Memoized spellchecker backed by the en_US dictionary.
    def english
      @english ||= new(*dictionary_paths("en_US"))
    end

    # Memoized spellchecker backed by the fr_FR dictionary.
    def french
      @french ||= new(*dictionary_paths("fr_FR"))
    end
  end

  # aff - path of the Hunspell affix (.aff) file
  # dic - path of the Hunspell dictionary (.dic) file
  def initialize(aff, dic)
    @dict = Hunspell.new(aff, dic)
  end

  # Parses `html` as a fragment and returns the serialized HTML string
  # produced by the fragment's `spellcheck` method.
  def spellcheck(html)
    Nokogiri::HTML::DocumentFragment.parse(html).spellcheck(@dict)
  end

  self.spellcheckable_tags = Set.new(%w(p ol ul li div header article nav section footer aside dd dt dl
                                        span blockquote cite q mark ins del table td th tr tbody thead tfoot
                                        a b i s em small strong hgroup h1 h2 h3 h4 h5 h6))
end
32
+
33
class Nokogiri::HTML::DocumentFragment
  # A fragment always reports itself as spellcheckable, whatever the
  # configured tag list contains.
  def spellcheckable?; true; end
end
38
+
39
class Nokogiri::XML::Node
  # True when this node's name is in HTML_Spellchecker.spellcheckable_tags.
  def spellcheckable?
    allowed_tags = HTML_Spellchecker.spellcheckable_tags
    allowed_tags.include?(name)
  end

  # Returns this node serialized as HTML. When the node is spellcheckable,
  # its children are first replaced by the re-parsed result of calling
  # `spellcheck(dict)` on each of them; otherwise the node is serialized
  # untouched.
  def spellcheck(dict)
    if spellcheckable?
      checked_markup = children.map { |node| node.spellcheck(dict) }.join
      replacement    = Nokogiri::HTML::DocumentFragment.parse(checked_markup)
      children.remove
      add_child replacement
    end

    to_html(:indent => 0)
  end
end
53
+
54
class Nokogiri::XML::Text
  # A run of word characters (ASCII-only \w on Ruby 1.8).
  WORDS_REGEXP = RUBY_VERSION =~ /^1\.8/ ? /\w+/ : /\p{Word}+/

  # A named or numeric character reference such as &lt; &nbsp; &#160; &#xA0;
  ENTITY_REGEXP = /&#?\w+;/

  # Entities are listed first so that they are consumed as one token and
  # their name is never matched as a word.
  TOKENS_REGEXP = Regexp.union(ENTITY_REGEXP, WORDS_REGEXP)

  # Returns the serialized text with every word rejected by `dict.check`
  # wrapped in <mark class="misspelled">...</mark>.
  #
  # The scan runs over the serialized form of the text, in which special
  # characters appear as entities. Those entities are copied through
  # unchanged; checking their names as words would wrap e.g. the "lt" of
  # "&lt;" in a <mark> and break the markup.
  def spellcheck(dict)
    to_xhtml(:encoding => 'UTF-8').gsub(TOKENS_REGEXP) do |token|
      # A word match can never start with "&", so this identifies entities.
      if token[0, 1] == "&" || dict.check(token)
        token
      else
        "<mark class=\"misspelled\">#{token}</mark>"
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: html_spellchecker
3
+ version: !ruby/object:Gem::Version
4
+ hash: 31
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 2
10
+ version: 0.1.2
11
+ platform: ruby
12
+ authors:
13
+ - Bruno Michel
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-03 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 1
32
+ - 4
33
+ version: "1.4"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: hunspell-ffi
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - "="
43
+ - !ruby/object:Gem::Version
44
+ hash: 592302929
45
+ segments:
46
+ - 0
47
+ - 1
48
+ - 3
49
+ - alpha
50
+ - 2
51
+ version: 0.1.3.alpha2
52
+ type: :runtime
53
+ version_requirements: *id002
54
+ - !ruby/object:Gem::Dependency
55
+ name: rspec
56
+ prerelease: false
57
+ requirement: &id003 !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ~>
61
+ - !ruby/object:Gem::Version
62
+ hash: 11
63
+ segments:
64
+ - 2
65
+ - 4
66
+ version: "2.4"
67
+ type: :development
68
+ version_requirements: *id003
69
+ description: Wants to spellcheck an HTML string properly? This gem is for you.
70
+ email: bmichel@menfin.info
71
+ executables: []
72
+
73
+ extensions: []
74
+
75
+ extra_rdoc_files:
76
+ - README.md
77
+ files:
78
+ - MIT-LICENSE
79
+ - README.md
80
+ - Gemfile
81
+ - lib/html_spellchecker.rb
82
+ has_rdoc: true
83
+ homepage: http://github.com/nono/HTML-Spellchecker
84
+ licenses: []
85
+
86
+ post_install_message:
87
+ rdoc_options: []
88
+
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ hash: 3
97
+ segments:
98
+ - 0
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ hash: 3
106
+ segments:
107
+ - 0
108
+ version: "0"
109
+ requirements: []
110
+
111
+ rubyforge_project:
112
+ rubygems_version: 1.5.2
113
+ signing_key:
114
+ specification_version: 3
115
+ summary: Wants to spellcheck an HTML string properly? This gem is for you.
116
+ test_files: []
117
+