text_cleaner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in text_cleaner.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Jason Amster
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # TextCleaner
2
+
3
+ A simple gem that does one thing, and one thing only: it turns funky characters such as curly quotes, em dashes and accented letters into the proper HTML encodings such as `&rdquo;`, `&mdash;` and `&eacute;`.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'text_cleaner'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install text_cleaner
18
+
19
+ ## Usage
20
+
21
+ TextCleaner.clean(input_text)
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,268 @@
1
+ require "text_cleaner/version"
2
+
3
+ module TextCleaner
4
+ DICTIONARY = <<-EOS
5
+ quotation mark &quot; &#34; &#x22; " " "
6
+ ampersand &amp; &#38; &#x26; & & &
7
+ less-than sign &lt; &#60; &#x3C; < < <
8
+ greater-than sign &gt; &#62; &#x3E; > > >
9
+ Latin capital ligature OE &OElig; &#338; &#x152; Œ Œ Œ
10
+ Latin small ligature oe &oelig; &#339; &#x153; œ œ œ
11
+ Latin capital letter S with caron &Scaron; &#352; &#x160; Š Š Š
12
+ Latin small letter s with caron &scaron; &#353; &#x161; š š š
13
+ Latin capital letter Y with diaeresis &Yuml; &#376; &#x178; Ÿ Ÿ Ÿ
14
+ modifier letter circumflex accent &circ; &#710; &#x2C6; ˆ ˆ ˆ
15
+ small tilde &tilde; &#732; &#x2DC; ˜ ˜ ˜
16
+ en space &ensp; &#8194; &#x2002;      
17
+ em space &emsp; &#8195; &#x2003;      
18
+ thin space &thinsp; &#8201; &#x2009;      
19
+ zero width non-joiner &zwnj; &#8204; &#x200C; ‌ ‌ ‌
20
+ zero width joiner &zwj; &#8205; &#x200D; ‍ ‍ ‍
21
+ left-to-right mark &lrm; &#8206; &#x200E; ‎ ‎ ‎
22
+ right-to-left mark &rlm; &#8207; &#x200F; ‏ ‏ ‏
23
+ en dash &ndash; &#8211; &#x2013; – – –
24
+ em dash &mdash; &#8212; &#x2014; — — —
25
+ amster right single quotation mark &rsquo; &#8217; &#x2019; ’ ’ ’
26
+ left single quotation mark &lsquo; &#8216; &#x2018; ‘ ‘ ‘
27
+ right single quotation mark &rsquo; &#8217; &#x2019; ’ ’ ’
28
+ single low-9 quotation mark &sbquo; &#8218; &#x201A; ‚ ‚ ‚
29
+ left double quotation mark &ldquo; &#8220; &#x201C; “ “ “
30
+ right double quotation mark &rdquo; &#8221; &#x201D; ” ” ”
31
+ double low-9 quotation mark &bdquo; &#8222; &#x201E; „ „ „
32
+ dagger &dagger; &#8224; &#x2020; † † †
33
+ double dagger &Dagger; &#8225; &#x2021; ‡ ‡ ‡
34
+ per mille sign &permil; &#8240; &#x2030; ‰ ‰ ‰
35
+ single left-pointing angle quotation mark &lsaquo; &#8249; &#x2039; ‹ ‹ ‹
36
+ single right-pointing angle quotation mark &rsaquo; &#8250; &#x203A; › › ›
37
+ euro sign &euro; &#8364; &#x20AC; € € €
38
+ Latin small f with hook = function = florin &fnof; &#402; &#x192; ƒ ƒ ƒ
39
+ Greek capital letter alpha &Alpha; &#913; &#x391; Α Α Α
40
+ Greek capital letter beta &Beta; &#914; &#x392; Β Β Β
41
+ Greek capital letter gamma &Gamma; &#915; &#x393; Γ Γ Γ
42
+ Greek capital letter delta &Delta; &#916; &#x394; Δ Δ Δ
43
+ Greek capital letter epsilon &Epsilon; &#917; &#x395; Ε Ε Ε
44
+ Greek capital letter zeta &Zeta; &#918; &#x396; Ζ Ζ Ζ
45
+ Greek capital letter eta &Eta; &#919; &#x397; Η Η Η
46
+ Greek capital letter theta &Theta; &#920; &#x398; Θ Θ Θ
47
+ Greek capital letter iota &Iota; &#921; &#x399; Ι Ι Ι
48
+ Greek capital letter kappa &Kappa; &#922; &#x39A; Κ Κ Κ
49
+ Greek capital letter lambda &Lambda; &#923; &#x39B; Λ Λ Λ
50
+ Greek capital letter mu &Mu; &#924; &#x39C; Μ Μ Μ
51
+ Greek capital letter nu &Nu; &#925; &#x39D; Ν Ν Ν
52
+ Greek capital letter xi &Xi; &#926; &#x39E; Ξ Ξ Ξ
53
+ Greek capital letter omicron &Omicron; &#927; &#x39F; Ο Ο Ο
54
+ Greek capital letter pi &Pi; &#928; &#x3A0; Π Π Π
55
+ Greek capital letter rho &Rho; &#929; &#x3A1; Ρ Ρ Ρ
56
+ Greek capital letter sigma &Sigma; &#931; &#x3A3; Σ Σ Σ
57
+ Greek capital letter tau &Tau; &#932; &#x3A4; Τ Τ Τ
58
+ Greek capital letter upsilon &Upsilon; &#933; &#x3A5; Υ Υ Υ
59
+ Greek capital letter phi &Phi; &#934; &#x3A6; Φ Φ Φ
60
+ Greek capital letter chi &Chi; &#935; &#x3A7; Χ Χ Χ
61
+ Greek capital letter psi &Psi; &#936; &#x3A8; Ψ Ψ Ψ
62
+ Greek capital letter omega &Omega; &#937; &#x3A9; Ω Ω Ω
63
+ Greek small letter alpha &alpha; &#945; &#x3B1; α α α
64
+ Greek small letter beta &beta; &#946; &#x3B2; β β β
65
+ Greek small letter gamma &gamma; &#947; &#x3B3; γ γ γ
66
+ Greek small letter delta &delta; &#948; &#x3B4; δ δ δ
67
+ Greek small letter epsilon &epsilon; &#949; &#x3B5; ε ε ε
68
+ Greek small letter zeta &zeta; &#950; &#x3B6; ζ ζ ζ
69
+ Greek small letter eta &eta; &#951; &#x3B7; η η η
70
+ Greek small letter theta &theta; &#952; &#x3B8; θ θ θ
71
+ Greek small letter iota &iota; &#953; &#x3B9; ι ι ι
72
+ Greek small letter kappa &kappa; &#954; &#x3BA; κ κ κ
73
+ Greek small letter lambda &lambda; &#955; &#x3BB; λ λ λ
74
+ Greek small letter mu &mu; &#956; &#x3BC; μ μ μ
75
+ Greek small letter nu &nu; &#957; &#x3BD; ν ν ν
76
+ Greek small letter xi &xi; &#958; &#x3BE; ξ ξ ξ
77
+ Greek small letter omicron &omicron; &#959; &#x3BF; ο ο ο
78
+ Greek small letter pi &pi; &#960; &#x3C0; π π π
79
+ Greek small letter rho &rho; &#961; &#x3C1; ρ ρ ρ
80
+ Greek small letter final sigma &sigmaf; &#962; &#x3C2; ς ς ς
81
+ Greek small letter sigma &sigma; &#963; &#x3C3; σ σ σ
82
+ Greek small letter tau &tau; &#964; &#x3C4; τ τ τ
83
+ Greek small letter upsilon &upsilon; &#965; &#x3C5; υ υ υ
84
+ Greek small letter phi &phi; &#966; &#x3C6; φ φ φ
85
+ Greek small letter chi &chi; &#967; &#x3C7; χ χ χ
86
+ Greek small letter psi &psi; &#968; &#x3C8; ψ ψ ψ
87
+ Greek small letter omega &omega; &#969; &#x3C9; ω ω ω
88
+ Greek small letter theta symbol &thetasym; &#977; &#x3D1; ϑ ϑ ϑ
89
+ Greek upsilon with hook symbol &upsih; &#978; &#x3D2; ϒ ϒ ϒ
90
+ Greek pi symbol &piv; &#982; &#x3D6; ϖ ϖ ϖ
91
+ bullet = black small circle &bull; &#8226; &#x2022; • • •
92
+ horizontal ellipsis = three dot leader &hellip; &#8230; &#x2026; … … …
93
+ prime = minutes = feet &prime; &#8242; &#x2032; ′ ′ ′
94
+ double prime = seconds = inches &Prime; &#8243; &#x2033; ″ ″ ″
95
+ overline = spacing overscore &oline; &#8254; &#x203E; ‾ ‾ ‾
96
+ fraction slash &frasl; &#8260; &#x2044; ⁄ ⁄ ⁄
97
+ script capital P = power set = Weierstrass p &weierp; &#8472; &#x2118; ℘ ℘ ℘
98
+ blackletter capital I = imaginary part &image; &#8465; &#x2111; ℑ ℑ ℑ
99
+ blackletter capital R = real part symbol &real; &#8476; &#x211C; ℜ ℜ ℜ
100
+ trade mark sign &trade; &#8482; &#x2122; ™ ™ ™
101
+ alef symbol = first transfinite cardinal &alefsym; &#8501; &#x2135; ℵ ℵ ℵ
102
+ leftwards arrow &larr; &#8592; &#x2190; ← ← ←
103
+ upwards arrow &uarr; &#8593; &#x2191; ↑ ↑ ↑
104
+ rightwards arrow &rarr; &#8594; &#x2192; → → →
105
+ downwards arrow &darr; &#8595; &#x2193; ↓ ↓ ↓
106
+ left right arrow &harr; &#8596; &#x2194; ↔ ↔ ↔
107
+ downwards arrow with corner leftwards = carriage return &crarr; &#8629; &#x21B5; ↵ ↵ ↵
108
+ leftwards double arrow &lArr; &#8656; &#x21D0; ⇐ ⇐ ⇐
109
+ upwards double arrow &uArr; &#8657; &#x21D1; ⇑ ⇑ ⇑
110
+ rightwards double arrow &rArr; &#8658; &#x21D2; ⇒ ⇒ ⇒
111
+ downwards double arrow &dArr; &#8659; &#x21D3; ⇓ ⇓ ⇓
112
+ left right double arrow &hArr; &#8660; &#x21D4; ⇔ ⇔ ⇔
113
+ for all &forall; &#8704; &#x2200; ∀ ∀ ∀
114
+ partial differential &part; &#8706; &#x2202; ∂ ∂ ∂
115
+ there exists &exist; &#8707; &#x2203; ∃ ∃ ∃
116
+ empty set = null set = diameter &empty; &#8709; &#x2205; ∅ ∅ ∅
117
+ nabla = backward difference &nabla; &#8711; &#x2207; ∇ ∇ ∇
118
+ element of &isin; &#8712; &#x2208; ∈ ∈ ∈
119
+ not an element of &notin; &#8713; &#x2209; ∉ ∉ ∉
120
+ contains as member &ni; &#8715; &#x220B; ∋ ∋ ∋
121
+ n-ary product = product sign &prod; &#8719; &#x220F; ∏ ∏ ∏
122
+ n-ary sumation &sum; &#8721; &#x2211; ∑ ∑ ∑
123
+ minus sign &minus; &#8722; &#x2212; − − −
124
+ asterisk operator &lowast; &#8727; &#x2217; ∗ ∗ ∗
125
+ square root = radical sign &radic; &#8730; &#x221A; √ √ √
126
+ proportional to &prop; &#8733; &#x221D; ∝ ∝ ∝
127
+ infinity &infin; &#8734; &#x221E; ∞ ∞ ∞
128
+ angle &ang; &#8736; &#x2220; ∠ ∠ ∠
129
+ logical and = wedge &and; &#8743; &#x2227; ∧ ∧ ∧
130
+ logical or = vee &or; &#8744; &#x2228; ∨ ∨ ∨
131
+ intersection = cap &cap; &#8745; &#x2229; ∩ ∩ ∩
132
+ union = cup &cup; &#8746; &#x222A; ∪ ∪ ∪
133
+ integral &int; &#8747; &#x222B; ∫ ∫ ∫
134
+ therefore &there4; &#8756; &#x2234; ∴ ∴ ∴
135
+ tilde operator = varies with = similar to &sim; &#8764; &#x223C; ∼ ∼ ∼
136
+ approximately equal to &cong; &#8773; &#x2245; ≅ ≅ ≅
137
+ almost equal to = asymptotic to &asymp; &#8776; &#x2248; ≈ ≈ ≈
138
+ not equal to &ne; &#8800; &#x2260; ≠ ≠ ≠
139
+ identical to &equiv; &#8801; &#x2261; ≡ ≡ ≡
140
+ less-than or equal to &le; &#8804; &#x2264; ≤ ≤ ≤
141
+ greater-than or equal to &ge; &#8805; &#x2265; ≥ ≥ ≥
142
+ subset of &sub; &#8834; &#x2282; ⊂ ⊂ ⊂
143
+ superset of &sup; &#8835; &#x2283; ⊃ ⊃ ⊃
144
+ not a subset of &nsub; &#8836; &#x2284; ⊄ ⊄ ⊄
145
+ subset of or equal to &sube; &#8838; &#x2286; ⊆ ⊆ ⊆
146
+ superset of or equal to &supe; &#8839; &#x2287; ⊇ ⊇ ⊇
147
+ circled plus = direct sum &oplus; &#8853; &#x2295; ⊕ ⊕ ⊕
148
+ circled times = vector product &otimes; &#8855; &#x2297; ⊗ ⊗ ⊗
149
+ up tack = orthogonal to = perpendicular &perp; &#8869; &#x22A5; ⊥ ⊥ ⊥
150
+ dot operator &sdot; &#8901; &#x22C5; ⋅ ⋅ ⋅
151
+ left ceiling = APL upstile &lceil; &#8968; &#x2308; ⌈ ⌈ ⌈
152
+ right ceiling &rceil; &#8969; &#x2309; ⌉ ⌉ ⌉
153
+ left floor = APL downstile &lfloor; &#8970; &#x230A; ⌊ ⌊ ⌊
154
+ right floor &rfloor; &#8971; &#x230B; ⌋ ⌋ ⌋
155
+ left-pointing angle bracket = bra &lang; &#9001; &#x2329; 〈 〈 〈
156
+ right-pointing angle bracket = ket &rang; &#9002; &#x232A; 〉 〉 〉
157
+ lozenge &loz; &#9674; &#x25CA; ◊ ◊ ◊
158
+ black spade suit &spades; &#9824; &#x2660; ♠ ♠ ♠
159
+ black club suit = shamrock &clubs; &#9827; &#x2663; ♣ ♣ ♣
160
+ black heart suit = valentine &hearts; &#9829; &#x2665; ♥ ♥ ♥
161
+ black diamond suit &diams; &#9830; &#x2666; ♦ ♦ ♦
162
+ inverted exclamation mark &iexcl; &#161; &#xA1; ¡ ¡ ¡
163
+ cent sign &cent; &#162; &#xA2; ¢ ¢ ¢
164
+ pound sign &pound; &#163; &#xA3; £ £ £
165
+ currency sign &curren; &#164; &#xA4; ¤ ¤ ¤
166
+ yen sign = yuan sign &yen; &#165; &#xA5; ¥ ¥ ¥
167
+ broken bar = broken vertical bar &brvbar; &#166; &#xA6; ¦ ¦ ¦
168
+ section sign &sect; &#167; &#xA7; § § §
169
+ diaeresis = spacing diaeresis &uml; &#168; &#xA8; ¨ ¨ ¨
170
+ copyright sign &copy; &#169; &#xA9; © © ©
171
+ feminine ordinal indicator &ordf; &#170; &#xAA; ª ª ª
172
+ left-pointing double angle quotation mark = left pointing guillemet &laquo; &#171; &#xAB; « « «
173
+ not sign &not; &#172; &#xAC; ¬ ¬ ¬
174
+ soft hyphen = discretionary hyphen &shy; &#173; &#xAD; ­ ­ ­
175
+ registered sign = registered trade mark sign &reg; &#174; &#xAE; ® ® ®
176
+ macron = spacing macron = overline = APL overbar &macr; &#175; &#xAF; ¯ ¯ ¯
177
+ degree sign &deg; &#176; &#xB0; ° ° °
178
+ plus-minus sign = plus-or-minus sign &plusmn; &#177; &#xB1; ± ± ±
179
+ superscript two = superscript digit two = squared &sup2; &#178; &#xB2; ² ² ²
180
+ superscript three = superscript digit three = cubed &sup3; &#179; &#xB3; ³ ³ ³
181
+ acute accent = spacing acute &acute; &#180; &#xB4; ´ ´ ´
182
+ micro sign &micro; &#181; &#xB5; µ µ µ
183
+ pilcrow sign = paragraph sign &para; &#182; &#xB6; ¶ ¶ ¶
184
+ middle dot = Georgian comma = Greek middle dot &middot; &#183; &#xB7; · · ·
185
+ cedilla = spacing cedilla &cedil; &#184; &#xB8; ¸ ¸ ¸
186
+ superscript one = superscript digit one &sup1; &#185; &#xB9; ¹ ¹ ¹
187
+ masculine ordinal indicator &ordm; &#186; &#xBA; º º º
188
+ right-pointing double angle quotation mark = right pointing guillemet &raquo; &#187; &#xBB; » » »
189
+ vulgar fraction one quarter = fraction one quarter &frac14; &#188; &#xBC; ¼ ¼ ¼
190
+ vulgar fraction one half = fraction one half &frac12; &#189; &#xBD; ½ ½ ½
191
+ vulgar fraction three quarters = fraction three quarters &frac34; &#190; &#xBE; ¾ ¾ ¾
192
+ inverted question mark = turned question mark &iquest; &#191; &#xBF; ¿ ¿ ¿
193
+ Latin capital letter A with grave = Latin capital letter A grave &Agrave; &#192; &#xC0; À À À
194
+ Latin capital letter A with acute &Aacute; &#193; &#xC1; Á Á Á
195
+ Latin capital letter A with circumflex &Acirc; &#194; &#xC2; Â Â Â
196
+ Latin capital letter A with tilde &Atilde; &#195; &#xC3; Ã Ã Ã
197
+ Latin capital letter A with diaeresis &Auml; &#196; &#xC4; Ä Ä Ä
198
+ Latin capital letter A with ring above = Latin capital letter A ring &Aring; &#197; &#xC5; Å Å Å
199
+ Latin capital letter AE = Latin capital ligature AE &AElig; &#198; &#xC6; Æ Æ Æ
200
+ Latin capital letter C with cedilla &Ccedil; &#199; &#xC7; Ç Ç Ç
201
+ Latin capital letter E with grave &Egrave; &#200; &#xC8; È È È
202
+ Latin capital letter E with acute &Eacute; &#201; &#xC9; É É É
203
+ Latin capital letter E with circumflex &Ecirc; &#202; &#xCA; Ê Ê Ê
204
+ Latin capital letter E with diaeresis &Euml; &#203; &#xCB; Ë Ë Ë
205
+ Latin capital letter I with grave &Igrave; &#204; &#xCC; Ì Ì Ì
206
+ Latin capital letter I with acute &Iacute; &#205; &#xCD; Í Í Í
207
+ Latin capital letter I with circumflex &Icirc; &#206; &#xCE; Î Î Î
208
+ Latin capital letter I with diaeresis &Iuml; &#207; &#xCF; Ï Ï Ï
209
+ Latin capital letter ETH &ETH; &#208; &#xD0; Ð Ð Ð
210
+ Latin capital letter N with tilde &Ntilde; &#209; &#xD1; Ñ Ñ Ñ
211
+ Latin capital letter O with grave &Ograve; &#210; &#xD2; Ò Ò Ò
212
+ Latin capital letter O with acute &Oacute; &#211; &#xD3; Ó Ó Ó
213
+ Latin capital letter O with circumflex &Ocirc; &#212; &#xD4; Ô Ô Ô
214
+ Latin capital letter O with tilde &Otilde; &#213; &#xD5; Õ Õ Õ
215
+ Latin capital letter O with diaeresis &Ouml; &#214; &#xD6; Ö Ö Ö
216
+ multiplication sign &times; &#215; &#xD7; × × ×
217
+ Latin capital letter O with stroke = Latin capital letter O slash &Oslash; &#216; &#xD8; Ø Ø Ø
218
+ Latin capital letter U with grave &Ugrave; &#217; &#xD9; Ù Ù Ù
219
+ Latin capital letter U with acute &Uacute; &#218; &#xDA; Ú Ú Ú
220
+ Latin capital letter U with circumflex &Ucirc; &#219; &#xDB; Û Û Û
221
+ Latin capital letter U with diaeresis &Uuml; &#220; &#xDC; Ü Ü Ü
222
+ Latin capital letter Y with acute &Yacute; &#221; &#xDD; Ý Ý Ý
223
+ Latin capital letter THORN &THORN; &#222; &#xDE; Þ Þ Þ
224
+ Latin small letter sharp s = ess-zed &szlig; &#223; &#xDF; ß ß ß
225
+ Latin small letter a with grave = Latin small letter a grave &agrave; &#224; &#xE0; à à à
226
+ Latin small letter a with acute &aacute; &#225; &#xE1; á á á
227
+ Latin small letter a with circumflex &acirc; &#226; &#xE2; â â â
228
+ Latin small letter a with tilde &atilde; &#227; &#xE3; ã ã ã
229
+ Latin small letter a with diaeresis &auml; &#228; &#xE4; ä ä ä
230
+ Latin small letter a with ring above = Latin small letter a ring &aring; &#229; &#xE5; å å å
231
+ Latin small letter ae = Latin small ligature ae &aelig; &#230; &#xE6; æ æ æ
232
+ Latin small letter c with cedilla &ccedil; &#231; &#xE7; ç ç ç
233
+ Latin small letter e with grave &egrave; &#232; &#xE8; è è è
234
+ Latin small letter e with acute &eacute; &#233; &#xE9; é é é
235
+ Latin small letter e with circumflex &ecirc; &#234; &#xEA; ê ê ê
236
+ Latin small letter e with diaeresis &euml; &#235; &#xEB; ë ë ë
237
+ Latin small letter i with grave &igrave; &#236; &#xEC; ì ì ì
238
+ Latin small letter i with acute &iacute; &#237; &#xED; í í í
239
+ Latin small letter i with circumflex &icirc; &#238; &#xEE; î î î
240
+ Latin small letter i with diaeresis &iuml; &#239; &#xEF; ï ï ï
241
+ Latin small letter eth &eth; &#240; &#xF0; ð ð ð
242
+ Latin small letter n with tilde &ntilde; &#241; &#xF1; ñ ñ ñ
243
+ Latin small letter o with grave &ograve; &#242; &#xF2; ò ò ò
244
+ Latin small letter o with acute &oacute; &#243; &#xF3; ó ó ó
245
+ Latin small letter o with circumflex &ocirc; &#244; &#xF4; ô ô ô
246
+ Latin small letter o with tilde &otilde; &#245; &#xF5; õ õ õ
247
+ Latin small letter o with diaeresis &ouml; &#246; &#xF6; ö ö ö
248
+ division sign &divide; &#247; &#xF7; ÷ ÷ ÷
249
+ Latin small letter o with stroke = Latin small letter o slash &oslash; &#248; &#xF8; ø ø ø
250
+ Latin small letter u with grave &ugrave; &#249; &#xF9; ù ù ù
251
+ Latin small letter u with acute &uacute; &#250; &#xFA; ú ú ú
252
+ Latin small letter u with circumflex &ucirc; &#251; &#xFB; û û û
253
+ Latin small letter u with diaeresis &uuml; &#252; &#xFC; ü ü ü
254
+ Latin small letter y with acute &yacute; &#253; &#xFD; ý ý ý
255
+ Latin small letter thorn &thorn; &#254; &#xFE; þ þ þ
256
+ Latin small letter y with diaeresis &yuml; &#255; &#xFF; ÿ ÿ ÿ
257
+ EOS
258
+
259
+
260
+
261
# Replaces every special character listed in DICTIONARY with its named HTML
# entity (for example, a copyright sign becomes "&copy;").
#
# Each DICTIONARY row is tab-separated: name, named entity, decimal entity,
# hex entity, then the literal character (repeated). Only the named entity
# and the first literal column are used here.
#
# All substitutions happen in one pass over the text. Applying the rows one
# after another re-scans earlier output: a double quote is first turned into
# "&quot;" and the ampersand row then rewrites it as "&amp;quot;".
#
# input_text - the String to clean. It is modified in place unless it is
#              frozen, in which case a cleaned copy is returned instead.
#
# Returns the cleaned String.
def self.clean(input_text)
  # Parse the dictionary once and reuse the lookup table on later calls.
  @entities ||= DICTIONARY.each_line.each_with_object({}) do |line, table|
    _name, html, _decimal, _hex, display = line.split(/\t/)
    next if html.nil? || display.nil? # blank or malformed row

    char = display.strip
    # An empty search string would match between every character.
    next if char.empty?

    # Keep the first row for a character, as sequential replacement did.
    table[char] ||= html.strip
  end
  @entity_pattern ||= Regexp.union(@entities.keys)

  text = input_text.frozen? ? input_text.dup : input_text
  # gsub! returns nil when nothing matched, so return the string itself.
  text.gsub!(@entity_pattern, @entities)
  text
end
268
+ end
@@ -0,0 +1,3 @@
1
# Namespace for the text_cleaner gem.
module TextCleaner
  # Version string of the gem.
  # Frozen so the shared constant cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,17 @@
1
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/text_cleaner/version', __FILE__)

# Gem specification for text_cleaner: package metadata plus the file lists
# used when the gem is built.
Gem::Specification.new do |gem|
  gem.authors       = ["Jason Amster"]
  gem.email         = ["jayamster@gmail.com"]
  gem.description   = %q{Simple gem does one thing, and one thing only... Turns funky chars such as curly quotes, em dashes and accented letters into the proper HTML encodings such as &rdquo;, &mdash; and &eacute;.}
  # Kept shorter than the description; RubyGems warns when the two are identical.
  gem.summary       = %q{Converts special characters to their named HTML entities.}
  # FIXME: homepage is still blank; set it to the project URL before release.
  gem.homepage      = ""
  # Matches the MIT LICENSE file shipped with the gem.
  gem.license       = "MIT"

  # `git ls-files` prints one path per line. Split on newlines explicitly:
  # `$\` is nil by default, which makes split fall back to splitting on any
  # whitespace and breaks paths that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "text_cleaner"
  gem.require_paths = ["lib"]
  gem.version       = TextCleaner::VERSION
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: text_cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jason Amster
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-20 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Simple gem does one thing, and one thing only... Turns funky chars such
15
+ as [FILL IN HERE] to the proper HTML Encodings such as [FILL IN HERE].
16
+ email:
17
+ - jayamster@gmail.com
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.md
26
+ - Rakefile
27
+ - lib/text_cleaner.rb
28
+ - lib/text_cleaner/version.rb
29
+ - text_cleaner.gemspec
30
+ homepage: ''
31
+ licenses: []
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 1.8.24
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: Simple gem does one thing, and one thing only... Turns funky chars such as
54
+ [FILL IN HERE] to the proper HTML Encodings such as [FILL IN HERE].
55
+ test_files: []