asciidammit2 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f5a506cf717abed78a7676b3853183c5a8c98664
4
+ data.tar.gz: 933891e3f1c4d3ae86193684fc70b8cdd09b589b
5
+ SHA512:
6
+ metadata.gz: 295a44d49826a51249e031264b8dd25f0abcd161f21ed35abe3afb1792c37dea8235854a8a6109f477585cce44e4dc5ab38fd0eca4ea1335206ebbfb7c5db308
7
+ data.tar.gz: 608f16edac803b6b3aca83ccc34f458ba35d906e0ba3163db820bae5d534bb88627d3c80bc176e6c50dc20ea77ef4565818cd6eba80570b9465818d681c8f852
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in asciidammit2.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Forrest Chang
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ # Asciidammit2
2
+
3
+ A port of asciidammit.py. Originally created to strip out characters
4
+ that were giving our email provider problems. Extracted for general
5
+ use. Another asciidammit gem exists with a different API.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'asciidammit2'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install asciidammit2
22
+
23
+ ## Usage
24
+
25
+ ascii_str = Asciidammit.demoronize(string_with_weird_chars)
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it ( https://github.com/fkchang/asciidammit2/fork )
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create a new Pull Request
@@ -0,0 +1,10 @@
1
+ require 'rspec/core/rake_task'
2
+ require 'bundler/gem_tasks'
3
+
4
+ # Default directory to look in is `spec/`
5
+ # Run with `rake spec`
6
+ RSpec::Core::RakeTask.new(:spec) do |task|
7
+ task.rspec_opts = ['--color']
8
+ end
9
+
10
+ task :default => :spec
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'asciidammit/version'
5
+
6
# Gem packaging metadata for asciidammit2.
Gem::Specification.new do |spec|
  spec.name          = 'asciidammit2'
  spec.version       = Asciidammit::VERSION
  spec.authors       = ['Forrest Chang']
  spec.email         = ['fkc_email-ruby@yahoo.com']
  # No padding inside %q{}: a leading space would be published verbatim in
  # the gem's summary and description.
  spec.summary       = %q{A straight port of asciidammit.py.}
  spec.description   = %q{A straight port of asciidammit.py used for a work project, made into a gem to be used elsewhere. There is an asciidammit gem which sports a different interface.}
  spec.homepage      = 'https://github.com/fkchang/asciidammit2'
  spec.license       = 'MIT'

  # Package exactly the files tracked by git (NUL-separated so unusual
  # file names survive the split).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.7'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'rspec-nc'
end
@@ -0,0 +1,465 @@
1
+ require "asciidammit/version"
2
+
3
+ # ported from asciidammit.py
4
+ # different API and classname (Asciidammit vs AsciiDammit) than the existing asciidammit gem
5
+ module Asciidammit
6
+
7
+ CP1252_CHARS = { 0x80 => ['EUR', 'euro'],
8
+ 0x81 => [' ', ' '],
9
+ 0x82 => [',', 'sbquo'],
10
+ 0x83 => ['f', 'fnof'],
11
+ 0x84 => [',,', 'bdquo'],
12
+ 0x85 => ['...', 'hellip'],
13
+ 0x86 => ['+', 'dagger'],
14
+ 0x87 => ['++', 'Dagger'],
15
+ 0x88 => ['^', 'caret'],
16
+ 0x89 => ['%','%'],
17
+ 0x8A => ['S', 'Scaron'],
18
+ 0x8B => ['<', 'lt;'],
19
+ 0x8C => ['OE', 'OElig'],
20
+ 0x8D => ['?','?'],
21
+ 0x8E => ['Z', 'Z'],
22
+ 0x8F => ['?','?'],
23
+ 0x90 => ['?', '?'],
24
+ 0x91 => ["'", 'lsquo'],
25
+ 0x92 => ["'", 'rsquo'],
26
+ 0x93 => ['"', 'ldquo'],
27
+ 0x94 => ['"', 'rdquo'],
28
+ 0x95 => ['*', 'bull'],
29
+ 0x96 => ['-', 'ndash'],
30
+ 0x97 => ['--', 'mdash'],
31
+ 0x98 => ['~', 'tilde'],
32
+ 0x99 => ['[TM]', 'trade'],
33
+ 0x9a => ['s', 'scaron'],
34
+ 0x9b => ['>', 'gt'],
35
+ 0x9c => ['oe', 'oelig'],
36
+ 0x9d => ['?', '?'],
37
+ 0x9e => ['z', 'z'],
38
+ 0x9f => ['Y', 'Yuml'],
39
+ 0xa0 => [' ', 'nbsp'],
40
+ 0xa1 => ['!', 'iexcl'],
41
+ 0xa2 => ['c', 'cent'],
42
+ 0xa3 => ['GBP', 'pound'],
43
+ 0xa4 => ['$', 'curren'], #This approximation is especially lame.
44
+ 0xa5 => ['YEN', 'yen'],
45
+ 0xa6 => ['|', 'brvbar'],
46
+ 0xa7 => ['S', 'sect'],
47
+ 0xa8 => ['..', 'uml'],
48
+ 0xa9 => ['(c)', 'copy'],
49
+ 0xaa => ['[th]', 'ordf'],
50
+ 0xab => ['<<', 'laquo'],
51
+ 0xac => ['!', 'not'],
52
+ 0xad => [' ', 'shy'],
53
+ 0xae => ['[R]', 'reg'],
54
+ 0xaf => ['-', 'macr'],
55
+ 0xb0 => ['o', 'deg'],
56
+ 0xb1 => ['+-', 'plusmm'],
57
+ 0xb2 => ['2', 'sup2'],
58
+ 0xb3 => ['3', 'sup3'],
59
+ 0xb4 => ["'", 'acute'],
60
+ 0xb5 => ['u', 'micro'],
61
+ 0xb6 => ['P', 'para'],
62
+ 0xb7 => ['*', 'middot'],
63
+ 0xb8 => [',', 'cedil'],
64
+ 0xb9 => ['1', 'sup1'],
65
+ 0xba => ['[th]', 'ordm'],
66
+ 0xbb => ['>>', 'raquo'],
67
+ 0xbc => ['1/4', 'frac14'],
68
+ 0xbd => ['1/2', 'frac12'],
69
+ 0xbe => ['3/4', 'frac34'],
70
+ 0xbf => ['?', 'iquest'],
71
+ 0xc0 => ['A', "Agrave"],
72
+ 0xc1 => ['A', "Aacute"],
73
+ 0xc2 => ['A', "Acirc"],
74
+ 0xc3 => ['A', "Atilde"],
75
+ 0xc4 => ['A', "Auml"],
76
+ 0xc5 => ['A', "Aring"],
77
+ 0xc6 => ['AE', "Aelig"],
78
+ 0xc7 => ['C', "Ccedil"],
79
+ 0xc8 => ['E', "Egrave"],
80
+ 0xc9 => ['E', "Eacute"],
81
+ 0xca => ['E', "Ecirc"],
82
+ 0xcb => ['E', "Euml"],
83
+ 0xcc => ['I', "Igrave"],
84
+ 0xcd => ['I', "Iacute"],
85
+ 0xce => ['I', "Icirc"],
86
+ 0xcf => ['I', "Iuml"],
87
+ 0xd0 => ['D', "Eth"],
88
+ 0xd1 => ['N', "Ntilde"],
89
+ 0xd2 => ['O', "Ograve"],
90
+ 0xd3 => ['O', "Oacute"],
91
+ 0xd4 => ['O', "Ocirc"],
92
+ 0xd5 => ['O', "Otilde"],
93
+ 0xd6 => ['O', "Ouml"],
94
+ 0xd7 => ['*', "times"],
95
+ 0xd8 => ['O', "Oslash"],
96
+ 0xd9 => ['U', "Ugrave"],
97
+ 0xda => ['U', "Uacute"],
98
+ 0xdb => ['U', "Ucirc"],
99
+ 0xdc => ['U', "Uuml"],
100
+ 0xdd => ['Y', "Yacute"],
101
+ 0xde => ['b', "Thorn"],
102
+ 0xdf => ['B', "szlig"],
103
+ 0xe0 => ['a', "agrave"],
104
+ 0xe1 => ['a', "aacute"],
105
+ 0xe2 => ['a', "acirc"],
106
+ 0xe3 => ['a', "atilde"],
107
+ 0xe4 => ['a', "auml"],
108
+ 0xe5 => ['a', "aring"],
109
+ 0xe6 => ['ae', "aelig"],
110
+ 0xe7 => ['c', "ccedil"],
111
+ 0xe8 => ['e', "egrave"],
112
+ 0xe9 => ['e', "eacute"],
113
+ 0xea => ['e', "ecirc"],
114
+ 0xeb => ['e', "euml"],
115
+ 0xec => ['i', "igrave"],
116
+ 0xed => ['i', "iacute"],
117
+ 0xee => ['i', "icirc"],
118
+ 0xef => ['i', "iuml"],
119
+ 0xf0 => ['o', "eth"],
120
+ 0xf1 => ['n', "ntilde"],
121
+ 0xf2 => ['o', "ograve"],
122
+ 0xf3 => ['o', "oacute"],
123
+ 0xf4 => ['o', "ocirc"],
124
+ 0xf5 => ['o', "otilde"],
125
+ 0xf6 => ['o', "ouml"],
126
+ 0xf7 => ['/', "divide"],
127
+ 0xf8 => ['o', "oslash"],
128
+ 0xf9 => ['u', "ugrave"],
129
+ 0xfa => ['u', "uacute"],
130
+ 0xfb => ['u', "ucirc"],
131
+ 0xfc => ['u', "uuml"],
132
+ 0xfd => ['y', "yacute"],
133
+ 0xfe => ['b', "thorn"],
134
+ 0xff => ['y', "yuml"],
135
+ # added by me for test crashing Vertical Response
136
+ 0x2010 => ['-', "#x2010"],
137
+ 0x2011 => ['-', "#x2011"],
138
+ 0x2012 => ['-', "#x2012"],
139
+ }
140
+
141
+ # from http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
142
+ CP1252_MAP = {
143
+ #
144
+ # Name: cp1252 to Unicode table
145
+ # Unicode version: 2.0
146
+ # Table version: 2.01
147
+ # Table format: Format A
148
+ # Date: 04/15/98
149
+ #
150
+ # Contact: Shawn.Steele@microsoft.com
151
+ #
152
+ # General notes: none
153
+ #
154
+ # Format: Three tab-separated columns
155
+ # Column #1 is the cp1252 code (in hex)
156
+ # Column #2 is the Unicode (in hex as 0xXXXX)
157
+ # Column #3 is the Unicode name (follows a comment sign, '#')
158
+ #
159
+ # The entries are in cp1252 order
160
+ #
161
+ 0x00 => 0x0000, #NULL
162
+ 0x01 => 0x0001, #START OF HEADING
163
+ 0x02 => 0x0002, #START OF TEXT
164
+ 0x03 => 0x0003, #END OF TEXT
165
+ 0x04 => 0x0004, #END OF TRANSMISSION
166
+ 0x05 => 0x0005, #ENQUIRY
167
+ 0x06 => 0x0006, #ACKNOWLEDGE
168
+ 0x07 => 0x0007, #BELL
169
+ 0x08 => 0x0008, #BACKSPACE
170
+ 0x09 => 0x0009, #HORIZONTAL TABULATION
171
+ 0x0A => 0x000A, #LINE FEED
172
+ 0x0B => 0x000B, #VERTICAL TABULATION
173
+ 0x0C => 0x000C, #FORM FEED
174
+ 0x0D => 0x000D, #CARRIAGE RETURN
175
+ 0x0E => 0x000E, #SHIFT OUT
176
+ 0x0F => 0x000F, #SHIFT IN
177
+ 0x10 => 0x0010, #DATA LINK ESCAPE
178
+ 0x11 => 0x0011, #DEVICE CONTROL ONE
179
+ 0x12 => 0x0012, #DEVICE CONTROL TWO
180
+ 0x13 => 0x0013, #DEVICE CONTROL THREE
181
+ 0x14 => 0x0014, #DEVICE CONTROL FOUR
182
+ 0x15 => 0x0015, #NEGATIVE ACKNOWLEDGE
183
+ 0x16 => 0x0016, #SYNCHRONOUS IDLE
184
+ 0x17 => 0x0017, #END OF TRANSMISSION BLOCK
185
+ 0x18 => 0x0018, #CANCEL
186
+ 0x19 => 0x0019, #END OF MEDIUM
187
+ 0x1A => 0x001A, #SUBSTITUTE
188
+ 0x1B => 0x001B, #ESCAPE
189
+ 0x1C => 0x001C, #FILE SEPARATOR
190
+ 0x1D => 0x001D, #GROUP SEPARATOR
191
+ 0x1E => 0x001E, #RECORD SEPARATOR
192
+ 0x1F => 0x001F, #UNIT SEPARATOR
193
+ 0x20 => 0x0020, #SPACE
194
+ 0x21 => 0x0021, #EXCLAMATION MARK
195
+ 0x22 => 0x0022, #QUOTATION MARK
196
+ 0x23 => 0x0023, #NUMBER SIGN
197
+ 0x24 => 0x0024, #DOLLAR SIGN
198
+ 0x25 => 0x0025, #PERCENT SIGN
199
+ 0x26 => 0x0026, #AMPERSAND
200
+ 0x27 => 0x0027, #APOSTROPHE
201
+ 0x28 => 0x0028, #LEFT PARENTHESIS
202
+ 0x29 => 0x0029, #RIGHT PARENTHESIS
203
+ 0x2A => 0x002A, #ASTERISK
204
+ 0x2B => 0x002B, #PLUS SIGN
205
+ 0x2C => 0x002C, #COMMA
206
+ 0x2D => 0x002D, #HYPHEN-MINUS
207
+ 0x2E => 0x002E, #FULL STOP
208
+ 0x2F => 0x002F, #SOLIDUS
209
+ 0x30 => 0x0030, #DIGIT ZERO
210
+ 0x31 => 0x0031, #DIGIT ONE
211
+ 0x32 => 0x0032, #DIGIT TWO
212
+ 0x33 => 0x0033, #DIGIT THREE
213
+ 0x34 => 0x0034, #DIGIT FOUR
214
+ 0x35 => 0x0035, #DIGIT FIVE
215
+ 0x36 => 0x0036, #DIGIT SIX
216
+ 0x37 => 0x0037, #DIGIT SEVEN
217
+ 0x38 => 0x0038, #DIGIT EIGHT
218
+ 0x39 => 0x0039, #DIGIT NINE
219
+ 0x3A => 0x003A, #COLON
220
+ 0x3B => 0x003B, #SEMICOLON
221
+ 0x3C => 0x003C, #LESS-THAN SIGN
222
+ 0x3D => 0x003D, #EQUALS SIGN
223
+ 0x3E => 0x003E, #GREATER-THAN SIGN
224
+ 0x3F => 0x003F, #QUESTION MARK
225
+ 0x40 => 0x0040, #COMMERCIAL AT
226
+ 0x41 => 0x0041, #LATIN CAPITAL LETTER A
227
+ 0x42 => 0x0042, #LATIN CAPITAL LETTER B
228
+ 0x43 => 0x0043, #LATIN CAPITAL LETTER C
229
+ 0x44 => 0x0044, #LATIN CAPITAL LETTER D
230
+ 0x45 => 0x0045, #LATIN CAPITAL LETTER E
231
+ 0x46 => 0x0046, #LATIN CAPITAL LETTER F
232
+ 0x47 => 0x0047, #LATIN CAPITAL LETTER G
233
+ 0x48 => 0x0048, #LATIN CAPITAL LETTER H
234
+ 0x49 => 0x0049, #LATIN CAPITAL LETTER I
235
+ 0x4A => 0x004A, #LATIN CAPITAL LETTER J
236
+ 0x4B => 0x004B, #LATIN CAPITAL LETTER K
237
+ 0x4C => 0x004C, #LATIN CAPITAL LETTER L
238
+ 0x4D => 0x004D, #LATIN CAPITAL LETTER M
239
+ 0x4E => 0x004E, #LATIN CAPITAL LETTER N
240
+ 0x4F => 0x004F, #LATIN CAPITAL LETTER O
241
+ 0x50 => 0x0050, #LATIN CAPITAL LETTER P
242
+ 0x51 => 0x0051, #LATIN CAPITAL LETTER Q
243
+ 0x52 => 0x0052, #LATIN CAPITAL LETTER R
244
+ 0x53 => 0x0053, #LATIN CAPITAL LETTER S
245
+ 0x54 => 0x0054, #LATIN CAPITAL LETTER T
246
+ 0x55 => 0x0055, #LATIN CAPITAL LETTER U
247
+ 0x56 => 0x0056, #LATIN CAPITAL LETTER V
248
+ 0x57 => 0x0057, #LATIN CAPITAL LETTER W
249
+ 0x58 => 0x0058, #LATIN CAPITAL LETTER X
250
+ 0x59 => 0x0059, #LATIN CAPITAL LETTER Y
251
+ 0x5A => 0x005A, #LATIN CAPITAL LETTER Z
252
+ 0x5B => 0x005B, #LEFT SQUARE BRACKET
253
+ 0x5C => 0x005C, #REVERSE SOLIDUS
254
+ 0x5D => 0x005D, #RIGHT SQUARE BRACKET
255
+ 0x5E => 0x005E, #CIRCUMFLEX ACCENT
256
+ 0x5F => 0x005F, #LOW LINE
257
+ 0x60 => 0x0060, #GRAVE ACCENT
258
+ 0x61 => 0x0061, #LATIN SMALL LETTER A
259
+ 0x62 => 0x0062, #LATIN SMALL LETTER B
260
+ 0x63 => 0x0063, #LATIN SMALL LETTER C
261
+ 0x64 => 0x0064, #LATIN SMALL LETTER D
262
+ 0x65 => 0x0065, #LATIN SMALL LETTER E
263
+ 0x66 => 0x0066, #LATIN SMALL LETTER F
264
+ 0x67 => 0x0067, #LATIN SMALL LETTER G
265
+ 0x68 => 0x0068, #LATIN SMALL LETTER H
266
+ 0x69 => 0x0069, #LATIN SMALL LETTER I
267
+ 0x6A => 0x006A, #LATIN SMALL LETTER J
268
+ 0x6B => 0x006B, #LATIN SMALL LETTER K
269
+ 0x6C => 0x006C, #LATIN SMALL LETTER L
270
+ 0x6D => 0x006D, #LATIN SMALL LETTER M
271
+ 0x6E => 0x006E, #LATIN SMALL LETTER N
272
+ 0x6F => 0x006F, #LATIN SMALL LETTER O
273
+ 0x70 => 0x0070, #LATIN SMALL LETTER P
274
+ 0x71 => 0x0071, #LATIN SMALL LETTER Q
275
+ 0x72 => 0x0072, #LATIN SMALL LETTER R
276
+ 0x73 => 0x0073, #LATIN SMALL LETTER S
277
+ 0x74 => 0x0074, #LATIN SMALL LETTER T
278
+ 0x75 => 0x0075, #LATIN SMALL LETTER U
279
+ 0x76 => 0x0076, #LATIN SMALL LETTER V
280
+ 0x77 => 0x0077, #LATIN SMALL LETTER W
281
+ 0x78 => 0x0078, #LATIN SMALL LETTER X
282
+ 0x79 => 0x0079, #LATIN SMALL LETTER Y
283
+ 0x7A => 0x007A, #LATIN SMALL LETTER Z
284
+ 0x7B => 0x007B, #LEFT CURLY BRACKET
285
+ 0x7C => 0x007C, #VERTICAL LINE
286
+ 0x7D => 0x007D, #RIGHT CURLY BRACKET
287
+ 0x7E => 0x007E, #TILDE
288
+ 0x7F => 0x007F, #DELETE
289
+ 0x80 => 0x20AC, #EURO SIGN
290
+ 0x81 => nil, #UNDEFINED,
291
+ 0x82 => 0x201A, #SINGLE LOW-9 QUOTATION MARK
292
+ 0x83 => 0x0192, #LATIN SMALL LETTER F WITH HOOK
293
+ 0x84 => 0x201E, #DOUBLE LOW-9 QUOTATION MARK
294
+ 0x85 => 0x2026, #HORIZONTAL ELLIPSIS
295
+ 0x86 => 0x2020, #DAGGER
296
+ 0x87 => 0x2021, #DOUBLE DAGGER
297
+ 0x88 => 0x02C6, #MODIFIER LETTER CIRCUMFLEX ACCENT
298
+ 0x89 => 0x2030, #PER MILLE SIGN
299
+ 0x8A => 0x0160, #LATIN CAPITAL LETTER S WITH CARON
300
+ 0x8B => 0x2039, #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
301
+ 0x8C => 0x0152, #LATIN CAPITAL LIGATURE OE
302
+ 0x8D => nil, #UNDEFINED,
303
+ 0x8E => 0x017D, #LATIN CAPITAL LETTER Z WITH CARON
304
+ 0x8F => nil, #UNDEFINED,
305
+ 0x90 => nil, #UNDEFINED,
306
+ 0x91 => 0x2018, #LEFT SINGLE QUOTATION MARK
307
+ 0x92 => 0x2019, #RIGHT SINGLE QUOTATION MARK
308
+ 0x93 => 0x201C, #LEFT DOUBLE QUOTATION MARK
309
+ 0x94 => 0x201D, #RIGHT DOUBLE QUOTATION MARK
310
+ 0x95 => 0x2022, #BULLET
311
+ 0x96 => 0x2013, #EN DASH
312
+ 0x97 => 0x2014, #EM DASH
313
+ 0x98 => 0x02DC, #SMALL TILDE
314
+ 0x99 => 0x2122, #TRADE MARK SIGN
315
+ 0x9A => 0x0161, #LATIN SMALL LETTER S WITH CARON
316
+ 0x9B => 0x203A, #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
317
+ 0x9C => 0x0153, #LATIN SMALL LIGATURE OE
318
+ 0x9D => nil, #UNDEFINED,
319
+ 0x9E => 0x017E, #LATIN SMALL LETTER Z WITH CARON
320
+ 0x9F => 0x0178, #LATIN CAPITAL LETTER Y WITH DIAERESIS
321
+ 0xA0 => 0x00A0, #NO-BREAK SPACE
322
+ 0xA1 => 0x00A1, #INVERTED EXCLAMATION MARK
323
+ 0xA2 => 0x00A2, #CENT SIGN
324
+ 0xA3 => 0x00A3, #POUND SIGN
325
+ 0xA4 => 0x00A4, #CURRENCY SIGN
326
+ 0xA5 => 0x00A5, #YEN SIGN
327
+ 0xA6 => 0x00A6, #BROKEN BAR
328
+ 0xA7 => 0x00A7, #SECTION SIGN
329
+ 0xA8 => 0x00A8, #DIAERESIS
330
+ 0xA9 => 0x00A9, #COPYRIGHT SIGN
331
+ 0xAA => 0x00AA, #FEMININE ORDINAL INDICATOR
332
+ 0xAB => 0x00AB, #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
333
+ 0xAC => 0x00AC, #NOT SIGN
334
+ 0xAD => 0x00AD, #SOFT HYPHEN
335
+ 0xAE => 0x00AE, #REGISTERED SIGN
336
+ 0xAF => 0x00AF, #MACRON
337
+ 0xB0 => 0x00B0, #DEGREE SIGN
338
+ 0xB1 => 0x00B1, #PLUS-MINUS SIGN
339
+ 0xB2 => 0x00B2, #SUPERSCRIPT TWO
340
+ 0xB3 => 0x00B3, #SUPERSCRIPT THREE
341
+ 0xB4 => 0x00B4, #ACUTE ACCENT
342
+ 0xB5 => 0x00B5, #MICRO SIGN
343
+ 0xB6 => 0x00B6, #PILCROW SIGN
344
+ 0xB7 => 0x00B7, #MIDDLE DOT
345
+ 0xB8 => 0x00B8, #CEDILLA
346
+ 0xB9 => 0x00B9, #SUPERSCRIPT ONE
347
+ 0xBA => 0x00BA, #MASCULINE ORDINAL INDICATOR
348
+ 0xBB => 0x00BB, #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
349
+ 0xBC => 0x00BC, #VULGAR FRACTION ONE QUARTER
350
+ 0xBD => 0x00BD, #VULGAR FRACTION ONE HALF
351
+ 0xBE => 0x00BE, #VULGAR FRACTION THREE QUARTERS
352
+ 0xBF => 0x00BF, #INVERTED QUESTION MARK
353
+ 0xC0 => 0x00C0, #LATIN CAPITAL LETTER A WITH GRAVE
354
+ 0xC1 => 0x00C1, #LATIN CAPITAL LETTER A WITH ACUTE
355
+ 0xC2 => 0x00C2, #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
356
+ 0xC3 => 0x00C3, #LATIN CAPITAL LETTER A WITH TILDE
357
+ 0xC4 => 0x00C4, #LATIN CAPITAL LETTER A WITH DIAERESIS
358
+ 0xC5 => 0x00C5, #LATIN CAPITAL LETTER A WITH RING ABOVE
359
+ 0xC6 => 0x00C6, #LATIN CAPITAL LETTER AE
360
+ 0xC7 => 0x00C7, #LATIN CAPITAL LETTER C WITH CEDILLA
361
+ 0xC8 => 0x00C8, #LATIN CAPITAL LETTER E WITH GRAVE
362
+ 0xC9 => 0x00C9, #LATIN CAPITAL LETTER E WITH ACUTE
363
+ 0xCA => 0x00CA, #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
364
+ 0xCB => 0x00CB, #LATIN CAPITAL LETTER E WITH DIAERESIS
365
+ 0xCC => 0x00CC, #LATIN CAPITAL LETTER I WITH GRAVE
366
+ 0xCD => 0x00CD, #LATIN CAPITAL LETTER I WITH ACUTE
367
+ 0xCE => 0x00CE, #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
368
+ 0xCF => 0x00CF, #LATIN CAPITAL LETTER I WITH DIAERESIS
369
+ 0xD0 => 0x00D0, #LATIN CAPITAL LETTER ETH
370
+ 0xD1 => 0x00D1, #LATIN CAPITAL LETTER N WITH TILDE
371
+ 0xD2 => 0x00D2, #LATIN CAPITAL LETTER O WITH GRAVE
372
+ 0xD3 => 0x00D3, #LATIN CAPITAL LETTER O WITH ACUTE
373
+ 0xD4 => 0x00D4, #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
374
+ 0xD5 => 0x00D5, #LATIN CAPITAL LETTER O WITH TILDE
375
+ 0xD6 => 0x00D6, #LATIN CAPITAL LETTER O WITH DIAERESIS
376
+ 0xD7 => 0x00D7, #MULTIPLICATION SIGN
377
+ 0xD8 => 0x00D8, #LATIN CAPITAL LETTER O WITH STROKE
378
+ 0xD9 => 0x00D9, #LATIN CAPITAL LETTER U WITH GRAVE
379
+ 0xDA => 0x00DA, #LATIN CAPITAL LETTER U WITH ACUTE
380
+ 0xDB => 0x00DB, #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
381
+ 0xDC => 0x00DC, #LATIN CAPITAL LETTER U WITH DIAERESIS
382
+ 0xDD => 0x00DD, #LATIN CAPITAL LETTER Y WITH ACUTE
383
+ 0xDE => 0x00DE, #LATIN CAPITAL LETTER THORN
384
+ 0xDF => 0x00DF, #LATIN SMALL LETTER SHARP S
385
+ 0xE0 => 0x00E0, #LATIN SMALL LETTER A WITH GRAVE
386
+ 0xE1 => 0x00E1, #LATIN SMALL LETTER A WITH ACUTE
387
+ 0xE2 => 0x00E2, #LATIN SMALL LETTER A WITH CIRCUMFLEX
388
+ 0xE3 => 0x00E3, #LATIN SMALL LETTER A WITH TILDE
389
+ 0xE4 => 0x00E4, #LATIN SMALL LETTER A WITH DIAERESIS
390
+ 0xE5 => 0x00E5, #LATIN SMALL LETTER A WITH RING ABOVE
391
+ 0xE6 => 0x00E6, #LATIN SMALL LETTER AE
392
+ 0xE7 => 0x00E7, #LATIN SMALL LETTER C WITH CEDILLA
393
+ 0xE8 => 0x00E8, #LATIN SMALL LETTER E WITH GRAVE
394
+ 0xE9 => 0x00E9, #LATIN SMALL LETTER E WITH ACUTE
395
+ 0xEA => 0x00EA, #LATIN SMALL LETTER E WITH CIRCUMFLEX
396
+ 0xEB => 0x00EB, #LATIN SMALL LETTER E WITH DIAERESIS
397
+ 0xEC => 0x00EC, #LATIN SMALL LETTER I WITH GRAVE
398
+ 0xED => 0x00ED, #LATIN SMALL LETTER I WITH ACUTE
399
+ 0xEE => 0x00EE, #LATIN SMALL LETTER I WITH CIRCUMFLEX
400
+ 0xEF => 0x00EF, #LATIN SMALL LETTER I WITH DIAERESIS
401
+ 0xF0 => 0x00F0, #LATIN SMALL LETTER ETH
402
+ 0xF1 => 0x00F1, #LATIN SMALL LETTER N WITH TILDE
403
+ 0xF2 => 0x00F2, #LATIN SMALL LETTER O WITH GRAVE
404
+ 0xF3 => 0x00F3, #LATIN SMALL LETTER O WITH ACUTE
405
+ 0xF4 => 0x00F4, #LATIN SMALL LETTER O WITH CIRCUMFLEX
406
+ 0xF5 => 0x00F5, #LATIN SMALL LETTER O WITH TILDE
407
+ 0xF6 => 0x00F6, #LATIN SMALL LETTER O WITH DIAERESIS
408
+ 0xF7 => 0x00F7, #DIVISION SIGN
409
+ 0xF8 => 0x00F8, #LATIN SMALL LETTER O WITH STROKE
410
+ 0xF9 => 0x00F9, #LATIN SMALL LETTER U WITH GRAVE
411
+ 0xFA => 0x00FA, #LATIN SMALL LETTER U WITH ACUTE
412
+ 0xFB => 0x00FB, #LATIN SMALL LETTER U WITH CIRCUMFLEX
413
+ 0xFC => 0x00FC, #LATIN SMALL LETTER U WITH DIAERESIS
414
+ 0xFD => 0x00FD, #LATIN SMALL LETTER Y WITH ACUTE
415
+ 0xFE => 0x00FE, #LATIN SMALL LETTER THORN
416
+ 0xFF => 0x00FF, #LATIN SMALL LETTER Y WITH DIAERESIS
417
+ # added by me for crashing VR
418
+ # http://unicode-search.net/unicode-namesearch.pl?term=hyphen
419
+ 0X2010 => 0x2010, #HYPHEN
420
+ 0X2011 => 0x2011, #NON BREAKING HYPHEN
421
+ 0X2012 => 0x2012, #FIGURE DASH
422
+
423
+
424
+ }
425
+
426
+ # from http://redhanded.hobix.com/inspect/closingInOnUnicodeWithJcode.html
427
# Expands every "U+XXXX" token (exactly four hex digits, either case) found
# in +str+ into the UTF-8 encoded character for that code point. Text that
# is not part of such a token is left untouched.
#
# @param str [String] text containing zero or more "U+XXXX" tokens
# @return [String] a new String with each token replaced
def self.utf_encode(str)
  expanded = str.gsub(/U\+([0-9a-fA-F]{4})/u) do
    # Read the capture from the match data instead of interpolating the
    # Perl-style $1 global into a throwaway string.
    [Regexp.last_match(1).hex].pack('U*')
  end
  String.new(expanded)
end
430
+
431
+
432
# Lookup table consumed by demoronize: a Regexp matching one UTF-8 character
# maps to its plain-ASCII replacement. The nil => "" seed entry is kept from
# the original table for compatibility.
UTF8_CHARS = { nil => "" }

# Derive the table from CP1252_CHARS rather than maintaining it by hand:
# translate each CP1252 code to its Unicode code point via CP1252_MAP.
CP1252_CHARS.each do |cp1252_code, (ascii_text, _entity_name)|
  unicode_code = CP1252_MAP[cp1252_code]
  if unicode_code
    replacement = ascii_text
  else
    # CP1252_MAP has no Unicode value for this code (e.g. the UNDEFINED
    # slots): match the raw code point itself and strip it.
    unicode_code = cp1252_code
    replacement = ""
  end

  # pack('U') encodes the code point straight to UTF-8, avoiding a round
  # trip through a "U+XXXX" string; Regexp.escape keeps the character a
  # literal match even if it were a regexp metacharacter.
  character = [unicode_code].pack('U')
  UTF8_CHARS[Regexp.new(Regexp.escape(character))] = replacement
end
449
+
450
+
451
# Returns a copy of +orig_str+ in which every character matched by a
# UTF8_CHARS pattern has been replaced by that pattern's ASCII replacement.
# The argument itself is never modified.
#
# @param orig_str [String, nil] text to clean up
# @return [String, nil] the cleaned copy; nil is passed through as nil
def self.demoronize(orig_str)
  return orig_str if orig_str.nil?

  string = orig_str.dup
  UTF8_CHARS.each do |pattern, replacement|
    # UTF8_CHARS carries a nil => "" seed entry; it is not a pattern.
    next unless pattern

    # gsub! is already a no-op when nothing matches, so no separate =~
    # pre-check (and second scan of the string) is needed.
    string.gsub!(pattern, replacement)
  end
  string
end
465
+ end
@@ -0,0 +1,3 @@
1
# Namespace for the asciidammit2 gem.
module Asciidammit
  # Release version of the gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ require 'asciidammit'
3
+ describe Asciidammit do
4
+ # keep this for handy string w/lots of symbols
5
+ it "should demoronize the special characters provided by word" do
6
+ Asciidammit.demoronize( "—–‑­ ©®™§¶…‘’“”" ).should == "---- (c)[R][TM]SP...''\"\""
7
+ end
8
+ # the MS Word provided symbols 1 at a time
9
+ [
10
+ ["—", "--" ],
11
+ ["–", "-"],
12
+ ["‑", "-"],
13
+ ["­", " "],
14
+ [" ", " "],
15
+ ["©", "(c)"],
16
+ ["®", "[R]"],
17
+ ["™", "[TM]"],
18
+ ["§", "S"],
19
+ ["¶", "P"],
20
+ ["…", "..."],
21
+ ["‘", "'"],
22
+ ["’", "'"],
23
+ ["“", "\""],
24
+ ["”", "\""],
25
+
26
+ ].each { |ms_symbol, ascii|
27
+ it "should convert #{ms_symbol} to #{ascii}" do
28
+ expect(Asciidammit.demoronize( ms_symbol)).to eq ascii
29
+ end
30
+ }
31
+
32
+ end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: asciidammit2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Forrest Chang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-nc
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: ' A straight port of asciidammit.py used for a work project, made into
70
+ a gem to be used elsewhere. There is an asciidammit gem which sports a different
71
+ interface.'
72
+ email:
73
+ - fkc_email-ruby@yahoo.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - .gitignore
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - asciidammit2.gemspec
84
+ - lib/asciidammit.rb
85
+ - lib/asciidammit/version.rb
86
+ - spec/asciidammit_spec.rb
87
+ homepage: https://github.com/fkchang/asciidammit2
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.0.6
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: A straight port of asciidammit.py.
111
+ test_files:
112
+ - spec/asciidammit_spec.rb
113
+ has_rdoc: