ansel_iconv 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,5 @@
1
+ === 1.0.0 / 2009-03-23
2
+
3
+ * Initial public release
4
+
5
+
data/README.txt ADDED
@@ -0,0 +1,37 @@
1
+ = ANSEL::Iconv
2
+
3
+ http://github.com/infused/ansel_iconv/tree/master
4
+
5
+ == DESCRIPTION:
6
+
7
+ Convert ANSEL-encoded text to any other encoding supported by Iconv.
8
+
9
+ == INSTALL:
10
+
11
+ gem install infused-ansel_iconv --source http://gems.github.com
12
+
13
+
14
+ == LICENSE:
15
+
16
+ (The MIT License)
17
+
18
+ Copyright (c) 2006-2009 Keith Morrison <keithm@infused.org>
19
+
20
+ Permission is hereby granted, free of charge, to any person obtaining
21
+ a copy of this software and associated documentation files (the
22
+ 'Software'), to deal in the Software without restriction, including
23
+ without limitation the rights to use, copy, modify, merge, publish,
24
+ distribute, sublicense, and/or sell copies of the Software, and to
25
+ permit persons to whom the Software is furnished to do so, subject to
26
+ the following conditions:
27
+
28
+ The above copyright notice and this permission notice shall be
29
+ included in all copies or substantial portions of the Software.
30
+
31
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
32
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
34
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
35
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
36
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
37
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 1
3
+ :minor: 0
4
+ :patch: 2
@@ -0,0 +1,631 @@
1
+ require 'activesupport'
2
+ require 'iconv'
3
+
4
+ module ANSEL
5
# Front end that mirrors the ::Iconv constructor/#iconv interface while
# adding ANSEL as a possible source encoding.
#
# When the source encoding is ANSEL the conversion is handed to Convert;
# for any other source encoding a regular ::Iconv instance is used.
class Iconv
  # Builds the underlying converter.
  #
  # to   - name of the target encoding, passed through unchanged to the
  #        selected converter.
  # from - name of the source encoding (default: 'ANSEL'). The ANSEL check
  #        is case-insensitive, so 'ansel' and 'Ansel' select the ANSEL
  #        converter as well instead of being passed on to ::Iconv.
  def initialize(to, from = 'ANSEL')
    @converter = ansel?(from) ? Convert.new(to) : ::Iconv.new(to, from)
  end

  # Forwards the call, with all arguments and any block, to the underlying
  # converter's #iconv and returns whatever that returns.
  #
  # Written as a plain method so this class does not rely on ActiveSupport's
  # `delegate` macro.
  def iconv(*args, &block)
    @converter.iconv(*args, &block)
  end

  private

  # True when +encoding+ names ANSEL, ignoring letter case.
  def ansel?(encoding)
    encoding.to_s.upcase == 'ANSEL'
  end
end
12
+
13
+ class Convert
14
+ @@non_combining = {
15
+ "ERR" => "\xFF\xFD", # � - REPLACEMENT CHARACTER
16
+ "88" => "", # NON-SORT BEGIN / START OF STRING
17
+ "89" => "", # NON-SORT END / STRING TERMINATOR
18
+ "8D" => "", # JOINER / ZERO WIDTH JOINER
19
+ "8E" => "", # NON-JOINER / ZERO WIDTH NON-JOINER
20
+ "A1" => "\x01\x41", # Ł - UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH STROKE
21
+ "A2" => "\x00\xD8", # Ø - UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER O WITH STROKE
22
+ "A3" => "\x01\x10", # Đ - UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER D WITH STROKE
23
+ "A4" => "\x00\xDE", # Þ - UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER THORN (Icelandic)
24
+ "A5" => "\x00\xC6", # Æ - UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE AE
25
+ "A6" => "\x01\x52", # Π- UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE OE
26
+ "A7" => "\x02\xB9", # ʹ - SOFT SIGN, PRIME / MODIFIER LETTER PRIME
27
+ "A8" => "\x00\xB7", # · - MIDDLE DOT
28
+ "A9" => "\x26\x6D", # ♭ - MUSIC FLAT SIGN
29
+ "AA" => "\x00\xAE", # ® - PATENT MARK / REGISTERED SIGN
30
+ "AB" => "\x00\xB1", # ± - PLUS OR MINUS / PLUS-MINUS SIGN
31
+ "AC" => "\x01\xA0", # Ơ - UPPERCASE O-HOOK / LATIN CAPITAL LETTER O WITH HORN
32
+ "AD" => "\x01\xAF", # Ư - UPPERCASE U-HOOK / LATIN CAPITAL LETTER U WITH HORN
33
+ "AE" => "\x02\xBC", # ʼ - ALIF / MODIFIER LETTER APOSTROPHE
34
+ "B0" => "\x02\xBB", # ʻ - AYN / MODIFIER LETTER TURNED COMMA
35
+ "B1" => "\x01\x42", # ł - LOWERCASE POLISH L / LATIN SMALL LETTER L WITH STROKE
36
+ "B2" => "\x00\xF8", # ø - LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O WITH STROKE
37
+ "B3" => "\x01\x11", # đ - LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER D WITH STROKE
38
+ "B4" => "\x00\xFE", # þ - LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER THORN (Icelandic)
39
+ "B5" => "\x00\xE6", # æ - LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE AE
40
+ "B6" => "\x01\x53", # œ - LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE OE
41
+ "B7" => "\x02\xBA", # ʺ - HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE PRIME
42
+ "B8" => "\x01\x31", # ı - LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS I
43
+ "B9" => "\x00\xA3", # £ - BRITISH POUND / POUND SIGN
44
+ "BA" => "\x00\xF0", # ð - LOWERCASE ETH / LATIN SMALL LETTER ETH (Icelandic)
45
+ "BC" => "\x01\xA1", # ơ - LOWERCASE O-HOOK / LATIN SMALL LETTER O WITH HORN
46
+ "BD" => "\x01\xB0", # ư - LOWERCASE U-HOOK / LATIN SMALL LETTER U WITH HORN
47
+ "C0" => "\x00\xB0", # ° - DEGREE SIGN
48
+ "C1" => "\x21\x13", # ℓ - SCRIPT SMALL L
49
+ "C2" => "\x21\x17", # ℗ - SOUND RECORDING COPYRIGHT
50
+ "C3" => "\x00\xA9", # © - COPYRIGHT SIGN
51
+ "C4" => "\x26\x6F", # ♯ - MUSIC SHARP SIGN
52
+ "C5" => "\x00\xBF", # ¿ - INVERTED QUESTION MARK
53
+ "C6" => "\x00\xA1", # ¡ - INVERTED EXCLAMATION MARK
54
+ "C7" => "\x00\xDF", # ß - ESZETT SYMBOL
55
+ "C8" => "\x20\xAC" # € - EURO SIGN
56
+ }
57
+
58
+ @@combining = {
59
+ "E0+41" => "\x1E\xA2", # Ả - LATIN CAPITAL LETTER A WITH HOOK ABOVE
60
+ "E0+45" => "\x1E\xBA", # LATIN CAPITAL LETTER E WITH HOOK ABOVE
61
+ "E0+49" => "\x1E\xC8", # LATIN CAPITAL LETTER I WITH HOOK ABOVE
62
+ "E0+4F" => "\x1E\xCE", # LATIN CAPITAL LETTER O WITH HOOK ABOVE
63
+ "E0+55" => "\x1E\xE6", # LATIN CAPITAL LETTER U WITH HOOK ABOVE
64
+ "E0+59" => "\x1E\xF6", # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
65
+ "E0+61" => "\x1E\xA3", # LATIN SMALL LETTER A WITH HOOK ABOVE
66
+ "E0+65" => "\x1E\xBB", # LATIN SMALL LETTER E WITH HOOK ABOVE
67
+ "E0+69" => "\x1E\xC9", # LATIN SMALL LETTER I WITH HOOK ABOVE
68
+ "E0+6F" => "\x1E\xCF", # LATIN SMALL LETTER O WITH HOOK ABOVE
69
+ "E0+75" => "\x1E\xE7", # LATIN SMALL LETTER U WITH HOOK ABOVE
70
+ "E0+79" => "\x1E\xF7", # LATIN SMALL LETTER Y WITH HOOK ABOVE
71
+ "E0+E3+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
72
+ "E0+E3+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
73
+ "E0+E3+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
74
+ "E0+E3+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
75
+ "E0+E3+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
76
+ "E0+E3+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
77
+ "E0+E6+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
78
+ "E0+E6+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
79
+ "E0" => "\x03\x09", # COMBINING HOOK ABOVE
80
+ "E1+41" => "\x00\xC0", # LATIN CAPITAL LETTER A WITH GRAVE
81
+ "E1+45" => "\x00\xC8", # LATIN CAPITAL LETTER E WITH GRAVE
82
+ "E1+49" => "\x00\xCC", # LATIN CAPITAL LETTER I WITH GRAVE
83
+ "E1+4F" => "\x00\xD2", # LATIN CAPITAL LETTER O WITH GRAVE
84
+ "E1+55" => "\x00\xD9", # LATIN CAPITAL LETTER U WITH GRAVE
85
+ "E1+57" => "\x1E\x80", # LATIN CAPITAL LETTER W WITH GRAVE
86
+ "E1+59" => "\x1E\xF2", # LATIN CAPITAL LETTER Y WITH GRAVE
87
+ "E1+61" => "\x00\xE0", # LATIN SMALL LETTER A WITH GRAVE
88
+ "E1+65" => "\x00\xE8", # LATIN SMALL LETTER E WITH GRAVE
89
+ "E1+69" => "\x00\xEC", # LATIN SMALL LETTER I WITH GRAVE
90
+ "E1+6F" => "\x00\xF2", # LATIN SMALL LETTER O WITH GRAVE
91
+ "E1+75" => "\x00\xF9", # LATIN SMALL LETTER U WITH GRAVE
92
+ "E1+77" => "\x1E\x81", # LATIN SMALL LETTER W WITH GRAVE
93
+ "E1+79" => "\x1E\xF3", # LATIN SMALL LETTER Y WITH GRAVE
94
+ "E1+E3+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
95
+ "E1+E3+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
96
+ "E1+E3+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
97
+ "E1+E3+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
98
+ "E1+E3+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
99
+ "E1+E3+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
100
+ "E1+E5+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
101
+ "E1+E5+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
102
+ "E1+E5+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
103
+ "E1+E5+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
104
+ "E1+E6+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
105
+ "E1+E6+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
106
+ "E1+E8+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
107
+ "E1+E8+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
108
+ "E1" => "\x03\x00", # COMBINING GRAVE ACCENT
109
+ "E2+41" => "\x00\xC1", # LATIN CAPITAL LETTER A WITH ACUTE
110
+ "E2+43" => "\x01\x06", # LATIN CAPITAL LETTER C WITH ACUTE
111
+ "E2+45" => "\x00\xC9", # LATIN CAPITAL LETTER E WITH ACUTE
112
+ "E2+47" => "\x01\xF4", # LATIN CAPITAL LETTER G WITH ACUTE
113
+ "E2+49" => "\x00\xCD", # LATIN CAPITAL LETTER I WITH ACUTE
114
+ "E2+4B" => "\x1E\x30", # LATIN CAPITAL LETTER K WITH ACUTE
115
+ "E2+4C" => "\x01\x39", # LATIN CAPITAL LETTER L WITH ACUTE
116
+ "E2+4D" => "\x1E\x3E", # LATIN CAPITAL LETTER M WITH ACUTE
117
+ "E2+4E" => "\x01\x43", # LATIN CAPITAL LETTER N WITH ACUTE
118
+ "E2+4F" => "\x00\xD3", # LATIN CAPITAL LETTER O WITH ACUTE
119
+ "E2+50" => "\x1E\x54", # LATIN CAPITAL LETTER P WITH ACUTE
120
+ "E2+52" => "\x01\x54", # LATIN CAPITAL LETTER R WITH ACUTE
121
+ "E2+53" => "\x01\x5A", # LATIN CAPITAL LETTER S WITH ACUTE
122
+ "E2+55" => "\x00\xDA", # LATIN CAPITAL LETTER U WITH ACUTE
123
+ "E2+57" => "\x1E\x82", # LATIN CAPITAL LETTER W WITH ACUTE
124
+ "E2+59" => "\x00\xDD", # LATIN CAPITAL LETTER Y WITH ACUTE
125
+ "E2+5A" => "\x01\x79", # LATIN CAPITAL LETTER Z WITH ACUTE
126
+ "E2+61" => "\x00\xE1", # LATIN SMALL LETTER A WITH ACUTE
127
+ "E2+63" => "\x01\x07", # LATIN SMALL LETTER C WITH ACUTE
128
+ "E2+65" => "\x00\xE9", # LATIN SMALL LETTER E WITH ACUTE
129
+ "E2+67" => "\x01\xF5", # LATIN SMALL LETTER G WITH ACUTE
130
+ "E2+69" => "\x00\xED", # LATIN SMALL LETTER I WITH ACUTE
131
+ "E2+6B" => "\x1E\x31", # LATIN SMALL LETTER K WITH ACUTE
132
+ "E2+6C" => "\x01\x3A", # LATIN SMALL LETTER L WITH ACUTE
133
+ "E2+6D" => "\x1E\x3F", # LATIN SMALL LETTER M WITH ACUTE
134
+ "E2+6E" => "\x01\x44", # LATIN SMALL LETTER N WITH ACUTE
135
+ "E2+6F" => "\x00\xF3", # LATIN SMALL LETTER O WITH ACUTE
136
+ "E2+70" => "\x1E\x55", # LATIN SMALL LETTER P WITH ACUTE
137
+ "E2+72" => "\x01\x55", # LATIN SMALL LETTER R WITH ACUTE
138
+ "E2+73" => "\x01\x5B", # LATIN SMALL LETTER S WITH ACUTE
139
+ "E2+75" => "\x00\xFA", # LATIN SMALL LETTER U WITH ACUTE
140
+ "E2+77" => "\x1E\x83", # LATIN SMALL LETTER W WITH ACUTE
141
+ "E2+79" => "\x00\xFD", # LATIN SMALL LETTER Y WITH ACUTE
142
+ "E2+7A" => "\x01\x7A", # LATIN SMALL LETTER Z WITH ACUTE
143
+ "E2+A5" => "\x01\xFC", # LATIN CAPITAL LETTER AE WITH ACUTE
144
+ "E2+B5" => "\x01\xFD", # LATIN SMALL LETTER AE WITH ACUTE
145
+ "E2+E3+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
146
+ "E2+E3+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
147
+ "E2+E3+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
148
+ "E2+E3+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
149
+ "E2+E3+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
150
+ "E2+E3+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
151
+ "E2+E4+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
152
+ "E2+E4+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
153
+ "E2+E4+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
154
+ "E2+E4+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
155
+ "E2+E5+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
156
+ "E2+E5+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
157
+ "E2+E5+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
158
+ "E2+E5+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
159
+ "E2+E6+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
160
+ "E2+E6+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
161
+ "E2+E7+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
162
+ "E2+E7+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
163
+ "E2+E8+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
164
+ "E2+E8+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
165
+ "E2+E8+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
166
+ "E2+E8+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
167
+ "E2+EA+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
168
+ "E2+EA+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
169
+ "E2+F0+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
170
+ "E2+F0+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
171
+ "E2" => "\x03\x01", # COMBINING ACUTE ACCENT
172
+ "E3+41" => "\x00\xC2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
173
+ "E3+43" => "\x01\x08", # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
174
+ "E3+45" => "\x00\xCA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
175
+ "E3+47" => "\x01\x1C", # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
176
+ "E3+48" => "\x01\x24", # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
177
+ "E3+49" => "\x00\xCE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
178
+ "E3+4A" => "\x01\x34", # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
179
+ "E3+4F" => "\x00\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
180
+ "E3+53" => "\x01\x5C", # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
181
+ "E3+55" => "\x00\xDB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
182
+ "E3+57" => "\x01\x74", # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
183
+ "E3+59" => "\x01\x76", # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
184
+ "E3+5A" => "\x1E\x90", # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
185
+ "E3+61" => "\x00\xE2", # LATIN SMALL LETTER A WITH CIRCUMFLEX
186
+ "E3+63" => "\x01\x09", # LATIN SMALL LETTER C WITH CIRCUMFLEX
187
+ "E3+65" => "\x00\xEA", # LATIN SMALL LETTER E WITH CIRCUMFLEX
188
+ "E3+67" => "\x01\x1D", # LATIN SMALL LETTER G WITH CIRCUMFLEX
189
+ "E3+68" => "\x01\x25", # LATIN SMALL LETTER H WITH CIRCUMFLEX
190
+ "E3+69" => "\x00\xEE", # LATIN SMALL LETTER I WITH CIRCUMFLEX
191
+ "E3+6A" => "\x01\x35", # LATIN SMALL LETTER J WITH CIRCUMFLEX
192
+ "E3+6F" => "\x00\xF4", # LATIN SMALL LETTER O WITH CIRCUMFLEX
193
+ "E3+73" => "\x01\x5D", # LATIN SMALL LETTER S WITH CIRCUMFLEX
194
+ "E3+75" => "\x00\xFB", # LATIN SMALL LETTER U WITH CIRCUMFLEX
195
+ "E3+77" => "\x01\x75", # LATIN SMALL LETTER W WITH CIRCUMFLEX
196
+ "E3+79" => "\x01\x77", # LATIN SMALL LETTER Y WITH CIRCUMFLEX
197
+ "E3+7A" => "\x1E\x91", # LATIN SMALL LETTER Z WITH CIRCUMFLEX
198
+ "E3+E0+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
199
+ "E3+E0+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
200
+ "E3+E0+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
201
+ "E3+E0+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
202
+ "E3+E0+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
203
+ "E3+E0+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
204
+ "E3+E1+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
205
+ "E3+E1+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
206
+ "E3+E1+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
207
+ "E3+E1+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
208
+ "E3+E1+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
209
+ "E3+E1+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
210
+ "E3+E2+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
211
+ "E3+E2+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
212
+ "E3+E2+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
213
+ "E3+E2+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
214
+ "E3+E2+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
215
+ "E3+E2+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
216
+ "E3+E4+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
217
+ "E3+E4+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
218
+ "E3+E4+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
219
+ "E3+E4+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
220
+ "E3+E4+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
221
+ "E3+E4+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
222
+ "E3+F2+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
223
+ "E3+F2+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
224
+ "E3+F2+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
225
+ "E3+F2+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
226
+ "E3+F2+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
227
+ "E3+F2+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
228
+ "E3" => "\x03\x02", # COMBINING CIRCUMFLEX ACCENT
229
+ "E4+41" => "\x00\xC3", # LATIN CAPITAL LETTER A WITH TILDE
230
+ "E4+45" => "\x1E\xBC", # LATIN CAPITAL LETTER E WITH TILDE
231
+ "E4+49" => "\x01\x28", # LATIN CAPITAL LETTER I WITH TILDE
232
+ "E4+4E" => "\x00\xD1", # LATIN CAPITAL LETTER N WITH TILDE
233
+ "E4+4F" => "\x00\xD5", # LATIN CAPITAL LETTER O WITH TILDE
234
+ "E4+55" => "\x01\x68", # LATIN CAPITAL LETTER U WITH TILDE
235
+ "E4+56" => "\x1E\x7C", # LATIN CAPITAL LETTER V WITH TILDE
236
+ "E4+59" => "\x1E\xF8", # LATIN CAPITAL LETTER Y WITH TILDE
237
+ "E4+61" => "\x00\xE3", # LATIN SMALL LETTER A WITH TILDE
238
+ "E4+65" => "\x1E\xBD", # LATIN SMALL LETTER E WITH TILDE
239
+ "E4+69" => "\x01\x29", # LATIN SMALL LETTER I WITH TILDE
240
+ "E4+6E" => "\x00\xF1", # LATIN SMALL LETTER N WITH TILDE
241
+ "E4+6F" => "\x00\xF5", # LATIN SMALL LETTER O WITH TILDE
242
+ "E4+75" => "\x01\x69", # LATIN SMALL LETTER U WITH TILDE
243
+ "E4+76" => "\x1E\x7D", # LATIN SMALL LETTER V WITH TILDE
244
+ "E4+79" => "\x1E\xF9", # LATIN SMALL LETTER Y WITH TILDE
245
+ "E4+E2+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
246
+ "E4+E2+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
247
+ "E4+E2+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
248
+ "E4+E2+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
249
+ "E4+E3+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
250
+ "E4+E3+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
251
+ "E4+E3+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
252
+ "E4+E3+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
253
+ "E4+E3+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
254
+ "E4+E3+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
255
+ "E4+E6+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
256
+ "E4+E6+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
257
+ "E4+E8+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
258
+ "E4+E8+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
259
+ "E4" => "\x03\x03", # COMBINING TILDE
260
+ "E5+41" => "\x01\x00", # LATIN CAPITAL LETTER A WITH MACRON
261
+ "E5+45" => "\x01\x12", # LATIN CAPITAL LETTER E WITH MACRON
262
+ "E5+47" => "\x1E\x20", # LATIN CAPITAL LETTER G WITH MACRON
263
+ "E5+49" => "\x01\x2A", # LATIN CAPITAL LETTER I WITH MACRON
264
+ "E5+4F" => "\x01\x4C", # LATIN CAPITAL LETTER O WITH MACRON
265
+ "E5+55" => "\x01\x6A", # LATIN CAPITAL LETTER U WITH MACRON
266
+ "E5+61" => "\x01\x01", # LATIN SMALL LETTER A WITH MACRON
267
+ "E5+65" => "\x01\x13", # LATIN SMALL LETTER E WITH MACRON
268
+ "E5+67" => "\x1E\x21", # LATIN SMALL LETTER G WITH MACRON
269
+ "E5+69" => "\x01\x2B", # LATIN SMALL LETTER I WITH MACRON
270
+ "E5+6F" => "\x01\x4D", # LATIN SMALL LETTER O WITH MACRON
271
+ "E5+75" => "\x01\x6B", # LATIN SMALL LETTER U WITH MACRON
272
+ "E5+A5" => "\x01\xE2", # LATIN CAPITAL LETTER AE WITH MACRON
273
+ "E5+B5" => "\x01\xE3", # LATIN SMALL LETTER AE WITH MACRON
274
+ "E5+E1+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
275
+ "E5+E1+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
276
+ "E5+E1+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
277
+ "E5+E1+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
278
+ "E5+E2+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
279
+ "E5+E2+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
280
+ "E5+E2+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
281
+ "E5+E2+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
282
+ "E5+E7+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
283
+ "E5+E7+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
284
+ "E5+E8+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
285
+ "E5+E8+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
286
+ "E5+E8+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
287
+ "E5+E8+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
288
+ "E5+F1+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
289
+ "E5+F1+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
290
+ "E5+F2+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
291
+ "E5+F2+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
292
+ "E5+F2+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
293
+ "E5+F2+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
294
+ "E5" => "\x03\x04", # COMBINING MACRON
295
+ "E6+41" => "\x01\x02", # LATIN CAPITAL LETTER A WITH BREVE
296
+ "E6+45" => "\x01\x14", # LATIN CAPITAL LETTER E WITH BREVE
297
+ "E6+47" => "\x01\x1E", # LATIN CAPITAL LETTER G WITH BREVE
298
+ "E6+49" => "\x01\x2C", # LATIN CAPITAL LETTER I WITH BREVE
299
+ "E6+4F" => "\x01\x4E", # LATIN CAPITAL LETTER O WITH BREVE
300
+ "E6+55" => "\x01\x6C", # LATIN CAPITAL LETTER U WITH BREVE
301
+ "E6+61" => "\x01\x03", # LATIN SMALL LETTER A WITH BREVE
302
+ "E6+65" => "\x01\x15", # LATIN SMALL LETTER E WITH BREVE
303
+ "E6+67" => "\x01\x1F", # LATIN SMALL LETTER G WITH BREVE
304
+ "E6+69" => "\x01\x2D", # LATIN SMALL LETTER I WITH BREVE
305
+ "E6+6F" => "\x01\x4F", # LATIN SMALL LETTER O WITH BREVE
306
+ "E6+75" => "\x01\x6D", # LATIN SMALL LETTER U WITH BREVE
307
+ "E6+E0+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
308
+ "E6+E0+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
309
+ "E6+E1+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
310
+ "E6+E1+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
311
+ "E6+E2+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
312
+ "E6+E2+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
313
+ "E6+E4+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
314
+ "E6+E4+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
315
+ "E6+F0+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
316
+ "E6+F0+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
317
+ "E6+F2+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
318
+ "E6+F2+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
319
+ "E6" => "\x03\x06", # COMBINING BREVE
320
+ "E7+42" => "\x1E\x02", # LATIN CAPITAL LETTER B WITH DOT ABOVE
321
+ "E7+43" => "\x01\x0A", # LATIN CAPITAL LETTER C WITH DOT ABOVE
322
+ "E7+44" => "\x1E\x0A", # LATIN CAPITAL LETTER D WITH DOT ABOVE
323
+ "E7+45" => "\x01\x16", # LATIN CAPITAL LETTER E WITH DOT ABOVE
324
+ "E7+46" => "\x1E\x1E", # LATIN CAPITAL LETTER F WITH DOT ABOVE
325
+ "E7+47" => "\x01\x20", # LATIN CAPITAL LETTER G WITH DOT ABOVE
326
+ "E7+48" => "\x1E\x22", # LATIN CAPITAL LETTER H WITH DOT ABOVE
327
+ "E7+49" => "\x01\x30", # LATIN CAPITAL LETTER I WITH DOT ABOVE
328
+ "E7+4D" => "\x1E\x40", # LATIN CAPITAL LETTER M WITH DOT ABOVE
329
+ "E7+4E" => "\x1E\x44", # LATIN CAPITAL LETTER N WITH DOT ABOVE
330
+ "E7+50" => "\x1E\x56", # LATIN CAPITAL LETTER P WITH DOT ABOVE
331
+ "E7+52" => "\x1E\x58", # LATIN CAPITAL LETTER R WITH DOT ABOVE
332
+ "E7+53" => "\x1E\x60", # LATIN CAPITAL LETTER S WITH DOT ABOVE
333
+ "E7+54" => "\x1E\x6A", # LATIN CAPITAL LETTER T WITH DOT ABOVE
334
+ "E7+57" => "\x1E\x86", # LATIN CAPITAL LETTER W WITH DOT ABOVE
335
+ "E7+58" => "\x1E\x8A", # LATIN CAPITAL LETTER X WITH DOT ABOVE
336
+ "E7+59" => "\x1E\x8E", # LATIN CAPITAL LETTER Y WITH DOT ABOVE
337
+ "E7+5A" => "\x01\x7B", # LATIN CAPITAL LETTER Z WITH DOT ABOVE
338
+ "E7+62" => "\x1E\x03", # LATIN SMALL LETTER B WITH DOT ABOVE
339
+ "E7+63" => "\x01\x0B", # LATIN SMALL LETTER C WITH DOT ABOVE
340
+ "E7+64" => "\x1E\x0B", # LATIN SMALL LETTER D WITH DOT ABOVE
341
+ "E7+65" => "\x01\x17", # LATIN SMALL LETTER E WITH DOT ABOVE
342
+ "E7+66" => "\x1E\x1F", # LATIN SMALL LETTER F WITH DOT ABOVE
343
+ "E7+67" => "\x01\x21", # LATIN SMALL LETTER G WITH DOT ABOVE
344
+ "E7+68" => "\x1E\x23", # LATIN SMALL LETTER H WITH DOT ABOVE
345
+ "E7+6D" => "\x1E\x41", # LATIN SMALL LETTER M WITH DOT ABOVE
346
+ "E7+6E" => "\x1E\x45", # LATIN SMALL LETTER N WITH DOT ABOVE
347
+ "E7+70" => "\x1E\x57", # LATIN SMALL LETTER P WITH DOT ABOVE
348
+ "E7+72" => "\x1E\x59", # LATIN SMALL LETTER R WITH DOT ABOVE
349
+ "E7+73" => "\x1E\x61", # LATIN SMALL LETTER S WITH DOT ABOVE
350
+ "E7+74" => "\x1E\x6B", # LATIN SMALL LETTER T WITH DOT ABOVE
351
+ "E7+77" => "\x1E\x87", # LATIN SMALL LETTER W WITH DOT ABOVE
352
+ "E7+78" => "\x1E\x8B", # LATIN SMALL LETTER X WITH DOT ABOVE
353
+ "E7+79" => "\x1E\x8F", # LATIN SMALL LETTER Y WITH DOT ABOVE
354
+ "E7+7A" => "\x01\x7C", # LATIN SMALL LETTER Z WITH DOT ABOVE
355
+ "E7+E2+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
356
+ "E7+E2+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
357
+ "E7+E5+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
358
+ "E7+E5+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
359
+ "E7+E9+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
360
+ "E7+E9+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
361
+ "E7+F2+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
362
+ "E7+F2+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
363
+ "E7" => "\x03\x07", # COMBINING DOT ABOVE
364
+ "E8+41" => "\x00\xC4", # LATIN CAPITAL LETTER A WITH DIAERESIS
365
+ "E8+45" => "\x00\xCB", # LATIN CAPITAL LETTER E WITH DIAERESIS
366
+ "E8+48" => "\x1E\x26", # LATIN CAPITAL LETTER H WITH DIAERESIS
367
+ "E8+49" => "\x00\xCF", # LATIN CAPITAL LETTER I WITH DIAERESIS
368
+ "E8+4F" => "\x00\xD6", # LATIN CAPITAL LETTER O WITH DIAERESIS
369
+ "E8+55" => "\x00\xDC", # LATIN CAPITAL LETTER U WITH DIAERESIS
370
+ "E8+57" => "\x1E\x84", # LATIN CAPITAL LETTER W WITH DIAERESIS
371
+ "E8+58" => "\x1E\x8C", # LATIN CAPITAL LETTER X WITH DIAERESIS
372
+ "E8+59" => "\x01\x78", # LATIN CAPITAL LETTER Y WITH DIAERESIS
373
+ "E8+61" => "\x00\xE4", # LATIN SMALL LETTER A WITH DIAERESIS
374
+ "E8+65" => "\x00\xEB", # LATIN SMALL LETTER E WITH DIAERESIS
375
+ "E8+68" => "\x1E\x27", # LATIN SMALL LETTER H WITH DIAERESIS
376
+ "E8+69" => "\x00\xEF", # LATIN SMALL LETTER I WITH DIAERESIS
377
+ "E8+6F" => "\x00\xF6", # LATIN SMALL LETTER O WITH DIAERESIS
378
+ "E8+74" => "\x1E\x97", # LATIN SMALL LETTER T WITH DIAERESIS
379
+ "E8+75" => "\x00\xFC", # LATIN SMALL LETTER U WITH DIAERESIS
380
+ "E8+77" => "\x1E\x85", # LATIN SMALL LETTER W WITH DIAERESIS
381
+ "E8+78" => "\x1E\x8D", # LATIN SMALL LETTER X WITH DIAERESIS
382
+ "E8+79" => "\x00\xFF", # LATIN SMALL LETTER Y WITH DIAERESIS
383
+ "E8+E1+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
384
+ "E8+E1+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
385
+ "E8+E2+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
386
+ "E8+E2+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
387
+ "E8+E2+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
388
+ "E8+E2+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
389
+ "E8+E4+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
390
+ "E8+E4+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
391
+ "E8+E5+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
392
+ "E8+E5+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
393
+ "E8+E5+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
394
+ "E8+E5+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
395
+ "E8+E9+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
396
+ "E8+E9+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
397
+ "E8" => "\x03\x08", # COMBINING DIAERESIS
398
+ "E9+41" => "\x01\xCD", # LATIN CAPITAL LETTER A WITH CARON
399
+ "E9+43" => "\x01\x0C", # LATIN CAPITAL LETTER C WITH CARON
400
+ "E9+44" => "\x01\x0E", # LATIN CAPITAL LETTER D WITH CARON
401
+ "E9+45" => "\x01\x1A", # LATIN CAPITAL LETTER E WITH CARON
402
+ "E9+47" => "\x01\xE6", # LATIN CAPITAL LETTER G WITH CARON
403
+ "E9+49" => "\x01\xCF", # LATIN CAPITAL LETTER I WITH CARON
404
+ "E9+4B" => "\x01\xE8", # LATIN CAPITAL LETTER K WITH CARON
405
+ "E9+4C" => "\x01\x3D", # LATIN CAPITAL LETTER L WITH CARON
406
+ "E9+4E" => "\x01\x47", # LATIN CAPITAL LETTER N WITH CARON
407
+ "E9+4F" => "\x01\xD1", # LATIN CAPITAL LETTER O WITH CARON
408
+ "E9+52" => "\x01\x58", # LATIN CAPITAL LETTER R WITH CARON
409
+ "E9+53" => "\x01\x60", # LATIN CAPITAL LETTER S WITH CARON
410
+ "E9+54" => "\x01\x64", # LATIN CAPITAL LETTER T WITH CARON
411
+ "E9+55" => "\x01\xD3", # LATIN CAPITAL LETTER U WITH CARON
412
+ "E9+5A" => "\x01\x7D", # LATIN CAPITAL LETTER Z WITH CARON
413
+ "E9+61" => "\x01\xCE", # LATIN SMALL LETTER A WITH CARON
414
+ "E9+63" => "\x01\x0D", # LATIN SMALL LETTER C WITH CARON
415
+ "E9+64" => "\x01\x0F", # LATIN SMALL LETTER D WITH CARON
416
+ "E9+65" => "\x01\x1B", # LATIN SMALL LETTER E WITH CARON
417
+ "E9+67" => "\x01\xE7", # LATIN SMALL LETTER G WITH CARON
418
+ "E9+69" => "\x01\xD0", # LATIN SMALL LETTER I WITH CARON
419
+ "E9+6A" => "\x01\xF0", # LATIN SMALL LETTER J WITH CARON
420
+ "E9+6B" => "\x01\xE9", # LATIN SMALL LETTER K WITH CARON
421
+ "E9+6C" => "\x01\x3E", # LATIN SMALL LETTER L WITH CARON
422
+ "E9+6E" => "\x01\x48", # LATIN SMALL LETTER N WITH CARON
423
+ "E9+6F" => "\x01\xD2", # LATIN SMALL LETTER O WITH CARON
424
+ "E9+72" => "\x01\x59", # LATIN SMALL LETTER R WITH CARON
425
+ "E9+73" => "\x01\x61", # LATIN SMALL LETTER S WITH CARON
426
+ "E9+74" => "\x01\x65", # LATIN SMALL LETTER T WITH CARON
427
+ "E9+75" => "\x01\xD4", # LATIN SMALL LETTER U WITH CARON
428
+ "E9+7A" => "\x01\x7E", # LATIN SMALL LETTER Z WITH CARON
429
+ "E9+E7+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
430
+ "E9+E7+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
431
+ "E9+E8+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
432
+ "E9+E8+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
433
+ "E9" => "\x03\x0C", # COMBINING CARON
434
+ "EA+41" => "\x00\xC5", # LATIN CAPITAL LETTER A WITH RING ABOVE
435
+ "EA+55" => "\x01\x6E", # LATIN CAPITAL LETTER U WITH RING ABOVE
436
+ "EA+61" => "\x00\xE5", # LATIN SMALL LETTER A WITH RING ABOVE
437
+ "EA+75" => "\x01\x6F", # LATIN SMALL LETTER U WITH RING ABOVE
438
+ "EA+77" => "\x1E\x98", # LATIN SMALL LETTER W WITH RING ABOVE
439
+ "EA+79" => "\x1E\x99", # LATIN SMALL LETTER Y WITH RING ABOVE
440
+ "EA+E2+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
441
+ "EA+E2+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
442
+ "EA" => "\x03\x0A", # COMBINING RING ABOVE
443
+ "EB" => "\xFE\x20", # COMBINING LIGATURE LEFT HALF
444
+ "EC" => "\xFE\x21", # COMBINING LIGATURE RIGHT HALF
445
+ "ED" => "\x03\x15", # COMBINING COMMA ABOVE RIGHT
446
+ "EE+4F" => "\x01\x50", # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
447
+ "EE+55" => "\x01\x70", # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
448
+ "EE+6F" => "\x01\x51", # LATIN SMALL LETTER O WITH DOUBLE ACUTE
449
+ "EE+75" => "\x01\x71", # LATIN SMALL LETTER U WITH DOUBLE ACUTE
450
+ "EE" => "\x03\x0B", # COMBINING DOUBLE ACUTE ACCENT
451
+ "EF" => "\x03\x10", # COMBINING CANDRABINDU
452
+ "F0+43" => "\x00\xC7", # LATIN CAPITAL LETTER C WITH CEDILLA
453
+ "F0+44" => "\x1E\x10", # LATIN CAPITAL LETTER D WITH CEDILLA
454
+ "F0+47" => "\x01\x22", # LATIN CAPITAL LETTER G WITH CEDILLA
455
+ "F0+48" => "\x1E\x28", # LATIN CAPITAL LETTER H WITH CEDILLA
456
+ "F0+4B" => "\x01\x36", # LATIN CAPITAL LETTER K WITH CEDILLA
457
+ "F0+4C" => "\x01\x3B", # LATIN CAPITAL LETTER L WITH CEDILLA
458
+ "F0+4E" => "\x01\x45", # LATIN CAPITAL LETTER N WITH CEDILLA
459
+ "F0+52" => "\x01\x56", # LATIN CAPITAL LETTER R WITH CEDILLA
460
+ "F0+53" => "\x01\x5E", # LATIN CAPITAL LETTER S WITH CEDILLA
461
+ "F0+54" => "\x01\x62", # LATIN CAPITAL LETTER T WITH CEDILLA
462
+ "F0+63" => "\x00\xE7", # LATIN SMALL LETTER C WITH CEDILLA
463
+ "F0+64" => "\x1E\x11", # LATIN SMALL LETTER D WITH CEDILLA
464
+ "F0+67" => "\x01\x23", # LATIN SMALL LETTER G WITH CEDILLA
465
+ "F0+68" => "\x1E\x29", # LATIN SMALL LETTER H WITH CEDILLA
466
+ "F0+6B" => "\x01\x37", # LATIN SMALL LETTER K WITH CEDILLA
467
+ "F0+6C" => "\x01\x3C", # LATIN SMALL LETTER L WITH CEDILLA
468
+ "F0+6E" => "\x01\x46", # LATIN SMALL LETTER N WITH CEDILLA
469
+ "F0+72" => "\x01\x57", # LATIN SMALL LETTER R WITH CEDILLA
470
+ "F0+73" => "\x01\x5F", # LATIN SMALL LETTER S WITH CEDILLA
471
+ "F0+74" => "\x01\x63", # LATIN SMALL LETTER T WITH CEDILLA
472
+ "F0+E2+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
473
+ "F0+E2+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
474
+ "F0+E6+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
475
+ "F0+E6+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
476
+ "F0" => "\x03\x27", # COMBINING CEDILLA
477
+ "F1+41" => "\x01\x04", # LATIN CAPITAL LETTER A WITH OGONEK
478
+ "F1+45" => "\x01\x18", # LATIN CAPITAL LETTER E WITH OGONEK
479
+ "F1+49" => "\x01\x2E", # LATIN CAPITAL LETTER I WITH OGONEK
480
+ "F1+4F" => "\x01\xEA", # LATIN CAPITAL LETTER O WITH OGONEK
481
+ "F1+55" => "\x01\x72", # LATIN CAPITAL LETTER U WITH OGONEK
482
+ "F1+61" => "\x01\x05", # LATIN SMALL LETTER A WITH OGONEK
483
+ "F1+65" => "\x01\x19", # LATIN SMALL LETTER E WITH OGONEK
484
+ "F1+69" => "\x01\x2F", # LATIN SMALL LETTER I WITH OGONEK
485
+ "F1+6F" => "\x01\xEB", # LATIN SMALL LETTER O WITH OGONEK
486
+ "F1+75" => "\x01\x73", # LATIN SMALL LETTER U WITH OGONEK
487
+ "F1+E5+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
488
+ "F1+E5+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
489
+ "F1" => "\x03\x28", # COMBINING OGONEK
490
+ "F2+41" => "\x1E\xA0", # LATIN CAPITAL LETTER A WITH DOT BELOW
491
+ "F2+42" => "\x1E\x04", # LATIN CAPITAL LETTER B WITH DOT BELOW
492
+ "F2+44" => "\x1E\x0C", # LATIN CAPITAL LETTER D WITH DOT BELOW
493
+ "F2+45" => "\x1E\xB8", # LATIN CAPITAL LETTER E WITH DOT BELOW
494
+ "F2+48" => "\x1E\x24", # LATIN CAPITAL LETTER H WITH DOT BELOW
495
+ "F2+49" => "\x1E\xCA", # LATIN CAPITAL LETTER I WITH DOT BELOW
496
+ "F2+4B" => "\x1E\x32", # LATIN CAPITAL LETTER K WITH DOT BELOW
497
+ "F2+4C" => "\x1E\x36", # LATIN CAPITAL LETTER L WITH DOT BELOW
498
+ "F2+4D" => "\x1E\x42", # LATIN CAPITAL LETTER M WITH DOT BELOW
499
+ "F2+4E" => "\x1E\x46", # LATIN CAPITAL LETTER N WITH DOT BELOW
500
+ "F2+4F" => "\x1E\xCC", # LATIN CAPITAL LETTER O WITH DOT BELOW
501
+ "F2+52" => "\x1E\x5A", # LATIN CAPITAL LETTER R WITH DOT BELOW
502
+ "F2+53" => "\x1E\x62", # LATIN CAPITAL LETTER S WITH DOT BELOW
503
+ "F2+54" => "\x1E\x6C", # LATIN CAPITAL LETTER T WITH DOT BELOW
504
+ "F2+55" => "\x1E\xE4", # LATIN CAPITAL LETTER U WITH DOT BELOW
505
+ "F2+56" => "\x1E\x7E", # LATIN CAPITAL LETTER V WITH DOT BELOW
506
+ "F2+57" => "\x1E\x88", # LATIN CAPITAL LETTER W WITH DOT BELOW
507
+ "F2+59" => "\x1E\xF4", # LATIN CAPITAL LETTER Y WITH DOT BELOW
508
+ "F2+5A" => "\x1E\x92", # LATIN CAPITAL LETTER Z WITH DOT BELOW
509
+ "F2+61" => "\x1E\xA1", # LATIN SMALL LETTER A WITH DOT BELOW
510
+ "F2+62" => "\x1E\x05", # LATIN SMALL LETTER B WITH DOT BELOW
511
+ "F2+64" => "\x1E\x0D", # LATIN SMALL LETTER D WITH DOT BELOW
512
+ "F2+65" => "\x1E\xB9", # LATIN SMALL LETTER E WITH DOT BELOW
513
+ "F2+68" => "\x1E\x25", # LATIN SMALL LETTER H WITH DOT BELOW
514
+ "F2+69" => "\x1E\xCB", # LATIN SMALL LETTER I WITH DOT BELOW
515
+ "F2+6B" => "\x1E\x33", # LATIN SMALL LETTER K WITH DOT BELOW
516
+ "F2+6C" => "\x1E\x37", # LATIN SMALL LETTER L WITH DOT BELOW
517
+ "F2+6D" => "\x1E\x43", # LATIN SMALL LETTER M WITH DOT BELOW
518
+ "F2+6E" => "\x1E\x47", # LATIN SMALL LETTER N WITH DOT BELOW
519
+ "F2+6F" => "\x1E\xCD", # LATIN SMALL LETTER O WITH DOT BELOW
520
+ "F2+72" => "\x1E\x5B", # LATIN SMALL LETTER R WITH DOT BELOW
521
+ "F2+73" => "\x1E\x63", # LATIN SMALL LETTER S WITH DOT BELOW
522
+ "F2+74" => "\x1E\x6D", # LATIN SMALL LETTER T WITH DOT BELOW
523
+ "F2+75" => "\x1E\xE5", # LATIN SMALL LETTER U WITH DOT BELOW
524
+ "F2+76" => "\x1E\x7F", # LATIN SMALL LETTER V WITH DOT BELOW
525
+ "F2+77" => "\x1E\x89", # LATIN SMALL LETTER W WITH DOT BELOW
526
+ "F2+79" => "\x1E\xF5", # LATIN SMALL LETTER Y WITH DOT BELOW
527
+ "F2+7A" => "\x1E\x93", # LATIN SMALL LETTER Z WITH DOT BELOW
528
+ "F2+E3+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
529
+ "F2+E3+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
530
+ "F2+E3+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
531
+ "F2+E3+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
532
+ "F2+E3+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
533
+ "F2+E3+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
534
+ "F2+E5+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
535
+ "F2+E5+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
536
+ "F2+E5+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
537
+ "F2+E5+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
538
+ "F2+E6+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
539
+ "F2+E6+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
540
+ "F2+E7+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
541
+ "F2+E7+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
542
+ "F2" => "\x03\x23", # COMBINING DOT BELOW
543
+ "F3+55" => "\x1E\x72", # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
544
+ "F3+75" => "\x1E\x73", # LATIN SMALL LETTER U WITH DIAERESIS BELOW
545
+ "F3" => "\x03\x24", # COMBINING DIAERESIS BELOW
546
+ "F4+41" => "\x1E\x00", # LATIN CAPITAL LETTER A WITH RING BELOW
547
+ "F4+61" => "\x1E\x01", # LATIN SMALL LETTER A WITH RING BELOW
548
+ "F4" => "\x03\x25", # COMBINING RING BELOW
549
+ "F5" => "\x03\x33", # COMBINING DOUBLE LOW LINE
550
+ "F6+42" => "\x1E\x06", # LATIN CAPITAL LETTER B WITH LINE BELOW
551
+ "F6+44" => "\x1E\x0E", # LATIN CAPITAL LETTER D WITH LINE BELOW
552
+ "F6+4B" => "\x1E\x34", # LATIN CAPITAL LETTER K WITH LINE BELOW
553
+ "F6+4C" => "\x1E\x3A", # LATIN CAPITAL LETTER L WITH LINE BELOW
554
+ "F6+4E" => "\x1E\x48", # LATIN CAPITAL LETTER N WITH LINE BELOW
555
+ "F6+52" => "\x1E\x5E", # LATIN CAPITAL LETTER R WITH LINE BELOW
556
+ "F6+54" => "\x1E\x6E", # LATIN CAPITAL LETTER T WITH LINE BELOW
557
+ "F6+5A" => "\x1E\x94", # LATIN CAPITAL LETTER Z WITH LINE BELOW
558
+ "F6+62" => "\x1E\x07", # LATIN SMALL LETTER B WITH LINE BELOW
559
+ "F6+64" => "\x1E\x0F", # LATIN SMALL LETTER D WITH LINE BELOW
560
+ "F6+68" => "\x1E\x96", # LATIN SMALL LETTER H WITH LINE BELOW
561
+ "F6+6B" => "\x1E\x35", # LATIN SMALL LETTER K WITH LINE BELOW
562
+ "F6+6C" => "\x1E\x3B", # LATIN SMALL LETTER L WITH LINE BELOW
563
+ "F6+6E" => "\x1E\x49", # LATIN SMALL LETTER N WITH LINE BELOW
564
+ "F6+72" => "\x1E\x5F", # LATIN SMALL LETTER R WITH LINE BELOW
565
+ "F6+74" => "\x1E\x6F", # LATIN SMALL LETTER T WITH LINE BELOW
566
+ "F6+7A" => "\x1E\x95", # LATIN SMALL LETTER Z WITH LINE BELOW
567
+ "F6" => "\x03\x32", # COMBINING LOW LINE
568
+ "F7" => "\x03\x26", # COMBINING COMMA BELOW
569
+ "F8" => "\x03\x21", # COMBINING OGONEK
570
+ "F9+48" => "\x1E\x2A", # LATIN CAPITAL LETTER H WITH BREVE BELOW
571
+ "F9+68" => "\x1E\x2B", # LATIN SMALL LETTER H WITH BREVE BELOW
572
+ "F9" => "\x03\x2E", # COMBINING BREVE BELOW
573
+ "FA" => "\xFE\x22", # COMBINING DOUBLE TILDE LEFT HALF
574
+ "FB" => "\xFE\x23" # COMBINING DOUBLE TILDE RIGHT HALF
575
+ }
576
+
577
# Prepares a converter for the given target encoding.
#
# to_charset - name of the encoding the conversion methods hand to Iconv as
#              the destination (defaults to 'UTF-8').
#
# The lookup table is assembled from the two class-level maps; the combining
# map is folded in last, so it wins if a key appears in both.
def initialize(to_charset = 'UTF-8')
  table = {}
  [@@non_combining, @@combining].each { |mapping| table.merge!(mapping) }

  @ansi_to_utf8 = table
  @to_charset = to_charset
end
583
+
584
# Converts an ANSEL byte string to the charset given to the constructor.
#
# string - the ANSEL-encoded bytes to convert.
#
# Byte handling:
# * 0x00-0x7F  is copied through unchanged.
# * 0x88-0xC8  is looked up as a single-byte key; the byte that follows it is
#              skipped.
# * 0xE0-0xFB  is a combining lead byte; the longest key that matches the lead
#              byte plus the next two, one or zero bytes is used.
# * any other  byte becomes the table's 'ERR' entry; the following byte is
#              skipped, plus one more if that byte is >= 0xE0.
#
# Every mapped character is first converted to UTF-8 and the assembled text is
# converted to the target charset exactly once at the end. Converting each
# character straight to the target and then converting the whole buffer again
# would double-convert the text for any non-UTF-8 target.
#
# Returns the converted string. Iconv errors are not rescued here.
def iconv(string)
  # Table entries are UTF-16 code units; unknown keys fall back to 'ERR'.
  to_utf8 = lambda do |key|
    utf16 = @ansi_to_utf8.has_key?(key) ? @ansi_to_utf8[key] : @ansi_to_utf8['ERR']
    ::Iconv.conv('UTF-8', 'UTF-16', utf16)
  end
  # Upper-case hex for each byte, matching the table's key format ("F2", "E3", ...).
  hex = lambda { |str| str.unpack('C*').map { |code| code.to_s(16).upcase } }

  output = ''
  scanner = StringScanner.new(string)

  until scanner.eos?
    byte = scanner.get_byte
    # unpack gives the numeric byte value on every Ruby version, whereas
    # String#[] with an integer index only returns a number on Ruby 1.8.
    code = byte.unpack('C').first

    if code <= 0x7F
      output << byte
    elsif code >= 0x88 && code <= 0xC8
      output << to_utf8.call(hex.call(byte).first)
      scanner.get_byte # ignore the next byte
    elsif code >= 0xE0 && code <= 0xFB
      # Try 3 bytes, then 2 bytes, then 1 byte; stop at the first table hit.
      matched = [2, 1, 0].any? do |extra|
        key = (hex.call(byte) + hex.call(scanner.peek(extra))).join('+')
        next false unless @ansi_to_utf8.has_key?(key)

        output << to_utf8.call(key)
        extra.times { scanner.get_byte }
        true
      end
      # A lead byte with no table entry is reported instead of silently dropped.
      output << to_utf8.call('ERR') unless matched
    else
      output << to_utf8.call('ERR')
      # Skip the next byte, and one more when that byte is >= 0xE0. The nil
      # check covers an invalid byte that is the last byte of the input.
      skipped = scanner.get_byte
      scanner.get_byte if skipped && skipped.unpack('C').first >= 0xE0
    end
  end

  @to_charset == 'UTF-8' ? output : ::Iconv.conv(@to_charset, 'UTF-8', output)
end
615
+
616
# Converts one ANSEL character (one or more bytes) to the target charset.
#
# char - a string holding the bytes of a single ANSEL character.
#
# Returns char itself when it is empty or a single byte <= 0x7F. Otherwise
# returns the Iconv conversion (UTF-16 to the target charset) of the matching
# table entry, or of the table's 'ERR' entry when there is no match.
#
# Key selection:
# * first byte 0x80-0xDF: only the first byte is used as the key.
# * first byte >= 0xE0:   all bytes are joined with '+' (e.g. "F2+E3+41").
# * anything else (a multi-byte string starting with a byte <= 0x7F) has no
#   key and therefore yields the 'ERR' entry.
def convert_char(char)
  # An empty string has no first byte to inspect; hand it back untouched.
  return char if char.empty?

  # unpack gives the numeric byte values on every Ruby version, whereas
  # String#[] with an integer index only returns a number on Ruby 1.8.
  codes = char.unpack('C*')
  lead = codes.first
  return char if char.size <= 1 && lead <= 0x7f

  hex_key =
    if lead >= 0xE0
      codes.map { |code| code.to_s(16).upcase }.join('+')
    elsif lead > 0x7f
      lead.to_s(16).upcase
    end

  utf16 = @ansi_to_utf8.has_key?(hex_key) ? @ansi_to_utf8[hex_key] : @ansi_to_utf8['ERR']
  ::Iconv.conv(@to_charset, 'UTF-16', utf16)
end
628
+
629
+ end
630
+
631
+ end
@@ -0,0 +1,101 @@
1
+ require 'test_helper'
2
+
3
# Shoulda-style tests for ANSEL::Iconv: ASCII pass-through, replacement of
# invalid bytes, the non-combining and combining ANSEL mappings, mixed text,
# and conversion to UTF-16 from several source encodings.
+ class ANSEL::IconvTest < Test::Unit::TestCase
4
# NOTE(review): FIXTURE_PATH is not referenced anywhere in this class --
# presumably a leftover; confirm before removing.
+ FIXTURE_PATH = File.dirname(__FILE__) + "/../../../fixtures/gedcom"
5
+
6
# Builds a converter targeting UTF-8 and stores it in @ansel for the tests below.
+ def setup
7
+ @ansel = ANSEL::Iconv.new 'UTF-8'
8
+ end
9
+
10
+ should "return ASCII values without conversion" do
11
+ assert_equal " ", @ansel.iconv("\x20")
12
+ assert_equal "x", @ansel.iconv("\x78")
13
+ end
14
+
15
# "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER.
+ should "return the unicode replacement character for invalid characters" do
16
+ assert_equal "\xEF\xBF\xBD", @ansel.iconv("\xBE\x00")
17
+ assert_equal "\xEF\xBF\xBD", @ansel.iconv("\xD1\x00")
18
+ end
19
+
20
# Every non-combining input below is written as the ANSEL byte followed by \x00.
# NOTE(review): the expected values for \x88, \x89, \x8D and \x8E show up as
# empty strings here -- presumably non-printing characters; confirm against
# the mapping table.
# NOTE(review): the \xAB assertion appears twice in a row.
+ should "return UTF-8 characters for valid ANSEL characters" do
21
+ # ANSEL non-combining mappings
22
+ assert_equal "", @ansel.iconv("\x88\x00")
23
+ assert_equal "", @ansel.iconv("\x89\x00")
24
+ assert_equal "", @ansel.iconv("\x8D\x00")
25
+ assert_equal "", @ansel.iconv("\x8E\x00")
26
+ assert_equal "Ł", @ansel.iconv("\xA1\x00")
27
+ assert_equal "Ø", @ansel.iconv("\xA2\x00")
28
+ assert_equal "Đ", @ansel.iconv("\xA3\x00")
29
+ assert_equal "Þ", @ansel.iconv("\xA4\x00")
30
+ assert_equal "Æ", @ansel.iconv("\xA5\x00")
31
+ assert_equal "Œ", @ansel.iconv("\xA6\x00")
32
+ assert_equal "ʹ", @ansel.iconv("\xA7\x00")
33
+ assert_equal "·", @ansel.iconv("\xA8\x00")
34
+ assert_equal "♭", @ansel.iconv("\xA9\x00")
35
+ assert_equal "®", @ansel.iconv("\xAA\x00")
36
+ assert_equal "±", @ansel.iconv("\xAB\x00")
37
+ assert_equal "±", @ansel.iconv("\xAB\x00")
38
+ assert_equal "Ơ", @ansel.iconv("\xAC\x00")
39
+ assert_equal "Ư", @ansel.iconv("\xAD\x00")
40
+ assert_equal "ʼ", @ansel.iconv("\xAE\x00")
41
+ assert_equal "ʻ", @ansel.iconv("\xB0\x00")
42
+ assert_equal "ł", @ansel.iconv("\xB1\x00")
43
+ assert_equal "ø", @ansel.iconv("\xB2\x00")
44
+ assert_equal "đ", @ansel.iconv("\xB3\x00")
45
+ assert_equal "þ", @ansel.iconv("\xB4\x00")
46
+ assert_equal "æ", @ansel.iconv("\xB5\x00")
47
+ assert_equal "œ", @ansel.iconv("\xB6\x00")
48
+ assert_equal "ʺ", @ansel.iconv("\xB7\x00")
49
+ assert_equal "ı", @ansel.iconv("\xB8\x00")
50
+ assert_equal "£", @ansel.iconv("\xB9\x00")
51
+ assert_equal "ð", @ansel.iconv("\xBA\x00")
52
+ assert_equal "ơ", @ansel.iconv("\xBC\x00")
53
+ assert_equal "ư", @ansel.iconv("\xBD\x00")
54
+ assert_equal "°", @ansel.iconv("\xC0\x00")
55
+ assert_equal "ℓ", @ansel.iconv("\xC1\x00")
56
+ assert_equal "℗", @ansel.iconv("\xC2\x00")
57
+ assert_equal "©", @ansel.iconv("\xC3\x00")
58
+ assert_equal "♯", @ansel.iconv("\xC4\x00")
59
+ assert_equal "¿", @ansel.iconv("\xC5\x00")
60
+ assert_equal "¡", @ansel.iconv("\xC6\x00")
61
+ assert_equal "ß", @ansel.iconv("\xC7\x00")
62
+ assert_equal "€", @ansel.iconv("\xC8\x00")
63
+
64
+ # ANSEL combining characters
65
+ assert_equal "Ả", @ansel.iconv("\xE0\x41")
66
+ assert_equal "Ḻ", @ansel.iconv("\xF6\x4C")
67
+ assert_equal "̲", @ansel.iconv("\xF6")
68
+ assert_equal "̮", @ansel.iconv("\xF9")
69
+ assert_equal "Ḫ", @ansel.iconv("\xF9\x48")
70
+ assert_equal "Ậ", @ansel.iconv("\xF2\xE3\x41")
71
+ assert_equal "ỵ", @ansel.iconv("\xF2\x79")
72
+ assert_equal "̣", @ansel.iconv("\xF2")
73
+ end
74
+
75
+ should "convert full text correctly" do
76
+ assert_equal "What is the question?", @ansel.iconv("What is the question?")
77
+ assert_equal "¿What is the question?", @ansel.iconv("\xC5\x00What is the question?")
78
+ assert_equal "© 1994", @ansel.iconv("\xC3\x00 1994")
79
+ assert_equal "£4.59", @ansel.iconv("\xB9\x004.59")
80
+ end
81
+
82
# In the UTF-16 expectations below, "\376\377" is the byte-order mark (FE FF)
# and each ASCII letter is preceded by a \000 byte.
# NOTE(review): these tests call ANSEL::Iconv.new with two arguments, while
# the initialize(to_charset = 'UTF-8') in the library takes one -- presumably
# the second argument is a source charset accepted by another constructor; confirm.
+ should "convert ANSEL to UTF-16" do
83
+ converter = ANSEL::Iconv.new 'UTF-16', 'ANSEL'
84
+ assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
85
+ end
86
+
87
+ should "convert ASCII to UTF-16" do
88
+ converter = ANSEL::Iconv.new 'UTF-16', 'ASCII'
89
+ assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
90
+ end
91
+
92
+ should "convert UTF-8 to UTF-16" do
93
+ converter = ANSEL::Iconv.new 'UTF-16', 'UTF-8'
94
+ assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
95
+ end
96
+
97
+ should "convert UTF-16 to UTF-16" do
98
+ converter = ANSEL::Iconv.new 'UTF-16', 'UTF-16'
99
+ assert_equal "\376\377\000a\000b\000c", converter.iconv("\376\377\000a\000b\000c")
100
+ end
101
+ end
@@ -0,0 +1,5 @@
1
+ $:.unshift(File.dirname(__FILE__) + "/../lib/")
2
+ require 'rubygems'
3
+ require 'test/unit'
4
+ require 'shoulda'
5
+ require 'ansel_iconv'
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ansel_iconv
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Keith Morrison
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-25 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activesupport
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.1.0
24
+ version:
25
+ description: Convert ANSEL encoded text to any other encoding available to Iconv
26
+ email: keithm@infused.org
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.txt
33
+ files:
34
+ - History.txt
35
+ - README.txt
36
+ - VERSION.yml
37
+ - lib/ansel_iconv.rb
38
+ - test/ansel_iconv_test.rb
39
+ - test/test_helper.rb
40
+ has_rdoc: true
41
+ homepage:
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options:
46
+ - --inline-source
47
+ - --charset=UTF-8
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ requirements: []
63
+
64
+ rubyforge_project:
65
+ rubygems_version: 1.3.5
66
+ signing_key:
67
+ specification_version: 2
68
+ summary: Convert ANSEL encoded text
69
+ test_files: []
70
+