ansel_iconv 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,5 @@
1
+ === 1.0.0 / 2009-03-23
2
+
3
+ * Initial public release
4
+
5
+
data/README.txt ADDED
@@ -0,0 +1,37 @@
1
+ = ANSEL::Iconv
2
+
3
+ http://github.com/infused/ansel_iconv/tree/master
4
+
5
+ == DESCRIPTION:
6
+
7
+ Convert ANSEL-encoded text to any other encoding available to Iconv.
8
+
9
+ == INSTALL:
10
+
11
+ gem install infused-ansel_iconv --source http://gems.github.com
12
+
13
+
14
+ == LICENSE:
15
+
16
+ (The MIT License)
17
+
18
+ Copyright (c) 2006-2009 Keith Morrison <keithm@infused.org>
19
+
20
+ Permission is hereby granted, free of charge, to any person obtaining
21
+ a copy of this software and associated documentation files (the
22
+ 'Software'), to deal in the Software without restriction, including
23
+ without limitation the rights to use, copy, modify, merge, publish,
24
+ distribute, sublicense, and/or sell copies of the Software, and to
25
+ permit persons to whom the Software is furnished to do so, subject to
26
+ the following conditions:
27
+
28
+ The above copyright notice and this permission notice shall be
29
+ included in all copies or substantial portions of the Software.
30
+
31
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
32
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
34
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
35
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
36
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
37
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 1
3
+ :minor: 0
4
+ :patch: 2
@@ -0,0 +1,631 @@
1
+ require 'activesupport'
2
+ require 'iconv'
3
+
4
+ module ANSEL
5
# Facade over two converters that share an `iconv` method.
#
# The converter is picked once, at construction time:
# * from == 'ANSEL' (the default) -> Convert.new(to), the ANSEL converter
#   defined in this file
# * any other source encoding     -> ::Iconv.new(to, from), the top-level
#   Iconv class
class Iconv
  # @param to [String] target encoding name, passed through to the converter
  # @param from [String] source encoding name; only the exact string 'ANSEL'
  #   selects the ANSEL converter (the comparison is case-sensitive)
  def initialize(to, from = 'ANSEL')
    @converter = from == 'ANSEL' ? Convert.new(to) : ::Iconv.new(to, from)
  end

  # Forwards the call, with all arguments and any block, to the converter
  # chosen in #initialize and returns whatever that converter returns.
  #
  # Written as an explicit method rather than the ActiveSupport `delegate`
  # macro so that this class does not need ActiveSupport to be loaded just
  # to expose a single pass-through method.
  def iconv(*args, &block)
    @converter.iconv(*args, &block)
  end
end
12
+
13
+ class Convert
14
+ @@non_combining = {
15
+ "ERR" => "\xFF\xFD", # � - REPLACEMENT CHARACTER
16
+ "88" => "", # NON-SORT BEGIN / START OF STRING
17
+ "89" => "", # NON-SORT END / STRING TERMINATOR
18
+ "8D" => "", # JOINER / ZERO WIDTH JOINER
19
+ "8E" => "", # NON-JOINER / ZERO WIDTH NON-JOINER
20
+ "A1" => "\x01\x41", # Ł - UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH STROKE
21
+ "A2" => "\x00\xD8", # Ø - UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER O WITH STROKE
22
+ "A3" => "\x01\x10", # Đ - UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER D WITH STROKE
23
+ "A4" => "\x00\xDE", # Þ - UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER THORN (Icelandic)
24
+ "A5" => "\x00\xC6", # Æ - UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE AE
25
+ "A6" => "\x01\x52", # Œ - UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE OE
26
+ "A7" => "\x02\xB9", # ʹ - SOFT SIGN, PRIME / MODIFIER LETTER PRIME
27
+ "A8" => "\x00\xB7", # · - MIDDLE DOT
28
+ "A9" => "\x26\x6D", # ♭ - MUSIC FLAT SIGN
29
+ "AA" => "\x00\xAE", # ® - PATENT MARK / REGISTERED SIGN
30
+ "AB" => "\x00\xB1", # ± - PLUS OR MINUS / PLUS-MINUS SIGN
31
+ "AC" => "\x01\xA0", # Ơ - UPPERCASE O-HOOK / LATIN CAPITAL LETTER O WITH HORN
32
+ "AD" => "\x01\xAF", # Ư - UPPERCASE U-HOOK / LATIN CAPITAL LETTER U WITH HORN
33
+ "AE" => "\x02\xBC", # ʼ - ALIF / MODIFIER LETTER APOSTROPHE
34
+ "B0" => "\x02\xBB", # ʻ - AYN / MODIFIER LETTER TURNED COMMA
35
+ "B1" => "\x01\x42", # ł - LOWERCASE POLISH L / LATIN SMALL LETTER L WITH STROKE
36
+ "B2" => "\x00\xF8", # ø - LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O WITH STROKE
37
+ "B3" => "\x01\x11", # đ - LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER D WITH STROKE
38
+ "B4" => "\x00\xFE", # þ - LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER THORN (Icelandic)
39
+ "B5" => "\x00\xE6", # æ - LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE AE
40
+ "B6" => "\x01\x53", # œ - LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE OE
41
+ "B7" => "\x02\xBA", # ʺ - HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE PRIME
42
+ "B8" => "\x01\x31", # ı - LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS I
43
+ "B9" => "\x00\xA3", # £ - BRITISH POUND / POUND SIGN
44
+ "BA" => "\x00\xF0", # ð - LOWERCASE ETH / LATIN SMALL LETTER ETH (Icelandic)
45
+ "BC" => "\x01\xA1", # ơ - LOWERCASE O-HOOK / LATIN SMALL LETTER O WITH HORN
46
+ "BD" => "\x01\xB0", # ư - LOWERCASE U-HOOK / LATIN SMALL LETTER U WITH HORN
47
+ "C0" => "\x00\xB0", # ° - DEGREE SIGN
48
+ "C1" => "\x21\x13", # ℓ - SCRIPT SMALL L
49
+ "C2" => "\x21\x17", # ℗ - SOUND RECORDING COPYRIGHT
50
+ "C3" => "\x00\xA9", # © - COPYRIGHT SIGN
51
+ "C4" => "\x26\x6F", # ♯ - MUSIC SHARP SIGN
52
+ "C5" => "\x00\xBF", # ¿ - INVERTED QUESTION MARK
53
+ "C6" => "\x00\xA1", # ¡ - INVERTED EXCLAMATION MARK
54
+ "C7" => "\x00\xDF", # ß - ESZETT SYMBOL
55
+ "C8" => "\x20\xAC" # € - EURO SIGN
56
+ }
57
+
58
+ @@combining = {
59
+ "E0+41" => "\x1E\xA2", # Ả - LATIN CAPITAL LETTER A WITH HOOK ABOVE
60
+ "E0+45" => "\x1E\xBA", # LATIN CAPITAL LETTER E WITH HOOK ABOVE
61
+ "E0+49" => "\x1E\xC8", # LATIN CAPITAL LETTER I WITH HOOK ABOVE
62
+ "E0+4F" => "\x1E\xCE", # LATIN CAPITAL LETTER O WITH HOOK ABOVE
63
+ "E0+55" => "\x1E\xE6", # LATIN CAPITAL LETTER U WITH HOOK ABOVE
64
+ "E0+59" => "\x1E\xF6", # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
65
+ "E0+61" => "\x1E\xA3", # LATIN SMALL LETTER A WITH HOOK ABOVE
66
+ "E0+65" => "\x1E\xBB", # LATIN SMALL LETTER E WITH HOOK ABOVE
67
+ "E0+69" => "\x1E\xC9", # LATIN SMALL LETTER I WITH HOOK ABOVE
68
+ "E0+6F" => "\x1E\xCF", # LATIN SMALL LETTER O WITH HOOK ABOVE
69
+ "E0+75" => "\x1E\xE7", # LATIN SMALL LETTER U WITH HOOK ABOVE
70
+ "E0+79" => "\x1E\xF7", # LATIN SMALL LETTER Y WITH HOOK ABOVE
71
+ "E0+E3+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
72
+ "E0+E3+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
73
+ "E0+E3+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
74
+ "E0+E3+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
75
+ "E0+E3+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
76
+ "E0+E3+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
77
+ "E0+E6+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
78
+ "E0+E6+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
79
+ "E0" => "\x03\x09", # COMBINING HOOK ABOVE
80
+ "E1+41" => "\x00\xC0", # LATIN CAPITAL LETTER A WITH GRAVE
81
+ "E1+45" => "\x00\xC8", # LATIN CAPITAL LETTER E WITH GRAVE
82
+ "E1+49" => "\x00\xCC", # LATIN CAPITAL LETTER I WITH GRAVE
83
+ "E1+4F" => "\x00\xD2", # LATIN CAPITAL LETTER O WITH GRAVE
84
+ "E1+55" => "\x00\xD9", # LATIN CAPITAL LETTER U WITH GRAVE
85
+ "E1+57" => "\x1E\x80", # LATIN CAPITAL LETTER W WITH GRAVE
86
+ "E1+59" => "\x1E\xF2", # LATIN CAPITAL LETTER Y WITH GRAVE
87
+ "E1+61" => "\x00\xE0", # LATIN SMALL LETTER A WITH GRAVE
88
+ "E1+65" => "\x00\xE8", # LATIN SMALL LETTER E WITH GRAVE
89
+ "E1+69" => "\x00\xEC", # LATIN SMALL LETTER I WITH GRAVE
90
+ "E1+6F" => "\x00\xF2", # LATIN SMALL LETTER O WITH GRAVE
91
+ "E1+75" => "\x00\xF9", # LATIN SMALL LETTER U WITH GRAVE
92
+ "E1+77" => "\x1E\x81", # LATIN SMALL LETTER W WITH GRAVE
93
+ "E1+79" => "\x1E\xF3", # LATIN SMALL LETTER Y WITH GRAVE
94
+ "E1+E3+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
95
+ "E1+E3+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
96
+ "E1+E3+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
97
+ "E1+E3+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
98
+ "E1+E3+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
99
+ "E1+E3+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
100
+ "E1+E5+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
101
+ "E1+E5+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
102
+ "E1+E5+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
103
+ "E1+E5+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
104
+ "E1+E6+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
105
+ "E1+E6+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
106
+ "E1+E8+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
107
+ "E1+E8+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
108
+ "E1" => "\x03\x00", # COMBINING GRAVE ACCENT
109
+ "E2+41" => "\x00\xC1", # LATIN CAPITAL LETTER A WITH ACUTE
110
+ "E2+43" => "\x01\x06", # LATIN CAPITAL LETTER C WITH ACUTE
111
+ "E2+45" => "\x00\xC9", # LATIN CAPITAL LETTER E WITH ACUTE
112
+ "E2+47" => "\x01\xF4", # LATIN CAPITAL LETTER G WITH ACUTE
113
+ "E2+49" => "\x00\xCD", # LATIN CAPITAL LETTER I WITH ACUTE
114
+ "E2+4B" => "\x1E\x30", # LATIN CAPITAL LETTER K WITH ACUTE
115
+ "E2+4C" => "\x01\x39", # LATIN CAPITAL LETTER L WITH ACUTE
116
+ "E2+4D" => "\x1E\x3E", # LATIN CAPITAL LETTER M WITH ACUTE
117
+ "E2+4E" => "\x01\x43", # LATIN CAPITAL LETTER N WITH ACUTE
118
+ "E2+4F" => "\x00\xD3", # LATIN CAPITAL LETTER O WITH ACUTE
119
+ "E2+50" => "\x1E\x54", # LATIN CAPITAL LETTER P WITH ACUTE
120
+ "E2+52" => "\x01\x54", # LATIN CAPITAL LETTER R WITH ACUTE
121
+ "E2+53" => "\x01\x5A", # LATIN CAPITAL LETTER S WITH ACUTE
122
+ "E2+55" => "\x00\xDA", # LATIN CAPITAL LETTER U WITH ACUTE
123
+ "E2+57" => "\x1E\x82", # LATIN CAPITAL LETTER W WITH ACUTE
124
+ "E2+59" => "\x00\xDD", # LATIN CAPITAL LETTER Y WITH ACUTE
125
+ "E2+5A" => "\x01\x79", # LATIN CAPITAL LETTER Z WITH ACUTE
126
+ "E2+61" => "\x00\xE1", # LATIN SMALL LETTER A WITH ACUTE
127
+ "E2+63" => "\x01\x07", # LATIN SMALL LETTER C WITH ACUTE
128
+ "E2+65" => "\x00\xE9", # LATIN SMALL LETTER E WITH ACUTE
129
+ "E2+67" => "\x01\xF5", # LATIN SMALL LETTER G WITH ACUTE
130
+ "E2+69" => "\x00\xED", # LATIN SMALL LETTER I WITH ACUTE
131
+ "E2+6B" => "\x1E\x31", # LATIN SMALL LETTER K WITH ACUTE
132
+ "E2+6C" => "\x01\x3A", # LATIN SMALL LETTER L WITH ACUTE
133
+ "E2+6D" => "\x1E\x3F", # LATIN SMALL LETTER M WITH ACUTE
134
+ "E2+6E" => "\x01\x44", # LATIN SMALL LETTER N WITH ACUTE
135
+ "E2+6F" => "\x00\xF3", # LATIN SMALL LETTER O WITH ACUTE
136
+ "E2+70" => "\x1E\x55", # LATIN SMALL LETTER P WITH ACUTE
137
+ "E2+72" => "\x01\x55", # LATIN SMALL LETTER R WITH ACUTE
138
+ "E2+73" => "\x01\x5B", # LATIN SMALL LETTER S WITH ACUTE
139
+ "E2+75" => "\x00\xFA", # LATIN SMALL LETTER U WITH ACUTE
140
+ "E2+77" => "\x1E\x83", # LATIN SMALL LETTER W WITH ACUTE
141
+ "E2+79" => "\x00\xFD", # LATIN SMALL LETTER Y WITH ACUTE
142
+ "E2+7A" => "\x01\x7A", # LATIN SMALL LETTER Z WITH ACUTE
143
+ "E2+A5" => "\x01\xFC", # LATIN CAPITAL LETTER AE WITH ACUTE
144
+ "E2+B5" => "\x01\xFD", # LATIN SMALL LETTER AE WITH ACUTE
145
+ "E2+E3+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
146
+ "E2+E3+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
147
+ "E2+E3+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
148
+ "E2+E3+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
149
+ "E2+E3+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
150
+ "E2+E3+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
151
+ "E2+E4+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
152
+ "E2+E4+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
153
+ "E2+E4+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
154
+ "E2+E4+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
155
+ "E2+E5+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
156
+ "E2+E5+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
157
+ "E2+E5+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
158
+ "E2+E5+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
159
+ "E2+E6+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
160
+ "E2+E6+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
161
+ "E2+E7+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
162
+ "E2+E7+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
163
+ "E2+E8+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
164
+ "E2+E8+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
165
+ "E2+E8+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
166
+ "E2+E8+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
167
+ "E2+EA+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
168
+ "E2+EA+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
169
+ "E2+F0+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
170
+ "E2+F0+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
171
+ "E2" => "\x03\x01", # COMBINING ACUTE ACCENT
172
+ "E3+41" => "\x00\xC2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
173
+ "E3+43" => "\x01\x08", # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
174
+ "E3+45" => "\x00\xCA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
175
+ "E3+47" => "\x01\x1C", # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
176
+ "E3+48" => "\x01\x24", # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
177
+ "E3+49" => "\x00\xCE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
178
+ "E3+4A" => "\x01\x34", # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
179
+ "E3+4F" => "\x00\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
180
+ "E3+53" => "\x01\x5C", # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
181
+ "E3+55" => "\x00\xDB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
182
+ "E3+57" => "\x01\x74", # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
183
+ "E3+59" => "\x01\x76", # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
184
+ "E3+5A" => "\x1E\x90", # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
185
+ "E3+61" => "\x00\xE2", # LATIN SMALL LETTER A WITH CIRCUMFLEX
186
+ "E3+63" => "\x01\x09", # LATIN SMALL LETTER C WITH CIRCUMFLEX
187
+ "E3+65" => "\x00\xEA", # LATIN SMALL LETTER E WITH CIRCUMFLEX
188
+ "E3+67" => "\x01\x1D", # LATIN SMALL LETTER G WITH CIRCUMFLEX
189
+ "E3+68" => "\x01\x25", # LATIN SMALL LETTER H WITH CIRCUMFLEX
190
+ "E3+69" => "\x00\xEE", # LATIN SMALL LETTER I WITH CIRCUMFLEX
191
+ "E3+6A" => "\x01\x35", # LATIN SMALL LETTER J WITH CIRCUMFLEX
192
+ "E3+6F" => "\x00\xF4", # LATIN SMALL LETTER O WITH CIRCUMFLEX
193
+ "E3+73" => "\x01\x5D", # LATIN SMALL LETTER S WITH CIRCUMFLEX
194
+ "E3+75" => "\x00\xFB", # LATIN SMALL LETTER U WITH CIRCUMFLEX
195
+ "E3+77" => "\x01\x75", # LATIN SMALL LETTER W WITH CIRCUMFLEX
196
+ "E3+79" => "\x01\x77", # LATIN SMALL LETTER Y WITH CIRCUMFLEX
197
+ "E3+7A" => "\x1E\x91", # LATIN SMALL LETTER Z WITH CIRCUMFLEX
198
+ "E3+E0+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
199
+ "E3+E0+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
200
+ "E3+E0+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
201
+ "E3+E0+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
202
+ "E3+E0+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
203
+ "E3+E0+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
204
+ "E3+E1+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
205
+ "E3+E1+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
206
+ "E3+E1+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
207
+ "E3+E1+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
208
+ "E3+E1+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
209
+ "E3+E1+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
210
+ "E3+E2+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
211
+ "E3+E2+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
212
+ "E3+E2+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
213
+ "E3+E2+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
214
+ "E3+E2+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
215
+ "E3+E2+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
216
+ "E3+E4+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
217
+ "E3+E4+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
218
+ "E3+E4+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
219
+ "E3+E4+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
220
+ "E3+E4+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
221
+ "E3+E4+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
222
+ "E3+F2+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
223
+ "E3+F2+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
224
+ "E3+F2+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
225
+ "E3+F2+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
226
+ "E3+F2+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
227
+ "E3+F2+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
228
+ "E3" => "\x03\x02", # COMBINING CIRCUMFLEX ACCENT
229
+ "E4+41" => "\x00\xC3", # LATIN CAPITAL LETTER A WITH TILDE
230
+ "E4+45" => "\x1E\xBC", # LATIN CAPITAL LETTER E WITH TILDE
231
+ "E4+49" => "\x01\x28", # LATIN CAPITAL LETTER I WITH TILDE
232
+ "E4+4E" => "\x00\xD1", # LATIN CAPITAL LETTER N WITH TILDE
233
+ "E4+4F" => "\x00\xD5", # LATIN CAPITAL LETTER O WITH TILDE
234
+ "E4+55" => "\x01\x68", # LATIN CAPITAL LETTER U WITH TILDE
235
+ "E4+56" => "\x1E\x7C", # LATIN CAPITAL LETTER V WITH TILDE
236
+ "E4+59" => "\x1E\xF8", # LATIN CAPITAL LETTER Y WITH TILDE
237
+ "E4+61" => "\x00\xE3", # LATIN SMALL LETTER A WITH TILDE
238
+ "E4+65" => "\x1E\xBD", # LATIN SMALL LETTER E WITH TILDE
239
+ "E4+69" => "\x01\x29", # LATIN SMALL LETTER I WITH TILDE
240
+ "E4+6E" => "\x00\xF1", # LATIN SMALL LETTER N WITH TILDE
241
+ "E4+6F" => "\x00\xF5", # LATIN SMALL LETTER O WITH TILDE
242
+ "E4+75" => "\x01\x69", # LATIN SMALL LETTER U WITH TILDE
243
+ "E4+76" => "\x1E\x7D", # LATIN SMALL LETTER V WITH TILDE
244
+ "E4+79" => "\x1E\xF9", # LATIN SMALL LETTER Y WITH TILDE
245
+ "E4+E2+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
246
+ "E4+E2+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
247
+ "E4+E2+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
248
+ "E4+E2+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
249
+ "E4+E3+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
250
+ "E4+E3+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
251
+ "E4+E3+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
252
+ "E4+E3+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
253
+ "E4+E3+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
254
+ "E4+E3+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
255
+ "E4+E6+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
256
+ "E4+E6+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
257
+ "E4+E8+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
258
+ "E4+E8+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
259
+ "E4" => "\x03\x03", # COMBINING TILDE
260
+ "E5+41" => "\x01\x00", # LATIN CAPITAL LETTER A WITH MACRON
261
+ "E5+45" => "\x01\x12", # LATIN CAPITAL LETTER E WITH MACRON
262
+ "E5+47" => "\x1E\x20", # LATIN CAPITAL LETTER G WITH MACRON
263
+ "E5+49" => "\x01\x2A", # LATIN CAPITAL LETTER I WITH MACRON
264
+ "E5+4F" => "\x01\x4C", # LATIN CAPITAL LETTER O WITH MACRON
265
+ "E5+55" => "\x01\x6A", # LATIN CAPITAL LETTER U WITH MACRON
266
+ "E5+61" => "\x01\x01", # LATIN SMALL LETTER A WITH MACRON
267
+ "E5+65" => "\x01\x13", # LATIN SMALL LETTER E WITH MACRON
268
+ "E5+67" => "\x1E\x21", # LATIN SMALL LETTER G WITH MACRON
269
+ "E5+69" => "\x01\x2B", # LATIN SMALL LETTER I WITH MACRON
270
+ "E5+6F" => "\x01\x4D", # LATIN SMALL LETTER O WITH MACRON
271
+ "E5+75" => "\x01\x6B", # LATIN SMALL LETTER U WITH MACRON
272
+ "E5+A5" => "\x01\xE2", # LATIN CAPITAL LETTER AE WITH MACRON
273
+ "E5+B5" => "\x01\xE3", # LATIN SMALL LETTER AE WITH MACRON
274
+ "E5+E1+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
275
+ "E5+E1+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
276
+ "E5+E1+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
277
+ "E5+E1+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
278
+ "E5+E2+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
279
+ "E5+E2+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
280
+ "E5+E2+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
281
+ "E5+E2+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
282
+ "E5+E7+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
283
+ "E5+E7+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
284
+ "E5+E8+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
285
+ "E5+E8+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
286
+ "E5+E8+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
287
+ "E5+E8+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
288
+ "E5+F1+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
289
+ "E5+F1+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
290
+ "E5+F2+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
291
+ "E5+F2+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
292
+ "E5+F2+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
293
+ "E5+F2+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
294
+ "E5" => "\x03\x04", # COMBINING MACRON
295
+ "E6+41" => "\x01\x02", # LATIN CAPITAL LETTER A WITH BREVE
296
+ "E6+45" => "\x01\x14", # LATIN CAPITAL LETTER E WITH BREVE
297
+ "E6+47" => "\x01\x1E", # LATIN CAPITAL LETTER G WITH BREVE
298
+ "E6+49" => "\x01\x2C", # LATIN CAPITAL LETTER I WITH BREVE
299
+ "E6+4F" => "\x01\x4E", # LATIN CAPITAL LETTER O WITH BREVE
300
+ "E6+55" => "\x01\x6C", # LATIN CAPITAL LETTER U WITH BREVE
301
+ "E6+61" => "\x01\x03", # LATIN SMALL LETTER A WITH BREVE
302
+ "E6+65" => "\x01\x15", # LATIN SMALL LETTER E WITH BREVE
303
+ "E6+67" => "\x01\x1F", # LATIN SMALL LETTER G WITH BREVE
304
+ "E6+69" => "\x01\x2D", # LATIN SMALL LETTER I WITH BREVE
305
+ "E6+6F" => "\x01\x4F", # LATIN SMALL LETTER O WITH BREVE
306
+ "E6+75" => "\x01\x6D", # LATIN SMALL LETTER U WITH BREVE
307
+ "E6+E0+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
308
+ "E6+E0+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
309
+ "E6+E1+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
310
+ "E6+E1+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
311
+ "E6+E2+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
312
+ "E6+E2+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
313
+ "E6+E4+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
314
+ "E6+E4+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
315
+ "E6+F0+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
316
+ "E6+F0+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
317
+ "E6+F2+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
318
+ "E6+F2+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
319
+ "E6" => "\x03\x06", # COMBINING BREVE
320
+ "E7+42" => "\x1E\x02", # LATIN CAPITAL LETTER B WITH DOT ABOVE
321
+ "E7+43" => "\x01\x0A", # LATIN CAPITAL LETTER C WITH DOT ABOVE
322
+ "E7+44" => "\x1E\x0A", # LATIN CAPITAL LETTER D WITH DOT ABOVE
323
+ "E7+45" => "\x01\x16", # LATIN CAPITAL LETTER E WITH DOT ABOVE
324
+ "E7+46" => "\x1E\x1E", # LATIN CAPITAL LETTER F WITH DOT ABOVE
325
+ "E7+47" => "\x01\x20", # LATIN CAPITAL LETTER G WITH DOT ABOVE
326
+ "E7+48" => "\x1E\x22", # LATIN CAPITAL LETTER H WITH DOT ABOVE
327
+ "E7+49" => "\x01\x30", # LATIN CAPITAL LETTER I WITH DOT ABOVE
328
+ "E7+4D" => "\x1E\x40", # LATIN CAPITAL LETTER M WITH DOT ABOVE
329
+ "E7+4E" => "\x1E\x44", # LATIN CAPITAL LETTER N WITH DOT ABOVE
330
+ "E7+50" => "\x1E\x56", # LATIN CAPITAL LETTER P WITH DOT ABOVE
331
+ "E7+52" => "\x1E\x58", # LATIN CAPITAL LETTER R WITH DOT ABOVE
332
+ "E7+53" => "\x1E\x60", # LATIN CAPITAL LETTER S WITH DOT ABOVE
333
+ "E7+54" => "\x1E\x6A", # LATIN CAPITAL LETTER T WITH DOT ABOVE
334
+ "E7+57" => "\x1E\x86", # LATIN CAPITAL LETTER W WITH DOT ABOVE
335
+ "E7+58" => "\x1E\x8A", # LATIN CAPITAL LETTER X WITH DOT ABOVE
336
+ "E7+59" => "\x1E\x8E", # LATIN CAPITAL LETTER Y WITH DOT ABOVE
337
+ "E7+5A" => "\x01\x7B", # LATIN CAPITAL LETTER Z WITH DOT ABOVE
338
+ "E7+62" => "\x1E\x03", # LATIN SMALL LETTER B WITH DOT ABOVE
339
+ "E7+63" => "\x01\x0B", # LATIN SMALL LETTER C WITH DOT ABOVE
340
+ "E7+64" => "\x1E\x0B", # LATIN SMALL LETTER D WITH DOT ABOVE
341
+ "E7+65" => "\x01\x17", # LATIN SMALL LETTER E WITH DOT ABOVE
342
+ "E7+66" => "\x1E\x1F", # LATIN SMALL LETTER F WITH DOT ABOVE
343
+ "E7+67" => "\x01\x21", # LATIN SMALL LETTER G WITH DOT ABOVE
344
+ "E7+68" => "\x1E\x23", # LATIN SMALL LETTER H WITH DOT ABOVE
345
+ "E7+6D" => "\x1E\x41", # LATIN SMALL LETTER M WITH DOT ABOVE
346
+ "E7+6E" => "\x1E\x45", # LATIN SMALL LETTER N WITH DOT ABOVE
347
+ "E7+70" => "\x1E\x57", # LATIN SMALL LETTER P WITH DOT ABOVE
348
+ "E7+72" => "\x1E\x59", # LATIN SMALL LETTER R WITH DOT ABOVE
349
+ "E7+73" => "\x1E\x61", # LATIN SMALL LETTER S WITH DOT ABOVE
350
+ "E7+74" => "\x1E\x6B", # LATIN SMALL LETTER T WITH DOT ABOVE
351
+ "E7+77" => "\x1E\x87", # LATIN SMALL LETTER W WITH DOT ABOVE
352
+ "E7+78" => "\x1E\x8B", # LATIN SMALL LETTER X WITH DOT ABOVE
353
+ "E7+79" => "\x1E\x8F", # LATIN SMALL LETTER Y WITH DOT ABOVE
354
+ "E7+7A" => "\x01\x7C", # LATIN SMALL LETTER Z WITH DOT ABOVE
355
+ "E7+E2+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
356
+ "E7+E2+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
357
+ "E7+E5+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
358
+ "E7+E5+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
359
+ "E7+E9+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
360
+ "E7+E9+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
361
+ "E7+F2+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
362
+ "E7+F2+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
363
+ "E7" => "\x03\x07", # COMBINING DOT ABOVE
364
+ "E8+41" => "\x00\xC4", # LATIN CAPITAL LETTER A WITH DIAERESIS
365
+ "E8+45" => "\x00\xCB", # LATIN CAPITAL LETTER E WITH DIAERESIS
366
+ "E8+48" => "\x1E\x26", # LATIN CAPITAL LETTER H WITH DIAERESIS
367
+ "E8+49" => "\x00\xCF", # LATIN CAPITAL LETTER I WITH DIAERESIS
368
+ "E8+4F" => "\x00\xD6", # LATIN CAPITAL LETTER O WITH DIAERESIS
369
+ "E8+55" => "\x00\xDC", # LATIN CAPITAL LETTER U WITH DIAERESIS
370
+ "E8+57" => "\x1E\x84", # LATIN CAPITAL LETTER W WITH DIAERESIS
371
+ "E8+58" => "\x1E\x8C", # LATIN CAPITAL LETTER X WITH DIAERESIS
372
+ "E8+59" => "\x01\x78", # LATIN CAPITAL LETTER Y WITH DIAERESIS
373
+ "E8+61" => "\x00\xE4", # LATIN SMALL LETTER A WITH DIAERESIS
374
+ "E8+65" => "\x00\xEB", # LATIN SMALL LETTER E WITH DIAERESIS
375
+ "E8+68" => "\x1E\x27", # LATIN SMALL LETTER H WITH DIAERESIS
376
+ "E8+69" => "\x00\xEF", # LATIN SMALL LETTER I WITH DIAERESIS
377
+ "E8+6F" => "\x00\xF6", # LATIN SMALL LETTER O WITH DIAERESIS
378
+ "E8+74" => "\x1E\x97", # LATIN SMALL LETTER T WITH DIAERESIS
379
+ "E8+75" => "\x00\xFC", # LATIN SMALL LETTER U WITH DIAERESIS
380
+ "E8+77" => "\x1E\x85", # LATIN SMALL LETTER W WITH DIAERESIS
381
+ "E8+78" => "\x1E\x8D", # LATIN SMALL LETTER X WITH DIAERESIS
382
+ "E8+79" => "\x00\xFF", # LATIN SMALL LETTER Y WITH DIAERESIS
383
+ "E8+E1+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
384
+ "E8+E1+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
385
+ "E8+E2+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
386
+ "E8+E2+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
387
+ "E8+E2+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
388
+ "E8+E2+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
389
+ "E8+E4+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
390
+ "E8+E4+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
391
+ "E8+E5+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
392
+ "E8+E5+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
393
+ "E8+E5+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
394
+ "E8+E5+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
395
+ "E8+E9+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
396
+ "E8+E9+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
397
+ "E8" => "\x03\x08", # COMBINING DIAERESIS
398
+ "E9+41" => "\x01\xCD", # LATIN CAPITAL LETTER A WITH CARON
399
+ "E9+43" => "\x01\x0C", # LATIN CAPITAL LETTER C WITH CARON
400
+ "E9+44" => "\x01\x0E", # LATIN CAPITAL LETTER D WITH CARON
401
+ "E9+45" => "\x01\x1A", # LATIN CAPITAL LETTER E WITH CARON
402
+ "E9+47" => "\x01\xE6", # LATIN CAPITAL LETTER G WITH CARON
403
+ "E9+49" => "\x01\xCF", # LATIN CAPITAL LETTER I WITH CARON
404
+ "E9+4B" => "\x01\xE8", # LATIN CAPITAL LETTER K WITH CARON
405
+ "E9+4C" => "\x01\x3D", # LATIN CAPITAL LETTER L WITH CARON
406
+ "E9+4E" => "\x01\x47", # LATIN CAPITAL LETTER N WITH CARON
407
+ "E9+4F" => "\x01\xD1", # LATIN CAPITAL LETTER O WITH CARON
408
+ "E9+52" => "\x01\x58", # LATIN CAPITAL LETTER R WITH CARON
409
+ "E9+53" => "\x01\x60", # LATIN CAPITAL LETTER S WITH CARON
410
+ "E9+54" => "\x01\x64", # LATIN CAPITAL LETTER T WITH CARON
411
+ "E9+55" => "\x01\xD3", # LATIN CAPITAL LETTER U WITH CARON
412
+ "E9+5A" => "\x01\x7D", # LATIN CAPITAL LETTER Z WITH CARON
413
+ "E9+61" => "\x01\xCE", # LATIN SMALL LETTER A WITH CARON
414
+ "E9+63" => "\x01\x0D", # LATIN SMALL LETTER C WITH CARON
415
+ "E9+64" => "\x01\x0F", # LATIN SMALL LETTER D WITH CARON
416
+ "E9+65" => "\x01\x1B", # LATIN SMALL LETTER E WITH CARON
417
+ "E9+67" => "\x01\xE7", # LATIN SMALL LETTER G WITH CARON
418
+ "E9+69" => "\x01\xD0", # LATIN SMALL LETTER I WITH CARON
419
+ "E9+6A" => "\x01\xF0", # LATIN SMALL LETTER J WITH CARON
420
+ "E9+6B" => "\x01\xE9", # LATIN SMALL LETTER K WITH CARON
421
+ "E9+6C" => "\x01\x3E", # LATIN SMALL LETTER L WITH CARON
422
+ "E9+6E" => "\x01\x48", # LATIN SMALL LETTER N WITH CARON
423
+ "E9+6F" => "\x01\xD2", # LATIN SMALL LETTER O WITH CARON
424
+ "E9+72" => "\x01\x59", # LATIN SMALL LETTER R WITH CARON
425
+ "E9+73" => "\x01\x61", # LATIN SMALL LETTER S WITH CARON
426
+ "E9+74" => "\x01\x65", # LATIN SMALL LETTER T WITH CARON
427
+ "E9+75" => "\x01\xD4", # LATIN SMALL LETTER U WITH CARON
428
+ "E9+7A" => "\x01\x7E", # LATIN SMALL LETTER Z WITH CARON
429
+ "E9+E7+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
430
+ "E9+E7+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
431
+ "E9+E8+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
432
+ "E9+E8+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
433
+ "E9" => "\x03\x0C", # COMBINING CARON
434
+ "EA+41" => "\x00\xC5", # LATIN CAPITAL LETTER A WITH RING ABOVE
435
+ "EA+55" => "\x01\x6E", # LATIN CAPITAL LETTER U WITH RING ABOVE
436
+ "EA+61" => "\x00\xE5", # LATIN SMALL LETTER A WITH RING ABOVE
437
+ "EA+75" => "\x01\x6F", # LATIN SMALL LETTER U WITH RING ABOVE
438
+ "EA+77" => "\x1E\x98", # LATIN SMALL LETTER W WITH RING ABOVE
439
+ "EA+79" => "\x1E\x99", # LATIN SMALL LETTER Y WITH RING ABOVE
440
+ "EA+E2+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
441
+ "EA+E2+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
442
+ "EA" => "\x03\x0A", # COMBINING RING ABOVE
443
+ "EB" => "\xFE\x20", # COMBINING LIGATURE LEFT HALF
444
+ "EC" => "\xFE\x21", # COMBINING LIGATURE RIGHT HALF
445
+ "ED" => "\x03\x15", # COMBINING COMMA ABOVE RIGHT
446
+ "EE+4F" => "\x01\x50", # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
447
+ "EE+55" => "\x01\x70", # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
448
+ "EE+6F" => "\x01\x51", # LATIN SMALL LETTER O WITH DOUBLE ACUTE
449
+ "EE+75" => "\x01\x71", # LATIN SMALL LETTER U WITH DOUBLE ACUTE
450
+ "EE" => "\x03\x0B", # COMBINING DOUBLE ACUTE ACCENT
451
+ "EF" => "\x03\x10", # COMBINING CANDRABINDU
452
+ "F0+43" => "\x00\xC7", # LATIN CAPITAL LETTER C WITH CEDILLA
453
+ "F0+44" => "\x1E\x10", # LATIN CAPITAL LETTER D WITH CEDILLA
454
+ "F0+47" => "\x01\x22", # LATIN CAPITAL LETTER G WITH CEDILLA
455
+ "F0+48" => "\x1E\x28", # LATIN CAPITAL LETTER H WITH CEDILLA
456
+ "F0+4B" => "\x01\x36", # LATIN CAPITAL LETTER K WITH CEDILLA
457
+ "F0+4C" => "\x01\x3B", # LATIN CAPITAL LETTER L WITH CEDILLA
458
+ "F0+4E" => "\x01\x45", # LATIN CAPITAL LETTER N WITH CEDILLA
459
+ "F0+52" => "\x01\x56", # LATIN CAPITAL LETTER R WITH CEDILLA
460
+ "F0+53" => "\x01\x5E", # LATIN CAPITAL LETTER S WITH CEDILLA
461
+ "F0+54" => "\x01\x62", # LATIN CAPITAL LETTER T WITH CEDILLA
462
+ "F0+63" => "\x00\xE7", # LATIN SMALL LETTER C WITH CEDILLA
463
+ "F0+64" => "\x1E\x11", # LATIN SMALL LETTER D WITH CEDILLA
464
+ "F0+67" => "\x01\x23", # LATIN SMALL LETTER G WITH CEDILLA
465
+ "F0+68" => "\x1E\x29", # LATIN SMALL LETTER H WITH CEDILLA
466
+ "F0+6B" => "\x01\x37", # LATIN SMALL LETTER K WITH CEDILLA
467
+ "F0+6C" => "\x01\x3C", # LATIN SMALL LETTER L WITH CEDILLA
468
+ "F0+6E" => "\x01\x46", # LATIN SMALL LETTER N WITH CEDILLA
469
+ "F0+72" => "\x01\x57", # LATIN SMALL LETTER R WITH CEDILLA
470
+ "F0+73" => "\x01\x5F", # LATIN SMALL LETTER S WITH CEDILLA
471
+ "F0+74" => "\x01\x63", # LATIN SMALL LETTER T WITH CEDILLA
472
+ "F0+E2+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
473
+ "F0+E2+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
474
+ "F0+E6+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
475
+ "F0+E6+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
476
+ "F0" => "\x03\x27", # COMBINING CEDILLA
477
+ "F1+41" => "\x01\x04", # LATIN CAPITAL LETTER A WITH OGONEK
478
+ "F1+45" => "\x01\x18", # LATIN CAPITAL LETTER E WITH OGONEK
479
+ "F1+49" => "\x01\x2E", # LATIN CAPITAL LETTER I WITH OGONEK
480
+ "F1+4F" => "\x01\xEA", # LATIN CAPITAL LETTER O WITH OGONEK
481
+ "F1+55" => "\x01\x72", # LATIN CAPITAL LETTER U WITH OGONEK
482
+ "F1+61" => "\x01\x05", # LATIN SMALL LETTER A WITH OGONEK
483
+ "F1+65" => "\x01\x19", # LATIN SMALL LETTER E WITH OGONEK
484
+ "F1+69" => "\x01\x2F", # LATIN SMALL LETTER I WITH OGONEK
485
+ "F1+6F" => "\x01\xEB", # LATIN SMALL LETTER O WITH OGONEK
486
+ "F1+75" => "\x01\x73", # LATIN SMALL LETTER U WITH OGONEK
487
+ "F1+E5+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
488
+ "F1+E5+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
489
+ "F1" => "\x03\x28", # COMBINING OGONEK
490
+ "F2+41" => "\x1E\xA0", # LATIN CAPITAL LETTER A WITH DOT BELOW
491
+ "F2+42" => "\x1E\x04", # LATIN CAPITAL LETTER B WITH DOT BELOW
492
+ "F2+44" => "\x1E\x0C", # LATIN CAPITAL LETTER D WITH DOT BELOW
493
+ "F2+45" => "\x1E\xB8", # LATIN CAPITAL LETTER E WITH DOT BELOW
494
+ "F2+48" => "\x1E\x24", # LATIN CAPITAL LETTER H WITH DOT BELOW
495
+ "F2+49" => "\x1E\xCA", # LATIN CAPITAL LETTER I WITH DOT BELOW
496
+ "F2+4B" => "\x1E\x32", # LATIN CAPITAL LETTER K WITH DOT BELOW
497
+ "F2+4C" => "\x1E\x36", # LATIN CAPITAL LETTER L WITH DOT BELOW
498
+ "F2+4D" => "\x1E\x42", # LATIN CAPITAL LETTER M WITH DOT BELOW
499
+ "F2+4E" => "\x1E\x46", # LATIN CAPITAL LETTER N WITH DOT BELOW
500
+ "F2+4F" => "\x1E\xCC", # LATIN CAPITAL LETTER O WITH DOT BELOW
501
+ "F2+52" => "\x1E\x5A", # LATIN CAPITAL LETTER R WITH DOT BELOW
502
+ "F2+53" => "\x1E\x62", # LATIN CAPITAL LETTER S WITH DOT BELOW
503
+ "F2+54" => "\x1E\x6C", # LATIN CAPITAL LETTER T WITH DOT BELOW
504
+ "F2+55" => "\x1E\xE4", # LATIN CAPITAL LETTER U WITH DOT BELOW
505
+ "F2+56" => "\x1E\x7E", # LATIN CAPITAL LETTER V WITH DOT BELOW
506
+ "F2+57" => "\x1E\x88", # LATIN CAPITAL LETTER W WITH DOT BELOW
507
+ "F2+59" => "\x1E\xF4", # LATIN CAPITAL LETTER Y WITH DOT BELOW
508
+ "F2+5A" => "\x1E\x92", # LATIN CAPITAL LETTER Z WITH DOT BELOW
509
+ "F2+61" => "\x1E\xA1", # LATIN SMALL LETTER A WITH DOT BELOW
510
+ "F2+62" => "\x1E\x05", # LATIN SMALL LETTER B WITH DOT BELOW
511
+ "F2+64" => "\x1E\x0D", # LATIN SMALL LETTER D WITH DOT BELOW
512
+ "F2+65" => "\x1E\xB9", # LATIN SMALL LETTER E WITH DOT BELOW
513
+ "F2+68" => "\x1E\x25", # LATIN SMALL LETTER H WITH DOT BELOW
514
+ "F2+69" => "\x1E\xCB", # LATIN SMALL LETTER I WITH DOT BELOW
515
+ "F2+6B" => "\x1E\x33", # LATIN SMALL LETTER K WITH DOT BELOW
516
+ "F2+6C" => "\x1E\x37", # LATIN SMALL LETTER L WITH DOT BELOW
517
+ "F2+6D" => "\x1E\x43", # LATIN SMALL LETTER M WITH DOT BELOW
518
+ "F2+6E" => "\x1E\x47", # LATIN SMALL LETTER N WITH DOT BELOW
519
+ "F2+6F" => "\x1E\xCD", # LATIN SMALL LETTER O WITH DOT BELOW
520
+ "F2+72" => "\x1E\x5B", # LATIN SMALL LETTER R WITH DOT BELOW
521
+ "F2+73" => "\x1E\x63", # LATIN SMALL LETTER S WITH DOT BELOW
522
+ "F2+74" => "\x1E\x6D", # LATIN SMALL LETTER T WITH DOT BELOW
523
+ "F2+75" => "\x1E\xE5", # LATIN SMALL LETTER U WITH DOT BELOW
524
+ "F2+76" => "\x1E\x7F", # LATIN SMALL LETTER V WITH DOT BELOW
525
+ "F2+77" => "\x1E\x89", # LATIN SMALL LETTER W WITH DOT BELOW
526
+ "F2+79" => "\x1E\xF5", # LATIN SMALL LETTER Y WITH DOT BELOW
527
+ "F2+7A" => "\x1E\x93", # LATIN SMALL LETTER Z WITH DOT BELOW
528
+ "F2+E3+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
529
+ "F2+E3+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
530
+ "F2+E3+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
531
+ "F2+E3+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
532
+ "F2+E3+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
533
+ "F2+E3+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
534
+ "F2+E5+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
535
+ "F2+E5+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
536
+ "F2+E5+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
537
+ "F2+E5+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
538
+ "F2+E6+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
539
+ "F2+E6+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
540
+ "F2+E7+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
541
+ "F2+E7+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
542
+ "F2" => "\x03\x23", # COMBINING DOT BELOW
543
+ "F3+55" => "\x1E\x72", # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
544
+ "F3+75" => "\x1E\x73", # LATIN SMALL LETTER U WITH DIAERESIS BELOW
545
+ "F3" => "\x03\x24", # COMBINING DIAERESIS BELOW
546
+ "F4+41" => "\x1E\x00", # LATIN CAPITAL LETTER A WITH RING BELOW
547
+ "F4+61" => "\x1E\x01", # LATIN SMALL LETTER A WITH RING BELOW
548
+ "F4" => "\x03\x25", # COMBINING RING BELOW
549
+ "F5" => "\x03\x33", # COMBINING DOUBLE LOW LINE
550
+ "F6+42" => "\x1E\x06", # LATIN CAPITAL LETTER B WITH LINE BELOW
551
+ "F6+44" => "\x1E\x0E", # LATIN CAPITAL LETTER D WITH LINE BELOW
552
+ "F6+4B" => "\x1E\x34", # LATIN CAPITAL LETTER K WITH LINE BELOW
553
+ "F6+4C" => "\x1E\x3A", # LATIN CAPITAL LETTER L WITH LINE BELOW
554
+ "F6+4E" => "\x1E\x48", # LATIN CAPITAL LETTER N WITH LINE BELOW
555
+ "F6+52" => "\x1E\x5E", # LATIN CAPITAL LETTER R WITH LINE BELOW
556
+ "F6+54" => "\x1E\x6E", # LATIN CAPITAL LETTER T WITH LINE BELOW
557
+ "F6+5A" => "\x1E\x94", # LATIN CAPITAL LETTER Z WITH LINE BELOW
558
+ "F6+62" => "\x1E\x07", # LATIN SMALL LETTER B WITH LINE BELOW
559
+ "F6+64" => "\x1E\x0F", # LATIN SMALL LETTER D WITH LINE BELOW
560
+ "F6+68" => "\x1E\x96", # LATIN SMALL LETTER H WITH LINE BELOW
561
+ "F6+6B" => "\x1E\x35", # LATIN SMALL LETTER K WITH LINE BELOW
562
+ "F6+6C" => "\x1E\x3B", # LATIN SMALL LETTER L WITH LINE BELOW
563
+ "F6+6E" => "\x1E\x49", # LATIN SMALL LETTER N WITH LINE BELOW
564
+ "F6+72" => "\x1E\x5F", # LATIN SMALL LETTER R WITH LINE BELOW
565
+ "F6+74" => "\x1E\x6F", # LATIN SMALL LETTER T WITH LINE BELOW
566
+ "F6+7A" => "\x1E\x95", # LATIN SMALL LETTER Z WITH LINE BELOW
567
+ "F6" => "\x03\x32", # COMBINING LOW LINE
568
+ "F7" => "\x03\x26", # COMBINING COMMA BELOW
569
+ "F8" => "\x03\x21", # COMBINING PALATALIZED HOOK BELOW
570
+ "F9+48" => "\x1E\x2A", # LATIN CAPITAL LETTER H WITH BREVE BELOW
571
+ "F9+68" => "\x1E\x2B", # LATIN SMALL LETTER H WITH BREVE BELOW
572
+ "F9" => "\x03\x2E", # COMBINING BREVE BELOW
573
+ "FA" => "\xFE\x22", # COMBINING DOUBLE TILDE LEFT HALF
574
+ "FB" => "\xFE\x23" # COMBINING DOUBLE TILDE RIGHT HALF
575
+ }
576
+
577
# Creates a converter.
#
# to_charset   - target encoding name as understood by Iconv (default 'UTF-8').
# from_charset - source encoding name (default 'ANSEL'). Any other value makes
#                #iconv hand the text straight to ::Iconv without table lookup.
def initialize(to_charset = 'UTF-8', from_charset = 'ANSEL')
  @to_charset = to_charset
  @from_charset = from_charset
  # Single lookup table: hex key ("C5", "E9+47", ...) => big-endian UTF-16 bytes.
  @ansi_to_utf8 = {}
  @ansi_to_utf8.merge!(@@non_combining)
  @ansi_to_utf8.merge!(@@combining)
end

# Converts +string+ from the source charset to the target charset.
#
# ANSEL input is decoded byte by byte:
# * 0x00..0x7F is copied unchanged;
# * 0x88..0xC8 is looked up as a non-combining character;
# * 0xE0..0xFB starts a combining sequence of up to three bytes;
# * anything else yields the table's 'ERR' (replacement) entry.
#
# The result is assembled as UTF-8 and converted to the target charset exactly
# once at the end, so ASCII and mapped characters always share one encoding.
#
# Returns the converted String.
def iconv(string)
  return ::Iconv.conv(@to_charset, @from_charset, string) unless ansel_source?

  output = ''.dup
  scanner = StringScanner.new(string)
  until scanner.eos?
    byte = scanner.get_byte
    code = byte.unpack('C').first # numeric byte value on every Ruby version

    case code
    when 0x00..0x7F
      output << byte
    when 0x88..0xC8
      output << table_entry_as_utf8(code.to_s(16).upcase)
      scanner.get_byte # ignore the next byte
    when 0xE0..0xFB
      output << combining_sequence_as_utf8(code, scanner)
    else
      output << table_entry_as_utf8('ERR')
      # Drop the byte after the invalid one, and one more when that byte is a
      # combining mark. At the end of the input there is nothing left to drop.
      following = scanner.get_byte
      scanner.get_byte if following && following.unpack('C').first >= 0xE0
    end
  end

  @to_charset == 'UTF-8' ? output : ::Iconv.conv(@to_charset, 'UTF-8', output)
end

# Converts a single ANSEL character (one byte, or a combining sequence) to the
# target charset.
#
# An empty string or a lone ASCII byte is returned unchanged. Input that has
# no entry in the mapping table yields the table's 'ERR' entry.
def convert_char(char)
  codes = char.unpack('C*')
  return char if codes.empty? || (codes.size == 1 && codes.first <= 0x7F)

  lead = codes.first
  hex_key =
    if lead >= 0xE0
      codes.map { |c| c.to_s(16).upcase }.join('+')
    elsif lead > 0x7F
      lead.to_s(16).upcase
    end

  entry = @ansi_to_utf8.has_key?(hex_key) ? @ansi_to_utf8[hex_key] : @ansi_to_utf8['ERR']
  # Table values carry no byte-order mark, so the byte order is named explicitly.
  ::Iconv.conv(@to_charset, 'UTF-16BE', entry)
end

# True when the configured source charset is ANSEL (also when none was set).
def ansel_source?
  (@from_charset || 'ANSEL').to_s.upcase == 'ANSEL'
end

# Returns the table entry for +hex_key+ as UTF-8, or the 'ERR' entry when the
# key is unknown.
def table_entry_as_utf8(hex_key)
  entry = @ansi_to_utf8.has_key?(hex_key) ? @ansi_to_utf8[hex_key] : @ansi_to_utf8['ERR']
  ::Iconv.conv('UTF-8', 'UTF-16BE', entry)
end

# Resolves the combining sequence that starts with byte value +lead+, taking
# the longest match and consuming the extra bytes it used from +scanner+.
def combining_sequence_as_utf8(lead, scanner)
  [2, 1, 0].each do |extra| # try 3 bytes, then 2 bytes, then 1 byte
    codes = [lead] + scanner.peek(extra).unpack('C*')
    hex_key = codes.map { |c| c.to_s(16).upcase }.join('+')
    next unless @ansi_to_utf8.has_key?(hex_key)

    (codes.size - 1).times { scanner.get_byte }
    return table_entry_as_utf8(hex_key)
  end
  table_entry_as_utf8('ERR') # lead byte has no table entry at all
end

private :ansel_source?, :table_entry_as_utf8, :combining_sequence_as_utf8
628
+
629
+ end
630
+
631
+ end
@@ -0,0 +1,101 @@
1
+ require 'test_helper'
2
+
3
# Unit tests for ANSEL::Iconv: ASCII pass-through, the replacement character
# for unmapped bytes, the non-combining and combining ANSEL tables, whole-text
# conversion, and conversion to UTF-16 from several source charsets.
class ANSEL::IconvTest < Test::Unit::TestCase
  FIXTURE_PATH = File.dirname(__FILE__) + "/../../../fixtures/gedcom"

  def setup
    @ansel = ANSEL::Iconv.new 'UTF-8'
  end

  should "return ASCII values without conversion" do
    assert_equal " ", @ansel.iconv("\x20")
    assert_equal "x", @ansel.iconv("\x78")
  end

  should "return the unicode replacement character for invalid characters" do
    assert_equal "\xEF\xBF\xBD", @ansel.iconv("\xBE\x00")
    assert_equal "\xEF\xBF\xBD", @ansel.iconv("\xD1\x00")
  end

  should "return UTF-8 characters for valid ANSEL characters" do
    # ANSEL non-combining mappings
    #
    # The first four map to invisible characters, so the expected values are
    # spelled as UTF-8 byte escapes that editors and diff tools cannot drop.
    # NOTE(review): code points follow the Library of Congress ANSEL table;
    # confirm against the converter's non-combining mapping.
    assert_equal "\xC2\x98", @ansel.iconv("\x88\x00")     # U+0098
    assert_equal "\xC2\x9C", @ansel.iconv("\x89\x00")     # U+009C
    assert_equal "\xE2\x80\x8D", @ansel.iconv("\x8D\x00") # U+200D ZERO WIDTH JOINER
    assert_equal "\xE2\x80\x8C", @ansel.iconv("\x8E\x00") # U+200C ZERO WIDTH NON-JOINER
    assert_equal "Ł", @ansel.iconv("\xA1\x00")
    assert_equal "Ø", @ansel.iconv("\xA2\x00")
    assert_equal "Đ", @ansel.iconv("\xA3\x00")
    assert_equal "Þ", @ansel.iconv("\xA4\x00")
    assert_equal "Æ", @ansel.iconv("\xA5\x00")
    assert_equal "Œ", @ansel.iconv("\xA6\x00")
    assert_equal "ʹ", @ansel.iconv("\xA7\x00")
    assert_equal "·", @ansel.iconv("\xA8\x00")
    assert_equal "♭", @ansel.iconv("\xA9\x00")
    assert_equal "®", @ansel.iconv("\xAA\x00")
    assert_equal "±", @ansel.iconv("\xAB\x00")
    assert_equal "Ơ", @ansel.iconv("\xAC\x00")
    assert_equal "Ư", @ansel.iconv("\xAD\x00")
    assert_equal "ʼ", @ansel.iconv("\xAE\x00")
    assert_equal "ʻ", @ansel.iconv("\xB0\x00")
    assert_equal "ł", @ansel.iconv("\xB1\x00")
    assert_equal "ø", @ansel.iconv("\xB2\x00")
    assert_equal "đ", @ansel.iconv("\xB3\x00")
    assert_equal "þ", @ansel.iconv("\xB4\x00")
    assert_equal "æ", @ansel.iconv("\xB5\x00")
    assert_equal "œ", @ansel.iconv("\xB6\x00")
    assert_equal "ʺ", @ansel.iconv("\xB7\x00")
    assert_equal "ı", @ansel.iconv("\xB8\x00")
    assert_equal "£", @ansel.iconv("\xB9\x00")
    assert_equal "ð", @ansel.iconv("\xBA\x00")
    assert_equal "ơ", @ansel.iconv("\xBC\x00")
    assert_equal "ư", @ansel.iconv("\xBD\x00")
    assert_equal "°", @ansel.iconv("\xC0\x00")
    assert_equal "ℓ", @ansel.iconv("\xC1\x00")
    assert_equal "℗", @ansel.iconv("\xC2\x00")
    assert_equal "©", @ansel.iconv("\xC3\x00")
    assert_equal "♯", @ansel.iconv("\xC4\x00")
    assert_equal "¿", @ansel.iconv("\xC5\x00")
    assert_equal "¡", @ansel.iconv("\xC6\x00")
    assert_equal "ß", @ansel.iconv("\xC7\x00")
    assert_equal "€", @ansel.iconv("\xC8\x00")

    # ANSEL combining characters
    assert_equal "Ả", @ansel.iconv("\xE0\x41")
    assert_equal "Ḻ", @ansel.iconv("\xF6\x4C")
    assert_equal "̲", @ansel.iconv("\xF6")
    assert_equal "̮", @ansel.iconv("\xF9")
    assert_equal "Ḫ", @ansel.iconv("\xF9\x48")
    assert_equal "Ậ", @ansel.iconv("\xF2\xE3\x41")
    assert_equal "ỵ", @ansel.iconv("\xF2\x79")
    assert_equal "̣", @ansel.iconv("\xF2")
  end

  should "convert full text correctly" do
    assert_equal "What is the question?", @ansel.iconv("What is the question?")
    assert_equal "¿What is the question?", @ansel.iconv("\xC5\x00What is the question?")
    assert_equal "© 1994", @ansel.iconv("\xC3\x00 1994")
    assert_equal "£4.59", @ansel.iconv("\xB9\x004.59")
  end

  should "convert ANSEL to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'ANSEL'
    assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
  end

  should "convert ASCII to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'ASCII'
    assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
  end

  should "convert UTF-8 to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'UTF-8'
    assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
  end

  should "convert UTF-16 to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'UTF-16'
    assert_equal "\376\377\000a\000b\000c", converter.iconv("\376\377\000a\000b\000c")
  end
end
@@ -0,0 +1,5 @@
1
+ $:.unshift(File.dirname(__FILE__) + "/../lib/")
2
+ require 'rubygems'
3
+ require 'test/unit'
4
+ require 'shoulda'
5
+ require 'ansel_iconv'
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ansel_iconv
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Keith Morrison
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-25 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activesupport
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.1.0
24
+ version:
25
+ description: Convert ANSEL encoded text to any other encoding available to Iconv
26
+ email: keithm@infused.org
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.txt
33
+ files:
34
+ - History.txt
35
+ - README.txt
36
+ - VERSION.yml
37
+ - lib/ansel_iconv.rb
38
+ - test/ansel_iconv_test.rb
39
+ - test/test_helper.rb
40
+ has_rdoc: true
41
+ homepage:
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options:
46
+ - --inline-source
47
+ - --charset=UTF-8
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ requirements: []
63
+
64
+ rubyforge_project:
65
+ rubygems_version: 1.3.5
66
+ signing_key:
67
+ specification_version: 2
68
+ summary: Convert ANSEL encoded text
69
+ test_files: []
70
+