ansel_iconv 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/README.txt +37 -0
- data/VERSION.yml +4 -0
- data/lib/ansel_iconv.rb +631 -0
- data/test/ansel_iconv_test.rb +101 -0
- data/test/test_helper.rb +5 -0
- metadata +70 -0
data/History.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
= ANSEL::Iconv
|
2
|
+
|
3
|
+
http://github.com/infused/ansel_iconv/tree/master
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Convert ANSEL-encoded text to any other encoding available to Iconv.
|
8
|
+
|
9
|
+
== INSTALL:
|
10
|
+
|
11
|
+
gem install infused-ansel_iconv --source http://gems.github.com
|
12
|
+
|
13
|
+
|
14
|
+
== LICENSE:
|
15
|
+
|
16
|
+
(The MIT License)
|
17
|
+
|
18
|
+
Copyright (c) 2006-2009 Keith Morrison <keithm@infused.org>
|
19
|
+
|
20
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
21
|
+
a copy of this software and associated documentation files (the
|
22
|
+
'Software'), to deal in the Software without restriction, including
|
23
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
24
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
25
|
+
permit persons to whom the Software is furnished to do so, subject to
|
26
|
+
the following conditions:
|
27
|
+
|
28
|
+
The above copyright notice and this permission notice shall be
|
29
|
+
included in all copies or substantial portions of the Software.
|
30
|
+
|
31
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
32
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
33
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
34
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
35
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
36
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
37
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/VERSION.yml
ADDED
data/lib/ansel_iconv.rb
ADDED
@@ -0,0 +1,631 @@
|
|
1
|
+
require 'activesupport'
|
2
|
+
require 'iconv'
|
3
|
+
|
4
|
+
module ANSEL
|
5
|
+
# Front end that exposes a single #iconv method and picks the object that
# actually performs the conversion when it is constructed.
class Iconv
  # Forward #iconv calls to the converter stored by #initialize.
  # NOTE(review): `delegate` is not defined in this file; presumably it comes
  # from the activesupport require at the top -- confirm.
  delegate :iconv, :to => :@converter

  # to   - target encoding name, passed through unchanged to the converter.
  # from - source encoding name; defaults to 'ANSEL'.
  #
  # When the source encoding is exactly 'ANSEL' the Convert class from this
  # module is used; any other source encoding is handed to the top-level
  # ::Iconv class together with the target encoding.
  def initialize(to, from = 'ANSEL')
    @converter =
      if from == 'ANSEL'
        Convert.new(to)
      else
        ::Iconv.new(to, from)
      end
  end
end
|
12
|
+
|
13
|
+
class Convert
|
14
|
+
@@non_combining = {
|
15
|
+
"ERR" => "\xFF\xFD", # � - REPLACEMENT CHARACTER
|
16
|
+
"88" => "", # NON-SORT BEGIN / START OF STRING
|
17
|
+
"89" => "", # NON-SORT END / STRING TERMINATOR
|
18
|
+
"8D" => "", # JOINER / ZERO WIDTH JOINER
|
19
|
+
"8E" => "", # NON-JOINER / ZERO WIDTH NON-JOINER
|
20
|
+
"A1" => "\x01\x41", # Ł - UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH STROKE
|
21
|
+
"A2" => "\x00\xD8", # Ø - UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER O WITH STROKE
|
22
|
+
"A3" => "\x01\x10", # Đ - UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER D WITH STROKE
|
23
|
+
"A4" => "\x00\xDE", # Þ - UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER THORN (Icelandic)
|
24
|
+
"A5" => "\x00\xC6", # Æ - UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE AE
|
25
|
+
"A6" => "\x01\x52", # Œ - UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE OE
|
26
|
+
"A7" => "\x02\xB9", # ʹ - SOFT SIGN, PRIME / MODIFIER LETTER PRIME
|
27
|
+
"A8" => "\x00\xB7", # · - MIDDLE DOT
|
28
|
+
"A9" => "\x26\x6D", # ♭ - MUSIC FLAT SIGN
|
29
|
+
"AA" => "\x00\xAE", # ® - PATENT MARK / REGISTERED SIGN
|
30
|
+
"AB" => "\x00\xB1", # ± - PLUS OR MINUS / PLUS-MINUS SIGN
|
31
|
+
"AC" => "\x01\xA0", # Ơ - UPPERCASE O-HOOK / LATIN CAPITAL LETTER O WITH HORN
|
32
|
+
"AD" => "\x01\xAF", # Ư - UPPERCASE U-HOOK / LATIN CAPITAL LETTER U WITH HORN
|
33
|
+
"AE" => "\x02\xBC", # ʼ - ALIF / MODIFIER LETTER APOSTROPHE
|
34
|
+
"B0" => "\x02\xBB", # ʻ - AYN / MODIFIER LETTER TURNED COMMA
|
35
|
+
"B1" => "\x01\x42", # ł - LOWERCASE POLISH L / LATIN SMALL LETTER L WITH STROKE
|
36
|
+
"B2" => "\x00\xF8", # ø - LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O WITH STROKE
|
37
|
+
"B3" => "\x01\x11", # đ - LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER D WITH STROKE
|
38
|
+
"B4" => "\x00\xFE", # þ - LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER THORN (Icelandic)
|
39
|
+
"B5" => "\x00\xE6", # æ - LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE AE
|
40
|
+
"B6" => "\x01\x53", # œ - LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE OE
|
41
|
+
"B7" => "\x02\xBA", # ʺ - HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE PRIME
|
42
|
+
"B8" => "\x01\x31", # ı - LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS I
|
43
|
+
"B9" => "\x00\xA3", # £ - BRITISH POUND / POUND SIGN
|
44
|
+
"BA" => "\x00\xF0", # ð - LOWERCASE ETH / LATIN SMALL LETTER ETH (Icelandic)
|
45
|
+
"BC" => "\x01\xA1", # ơ - LOWERCASE O-HOOK / LATIN SMALL LETTER O WITH HORN
|
46
|
+
"BD" => "\x01\xB0", # ư - LOWERCASE U-HOOK / LATIN SMALL LETTER U WITH HORN
|
47
|
+
"C0" => "\x00\xB0", # ° - DEGREE SIGN
|
48
|
+
"C1" => "\x21\x13", # ℓ - SCRIPT SMALL L
|
49
|
+
"C2" => "\x21\x17", # ℗ - SOUND RECORDING COPYRIGHT
|
50
|
+
"C3" => "\x00\xA9", # © - COPYRIGHT SIGN
|
51
|
+
"C4" => "\x26\x6F", # ♯ - MUSIC SHARP SIGN
|
52
|
+
"C5" => "\x00\xBF", # ¿ - INVERTED QUESTION MARK
|
53
|
+
"C6" => "\x00\xA1", # ¡ - INVERTED EXCLAMATION MARK
|
54
|
+
"C7" => "\x00\xDF", # ß - ESZETT SYMBOL
|
55
|
+
"C8" => "\x20\xAC" # € - EURO SIGN
|
56
|
+
}
|
57
|
+
|
58
|
+
@@combining = {
|
59
|
+
"E0+41" => "\x1E\xA2", # Ả - LATIN CAPITAL LETTER A WITH HOOK ABOVE
|
60
|
+
"E0+45" => "\x1E\xBA", # LATIN CAPITAL LETTER E WITH HOOK ABOVE
|
61
|
+
"E0+49" => "\x1E\xC8", # LATIN CAPITAL LETTER I WITH HOOK ABOVE
|
62
|
+
"E0+4F" => "\x1E\xCE", # LATIN CAPITAL LETTER O WITH HOOK ABOVE
|
63
|
+
"E0+55" => "\x1E\xE6", # LATIN CAPITAL LETTER U WITH HOOK ABOVE
|
64
|
+
"E0+59" => "\x1E\xF6", # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
|
65
|
+
"E0+61" => "\x1E\xA3", # LATIN SMALL LETTER A WITH HOOK ABOVE
|
66
|
+
"E0+65" => "\x1E\xBB", # LATIN SMALL LETTER E WITH HOOK ABOVE
|
67
|
+
"E0+69" => "\x1E\xC9", # LATIN SMALL LETTER I WITH HOOK ABOVE
|
68
|
+
"E0+6F" => "\x1E\xCF", # LATIN SMALL LETTER O WITH HOOK ABOVE
|
69
|
+
"E0+75" => "\x1E\xE7", # LATIN SMALL LETTER U WITH HOOK ABOVE
|
70
|
+
"E0+79" => "\x1E\xF7", # LATIN SMALL LETTER Y WITH HOOK ABOVE
|
71
|
+
"E0+E3+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
72
|
+
"E0+E3+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
73
|
+
"E0+E3+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
74
|
+
"E0+E3+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
75
|
+
"E0+E3+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
76
|
+
"E0+E3+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
77
|
+
"E0+E6+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
|
78
|
+
"E0+E6+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
|
79
|
+
"E0" => "\x03\x09", # COMBINING HOOK ABOVE
|
80
|
+
"E1+41" => "\x00\xC0", # LATIN CAPITAL LETTER A WITH GRAVE
|
81
|
+
"E1+45" => "\x00\xC8", # LATIN CAPITAL LETTER E WITH GRAVE
|
82
|
+
"E1+49" => "\x00\xCC", # LATIN CAPITAL LETTER I WITH GRAVE
|
83
|
+
"E1+4F" => "\x00\xD2", # LATIN CAPITAL LETTER O WITH GRAVE
|
84
|
+
"E1+55" => "\x00\xD9", # LATIN CAPITAL LETTER U WITH GRAVE
|
85
|
+
"E1+57" => "\x1E\x80", # LATIN CAPITAL LETTER W WITH GRAVE
|
86
|
+
"E1+59" => "\x1E\xF2", # LATIN CAPITAL LETTER Y WITH GRAVE
|
87
|
+
"E1+61" => "\x00\xE0", # LATIN SMALL LETTER A WITH GRAVE
|
88
|
+
"E1+65" => "\x00\xE8", # LATIN SMALL LETTER E WITH GRAVE
|
89
|
+
"E1+69" => "\x00\xEC", # LATIN SMALL LETTER I WITH GRAVE
|
90
|
+
"E1+6F" => "\x00\xF2", # LATIN SMALL LETTER O WITH GRAVE
|
91
|
+
"E1+75" => "\x00\xF9", # LATIN SMALL LETTER U WITH GRAVE
|
92
|
+
"E1+77" => "\x1E\x81", # LATIN SMALL LETTER W WITH GRAVE
|
93
|
+
"E1+79" => "\x1E\xF3", # LATIN SMALL LETTER Y WITH GRAVE
|
94
|
+
"E1+E3+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
|
95
|
+
"E1+E3+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
|
96
|
+
"E1+E3+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
|
97
|
+
"E1+E3+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
|
98
|
+
"E1+E3+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
|
99
|
+
"E1+E3+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
|
100
|
+
"E1+E5+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
|
101
|
+
"E1+E5+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
|
102
|
+
"E1+E5+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
|
103
|
+
"E1+E5+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
|
104
|
+
"E1+E6+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
|
105
|
+
"E1+E6+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
|
106
|
+
"E1+E8+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
|
107
|
+
"E1+E8+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
|
108
|
+
"E1" => "\x03\x00", # COMBINING GRAVE ACCENT
|
109
|
+
"E2+41" => "\x00\xC1", # LATIN CAPITAL LETTER A WITH ACUTE
|
110
|
+
"E2+43" => "\x01\x06", # LATIN CAPITAL LETTER C WITH ACUTE
|
111
|
+
"E2+45" => "\x00\xC9", # LATIN CAPITAL LETTER E WITH ACUTE
|
112
|
+
"E2+47" => "\x01\xF4", # LATIN CAPITAL LETTER G WITH ACUTE
|
113
|
+
"E2+49" => "\x00\xCD", # LATIN CAPITAL LETTER I WITH ACUTE
|
114
|
+
"E2+4B" => "\x1E\x30", # LATIN CAPITAL LETTER K WITH ACUTE
|
115
|
+
"E2+4C" => "\x01\x39", # LATIN CAPITAL LETTER L WITH ACUTE
|
116
|
+
"E2+4D" => "\x1E\x3E", # LATIN CAPITAL LETTER M WITH ACUTE
|
117
|
+
"E2+4E" => "\x01\x43", # LATIN CAPITAL LETTER N WITH ACUTE
|
118
|
+
"E2+4F" => "\x00\xD3", # LATIN CAPITAL LETTER O WITH ACUTE
|
119
|
+
"E2+50" => "\x1E\x54", # LATIN CAPITAL LETTER P WITH ACUTE
|
120
|
+
"E2+52" => "\x01\x54", # LATIN CAPITAL LETTER R WITH ACUTE
|
121
|
+
"E2+53" => "\x01\x5A", # LATIN CAPITAL LETTER S WITH ACUTE
|
122
|
+
"E2+55" => "\x00\xDA", # LATIN CAPITAL LETTER U WITH ACUTE
|
123
|
+
"E2+57" => "\x1E\x82", # LATIN CAPITAL LETTER W WITH ACUTE
|
124
|
+
"E2+59" => "\x00\xDD", # LATIN CAPITAL LETTER Y WITH ACUTE
|
125
|
+
"E2+5A" => "\x01\x79", # LATIN CAPITAL LETTER Z WITH ACUTE
|
126
|
+
"E2+61" => "\x00\xE1", # LATIN SMALL LETTER A WITH ACUTE
|
127
|
+
"E2+63" => "\x01\x07", # LATIN SMALL LETTER C WITH ACUTE
|
128
|
+
"E2+65" => "\x00\xE9", # LATIN SMALL LETTER E WITH ACUTE
|
129
|
+
"E2+67" => "\x01\xF5", # LATIN SMALL LETTER G WITH ACUTE
|
130
|
+
"E2+69" => "\x00\xED", # LATIN SMALL LETTER I WITH ACUTE
|
131
|
+
"E2+6B" => "\x1E\x31", # LATIN SMALL LETTER K WITH ACUTE
|
132
|
+
"E2+6C" => "\x01\x3A", # LATIN SMALL LETTER L WITH ACUTE
|
133
|
+
"E2+6D" => "\x1E\x3F", # LATIN SMALL LETTER M WITH ACUTE
|
134
|
+
"E2+6E" => "\x01\x44", # LATIN SMALL LETTER N WITH ACUTE
|
135
|
+
"E2+6F" => "\x00\xF3", # LATIN SMALL LETTER O WITH ACUTE
|
136
|
+
"E2+70" => "\x1E\x55", # LATIN SMALL LETTER P WITH ACUTE
|
137
|
+
"E2+72" => "\x01\x55", # LATIN SMALL LETTER R WITH ACUTE
|
138
|
+
"E2+73" => "\x01\x5B", # LATIN SMALL LETTER S WITH ACUTE
|
139
|
+
"E2+75" => "\x00\xFA", # LATIN SMALL LETTER U WITH ACUTE
|
140
|
+
"E2+77" => "\x1E\x83", # LATIN SMALL LETTER W WITH ACUTE
|
141
|
+
"E2+79" => "\x00\xFD", # LATIN SMALL LETTER Y WITH ACUTE
|
142
|
+
"E2+7A" => "\x01\x7A", # LATIN SMALL LETTER Z WITH ACUTE
|
143
|
+
"E2+A5" => "\x01\xFC", # LATIN CAPITAL LETTER AE WITH ACUTE
|
144
|
+
"E2+B5" => "\x01\xFD", # LATIN SMALL LETTER AE WITH ACUTE
|
145
|
+
"E2+E3+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
|
146
|
+
"E2+E3+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
|
147
|
+
"E2+E3+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
|
148
|
+
"E2+E3+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
|
149
|
+
"E2+E3+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
|
150
|
+
"E2+E3+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
|
151
|
+
"E2+E4+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
|
152
|
+
"E2+E4+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
|
153
|
+
"E2+E4+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
|
154
|
+
"E2+E4+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
|
155
|
+
"E2+E5+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
|
156
|
+
"E2+E5+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
|
157
|
+
"E2+E5+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
|
158
|
+
"E2+E5+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
|
159
|
+
"E2+E6+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
|
160
|
+
"E2+E6+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
|
161
|
+
"E2+E7+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
|
162
|
+
"E2+E7+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
|
163
|
+
"E2+E8+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
164
|
+
"E2+E8+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
|
165
|
+
"E2+E8+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
|
166
|
+
"E2+E8+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
|
167
|
+
"E2+EA+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
|
168
|
+
"E2+EA+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
|
169
|
+
"E2+F0+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
|
170
|
+
"E2+F0+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
|
171
|
+
"E2" => "\x03\x01", # COMBINING ACUTE ACCENT
|
172
|
+
"E3+41" => "\x00\xC2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
173
|
+
"E3+43" => "\x01\x08", # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
174
|
+
"E3+45" => "\x00\xCA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
175
|
+
"E3+47" => "\x01\x1C", # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
176
|
+
"E3+48" => "\x01\x24", # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
177
|
+
"E3+49" => "\x00\xCE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
178
|
+
"E3+4A" => "\x01\x34", # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
179
|
+
"E3+4F" => "\x00\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
180
|
+
"E3+53" => "\x01\x5C", # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
181
|
+
"E3+55" => "\x00\xDB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
182
|
+
"E3+57" => "\x01\x74", # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
183
|
+
"E3+59" => "\x01\x76", # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
184
|
+
"E3+5A" => "\x1E\x90", # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
|
185
|
+
"E3+61" => "\x00\xE2", # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
186
|
+
"E3+63" => "\x01\x09", # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
187
|
+
"E3+65" => "\x00\xEA", # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
188
|
+
"E3+67" => "\x01\x1D", # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
189
|
+
"E3+68" => "\x01\x25", # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
190
|
+
"E3+69" => "\x00\xEE", # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
191
|
+
"E3+6A" => "\x01\x35", # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
192
|
+
"E3+6F" => "\x00\xF4", # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
193
|
+
"E3+73" => "\x01\x5D", # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
194
|
+
"E3+75" => "\x00\xFB", # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
195
|
+
"E3+77" => "\x01\x75", # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
196
|
+
"E3+79" => "\x01\x77", # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
197
|
+
"E3+7A" => "\x1E\x91", # LATIN SMALL LETTER Z WITH CIRCUMFLEX
|
198
|
+
"E3+E0+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
199
|
+
"E3+E0+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
200
|
+
"E3+E0+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
201
|
+
"E3+E0+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
202
|
+
"E3+E0+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
203
|
+
"E3+E0+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
204
|
+
"E3+E1+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
|
205
|
+
"E3+E1+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
|
206
|
+
"E3+E1+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
|
207
|
+
"E3+E1+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
|
208
|
+
"E3+E1+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
|
209
|
+
"E3+E1+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
|
210
|
+
"E3+E2+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
|
211
|
+
"E3+E2+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
|
212
|
+
"E3+E2+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
|
213
|
+
"E3+E2+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
|
214
|
+
"E3+E2+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
|
215
|
+
"E3+E2+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
|
216
|
+
"E3+E4+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
|
217
|
+
"E3+E4+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
|
218
|
+
"E3+E4+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
|
219
|
+
"E3+E4+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
|
220
|
+
"E3+E4+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
|
221
|
+
"E3+E4+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
|
222
|
+
"E3+F2+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
223
|
+
"E3+F2+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
224
|
+
"E3+F2+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
225
|
+
"E3+F2+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
226
|
+
"E3+F2+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
227
|
+
"E3+F2+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
228
|
+
"E3" => "\x03\x02", # COMBINING CIRCUMFLEX ACCENT
|
229
|
+
"E4+41" => "\x00\xC3", # LATIN CAPITAL LETTER A WITH TILDE
|
230
|
+
"E4+45" => "\x1E\xBC", # LATIN CAPITAL LETTER E WITH TILDE
|
231
|
+
"E4+49" => "\x01\x28", # LATIN CAPITAL LETTER I WITH TILDE
|
232
|
+
"E4+4E" => "\x00\xD1", # LATIN CAPITAL LETTER N WITH TILDE
|
233
|
+
"E4+4F" => "\x00\xD5", # LATIN CAPITAL LETTER O WITH TILDE
|
234
|
+
"E4+55" => "\x01\x68", # LATIN CAPITAL LETTER U WITH TILDE
|
235
|
+
"E4+56" => "\x1E\x7C", # LATIN CAPITAL LETTER V WITH TILDE
|
236
|
+
"E4+59" => "\x1E\xF8", # LATIN CAPITAL LETTER Y WITH TILDE
|
237
|
+
"E4+61" => "\x00\xE3", # LATIN SMALL LETTER A WITH TILDE
|
238
|
+
"E4+65" => "\x1E\xBD", # LATIN SMALL LETTER E WITH TILDE
|
239
|
+
"E4+69" => "\x01\x29", # LATIN SMALL LETTER I WITH TILDE
|
240
|
+
"E4+6E" => "\x00\xF1", # LATIN SMALL LETTER N WITH TILDE
|
241
|
+
"E4+6F" => "\x00\xF5", # LATIN SMALL LETTER O WITH TILDE
|
242
|
+
"E4+75" => "\x01\x69", # LATIN SMALL LETTER U WITH TILDE
|
243
|
+
"E4+76" => "\x1E\x7D", # LATIN SMALL LETTER V WITH TILDE
|
244
|
+
"E4+79" => "\x1E\xF9", # LATIN SMALL LETTER Y WITH TILDE
|
245
|
+
"E4+E2+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
|
246
|
+
"E4+E2+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
|
247
|
+
"E4+E2+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
|
248
|
+
"E4+E2+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
|
249
|
+
"E4+E3+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
|
250
|
+
"E4+E3+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
|
251
|
+
"E4+E3+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
|
252
|
+
"E4+E3+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
|
253
|
+
"E4+E3+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
|
254
|
+
"E4+E3+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
|
255
|
+
"E4+E6+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
|
256
|
+
"E4+E6+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
|
257
|
+
"E4+E8+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
|
258
|
+
"E4+E8+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
|
259
|
+
"E4" => "\x03\x03", # COMBINING TILDE
|
260
|
+
"E5+41" => "\x01\x00", # LATIN CAPITAL LETTER A WITH MACRON
|
261
|
+
"E5+45" => "\x01\x12", # LATIN CAPITAL LETTER E WITH MACRON
|
262
|
+
"E5+47" => "\x1E\x20", # LATIN CAPITAL LETTER G WITH MACRON
|
263
|
+
"E5+49" => "\x01\x2A", # LATIN CAPITAL LETTER I WITH MACRON
|
264
|
+
"E5+4F" => "\x01\x4C", # LATIN CAPITAL LETTER O WITH MACRON
|
265
|
+
"E5+55" => "\x01\x6A", # LATIN CAPITAL LETTER U WITH MACRON
|
266
|
+
"E5+61" => "\x01\x01", # LATIN SMALL LETTER A WITH MACRON
|
267
|
+
"E5+65" => "\x01\x13", # LATIN SMALL LETTER E WITH MACRON
|
268
|
+
"E5+67" => "\x1E\x21", # LATIN SMALL LETTER G WITH MACRON
|
269
|
+
"E5+69" => "\x01\x2B", # LATIN SMALL LETTER I WITH MACRON
|
270
|
+
"E5+6F" => "\x01\x4D", # LATIN SMALL LETTER O WITH MACRON
|
271
|
+
"E5+75" => "\x01\x6B", # LATIN SMALL LETTER U WITH MACRON
|
272
|
+
"E5+A5" => "\x01\xE2", # LATIN CAPITAL LETTER AE WITH MACRON
|
273
|
+
"E5+B5" => "\x01\xE3", # LATIN SMALL LETTER AE WITH MACRON
|
274
|
+
"E5+E1+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
|
275
|
+
"E5+E1+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
|
276
|
+
"E5+E1+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
|
277
|
+
"E5+E1+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
|
278
|
+
"E5+E2+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
|
279
|
+
"E5+E2+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
|
280
|
+
"E5+E2+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
|
281
|
+
"E5+E2+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
|
282
|
+
"E5+E7+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
|
283
|
+
"E5+E7+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
|
284
|
+
"E5+E8+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
|
285
|
+
"E5+E8+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
286
|
+
"E5+E8+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
|
287
|
+
"E5+E8+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
|
288
|
+
"E5+F1+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
|
289
|
+
"E5+F1+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
|
290
|
+
"E5+F2+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
|
291
|
+
"E5+F2+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
|
292
|
+
"E5+F2+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
|
293
|
+
"E5+F2+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
|
294
|
+
"E5" => "\x03\x04", # COMBINING MACRON
|
295
|
+
"E6+41" => "\x01\x02", # LATIN CAPITAL LETTER A WITH BREVE
|
296
|
+
"E6+45" => "\x01\x14", # LATIN CAPITAL LETTER E WITH BREVE
|
297
|
+
"E6+47" => "\x01\x1E", # LATIN CAPITAL LETTER G WITH BREVE
|
298
|
+
"E6+49" => "\x01\x2C", # LATIN CAPITAL LETTER I WITH BREVE
|
299
|
+
"E6+4F" => "\x01\x4E", # LATIN CAPITAL LETTER O WITH BREVE
|
300
|
+
"E6+55" => "\x01\x6C", # LATIN CAPITAL LETTER U WITH BREVE
|
301
|
+
"E6+61" => "\x01\x03", # LATIN SMALL LETTER A WITH BREVE
|
302
|
+
"E6+65" => "\x01\x15", # LATIN SMALL LETTER E WITH BREVE
|
303
|
+
"E6+67" => "\x01\x1F", # LATIN SMALL LETTER G WITH BREVE
|
304
|
+
"E6+69" => "\x01\x2D", # LATIN SMALL LETTER I WITH BREVE
|
305
|
+
"E6+6F" => "\x01\x4F", # LATIN SMALL LETTER O WITH BREVE
|
306
|
+
"E6+75" => "\x01\x6D", # LATIN SMALL LETTER U WITH BREVE
|
307
|
+
"E6+E0+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
|
308
|
+
"E6+E0+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
|
309
|
+
"E6+E1+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
|
310
|
+
"E6+E1+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
|
311
|
+
"E6+E2+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
|
312
|
+
"E6+E2+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
|
313
|
+
"E6+E4+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
|
314
|
+
"E6+E4+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
|
315
|
+
"E6+F0+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
|
316
|
+
"E6+F0+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
|
317
|
+
"E6+F2+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
|
318
|
+
"E6+F2+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
|
319
|
+
"E6" => "\x03\x06", # COMBINING BREVE
|
320
|
+
"E7+42" => "\x1E\x02", # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
321
|
+
"E7+43" => "\x01\x0A", # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
322
|
+
"E7+44" => "\x1E\x0A", # LATIN CAPITAL LETTER D WITH DOT ABOVE
|
323
|
+
"E7+45" => "\x01\x16", # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
324
|
+
"E7+46" => "\x1E\x1E", # LATIN CAPITAL LETTER F WITH DOT ABOVE
|
325
|
+
"E7+47" => "\x01\x20", # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
326
|
+
"E7+48" => "\x1E\x22", # LATIN CAPITAL LETTER H WITH DOT ABOVE
|
327
|
+
"E7+49" => "\x01\x30", # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
328
|
+
"E7+4D" => "\x1E\x40", # LATIN CAPITAL LETTER M WITH DOT ABOVE
|
329
|
+
"E7+4E" => "\x1E\x44", # LATIN CAPITAL LETTER N WITH DOT ABOVE
|
330
|
+
"E7+50" => "\x1E\x56", # LATIN CAPITAL LETTER P WITH DOT ABOVE
|
331
|
+
"E7+52" => "\x1E\x58", # LATIN CAPITAL LETTER R WITH DOT ABOVE
|
332
|
+
"E7+53" => "\x1E\x60", # LATIN CAPITAL LETTER S WITH DOT ABOVE
|
333
|
+
"E7+54" => "\x1E\x6A", # LATIN CAPITAL LETTER T WITH DOT ABOVE
|
334
|
+
"E7+57" => "\x1E\x86", # LATIN CAPITAL LETTER W WITH DOT ABOVE
|
335
|
+
"E7+58" => "\x1E\x8A", # LATIN CAPITAL LETTER X WITH DOT ABOVE
|
336
|
+
"E7+59" => "\x1E\x8E", # LATIN CAPITAL LETTER Y WITH DOT ABOVE
|
337
|
+
"E7+5A" => "\x01\x7B", # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
338
|
+
"E7+62" => "\x1E\x03", # LATIN SMALL LETTER B WITH DOT ABOVE
|
339
|
+
"E7+63" => "\x01\x0B", # LATIN SMALL LETTER C WITH DOT ABOVE
|
340
|
+
"E7+64" => "\x1E\x0B", # LATIN SMALL LETTER D WITH DOT ABOVE
|
341
|
+
"E7+65" => "\x01\x17", # LATIN SMALL LETTER E WITH DOT ABOVE
|
342
|
+
"E7+66" => "\x1E\x1F", # LATIN SMALL LETTER F WITH DOT ABOVE
|
343
|
+
"E7+67" => "\x01\x21", # LATIN SMALL LETTER G WITH DOT ABOVE
|
344
|
+
"E7+68" => "\x1E\x23", # LATIN SMALL LETTER H WITH DOT ABOVE
|
345
|
+
"E7+6D" => "\x1E\x41", # LATIN SMALL LETTER M WITH DOT ABOVE
|
346
|
+
"E7+6E" => "\x1E\x45", # LATIN SMALL LETTER N WITH DOT ABOVE
|
347
|
+
"E7+70" => "\x1E\x57", # LATIN SMALL LETTER P WITH DOT ABOVE
|
348
|
+
"E7+72" => "\x1E\x59", # LATIN SMALL LETTER R WITH DOT ABOVE
|
349
|
+
"E7+73" => "\x1E\x61", # LATIN SMALL LETTER S WITH DOT ABOVE
|
350
|
+
"E7+74" => "\x1E\x6B", # LATIN SMALL LETTER T WITH DOT ABOVE
|
351
|
+
"E7+77" => "\x1E\x87", # LATIN SMALL LETTER W WITH DOT ABOVE
|
352
|
+
"E7+78" => "\x1E\x8B", # LATIN SMALL LETTER X WITH DOT ABOVE
|
353
|
+
"E7+79" => "\x1E\x8F", # LATIN SMALL LETTER Y WITH DOT ABOVE
|
354
|
+
"E7+7A" => "\x01\x7C", # LATIN SMALL LETTER Z WITH DOT ABOVE
|
355
|
+
"E7+E2+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
|
356
|
+
"E7+E2+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
|
357
|
+
"E7+E5+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
|
358
|
+
"E7+E5+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
|
359
|
+
"E7+E9+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
|
360
|
+
"E7+E9+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
|
361
|
+
"E7+F2+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
|
362
|
+
"E7+F2+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
|
363
|
+
"E7" => "\x03\x07", # COMBINING DOT ABOVE
|
364
|
+
"E8+41" => "\x00\xC4", # LATIN CAPITAL LETTER A WITH DIAERESIS
|
365
|
+
"E8+45" => "\x00\xCB", # LATIN CAPITAL LETTER E WITH DIAERESIS
|
366
|
+
"E8+48" => "\x1E\x26", # LATIN CAPITAL LETTER H WITH DIAERESIS
|
367
|
+
"E8+49" => "\x00\xCF", # LATIN CAPITAL LETTER I WITH DIAERESIS
|
368
|
+
"E8+4F" => "\x00\xD6", # LATIN CAPITAL LETTER O WITH DIAERESIS
|
369
|
+
"E8+55" => "\x00\xDC", # LATIN CAPITAL LETTER U WITH DIAERESIS
|
370
|
+
"E8+57" => "\x1E\x84", # LATIN CAPITAL LETTER W WITH DIAERESIS
|
371
|
+
"E8+58" => "\x1E\x8C", # LATIN CAPITAL LETTER X WITH DIAERESIS
|
372
|
+
"E8+59" => "\x01\x78", # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
373
|
+
"E8+61" => "\x00\xE4", # LATIN SMALL LETTER A WITH DIAERESIS
|
374
|
+
"E8+65" => "\x00\xEB", # LATIN SMALL LETTER E WITH DIAERESIS
|
375
|
+
"E8+68" => "\x1E\x27", # LATIN SMALL LETTER H WITH DIAERESIS
|
376
|
+
"E8+69" => "\x00\xEF", # LATIN SMALL LETTER I WITH DIAERESIS
|
377
|
+
"E8+6F" => "\x00\xF6", # LATIN SMALL LETTER O WITH DIAERESIS
|
378
|
+
"E8+74" => "\x1E\x97", # LATIN SMALL LETTER T WITH DIAERESIS
|
379
|
+
"E8+75" => "\x00\xFC", # LATIN SMALL LETTER U WITH DIAERESIS
|
380
|
+
"E8+77" => "\x1E\x85", # LATIN SMALL LETTER W WITH DIAERESIS
|
381
|
+
"E8+78" => "\x1E\x8D", # LATIN SMALL LETTER X WITH DIAERESIS
|
382
|
+
"E8+79" => "\x00\xFF", # LATIN SMALL LETTER Y WITH DIAERESIS
|
383
|
+
"E8+E1+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
|
384
|
+
"E8+E1+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
|
385
|
+
"E8+E2+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
386
|
+
"E8+E2+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
|
387
|
+
"E8+E2+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
|
388
|
+
"E8+E2+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
|
389
|
+
"E8+E4+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
|
390
|
+
"E8+E4+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
|
391
|
+
"E8+E5+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
|
392
|
+
"E8+E5+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
393
|
+
"E8+E5+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
|
394
|
+
"E8+E5+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
|
395
|
+
"E8+E9+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
|
396
|
+
"E8+E9+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
|
397
|
+
"E8" => "\x03\x08", # COMBINING DIAERESIS
|
398
|
+
"E9+41" => "\x01\xCD", # LATIN CAPITAL LETTER A WITH CARON
|
399
|
+
"E9+43" => "\x01\x0C", # LATIN CAPITAL LETTER C WITH CARON
|
400
|
+
"E9+44" => "\x01\x0E", # LATIN CAPITAL LETTER D WITH CARON
|
401
|
+
"E9+45" => "\x01\x1A", # LATIN CAPITAL LETTER E WITH CARON
|
402
|
+
"E9+47" => "\x01\xE6", # LATIN CAPITAL LETTER G WITH CARON
|
403
|
+
"E9+49" => "\x01\xCF", # LATIN CAPITAL LETTER I WITH CARON
|
404
|
+
"E9+4B" => "\x01\xE8", # LATIN CAPITAL LETTER K WITH CARON
|
405
|
+
"E9+4C" => "\x01\x3D", # LATIN CAPITAL LETTER L WITH CARON
|
406
|
+
"E9+4E" => "\x01\x47", # LATIN CAPITAL LETTER N WITH CARON
|
407
|
+
"E9+4F" => "\x01\xD1", # LATIN CAPITAL LETTER O WITH CARON
|
408
|
+
"E9+52" => "\x01\x58", # LATIN CAPITAL LETTER R WITH CARON
|
409
|
+
"E9+53" => "\x01\x60", # LATIN CAPITAL LETTER S WITH CARON
|
410
|
+
"E9+54" => "\x01\x64", # LATIN CAPITAL LETTER T WITH CARON
|
411
|
+
"E9+55" => "\x01\xD3", # LATIN CAPITAL LETTER U WITH CARON
|
412
|
+
"E9+5A" => "\x01\x7D", # LATIN CAPITAL LETTER Z WITH CARON
|
413
|
+
"E9+61" => "\x01\xCE", # LATIN SMALL LETTER A WITH CARON
|
414
|
+
"E9+63" => "\x01\x0D", # LATIN SMALL LETTER C WITH CARON
|
415
|
+
"E9+64" => "\x01\x0F", # LATIN SMALL LETTER D WITH CARON
|
416
|
+
"E9+65" => "\x01\x1B", # LATIN SMALL LETTER E WITH CARON
|
417
|
+
"E9+67" => "\x01\xE7", # LATIN SMALL LETTER G WITH CARON
|
418
|
+
"E9+69" => "\x01\xD0", # LATIN SMALL LETTER I WITH CARON
|
419
|
+
"E9+6A" => "\x01\xF0", # LATIN SMALL LETTER J WITH CARON
|
420
|
+
"E9+6B" => "\x01\xE9", # LATIN SMALL LETTER K WITH CARON
|
421
|
+
"E9+6C" => "\x01\x3E", # LATIN SMALL LETTER L WITH CARON
|
422
|
+
"E9+6E" => "\x01\x48", # LATIN SMALL LETTER N WITH CARON
|
423
|
+
"E9+6F" => "\x01\xD2", # LATIN SMALL LETTER O WITH CARON
|
424
|
+
"E9+72" => "\x01\x59", # LATIN SMALL LETTER R WITH CARON
|
425
|
+
"E9+73" => "\x01\x61", # LATIN SMALL LETTER S WITH CARON
|
426
|
+
"E9+74" => "\x01\x65", # LATIN SMALL LETTER T WITH CARON
|
427
|
+
"E9+75" => "\x01\xD4", # LATIN SMALL LETTER U WITH CARON
|
428
|
+
"E9+7A" => "\x01\x7E", # LATIN SMALL LETTER Z WITH CARON
|
429
|
+
"E9+E7+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
|
430
|
+
"E9+E7+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
|
431
|
+
"E9+E8+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
|
432
|
+
"E9+E8+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
|
433
|
+
"E9" => "\x03\x0C", # COMBINING CARON
|
434
|
+
"EA+41" => "\x00\xC5", # LATIN CAPITAL LETTER A WITH RING ABOVE
|
435
|
+
"EA+55" => "\x01\x6E", # LATIN CAPITAL LETTER U WITH RING ABOVE
|
436
|
+
"EA+61" => "\x00\xE5", # LATIN SMALL LETTER A WITH RING ABOVE
|
437
|
+
"EA+75" => "\x01\x6F", # LATIN SMALL LETTER U WITH RING ABOVE
|
438
|
+
"EA+77" => "\x1E\x98", # LATIN SMALL LETTER W WITH RING ABOVE
|
439
|
+
"EA+79" => "\x1E\x99", # LATIN SMALL LETTER Y WITH RING ABOVE
|
440
|
+
"EA+E2+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
|
441
|
+
"EA+E2+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
|
442
|
+
"EA" => "\x03\x0A", # COMBINING RING ABOVE
|
443
|
+
"EB" => "\xFE\x20", # COMBINING LIGATURE LEFT HALF
|
444
|
+
"EC" => "\xFE\x21", # COMBINING LIGATURE RIGHT HALF
|
445
|
+
"ED" => "\x03\x15", # COMBINING COMMA ABOVE RIGHT
|
446
|
+
"EE+4F" => "\x01\x50", # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
447
|
+
"EE+55" => "\x01\x70", # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
448
|
+
"EE+6F" => "\x01\x51", # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
449
|
+
"EE+75" => "\x01\x71", # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
450
|
+
"EE" => "\x03\x0B", # COMBINING DOUBLE ACUTE ACCENT
|
451
|
+
"EF" => "\x03\x10", # COMBINING CANDRABINDU
|
452
|
+
"F0+43" => "\x00\xC7", # LATIN CAPITAL LETTER C WITH CEDILLA
|
453
|
+
"F0+44" => "\x1E\x10", # LATIN CAPITAL LETTER D WITH CEDILLA
|
454
|
+
"F0+47" => "\x01\x22", # LATIN CAPITAL LETTER G WITH CEDILLA
|
455
|
+
"F0+48" => "\x1E\x28", # LATIN CAPITAL LETTER H WITH CEDILLA
|
456
|
+
"F0+4B" => "\x01\x36", # LATIN CAPITAL LETTER K WITH CEDILLA
|
457
|
+
"F0+4C" => "\x01\x3B", # LATIN CAPITAL LETTER L WITH CEDILLA
|
458
|
+
"F0+4E" => "\x01\x45", # LATIN CAPITAL LETTER N WITH CEDILLA
|
459
|
+
"F0+52" => "\x01\x56", # LATIN CAPITAL LETTER R WITH CEDILLA
|
460
|
+
"F0+53" => "\x01\x5E", # LATIN CAPITAL LETTER S WITH CEDILLA
|
461
|
+
"F0+54" => "\x01\x62", # LATIN CAPITAL LETTER T WITH CEDILLA
|
462
|
+
"F0+63" => "\x00\xE7", # LATIN SMALL LETTER C WITH CEDILLA
|
463
|
+
"F0+64" => "\x1E\x11", # LATIN SMALL LETTER D WITH CEDILLA
|
464
|
+
"F0+67" => "\x01\x23", # LATIN SMALL LETTER G WITH CEDILLA
|
465
|
+
"F0+68" => "\x1E\x29", # LATIN SMALL LETTER H WITH CEDILLA
|
466
|
+
"F0+6B" => "\x01\x37", # LATIN SMALL LETTER K WITH CEDILLA
|
467
|
+
"F0+6C" => "\x01\x3C", # LATIN SMALL LETTER L WITH CEDILLA
|
468
|
+
"F0+6E" => "\x01\x46", # LATIN SMALL LETTER N WITH CEDILLA
|
469
|
+
"F0+72" => "\x01\x57", # LATIN SMALL LETTER R WITH CEDILLA
|
470
|
+
"F0+73" => "\x01\x5F", # LATIN SMALL LETTER S WITH CEDILLA
|
471
|
+
"F0+74" => "\x01\x63", # LATIN SMALL LETTER T WITH CEDILLA
|
472
|
+
"F0+E2+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
|
473
|
+
"F0+E2+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
|
474
|
+
"F0+E6+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
|
475
|
+
"F0+E6+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
|
476
|
+
"F0" => "\x03\x27", # COMBINING CEDILLA
|
477
|
+
"F1+41" => "\x01\x04", # LATIN CAPITAL LETTER A WITH OGONEK
|
478
|
+
"F1+45" => "\x01\x18", # LATIN CAPITAL LETTER E WITH OGONEK
|
479
|
+
"F1+49" => "\x01\x2E", # LATIN CAPITAL LETTER I WITH OGONEK
|
480
|
+
"F1+4F" => "\x01\xEA", # LATIN CAPITAL LETTER O WITH OGONEK
|
481
|
+
"F1+55" => "\x01\x72", # LATIN CAPITAL LETTER U WITH OGONEK
|
482
|
+
"F1+61" => "\x01\x05", # LATIN SMALL LETTER A WITH OGONEK
|
483
|
+
"F1+65" => "\x01\x19", # LATIN SMALL LETTER E WITH OGONEK
|
484
|
+
"F1+69" => "\x01\x2F", # LATIN SMALL LETTER I WITH OGONEK
|
485
|
+
"F1+6F" => "\x01\xEB", # LATIN SMALL LETTER O WITH OGONEK
|
486
|
+
"F1+75" => "\x01\x73", # LATIN SMALL LETTER U WITH OGONEK
|
487
|
+
"F1+E5+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
|
488
|
+
"F1+E5+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
|
489
|
+
"F1" => "\x03\x28", # COMBINING OGONEK
|
490
|
+
"F2+41" => "\x1E\xA0", # LATIN CAPITAL LETTER A WITH DOT BELOW
|
491
|
+
"F2+42" => "\x1E\x04", # LATIN CAPITAL LETTER B WITH DOT BELOW
|
492
|
+
"F2+44" => "\x1E\x0C", # LATIN CAPITAL LETTER D WITH DOT BELOW
|
493
|
+
"F2+45" => "\x1E\xB8", # LATIN CAPITAL LETTER E WITH DOT BELOW
|
494
|
+
"F2+48" => "\x1E\x24", # LATIN CAPITAL LETTER H WITH DOT BELOW
|
495
|
+
"F2+49" => "\x1E\xCA", # LATIN CAPITAL LETTER I WITH DOT BELOW
|
496
|
+
"F2+4B" => "\x1E\x32", # LATIN CAPITAL LETTER K WITH DOT BELOW
|
497
|
+
"F2+4C" => "\x1E\x36", # LATIN CAPITAL LETTER L WITH DOT BELOW
|
498
|
+
"F2+4D" => "\x1E\x42", # LATIN CAPITAL LETTER M WITH DOT BELOW
|
499
|
+
"F2+4E" => "\x1E\x46", # LATIN CAPITAL LETTER N WITH DOT BELOW
|
500
|
+
"F2+4F" => "\x1E\xCC", # LATIN CAPITAL LETTER O WITH DOT BELOW
|
501
|
+
"F2+52" => "\x1E\x5A", # LATIN CAPITAL LETTER R WITH DOT BELOW
|
502
|
+
"F2+53" => "\x1E\x62", # LATIN CAPITAL LETTER S WITH DOT BELOW
|
503
|
+
"F2+54" => "\x1E\x6C", # LATIN CAPITAL LETTER T WITH DOT BELOW
|
504
|
+
"F2+55" => "\x1E\xE4", # LATIN CAPITAL LETTER U WITH DOT BELOW
|
505
|
+
"F2+56" => "\x1E\x7E", # LATIN CAPITAL LETTER V WITH DOT BELOW
|
506
|
+
"F2+57" => "\x1E\x88", # LATIN CAPITAL LETTER W WITH DOT BELOW
|
507
|
+
"F2+59" => "\x1E\xF4", # LATIN CAPITAL LETTER Y WITH DOT BELOW
|
508
|
+
"F2+5A" => "\x1E\x92", # LATIN CAPITAL LETTER Z WITH DOT BELOW
|
509
|
+
"F2+61" => "\x1E\xA1", # LATIN SMALL LETTER A WITH DOT BELOW
|
510
|
+
"F2+62" => "\x1E\x05", # LATIN SMALL LETTER B WITH DOT BELOW
|
511
|
+
"F2+64" => "\x1E\x0D", # LATIN SMALL LETTER D WITH DOT BELOW
|
512
|
+
"F2+65" => "\x1E\xB9", # LATIN SMALL LETTER E WITH DOT BELOW
|
513
|
+
"F2+68" => "\x1E\x25", # LATIN SMALL LETTER H WITH DOT BELOW
|
514
|
+
"F2+69" => "\x1E\xCB", # LATIN SMALL LETTER I WITH DOT BELOW
|
515
|
+
"F2+6B" => "\x1E\x33", # LATIN SMALL LETTER K WITH DOT BELOW
|
516
|
+
"F2+6C" => "\x1E\x37", # LATIN SMALL LETTER L WITH DOT BELOW
|
517
|
+
"F2+6D" => "\x1E\x43", # LATIN SMALL LETTER M WITH DOT BELOW
|
518
|
+
"F2+6E" => "\x1E\x47", # LATIN SMALL LETTER N WITH DOT BELOW
|
519
|
+
"F2+6F" => "\x1E\xCD", # LATIN SMALL LETTER O WITH DOT BELOW
|
520
|
+
"F2+72" => "\x1E\x5B", # LATIN SMALL LETTER R WITH DOT BELOW
|
521
|
+
"F2+73" => "\x1E\x63", # LATIN SMALL LETTER S WITH DOT BELOW
|
522
|
+
"F2+74" => "\x1E\x6D", # LATIN SMALL LETTER T WITH DOT BELOW
|
523
|
+
"F2+75" => "\x1E\xE5", # LATIN SMALL LETTER U WITH DOT BELOW
|
524
|
+
"F2+76" => "\x1E\x7F", # LATIN SMALL LETTER V WITH DOT BELOW
|
525
|
+
"F2+77" => "\x1E\x89", # LATIN SMALL LETTER W WITH DOT BELOW
|
526
|
+
"F2+79" => "\x1E\xF5", # LATIN SMALL LETTER Y WITH DOT BELOW
|
527
|
+
"F2+7A" => "\x1E\x93", # LATIN SMALL LETTER Z WITH DOT BELOW
|
528
|
+
"F2+E3+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
529
|
+
"F2+E3+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
530
|
+
"F2+E3+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
531
|
+
"F2+E3+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
532
|
+
"F2+E3+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
533
|
+
"F2+E3+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
534
|
+
"F2+E5+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
|
535
|
+
"F2+E5+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
|
536
|
+
"F2+E5+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
|
537
|
+
"F2+E5+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
|
538
|
+
"F2+E6+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
|
539
|
+
"F2+E6+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
|
540
|
+
"F2+E7+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
|
541
|
+
"F2+E7+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
|
542
|
+
"F2" => "\x03\x23", # COMBINING DOT BELOW
|
543
|
+
"F3+55" => "\x1E\x72", # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
|
544
|
+
"F3+75" => "\x1E\x73", # LATIN SMALL LETTER U WITH DIAERESIS BELOW
|
545
|
+
"F3" => "\x03\x24", # COMBINING DIAERESIS BELOW
|
546
|
+
"F4+41" => "\x1E\x00", # LATIN CAPITAL LETTER A WITH RING BELOW
|
547
|
+
"F4+61" => "\x1E\x01", # LATIN SMALL LETTER A WITH RING BELOW
|
548
|
+
"F4" => "\x03\x25", # COMBINING RING BELOW
|
549
|
+
"F5" => "\x03\x33", # COMBINING DOUBLE LOW LINE
|
550
|
+
"F6+42" => "\x1E\x06", # LATIN CAPITAL LETTER B WITH LINE BELOW
|
551
|
+
"F6+44" => "\x1E\x0E", # LATIN CAPITAL LETTER D WITH LINE BELOW
|
552
|
+
"F6+4B" => "\x1E\x34", # LATIN CAPITAL LETTER K WITH LINE BELOW
|
553
|
+
"F6+4C" => "\x1E\x3A", # LATIN CAPITAL LETTER L WITH LINE BELOW
|
554
|
+
"F6+4E" => "\x1E\x48", # LATIN CAPITAL LETTER N WITH LINE BELOW
|
555
|
+
"F6+52" => "\x1E\x5E", # LATIN CAPITAL LETTER R WITH LINE BELOW
|
556
|
+
"F6+54" => "\x1E\x6E", # LATIN CAPITAL LETTER T WITH LINE BELOW
|
557
|
+
"F6+5A" => "\x1E\x94", # LATIN CAPITAL LETTER Z WITH LINE BELOW
|
558
|
+
"F6+62" => "\x1E\x07", # LATIN SMALL LETTER B WITH LINE BELOW
|
559
|
+
"F6+64" => "\x1E\x0F", # LATIN SMALL LETTER D WITH LINE BELOW
|
560
|
+
"F6+68" => "\x1E\x96", # LATIN SMALL LETTER H WITH LINE BELOW
|
561
|
+
"F6+6B" => "\x1E\x35", # LATIN SMALL LETTER K WITH LINE BELOW
|
562
|
+
"F6+6C" => "\x1E\x3B", # LATIN SMALL LETTER L WITH LINE BELOW
|
563
|
+
"F6+6E" => "\x1E\x49", # LATIN SMALL LETTER N WITH LINE BELOW
|
564
|
+
"F6+72" => "\x1E\x5F", # LATIN SMALL LETTER R WITH LINE BELOW
|
565
|
+
"F6+74" => "\x1E\x6F", # LATIN SMALL LETTER T WITH LINE BELOW
|
566
|
+
"F6+7A" => "\x1E\x95", # LATIN SMALL LETTER Z WITH LINE BELOW
|
567
|
+
"F6" => "\x03\x32", # COMBINING LOW LINE
|
568
|
+
"F7" => "\x03\x26", # COMBINING COMMA BELOW
|
569
|
+
"F8" => "\x03\x21", # COMBINING OGONEK
|
570
|
+
"F9+48" => "\x1E\x2A", # LATIN CAPITAL LETTER H WITH BREVE BELOW
|
571
|
+
"F9+68" => "\x1E\x2B", # LATIN SMALL LETTER H WITH BREVE BELOW
|
572
|
+
"F9" => "\x03\x2E", # COMBINING BREVE BELOW
|
573
|
+
"FA" => "\xFE\x22", # COMBINING DOUBLE TILDE LEFT HALF
|
574
|
+
"FB" => "\xFE\x23" # COMBINING DOUBLE TILDE RIGHT HALF
|
575
|
+
}
|
576
|
+
|
577
|
+
# Creates a converter that emits text in +to_charset+ (UTF-8 by default).
#
# The lookup table used by the conversion methods is assembled here by
# layering the class-level combining table over the non-combining one, so on
# a key collision the combining entry wins.
def initialize(to_charset = 'UTF-8')
  @to_charset = to_charset
  @ansi_to_utf8 = {}
  [@@non_combining, @@combining].each do |table|
    @ansi_to_utf8.merge!(table)
  end
end
|
583
|
+
|
584
|
+
# Converts an ANSEL-encoded byte string to the charset chosen at construction
# time (@to_charset).
#
# The input is consumed one byte at a time and classified by value:
#
# * 0x00-0x7F - ASCII; copied through unchanged.
# * 0x88-0xC8 - looked up by its hex key (e.g. "A1"); bytes without a table
#               entry become the table's 'ERR' entry. The byte that follows
#               is skipped.
# * 0xE0-0xFB - start of a combining sequence; the three-byte key
#               ("F2+E3+41") is tried first, then the two-byte key, then the
#               lone byte, and the first key present in the table is used.
# * otherwise - invalid; the 'ERR' entry is emitted and the next byte is
#               skipped (plus one more when that skipped byte is >= 0xE0).
#
# Every table hit is decoded into a UTF-8 work buffer and the buffer is
# converted to @to_charset once, at the end. Converting each chunk directly to
# the target and then converting the whole buffer again "from UTF-8" mangles
# (or raises on) every non-ASCII character for non-UTF-8 targets.
#
# string - the ANSEL-encoded String to convert.
#
# Returns the converted String.
def iconv(string)
  output = ''
  scanner = StringScanner.new(string)

  until scanner.eos?
    byte = scanner.get_byte
    # unpack yields the numeric byte value on every Ruby version, whereas
    # String#[] with an index only returns an Integer on Ruby 1.8.
    code = byte.unpack('C').first

    if code <= 0x7F
      output << byte
    elsif code >= 0x88 && code <= 0xC8
      hex_key = code.to_s(16).upcase
      entry = @ansi_to_utf8.fetch(hex_key) { @ansi_to_utf8['ERR'] }
      # Table values are big-endian UTF-16 code units without a BOM, so the
      # byte order is named explicitly instead of relying on iconv's default.
      output << ::Iconv.conv('UTF-8', 'UTF-16BE', entry)
      scanner.get_byte # ignore the next byte
    elsif code >= 0xE0 && code <= 0xFB
      [2, 1, 0].each do |extra| # try 3 bytes, then 2 bytes, then 1 byte
        parts = [code.to_s(16).upcase]
        scanner.peek(extra).each_byte { |b| parts << b.to_s(16).upcase }
        hex_key = parts.join('+')
        next unless @ansi_to_utf8.has_key?(hex_key)

        output << ::Iconv.conv('UTF-8', 'UTF-16BE', @ansi_to_utf8[hex_key])
        extra.times { scanner.get_byte } # consume the bytes that formed the key
        break
      end
    else
      output << ::Iconv.conv('UTF-8', 'UTF-16BE', @ansi_to_utf8['ERR'])
      # get_byte returns nil at end of input; guard so that an invalid byte
      # in the last position no longer raises NoMethodError.
      skipped = scanner.get_byte
      scanner.get_byte if skipped && skipped.unpack('C').first >= 0xE0
    end
  end

  @to_charset == 'UTF-8' ? output : ::Iconv.conv(@to_charset, 'UTF-8', output)
end
|
615
|
+
|
616
|
+
# Converts a single ANSEL character, given as a String of its raw bytes, to
# @to_charset.
#
# * An empty string or a single ASCII byte is returned unchanged.
# * A lead byte in 0x80-0xDF is looked up by that byte's hex key alone.
# * A lead byte >= 0xE0 is looked up by the "+"-joined hex of every byte
#   (e.g. "F2+E3+41").
# * Anything without a table entry (including a multi-byte string that starts
#   with an ASCII byte) yields the table's 'ERR' entry.
#
# char - String holding the bytes of one ANSEL character.
#
# Returns the converted String.
def convert_char(char)
  # Work on numeric byte values: String#[] with an index only returns an
  # Integer on Ruby 1.8, and returns nil for an empty string.
  codes = char.unpack('C*')
  lead = codes.first
  return char if lead.nil?
  return char if codes.size == 1 && lead <= 0x7F

  hex_key =
    if lead > 0x7F && lead < 0xE0
      lead.to_s(16).upcase
    elsif lead >= 0xE0
      codes.map { |code| code.to_s(16).upcase }.join('+')
    end

  entry = @ansi_to_utf8.fetch(hex_key) { @ansi_to_utf8['ERR'] }
  # Table values are big-endian UTF-16 code units without a BOM, so the byte
  # order is named explicitly instead of relying on iconv's default.
  ::Iconv.conv(@to_charset, 'UTF-16BE', entry)
end
|
628
|
+
|
629
|
+
end
|
630
|
+
|
631
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests for ANSEL::Iconv: ASCII pass-through, replacement of invalid bytes,
# the non-combining and combining tables, mixed text, and construction with
# an explicit source charset.
class ANSEL::IconvTest < Test::Unit::TestCase
  FIXTURE_PATH = File.dirname(__FILE__) + "/../../../fixtures/gedcom"

  def setup
    @ansel = ANSEL::Iconv.new 'UTF-8'
  end

  should "return ASCII values without conversion" do
    assert_equal " ", @ansel.iconv("\x20")
    assert_equal "x", @ansel.iconv("\x78")
  end

  should "return the unicode replacement character for invalid characters" do
    # "\xEF\xBF\xBD" is U+FFFD encoded as UTF-8.
    assert_equal "\xEF\xBF\xBD", @ansel.iconv("\xBE\x00")
    assert_equal "\xEF\xBF\xBD", @ansel.iconv("\xD1\x00")
  end

  should "return UTF-8 characters for valid ANSEL characters" do
    # ANSEL non-combining mappings
    #
    # The first four expected values are invisible control/format characters,
    # so they are spelled as explicit UTF-8 byte escapes rather than raw
    # literals that editors and diff viewers can silently drop.
    # NOTE(review): code points taken from the MARC-8/ANSEL mapping
    # (0x88 -> U+0098, 0x89 -> U+009C, 0x8D -> U+200D, 0x8E -> U+200C);
    # confirm against the library's non-combining table.
    assert_equal "\xC2\x98", @ansel.iconv("\x88\x00")
    assert_equal "\xC2\x9C", @ansel.iconv("\x89\x00")
    assert_equal "\xE2\x80\x8D", @ansel.iconv("\x8D\x00")
    assert_equal "\xE2\x80\x8C", @ansel.iconv("\x8E\x00")
    assert_equal "Ł", @ansel.iconv("\xA1\x00")
    assert_equal "Ø", @ansel.iconv("\xA2\x00")
    assert_equal "Đ", @ansel.iconv("\xA3\x00")
    assert_equal "Þ", @ansel.iconv("\xA4\x00")
    assert_equal "Æ", @ansel.iconv("\xA5\x00")
    assert_equal "Œ", @ansel.iconv("\xA6\x00")
    assert_equal "ʹ", @ansel.iconv("\xA7\x00")
    assert_equal "·", @ansel.iconv("\xA8\x00")
    assert_equal "♭", @ansel.iconv("\xA9\x00")
    assert_equal "®", @ansel.iconv("\xAA\x00")
    assert_equal "±", @ansel.iconv("\xAB\x00")
    assert_equal "Ơ", @ansel.iconv("\xAC\x00")
    assert_equal "Ư", @ansel.iconv("\xAD\x00")
    assert_equal "ʼ", @ansel.iconv("\xAE\x00")
    assert_equal "ʻ", @ansel.iconv("\xB0\x00")
    assert_equal "ł", @ansel.iconv("\xB1\x00")
    assert_equal "ø", @ansel.iconv("\xB2\x00")
    assert_equal "đ", @ansel.iconv("\xB3\x00")
    assert_equal "þ", @ansel.iconv("\xB4\x00")
    assert_equal "æ", @ansel.iconv("\xB5\x00")
    assert_equal "œ", @ansel.iconv("\xB6\x00")
    assert_equal "ʺ", @ansel.iconv("\xB7\x00")
    assert_equal "ı", @ansel.iconv("\xB8\x00")
    assert_equal "£", @ansel.iconv("\xB9\x00")
    assert_equal "ð", @ansel.iconv("\xBA\x00")
    assert_equal "ơ", @ansel.iconv("\xBC\x00")
    assert_equal "ư", @ansel.iconv("\xBD\x00")
    assert_equal "°", @ansel.iconv("\xC0\x00")
    assert_equal "ℓ", @ansel.iconv("\xC1\x00")
    assert_equal "℗", @ansel.iconv("\xC2\x00")
    assert_equal "©", @ansel.iconv("\xC3\x00")
    assert_equal "♯", @ansel.iconv("\xC4\x00")
    assert_equal "¿", @ansel.iconv("\xC5\x00")
    assert_equal "¡", @ansel.iconv("\xC6\x00")
    assert_equal "ß", @ansel.iconv("\xC7\x00")
    assert_equal "€", @ansel.iconv("\xC8\x00")

    # ANSEL combining characters
    assert_equal "Ả", @ansel.iconv("\xE0\x41")
    assert_equal "Ḻ", @ansel.iconv("\xF6\x4C")
    assert_equal "̲", @ansel.iconv("\xF6")
    assert_equal "̮", @ansel.iconv("\xF9")
    assert_equal "Ḫ", @ansel.iconv("\xF9\x48")
    assert_equal "Ậ", @ansel.iconv("\xF2\xE3\x41")
    assert_equal "ỵ", @ansel.iconv("\xF2\x79")
    assert_equal "̣", @ansel.iconv("\xF2")
  end

  should "convert full text correctly" do
    assert_equal "What is the question?", @ansel.iconv("What is the question?")
    assert_equal "¿What is the question?", @ansel.iconv("\xC5\x00What is the question?")
    assert_equal "© 1994", @ansel.iconv("\xC3\x00 1994")
    assert_equal "£4.59", @ansel.iconv("\xB9\x004.59")
  end

  # NOTE(review): the remaining tests pass a second (source charset) argument
  # to ANSEL::Iconv.new; verify that the constructor under test accepts it.
  # "\376\377" in the expected values is the big-endian UTF-16 byte order mark.

  should "convert ANSEL to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'ANSEL'
    assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
  end

  should "convert ASCII to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'ASCII'
    assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
  end

  should "convert UTF-8 to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'UTF-8'
    assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
  end

  should "convert UTF-16 to UTF-16" do
    converter = ANSEL::Iconv.new 'UTF-16', 'UTF-16'
    assert_equal "\376\377\000a\000b\000c", converter.iconv("\376\377\000a\000b\000c")
  end
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ansel_iconv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Keith Morrison
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-25 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: activesupport
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 2.1.0
|
24
|
+
version:
|
25
|
+
description: Convert ANSEL encoded text to any other encoding available to Iconv
|
26
|
+
email: keithm@infused.org
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.txt
|
33
|
+
files:
|
34
|
+
- History.txt
|
35
|
+
- README.txt
|
36
|
+
- VERSION.yml
|
37
|
+
- lib/ansel_iconv.rb
|
38
|
+
- test/ansel_iconv_test.rb
|
39
|
+
- test/test_helper.rb
|
40
|
+
has_rdoc: true
|
41
|
+
homepage:
|
42
|
+
licenses: []
|
43
|
+
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options:
|
46
|
+
- --inline-source
|
47
|
+
- --charset=UTF-8
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
requirements: []
|
63
|
+
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.3.5
|
66
|
+
signing_key:
|
67
|
+
specification_version: 2
|
68
|
+
summary: Convert ANSEL encoded text
|
69
|
+
test_files: []
|
70
|
+
|