ansel_iconv 1.1.3 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md ADDED
@@ -0,0 +1,30 @@
1
+ ## 1.1.4
2
+
3
+ - New gemspec and Rakefile
4
+ - Rename History.txt to CHANGELOG.md
5
+
6
+ ## 1.1.3
7
+
8
+ - MIT license
9
+
10
+ ## 1.1.2
11
+
12
+ - Speed up conversion
13
+
14
+ ## 1.1.0
15
+
16
+ - Ruby 1.9 compatibility
17
+
18
+ ## 1.0.5
19
+
20
+ - Requires activesupport 2.3.5 and works when 3.0 is installed
21
+
22
+ ## 1.0.3
23
+
24
+ - Fix ActiveSupport deprecation warning
25
+
26
+ ## 1.0.0
27
+
28
+ - Initial public release
29
+
30
+
@@ -4,10 +4,10 @@ ANSEL::Iconv is a wrapper for Iconv that adds ANSEL character set conversion.
4
4
 
5
5
  Copyright (c) 2006-2010 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
6
6
 
7
- * Project page: <http://github.com/infused/ansel_iconv>
8
- * API Documentation: <http://rdoc.info/projects/infused/dbf>
9
- * Report bugs: <http://github.com/infused/ansel_iconv/issues>
10
- * Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL::Iconv)
7
+ - Project page: <http://github.com/infused/ansel_iconv>
8
+ - API Documentation: <http://rdoc.info/projects/infused/dbf>
9
+ - Report bugs: <http://github.com/infused/ansel_iconv/issues>
10
+ - Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL::Iconv)
11
11
  with ANSEL::Iconv in the subject line
12
12
 
13
13
  ## Compatibility
@@ -49,9 +49,7 @@ standard.
49
49
 
50
50
  ## LICENSE:
51
51
 
52
- (The MIT License)
53
-
54
- Copyright (c) 2006-2010 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
52
+ Copyright (c) 2006-2010 Keith Morrison <keithm@infused.org>
55
53
 
56
54
  Permission is hereby granted, free of charge, to any person obtaining
57
55
  a copy of this software and associated documentation files (the
data/Rakefile CHANGED
@@ -1,23 +1,9 @@
1
- PROJECT_ROOT = File.expand_path(File.dirname(__FILE__))
2
- $: << File.join(PROJECT_ROOT, 'lib')
1
+ # encoding: utf-8
3
2
 
4
3
  require 'rubygems'
5
- require 'ansel_iconv'
4
+ require 'rubygems/specification'
6
5
  require 'rake/testtask'
7
- require 'jeweler'
8
-
9
- Jeweler::Tasks.new do |p|
10
- p.name = 'ansel_iconv'
11
- p.description = 'Convert ANSEL encoded text to any other encoding available to Iconv'
12
- p.summary = 'Convert ANSEL encoded text'
13
- p.platform = Gem::Platform::RUBY
14
- p.authors = ['Keith Morrison']
15
- p.email = 'keithm@infused.org'
16
- p.add_dependency(%q<activesupport>, ['=2.3.5'])
17
- p.homepage = 'http://github.com/infused/ansel_iconv'
18
- end
19
6
 
20
- Jeweler::GemcutterTasks.new
21
7
 
22
8
  desc 'Default: run unit tests.'
23
9
  task :default => :test
@@ -26,4 +12,34 @@ Rake::TestTask.new(:test) do |t|
26
12
  t.pattern = 'test/**/*_test.rb'
27
13
  t.verbose = true
28
14
  t.libs << 'test'
29
- end
15
+ end
16
+
17
+ def gemspec
18
+ @gemspec ||= begin
19
+ file = File.expand_path('../ansel_iconv.gemspec', __FILE__)
20
+ eval(File.read(file), binding, file)
21
+ end
22
+ end
23
+
24
+ begin
25
+ require 'rake/gempackagetask'
26
+ rescue LoadError
27
+ task(:gem) { $stderr.puts '`gem install rake` to package gems' }
28
+ else
29
+ Rake::GemPackageTask.new(gemspec) do |pkg|
30
+ pkg.gem_spec = gemspec
31
+ end
32
+ task :gem => :gemspec
33
+ end
34
+
35
+ desc "install the gem locally"
36
+ task :install => :package do
37
+ sh %{gem install pkg/#{gemspec.name}-#{gemspec.version}}
38
+ end
39
+
40
+ desc "validate the gemspec"
41
+ task :gemspec do
42
+ gemspec.validate
43
+ end
44
+
45
+ task :package => :gemspec
data/lib/ansel_iconv.rb CHANGED
@@ -1,621 +1,8 @@
1
1
  # encoding: ascii-8bit
2
2
 
3
- gem 'activesupport', '=2.3.5'
3
+ gem 'activesupport', '>= 2.3.5', '<=2.3.8'
4
4
  require 'active_support'
5
5
  require 'iconv'
6
-
7
- module ANSEL
8
- class Iconv
9
- delegate :iconv, :to => :@converter
10
-
11
- def initialize(to, from = 'ANSEL')
12
- @converter = (from == 'ANSEL') ? Convert.new(to) : ::Iconv.new(to, from)
13
- end
14
- end
15
-
16
- class Convert
17
- @@non_combining = {
18
- "ERR" => "\xFF\xFD", # � - REPLACEMENT CHARACTER
19
- "88" => "", # NON-SORT BEGIN / START OF STRING
20
- "89" => "", # NON-SORT END / STRING TERMINATOR
21
- "8D" => "", # JOINER / ZERO WIDTH JOINER
22
- "8E" => "", # NON-JOINER / ZERO WIDTH NON-JOINER
23
- "A1" => "\x01\x41", # Ł - UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH STROKE
24
- "A2" => "\x00\xD8", # Ø - UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER O WITH STROKE
25
- "A3" => "\x01\x10", # Đ - UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER D WITH STROKE
26
- "A4" => "\x00\xDE", # Þ - UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER THORN (Icelandic)
27
- "A5" => "\x00\xC6", # Æ - UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE AE
28
- "A6" => "\x01\x52", # Œ - UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE OE
29
- "A7" => "\x02\xB9", # ʹ - SOFT SIGN, PRIME / MODIFIER LETTER PRIME
30
- "A8" => "\x00\xB7", # · - MIDDLE DOT
31
- "A9" => "\x26\x6D", # ♭ - MUSIC FLAT SIGN
32
- "AA" => "\x00\xAE", # ® - PATENT MARK / REGISTERED SIGN
33
- "AB" => "\x00\xB1", # ± - PLUS OR MINUS / PLUS-MINUS SIGN
34
- "AC" => "\x01\xA0", # Ơ - UPPERCASE O-HOOK / LATIN CAPITAL LETTER O WITH HORN
35
- "AD" => "\x01\xAF", # Ư - UPPERCASE U-HOOK / LATIN CAPITAL LETTER U WITH HORN
36
- "AE" => "\x02\xBC", # ʼ - ALIF / MODIFIER LETTER APOSTROPHE
37
- "B0" => "\x02\xBB", # ʻ - AYN / MODIFIER LETTER TURNED COMMA
38
- "B1" => "\x01\x42", # ł - LOWERCASE POLISH L / LATIN SMALL LETTER L WITH STROKE
39
- "B2" => "\x00\xF8", # ø - LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O WITH STROKE
40
- "B3" => "\x01\x11", # đ - LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER D WITH STROKE
41
- "B4" => "\x00\xFE", # þ - LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER THORN (Icelandic)
42
- "B5" => "\x00\xE6", # æ - LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE AE
43
- "B6" => "\x01\x53", # œ - LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE OE
44
- "B7" => "\x02\xBA", # ʺ - HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE PRIME
45
- "B8" => "\x01\x31", # ı - LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS I
46
- "B9" => "\x00\xA3", # £ - BRITISH POUND / POUND SIGN
47
- "BA" => "\x00\xF0", # ð - LOWERCASE ETH / LATIN SMALL LETTER ETH (Icelandic)
48
- "BC" => "\x01\xA1", # ơ - LOWERCASE O-HOOK / LATIN SMALL LETTER O WITH HORN
49
- "BD" => "\x01\xB0", # ư - LOWERCASE U-HOOK / LATIN SMALL LETTER U WITH HORN
50
- "C0" => "\x00\xB0", # ° - DEGREE SIGN
51
- "C1" => "\x21\x13", # ℓ - SCRIPT SMALL L
52
- "C2" => "\x21\x17", # ℗ - SOUND RECORDING COPYRIGHT
53
- "C3" => "\x00\xA9", # © - COPYRIGHT SIGN
54
- "C4" => "\x26\x6F", # ♯ - MUSIC SHARP SIGN
55
- "C5" => "\x00\xBF", # ¿ - INVERTED QUESTION MARK
56
- "C6" => "\x00\xA1", # ¡ - INVERTED EXCLAMATION MARK
57
- "C7" => "\x00\xDF", # ß - ESZETT SYMBOL
58
- "C8" => "\x20\xAC" # € - EURO SIGN
59
- }
60
-
61
- @@combining = {
62
- "E0+41" => "\x1E\xA2", # Ả - LATIN CAPITAL LETTER A WITH HOOK ABOVE
63
- "E0+45" => "\x1E\xBA", # LATIN CAPITAL LETTER E WITH HOOK ABOVE
64
- "E0+49" => "\x1E\xC8", # LATIN CAPITAL LETTER I WITH HOOK ABOVE
65
- "E0+4F" => "\x1E\xCE", # LATIN CAPITAL LETTER O WITH HOOK ABOVE
66
- "E0+55" => "\x1E\xE6", # LATIN CAPITAL LETTER U WITH HOOK ABOVE
67
- "E0+59" => "\x1E\xF6", # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
68
- "E0+61" => "\x1E\xA3", # LATIN SMALL LETTER A WITH HOOK ABOVE
69
- "E0+65" => "\x1E\xBB", # LATIN SMALL LETTER E WITH HOOK ABOVE
70
- "E0+69" => "\x1E\xC9", # LATIN SMALL LETTER I WITH HOOK ABOVE
71
- "E0+6F" => "\x1E\xCF", # LATIN SMALL LETTER O WITH HOOK ABOVE
72
- "E0+75" => "\x1E\xE7", # LATIN SMALL LETTER U WITH HOOK ABOVE
73
- "E0+79" => "\x1E\xF7", # LATIN SMALL LETTER Y WITH HOOK ABOVE
74
- "E0+E3+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
75
- "E0+E3+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
76
- "E0+E3+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
77
- "E0+E3+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
78
- "E0+E3+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
79
- "E0+E3+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
80
- "E0+E6+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
81
- "E0+E6+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
82
- "E0" => "\x03\x09", # COMBINING HOOK ABOVE
83
- "E1+41" => "\x00\xC0", # LATIN CAPITAL LETTER A WITH GRAVE
84
- "E1+45" => "\x00\xC8", # LATIN CAPITAL LETTER E WITH GRAVE
85
- "E1+49" => "\x00\xCC", # LATIN CAPITAL LETTER I WITH GRAVE
86
- "E1+4F" => "\x00\xD2", # LATIN CAPITAL LETTER O WITH GRAVE
87
- "E1+55" => "\x00\xD9", # LATIN CAPITAL LETTER U WITH GRAVE
88
- "E1+57" => "\x1E\x80", # LATIN CAPITAL LETTER W WITH GRAVE
89
- "E1+59" => "\x1E\xF2", # LATIN CAPITAL LETTER Y WITH GRAVE
90
- "E1+61" => "\x00\xE0", # LATIN SMALL LETTER A WITH GRAVE
91
- "E1+65" => "\x00\xE8", # LATIN SMALL LETTER E WITH GRAVE
92
- "E1+69" => "\x00\xEC", # LATIN SMALL LETTER I WITH GRAVE
93
- "E1+6F" => "\x00\xF2", # LATIN SMALL LETTER O WITH GRAVE
94
- "E1+75" => "\x00\xF9", # LATIN SMALL LETTER U WITH GRAVE
95
- "E1+77" => "\x1E\x81", # LATIN SMALL LETTER W WITH GRAVE
96
- "E1+79" => "\x1E\xF3", # LATIN SMALL LETTER Y WITH GRAVE
97
- "E1+E3+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
98
- "E1+E3+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
99
- "E1+E3+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
100
- "E1+E3+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
101
- "E1+E3+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
102
- "E1+E3+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
103
- "E1+E5+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
104
- "E1+E5+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
105
- "E1+E5+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
106
- "E1+E5+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
107
- "E1+E6+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
108
- "E1+E6+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
109
- "E1+E8+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
110
- "E1+E8+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
111
- "E1" => "\x03\x00", # COMBINING GRAVE ACCENT
112
- "E2+41" => "\x00\xC1", # LATIN CAPITAL LETTER A WITH ACUTE
113
- "E2+43" => "\x01\x06", # LATIN CAPITAL LETTER C WITH ACUTE
114
- "E2+45" => "\x00\xC9", # LATIN CAPITAL LETTER E WITH ACUTE
115
- "E2+47" => "\x01\xF4", # LATIN CAPITAL LETTER G WITH ACUTE
116
- "E2+49" => "\x00\xCD", # LATIN CAPITAL LETTER I WITH ACUTE
117
- "E2+4B" => "\x1E\x30", # LATIN CAPITAL LETTER K WITH ACUTE
118
- "E2+4C" => "\x01\x39", # LATIN CAPITAL LETTER L WITH ACUTE
119
- "E2+4D" => "\x1E\x3E", # LATIN CAPITAL LETTER M WITH ACUTE
120
- "E2+4E" => "\x01\x43", # LATIN CAPITAL LETTER N WITH ACUTE
121
- "E2+4F" => "\x00\xD3", # LATIN CAPITAL LETTER O WITH ACUTE
122
- "E2+50" => "\x1E\x54", # LATIN CAPITAL LETTER P WITH ACUTE
123
- "E2+52" => "\x01\x54", # LATIN CAPITAL LETTER R WITH ACUTE
124
- "E2+53" => "\x01\x5A", # LATIN CAPITAL LETTER S WITH ACUTE
125
- "E2+55" => "\x00\xDA", # LATIN CAPITAL LETTER U WITH ACUTE
126
- "E2+57" => "\x1E\x82", # LATIN CAPITAL LETTER W WITH ACUTE
127
- "E2+59" => "\x00\xDD", # LATIN CAPITAL LETTER Y WITH ACUTE
128
- "E2+5A" => "\x01\x79", # LATIN CAPITAL LETTER Z WITH ACUTE
129
- "E2+61" => "\x00\xE1", # LATIN SMALL LETTER A WITH ACUTE
130
- "E2+63" => "\x01\x07", # LATIN SMALL LETTER C WITH ACUTE
131
- "E2+65" => "\x00\xE9", # LATIN SMALL LETTER E WITH ACUTE
132
- "E2+67" => "\x01\xF5", # LATIN SMALL LETTER G WITH ACUTE
133
- "E2+69" => "\x00\xED", # LATIN SMALL LETTER I WITH ACUTE
134
- "E2+6B" => "\x1E\x31", # LATIN SMALL LETTER K WITH ACUTE
135
- "E2+6C" => "\x01\x3A", # LATIN SMALL LETTER L WITH ACUTE
136
- "E2+6D" => "\x1E\x3F", # LATIN SMALL LETTER M WITH ACUTE
137
- "E2+6E" => "\x01\x44", # LATIN SMALL LETTER N WITH ACUTE
138
- "E2+6F" => "\x00\xF3", # LATIN SMALL LETTER O WITH ACUTE
139
- "E2+70" => "\x1E\x55", # LATIN SMALL LETTER P WITH ACUTE
140
- "E2+72" => "\x01\x55", # LATIN SMALL LETTER R WITH ACUTE
141
- "E2+73" => "\x01\x5B", # LATIN SMALL LETTER S WITH ACUTE
142
- "E2+75" => "\x00\xFA", # LATIN SMALL LETTER U WITH ACUTE
143
- "E2+77" => "\x1E\x83", # LATIN SMALL LETTER W WITH ACUTE
144
- "E2+79" => "\x00\xFD", # LATIN SMALL LETTER Y WITH ACUTE
145
- "E2+7A" => "\x01\x7A", # LATIN SMALL LETTER Z WITH ACUTE
146
- "E2+A5" => "\x01\xFC", # LATIN CAPITAL LETTER AE WITH ACUTE
147
- "E2+B5" => "\x01\xFD", # LATIN SMALL LETTER AE WITH ACUTE
148
- "E2+E3+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
149
- "E2+E3+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
150
- "E2+E3+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
151
- "E2+E3+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
152
- "E2+E3+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
153
- "E2+E3+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
154
- "E2+E4+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
155
- "E2+E4+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
156
- "E2+E4+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
157
- "E2+E4+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
158
- "E2+E5+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
159
- "E2+E5+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
160
- "E2+E5+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
161
- "E2+E5+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
162
- "E2+E6+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
163
- "E2+E6+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
164
- "E2+E7+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
165
- "E2+E7+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
166
- "E2+E8+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
167
- "E2+E8+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
168
- "E2+E8+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
169
- "E2+E8+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
170
- "E2+EA+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
171
- "E2+EA+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
172
- "E2+F0+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
173
- "E2+F0+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
174
- "E2" => "\x03\x01", # COMBINING ACUTE ACCENT
175
- "E3+41" => "\x00\xC2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
176
- "E3+43" => "\x01\x08", # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
177
- "E3+45" => "\x00\xCA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
178
- "E3+47" => "\x01\x1C", # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
179
- "E3+48" => "\x01\x24", # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
180
- "E3+49" => "\x00\xCE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
181
- "E3+4A" => "\x01\x34", # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
182
- "E3+4F" => "\x00\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
183
- "E3+53" => "\x01\x5C", # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
184
- "E3+55" => "\x00\xDB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
185
- "E3+57" => "\x01\x74", # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
186
- "E3+59" => "\x01\x76", # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
187
- "E3+5A" => "\x1E\x90", # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
188
- "E3+61" => "\x00\xE2", # LATIN SMALL LETTER A WITH CIRCUMFLEX
189
- "E3+63" => "\x01\x09", # LATIN SMALL LETTER C WITH CIRCUMFLEX
190
- "E3+65" => "\x00\xEA", # LATIN SMALL LETTER E WITH CIRCUMFLEX
191
- "E3+67" => "\x01\x1D", # LATIN SMALL LETTER G WITH CIRCUMFLEX
192
- "E3+68" => "\x01\x25", # LATIN SMALL LETTER H WITH CIRCUMFLEX
193
- "E3+69" => "\x00\xEE", # LATIN SMALL LETTER I WITH CIRCUMFLEX
194
- "E3+6A" => "\x01\x35", # LATIN SMALL LETTER J WITH CIRCUMFLEX
195
- "E3+6F" => "\x00\xF4", # LATIN SMALL LETTER O WITH CIRCUMFLEX
196
- "E3+73" => "\x01\x5D", # LATIN SMALL LETTER S WITH CIRCUMFLEX
197
- "E3+75" => "\x00\xFB", # LATIN SMALL LETTER U WITH CIRCUMFLEX
198
- "E3+77" => "\x01\x75", # LATIN SMALL LETTER W WITH CIRCUMFLEX
199
- "E3+79" => "\x01\x77", # LATIN SMALL LETTER Y WITH CIRCUMFLEX
200
- "E3+7A" => "\x1E\x91", # LATIN SMALL LETTER Z WITH CIRCUMFLEX
201
- "E3+E0+41" => "\x1E\xA8", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
202
- "E3+E0+45" => "\x1E\xC2", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
203
- "E3+E0+4F" => "\x1E\xD4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
204
- "E3+E0+61" => "\x1E\xA9", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
205
- "E3+E0+65" => "\x1E\xC3", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
206
- "E3+E0+6F" => "\x1E\xD5", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
207
- "E3+E1+41" => "\x1E\xA6", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
208
- "E3+E1+45" => "\x1E\xC0", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
209
- "E3+E1+4F" => "\x1E\xD2", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
210
- "E3+E1+61" => "\x1E\xA7", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
211
- "E3+E1+65" => "\x1E\xC1", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
212
- "E3+E1+6F" => "\x1E\xD3", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
213
- "E3+E2+41" => "\x1E\xA4", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
214
- "E3+E2+45" => "\x1E\xBE", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
215
- "E3+E2+4F" => "\x1E\xD0", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
216
- "E3+E2+61" => "\x1E\xA5", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
217
- "E3+E2+65" => "\x1E\xBF", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
218
- "E3+E2+6F" => "\x1E\xD1", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
219
- "E3+E4+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
220
- "E3+E4+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
221
- "E3+E4+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
222
- "E3+E4+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
223
- "E3+E4+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
224
- "E3+E4+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
225
- "E3+F2+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
226
- "E3+F2+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
227
- "E3+F2+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
228
- "E3+F2+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
229
- "E3+F2+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
230
- "E3+F2+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
231
- "E3" => "\x03\x02", # COMBINING CIRCUMFLEX ACCENT
232
- "E4+41" => "\x00\xC3", # LATIN CAPITAL LETTER A WITH TILDE
233
- "E4+45" => "\x1E\xBC", # LATIN CAPITAL LETTER E WITH TILDE
234
- "E4+49" => "\x01\x28", # LATIN CAPITAL LETTER I WITH TILDE
235
- "E4+4E" => "\x00\xD1", # LATIN CAPITAL LETTER N WITH TILDE
236
- "E4+4F" => "\x00\xD5", # LATIN CAPITAL LETTER O WITH TILDE
237
- "E4+55" => "\x01\x68", # LATIN CAPITAL LETTER U WITH TILDE
238
- "E4+56" => "\x1E\x7C", # LATIN CAPITAL LETTER V WITH TILDE
239
- "E4+59" => "\x1E\xF8", # LATIN CAPITAL LETTER Y WITH TILDE
240
- "E4+61" => "\x00\xE3", # LATIN SMALL LETTER A WITH TILDE
241
- "E4+65" => "\x1E\xBD", # LATIN SMALL LETTER E WITH TILDE
242
- "E4+69" => "\x01\x29", # LATIN SMALL LETTER I WITH TILDE
243
- "E4+6E" => "\x00\xF1", # LATIN SMALL LETTER N WITH TILDE
244
- "E4+6F" => "\x00\xF5", # LATIN SMALL LETTER O WITH TILDE
245
- "E4+75" => "\x01\x69", # LATIN SMALL LETTER U WITH TILDE
246
- "E4+76" => "\x1E\x7D", # LATIN SMALL LETTER V WITH TILDE
247
- "E4+79" => "\x1E\xF9", # LATIN SMALL LETTER Y WITH TILDE
248
- "E4+E2+4F" => "\x1E\x4C", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
249
- "E4+E2+55" => "\x1E\x78", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
250
- "E4+E2+6F" => "\x1E\x4D", # LATIN SMALL LETTER O WITH TILDE AND ACUTE
251
- "E4+E2+75" => "\x1E\x79", # LATIN SMALL LETTER U WITH TILDE AND ACUTE
252
- "E4+E3+41" => "\x1E\xAA", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
253
- "E4+E3+45" => "\x1E\xC4", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
254
- "E4+E3+4F" => "\x1E\xD6", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
255
- "E4+E3+61" => "\x1E\xAB", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
256
- "E4+E3+65" => "\x1E\xC5", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
257
- "E4+E3+6F" => "\x1E\xD7", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
258
- "E4+E6+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
259
- "E4+E6+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
260
- "E4+E8+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
261
- "E4+E8+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
262
- "E4" => "\x03\x03", # COMBINING TILDE
263
- "E5+41" => "\x01\x00", # LATIN CAPITAL LETTER A WITH MACRON
264
- "E5+45" => "\x01\x12", # LATIN CAPITAL LETTER E WITH MACRON
265
- "E5+47" => "\x1E\x20", # LATIN CAPITAL LETTER G WITH MACRON
266
- "E5+49" => "\x01\x2A", # LATIN CAPITAL LETTER I WITH MACRON
267
- "E5+4F" => "\x01\x4C", # LATIN CAPITAL LETTER O WITH MACRON
268
- "E5+55" => "\x01\x6A", # LATIN CAPITAL LETTER U WITH MACRON
269
- "E5+61" => "\x01\x01", # LATIN SMALL LETTER A WITH MACRON
270
- "E5+65" => "\x01\x13", # LATIN SMALL LETTER E WITH MACRON
271
- "E5+67" => "\x1E\x21", # LATIN SMALL LETTER G WITH MACRON
272
- "E5+69" => "\x01\x2B", # LATIN SMALL LETTER I WITH MACRON
273
- "E5+6F" => "\x01\x4D", # LATIN SMALL LETTER O WITH MACRON
274
- "E5+75" => "\x01\x6B", # LATIN SMALL LETTER U WITH MACRON
275
- "E5+A5" => "\x01\xE2", # LATIN CAPITAL LETTER AE WITH MACRON
276
- "E5+B5" => "\x01\xE3", # LATIN SMALL LETTER AE WITH MACRON
277
- "E5+E1+45" => "\x1E\x14", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
278
- "E5+E1+4F" => "\x1E\x50", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
279
- "E5+E1+65" => "\x1E\x15", # LATIN SMALL LETTER E WITH MACRON AND GRAVE
280
- "E5+E1+6F" => "\x1E\x51", # LATIN SMALL LETTER O WITH MACRON AND GRAVE
281
- "E5+E2+45" => "\x1E\x16", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
282
- "E5+E2+4F" => "\x1E\x52", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
283
- "E5+E2+65" => "\x1E\x17", # LATIN SMALL LETTER E WITH MACRON AND ACUTE
284
- "E5+E2+6F" => "\x1E\x53", # LATIN SMALL LETTER O WITH MACRON AND ACUTE
285
- "E5+E7+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
286
- "E5+E7+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
287
- "E5+E8+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
288
- "E5+E8+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
289
- "E5+E8+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
290
- "E5+E8+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
291
- "E5+F1+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
292
- "E5+F1+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
293
- "E5+F2+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
294
- "E5+F2+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
295
- "E5+F2+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
296
- "E5+F2+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
297
- "E5" => "\x03\x04", # COMBINING MACRON
298
- "E6+41" => "\x01\x02", # LATIN CAPITAL LETTER A WITH BREVE
299
- "E6+45" => "\x01\x14", # LATIN CAPITAL LETTER E WITH BREVE
300
- "E6+47" => "\x01\x1E", # LATIN CAPITAL LETTER G WITH BREVE
301
- "E6+49" => "\x01\x2C", # LATIN CAPITAL LETTER I WITH BREVE
302
- "E6+4F" => "\x01\x4E", # LATIN CAPITAL LETTER O WITH BREVE
303
- "E6+55" => "\x01\x6C", # LATIN CAPITAL LETTER U WITH BREVE
304
- "E6+61" => "\x01\x03", # LATIN SMALL LETTER A WITH BREVE
305
- "E6+65" => "\x01\x15", # LATIN SMALL LETTER E WITH BREVE
306
- "E6+67" => "\x01\x1F", # LATIN SMALL LETTER G WITH BREVE
307
- "E6+69" => "\x01\x2D", # LATIN SMALL LETTER I WITH BREVE
308
- "E6+6F" => "\x01\x4F", # LATIN SMALL LETTER O WITH BREVE
309
- "E6+75" => "\x01\x6D", # LATIN SMALL LETTER U WITH BREVE
310
- "E6+E0+41" => "\x1E\xB2", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
311
- "E6+E0+61" => "\x1E\xB3", # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
312
- "E6+E1+41" => "\x1E\xB0", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
313
- "E6+E1+61" => "\x1E\xB1", # LATIN SMALL LETTER A WITH BREVE AND GRAVE
314
- "E6+E2+41" => "\x1E\xAE", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
315
- "E6+E2+61" => "\x1E\xAF", # LATIN SMALL LETTER A WITH BREVE AND ACUTE
316
- "E6+E4+41" => "\x1E\xB4", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
317
- "E6+E4+61" => "\x1E\xB5", # LATIN SMALL LETTER A WITH BREVE AND TILDE
318
- "E6+F0+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
319
- "E6+F0+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
320
- "E6+F2+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
321
- "E6+F2+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
322
- "E6" => "\x03\x06", # COMBINING BREVE
323
- "E7+42" => "\x1E\x02", # LATIN CAPITAL LETTER B WITH DOT ABOVE
324
- "E7+43" => "\x01\x0A", # LATIN CAPITAL LETTER C WITH DOT ABOVE
325
- "E7+44" => "\x1E\x0A", # LATIN CAPITAL LETTER D WITH DOT ABOVE
326
- "E7+45" => "\x01\x16", # LATIN CAPITAL LETTER E WITH DOT ABOVE
327
- "E7+46" => "\x1E\x1E", # LATIN CAPITAL LETTER F WITH DOT ABOVE
328
- "E7+47" => "\x01\x20", # LATIN CAPITAL LETTER G WITH DOT ABOVE
329
- "E7+48" => "\x1E\x22", # LATIN CAPITAL LETTER H WITH DOT ABOVE
330
- "E7+49" => "\x01\x30", # LATIN CAPITAL LETTER I WITH DOT ABOVE
331
- "E7+4D" => "\x1E\x40", # LATIN CAPITAL LETTER M WITH DOT ABOVE
332
- "E7+4E" => "\x1E\x44", # LATIN CAPITAL LETTER N WITH DOT ABOVE
333
- "E7+50" => "\x1E\x56", # LATIN CAPITAL LETTER P WITH DOT ABOVE
334
- "E7+52" => "\x1E\x58", # LATIN CAPITAL LETTER R WITH DOT ABOVE
335
- "E7+53" => "\x1E\x60", # LATIN CAPITAL LETTER S WITH DOT ABOVE
336
- "E7+54" => "\x1E\x6A", # LATIN CAPITAL LETTER T WITH DOT ABOVE
337
- "E7+57" => "\x1E\x86", # LATIN CAPITAL LETTER W WITH DOT ABOVE
338
- "E7+58" => "\x1E\x8A", # LATIN CAPITAL LETTER X WITH DOT ABOVE
339
- "E7+59" => "\x1E\x8E", # LATIN CAPITAL LETTER Y WITH DOT ABOVE
340
- "E7+5A" => "\x01\x7B", # LATIN CAPITAL LETTER Z WITH DOT ABOVE
341
- "E7+62" => "\x1E\x03", # LATIN SMALL LETTER B WITH DOT ABOVE
342
- "E7+63" => "\x01\x0B", # LATIN SMALL LETTER C WITH DOT ABOVE
343
- "E7+64" => "\x1E\x0B", # LATIN SMALL LETTER D WITH DOT ABOVE
344
- "E7+65" => "\x01\x17", # LATIN SMALL LETTER E WITH DOT ABOVE
345
- "E7+66" => "\x1E\x1F", # LATIN SMALL LETTER F WITH DOT ABOVE
346
- "E7+67" => "\x01\x21", # LATIN SMALL LETTER G WITH DOT ABOVE
347
- "E7+68" => "\x1E\x23", # LATIN SMALL LETTER H WITH DOT ABOVE
348
- "E7+6D" => "\x1E\x41", # LATIN SMALL LETTER M WITH DOT ABOVE
349
- "E7+6E" => "\x1E\x45", # LATIN SMALL LETTER N WITH DOT ABOVE
350
- "E7+70" => "\x1E\x57", # LATIN SMALL LETTER P WITH DOT ABOVE
351
- "E7+72" => "\x1E\x59", # LATIN SMALL LETTER R WITH DOT ABOVE
352
- "E7+73" => "\x1E\x61", # LATIN SMALL LETTER S WITH DOT ABOVE
353
- "E7+74" => "\x1E\x6B", # LATIN SMALL LETTER T WITH DOT ABOVE
354
- "E7+77" => "\x1E\x87", # LATIN SMALL LETTER W WITH DOT ABOVE
355
- "E7+78" => "\x1E\x8B", # LATIN SMALL LETTER X WITH DOT ABOVE
356
- "E7+79" => "\x1E\x8F", # LATIN SMALL LETTER Y WITH DOT ABOVE
357
- "E7+7A" => "\x01\x7C", # LATIN SMALL LETTER Z WITH DOT ABOVE
358
- "E7+E2+53" => "\x1E\x64", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
359
- "E7+E2+73" => "\x1E\x65", # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
360
- "E7+E5+41" => "\x01\xE0", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
361
- "E7+E5+61" => "\x01\xE1", # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
362
- "E7+E9+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
363
- "E7+E9+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
364
- "E7+F2+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
365
- "E7+F2+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
366
- "E7" => "\x03\x07", # COMBINING DOT ABOVE
367
- "E8+41" => "\x00\xC4", # LATIN CAPITAL LETTER A WITH DIAERESIS
368
- "E8+45" => "\x00\xCB", # LATIN CAPITAL LETTER E WITH DIAERESIS
369
- "E8+48" => "\x1E\x26", # LATIN CAPITAL LETTER H WITH DIAERESIS
370
- "E8+49" => "\x00\xCF", # LATIN CAPITAL LETTER I WITH DIAERESIS
371
- "E8+4F" => "\x00\xD6", # LATIN CAPITAL LETTER O WITH DIAERESIS
372
- "E8+55" => "\x00\xDC", # LATIN CAPITAL LETTER U WITH DIAERESIS
373
- "E8+57" => "\x1E\x84", # LATIN CAPITAL LETTER W WITH DIAERESIS
374
- "E8+58" => "\x1E\x8C", # LATIN CAPITAL LETTER X WITH DIAERESIS
375
- "E8+59" => "\x01\x78", # LATIN CAPITAL LETTER Y WITH DIAERESIS
376
- "E8+61" => "\x00\xE4", # LATIN SMALL LETTER A WITH DIAERESIS
377
- "E8+65" => "\x00\xEB", # LATIN SMALL LETTER E WITH DIAERESIS
378
- "E8+68" => "\x1E\x27", # LATIN SMALL LETTER H WITH DIAERESIS
379
- "E8+69" => "\x00\xEF", # LATIN SMALL LETTER I WITH DIAERESIS
380
- "E8+6F" => "\x00\xF6", # LATIN SMALL LETTER O WITH DIAERESIS
381
- "E8+74" => "\x1E\x97", # LATIN SMALL LETTER T WITH DIAERESIS
382
- "E8+75" => "\x00\xFC", # LATIN SMALL LETTER U WITH DIAERESIS
383
- "E8+77" => "\x1E\x85", # LATIN SMALL LETTER W WITH DIAERESIS
384
- "E8+78" => "\x1E\x8D", # LATIN SMALL LETTER X WITH DIAERESIS
385
- "E8+79" => "\x00\xFF", # LATIN SMALL LETTER Y WITH DIAERESIS
386
- "E8+E1+55" => "\x01\xDB", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
387
- "E8+E1+75" => "\x01\xDC", # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
388
- "E8+E2+49" => "\x1E\x2E", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
389
- "E8+E2+55" => "\x01\xD7", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
390
- "E8+E2+69" => "\x1E\x2F", # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
391
- "E8+E2+75" => "\x01\xD8", # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
392
- "E8+E4+4F" => "\x1E\x4E", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
393
- "E8+E4+6F" => "\x1E\x4F", # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
394
- "E8+E5+41" => "\x01\xDE", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
395
- "E8+E5+55" => "\x1E\x7A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
396
- "E8+E5+61" => "\x01\xDF", # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
397
- "E8+E5+75" => "\x1E\x7B", # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
398
- "E8+E9+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
399
- "E8+E9+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
400
- "E8" => "\x03\x08", # COMBINING DIAERESIS
401
- "E9+41" => "\x01\xCD", # LATIN CAPITAL LETTER A WITH CARON
402
- "E9+43" => "\x01\x0C", # LATIN CAPITAL LETTER C WITH CARON
403
- "E9+44" => "\x01\x0E", # LATIN CAPITAL LETTER D WITH CARON
404
- "E9+45" => "\x01\x1A", # LATIN CAPITAL LETTER E WITH CARON
405
- "E9+47" => "\x01\xE6", # LATIN CAPITAL LETTER G WITH CARON
406
- "E9+49" => "\x01\xCF", # LATIN CAPITAL LETTER I WITH CARON
407
- "E9+4B" => "\x01\xE8", # LATIN CAPITAL LETTER K WITH CARON
408
- "E9+4C" => "\x01\x3D", # LATIN CAPITAL LETTER L WITH CARON
409
- "E9+4E" => "\x01\x47", # LATIN CAPITAL LETTER N WITH CARON
410
- "E9+4F" => "\x01\xD1", # LATIN CAPITAL LETTER O WITH CARON
411
- "E9+52" => "\x01\x58", # LATIN CAPITAL LETTER R WITH CARON
412
- "E9+53" => "\x01\x60", # LATIN CAPITAL LETTER S WITH CARON
413
- "E9+54" => "\x01\x64", # LATIN CAPITAL LETTER T WITH CARON
414
- "E9+55" => "\x01\xD3", # LATIN CAPITAL LETTER U WITH CARON
415
- "E9+5A" => "\x01\x7D", # LATIN CAPITAL LETTER Z WITH CARON
416
- "E9+61" => "\x01\xCE", # LATIN SMALL LETTER A WITH CARON
417
- "E9+63" => "\x01\x0D", # LATIN SMALL LETTER C WITH CARON
418
- "E9+64" => "\x01\x0F", # LATIN SMALL LETTER D WITH CARON
419
- "E9+65" => "\x01\x1B", # LATIN SMALL LETTER E WITH CARON
420
- "E9+67" => "\x01\xE7", # LATIN SMALL LETTER G WITH CARON
421
- "E9+69" => "\x01\xD0", # LATIN SMALL LETTER I WITH CARON
422
- "E9+6A" => "\x01\xF0", # LATIN SMALL LETTER J WITH CARON
423
- "E9+6B" => "\x01\xE9", # LATIN SMALL LETTER K WITH CARON
424
- "E9+6C" => "\x01\x3E", # LATIN SMALL LETTER L WITH CARON
425
- "E9+6E" => "\x01\x48", # LATIN SMALL LETTER N WITH CARON
426
- "E9+6F" => "\x01\xD2", # LATIN SMALL LETTER O WITH CARON
427
- "E9+72" => "\x01\x59", # LATIN SMALL LETTER R WITH CARON
428
- "E9+73" => "\x01\x61", # LATIN SMALL LETTER S WITH CARON
429
- "E9+74" => "\x01\x65", # LATIN SMALL LETTER T WITH CARON
430
- "E9+75" => "\x01\xD4", # LATIN SMALL LETTER U WITH CARON
431
- "E9+7A" => "\x01\x7E", # LATIN SMALL LETTER Z WITH CARON
432
- "E9+E7+53" => "\x1E\x66", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
433
- "E9+E7+73" => "\x1E\x67", # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
434
- "E9+E8+55" => "\x01\xD9", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
435
- "E9+E8+75" => "\x01\xDA", # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
436
- "E9" => "\x03\x0C", # COMBINING CARON
437
- "EA+41" => "\x00\xC5", # LATIN CAPITAL LETTER A WITH RING ABOVE
438
- "EA+55" => "\x01\x6E", # LATIN CAPITAL LETTER U WITH RING ABOVE
439
- "EA+61" => "\x00\xE5", # LATIN SMALL LETTER A WITH RING ABOVE
440
- "EA+75" => "\x01\x6F", # LATIN SMALL LETTER U WITH RING ABOVE
441
- "EA+77" => "\x1E\x98", # LATIN SMALL LETTER W WITH RING ABOVE
442
- "EA+79" => "\x1E\x99", # LATIN SMALL LETTER Y WITH RING ABOVE
443
- "EA+E2+41" => "\x01\xFA", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
444
- "EA+E2+61" => "\x01\xFB", # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
445
- "EA" => "\x03\x0A", # COMBINING RING ABOVE
446
- "EB" => "\xFE\x20", # COMBINING LIGATURE LEFT HALF
447
- "EC" => "\xFE\x21", # COMBINING LIGATURE RIGHT HALF
448
- "ED" => "\x03\x15", # COMBINING COMMA ABOVE RIGHT
449
- "EE+4F" => "\x01\x50", # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
450
- "EE+55" => "\x01\x70", # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
451
- "EE+6F" => "\x01\x51", # LATIN SMALL LETTER O WITH DOUBLE ACUTE
452
- "EE+75" => "\x01\x71", # LATIN SMALL LETTER U WITH DOUBLE ACUTE
453
- "EE" => "\x03\x0B", # COMBINING DOUBLE ACUTE ACCENT
454
- "EF" => "\x03\x10", # COMBINING CANDRABINDU
455
- "F0+43" => "\x00\xC7", # LATIN CAPITAL LETTER C WITH CEDILLA
456
- "F0+44" => "\x1E\x10", # LATIN CAPITAL LETTER D WITH CEDILLA
457
- "F0+47" => "\x01\x22", # LATIN CAPITAL LETTER G WITH CEDILLA
458
- "F0+48" => "\x1E\x28", # LATIN CAPITAL LETTER H WITH CEDILLA
459
- "F0+4B" => "\x01\x36", # LATIN CAPITAL LETTER K WITH CEDILLA
460
- "F0+4C" => "\x01\x3B", # LATIN CAPITAL LETTER L WITH CEDILLA
461
- "F0+4E" => "\x01\x45", # LATIN CAPITAL LETTER N WITH CEDILLA
462
- "F0+52" => "\x01\x56", # LATIN CAPITAL LETTER R WITH CEDILLA
463
- "F0+53" => "\x01\x5E", # LATIN CAPITAL LETTER S WITH CEDILLA
464
- "F0+54" => "\x01\x62", # LATIN CAPITAL LETTER T WITH CEDILLA
465
- "F0+63" => "\x00\xE7", # LATIN SMALL LETTER C WITH CEDILLA
466
- "F0+64" => "\x1E\x11", # LATIN SMALL LETTER D WITH CEDILLA
467
- "F0+67" => "\x01\x23", # LATIN SMALL LETTER G WITH CEDILLA
468
- "F0+68" => "\x1E\x29", # LATIN SMALL LETTER H WITH CEDILLA
469
- "F0+6B" => "\x01\x37", # LATIN SMALL LETTER K WITH CEDILLA
470
- "F0+6C" => "\x01\x3C", # LATIN SMALL LETTER L WITH CEDILLA
471
- "F0+6E" => "\x01\x46", # LATIN SMALL LETTER N WITH CEDILLA
472
- "F0+72" => "\x01\x57", # LATIN SMALL LETTER R WITH CEDILLA
473
- "F0+73" => "\x01\x5F", # LATIN SMALL LETTER S WITH CEDILLA
474
- "F0+74" => "\x01\x63", # LATIN SMALL LETTER T WITH CEDILLA
475
- "F0+E2+43" => "\x1E\x08", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
476
- "F0+E2+63" => "\x1E\x09", # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
477
- "F0+E6+45" => "\x1E\x1C", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
478
- "F0+E6+65" => "\x1E\x1D", # LATIN SMALL LETTER E WITH CEDILLA AND BREVE
479
- "F0" => "\x03\x27", # COMBINING CEDILLA
480
- "F1+41" => "\x01\x04", # LATIN CAPITAL LETTER A WITH OGONEK
481
- "F1+45" => "\x01\x18", # LATIN CAPITAL LETTER E WITH OGONEK
482
- "F1+49" => "\x01\x2E", # LATIN CAPITAL LETTER I WITH OGONEK
483
- "F1+4F" => "\x01\xEA", # LATIN CAPITAL LETTER O WITH OGONEK
484
- "F1+55" => "\x01\x72", # LATIN CAPITAL LETTER U WITH OGONEK
485
- "F1+61" => "\x01\x05", # LATIN SMALL LETTER A WITH OGONEK
486
- "F1+65" => "\x01\x19", # LATIN SMALL LETTER E WITH OGONEK
487
- "F1+69" => "\x01\x2F", # LATIN SMALL LETTER I WITH OGONEK
488
- "F1+6F" => "\x01\xEB", # LATIN SMALL LETTER O WITH OGONEK
489
- "F1+75" => "\x01\x73", # LATIN SMALL LETTER U WITH OGONEK
490
- "F1+E5+4F" => "\x01\xEC", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
491
- "F1+E5+6F" => "\x01\xED", # LATIN SMALL LETTER O WITH OGONEK AND MACRON
492
- "F1" => "\x03\x28", # COMBINING OGONEK
493
- "F2+41" => "\x1E\xA0", # LATIN CAPITAL LETTER A WITH DOT BELOW
494
- "F2+42" => "\x1E\x04", # LATIN CAPITAL LETTER B WITH DOT BELOW
495
- "F2+44" => "\x1E\x0C", # LATIN CAPITAL LETTER D WITH DOT BELOW
496
- "F2+45" => "\x1E\xB8", # LATIN CAPITAL LETTER E WITH DOT BELOW
497
- "F2+48" => "\x1E\x24", # LATIN CAPITAL LETTER H WITH DOT BELOW
498
- "F2+49" => "\x1E\xCA", # LATIN CAPITAL LETTER I WITH DOT BELOW
499
- "F2+4B" => "\x1E\x32", # LATIN CAPITAL LETTER K WITH DOT BELOW
500
- "F2+4C" => "\x1E\x36", # LATIN CAPITAL LETTER L WITH DOT BELOW
501
- "F2+4D" => "\x1E\x42", # LATIN CAPITAL LETTER M WITH DOT BELOW
502
- "F2+4E" => "\x1E\x46", # LATIN CAPITAL LETTER N WITH DOT BELOW
503
- "F2+4F" => "\x1E\xCC", # LATIN CAPITAL LETTER O WITH DOT BELOW
504
- "F2+52" => "\x1E\x5A", # LATIN CAPITAL LETTER R WITH DOT BELOW
505
- "F2+53" => "\x1E\x62", # LATIN CAPITAL LETTER S WITH DOT BELOW
506
- "F2+54" => "\x1E\x6C", # LATIN CAPITAL LETTER T WITH DOT BELOW
507
- "F2+55" => "\x1E\xE4", # LATIN CAPITAL LETTER U WITH DOT BELOW
508
- "F2+56" => "\x1E\x7E", # LATIN CAPITAL LETTER V WITH DOT BELOW
509
- "F2+57" => "\x1E\x88", # LATIN CAPITAL LETTER W WITH DOT BELOW
510
- "F2+59" => "\x1E\xF4", # LATIN CAPITAL LETTER Y WITH DOT BELOW
511
- "F2+5A" => "\x1E\x92", # LATIN CAPITAL LETTER Z WITH DOT BELOW
512
- "F2+61" => "\x1E\xA1", # LATIN SMALL LETTER A WITH DOT BELOW
513
- "F2+62" => "\x1E\x05", # LATIN SMALL LETTER B WITH DOT BELOW
514
- "F2+64" => "\x1E\x0D", # LATIN SMALL LETTER D WITH DOT BELOW
515
- "F2+65" => "\x1E\xB9", # LATIN SMALL LETTER E WITH DOT BELOW
516
- "F2+68" => "\x1E\x25", # LATIN SMALL LETTER H WITH DOT BELOW
517
- "F2+69" => "\x1E\xCB", # LATIN SMALL LETTER I WITH DOT BELOW
518
- "F2+6B" => "\x1E\x33", # LATIN SMALL LETTER K WITH DOT BELOW
519
- "F2+6C" => "\x1E\x37", # LATIN SMALL LETTER L WITH DOT BELOW
520
- "F2+6D" => "\x1E\x43", # LATIN SMALL LETTER M WITH DOT BELOW
521
- "F2+6E" => "\x1E\x47", # LATIN SMALL LETTER N WITH DOT BELOW
522
- "F2+6F" => "\x1E\xCD", # LATIN SMALL LETTER O WITH DOT BELOW
523
- "F2+72" => "\x1E\x5B", # LATIN SMALL LETTER R WITH DOT BELOW
524
- "F2+73" => "\x1E\x63", # LATIN SMALL LETTER S WITH DOT BELOW
525
- "F2+74" => "\x1E\x6D", # LATIN SMALL LETTER T WITH DOT BELOW
526
- "F2+75" => "\x1E\xE5", # LATIN SMALL LETTER U WITH DOT BELOW
527
- "F2+76" => "\x1E\x7F", # LATIN SMALL LETTER V WITH DOT BELOW
528
- "F2+77" => "\x1E\x89", # LATIN SMALL LETTER W WITH DOT BELOW
529
- "F2+79" => "\x1E\xF5", # LATIN SMALL LETTER Y WITH DOT BELOW
530
- "F2+7A" => "\x1E\x93", # LATIN SMALL LETTER Z WITH DOT BELOW
531
- "F2+E3+41" => "\x1E\xAC", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
532
- "F2+E3+45" => "\x1E\xC6", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
533
- "F2+E3+4F" => "\x1E\xD8", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
534
- "F2+E3+61" => "\x1E\xAD", # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
535
- "F2+E3+65" => "\x1E\xC7", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
536
- "F2+E3+6F" => "\x1E\xD9", # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
537
- "F2+E5+4C" => "\x1E\x38", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
538
- "F2+E5+52" => "\x1E\x5C", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
539
- "F2+E5+6C" => "\x1E\x39", # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
540
- "F2+E5+72" => "\x1E\x5D", # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
541
- "F2+E6+41" => "\x1E\xB6", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
542
- "F2+E6+61" => "\x1E\xB7", # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
543
- "F2+E7+53" => "\x1E\x68", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
544
- "F2+E7+73" => "\x1E\x69", # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
545
- "F2" => "\x03\x23", # COMBINING DOT BELOW
546
- "F3+55" => "\x1E\x72", # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
547
- "F3+75" => "\x1E\x73", # LATIN SMALL LETTER U WITH DIAERESIS BELOW
548
- "F3" => "\x03\x24", # COMBINING DIAERESIS BELOW
549
- "F4+41" => "\x1E\x00", # LATIN CAPITAL LETTER A WITH RING BELOW
550
- "F4+61" => "\x1E\x01", # LATIN SMALL LETTER A WITH RING BELOW
551
- "F4" => "\x03\x25", # COMBINING RING BELOW
552
- "F5" => "\x03\x33", # COMBINING DOUBLE LOW LINE
553
- "F6+42" => "\x1E\x06", # LATIN CAPITAL LETTER B WITH LINE BELOW
554
- "F6+44" => "\x1E\x0E", # LATIN CAPITAL LETTER D WITH LINE BELOW
555
- "F6+4B" => "\x1E\x34", # LATIN CAPITAL LETTER K WITH LINE BELOW
556
- "F6+4C" => "\x1E\x3A", # LATIN CAPITAL LETTER L WITH LINE BELOW
557
- "F6+4E" => "\x1E\x48", # LATIN CAPITAL LETTER N WITH LINE BELOW
558
- "F6+52" => "\x1E\x5E", # LATIN CAPITAL LETTER R WITH LINE BELOW
559
- "F6+54" => "\x1E\x6E", # LATIN CAPITAL LETTER T WITH LINE BELOW
560
- "F6+5A" => "\x1E\x94", # LATIN CAPITAL LETTER Z WITH LINE BELOW
561
- "F6+62" => "\x1E\x07", # LATIN SMALL LETTER B WITH LINE BELOW
562
- "F6+64" => "\x1E\x0F", # LATIN SMALL LETTER D WITH LINE BELOW
563
- "F6+68" => "\x1E\x96", # LATIN SMALL LETTER H WITH LINE BELOW
564
- "F6+6B" => "\x1E\x35", # LATIN SMALL LETTER K WITH LINE BELOW
565
- "F6+6C" => "\x1E\x3B", # LATIN SMALL LETTER L WITH LINE BELOW
566
- "F6+6E" => "\x1E\x49", # LATIN SMALL LETTER N WITH LINE BELOW
567
- "F6+72" => "\x1E\x5F", # LATIN SMALL LETTER R WITH LINE BELOW
568
- "F6+74" => "\x1E\x6F", # LATIN SMALL LETTER T WITH LINE BELOW
569
- "F6+7A" => "\x1E\x95", # LATIN SMALL LETTER Z WITH LINE BELOW
570
- "F6" => "\x03\x32", # COMBINING LOW LINE
571
- "F7" => "\x03\x26", # COMBINING COMMA BELOW
572
- "F8" => "\x03\x21", # COMBINING OGONEK
573
- "F9+48" => "\x1E\x2A", # LATIN CAPITAL LETTER H WITH BREVE BELOW
574
- "F9+68" => "\x1E\x2B", # LATIN SMALL LETTER H WITH BREVE BELOW
575
- "F9" => "\x03\x2E", # COMBINING BREVE BELOW
576
- "FA" => "\xFE\x22", # COMBINING DOUBLE TILDE LEFT HALF
577
- "FB" => "\xFE\x23" # COMBINING DOUBLE TILDE RIGHT HALF
578
- }
579
-
580
- def initialize(to_charset = 'UTF-8')
581
- @to_charset = to_charset
582
- @ansi_to_utf8 = {}
583
- @ansi_to_utf8.merge!(@@non_combining)
584
- @ansi_to_utf8.merge!(@@combining)
585
- end
586
-
587
- def iconv(string)
588
- output = ''
589
- scanner = StringScanner.new(string)
590
- until scanner.eos? do
591
- byte = scanner.get_byte
592
- char = byte.unpack('C')[0]
593
-
594
- if char <= 0x7F
595
- output << byte
596
- elsif char >= 0x88 && char <= 0xC8
597
- hex_key = char.to_s(16).upcase
598
- output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8.has_key?(hex_key) ? @ansi_to_utf8[hex_key] : @ansi_to_utf8['ERR'])
599
- scanner.get_byte # ignore the next byte
600
- elsif char >= 0xE0 && char <= 0xFB
601
- [2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
602
- bytes = [char.to_s(16).upcase]
603
- scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
604
- hex_key = bytes.join("+")
605
- if @ansi_to_utf8.has_key?(hex_key)
606
- output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8[hex_key])
607
- n.times {scanner.get_byte}
608
- break
609
- end
610
- end
611
- else
612
- output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8['ERR'])
613
- scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
614
- end
615
- end
616
-
617
- @to_charset == 'UTF-8' ? output : ::Iconv.conv(@to_charset, 'UTF-8', output)
618
- end
619
-
620
- end
621
- end
6
+ require 'ansel_iconv/character_map'
7
+ require 'ansel_iconv/iconv'
8
+ require 'ansel_iconv/converter'