langa 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/COPYING +674 -0
  2. data/README +69 -0
  3. data/bin/langa +169 -0
  4. data/examples/afrikaans_1953_utf8.txt +1000 -0
  5. data/examples/albanian_utf8.txt +1000 -0
  6. data/examples/amharic_utf8.txt +1000 -0
  7. data/examples/arabic_svd_utf8.txt +1000 -0
  8. data/examples/armenian_western_1853_utf8.txt +1000 -0
  9. data/examples/asv_utf8.txt +1000 -0
  10. data/examples/basque_1571_utf8.txt +1000 -0
  11. data/examples/breton_utf8.txt +1000 -0
  12. data/examples/chinese_ncv_s_utf8.txt +1000 -0
  13. data/examples/chinese_ncv_utf8.txt +1000 -0
  14. data/examples/chinese_union_s_utf8.txt +1000 -0
  15. data/examples/chinese_union_utf8.txt +1000 -0
  16. data/examples/coptic_nt_utf8.txt +1000 -0
  17. data/examples/croatian_utf8.txt +1000 -0
  18. data/examples/czech_bkr_utf8.txt +1000 -0
  19. data/examples/danish_utf8.txt +1000 -0
  20. data/examples/dutch_svv_utf8.txt +1000 -0
  21. data/examples/esperanto_utf8.txt +1000 -0
  22. data/examples/estonian_utf8.txt +1000 -0
  23. data/examples/finnish_pr_1992_utf8.txt +1000 -0
  24. data/examples/french_ostervald_1996_utf8.txt +1000 -0
  25. data/examples/german_schlachter_1951_utf8.txt +1000 -0
  26. data/examples/greek_byzantine_2000_utf8.txt +1000 -0
  27. data/examples/greek_modern_utf8.txt +1000 -0
  28. data/examples/hebrew_modern_utf8.txt +1000 -0
  29. data/examples/hungarian_karoli_utf8.txt +1000 -0
  30. data/examples/italian_riveduta_1927_utf8.txt +1000 -0
  31. data/examples/kabyle_nt_utf8.txt +1000 -0
  32. data/examples/kjv_apocrypha_utf8.txt +1000 -0
  33. data/examples/korean_utf8.txt +1000 -0
  34. data/examples/latin_vulgata_clementina_utf8.txt +1000 -0
  35. data/examples/latvian_nt_utf8.txt +1000 -0
  36. data/examples/lithuanian_utf8.txt +1000 -0
  37. data/examples/manx_gaelic_utf8.txt +1000 -0
  38. data/examples/maori_utf8.txt +1000 -0
  39. data/examples/myanmar_judson_1835_utf8.txt +1000 -0
  40. data/examples/norwegian_utf8.txt +1000 -0
  41. data/examples/peshitta_utf8.txt +1000 -0
  42. data/examples/portuguese_utf8.txt +1000 -0
  43. data/examples/romani_utf8.txt +1000 -0
  44. data/examples/romanian_cornilescu_utf8.txt +1000 -0
  45. data/examples/russian_makarij_utf8.txt +1000 -0
  46. data/examples/spanish_reina_valera_1909_utf8.txt +1000 -0
  47. data/examples/swedish_1917_utf8.txt +1000 -0
  48. data/examples/tagalog_1905_utf8.txt +1000 -0
  49. data/examples/thai_kjv_utf8.txt +1000 -0
  50. data/examples/turkish_nt_utf8.txt +1000 -0
  51. data/examples/turkish_utf8.txt +1000 -0
  52. data/examples/ukrainian_1871_utf8.txt +1000 -0
  53. data/examples/vietnamese_1934_utf8.txt +1000 -0
  54. data/examples/wolof_utf8.txt +1000 -0
  55. data/examples/xhosa_utf8.txt +1000 -0
  56. data/lib/langa.rb +35 -0
  57. data/lib/langa/dna.rb +209 -0
  58. data/lib/langa/file.rb +97 -0
  59. data/lib/langa/langa.dna +406 -0
  60. data/lib/langa/languageanalyzer.rb +134 -0
  61. data/lib/langa/languages.rb +147 -0
  62. data/lib/langa/randomtestfiles.rb +140 -0
  63. data/lib/langa/utilities.rb +53 -0
  64. data/test/tc_file.rb +47 -0
  65. data/test/tc_languages.rb +69 -0
  66. data/test/tc_utilities.rb +42 -0
  67. data/unicode/CaseFolding.txt +1065 -0
  68. data/unicode/CaseFolding.txt.webloc +8 -0
  69. data/unicode/Index of -Public-MAPPINGS.webloc b/data/unicode/Index of → -Public-MAPPINGS.webloc +0 -0
  70. data/unicode/mappings/8859-1.TXT +303 -0
  71. data/unicode/mappings/8859-10.TXT +303 -0
  72. data/unicode/mappings/8859-11.TXT +297 -0
  73. data/unicode/mappings/8859-13.TXT +299 -0
  74. data/unicode/mappings/8859-14.TXT +301 -0
  75. data/unicode/mappings/8859-15.TXT +303 -0
  76. data/unicode/mappings/8859-16.TXT +299 -0
  77. data/unicode/mappings/8859-2.TXT +303 -0
  78. data/unicode/mappings/8859-3.TXT +296 -0
  79. data/unicode/mappings/8859-4.TXT +303 -0
  80. data/unicode/mappings/8859-5.TXT +303 -0
  81. data/unicode/mappings/8859-6.TXT +260 -0
  82. data/unicode/mappings/8859-7.TXT +308 -0
  83. data/unicode/mappings/8859-8.TXT +270 -0
  84. data/unicode/mappings/8859-9.TXT +307 -0
  85. data/unicode/mappings/ATARIST.TXT +313 -0
  86. data/unicode/mappings/CP037.TXT +275 -0
  87. data/unicode/mappings/CP1006.TXT +302 -0
  88. data/unicode/mappings/CP1026.TXT +275 -0
  89. data/unicode/mappings/CP1250.TXT +274 -0
  90. data/unicode/mappings/CP1251.TXT +274 -0
  91. data/unicode/mappings/CP1252.TXT +274 -0
  92. data/unicode/mappings/CP1253.TXT +274 -0
  93. data/unicode/mappings/CP1254.TXT +274 -0
  94. data/unicode/mappings/CP1255.TXT +274 -0
  95. data/unicode/mappings/CP1256.TXT +274 -0
  96. data/unicode/mappings/CP1257.TXT +274 -0
  97. data/unicode/mappings/CP1258.TXT +274 -0
  98. data/unicode/mappings/CP424.TXT +304 -0
  99. data/unicode/mappings/CP437.TXT +274 -0
  100. data/unicode/mappings/CP500.TXT +275 -0
  101. data/unicode/mappings/CP737.TXT +274 -0
  102. data/unicode/mappings/CP775.TXT +275 -0
  103. data/unicode/mappings/CP850.TXT +274 -0
  104. data/unicode/mappings/CP852.TXT +274 -0
  105. data/unicode/mappings/CP855.TXT +275 -0
  106. data/unicode/mappings/CP856.TXT +303 -0
  107. data/unicode/mappings/CP857.TXT +275 -0
  108. data/unicode/mappings/CP860.TXT +275 -0
  109. data/unicode/mappings/CP861.TXT +275 -0
  110. data/unicode/mappings/CP862.TXT +275 -0
  111. data/unicode/mappings/CP863.TXT +275 -0
  112. data/unicode/mappings/CP864.TXT +275 -0
  113. data/unicode/mappings/CP865.TXT +275 -0
  114. data/unicode/mappings/CP866.TXT +275 -0
  115. data/unicode/mappings/CP869.TXT +275 -0
  116. data/unicode/mappings/CP874.TXT +274 -0
  117. data/unicode/mappings/CP875.TXT +275 -0
  118. data/unicode/mappings/KOI8-R.TXT +302 -0
  119. data/unicode/mappings/NEXTSTEP.TXT +173 -0
  120. data/unicode/mappings/ROMAN.TXT +275 -0
  121. data/unicode/mappings/US-ASCII-QUOTES.TXT +198 -0
  122. metadata +180 -0
@@ -0,0 +1,297 @@
1
+ #
2
+ # Name: ISO/IEC 8859-11:2001 to Unicode
3
+ # Unicode version: 3.2
4
+ # Table version: 1.0
5
+ # Table format: Format A
6
+ # Date: 2002 October 7
7
+ # Authors: Ken Whistler <kenw@sybase.com>
8
+ #
9
+ # Copyright (c) 2002 Unicode, Inc. All Rights reserved.
10
+ #
11
+ # This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
12
+ # No claims are made as to fitness for any particular purpose. No
13
+ # warranties of any kind are expressed or implied. The recipient
14
+ # agrees to determine applicability of information provided. If this
15
+ # file has been provided on optical media by Unicode, Inc., the sole
16
+ # remedy for any claim will be exchange of defective media within 90
17
+ # days of receipt.
18
+ #
19
+ # Unicode, Inc. hereby grants the right to freely use the information
20
+ # supplied in this file in the creation of products supporting the
21
+ # Unicode Standard, and to make copies of this file in any form for
22
+ # internal or external distribution as long as this notice remains
23
+ # attached.
24
+ #
25
+ # General notes:
26
+ #
27
+ # This table contains the data the Unicode Consortium has on how
28
+ # ISO/IEC 8859-11:2001 characters map into Unicode.
29
+ #
30
+ # ISO/IEC 8859-11:2001 is equivalent to TIS 620-2533 (1990) with
31
+ # the addition of 0xA0 NO-BREAK SPACE.
32
+ #
33
+ # Format: Three tab-separated columns
34
+ # Column #1 is the ISO/IEC 8859-11 code (in hex as 0xXX)
35
+ # Column #2 is the Unicode (in hex as 0xXXXX)
36
+ # Column #3 the Unicode name (follows a comment sign, '#')
37
+ #
38
+ # The entries are in ISO/IEC 8859-11 order.
39
+ #
40
+ # Version history:
41
+ # 2002 October 7 Created
42
+ #
43
+ # Updated versions of this file may be found in:
44
+ # <ftp://ftp.unicode.org/Public/MAPPINGS/>
45
+ #
46
+ # For any comments or problems, please use the Unicode
47
+ # web contact form at:
48
+ # http://www.unicode.org/unicode/reporting.html
49
+ #
50
+ 0x00 0x0000 # NULL
51
+ 0x01 0x0001 # START OF HEADING
52
+ 0x02 0x0002 # START OF TEXT
53
+ 0x03 0x0003 # END OF TEXT
54
+ 0x04 0x0004 # END OF TRANSMISSION
55
+ 0x05 0x0005 # ENQUIRY
56
+ 0x06 0x0006 # ACKNOWLEDGE
57
+ 0x07 0x0007 # BELL
58
+ 0x08 0x0008 # BACKSPACE
59
+ 0x09 0x0009 # HORIZONTAL TABULATION
60
+ 0x0A 0x000A # LINE FEED
61
+ 0x0B 0x000B # VERTICAL TABULATION
62
+ 0x0C 0x000C # FORM FEED
63
+ 0x0D 0x000D # CARRIAGE RETURN
64
+ 0x0E 0x000E # SHIFT OUT
65
+ 0x0F 0x000F # SHIFT IN
66
+ 0x10 0x0010 # DATA LINK ESCAPE
67
+ 0x11 0x0011 # DEVICE CONTROL ONE
68
+ 0x12 0x0012 # DEVICE CONTROL TWO
69
+ 0x13 0x0013 # DEVICE CONTROL THREE
70
+ 0x14 0x0014 # DEVICE CONTROL FOUR
71
+ 0x15 0x0015 # NEGATIVE ACKNOWLEDGE
72
+ 0x16 0x0016 # SYNCHRONOUS IDLE
73
+ 0x17 0x0017 # END OF TRANSMISSION BLOCK
74
+ 0x18 0x0018 # CANCEL
75
+ 0x19 0x0019 # END OF MEDIUM
76
+ 0x1A 0x001A # SUBSTITUTE
77
+ 0x1B 0x001B # ESCAPE
78
+ 0x1C 0x001C # FILE SEPARATOR
79
+ 0x1D 0x001D # GROUP SEPARATOR
80
+ 0x1E 0x001E # RECORD SEPARATOR
81
+ 0x1F 0x001F # UNIT SEPARATOR
82
+ 0x20 0x0020 # SPACE
83
+ 0x21 0x0021 # EXCLAMATION MARK
84
+ 0x22 0x0022 # QUOTATION MARK
85
+ 0x23 0x0023 # NUMBER SIGN
86
+ 0x24 0x0024 # DOLLAR SIGN
87
+ 0x25 0x0025 # PERCENT SIGN
88
+ 0x26 0x0026 # AMPERSAND
89
+ 0x27 0x0027 # APOSTROPHE
90
+ 0x28 0x0028 # LEFT PARENTHESIS
91
+ 0x29 0x0029 # RIGHT PARENTHESIS
92
+ 0x2A 0x002A # ASTERISK
93
+ 0x2B 0x002B # PLUS SIGN
94
+ 0x2C 0x002C # COMMA
95
+ 0x2D 0x002D # HYPHEN-MINUS
96
+ 0x2E 0x002E # FULL STOP
97
+ 0x2F 0x002F # SOLIDUS
98
+ 0x30 0x0030 # DIGIT ZERO
99
+ 0x31 0x0031 # DIGIT ONE
100
+ 0x32 0x0032 # DIGIT TWO
101
+ 0x33 0x0033 # DIGIT THREE
102
+ 0x34 0x0034 # DIGIT FOUR
103
+ 0x35 0x0035 # DIGIT FIVE
104
+ 0x36 0x0036 # DIGIT SIX
105
+ 0x37 0x0037 # DIGIT SEVEN
106
+ 0x38 0x0038 # DIGIT EIGHT
107
+ 0x39 0x0039 # DIGIT NINE
108
+ 0x3A 0x003A # COLON
109
+ 0x3B 0x003B # SEMICOLON
110
+ 0x3C 0x003C # LESS-THAN SIGN
111
+ 0x3D 0x003D # EQUALS SIGN
112
+ 0x3E 0x003E # GREATER-THAN SIGN
113
+ 0x3F 0x003F # QUESTION MARK
114
+ 0x40 0x0040 # COMMERCIAL AT
115
+ 0x41 0x0041 # LATIN CAPITAL LETTER A
116
+ 0x42 0x0042 # LATIN CAPITAL LETTER B
117
+ 0x43 0x0043 # LATIN CAPITAL LETTER C
118
+ 0x44 0x0044 # LATIN CAPITAL LETTER D
119
+ 0x45 0x0045 # LATIN CAPITAL LETTER E
120
+ 0x46 0x0046 # LATIN CAPITAL LETTER F
121
+ 0x47 0x0047 # LATIN CAPITAL LETTER G
122
+ 0x48 0x0048 # LATIN CAPITAL LETTER H
123
+ 0x49 0x0049 # LATIN CAPITAL LETTER I
124
+ 0x4A 0x004A # LATIN CAPITAL LETTER J
125
+ 0x4B 0x004B # LATIN CAPITAL LETTER K
126
+ 0x4C 0x004C # LATIN CAPITAL LETTER L
127
+ 0x4D 0x004D # LATIN CAPITAL LETTER M
128
+ 0x4E 0x004E # LATIN CAPITAL LETTER N
129
+ 0x4F 0x004F # LATIN CAPITAL LETTER O
130
+ 0x50 0x0050 # LATIN CAPITAL LETTER P
131
+ 0x51 0x0051 # LATIN CAPITAL LETTER Q
132
+ 0x52 0x0052 # LATIN CAPITAL LETTER R
133
+ 0x53 0x0053 # LATIN CAPITAL LETTER S
134
+ 0x54 0x0054 # LATIN CAPITAL LETTER T
135
+ 0x55 0x0055 # LATIN CAPITAL LETTER U
136
+ 0x56 0x0056 # LATIN CAPITAL LETTER V
137
+ 0x57 0x0057 # LATIN CAPITAL LETTER W
138
+ 0x58 0x0058 # LATIN CAPITAL LETTER X
139
+ 0x59 0x0059 # LATIN CAPITAL LETTER Y
140
+ 0x5A 0x005A # LATIN CAPITAL LETTER Z
141
+ 0x5B 0x005B # LEFT SQUARE BRACKET
142
+ 0x5C 0x005C # REVERSE SOLIDUS
143
+ 0x5D 0x005D # RIGHT SQUARE BRACKET
144
+ 0x5E 0x005E # CIRCUMFLEX ACCENT
145
+ 0x5F 0x005F # LOW LINE
146
+ 0x60 0x0060 # GRAVE ACCENT
147
+ 0x61 0x0061 # LATIN SMALL LETTER A
148
+ 0x62 0x0062 # LATIN SMALL LETTER B
149
+ 0x63 0x0063 # LATIN SMALL LETTER C
150
+ 0x64 0x0064 # LATIN SMALL LETTER D
151
+ 0x65 0x0065 # LATIN SMALL LETTER E
152
+ 0x66 0x0066 # LATIN SMALL LETTER F
153
+ 0x67 0x0067 # LATIN SMALL LETTER G
154
+ 0x68 0x0068 # LATIN SMALL LETTER H
155
+ 0x69 0x0069 # LATIN SMALL LETTER I
156
+ 0x6A 0x006A # LATIN SMALL LETTER J
157
+ 0x6B 0x006B # LATIN SMALL LETTER K
158
+ 0x6C 0x006C # LATIN SMALL LETTER L
159
+ 0x6D 0x006D # LATIN SMALL LETTER M
160
+ 0x6E 0x006E # LATIN SMALL LETTER N
161
+ 0x6F 0x006F # LATIN SMALL LETTER O
162
+ 0x70 0x0070 # LATIN SMALL LETTER P
163
+ 0x71 0x0071 # LATIN SMALL LETTER Q
164
+ 0x72 0x0072 # LATIN SMALL LETTER R
165
+ 0x73 0x0073 # LATIN SMALL LETTER S
166
+ 0x74 0x0074 # LATIN SMALL LETTER T
167
+ 0x75 0x0075 # LATIN SMALL LETTER U
168
+ 0x76 0x0076 # LATIN SMALL LETTER V
169
+ 0x77 0x0077 # LATIN SMALL LETTER W
170
+ 0x78 0x0078 # LATIN SMALL LETTER X
171
+ 0x79 0x0079 # LATIN SMALL LETTER Y
172
+ 0x7A 0x007A # LATIN SMALL LETTER Z
173
+ 0x7B 0x007B # LEFT CURLY BRACKET
174
+ 0x7C 0x007C # VERTICAL LINE
175
+ 0x7D 0x007D # RIGHT CURLY BRACKET
176
+ 0x7E 0x007E # TILDE
177
+ 0x7F 0x007F # DELETE
178
+ 0x80 0x0080 # <control>
179
+ 0x81 0x0081 # <control>
180
+ 0x82 0x0082 # <control>
181
+ 0x83 0x0083 # <control>
182
+ 0x84 0x0084 # <control>
183
+ 0x85 0x0085 # <control>
184
+ 0x86 0x0086 # <control>
185
+ 0x87 0x0087 # <control>
186
+ 0x88 0x0088 # <control>
187
+ 0x89 0x0089 # <control>
188
+ 0x8A 0x008A # <control>
189
+ 0x8B 0x008B # <control>
190
+ 0x8C 0x008C # <control>
191
+ 0x8D 0x008D # <control>
192
+ 0x8E 0x008E # <control>
193
+ 0x8F 0x008F # <control>
194
+ 0x90 0x0090 # <control>
195
+ 0x91 0x0091 # <control>
196
+ 0x92 0x0092 # <control>
197
+ 0x93 0x0093 # <control>
198
+ 0x94 0x0094 # <control>
199
+ 0x95 0x0095 # <control>
200
+ 0x96 0x0096 # <control>
201
+ 0x97 0x0097 # <control>
202
+ 0x98 0x0098 # <control>
203
+ 0x99 0x0099 # <control>
204
+ 0x9A 0x009A # <control>
205
+ 0x9B 0x009B # <control>
206
+ 0x9C 0x009C # <control>
207
+ 0x9D 0x009D # <control>
208
+ 0x9E 0x009E # <control>
209
+ 0x9F 0x009F # <control>
210
+ 0xA0 0x00A0 # NO-BREAK SPACE
211
+ 0xA1 0x0E01 # THAI CHARACTER KO KAI
212
+ 0xA2 0x0E02 # THAI CHARACTER KHO KHAI
213
+ 0xA3 0x0E03 # THAI CHARACTER KHO KHUAT
214
+ 0xA4 0x0E04 # THAI CHARACTER KHO KHWAI
215
+ 0xA5 0x0E05 # THAI CHARACTER KHO KHON
216
+ 0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG
217
+ 0xA7 0x0E07 # THAI CHARACTER NGO NGU
218
+ 0xA8 0x0E08 # THAI CHARACTER CHO CHAN
219
+ 0xA9 0x0E09 # THAI CHARACTER CHO CHING
220
+ 0xAA 0x0E0A # THAI CHARACTER CHO CHANG
221
+ 0xAB 0x0E0B # THAI CHARACTER SO SO
222
+ 0xAC 0x0E0C # THAI CHARACTER CHO CHOE
223
+ 0xAD 0x0E0D # THAI CHARACTER YO YING
224
+ 0xAE 0x0E0E # THAI CHARACTER DO CHADA
225
+ 0xAF 0x0E0F # THAI CHARACTER TO PATAK
226
+ 0xB0 0x0E10 # THAI CHARACTER THO THAN
227
+ 0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO
228
+ 0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO
229
+ 0xB3 0x0E13 # THAI CHARACTER NO NEN
230
+ 0xB4 0x0E14 # THAI CHARACTER DO DEK
231
+ 0xB5 0x0E15 # THAI CHARACTER TO TAO
232
+ 0xB6 0x0E16 # THAI CHARACTER THO THUNG
233
+ 0xB7 0x0E17 # THAI CHARACTER THO THAHAN
234
+ 0xB8 0x0E18 # THAI CHARACTER THO THONG
235
+ 0xB9 0x0E19 # THAI CHARACTER NO NU
236
+ 0xBA 0x0E1A # THAI CHARACTER BO BAIMAI
237
+ 0xBB 0x0E1B # THAI CHARACTER PO PLA
238
+ 0xBC 0x0E1C # THAI CHARACTER PHO PHUNG
239
+ 0xBD 0x0E1D # THAI CHARACTER FO FA
240
+ 0xBE 0x0E1E # THAI CHARACTER PHO PHAN
241
+ 0xBF 0x0E1F # THAI CHARACTER FO FAN
242
+ 0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO
243
+ 0xC1 0x0E21 # THAI CHARACTER MO MA
244
+ 0xC2 0x0E22 # THAI CHARACTER YO YAK
245
+ 0xC3 0x0E23 # THAI CHARACTER RO RUA
246
+ 0xC4 0x0E24 # THAI CHARACTER RU
247
+ 0xC5 0x0E25 # THAI CHARACTER LO LING
248
+ 0xC6 0x0E26 # THAI CHARACTER LU
249
+ 0xC7 0x0E27 # THAI CHARACTER WO WAEN
250
+ 0xC8 0x0E28 # THAI CHARACTER SO SALA
251
+ 0xC9 0x0E29 # THAI CHARACTER SO RUSI
252
+ 0xCA 0x0E2A # THAI CHARACTER SO SUA
253
+ 0xCB 0x0E2B # THAI CHARACTER HO HIP
254
+ 0xCC 0x0E2C # THAI CHARACTER LO CHULA
255
+ 0xCD 0x0E2D # THAI CHARACTER O ANG
256
+ 0xCE 0x0E2E # THAI CHARACTER HO NOKHUK
257
+ 0xCF 0x0E2F # THAI CHARACTER PAIYANNOI
258
+ 0xD0 0x0E30 # THAI CHARACTER SARA A
259
+ 0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT
260
+ 0xD2 0x0E32 # THAI CHARACTER SARA AA
261
+ 0xD3 0x0E33 # THAI CHARACTER SARA AM
262
+ 0xD4 0x0E34 # THAI CHARACTER SARA I
263
+ 0xD5 0x0E35 # THAI CHARACTER SARA II
264
+ 0xD6 0x0E36 # THAI CHARACTER SARA UE
265
+ 0xD7 0x0E37 # THAI CHARACTER SARA UEE
266
+ 0xD8 0x0E38 # THAI CHARACTER SARA U
267
+ 0xD9 0x0E39 # THAI CHARACTER SARA UU
268
+ 0xDA 0x0E3A # THAI CHARACTER PHINTHU
269
+ 0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT
270
+ 0xE0 0x0E40 # THAI CHARACTER SARA E
271
+ 0xE1 0x0E41 # THAI CHARACTER SARA AE
272
+ 0xE2 0x0E42 # THAI CHARACTER SARA O
273
+ 0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN
274
+ 0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI
275
+ 0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO
276
+ 0xE6 0x0E46 # THAI CHARACTER MAIYAMOK
277
+ 0xE7 0x0E47 # THAI CHARACTER MAITAIKHU
278
+ 0xE8 0x0E48 # THAI CHARACTER MAI EK
279
+ 0xE9 0x0E49 # THAI CHARACTER MAI THO
280
+ 0xEA 0x0E4A # THAI CHARACTER MAI TRI
281
+ 0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA
282
+ 0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT
283
+ 0xED 0x0E4D # THAI CHARACTER NIKHAHIT
284
+ 0xEE 0x0E4E # THAI CHARACTER YAMAKKAN
285
+ 0xEF 0x0E4F # THAI CHARACTER FONGMAN
286
+ 0xF0 0x0E50 # THAI DIGIT ZERO
287
+ 0xF1 0x0E51 # THAI DIGIT ONE
288
+ 0xF2 0x0E52 # THAI DIGIT TWO
289
+ 0xF3 0x0E53 # THAI DIGIT THREE
290
+ 0xF4 0x0E54 # THAI DIGIT FOUR
291
+ 0xF5 0x0E55 # THAI DIGIT FIVE
292
+ 0xF6 0x0E56 # THAI DIGIT SIX
293
+ 0xF7 0x0E57 # THAI DIGIT SEVEN
294
+ 0xF8 0x0E58 # THAI DIGIT EIGHT
295
+ 0xF9 0x0E59 # THAI DIGIT NINE
296
+ 0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU
297
+ 0xFB 0x0E5B # THAI CHARACTER KHOMUT
@@ -0,0 +1,299 @@
1
+ #
2
+ # Name: ISO/IEC 8859-13:1998 to Unicode
3
+ # Unicode version: 3.0
4
+ # Table version: 1.0
5
+ # Table format: Format A
6
+ # Date: 1999 July 27
7
+ # Authors: Ken Whistler <kenw@sybase.com>
8
+ #
9
+ # Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved.
10
+ #
11
+ # This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
12
+ # No claims are made as to fitness for any particular purpose. No
13
+ # warranties of any kind are expressed or implied. The recipient
14
+ # agrees to determine applicability of information provided. If this
15
+ # file has been provided on optical media by Unicode, Inc., the sole
16
+ # remedy for any claim will be exchange of defective media within 90
17
+ # days of receipt.
18
+ #
19
+ # Unicode, Inc. hereby grants the right to freely use the information
20
+ # supplied in this file in the creation of products supporting the
21
+ # Unicode Standard, and to make copies of this file in any form for
22
+ # internal or external distribution as long as this notice remains
23
+ # attached.
24
+ #
25
+ # General notes:
26
+ #
27
+ # This table contains the data the Unicode Consortium has on how
28
+ # ISO/IEC 8859-13:1998 characters map into Unicode.
29
+ #
30
+ # Format: Three tab-separated columns
31
+ # Column #1 is the ISO/IEC 8859-13 code (in hex as 0xXX)
32
+ # Column #2 is the Unicode (in hex as 0xXXXX)
33
+ # Column #3 the Unicode name (follows a comment sign, '#')
34
+ #
35
+ # The entries are in ISO/IEC 8859-13 order.
36
+ #
37
+ # Updated versions of this file may be found in:
38
+ # <ftp://ftp.unicode.org/Public/MAPPINGS/>
39
+ #
40
+ # Any comments or problems, contact <errata@unicode.org>
41
+ # Please note that <errata@unicode.org> is an archival address;
42
+ # notices will be checked, but do not expect an immediate response.
43
+ #
44
+ 0x00 0x0000 # NULL
45
+ 0x01 0x0001 # START OF HEADING
46
+ 0x02 0x0002 # START OF TEXT
47
+ 0x03 0x0003 # END OF TEXT
48
+ 0x04 0x0004 # END OF TRANSMISSION
49
+ 0x05 0x0005 # ENQUIRY
50
+ 0x06 0x0006 # ACKNOWLEDGE
51
+ 0x07 0x0007 # BELL
52
+ 0x08 0x0008 # BACKSPACE
53
+ 0x09 0x0009 # HORIZONTAL TABULATION
54
+ 0x0A 0x000A # LINE FEED
55
+ 0x0B 0x000B # VERTICAL TABULATION
56
+ 0x0C 0x000C # FORM FEED
57
+ 0x0D 0x000D # CARRIAGE RETURN
58
+ 0x0E 0x000E # SHIFT OUT
59
+ 0x0F 0x000F # SHIFT IN
60
+ 0x10 0x0010 # DATA LINK ESCAPE
61
+ 0x11 0x0011 # DEVICE CONTROL ONE
62
+ 0x12 0x0012 # DEVICE CONTROL TWO
63
+ 0x13 0x0013 # DEVICE CONTROL THREE
64
+ 0x14 0x0014 # DEVICE CONTROL FOUR
65
+ 0x15 0x0015 # NEGATIVE ACKNOWLEDGE
66
+ 0x16 0x0016 # SYNCHRONOUS IDLE
67
+ 0x17 0x0017 # END OF TRANSMISSION BLOCK
68
+ 0x18 0x0018 # CANCEL
69
+ 0x19 0x0019 # END OF MEDIUM
70
+ 0x1A 0x001A # SUBSTITUTE
71
+ 0x1B 0x001B # ESCAPE
72
+ 0x1C 0x001C # FILE SEPARATOR
73
+ 0x1D 0x001D # GROUP SEPARATOR
74
+ 0x1E 0x001E # RECORD SEPARATOR
75
+ 0x1F 0x001F # UNIT SEPARATOR
76
+ 0x20 0x0020 # SPACE
77
+ 0x21 0x0021 # EXCLAMATION MARK
78
+ 0x22 0x0022 # QUOTATION MARK
79
+ 0x23 0x0023 # NUMBER SIGN
80
+ 0x24 0x0024 # DOLLAR SIGN
81
+ 0x25 0x0025 # PERCENT SIGN
82
+ 0x26 0x0026 # AMPERSAND
83
+ 0x27 0x0027 # APOSTROPHE
84
+ 0x28 0x0028 # LEFT PARENTHESIS
85
+ 0x29 0x0029 # RIGHT PARENTHESIS
86
+ 0x2A 0x002A # ASTERISK
87
+ 0x2B 0x002B # PLUS SIGN
88
+ 0x2C 0x002C # COMMA
89
+ 0x2D 0x002D # HYPHEN-MINUS
90
+ 0x2E 0x002E # FULL STOP
91
+ 0x2F 0x002F # SOLIDUS
92
+ 0x30 0x0030 # DIGIT ZERO
93
+ 0x31 0x0031 # DIGIT ONE
94
+ 0x32 0x0032 # DIGIT TWO
95
+ 0x33 0x0033 # DIGIT THREE
96
+ 0x34 0x0034 # DIGIT FOUR
97
+ 0x35 0x0035 # DIGIT FIVE
98
+ 0x36 0x0036 # DIGIT SIX
99
+ 0x37 0x0037 # DIGIT SEVEN
100
+ 0x38 0x0038 # DIGIT EIGHT
101
+ 0x39 0x0039 # DIGIT NINE
102
+ 0x3A 0x003A # COLON
103
+ 0x3B 0x003B # SEMICOLON
104
+ 0x3C 0x003C # LESS-THAN SIGN
105
+ 0x3D 0x003D # EQUALS SIGN
106
+ 0x3E 0x003E # GREATER-THAN SIGN
107
+ 0x3F 0x003F # QUESTION MARK
108
+ 0x40 0x0040 # COMMERCIAL AT
109
+ 0x41 0x0041 # LATIN CAPITAL LETTER A
110
+ 0x42 0x0042 # LATIN CAPITAL LETTER B
111
+ 0x43 0x0043 # LATIN CAPITAL LETTER C
112
+ 0x44 0x0044 # LATIN CAPITAL LETTER D
113
+ 0x45 0x0045 # LATIN CAPITAL LETTER E
114
+ 0x46 0x0046 # LATIN CAPITAL LETTER F
115
+ 0x47 0x0047 # LATIN CAPITAL LETTER G
116
+ 0x48 0x0048 # LATIN CAPITAL LETTER H
117
+ 0x49 0x0049 # LATIN CAPITAL LETTER I
118
+ 0x4A 0x004A # LATIN CAPITAL LETTER J
119
+ 0x4B 0x004B # LATIN CAPITAL LETTER K
120
+ 0x4C 0x004C # LATIN CAPITAL LETTER L
121
+ 0x4D 0x004D # LATIN CAPITAL LETTER M
122
+ 0x4E 0x004E # LATIN CAPITAL LETTER N
123
+ 0x4F 0x004F # LATIN CAPITAL LETTER O
124
+ 0x50 0x0050 # LATIN CAPITAL LETTER P
125
+ 0x51 0x0051 # LATIN CAPITAL LETTER Q
126
+ 0x52 0x0052 # LATIN CAPITAL LETTER R
127
+ 0x53 0x0053 # LATIN CAPITAL LETTER S
128
+ 0x54 0x0054 # LATIN CAPITAL LETTER T
129
+ 0x55 0x0055 # LATIN CAPITAL LETTER U
130
+ 0x56 0x0056 # LATIN CAPITAL LETTER V
131
+ 0x57 0x0057 # LATIN CAPITAL LETTER W
132
+ 0x58 0x0058 # LATIN CAPITAL LETTER X
133
+ 0x59 0x0059 # LATIN CAPITAL LETTER Y
134
+ 0x5A 0x005A # LATIN CAPITAL LETTER Z
135
+ 0x5B 0x005B # LEFT SQUARE BRACKET
136
+ 0x5C 0x005C # REVERSE SOLIDUS
137
+ 0x5D 0x005D # RIGHT SQUARE BRACKET
138
+ 0x5E 0x005E # CIRCUMFLEX ACCENT
139
+ 0x5F 0x005F # LOW LINE
140
+ 0x60 0x0060 # GRAVE ACCENT
141
+ 0x61 0x0061 # LATIN SMALL LETTER A
142
+ 0x62 0x0062 # LATIN SMALL LETTER B
143
+ 0x63 0x0063 # LATIN SMALL LETTER C
144
+ 0x64 0x0064 # LATIN SMALL LETTER D
145
+ 0x65 0x0065 # LATIN SMALL LETTER E
146
+ 0x66 0x0066 # LATIN SMALL LETTER F
147
+ 0x67 0x0067 # LATIN SMALL LETTER G
148
+ 0x68 0x0068 # LATIN SMALL LETTER H
149
+ 0x69 0x0069 # LATIN SMALL LETTER I
150
+ 0x6A 0x006A # LATIN SMALL LETTER J
151
+ 0x6B 0x006B # LATIN SMALL LETTER K
152
+ 0x6C 0x006C # LATIN SMALL LETTER L
153
+ 0x6D 0x006D # LATIN SMALL LETTER M
154
+ 0x6E 0x006E # LATIN SMALL LETTER N
155
+ 0x6F 0x006F # LATIN SMALL LETTER O
156
+ 0x70 0x0070 # LATIN SMALL LETTER P
157
+ 0x71 0x0071 # LATIN SMALL LETTER Q
158
+ 0x72 0x0072 # LATIN SMALL LETTER R
159
+ 0x73 0x0073 # LATIN SMALL LETTER S
160
+ 0x74 0x0074 # LATIN SMALL LETTER T
161
+ 0x75 0x0075 # LATIN SMALL LETTER U
162
+ 0x76 0x0076 # LATIN SMALL LETTER V
163
+ 0x77 0x0077 # LATIN SMALL LETTER W
164
+ 0x78 0x0078 # LATIN SMALL LETTER X
165
+ 0x79 0x0079 # LATIN SMALL LETTER Y
166
+ 0x7A 0x007A # LATIN SMALL LETTER Z
167
+ 0x7B 0x007B # LEFT CURLY BRACKET
168
+ 0x7C 0x007C # VERTICAL LINE
169
+ 0x7D 0x007D # RIGHT CURLY BRACKET
170
+ 0x7E 0x007E # TILDE
171
+ 0x7F 0x007F # DELETE
172
+ 0x80 0x0080 # <control>
173
+ 0x81 0x0081 # <control>
174
+ 0x82 0x0082 # <control>
175
+ 0x83 0x0083 # <control>
176
+ 0x84 0x0084 # <control>
177
+ 0x85 0x0085 # <control>
178
+ 0x86 0x0086 # <control>
179
+ 0x87 0x0087 # <control>
180
+ 0x88 0x0088 # <control>
181
+ 0x89 0x0089 # <control>
182
+ 0x8A 0x008A # <control>
183
+ 0x8B 0x008B # <control>
184
+ 0x8C 0x008C # <control>
185
+ 0x8D 0x008D # <control>
186
+ 0x8E 0x008E # <control>
187
+ 0x8F 0x008F # <control>
188
+ 0x90 0x0090 # <control>
189
+ 0x91 0x0091 # <control>
190
+ 0x92 0x0092 # <control>
191
+ 0x93 0x0093 # <control>
192
+ 0x94 0x0094 # <control>
193
+ 0x95 0x0095 # <control>
194
+ 0x96 0x0096 # <control>
195
+ 0x97 0x0097 # <control>
196
+ 0x98 0x0098 # <control>
197
+ 0x99 0x0099 # <control>
198
+ 0x9A 0x009A # <control>
199
+ 0x9B 0x009B # <control>
200
+ 0x9C 0x009C # <control>
201
+ 0x9D 0x009D # <control>
202
+ 0x9E 0x009E # <control>
203
+ 0x9F 0x009F # <control>
204
+ 0xA0 0x00A0 # NO-BREAK SPACE
205
+ 0xA1 0x201D # RIGHT DOUBLE QUOTATION MARK
206
+ 0xA2 0x00A2 # CENT SIGN
207
+ 0xA3 0x00A3 # POUND SIGN
208
+ 0xA4 0x00A4 # CURRENCY SIGN
209
+ 0xA5 0x201E # DOUBLE LOW-9 QUOTATION MARK
210
+ 0xA6 0x00A6 # BROKEN BAR
211
+ 0xA7 0x00A7 # SECTION SIGN
212
+ 0xA8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
213
+ 0xA9 0x00A9 # COPYRIGHT SIGN
214
+ 0xAA 0x0156 # LATIN CAPITAL LETTER R WITH CEDILLA
215
+ 0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
216
+ 0xAC 0x00AC # NOT SIGN
217
+ 0xAD 0x00AD # SOFT HYPHEN
218
+ 0xAE 0x00AE # REGISTERED SIGN
219
+ 0xAF 0x00C6 # LATIN CAPITAL LETTER AE
220
+ 0xB0 0x00B0 # DEGREE SIGN
221
+ 0xB1 0x00B1 # PLUS-MINUS SIGN
222
+ 0xB2 0x00B2 # SUPERSCRIPT TWO
223
+ 0xB3 0x00B3 # SUPERSCRIPT THREE
224
+ 0xB4 0x201C # LEFT DOUBLE QUOTATION MARK
225
+ 0xB5 0x00B5 # MICRO SIGN
226
+ 0xB6 0x00B6 # PILCROW SIGN
227
+ 0xB7 0x00B7 # MIDDLE DOT
228
+ 0xB8 0x00F8 # LATIN SMALL LETTER O WITH STROKE
229
+ 0xB9 0x00B9 # SUPERSCRIPT ONE
230
+ 0xBA 0x0157 # LATIN SMALL LETTER R WITH CEDILLA
231
+ 0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
232
+ 0xBC 0x00BC # VULGAR FRACTION ONE QUARTER
233
+ 0xBD 0x00BD # VULGAR FRACTION ONE HALF
234
+ 0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS
235
+ 0xBF 0x00E6 # LATIN SMALL LETTER AE
236
+ 0xC0 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK
237
+ 0xC1 0x012E # LATIN CAPITAL LETTER I WITH OGONEK
238
+ 0xC2 0x0100 # LATIN CAPITAL LETTER A WITH MACRON
239
+ 0xC3 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE
240
+ 0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
241
+ 0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
242
+ 0xC6 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK
243
+ 0xC7 0x0112 # LATIN CAPITAL LETTER E WITH MACRON
244
+ 0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON
245
+ 0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
246
+ 0xCA 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE
247
+ 0xCB 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE
248
+ 0xCC 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA
249
+ 0xCD 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA
250
+ 0xCE 0x012A # LATIN CAPITAL LETTER I WITH MACRON
251
+ 0xCF 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA
252
+ 0xD0 0x0160 # LATIN CAPITAL LETTER S WITH CARON
253
+ 0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE
254
+ 0xD2 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA
255
+ 0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
256
+ 0xD4 0x014C # LATIN CAPITAL LETTER O WITH MACRON
257
+ 0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
258
+ 0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
259
+ 0xD7 0x00D7 # MULTIPLICATION SIGN
260
+ 0xD8 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK
261
+ 0xD9 0x0141 # LATIN CAPITAL LETTER L WITH STROKE
262
+ 0xDA 0x015A # LATIN CAPITAL LETTER S WITH ACUTE
263
+ 0xDB 0x016A # LATIN CAPITAL LETTER U WITH MACRON
264
+ 0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
265
+ 0xDD 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE
266
+ 0xDE 0x017D # LATIN CAPITAL LETTER Z WITH CARON
267
+ 0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German)
268
+ 0xE0 0x0105 # LATIN SMALL LETTER A WITH OGONEK
269
+ 0xE1 0x012F # LATIN SMALL LETTER I WITH OGONEK
270
+ 0xE2 0x0101 # LATIN SMALL LETTER A WITH MACRON
271
+ 0xE3 0x0107 # LATIN SMALL LETTER C WITH ACUTE
272
+ 0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
273
+ 0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
274
+ 0xE6 0x0119 # LATIN SMALL LETTER E WITH OGONEK
275
+ 0xE7 0x0113 # LATIN SMALL LETTER E WITH MACRON
276
+ 0xE8 0x010D # LATIN SMALL LETTER C WITH CARON
277
+ 0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
278
+ 0xEA 0x017A # LATIN SMALL LETTER Z WITH ACUTE
279
+ 0xEB 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE
280
+ 0xEC 0x0123 # LATIN SMALL LETTER G WITH CEDILLA
281
+ 0xED 0x0137 # LATIN SMALL LETTER K WITH CEDILLA
282
+ 0xEE 0x012B # LATIN SMALL LETTER I WITH MACRON
283
+ 0xEF 0x013C # LATIN SMALL LETTER L WITH CEDILLA
284
+ 0xF0 0x0161 # LATIN SMALL LETTER S WITH CARON
285
+ 0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE
286
+ 0xF2 0x0146 # LATIN SMALL LETTER N WITH CEDILLA
287
+ 0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
288
+ 0xF4 0x014D # LATIN SMALL LETTER O WITH MACRON
289
+ 0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE
290
+ 0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
291
+ 0xF7 0x00F7 # DIVISION SIGN
292
+ 0xF8 0x0173 # LATIN SMALL LETTER U WITH OGONEK
293
+ 0xF9 0x0142 # LATIN SMALL LETTER L WITH STROKE
294
+ 0xFA 0x015B # LATIN SMALL LETTER S WITH ACUTE
295
+ 0xFB 0x016B # LATIN SMALL LETTER U WITH MACRON
296
+ 0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
297
+ 0xFD 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE
298
+ 0xFE 0x017E # LATIN SMALL LETTER Z WITH CARON
299
+ 0xFF 0x2019 # RIGHT SINGLE QUOTATION MARK