langa 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/COPYING +674 -0
  2. data/README +69 -0
  3. data/bin/langa +169 -0
  4. data/examples/afrikaans_1953_utf8.txt +1000 -0
  5. data/examples/albanian_utf8.txt +1000 -0
  6. data/examples/amharic_utf8.txt +1000 -0
  7. data/examples/arabic_svd_utf8.txt +1000 -0
  8. data/examples/armenian_western_1853_utf8.txt +1000 -0
  9. data/examples/asv_utf8.txt +1000 -0
  10. data/examples/basque_1571_utf8.txt +1000 -0
  11. data/examples/breton_utf8.txt +1000 -0
  12. data/examples/chinese_ncv_s_utf8.txt +1000 -0
  13. data/examples/chinese_ncv_utf8.txt +1000 -0
  14. data/examples/chinese_union_s_utf8.txt +1000 -0
  15. data/examples/chinese_union_utf8.txt +1000 -0
  16. data/examples/coptic_nt_utf8.txt +1000 -0
  17. data/examples/croatian_utf8.txt +1000 -0
  18. data/examples/czech_bkr_utf8.txt +1000 -0
  19. data/examples/danish_utf8.txt +1000 -0
  20. data/examples/dutch_svv_utf8.txt +1000 -0
  21. data/examples/esperanto_utf8.txt +1000 -0
  22. data/examples/estonian_utf8.txt +1000 -0
  23. data/examples/finnish_pr_1992_utf8.txt +1000 -0
  24. data/examples/french_ostervald_1996_utf8.txt +1000 -0
  25. data/examples/german_schlachter_1951_utf8.txt +1000 -0
  26. data/examples/greek_byzantine_2000_utf8.txt +1000 -0
  27. data/examples/greek_modern_utf8.txt +1000 -0
  28. data/examples/hebrew_modern_utf8.txt +1000 -0
  29. data/examples/hungarian_karoli_utf8.txt +1000 -0
  30. data/examples/italian_riveduta_1927_utf8.txt +1000 -0
  31. data/examples/kabyle_nt_utf8.txt +1000 -0
  32. data/examples/kjv_apocrypha_utf8.txt +1000 -0
  33. data/examples/korean_utf8.txt +1000 -0
  34. data/examples/latin_vulgata_clementina_utf8.txt +1000 -0
  35. data/examples/latvian_nt_utf8.txt +1000 -0
  36. data/examples/lithuanian_utf8.txt +1000 -0
  37. data/examples/manx_gaelic_utf8.txt +1000 -0
  38. data/examples/maori_utf8.txt +1000 -0
  39. data/examples/myanmar_judson_1835_utf8.txt +1000 -0
  40. data/examples/norwegian_utf8.txt +1000 -0
  41. data/examples/peshitta_utf8.txt +1000 -0
  42. data/examples/portuguese_utf8.txt +1000 -0
  43. data/examples/romani_utf8.txt +1000 -0
  44. data/examples/romanian_cornilescu_utf8.txt +1000 -0
  45. data/examples/russian_makarij_utf8.txt +1000 -0
  46. data/examples/spanish_reina_valera_1909_utf8.txt +1000 -0
  47. data/examples/swedish_1917_utf8.txt +1000 -0
  48. data/examples/tagalog_1905_utf8.txt +1000 -0
  49. data/examples/thai_kjv_utf8.txt +1000 -0
  50. data/examples/turkish_nt_utf8.txt +1000 -0
  51. data/examples/turkish_utf8.txt +1000 -0
  52. data/examples/ukrainian_1871_utf8.txt +1000 -0
  53. data/examples/vietnamese_1934_utf8.txt +1000 -0
  54. data/examples/wolof_utf8.txt +1000 -0
  55. data/examples/xhosa_utf8.txt +1000 -0
  56. data/lib/langa.rb +35 -0
  57. data/lib/langa/dna.rb +209 -0
  58. data/lib/langa/file.rb +97 -0
  59. data/lib/langa/langa.dna +406 -0
  60. data/lib/langa/languageanalyzer.rb +134 -0
  61. data/lib/langa/languages.rb +147 -0
  62. data/lib/langa/randomtestfiles.rb +140 -0
  63. data/lib/langa/utilities.rb +53 -0
  64. data/test/tc_file.rb +47 -0
  65. data/test/tc_languages.rb +69 -0
  66. data/test/tc_utilities.rb +42 -0
  67. data/unicode/CaseFolding.txt +1065 -0
  68. data/unicode/CaseFolding.txt.webloc +8 -0
  69. data/unicode/Index of -Public-MAPPINGS.webloc +0 -0
  70. data/unicode/mappings/8859-1.TXT +303 -0
  71. data/unicode/mappings/8859-10.TXT +303 -0
  72. data/unicode/mappings/8859-11.TXT +297 -0
  73. data/unicode/mappings/8859-13.TXT +299 -0
  74. data/unicode/mappings/8859-14.TXT +301 -0
  75. data/unicode/mappings/8859-15.TXT +303 -0
  76. data/unicode/mappings/8859-16.TXT +299 -0
  77. data/unicode/mappings/8859-2.TXT +303 -0
  78. data/unicode/mappings/8859-3.TXT +296 -0
  79. data/unicode/mappings/8859-4.TXT +303 -0
  80. data/unicode/mappings/8859-5.TXT +303 -0
  81. data/unicode/mappings/8859-6.TXT +260 -0
  82. data/unicode/mappings/8859-7.TXT +308 -0
  83. data/unicode/mappings/8859-8.TXT +270 -0
  84. data/unicode/mappings/8859-9.TXT +307 -0
  85. data/unicode/mappings/ATARIST.TXT +313 -0
  86. data/unicode/mappings/CP037.TXT +275 -0
  87. data/unicode/mappings/CP1006.TXT +302 -0
  88. data/unicode/mappings/CP1026.TXT +275 -0
  89. data/unicode/mappings/CP1250.TXT +274 -0
  90. data/unicode/mappings/CP1251.TXT +274 -0
  91. data/unicode/mappings/CP1252.TXT +274 -0
  92. data/unicode/mappings/CP1253.TXT +274 -0
  93. data/unicode/mappings/CP1254.TXT +274 -0
  94. data/unicode/mappings/CP1255.TXT +274 -0
  95. data/unicode/mappings/CP1256.TXT +274 -0
  96. data/unicode/mappings/CP1257.TXT +274 -0
  97. data/unicode/mappings/CP1258.TXT +274 -0
  98. data/unicode/mappings/CP424.TXT +304 -0
  99. data/unicode/mappings/CP437.TXT +274 -0
  100. data/unicode/mappings/CP500.TXT +275 -0
  101. data/unicode/mappings/CP737.TXT +274 -0
  102. data/unicode/mappings/CP775.TXT +275 -0
  103. data/unicode/mappings/CP850.TXT +274 -0
  104. data/unicode/mappings/CP852.TXT +274 -0
  105. data/unicode/mappings/CP855.TXT +275 -0
  106. data/unicode/mappings/CP856.TXT +303 -0
  107. data/unicode/mappings/CP857.TXT +275 -0
  108. data/unicode/mappings/CP860.TXT +275 -0
  109. data/unicode/mappings/CP861.TXT +275 -0
  110. data/unicode/mappings/CP862.TXT +275 -0
  111. data/unicode/mappings/CP863.TXT +275 -0
  112. data/unicode/mappings/CP864.TXT +275 -0
  113. data/unicode/mappings/CP865.TXT +275 -0
  114. data/unicode/mappings/CP866.TXT +275 -0
  115. data/unicode/mappings/CP869.TXT +275 -0
  116. data/unicode/mappings/CP874.TXT +274 -0
  117. data/unicode/mappings/CP875.TXT +275 -0
  118. data/unicode/mappings/KOI8-R.TXT +302 -0
  119. data/unicode/mappings/NEXTSTEP.TXT +173 -0
  120. data/unicode/mappings/ROMAN.TXT +275 -0
  121. data/unicode/mappings/US-ASCII-QUOTES.TXT +198 -0
  122. metadata +180 -0
@@ -0,0 +1,302 @@
1
+ #
2
+ # Name: KOI8-R (RFC1489) to Unicode
3
+ # Unicode version: 3.0
4
+ # Table version: 1.0
5
+ # Table format: Format A
6
+ # Date: 18 August 1999
7
+ # Authors: Helmut Richter <richter@lrz.de>
8
+ #
9
+ # Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
10
+ #
11
+ # This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
12
+ # No claims are made as to fitness for any particular purpose. No
13
+ # warranties of any kind are expressed or implied. The recipient
14
+ # agrees to determine applicability of information provided. If this
15
+ # file has been provided on optical media by Unicode, Inc., the sole
16
+ # remedy for any claim will be exchange of defective media within 90
17
+ # days of receipt.
18
+ #
19
+ # Unicode, Inc. hereby grants the right to freely use the information
20
+ # supplied in this file in the creation of products supporting the
21
+ # Unicode Standard, and to make copies of this file in any form for
22
+ # internal or external distribution as long as this notice remains
23
+ # attached.
24
+ #
25
+ # General notes:
26
+ #
27
+ # This table contains the data the Unicode Consortium has on how
28
+ # KOI8-R characters map into Unicode. The underlying document is the
29
+ # mapping described in RFC 1489. No statements are made as to whether
30
+ # this mapping is the same as the mapping defined as "Code Page 878"
31
+ # with some vendors.
32
+ #
33
+ # Format: Three tab-separated columns
34
+ # Column #1 is the KOI8-R code (in hex as 0xXX)
35
+ # Column #2 is the Unicode (in hex as 0xXXXX)
36
+ # Column #3 the Unicode name (follows a comment sign, '#')
37
+ #
38
+ # The entries are in KOI8-R order.
39
+ #
40
+ # Version history
41
+ # 1.0 version: created.
42
+ #
43
+ # Any comments or problems, contact <errata@unicode.org>
44
+ # Please note that <errata@unicode.org> is an archival address;
45
+ # notices will be checked, but do not expect an immediate response.
46
+ #
47
+ 0x00 0x0000 # NULL
48
+ 0x01 0x0001 # START OF HEADING
49
+ 0x02 0x0002 # START OF TEXT
50
+ 0x03 0x0003 # END OF TEXT
51
+ 0x04 0x0004 # END OF TRANSMISSION
52
+ 0x05 0x0005 # ENQUIRY
53
+ 0x06 0x0006 # ACKNOWLEDGE
54
+ 0x07 0x0007 # BELL
55
+ 0x08 0x0008 # BACKSPACE
56
+ 0x09 0x0009 # HORIZONTAL TABULATION
57
+ 0x0A 0x000A # LINE FEED
58
+ 0x0B 0x000B # VERTICAL TABULATION
59
+ 0x0C 0x000C # FORM FEED
60
+ 0x0D 0x000D # CARRIAGE RETURN
61
+ 0x0E 0x000E # SHIFT OUT
62
+ 0x0F 0x000F # SHIFT IN
63
+ 0x10 0x0010 # DATA LINK ESCAPE
64
+ 0x11 0x0011 # DEVICE CONTROL ONE
65
+ 0x12 0x0012 # DEVICE CONTROL TWO
66
+ 0x13 0x0013 # DEVICE CONTROL THREE
67
+ 0x14 0x0014 # DEVICE CONTROL FOUR
68
+ 0x15 0x0015 # NEGATIVE ACKNOWLEDGE
69
+ 0x16 0x0016 # SYNCHRONOUS IDLE
70
+ 0x17 0x0017 # END OF TRANSMISSION BLOCK
71
+ 0x18 0x0018 # CANCEL
72
+ 0x19 0x0019 # END OF MEDIUM
73
+ 0x1A 0x001A # SUBSTITUTE
74
+ 0x1B 0x001B # ESCAPE
75
+ 0x1C 0x001C # FILE SEPARATOR
76
+ 0x1D 0x001D # GROUP SEPARATOR
77
+ 0x1E 0x001E # RECORD SEPARATOR
78
+ 0x1F 0x001F # UNIT SEPARATOR
79
+ 0x20 0x0020 # SPACE
80
+ 0x21 0x0021 # EXCLAMATION MARK
81
+ 0x22 0x0022 # QUOTATION MARK
82
+ 0x23 0x0023 # NUMBER SIGN
83
+ 0x24 0x0024 # DOLLAR SIGN
84
+ 0x25 0x0025 # PERCENT SIGN
85
+ 0x26 0x0026 # AMPERSAND
86
+ 0x27 0x0027 # APOSTROPHE
87
+ 0x28 0x0028 # LEFT PARENTHESIS
88
+ 0x29 0x0029 # RIGHT PARENTHESIS
89
+ 0x2A 0x002A # ASTERISK
90
+ 0x2B 0x002B # PLUS SIGN
91
+ 0x2C 0x002C # COMMA
92
+ 0x2D 0x002D # HYPHEN-MINUS
93
+ 0x2E 0x002E # FULL STOP
94
+ 0x2F 0x002F # SOLIDUS
95
+ 0x30 0x0030 # DIGIT ZERO
96
+ 0x31 0x0031 # DIGIT ONE
97
+ 0x32 0x0032 # DIGIT TWO
98
+ 0x33 0x0033 # DIGIT THREE
99
+ 0x34 0x0034 # DIGIT FOUR
100
+ 0x35 0x0035 # DIGIT FIVE
101
+ 0x36 0x0036 # DIGIT SIX
102
+ 0x37 0x0037 # DIGIT SEVEN
103
+ 0x38 0x0038 # DIGIT EIGHT
104
+ 0x39 0x0039 # DIGIT NINE
105
+ 0x3A 0x003A # COLON
106
+ 0x3B 0x003B # SEMICOLON
107
+ 0x3C 0x003C # LESS-THAN SIGN
108
+ 0x3D 0x003D # EQUALS SIGN
109
+ 0x3E 0x003E # GREATER-THAN SIGN
110
+ 0x3F 0x003F # QUESTION MARK
111
+ 0x40 0x0040 # COMMERCIAL AT
112
+ 0x41 0x0041 # LATIN CAPITAL LETTER A
113
+ 0x42 0x0042 # LATIN CAPITAL LETTER B
114
+ 0x43 0x0043 # LATIN CAPITAL LETTER C
115
+ 0x44 0x0044 # LATIN CAPITAL LETTER D
116
+ 0x45 0x0045 # LATIN CAPITAL LETTER E
117
+ 0x46 0x0046 # LATIN CAPITAL LETTER F
118
+ 0x47 0x0047 # LATIN CAPITAL LETTER G
119
+ 0x48 0x0048 # LATIN CAPITAL LETTER H
120
+ 0x49 0x0049 # LATIN CAPITAL LETTER I
121
+ 0x4A 0x004A # LATIN CAPITAL LETTER J
122
+ 0x4B 0x004B # LATIN CAPITAL LETTER K
123
+ 0x4C 0x004C # LATIN CAPITAL LETTER L
124
+ 0x4D 0x004D # LATIN CAPITAL LETTER M
125
+ 0x4E 0x004E # LATIN CAPITAL LETTER N
126
+ 0x4F 0x004F # LATIN CAPITAL LETTER O
127
+ 0x50 0x0050 # LATIN CAPITAL LETTER P
128
+ 0x51 0x0051 # LATIN CAPITAL LETTER Q
129
+ 0x52 0x0052 # LATIN CAPITAL LETTER R
130
+ 0x53 0x0053 # LATIN CAPITAL LETTER S
131
+ 0x54 0x0054 # LATIN CAPITAL LETTER T
132
+ 0x55 0x0055 # LATIN CAPITAL LETTER U
133
+ 0x56 0x0056 # LATIN CAPITAL LETTER V
134
+ 0x57 0x0057 # LATIN CAPITAL LETTER W
135
+ 0x58 0x0058 # LATIN CAPITAL LETTER X
136
+ 0x59 0x0059 # LATIN CAPITAL LETTER Y
137
+ 0x5A 0x005A # LATIN CAPITAL LETTER Z
138
+ 0x5B 0x005B # LEFT SQUARE BRACKET
139
+ 0x5C 0x005C # REVERSE SOLIDUS
140
+ 0x5D 0x005D # RIGHT SQUARE BRACKET
141
+ 0x5E 0x005E # CIRCUMFLEX ACCENT
142
+ 0x5F 0x005F # LOW LINE
143
+ 0x60 0x0060 # GRAVE ACCENT
144
+ 0x61 0x0061 # LATIN SMALL LETTER A
145
+ 0x62 0x0062 # LATIN SMALL LETTER B
146
+ 0x63 0x0063 # LATIN SMALL LETTER C
147
+ 0x64 0x0064 # LATIN SMALL LETTER D
148
+ 0x65 0x0065 # LATIN SMALL LETTER E
149
+ 0x66 0x0066 # LATIN SMALL LETTER F
150
+ 0x67 0x0067 # LATIN SMALL LETTER G
151
+ 0x68 0x0068 # LATIN SMALL LETTER H
152
+ 0x69 0x0069 # LATIN SMALL LETTER I
153
+ 0x6A 0x006A # LATIN SMALL LETTER J
154
+ 0x6B 0x006B # LATIN SMALL LETTER K
155
+ 0x6C 0x006C # LATIN SMALL LETTER L
156
+ 0x6D 0x006D # LATIN SMALL LETTER M
157
+ 0x6E 0x006E # LATIN SMALL LETTER N
158
+ 0x6F 0x006F # LATIN SMALL LETTER O
159
+ 0x70 0x0070 # LATIN SMALL LETTER P
160
+ 0x71 0x0071 # LATIN SMALL LETTER Q
161
+ 0x72 0x0072 # LATIN SMALL LETTER R
162
+ 0x73 0x0073 # LATIN SMALL LETTER S
163
+ 0x74 0x0074 # LATIN SMALL LETTER T
164
+ 0x75 0x0075 # LATIN SMALL LETTER U
165
+ 0x76 0x0076 # LATIN SMALL LETTER V
166
+ 0x77 0x0077 # LATIN SMALL LETTER W
167
+ 0x78 0x0078 # LATIN SMALL LETTER X
168
+ 0x79 0x0079 # LATIN SMALL LETTER Y
169
+ 0x7A 0x007A # LATIN SMALL LETTER Z
170
+ 0x7B 0x007B # LEFT CURLY BRACKET
171
+ 0x7C 0x007C # VERTICAL LINE
172
+ 0x7D 0x007D # RIGHT CURLY BRACKET
173
+ 0x7E 0x007E # TILDE
174
+ 0x7F 0x007F # DELETE
175
+ 0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL
176
+ 0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL
177
+ 0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT
178
+ 0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT
179
+ 0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT
180
+ 0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT
181
+ 0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
182
+ 0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT
183
+ 0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
184
+ 0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL
185
+ 0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
186
+ 0x8B 0x2580 # UPPER HALF BLOCK
187
+ 0x8C 0x2584 # LOWER HALF BLOCK
188
+ 0x8D 0x2588 # FULL BLOCK
189
+ 0x8E 0x258C # LEFT HALF BLOCK
190
+ 0x8F 0x2590 # RIGHT HALF BLOCK
191
+ 0x90 0x2591 # LIGHT SHADE
192
+ 0x91 0x2592 # MEDIUM SHADE
193
+ 0x92 0x2593 # DARK SHADE
194
+ 0x93 0x2320 # TOP HALF INTEGRAL
195
+ 0x94 0x25A0 # BLACK SQUARE
196
+ 0x95 0x2219 # BULLET OPERATOR
197
+ 0x96 0x221A # SQUARE ROOT
198
+ 0x97 0x2248 # ALMOST EQUAL TO
199
+ 0x98 0x2264 # LESS-THAN OR EQUAL TO
200
+ 0x99 0x2265 # GREATER-THAN OR EQUAL TO
201
+ 0x9A 0x00A0 # NO-BREAK SPACE
202
+ 0x9B 0x2321 # BOTTOM HALF INTEGRAL
203
+ 0x9C 0x00B0 # DEGREE SIGN
204
+ 0x9D 0x00B2 # SUPERSCRIPT TWO
205
+ 0x9E 0x00B7 # MIDDLE DOT
206
+ 0x9F 0x00F7 # DIVISION SIGN
207
+ 0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL
208
+ 0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL
209
+ 0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
210
+ 0xA3 0x0451 # CYRILLIC SMALL LETTER IO
211
+ 0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
212
+ 0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT
213
+ 0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
214
+ 0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
215
+ 0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT
216
+ 0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
217
+ 0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
218
+ 0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT
219
+ 0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
220
+ 0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
221
+ 0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT
222
+ 0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
223
+ 0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
224
+ 0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
225
+ 0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
226
+ 0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO
227
+ 0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
228
+ 0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
229
+ 0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
230
+ 0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
231
+ 0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
232
+ 0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
233
+ 0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
234
+ 0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
235
+ 0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
236
+ 0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
237
+ 0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
238
+ 0xBF 0x00A9 # COPYRIGHT SIGN
239
+ 0xC0 0x044E # CYRILLIC SMALL LETTER YU
240
+ 0xC1 0x0430 # CYRILLIC SMALL LETTER A
241
+ 0xC2 0x0431 # CYRILLIC SMALL LETTER BE
242
+ 0xC3 0x0446 # CYRILLIC SMALL LETTER TSE
243
+ 0xC4 0x0434 # CYRILLIC SMALL LETTER DE
244
+ 0xC5 0x0435 # CYRILLIC SMALL LETTER IE
245
+ 0xC6 0x0444 # CYRILLIC SMALL LETTER EF
246
+ 0xC7 0x0433 # CYRILLIC SMALL LETTER GHE
247
+ 0xC8 0x0445 # CYRILLIC SMALL LETTER HA
248
+ 0xC9 0x0438 # CYRILLIC SMALL LETTER I
249
+ 0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I
250
+ 0xCB 0x043A # CYRILLIC SMALL LETTER KA
251
+ 0xCC 0x043B # CYRILLIC SMALL LETTER EL
252
+ 0xCD 0x043C # CYRILLIC SMALL LETTER EM
253
+ 0xCE 0x043D # CYRILLIC SMALL LETTER EN
254
+ 0xCF 0x043E # CYRILLIC SMALL LETTER O
255
+ 0xD0 0x043F # CYRILLIC SMALL LETTER PE
256
+ 0xD1 0x044F # CYRILLIC SMALL LETTER YA
257
+ 0xD2 0x0440 # CYRILLIC SMALL LETTER ER
258
+ 0xD3 0x0441 # CYRILLIC SMALL LETTER ES
259
+ 0xD4 0x0442 # CYRILLIC SMALL LETTER TE
260
+ 0xD5 0x0443 # CYRILLIC SMALL LETTER U
261
+ 0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE
262
+ 0xD7 0x0432 # CYRILLIC SMALL LETTER VE
263
+ 0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN
264
+ 0xD9 0x044B # CYRILLIC SMALL LETTER YERU
265
+ 0xDA 0x0437 # CYRILLIC SMALL LETTER ZE
266
+ 0xDB 0x0448 # CYRILLIC SMALL LETTER SHA
267
+ 0xDC 0x044D # CYRILLIC SMALL LETTER E
268
+ 0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA
269
+ 0xDE 0x0447 # CYRILLIC SMALL LETTER CHE
270
+ 0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN
271
+ 0xE0 0x042E # CYRILLIC CAPITAL LETTER YU
272
+ 0xE1 0x0410 # CYRILLIC CAPITAL LETTER A
273
+ 0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE
274
+ 0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE
275
+ 0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE
276
+ 0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE
277
+ 0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF
278
+ 0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE
279
+ 0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA
280
+ 0xE9 0x0418 # CYRILLIC CAPITAL LETTER I
281
+ 0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I
282
+ 0xEB 0x041A # CYRILLIC CAPITAL LETTER KA
283
+ 0xEC 0x041B # CYRILLIC CAPITAL LETTER EL
284
+ 0xED 0x041C # CYRILLIC CAPITAL LETTER EM
285
+ 0xEE 0x041D # CYRILLIC CAPITAL LETTER EN
286
+ 0xEF 0x041E # CYRILLIC CAPITAL LETTER O
287
+ 0xF0 0x041F # CYRILLIC CAPITAL LETTER PE
288
+ 0xF1 0x042F # CYRILLIC CAPITAL LETTER YA
289
+ 0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER
290
+ 0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES
291
+ 0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE
292
+ 0xF5 0x0423 # CYRILLIC CAPITAL LETTER U
293
+ 0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE
294
+ 0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE
295
+ 0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN
296
+ 0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU
297
+ 0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE
298
+ 0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA
299
+ 0xFC 0x042D # CYRILLIC CAPITAL LETTER E
300
+ 0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA
301
+ 0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE
302
+ 0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN
@@ -0,0 +1,173 @@
1
+ #
2
+ # Name: NextStep Encoding to Unicode
3
+ # Unicode version: 1.1
4
+ # Table version: 0.1
5
+ # Table format: Format A
6
+ # Date: 1999 September 23
7
+ # Authors: Rick McGowan
8
+ #
9
+ # Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
10
+ #
11
+ # This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
12
+ # No claims are made as to fitness for any particular purpose. No
13
+ # warranties of any kind are expressed or implied. The recipient
14
+ # agrees to determine applicability of information provided. If this
15
+ # file has been provided on optical media by Unicode, Inc., the sole
16
+ # remedy for any claim will be exchange of defective media within 90
17
+ # days of receipt.
18
+ #
19
+ # Unicode, Inc. hereby grants the right to freely use the information
20
+ # supplied in this file in the creation of products supporting the
21
+ # Unicode Standard, and to make copies of this file in any form for
22
+ # internal or external distribution as long as this notice remains
23
+ # attached.
24
+ #
25
+ # General notes:
26
+ #
27
+ # This table contains the data the Unicode Consortium has on how
28
+ # NextStep Encoding characters map into Unicode. Since the first
29
+ # 128 characters (0x0 - 0x7f) are identical to ASCII and Unicode,
30
+ # this table only maps the NextStep range from 0x80 - 0xFF.
31
+ #
32
+ # This file is provided for historical reference only and pertains
33
+ # to NextStep and OpenStep products shipped prior to the acquisition
34
+ # of NeXT by Apple Computer, Inc. See http://www.apple.com for
35
+ # further information.
36
+ #
37
+ # Format: Three tab-separated columns
38
+ # Column #1 is the NextStep code (in hex as 0xXX)
39
+ # Column #2 is the Unicode (in hex as 0xXXXX)
40
+ # Column #3 NextStep name, Unicode name (follows a comment sign, '#')
41
+ #
42
+ # The entries are in NextStep order
43
+ #
44
+ # Any comments or problems, contact info@unicode.org
45
+ #
46
+ 0x80 0x00a0 # NO-BREAK SPACE
47
+ 0x81 0x00c0 # LATIN CAPITAL LETTER A WITH GRAVE
48
+ 0x82 0x00c1 # LATIN CAPITAL LETTER A WITH ACUTE
49
+ 0x83 0x00c2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
50
+ 0x84 0x00c3 # LATIN CAPITAL LETTER A WITH TILDE
51
+ 0x85 0x00c4 # LATIN CAPITAL LETTER A WITH DIAERESIS
52
+ 0x86 0x00c5 # LATIN CAPITAL LETTER A WITH RING ABOVE
53
+ 0x87 0x00c7 # LATIN CAPITAL LETTER C WITH CEDILLA
54
+ 0x88 0x00c8 # LATIN CAPITAL LETTER E WITH GRAVE
55
+ 0x89 0x00c9 # LATIN CAPITAL LETTER E WITH ACUTE
56
+ 0x8a 0x00ca # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
57
+ 0x8b 0x00cb # LATIN CAPITAL LETTER E WITH DIAERESIS
58
+ 0x8c 0x00cc # LATIN CAPITAL LETTER I WITH GRAVE
59
+ 0x8d 0x00cd # LATIN CAPITAL LETTER I WITH ACUTE
60
+ 0x8e 0x00ce # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
61
+ 0x8f 0x00cf # LATIN CAPITAL LETTER I WITH DIAERESIS
62
+ 0x90 0x00d0 # LATIN CAPITAL LETTER ETH
63
+ 0x91 0x00d1 # LATIN CAPITAL LETTER N WITH TILDE
64
+ 0x92 0x00d2 # LATIN CAPITAL LETTER O WITH GRAVE
65
+ 0x93 0x00d3 # LATIN CAPITAL LETTER O WITH ACUTE
66
+ 0x94 0x00d4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
67
+ 0x95 0x00d5 # LATIN CAPITAL LETTER O WITH TILDE
68
+ 0x96 0x00d6 # LATIN CAPITAL LETTER O WITH DIAERESIS
69
+ 0x97 0x00d9 # LATIN CAPITAL LETTER U WITH GRAVE
70
+ 0x98 0x00da # LATIN CAPITAL LETTER U WITH ACUTE
71
+ 0x99 0x00db # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
72
+ 0x9a 0x00dc # LATIN CAPITAL LETTER U WITH DIAERESIS
73
+ 0x9b 0x00dd # LATIN CAPITAL LETTER Y WITH ACUTE
74
+ 0x9c 0x00de # LATIN CAPITAL LETTER THORN
75
+ 0x9d 0x00b5 # MICRO SIGN
76
+ 0x9e 0x00d7 # MULTIPLICATION SIGN
77
+ 0x9f 0x00f7 # DIVISION SIGN
78
+ 0xa0 0x00a9 # COPYRIGHT SIGN
79
+ 0xa1 0x00a1 # INVERTED EXCLAMATION MARK
80
+ 0xa2 0x00a2 # CENT SIGN
81
+ 0xa3 0x00a3 # POUND SIGN
82
+ 0xa4 0x2044 # FRACTION SLASH
83
+ 0xa5 0x00a5 # YEN SIGN
84
+ 0xa6 0x0192 # LATIN SMALL LETTER F WITH HOOK
85
+ 0xa7 0x00a7 # SECTION SIGN
86
+ 0xa8 0x00a4 # CURRENCY SIGN
87
+ 0xa9 0x2019 # RIGHT SINGLE QUOTATION MARK
88
+ 0xaa 0x201c # LEFT DOUBLE QUOTATION MARK
89
+ 0xab 0x00ab # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
90
+ 0xac 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
91
+ 0xad 0x203a # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
92
+ 0xae 0xfb01 # LATIN SMALL LIGATURE FI
93
+ 0xaf 0xfb02 # LATIN SMALL LIGATURE FL
94
+ 0xb0 0x00ae # REGISTERED SIGN
95
+ 0xb1 0x2013 # EN DASH
96
+ 0xb2 0x2020 # DAGGER
97
+ 0xb3 0x2021 # DOUBLE DAGGER
98
+ 0xb4 0x00b7 # MIDDLE DOT
99
+ 0xb5 0x00a6 # BROKEN BAR
100
+ 0xb6 0x00b6 # PILCROW SIGN
101
+ 0xb7 0x2022 # BULLET
102
+ 0xb8 0x201a # SINGLE LOW-9 QUOTATION MARK
103
+ 0xb9 0x201e # DOUBLE LOW-9 QUOTATION MARK
104
+ 0xba 0x201d # RIGHT DOUBLE QUOTATION MARK
105
+ 0xbb 0x00bb # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
106
+ 0xbc 0x2026 # HORIZONTAL ELLIPSIS
107
+ 0xbd 0x2030 # PER MILLE SIGN
108
+ 0xbe 0x00ac # NOT SIGN
109
+ 0xbf 0x00bf # INVERTED QUESTION MARK
110
+ 0xc0 0x00b9 # SUPERSCRIPT ONE
111
+ 0xc1 0x02cb # MODIFIER LETTER GRAVE ACCENT
112
+ 0xc2 0x00b4 # ACUTE ACCENT
113
+ 0xc3 0x02c6 # MODIFIER LETTER CIRCUMFLEX ACCENT
114
+ 0xc4 0x02dc # SMALL TILDE
115
+ 0xc5 0x00af # MACRON
116
+ 0xc6 0x02d8 # BREVE
117
+ 0xc7 0x02d9 # DOT ABOVE
118
+ 0xc8 0x00a8 # DIAERESIS
119
+ 0xc9 0x00b2 # SUPERSCRIPT TWO
120
+ 0xca 0x02da # RING ABOVE
121
+ 0xcb 0x00b8 # CEDILLA
122
+ 0xcc 0x00b3 # SUPERSCRIPT THREE
123
+ 0xcd 0x02dd # DOUBLE ACUTE ACCENT
124
+ 0xce 0x02db # OGONEK
125
+ 0xcf 0x02c7 # CARON
126
+ 0xd0 0x2014 # EM DASH
127
+ 0xd1 0x00b1 # PLUS-MINUS SIGN
128
+ 0xd2 0x00bc # VULGAR FRACTION ONE QUARTER
129
+ 0xd3 0x00bd # VULGAR FRACTION ONE HALF
130
+ 0xd4 0x00be # VULGAR FRACTION THREE QUARTERS
131
+ 0xd5 0x00e0 # LATIN SMALL LETTER A WITH GRAVE
132
+ 0xd6 0x00e1 # LATIN SMALL LETTER A WITH ACUTE
133
+ 0xd7 0x00e2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
134
+ 0xd8 0x00e3 # LATIN SMALL LETTER A WITH TILDE
135
+ 0xd9 0x00e4 # LATIN SMALL LETTER A WITH DIAERESIS
136
+ 0xda 0x00e5 # LATIN SMALL LETTER A WITH RING ABOVE
137
+ 0xdb 0x00e7 # LATIN SMALL LETTER C WITH CEDILLA
138
+ 0xdc 0x00e8 # LATIN SMALL LETTER E WITH GRAVE
139
+ 0xdd 0x00e9 # LATIN SMALL LETTER E WITH ACUTE
140
+ 0xde 0x00ea # LATIN SMALL LETTER E WITH CIRCUMFLEX
141
+ 0xdf 0x00eb # LATIN SMALL LETTER E WITH DIAERESIS
142
+ 0xe0 0x00ec # LATIN SMALL LETTER I WITH GRAVE
143
+ 0xe1 0x00c6 # LATIN CAPITAL LETTER AE
144
+ 0xe2 0x00ed # LATIN SMALL LETTER I WITH ACUTE
145
+ 0xe3 0x00aa # FEMININE ORDINAL INDICATOR
146
+ 0xe4 0x00ee # LATIN SMALL LETTER I WITH CIRCUMFLEX
147
+ 0xe5 0x00ef # LATIN SMALL LETTER I WITH DIAERESIS
148
+ 0xe6 0x00f0 # LATIN SMALL LETTER ETH
149
+ 0xe7 0x00f1 # LATIN SMALL LETTER N WITH TILDE
150
+ 0xe8 0x0141 # LATIN CAPITAL LETTER L WITH STROKE
151
+ 0xe9 0x00d8 # LATIN CAPITAL LETTER O WITH STROKE
152
+ 0xea 0x0152 # LATIN CAPITAL LIGATURE OE
153
+ 0xeb 0x00ba # MASCULINE ORDINAL INDICATOR
154
+ 0xec 0x00f2 # LATIN SMALL LETTER O WITH GRAVE
155
+ 0xed 0x00f3 # LATIN SMALL LETTER O WITH ACUTE
156
+ 0xee 0x00f4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
157
+ 0xef 0x00f5 # LATIN SMALL LETTER O WITH TILDE
158
+ 0xf0 0x00f6 # LATIN SMALL LETTER O WITH DIAERESIS
159
+ 0xf1 0x00e6 # LATIN SMALL LETTER AE
160
+ 0xf2 0x00f9 # LATIN SMALL LETTER U WITH GRAVE
161
+ 0xf3 0x00fa # LATIN SMALL LETTER U WITH ACUTE
162
+ 0xf4 0x00fb # LATIN SMALL LETTER U WITH CIRCUMFLEX
163
+ 0xf5 0x0131 # LATIN SMALL LETTER DOTLESS I
164
+ 0xf6 0x00fc # LATIN SMALL LETTER U WITH DIAERESIS
165
+ 0xf7 0x00fd # LATIN SMALL LETTER Y WITH ACUTE
166
+ 0xf8 0x0142 # LATIN SMALL LETTER L WITH STROKE
167
+ 0xf9 0x00f8 # LATIN SMALL LETTER O WITH STROKE
168
+ 0xfa 0x0153 # LATIN SMALL LIGATURE OE
169
+ 0xfb 0x00df # LATIN SMALL LETTER SHARP S
170
+ 0xfc 0x00fe # LATIN SMALL LETTER THORN
171
+ 0xfd 0x00ff # LATIN SMALL LETTER Y WITH DIAERESIS
172
+ 0xfe 0xfffd # .notdef, REPLACEMENT CHARACTER
173
+ 0xff 0xfffd # .notdef, REPLACEMENT CHARACTER