encoding-codepage 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -7
- data/lib/codepages.tsv +1 -1
- data/lib/encoding-codepage.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -78,7 +78,8 @@ After installing this gem, you'll be able to access the following Code Pages fro
|
|
78
78
|
CP936 => GB2312 # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
|
79
79
|
CP949 => KS_C_5601-1987 # ANSI/OEM Korean (Unified Hangul Code)
|
80
80
|
CP950 => BIG5 # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
|
81
|
-
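CP1200 => UTF-16 # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications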
|
81
|
+
CP1200 => UTF-16LE # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
|
82
|
+
CP1201 => UTF-16BE # Unicode UTF-16, big endian byte order; available only to managed applications
|
82
83
|
CP1250 => WINDOWS-1250 # ANSI Central European; Central European (Windows)
|
83
84
|
CP1251 => WINDOWS-1251 # ANSI Cyrillic; Cyrillic (Windows)
|
84
85
|
CP1252 => WINDOWS-1252 # ANSI Latin 1; Western European (Windows)
|
@@ -88,7 +89,7 @@ After installing this gem, you'll be able to access the following Code Pages fro
|
|
88
89
|
CP1256 => WINDOWS-1256 # ANSI Arabic; Arabic (Windows)
|
89
90
|
CP1257 => WINDOWS-1257 # ANSI Baltic; Baltic (Windows)
|
90
91
|
CP1258 => WINDOWS-1258 # ANSI/OEM Vietnamese; Vietnamese (Windows)
|
91
|
-
CP12000 => UTF-32 # Unicode UTF-32, little endian byte order; available only to managed applications
|
92
|
+
CP12000 => UTF-32LE # Unicode UTF-32, little endian byte order; available only to managed applications
|
92
93
|
CP12001 => UTF-32BE # Unicode UTF-32, big endian byte order; available only to managed applications
|
93
94
|
CP20127 => US-ASCII # US-ASCII (7-bit)
|
94
95
|
CP20866 => KOI8-R # Russian (KOI8-R); Cyrillic (KOI8-R)
|
@@ -105,14 +106,10 @@ After installing this gem, you'll be able to access the following Code Pages fro
|
|
105
106
|
CP28599 => ISO-8859-9 # ISO 8859-9 Turkish
|
106
107
|
CP28603 => ISO-8859-13 # ISO 8859-13 Estonian
|
107
108
|
CP28605 => ISO-8859-15 # ISO 8859-15 Latin 9
|
108
|
-
CP50220 => ISO-2022-JP # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
109
|
-
CP50221 => CSISO2022JP # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
|
110
|
-
CP50222 => ISO-2022-JP # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
|
111
109
|
CP51932 => EUC-JP # EUC Japanese
|
112
110
|
CP51936 => EUC-CN # EUC Simplified Chinese; Chinese Simplified (EUC)
|
113
111
|
CP51949 => EUC-KR # EUC Korean
|
114
112
|
CP54936 => GB18030 # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
|
115
|
-
CP65000 => UTF-7 # Unicode (UTF-7)
|
116
113
|
CP65001 => UTF-8 # Unicode (UTF-8)
|
117
114
|
|
118
115
|
The following code pages are known not to be supported:
|
@@ -138,7 +135,6 @@ The following code pages are known not to be supported:
|
|
138
135
|
CP1147 => IBM01147 # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
|
139
136
|
CP1148 => IBM01148 # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
|
140
137
|
CP1149 => IBM01149 # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
|
141
|
-
CP1201 => UNICODEFFFE # Unicode UTF-16, big endian byte order; available only to managed applications
|
142
138
|
CP1361 => JOHAB # Korean (Johab)
|
143
139
|
CP10000 => MACINTOSH # MAC Roman; Western European (Mac)
|
144
140
|
CP10001 => X-MAC-JAPANESE # Japanese (Mac)
|
@@ -191,6 +187,9 @@ The following code pages are known not to be supported:
|
|
191
187
|
CP21027 => # (deprecated)
|
192
188
|
CP29001 => X-EUROPA # Europa 3
|
193
189
|
CP38598 => ISO-8859-8-I # ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
|
190
|
+
CP50220 => ISO-2022-JP # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
191
|
+
CP50221 => CSISO2022JP # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
|
192
|
+
CP50222 => ISO-2022-JP # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
|
194
193
|
CP50225 => ISO-2022-KR # ISO 2022 Korean
|
195
194
|
CP50227 => X-CP50227 # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
|
196
195
|
CP50229 => # ISO 2022 Traditional Chinese
|
@@ -213,6 +212,7 @@ The following code pages are known not to be supported:
|
|
213
212
|
CP57009 => X-ISCII-MA # ISCII Malayalam
|
214
213
|
CP57010 => X-ISCII-GU # ISCII Gujarati
|
215
214
|
CP57011 => X-ISCII-PA # ISCII Punjabi
|
215
|
+
CP65000 => UTF-7 # Unicode (UTF-7)
|
216
216
|
|
217
217
|
Original list from: http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx
|
218
218
|
|
data/lib/codepages.tsv
CHANGED
@@ -69,7 +69,7 @@
|
|
69
69
|
10079 x-mac-icelandic Icelandic (Mac)
|
70
70
|
10081 x-mac-turkish Turkish (Mac)
|
71
71
|
10082 x-mac-croatian Croatian (Mac)
|
72
|
-
12000 utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
|
72
|
+
12000 utf-32LE Unicode UTF-32, little endian byte order; available only to managed applications
|
73
73
|
12001 utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
|
74
74
|
20000 x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
|
75
75
|
20001 x-cp20001 TCA Taiwan
|
data/lib/encoding-codepage.rb
CHANGED
@@ -54,7 +54,7 @@ class Encoding
|
|
54
54
|
number, original, comment = line.split("\t", 3)
|
55
55
|
number = Integer(number, 10)
|
56
56
|
|
57
|
-
if encoding = exist?(original.upcase)
|
57
|
+
if encoding = exist?(original.upcase) && !encoding.dummy?
|
58
58
|
encoding.replicate "CP#{number}" unless codepage?(number)
|
59
59
|
|
60
60
|
CodePage.reverse_lookup[encoding] = codepage(number)
|