encoding-codepage 0.3 → 0.4
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/README.md +7 -7
- data/lib/codepages.tsv +1 -1
- data/lib/encoding-codepage.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -78,7 +78,8 @@ After installing this gem, you'll be able to access the following Code Pages fro
|
|
78
78
|
CP936 => GB2312 # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
|
79
79
|
CP949 => KS_C_5601-1987 # ANSI/OEM Korean (Unified Hangul Code)
|
80
80
|
CP950 => BIG5 # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
|
81
|
-
CP1200 => UTF-
|
81
|
+
CP1200 => UTF-16LE # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
|
82
|
+
CP1201 => UTF-16BE # Unicode UTF-16, big endian byte order; available only to managed applications
|
82
83
|
CP1250 => WINDOWS-1250 # ANSI Central European; Central European (Windows)
|
83
84
|
CP1251 => WINDOWS-1251 # ANSI Cyrillic; Cyrillic (Windows)
|
84
85
|
CP1252 => WINDOWS-1252 # ANSI Latin 1; Western European (Windows)
|
@@ -88,7 +89,7 @@ After installing this gem, you'll be able to access the following Code Pages fro
|
|
88
89
|
CP1256 => WINDOWS-1256 # ANSI Arabic; Arabic (Windows)
|
89
90
|
CP1257 => WINDOWS-1257 # ANSI Baltic; Baltic (Windows)
|
90
91
|
CP1258 => WINDOWS-1258 # ANSI/OEM Vietnamese; Vietnamese (Windows)
|
91
|
-
CP12000 => UTF-
|
92
|
+
CP12000 => UTF-32LE # Unicode UTF-32, little endian byte order; available only to managed applications
|
92
93
|
CP12001 => UTF-32BE # Unicode UTF-32, big endian byte order; available only to managed applications
|
93
94
|
CP20127 => US-ASCII # US-ASCII (7-bit)
|
94
95
|
CP20866 => KOI8-R # Russian (KOI8-R); Cyrillic (KOI8-R)
|
@@ -105,14 +106,10 @@ After installing this gem, you'll be able to access the following Code Pages fro
|
|
105
106
|
CP28599 => ISO-8859-9 # ISO 8859-9 Turkish
|
106
107
|
CP28603 => ISO-8859-13 # ISO 8859-13 Estonian
|
107
108
|
CP28605 => ISO-8859-15 # ISO 8859-15 Latin 9
|
108
|
-
CP50220 => ISO-2022-JP # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
109
|
-
CP50221 => CSISO2022JP # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
|
110
|
-
CP50222 => ISO-2022-JP # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
|
111
109
|
CP51932 => EUC-JP # EUC Japanese
|
112
110
|
CP51936 => EUC-CN # EUC Simplified Chinese; Chinese Simplified (EUC)
|
113
111
|
CP51949 => EUC-KR # EUC Korean
|
114
112
|
CP54936 => GB18030 # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
|
115
|
-
CP65000 => UTF-7 # Unicode (UTF-7)
|
116
113
|
CP65001 => UTF-8 # Unicode (UTF-8)
|
117
114
|
|
118
115
|
The following code pages are known not to be supported:
|
@@ -138,7 +135,6 @@ The following code pages are known not to be supported:
|
|
138
135
|
CP1147 => IBM01147 # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
|
139
136
|
CP1148 => IBM01148 # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
|
140
137
|
CP1149 => IBM01149 # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
|
141
|
-
CP1201 => UNICODEFFFE # Unicode UTF-16, big endian byte order; available only to managed applications
|
142
138
|
CP1361 => JOHAB # Korean (Johab)
|
143
139
|
CP10000 => MACINTOSH # MAC Roman; Western European (Mac)
|
144
140
|
CP10001 => X-MAC-JAPANESE # Japanese (Mac)
|
@@ -191,6 +187,9 @@ The following code pages are known not to be supported:
|
|
191
187
|
CP21027 => # (deprecated)
|
192
188
|
CP29001 => X-EUROPA # Europa 3
|
193
189
|
CP38598 => ISO-8859-8-I # ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
|
190
|
+
CP50220 => ISO-2022-JP # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
191
|
+
CP50221 => CSISO2022JP # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
|
192
|
+
CP50222 => ISO-2022-JP # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
|
194
193
|
CP50225 => ISO-2022-KR # ISO 2022 Korean
|
195
194
|
CP50227 => X-CP50227 # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
|
196
195
|
CP50229 => # ISO 2022 Traditional Chinese
|
@@ -213,6 +212,7 @@ The following code pages are known not to be supported:
|
|
213
212
|
CP57009 => X-ISCII-MA # ISCII Malayalam
|
214
213
|
CP57010 => X-ISCII-GU # ISCII Gujarati
|
215
214
|
CP57011 => X-ISCII-PA # ISCII Punjabi
|
215
|
+
CP65000 => UTF-7 # Unicode (UTF-7)
|
216
216
|
|
217
217
|
Original list from: http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx
|
218
218
|
|
data/lib/codepages.tsv
CHANGED
@@ -69,7 +69,7 @@
|
|
69
69
|
10079 x-mac-icelandic Icelandic (Mac)
|
70
70
|
10081 x-mac-turkish Turkish (Mac)
|
71
71
|
10082 x-mac-croatian Croatian (Mac)
|
72
|
-
12000 utf-
|
72
|
+
12000 utf-32LE Unicode UTF-32, little endian byte order; available only to managed applications
|
73
73
|
12001 utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
|
74
74
|
20000 x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
|
75
75
|
20001 x-cp20001 TCA Taiwan
|
data/lib/encoding-codepage.rb
CHANGED
@@ -54,7 +54,7 @@ class Encoding
|
|
54
54
|
number, original, comment = line.split("\t", 3)
|
55
55
|
number = Integer(number, 10)
|
56
56
|
|
57
|
-
if encoding = exist?(original.upcase)
|
57
|
+
if encoding = exist?(original.upcase) && !encoding.dummy?
|
58
58
|
encoding.replicate "CP#{number}" unless codepage?(number)
|
59
59
|
|
60
60
|
CodePage.reverse_lookup[encoding] = codepage(number)
|