encoding-codepage 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -78,7 +78,8 @@ After installing this gem, you'll be able to access the following Code Pages fro
78
78
  CP936 => GB2312 # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
79
79
  CP949 => KS_C_5601-1987 # ANSI/OEM Korean (Unified Hangul Code)
80
80
  CP950 => BIG5 # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
81
- CP1200 => UTF-16 # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
81
+ CP1200 => UTF-16LE # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
82
+ CP1201 => UTF-16BE # Unicode UTF-16, big endian byte order; available only to managed applications
82
83
  CP1250 => WINDOWS-1250 # ANSI Central European; Central European (Windows)
83
84
  CP1251 => WINDOWS-1251 # ANSI Cyrillic; Cyrillic (Windows)
84
85
  CP1252 => WINDOWS-1252 # ANSI Latin 1; Western European (Windows)
@@ -88,7 +89,7 @@ After installing this gem, you'll be able to access the following Code Pages fro
88
89
  CP1256 => WINDOWS-1256 # ANSI Arabic; Arabic (Windows)
89
90
  CP1257 => WINDOWS-1257 # ANSI Baltic; Baltic (Windows)
90
91
  CP1258 => WINDOWS-1258 # ANSI/OEM Vietnamese; Vietnamese (Windows)
91
- CP12000 => UTF-32 # Unicode UTF-32, little endian byte order; available only to managed applications
92
+ CP12000 => UTF-32LE # Unicode UTF-32, little endian byte order; available only to managed applications
92
93
  CP12001 => UTF-32BE # Unicode UTF-32, big endian byte order; available only to managed applications
93
94
  CP20127 => US-ASCII # US-ASCII (7-bit)
94
95
  CP20866 => KOI8-R # Russian (KOI8-R); Cyrillic (KOI8-R)
@@ -105,14 +106,10 @@ After installing this gem, you'll be able to access the following Code Pages fro
105
106
  CP28599 => ISO-8859-9 # ISO 8859-9 Turkish
106
107
  CP28603 => ISO-8859-13 # ISO 8859-13 Estonian
107
108
  CP28605 => ISO-8859-15 # ISO 8859-15 Latin 9
108
- CP50220 => ISO-2022-JP # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
109
- CP50221 => CSISO2022JP # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
110
- CP50222 => ISO-2022-JP # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
111
109
  CP51932 => EUC-JP # EUC Japanese
112
110
  CP51936 => EUC-CN # EUC Simplified Chinese; Chinese Simplified (EUC)
113
111
  CP51949 => EUC-KR # EUC Korean
114
112
  CP54936 => GB18030 # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
115
- CP65000 => UTF-7 # Unicode (UTF-7)
116
113
  CP65001 => UTF-8 # Unicode (UTF-8)
117
114
 
118
115
  The following code pages are known not to be supported:
@@ -138,7 +135,6 @@ The following code pages are known not to be supported:
138
135
  CP1147 => IBM01147 # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
139
136
  CP1148 => IBM01148 # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
140
137
  CP1149 => IBM01149 # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
141
- CP1201 => UNICODEFFFE # Unicode UTF-16, big endian byte order; available only to managed applications
142
138
  CP1361 => JOHAB # Korean (Johab)
143
139
  CP10000 => MACINTOSH # MAC Roman; Western European (Mac)
144
140
  CP10001 => X-MAC-JAPANESE # Japanese (Mac)
@@ -191,6 +187,9 @@ The following code pages are known not to be supported:
191
187
  CP21027 => # (deprecated)
192
188
  CP29001 => X-EUROPA # Europa 3
193
189
  CP38598 => ISO-8859-8-I # ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
190
+ CP50220 => ISO-2022-JP # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
191
+ CP50221 => CSISO2022JP # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
192
+ CP50222 => ISO-2022-JP # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
194
193
  CP50225 => ISO-2022-KR # ISO 2022 Korean
195
194
  CP50227 => X-CP50227 # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
196
195
  CP50229 => # ISO 2022 Traditional Chinese
@@ -213,6 +212,7 @@ The following code pages are known not to be supported:
213
212
  CP57009 => X-ISCII-MA # ISCII Malayalam
214
213
  CP57010 => X-ISCII-GU # ISCII Gujarati
215
214
  CP57011 => X-ISCII-PA # ISCII Punjabi
215
+ CP65000 => UTF-7 # Unicode (UTF-7)
216
216
 
217
217
  Original list from: http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx
218
218
 
@@ -69,7 +69,7 @@
69
69
  10079 x-mac-icelandic Icelandic (Mac)
70
70
  10081 x-mac-turkish Turkish (Mac)
71
71
  10082 x-mac-croatian Croatian (Mac)
72
- 12000 utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
72
+ 12000 utf-32LE Unicode UTF-32, little endian byte order; available only to managed applications
73
73
  12001 utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
74
74
  20000 x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
75
75
  20001 x-cp20001 TCA Taiwan
@@ -54,7 +54,7 @@ class Encoding
54
54
  number, original, comment = line.split("\t", 3)
55
55
  number = Integer(number, 10)
56
56
 
57
- if encoding = exist?(original.upcase)
57
+ if encoding = exist?(original.upcase) && !encoding.dummy?
58
58
  encoding.replicate "CP#{number}" unless codepage?(number)
59
59
 
60
60
  CodePage.reverse_lookup[encoding] = codepage(number)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: encoding-codepage
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: '0.4'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: