iconv-tiny 1.2.3 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -29
- package/dist/iconv-tiny.d.mts +70 -45
- package/dist/iconv-tiny.min.mjs +186 -66
- package/dist/iconv-tiny.mjs +1185 -536
- package/package.json +12 -11
package/README.md
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
[](https://github.com/prettier/prettier)
|
|
13
13
|
[](https://www.npmjs.com/package/iconv-tiny)
|
|
14
14
|
|
|
15
|
-
Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from
|
|
15
|
+
Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from https://www.unicode.org/Public/MAPPINGS.
|
|
16
16
|
|
|
17
17
|
## About
|
|
18
18
|
|
|
@@ -27,9 +27,9 @@ It is an ES-module and should work in all modern browsers and NodeJS that suppor
|
|
|
27
27
|
1. Tree shaking and other ESM-related benefits.
|
|
28
28
|
1. Table mappings and default characters overrides.
|
|
29
29
|
1. Native `TextDecoder` for some encodings.
|
|
30
|
-
1. `encodeInto(Uint8Array)` and `byteLength(string)`
|
|
30
|
+
1. `encodeInto(Uint8Array)` and `byteLength(string)` functions
|
|
31
31
|
1. TypeScript-friendly.
|
|
32
|
-
1. Tiny: 1 file, ~
|
|
32
|
+
1. Tiny: 1 file with dbcs tables, ~30KB in gzip.
|
|
33
33
|
|
|
34
34
|
## Installation
|
|
35
35
|
|
|
@@ -43,7 +43,7 @@ or use CDN:
|
|
|
43
43
|
<script type="importmap">
|
|
44
44
|
{
|
|
45
45
|
"imports": {
|
|
46
|
-
"iconv-tiny": "https://unpkg.com/iconv-tiny@1.
|
|
46
|
+
"iconv-tiny": "https://unpkg.com/iconv-tiny@1.4.0/dist/iconv-tiny.mjs"
|
|
47
47
|
}
|
|
48
48
|
}
|
|
49
49
|
</script>
|
|
@@ -54,10 +54,10 @@ or use CDN:
|
|
|
54
54
|
API is very close to **iconv-lite** API, see [iconv-tiny.d.mts](dist/iconv-tiny.d.mts).
|
|
55
55
|
|
|
56
56
|
```javascript
|
|
57
|
-
import {
|
|
57
|
+
import { canonicalize, createIconv, encodings, aliases } from "iconv-tiny";
|
|
58
58
|
|
|
59
59
|
// Create iconv
|
|
60
|
-
const iconv =
|
|
60
|
+
const iconv = createIconv(encodings, aliases);
|
|
61
61
|
|
|
62
62
|
// Convert from a Uint8Array to a JS string.
|
|
63
63
|
str = iconv.decode(new Uint8Array([0x68, 0x65, 0x6c, 0x6c, 0x6f]), "win1251");
|
|
@@ -88,32 +88,33 @@ import { UTF16LE } from "iconv-tiny";
|
|
|
88
88
|
const utf16 = UTF16LE.create();
|
|
89
89
|
|
|
90
90
|
// Create decoder, it works like TextDecoder with {stream: true} option.
|
|
91
|
-
const decoder = utf16.
|
|
91
|
+
const decoder = utf16.getDecoder();
|
|
92
92
|
|
|
93
93
|
// Decode a fragment
|
|
94
|
-
const part = decoder.
|
|
94
|
+
const part = decoder.write(new Uint8Array([0x3d, 0xd8, 0x0a]));
|
|
95
95
|
|
|
96
96
|
// Decode the next fragment
|
|
97
|
-
const str = decoder.
|
|
97
|
+
const str = decoder.write(new Uint8Array([0xde])); // 😊
|
|
98
98
|
|
|
99
99
|
// Finish stream decoding
|
|
100
|
-
const tail = decoder.
|
|
100
|
+
const tail = decoder.end();
|
|
101
101
|
```
|
|
102
102
|
|
|
103
103
|
See more [examples](examples).
|
|
104
104
|
|
|
105
|
-
## Supported encodings:
|
|
105
|
+
## Supported encodings (singlebyte & doublebyte encodings, Unicode):
|
|
106
106
|
|
|
107
107
|
1. **ISO-8859**: 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16
|
|
108
|
-
1. **EBCDIC**: Cp037, Cp500, Cp875, Cp1026
|
|
108
|
+
1. **EBCDIC**: Cp037, Cp424, Cp500, Cp875, Cp1026
|
|
109
109
|
1. **DOS**: Cp437, Cp737, Cp775, Cp850, Cp852, Cp855, Cp857, Cp860, Cp861, Cp862, Cp863, Cp864, Cp865, Cp866, Cp869, Cp874
|
|
110
110
|
1. **WINDOWS**: Cp1250, Cp1251, Cp1252, Cp1253, Cp1254, Cp1255, Cp1256, Cp1257, Cp1258
|
|
111
111
|
1. **MAC**: CYRILLIC, GREEK, ICELAND, LATIN2, ROMAN, TURKISH
|
|
112
|
-
1. **MISC**: ATARIST,
|
|
113
|
-
1. **OTHER**: US-ASCII
|
|
112
|
+
1. **MISC**: ATARIST, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
|
|
113
|
+
1. **OTHER**: [US-ASCII](https://en.wikipedia.org/wiki/ASCII)
|
|
114
114
|
1. **UNICODE**: UTF-8, UTF-16, UTF-32
|
|
115
|
+
1. **Japanese**: [JIS-0201](https://en.wikipedia.org/wiki/JIS_X_0201), [SHIFT-JIS](https://en.wikipedia.org/wiki/Shift_JIS), [CP932](https://en.wikipedia.org/wiki/Code_page_932_(Microsoft_Windows))
|
|
115
116
|
|
|
116
|
-
All encodings are generated automatically from
|
|
117
|
+
All encodings are generated automatically from https://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/).
|
|
117
118
|
|
|
118
119
|
**iconv-tiny** output is identical to **iconv-lite** output, see [tests/regression.test.mjs](tests/regression.test.mjs).
|
|
119
120
|
|
|
@@ -122,10 +123,10 @@ All encodings are generated automatically from http://www.unicode.org/Public/MAP
|
|
|
122
123
|
Comparison with iconv-lite module (Core i7-7500U CPU @ 2.7GHz, Node v24.2.0). Note: your results may vary, so please always check on your hardware.
|
|
123
124
|
|
|
124
125
|
```
|
|
125
|
-
operation iconv-lite@0.7.
|
|
126
|
+
operation iconv-lite@0.7.2 iconv-tiny@1.4.0
|
|
126
127
|
------------------------------------------------------
|
|
127
|
-
encode('win1251') ~
|
|
128
|
-
decode('win1251') ~
|
|
128
|
+
encode('win1251') ~270 Mb/s ~270 Mb/s
|
|
129
|
+
decode('win1251') ~120 Mb/s ~220 Mb/s
|
|
129
130
|
```
|
|
130
131
|
|
|
131
132
|
**iconv-lite** is NodeJS-oriented and uses specific APIs like `Buffer` and native NodeJS encodings.
|
|
@@ -140,7 +141,7 @@ decode('win1251') ~218 Mb/s ~263 Mb/s
|
|
|
140
141
|
1. UTF-16 is an alias of UTF-16LE
|
|
141
142
|
1. UTF-32 is an alias of UTF-32LE
|
|
142
143
|
|
|
143
|
-
## Testing
|
|
144
|
+
## Testing & Coverage
|
|
144
145
|
|
|
145
146
|
```
|
|
146
147
|
$ git clone https://github.com/vip-delete/iconv-tiny.git
|
|
@@ -153,16 +154,22 @@ $ node tests\perf-test-unicode.mjs
|
|
|
153
154
|
|
|
154
155
|
$ # To view test coverage:
|
|
155
156
|
$ npm run coverage
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
iconv
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
157
|
+
-------------|---------|----------|---------|---------|-------------------
|
|
158
|
+
File | % Stmts | % Branch | % Funcs | % Lines | Uncovered Line #s
|
|
159
|
+
-------------|---------|----------|---------|---------|-------------------
|
|
160
|
+
All files | 97.86 | 95.55 | 96.66 | 97.93 |
|
|
161
|
+
commons.mjs | 93.83 | 95 | 97.29 | 94.2 | 36-41,53-56
|
|
162
|
+
dbcs.mjs | 97.02 | 89.47 | 100 | 96.87 | 27,134,143
|
|
163
|
+
iconv.mjs | 100 | 100 | 100 | 100 |
|
|
164
|
+
mapped.mjs | 100 | 100 | 100 | 100 |
|
|
165
|
+
native.mjs | 100 | 100 | 100 | 100 |
|
|
166
|
+
sbcs.mjs | 100 | 90.9 | 100 | 100 | 36-66
|
|
167
|
+
types.mjs | 100 | 100 | 50 | 100 |
|
|
168
|
+
unicode.mjs | 100 | 100 | 100 | 100 |
|
|
169
|
+
utf16.mjs | 100 | 100 | 100 | 100 |
|
|
170
|
+
utf32.mjs | 98.76 | 97.5 | 100 | 98.76 | 224
|
|
171
|
+
utf8.mjs | 100 | 100 | 100 | 100 |
|
|
172
|
+
-------------|---------|----------|---------|---------|-------------------
|
|
166
173
|
```
|
|
167
174
|
|
|
168
175
|
## Commands
|
package/dist/iconv-tiny.d.mts
CHANGED
|
@@ -1,14 +1,3 @@
|
|
|
1
|
-
export class IconvTiny {
|
|
2
|
-
/**
|
|
3
|
-
* @param encodings A map of encodings to support.
|
|
4
|
-
* @param aliases Comma-separated groups, each containing space-separated aliases for the same encoding.
|
|
5
|
-
*/
|
|
6
|
-
constructor(encodings?: { [key: string]: EncodingFactory }, aliases?: string);
|
|
7
|
-
decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
|
|
8
|
-
encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
|
|
9
|
-
getEncoding(encoding: string, options?: Options): Encoding;
|
|
10
|
-
}
|
|
11
|
-
|
|
12
1
|
/**
|
|
13
2
|
* Converts an encoding name to a normalized, unique name.
|
|
14
3
|
* Removes non-alphanumeric characters and leading zeros.
|
|
@@ -18,31 +7,71 @@ export class IconvTiny {
|
|
|
18
7
|
*/
|
|
19
8
|
export function canonicalize(encoding: string): string;
|
|
20
9
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
newEncoder(options?: EncoderOptions): CharsetEncoder;
|
|
27
|
-
}
|
|
10
|
+
/**
|
|
11
|
+
* @param encodings - A map of encodings to support.
|
|
12
|
+
* @param aliases - Comma-separated groups, each containing space-separated aliases for the same encoding.
|
|
13
|
+
*/
|
|
14
|
+
export function createIconv(encodings?: { [key: string]: EncodingFactory }, aliases?: string): Iconv;
|
|
28
15
|
|
|
29
16
|
interface EncodingFactory {
|
|
30
17
|
create(options?: Options): Encoding;
|
|
31
18
|
}
|
|
32
19
|
|
|
33
|
-
interface
|
|
34
|
-
decode
|
|
20
|
+
interface Iconv {
|
|
21
|
+
/** get/create an encoding, create a decoder, decode and flush */
|
|
22
|
+
decode(buf: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
|
|
23
|
+
/** get/create an encoding, create an encoder, encode and flush */
|
|
24
|
+
encode(str: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
|
|
25
|
+
/** get/create an encoding */
|
|
26
|
+
getEncoding(encoding: string, options?: Options): Encoding;
|
|
35
27
|
}
|
|
36
28
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
29
|
+
/** Encoding that doesn't keep any state */
|
|
30
|
+
interface Encoding {
|
|
31
|
+
getName(): string;
|
|
32
|
+
|
|
33
|
+
/** get/create a decoder, decode and flush */
|
|
34
|
+
decode(buf: Uint8Array, options?: DecodeOptions): string;
|
|
35
|
+
/** get/create an encoder, encode and flush */
|
|
36
|
+
encode(str: string, options?: EncodeOptions): Uint8Array;
|
|
37
|
+
|
|
40
38
|
/**
|
|
41
|
-
* Similar to Buffer.byteLength
|
|
42
|
-
*
|
|
39
|
+
* Similar to Buffer.byteLength.
|
|
40
|
+
*
|
|
41
|
+
* @param str - input to calculate the length of
|
|
43
42
|
* @returns The number of bytes of the specified string
|
|
44
43
|
*/
|
|
45
|
-
byteLength(
|
|
44
|
+
byteLength(str: string): number;
|
|
45
|
+
|
|
46
|
+
// --- Low-level Stream APIs ---
|
|
47
|
+
|
|
48
|
+
/** create a decoder to keep the decoding state. */
|
|
49
|
+
getDecoder(options?: DecodeOptions): DecoderStream;
|
|
50
|
+
/** create an encoder to keep the encoding state. */
|
|
51
|
+
getEncoder(options?: EncodeOptions): EncoderStream;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Decoder to keep the decoding state */
|
|
55
|
+
interface DecoderStream {
|
|
56
|
+
/** decode, keep the leftover in the state */
|
|
57
|
+
write(buf: Uint8Array): string;
|
|
58
|
+
/** flush the leftover */
|
|
59
|
+
end(): string;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/** Encoder to keep the encoding state */
|
|
63
|
+
interface EncoderStream {
|
|
64
|
+
/** encode into a new array, keep the leftover in the state */
|
|
65
|
+
write(str: string): Uint8Array;
|
|
66
|
+
/** flush the leftover into a new array */
|
|
67
|
+
end(): Uint8Array;
|
|
68
|
+
|
|
69
|
+
// --- Low-Level Encoding APIs ---
|
|
70
|
+
|
|
71
|
+
/** encode into the given array, keep the leftover in the state */
|
|
72
|
+
encodeInto(str: string, buf: Uint8Array): TextEncoderEncodeIntoResult;
|
|
73
|
+
/** flush the leftover into the given array */
|
|
74
|
+
flushInto(buf: Uint8Array): TextEncoderEncodeIntoResult;
|
|
46
75
|
}
|
|
47
76
|
|
|
48
77
|
type TextEncoderEncodeIntoResult = {
|
|
@@ -50,11 +79,11 @@ type TextEncoderEncodeIntoResult = {
|
|
|
50
79
|
written: number;
|
|
51
80
|
}
|
|
52
81
|
|
|
53
|
-
type
|
|
82
|
+
type DecodeOptions = {
|
|
54
83
|
/**
|
|
55
84
|
* Sets the replacement character used by the "decode" method for unmapped bytes (default: "�").
|
|
56
85
|
*/
|
|
57
|
-
defaultCharUnicode?: string |
|
|
86
|
+
defaultCharUnicode?: string | DefaultFunction;
|
|
58
87
|
/**
|
|
59
88
|
* Specifies the behavior of the "decode" method (default: false)
|
|
60
89
|
*
|
|
@@ -70,11 +99,11 @@ type DecoderOptions = {
|
|
|
70
99
|
stripBOM?: boolean;
|
|
71
100
|
};
|
|
72
101
|
|
|
73
|
-
type
|
|
102
|
+
type EncodeOptions = {
|
|
74
103
|
/**
|
|
75
104
|
* Sets the replacement byte used by the "encode" method for unmapped symbols (default: "?").
|
|
76
105
|
*/
|
|
77
|
-
defaultCharByte?: string |
|
|
106
|
+
defaultCharByte?: string | DefaultFunction;
|
|
78
107
|
/**
|
|
79
108
|
* Unicode only. No BOM added by default, unless overridden by addBOM: true
|
|
80
109
|
*/
|
|
@@ -96,22 +125,16 @@ type Options = {
|
|
|
96
125
|
type Overrides = Array<number | string>;
|
|
97
126
|
|
|
98
127
|
/**
|
|
99
|
-
* @param {number} input - input character code (0-
|
|
100
|
-
* @param {number} index - index of the character
|
|
101
|
-
* @returns {number} default byte (0-255)
|
|
128
|
+
* @param {number} input - input character code (0-65535) if encoding; or an input byte (0-255) if decoding
|
|
129
|
+
* @param {number} index - index of the character if encoding; or an index of the byte if decoding
|
|
130
|
+
* @returns {number} default byte (0-255) if encoding; or a default character code (0-65535) if decoding
|
|
102
131
|
*/
|
|
103
|
-
type
|
|
132
|
+
type DefaultFunction = (input: number, index: number) => number | null | undefined;
|
|
104
133
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
* @param {number} index - index of the byte
|
|
108
|
-
* @returns {number} default character code (0-65536)
|
|
109
|
-
*/
|
|
110
|
-
type DefaultCharUnicodeFunction = (input: number, index: number) => number | null | undefined;
|
|
111
|
-
|
|
112
|
-
type OptionsAndDecoderOptions = Options & DecoderOptions;
|
|
113
|
-
type OptionsAndEncoderOptions = Options & EncoderOptions;
|
|
134
|
+
type OptionsAndDecoderOptions = Options & DecodeOptions;
|
|
135
|
+
type OptionsAndEncoderOptions = Options & EncodeOptions;
|
|
114
136
|
|
|
137
|
+
export const US_ASCII: EncodingFactory;
|
|
115
138
|
export const ISO_8859_1: EncodingFactory;
|
|
116
139
|
export const ISO_8859_2: EncodingFactory;
|
|
117
140
|
export const ISO_8859_3: EncodingFactory;
|
|
@@ -128,6 +151,7 @@ export const ISO_8859_14: EncodingFactory;
|
|
|
128
151
|
export const ISO_8859_15: EncodingFactory;
|
|
129
152
|
export const ISO_8859_16: EncodingFactory;
|
|
130
153
|
export const CP037: EncodingFactory;
|
|
154
|
+
export const CP424: EncodingFactory;
|
|
131
155
|
export const CP500: EncodingFactory;
|
|
132
156
|
export const CP875: EncodingFactory;
|
|
133
157
|
export const CP1026: EncodingFactory;
|
|
@@ -163,14 +187,15 @@ export const MAC_LATIN2: EncodingFactory;
|
|
|
163
187
|
export const MAC_ROMAN: EncodingFactory;
|
|
164
188
|
export const MAC_TURKISH: EncodingFactory;
|
|
165
189
|
export const ATARIST: EncodingFactory;
|
|
166
|
-
export const CP424: EncodingFactory;
|
|
167
190
|
export const CP856: EncodingFactory;
|
|
168
191
|
export const CP1006: EncodingFactory;
|
|
169
192
|
export const KOI8_R: EncodingFactory;
|
|
170
193
|
export const KOI8_U: EncodingFactory;
|
|
171
194
|
export const KZ1048: EncodingFactory;
|
|
172
195
|
export const NEXTSTEP: EncodingFactory;
|
|
173
|
-
export const
|
|
196
|
+
export const JIS_0201: EncodingFactory;
|
|
197
|
+
export const SHIFT_JIS: EncodingFactory;
|
|
198
|
+
export const CP932: EncodingFactory;
|
|
174
199
|
export const UTF8: EncodingFactory;
|
|
175
200
|
export const UTF16LE: EncodingFactory;
|
|
176
201
|
export const UTF16BE: EncodingFactory;
|