iconv-tiny 1.2.2 → 1.3.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- package/README.md +14 -12
- package/dist/iconv-tiny.d.mts +37 -26
- package/dist/iconv-tiny.min.mjs +191 -75
- package/dist/iconv-tiny.mjs +876 -504
- package/package.json +12 -11
package/README.md
CHANGED
|
@@ -10,8 +10,9 @@
|
|
|
10
10
|
|
|
11
11
|
[CI](https://github.com/vip-delete/iconv-tiny/actions/workflows/ci.yaml)
|
|
12
12
|
[code style: prettier](https://github.com/prettier/prettier)
|
|
13
|
+
[npm](https://www.npmjs.com/package/iconv-tiny)
|
|
13
14
|
|
|
14
|
-
Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from
|
|
15
|
+
Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from https://www.unicode.org/Public/MAPPINGS.
|
|
15
16
|
|
|
16
17
|
## About
|
|
17
18
|
|
|
@@ -26,9 +27,9 @@ It is an ES-module and should work in all modern browsers and NodeJS that suppor
|
|
|
26
27
|
1. Tree shaking and other ESM-related benefits.
|
|
27
28
|
1. Table mappings and default characters overrides.
|
|
28
29
|
1. Native `TextDecoder` for some encodings.
|
|
29
|
-
1. `encodeInto(Uint8Array)` and `byteLength(string)`
|
|
30
|
+
1. `encodeInto(Uint8Array)` and `byteLength(string)` functions
|
|
30
31
|
1. Typescript friendly.
|
|
31
|
-
1. Tiny: 1 file, ~
|
|
32
|
+
1. Tiny: 1 file with dbcs tables, ~30KB in gzip.
|
|
32
33
|
|
|
33
34
|
## Installation
|
|
34
35
|
|
|
@@ -42,7 +43,7 @@ or use CDN:
|
|
|
42
43
|
<script type="importmap">
|
|
43
44
|
{
|
|
44
45
|
"imports": {
|
|
45
|
-
"iconv-tiny": "https://unpkg.com/iconv-tiny@1.
|
|
46
|
+
"iconv-tiny": "https://unpkg.com/iconv-tiny@1.3.0/dist/iconv-tiny.mjs"
|
|
46
47
|
}
|
|
47
48
|
}
|
|
48
49
|
</script>
|
|
@@ -53,10 +54,10 @@ or use CDN:
|
|
|
53
54
|
API is very close to **iconv-lite** API, see [iconv-tiny.d.mts](dist/iconv-tiny.d.mts).
|
|
54
55
|
|
|
55
56
|
```javascript
|
|
56
|
-
import {
|
|
57
|
+
import { canonicalize, createIconv, encodings, aliases } from "iconv-tiny";
|
|
57
58
|
|
|
58
59
|
// Create iconv
|
|
59
|
-
const iconv =
|
|
60
|
+
const iconv = createIconv(encodings, aliases);
|
|
60
61
|
|
|
61
62
|
// Convert from an Uint8Array to a js string.
|
|
62
63
|
str = iconv.decode(new Uint8Array([0x68, 0x65, 0x6c, 0x6c, 0x6f]), "win1251");
|
|
@@ -101,18 +102,19 @@ const tail = decoder.decode();
|
|
|
101
102
|
|
|
102
103
|
See more [examples](examples).
|
|
103
104
|
|
|
104
|
-
## Supported encodings:
|
|
105
|
+
## Supported encodings (singlebyte & doublebyte encodings, Unicode):
|
|
105
106
|
|
|
106
107
|
1. **ISO-8859**: 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16
|
|
107
|
-
1. **EBCDIC**: Cp037, Cp500, Cp875, Cp1026
|
|
108
|
+
1. **EBCDIC**: Cp037, Cp424, Cp500, Cp875, Cp1026
|
|
108
109
|
1. **DOS**: Cp437, Cp737, Cp775, Cp850, Cp852, Cp855, Cp857, Cp860, Cp861, Cp862, Cp863, Cp864, Cp865, Cp866, Cp869, Cp874
|
|
109
110
|
1. **WINDOWS**: Cp1250, Cp1251, Cp1252, Cp1253, Cp1254, Cp1255, Cp1256, Cp1257, Cp1258
|
|
110
111
|
1. **MAC**: CYRILLIC, GREEK, ICELAND, LATIN2, ROMAN, TURKISH
|
|
111
|
-
1. **MISC**: ATARIST,
|
|
112
|
-
1. **OTHER**: US-ASCII
|
|
112
|
+
1. **MISC**: ATARIST, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
|
|
113
|
+
1. **OTHER**: [US-ASCII](https://en.wikipedia.org/wiki/ASCII)
|
|
113
114
|
1. **UNICODE**: UTF-8, UTF-16, UTF-32
|
|
115
|
+
1. **Japanese**: [JIS-0201](https://en.wikipedia.org/wiki/JIS_X_0201), [SHIFT-JIS](https://en.wikipedia.org/wiki/Shift_JIS), [CP932](https://en.wikipedia.org/wiki/Code_page_932_(Microsoft_Windows))
|
|
114
116
|
|
|
115
|
-
All encodings are generated automatically from
|
|
117
|
+
All encodings are generated automatically from https://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
|
|
116
118
|
|
|
117
119
|
**iconv-tiny** output is identical to **iconv-lite** output, see [tests/regression.test.mjs](tests/regression.test.mjs).
|
|
118
120
|
|
|
@@ -121,7 +123,7 @@ All encodings are generated automatically from http://www.unicode.org/Public/MAP
|
|
|
121
123
|
Comparison with iconv-lite module (Core i7-7500U CPU @ 2.7GHz, Node v24.2.0). Note: your results may vary, so please always check on your hardware.
|
|
122
124
|
|
|
123
125
|
```
|
|
124
|
-
operation iconv-lite@0.
|
|
126
|
+
operation iconv-lite@0.7.0 iconv-tiny@1.3.0
|
|
125
127
|
------------------------------------------------------
|
|
126
128
|
encode('win1251') ~598 Mb/s ~622 Mb/s
|
|
127
129
|
decode('win1251') ~218 Mb/s ~263 Mb/s
|
package/dist/iconv-tiny.d.mts
CHANGED
|
@@ -1,14 +1,3 @@
|
|
|
1
|
-
export class IconvTiny {
|
|
2
|
-
/**
|
|
3
|
-
* @param encodings A map of encodings to support.
|
|
4
|
-
* @param aliases Comma-separated groups, each containing space-separated aliases for the same encoding.
|
|
5
|
-
*/
|
|
6
|
-
constructor(encodings?: { [key: string]: EncodingFactory }, aliases?: string);
|
|
7
|
-
decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
|
|
8
|
-
encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
|
|
9
|
-
getEncoding(encoding: string, options?: Options): Encoding;
|
|
10
|
-
}
|
|
11
|
-
|
|
12
1
|
/**
|
|
13
2
|
* Converts an encoding name to a normalized, unique name.
|
|
14
3
|
* Removes non-alphanumeric characters and leading zeros.
|
|
@@ -18,6 +7,18 @@ export class IconvTiny {
|
|
|
18
7
|
*/
|
|
19
8
|
export function canonicalize(encoding: string): string;
|
|
20
9
|
|
|
10
|
+
/**
|
|
11
|
+
* @param encodings - A map of encodings to support.
|
|
12
|
+
* @param aliases - Comma-separated groups, each containing space-separated aliases for the same encoding.
|
|
13
|
+
*/
|
|
14
|
+
export function createIconv(encodings?: { [key: string]: EncodingFactory }, aliases?: string): IconvTiny;
|
|
15
|
+
|
|
16
|
+
interface IconvTiny {
|
|
17
|
+
decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
|
|
18
|
+
encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
|
|
19
|
+
getEncoding(encoding: string, options?: Options): Encoding;
|
|
20
|
+
}
|
|
21
|
+
|
|
21
22
|
interface Encoding {
|
|
22
23
|
getName(): string;
|
|
23
24
|
decode(array: Uint8Array, options?: DecoderOptions): string;
|
|
@@ -36,13 +37,13 @@ interface CharsetDecoder {
|
|
|
36
37
|
|
|
37
38
|
interface CharsetEncoder {
|
|
38
39
|
encode(text?: string): Uint8Array;
|
|
39
|
-
encodeInto(
|
|
40
|
+
encodeInto(text: string, dst: Uint8Array): TextEncoderEncodeIntoResult;
|
|
40
41
|
/**
|
|
41
42
|
* Similar to Buffer.byteLength;
|
|
42
|
-
* @param
|
|
43
|
+
* @param text - input to calculate the length of
|
|
43
44
|
* @returns The number of bytes of the specified string
|
|
44
45
|
*/
|
|
45
|
-
byteLength(
|
|
46
|
+
byteLength(text: string): number;
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
type TextEncoderEncodeIntoResult = {
|
|
@@ -56,27 +57,27 @@ type DecoderOptions = {
|
|
|
56
57
|
*/
|
|
57
58
|
defaultCharUnicode?: string | DefaultCharUnicodeFunction;
|
|
58
59
|
/**
|
|
59
|
-
* Specifies the behavior of "decode" method (default: false)
|
|
60
|
+
* Specifies the behavior of the "decode" method (default: false)
|
|
60
61
|
*
|
|
61
|
-
* - true: use native TextDecoder
|
|
62
|
-
* - false: use
|
|
62
|
+
* - true: use the native TextDecoder when possible.
|
|
63
|
+
* - false: use a software-based decoder that relies on a mapping table.
|
|
64
|
+
*
|
|
65
|
+
* This option is ignored for Unicode, as it uses algorithmic rules rather than a mapping table.
|
|
63
66
|
*/
|
|
64
67
|
native?: boolean;
|
|
65
68
|
/**
|
|
66
|
-
*
|
|
67
|
-
* UTF-16, UTF-32: Use BOM, fallback to LE, unless overriden by defaultEncoding: 'UTF-16BE' or 'UTF-32BE';
|
|
69
|
+
* Unicode only. BOM is stripped by default, unless overridden by stripBOM: false
|
|
68
70
|
*/
|
|
69
71
|
stripBOM?: boolean;
|
|
70
72
|
};
|
|
71
73
|
|
|
72
74
|
type EncoderOptions = {
|
|
73
75
|
/**
|
|
74
|
-
* Sets the replacement byte used by the "encode" method for unmapped
|
|
76
|
+
* Sets the replacement byte used by the "encode" method for unmapped symbols (default: "?").
|
|
75
77
|
*/
|
|
76
78
|
defaultCharByte?: string | DefaultCharByteFunction;
|
|
77
79
|
/**
|
|
78
|
-
*
|
|
79
|
-
* UTF-16, UTF-32: Use LE and add BOM by default, unless overridden by addBOM: false
|
|
80
|
+
* Unicode only. No BOM added by default, unless overridden by addBOM: true
|
|
80
81
|
*/
|
|
81
82
|
addBOM?: boolean;
|
|
82
83
|
}
|
|
@@ -95,23 +96,31 @@ type Options = {
|
|
|
95
96
|
|
|
96
97
|
type Overrides = Array<number | string>;
|
|
97
98
|
|
|
99
|
+
/**
|
|
100
|
+
* @param {number} input
|
|
101
|
+
* @param {number} index
|
|
102
|
+
* @returns {number}
|
|
103
|
+
*/
|
|
104
|
+
type DefaultFunction = (input: number, index: number) => number | null | undefined;
|
|
105
|
+
|
|
98
106
|
/**
|
|
99
107
|
* @param {number} input - input character code (0-65535)
|
|
100
108
|
* @param {number} index - index of the character
|
|
101
109
|
* @returns {number} default byte (0-255)
|
|
102
110
|
*/
|
|
103
|
-
type DefaultCharByteFunction =
|
|
111
|
+
type DefaultCharByteFunction = DefaultFunction;
|
|
104
112
|
|
|
105
113
|
/**
|
|
106
114
|
* @param {number} input - input byte (0-255)
|
|
107
115
|
* @param {number} index - index of the byte
|
|
108
116
|
* @returns {number} default character code (0-65535)
|
|
109
117
|
*/
|
|
110
|
-
type DefaultCharUnicodeFunction =
|
|
118
|
+
type DefaultCharUnicodeFunction = DefaultFunction;
|
|
111
119
|
|
|
112
120
|
type OptionsAndDecoderOptions = Options & DecoderOptions;
|
|
113
121
|
type OptionsAndEncoderOptions = Options & EncoderOptions;
|
|
114
122
|
|
|
123
|
+
export const US_ASCII: EncodingFactory;
|
|
115
124
|
export const ISO_8859_1: EncodingFactory;
|
|
116
125
|
export const ISO_8859_2: EncodingFactory;
|
|
117
126
|
export const ISO_8859_3: EncodingFactory;
|
|
@@ -128,6 +137,7 @@ export const ISO_8859_14: EncodingFactory;
|
|
|
128
137
|
export const ISO_8859_15: EncodingFactory;
|
|
129
138
|
export const ISO_8859_16: EncodingFactory;
|
|
130
139
|
export const CP037: EncodingFactory;
|
|
140
|
+
export const CP424: EncodingFactory;
|
|
131
141
|
export const CP500: EncodingFactory;
|
|
132
142
|
export const CP875: EncodingFactory;
|
|
133
143
|
export const CP1026: EncodingFactory;
|
|
@@ -163,14 +173,15 @@ export const MAC_LATIN2: EncodingFactory;
|
|
|
163
173
|
export const MAC_ROMAN: EncodingFactory;
|
|
164
174
|
export const MAC_TURKISH: EncodingFactory;
|
|
165
175
|
export const ATARIST: EncodingFactory;
|
|
166
|
-
export const CP424: EncodingFactory;
|
|
167
176
|
export const CP856: EncodingFactory;
|
|
168
177
|
export const CP1006: EncodingFactory;
|
|
169
178
|
export const KOI8_R: EncodingFactory;
|
|
170
179
|
export const KOI8_U: EncodingFactory;
|
|
171
180
|
export const KZ1048: EncodingFactory;
|
|
172
181
|
export const NEXTSTEP: EncodingFactory;
|
|
173
|
-
export const
|
|
182
|
+
export const JIS_0201: EncodingFactory;
|
|
183
|
+
export const SHIFT_JIS: EncodingFactory;
|
|
184
|
+
export const CP932: EncodingFactory;
|
|
174
185
|
export const UTF8: EncodingFactory;
|
|
175
186
|
export const UTF16LE: EncodingFactory;
|
|
176
187
|
export const UTF16BE: EncodingFactory;
|