iconv-tiny 1.2.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,7 +12,7 @@
12
12
  [![Code Style: Prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg)](https://github.com/prettier/prettier)
13
13
  [![npm version](https://img.shields.io/npm/v/iconv-tiny)](https://www.npmjs.com/package/iconv-tiny)
14
14
 
15
- Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from http://www.unicode.org/Public/MAPPINGS.
15
+ Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from https://www.unicode.org/Public/MAPPINGS.
16
16
 
17
17
  ## About
18
18
 
@@ -27,9 +27,9 @@ It is an ES-module and should work in all modern browsers and NodeJS that suppor
27
27
  1. Tree shaking and other ESM-related benefits.
28
28
  1. Table mappings and default characters overrides.
29
29
  1. Native `TextDecoder` for some encodings.
30
- 1. `encodeInto(Uint8Array)` and `byteLength(string)`
30
+ 1. `encodeInto(Uint8Array)` and `byteLength(string)` functions
31
31
  1. Typescript friendly.
32
- 1. Tiny: 1 file, ~12KB in gzip.
32
+ 1. Tiny: 1 file with DBCS tables, ~30 KB gzipped.
33
33
 
34
34
  ## Installation
35
35
 
@@ -43,7 +43,7 @@ or use CDN:
43
43
  <script type="importmap">
44
44
  {
45
45
  "imports": {
46
- "iconv-tiny": "https://unpkg.com/iconv-tiny@1.2.2/dist/iconv-tiny.mjs"
46
+ "iconv-tiny": "https://unpkg.com/iconv-tiny@1.4.0/dist/iconv-tiny.mjs"
47
47
  }
48
48
  }
49
49
  </script>
@@ -54,10 +54,10 @@ or use CDN:
54
54
  API is very close to **iconv-lite** API, see [iconv-tiny.d.mts](dist/iconv-tiny.d.mts).
55
55
 
56
56
  ```javascript
57
- import { IconvTiny, aliases, encodings, canonicalize } from "iconv-tiny";
57
+ import { canonicalize, createIconv, encodings, aliases } from "iconv-tiny";
58
58
 
59
59
  // Create iconv
60
- const iconv = new IconvTiny(encodings, aliases);
60
+ const iconv = createIconv(encodings, aliases);
61
61
 
62
62
  // Convert from a Uint8Array to a JS string.
63
63
  str = iconv.decode(new Uint8Array([0x68, 0x65, 0x6c, 0x6c, 0x6f]), "win1251");
@@ -88,32 +88,33 @@ import { UTF16LE } from "iconv-tiny";
88
88
  const utf16 = UTF16LE.create();
89
89
 
90
90
  // Create decoder, it works like TextDecoder with {stream: true} option.
91
- const decoder = utf16.newDecoder();
91
+ const decoder = utf16.getDecoder();
92
92
 
93
93
  // Decode a fragment
94
- const part = decoder.decode(new Uint8Array([0x3d, 0xd8, 0x0a]));
94
+ const part = decoder.write(new Uint8Array([0x3d, 0xd8, 0x0a]));
95
95
 
96
96
  // Decode the next fragment
97
- const str = decoder.decode(new Uint8Array([0xde])); // 😊
97
+ const str = decoder.write(new Uint8Array([0xde])); // 😊
98
98
 
99
99
  // Finish stream decoding
100
- const tail = decoder.decode();
100
+ const tail = decoder.end();
101
101
  ```
102
102
 
103
103
  See more [examples](examples).
104
104
 
105
- ## Supported encodings:
105
+ ## Supported encodings (single-byte & double-byte encodings, Unicode):
106
106
 
107
107
  1. **ISO-8859**: 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16
108
- 1. **EBCDIC**: Cp037, Cp500, Cp875, Cp1026
108
+ 1. **EBCDIC**: Cp037, Cp424, Cp500, Cp875, Cp1026
109
109
  1. **DOS**: Cp437, Cp737, Cp775, Cp850, Cp852, Cp855, Cp857, Cp860, Cp861, Cp862, Cp863, Cp864, Cp865, Cp866, Cp869, Cp874
110
110
  1. **WINDOWS**: Cp1250, Cp1251, Cp1252, Cp1253, Cp1254, Cp1255, Cp1256, Cp1257, Cp1258
111
111
  1. **MAC**: CYRILLIC, GREEK, ICELAND, LATIN2, ROMAN, TURKISH
112
- 1. **MISC**: ATARIST, Cp424, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
113
- 1. **OTHER**: US-ASCII
112
+ 1. **MISC**: ATARIST, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
113
+ 1. **OTHER**: [US-ASCII](https://en.wikipedia.org/wiki/ASCII)
114
114
  1. **UNICODE**: UTF-8, UTF-16, UTF-32
115
+ 1. **Japanese**: [JIS-0201](https://en.wikipedia.org/wiki/JIS_X_0201), [SHIFT-JIS](https://en.wikipedia.org/wiki/Shift_JIS), [CP932](https://en.wikipedia.org/wiki/Code_page_932_(Microsoft_Windows))
115
116
 
116
- All encodings are generated automatically from http://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
117
+ All encodings are generated automatically from https://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
117
118
 
118
119
  **iconv-tiny** output is identical to **iconv-lite** output, see [tests/regression.test.mjs](tests/regression.test.mjs).
119
120
 
@@ -122,10 +123,10 @@ All encodings are generated automatically from http://www.unicode.org/Public/MAP
122
123
  Comparison with iconv-lite module (Core i7-7500U CPU @ 2.7GHz, Node v24.2.0). Note: your results may vary, so please always check on your hardware.
123
124
 
124
125
  ```
125
- operation iconv-lite@0.7.0 iconv-tiny@1.2.3
126
+ operation iconv-lite@0.7.2 iconv-tiny@1.4.0
126
127
  ------------------------------------------------------
127
- encode('win1251') ~598 Mb/s ~622 Mb/s
128
- decode('win1251') ~218 Mb/s ~263 Mb/s
128
+ encode('win1251') ~270 Mb/s ~270 Mb/s
129
+ decode('win1251') ~120 Mb/s ~220 Mb/s
129
130
  ```
130
131
 
131
132
  **iconv-lite** is NodeJS oriented and uses specific APIs like `Buffer` and native NodeJS encodings.
@@ -140,7 +141,7 @@ decode('win1251') ~218 Mb/s ~263 Mb/s
140
141
  1. UTF-16 is an alias of UTF-16LE
141
142
  1. UTF-32 is an alias of UTF-32LE
142
143
 
143
- ## Testing
144
+ ## Testing & Coverage
144
145
 
145
146
  ```
146
147
  $ git clone https://github.com/vip-delete/iconv-tiny.git
@@ -153,16 +154,22 @@ $ node tests\perf-test-unicode.mjs
153
154
 
154
155
  $ # To view test coverage:
155
156
  $ npm run coverage
156
-
157
- ----------------|---------|----------|---------|---------|-------------------
158
- File | % Stmts | % Branch | % Funcs | % Lines | Uncovered Line #s
159
- ----------------|---------|----------|---------|---------|-------------------
160
- All files | 100 | 100 | 96.72 | 100 |
161
- commons.mjs | 100 | 100 | 81.81 | 100 |
162
- iconv-tiny.mjs | 100 | 100 | 100 | 100 |
163
- sbcs.mjs | 100 | 100 | 100 | 100 |
164
- unicode.mjs | 100 | 100 | 100 | 100 |
165
- ----------------|---------|----------|---------|---------|-------------------
157
+ -------------|---------|----------|---------|---------|-------------------
158
+ File | % Stmts | % Branch | % Funcs | % Lines | Uncovered Line #s
159
+ -------------|---------|----------|---------|---------|-------------------
160
+ All files | 97.86 | 95.55 | 96.66 | 97.93 |
161
+ commons.mjs | 93.83 | 95 | 97.29 | 94.2 | 36-41,53-56
162
+ dbcs.mjs | 97.02 | 89.47 | 100 | 96.87 | 27,134,143
163
+ iconv.mjs | 100 | 100 | 100 | 100 |
164
+ mapped.mjs | 100 | 100 | 100 | 100 |
165
+ native.mjs | 100 | 100 | 100 | 100 |
166
+ sbcs.mjs | 100 | 90.9 | 100 | 100 | 36-66
167
+ types.mjs | 100 | 100 | 50 | 100 |
168
+ unicode.mjs | 100 | 100 | 100 | 100 |
169
+ utf16.mjs | 100 | 100 | 100 | 100 |
170
+ utf32.mjs | 98.76 | 97.5 | 100 | 98.76 | 224
171
+ utf8.mjs | 100 | 100 | 100 | 100 |
172
+ -------------|---------|----------|---------|---------|-------------------
166
173
  ```
167
174
 
168
175
  ## Commands
@@ -1,14 +1,3 @@
1
- export class IconvTiny {
2
- /**
3
- * @param encodings A map of encodings to support.
4
- * @param aliases Comma-separated groups, each containing space-separated aliases for the same encoding.
5
- */
6
- constructor(encodings?: { [key: string]: EncodingFactory }, aliases?: string);
7
- decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
8
- encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
9
- getEncoding(encoding: string, options?: Options): Encoding;
10
- }
11
-
12
1
  /**
13
2
  * Converts an encoding name to a normalized, unique name.
14
3
  * Removes non-alphanumeric characters and leading zeros.
@@ -18,31 +7,71 @@ export class IconvTiny {
18
7
  */
19
8
  export function canonicalize(encoding: string): string;
20
9
 
21
- interface Encoding {
22
- getName(): string;
23
- decode(array: Uint8Array, options?: DecoderOptions): string;
24
- encode(text: string, options?: EncoderOptions): Uint8Array;
25
- newDecoder(options?: DecoderOptions): CharsetDecoder;
26
- newEncoder(options?: EncoderOptions): CharsetEncoder;
27
- }
10
+ /**
11
+ * @param encodings - A map of encodings to support.
12
+ * @param aliases - Comma-separated groups, each containing space-separated aliases for the same encoding.
13
+ */
14
+ export function createIconv(encodings?: { [key: string]: EncodingFactory }, aliases?: string): Iconv;
28
15
 
29
16
  interface EncodingFactory {
30
17
  create(options?: Options): Encoding;
31
18
  }
32
19
 
33
- interface CharsetDecoder {
34
- decode(array?: Uint8Array): string;
20
+ interface Iconv {
21
+ /** get/create an encoding, create a decoder, decode and flush */
22
+ decode(buf: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
23
+ /** get/create an encoding, create an encoder, encode and flush */
24
+ encode(str: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
25
+ /** get/create an encoding */
26
+ getEncoding(encoding: string, options?: Options): Encoding;
35
27
  }
36
28
 
37
- interface CharsetEncoder {
38
- encode(text?: string): Uint8Array;
39
- encodeInto(src: string, dst: Uint8Array): TextEncoderEncodeIntoResult;
29
+ /** Encoding that doesn't keep any state */
30
+ interface Encoding {
31
+ getName(): string;
32
+
33
+ /** get/create a decoder, decode and flush */
34
+ decode(buf: Uint8Array, options?: DecodeOptions): string;
35
+ /** get/create an encoder, encode and flush */
36
+ encode(str: string, options?: EncodeOptions): Uint8Array;
37
+
40
38
  /**
41
- * Similar to Buffer.byteLength;
42
- * @param src input to calculate the length of
39
+ * Similar to Buffer.byteLength.
40
+ *
41
+ * @param str - input to calculate the length of
43
42
  * @returns The number of bytes of the specified string
44
43
  */
45
- byteLength(src: string): number;
44
+ byteLength(str: string): number;
45
+
46
+ // --- Low-level Stream APIs ---
47
+
48
+ /** create a decoder to keep the decoding state. */
49
+ getDecoder(options?: DecodeOptions): DecoderStream;
50
+ /** create an encoder to keep the encoding state. */
51
+ getEncoder(options?: EncodeOptions): EncoderStream;
52
+ }
53
+
54
+ /** Decoder to keep the decoding state */
55
+ interface DecoderStream {
56
+ /** decode, keep the leftover in the state */
57
+ write(buf: Uint8Array): string;
58
+ /** flush the leftover */
59
+ end(): string;
60
+ }
61
+
62
+ /** Encoder to keep the encoding state */
63
+ interface EncoderStream {
64
+ /** encode into a new array, keep the leftover in the state */
65
+ write(str: string): Uint8Array;
66
+ /** flush the leftover into a new array */
67
+ end(): Uint8Array;
68
+
69
+ // --- Low-Level Encoding APIs ---
70
+
71
+ /** encode into the given array, keep the leftover in the state */
72
+ encodeInto(str: string, buf: Uint8Array): TextEncoderEncodeIntoResult;
73
+ /** flush the leftover into the given array */
74
+ flushInto(buf: Uint8Array): TextEncoderEncodeIntoResult;
46
75
  }
47
76
 
48
77
  type TextEncoderEncodeIntoResult = {
@@ -50,11 +79,11 @@ type TextEncoderEncodeIntoResult = {
50
79
  written: number;
51
80
  }
52
81
 
53
- type DecoderOptions = {
82
+ type DecodeOptions = {
54
83
  /**
55
84
  * Sets the replacement character used by the "decode" method for unmapped bytes (default: "�").
56
85
  */
57
- defaultCharUnicode?: string | DefaultCharUnicodeFunction;
86
+ defaultCharUnicode?: string | DefaultFunction;
58
87
  /**
59
88
  * Specifies the behavior of the "decode" method (default: false)
60
89
  *
@@ -70,11 +99,11 @@ type DecoderOptions = {
70
99
  stripBOM?: boolean;
71
100
  };
72
101
 
73
- type EncoderOptions = {
102
+ type EncodeOptions = {
74
103
  /**
75
104
  * Sets the replacement byte used by the "encode" method for unmapped symbols (default: "?").
76
105
  */
77
- defaultCharByte?: string | DefaultCharByteFunction;
106
+ defaultCharByte?: string | DefaultFunction;
78
107
  /**
79
108
  * Unicode only. No BOM added by default, unless overridden by addBOM: true
80
109
  */
@@ -96,22 +125,16 @@ type Options = {
96
125
  type Overrides = Array<number | string>;
97
126
 
98
127
  /**
99
- * @param {number} input - input character code (0-65536)
100
- * @param {number} index - index of the character
101
- * @returns {number} default byte (0-255)
128
+ * @param {number} input - input character code (0-65535) if encoding; or an input byte (0-255) if decoding
129
+ * @param {number} index - index of the character if encoding; or an index of the byte if decoding
130
+ * @returns {number} default byte (0-255) if encoding; or a default character code (0-65535) if decoding
102
131
  */
103
- type DefaultCharByteFunction = (input: number, index: number) => number | null | undefined;
132
+ type DefaultFunction = (input: number, index: number) => number | null | undefined;
104
133
 
105
- /**
106
- * @param {number} input - input byte (0-255)
107
- * @param {number} index - index of the byte
108
- * @returns {number} default character code (0-65536)
109
- */
110
- type DefaultCharUnicodeFunction = (input: number, index: number) => number | null | undefined;
111
-
112
- type OptionsAndDecoderOptions = Options & DecoderOptions;
113
- type OptionsAndEncoderOptions = Options & EncoderOptions;
134
+ type OptionsAndDecoderOptions = Options & DecodeOptions;
135
+ type OptionsAndEncoderOptions = Options & EncodeOptions;
114
136
 
137
+ export const US_ASCII: EncodingFactory;
115
138
  export const ISO_8859_1: EncodingFactory;
116
139
  export const ISO_8859_2: EncodingFactory;
117
140
  export const ISO_8859_3: EncodingFactory;
@@ -128,6 +151,7 @@ export const ISO_8859_14: EncodingFactory;
128
151
  export const ISO_8859_15: EncodingFactory;
129
152
  export const ISO_8859_16: EncodingFactory;
130
153
  export const CP037: EncodingFactory;
154
+ export const CP424: EncodingFactory;
131
155
  export const CP500: EncodingFactory;
132
156
  export const CP875: EncodingFactory;
133
157
  export const CP1026: EncodingFactory;
@@ -163,14 +187,15 @@ export const MAC_LATIN2: EncodingFactory;
163
187
  export const MAC_ROMAN: EncodingFactory;
164
188
  export const MAC_TURKISH: EncodingFactory;
165
189
  export const ATARIST: EncodingFactory;
166
- export const CP424: EncodingFactory;
167
190
  export const CP856: EncodingFactory;
168
191
  export const CP1006: EncodingFactory;
169
192
  export const KOI8_R: EncodingFactory;
170
193
  export const KOI8_U: EncodingFactory;
171
194
  export const KZ1048: EncodingFactory;
172
195
  export const NEXTSTEP: EncodingFactory;
173
- export const US_ASCII: EncodingFactory;
196
+ export const JIS_0201: EncodingFactory;
197
+ export const SHIFT_JIS: EncodingFactory;
198
+ export const CP932: EncodingFactory;
174
199
  export const UTF8: EncodingFactory;
175
200
  export const UTF16LE: EncodingFactory;
176
201
  export const UTF16BE: EncodingFactory;