iconv-tiny 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,8 +10,9 @@
10
10
 
11
11
  [![ci](https://github.com/vip-delete/iconv-tiny/actions/workflows/ci.yaml/badge.svg)](https://github.com/vip-delete/iconv-tiny/actions/workflows/ci.yaml)
12
12
  [![Code Style: Prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg)](https://github.com/prettier/prettier)
13
+ [![npm version](https://img.shields.io/npm/v/iconv-tiny)](https://www.npmjs.com/package/iconv-tiny)
13
14
 
14
- Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from http://www.unicode.org/Public/MAPPINGS.
15
+ Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from https://www.unicode.org/Public/MAPPINGS.
15
16
 
16
17
  ## About
17
18
 
@@ -26,9 +27,9 @@ It is an ES-module and should work in all modern browsers and NodeJS that suppor
26
27
  1. Tree shaking and other ESM-related benefits.
27
28
  1. Table mappings and default characters overrides.
28
29
  1. Native `TextDecoder` for some encodings.
29
- 1. `encodeInto(Uint8Array)` and `byteLength(string)`
30
+ 1. `encodeInto(Uint8Array)` and `byteLength(string)` functions.
30
31
  1. TypeScript-friendly.
31
- 1. Tiny: 1 file, ~12KB in gzip.
32
+ 1. Tiny: 1 file including DBCS tables, ~30 KB gzipped.
32
33
 
33
34
  ## Installation
34
35
 
@@ -42,7 +43,7 @@ or use CDN:
42
43
  <script type="importmap">
43
44
  {
44
45
  "imports": {
45
- "iconv-tiny": "https://unpkg.com/iconv-tiny@1.2.2/dist/iconv-tiny.mjs"
46
+ "iconv-tiny": "https://unpkg.com/iconv-tiny@1.3.0/dist/iconv-tiny.mjs"
46
47
  }
47
48
  }
48
49
  </script>
@@ -53,10 +54,10 @@ or use CDN:
53
54
  API is very close to **iconv-lite** API, see [iconv-tiny.d.mts](dist/iconv-tiny.d.mts).
54
55
 
55
56
  ```javascript
56
- import { IconvTiny, aliases, encodings, canonicalize } from "iconv-tiny";
57
+ import { canonicalize, createIconv, encodings, aliases } from "iconv-tiny";
57
58
 
58
59
  // Create iconv
59
- const iconv = new IconvTiny(encodings, aliases);
60
+ const iconv = createIconv(encodings, aliases);
60
61
 
61
62
  // Convert from a Uint8Array to a JS string.
62
63
  str = iconv.decode(new Uint8Array([0x68, 0x65, 0x6c, 0x6c, 0x6f]), "win1251");
@@ -101,18 +102,19 @@ const tail = decoder.decode();
101
102
 
102
103
  See more [examples](examples).
103
104
 
104
- ## Supported encodings:
105
+ ## Supported encodings (single-byte & double-byte encodings, Unicode):
105
106
 
106
107
  1. **ISO-8859**: 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16
107
- 1. **EBCDIC**: Cp037, Cp500, Cp875, Cp1026
108
+ 1. **EBCDIC**: Cp037, Cp424, Cp500, Cp875, Cp1026
108
109
  1. **DOS**: Cp437, Cp737, Cp775, Cp850, Cp852, Cp855, Cp857, Cp860, Cp861, Cp862, Cp863, Cp864, Cp865, Cp866, Cp869, Cp874
109
110
  1. **WINDOWS**: Cp1250, Cp1251, Cp1252, Cp1253, Cp1254, Cp1255, Cp1256, Cp1257, Cp1258
110
111
  1. **MAC**: CYRILLIC, GREEK, ICELAND, LATIN2, ROMAN, TURKISH
111
- 1. **MISC**: ATARIST, Cp424, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
112
- 1. **OTHER**: US-ASCII
112
+ 1. **MISC**: ATARIST, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
113
+ 1. **OTHER**: [US-ASCII](https://en.wikipedia.org/wiki/ASCII)
113
114
  1. **UNICODE**: UTF-8, UTF-16, UTF-32
115
+ 1. **Japanese**: [JIS-0201](https://en.wikipedia.org/wiki/JIS_X_0201), [SHIFT-JIS](https://en.wikipedia.org/wiki/Shift_JIS), [CP932](https://en.wikipedia.org/wiki/Code_page_932_(Microsoft_Windows))
114
116
 
115
- All encodings are generated automatically from http://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
117
+ All encodings are generated automatically from https://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
116
118
 
117
119
  **iconv-tiny** output is identical to **iconv-lite** output, see [tests/regression.test.mjs](tests/regression.test.mjs).
118
120
 
@@ -121,7 +123,7 @@ All encodings are generated automatically from http://www.unicode.org/Public/MAP
121
123
  Comparison with iconv-lite module (Core i7-7500U CPU @ 2.7GHz, Node v24.2.0). Note: your results may vary, so please always check on your hardware.
122
124
 
123
125
  ```
124
- operation iconv-lite@0.6.3 iconv-tiny@1.2.2
126
+ operation iconv-lite@0.7.0 iconv-tiny@1.3.0
125
127
  ------------------------------------------------------
126
128
  encode('win1251') ~598 Mb/s ~622 Mb/s
127
129
  decode('win1251') ~218 Mb/s ~263 Mb/s
@@ -1,14 +1,3 @@
1
- export class IconvTiny {
2
- /**
3
- * @param encodings A map of encodings to support.
4
- * @param aliases Comma-separated groups, each containing space-separated aliases for the same encoding.
5
- */
6
- constructor(encodings?: { [key: string]: EncodingFactory }, aliases?: string);
7
- decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
8
- encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
9
- getEncoding(encoding: string, options?: Options): Encoding;
10
- }
11
-
12
1
  /**
13
2
  * Converts an encoding name to a normalized, unique name.
14
3
  * Removes non-alphanumeric characters and leading zeros.
@@ -18,6 +7,18 @@ export class IconvTiny {
18
7
  */
19
8
  export function canonicalize(encoding: string): string;
20
9
 
10
+ /**
11
+ * @param encodings - A map of encodings to support.
12
+ * @param aliases - Comma-separated groups, each containing space-separated aliases for the same encoding.
13
+ */
14
+ export function createIconv(encodings?: { [key: string]: EncodingFactory }, aliases?: string): IconvTiny;
15
+
16
+ interface IconvTiny {
17
+ decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
18
+ encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
19
+ getEncoding(encoding: string, options?: Options): Encoding;
20
+ }
21
+
21
22
  interface Encoding {
22
23
  getName(): string;
23
24
  decode(array: Uint8Array, options?: DecoderOptions): string;
@@ -36,13 +37,13 @@ interface CharsetDecoder {
36
37
 
37
38
  interface CharsetEncoder {
38
39
  encode(text?: string): Uint8Array;
39
- encodeInto(src: string, dst: Uint8Array): TextEncoderEncodeIntoResult;
40
+ encodeInto(text: string, dst: Uint8Array): TextEncoderEncodeIntoResult;
40
41
  /**
41
42
  * Similar to Buffer.byteLength.
42
- * @param src input to calculate the length of
43
+ * @param text - input to calculate the length of
43
44
  * @returns The number of bytes of the specified string
44
45
  */
45
- byteLength(src: string): number;
46
+ byteLength(text: string): number;
46
47
  }
47
48
 
48
49
  type TextEncoderEncodeIntoResult = {
@@ -56,27 +57,27 @@ type DecoderOptions = {
56
57
  */
57
58
  defaultCharUnicode?: string | DefaultCharUnicodeFunction;
58
59
  /**
59
- * Specifies the behavior of "decode" method (default: false)
60
+ * Specifies the behavior of the "decode" method (default: false)
60
61
  *
61
- * - true: use native TextDecoder whenever possible
62
- * - false: use "software" decoding according to the mapping rules.
62
+ * - true: use the native TextDecoder when possible.
63
+ * - false: use a software-based decoder that relies on a mapping table.
64
+ *
65
+ * This option is ignored for Unicode, as it uses algorithmic rules rather than a mapping table.
63
66
  */
64
67
  native?: boolean;
65
68
  /**
66
- * UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE: BOM is stripped by default, unless overridden by stripBOM: false
67
- * UTF-16, UTF-32: Use BOM, fallback to LE, unless overriden by defaultEncoding: 'UTF-16BE' or 'UTF-32BE';
69
+ * Unicode only. BOM is stripped by default, unless overridden by stripBOM: false
68
70
  */
69
71
  stripBOM?: boolean;
70
72
  };
71
73
 
72
74
  type EncoderOptions = {
73
75
  /**
74
- * Sets the replacement byte used by the "encode" method for unmapped Unicode symbols (default: "?").
76
+ * Sets the replacement byte used by the "encode" method for unmapped symbols (default: "?").
75
77
  */
76
78
  defaultCharByte?: string | DefaultCharByteFunction;
77
79
  /**
78
- * UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE: No BOM added by default, unless overridden by addBOM: true
79
- * UTF-16, UTF-32: Use LE and add BOM by default, unless overridden by addBOM: false
80
+ * Unicode only. No BOM added by default, unless overridden by addBOM: true
80
81
  */
81
82
  addBOM?: boolean;
82
83
  }
@@ -95,23 +96,31 @@ type Options = {
95
96
 
96
97
  type Overrides = Array<number | string>;
97
98
 
99
+ /**
100
+ * @param {number} input
101
+ * @param {number} index
102
+ * @returns {number}
103
+ */
104
+ type DefaultFunction = (input: number, index: number) => number | null | undefined;
105
+
98
106
  /**
99
107
  * @param {number} input - input character code (0-65535)
100
108
  * @param {number} index - index of the character
101
109
  * @returns {number} default byte (0-255)
102
110
  */
103
- type DefaultCharByteFunction = (input: number, index: number) => number | null | undefined;
111
+ type DefaultCharByteFunction = DefaultFunction;
104
112
 
105
113
  /**
106
114
  * @param {number} input - input byte (0-255)
107
115
  * @param {number} index - index of the byte
108
116
  * @returns {number} default character code (0-65535)
109
117
  */
110
- type DefaultCharUnicodeFunction = (input: number, index: number) => number | null | undefined;
118
+ type DefaultCharUnicodeFunction = DefaultFunction;
111
119
 
112
120
  type OptionsAndDecoderOptions = Options & DecoderOptions;
113
121
  type OptionsAndEncoderOptions = Options & EncoderOptions;
114
122
 
123
+ export const US_ASCII: EncodingFactory;
115
124
  export const ISO_8859_1: EncodingFactory;
116
125
  export const ISO_8859_2: EncodingFactory;
117
126
  export const ISO_8859_3: EncodingFactory;
@@ -128,6 +137,7 @@ export const ISO_8859_14: EncodingFactory;
128
137
  export const ISO_8859_15: EncodingFactory;
129
138
  export const ISO_8859_16: EncodingFactory;
130
139
  export const CP037: EncodingFactory;
140
+ export const CP424: EncodingFactory;
131
141
  export const CP500: EncodingFactory;
132
142
  export const CP875: EncodingFactory;
133
143
  export const CP1026: EncodingFactory;
@@ -163,14 +173,15 @@ export const MAC_LATIN2: EncodingFactory;
163
173
  export const MAC_ROMAN: EncodingFactory;
164
174
  export const MAC_TURKISH: EncodingFactory;
165
175
  export const ATARIST: EncodingFactory;
166
- export const CP424: EncodingFactory;
167
176
  export const CP856: EncodingFactory;
168
177
  export const CP1006: EncodingFactory;
169
178
  export const KOI8_R: EncodingFactory;
170
179
  export const KOI8_U: EncodingFactory;
171
180
  export const KZ1048: EncodingFactory;
172
181
  export const NEXTSTEP: EncodingFactory;
173
- export const US_ASCII: EncodingFactory;
182
+ export const JIS_0201: EncodingFactory;
183
+ export const SHIFT_JIS: EncodingFactory;
184
+ export const CP932: EncodingFactory;
174
185
  export const UTF8: EncodingFactory;
175
186
  export const UTF16LE: EncodingFactory;
176
187
  export const UTF16BE: EncodingFactory;