npm - iconv-tiny - Versions diffs - 1.2.2 → 1.3.0 - Mend

iconv-tiny 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md +14 -12
package/dist/iconv-tiny.d.mts +37 -26
package/dist/iconv-tiny.min.mjs +191 -75
package/dist/iconv-tiny.mjs +876 -504
package/package.json +12 -11

package/README.md CHANGED Viewed

@@ -10,8 +10,9 @@
 [![ci](https://github.com/vip-delete/iconv-tiny/actions/workflows/ci.yaml/badge.svg)](https://github.com/vip-delete/iconv-tiny/actions/workflows/ci.yaml)
 [![Code Style: Prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg)](https://github.com/prettier/prettier)
+[![npm version](https://img.shields.io/npm/v/iconv-tiny)](https://www.npmjs.com/package/iconv-tiny)
-Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from http://www.unicode.org/Public/MAPPINGS.
+Pure JS character encoding conversion as an ECMAScript Module (ESM). Auto-Generated from https://www.unicode.org/Public/MAPPINGS.
 ## About
@@ -26,9 +27,9 @@ It is an ES-module and should work in all modern browsers and NodeJS that suppor
 1. Tree shaking and other ESM-related benefits.
 1. Table mappings and default characters overrides.
 1. Native `TextDecoder` for some encodings.
-1. `encodeInto(Uint8Array)` and `byteLength(string)`
+1. `encodeInto(Uint8Array)` and `byteLength(string)` functions
 1. Typescript friendly.
-1. Tiny: 1 file, ~12KB in gzip.
+1. Tiny: 1 file with dbcs tables, ~30KB in gzip.
 ## Installation
@@ -42,7 +43,7 @@ or use CDN:
 <script type="importmap">
   {
     "imports": {
-      "iconv-tiny": "https://unpkg.com/iconv-tiny@1.2.2/dist/iconv-tiny.mjs"
+      "iconv-tiny": "https://unpkg.com/iconv-tiny@1.3.0/dist/iconv-tiny.mjs"
     }
   }
 </script>
@@ -53,10 +54,10 @@ or use CDN:
 API is very close to **iconv-lite** API, see [iconv-tiny.d.mts](dist/iconv-tiny.d.mts).
 ```javascript
-import { IconvTiny, aliases, encodings, canonicalize } from "iconv-tiny";
+import { canonicalize, createIconv, encodings, aliases } from "iconv-tiny";
 // Create iconv
-const iconv = new IconvTiny(encodings, aliases);
+const iconv = createIconv(encodings, aliases);
 // Convert from an Uint8Array to a js string.
 str = iconv.decode(new Uint8Array([0x68, 0x65, 0x6c, 0x6c, 0x6f]), "win1251");
@@ -101,18 +102,19 @@ const tail = decoder.decode();
 See more [examples](examples).
-## Supported encodings:
+## Supported encodings (singlebyte & doublebyte encodings, Unicode):
 1. **ISO-8859**: 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16
-1. **EBCDIC**: Cp037, Cp500, Cp875, Cp1026
+1. **EBCDIC**: Cp037, Cp424, Cp500, Cp875, Cp1026
 1. **DOS**: Cp437, Cp737, Cp775, Cp850, Cp852, Cp855, Cp857, Cp860, Cp861, Cp862, Cp863, Cp864, Cp865, Cp866, Cp869, Cp874
 1. **WINDOWS**: Cp1250, Cp1251, Cp1252, Cp1253, Cp1254, Cp1255, Cp1256, Cp1257, Cp1258
 1. **MAC**: CYRILLIC, GREEK, ICELAND, LATIN2, ROMAN, TURKISH
-1. **MISC**: ATARIST, Cp424, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
-1. **OTHER**: US-ASCII
+1. **MISC**: ATARIST, CP856, Cp1006, KOI8-R, KOI8-U, KZ1048, NEXTSTEP
+1. **OTHER**: [US-ASCII](https://en.wikipedia.org/wiki/ASCII)
 1. **UNICODE**: UTF-8, UTF-16, UTF-32
+1. **Japanese**: [JIS-0201](https://en.wikipedia.org/wiki/JIS_X_0201), [SHIFT-JIS](https://en.wikipedia.org/wiki/Shift_JIS), [CP932](https://en.wikipedia.org/wiki/Code_page_932_(Microsoft_Windows))
-All encodings are generated automatically from http://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
+All encodings are generated automatically from https://www.unicode.org/Public/MAPPINGS with a few additional mappings for **CP875** (0xE1 -> 0x20AF, 0xEC -> 0x037A, 0xFC -> 0x20AC) and **CP1255** (0xCA -> 0x05BA), see [mappings](scripts/mappings/)
 **iconv-tiny** output is identical to **iconv-lite** output, see [tests/regression.test.mjs](tests/regression.test.mjs).
@@ -121,7 +123,7 @@ All encodings are generated automatically from http://www.unicode.org/Public/MAP
 Comparison with iconv-lite module (Core i7-7500U CPU @ 2.7GHz, Node v24.2.0). Note: your results may vary, so please always check on your hardware.
 ```
-operation          iconv-lite@0.6.3   iconv-tiny@1.2.2
+operation          iconv-lite@0.7.0   iconv-tiny@1.3.0
 ------------------------------------------------------
 encode('win1251')     ~598 Mb/s          ~622 Mb/s
 decode('win1251')     ~218 Mb/s          ~263 Mb/s

package/dist/iconv-tiny.d.mts CHANGED Viewed

@@ -1,14 +1,3 @@
-export class IconvTiny {
-  /**
-   * @param encodings A map of encodings to support.
-   * @param aliases Comma-separated groups, each containing space-separated aliases for the same encoding.
-   */
-  constructor(encodings?: { [key: string]: EncodingFactory }, aliases?: string);
-  decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
-  encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
-  getEncoding(encoding: string, options?: Options): Encoding;
-}
 /**
  * Converts an encoding name to a normalized, unique name.
  * Removes non-alphanumeric characters and leading zeros.
@@ -18,6 +7,18 @@ export class IconvTiny {
  */
 export function canonicalize(encoding: string): string;
+/**
+ * @param encodings - A map of encodings to support.
+ * @param aliases - Comma-separated groups, each containing space-separated aliases for the same encoding.
+ */
+export function createIconv(encodings?: { [key: string]: EncodingFactory }, aliases?: string): IconvTiny;
+interface IconvTiny {
+  decode(array: Uint8Array, encoding: string, options?: OptionsAndDecoderOptions): string;
+  encode(content: string, encoding: string, options?: OptionsAndEncoderOptions): Uint8Array;
+  getEncoding(encoding: string, options?: Options): Encoding;
+}
 interface Encoding {
   getName(): string;
   decode(array: Uint8Array, options?: DecoderOptions): string;
@@ -36,13 +37,13 @@ interface CharsetDecoder {
 interface CharsetEncoder {
   encode(text?: string): Uint8Array;
-  encodeInto(src: string, dst: Uint8Array): TextEncoderEncodeIntoResult;
+  encodeInto(text: string, dst: Uint8Array): TextEncoderEncodeIntoResult;
   /**
    * Similar to Buffer.byteLength;
-   * @param src input to calculate the length of
+   * @param text - input to calculate the length of
    * @returns The number of bytes of the specified string
    */
-  byteLength(src: string): number;
+  byteLength(text: string): number;
 }
 type TextEncoderEncodeIntoResult = {
@@ -56,27 +57,27 @@ type DecoderOptions = {
    */
   defaultCharUnicode?: string | DefaultCharUnicodeFunction;
   /**
-   * Specifies the behavior of "decode" method (default: false)
+   * Specifies the behavior of the "decode" method (default: false)
    *
-   * - true: use native TextDecoder whenever possible
-   * - false: use "software" decoding according to the mapping rules.
+   * - true: use the native TextDecoder when possible.
+   * - false: use a software-based decoder that relies on a mapping table.
+   *
+   * This option is ignored for Unicode, as it uses algorithmic rules rather than a mapping table.
    */
   native?: boolean;
   /**
-   * UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE: BOM is stripped by default, unless overridden by stripBOM: false
-   * UTF-16, UTF-32: Use BOM, fallback to LE, unless overriden by defaultEncoding: 'UTF-16BE' or 'UTF-32BE';
+   * Unicode only. BOM is stripped by default, unless overridden by stripBOM: false
    */
   stripBOM?: boolean;
 };
 type EncoderOptions = {
   /**
-   * Sets the replacement byte used by the "encode" method for unmapped Unicode symbols (default: "?").
+   * Sets the replacement byte used by the "encode" method for unmapped symbols (default: "?").
    */
   defaultCharByte?: string | DefaultCharByteFunction;
   /**
-   * UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE: No BOM added by default, unless overridden by addBOM: true
-   * UTF-16, UTF-32: Use LE and add BOM by default, unless overridden by addBOM: false
+   * Unicode only. No BOM added by default, unless overridden by addBOM: true
    */
   addBOM?: boolean;
 }
@@ -95,23 +96,31 @@ type Options = {
 type Overrides = Array<number | string>;
+/**
+ * @param {number} input
+ * @param {number} index
+ * @returns {number}
+ */
+type DefaultFunction = (input: number, index: number) => number | null | undefined;
 /**
  * @param {number} input - input character code (0-65536)
  * @param {number} index - index of the character
  * @returns {number} default byte (0-255)
  */
-type DefaultCharByteFunction = (input: number, index: number) => number | null | undefined;
+type DefaultCharByteFunction = DefaultFunction;
 /**
  * @param {number} input - input byte (0-255)
  * @param {number} index - index of the byte
  * @returns {number} default character code (0-65536)
  */
-type DefaultCharUnicodeFunction = (input: number, index: number) => number | null | undefined;
+type DefaultCharUnicodeFunction = DefaultFunction;
 type OptionsAndDecoderOptions = Options & DecoderOptions;
 type OptionsAndEncoderOptions = Options & EncoderOptions;
+export const US_ASCII: EncodingFactory;
 export const ISO_8859_1: EncodingFactory;
 export const ISO_8859_2: EncodingFactory;
 export const ISO_8859_3: EncodingFactory;
@@ -128,6 +137,7 @@ export const ISO_8859_14: EncodingFactory;
 export const ISO_8859_15: EncodingFactory;
 export const ISO_8859_16: EncodingFactory;
 export const CP037: EncodingFactory;
+export const CP424: EncodingFactory;
 export const CP500: EncodingFactory;
 export const CP875: EncodingFactory;
 export const CP1026: EncodingFactory;
@@ -163,14 +173,15 @@ export const MAC_LATIN2: EncodingFactory;
 export const MAC_ROMAN: EncodingFactory;
 export const MAC_TURKISH: EncodingFactory;
 export const ATARIST: EncodingFactory;
-export const CP424: EncodingFactory;
 export const CP856: EncodingFactory;
 export const CP1006: EncodingFactory;
 export const KOI8_R: EncodingFactory;
 export const KOI8_U: EncodingFactory;
 export const KZ1048: EncodingFactory;
 export const NEXTSTEP: EncodingFactory;
-export const US_ASCII: EncodingFactory;
+export const JIS_0201: EncodingFactory;
+export const SHIFT_JIS: EncodingFactory;
+export const CP932: EncodingFactory;
 export const UTF8: EncodingFactory;
 export const UTF16LE: EncodingFactory;
 export const UTF16BE: EncodingFactory;