npm - unicode-escaper - Versions diffs - 1.0.0 - Mend

unicode-escaper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,515 @@
+import { Transform, TransformOptions, TransformCallback } from 'node:stream';
+/**
+ * Identifiers of the escape formats supported by this package.
+ *
+ * Output shapes, as described by the `escapeTo*` wrappers and examples in this
+ * file (the identifier-to-shape pairing is inferred from the wrapper names):
+ * - 'unicode'      -> \uXXXX
+ * - 'unicode-es6'  -> \u{XXXXX}
+ * - 'hex'          -> \xNN
+ * - 'html-hex'     -> &#xNNNN;
+ * - 'html-decimal' -> &#NNNN;
+ * - 'codepoint'    -> U+XXXX
+ */
+type EscapeFormat = 'unicode' | 'unicode-es6' | 'hex' | 'html-hex' | 'html-decimal' | 'codepoint';
+/**
+ * Predicate deciding whether a single character should be escaped.
+ * Used as the type of `EscapeOptions.filter` and of the predicate helpers
+ * and combinators declared in this file.
+ * @param char - The character to check
+ * @param codePoint - The Unicode code point of the character
+ * @returns true if the character should be escaped, false otherwise
+ */
+type FilterFunction = (char: string, codePoint: number) => boolean;
+/**
+ * Options accepted by `escape` and `escapeWithInfo`; the convenience
+ * wrappers accept the same shape with some keys omitted.
+ */
+interface EscapeOptions {
+    /**
+     * The escape format to use (see `EscapeFormat`)
+     * @default 'unicode'
+     */
+    format?: EscapeFormat;
+    /**
+     * Custom filter function to determine which characters to escape.
+     * If provided, this takes precedence over preserveAscii and preserveLatin1.
+     * It returns true for characters that should be escaped.
+     */
+    filter?: FilterFunction;
+    /**
+     * If true, ASCII characters (0x00-0x7F) will not be escaped
+     * @default true
+     */
+    preserveAscii?: boolean;
+    /**
+     * If true, Latin-1 characters (0x00-0xFF) will not be escaped.
+     * Only applies when preserveAscii is also true or undefined, and is
+     * overridden by `filter` when one is provided.
+     * @default false
+     */
+    preserveLatin1?: boolean;
+    /**
+     * If true, use uppercase hex digits (A-F), otherwise lowercase (a-f)
+     * @default true
+     */
+    uppercase?: boolean;
+}
+/**
+ * Options accepted by `unescape`; the format-specific unescape helpers
+ * accept the same shape without `formats`.
+ */
+interface UnescapeOptions {
+    /**
+     * Specific formats to unescape. If not provided, all formats are attempted.
+     */
+    formats?: EscapeFormat[];
+    /**
+     * If true, invalid escape sequences will be left as-is instead of throwing.
+     * NOTE(review): the error raised when this is false is not visible in
+     * these declarations.
+     * @default true
+     */
+    lenient?: boolean;
+}
+/**
+ * Information about a single Unicode character, as returned by `getCharInfo`.
+ */
+interface CharacterInfo {
+    /** The character itself */
+    char: string;
+    /** The Unicode code point */
+    codePoint: number;
+    /** Hexadecimal representation of the code point (prefix/case not specified here) */
+    hex: string;
+    /** Whether the character is in the ASCII range (0x00-0x7F) */
+    isAscii: boolean;
+    /** Whether the character is in the BMP (0x0000-0xFFFF) */
+    isBmp: boolean;
+    /** Whether the character is in the Latin-1 range (0x00-0xFF) */
+    isLatin1: boolean;
+    /** Whether the character is a high surrogate (0xD800-0xDBFF) */
+    isHighSurrogate: boolean;
+    /** Whether the character is a low surrogate (0xDC00-0xDFFF) */
+    isLowSurrogate: boolean;
+    /** Length of the character in UTF-16 code units */
+    utf16Length: number;
+}
+/**
+ * Result of escaping a string with detailed information, as returned by
+ * `escapeWithInfo`.
+ * NOTE(review): whether "characters" are counted as code points or UTF-16
+ * code units is not visible in these declarations.
+ */
+interface EscapeResult {
+    /** The escaped string */
+    escaped: string;
+    /** Number of characters that were escaped */
+    escapedCount: number;
+    /** Number of characters that were preserved (left unescaped) */
+    preservedCount: number;
+    /** Total number of characters processed */
+    totalCount: number;
+}
+/**
+ * Stream transformer options.
+ * NOTE(review): within this file this interface is only exported; the stream
+ * classes and factories take `StreamTransformOptions` instead.
+ */
+interface StreamOptions {
+    /** Options passed to escape (`escapeOptions`) and to unescape (`unescapeOptions`) */
+    escapeOptions?: EscapeOptions;
+    unescapeOptions?: UnescapeOptions;
+    /** High water mark for the stream buffer */
+    highWaterMark?: number;
+}
+/**
+ * Escapes Unicode characters in a string according to the specified options.
+ * With no options, the documented defaults apply: format 'unicode',
+ * preserveAscii true, preserveLatin1 false, uppercase true.
+ *
+ * @param input - The string to escape
+ * @param options - Escape options (see `EscapeOptions`)
+ * @returns The escaped string
+ *
+ * @example
+ * ```ts
+ * escape('Hello 世界')
+ * // => 'Hello \\u4E16\\u754C'
+ *
+ * escape('Hello 世界', { format: 'unicode-es6' })
+ * // => 'Hello \\u{4E16}\\u{754C}'
+ *
+ * // preserveAscii: true is already the default; shown here for clarity
+ * escape('Café', { preserveAscii: true })
+ * // => 'Caf\\u00E9'
+ * ```
+ */
+declare function escape(input: string, options?: EscapeOptions): string;
+/**
+ * Escapes Unicode characters and returns detailed information about the operation.
+ * Takes the same arguments as `escape`.
+ *
+ * @param input - The string to escape
+ * @param options - Escape options (see `EscapeOptions`)
+ * @returns An `EscapeResult` holding the escaped string plus escaped,
+ *          preserved and total character counts
+ */
+declare function escapeWithInfo(input: string, options?: EscapeOptions): EscapeResult;
+/**
+ * Convenience function: escape to \uXXXX format
+ *
+ * @param input - The string to escape
+ * @param options - Escape options; `format` is not part of the accepted type
+ * @returns The escaped string
+ */
+declare function escapeToUnicode(input: string, options?: Omit<EscapeOptions, 'format'>): string;
+/**
+ * Convenience function: escape to \u{XXXXX} ES6 format
+ *
+ * @param input - The string to escape
+ * @param options - Escape options; `format` is not part of the accepted type
+ * @returns The escaped string
+ */
+declare function escapeToUnicodeES6(input: string, options?: Omit<EscapeOptions, 'format'>): string;
+/**
+ * Convenience function: escape to \xNN format (falls back to \uXXXX for non-Latin1)
+ *
+ * @param input - The string to escape
+ * @param options - Escape options; `format` is not part of the accepted type
+ * @returns The escaped string
+ */
+declare function escapeToHex(input: string, options?: Omit<EscapeOptions, 'format'>): string;
+/**
+ * Convenience function: escape to &#xNNNN; HTML hex entity format
+ *
+ * @param input - The string to escape
+ * @param options - Escape options; `format` is not part of the accepted type
+ * @returns The escaped string
+ */
+declare function escapeToHtmlHex(input: string, options?: Omit<EscapeOptions, 'format'>): string;
+/**
+ * Convenience function: escape to &#NNNN; HTML decimal entity format
+ *
+ * @param input - The string to escape
+ * @param options - Escape options; `format` is not part of the accepted type
+ * @returns The escaped string
+ */
+declare function escapeToHtmlDecimal(input: string, options?: Omit<EscapeOptions, 'format'>): string;
+/**
+ * Convenience function: escape to U+XXXX code point format
+ *
+ * @param input - The string to escape
+ * @param options - Escape options; `format` is not part of the accepted type
+ * @returns The escaped string
+ */
+declare function escapeToCodePoint(input: string, options?: Omit<EscapeOptions, 'format'>): string;
+/**
+ * Escapes all characters in a string (ignores preserve options).
+ * The accepted options type omits `filter`, `preserveAscii` and
+ * `preserveLatin1`, leaving `format` and `uppercase`.
+ *
+ * @param input - The string to escape
+ * @param options - Remaining escape options
+ * @returns The escaped string
+ */
+declare function escapeAll(input: string, options?: Omit<EscapeOptions, 'filter' | 'preserveAscii' | 'preserveLatin1'>): string;
+/**
+ * Escapes only non-printable and control characters.
+ * The accepted options type omits `filter`.
+ * NOTE(review): the exact set of characters treated as non-printable is not
+ * visible in these declarations.
+ *
+ * @param input - The string to escape
+ * @param options - Escape options without `filter`
+ * @returns The escaped string
+ */
+declare function escapeNonPrintable(input: string, options?: Omit<EscapeOptions, 'filter'>): string;
+/**
+ * Unescapes Unicode escape sequences in a string.
+ * Per `UnescapeOptions`, all formats are attempted unless `formats` is given,
+ * and invalid sequences are left as-is unless `lenient` is set to false.
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options (see `UnescapeOptions`)
+ * @returns The unescaped string
+ *
+ * @example
+ * ```ts
+ * unescape('Hello \\u4E16\\u754C')
+ * // => 'Hello 世界'
+ *
+ * unescape('Hello \\u{4E16}\\u{754C}')
+ * // => 'Hello 世界'
+ *
+ * unescape('Caf&#xE9;')
+ * // => 'Café'
+ * ```
+ */
+declare function unescape(input: string, options?: UnescapeOptions): string;
+/**
+ * Unescapes only \uXXXX format (with surrogate pair support)
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeUnicode(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes only \u{XXXXX} ES6 format
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeUnicodeES6(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes only \xNN format
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeHex(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes only &#xNNNN; HTML hex entity format
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeHtmlHex(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes only &#NNNN; HTML decimal entity format
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeHtmlDecimal(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes only U+XXXX code point format
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeCodePoint(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes all HTML entities (both hex and decimal)
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeHtml(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Unescapes all JavaScript escape formats (\uXXXX, \u{XXXXX}, \xNN)
+ *
+ * @param input - The string containing escape sequences
+ * @param options - Unescape options; `formats` is not part of the accepted type
+ * @returns The unescaped string
+ */
+declare function unescapeJs(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
+/**
+ * Checks if a string contains any escape sequences
+ *
+ * @param input - The string to inspect
+ * @param formats - Optional list of formats; presumably restricts the check to
+ *                  these formats, with all formats considered when omitted
+ *                  (TODO confirm against the implementation)
+ * @returns true if at least one escape sequence is found
+ */
+declare function hasEscapeSequences(input: string, formats?: EscapeFormat[]): boolean;
+/**
+ * Counts the number of escape sequences in a string
+ *
+ * @param input - The string to inspect
+ * @param formats - Optional list of formats; presumably restricts the count to
+ *                  these formats, with all formats considered when omitted
+ *                  (TODO confirm against the implementation)
+ * @returns The number of escape sequences found
+ */
+declare function countEscapeSequences(input: string, formats?: EscapeFormat[]): number;
+/**
+ * Returns true if the character is in the ASCII range (0x00-0x7F).
+ * Filter-function variant; publicly exported as `isAscii`.
+ */
+declare const isAscii$1: FilterFunction;
+/**
+ * Returns true if the character is NOT in the ASCII range
+ */
+declare const isNotAscii: FilterFunction;
+/**
+ * Returns true if the character is in the Latin-1 range (0x00-0xFF).
+ * Filter-function variant; publicly exported as `isLatin1`.
+ */
+declare const isLatin1$1: FilterFunction;
+/**
+ * Returns true if the character is NOT in the Latin-1 range
+ */
+declare const isNotLatin1: FilterFunction;
+/**
+ * Returns true if the character is in the BMP (0x0000-0xFFFF).
+ * Filter-function variant; publicly exported as `isBmp`.
+ */
+declare const isBmp$1: FilterFunction;
+/**
+ * Returns true if the character is NOT in the BMP (supplementary planes)
+ */
+declare const isNotBmp: FilterFunction;
+/**
+ * Returns true if the character is a high surrogate (0xD800-0xDBFF).
+ * Filter-function variant; publicly exported as `isHighSurrogate`.
+ */
+declare const isHighSurrogate$1: FilterFunction;
+/**
+ * Returns true if the character is a low surrogate (0xDC00-0xDFFF).
+ * Filter-function variant; publicly exported as `isLowSurrogate`.
+ */
+declare const isLowSurrogate$1: FilterFunction;
+/**
+ * Returns true if the character is any surrogate (0xD800-0xDFFF).
+ * Filter-function variant; publicly exported as `isSurrogate`.
+ */
+declare const isSurrogate$1: FilterFunction;
+/**
+ * Returns true if the character is a printable ASCII character (0x20-0x7E)
+ */
+declare const isPrintableAscii: FilterFunction;
+/**
+ * Returns true if the character is NOT a printable ASCII character
+ */
+declare const isNotPrintableAscii: FilterFunction;
+/**
+ * Returns true if the character is a control character (0x00-0x1F or 0x7F)
+ */
+declare const isControl: FilterFunction;
+/**
+ * Returns true if the character is a whitespace character.
+ * NOTE(review): the exact whitespace set is not visible in these declarations.
+ */
+declare const isWhitespace: FilterFunction;
+/**
+ * Creates a filter that matches characters within a specific range
+ *
+ * @param start - Lower bound of the code point range
+ * @param end - Upper bound of the code point range
+ *              (NOTE(review): bound inclusivity is not visible here - confirm)
+ * @returns A filter usable as `EscapeOptions.filter`
+ */
+declare function inRange(start: number, end: number): FilterFunction;
+/**
+ * Creates a filter that matches characters outside a specific range
+ *
+ * @param start - Lower bound of the code point range
+ * @param end - Upper bound of the code point range
+ *              (NOTE(review): bound inclusivity is not visible here - confirm)
+ * @returns A filter usable as `EscapeOptions.filter`
+ */
+declare function notInRange(start: number, end: number): FilterFunction;
+/**
+ * Creates a filter that matches any of the specified characters
+ *
+ * @param chars - String whose characters form the set to match
+ */
+declare function oneOf(chars: string): FilterFunction;
+/**
+ * Creates a filter that matches none of the specified characters
+ *
+ * @param chars - String whose characters form the set to exclude
+ */
+declare function noneOf(chars: string): FilterFunction;
+/**
+ * Combines multiple filters with AND logic (all must return true)
+ *
+ * @param filters - Filters to combine (behaviour with zero filters is not
+ *                  visible here)
+ */
+declare function and(...filters: FilterFunction[]): FilterFunction;
+/**
+ * Combines multiple filters with OR logic (any must return true)
+ *
+ * @param filters - Filters to combine (behaviour with zero filters is not
+ *                  visible here)
+ */
+declare function or(...filters: FilterFunction[]): FilterFunction;
+/**
+ * Negates a filter
+ *
+ * @param filter - The filter whose result is inverted
+ */
+declare function not(filter: FilterFunction): FilterFunction;
+/**
+ * Filter that always returns true (escape all characters)
+ */
+declare const all: FilterFunction;
+/**
+ * Filter that always returns false (escape no characters)
+ */
+declare const none: FilterFunction;
+/**
+ * Gets the Unicode code point of a character
+ *
+ * @param char - The character (can be a surrogate pair)
+ * @returns The code point, or undefined if invalid (callers must handle the
+ *          undefined case)
+ */
+declare function getCodePoint(char: string): number | undefined;
+/**
+ * Creates a character from a code point
+ *
+ * @param codePoint - The Unicode code point
+ * @returns The character
+ * @throws If the code point is invalid (the error type is not visible in
+ *         these declarations)
+ */
+declare function fromCodePoint(codePoint: number): string;
+/**
+ * Checks if a character is in the ASCII range (0x00-0x7F).
+ * Takes the character only; publicly exported as `isAsciiChar`.
+ */
+declare function isAscii(char: string): boolean;
+/**
+ * Checks if a character is in the Latin-1 range (0x00-0xFF).
+ * Takes the character only; publicly exported as `isLatin1Char`.
+ */
+declare function isLatin1(char: string): boolean;
+/**
+ * Checks if a character is in the BMP (0x0000-0xFFFF).
+ * Takes the character only; publicly exported as `isBmpChar`.
+ */
+declare function isBmp(char: string): boolean;
+/**
+ * Checks if a code point is a high surrogate (0xD800-0xDBFF).
+ * Takes a numeric code point; publicly exported as `isHighSurrogateCode`.
+ */
+declare function isHighSurrogate(codePoint: number): boolean;
+/**
+ * Checks if a code point is a low surrogate (0xDC00-0xDFFF).
+ * Takes a numeric code point; publicly exported as `isLowSurrogateCode`.
+ */
+declare function isLowSurrogate(codePoint: number): boolean;
+/**
+ * Checks if a code point is any surrogate (0xD800-0xDFFF).
+ * Takes a numeric code point; publicly exported as `isSurrogateCode`.
+ */
+declare function isSurrogate(codePoint: number): boolean;
+/**
+ * Gets detailed information about a character
+ *
+ * @param char - The character to describe
+ * @returns A `CharacterInfo` record, or undefined (presumably when the input
+ *          has no valid code point, mirroring `getCodePoint` - TODO confirm)
+ */
+declare function getCharInfo(char: string): CharacterInfo | undefined;
+/**
+ * Iterates over characters in a string, yielding code points
+ * (handles surrogate pairs correctly)
+ *
+ * Each yielded object carries the character (`char`), its code point
+ * (`codePoint`) and its position (`index`).
+ * NOTE(review): whether `index` is a UTF-16 offset or a code point ordinal is
+ * not visible in these declarations.
+ *
+ * @param input - The string to iterate
+ */
+declare function iterateCodePoints(input: string): Generator<{
+    char: string;
+    codePoint: number;
+    index: number;
+}>;
+/**
+ * Converts a string to an array of code points
+ *
+ * @param input - The string to convert
+ */
+declare function toCodePoints(input: string): number[];
+/**
+ * Converts an array of code points to a string
+ *
+ * @param codePoints - The code points to join into a string
+ */
+declare function fromCodePoints(codePoints: number[]): string;
+/**
+ * Gets the length of a string in code points (not UTF-16 code units)
+ *
+ * @param input - The string to measure
+ */
+declare function codePointLength(input: string): number;
+/**
+ * Converts a code point to its hex representation with optional prefix
+ *
+ * @param codePoint - The code point to format
+ * @param options - Optional `prefix` string, `minLength` (presumably
+ *                  zero-padding width) and `uppercase` flag; their defaults
+ *                  are not visible in these declarations
+ */
+declare function toHex(codePoint: number, options?: {
+    prefix?: string;
+    minLength?: number;
+    uppercase?: boolean;
+}): string;
+/**
+ * Parses a hex string (with or without prefix) to a code point
+ *
+ * @param hex - The hex string to parse
+ * @returns The parsed code point, or undefined (presumably when the input
+ *          cannot be parsed - TODO confirm)
+ */
+declare function parseHex(hex: string): number | undefined;
+/**
+ * Validates if a string contains only valid Unicode characters
+ * (no unpaired surrogates)
+ *
+ * @param input - The string to validate
+ */
+declare function isValidUnicode(input: string): boolean;
+/**
+ * Normalizes a string to NFC form
+ *
+ * @param input - The string to normalize
+ */
+declare function normalizeNFC(input: string): string;
+/**
+ * Normalizes a string to NFD form
+ *
+ * @param input - The string to normalize
+ */
+declare function normalizeNFD(input: string): string;
+/**
+ * Compares two strings for Unicode equivalence
+ * NOTE(review): the normalization form used for the comparison is not visible
+ * in these declarations.
+ *
+ * @param a - First string
+ * @param b - Second string
+ */
+declare function unicodeEquals(a: string, b: string): boolean;
+/**
+ * Options for stream transformers.
+ * Extends Node's `TransformOptions` with two optional fields:
+ * `escapeOptions` (an `EscapeOptions` object) and `unescapeOptions`
+ * (an `UnescapeOptions` object). This is the options type taken by
+ * `EscapeStream`, `UnescapeStream` and their factory functions.
+ */
+interface StreamTransformOptions extends TransformOptions {
+    escapeOptions?: EscapeOptions;
+    unescapeOptions?: UnescapeOptions;
+}
+/**
+ * Transform stream that escapes Unicode characters.
+ *
+ * Constructed from `StreamTransformOptions`. `_transform` and `_flush` are
+ * the standard `Transform` hooks invoked by the stream machinery and are not
+ * meant to be called directly. The private `buffer` presumably carries data
+ * across chunk boundaries (implementation not visible here).
+ *
+ * @example
+ * ```ts
+ * import { createReadStream, createWriteStream } from 'node:fs';
+ * import { EscapeStream } from 'unicode-escaper';
+ *
+ * createReadStream('input.txt')
+ *   .pipe(new EscapeStream({ escapeOptions: { format: 'unicode-es6' } }))
+ *   .pipe(createWriteStream('output.txt'));
+ * ```
+ */
+declare class EscapeStream extends Transform {
+    private readonly escapeOptions;
+    private buffer;
+    constructor(options?: StreamTransformOptions);
+    _transform(chunk: Buffer | string, _encoding: BufferEncoding, callback: TransformCallback): void;
+    _flush(callback: TransformCallback): void;
+}
+/**
+ * Transform stream that unescapes Unicode sequences.
+ *
+ * Constructed from `StreamTransformOptions`. `_transform` and `_flush` are
+ * the standard `Transform` hooks invoked by the stream machinery and are not
+ * meant to be called directly. The private `buffer` and `maxLookback`
+ * presumably exist to handle escape sequences split across chunk boundaries
+ * (implementation not visible here).
+ *
+ * @example
+ * ```ts
+ * import { createReadStream, createWriteStream } from 'node:fs';
+ * import { UnescapeStream } from 'unicode-escaper';
+ *
+ * createReadStream('escaped.txt')
+ *   .pipe(new UnescapeStream())
+ *   .pipe(createWriteStream('output.txt'));
+ * ```
+ */
+declare class UnescapeStream extends Transform {
+    private readonly unescapeOptions;
+    private buffer;
+    private readonly maxLookback;
+    constructor(options?: StreamTransformOptions);
+    _transform(chunk: Buffer | string, _encoding: BufferEncoding, callback: TransformCallback): void;
+    _flush(callback: TransformCallback): void;
+}
+/**
+ * Creates an escape transform stream.
+ * Accepts the same options type as the `EscapeStream` constructor.
+ *
+ * @param options - Stream and escape options
+ * @returns A transform stream that escapes Unicode characters
+ */
+declare function createEscapeStream(options?: StreamTransformOptions): EscapeStream;
+/**
+ * Creates an unescape transform stream.
+ * Accepts the same options type as the `UnescapeStream` constructor.
+ *
+ * @param options - Stream and unescape options
+ * @returns A transform stream that unescapes Unicode sequences
+ */
+declare function createUnescapeStream(options?: StreamTransformOptions): UnescapeStream;
+/**
+ * Web Streams API support (for browsers and modern Node.js)
+ */
+/**
+ * Creates a TransformStream for escaping (Web Streams API).
+ * Operates on string chunks and takes `EscapeOptions` directly, not
+ * `StreamTransformOptions`.
+ *
+ * @param options - Escape options
+ * @returns A string-to-string `TransformStream`
+ *
+ * @example
+ * ```ts
+ * const response = await fetch('data.txt');
+ * // NOTE: response.body may be null and should be checked in real code
+ * const escaped = response.body
+ *   .pipeThrough(new TextDecoderStream())
+ *   .pipeThrough(createWebEscapeStream())
+ *   .pipeThrough(new TextEncoderStream());
+ * ```
+ */
+declare function createWebEscapeStream(options?: EscapeOptions): TransformStream<string, string>;
+/**
+ * Creates a TransformStream for unescaping (Web Streams API).
+ * Operates on string chunks and takes `UnescapeOptions` directly, not
+ * `StreamTransformOptions`.
+ *
+ * @param options - Unescape options
+ * @returns A string-to-string `TransformStream`
+ */
+declare function createWebUnescapeStream(options?: UnescapeOptions): TransformStream<string, string>;
+/**
+ * Format escape functions - convert a code point to its escaped representation.
+ * One entry per `EscapeFormat`; each takes the code point and a flag selecting
+ * uppercase hex digits.
+ */
+declare const formatters: Record<EscapeFormat, (codePoint: number, uppercase: boolean) => string>;
+/**
+ * Regular expressions to match each escape format, keyed by `EscapeFormat`.
+ * NOTE(review): whether these carry the global flag (and therefore mutable
+ * `lastIndex` state) is not visible here - confirm before reusing them.
+ */
+declare const unescapePatterns: Record<EscapeFormat, RegExp>;
+/**
+ * Validates if a code point is valid Unicode
+ *
+ * @param codePoint - The code point to validate
+ */
+declare function isValidCodePoint(codePoint: number): boolean;
+/**
+ * Checks whether a code point is a surrogate
+ * (presumably the same 0xD800-0xDFFF range documented for `isSurrogate` in
+ * this file - TODO confirm)
+ *
+ * @param codePoint - The code point to check
+ */
+declare function isSurrogateCodePoint(codePoint: number): boolean;
+/**
+ * Converts surrogate pair to code point
+ *
+ * @param high - The high (leading) surrogate code unit
+ * @param low - The low (trailing) surrogate code unit
+ * @returns The combined code point
+ */
+declare function surrogateToCodePoint(high: number, low: number): number;
+export { type CharacterInfo, type EscapeFormat, type EscapeOptions, type EscapeResult, EscapeStream, type FilterFunction, type StreamOptions, type StreamTransformOptions, type UnescapeOptions, UnescapeStream, all, and, codePointLength, countEscapeSequences, createEscapeStream, createUnescapeStream, createWebEscapeStream, createWebUnescapeStream, escape, escapeAll, escapeNonPrintable, escapeToCodePoint, escapeToHex, escapeToHtmlDecimal, escapeToHtmlHex, escapeToUnicode, escapeToUnicodeES6, escapeWithInfo, formatters, fromCodePoint, fromCodePoints, getCharInfo, getCodePoint, hasEscapeSequences, inRange, isAscii$1 as isAscii, isAscii as isAsciiChar, isBmp$1 as isBmp, isBmp as isBmpChar, isControl, isHighSurrogate$1 as isHighSurrogate, isHighSurrogate as isHighSurrogateCode, isLatin1$1 as isLatin1, isLatin1 as isLatin1Char, isLowSurrogate$1 as isLowSurrogate, isLowSurrogate as isLowSurrogateCode, isNotAscii, isNotBmp, isNotLatin1, isNotPrintableAscii, isPrintableAscii, isSurrogate$1 as isSurrogate, isSurrogate as isSurrogateCode, isSurrogateCodePoint, isValidCodePoint, isValidUnicode, isWhitespace, iterateCodePoints, none, noneOf, normalizeNFC, normalizeNFD, not, notInRange, oneOf, or, parseHex, surrogateToCodePoint, toCodePoints, toHex, unescape, unescapeCodePoint, unescapeHex, unescapeHtml, unescapeHtmlDecimal, unescapeHtmlHex, unescapeJs, unescapePatterns, unescapeUnicode, unescapeUnicodeES6, unicodeEquals };