unicode-escaper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,515 @@
1
+ import { Transform, TransformOptions, TransformCallback } from 'node:stream';
2
+
3
+ /**
4
+ * Supported escape format types. Output notation, per the escapeTo* helper docs in this file: 'unicode' \uXXXX (the default for escape), 'unicode-es6' \u{XXXXX}, 'hex' \xNN, 'html-hex' &#xNNNN;, 'html-decimal' &#NNNN;, 'codepoint' U+XXXX
5
+ */
6
+ type EscapeFormat = 'unicode' | 'unicode-es6' | 'hex' | 'html-hex' | 'html-decimal' | 'codepoint';
7
+ /**
8
+ * Filter function to determine which characters should be escaped
9
+ * @param char - The character to check
10
+ * @param codePoint - The Unicode code point of the character
11
+ * @returns true if the character should be escaped, false otherwise
12
+ */
13
+ type FilterFunction = (char: string, codePoint: number) => boolean;
14
+ /**
15
+ * Options for the escape function
16
+ */
17
+ interface EscapeOptions {
18
+ /**
19
+ * The escape format to use
20
+ * @default 'unicode'
21
+ */
22
+ format?: EscapeFormat;
23
+ /**
24
+ * Custom filter function to determine which characters to escape.
25
+ * If provided, this takes precedence over preserveAscii and preserveLatin1.
26
+ */
27
+ filter?: FilterFunction;
28
+ /**
29
+ * If true, ASCII characters (0x00-0x7F) will not be escaped
30
+ * @default true
31
+ */
32
+ preserveAscii?: boolean;
33
+ /**
34
+ * If true, Latin-1 characters (0x00-0xFF) will not be escaped.
35
+ * Only applies when preserveAscii is also true or undefined.
36
+ * @default false
37
+ */
38
+ preserveLatin1?: boolean;
39
+ /**
40
+ * If true, use uppercase hex digits (A-F), otherwise lowercase (a-f)
41
+ * @default true
42
+ */
43
+ uppercase?: boolean;
44
+ }
45
+ /**
46
+ * Options for the unescape function
47
+ */
48
+ interface UnescapeOptions {
49
+ /**
50
+ * Specific formats to unescape. If not provided, all formats are attempted.
51
+ */
52
+ formats?: EscapeFormat[];
53
+ /**
54
+ * If true, invalid escape sequences will be left as-is instead of throwing
55
+ * @default true
56
+ */
57
+ lenient?: boolean;
58
+ }
59
+ /**
60
+ * Information about a Unicode character
61
+ */
62
+ interface CharacterInfo {
63
+ /** The character itself */
64
+ char: string;
65
+ /** The Unicode code point */
66
+ codePoint: number;
67
+ /** Hexadecimal representation of the code point */
68
+ hex: string;
69
+ /** Whether the character is in the ASCII range (0x00-0x7F) */
70
+ isAscii: boolean;
71
+ /** Whether the character is in the BMP (0x0000-0xFFFF) */
72
+ isBmp: boolean;
73
+ /** Whether the character is in the Latin-1 range (0x00-0xFF) */
74
+ isLatin1: boolean;
75
+ /** Whether the character is a high surrogate (0xD800-0xDBFF) */
76
+ isHighSurrogate: boolean;
77
+ /** Whether the character is a low surrogate (0xDC00-0xDFFF) */
78
+ isLowSurrogate: boolean;
79
+ /** Length in UTF-16 code units */
80
+ utf16Length: number;
81
+ }
82
+ /**
83
+ * Result of escaping a string with detailed information
84
+ */
85
+ interface EscapeResult {
86
+ /** The escaped string */
87
+ escaped: string;
88
+ /** Number of characters that were escaped */
89
+ escapedCount: number;
90
+ /** Number of characters that were preserved */
91
+ preservedCount: number;
92
+ /** Total number of characters processed */
93
+ totalCount: number;
94
+ }
95
+ /**
96
+ * Stream transformer options. NOTE(review): no declaration in this file accepts this type; the stream classes and create*Stream factories take StreamTransformOptions instead — confirm whether this export is still needed.
97
+ */
98
+ interface StreamOptions {
99
+ /** Options passed to escape; the `unescapeOptions` member below is the counterpart for unescape */
100
+ escapeOptions?: EscapeOptions;
101
+ unescapeOptions?: UnescapeOptions;
102
+ /** High water mark for the stream buffer */
103
+ highWaterMark?: number;
104
+ }
105
+
106
+ /**
107
+ * Escapes Unicode characters in a string according to the specified options
108
+ *
109
+ * @param input - The string to escape
110
+ * @param options - Escape options
111
+ * @returns The escaped string
112
+ *
113
+ * @example
114
+ * ```ts
115
+ * escape('Hello 世界')
116
+ * // => 'Hello \\u4E16\\u754C'
117
+ *
118
+ * escape('Hello 世界', { format: 'unicode-es6' })
119
+ * // => 'Hello \\u{4E16}\\u{754C}'
120
+ *
121
+ * escape('Café', { preserveAscii: true })
122
+ * // => 'Caf\\u00E9'
123
+ * ```
124
+ */
125
+ declare function escape(input: string, options?: EscapeOptions): string;
126
+ /**
127
+ * Escapes Unicode characters and returns detailed information about the operation
128
+ *
129
+ * @param input - The string to escape
130
+ * @param options - Escape options
131
+ * @returns Object containing escaped string and statistics
132
+ */
133
+ declare function escapeWithInfo(input: string, options?: EscapeOptions): EscapeResult;
134
+ /**
135
+ * Convenience function: escape to \uXXXX format
136
+ */
137
+ declare function escapeToUnicode(input: string, options?: Omit<EscapeOptions, 'format'>): string;
138
+ /**
139
+ * Convenience function: escape to \u{XXXXX} ES6 format
140
+ */
141
+ declare function escapeToUnicodeES6(input: string, options?: Omit<EscapeOptions, 'format'>): string;
142
+ /**
143
+ * Convenience function: escape to \xNN format (falls back to \uXXXX for non-Latin1)
144
+ */
145
+ declare function escapeToHex(input: string, options?: Omit<EscapeOptions, 'format'>): string;
146
+ /**
147
+ * Convenience function: escape to &#xNNNN; HTML hex entity format
148
+ */
149
+ declare function escapeToHtmlHex(input: string, options?: Omit<EscapeOptions, 'format'>): string;
150
+ /**
151
+ * Convenience function: escape to &#NNNN; HTML decimal entity format
152
+ */
153
+ declare function escapeToHtmlDecimal(input: string, options?: Omit<EscapeOptions, 'format'>): string;
154
+ /**
155
+ * Convenience function: escape to U+XXXX code point format
156
+ */
157
+ declare function escapeToCodePoint(input: string, options?: Omit<EscapeOptions, 'format'>): string;
158
+ /**
159
+ * Escapes all characters in a string; `filter`, `preserveAscii` and `preserveLatin1` are excluded from the accepted options
160
+ */
161
+ declare function escapeAll(input: string, options?: Omit<EscapeOptions, 'filter' | 'preserveAscii' | 'preserveLatin1'>): string;
162
+ /**
163
+ * Escapes only non-printable and control characters; a custom `filter` cannot be supplied (it is omitted from the options type)
164
+ */
165
+ declare function escapeNonPrintable(input: string, options?: Omit<EscapeOptions, 'filter'>): string;
166
+
167
+ /**
168
+ * Unescapes Unicode escape sequences in a string
169
+ *
170
+ * @param input - The string containing escape sequences
171
+ * @param options - Unescape options
172
+ * @returns The unescaped string
173
+ *
174
+ * @example
175
+ * ```ts
176
+ * unescape('Hello \\u4E16\\u754C')
177
+ * // => 'Hello 世界'
178
+ *
179
+ * unescape('Hello \\u{4E16}\\u{754C}')
180
+ * // => 'Hello 世界'
181
+ *
182
+ * unescape('Caf&#xE9;')
183
+ * // => 'Café'
184
+ * ```
185
+ */
186
+ declare function unescape(input: string, options?: UnescapeOptions): string;
187
+ /**
188
+ * Unescapes only \uXXXX format (with surrogate pair support)
189
+ */
190
+ declare function unescapeUnicode(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
191
+ /**
192
+ * Unescapes only \u{XXXXX} ES6 format
193
+ */
194
+ declare function unescapeUnicodeES6(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
195
+ /**
196
+ * Unescapes only \xNN format
197
+ */
198
+ declare function unescapeHex(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
199
+ /**
200
+ * Unescapes only &#xNNNN; HTML hex entity format
201
+ */
202
+ declare function unescapeHtmlHex(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
203
+ /**
204
+ * Unescapes only &#NNNN; HTML decimal entity format
205
+ */
206
+ declare function unescapeHtmlDecimal(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
207
+ /**
208
+ * Unescapes only U+XXXX code point format
209
+ */
210
+ declare function unescapeCodePoint(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
211
+ /**
212
+ * Unescapes numeric HTML entities in both the &#xNNNN; (hex) and &#NNNN; (decimal) forms. NOTE(review): named entities such as &amp; are presumably not handled — confirm.
213
+ */
214
+ declare function unescapeHtml(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
215
+ /**
216
+ * Unescapes all JavaScript escape formats (\uXXXX, \u{XXXXX}, \xNN)
217
+ */
218
+ declare function unescapeJs(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
219
+ /**
220
+ * Checks if a string contains any escape sequences, optionally restricted to the given `formats` (presumably every format is checked when omitted — confirm)
221
+ */
222
+ declare function hasEscapeSequences(input: string, formats?: EscapeFormat[]): boolean;
223
+ /**
224
+ * Counts the number of escape sequences in a string, optionally restricted to the given `formats` (presumably every format is counted when omitted — confirm)
225
+ */
226
+ declare function countEscapeSequences(input: string, formats?: EscapeFormat[]): number;
227
+
228
+ /**
229
+ * Returns true if the character is in the ASCII range (0x00-0x7F)
230
+ */
231
+ declare const isAscii$1: FilterFunction;
232
+ /**
233
+ * Returns true if the character is NOT in the ASCII range
234
+ */
235
+ declare const isNotAscii: FilterFunction;
236
+ /**
237
+ * Returns true if the character is in the Latin-1 range (0x00-0xFF)
238
+ */
239
+ declare const isLatin1$1: FilterFunction;
240
+ /**
241
+ * Returns true if the character is NOT in the Latin-1 range
242
+ */
243
+ declare const isNotLatin1: FilterFunction;
244
+ /**
245
+ * Returns true if the character is in the BMP (0x0000-0xFFFF)
246
+ */
247
+ declare const isBmp$1: FilterFunction;
248
+ /**
249
+ * Returns true if the character is NOT in the BMP (supplementary planes)
250
+ */
251
+ declare const isNotBmp: FilterFunction;
252
+ /**
253
+ * Returns true if the character is a high surrogate (0xD800-0xDBFF)
254
+ */
255
+ declare const isHighSurrogate$1: FilterFunction;
256
+ /**
257
+ * Returns true if the character is a low surrogate (0xDC00-0xDFFF)
258
+ */
259
+ declare const isLowSurrogate$1: FilterFunction;
260
+ /**
261
+ * Returns true if the character is any surrogate (0xD800-0xDFFF)
262
+ */
263
+ declare const isSurrogate$1: FilterFunction;
264
+ /**
265
+ * Returns true if the character is a printable ASCII character (0x20-0x7E)
266
+ */
267
+ declare const isPrintableAscii: FilterFunction;
268
+ /**
269
+ * Returns true if the character is NOT a printable ASCII character
270
+ */
271
+ declare const isNotPrintableAscii: FilterFunction;
272
+ /**
273
+ * Returns true if the character is a control character (0x00-0x1F or 0x7F)
274
+ */
275
+ declare const isControl: FilterFunction;
276
+ /**
277
+ * Returns true if the character is a whitespace character
278
+ */
279
+ declare const isWhitespace: FilterFunction;
280
+ /**
281
+ * Creates a filter that matches characters within a specific range
282
+ */
283
+ declare function inRange(start: number, end: number): FilterFunction;
284
+ /**
285
+ * Creates a filter that matches characters outside a specific range
286
+ */
287
+ declare function notInRange(start: number, end: number): FilterFunction;
288
+ /**
289
+ * Creates a filter that matches any of the specified characters
290
+ */
291
+ declare function oneOf(chars: string): FilterFunction;
292
+ /**
293
+ * Creates a filter that matches none of the specified characters
294
+ */
295
+ declare function noneOf(chars: string): FilterFunction;
296
+ /**
297
+ * Combines multiple filters with AND logic (all must return true)
298
+ */
299
+ declare function and(...filters: FilterFunction[]): FilterFunction;
300
+ /**
301
+ * Combines multiple filters with OR logic (any must return true)
302
+ */
303
+ declare function or(...filters: FilterFunction[]): FilterFunction;
304
+ /**
305
+ * Negates a filter
306
+ */
307
+ declare function not(filter: FilterFunction): FilterFunction;
308
+ /**
309
+ * Always returns true (escape all characters)
310
+ */
311
+ declare const all: FilterFunction;
312
+ /**
313
+ * Always returns false (escape no characters)
314
+ */
315
+ declare const none: FilterFunction;
316
+
317
+ /**
318
+ * Gets the Unicode code point of a character
319
+ *
320
+ * @param char - The character (can be a surrogate pair)
321
+ * @returns The code point, or undefined if invalid
322
+ */
323
+ declare function getCodePoint(char: string): number | undefined;
324
+ /**
325
+ * Creates a character from a code point
326
+ *
327
+ * @param codePoint - The Unicode code point
328
+ * @returns The character
329
+ * @throws If the code point is invalid
330
+ */
331
+ declare function fromCodePoint(codePoint: number): string;
332
+ /**
333
+ * Checks if a character is in the ASCII range (0x00-0x7F)
334
+ */
335
+ declare function isAscii(char: string): boolean;
336
+ /**
337
+ * Checks if a character is in the Latin-1 range (0x00-0xFF)
338
+ */
339
+ declare function isLatin1(char: string): boolean;
340
+ /**
341
+ * Checks if a character is in the BMP (0x0000-0xFFFF)
342
+ */
343
+ declare function isBmp(char: string): boolean;
344
+ /**
345
+ * Checks if a code point is a high surrogate (0xD800-0xDBFF)
346
+ */
347
+ declare function isHighSurrogate(codePoint: number): boolean;
348
+ /**
349
+ * Checks if a code point is a low surrogate (0xDC00-0xDFFF)
350
+ */
351
+ declare function isLowSurrogate(codePoint: number): boolean;
352
+ /**
353
+ * Checks if a code point is any surrogate (0xD800-0xDFFF)
354
+ */
355
+ declare function isSurrogate(codePoint: number): boolean;
356
+ /**
357
+ * Gets detailed information about a character; the return type also allows undefined (presumably when `char` has no valid code point, as with getCodePoint — confirm)
358
+ */
359
+ declare function getCharInfo(char: string): CharacterInfo | undefined;
360
+ /**
361
+ * Iterates over the characters of a string, yielding an object with the character, its code point and its index
362
+ * (handles surrogate pairs correctly; NOTE(review): whether `index` counts UTF-16 code units or code points is not stated here — confirm)
363
+ */
364
+ declare function iterateCodePoints(input: string): Generator<{
365
+ char: string;
366
+ codePoint: number;
367
+ index: number;
368
+ }>;
369
+ /**
370
+ * Converts a string to an array of code points
371
+ */
372
+ declare function toCodePoints(input: string): number[];
373
+ /**
374
+ * Converts an array of code points to a string
375
+ */
376
+ declare function fromCodePoints(codePoints: number[]): string;
377
+ /**
378
+ * Gets the length of a string in code points (not UTF-16 code units)
379
+ */
380
+ declare function codePointLength(input: string): number;
381
+ /**
382
+ * Converts a code point to its hex representation with optional prefix, minimum length and letter case
383
+ */
384
+ declare function toHex(codePoint: number, options?: {
385
+ prefix?: string;
386
+ minLength?: number;
387
+ uppercase?: boolean;
388
+ }): string;
389
+ /**
390
+ * Parses a hex string (with or without prefix) to a code point; the return type also allows undefined (presumably for input that cannot be parsed — confirm)
391
+ */
392
+ declare function parseHex(hex: string): number | undefined;
393
+ /**
394
+ * Validates if a string contains only valid Unicode characters
395
+ * (no unpaired surrogates)
396
+ */
397
+ declare function isValidUnicode(input: string): boolean;
398
+ /**
399
+ * Normalizes a string to NFC form
400
+ */
401
+ declare function normalizeNFC(input: string): string;
402
+ /**
403
+ * Normalizes a string to NFD form
404
+ */
405
+ declare function normalizeNFD(input: string): string;
406
+ /**
407
+ * Compares two strings for Unicode equivalence. NOTE(review): presumably compares normalized forms (cf. normalizeNFC / normalizeNFD) — confirm which normalization is used.
408
+ */
409
+ declare function unicodeEquals(a: string, b: string): boolean;
410
+
411
+ /**
412
+ * Options for stream transformers: Node's TransformOptions plus optional escape/unescape settings; accepted by EscapeStream, UnescapeStream and the createEscapeStream / createUnescapeStream factories
413
+ */
414
+ interface StreamTransformOptions extends TransformOptions {
415
+ escapeOptions?: EscapeOptions;
416
+ unescapeOptions?: UnescapeOptions;
417
+ }
418
+ /**
419
+ * Transform stream that escapes Unicode characters (configured through `escapeOptions`)
420
+ *
421
+ * @example
422
+ * ```ts
423
+ * import { createReadStream, createWriteStream } from 'node:fs';
424
+ * import { EscapeStream } from 'unicode-escaper';
425
+ *
426
+ * createReadStream('input.txt')
427
+ * .pipe(new EscapeStream({ escapeOptions: { format: 'unicode-es6' } }))
428
+ * .pipe(createWriteStream('output.txt'));
429
+ * ```
430
+ */
431
+ declare class EscapeStream extends Transform {
432
+ private readonly escapeOptions;
433
+ private buffer;
434
+ constructor(options?: StreamTransformOptions);
435
+ _transform(chunk: Buffer | string, _encoding: BufferEncoding, callback: TransformCallback): void;
436
+ _flush(callback: TransformCallback): void;
437
+ }
438
+ /**
439
+ * Transform stream that unescapes Unicode sequences (configured through `unescapeOptions`)
440
+ *
441
+ * @example
442
+ * ```ts
443
+ * import { createReadStream, createWriteStream } from 'node:fs';
444
+ * import { UnescapeStream } from 'unicode-escaper';
445
+ *
446
+ * createReadStream('escaped.txt')
447
+ * .pipe(new UnescapeStream())
448
+ * .pipe(createWriteStream('output.txt'));
449
+ * ```
450
+ */
451
+ declare class UnescapeStream extends Transform {
452
+ private readonly unescapeOptions;
453
+ private buffer;
454
+ private readonly maxLookback;
455
+ constructor(options?: StreamTransformOptions);
456
+ _transform(chunk: Buffer | string, _encoding: BufferEncoding, callback: TransformCallback): void;
457
+ _flush(callback: TransformCallback): void;
458
+ }
459
+ /**
460
+ * Creates an escape transform stream
461
+ *
462
+ * @param options - Stream and escape options
463
+ * @returns A transform stream that escapes Unicode characters
464
+ */
465
+ declare function createEscapeStream(options?: StreamTransformOptions): EscapeStream;
466
+ /**
467
+ * Creates an unescape transform stream
468
+ *
469
+ * @param options - Stream and unescape options
470
+ * @returns A transform stream that unescapes Unicode sequences
471
+ */
472
+ declare function createUnescapeStream(options?: StreamTransformOptions): UnescapeStream;
473
+ /**
474
+ * Web Streams API support (for browsers and modern Node.js)
475
+ */
476
+ /**
477
+ * Creates a TransformStream for escaping (Web Streams API)
478
+ *
479
+ * @example
480
+ * ```ts
481
+ * const response = await fetch('data.txt');
482
+ * const escaped = response.body
483
+ * ?.pipeThrough(new TextDecoderStream())
484
+ * .pipeThrough(createWebEscapeStream())
485
+ * .pipeThrough(new TextEncoderStream());
486
+ * ```
487
+ */
488
+ declare function createWebEscapeStream(options?: EscapeOptions): TransformStream<string, string>;
489
+ /**
490
+ * Creates a TransformStream for unescaping (Web Streams API)
491
+ */
492
+ declare function createWebUnescapeStream(options?: UnescapeOptions): TransformStream<string, string>;
493
+
494
+ /**
495
+ * Format escape functions - convert a code point to its escaped representation
496
+ */
497
+ declare const formatters: Record<EscapeFormat, (codePoint: number, uppercase: boolean) => string>;
498
+ /**
499
+ * Regular expressions to match each escape format
500
+ */
501
+ declare const unescapePatterns: Record<EscapeFormat, RegExp>;
502
+ /**
503
+ * Validates if a code point is valid Unicode
504
+ */
505
+ declare function isValidCodePoint(codePoint: number): boolean;
506
+ /**
506
+ * Checks whether a code point is a surrogate. NOTE(review): appears to overlap with isSurrogate (exported as isSurrogateCode) — confirm whether both are needed.
507
+ */
508
+ declare function isSurrogateCodePoint(codePoint: number): boolean;
510
+ /**
510
+ * Converts a surrogate pair (the `high` and `low` surrogate values) to the single code point it encodes
511
+ */
512
+ declare function surrogateToCodePoint(high: number, low: number): number;
514
+
515
+ export { type CharacterInfo, type EscapeFormat, type EscapeOptions, type EscapeResult, EscapeStream, type FilterFunction, type StreamOptions, type StreamTransformOptions, type UnescapeOptions, UnescapeStream, all, and, codePointLength, countEscapeSequences, createEscapeStream, createUnescapeStream, createWebEscapeStream, createWebUnescapeStream, escape, escapeAll, escapeNonPrintable, escapeToCodePoint, escapeToHex, escapeToHtmlDecimal, escapeToHtmlHex, escapeToUnicode, escapeToUnicodeES6, escapeWithInfo, formatters, fromCodePoint, fromCodePoints, getCharInfo, getCodePoint, hasEscapeSequences, inRange, isAscii$1 as isAscii, isAscii as isAsciiChar, isBmp$1 as isBmp, isBmp as isBmpChar, isControl, isHighSurrogate$1 as isHighSurrogate, isHighSurrogate as isHighSurrogateCode, isLatin1$1 as isLatin1, isLatin1 as isLatin1Char, isLowSurrogate$1 as isLowSurrogate, isLowSurrogate as isLowSurrogateCode, isNotAscii, isNotBmp, isNotLatin1, isNotPrintableAscii, isPrintableAscii, isSurrogate$1 as isSurrogate, isSurrogate as isSurrogateCode, isSurrogateCodePoint, isValidCodePoint, isValidUnicode, isWhitespace, iterateCodePoints, none, noneOf, normalizeNFC, normalizeNFD, not, notInRange, oneOf, or, parseHex, surrogateToCodePoint, toCodePoints, toHex, unescape, unescapeCodePoint, unescapeHex, unescapeHtml, unescapeHtmlDecimal, unescapeHtmlHex, unescapeJs, unescapePatterns, unescapeUnicode, unescapeUnicodeES6, unicodeEquals };