unicode-escaper 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,451 @@
1
+ /**
2
+ * Supported escape format types
3
+ */
4
+ type EscapeFormat = 'unicode' | 'unicode-es6' | 'hex' | 'html-hex' | 'html-decimal' | 'codepoint';
5
+ /**
6
+ * Filter function to determine which characters should be escaped
7
+ * @param char - The character to check
8
+ * @param codePoint - The Unicode code point of the character
9
+ * @returns true if the character should be escaped, false otherwise
10
+ */
11
+ type FilterFunction = (char: string, codePoint: number) => boolean;
12
+ /**
13
+ * Options for the escape function
14
+ */
15
+ interface EscapeOptions {
16
+ /**
17
+ * The escape format to use
18
+ * @default 'unicode'
19
+ */
20
+ format?: EscapeFormat;
21
+ /**
22
+ * Custom filter function to determine which characters to escape.
23
+ * If provided, this takes precedence over preserveAscii and preserveLatin1.
24
+ */
25
+ filter?: FilterFunction;
26
+ /**
27
+ * If true, ASCII characters (0x00-0x7F) will not be escaped
28
+ * @default true
29
+ */
30
+ preserveAscii?: boolean;
31
+ /**
32
+ * If true, Latin-1 characters (0x00-0xFF) will not be escaped.
33
+ * Only applies when preserveAscii is also true or undefined.
34
+ * @default false
35
+ */
36
+ preserveLatin1?: boolean;
37
+ /**
38
+ * If true, use uppercase hex digits (A-F), otherwise lowercase (a-f)
39
+ * @default true
40
+ */
41
+ uppercase?: boolean;
42
+ }
43
+ /**
44
+ * Options for the unescape function
45
+ */
46
+ interface UnescapeOptions {
47
+ /**
48
+ * Specific formats to unescape. If not provided, all formats are attempted.
49
+ */
50
+ formats?: EscapeFormat[];
51
+ /**
52
+ * If true, invalid escape sequences will be left as-is instead of throwing
53
+ * @default true
54
+ */
55
+ lenient?: boolean;
56
+ }
57
+ /**
58
+ * Information about a Unicode character
59
+ */
60
+ interface CharacterInfo {
61
+ /** The character itself */
62
+ char: string;
63
+ /** The Unicode code point */
64
+ codePoint: number;
65
+ /** Hexadecimal representation of the code point */
66
+ hex: string;
67
+ /** Whether the character is in the ASCII range (0x00-0x7F) */
68
+ isAscii: boolean;
69
+ /** Whether the character is in the BMP (0x0000-0xFFFF) */
70
+ isBmp: boolean;
71
+ /** Whether the character is in the Latin-1 range (0x00-0xFF) */
72
+ isLatin1: boolean;
73
+ /** Whether the character is a high surrogate (0xD800-0xDBFF) */
74
+ isHighSurrogate: boolean;
75
+ /** Whether the character is a low surrogate (0xDC00-0xDFFF) */
76
+ isLowSurrogate: boolean;
77
+ /** Length in UTF-16 code units */
78
+ utf16Length: number;
79
+ }
80
+ /**
81
+ * Result of escaping a string with detailed information
82
+ */
83
+ interface EscapeResult {
84
+ /** The escaped string */
85
+ escaped: string;
86
+ /** Number of characters that were escaped */
87
+ escapedCount: number;
88
+ /** Number of characters that were preserved */
89
+ preservedCount: number;
90
+ /** Total number of characters processed */
91
+ totalCount: number;
92
+ }
93
+ /**
94
+ * Stream transformer options (NOTE(review): not referenced by any other declaration in this file, only exported)
95
+ */
96
+ interface StreamOptions {
97
+ /** Options passed to escape (escapeOptions) and to unescape (unescapeOptions) */
98
+ escapeOptions?: EscapeOptions;
99
+ unescapeOptions?: UnescapeOptions;
100
+ /** High water mark for the stream buffer */
101
+ highWaterMark?: number;
102
+ }
103
+
104
+ /**
105
+ * Escapes Unicode characters in a string according to the specified options
106
+ *
107
+ * @param input - The string to escape
108
+ * @param options - Escape options
109
+ * @returns The escaped string
110
+ *
111
+ * @example
112
+ * ```ts
113
+ * escape('Hello 世界')
114
+ * // => 'Hello \\u4E16\\u754C'
115
+ *
116
+ * escape('Hello 世界', { format: 'unicode-es6' })
117
+ * // => 'Hello \\u{4E16}\\u{754C}'
118
+ *
119
+ * escape('Café', { preserveAscii: true })
120
+ * // => 'Caf\\u00E9'
121
+ * ```
122
+ */
123
+ declare function escape(input: string, options?: EscapeOptions): string;
124
+ /**
125
+ * Escapes Unicode characters and returns detailed information about the operation
126
+ *
127
+ * @param input - The string to escape
128
+ * @param options - Escape options
129
+ * @returns Object containing escaped string and statistics
130
+ */
131
+ declare function escapeWithInfo(input: string, options?: EscapeOptions): EscapeResult;
132
+ /**
133
+ * Convenience function: escape to \uXXXX format
134
+ */
135
+ declare function escapeToUnicode(input: string, options?: Omit<EscapeOptions, 'format'>): string;
136
+ /**
137
+ * Convenience function: escape to \u{XXXXX} ES6 format
138
+ */
139
+ declare function escapeToUnicodeES6(input: string, options?: Omit<EscapeOptions, 'format'>): string;
140
+ /**
141
+ * Convenience function: escape to \xNN format (falls back to \uXXXX for non-Latin1)
142
+ */
143
+ declare function escapeToHex(input: string, options?: Omit<EscapeOptions, 'format'>): string;
144
+ /**
145
+ * Convenience function: escape to &#xNNNN; HTML hex entity format
146
+ */
147
+ declare function escapeToHtmlHex(input: string, options?: Omit<EscapeOptions, 'format'>): string;
148
+ /**
149
+ * Convenience function: escape to &#NNNN; HTML decimal entity format
150
+ */
151
+ declare function escapeToHtmlDecimal(input: string, options?: Omit<EscapeOptions, 'format'>): string;
152
+ /**
153
+ * Convenience function: escape to U+XXXX code point format
154
+ */
155
+ declare function escapeToCodePoint(input: string, options?: Omit<EscapeOptions, 'format'>): string;
156
+ /**
157
+ * Escapes all characters in a string; the `filter`, `preserveAscii` and `preserveLatin1` options are excluded from the accepted options
158
+ */
159
+ declare function escapeAll(input: string, options?: Omit<EscapeOptions, 'filter' | 'preserveAscii' | 'preserveLatin1'>): string;
160
+ /**
161
+ * Escapes only non-printable and control characters (the `filter` option is excluded from the accepted options)
162
+ */
163
+ declare function escapeNonPrintable(input: string, options?: Omit<EscapeOptions, 'filter'>): string;
164
+
165
+ /**
166
+ * Unescapes Unicode escape sequences in a string
167
+ *
168
+ * @param input - The string containing escape sequences
169
+ * @param options - Unescape options
170
+ * @returns The unescaped string
171
+ *
172
+ * @example
173
+ * ```ts
174
+ * unescape('Hello \\u4E16\\u754C')
175
+ * // => 'Hello 世界'
176
+ *
177
+ * unescape('Hello \\u{4E16}\\u{754C}')
178
+ * // => 'Hello 世界'
179
+ *
180
+ * unescape('Caf&#xE9;')
181
+ * // => 'Café'
182
+ * ```
183
+ */
184
+ declare function unescape(input: string, options?: UnescapeOptions): string;
185
+ /**
186
+ * Unescapes only \uXXXX format (with surrogate pair support)
187
+ */
188
+ declare function unescapeUnicode(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
189
+ /**
190
+ * Unescapes only \u{XXXXX} ES6 format
191
+ */
192
+ declare function unescapeUnicodeES6(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
193
+ /**
194
+ * Unescapes only \xNN format
195
+ */
196
+ declare function unescapeHex(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
197
+ /**
198
+ * Unescapes only &#xNNNN; HTML hex entity format
199
+ */
200
+ declare function unescapeHtmlHex(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
201
+ /**
202
+ * Unescapes only &#NNNN; HTML decimal entity format
203
+ */
204
+ declare function unescapeHtmlDecimal(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
205
+ /**
206
+ * Unescapes only U+XXXX code point format
207
+ */
208
+ declare function unescapeCodePoint(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
209
+ /**
210
+ * Unescapes all HTML entities (both hex and decimal)
211
+ */
212
+ declare function unescapeHtml(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
213
+ /**
214
+ * Unescapes all JavaScript escape formats (\uXXXX, \u{XXXXX}, \xNN)
215
+ */
216
+ declare function unescapeJs(input: string, options?: Omit<UnescapeOptions, 'formats'>): string;
217
+ /**
218
+ * Checks if a string contains any escape sequences (NOTE(review): `formats` presumably limits which formats are considered - confirm)
219
+ */
220
+ declare function hasEscapeSequences(input: string, formats?: EscapeFormat[]): boolean;
221
+ /**
222
+ * Counts the number of escape sequences in a string (NOTE(review): `formats` presumably limits which formats are counted - confirm)
223
+ */
224
+ declare function countEscapeSequences(input: string, formats?: EscapeFormat[]): number;
225
+
226
+ /**
227
+ * Filter predicate (exported as `isAscii`): returns true if the character is in the ASCII range (0x00-0x7F)
228
+ */
229
+ declare const isAscii$1: FilterFunction;
230
+ /**
231
+ * Returns true if the character is NOT in the ASCII range
232
+ */
233
+ declare const isNotAscii: FilterFunction;
234
+ /**
235
+ * Filter predicate (exported as `isLatin1`): returns true if the character is in the Latin-1 range (0x00-0xFF)
236
+ */
237
+ declare const isLatin1$1: FilterFunction;
238
+ /**
239
+ * Returns true if the character is NOT in the Latin-1 range
240
+ */
241
+ declare const isNotLatin1: FilterFunction;
242
+ /**
243
+ * Filter predicate (exported as `isBmp`): returns true if the character is in the BMP (0x0000-0xFFFF)
244
+ */
245
+ declare const isBmp$1: FilterFunction;
246
+ /**
247
+ * Returns true if the character is NOT in the BMP (supplementary planes)
248
+ */
249
+ declare const isNotBmp: FilterFunction;
250
+ /**
251
+ * Filter predicate (exported as `isHighSurrogate`): returns true if the character is a high surrogate (0xD800-0xDBFF)
252
+ */
253
+ declare const isHighSurrogate$1: FilterFunction;
254
+ /**
255
+ * Filter predicate (exported as `isLowSurrogate`): returns true if the character is a low surrogate (0xDC00-0xDFFF)
256
+ */
257
+ declare const isLowSurrogate$1: FilterFunction;
258
+ /**
259
+ * Filter predicate (exported as `isSurrogate`): returns true if the character is any surrogate (0xD800-0xDFFF)
260
+ */
261
+ declare const isSurrogate$1: FilterFunction;
262
+ /**
263
+ * Returns true if the character is a printable ASCII character (0x20-0x7E)
264
+ */
265
+ declare const isPrintableAscii: FilterFunction;
266
+ /**
267
+ * Returns true if the character is NOT a printable ASCII character
268
+ */
269
+ declare const isNotPrintableAscii: FilterFunction;
270
+ /**
271
+ * Returns true if the character is a control character (0x00-0x1F or 0x7F)
272
+ */
273
+ declare const isControl: FilterFunction;
274
+ /**
275
+ * Returns true if the character is a whitespace character
276
+ */
277
+ declare const isWhitespace: FilterFunction;
278
+ /**
279
+ * Creates a filter that matches characters within a specific range
280
+ */
281
+ declare function inRange(start: number, end: number): FilterFunction;
282
+ /**
283
+ * Creates a filter that matches characters outside a specific range
284
+ */
285
+ declare function notInRange(start: number, end: number): FilterFunction;
286
+ /**
287
+ * Creates a filter that matches any of the specified characters
288
+ */
289
+ declare function oneOf(chars: string): FilterFunction;
290
+ /**
291
+ * Creates a filter that matches none of the specified characters
292
+ */
293
+ declare function noneOf(chars: string): FilterFunction;
294
+ /**
295
+ * Combines multiple filters with AND logic (all must return true)
296
+ */
297
+ declare function and(...filters: FilterFunction[]): FilterFunction;
298
+ /**
299
+ * Combines multiple filters with OR logic (any must return true)
300
+ */
301
+ declare function or(...filters: FilterFunction[]): FilterFunction;
302
+ /**
303
+ * Negates a filter
304
+ */
305
+ declare function not(filter: FilterFunction): FilterFunction;
306
+ /**
307
+ * Always returns true (escape all characters)
308
+ */
309
+ declare const all: FilterFunction;
310
+ /**
311
+ * Always returns false (escape no characters)
312
+ */
313
+ declare const none: FilterFunction;
314
+
315
+ /**
316
+ * Gets the Unicode code point of a character
317
+ *
318
+ * @param char - The character (can be a surrogate pair)
319
+ * @returns The code point, or undefined if invalid
320
+ */
321
+ declare function getCodePoint(char: string): number | undefined;
322
+ /**
323
+ * Creates a character from a code point
324
+ *
325
+ * @param codePoint - The Unicode code point
326
+ * @returns The character
327
+ * @throws If the code point is invalid
328
+ */
329
+ declare function fromCodePoint(codePoint: number): string;
330
+ /**
331
+ * Checks if a character is in the ASCII range (0x00-0x7F); exported as `isAsciiChar`
332
+ */
333
+ declare function isAscii(char: string): boolean;
334
+ /**
335
+ * Checks if a character is in the Latin-1 range (0x00-0xFF); exported as `isLatin1Char`
336
+ */
337
+ declare function isLatin1(char: string): boolean;
338
+ /**
339
+ * Checks if a character is in the BMP (0x0000-0xFFFF); exported as `isBmpChar`
340
+ */
341
+ declare function isBmp(char: string): boolean;
342
+ /**
343
+ * Checks if a code point is a high surrogate (0xD800-0xDBFF); exported as `isHighSurrogateCode`
344
+ */
345
+ declare function isHighSurrogate(codePoint: number): boolean;
346
+ /**
347
+ * Checks if a code point is a low surrogate (0xDC00-0xDFFF); exported as `isLowSurrogateCode`
348
+ */
349
+ declare function isLowSurrogate(codePoint: number): boolean;
350
+ /**
351
+ * Checks if a code point is any surrogate (0xD800-0xDFFF); exported as `isSurrogateCode`
352
+ */
353
+ declare function isSurrogate(codePoint: number): boolean;
354
+ /**
355
+ * Gets detailed information about a character
356
+ */
357
+ declare function getCharInfo(char: string): CharacterInfo | undefined;
358
+ /**
359
+ * Iterates over characters in a string, yielding code points
360
+ * (handles surrogate pairs correctly)
361
+ */
362
+ declare function iterateCodePoints(input: string): Generator<{
363
+ char: string;
364
+ codePoint: number;
365
+ index: number;
366
+ }>;
367
+ /**
368
+ * Converts a string to an array of code points
369
+ */
370
+ declare function toCodePoints(input: string): number[];
371
+ /**
372
+ * Converts an array of code points to a string
373
+ */
374
+ declare function fromCodePoints(codePoints: number[]): string;
375
+ /**
376
+ * Gets the length of a string in code points (not UTF-16 code units)
377
+ */
378
+ declare function codePointLength(input: string): number;
379
+ /**
380
+ * Converts a code point to its hex representation with optional prefix
381
+ */
382
+ declare function toHex(codePoint: number, options?: {
383
+ prefix?: string;
384
+ minLength?: number;
385
+ uppercase?: boolean;
386
+ }): string;
387
+ /**
388
+ * Parses a hex string (with or without prefix) to a code point
389
+ */
390
+ declare function parseHex(hex: string): number | undefined;
391
+ /**
392
+ * Validates if a string contains only valid Unicode characters
393
+ * (no unpaired surrogates)
394
+ */
395
+ declare function isValidUnicode(input: string): boolean;
396
+ /**
397
+ * Normalizes a string to NFC form
398
+ */
399
+ declare function normalizeNFC(input: string): string;
400
+ /**
401
+ * Normalizes a string to NFD form
402
+ */
403
+ declare function normalizeNFD(input: string): string;
404
+ /**
405
+ * Compares two strings for Unicode equivalence
406
+ */
407
+ declare function unicodeEquals(a: string, b: string): boolean;
408
+
409
+ /**
410
+ * Web Streams API support (for browsers and modern Node.js)
411
+ */
412
+ /**
413
+ * Creates a TransformStream for escaping (Web Streams API)
414
+ *
415
+ * @example
416
+ * ```ts
417
+ * const response = await fetch('data.txt');
418
+ * const escaped = response.body
419
+ * .pipeThrough(new TextDecoderStream())
420
+ * .pipeThrough(createWebEscapeStream())
421
+ * .pipeThrough(new TextEncoderStream());
422
+ * ```
423
+ */
424
+ declare function createWebEscapeStream(options?: EscapeOptions): TransformStream<string, string>;
425
+ /**
426
+ * Creates a TransformStream for unescaping (Web Streams API)
427
+ */
428
+ declare function createWebUnescapeStream(options?: UnescapeOptions): TransformStream<string, string>;
429
+
430
+ /**
431
+ * Format escape functions - convert a code point to its escaped representation
432
+ */
433
+ declare const formatters: Record<EscapeFormat, (codePoint: number, uppercase: boolean) => string>;
434
+ /**
435
+ * Regular expressions to match each escape format
436
+ */
437
+ declare const unescapePatterns: Record<EscapeFormat, RegExp>;
438
+ /**
439
+ * Checks whether a code point is valid Unicode
440
+ */
441
+ declare function isValidCodePoint(codePoint: number): boolean;
442
+ /**
443
+ * Checks whether a code point is a surrogate (NOTE(review): presumably the 0xD800-0xDFFF range - confirm)
444
+ */
445
+ declare function isSurrogateCodePoint(codePoint: number): boolean;
446
+ /**
447
+ * Converts a surrogate pair (high, low) to a code point
448
+ */
449
+ declare function surrogateToCodePoint(high: number, low: number): number;
450
+
451
+ export { type CharacterInfo, type EscapeFormat, type EscapeOptions, type EscapeResult, type FilterFunction, type StreamOptions, type UnescapeOptions, all, and, codePointLength, countEscapeSequences, createWebEscapeStream, createWebUnescapeStream, escape, escapeAll, escapeNonPrintable, escapeToCodePoint, escapeToHex, escapeToHtmlDecimal, escapeToHtmlHex, escapeToUnicode, escapeToUnicodeES6, escapeWithInfo, formatters, fromCodePoint, fromCodePoints, getCharInfo, getCodePoint, hasEscapeSequences, inRange, isAscii$1 as isAscii, isAscii as isAsciiChar, isBmp$1 as isBmp, isBmp as isBmpChar, isControl, isHighSurrogate$1 as isHighSurrogate, isHighSurrogate as isHighSurrogateCode, isLatin1$1 as isLatin1, isLatin1 as isLatin1Char, isLowSurrogate$1 as isLowSurrogate, isLowSurrogate as isLowSurrogateCode, isNotAscii, isNotBmp, isNotLatin1, isNotPrintableAscii, isPrintableAscii, isSurrogate$1 as isSurrogate, isSurrogate as isSurrogateCode, isSurrogateCodePoint, isValidCodePoint, isValidUnicode, isWhitespace, iterateCodePoints, none, noneOf, normalizeNFC, normalizeNFD, not, notInRange, oneOf, or, parseHex, surrogateToCodePoint, toCodePoints, toHex, unescape, unescapeCodePoint, unescapeHex, unescapeHtml, unescapeHtmlDecimal, unescapeHtmlHex, unescapeJs, unescapePatterns, unescapeUnicode, unescapeUnicodeES6, unicodeEquals };