@nodable/entities 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.d.ts ADDED
@@ -0,0 +1,421 @@
1
+ // ---------------------------------------------------------------------------
2
+ // @nodable/entities — TypeScript declarations
3
+ // ---------------------------------------------------------------------------
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Entity table shape
7
+ // ---------------------------------------------------------------------------
8
+
9
+ /** Function-form entity replacement value: called with the full match, the captured text, and any further arguments; returns the replacement string (used for numeric refs). */
10
+ export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string;
11
+
12
+ /** A single entity entry: pairs a `regex` with its replacement `val` (a literal string or an `EntityValFn`). */
13
+ export interface EntityEntry {
14
+ regex: RegExp;
15
+ val: string | EntityValFn;
16
+ }
17
+
18
+ /** Lookup table keyed by entity name; each value is that entity's `EntityEntry`. */
19
+ export type EntityTable = Record<string, EntityEntry>;
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Constructor options
23
+ // ---------------------------------------------------------------------------
24
+
25
+ /**
26
+ * Selects which entity categories count toward the expansion limits (`maxTotalExpansions`, `maxExpandedLength`).
27
+ *
28
+ * - `'external'` — only untrusted / injected entities (default, safest)
29
+ * - `'all'` — shorthand for all categories
30
+ * - array — any combination of the literals `'external'`, `'system'`, `'default'` (not arbitrary strings)
31
+ */
32
+ export type ApplyLimitsTo = 'external' | 'all' | Array<'external' | 'system' | 'default'>;
33
+
34
+ /**
35
+ * Options accepted by the `EntityReplacer` constructor; every field is optional. NOTE(review): `default`, `amp`, `system` and `postCheck` also admit `null` — its effect is not documented here; confirm against the implementation.
36
+ */
37
+ export interface EntityReplacerOptions {
38
+ /**
39
+ * Built-in XML entities: `&lt;` `&gt;` `&quot;` `&apos;`
40
+ *
41
+ * - `true` — use built-in table (default)
42
+ * - `false` — disable
43
+ * - `object` — use a custom `EntityTable` instead of the built-in set
44
+ * @default true
45
+ */
46
+ default?: boolean | EntityTable | null;
47
+
48
+ /**
49
+ * `&amp;` → `&` final pass (always processed last to prevent double-expansion).
50
+ * @default true
51
+ */
52
+ amp?: boolean | null;
53
+
54
+ /**
55
+ * Named entity groups (system-level, trusted).
56
+ *
57
+ * - `false` — disabled (default)
58
+ * - `true` — enables `COMMON_HTML` built-in group
59
+ * - `object` — use the supplied `EntityTable` (compose freely with exported groups)
60
+ *
61
+ * @example
62
+ * import { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities';
63
+ * new EntityReplacer({ system: { ...COMMON_HTML, ...CURRENCY_ENTITIES } });
64
+ *
65
+ * @default false
66
+ */
67
+ system?: boolean | EntityTable | null;
68
+
69
+ /**
70
+ * Maximum number of entity references expanded per document.
71
+ * `0` means unlimited. Which categories are counted is selected by `applyLimitsTo`.
72
+ * @default 0
73
+ */
74
+ maxTotalExpansions?: number;
75
+
76
+ /**
77
+ * Maximum number of characters *added* by entity expansion per document.
78
+ * `0` means unlimited. Which categories are counted is selected by `applyLimitsTo`.
79
+ * @default 0
80
+ */
81
+ maxExpandedLength?: number;
82
+
83
+ /**
84
+ * Which entity categories count toward the expansion limits (see `ApplyLimitsTo` for the accepted values).
85
+ * @default 'external'
86
+ */
87
+ applyLimitsTo?: ApplyLimitsTo;
88
+
89
+ /**
90
+ * Hook called once on the fully resolved string (after all categories).
91
+ *
92
+ * - Receives `(resolved, original)` and **must return a string**.
93
+ * - To reject expansion, return `original`.
94
+ * - To sanitize, return a cleaned version of `resolved`.
95
+ *
96
+ * @example
97
+ * postCheck: (resolved, original) =>
98
+ * /<[a-z]/i.test(resolved) ? original : resolved
99
+ */
100
+ postCheck?: ((resolved: string, original: string) => string) | null;
101
+ }
102
+
103
+ // ---------------------------------------------------------------------------
104
+ // EntityReplacer class
105
+ // ---------------------------------------------------------------------------
106
+
107
+ /**
108
+ * Standalone, zero-dependency XML/HTML entity replacer.
109
+ *
110
+ * ## Entity categories and replacement order
111
+ *
112
+ * Entities are processed in this fixed order per `replace()` call:
113
+ * 1. **persistent external** — set via `setExternalEntities()` / `addExternalEntity()`
114
+ * 2. **input / runtime** — injected via `addInputEntities()` (DOCTYPE per-document)
115
+ * 3. **system** — named entity groups (e.g. `COMMON_HTML`)
116
+ * 4. **default** — built-in XML entities (`lt`, `gt`, `apos`, `quot`)
117
+ * 5. **amp** — `&amp;` → `&` (always last)
118
+ * 6. **postCheck** — optional hook on the fully resolved string
119
+ *
120
+ * ## Lifecycle with `@nodable/flexible-xml-parser`
121
+ *
122
+ * Construct once, then let the builder factory drive the lifecycle:
123
+ *
124
+ * ```ts
125
+ * const replacer = new EntityReplacer({ default: true, system: COMMON_HTML });
126
+ * replacer.setExternalEntities({ brand: 'Acme' }); // persistent — survives all docs
127
+ *
128
+ * // Builder factory calls getInstance() when creating a new builder instance (it returns `this`, so instance === replacer):
129
+ * const instance = replacer.getInstance();
130
+ *
131
+ * // Builder calls addInputEntities() if the document has a DOCTYPE block:
132
+ * instance.addInputEntities(doctypeEntities);
133
+ *
134
+ * // Builder calls replace() (indirectly via ValueParser) for each text node:
135
+ * instance.replace('&brand; v&version; &lt;'); // 'Acme v1.0 <' — assumes doctypeEntities maps version to '1.0'
136
+ * ```
137
+ */
138
+ export default class EntityReplacer {
139
+ constructor(options?: EntityReplacerOptions);
140
+
141
+ // -------------------------------------------------------------------------
142
+ // Persistent external entities (survive across documents)
143
+ // -------------------------------------------------------------------------
144
+
145
+ /**
146
+ * Replace the full set of persistent external entities.
147
+ *
148
+ * These entities survive across all documents — they are **not** wiped by
149
+ * `getInstance()`. Use them for caller-supplied entities that are fixed at
150
+ * configuration time (e.g. brand names, product codes).
151
+ *
152
+ * Calling this a second time replaces the previous persistent entity map.
153
+ *
154
+ * Values containing `&` are silently skipped to prevent recursive expansion.
155
+ *
156
+ * @param map Entity name → replacement string, or pre-built `{ regex, val }` object (the `EntityEntry` shape).
157
+ */
158
+ setExternalEntities(
159
+ map: Record<string, string | { regex: RegExp; val: string | EntityValFn }>
160
+ ): void;
161
+
162
+ /**
163
+ * Append a single persistent external entity without disturbing the rest.
164
+ *
165
+ * @param key Bare entity name without `&` / `;` — e.g. `'copy'`
166
+ * @param value Replacement string — must not contain `&`
167
+ * @throws if `key` contains regex-special characters
168
+ */
169
+ addExternalEntity(key: string, value: string): void;
170
+
171
+ // -------------------------------------------------------------------------
172
+ // Input / runtime entities (per document, cleared by getInstance)
173
+ // -------------------------------------------------------------------------
174
+
175
+ /**
176
+ * Inject DOCTYPE (input/runtime) entities for the **current document only**.
177
+ *
178
+ * These are stored separately from persistent entities. They are wiped on
179
+ * the next `getInstance()` call so they never leak into subsequent documents.
180
+ *
181
+ * Also resets the per-document expansion counters.
182
+ *
183
+ * Accepts both plain string values and `{ regx, val }` / `{ regex, val }`
184
+ * objects as produced by `DocTypeReader`.
185
+ *
186
+ * @param map Raw entity map from the DOCTYPE reader (same shape as the exported `DocTypeEntityMap` type).
187
+ */
188
+ addInputEntities(
189
+ map: Record<
190
+ string,
191
+ | string
192
+ | { regx: RegExp; val: string | EntityValFn }
193
+ | { regex: RegExp; val: string | EntityValFn }
194
+ >
195
+ ): void;
196
+
197
+ // -------------------------------------------------------------------------
198
+ // Builder factory integration
199
+ // -------------------------------------------------------------------------
200
+
201
+ /**
202
+ * Reset all per-document state and return `this`.
203
+ *
204
+ * Clears:
205
+ * - input / runtime entities (DOCTYPE)
206
+ * - `_totalExpansions` counter
207
+ * - `_expandedLength` counter
208
+ *
209
+ * Does **not** clear persistent external entities set via
210
+ * `setExternalEntities()` / `addExternalEntity()`.
211
+ *
212
+ * The builder factory calls this when creating a new builder instance,
213
+ * ensuring each document starts clean regardless of whether it has a DOCTYPE.
214
+ *
215
+ * @returns `this` — the same object, not a copy — for convenient chaining in factory code
216
+ */
217
+ getInstance(): this;
218
+
219
+ // -------------------------------------------------------------------------
220
+ // Primary API
221
+ // -------------------------------------------------------------------------
222
+
223
+ /**
224
+ * Replace all entity references in `str`.
225
+ * Returns `str` unchanged if it contains no `&` character (fast path).
226
+ */
227
+ replace(str: string): string;
228
+ }
229
+
230
+ // ---------------------------------------------------------------------------
231
+ // EntitiesValueParser
232
+ // ---------------------------------------------------------------------------
233
+
234
+ /**
235
+ * Options accepted by the `EntitiesValueParser` constructor: every `EntityReplacerOptions` field plus `entities`.
236
+ */
237
+ export interface EntitiesValueParserOptions extends EntityReplacerOptions {
238
+ /**
239
+ * Initial persistent external entity map (entity name → replacement string) loaded at construction time.
240
+ * Values must not contain `&` (to prevent recursive expansion).
241
+ * Equivalent to calling `setExternalEntities()` after construction.
242
+ *
243
+ * @example
244
+ * new EntitiesValueParser({ entities: { copy: '©', trade: '™' } })
245
+ */
246
+ entities?: Record<string, string>;
247
+ }
248
+
249
+ /**
250
+ * Raw DOCTYPE entity map shape as produced by `DocTypeReader`.
251
+ * Values are plain strings, `{ regx, val }` objects, or `{ regex, val }` objects
252
+ * (`regx` is not a typo — it matches the reader's output field name; the `regex` spelling is accepted as well).
253
+ */
254
+ export type DocTypeEntityMap = Record<
255
+ string,
256
+ | string
257
+ | { regx: RegExp; val: string | EntityValFn }
258
+ | { regex: RegExp; val: string | EntityValFn }
259
+ >;
260
+
261
+ /**
262
+ * ValueParser context object passed by `@nodable/flexible-xml-parser`.
263
+ * All fields are optional; `EntitiesValueParser.parse()` accepts but ignores this argument.
264
+ */
265
+ export interface ValueParserContext {
266
+ elementName?: string;
267
+ elementValue?: string;
268
+ elementType?: string;
269
+ matcher?: unknown;
270
+ isLeafNode?: boolean;
271
+ }
272
+
273
+ /**
274
+ * `EntitiesValueParser` — value-parser adapter that wraps `EntityReplacer`
275
+ * for use with `@nodable/flexible-xml-parser`.
276
+ *
277
+ * ## Setup
278
+ *
279
+ * ```ts
280
+ * import { EntitiesValueParser, COMMON_HTML } from '@nodable/entities';
281
+ *
282
+ * const evp = new EntitiesValueParser({ system: COMMON_HTML });
283
+ *
284
+ * // Persistent entities — never wiped between documents:
285
+ * evp.setExternalEntities({ brand: 'Acme', product: 'Widget' });
286
+ *
287
+ * // Register with the builder factory:
288
+ * builder.registerValueParser('entity', evp);
289
+ *
290
+ * const parser = new XMLParser({ OutputBuilder: builder });
291
+ * parser.parse(xml);
292
+ * ```
293
+ *
294
+ * ## Lifecycle (called automatically by the builder / parser)
295
+ *
296
+ * | Caller | Method | When |
297
+ * |-----------------|----------------------|-------------------------------------------|
298
+ * | Builder factory | `getInstance()` | When a new builder instance is created (before each document is parsed) |
299
+ * | Builder | `addInputEntities()` | After DOCTYPE is read (if present) |
300
+ * | Builder | `parse(val)` | For each text / attribute value |
301
+ */
302
+ export class EntitiesValueParser {
303
+ constructor(options?: EntitiesValueParserOptions);
304
+
305
+ // -------------------------------------------------------------------------
306
+ // Persistent external entity registration
307
+ // -------------------------------------------------------------------------
308
+
309
+ /**
310
+ * Replace the full set of persistent external entities.
311
+ *
312
+ * These survive across all documents and are **not** cleared by
313
+ * `getInstance()`. Call this once after construction (or at any time to
314
+ * swap the entire persistent entity map).
315
+ *
316
+ * @throws if any value contains `&` (NOTE(review): `EntityReplacer.setExternalEntities()` documents such values as silently skipped — confirm which behaviour applies here)
317
+ */
318
+ setExternalEntities(map: Record<string, string>): void;
319
+
320
+ /**
321
+ * Append a single persistent external entity.
322
+ *
323
+ * Provide the bare name without `&` and `;` — e.g. `'copy'` for `&copy;`.
324
+ * Existing persistent entities are preserved.
325
+ *
326
+ * @throws if `key` contains `&` or `;`
327
+ * @throws if `value` is not a string or contains `&`
328
+ */
329
+ addEntity(key: string, value: string): void;
330
+
331
+ // -------------------------------------------------------------------------
332
+ // Builder factory integration
333
+ // -------------------------------------------------------------------------
334
+
335
+ /**
336
+ * Reset per-document state and return `this`.
337
+ *
338
+ * Clears input/runtime entities (DOCTYPE) and resets expansion counters.
339
+ * Does **not** clear persistent external entities.
340
+ *
341
+ * The builder factory calls this when creating a new builder instance.
342
+ *
343
+ * @returns `this` — the same object, not a copy
344
+ */
345
+ getInstance(): this;
346
+
347
+ // -------------------------------------------------------------------------
348
+ // DOCTYPE integration — called automatically by BaseOutputBuilder
349
+ // -------------------------------------------------------------------------
350
+
351
+ /**
352
+ * Receive DOCTYPE entities for the current document.
353
+ *
354
+ * Called automatically by `BaseOutputBuilder`. Stores entities separately
355
+ * from persistent entities so they are wiped on the next `getInstance()`.
356
+ * Also resets per-document expansion counters.
357
+ *
358
+ * Accepts both plain string values and `{ regx, val }` / `{ regex, val }`
359
+ * objects as produced by `DocTypeReader`.
360
+ */
361
+ addInputEntities(entities: DocTypeEntityMap): void;
362
+
363
+ // -------------------------------------------------------------------------
364
+ // ValueParser interface
365
+ // -------------------------------------------------------------------------
366
+
367
+ /**
368
+ * Replace entity references in `val`.
369
+ *
370
+ * Implements the `ValueParser` interface. The `context` argument is
371
+ * accepted but ignored — replacement is applied uniformly to all values.
372
+ *
373
+ * Returns non-string input unchanged (hence the second, `unknown` overload).
374
+ */
375
+ parse(val: string, context?: ValueParserContext): string;
376
+ parse(val: unknown, context?: ValueParserContext): unknown;
377
+ }
378
+
379
+ // ---------------------------------------------------------------------------
380
+ // Named entity group exports
381
+ // ---------------------------------------------------------------------------
382
+
383
+ /**
384
+ * ~20 most commonly needed HTML named entities (the group enabled by the `system: true` option).
385
+ * Includes: `&nbsp;` `&copy;` `&reg;` `&trade;` `&mdash;` `&ndash;`
386
+ * `&hellip;` `&laquo;` `&raquo;` `&lsquo;` `&rsquo;` `&ldquo;` `&rdquo;`
387
+ * `&bull;` `&para;` `&sect;` `&deg;` `&frac12;` `&frac14;` `&frac34;`
388
+ */
389
+ export const COMMON_HTML: EntityTable;
390
+
391
+ /**
392
+ * Currency symbol entities.
393
+ * Includes: `&cent;` `&pound;` `&yen;` `&euro;` `&inr;` `&curren;` `&fnof;`
394
+ */
395
+ export const CURRENCY_ENTITIES: EntityTable;
396
+
397
+ /**
398
+ * Mathematical operator entities.
399
+ * Includes: `&times;` `&divide;` `&plusmn;` `&minus;` `&sup2;` `&sup3;`
400
+ * `&permil;` `&infin;` `&sum;` `&prod;` `&radic;` `&ne;` `&le;` `&ge;`
401
+ */
402
+ export const MATH_ENTITIES: EntityTable;
403
+
404
+ /**
405
+ * Arrow entities.
406
+ * Includes: `&larr;` `&uarr;` `&rarr;` `&darr;` `&harr;`
407
+ * and their double-stroke variants `&lArr;` `&uArr;` `&rArr;` `&dArr;` `&hArr;`
408
+ */
409
+ export const ARROW_ENTITIES: EntityTable;
410
+
411
+ /**
412
+ * Numeric character reference entities.
413
+ * Handles any valid decimal `&#NNN;` and hex `&#xHH;` code point reference.
414
+ */
415
+ export const NUMERIC_ENTITIES: EntityTable;
416
+
417
+ /** The built-in XML entity table (`lt`, `gt`, `apos`, `quot`); `&amp;` is exported separately as `AMP_ENTITY`. */
418
+ export const DEFAULT_XML_ENTITIES: EntityTable;
419
+
420
+ /** The `&amp;` entity entry used in the final expansion pass (see the `amp` option). */
421
+ export const AMP_ENTITY: EntityEntry;
package/src/index.js ADDED
@@ -0,0 +1,27 @@
1
+ /**
2
+ * @nodable/entities
3
+ *
4
+ * Standalone, zero-dependency XML/HTML entity replacement.
5
+ * This module is the package entry point: it only re-exports from the files below.
6
+ * @example
7
+ * import EntityReplacer, { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities';
8
+ *
9
+ * const replacer = new EntityReplacer({
10
+ * default: true,
11
+ * system: { ...COMMON_HTML, ...CURRENCY_ENTITIES },
12
+ * });
13
+ *
14
+ * replacer.replace('Price: &pound;9.99 &mdash; &copy; 2024');
15
+ * // → 'Price: £9.99 — © 2024'
16
+ */
17
+
18
+ export { default } from './EntityReplacer.js'; // package default export, taken from EntityReplacer.js
19
+ export { DEFAULT_XML_ENTITIES, AMP_ENTITY } from './EntityReplacer.js'; // named exports from the same module
20
+ export { default as EntitiesValueParser } from './EntitiesValueParser.js'; // that module's default export, re-exported under a name
21
+ export { // named entity groups
22
+ COMMON_HTML,
23
+ CURRENCY_ENTITIES,
24
+ MATH_ENTITIES,
25
+ ARROW_ENTITIES,
26
+ NUMERIC_ENTITIES,
27
+ } from './groups.js';