@nodable/entities 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +476 -0
- package/package.json +49 -0
- package/src/EntitiesValueParser.js +152 -0
- package/src/EntityReplacer.js +379 -0
- package/src/groups.js +99 -0
- package/src/index.d.ts +421 -0
- package/src/index.js +27 -0
package/README.md
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
# `@nodable/entities`
|
|
2
|
+
|
|
3
|
+
Standalone, zero-dependency XML/HTML entity replacement with:
|
|
4
|
+
|
|
5
|
+
- **5 entity categories** processed in a fixed, predictable order
|
|
6
|
+
- **Persistent vs. input entity separation** — no state leaks between documents
|
|
7
|
+
- **`getInstance()`** — clean per-document reset without cloning
|
|
8
|
+
- **Composable named entity groups** (HTML, currency, math, arrows, numeric refs)
|
|
9
|
+
- **Security limits** — cap total expansions and expanded length per document
|
|
10
|
+
- **Granular limit targeting** — apply limits to any subset of categories
|
|
11
|
+
- **`postCheck` hook** — inspect or sanitize the fully resolved string
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```sh
|
|
18
|
+
npm install @nodable/entities
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
```js
|
|
26
|
+
import EntityReplacer from '@nodable/entities';
|
|
27
|
+
|
|
28
|
+
const replacer = new EntityReplacer({ default: true });
|
|
29
|
+
|
|
30
|
+
replacer.replace('5 &lt; 10 &amp;&amp; x &gt; 0');
|
|
31
|
+
// → '5 < 10 && x > 0'
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
With named entity groups:
|
|
35
|
+
|
|
36
|
+
```js
|
|
37
|
+
import EntityReplacer, { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities';
|
|
38
|
+
|
|
39
|
+
const replacer = new EntityReplacer({
|
|
40
|
+
default: true,
|
|
41
|
+
system: { ...COMMON_HTML, ...CURRENCY_ENTITIES },
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
replacer.replace('&copy; 2024 &mdash; Price: &pound;9.99');
|
|
45
|
+
// → '© 2024 — Price: £9.99'
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Entity Categories
|
|
51
|
+
|
|
52
|
+
Entities are processed in this fixed order — not configurable:
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
persistent external → input/runtime → system → default → amp
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### `persistent external` — Caller-supplied configuration entities
|
|
59
|
+
|
|
60
|
+
Entities set at configuration time that survive across all documents. Never wiped by `getInstance()`. Set via `setExternalEntities()` or `addExternalEntity()` / `addEntity()`.
|
|
61
|
+
|
|
62
|
+
```js
|
|
63
|
+
const replacer = new EntityReplacer({ default: true });
|
|
64
|
+
replacer.setExternalEntities({ brand: 'Acme Corp', product: 'Widget Pro' });
|
|
65
|
+
replacer.replace('&brand; makes &product;');
|
|
66
|
+
// → 'Acme Corp makes Widget Pro'
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### `input / runtime` — Per-document DOCTYPE entities
|
|
70
|
+
|
|
71
|
+
Entities injected by the parser from the document's DOCTYPE block. Stored separately from persistent entities and **wiped on every `getInstance()` call** so they cannot leak between documents.
|
|
72
|
+
|
|
73
|
+
Set via `addInputEntities()`. Never call this manually — `BaseOutputBuilder` calls it automatically.
|
|
74
|
+
|
|
75
|
+
### `system` — Named entity groups
|
|
76
|
+
|
|
77
|
+
Opt-in. Trusted programmer-supplied groups. Compose freely:
|
|
78
|
+
|
|
79
|
+
```js
|
|
80
|
+
import {
|
|
81
|
+
COMMON_HTML,
|
|
82
|
+
CURRENCY_ENTITIES,
|
|
83
|
+
MATH_ENTITIES,
|
|
84
|
+
ARROW_ENTITIES,
|
|
85
|
+
NUMERIC_ENTITIES,
|
|
86
|
+
} from '@nodable/entities';
|
|
87
|
+
|
|
88
|
+
const replacer = new EntityReplacer({
|
|
89
|
+
system: { ...COMMON_HTML, ...MATH_ENTITIES },
|
|
90
|
+
});
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
| Group | Contents |
|
|
94
|
+
|---------------------|----------|
|
|
95
|
+
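| `COMMON_HTML` | `&nbsp;` `&copy;` `&reg;` `&trade;` `&mdash;` `&ndash;` `&hellip;` `&laquo;` `&raquo;` `&lsquo;` `&rsquo;` `&ldquo;` `&rdquo;` `&bull;` `&para;` `&sect;` `&deg;` `&frac12;` `&frac14;` `&frac34;` |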
|
|
96
|
+
| `CURRENCY_ENTITIES` | `&cent;` `&pound;` `&yen;` `&euro;` `&inr;` `&curren;` `&fnof;` |
|
|
97
|
+
| `MATH_ENTITIES` | `&times;` `&divide;` `&plusmn;` `&minus;` `&sup2;` `&sup3;` `&permil;` `&infin;` `&sum;` `&prod;` `&radic;` `&ne;` `&le;` `&ge;` |
|
|
98
|
+
| `ARROW_ENTITIES` | `&larr;` `&uarr;` `&rarr;` `&darr;` `&harr;` `&lArr;` `&uArr;` `&rArr;` `&dArr;` `&hArr;` |
|
|
99
|
+
| `NUMERIC_ENTITIES` | `&#NNN;` decimal and `&#xHH;` hex refs — any valid Unicode code point |
|
|
100
|
+
|
|
101
|
+
### `default` — Built-in XML entities
|
|
102
|
+
|
|
103
|
+
Always on unless explicitly disabled.
|
|
104
|
+
|
|
105
|
+
| Entity | Output |
|
|
106
|
+
|----------|--------|
|
|
107
|
+
| `&lt;` | `<` |
|
|
108
|
+
| `&gt;` | `>` |
|
|
109
|
+
| `&quot;` | `"` |
|
|
110
|
+
| `&apos;` | `'` |
|
|
111
|
+
|
|
112
|
+
### `amp` — Final pass
|
|
113
|
+
|
|
114
|
+
`&amp;` → `&`
|
|
115
|
+
|
|
116
|
+
Processed **after all other categories** to prevent double-expansion:
|
|
117
|
+
- `&amp;lt;` → `&lt;` ✓ (not `<`)
|
|
118
|
+
- `&amp;amp;` → `&amp;` ✓ (not `&`)
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Constructor API
|
|
123
|
+
|
|
124
|
+
```js
|
|
125
|
+
const replacer = new EntityReplacer({
|
|
126
|
+
// Category toggles
|
|
127
|
+
default: true, // true (default) | false | custom EntityTable object
|
|
128
|
+
amp: true, // true (default) | false | null
|
|
129
|
+
system: false, // false (default) | true for COMMON_HTML | EntityTable object
|
|
130
|
+
|
|
131
|
+
// Security limits — 0 = unlimited
|
|
132
|
+
maxTotalExpansions: 0,
|
|
133
|
+
maxExpandedLength: 0,
|
|
134
|
+
|
|
135
|
+
// Which categories count against the limits
|
|
136
|
+
applyLimitsTo: 'external', // 'external' (default) | 'all' | ['external', 'system'] | ...
|
|
137
|
+
|
|
138
|
+
// Post-processing hook — fires once on the fully resolved string
|
|
139
|
+
postCheck: null, // (resolved: string, original: string) => string
|
|
140
|
+
});
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## EntityReplacer Instance Methods
|
|
146
|
+
|
|
147
|
+
### `replace(str)`
|
|
148
|
+
|
|
149
|
+
Replace all entity references in `str`. Returns `str` unchanged (same reference) if no `&` is present — fast path.
|
|
150
|
+
|
|
151
|
+
```js
|
|
152
|
+
replacer.replace('Tom &amp; Jerry &lt;cartoons&gt;');
|
|
153
|
+
// → 'Tom & Jerry <cartoons>'
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### `setExternalEntities(map)`
|
|
157
|
+
|
|
158
|
+
Replace the full set of **persistent** external entities. These survive across all documents and are not cleared by `getInstance()`.
|
|
159
|
+
|
|
160
|
+
```js
|
|
161
|
+
replacer.setExternalEntities({ brand: 'Acme', year: '2025' });
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Calling this a second time replaces the entire persistent map. Values containing `&` are silently skipped.
|
|
165
|
+
|
|
166
|
+
### `addExternalEntity(key, value)`
|
|
167
|
+
|
|
168
|
+
Append a single persistent external entity without disturbing the rest.
|
|
169
|
+
|
|
170
|
+
```js
|
|
171
|
+
replacer.addExternalEntity('brand', 'Acme');
|
|
172
|
+
replacer.addExternalEntity('year', '2025');
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### `addInputEntities(map)`
|
|
176
|
+
|
|
177
|
+
Inject **input/runtime** (DOCTYPE) entities for the current document. These are stored separately from persistent entities and wiped on the next `getInstance()` call. Also resets per-document expansion counters.
|
|
178
|
+
|
|
179
|
+
```js
|
|
180
|
+
// Called automatically by BaseOutputBuilder — no manual wiring needed.
|
|
181
|
+
replacer.addInputEntities(doctypeEntityMap);
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Values containing `&` are silently skipped. Accepts pre-built `{ regex, val }` or `{ regx, val }` objects as produced by `DocTypeReader`.
|
|
185
|
+
|
|
186
|
+
### `getInstance()`
|
|
187
|
+
|
|
188
|
+
Reset all per-document state and return `this`.
|
|
189
|
+
|
|
190
|
+
**Clears:**
|
|
191
|
+
- input/runtime entities (DOCTYPE)
|
|
192
|
+
- `_totalExpansions` counter
|
|
193
|
+
- `_expandedLength` counter
|
|
194
|
+
|
|
195
|
+
**Preserves:**
|
|
196
|
+
- persistent external entities set via `setExternalEntities()` / `addExternalEntity()`
|
|
197
|
+
- all constructor config
|
|
198
|
+
|
|
199
|
+
The builder factory calls this when creating a new builder instance, ensuring each document starts clean whether or not it has a DOCTYPE.
|
|
200
|
+
|
|
201
|
+
```js
|
|
202
|
+
// In a builder factory:
|
|
203
|
+
getInstance() {
|
|
204
|
+
const builder = new MyBuilder(this.config);
|
|
205
|
+
builder.entityParser = this.entityVP.getInstance();
|
|
206
|
+
return builder;
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Document-to-Document Safety
|
|
213
|
+
|
|
214
|
+
A key design goal is that entities from one document never bleed into the next. Here's how the two categories work together:
|
|
215
|
+
|
|
216
|
+
```
|
|
217
|
+
Document 1 parse:
|
|
218
|
+
factory.getInstance() → evp.getInstance() [clears input, resets counters]
|
|
219
|
+
builder sees DOCTYPE → evp.addInputEntities({ version: '1.0' })
|
|
220
|
+
builder processes values → evp.parse('&brand; v&version;') → 'Acme v1.0'
|
|
221
|
+
|
|
222
|
+
Document 2 parse (no DOCTYPE):
|
|
223
|
+
factory.getInstance() → evp.getInstance() [clears &version;, resets counters]
|
|
224
|
+
no DOCTYPE → addInputEntities() not called
|
|
225
|
+
builder processes values → evp.parse('&brand; v&version;') → 'Acme v&version;'
|
|
226
|
+
↑ persistent &brand; works
|
|
227
|
+
↑ &version; is gone — correct
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## Security Controls
|
|
233
|
+
|
|
234
|
+
### Expansion count limit
|
|
235
|
+
|
|
236
|
+
Caps the number of entity references that may be expanded per document.
|
|
237
|
+
|
|
238
|
+
```js
|
|
239
|
+
const replacer = new EntityReplacer({ maxTotalExpansions: 1000 });
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Throws `Error` if exceeded:
|
|
243
|
+
> `[EntityReplacer] Entity expansion count limit exceeded: 1001 > 1000`
|
|
244
|
+
|
|
245
|
+
### Expanded length limit
|
|
246
|
+
|
|
247
|
+
Caps the total number of characters *added* by entity expansion per document.
|
|
248
|
+
|
|
249
|
+
```js
|
|
250
|
+
const replacer = new EntityReplacer({ maxExpandedLength: 65536 });
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Throws `Error` if exceeded:
|
|
254
|
+
> `[EntityReplacer] Expanded content length limit exceeded: 65537 > 65536`
|
|
255
|
+
|
|
256
|
+
### `applyLimitsTo`
|
|
257
|
+
|
|
258
|
+
Controls which categories count against the limits.
|
|
259
|
+
|
|
260
|
+
```js
|
|
261
|
+
// Default — only untrusted injected entities (safest)
|
|
262
|
+
applyLimitsTo: 'external'
|
|
263
|
+
|
|
264
|
+
// All categories
|
|
265
|
+
applyLimitsTo: 'all'
|
|
266
|
+
|
|
267
|
+
// Specific combination
|
|
268
|
+
applyLimitsTo: ['external', 'system']
|
|
269
|
+
applyLimitsTo: ['external', 'default']
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
## `postCheck` Hook
|
|
275
|
+
|
|
276
|
+
Fires **once** on the fully resolved string, after all categories have been processed. Not called if the string is unchanged (no `&` present or no matches found).
|
|
277
|
+
|
|
278
|
+
```js
|
|
279
|
+
// Signature
|
|
280
|
+
postCheck: (resolved: string, original: string) => string
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
- `resolved` — string after all entity replacements
|
|
284
|
+
- `original` — the original input string before any replacement
|
|
285
|
+
- Must **return a string**
|
|
286
|
+
- To reject expansion: `return original`
|
|
287
|
+
- To sanitize: return a modified version of `resolved`
|
|
288
|
+
|
|
289
|
+
Examples:
|
|
290
|
+
|
|
291
|
+
```js
|
|
292
|
+
// Reject if expansion produces any HTML tags
|
|
293
|
+
postCheck: (resolved, original) =>
|
|
294
|
+
/<[a-z]/i.test(resolved) ? original : resolved
|
|
295
|
+
|
|
296
|
+
// Strip all tag-like content from the result
|
|
297
|
+
postCheck: (resolved) =>
|
|
298
|
+
resolved.replace(/<[^>]*>/g, '')
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
---
|
|
302
|
+
|
|
303
|
+
## `EntitiesValueParser` — `@nodable/flexible-xml-parser` adapter
|
|
304
|
+
|
|
305
|
+
`EntitiesValueParser` wraps `EntityReplacer` and implements the `ValueParser` interface used by `@nodable/flexible-xml-parser`.
|
|
306
|
+
|
|
307
|
+
### Setup
|
|
308
|
+
|
|
309
|
+
```js
|
|
310
|
+
import { EntitiesValueParser, COMMON_HTML } from '@nodable/entities';
|
|
311
|
+
|
|
312
|
+
const evp = new EntitiesValueParser({
|
|
313
|
+
system: COMMON_HTML,
|
|
314
|
+
maxTotalExpansions: 500,
|
|
315
|
+
});
|
|
316
|
+
|
|
317
|
+
// Persistent entities — survive across all documents:
|
|
318
|
+
evp.setExternalEntities({ brand: 'Acme', product: 'Widget' });
|
|
319
|
+
|
|
320
|
+
// Register with the builder factory:
|
|
321
|
+
myBuilder.registerValueParser('entity', evp);
|
|
322
|
+
|
|
323
|
+
const parser = new XMLParser({ OutputBuilder: myBuilder });
|
|
324
|
+
parser.parse(xml);
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
### Constructor options
|
|
328
|
+
|
|
329
|
+
All `EntityReplacerOptions` are accepted, plus one extra:
|
|
330
|
+
|
|
331
|
+
```js
|
|
332
|
+
new EntitiesValueParser({
|
|
333
|
+
// All EntityReplacer options...
|
|
334
|
+
default: true,
|
|
335
|
+
system: COMMON_HTML,
|
|
336
|
+
maxTotalExpansions: 1000,
|
|
337
|
+
postCheck: (resolved, original) => resolved,
|
|
338
|
+
|
|
339
|
+
// Extra: initial persistent entity map (same as calling setExternalEntities after construction)
|
|
340
|
+
entities: { copy: '©', trade: '™', brand: 'Acme Corp' },
|
|
341
|
+
})
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
### `setExternalEntities(map)`
|
|
345
|
+
|
|
346
|
+
Replace the full persistent entity map. These entities survive across all documents.
|
|
347
|
+
|
|
348
|
+
```js
|
|
349
|
+
evp.setExternalEntities({ brand: 'Acme', copy: '©' });
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
### `addEntity(key, value)`
|
|
353
|
+
|
|
354
|
+
Append a single persistent external entity. Previously registered entities are preserved.
|
|
355
|
+
|
|
356
|
+
```js
|
|
357
|
+
evp.addEntity('copy', '©');
|
|
358
|
+
evp.addEntity('trade', '™');
|
|
359
|
+
evp.addEntity('year', '2024');
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
Throws if `key` contains `&` or `;`, or if `value` contains `&`.
|
|
363
|
+
|
|
364
|
+
### `getInstance()` — called by builder factory
|
|
365
|
+
|
|
366
|
+
Reset per-document state (input entities + counters) and return `this`. The builder factory calls this each time it creates a new builder instance.
|
|
367
|
+
|
|
368
|
+
```js
|
|
369
|
+
// In your CompactObjBuilderFactory.getInstance():
|
|
370
|
+
getInstance() {
|
|
371
|
+
const builder = new CompactObjBuilder(this._config);
|
|
372
|
+
// Reset EVP for the new document:
|
|
373
|
+
builder.entityParser = this._entityVP.getInstance();
|
|
374
|
+
return builder;
|
|
375
|
+
}
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
### `addInputEntities(entities)` — called automatically
|
|
379
|
+
|
|
380
|
+
Receives the DOCTYPE entity map from `BaseOutputBuilder` once per parse. Resets per-document expansion counters. Accepts both plain string values and `{ regx, val }` objects from `DocTypeReader`.
|
|
381
|
+
|
|
382
|
+
### `parse(val, context?)`
|
|
383
|
+
|
|
384
|
+
Implements the `ValueParser` interface. `context` is accepted but ignored. Returns non-string input unchanged.
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
## Custom Entity Tables
|
|
389
|
+
|
|
390
|
+
Pass any plain object as `default` or `system` to replace the built-in set:
|
|
391
|
+
|
|
392
|
+
```js
|
|
393
|
+
const myEntities = {
|
|
394
|
+
br: { regex: /&br;/g, val: '\n' },
|
|
395
|
+
tab: { regex: /&tab;/g, val: '\t' },
|
|
396
|
+
};
|
|
397
|
+
|
|
398
|
+
const replacer = new EntityReplacer({ default: myEntities });
|
|
399
|
+
replacer.replace('line1&br;line2&tab;indented');
|
|
400
|
+
// → 'line1\nline2\tindented'
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
Extend the built-in tables via spreading:
|
|
404
|
+
|
|
405
|
+
```js
|
|
406
|
+
import { DEFAULT_XML_ENTITIES } from '@nodable/entities';
|
|
407
|
+
|
|
408
|
+
const replacer = new EntityReplacer({
|
|
409
|
+
default: { ...DEFAULT_XML_ENTITIES, br: { regex: /&br;/g, val: '\n' } },
|
|
410
|
+
});
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## Comparison with `entities` npm package
|
|
416
|
+
|
|
417
|
+
| Feature | `entities` pkg | `@nodable/entities` |
|
|
418
|
+
|------------------------------------------------|-------------------|---------------------|
|
|
419
|
+
| XML entity decoding | ✅ | ✅ |
|
|
420
|
+
| HTML entity decoding | ✅ full ~2000 | ✅ grouped, composable |
|
|
421
|
+
| Numeric refs with leading zeros | ✅ | ✅ |
|
|
422
|
+
| DOCTYPE / external entity injection | ❌ | ✅ |
|
|
423
|
+
| Persistent vs. input entity separation | ❌ | ✅ |
|
|
424
|
+
| Per-document reset via `getInstance()` | ❌ | ✅ |
|
|
425
|
+
| Expansion count limit | ❌ | ✅ |
|
|
426
|
+
| Expanded length limit | ❌ | ✅ |
|
|
427
|
+
| `applyLimitsTo` granularity | ❌ | ✅ |
|
|
428
|
+
| `postCheck` hook | ❌ | ✅ |
|
|
429
|
+
| Encoding / HTML escaping | ✅ | ❌ out of scope |
|
|
430
|
+
| Zero dependencies | ✅ | ✅ |
|
|
431
|
+
|
|
432
|
+
---
|
|
433
|
+
|
|
434
|
+
## TypeScript
|
|
435
|
+
|
|
436
|
+
Full TypeScript declarations are included via `index.d.ts`. No `@types/` package needed.
|
|
437
|
+
|
|
438
|
+
```ts
|
|
439
|
+
import EntityReplacer, {
|
|
440
|
+
EntitiesValueParser,
|
|
441
|
+
COMMON_HTML,
|
|
442
|
+
EntityTable,
|
|
443
|
+
EntityReplacerOptions,
|
|
444
|
+
EntitiesValueParserOptions,
|
|
445
|
+
} from '@nodable/entities';
|
|
446
|
+
|
|
447
|
+
// EntityReplacer
|
|
448
|
+
const opts: EntityReplacerOptions = {
|
|
449
|
+
default: true,
|
|
450
|
+
system: COMMON_HTML,
|
|
451
|
+
maxTotalExpansions: 500,
|
|
452
|
+
postCheck: (resolved, original) =>
|
|
453
|
+
/<script/i.test(resolved) ? original : resolved,
|
|
454
|
+
};
|
|
455
|
+
const replacer = new EntityReplacer(opts);
|
|
456
|
+
replacer.setExternalEntities({ brand: 'Acme' });
|
|
457
|
+
replacer.getInstance(); // reset for new document
|
|
458
|
+
replacer.addInputEntities({ version: '1.0' }); // from DOCTYPE
|
|
459
|
+
|
|
460
|
+
// EntitiesValueParser
|
|
461
|
+
const evpOpts: EntitiesValueParserOptions = {
|
|
462
|
+
system: COMMON_HTML,
|
|
463
|
+
entities: { brand: 'Acme' },
|
|
464
|
+
};
|
|
465
|
+
const evp = new EntitiesValueParser(evpOpts);
|
|
466
|
+
evp.addEntity('copy', '©');
|
|
467
|
+
evp.getInstance(); // called by builder factory
|
|
468
|
+
evp.addInputEntities({ company: 'Nodable' }); // called by BaseOutputBuilder
|
|
469
|
+
const result: string = evp.parse('&lt;&copy;&brand;');
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
## License
|
|
475
|
+
|
|
476
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@nodable/entities",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Replace XML, HTML, and external entities with security controls",
|
|
5
|
+
"main": "./src/index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"sideEffects": false,
|
|
8
|
+
"types": "./src/index.d.ts",
|
|
9
|
+
"scripts": {
|
|
10
|
+
"test": "node --experimental-vm-modules node_modules/.bin/jest",
|
|
11
|
+
"test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch",
|
|
12
|
+
"test:coverage": "node --experimental-vm-modules node_modules/.bin/jest --coverage",
|
|
13
|
+
"lint": "eslint src/ test/"
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"src",
|
|
17
|
+
"README.md"
|
|
18
|
+
],
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "git+https://github.com/nodable/val-parsers.git"
|
|
22
|
+
},
|
|
23
|
+
"keywords": [
|
|
24
|
+
"fast",
|
|
25
|
+
"xml",
|
|
26
|
+
"html",
|
|
27
|
+
"entity"
|
|
28
|
+
],
|
|
29
|
+
"author": "Amit Gupta (https://solothought.com)",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public"
|
|
33
|
+
},
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"jest": "^29.7.0"
|
|
36
|
+
},
|
|
37
|
+
"funding": [
|
|
38
|
+
{
|
|
39
|
+
"type": "github",
|
|
40
|
+
"url": "https://github.com/sponsors/nodable"
|
|
41
|
+
}
|
|
42
|
+
],
|
|
43
|
+
"jest": {
|
|
44
|
+
"testMatch": [
|
|
45
|
+
"**/?(*.)+(spec|test).[jt]s?(x)",
|
|
46
|
+
"**/*_spec.[jt]s?(x)"
|
|
47
|
+
]
|
|
48
|
+
}
|
|
49
|
+
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import EntityReplacer from './EntityReplacer.js';
|
|
2
|
+
|
|
3
|
+
/**
 * EntitiesValueParser — value-parser adapter that wraps `EntityReplacer`.
 *
 * Register an instance under the key `'entity'` on a `@nodable/flexible-xml-parser`
 * output builder factory to enable entity expansion for all parsed text values.
 *
 * ## Lifecycle
 *
 * 1. **Construction** — supply configuration and optional persistent entities.
 * 2. **`setExternalEntities(map)`** — (re)set the full persistent entity map.
 *    Or use `addEntity(key, value)` to add one at a time.
 * 3. **`getInstance()`** — builder factory calls this when creating a new builder
 *    instance. Resets input entities and per-document counters. Returns `this`.
 * 4. **`addInputEntities(map)`** — builder calls this if the document has a
 *    DOCTYPE block. Stores entities for *this document only*.
 * 5. **`parse(val)`** — called by the builder for each text value.
 *
 * ```js
 * const evp = new EntitiesValueParser({ system: COMMON_HTML });
 * evp.setExternalEntities({ brand: 'Acme' });
 * builder.registerValueParser('entity', evp);
 * ```
 *
 * -------------------------------------------------------------------------
 * Constructor options (all optional)
 * -------------------------------------------------------------------------
 *
 * `default`            — `true` (default) | `false`/`null` | custom EntityTable
 * `system`             — `false` (default) | `true` for COMMON_HTML | EntityTable
 * `amp`                — `true` (default) | `false`/`null`
 * `maxTotalExpansions` — max entity refs expanded per document (0 = unlimited)
 * `maxExpandedLength`  — max characters added by expansion per document (0 = unlimited)
 * `applyLimitsTo`      — which categories count toward limits (default: `'external'`)
 * `postCheck`          — `(resolved, original) => string` hook
 * `entities`           — initial persistent entity map, e.g. `{ copy: '©' }`
 */
export default class EntitiesValueParser {
  /**
   * Create the adapter and its underlying `EntityReplacer`.
   *
   * @param {object} [options] — passed as-is to the `EntityReplacer` constructor.
   *   When `options.entities` is an object, its entries are validated and
   *   installed as the initial persistent entity map.
   * @throws {Error} if an entry of `options.entities` has an invalid key or value
   */
  constructor(options = {}) {
    this._replacer = new EntityReplacer(options);

    // Inline entities go through the same validate-then-set path as a later
    // `setExternalEntities()` call. A shallow copy is handed over so the
    // replacer never receives the caller's own options object.
    if (options.entities && typeof options.entities === 'object') {
      this.setExternalEntities({ ...options.entities });
    }
  }

  // -------------------------------------------------------------------------
  // Persistent external entity registration
  // -------------------------------------------------------------------------

  /**
   * Replace the full set of persistent external entities.
   * These survive across documents and are never wiped by `getInstance()`.
   *
   * @param {Record<string, string>} map — e.g. `{ copy: '©', brand: 'Acme' }`
   * @throws {TypeError} if `map` is `null` or `undefined`
   * @throws {Error} if any key contains `&` or `;`, or any value is not a
   *   string or contains `&`
   */
  setExternalEntities(map) {
    if (map === null || map === undefined) {
      throw new TypeError(
        `[EntitiesValueParser] setExternalEntities() expects an object map, got: ${String(map)}`
      );
    }

    // Validate every entry before touching the replacer, so one bad entry
    // cannot leave the previous map half-replaced.
    for (const [key, val] of Object.entries(map)) {
      this._validateEntityArgs(key, val);
    }
    this._replacer.setExternalEntities(map);
  }

  /**
   * Add (or replace) a single persistent external entity.
   * Existing persistent entities are preserved.
   *
   * @param {string} key   — bare name without `&` / `;`, e.g. `'copy'`
   * @param {string} value — replacement string, e.g. `'©'`
   * @throws {Error} if `key` or `value` fails validation
   */
  addEntity(key, value) {
    this._validateEntityArgs(key, value);
    this._replacer.addExternalEntity(key, value);
  }

  // -------------------------------------------------------------------------
  // Builder factory integration
  // -------------------------------------------------------------------------

  /**
   * Reset per-document state (input entities + expansion counters) and return `this`.
   *
   * The builder factory calls this when creating a new builder instance so that
   * DOCTYPE entities from a previous document are never carried over.
   *
   * @returns {EntitiesValueParser} `this`
   */
  getInstance() {
    this._replacer.getInstance();
    return this;
  }

  // -------------------------------------------------------------------------
  // DOCTYPE integration — called by BaseOutputBuilder
  // -------------------------------------------------------------------------

  /**
   * Receive DOCTYPE entities from the output builder.
   *
   * These are stored separately from persistent entities and wiped on the next
   * `getInstance()` call. Resets per-document expansion counters.
   *
   * The map is handed to the replacer without validation here.
   *
   * @param {Record<string, string | { regx: RegExp, val: string | Function }>} entities
   *   Raw entity map from `DocTypeReader` — values may be plain strings or
   *   `{ regx, val }` objects (note: `regx`, not `regex`, matching the reader's output).
   */
  addInputEntities(entities) {
    this._replacer.addInputEntities(entities);
  }

  // -------------------------------------------------------------------------
  // ValueParser interface
  // -------------------------------------------------------------------------

  /**
   * Replace entity references in `val`.
   *
   * @param {string} val — non-string input is returned unchanged
   * @param {object} [_context] — accepted for interface compatibility; unused
   * @returns {string}
   */
  parse(val, _context) {
    if (typeof val !== 'string') return val;
    return this._replacer.replace(val);
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Guard for persistent entity registration.
   *
   * Keys must be bare names (the `&` and `;` delimiters are rejected), and
   * values must be strings free of `&`.
   *
   * @param {string} key
   * @param {string} value
   * @throws {Error} on the first violated rule
   */
  _validateEntityArgs(key, value) {
    if (typeof key !== 'string' || key.includes('&') || key.includes(';')) {
      throw new Error(
        `[EntitiesValueParser] Entity key must not contain '&' or ';'. ` +
        `Use 'copy' for '&copy;', got: ${JSON.stringify(key)}`
      );
    }
    if (typeof value !== 'string' || value.includes('&')) {
      throw new Error(
        `[EntitiesValueParser] Entity value must be a plain string that does not ` +
        `contain '&', got: ${JSON.stringify(value)}`
      );
    }
  }
}
|