@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
import type { TNode } from '#src/parser.ts';
|
|
2
|
+
import { saxEngine } from '#src/saxEngine.ts';
|
|
3
|
+
import type { Attributes } from '#src/saxEngine.ts';
|
|
4
|
+
import {
|
|
5
|
+
HTML_VOID_ELEMENTS,
|
|
6
|
+
HTML_RAW_CONTENT_TAGS,
|
|
7
|
+
} from '#src/utilities/htmlConstants.ts';
|
|
8
|
+
import { convertItemToLossy } from '#src/converters/lossy.ts';
|
|
9
|
+
import type { LossyValue } from '#src/converters/lossy.ts';
|
|
10
|
+
import { convertItemToLossless } from '#src/converters/lossless.ts';
|
|
11
|
+
import type { LosslessEntry } from '#src/converters/lossless.ts';
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Options
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/**
 * Options for `XmlParseStream`.
 *
 * Only the fields that the transform layer actually consumes are exposed here.
 * This keeps the API honest — options like `trimWhitespace`, `entities`, and
 * `strict` belong to the synchronous `parse()` function, not the streaming
 * tree-builder.
 */
export interface XmlParseStreamOptions {
  /**
   * Number of leading characters to drop from the start of the input before
   * parsing begins.
   *
   * The count is measured on the incoming string chunks (`chunk.length`, i.e.
   * UTF-16 code units), not on raw bytes, and may span several chunks.
   * When a string is passed, its `.length` is used as the count.
   */
  offset?: number | string;
  /** Enable HTML parsing mode (sets default selfClosingTags/rawContentTags). */
  html?: boolean;
  /**
   * Tag names that are self-closing (void elements).
   * Defaults to `HTML_VOID_ELEMENTS` when `html` is `true`, else `[]`.
   */
  selfClosingTags?: string[];
  /**
   * Tag names whose content is raw text (not parsed as XML/HTML).
   * Defaults to `HTML_RAW_CONTENT_TAGS` when `html` is `true`, else `[]`.
   */
  rawContentTags?: string[];
  /** Keep XML comments in the output. Defaults to `false`. */
  keepComments?: boolean;
  /**
   * Emit only elements matching these tag names instead of waiting for the
   * entire top-level tree to close.
   *
   * When set, each matching element is emitted as a standalone `TNode` subtree
   * the moment its close tag is encountered, regardless of nesting depth.
   * Non-matching ancestor elements are **not** built or emitted — the stream
   * only produces the selected subtrees.
   *
   * When multiple selected tags are nested (e.g. selecting both `item` and
   * `sub` where `<sub>` appears inside `<item>`), each matching element
   * is emitted independently as it closes. The inner element appears both as a
   * separate emission **and** as a child within its ancestor's subtree.
   *
   * Accepts a single tag name or an array of tag names. An empty array is
   * treated the same as leaving `select` unset.
   *
   * @example
   * ```ts
   * // Given:
   * //   <root>
   * //     <item>
   * //       <sub>1</sub><box>a</box>
   * //     </item>
   * //     <item>
   * //       <sub>2</sub><box>b</box>
   * //     </item>
   * //   </root>
   * //
   * // Without select: emits one big <root> TNode after </root>
   * // With select: "item": emits two <item> TNodes as each closes
   * const stream = new XmlParseStream({ select: 'item' });
   *
   * // Nested selection: emits each <sub> as it closes, then the
   * // containing <item> (which still includes the <sub> as a child).
   * const stream2 = new XmlParseStream({ select: ['item', 'sub'] });
   * ```
   */
  select?: string | string[];
  /**
   * Output format for emitted chunks.
   *
   * - `'dom'` (default) — emit raw `TNode | string` values.
   * - `'lossy'` — convert each item to the compact lossy format (`LossyValue`).
   * - `'lossless'` — convert each item to the order-preserving lossless format
   *   (`LosslessEntry`).
   */
  output?: 'dom' | 'lossy' | 'lossless';
}
|
|
93
|
+
|
|
94
|
+
// Re-export converter output types so consumers can import them from the
|
|
95
|
+
// same entry point when using `output: 'lossy'` or `output: 'lossless'`.
|
|
96
|
+
export type { LossyValue, LosslessEntry };
|
|
97
|
+
// @generated:char-codes:begin
const EQ = 61; // =
const SQUOTE = 39; // '
const DQUOTE = 34; // "
const TAB = 9; // \t
const LF = 10; // \n
const CR = 13; // \r
const SPACE = 32; // (space)
// @generated:char-codes:end

/** Whether `charCode` is one of the four whitespace characters: space, tab, LF, CR. */
function isWhitespaceCode(charCode: number): boolean {
  return (
    charCode === SPACE ||
    charCode === TAB ||
    charCode === LF ||
    charCode === CR
  );
}

/**
 * Return the index of the first non-whitespace character of `text` at or
 * after `from`, or `text.length` when only whitespace remains.
 */
function skipWhitespace(text: string, from: number): number {
  let index = from;
  while (index < text.length && isWhitespaceCode(text.charCodeAt(index))) {
    index++;
  }
  return index;
}

/**
 * Convert a SAX `Attributes` object into the format used by `parse()`:
 * - Returns `null` when there are no attributes.
 * - Returns an `Object.create(null)` prototype-free record otherwise.
 */
function toNodeAttributes(
  attributes: Attributes,
): Record<string, string | null> | null {
  const keys = Object.keys(attributes);
  if (keys.length === 0) return null;
  const out: Record<string, string | null> = Object.create(null);
  for (const key of keys) {
    out[key] = attributes[key]!;
  }
  return out;
}

/**
 * Parse a processing instruction body string into an attributes record.
 * e.g. `version="1.0" encoding="UTF-8"` -> `{ version: "1.0", encoding: "UTF-8" }`
 *
 * This replicates the attribute-parsing behavior of `parse()` so that PIs
 * emitted by `XmlParseStream` match the `{ tagName: "?xml", attributes: {...} }`
 * format that consumers expect.
 *
 * Parsing rules:
 * - A name runs until `=` or whitespace; a stray leading `=` is skipped.
 * - Whitespace is allowed on both sides of `=`.
 * - A quoted value (single or double quotes) runs to the matching quote, or to
 *   the end of the body when the closing quote is missing.
 * - An unquoted value runs until the next whitespace.
 * - A name with no `=`, or with `=` followed by nothing, maps to `null`.
 *
 * @param body - The text of the processing instruction after its target name.
 * @returns A prototype-free (`Object.create(null)`) record, or `null` when the
 *   body contains no attributes, matching `parse()`.
 */
function parsePIAttributes(body: string): Record<string, string | null> | null {
  let attributes: Record<string, string | null> | null = null;
  const bodyLength = body.length;
  let i = 0;

  while (i < bodyLength) {
    // Skip whitespace between attributes.
    if (isWhitespaceCode(body.charCodeAt(i))) {
      i++;
      continue;
    }

    // Read attribute name: everything up to `=` or whitespace.
    const nameStart = i;
    while (i < bodyLength) {
      const charCode = body.charCodeAt(i);
      if (charCode === EQ || isWhitespaceCode(charCode)) break;
      i++;
    }
    if (i === nameStart) {
      // Empty name means we are sitting on a stray `=`; step over it.
      i++;
      continue;
    }
    const name = body.substring(nameStart, i);

    // Allocate the attributes object lazily on the first attribute so that a
    // body without attributes yields `null`.
    if (attributes === null) {
      attributes = Object.create(null) as Record<string, string | null>;
    }
    const record = attributes;

    i = skipWhitespace(body, i);

    if (i >= bodyLength || body.charCodeAt(i) !== EQ) {
      // Boolean attribute (no value). `i` is left on the next token.
      record[name] = null;
      continue;
    }

    // Step over `=` and any whitespace that follows it.
    i = skipWhitespace(body, i + 1);

    if (i >= bodyLength) {
      // `name=` at the very end of the body: no value to read.
      record[name] = null;
      continue;
    }

    const firstCode = body.charCodeAt(i);
    if (firstCode === DQUOTE || firstCode === SQUOTE) {
      const valueStartIndex = i + 1; // just past the opening quote
      const end = body.indexOf(body.charAt(i), valueStartIndex);
      if (end === -1) {
        // Unterminated quote: take the rest of the body as the value.
        record[name] = body.substring(valueStartIndex);
        i = bodyLength;
      } else {
        record[name] = body.substring(valueStartIndex, end);
        i = end + 1;
      }
    } else {
      // Unquoted value — read until whitespace.
      const valueStartIndex = i;
      while (i < bodyLength && !isWhitespaceCode(body.charCodeAt(i))) {
        i++;
      }
      record[name] = body.substring(valueStartIndex, i);
    }
  }

  return attributes;
}
|
|
246
|
+
|
|
247
|
+
/**
 * A Web Streams `TransformStream` that incrementally parses XML chunks into
 * `TNode` subtrees (or lossy/lossless objects). Works in browsers, Node.js
 * 18+, Deno, and Bun.
 *
 * Follows the platform stream class convention (like `TextDecoderStream`,
 * `DecompressionStream`, etc.) — instantiate with `new` and use with
 * `.pipeThrough()`.
 *
 * Internally powered by the SAX engine with a tree-construction layer
 * that assembles `TNode` subtrees and emits them as they complete.
 *
 * By default, nodes are emitted when a top-level element closes (depth 0).
 * Use the `select` option to emit specific elements as they close at any depth,
 * without waiting for the entire document root to finish.
 *
 * Use the `output` option to choose the emitted format:
 * - `'dom'` (default) — raw `TNode | string`
 * - `'lossy'` — compact lossy objects (`LossyValue`)
 * - `'lossless'` — order-preserving objects (`LosslessEntry`)
 *
 * @example
 * ```ts
 * import { XmlParseStream } from '@eksml/xml/stream';
 *
 * const response = await fetch('/feed.xml');
 * const reader = response.body
 *   .pipeThrough(new TextDecoderStream())
 *   .pipeThrough(new XmlParseStream())
 *   .getReader();
 *
 * while (true) {
 *   const { done, value } = await reader.read();
 *   if (done) break;
 *   console.log(value); // TNode or string
 * }
 * ```
 */
export class XmlParseStream<TOutput = TNode | string> extends TransformStream<
  string,
  TOutput
> {
  /** Default DOM output (`TNode | string`). */
  constructor(options?: XmlParseStreamOptions & { output?: 'dom' });
  /** Lossy output — each item is converted to `LossyValue`. */
  constructor(options: XmlParseStreamOptions & { output: 'lossy' });
  /** Lossless output — each item is converted to `LosslessEntry`. */
  constructor(options: XmlParseStreamOptions & { output: 'lossless' });
  /** Dynamic output — when the `output` option is not a literal, returns the widest type. */
  constructor(options: XmlParseStreamOptions);
  constructor(options?: XmlParseStreamOptions) {
    const resolvedOptions = options ?? {};

    // Leading characters still to be dropped from the input. Measured in
    // string code units (`chunk.length`), not bytes. `||` (not `??`) is
    // deliberate so that `NaN` also falls back to 0.
    let remainingSkip: number =
      typeof resolvedOptions.offset === 'string'
        ? resolvedOptions.offset.length
        : resolvedOptions.offset || 0;

    // Resolve HTML-mode defaults (same logic as parse())
    const isHtml = resolvedOptions.html === true;
    const selfClosingTags =
      resolvedOptions.selfClosingTags ?? (isHtml ? HTML_VOID_ELEMENTS : []);
    const rawContentTags =
      resolvedOptions.rawContentTags ?? (isHtml ? HTML_RAW_CONTENT_TAGS : []);
    const keepComments = resolvedOptions.keepComments === true;

    // Resolve select into a Set for O(1) lookup. `null` selects default mode
    // (emit at depth 0); an empty array is treated like an absent option.
    const selectSet: Set<string> | null =
      resolvedOptions.select == null
        ? null
        : typeof resolvedOptions.select === 'string'
          ? new Set([resolvedOptions.select])
          : resolvedOptions.select.length > 0
            ? new Set(resolvedOptions.select)
            : null;

    // Resolve the output converter — identity for 'dom', otherwise a mapping fn.
    const outputMode = resolvedOptions.output ?? 'dom';
    const convert: (item: TNode | string) => TOutput =
      outputMode === 'lossy'
        ? (convertItemToLossy as (item: TNode | string) => TOutput)
        : outputMode === 'lossless'
          ? (convertItemToLossless as (item: TNode | string) => TOutput)
          : (((item: TNode | string) => item) as (
              item: TNode | string,
            ) => TOutput);

    // --- Tree-construction state ---

    // Controller of the running transform; refreshed on every
    // `transform`/`flush` call and read by the SAX handlers below.
    let streamController: TransformStreamDefaultController<TOutput> | null =
      null;

    // When `select` is null (default mode): stack holds all open TNodes.
    // Emission happens when stack.length returns to 0.
    //
    // When `select` is set: `depth` tracks total nesting depth (for all
    // elements, including non-selected ancestors). `stack` only holds nodes
    // within a selected subtree. `selectDepths` is a stack of depths at which
    // selected elements were opened — this allows nested selections (e.g.
    // selecting both `item` and `sub` where `sub` is a child of `item`).
    const stack: TNode[] = [];
    let depth = 0;
    const selectDepths: number[] = [];

    // -----------------------------------------------------------------------
    // Shared helpers
    // -----------------------------------------------------------------------

    /** Build a childless node with the given tag name and attributes. */
    function createNode(
      tagName: string,
      attributes: Record<string, string | null> | null,
    ): TNode {
      return { tagName, attributes, children: [] };
    }

    /** The innermost open TNode (top of the stack), or null when none is open. */
    function currentParent(): TNode | null {
      return stack.length > 0 ? stack[stack.length - 1]! : null;
    }

    /** Append `item` to the innermost open node; a no-op when none is open. */
    function attachToParent(item: TNode | string): void {
      const parent = currentParent();
      if (parent) {
        parent.children.push(item);
      }
    }

    /** Send a finished item downstream through the configured converter. */
    function emit(item: TNode | string): void {
      if (streamController) {
        streamController.enqueue(convert(item));
      }
    }

    /** Attach `item` to the open node if there is one, otherwise emit it. */
    function emitOrAttach(item: TNode | string): void {
      if (currentParent()) {
        attachToParent(item);
      } else {
        emit(item);
      }
    }

    /** Whether we are currently inside a selected subtree (select mode only). */
    function insideSelection(): boolean {
      return selectDepths.length > 0;
    }

    // -----------------------------------------------------------------------
    // SAX handlers: default mode (no select)
    // -----------------------------------------------------------------------

    function defaultOnOpenTag(tagName: string, attributes: Attributes): void {
      const node = createNode(tagName, toNodeAttributes(attributes));
      attachToParent(node);
      stack.push(node);
    }

    function defaultOnCloseTag(_tagName: string): void {
      const node = stack.pop();
      if (!node) return;
      // Only a node that leaves the stack empty is top-level and gets emitted;
      // nested nodes were already attached to their parent when opened.
      if (stack.length === 0) {
        emit(node);
      }
    }

    // Text and CDATA outside any open element are discarded, not emitted.
    function defaultOnText(text: string): void {
      attachToParent(text);
    }

    function defaultOnCdata(data: string): void {
      attachToParent(data);
    }

    // Unlike text, a top-level comment is emitted as its own chunk.
    function defaultOnComment(comment: string): void {
      if (!keepComments) return;
      emitOrAttach(comment);
    }

    function defaultOnProcessingInstruction(name: string, body: string): void {
      emitOrAttach(createNode('?' + name, parsePIAttributes(body)));
    }

    function defaultOnDoctype(tagName: string, attributes: Attributes): void {
      emitOrAttach(createNode(tagName, toNodeAttributes(attributes)));
    }

    // -----------------------------------------------------------------------
    // SAX handlers: select mode
    // -----------------------------------------------------------------------

    function selectOnOpenTag(tagName: string, attributes: Attributes): void {
      depth++;
      const alreadyInside = insideSelection();
      const matches = selectSet!.has(tagName);

      // Non-selected ancestor — no allocation, just depth tracking.
      if (!alreadyInside && !matches) return;

      const node = createNode(tagName, toNodeAttributes(attributes));
      // A node that starts a new selected subtree has no parent to attach to;
      // anything opened inside a selection becomes a child of the open node.
      if (alreadyInside) {
        attachToParent(node);
      }
      stack.push(node);
      // Record the depth of every matching element, so it is emitted
      // independently when it closes (in addition to being part of its
      // ancestor's subtree when nested).
      if (matches) {
        selectDepths.push(depth);
      }
    }

    function selectOnCloseTag(_tagName: string): void {
      if (insideSelection()) {
        const node = stack.pop();
        if (depth === selectDepths[selectDepths.length - 1]!) {
          // A selected element is closing — emit it independently.
          if (node) {
            emit(node);
          }
          selectDepths.pop();
        }
        // Otherwise a descendant is closing: it is already attached to its
        // parent via onOpenTag, so popping it from the stack is all we need.
      }
      depth--;
    }

    // Outside a selected subtree every non-element event is ignored.
    function selectOnText(text: string): void {
      if (!insideSelection()) return;
      attachToParent(text);
    }

    function selectOnCdata(data: string): void {
      if (!insideSelection()) return;
      attachToParent(data);
    }

    function selectOnComment(comment: string): void {
      if (!keepComments) return;
      if (!insideSelection()) return;
      attachToParent(comment);
    }

    function selectOnProcessingInstruction(name: string, body: string): void {
      if (!insideSelection()) return;
      attachToParent(createNode('?' + name, parsePIAttributes(body)));
    }

    function selectOnDoctype(tagName: string, attributes: Attributes): void {
      if (!insideSelection()) return;
      attachToParent(createNode(tagName, toNodeAttributes(attributes)));
    }

    // -----------------------------------------------------------------------
    // Wire up the appropriate handler set
    // -----------------------------------------------------------------------

    const parser = saxEngine({
      selfClosingTags,
      rawContentTags,
      onOpenTag: selectSet ? selectOnOpenTag : defaultOnOpenTag,
      onCloseTag: selectSet ? selectOnCloseTag : defaultOnCloseTag,
      onText: selectSet ? selectOnText : defaultOnText,
      onCdata: selectSet ? selectOnCdata : defaultOnCdata,
      onComment: selectSet ? selectOnComment : defaultOnComment,
      onProcessingInstruction: selectSet
        ? selectOnProcessingInstruction
        : defaultOnProcessingInstruction,
      onDoctype: selectSet ? selectOnDoctype : defaultOnDoctype,
    });

    super({
      transform(
        chunk: string,
        controller: TransformStreamDefaultController<TOutput>,
      ): void {
        streamController = controller;
        // Handle offset: skip leading characters from the first chunk(s).
        if (remainingSkip > 0) {
          if (chunk.length <= remainingSkip) {
            // The whole chunk falls inside the skipped prefix.
            remainingSkip -= chunk.length;
            return;
          }
          chunk = chunk.substring(remainingSkip);
          remainingSkip = 0;
        }
        parser.write(chunk);
      },

      flush(controller: TransformStreamDefaultController<TOutput>): void {
        streamController = controller;
        parser.close();
      },
    });
  }
}
|