mdream 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1432 @@
1
+ import { ae as TEXT_NODE, af as NodeEventEnter, ab as collectNodeContent, aa as ELEMENT_NODE, a3 as TAG_PRE, M as TAG_LI, a4 as TAG_BLOCKQUOTE, ag as NO_SPACING, ah as DEFAULT_BLOCK_SPACING, ai as TABLE_ROW_SPACING, aj as LIST_ITEM_SPACING, ak as BLOCKQUOTE_SPACING, al as MARKDOWN_STRIKETHROUGH, am as MARKDOWN_HORIZONTAL_RULE, J as TAG_DD, K as TAG_DT, L as TAG_DL, e as TAG_ADDRESS, an as TAG_RP, ao as TAG_RT, ap as TAG_RUBY, aq as TAG_BDO, ar as TAG_TIME, as as TAG_VAR, at as TAG_DFN, au as TAG_CITE, av as TAG_U, z as TAG_ASIDE, aw as TAG_PLAINTEXT, ax as TAG_XMP, ay as TAG_NOFRAMES, az as TAG_NOSCRIPT, aA as TAG_SMALL, aB as TAG_SAMP, aC as TAG_Q, aD as TAG_MARK, aE as TAG_ABBR, aF as TAG_TEMPLATE, aG as TAG_PROGRESS, aH as TAG_METER, aI as TAG_DIALOG, aJ as TAG_MAP, s as TAG_IFRAME, aK as TAG_CANVAS, _ as TAG_VIDEO, Z as TAG_AUDIO, aL as TAG_LEGEND, t as TAG_FIELDSET, aM as TAG_OPTION, v as TAG_TEXTAREA, u as TAG_SELECT, Y as TAG_SVG, aN as TAG_WBR, aO as TAG_TRACK, aP as TAG_SOURCE, aQ as TAG_PARAM, aR as TAG_KEYGEN, w as TAG_INPUT, r as TAG_EMBED, aS as TAG_COL, aT as TAG_BASE, aU as TAG_AREA, aV as TAG_LINK, y as TAG_FORM, B as TAG_FOOTER, aW as TAG_KBD, S as TAG_TFOOT, U as TAG_TBODY, aX as TAG_CENTER, X as TAG_TABLE, T as TAG_BODY, x as TAG_BUTTON, aY as TAG_LABEL, A as TAG_NAV, j as TAG_SPAN, a5 as TAG_DIV, a6 as TAG_P, P as TAG_TD, Q as TAG_TH, R as TAG_TR, V as TAG_THEAD, a1 as TAG_IMG, d as TAG_A, O as TAG_UL, N as TAG_OL, a2 as TAG_CODE, aZ as MARKDOWN_CODE_BLOCK, a_ as MARKDOWN_INLINE_CODE, a$ as TAG_INS, b0 as TAG_SUP, b1 as TAG_SUB, b2 as TAG_DEL, m as TAG_I, n as TAG_EM, o as TAG_B, p as TAG_STRONG, l as TAG_HR, D as TAG_H6, E as TAG_H5, F as TAG_H4, G as TAG_H3, H as TAG_H2, I as TAG_H1, k as TAG_BR, ad as TAG_META, h as TAG_STYLE, i as TAG_SCRIPT, ac as TAG_TITLE, f as TAG_SUMMARY, g as TAG_DETAILS, b as TAG_HEAD, b3 as MARKDOWN_EMPHASIS, b4 as MARKDOWN_STRONG, b5 as HTML_ENTITIES, b6 as MAX_TAG_ID, b7 as assembleBufferedContent, b8 as 
TagIdMap, b9 as NodeEventExit } from './mdream.-hdaPj9a.mjs';
2
+
3
// Characters after which no separating space is inserted (matched against the
// last character already written to the output).
const NO_SPACE_AFTER_CHARS = /* @__PURE__ */ new Set(["\n", " ", "[", ">", "_", "*", "`", "|", "#", "<", "("]);
// Characters before which no separating space is inserted (matched against the
// first character of the fragment about to be written).
// NOTE(review): the published list carried a second whitespace entry that is
// indistinguishable from the plain space in this view (possibly a tab that the
// rendering collapsed) — confirm against the registry file before merging.
const NO_SPACE_BEFORE_CHARS = /* @__PURE__ */ new Set([" ", "\n", "_", "*", "`", "|", ">", "#"]);

/**
 * Decides whether a single space must separate existing output from a new fragment.
 *
 * The lookup sets live at module level so they are built once instead of on
 * every call.
 *
 * @param {string} lastChar - Last character already present in the output.
 * @param {string} firstChar - First character of the fragment to append.
 * @returns {boolean} `true` when neither character suppresses spacing.
 */
function needsSpacing(lastChar, firstChar) {
  return !NO_SPACE_AFTER_CHARS.has(lastChar) && !NO_SPACE_BEFORE_CHARS.has(firstChar);
}
8
/**
 * Decides whether a text node needs a leading space to separate it from the
 * output written so far.
 *
 * Always returns a strict boolean; the previous `&&` chain leaked `""` or
 * `undefined` when `lastChar` was empty.
 *
 * @param {string} lastChar - Last character already written ("" when nothing was written).
 * @param {object | undefined} lastNode - Node handled by the previous event.
 * @param {{ value: string }} textNode - Text node about to be written.
 * @returns {boolean} `true` when a space should be prepended to the text.
 */
function shouldAddSpacingBeforeText(lastChar, lastNode, textNode) {
  // Nothing written yet, or the output already ends in a separator-like character.
  if (!lastChar || lastChar === "\n" || lastChar === " " || lastChar === "[" || lastChar === ">") {
    return false;
  }
  // Text that directly follows an inline element is never padded.
  if (lastNode?.tagHandler?.isInline) {
    return false;
  }
  return textNode.value[0] !== " ";
}
11
/**
 * Offers a text node to the registered plugins and returns the first verdict.
 *
 * Plugins are consulted in registration order; the first one whose
 * `processTextNode` hook returns a truthy value decides the outcome and later
 * plugins are not consulted.
 *
 * @param {object} node - Text node being processed.
 * @param {{ plugins?: Array<object> }} state - Conversion state holding the plugins.
 * @returns {object | undefined} The plugin's own result object when it asks to
 *   skip the node, a fresh `{ content, skip: false }` object otherwise, or
 *   `undefined` when no plugin produced a result.
 */
function processTextNodeWithPlugins(node, state) {
  const plugins = state.plugins;
  if (!plugins?.length) {
    return undefined;
  }
  for (const plugin of plugins) {
    const outcome = plugin.processTextNode ? plugin.processTextNode(node, state) : undefined;
    if (!outcome) {
      continue;
    }
    // A skip verdict is handed back untouched; anything else is normalised.
    return outcome.skip ? outcome : { content: outcome.content, skip: false };
  }
  return undefined;
}
26
/**
 * Converts a single parser event (node enter/exit) into Markdown fragments and
 * appends them to the output through `collectNodeContent`.
 *
 * Text nodes are passed through the plugins' text hooks, optionally padded with
 * a leading space, and collected. Element nodes collect the output of plugin
 * `onNodeEnter`/`onNodeExit` hooks, or of the tag handler when no plugin
 * produced output, surrounded by the newlines requested by the tag's spacing
 * configuration.
 *
 * @param {{ type: number, node: object }} event - Enter or exit event for a node.
 * @param {object} state - Mutable conversion state (buffers, plugins, last node...).
 * @returns {void}
 */
function processHtmlEventToMarkdown(event, state) {
  const { type: eventType, node } = event;
  const lastNode = state.lastNode;
  state.lastNode = event.node;
  const buff = state.regionContentBuffers.get(node.regionId || 0) || [];
  // Inspect the last two characters already written to this region so that
  // trailing newlines can be counted below.
  const lastBuffEntry = buff[buff.length - 1];
  const lastChar = lastBuffEntry?.charAt(lastBuffEntry.length - 1) || "";
  let secondLastChar;
  if (lastBuffEntry?.length > 1) {
    secondLastChar = lastBuffEntry.charAt(lastBuffEntry.length - 2);
  } else {
    const previousEntry = buff[buff.length - 2];
    secondLastChar = previousEntry?.charAt(previousEntry.length - 1);
  }
  if (node.type === TEXT_NODE && eventType === NodeEventEnter) {
    const textNode = node;
    if (textNode.value) {
      // The helper returns undefined by itself when no plugins are registered.
      const pluginResult = processTextNodeWithPlugins(textNode, state);
      if (pluginResult) {
        if (pluginResult.skip) {
          return;
        }
        // A plugin may answer without replacement content; keep the original
        // text in that case instead of overwriting it with undefined.
        if (pluginResult.content !== undefined && pluginResult.content !== null) {
          textNode.value = pluginResult.content;
        }
      }
      // A lone space right after a line break carries no information.
      if (textNode.value === " " && lastChar === "\n") {
        return;
      }
      if (shouldAddSpacingBeforeText(lastChar, lastNode, textNode)) {
        textNode.value = ` ${textNode.value}`;
      }
      collectNodeContent(textNode, textNode.value, state);
    }
    state.lastTextNode = textNode;
    return;
  }
  if (node.type !== ELEMENT_NODE) {
    return;
  }
  const context = { node, state };
  const output = [];
  // Captured before any hook runs; used below to trim trailing whitespace.
  const lastFragment = state.lastContentCache;
  if (state.plugins?.length) {
    const hookName = eventType === NodeEventEnter ? "onNodeEnter" : "onNodeExit";
    for (const plugin of state.plugins) {
      if (!plugin[hookName]) {
        continue;
      }
      const result = plugin[hookName](event.node, state);
      if (result) {
        output.push(result);
      }
    }
  }
  // Number of newlines (0-2) the region already ends with.
  let lastNewLines = 0;
  if (lastChar === "\n") {
    lastNewLines++;
  }
  if (secondLastChar === "\n") {
    lastNewLines++;
  }
  const eventFn = eventType === NodeEventEnter ? "enter" : "exit";
  const handler = node.tagHandler;
  // The tag handler only runs when no plugin produced output for this event.
  if (!output.length && handler?.[eventFn]) {
    const res = handler[eventFn](context);
    if (res) {
      output.push(res);
    }
  }
  const newLineConfig = calculateNewLineConfig(node);
  const newLines = Math.max(0, (newLineConfig[eventType] || 0) - lastNewLines);
  if (newLines > 0) {
    // Nothing written to this region yet: emit the fragments without leading
    // or trailing blank lines.
    if (!buff.length) {
      for (const fragment of output) {
        collectNodeContent(node, fragment, state);
      }
      return;
    }
    const newlinesStr = "\n".repeat(newLines);
    // Drop a trailing space so lines never end in " \n".
    if (lastChar === " ") {
      buff[buff.length - 1] = buff[buff.length - 1].slice(0, -1);
    }
    if (eventType === NodeEventEnter) {
      output.unshift(newlinesStr);
    } else {
      output.push(newlinesStr);
    }
  } else if (lastFragment && state.lastTextNode?.containsWhitespace && !!node.parent && "value" in state.lastTextNode && typeof state.lastTextNode.value === "string") {
    if (!node.parent.depthMap[TAG_PRE] || node.parent.tagId === TAG_PRE) {
      const trimmed = lastFragment.trimEnd();
      // Only rewrite the buffer when its tail is exactly the cached fragment.
      if (trimmed.length < lastFragment.length && buff.length && buff[buff.length - 1] === lastFragment) {
        buff[buff.length - 1] = trimmed;
      }
      state.lastTextNode = void 0;
    }
  }
  // Separate an opening fragment from the preceding output when both sides
  // are "word-like" characters.
  if (output[0]?.[0] && eventType === NodeEventEnter && lastChar && needsSpacing(lastChar, output[0][0])) {
    collectNodeContent(node, " ", state);
  }
  for (const fragment of output) {
    collectNodeContent(node, fragment, state);
  }
}
136
/**
 * Picks the newline configuration that applies to a node.
 *
 * Spacing is suppressed (`NO_SPACING`) for anything nested inside a list item
 * or blockquote (other than the `<li>`/`<blockquote>` itself) and for anything
 * below an ancestor whose handler collapses inner whitespace. Otherwise the
 * node's own handler spacing is used, falling back to the default block spacing.
 *
 * @param {object} node - Element node with `tagId`, `depthMap`, `parent` and `tagHandler`.
 * @returns {*} The spacing configuration, indexed by event type by the caller.
 */
function calculateNewLineConfig(node) {
  const { tagId, depthMap } = node;
  if (tagId !== TAG_LI && depthMap[TAG_LI] > 0) {
    return NO_SPACING;
  }
  if (tagId !== TAG_BLOCKQUOTE && depthMap[TAG_BLOCKQUOTE] > 0) {
    return NO_SPACING;
  }
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (ancestor.tagHandler?.collapsesInnerWhiteSpace) {
      return NO_SPACING;
    }
  }
  return node.tagHandler?.spacing || DEFAULT_BLOCK_SPACING;
}
154
+
155
/**
 * Resolves a link or image URL against an optional origin.
 *
 * - Empty values are returned untouched.
 * - Protocol-relative URLs (`//host/path`) are given the `https:` scheme.
 * - URLs that carry their own scheme (`https:`, `mailto:`, `tel:`, `data:`...)
 *   and fragment-only references (`#section`) are returned untouched; they
 *   must not be glued onto the origin.
 * - Everything else is treated as a path and joined to `origin`, when one is
 *   given, with exactly one slash between the two.
 *
 * @param {string} url - URL taken from an attribute such as `href` or `src`.
 * @param {string} [origin] - Origin to resolve relative URLs against.
 * @returns {string} The resolved URL, or `url` itself when nothing applies.
 */
function resolveUrl(url, origin) {
  if (!url) {
    return url;
  }
  if (url.startsWith("//")) {
    return `https:${url}`;
  }
  if (url.startsWith("#") || /^[a-z][a-z\d+.-]*:/i.test(url)) {
    return url;
  }
  if (!origin) {
    return url;
  }
  // Normalise the origin once so every branch joins with a single slash.
  const base = origin.endsWith("/") ? origin.slice(0, -1) : origin;
  if (url.startsWith("/")) {
    return `${base}${url}`;
  }
  const path = url.startsWith("./") ? url.slice(2) : url;
  return `${base}/${path}`;
}
176
/**
 * Tells whether a node sits inside a table cell.
 *
 * Header cells (`<th>`) count as well as data cells (`<td>`): block content
 * inside either one cannot be expressed in a Markdown pipe-table row, so the
 * handlers that call this fall back to inline HTML for both.
 *
 * @param {{ depthMap: ArrayLike<number> }} node - Node carrying per-tag nesting depths.
 * @returns {boolean} `true` when at least one `<td>` or `<th>` depth is non-zero.
 */
function isInsideTableCell(node) {
  const { depthMap } = node;
  return depthMap[TAG_TD] > 0 || depthMap[TAG_TH] > 0;
}
179
/**
 * Extracts the code language from a `class` attribute value.
 *
 * The class list is split on any run of whitespace (spaces, tabs, newlines)
 * and the text following `language-` in the first matching token is returned.
 *
 * @param {string | undefined} className - Raw `class` attribute value.
 * @returns {string} The language name, or "" when none is declared.
 */
function getLanguageFromClass(className) {
  if (!className) {
    return "";
  }
  for (const token of className.split(/\s+/)) {
    const language = token.split("language-")[1];
    if (language) {
      return language;
    }
  }
  return "";
}
185
/**
 * Builds the tag handler for a heading of the given level.
 *
 * Headings nested inside a link are kept as raw `<hN>`/`</hN>` HTML; all other
 * headings become an ATX prefix (`#` repeated `depth` times plus a space) with
 * no closing output.
 *
 * @param {number} depth - Heading level.
 * @returns {{ enter: Function, exit: Function, collapsesInnerWhiteSpace: boolean }}
 */
function handleHeading(depth) {
  const isInsideLink = (node) => Boolean(node.depthMap[TAG_A]);
  const hashes = () => "#".repeat(depth);
  return {
    enter: ({ node }) => (isInsideLink(node) ? `<h${depth}>` : `${hashes()} `),
    exit: ({ node }) => (isInsideLink(node) ? `</h${depth}>` : undefined),
    collapsesInnerWhiteSpace: true
  };
}
201
/**
 * Counts how many bold elements (`<b>` and `<strong>`) enclose a node.
 * NOTE(review): presumably the count includes the node itself, as the `> 1`
 * comparison below implies — confirm against the code that fills `depthMap`.
 */
function boldNestingDepth(node) {
  return (node.depthMap[TAG_B] || 0) + (node.depthMap[TAG_STRONG] || 0);
}

/**
 * Shared handler for `<strong>` and `<b>`.
 *
 * Emits the Markdown strong marker on enter and exit. When the element is
 * nested inside another bold element — of either tag — nothing is emitted, so
 * the markers are not doubled up.
 */
const Strong = {
  enter: ({ node }) => (boldNestingDepth(node) > 1 ? "" : MARKDOWN_STRONG),
  exit: ({ node }) => (boldNestingDepth(node) > 1 ? "" : MARKDOWN_STRONG),
  collapsesInnerWhiteSpace: true,
  spacing: NO_SPACING,
  isInline: true
};
218
/**
 * Counts how many emphasis elements (`<i>` and `<em>`) enclose a node.
 * NOTE(review): presumably the count includes the node itself, as the `> 1`
 * comparison below implies — confirm against the code that fills `depthMap`.
 */
function emphasisNestingDepth(node) {
  return (node.depthMap[TAG_I] || 0) + (node.depthMap[TAG_EM] || 0);
}

/**
 * Shared handler for `<em>` and `<i>`.
 *
 * Emits the Markdown emphasis marker on enter and exit. When the element is
 * nested inside another emphasis element — of either tag — nothing is emitted,
 * so the markers are not doubled up.
 */
const Emphasis = {
  enter: ({ node }) => (emphasisNestingDepth(node) > 1 ? "" : MARKDOWN_EMPHASIS),
  exit: ({ node }) => (emphasisNestingDepth(node) > 1 ? "" : MARKDOWN_EMPHASIS),
  collapsesInnerWhiteSpace: true,
  spacing: NO_SPACING,
  isInline: true
};
235
/**
 * Conversion rules for every supported HTML tag, keyed by numeric tag id.
 *
 * A handler may provide:
 * - `enter` / `exit`: callbacks receiving `{ node, state }` that return the
 *   Markdown (or raw HTML) fragment to emit, or nothing;
 * - `spacing`: the newline configuration consulted by the event processor;
 * - `collapsesInnerWhiteSpace`, `isInline`, `isSelfClosing`, `isNonNesting`,
 *   `excludesTextNodes`: flags read by the parser and the event processor.
 *
 * The table is assembled inside a function scope so the small factories used
 * for the repetitive entries stay private to it.
 *
 * NOTE(review): whitespace inside string literals (list and blockquote indent
 * units) is reproduced exactly as it appears in this view of the file, which
 * looks whitespace-collapsed — confirm against the published file.
 */
const tagHandlers = (() => {
  // Flags shared by every inline, whitespace-collapsing element.
  const inlineTraits = () => ({
    collapsesInnerWhiteSpace: true,
    spacing: NO_SPACING,
    isInline: true
  });
  // Inline element wrapped in fixed opening/closing fragments.
  const inlineWrap = (open, close) => ({
    enter: () => open,
    exit: () => close,
    ...inlineTraits()
  });
  // Element that contributes no spacing, plus optional extra flags.
  const unspaced = (traits = {}) => ({ ...traits, spacing: NO_SPACING });
  // Void (self-closing) element that contributes no spacing.
  const voidTag = (traits = {}) => ({ isSelfClosing: true, spacing: NO_SPACING, ...traits });
  // Emits raw HTML only when the node sits inside a table cell.
  const whenInCell = (html) => ({ node }) => (isInsideTableCell(node) ? html : undefined);
  // Emits raw HTML only when the node belongs to a nested table.
  const whenInNestedTable = (html) => ({ node }) => (node.depthMap[TAG_TABLE] > 1 ? html : undefined);
  // Closes a table cell: raw HTML in nested tables, otherwise counts the cell.
  const closeCell = (html) => ({ node, state }) => {
    if (node.depthMap[TAG_TABLE] > 1) {
      return html;
    }
    state.tableCurrentRowCells++;
  };
  // Column separator: nothing before the first cell of a row.
  const cellSeparator = (node) => (node.index === 0 ? "" : " | ");
  // Delimiter-row markers per `align` value; anything else maps to "---".
  const alignmentMarkers = new Map([
    ["left", ":---"],
    ["center", ":---:"],
    ["right", "---:"]
  ]);

  return {
    // --- Document metadata -------------------------------------------------
    // Head, title and meta emit nothing here; plugins handle frontmatter.
    [TAG_HEAD]: unspaced({ collapsesInnerWhiteSpace: true }),
    [TAG_DETAILS]: {
      enter: () => "<details>",
      exit: () => "</details>\n\n"
    },
    [TAG_SUMMARY]: {
      enter: () => "<summary>",
      exit: () => "</summary>\n\n"
    },
    [TAG_TITLE]: unspaced({ collapsesInnerWhiteSpace: true, isNonNesting: true }),
    [TAG_SCRIPT]: { excludesTextNodes: true, isNonNesting: true },
    [TAG_STYLE]: { isNonNesting: true, excludesTextNodes: true },
    [TAG_META]: voidTag({ collapsesInnerWhiteSpace: true }),
    [TAG_BR]: {
      enter: whenInCell("<br>"),
      isSelfClosing: true,
      ...inlineTraits()
    },

    // --- Headings and rules --------------------------------------------------
    [TAG_H1]: handleHeading(1),
    [TAG_H2]: handleHeading(2),
    [TAG_H3]: handleHeading(3),
    [TAG_H4]: handleHeading(4),
    [TAG_H5]: handleHeading(5),
    [TAG_H6]: handleHeading(6),
    [TAG_HR]: {
      enter: () => MARKDOWN_HORIZONTAL_RULE,
      isSelfClosing: true
    },

    // --- Inline formatting ---------------------------------------------------
    [TAG_STRONG]: Strong,
    [TAG_B]: Strong,
    [TAG_EM]: Emphasis,
    [TAG_I]: Emphasis,
    [TAG_DEL]: inlineWrap(MARKDOWN_STRIKETHROUGH, MARKDOWN_STRIKETHROUGH),
    [TAG_SUB]: inlineWrap("<sub>", "</sub>"),
    [TAG_SUP]: inlineWrap("<sup>", "</sup>"),
    [TAG_INS]: inlineWrap("<ins>", "</ins>"),

    // --- Quotes and code -----------------------------------------------------
    [TAG_BLOCKQUOTE]: {
      enter: ({ node }) => {
        const quoteDepth = node.depthMap[TAG_BLOCKQUOTE] || 1;
        const markers = "> ".repeat(quoteDepth);
        const listDepth = node.depthMap[TAG_LI];
        // Inside a list item the quote starts on its own, indented line.
        return listDepth > 0 ? `\n${" ".repeat(listDepth)}${markers}` : markers;
      },
      spacing: BLOCKQUOTE_SPACING
    },
    [TAG_CODE]: {
      enter: ({ node }) => {
        if ((node.depthMap[TAG_PRE] || 0) > 0) {
          // Fenced block: opening fence plus the language taken from the class.
          const language = getLanguageFromClass(node.attributes?.class);
          return `${MARKDOWN_CODE_BLOCK}${language}\n`;
        }
        return MARKDOWN_INLINE_CODE;
      },
      exit: ({ node }) => (node.depthMap[TAG_PRE] > 0 ? `\n${MARKDOWN_CODE_BLOCK}` : MARKDOWN_INLINE_CODE),
      ...inlineTraits()
    },

    // --- Lists ---------------------------------------------------------------
    [TAG_UL]: {
      enter: whenInCell("<ul>"),
      exit: whenInCell("</ul>")
    },
    [TAG_LI]: {
      enter: ({ node }) => {
        if (isInsideTableCell(node)) {
          return "<li>";
        }
        // Indentation follows the combined nesting depth of <ul> and <ol>.
        const depth = (node.depthMap[TAG_UL] || 0) + (node.depthMap[TAG_OL] || 0) - 1;
        const indent = " ".repeat(Math.max(0, depth));
        const marker = node.parent?.tagId === TAG_OL ? `${node.index + 1}. ` : "- ";
        return `${indent}${marker}`;
      },
      exit: whenInCell("</li>"),
      spacing: LIST_ITEM_SPACING
    },

    // --- Links and images ----------------------------------------------------
    [TAG_A]: {
      enter: ({ node }) => (node.attributes?.href ? "[" : undefined),
      exit: ({ node, state }) => {
        if (!node.attributes?.href) {
          return "";
        }
        const href = resolveUrl(node.attributes?.href || "", state.options?.origin);
        let title = node.attributes?.title;
        // A title that merely repeats the most recently cached content is dropped.
        if (state.lastContentCache === title) {
          title = "";
        }
        return title ? `](${href} "${title}")` : `](${href})`;
      },
      ...inlineTraits()
    },
    [TAG_IMG]: {
      enter: ({ node, state }) => {
        const alt = node.attributes?.alt || "";
        const src = resolveUrl(node.attributes?.src || "", state.options?.origin);
        return `![${alt}](${src})`;
      },
      collapsesInnerWhiteSpace: true,
      isSelfClosing: true,
      spacing: NO_SPACING,
      isInline: true
    },

    // --- Tables --------------------------------------------------------------
    [TAG_TABLE]: {
      enter: ({ node, state }) => {
        if (isInsideTableCell(node)) {
          return "<table>";
        }
        if (node.depthMap[TAG_TABLE] <= 1) {
          state.tableRenderedTable = false;
        }
        state.tableColumnAlignments = [];
      },
      exit: whenInCell("</table>")
    },
    [TAG_THEAD]: {
      enter: whenInCell("<thead>"),
      exit: whenInCell("</thead>"),
      spacing: TABLE_ROW_SPACING,
      excludesTextNodes: true
    },
    [TAG_TR]: {
      enter: ({ node, state }) => {
        if (isInsideTableCell(node)) {
          return "<tr>";
        }
        state.tableCurrentRowCells = 0;
        return "| ";
      },
      exit: ({ node, state }) => {
        if (isInsideTableCell(node) || node.depthMap[TAG_TABLE] > 1) {
          return "</tr>";
        }
        if (state.tableRenderedTable) {
          return " |";
        }
        // First row of the table: append the delimiter row, padded to the
        // number of cells actually seen in this row.
        state.tableRenderedTable = true;
        const alignments = state.tableColumnAlignments;
        while (alignments.length < state.tableCurrentRowCells) {
          alignments.push("");
        }
        const delimiterCells = alignments.map((align) => alignmentMarkers.get(align) ?? "---");
        return ` |\n| ${delimiterCells.join(" | ")} |`;
      },
      excludesTextNodes: true,
      spacing: TABLE_ROW_SPACING
    },
    [TAG_TH]: {
      enter: ({ node, state }) => {
        if (node.depthMap[TAG_TABLE] > 1) {
          return "<th>";
        }
        const align = node.attributes?.align?.toLowerCase();
        if (align) {
          state.tableColumnAlignments.push(align);
        } else if (state.tableColumnAlignments.length <= state.tableCurrentRowCells) {
          state.tableColumnAlignments.push("");
        }
        return cellSeparator(node);
      },
      exit: closeCell("</th>"),
      collapsesInnerWhiteSpace: true,
      spacing: NO_SPACING
    },
    [TAG_TD]: {
      enter: ({ node }) => (node.depthMap[TAG_TABLE] > 1 ? "<td>" : cellSeparator(node)),
      exit: closeCell("</td>"),
      collapsesInnerWhiteSpace: true,
      spacing: NO_SPACING
    },

    // --- Generic containers --------------------------------------------------
    [TAG_P]: {},
    [TAG_DIV]: {},
    [TAG_SPAN]: inlineTraits(),
    [TAG_NAV]: {},
    [TAG_LABEL]: inlineTraits(),
    [TAG_BUTTON]: {
      collapsesInnerWhiteSpace: true,
      isInline: true
    },
    [TAG_BODY]: unspaced(),
    [TAG_CENTER]: {
      // Kept as raw HTML only inside nested tables.
      enter: whenInNestedTable("<center>"),
      exit: whenInNestedTable("</center>"),
      spacing: NO_SPACING
    },
    [TAG_TBODY]: unspaced({ excludesTextNodes: true }),
    [TAG_TFOOT]: {
      spacing: TABLE_ROW_SPACING,
      excludesTextNodes: true
    },
    [TAG_KBD]: inlineWrap("`", "`"),
    [TAG_FOOTER]: unspaced(),
    [TAG_FORM]: unspaced(),

    // --- Void elements -------------------------------------------------------
    [TAG_LINK]: voidTag({ collapsesInnerWhiteSpace: true, isInline: true }),
    [TAG_AREA]: voidTag({ isInline: true }),
    [TAG_BASE]: voidTag({ isInline: true }),
    [TAG_COL]: voidTag(),
    [TAG_EMBED]: voidTag(),
    [TAG_INPUT]: voidTag({ isInline: true }),
    [TAG_KEYGEN]: voidTag({ isInline: true }),
    [TAG_PARAM]: voidTag(),
    [TAG_SOURCE]: voidTag(),
    [TAG_TRACK]: voidTag(),
    [TAG_WBR]: voidTag({ isInline: true }),

    // --- Embedded and form content ---------------------------------------------
    [TAG_SVG]: unspaced(),
    [TAG_SELECT]: unspaced(),
    [TAG_TEXTAREA]: unspaced({ isNonNesting: true }),
    [TAG_OPTION]: unspaced({ isNonNesting: true }),
    [TAG_FIELDSET]: unspaced(),
    [TAG_LEGEND]: unspaced(),
    [TAG_AUDIO]: unspaced(),
    [TAG_VIDEO]: unspaced(),
    [TAG_CANVAS]: unspaced(),
    [TAG_IFRAME]: unspaced({ isNonNesting: true }),
    [TAG_MAP]: unspaced(),
    [TAG_DIALOG]: unspaced(),
    [TAG_METER]: unspaced(),
    [TAG_PROGRESS]: unspaced(),
    [TAG_TEMPLATE]: unspaced(),

    // --- Phrasing elements -------------------------------------------------------
    [TAG_ABBR]: inlineWrap("", ""),
    [TAG_MARK]: inlineWrap("<mark>", "</mark>"),
    [TAG_Q]: inlineWrap('"', '"'),
    [TAG_SAMP]: inlineWrap("`", "`"),
    [TAG_SMALL]: inlineWrap("", ""),
    [TAG_NOSCRIPT]: unspaced({ excludesTextNodes: true }),
    [TAG_NOFRAMES]: unspaced({ isNonNesting: true }),
    [TAG_XMP]: unspaced({ isNonNesting: true }),
    [TAG_PLAINTEXT]: unspaced({ isNonNesting: true }),
    [TAG_ASIDE]: unspaced(),
    [TAG_U]: inlineWrap("<u>", "</u>"),
    [TAG_CITE]: inlineWrap("*", "*"),
    [TAG_DFN]: inlineWrap("**", "**"),
    [TAG_VAR]: inlineWrap("`", "`"),
    [TAG_TIME]: inlineWrap("", ""),
    [TAG_BDO]: inlineWrap("", ""),
    [TAG_RUBY]: inlineWrap("", ""),
    [TAG_RT]: inlineWrap("", ""),
    [TAG_RP]: inlineWrap("", ""),

    // --- Address and definition lists (kept as raw HTML) ---------------------------
    [TAG_ADDRESS]: {
      enter: () => "<address>",
      exit: () => "</address>",
      spacing: NO_SPACING,
      collapsesInnerWhiteSpace: true
    },
    [TAG_DL]: {
      spacing: NO_SPACING,
      enter: () => "<dl>",
      exit: () => "</dl>"
    },
    [TAG_DT]: {
      // Definition term
      enter: () => "<dt>",
      exit: () => "</dt>",
      collapsesInnerWhiteSpace: true,
      spacing: [0, 1]
    },
    [TAG_DD]: {
      // Definition description
      enter: () => "<dd>",
      exit: () => "</dd>",
      spacing: [0, 1]
    }
  };
})();
797
+
798
/**
 * Replaces HTML entities in `text` with the characters they stand for.
 *
 * Named entities are looked up in `HTML_ENTITIES` (first matching key wins).
 * Numeric references are accepted only in the strict forms `&#<digits>;` and
 * `&#x<hex digits>;`, and only when they denote a valid Unicode code point.
 * Anything else — unknown names, malformed or out-of-range references — is
 * left in the output exactly as written.
 *
 * @param {string} text - Text that may contain entities.
 * @returns {string} The decoded text.
 */
function decodeHTMLEntities(text) {
  // Built once per call rather than once per "&".
  const namedEntities = Object.entries(HTML_ENTITIES);
  // Sticky so it only ever matches at the position set through lastIndex.
  const numericEntity = /&#(?:x([0-9a-f]+)|([0-9]+));/iy;
  let result = "";
  let i = 0;
  while (i < text.length) {
    if (text[i] === "&") {
      const named = namedEntities.find(([entity]) => text.startsWith(entity, i));
      if (named) {
        result += named[1];
        i += named[0].length;
        continue;
      }
      numericEntity.lastIndex = i;
      const numeric = numericEntity.exec(text);
      if (numeric) {
        const codePoint = numeric[1] !== undefined
          ? Number.parseInt(numeric[1], 16)
          : Number.parseInt(numeric[2], 10);
        // String.fromCodePoint throws above U+10FFFF; keep such references literal.
        if (codePoint <= 0x10ffff) {
          result += String.fromCodePoint(codePoint);
          i += numeric[0].length;
          continue;
        }
      }
    }
    result += text[i];
    i++;
  }
  return result;
}
845
/**
 * Collects a node and its ancestors up to the first non-inline one.
 *
 * Starting at `node`, the walk moves to the parent for as long as the current
 * node's handler is inline and a parent exists. Every visited node is
 * returned, so the last element is either the first non-inline ancestor or an
 * inline node without a parent.
 *
 * @param {object} node - Node to start from.
 * @returns {object[]} The visited nodes, innermost first.
 */
function traverseUpToFirstBlockNode(node) {
  const chain = [node];
  let current = node;
  while (current.tagHandler?.isInline && current.parent) {
    current = current.parent;
    chain.push(current);
  }
  return chain;
}
857
+
858
// UTF-16 code units of the characters the parser compares against.
// Markup delimiters.
const LT_CHAR = 0x3c; // "<"
const GT_CHAR = 0x3e; // ">"
const SLASH_CHAR = 0x2f; // "/"
const EQUALS_CHAR = 0x3d; // "="
const QUOTE_CHAR = 0x22; // '"'
const APOS_CHAR = 0x27; // "'"
const EXCLAMATION_CHAR = 0x21; // "!"
const AMPERSAND_CHAR = 0x26; // "&"
const BACKSLASH_CHAR = 0x5c; // "\"
const DASH_CHAR = 0x2d; // "-"
// Whitespace.
const SPACE_CHAR = 0x20; // " "
const TAB_CHAR = 0x09; // "\t"
const NEWLINE_CHAR = 0x0a; // "\n"
const CARRIAGE_RETURN_CHAR = 0x0d; // "\r"
// Frozen, empty attribute bag.
const EMPTY_ATTRIBUTES = Object.freeze({});
873
/**
 * Creates an independent `Uint8Array` from a depth map, so later changes to
 * the source do not affect the result.
 *
 * @param {Uint8Array} depthMap - Per-tag nesting depths to copy.
 * @returns {Uint8Array} A new array built from `depthMap`.
 */
function copyDepthMap(depthMap) {
  const snapshot = new Uint8Array(depthMap);
  return snapshot;
}
876
/**
 * Tells whether a character code is one of the four whitespace characters the
 * parser recognises: space, tab, line feed or carriage return.
 *
 * @param {number} charCode - UTF-16 code unit to classify.
 * @returns {boolean} `true` for whitespace, `false` otherwise.
 */
function isWhitespace(charCode) {
  switch (charCode) {
    case SPACE_CHAR:
    case TAB_CHAR:
    case NEWLINE_CHAR:
    case CARRIAGE_RETURN_CHAR:
      return true;
    default:
      return false;
  }
}
879
/**
 * Invokes the `processAttributes` hook of every plugin that defines one.
 *
 * Hooks run in registration order and their return values are ignored.
 *
 * @param {object} node - Element node handed to each hook.
 * @param {{ plugins?: Array<object> }} state - Conversion state holding the plugins.
 * @returns {void}
 */
function runProcessAttributesHooks(node, state) {
  const plugins = state.plugins;
  if (!plugins?.length) {
    return;
  }
  for (const plugin of plugins) {
    if (!plugin.processAttributes) {
      continue;
    }
    plugin.processAttributes(node, state);
  }
}
888
/**
 * Tokenizes one chunk of HTML and reports nodes through `handleEvent`.
 *
 * Text is accumulated in a local buffer (whitespace runs collapsed outside
 * `<pre>`, a few Markdown-significant characters escaped depending on the
 * enclosing tags) and flushed as a text node whenever markup is encountered.
 * Comments/doctypes, closing tags and opening tags are delegated to their
 * dedicated processors.
 *
 * A "<" that does not start a tag name (for example in "a < b" or "<>") is
 * kept as literal text.
 *
 * @param {string} htmlChunk - HTML to parse.
 * @param {object} state - Parser state; missing fields are initialised here.
 * @param {Function} handleEvent - Receives node enter/exit events.
 * @returns {string} Text that was not flushed: trailing text and/or markup
 *   that is incomplete within this chunk.
 */
function parseHTML(htmlChunk, state, handleEvent) {
  let textBuffer = "";
  state.depthMap ??= new Uint8Array(MAX_TAG_ID);
  state.depth ??= 0;
  state.lastCharWasWhitespace ??= true;
  state.justClosedTag ??= false;
  state.isFirstTextInElement ??= false;
  const chunkLength = htmlChunk.length;
  // Emits the pending text (if any) as a text node and clears the buffer.
  const flushText = () => {
    if (textBuffer.length > 0) {
      processTextBuffer(textBuffer, state, handleEvent);
      textBuffer = "";
    }
  };
  let i = 0;
  while (i < chunkLength) {
    const currentCharCode = htmlChunk.charCodeAt(i);
    if (currentCharCode !== LT_CHAR) {
      // ---- Plain text ---------------------------------------------------
      if (currentCharCode === AMPERSAND_CHAR) {
        state.hasEncodedHtmlEntity = true;
      }
      if (isWhitespace(currentCharCode)) {
        const inPreTag = state.depthMap[TAG_PRE] > 0;
        // Whitespace right after a closing tag is significant.
        if (state.justClosedTag) {
          state.justClosedTag = false;
          state.lastCharWasWhitespace = false;
        }
        // Outside <pre>, a run of whitespace collapses into its first character.
        if (!inPreTag && state.lastCharWasWhitespace) {
          i++;
          continue;
        }
        // <pre> keeps the character verbatim; elsewhere it becomes one space.
        textBuffer += inPreTag ? htmlChunk[i] : " ";
        state.lastCharWasWhitespace = true;
        state.textBufferContainsWhitespace = true;
      } else {
        state.textBufferContainsNonWhitespace = true;
        state.lastCharWasWhitespace = false;
        state.justClosedTag = false;
        if (currentCharCode === 124 && state.depthMap[TAG_TABLE]) {
          // "|" inside a table
          textBuffer += "\\|";
        } else if (currentCharCode === 96 && (state.depthMap[TAG_CODE] || state.depthMap[TAG_PRE])) {
          // "`" inside code
          textBuffer += "\\`";
        } else if (currentCharCode === 91 && state.depthMap[TAG_A]) {
          // "[" inside a link
          textBuffer += "\\[";
        } else if (currentCharCode === 93 && state.depthMap[TAG_A]) {
          // "]" inside a link
          textBuffer += "\\]";
        } else if (currentCharCode === 62 && state.depthMap[TAG_BLOCKQUOTE]) {
          // ">" inside a blockquote
          textBuffer += "\\>";
        } else {
          textBuffer += htmlChunk[i];
        }
      }
      i++;
      continue;
    }
    // ---- Markup ---------------------------------------------------------
    // A "<" at the very end of the chunk cannot be classified yet.
    if (i + 1 >= chunkLength) {
      textBuffer += htmlChunk[i];
      break;
    }
    const nextCharCode = htmlChunk.charCodeAt(i + 1);
    if (nextCharCode === EXCLAMATION_CHAR) {
      flushText();
      const result = processCommentOrDoctype(htmlChunk, i);
      if (result.complete) {
        i = result.newPosition;
      } else {
        textBuffer += result.remainingText;
        break;
      }
    } else if (nextCharCode === SLASH_CHAR) {
      flushText();
      const result = processClosingTag(htmlChunk, i, state, handleEvent);
      if (result.complete) {
        i = result.newPosition;
      } else {
        textBuffer += result.remainingText;
        break;
      }
    } else {
      // Opening tag: locate the end of the tag name.
      const tagNameStart = i + 1;
      let tagNameEnd = -1;
      for (let cursor = tagNameStart; cursor < chunkLength; cursor++) {
        const c = htmlChunk.charCodeAt(cursor);
        if (isWhitespace(c) || c === SLASH_CHAR || c === GT_CHAR) {
          tagNameEnd = cursor;
          break;
        }
      }
      // The tag name runs to the end of the chunk: hand it back for later.
      if (tagNameEnd === -1) {
        textBuffer += htmlChunk.substring(i);
        break;
      }
      const tagName = htmlChunk.substring(tagNameStart, tagNameEnd).toLowerCase();
      if (!tagName) {
        // "<" followed by whitespace, "/" or ">" is not a tag. Keep it as
        // ordinary text and carry on, instead of abandoning the rest of the
        // chunk.
        textBuffer += htmlChunk[i++];
        state.textBufferContainsNonWhitespace = true;
        state.lastCharWasWhitespace = false;
        state.justClosedTag = false;
        continue;
      }
      const tagId = TagIdMap[tagName] ?? -1;
      // Inside a non-nesting element, other tags are plain text.
      if (state.currentNode?.tagHandler?.isNonNesting && tagId !== state.currentNode?.tagId) {
        textBuffer += htmlChunk[i++];
        continue;
      }
      flushText();
      const result = processOpeningTag(tagName, tagId, htmlChunk, tagNameEnd, state, handleEvent);
      if (result.skip) {
        textBuffer += htmlChunk[i++];
      } else if (result.complete) {
        i = result.newPosition;
        if (!result.selfClosing) {
          state.isFirstTextInElement = true;
        }
      } else {
        textBuffer += result.remainingText;
        break;
      }
    }
  }
  return textBuffer;
}
1022
/**
 * Turns buffered text into a text node and emits it through `handleEvent`.
 *
 * Nothing is emitted when there is no current node, when the current node's
 * handler excludes text nodes, when the buffer is empty, or when (outside
 * `<pre>`) the buffer holds only whitespace and the current node has no
 * earlier text child. Leading whitespace is stripped when the nearest
 * non-inline ancestor has no text child yet (inside `<pre>` only leading line
 * breaks are stripped). Entities are decoded when the parser flagged an "&".
 *
 * @param {string} textBuffer - Text accumulated by the parser.
 * @param {object} state - Parser state; whitespace and entity flags are reset here.
 * @param {Function} handleEvent - Receives the enter event for the text node.
 * @returns {void}
 */
function processTextBuffer(textBuffer, state, handleEvent) {
  // Read and reset the per-buffer flags first, whatever happens next.
  const hadNonWhitespace = state.textBufferContainsNonWhitespace;
  const hadWhitespace = state.textBufferContainsWhitespace;
  state.textBufferContainsNonWhitespace = false;
  state.textBufferContainsWhitespace = false;

  const owner = state.currentNode;
  if (!owner || owner.tagHandler?.excludesTextNodes) {
    return;
  }
  const insidePre = state.depthMap[TAG_PRE] > 0;
  if (!insidePre && !hadNonWhitespace && !owner.childTextNodeIndex) {
    return;
  }
  if (textBuffer.length === 0) {
    return;
  }

  const ancestors = traverseUpToFirstBlockNode(owner);
  const blockAncestor = ancestors[ancestors.length - 1];
  let text = textBuffer;
  if (hadWhitespace && !blockAncestor?.childTextNodeIndex) {
    // Inside <pre> only line feeds and carriage returns are dropped.
    const isSkippable = insidePre ? (code) => code === 10 || code === 13 : (code) => isWhitespace(code);
    let start = 0;
    while (start < text.length && isSkippable(text.charCodeAt(start))) {
      start++;
    }
    if (start > 0) {
      text = text.substring(start);
    }
  }
  if (state.hasEncodedHtmlEntity) {
    text = decodeHTMLEntities(String(text));
    state.hasEncodedHtmlEntity = false;
  }

  const textNode = {
    type: TEXT_NODE,
    value: text,
    parent: owner,
    regionId: owner.regionId,
    index: owner.currentWalkIndex++,
    depth: state.depth,
    containsWhitespace: hadWhitespace
  };
  // Every node on the way up to the block ancestor now has one more text child.
  for (const ancestor of ancestors) {
    ancestor.childTextNodeIndex = (ancestor.childTextNodeIndex || 0) + 1;
  }
  handleEvent({ type: NodeEventEnter, node: textNode });
  state.lastTextNode = textNode;
}
1068
/**
 * Handles a closing tag (`</name>`) that starts at `position` in `htmlChunk`.
 *
 * Every open node from `state.currentNode` up to and including the nearest
 * ancestor with the same tag id is closed via `closeNode`. An end tag that
 * matches no open ancestor is consumed and ignored, so a stray `</span>` no
 * longer closes the whole open-element stack (which made later text disappear
 * because `state.currentNode` became undefined).
 *
 * @param {string} htmlChunk - HTML being parsed.
 * @param {number} position - Index of the `<` that starts the closing tag.
 * @param {object} state - Parser state; `currentNode` is read and `justClosedTag` is set.
 * @param {Function} handleEvent - Event sink forwarded to `closeNode`.
 * @returns {{complete: boolean, newPosition: number, remainingText: string}}
 *   `complete: false` (with the unconsumed text in `remainingText`) when the
 *   `>` has not arrived yet, or when the current node is non-nesting and the
 *   tag does not match it.
 */
function processClosingTag(htmlChunk, position, state, handleEvent) {
  const tagNameStart = position + 2; // skip "</"
  const chunkLength = htmlChunk.length;

  let closeIndex = tagNameStart;
  while (closeIndex < chunkLength && htmlChunk.charCodeAt(closeIndex) !== GT_CHAR) {
    closeIndex++;
  }
  if (closeIndex >= chunkLength) {
    // No '>' in this chunk yet: hand everything from '<' back to the caller.
    return {
      complete: false,
      newPosition: position,
      remainingText: htmlChunk.substring(position)
    };
  }

  // HTML allows whitespace between the tag name and '>' (e.g. "</div >").
  const tagName = htmlChunk.substring(tagNameStart, closeIndex).trimEnd().toLowerCase();
  const tagId = TagIdMap[tagName] ?? -1;

  if (state.currentNode?.tagHandler?.isNonNesting && tagId !== state.currentNode.tagId) {
    return {
      complete: false,
      newPosition: position,
      remainingText: htmlChunk.substring(position)
    };
  }

  // Find the nearest open node this end tag refers to before closing anything.
  let target = state.currentNode;
  while (target && target.tagId !== tagId) {
    target = target.parent;
  }

  if (target) {
    // Close the unclosed descendants first, then the matching node itself.
    let node = state.currentNode;
    while (node && node !== target) {
      const parent = node.parent;
      closeNode(node, state, handleEvent);
      node = parent;
    }
    closeNode(target, state, handleEvent);
  }

  state.justClosedTag = true;
  return {
    complete: true,
    newPosition: closeIndex + 1, // skip past '>'
    remainingText: ""
  };
}
1117
/**
 * Closes `node`: emits its exit event and moves the parser state to the parent.
 *
 * An `<a>` element that received no text gets a synthetic text child taken
 * from its `title` or `aria-label` attribute before the exit event is emitted.
 *
 * @param {object|undefined} node - Element being closed; a falsy value is a no-op.
 * @param {object} state - Parser state; `depthMap`, `depth`, `currentNode`,
 *   `hasEncodedHtmlEntity` and `justClosedTag` are updated.
 * @param {Function} handleEvent - Receives the enter/exit events.
 * @returns {void}
 */
function closeNode(node, state, handleEvent) {
  if (!node) {
    return;
  }

  const isTextlessLink = node.tagId === TAG_A && !node.childTextNodeIndex;
  const fallbackLabel = isTextlessLink
    ? node.attributes?.title || node.attributes?.["aria-label"] || ""
    : "";
  if (fallbackLabel) {
    node.childTextNodeIndex = 1;
    handleEvent({
      type: NodeEventEnter,
      node: {
        type: TEXT_NODE,
        value: fallbackLabel,
        parent: node,
        index: 0,
        depth: node.depth + 1
      }
    });
    for (const ancestor of traverseUpToFirstBlockNode(node)) {
      ancestor.childTextNodeIndex = (ancestor.childTextNodeIndex || 0) + 1;
    }
  }

  const { tagId } = node;
  if (tagId) {
    // Clamp at zero so an unbalanced close cannot drive the counter negative.
    state.depthMap[tagId] = Math.max(0, state.depthMap[tagId] - 1);
  }
  state.depth--;

  handleEvent({ type: NodeEventExit, node });

  state.currentNode = state.currentNode.parent;
  state.hasEncodedHtmlEntity = false;
  state.justClosedTag = true;
}
1147
/**
 * Skips a `<!-- ... -->` comment or another `<! ... >` declaration that starts
 * at `position`.
 *
 * @param {string} htmlChunk - HTML being parsed.
 * @param {number} position - Index of the `<` that opens the construct.
 * @returns {{complete: boolean, newPosition: number, remainingText: string}}
 *   When the terminator is found, `newPosition` points just past it. Otherwise
 *   `complete` is false and `remainingText` holds the unconsumed text from `position`.
 */
function processCommentOrDoctype(htmlChunk, position) {
  const chunkLength = htmlChunk.length;
  const codeAt = (index) => htmlChunk.charCodeAt(index);
  const finishedAt = (next) => ({ complete: true, newPosition: next, remainingText: "" });

  const opensComment =
    position + 3 < chunkLength &&
    codeAt(position + 2) === DASH_CHAR &&
    codeAt(position + 3) === DASH_CHAR;

  if (opensComment) {
    // Scan for the "-->" terminator after the opening "<!--".
    for (let cursor = position + 4; cursor < chunkLength - 2; cursor++) {
      const endsHere =
        codeAt(cursor) === DASH_CHAR &&
        codeAt(cursor + 1) === DASH_CHAR &&
        codeAt(cursor + 2) === GT_CHAR;
      if (endsHere) {
        return finishedAt(cursor + 3);
      }
    }
    return {
      complete: false,
      newPosition: position,
      remainingText: htmlChunk.substring(position)
    };
  }

  // Not a comment: consume up to the first '>' after "<!".
  let cursor = position + 2;
  for (; cursor < chunkLength; cursor++) {
    if (codeAt(cursor) === GT_CHAR) {
      return finishedAt(cursor + 1);
    }
  }
  return {
    complete: false,
    newPosition: cursor,
    remainingText: htmlChunk.substring(position, cursor)
  };
}
1188
/**
 * Handles an opening tag whose name has already been read.
 *
 * Builds the element node, emits its enter event, makes it the current node
 * and, for self-closing tags, closes it again immediately.
 *
 * @param {string} tagName - Tag name as read by the caller.
 * @param {number} tagId - Numeric id used to index `tagHandlers` and `state.depthMap`.
 * @param {string} htmlChunk - HTML being parsed.
 * @param {number} i - Index right after the tag name, where attribute scanning starts.
 * @param {object} state - Parser state, mutated in place.
 * @param {Function} handleEvent - Receives the enter (and possibly exit) events.
 * @returns {{complete: boolean, newPosition: number, remainingText: string, selfClosing: boolean}}
 *   `complete: false` when the end of the tag is not in this chunk;
 *   `remainingText` then carries the tag text seen so far.
 */
function processOpeningTag(tagName, tagId, htmlChunk, i, state, handleEvent) {
  // A non-nesting current node is closed before any new tag is opened.
  if (state.currentNode?.tagHandler?.isNonNesting) {
    closeNode(state.currentNode, state, handleEvent);
  }

  const tagHandler = tagHandlers[tagId];
  const attrResult = processTagAttributes(htmlChunk, i, tagHandler);
  if (!attrResult.complete) {
    return {
      complete: false,
      newPosition: i,
      remainingText: `<${tagName}${attrResult.attrBuffer}`,
      selfClosing: false
    };
  }

  state.depthMap[tagId] = state.depthMap[tagId] + 1;
  state.depth++;

  // Take the next child index from the parent (0 when there is no parent).
  const parent = state.currentNode;
  let walkIndex = 0;
  if (parent) {
    parent.currentWalkIndex = parent.currentWalkIndex || 0;
    walkIndex = parent.currentWalkIndex++;
  }

  const element = {
    type: ELEMENT_NODE,
    name: tagName,
    attributes: attrResult.attributes,
    parent,
    depthMap: copyDepthMap(state.depthMap),
    depth: state.depth,
    index: walkIndex,
    regionId: parent?.regionId,
    tagId,
    tagHandler
  };
  state.lastTextNode = element;

  if (state.options?.plugins) {
    runProcessAttributesHooks(element, state);
  }
  handleEvent({ type: NodeEventEnter, node: element });

  // The walk index is initialised only after the enter event has been delivered.
  element.currentWalkIndex = 0;
  state.currentNode = element;
  state.hasEncodedHtmlEntity = false;

  const isSelfClosing = Boolean(attrResult.selfClosing);
  if (isSelfClosing) {
    closeNode(element, state, handleEvent);
  }
  state.justClosedTag = isSelfClosing;

  return {
    complete: true,
    newPosition: attrResult.newPosition,
    remainingText: "",
    selfClosing: attrResult.selfClosing
  };
}
1244
/**
 * Scans from `position` to the end of the current opening tag and parses the
 * attribute text found on the way.
 *
 * A `>` or `/>` inside a quoted attribute value does not end the tag. A quoted
 * value ends at the next occurrence of the quote character that opened it;
 * backslashes have no special meaning. The previous implementation carried a
 * `prevChar` backslash guard that could never trigger, because `prevChar` was
 * not updated while inside quotes. That dead code has been removed, and the
 * observable behaviour is unchanged.
 *
 * @param {string} htmlChunk - HTML being parsed.
 * @param {number} position - Index right after the tag name.
 * @param {object} [tagHandler] - Handler for the tag; only `isSelfClosing` is read.
 * @returns {{complete: boolean, newPosition: number, attributes: object, selfClosing: boolean, attrBuffer: string}}
 *   `complete: false` when the chunk ends before the tag does; `attrBuffer`
 *   then holds the raw, untrimmed text scanned so far.
 */
function processTagAttributes(htmlChunk, position, tagHandler) {
  const chunkLength = htmlChunk.length;
  const handlerSelfClosing = tagHandler?.isSelfClosing || false;
  let openQuote = 0; // char code of the quote currently open, 0 when outside quotes

  for (let i = position; i < chunkLength; i++) {
    const c = htmlChunk.charCodeAt(i);

    if (openQuote !== 0) {
      if (c === openQuote) {
        openQuote = 0;
      }
      continue;
    }
    if (c === QUOTE_CHAR || c === APOS_CHAR) {
      openQuote = c;
      continue;
    }

    const isSlashClose =
      c === SLASH_CHAR && i + 1 < chunkLength && htmlChunk.charCodeAt(i + 1) === GT_CHAR;
    if (isSlashClose || c === GT_CHAR) {
      const attrStr = htmlChunk.substring(position, i).trim();
      return {
        complete: true,
        newPosition: i + (isSlashClose ? 2 : 1),
        attributes: parseAttributes(attrStr),
        // An explicit "/>" always self-closes; otherwise defer to the tag handler.
        selfClosing: isSlashClose ? true : handlerSelfClosing,
        attrBuffer: attrStr
      };
    }
  }

  // Tag not terminated within this chunk.
  const scannedTo = Math.max(position, chunkLength);
  return {
    complete: false,
    newPosition: scannedTo,
    attributes: EMPTY_ATTRIBUTES,
    selfClosing: false,
    attrBuffer: htmlChunk.substring(position, scannedTo)
  };
}
1293
/**
 * Parses the attribute portion of an opening tag into a name -> value object.
 *
 * Names are lower-cased. Valueless (boolean) attributes map to `""`. Values may
 * be double-quoted, single-quoted or unquoted; a quoted value ends at the next
 * occurrence of its opening quote (HTML defines no backslash escapes).
 *
 * Fixes over the previous implementation:
 * - a trailing valueless attribute (`href="x" download`) no longer overwrites
 *   the preceding attribute's value with `""` while being dropped itself;
 * - a lone valueless attribute (`disabled`) is no longer dropped;
 * - a backslash before the closing quote (`title="C:\"`) no longer swallows
 *   the quote and the attributes that follow it.
 *
 * @param {string} attrStr - Trimmed text between the tag name and the closing `>` / `/>`.
 * @returns {object} Parsed attributes, or the shared `EMPTY_ATTRIBUTES` for empty input.
 */
function parseAttributes(attrStr) {
  if (!attrStr) {
    return EMPTY_ATTRIBUTES;
  }

  const WHITESPACE = 0;
  const NAME = 1;
  const AFTER_NAME = 2;
  const BEFORE_VALUE = 3;
  const QUOTED_VALUE = 4;
  const UNQUOTED_VALUE = 5;

  const result = {};
  const len = attrStr.length;
  let state = WHITESPACE;
  let nameStart = 0;
  let valueStart = 0;
  let quoteChar = 0;
  let name = "";

  for (let i = 0; i < len; i++) {
    const charCode = attrStr.charCodeAt(i);
    const isSpace = isWhitespace(charCode);

    switch (state) {
      case WHITESPACE:
        if (!isSpace) {
          state = NAME;
          nameStart = i;
        }
        break;
      case NAME:
        if (charCode === EQUALS_CHAR || isSpace) {
          name = attrStr.substring(nameStart, i).toLowerCase();
          state = charCode === EQUALS_CHAR ? BEFORE_VALUE : AFTER_NAME;
        }
        break;
      case AFTER_NAME:
        if (charCode === EQUALS_CHAR) {
          state = BEFORE_VALUE;
        } else if (!isSpace) {
          // The previous name had no value; a new name starts here.
          result[name] = "";
          state = NAME;
          nameStart = i;
        }
        break;
      case BEFORE_VALUE:
        if (charCode === QUOTE_CHAR || charCode === APOS_CHAR) {
          quoteChar = charCode;
          state = QUOTED_VALUE;
          valueStart = i + 1;
        } else if (!isSpace) {
          state = UNQUOTED_VALUE;
          valueStart = i;
        }
        break;
      case QUOTED_VALUE:
        if (charCode === quoteChar) {
          result[name] = attrStr.substring(valueStart, i);
          state = WHITESPACE;
        }
        break;
      case UNQUOTED_VALUE:
        if (isSpace || charCode === GT_CHAR) {
          result[name] = attrStr.substring(valueStart, i);
          state = WHITESPACE;
        }
        break;
    }
  }

  // Flush whatever was still in progress when the input ended.
  switch (state) {
    case NAME:
      // `name` still refers to the previous attribute here, so read the pending one afresh.
      result[attrStr.substring(nameStart).toLowerCase()] = "";
      break;
    case AFTER_NAME:
    case BEFORE_VALUE:
      result[name] = "";
      break;
    case QUOTED_VALUE:
    case UNQUOTED_VALUE:
      result[name] = attrStr.substring(valueStart);
      break;
  }

  return result;
}
1375
/**
 * Parses one piece of HTML and returns the Markdown assembled from it.
 *
 * `state` is reused across calls so a document can be fed in several pieces;
 * missing fields are initialised here. The toggle and content buffer of
 * region 0 are (re)set on every call.
 *
 * @param {string} partialHtml - HTML to parse, including any remainder returned by a previous call.
 * @param {object} [state] - Mutable parser state; `state.options.plugins` lists the plugins to run.
 * @returns {{chunk: *, remainingHTML: *}} `chunk` is the value returned by
 *   `assembleBufferedContent`; `remainingHTML` is the unprocessed tail
 *   returned by `parseHTML`.
 */
function processPartialHTMLToMarkdown(partialHtml, state = {}) {
  state.depthMap ??= new Uint8Array(MAX_TAG_ID);
  state.plugins = [...(state.options?.plugins || [])];
  state.regionToggles ??= new Map();
  state.regionContentBuffers ??= new Map();
  state.regionToggles.set(0, true);
  state.regionContentBuffers.set(0, []);
  const fullState = state;

  /**
   * Lets every plugin inspect the event first; a plugin that returns an object
   * with a truthy `skip` suppresses the event entirely.
   */
  function handleEvent(event) {
    for (const plugin of fullState.plugins || []) {
      const res = plugin.beforeNodeProcess?.(event, fullState);
      // `typeof null === "object"`, so guard against a hook returning null.
      if (res !== null && typeof res === "object" && res.skip) {
        return;
      }
    }
    processHtmlEventToMarkdown(event, fullState);
  }

  const unprocessedHtml = parseHTML(partialHtml, fullState, handleEvent);
  const assembledContent = assembleBufferedContent(fullState);
  return { chunk: assembledContent, remainingHTML: unprocessedHtml };
}
1400
+
1401
/**
 * Converts a stream of HTML into Markdown chunks as the data arrives.
 *
 * Each value read from the stream (a string, or bytes decoded with a streaming
 * `TextDecoder`) is appended to the unprocessed remainder of the previous round
 * and passed to `processPartialHTMLToMarkdown`. Non-empty chunks are yielded.
 *
 * @param {object} htmlStream - Object exposing `getReader()`; the reader must
 *   provide `read()` and `releaseLock()`.
 * @param {object} [options] - Stored as `state.options` for the parser.
 * @yields {*} Each truthy `chunk` returned by `processPartialHTMLToMarkdown`.
 * @throws {Error} "Invalid HTML stream provided" when `htmlStream` is falsy.
 */
async function* streamHtmlToMarkdown(htmlStream, options = {}) {
  if (!htmlStream) {
    throw new Error("Invalid HTML stream provided");
  }

  const decoder = new TextDecoder();
  const reader = htmlStream.getReader();
  const state = { options };
  let carryOver = "";

  try {
    for (let step = await reader.read(); !step.done; step = await reader.read()) {
      const piece = typeof step.value === "string"
        ? step.value
        : decoder.decode(step.value, { stream: true });
      const result = processPartialHTMLToMarkdown(`${carryOver}${piece}`, state);
      if (result.chunk) {
        yield result.chunk;
      }
      carryOver = result.remainingHTML;
    }
  } finally {
    if (carryOver) {
      // Ends the streaming decode; the decoded output is discarded.
      decoder.decode(new Uint8Array(0), { stream: false });
    }
    // Always release the reader, including when the consumer stops early.
    reader.releaseLock();
  }
}
1431
+
1432
+ export { processPartialHTMLToMarkdown as p, streamHtmlToMarkdown as s };