comark 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +104 -0
  2. package/dist/index.d.ts +4 -0
  3. package/dist/index.js +6 -0
  4. package/dist/internal/frontmatter.d.ts +16 -0
  5. package/dist/internal/frontmatter.js +43 -0
  6. package/dist/internal/parse/auto-close/index.d.ts +12 -0
  7. package/dist/internal/parse/auto-close/index.js +457 -0
  8. package/dist/internal/parse/auto-close/table.d.ts +4 -0
  9. package/dist/internal/parse/auto-close/table.js +161 -0
  10. package/dist/internal/parse/auto-unwrap.d.ts +20 -0
  11. package/dist/internal/parse/auto-unwrap.js +42 -0
  12. package/dist/internal/parse/html/html_block_rule.d.ts +2 -0
  13. package/dist/internal/parse/html/html_block_rule.js +60 -0
  14. package/dist/internal/parse/html/html_blocks.d.ts +2 -0
  15. package/dist/internal/parse/html/html_blocks.js +66 -0
  16. package/dist/internal/parse/html/html_inline_rule.d.ts +2 -0
  17. package/dist/internal/parse/html/html_inline_rule.js +43 -0
  18. package/dist/internal/parse/html/html_re.d.ts +3 -0
  19. package/dist/internal/parse/html/html_re.js +18 -0
  20. package/dist/internal/parse/html/index.d.ts +18 -0
  21. package/dist/internal/parse/html/index.js +122 -0
  22. package/dist/internal/parse/incremental.d.ts +12 -0
  23. package/dist/internal/parse/incremental.js +39 -0
  24. package/dist/internal/parse/token-processor.d.ts +9 -0
  25. package/dist/internal/parse/token-processor.js +803 -0
  26. package/dist/internal/props-validation.d.ts +12 -0
  27. package/dist/internal/props-validation.js +112 -0
  28. package/dist/internal/stringify/attributes.d.ts +21 -0
  29. package/dist/internal/stringify/attributes.js +67 -0
  30. package/dist/internal/stringify/handlers/a.d.ts +3 -0
  31. package/dist/internal/stringify/handlers/a.js +11 -0
  32. package/dist/internal/stringify/handlers/blockquote.d.ts +3 -0
  33. package/dist/internal/stringify/handlers/blockquote.js +18 -0
  34. package/dist/internal/stringify/handlers/br.d.ts +3 -0
  35. package/dist/internal/stringify/handlers/br.js +3 -0
  36. package/dist/internal/stringify/handlers/code.d.ts +3 -0
  37. package/dist/internal/stringify/handlers/code.js +11 -0
  38. package/dist/internal/stringify/handlers/comment.d.ts +3 -0
  39. package/dist/internal/stringify/handlers/comment.js +6 -0
  40. package/dist/internal/stringify/handlers/del.d.ts +3 -0
  41. package/dist/internal/stringify/handlers/del.js +4 -0
  42. package/dist/internal/stringify/handlers/emphesis.d.ts +3 -0
  43. package/dist/internal/stringify/handlers/emphesis.js +13 -0
  44. package/dist/internal/stringify/handlers/heading.d.ts +3 -0
  45. package/dist/internal/stringify/handlers/heading.js +7 -0
  46. package/dist/internal/stringify/handlers/hr.d.ts +3 -0
  47. package/dist/internal/stringify/handlers/hr.js +3 -0
  48. package/dist/internal/stringify/handlers/html.d.ts +3 -0
  49. package/dist/internal/stringify/handlers/html.js +73 -0
  50. package/dist/internal/stringify/handlers/img.d.ts +3 -0
  51. package/dist/internal/stringify/handlers/img.js +9 -0
  52. package/dist/internal/stringify/handlers/index.d.ts +2 -0
  53. package/dist/internal/stringify/handlers/index.js +56 -0
  54. package/dist/internal/stringify/handlers/li.d.ts +3 -0
  55. package/dist/internal/stringify/handlers/li.js +43 -0
  56. package/dist/internal/stringify/handlers/math.d.ts +3 -0
  57. package/dist/internal/stringify/handlers/math.js +8 -0
  58. package/dist/internal/stringify/handlers/mdc.d.ts +3 -0
  59. package/dist/internal/stringify/handlers/mdc.js +47 -0
  60. package/dist/internal/stringify/handlers/mermaid.d.ts +3 -0
  61. package/dist/internal/stringify/handlers/mermaid.js +8 -0
  62. package/dist/internal/stringify/handlers/ol.d.ts +3 -0
  63. package/dist/internal/stringify/handlers/ol.js +18 -0
  64. package/dist/internal/stringify/handlers/p.d.ts +3 -0
  65. package/dist/internal/stringify/handlers/p.js +8 -0
  66. package/dist/internal/stringify/handlers/pre.d.ts +3 -0
  67. package/dist/internal/stringify/handlers/pre.js +60 -0
  68. package/dist/internal/stringify/handlers/strong.d.ts +3 -0
  69. package/dist/internal/stringify/handlers/strong.js +13 -0
  70. package/dist/internal/stringify/handlers/table.d.ts +8 -0
  71. package/dist/internal/stringify/handlers/table.js +180 -0
  72. package/dist/internal/stringify/handlers/template.d.ts +3 -0
  73. package/dist/internal/stringify/handlers/template.js +14 -0
  74. package/dist/internal/stringify/handlers/ul.d.ts +3 -0
  75. package/dist/internal/stringify/handlers/ul.js +18 -0
  76. package/dist/internal/stringify/indent.d.ts +4 -0
  77. package/dist/internal/stringify/indent.js +8 -0
  78. package/dist/internal/stringify/state.d.ts +13 -0
  79. package/dist/internal/stringify/state.js +121 -0
  80. package/dist/internal/yaml.d.ts +12 -0
  81. package/dist/internal/yaml.js +51 -0
  82. package/dist/parse.d.ts +66 -0
  83. package/dist/parse.js +163 -0
  84. package/dist/plugins/alert.d.ts +2 -0
  85. package/dist/plugins/alert.js +66 -0
  86. package/dist/plugins/emoji.d.ts +3 -0
  87. package/dist/plugins/emoji.js +438 -0
  88. package/dist/plugins/headings.d.ts +48 -0
  89. package/dist/plugins/headings.js +85 -0
  90. package/dist/plugins/highlight.d.ts +71 -0
  91. package/dist/plugins/highlight.js +234 -0
  92. package/dist/plugins/math.d.ts +59 -0
  93. package/dist/plugins/math.js +263 -0
  94. package/dist/plugins/mermaid.d.ts +38 -0
  95. package/dist/plugins/mermaid.js +185 -0
  96. package/dist/plugins/security.d.ts +11 -0
  97. package/dist/plugins/security.js +32 -0
  98. package/dist/plugins/summary.d.ts +2 -0
  99. package/dist/plugins/summary.js +22 -0
  100. package/dist/plugins/task-list.d.ts +8 -0
  101. package/dist/plugins/task-list.js +117 -0
  102. package/dist/plugins/toc.d.ts +15 -0
  103. package/dist/plugins/toc.js +118 -0
  104. package/dist/render.d.ts +18 -0
  105. package/dist/render.js +29 -0
  106. package/dist/types.d.ts +258 -0
  107. package/dist/types.js +1 -0
  108. package/dist/utils/caret.d.ts +7 -0
  109. package/dist/utils/caret.js +36 -0
  110. package/dist/utils/index.d.ts +38 -0
  111. package/dist/utils/index.js +149 -0
  112. package/package.json +75 -9
@@ -0,0 +1,161 @@
1
/**
 * Splits a table row on unescaped pipes.
 *
 * A pipe immediately preceded by a backslash counts as cell content, not as
 * a delimiter. Anything before the first delimiter is ignored.
 *
 * @param row - One line of a markdown table
 * @returns `closed` holds the raw text of every cell that was terminated by
 *   a pipe; `trailing` holds the raw text after the last pipe (empty when
 *   the row ends with a pipe or contains no pipe at all)
 */
function splitRowOnPipes(row) {
  const closed = [];
  let current = '';
  let started = false;
  for (let i = 0; i < row.length; i++) {
    const ch = row[i];
    const isDelimiter = ch === '|' && !(i > 0 && row[i - 1] === '\\');
    if (isDelimiter) {
      if (started) {
        closed.push(current);
      }
      current = '';
      started = true;
    }
    else if (started) {
      current += ch;
    }
  }
  return { closed, trailing: current };
}
/**
 * Parse cell widths from a table row (respects escaped pipes \|).
 *
 * Widths are raw character counts, including the padding spaces around the
 * cell text. Cells with no characters at all (`||`) are skipped.
 */
function parseCellWidths(row) {
  const { closed, trailing } = splitRowOnPipes(row);
  return [...closed, trailing]
    .filter(segment => segment.length > 0)
    .map(segment => segment.length);
}
/**
 * Parse cell contents from a table row (respects escaped pipes \|).
 *
 * Every pipe-terminated cell is returned trimmed (empty cells included);
 * text after the last pipe is returned only when it is not blank.
 */
function parseCells(row) {
  const { closed, trailing } = splitRowOnPipes(row);
  const cells = closed.map(segment => segment.trim());
  const last = trailing.trim();
  if (last) {
    cells.push(last);
  }
  return cells;
}
/**
 * Tells whether a trimmed line is a (possibly incomplete) delimiter row.
 *
 * A delimiter row starts with a pipe, is made only of pipes, dashes, colons
 * and whitespace, and has at least one dash or colon. Merely *containing* a
 * dash or colon is not enough: `| first-name |` and `| 10:30 |` are content.
 */
function isSeparatorRow(line) {
  return /^\|[\s|:-]*$/.test(line) && /[-:]/.test(line);
}
/**
 * Completes one delimiter cell: aligned cells keep their colons and get at
 * least one dash, unaligned cells are padded to three dashes.
 */
function completeSeparatorCell(cell) {
  const hasLeftAlign = cell.startsWith(':');
  const hasRightAlign = cell.endsWith(':') && cell.length > 1;
  let dashes = cell.replace(/^:/, '').replace(/:$/, '');
  if (hasLeftAlign || hasRightAlign) {
    if (dashes.length < 1) {
      dashes = '-';
    }
    return (hasLeftAlign ? ':' : '') + dashes + (hasRightAlign ? ':' : '');
  }
  return dashes.padEnd(3, '-');
}
/**
 * Closes unclosed markdown tables.
 *
 * Only the last run of consecutive lines starting with `|` is touched:
 * - the header gets a trailing pipe if it lacks one;
 * - if the last line is a delimiter row, its cells are completed and padded
 *   out to the header's column count;
 * - otherwise, if the last line is a data row without a trailing pipe, it is
 *   closed and padded using the widths of an earlier complete row;
 * - a `| --- | ... |` delimiter row is inserted after the header when the
 *   second line is not one already.
 *
 * @param markdown - Markdown source, possibly ending mid-table
 * @returns The markdown with its last table completed; the input is returned
 *   unchanged when no line starts with `|`
 */
export function closeTables(markdown) {
  const lines = markdown.split('\n');
  // Group consecutive table rows (lines starting with |) into blocks
  const tableBlocks = [];
  let blockStart = -1;
  lines.forEach((line, index) => {
    if (line.trim().startsWith('|')) {
      if (blockStart === -1) {
        blockStart = index;
      }
    }
    else if (blockStart !== -1) {
      tableBlocks.push({ start: blockStart, end: index - 1 });
      blockStart = -1;
    }
  });
  if (blockStart !== -1) {
    tableBlocks.push({ start: blockStart, end: lines.length - 1 });
  }
  if (tableBlocks.length === 0) {
    return markdown;
  }
  const { start, end } = tableBlocks[tableBlocks.length - 1];
  // Ensure header has trailing pipe
  if (!lines[start].trim().endsWith('|')) {
    lines[start] += ' |';
  }
  const columnCount = parseCellWidths(lines[start].trim()).length;
  const secondLine = end - start >= 1 ? lines[start + 1].trim() : '';
  const hasSeparator = isSeparatorRow(secondLine);
  // Read after the header fix-up so a single-line table sees the closed header
  const lastLine = lines[end].trim();
  if (isSeparatorRow(lastLine)) {
    const completedCells = parseCells(lastLine).map(completeSeparatorCell);
    while (completedCells.length < columnCount) {
      completedCells.push('---');
    }
    lines[end] = '| ' + completedCells.join(' | ') + ' |';
  }
  else if (lastLine.startsWith('|') && !lastLine.endsWith('|')) {
    // Use the first complete data row as the width reference, else the header
    let refRow = lines[start].trim();
    for (let i = start + (hasSeparator ? 2 : 1); i < end; i++) {
      const row = lines[i].trim();
      if (row.startsWith('|') && row.endsWith('|') && !isSeparatorRow(row)) {
        refRow = row;
        break;
      }
    }
    const refWidths = parseCellWidths(refRow);
    const paddedCells = parseCells(lastLine).map((cell, i) => {
      // Reference widths include the two padding spaces around the text
      const targetWidth = refWidths[i] || cell.length + 2;
      return cell + ' '.repeat(Math.max(0, targetWidth - cell.length - 2));
    });
    lines[end] = '| ' + paddedCells.join(' | ') + ' |';
  }
  if (!hasSeparator) {
    const separator = '| ' + Array(columnCount).fill('---').join(' | ') + ' |';
    lines.splice(start + 1, 0, separator);
  }
  return lines.join('\n');
}
@@ -0,0 +1,20 @@
1
+ import type { ComarkNode } from 'comark';
2
+ /**
3
+ * Applies automatic unwrapping to container components.
4
+ *
5
+ * This utility removes unnecessary paragraph wrappers from container component children.
6
+ * If a container has only a single paragraph child (and no other block elements),
7
+ * the paragraph is unwrapped and its children are hoisted up to be direct children
8
+ * of the container. Whitespace-only text siblings are ignored when checking and dropped when unwrapping.
9
+ *
10
+ * @param node - The Comark node to process: a text string or a `[tag, props, ...children]` tuple
11
+ * @returns The node with auto-unwrapped children (if applicable)
12
+ *
13
+ * @example
14
+ * // Before:
15
+ * ['alert', {}, ['p', {}, 'Text']]
16
+ *
17
+ * // After:
18
+ * ['alert', {}, 'Text']
19
+ */
20
+ export declare function applyAutoUnwrap(node: ComarkNode): ComarkNode;
@@ -0,0 +1,42 @@
1
/**
 * Removes a redundant paragraph wrapper from a container node.
 *
 * Nodes are either text strings or `[tag, props, ...children]` tuples.
 * When the only meaningful child of `node` is a single `p` element
 * (whitespace-only text siblings do not count), that paragraph's children
 * replace it and the whitespace-only siblings are dropped. In every other
 * case the children are kept and each one is processed recursively.
 *
 * @param node - The Comark node to process
 * @returns The same node when there is nothing to do (text, a tuple shorter
 *   than two entries, or no meaningful children); otherwise a new tuple
 *
 * @example
 * // Before:
 * ['alert', {}, ['p', {}, 'Text']]
 *
 * // After:
 * ['alert', {}, 'Text']
 */
export function applyAutoUnwrap(node) {
  const isText = value => typeof value === 'string';
  if (isText(node) || node.length < 2) {
    return node;
  }
  const [tag, props, ...children] = node;
  // Blank text nodes are formatting noise and must not block unwrapping
  const meaningful = children.filter(child => !(isText(child) && child.trim() === ''));
  if (meaningful.length === 0) {
    return node;
  }
  const [only] = meaningful;
  const isLoneParagraph = meaningful.length === 1 && !isText(only) && only[0] === 'p';
  if (isLoneParagraph) {
    // Skip the paragraph's own tag and props; hoist just its children
    return [tag, props, ...only.slice(2)];
  }
  const processed = children.map(child => applyAutoUnwrap(child));
  return [tag, props, ...processed];
}
@@ -0,0 +1,2 @@
1
+ import type { StateBlock } from 'markdown-exit';
2
/** Block rule for raw HTML. Returns false when the line at `startLine` does not start an HTML block; in `silent` mode nothing is pushed and the result says whether the matched block kind may terminate a paragraph. */
+ export default function html_block(state: StateBlock, startLine: number, endLine: number, silent: boolean): boolean;
@@ -0,0 +1,60 @@
1
+ // BASED ON https://github.com/serkodev/markdown-exit/blob/fe1351070a5841426223ab4a0a5c7874ba2b1257/packages/markdown-exit/src/parser/block/rules/html_block.ts
2
+ import block_names from "./html_blocks.js";
3
+ import { HTML_OPEN_CLOSE_TAG_RE } from "./html_re.js";
4
+ // Ordered [start pattern, end pattern, canTerminateParagraph] triples, one per kind of HTML block.
5
+ // html_block() uses the FIRST entry whose start pattern matches the line. The final entry repeats
6
+ // entry 0's start pattern, so entry 0 always wins over it. html_block() never tests entry 0's end pattern (single-line block).
7
+ const HTML_SEQUENCES = [
8
+ [new RegExp(`${HTML_OPEN_CLOSE_TAG_RE.source}\\s*$`), /^<\/[^>]+>$/, true],
9
+ [/^<(script|pre|style|textarea)(?=(\s|>|$))/i, /<\/(script|pre|style|textarea)>/i, true],
10
+ [/^<!--/, /-->/, true],
11
+ [/^<\?/, /\?>/, true],
12
+ [/^<![A-Z]/, />/, true],
13
+ [/^<!\[CDATA\[/, /\]\]>/, true],
14
+ [new RegExp(`^</?(${block_names.join('|')})(?=(\\s|/?>|$))`, 'i'), /^$/, true],
15
+ [new RegExp(`${HTML_OPEN_CLOSE_TAG_RE.source}\\s*$`), /^$/, false],
16
+ ];
17
/**
 * Block rule that recognises raw HTML blocks.
 *
 * The first line is matched against the start patterns in HTML_SEQUENCES;
 * the first matching entry decides how the block ends. Entry 0 (a lone
 * opening or closing tag on its line) always produces a single-line block.
 * For every other entry, following lines are consumed until the entry's end
 * pattern matches, the indentation drops below `state.blkIndent`, or
 * `endLine` is reached.
 *
 * The pushed token is `html_block_close` (nesting -1) when the block's FIRST
 * line starts with `</`, otherwise `html_block` (nesting 1). The decision
 * must not use the last scanned line: a multi-line block such as
 * `<script src=x>` ... `</script>` would otherwise be emitted as a closing
 * token merely because its final line is a closing tag.
 *
 * @param state - Block parser state
 * @param startLine - Index of the first line to examine
 * @param endLine - Index one past the last line available to this rule
 * @param silent - When true, only report whether the matched block kind may
 *   terminate a paragraph; nothing is pushed and `state.line` is untouched
 * @returns false when no HTML block starts at `startLine`; in silent mode
 *   the terminate-paragraph flag of the matched entry; otherwise true
 */
export default function html_block(state, startLine, endLine, silent) {
  let pos = state.bMarks[startLine] + state.tShift[startLine];
  let max = state.eMarks[startLine];
  // if it's indented more than 3 spaces, it should be a code block
  if (state.sCount[startLine] - state.blkIndent >= 4) {
    return false;
  }
  if (state.src.charCodeAt(pos) !== 0x3C /* < */) {
    return false;
  }
  const firstLineText = state.src.slice(pos, max);
  let i = 0;
  for (; i < HTML_SEQUENCES.length; i++) {
    if (HTML_SEQUENCES[i][0].test(firstLineText)) {
      break;
    }
  }
  if (i === HTML_SEQUENCES.length) {
    return false;
  }
  if (silent) {
    // true if this sequence can be a terminator, false otherwise
    return HTML_SEQUENCES[i][2];
  }
  let nextLine = startLine + 1;
  // Entry 0 is always a single line; other entries roll down to the block end
  // unless the end pattern already matches on the first line.
  if (i !== 0 && !HTML_SEQUENCES[i][1].test(firstLineText)) {
    for (; nextLine < endLine; nextLine++) {
      if (state.sCount[nextLine] < state.blkIndent) {
        break;
      }
      pos = state.bMarks[nextLine] + state.tShift[nextLine];
      max = state.eMarks[nextLine];
      const lineText = state.src.slice(pos, max);
      if (HTML_SEQUENCES[i][1].test(lineText)) {
        // A non-empty terminating line belongs to the block; a blank one does not
        if (lineText.length !== 0) {
          nextLine++;
        }
        break;
      }
    }
  }
  state.line = nextLine;
  const token = firstLineText.startsWith('</')
    ? state.push('html_block_close', '', -1)
    : state.push('html_block', '', 1);
  token.map = [startLine, nextLine];
  token.content = state.getLines(startLine, nextLine, state.blkIndent, true);
  return true;
}
@@ -0,0 +1,2 @@
1
/** Names of the HTML tags that open a block-level HTML block (presumably the list exported by html_blocks.js — confirm). */
+ declare const _default: string[];
2
+ export default _default;
@@ -0,0 +1,66 @@
1
+ // Tag names that open a block-level HTML block (start condition 6 of the CommonMark spec; the list includes `search` and omits `source`, matching spec 0.31.x)
2
+ // https://spec.commonmark.org/0.31.2/#html-blocks
3
+ export default [
4
+ 'address',
5
+ 'article',
6
+ 'aside',
7
+ 'base',
8
+ 'basefont',
9
+ 'blockquote',
10
+ 'body',
11
+ 'caption',
12
+ 'center',
13
+ 'col',
14
+ 'colgroup',
15
+ 'dd',
16
+ 'details',
17
+ 'dialog',
18
+ 'dir',
19
+ 'div',
20
+ 'dl',
21
+ 'dt',
22
+ 'fieldset',
23
+ 'figcaption',
24
+ 'figure',
25
+ 'footer',
26
+ 'form',
27
+ 'frame',
28
+ 'frameset',
29
+ 'h1',
30
+ 'h2',
31
+ 'h3',
32
+ 'h4',
33
+ 'h5',
34
+ 'h6',
35
+ 'head',
36
+ 'header',
37
+ 'hr',
38
+ 'html',
39
+ 'iframe',
40
+ 'legend',
41
+ 'li',
42
+ 'link',
43
+ 'main',
44
+ 'menu',
45
+ 'menuitem',
46
+ 'nav',
47
+ 'noframes',
48
+ 'ol',
49
+ 'optgroup',
50
+ 'option',
51
+ 'p',
52
+ 'param',
53
+ 'search',
54
+ 'section',
55
+ 'summary',
56
+ 'table',
57
+ 'tbody',
58
+ 'td',
59
+ 'tfoot',
60
+ 'th',
61
+ 'thead',
62
+ 'title',
63
+ 'tr',
64
+ 'track',
65
+ 'ul',
66
+ ];
@@ -0,0 +1,2 @@
1
+ import type { StateInline } from 'markdown-exit';
2
/** Inline rule for raw HTML tags. Returns false when no HTML construct starts at `state.pos`; otherwise advances `state.pos` past it and, unless `silent`, pushes an `html_inline` token. */
+ export default function html_inline(state: StateInline, silent: boolean): boolean;
@@ -0,0 +1,43 @@
1
+ // BASED ON https://github.com/serkodev/markdown-exit/blob/fe1351070a5841426223ab4a0a5c7874ba2b1257/packages/markdown-exit/src/parser/inline/rules/html_inline.ts
2
+ import { HTML_TAG_RE } from "./html_re.js";
3
/** Tells whether `str` begins with an opening `<a` tag (`<a` followed by `>` or whitespace, any letter case). */
function isLinkOpen(str) {
  const opensLink = /^<a[>\s]/i;
  return opensLink.test(str);
}
6
/** Tells whether `str` begins with a closing `</a>` tag (optional whitespace before `>`, any letter case). */
function isLinkClose(str) {
  const closesLink = /^<\/a\s*>/i;
  return closesLink.test(str);
}
9
/**
 * Tells whether a character code is an ASCII letter (A-Z or a-z).
 *
 * @param ch - A UTF-16 code unit, e.g. from `String.prototype.charCodeAt`
 */
function isLetter(ch) {
  /* eslint no-bitwise:0 */
  // Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z', so one range check suffices
  const folded = ch | 0x20;
  if (folded < 0x61 /* a */) {
    return false;
  }
  return folded <= 0x7A /* z */;
}
14
/**
 * Inline rule that recognises one raw HTML construct at the current position.
 *
 * The text from `state.pos` onwards must match HTML_TAG_RE. On a match the
 * position is advanced past the construct. Unless `silent`, an `html_inline`
 * token holding the matched text is pushed, and `state.linkLevel` is raised
 * for an opening `<a>` tag and lowered for a closing `</a>` tag.
 *
 * @param state - Inline parser state
 * @param silent - When true, only advance the position; push nothing
 * @returns true when an HTML construct was consumed, false otherwise
 */
export default function html_inline(state, silent) {
  const limit = state.posMax;
  const start = state.pos;
  const source = state.src;
  // Needs a '<' with at least two more characters before the limit
  if (source.charCodeAt(start) !== 0x3C /* < */ || start + 2 >= limit) {
    return false;
  }
  // Quick fail on second char: only '!', '?', '/' or a letter can follow '<'
  const second = source.charCodeAt(start + 1);
  const mayStartTag = second === 0x21 /* ! */
    || second === 0x3F /* ? */
    || second === 0x2F /* / */
    || isLetter(second);
  if (!mayStartTag) {
    return false;
  }
  const match = source.slice(start).match(HTML_TAG_RE);
  if (!match) {
    return false;
  }
  const html = match[0];
  if (!silent) {
    const token = state.push('html_inline', '', 0);
    token.content = html;
    if (isLinkOpen(html)) {
      state.linkLevel++;
    }
    if (isLinkClose(html)) {
      state.linkLevel--;
    }
  }
  state.pos += html.length;
  return true;
}
@@ -0,0 +1,3 @@
1
+ declare const HTML_TAG_RE: RegExp; // anchored at the start of the input
2
+ declare const HTML_OPEN_CLOSE_TAG_RE: RegExp; // anchored at the start of the input
3
+ export { HTML_OPEN_CLOSE_TAG_RE, HTML_TAG_RE };
@@ -0,0 +1,18 @@
1
// Regular-expression fragments for raw HTML constructs, combined below into
// two patterns anchored at the start of the input.
const attrName = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
const unquotedValue = '[^"\'=<>`\\x00-\\x20]+';
const singleQuotedValue = '\'[^\']*\'';
const doubleQuotedValue = '"[^"]*"';
const attrValue = '(?:' + [unquotedValue, singleQuotedValue, doubleQuotedValue].join('|') + ')';
const attribute = '(?:\\s+' + attrName + '(?:\\s*=\\s*' + attrValue + ')?)';
const openTag = '<[A-Za-z][A-Za-z0-9\\-]*' + attribute + '*\\s*\\/?>';
const closeTag = '<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>';
const comment = '<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->';
const processing = '<\\?[\\s\\S]*?\\?>';
const declaration = '<![A-Za-z][^>]*>';
const cdata = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>';
// Wraps the alternatives in one non-capturing group anchored with ^
const anchoredAlternation = alternatives => new RegExp('^(?:' + alternatives.join('|') + ')');
// Matches any HTML construct: tag, comment, processing instruction, declaration or CDATA
// eslint-disable-next-line regexp/no-super-linear-backtracking, regexp/prefer-w
const HTML_TAG_RE = anchoredAlternation([openTag, closeTag, comment, processing, declaration, cdata]);
// Matches only an opening or a closing tag
// eslint-disable-next-line regexp/use-ignore-case, regexp/no-super-linear-backtracking, regexp/prefer-w
const HTML_OPEN_CLOSE_TAG_RE = anchoredAlternation([openTag, closeTag]);
export { HTML_OPEN_CLOSE_TAG_RE, HTML_TAG_RE };
@@ -0,0 +1,18 @@
1
+ import type { ComarkNode } from 'comark';
2
/** Description of one inline HTML tag, as returned by parseInlineHtmlTag. */
+ interface HtmlTagInfo {
3
+ tag: string; // tag name
4
+ attrs: Record<string, unknown>; // tag attributes; empty for closing tags
5
+ isVoid: boolean; // true for void elements such as br or img
6
+ isClose: boolean; // true when the fragment is a closing tag
7
+ }
8
+ /**
9
+ * Parse a single inline HTML tag fragment (opening, closing, or void). Leading and trailing whitespace is ignored.
10
+ * Returns null if the content is not a recognisable HTML tag (for example when it does not start with `<`).
11
+ */
12
+ export declare function parseInlineHtmlTag(html: string): HtmlTagInfo | null;
13
+ /**
14
+ * Parse a full HTML string into ComarkNodes using htmlparser2.
15
+ * Handles nested elements, text, void elements, and comments. Text is trimmed and whitespace-only text is dropped; comments become `[null, {}, data]` nodes.
16
+ */
17
+ export declare function htmlToComarkNodes(html: string): ComarkNode[];
18
+ export {};
@@ -0,0 +1,122 @@
1
+ import { Parser } from 'htmlparser2';
2
/** Tag names treated as void elements: they never get children and their closing tags are ignored. */
const VOID_ELEMENTS = new Set([
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'param',
  'source',
  'track',
  'wbr',
]);
6
/**
 * Converts an attribute map from the HTML parser into Comark attributes.
 *
 * The result always starts with a `$` entry marking the node as HTML
 * (`html: 1`) and as block (`block: 1`) or inline (`block: 0`). An attribute
 * whose value is the empty string is stored as `:name` with the string
 * `'true'`; every other attribute is copied unchanged.
 *
 * @param attribs - Attribute name to value map
 * @param isInline - Whether the tag came from inline HTML (default false)
 * @returns The Comark attribute object
 */
function attribsToComarkAttrs(attribs, isInline = false) {
  const result = {
    $: { html: 1, block: isInline ? 0 : 1 },
  };
  for (const name in attribs) {
    const raw = attribs[name];
    const isValueless = raw === '';
    const targetKey = isValueless ? `:${name}` : name;
    result[targetKey] = isValueless ? 'true' : raw;
  }
  return result;
}
24
/**
 * Parse a single inline HTML tag fragment (opening, closing, or void).
 *
 * Closing tags are recognised with a regular expression and never reach the
 * HTML parser. Their names may contain hyphens (`</my-element>`), matching
 * the tag-name pattern used by the inline HTML tokenizer, so that a custom
 * element's closing tag is recognised just like its opening tag.
 *
 * @param html - The fragment; surrounding whitespace is ignored
 * @returns Tag details, or null if the content is not a recognisable HTML
 *   tag. For closing tags `attrs` is empty and the name is lower-cased.
 */
export function parseInlineHtmlTag(html) {
  const trimmed = html.trim();
  if (!trimmed.startsWith('<')) {
    return null;
  }
  // Fast path: closing tag
  const closeMatch = trimmed.match(/^<\/([a-z][a-z0-9-]*)\s*>/i);
  if (closeMatch) {
    return { tag: closeMatch[1].toLowerCase(), attrs: {}, isVoid: false, isClose: true };
  }
  // Opening or void tag: let the HTML parser extract the name and attributes
  let info = null;
  const parser = new Parser({
    onopentag(name, attribs) {
      info = {
        tag: name,
        attrs: attribsToComarkAttrs(attribs, true),
        isVoid: VOID_ELEMENTS.has(name),
        isClose: false,
      };
    },
  }, { decodeEntities: false });
  parser.write(trimmed);
  parser.end();
  return info;
}
52
/**
 * Parse a full HTML string into ComarkNodes using htmlparser2.
 *
 * Elements become `[tag, attrs, ...children]` tuples, void elements become
 * `[tag, attrs]`, comments become `[null, {}, data]`, and text is kept as a
 * trimmed string (whitespace-only text is dropped). Entities are decoded.
 *
 * @param html - The HTML source; surrounding whitespace is ignored
 * @returns The top-level nodes in document order
 */
export function htmlToComarkNodes(html) {
  const root = [];
  // Elements that have been opened but not yet closed, innermost last
  const openFrames = [];
  // Attach to the innermost open element, or to the root when none is open
  const append = (item) => {
    const target = openFrames.length > 0
      ? openFrames[openFrames.length - 1].children
      : root;
    target.push(item);
  };
  const parser = new Parser({
    onopentag(name, attribs) {
      const attrs = attribsToComarkAttrs(attribs);
      if (VOID_ELEMENTS.has(name)) {
        append([name, attrs]);
      }
      else {
        openFrames.push({ tag: name, attrs, children: [] });
      }
    },
    ontext(text) {
      const content = text.trim();
      if (content) {
        append(content);
      }
    },
    onclosetag(name) {
      if (VOID_ELEMENTS.has(name)) {
        return;
      }
      // Find matching frame (handles mismatched tags gracefully)
      let matchIndex = -1;
      for (let i = openFrames.length - 1; i >= 0; i--) {
        if (openFrames[i].tag === name) {
          matchIndex = i;
          break;
        }
      }
      if (matchIndex < 0) {
        return;
      }
      // Close the matching frame and everything opened inside it
      while (openFrames.length > matchIndex) {
        const frame = openFrames.pop();
        append([frame.tag, frame.attrs, ...frame.children]);
      }
    },
    oncomment(data) {
      append([null, {}, data]);
    },
  }, { decodeEntities: true });
  parser.write(html.trim());
  parser.end();
  return root;
}
@@ -0,0 +1,12 @@
1
+ import type { ComarkTree } from '../../types';
2
+ /**
3
+ * Extracts reusable nodes from the last output tree
4
+ * @param markdown - The markdown to parse
5
+ * @param lastOutput - The last output tree
6
+ * @returns `reusedNodes` (leading nodes kept from `lastOutput`), `remainingMarkdownStartLine` (the `$.line` of the last reused node, or 0 when nothing is reused) and `remainingMarkdown` (the text still to be parsed)
7
+ */
8
+ export declare function extractReusableNodes(markdown: string, lastOutput: ComarkTree): {
9
+ remainingMarkdownStartLine: number;
10
+ reusedNodes: import("../..").ComarkNode[];
11
+ remainingMarkdown: string;
12
+ };
@@ -0,0 +1,39 @@
1
/**
 * Extracts reusable nodes from the last output tree.
 *
 * Top-level nodes are scanned from the end for entries carrying a truthy
 * `$.line` in their props. The last such node is skipped; the one before it
 * becomes the cut point. Everything up to and including the cut point is
 * reused, and the markdown after the cut point's line is returned for
 * re-parsing. A `$.line` of 0 is falsy and therefore never counts.
 *
 * @param markdown - The markdown to parse
 * @param lastOutput - The last output tree
 * @returns The reusable nodes, the line of the cut point, and the remaining
 *   markdown. When fewer than two positioned nodes exist, nothing is reused
 *   and the whole markdown is returned.
 */
export function extractReusableNodes(markdown, lastOutput) {
  const hasLine = node => Boolean(node[1] && node[1].$?.line);
  let positionedSeen = 0;
  let cutIndex = -1;
  for (let i = lastOutput.nodes.length - 1; i >= 0; i--) {
    if (!hasLine(lastOutput.nodes[i])) {
      continue;
    }
    positionedSeen += 1;
    if (positionedSeen === 2) {
      cutIndex = i;
      break;
    }
  }
  if (cutIndex === -1) {
    return {
      remainingMarkdownStartLine: 0,
      remainingMarkdown: markdown,
      reusedNodes: [],
    };
  }
  const cutNode = lastOutput.nodes[cutIndex];
  const remainingMarkdownStartLine = cutNode[1].$?.line ?? 0;
  const tail = markdown.split('\n').slice(remainingMarkdownStartLine + 1).join('\n');
  return {
    remainingMarkdownStartLine,
    reusedNodes: lastOutput.nodes.slice(0, cutIndex + 1),
    // Add back the newline character removed by the split/slice/join
    remainingMarkdown: '\n' + tail,
  };
}
@@ -0,0 +1,9 @@
1
+ import type { ComarkNode } from 'comark';
2
+ /**
3
+ * Convert Markdown-It tokens to a Comark tree. NOTE(review): the exported name is spelled `marmdownIt…` (sic); renaming it would break importers.
4
+ */
5
+ export declare function marmdownItTokensToComarkTree(tokens: any[], options?: {
6
+ startLine: number;
7
+ preservePositions: boolean;
8
+ }): ComarkNode[];
9
/** Converts inline tokens to Comark nodes. NOTE(review): `inHeading` presumably flags tokens that belong to a heading — confirm against the implementation. */
+ export declare function processInlineTokens(tokens: any[], inHeading?: boolean): ComarkNode[];