comark 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +6 -0
- package/dist/internal/frontmatter.d.ts +16 -0
- package/dist/internal/frontmatter.js +43 -0
- package/dist/internal/parse/auto-close/index.d.ts +12 -0
- package/dist/internal/parse/auto-close/index.js +457 -0
- package/dist/internal/parse/auto-close/table.d.ts +4 -0
- package/dist/internal/parse/auto-close/table.js +161 -0
- package/dist/internal/parse/auto-unwrap.d.ts +20 -0
- package/dist/internal/parse/auto-unwrap.js +42 -0
- package/dist/internal/parse/html/html_block_rule.d.ts +2 -0
- package/dist/internal/parse/html/html_block_rule.js +60 -0
- package/dist/internal/parse/html/html_blocks.d.ts +2 -0
- package/dist/internal/parse/html/html_blocks.js +66 -0
- package/dist/internal/parse/html/html_inline_rule.d.ts +2 -0
- package/dist/internal/parse/html/html_inline_rule.js +43 -0
- package/dist/internal/parse/html/html_re.d.ts +3 -0
- package/dist/internal/parse/html/html_re.js +18 -0
- package/dist/internal/parse/html/index.d.ts +18 -0
- package/dist/internal/parse/html/index.js +122 -0
- package/dist/internal/parse/incremental.d.ts +12 -0
- package/dist/internal/parse/incremental.js +39 -0
- package/dist/internal/parse/token-processor.d.ts +9 -0
- package/dist/internal/parse/token-processor.js +803 -0
- package/dist/internal/props-validation.d.ts +12 -0
- package/dist/internal/props-validation.js +112 -0
- package/dist/internal/stringify/attributes.d.ts +21 -0
- package/dist/internal/stringify/attributes.js +67 -0
- package/dist/internal/stringify/handlers/a.d.ts +3 -0
- package/dist/internal/stringify/handlers/a.js +11 -0
- package/dist/internal/stringify/handlers/blockquote.d.ts +3 -0
- package/dist/internal/stringify/handlers/blockquote.js +18 -0
- package/dist/internal/stringify/handlers/br.d.ts +3 -0
- package/dist/internal/stringify/handlers/br.js +3 -0
- package/dist/internal/stringify/handlers/code.d.ts +3 -0
- package/dist/internal/stringify/handlers/code.js +11 -0
- package/dist/internal/stringify/handlers/comment.d.ts +3 -0
- package/dist/internal/stringify/handlers/comment.js +6 -0
- package/dist/internal/stringify/handlers/del.d.ts +3 -0
- package/dist/internal/stringify/handlers/del.js +4 -0
- package/dist/internal/stringify/handlers/emphesis.d.ts +3 -0
- package/dist/internal/stringify/handlers/emphesis.js +13 -0
- package/dist/internal/stringify/handlers/heading.d.ts +3 -0
- package/dist/internal/stringify/handlers/heading.js +7 -0
- package/dist/internal/stringify/handlers/hr.d.ts +3 -0
- package/dist/internal/stringify/handlers/hr.js +3 -0
- package/dist/internal/stringify/handlers/html.d.ts +3 -0
- package/dist/internal/stringify/handlers/html.js +73 -0
- package/dist/internal/stringify/handlers/img.d.ts +3 -0
- package/dist/internal/stringify/handlers/img.js +9 -0
- package/dist/internal/stringify/handlers/index.d.ts +2 -0
- package/dist/internal/stringify/handlers/index.js +56 -0
- package/dist/internal/stringify/handlers/li.d.ts +3 -0
- package/dist/internal/stringify/handlers/li.js +43 -0
- package/dist/internal/stringify/handlers/math.d.ts +3 -0
- package/dist/internal/stringify/handlers/math.js +8 -0
- package/dist/internal/stringify/handlers/mdc.d.ts +3 -0
- package/dist/internal/stringify/handlers/mdc.js +47 -0
- package/dist/internal/stringify/handlers/mermaid.d.ts +3 -0
- package/dist/internal/stringify/handlers/mermaid.js +8 -0
- package/dist/internal/stringify/handlers/ol.d.ts +3 -0
- package/dist/internal/stringify/handlers/ol.js +18 -0
- package/dist/internal/stringify/handlers/p.d.ts +3 -0
- package/dist/internal/stringify/handlers/p.js +8 -0
- package/dist/internal/stringify/handlers/pre.d.ts +3 -0
- package/dist/internal/stringify/handlers/pre.js +60 -0
- package/dist/internal/stringify/handlers/strong.d.ts +3 -0
- package/dist/internal/stringify/handlers/strong.js +13 -0
- package/dist/internal/stringify/handlers/table.d.ts +8 -0
- package/dist/internal/stringify/handlers/table.js +180 -0
- package/dist/internal/stringify/handlers/template.d.ts +3 -0
- package/dist/internal/stringify/handlers/template.js +14 -0
- package/dist/internal/stringify/handlers/ul.d.ts +3 -0
- package/dist/internal/stringify/handlers/ul.js +18 -0
- package/dist/internal/stringify/indent.d.ts +4 -0
- package/dist/internal/stringify/indent.js +8 -0
- package/dist/internal/stringify/state.d.ts +13 -0
- package/dist/internal/stringify/state.js +121 -0
- package/dist/internal/yaml.d.ts +12 -0
- package/dist/internal/yaml.js +51 -0
- package/dist/parse.d.ts +66 -0
- package/dist/parse.js +163 -0
- package/dist/plugins/alert.d.ts +2 -0
- package/dist/plugins/alert.js +66 -0
- package/dist/plugins/emoji.d.ts +3 -0
- package/dist/plugins/emoji.js +438 -0
- package/dist/plugins/headings.d.ts +48 -0
- package/dist/plugins/headings.js +85 -0
- package/dist/plugins/highlight.d.ts +71 -0
- package/dist/plugins/highlight.js +234 -0
- package/dist/plugins/math.d.ts +59 -0
- package/dist/plugins/math.js +263 -0
- package/dist/plugins/mermaid.d.ts +38 -0
- package/dist/plugins/mermaid.js +185 -0
- package/dist/plugins/security.d.ts +11 -0
- package/dist/plugins/security.js +32 -0
- package/dist/plugins/summary.d.ts +2 -0
- package/dist/plugins/summary.js +22 -0
- package/dist/plugins/task-list.d.ts +8 -0
- package/dist/plugins/task-list.js +117 -0
- package/dist/plugins/toc.d.ts +15 -0
- package/dist/plugins/toc.js +118 -0
- package/dist/render.d.ts +18 -0
- package/dist/render.js +29 -0
- package/dist/types.d.ts +258 -0
- package/dist/types.js +1 -0
- package/dist/utils/caret.d.ts +7 -0
- package/dist/utils/caret.js +36 -0
- package/dist/utils/index.d.ts +38 -0
- package/dist/utils/index.js +149 -0
- package/package.json +75 -9
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
 * Measure the raw width of every non-empty cell in a table row.
 *
 * A cell starts after an unescaped `|` and ends at the next unescaped `|`
 * (or at the end of the row). A pipe directly preceded by a backslash is
 * treated as cell content. Widths are measured on the untrimmed cell text, so
 * surrounding padding spaces are included. Completely empty cells (`||`) are
 * not reported.
 *
 * @param row - A single table row
 * @returns The character width of each non-empty cell, left to right
 */
function parseCellWidths(row) {
    const widths = [];
    let cellContent = '';
    let inCell = false;
    for (let i = 0; i < row.length; i++) {
        const ch = row[i];
        const isEscapedPipe = ch === '|' && i > 0 && row[i - 1] === '\\';
        if (ch === '|' && !isEscapedPipe) {
            if (inCell && cellContent) {
                widths.push(cellContent.length);
                cellContent = '';
            }
            inCell = true;
        }
        else if (inCell) {
            cellContent += ch;
        }
    }
    // Capture last cell if no trailing pipe
    if (inCell && cellContent) {
        widths.push(cellContent.length);
    }
    return widths;
}
/**
 * Split a table row into trimmed cell contents.
 *
 * Uses the same escaped-pipe rule as `parseCellWidths`. Unlike that function,
 * empty cells between two pipes are kept (as empty strings); a trailing cell
 * that is not closed by a pipe is kept only when it is non-blank.
 *
 * @param row - A single table row
 * @returns The trimmed text of each cell, left to right
 */
function parseCells(row) {
    const cells = [];
    let cell = '';
    let inCell = false;
    for (let i = 0; i < row.length; i++) {
        const ch = row[i];
        const isEscapedPipe = ch === '|' && i > 0 && row[i - 1] === '\\';
        if (ch === '|' && !isEscapedPipe) {
            if (inCell) {
                cells.push(cell.trim());
                cell = '';
            }
            inCell = true;
        }
        else if (inCell) {
            cell += ch;
        }
    }
    // Capture last cell if any
    if (cell.trim()) {
        cells.push(cell.trim());
    }
    return cells;
}
// A delimiter row may only contain pipes, dashes, colons and whitespace.
const SEPARATOR_ROW_RE = /^\|[\s|:-]*$/;
/**
 * Tell whether a trimmed line is a (possibly incomplete) delimiter row.
 *
 * The line must start with `|`, contain nothing but `|`, `-`, `:` and
 * whitespace, and contain at least one `-` or `:`. Rows with any other
 * character (e.g. `| foo-bar |` or `| 10:30 |`) are data rows.
 */
function isSeparatorRow(line) {
    return SEPARATOR_ROW_RE.test(line) && /[-:]/.test(line);
}
/**
 * Complete one delimiter cell, preserving its alignment markers.
 *
 * Aligned cells need at least one dash (`:-`, `-:`, `:-:`); unaligned cells
 * are padded to three dashes (`---`).
 */
function completeSeparatorCell(cell) {
    const hasLeftAlign = cell.startsWith(':');
    const hasRightAlign = cell.endsWith(':') && cell.length > 1;
    // Strip alignment markers to count dashes
    let dashes = cell.replace(/^:/, '').replace(/:$/, '');
    if (hasLeftAlign || hasRightAlign) {
        if (dashes.length < 1)
            dashes = '-';
        return (hasLeftAlign ? ':' : '') + dashes + (hasRightAlign ? ':' : '');
    }
    while (dashes.length < 3)
        dashes += '-';
    return dashes;
}
/**
 * Closes unclosed markdown tables.
 *
 * Only the LAST run of consecutive lines starting with `|` is touched:
 *  - the header row gets a trailing pipe if it lacks one;
 *  - if the line directly below the header is an (incomplete) delimiter row
 *    and is also the last line, it is completed to one cell per header column;
 *  - otherwise, if the last line is a data row without a trailing pipe, it is
 *    rebuilt with a trailing pipe and padded towards the reference row widths;
 *  - if there is no delimiter row under the header, a `| --- | ... |` row is
 *    inserted.
 *
 * A row is treated as a delimiter only when it consists solely of `|`, `-`,
 * `:` and whitespace, so data or header rows that merely contain a hyphen or
 * colon are never rewritten as delimiters.
 *
 * @param markdown - The (possibly truncated) markdown source
 * @returns The markdown with its last table completed, or the input unchanged
 *          when it contains no table rows
 */
export function closeTables(markdown) {
    const lines = markdown.split('\n');
    // Group consecutive table rows (lines starting with |) into blocks
    const tableBlocks = [];
    let blockStart = -1;
    for (let i = 0; i < lines.length; i++) {
        if (lines[i].trim().startsWith('|')) {
            if (blockStart === -1)
                blockStart = i;
        }
        else if (blockStart !== -1) {
            tableBlocks.push({ start: blockStart, end: i - 1 });
            blockStart = -1;
        }
    }
    if (blockStart !== -1) {
        tableBlocks.push({ start: blockStart, end: lines.length - 1 });
    }
    if (tableBlocks.length === 0)
        return markdown;
    // Process last table
    const { start, end } = tableBlocks[tableBlocks.length - 1];
    // Ensure header has trailing pipe
    if (!lines[start].trim().endsWith('|')) {
        lines[start] += ' |';
    }
    // Count columns from header
    const columnCount = parseCellWidths(lines[start].trim()).length;
    const generateSeparator = () => '| ' + Array(columnCount).fill('---').join(' | ') + ' |';
    // Check if separator exists (including incomplete ones with just :)
    const secondLine = end - start >= 1 ? lines[start + 1].trim() : '';
    const hasSeparator = isSeparatorRow(secondLine);
    // Handle last line (read after the header fix-up so a one-line table sees the closed header)
    const lastLine = lines[end].trim();
    // Only the line right below the header can be the delimiter row
    const lastLineIsSeparator = hasSeparator && end === start + 1;
    if (lastLineIsSeparator) {
        const completedCells = parseCells(lastLine).map(completeSeparatorCell);
        // Add missing columns
        while (completedCells.length < columnCount) {
            completedCells.push('---');
        }
        lines[end] = '| ' + completedCells.join(' | ') + ' |';
    }
    else if (lastLine.startsWith('|') && !lastLine.endsWith('|')) {
        // Complete data row - find reference widths and pad
        let refRow = lines[start].trim();
        for (let i = start + (hasSeparator ? 2 : 1); i < end; i++) {
            const row = lines[i].trim();
            if (row.startsWith('|') && row.endsWith('|') && !row.includes('-')) {
                refRow = row;
                break;
            }
        }
        const refWidths = parseCellWidths(refRow);
        const cells = parseCells(lastLine);
        // Rebuild with padding (reference widths include the two padding spaces)
        lines[end] = '| ' + cells.map((cell, i) => {
            const targetWidth = refWidths[i] || cell.length + 2;
            const padding = ' '.repeat(Math.max(0, targetWidth - cell.length - 2));
            return cell + padding;
        }).join(' | ') + ' |';
    }
    // Add separator if missing
    if (!hasSeparator) {
        lines.splice(start + 1, 0, generateSeparator());
    }
    return lines.join('\n');
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ComarkNode } from 'comark';
|
|
2
|
+
/**
 * Applies automatic unwrapping to container components.
 *
 * This utility removes unnecessary paragraph wrappers from container component children.
 * If a container has only a single paragraph child (and no other block elements),
 * the paragraph is unwrapped and its children are hoisted up to be direct children
 * of the container.
 *
 * @param node - The Comark element to process
 * @returns The node with auto-unwrapped children (if applicable)
 *
 * @example
 * // NOTE(review): the object notation below is conceptual; the compiled
 * // implementation destructures nodes as `[tag, props, ...children]` tuples.
 * // Before:
 * { tag: 'alert', children: [{ type: 'element', tag: 'p', children: [{ type: 'text', value: 'Text' }] }] }
 *
 * // After:
 * { tag: 'alert', children: [{ type: 'text', value: 'Text' }] }
 */
export declare function applyAutoUnwrap(node: ComarkNode): ComarkNode;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Hoists the contents of a lone paragraph child into its parent element.
 *
 * Nodes are `[tag, props, ...children]` tuples; text nodes are plain strings.
 * Whitespace-only text children are ignored when deciding whether to unwrap:
 *  - strings and tuples shorter than two entries are returned untouched;
 *  - an element with no significant children is returned untouched;
 *  - an element whose only significant child is a `p` element is rebuilt with
 *    that paragraph's children in place of the paragraph (the ignored
 *    whitespace-only text siblings are dropped);
 *  - any other element is rebuilt with this function applied to each child.
 *
 * @param node - The Comark node to process
 * @returns The node, unwrapped where applicable
 *
 * @example
 * // Before:
 * ['alert', {}, ['p', {}, 'Text']]
 *
 * // After:
 * ['alert', {}, 'Text']
 */
export function applyAutoUnwrap(node) {
    if (typeof node === 'string') {
        return node;
    }
    if (node.length < 2) {
        return node;
    }
    const tag = node[0];
    const props = node[1];
    const children = node.slice(2);
    // Collect the children that matter for the decision (skip blank text)
    const significant = [];
    for (const child of children) {
        const isBlankText = typeof child === 'string' && !(child && child.trim());
        if (!isBlankText) {
            significant.push(child);
        }
    }
    if (significant.length === 0) {
        return node;
    }
    const sole = significant[0];
    const isLoneParagraph = significant.length === 1
        && typeof sole !== 'string'
        && sole[0] === 'p';
    if (isLoneParagraph) {
        return [tag, props, ...sole.slice(2)];
    }
    return [tag, props, ...children.map((child) => applyAutoUnwrap(child))];
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// BASED ON https://github.com/serkodev/markdown-exit/blob/fe1351070a5841426223ab4a0a5c7874ba2b1257/packages/markdown-exit/src/parser/block/rules/html_block.ts
|
|
2
|
+
import block_names from "./html_blocks.js";
|
|
3
|
+
import { HTML_OPEN_CLOSE_TAG_RE } from "./html_re.js";
|
|
4
|
+
// An array of opening and corresponding closing sequences for html tags,
// last argument defines whether it can terminate a paragraph or not
//
// Each entry is [openPattern, closePattern, canTerminateParagraph]:
//   - openPattern is tested against the first line of a candidate block;
//   - closePattern is tested against lines to find where the block ends;
//   - canTerminateParagraph is what html_block returns in silent mode.
// Order matters: html_block uses the FIRST entry whose openPattern matches.
// NOTE(review): the first and last entries share the same openPattern, so the
// last entry (the only one flagged `false`) can never be selected — confirm
// this is intended.
const HTML_SEQUENCES = [
    [new RegExp(`${HTML_OPEN_CLOSE_TAG_RE.source}\\s*$`), /^<\/[^>]+>$/, true],
    [/^<(script|pre|style|textarea)(?=(\s|>|$))/i, /<\/(script|pre|style|textarea)>/i, true],
    [/^<!--/, /-->/, true],
    [/^<\?/, /\?>/, true],
    [/^<![A-Z]/, />/, true],
    [/^<!\[CDATA\[/, /\]\]>/, true],
    [new RegExp(`^</?(${block_names.join('|')})(?=(\\s|/?>|$))`, 'i'), /^$/, true],
    [new RegExp(`${HTML_OPEN_CLOSE_TAG_RE.source}\\s*$`), /^$/, false],
];
|
|
17
|
+
/**
 * Block rule that recognises a raw HTML block starting at `startLine`.
 *
 * @param state - Block parser state (reads `src`, `bMarks`, `tShift`, `eMarks`,
 *                `sCount`, `blkIndent`; writes `line`; pushes one token)
 * @param startLine - Index of the first line to examine
 * @param endLine - Index one past the last line that may be consumed
 * @param silent - When truthy, only report whether the matched sequence may
 *                 terminate a paragraph; nothing is pushed or advanced
 * @returns `false` when the line does not start an HTML block; in silent mode
 *          the matched sequence's terminate flag; otherwise `true`
 */
export default function html_block(state, startLine, endLine, silent) {
    let pos = state.bMarks[startLine] + state.tShift[startLine];
    let max = state.eMarks[startLine];
    // if it's indented more than 3 spaces, it should be a code block
    if (state.sCount[startLine] - state.blkIndent >= 4)
        return false;
    if (state.src.charCodeAt(pos) !== 0x3C /* < */)
        return false;
    let lineText = state.src.slice(pos, max);
    // Pick the first sequence whose opening pattern matches the first line
    let i = 0;
    for (; i < HTML_SEQUENCES.length; i++) {
        if (HTML_SEQUENCES[i][0].test(lineText))
            break;
    }
    if (i === HTML_SEQUENCES.length)
        return false;
    if (silent) {
        // true if this sequence can be a terminator, false otherwise
        return HTML_SEQUENCES[i][2];
    }
    let nextLine = startLine + 1;
    // If we are here - we detected HTML block.
    // Let's roll down till block end.
    // Sequence 0 (a lone tag on its own line) never spans more than one line.
    if (i !== 0 && !HTML_SEQUENCES[i][1].test(lineText)) {
        for (; nextLine < endLine; nextLine++) {
            if (state.sCount[nextLine] < state.blkIndent) {
                break;
            }
            pos = state.bMarks[nextLine] + state.tShift[nextLine];
            max = state.eMarks[nextLine];
            lineText = state.src.slice(pos, max);
            if (HTML_SEQUENCES[i][1].test(lineText)) {
                // A non-empty closing line belongs to the block; an empty one does not
                if (lineText.length !== 0)
                    nextLine++;
                break;
            }
        }
    }
    state.line = nextLine;
    // NOTE(review): `lineText` is the LAST line examined above (it is reassigned
    // inside the loop), so for multi-line blocks the open/close decision is made
    // on the final scanned line rather than the first — confirm this is intended.
    const token = lineText.startsWith('</') ? state.push('html_block_close', '', -1) : state.push('html_block', '', 1);
    token.map = [startLine, nextLine];
    token.content = state.getLines(startLine, nextLine, state.blkIndent, true);
    return true;
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
// List of valid html blocks names, according to commonmark spec
// https://spec.commonmark.org/0.30/#html-blocks
// Names are lowercase and kept in alphabetical order.
// NOTE(review): the list contains 'search' and omits 'source', which looks like
// a later spec revision than the 0.30 link above — confirm and update the link.
export default [
    'address',
    'article',
    'aside',
    'base',
    'basefont',
    'blockquote',
    'body',
    'caption',
    'center',
    'col',
    'colgroup',
    'dd',
    'details',
    'dialog',
    'dir',
    'div',
    'dl',
    'dt',
    'fieldset',
    'figcaption',
    'figure',
    'footer',
    'form',
    'frame',
    'frameset',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'head',
    'header',
    'hr',
    'html',
    'iframe',
    'legend',
    'li',
    'link',
    'main',
    'menu',
    'menuitem',
    'nav',
    'noframes',
    'ol',
    'optgroup',
    'option',
    'p',
    'param',
    'search',
    'section',
    'summary',
    'table',
    'tbody',
    'td',
    'tfoot',
    'th',
    'thead',
    'title',
    'tr',
    'track',
    'ul',
];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// BASED ON https://github.com/serkodev/markdown-exit/blob/fe1351070a5841426223ab4a0a5c7874ba2b1257/packages/markdown-exit/src/parser/inline/rules/html_inline.ts
|
|
2
|
+
import { HTML_TAG_RE } from "./html_re.js";
|
|
3
|
+
/**
 * Whether `str` begins with an opening `<a` tag: `<a` followed by `>` or
 * whitespace, case-insensitive.
 */
function isLinkOpen(str) {
    const found = /^<a[>\s]/i.exec(str);
    return found !== null;
}
|
|
6
|
+
/**
 * Whether `str` begins with a closing `</a>` tag (optional whitespace before
 * `>`, case-insensitive).
 */
function isLinkClose(str) {
    const found = /^<\/a\s*>/i.exec(str);
    return found !== null;
}
|
|
9
|
+
/**
 * Whether a character code is an ASCII letter (A-Z or a-z).
 *
 * @param ch - Character code to test
 */
function isLetter(ch) {
    /* eslint no-bitwise:0 */
    // Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z', so one range check suffices
    const lowered = ch | 0x20;
    const outsideRange = lowered < 0x61 /* a */ || lowered > 0x7A /* z */;
    return !outsideRange;
}
|
|
14
|
+
/**
 * Inline rule that consumes one raw HTML tag at the current position.
 *
 * @param state - Inline parser state (reads `src`, `pos`, `posMax`; advances
 *                `pos`; adjusts `linkLevel`; pushes one token)
 * @param silent - When truthy, only validate and advance; no token is pushed
 * @returns `true` when a tag matching `HTML_TAG_RE` was consumed, else `false`
 */
export default function html_inline(state, silent) {
    const max = state.posMax;
    const pos = state.pos;
    // Must start with '<' and leave room for at least two more characters
    const startsWithLt = state.src.charCodeAt(pos) === 0x3C /* < */;
    if (!startsWithLt || pos + 2 >= max) {
        return false;
    }
    // Quick fail on second char: only '!', '?', '/' or a letter can follow '<'
    const second = state.src.charCodeAt(pos + 1);
    const canFollowLt = second === 0x21 /* ! */
        || second === 0x3F /* ? */
        || second === 0x2F /* / */
        || isLetter(second);
    if (!canFollowLt) {
        return false;
    }
    const match = state.src.slice(pos).match(HTML_TAG_RE);
    if (!match) {
        return false;
    }
    const raw = match[0];
    if (!silent) {
        const token = state.push('html_inline', '', 0);
        token.content = raw;
        // Track raw <a> ... </a> nesting on the parser state
        if (isLinkOpen(token.content)) {
            state.linkLevel++;
        }
        if (isLinkClose(token.content)) {
            state.linkLevel--;
        }
    }
    state.pos += raw.length;
    return true;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// Regexps to match html elements
// The pieces below are regex SOURCE strings that are composed into the two
// exported expressions at the bottom of the file.
// Attribute name: letter, '_' or ':' followed by letters, digits, ':', '.', '_' or '-'
const attr_name = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
// Attribute value forms: unquoted, single-quoted, double-quoted
const unquoted = '[^"\'=<>`\\x00-\\x20]+';
const single_quoted = '\'[^\']*\'';
const double_quoted = '"[^"]*"';
const attr_value = `(?:${unquoted}|${single_quoted}|${double_quoted})`;
// One attribute: leading whitespace, a name and an optional `= value`
const attribute = `(?:\\s+${attr_name}(?:\\s*=\\s*${attr_value})?)`;
// Tag names start with a letter and may contain letters, digits and '-'
const open_tag = `<[A-Za-z][A-Za-z0-9\\-]*${attribute}*\\s*\\/?>`;
const close_tag = '<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>';
// Other constructs: comment, processing instruction, declaration, CDATA section
const comment = '<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->';
const processing = '<\\?[\\s\\S]*?\\?>';
const declaration = '<![A-Za-z][^>]*>';
const cdata = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>';
// Matches any single HTML construct anchored at the start of the input
// eslint-disable-next-line regexp/no-super-linear-backtracking, regexp/prefer-w
const HTML_TAG_RE = new RegExp(`^(?:${open_tag}|${close_tag}|${comment}|${processing}|${declaration}|${cdata})`);
// Matches only an opening or closing tag anchored at the start of the input
// eslint-disable-next-line regexp/use-ignore-case, regexp/no-super-linear-backtracking, regexp/prefer-w
const HTML_OPEN_CLOSE_TAG_RE = new RegExp(`^(?:${open_tag}|${close_tag})`);
export { HTML_OPEN_CLOSE_TAG_RE, HTML_TAG_RE };
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { ComarkNode } from 'comark';
|
|
2
|
+
/** Description of a single HTML tag fragment, as returned by `parseInlineHtmlTag`. */
interface HtmlTagInfo {
    /** Tag name. */
    tag: string;
    /** Attributes of the tag; an empty object for closing tags. */
    attrs: Record<string, unknown>;
    /** Whether the tag is an HTML void element (always `false` for closing tags). */
    isVoid: boolean;
    /** `true` for a closing tag (`</x>`), `false` for an opening tag. */
    isClose: boolean;
}
|
|
8
|
+
/**
 * Parse a single inline HTML tag fragment (opening, closing, or void).
 * Returns null if the content is not a recognisable HTML tag.
 *
 * @param html - The raw tag text; surrounding whitespace is ignored
 * @returns Tag information, or `null` when no tag could be recognised
 */
export declare function parseInlineHtmlTag(html: string): HtmlTagInfo | null;
|
|
13
|
+
/**
 * Parse a full HTML string into ComarkNodes using htmlparser2.
 * Handles nested elements, text, void elements, and comments.
 *
 * @param html - The HTML source; surrounding whitespace is ignored
 * @returns The top-level nodes, in document order
 */
export declare function htmlToComarkNodes(html: string): ComarkNode[];
|
|
18
|
+
export {};
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { Parser } from 'htmlparser2';
|
|
2
|
+
// Tag names treated as void elements: they are emitted as childless nodes as
// soon as they open, and their close events are ignored.
const VOID_ELEMENTS = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'param', 'source', 'track', 'wbr',
]);
|
|
6
|
+
/**
 * Convert a parsed HTML attribute map into Comark element props.
 *
 * The result always carries a `$` entry of the form `{ html: 1, block }`,
 * where `block` is 0 for inline tags and 1 otherwise. Attributes with an empty
 * string value are stored as `:name` with the string `'true'`; every other
 * attribute is copied as-is.
 *
 * @param attribs - Attribute name/value map
 * @param isInline - Whether the tag comes from inline HTML (default `false`)
 * @returns The Comark props object
 */
function attribsToComarkAttrs(attribs, isInline = false) {
    const attrs = { $: { html: 1, block: isInline ? 0 : 1 } };
    for (const key in attribs) {
        const value = attribs[key];
        const isBare = value === '';
        const targetKey = isBare ? `:${key}` : key;
        attrs[targetKey] = isBare ? 'true' : value;
    }
    return attrs;
}
|
|
24
|
+
/**
|
|
25
|
+
* Parse a single inline HTML tag fragment (opening, closing, or void).
|
|
26
|
+
* Returns null if the content is not a recognisable HTML tag.
|
|
27
|
+
*/
|
|
28
|
+
export function parseInlineHtmlTag(html) {
|
|
29
|
+
const trimmed = html.trim();
|
|
30
|
+
if (!trimmed.startsWith('<'))
|
|
31
|
+
return null;
|
|
32
|
+
// Fast path: closing tag
|
|
33
|
+
const closeMatch = trimmed.match(/^<\/([a-z][a-z0-9]*)\s*>/i);
|
|
34
|
+
if (closeMatch) {
|
|
35
|
+
return { tag: closeMatch[1].toLowerCase(), attrs: {}, isVoid: false, isClose: true };
|
|
36
|
+
}
|
|
37
|
+
let info = null;
|
|
38
|
+
const parser = new Parser({
|
|
39
|
+
onopentag(name, attribs) {
|
|
40
|
+
info = {
|
|
41
|
+
tag: name,
|
|
42
|
+
attrs: attribsToComarkAttrs(attribs, true),
|
|
43
|
+
isVoid: VOID_ELEMENTS.has(name),
|
|
44
|
+
isClose: false,
|
|
45
|
+
};
|
|
46
|
+
},
|
|
47
|
+
}, { decodeEntities: false });
|
|
48
|
+
parser.write(trimmed);
|
|
49
|
+
parser.end();
|
|
50
|
+
return info;
|
|
51
|
+
}
|
|
52
|
+
/**
 * Parse a full HTML string into ComarkNodes using htmlparser2.
 * Handles nested elements, text, void elements, and comments.
 *
 * Elements become `[tag, attrs, ...children]` tuples, text becomes trimmed
 * strings (blank text is dropped) and comments become `[null, {}, data]`.
 * A closing tag with no matching open element is ignored; a closing tag that
 * matches an outer element also closes every element opened inside it.
 *
 * NOTE(review): every text chunk is trimmed on its own, so if the parser
 * delivers one run of text in several chunks the spacing between them is
 * lost — confirm against htmlparser2's text callbacks.
 *
 * @param html - The HTML source; surrounding whitespace is ignored
 * @returns The top-level nodes, in document order
 */
export function htmlToComarkNodes(html) {
    const root = [];
    const stack = [];
    // Append to the innermost open element, or to the root when none is open
    const attach = (child) => {
        const target = stack.length > 0 ? stack[stack.length - 1].children : root;
        target.push(child);
    };
    const parser = new Parser({
        onopentag(name, attribs) {
            const attrs = attribsToComarkAttrs(attribs);
            if (VOID_ELEMENTS.has(name)) {
                attach([name, attrs]);
                return;
            }
            stack.push({ tag: name, attrs, children: [] });
        },
        ontext(text) {
            const trimmed = text.trim();
            if (trimmed) {
                attach(trimmed);
            }
        },
        onclosetag(name) {
            if (VOID_ELEMENTS.has(name)) {
                return;
            }
            // Find matching frame (handles mismatched tags gracefully)
            let idx = -1;
            for (let depth = stack.length - 1; depth >= 0; depth--) {
                if (stack[depth].tag === name) {
                    idx = depth;
                    break;
                }
            }
            if (idx < 0) {
                return;
            }
            // Close the matching frame and everything opened inside it
            while (stack.length > idx) {
                const frame = stack.pop();
                attach([frame.tag, frame.attrs, ...frame.children]);
            }
        },
        oncomment(data) {
            attach([null, {}, data]);
        },
    }, { decodeEntities: true });
    parser.write(html.trim());
    parser.end();
    return root;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ComarkTree } from '../../types';
|
|
2
|
+
/**
 * Extracts reusable nodes from the last output tree
 * @param markdown - The markdown to parse
 * @param lastOutput - The last output tree
 * @returns The reusable nodes and the remaining markdown:
 *   - `remainingMarkdownStartLine`: the `line` recorded on the last reused node (0 when nothing is reused)
 *   - `reusedNodes`: the leading nodes of `lastOutput` that can be kept
 *   - `remainingMarkdown`: the part of `markdown` that still has to be parsed
 */
export declare function extractReusableNodes(markdown: string, lastOutput: ComarkTree): {
    remainingMarkdownStartLine: number;
    reusedNodes: import("../..").ComarkNode[];
    remainingMarkdown: string;
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Extracts reusable nodes from the last output tree.
 *
 * Top-level nodes are scanned from the end. Only nodes with a truthy
 * `$.line` in their props count; the last such node is skipped and the one
 * before it becomes the reuse boundary. Everything up to and including the
 * boundary node is reused, and the markdown after the boundary's line is
 * returned (prefixed with a newline) for re-parsing.
 *
 * @param markdown - The markdown to parse
 * @param lastOutput - The last output tree (its `nodes` array is read)
 * @returns `{ remainingMarkdownStartLine, reusedNodes, remainingMarkdown }`;
 *          when no boundary exists: start line 0, no reused nodes and the
 *          full markdown
 */
export function extractReusableNodes(markdown, lastOutput) {
    const nodes = lastOutput.nodes;
    let positionedSeen = 0;
    let boundaryIndex = -1;
    for (let i = nodes.length - 1; i >= 0; i--) {
        const candidate = nodes[i];
        const hasLine = Boolean(candidate[1] && candidate[1].$?.line);
        if (!hasLine) {
            continue;
        }
        positionedSeen += 1;
        // The first positioned node from the end is skipped; the second is the boundary
        if (positionedSeen === 2) {
            boundaryIndex = i;
            break;
        }
    }
    if (boundaryIndex === -1) {
        return {
            remainingMarkdownStartLine: 0,
            remainingMarkdown: markdown,
            reusedNodes: [],
        };
    }
    const boundary = nodes[boundaryIndex];
    const remainingMarkdownStartLine = boundary[1].$?.line ?? 0;
    const tail = markdown.split('\n').slice(remainingMarkdownStartLine + 1).join('\n');
    return {
        remainingMarkdownStartLine,
        reusedNodes: nodes.slice(0, boundaryIndex + 1),
        // Add back the new line character removed by the split/slice/join
        remainingMarkdown: '\n' + tail,
    };
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ComarkNode } from 'comark';
|
|
2
|
+
/**
 * Convert Markdown-It tokens to a Comark tree
 *
 * NOTE(review): the exported name is spelled `marmdownIt…` (sic); it is part
 * of the public surface, so renaming it would break importers.
 *
 * @param tokens - The Markdown-It token list
 * @param options - Optional settings; when given, both `startLine` and
 *                  `preservePositions` are required
 * @returns The resulting Comark nodes
 */
export declare function marmdownItTokensToComarkTree(tokens: any[], options?: {
    startLine: number;
    preservePositions: boolean;
}): ComarkNode[];
|
|
9
|
+
/**
 * Convert a list of tokens into Comark nodes.
 *
 * NOTE(review): presumably `tokens` are Markdown-It inline tokens and
 * `inHeading` signals that they belong to a heading — confirm against the
 * implementation in token-processor.js.
 */
export declare function processInlineTokens(tokens: any[], inHeading?: boolean): ComarkNode[];
|