@sigx/lynx-markdown 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -44
- package/dist/XMarkdown.d.ts +36 -0
- package/dist/{Markdown.js → XMarkdown.js} +9 -3
- package/dist/ast.d.ts +117 -0
- package/dist/ast.js +11 -0
- package/dist/index.d.ts +13 -2
- package/dist/index.js +13 -1
- package/dist/parser/blocks.d.ts +21 -0
- package/dist/parser/blocks.js +274 -0
- package/dist/parser/incremental.d.ts +26 -0
- package/dist/parser/incremental.js +65 -0
- package/dist/parser/inline.d.ts +16 -0
- package/dist/parser/inline.js +451 -0
- package/dist/parser/scanner.d.ts +73 -0
- package/dist/parser/scanner.js +219 -0
- package/dist/render/MarkdownView.d.ts +29 -0
- package/dist/render/MarkdownView.js +43 -0
- package/dist/render/components.d.ts +140 -0
- package/dist/render/components.js +99 -0
- package/dist/render/engine.d.ts +21 -0
- package/dist/render/engine.js +145 -0
- package/dist/stream.d.ts +42 -0
- package/dist/stream.js +69 -0
- package/package.json +9 -4
- package/dist/Markdown.d.ts +0 -30
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Block-level parser: source → `BlockNode[]`.
|
|
3
|
+
*
|
|
4
|
+
* Line-oriented with a small open-container model. The key streaming-safety
|
|
5
|
+
* rule lives here: a blank line only separates blocks at the top level and
|
|
6
|
+
* *outside* an open fenced code block — a blank line inside ``` does not close
|
|
7
|
+
* it. An unterminated fence parses as a `codeBlock` with `closed: false` and is
|
|
8
|
+
* still rendered, so streaming code dumps don't flicker when the closing fence
|
|
9
|
+
* finally arrives.
|
|
10
|
+
*
|
|
11
|
+
* Keys are assigned by the caller through a `KeyGen` so nested blocks (list
|
|
12
|
+
* items, blockquote contents) get stable, unique keys for reconciliation.
|
|
13
|
+
*/
|
|
14
|
+
import { parseInline } from './inline.js';
|
|
15
|
+
import { indentOf, isBlank, isFenceClose, isThematicBreak, matchEmptyListMarker, matchFence, matchHeading, matchListMarker, matchTableDelimiter, splitTableRow, } from './scanner.js';
|
|
16
|
+
/**
 * Create a key generator. Each call to `next()` returns `<prefix>-<n>`, where
 * `n` starts at 0 and grows by one per call.
 */
export function makeKeyGen(prefix) {
    let counter = 0;
    const next = () => {
        const key = `${prefix}-${counter}`;
        counter += 1;
        return key;
    };
    return { next };
}
|
|
20
|
+
/**
 * Parse a complete (or partial) source string into block nodes.
 *
 * The source is split on `\n` and handed to the line-based parser. When no key
 * generator is supplied, keys are drawn from a fresh generator with prefix `b`.
 */
export function parseBlocks(src, keys = makeKeyGen('b')) {
    return parseLines(src.split('\n'), keys);
}
|
|
25
|
+
/**
 * Turn a run of source lines into block nodes, top to bottom.
 *
 * Every non-blank line is tried against the block starters in a fixed order:
 * fenced code, ATX heading, thematic break, blockquote, list, GFM table. A line
 * that matches none of them starts a paragraph. Blank lines between blocks are
 * skipped. Blockquote bodies are parsed recursively with the same `keys`
 * generator, so nested blocks draw from the same key sequence.
 */
function parseLines(lines, keys) {
    const quoteStart = /^ {0,3}>/;
    const quotePrefix = /^ {0,3}> ?/;
    const total = lines.length;
    const rawOf = (from, to) => lines.slice(from, to).join('\n');
    const out = [];
    let pos = 0;
    while (pos < total) {
        const line = lines[pos];
        if (isBlank(line)) {
            pos += 1;
            continue;
        }
        // Fenced code block: runs to the closing fence, or to the end of input
        // when the fence is still open (`closed: false`).
        const fence = matchFence(line);
        if (fence) {
            const body = [];
            let closed = false;
            let end = pos + 1;
            for (; end < total; end += 1) {
                if (isFenceClose(lines[end], fence)) {
                    closed = true;
                    end += 1; // the closing fence belongs to the block
                    break;
                }
                // Content lines lose up to the fence's own indentation.
                body.push(stripIndent(lines[end], fence.indent));
            }
            const langPart = fence.info ? { lang: fence.info.split(/\s+/)[0] } : {};
            out.push({
                type: 'codeBlock',
                key: keys.next(),
                raw: rawOf(pos, end),
                ...langPart,
                value: body.join('\n'),
                closed,
            });
            pos = end;
            continue;
        }
        // ATX heading: always a single line.
        const heading = matchHeading(line);
        if (heading) {
            out.push({
                type: 'heading',
                key: keys.next(),
                raw: line,
                level: heading.level,
                children: parseInline(heading.text),
            });
            pos += 1;
            continue;
        }
        // Thematic break comes before the list check so `* * *` isn't a bullet.
        if (isThematicBreak(line)) {
            out.push({ type: 'thematicBreak', key: keys.next(), raw: line });
            pos += 1;
            continue;
        }
        // Blockquote: every consecutive `>`-prefixed line, prefix removed.
        if (quoteStart.test(line)) {
            const inner = [];
            let end = pos;
            while (end < total && quoteStart.test(lines[end])) {
                inner.push(lines[end].replace(quotePrefix, ''));
                end += 1;
            }
            // The quote's own key is drawn before its children are parsed.
            const key = keys.next();
            out.push({
                type: 'blockquote',
                key,
                raw: rawOf(pos, end),
                children: parseLines(inner, keys),
            });
            pos = end;
            continue;
        }
        // List (a marker with content, or a bare marker).
        const marker = matchListMarker(line) ?? matchEmptyListMarker(line);
        if (marker) {
            const list = parseList(lines, pos, marker, keys);
            out.push(list.block);
            pos = list.next;
            continue;
        }
        // GFM table: a line containing `|` whose successor is a delimiter row.
        if (line.includes('|') && pos + 1 < total) {
            const align = matchTableDelimiter(lines[pos + 1]);
            if (align) {
                const table = parseTable(lines, pos, align, keys);
                out.push(table.block);
                pos = table.next;
                continue;
            }
        }
        // Paragraph: consecutive non-blank lines until another block interrupts.
        let end = pos;
        while (end < total && !isBlank(lines[end]) && !interrupts(lines, end)) {
            end += 1;
        }
        const text = rawOf(pos, end);
        out.push({
            type: 'paragraph',
            key: keys.next(),
            raw: text,
            children: parseInline(text),
        });
        pos = end;
    }
    return out;
}
|
|
130
|
+
/**
 * Report whether `lines[i]` opens a block that cuts a paragraph short.
 *
 * Interrupting starters are: a fence, an ATX heading, a thematic break, a
 * blockquote marker, a list marker that is unordered or ordered starting at 1,
 * and a table header (a line containing `|` followed by a delimiter row).
 */
function interrupts(lines, i) {
    const line = lines[i];
    if (matchFence(line) || matchHeading(line) || isThematicBreak(line) || /^ {0,3}>/.test(line)) {
        return true;
    }
    // Ordered lists only interrupt when they start at 1.
    const marker = matchListMarker(line);
    if (marker && (!marker.ordered || marker.start === 1)) {
        return true;
    }
    const hasNext = i + 1 < lines.length;
    return line.indexOf('|') !== -1 && hasNext && Boolean(matchTableDelimiter(lines[i + 1]));
}
|
|
150
|
+
/**
 * Remove at most `n` leading space characters from `line`.
 * Only literal spaces are removed; stripping stops at the first other character.
 */
function stripIndent(line, n) {
    let drop = 0;
    for (; drop < n; drop += 1) {
        if (line[drop] !== ' ') {
            break;
        }
    }
    return line.slice(drop);
}
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
// Lists
|
|
158
|
+
// ---------------------------------------------------------------------------
|
|
159
|
+
/**
 * Two markers belong to the same list when they agree on both `ordered` and
 * `delimiter`.
 */
function sameList(a, b) {
    if (a.ordered !== b.ordered) {
        return false;
    }
    return a.delimiter === b.delimiter;
}
|
|
162
|
+
/**
 * Parse one list whose first marker sits on `lines[start]`.
 *
 * Items are collected while each candidate line carries a marker of the same
 * kind as `first` (same `ordered` flag and `delimiter`) that is indented less
 * than the first item's content column. Each item's body lines are parsed
 * recursively into blocks with the shared `keys` generator; item keys are
 * drawn before their children, and the list's own key is drawn last.
 *
 * The list is loose (`tight: false`) when a blank line separates two items, or
 * when a blank line sits inside an item before further indented content. A
 * blank line that only separates the list from the block that follows it does
 * NOT make the list loose.
 *
 * @param lines  Lines of the enclosing container.
 * @param start  Index of the first list line.
 * @param first  Marker matched on `lines[start]`.
 * @param keys   Key generator shared with the caller.
 * @returns `{ block, next }`: the list node and the index of the first line
 *          that was not consumed.
 */
function parseList(lines, start, first, keys) {
    const items = [];
    let i = start;
    let tight = true;
    let sawBlankBetween = false;
    while (i < lines.length) {
        const m = matchListMarker(lines[i]) ?? matchEmptyListMarker(lines[i]);
        if (!m || !sameList(m, first) || m.markerIndent >= first.contentIndent)
            break;
        const itemLines = [m.content];
        const contentIndent = m.contentIndent;
        i++;
        // True while blank lines have been seen with nothing after them yet.
        let pendingBlank = false;
        while (i < lines.length) {
            const cur = lines[i];
            if (isBlank(cur)) {
                pendingBlank = true;
                itemLines.push('');
                i++;
                continue;
            }
            // A new marker for this list ends the current item.
            const nm = matchListMarker(cur) ?? matchEmptyListMarker(cur);
            if (nm && sameList(nm, first) && nm.markerIndent < contentIndent) {
                // Here the blank really does sit between two items.
                if (pendingBlank)
                    sawBlankBetween = true;
                break;
            }
            // Indented continuation (nested content, lazy paragraph, sub-list).
            if (indentOf(cur) >= contentIndent) {
                if (pendingBlank) {
                    tight = false;
                    pendingBlank = false;
                }
                itemLines.push(cur.slice(contentIndent));
                i++;
                continue;
            }
            // A blank line followed by an unindented, non-marker line ends the
            // list. The blank separates the list from the next block rather
            // than two items, so it must not mark the list as loose.
            if (pendingBlank)
                break;
            // Lazy paragraph continuation (no indent, not a new block).
            if (!interrupts(lines, i)) {
                itemLines.push(cur);
                i++;
                continue;
            }
            break;
        }
        // Trim trailing blank lines captured at the item boundary.
        while (itemLines.length && itemLines[itemLines.length - 1] === '')
            itemLines.pop();
        const { checked, lines: bodyLines } = extractTask(itemLines);
        items.push({
            key: keys.next(),
            checked,
            children: parseLines(bodyLines, keys),
        });
    }
    if (sawBlankBetween)
        tight = false;
    return {
        block: {
            type: 'list',
            key: keys.next(),
            raw: lines.slice(start, i).join('\n'),
            ordered: first.ordered,
            start: first.start,
            tight,
            items,
        },
        next: i,
    };
}
|
|
238
|
+
/**
 * Detect a GFM task-list marker (`[ ]` / `[x]` / `[X]`) at the start of an
 * item's first line and strip it.
 *
 * Returns `checked: null` with the input lines untouched when there is no
 * marker. Otherwise returns `checked` as a boolean and a new line array whose
 * first line has the marker and the whitespace after it removed.
 */
function extractTask(itemLines) {
    const match = itemLines.length > 0 ? /^\[([ xX])\]\s+(.*)$/.exec(itemLines[0]) : null;
    if (match === null) {
        return { checked: null, lines: itemLines };
    }
    const [, box, text] = match;
    // `box` is one of ' ', 'x', 'X'; anything but the space means checked.
    return { checked: box !== ' ', lines: [text].concat(itemLines.slice(1)) };
}
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
// Tables
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
/**
 * Parse a GFM table whose header is `lines[start]` and whose delimiter row is
 * `lines[start + 1]`.
 *
 * Body rows are consumed while a line is non-blank, contains `|`, and does not
 * start an interrupting block. Each row is padded with empty cells or
 * truncated so it has exactly as many cells as the header.
 *
 * Returns `{ block, next }`: the table node and the index of the first line
 * that was not consumed.
 */
function parseTable(lines, start, align, keys) {
    const toCells = (row) => splitTableRow(row).map((text) => ({ children: parseInline(text) }));
    const header = toCells(lines[start]);
    const width = header.length;
    const rows = [];
    let end = start + 2; // header and delimiter row are already accounted for
    for (; end < lines.length; end += 1) {
        const row = lines[end];
        if (isBlank(row) || !row.includes('|') || interrupts(lines, end)) {
            break;
        }
        const cells = toCells(row);
        // Normalize cell count to the header width.
        while (cells.length < width) {
            cells.push({ children: [] });
        }
        rows.push(cells.slice(0, width));
    }
    const block = {
        type: 'table',
        key: keys.next(),
        raw: lines.slice(start, end).join('\n'),
        align,
        header,
        rows,
    };
    return { block, next: end };
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Incremental parsing engine for streaming markdown.
|
|
3
|
+
*
|
|
4
|
+
* The win for AI chat output: as the source string grows token-by-token, we
|
|
5
|
+
* never re-parse or re-render finalized content, and completed blocks keep a
|
|
6
|
+
* stable identity so the reconciler never remounts (no flicker / reflow).
|
|
7
|
+
*
|
|
8
|
+
* Core invariant: for an append-only stream, only the **last** top-level block
|
|
9
|
+
* can still change — appended text always extends the final block. So when a
|
|
10
|
+
* parse of the live tail yields N blocks, the first N−1 are finalized: they are
|
|
11
|
+
* cached by reference and never rebuilt, and the source cursor advances to the
|
|
12
|
+
* start of the last block. Each subsequent chunk re-parses only that trailing
|
|
13
|
+
* block region (≈ O(last block), not O(document)).
|
|
14
|
+
*
|
|
15
|
+
* Keys are the absolute block index (`b-<i>`), so a block keeps the same key
|
|
16
|
+
* when it transitions from live → finalized: the reconciler patches in place
|
|
17
|
+
* rather than remounting.
|
|
18
|
+
*/
|
|
19
|
+
import type { BlockNode } from '../ast.js';
|
|
20
|
+
/**
 * Stateful handle for parsing a growing markdown source.
 *
 * The engine remembers the blocks it has already finalized, so repeated
 * `parse` calls on an append-only source only re-parse the trailing region.
 */
export interface IncrementalEngine {
    /**
     * Parse `src`, reusing finalized blocks from prior calls where possible.
     *
     * Returns the full top-level block list for `src`. If `src` no longer
     * starts with the prefix that was previously finalized, the cached state
     * is dropped and the whole source is parsed again.
     */
    parse(src: string): BlockNode[];
    /** Drop all cached state (e.g. when the source is replaced, not appended). */
    reset(): void;
}
|
|
26
|
+
/**
 * Create a new incremental parsing engine with empty cached state.
 * Each engine keeps its own cache, so use one engine per streamed document.
 */
export declare function createIncrementalEngine(): IncrementalEngine;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Incremental parsing engine for streaming markdown.
|
|
3
|
+
*
|
|
4
|
+
* The win for AI chat output: as the source string grows token-by-token, we
|
|
5
|
+
* never re-parse or re-render finalized content, and completed blocks keep a
|
|
6
|
+
* stable identity so the reconciler never remounts (no flicker / reflow).
|
|
7
|
+
*
|
|
8
|
+
* Core invariant: for an append-only stream, only the **last** top-level block
|
|
9
|
+
* can still change — appended text always extends the final block. So when a
|
|
10
|
+
* parse of the live tail yields N blocks, the first N−1 are finalized: they are
|
|
11
|
+
* cached by reference and never rebuilt, and the source cursor advances to the
|
|
12
|
+
* start of the last block. Each subsequent chunk re-parses only that trailing
|
|
13
|
+
* block region (≈ O(last block), not O(document)).
|
|
14
|
+
*
|
|
15
|
+
* Keys are the absolute block index (`b-<i>`), so a block keeps the same key
|
|
16
|
+
* when it transitions from live → finalized: the reconciler patches in place
|
|
17
|
+
* rather than remounting.
|
|
18
|
+
*/
|
|
19
|
+
import { parseBlocks, makeKeyGen } from './blocks.js';
|
|
20
|
+
import { normalize } from './scanner.js';
|
|
21
|
+
/**
 * Create an incremental parsing engine for an append-only markdown stream.
 *
 * The engine keeps three pieces of state: the finalized blocks (`cached`), the
 * normalized-source offset already folded into them (`cut`), and the matching
 * source prefix (`stableSource`). Each `parse` call re-parses only the text
 * after `cut`; when that yields more than one block, every block except the
 * last is finalized and the cursor moves to the start of the last block.
 *
 * Top-level keys are the absolute block index (`b-<i>`), so a block keeps its
 * key when it moves from live to finalized.
 *
 * @returns `{ parse, reset }`. `parse(src)` returns the full top-level block
 *          list; `reset()` drops all cached state.
 */
export function createIncrementalEngine() {
    /** Finalized blocks, reused by reference across chunks. */
    let cached = [];
    /** Source length (in normalized chars) already folded into `cached`. */
    let cut = 0;
    /** The normalized source prefix corresponding to `cached` (`src.slice(0, cut)`). */
    let stableSource = '';
    const reset = () => {
        cached = [];
        cut = 0;
        stableSource = '';
    };
    const assignKey = (block, index) => {
        block.key = `b-${index}`;
    };
    const parse = (src) => {
        const norm = normalize(src ?? '');
        // Non-append edit (regenerate, replace): the cached prefix no longer
        // matches → full re-parse.
        if (!norm.startsWith(stableSource))
            reset();
        const tail = norm.slice(cut);
        const blocks = parseBlocks(tail, makeKeyGen(`p${cut}`));
        if (blocks.length === 0) {
            // Tail is empty / all-blank: nothing live, return finalized prefix.
            return cached.slice();
        }
        // Top-level keys continue from the finalized prefix.
        const base = cached.length;
        blocks.forEach((b, i) => assignKey(b, base + i));
        if (blocks.length > 1) {
            const last = blocks[blocks.length - 1];
            const idx = tail.lastIndexOf(last.raw);
            // Finalize every block except the last (the only one that can still
            // grow), but only when the cursor can move to the start of that
            // last block. Growing `cached` without advancing `cut` would make
            // the next call re-parse and re-append the same blocks.
            if (idx > 0) {
                cached = cached.concat(blocks.slice(0, -1));
                cut += idx;
                stableSource = norm.slice(0, cut);
                return cached.concat(last);
            }
        }
        // Single live block, or the last block could not be located: return
        // the result without committing any new state.
        return cached.concat(blocks);
    };
    return { parse, reset };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inline tokenizer: a markdown text run → `InlineNode[]` (nested spans).
|
|
3
|
+
*
|
|
4
|
+
* Precedence (highest first): backtick code spans, backslash escapes, images,
|
|
5
|
+
* links, angle/bare autolinks, then emphasis (`**`/`__` strong, `*`/`_` em,
|
|
6
|
+
* `~~` del) resolved with a delimiter-run stack, then hard breaks, then text.
|
|
7
|
+
*
|
|
8
|
+
* Robustness for streaming: any unmatched construct at the tail (a lone `**`,
|
|
9
|
+
* a half-open `[text](`, an unterminated code span) degrades to literal text.
|
|
10
|
+
* The function never throws.
|
|
11
|
+
*/
|
|
12
|
+
import type { InlineNode } from '../ast.js';
|
|
13
|
+
/**
 * Parse an inline text run into a node array.
 *
 * Per this module's header, unmatched constructs at the tail degrade to
 * literal text and the function never throws.
 */
export declare function parseInline(input: string): InlineNode[];
|
|
15
|
+
/**
 * Reduce inline markdown to its visible text (for image alt).
 *
 * NOTE(review): the parameter is a raw string, not a parsed `InlineNode[]` —
 * presumably the text is tokenized internally before flattening; confirm
 * against the implementation.
 */
export declare function stripFormatting(text: string): string;
|