@sigx/lynx-markdown 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,451 @@
1
+ /**
2
+ * Inline tokenizer: a markdown text run → `InlineNode[]` (nested spans).
3
+ *
4
+ * Precedence (highest first): backtick code spans, backslash escapes, images,
5
+ * links, angle/bare autolinks, then emphasis (`**`/`__` strong, `*`/`_` em,
6
+ * `~~` del) resolved with a delimiter-run stack, then hard breaks, then text.
7
+ *
8
+ * Robustness for streaming: any unmatched construct at the tail (a lone `**`,
9
+ * a half-open `[text](`, an unterminated code span) degrades to literal text.
10
+ * The function never throws.
11
+ */
12
+ import { isEscapable, sanitizeHref, trimAutolinkTail } from './scanner.js';
13
/**
 * Token guard: reports whether `t` is an emphasis delimiter-run token,
 * i.e. its `delim` property is strictly `true`.
 */
function isDelim(t) {
    const { delim } = t;
    return delim === true;
}
16
// Matches one ASCII punctuation character, written as four ranges:
// `!`–`/`, `:`–`@`, `[`–backtick and `{`–`~`. Used by `flanking` to classify
// the characters on either side of an emphasis delimiter run.
+ const PUNCT = /[!-/:-@[-`{-~]/;
17
/**
 * Convert one inline markdown text run into its node array.
 *
 * Pipeline: tokenize the run, pair emphasis delimiters in place, turn any
 * leftover delimiter runs into literal text, then merge adjacent text nodes.
 */
export function parseInline(input) {
    const stream = tokenize(input);
    resolveEmphasis(stream, 0);
    const nodes = tokensToNodes(stream);
    return coalesce(nodes);
}
23
+ // ---------------------------------------------------------------------------
24
+ // Tokenization
25
+ // ---------------------------------------------------------------------------
26
/**
 * Split an inline text run into a flat token list.
 *
 * Emits `text`, `br`, `codeSpan`, `image`, `link` and `autolink` tokens plus
 * emphasis delimiter runs (`{ delim: true, ch, count, canOpen, canClose }`)
 * that `resolveEmphasis` pairs up afterwards. Plain characters accumulate in
 * a buffer that is flushed as a single `text` token before any other token.
 *
 * Any construct that fails to scan (unterminated code span, half-open link,
 * invalid autolink) falls back to literal text, so the function never throws
 * on partial (streaming) input.
 *
 * @param text The inline source run.
 * @returns The token list, in source order.
 */
function tokenize(text) {
    const tokens = [];
    let buf = '';
    // Emit the pending literal text (if any) as one `text` token.
    const flush = () => {
        if (buf) {
            tokens.push({ type: 'text', value: buf });
            buf = '';
        }
    };
    let i = 0;
    const n = text.length;
    while (i < n) {
        const ch = text[i];
        // Backslash escape / hard break.
        if (ch === '\\') {
            const next = text[i + 1];
            if (next === '\n') {
                flush();
                tokens.push({ type: 'br' });
                i += 2;
                continue;
            }
            if (next !== undefined && isEscapable(next)) {
                buf += next;
                i += 2;
                continue;
            }
            // Backslash before a non-escapable character stays literal.
            buf += '\\';
            i++;
            continue;
        }
        // Code span.
        if (ch === '`') {
            const span = scanCodeSpan(text, i);
            if (span) {
                flush();
                tokens.push({ type: 'codeSpan', value: span.value });
                i = span.end;
                continue;
            }
            // No matching closer: the WHOLE backtick run is literal. Consuming
            // only one backtick would let the rest of the run be re-scanned as
            // a shorter opener and wrongly match a later, shorter run.
            let runEnd = i;
            while (runEnd < n && text[runEnd] === '`')
                runEnd++;
            buf += text.slice(i, runEnd);
            i = runEnd;
            continue;
        }
        // Image.
        if (ch === '!' && text[i + 1] === '[') {
            const img = scanLink(text, i + 1);
            if (img) {
                flush();
                tokens.push({
                    type: 'image',
                    src: sanitizeHref(img.href),
                    alt: stripFormatting(img.label),
                    ...(img.title ? { title: img.title } : {}),
                });
                i = img.end;
                continue;
            }
            // Not an image: keep the `!`; the `[` is handled on the next pass.
            buf += ch;
            i++;
            continue;
        }
        // Link.
        if (ch === '[') {
            const link = scanLink(text, i);
            if (link) {
                flush();
                tokens.push({
                    type: 'link',
                    href: sanitizeHref(link.href),
                    ...(link.title ? { title: link.title } : {}),
                    children: parseInline(link.label),
                });
                i = link.end;
                continue;
            }
            buf += ch;
            i++;
            continue;
        }
        // Angle autolink: <scheme:...> or <email>.
        if (ch === '<') {
            const auto = scanAngleAutolink(text, i);
            if (auto) {
                flush();
                tokens.push({ type: 'autolink', href: sanitizeHref(auto.href), value: auto.value });
                i = auto.end;
                continue;
            }
            buf += ch;
            i++;
            continue;
        }
        // Bare GFM autolink (http(s):// or www.) at a boundary. The scanner
        // matches case-insensitively, so trigger on either letter case.
        if ((ch === 'h' || ch === 'w' || ch === 'H' || ch === 'W') && isBoundary(text[i - 1])) {
            const auto = scanBareAutolink(text, i);
            if (auto) {
                flush();
                tokens.push({ type: 'autolink', href: sanitizeHref(auto.href), value: auto.value });
                i = auto.end;
                continue;
            }
            // Not a URL: fall through and treat the letter as ordinary text.
        }
        // Soft line break → single space (paragraphs join their lines).
        if (ch === '\n') {
            // Two (or more) trailing spaces before the newline = hard break.
            if (buf.endsWith('  ')) {
                buf = buf.replace(/ +$/, '');
                flush();
                tokens.push({ type: 'br' });
            }
            else {
                // Collapse any trailing spaces plus the newline to one space.
                buf = buf.replace(/ +$/, '');
                buf += ' ';
            }
            i++;
            // Skip leading spaces of the continuation line.
            while (i < n && text[i] === ' ')
                i++;
            continue;
        }
        // Emphasis / strikethrough delimiter run.
        if (ch === '*' || ch === '_' || ch === '~') {
            let j = i;
            while (j < n && text[j] === ch)
                j++;
            const count = j - i;
            // Single `~` is literal; only `~~` (or longer) marks strikethrough.
            if (ch === '~' && count < 2) {
                buf += text.slice(i, j);
                i = j;
                continue;
            }
            // Start/end of input counts as whitespace for flanking purposes.
            const before = text[i - 1] ?? ' ';
            const after = text[j] ?? ' ';
            const { canOpen, canClose } = flanking(ch, before, after);
            if (!canOpen && !canClose) {
                // Can neither open nor close emphasis: plain text.
                buf += text.slice(i, j);
            }
            else {
                flush();
                tokens.push({ delim: true, ch, count, canOpen, canClose });
            }
            i = j;
            continue;
        }
        buf += ch;
        i++;
    }
    flush();
    return tokens;
}
178
/**
 * Decide whether a delimiter run may open and/or close emphasis, based on the
 * characters immediately before and after it.
 *
 * A run is left-flanking when it is not followed by whitespace and is either
 * not followed by punctuation or is preceded by whitespace/punctuation;
 * right-flanking is the mirror image. `_` is stricter so that intraword
 * underscores do not produce emphasis.
 */
function flanking(ch, before, after) {
    const isSpace = (c) => /\s/.test(c);
    const isPunct = (c) => PUNCT.test(c);
    const spaceBefore = isSpace(before);
    const spaceAfter = isSpace(after);
    const punctBefore = isPunct(before);
    const punctAfter = isPunct(after);
    const left = !spaceAfter && (!punctAfter || spaceBefore || punctBefore);
    const right = !spaceBefore && (!punctBefore || spaceAfter || punctAfter);
    if (ch !== '_') {
        return { canOpen: left, canClose: right };
    }
    // Underscore: a run flanking on both sides only counts when punctuation
    // sits on the relevant side.
    return {
        canOpen: left && (!right || punctBefore),
        canClose: right && (!left || punctAfter),
    };
}
195
/**
 * True when `prev` (the character before a candidate bare autolink) permits
 * the autolink to start: start of input, whitespace, `(`, `*`, `_` or `~`.
 */
function isBoundary(prev) {
    if (prev === undefined) {
        return true;
    }
    return /[\s(*_~]/.test(prev);
}
198
+ // ---------------------------------------------------------------------------
199
+ // Code spans, links, autolinks
200
+ // ---------------------------------------------------------------------------
201
/**
 * Scan a code span whose opening backtick run begins at `start`.
 *
 * The closer must be a backtick run of exactly the same length as the opener.
 * Returns `{ value, end }` (via `finishCodeSpan`) or `null` when no such
 * closing run exists.
 */
function scanCodeSpan(text, start) {
    let contentStart = start;
    while (text[contentStart] === '`') {
        contentStart += 1;
    }
    const ticks = contentStart - start;
    const fence = '`'.repeat(ticks);
    const first = text.indexOf(fence, contentStart);
    if (first === -1) {
        return null;
    }
    // Fast path: the first candidate is not the prefix of a longer run.
    if (text[first + ticks] !== '`') {
        return finishCodeSpan(text, contentStart, first, ticks);
    }
    // Otherwise walk the candidates until one is a run of exactly `ticks`.
    for (let from = contentStart; from < text.length;) {
        const at = text.indexOf(fence, from);
        if (at === -1) {
            return null;
        }
        const isolated = text[at - 1] !== '`' && text[at + ticks] !== '`';
        if (isolated) {
            return finishCodeSpan(text, contentStart, at, ticks);
        }
        from = at + ticks;
    }
    return null;
}
226
/**
 * Build the code-span result for content `text[open, close)` closed by a run
 * of `ticks` backticks at `close`.
 *
 * Newlines become spaces; one leading and one trailing space are stripped
 * when both are present and the content is not whitespace-only.
 *
 * @returns `{ value, end }` where `end` is the index just past the closer.
 */
function finishCodeSpan(text, open, close, ticks) {
    const raw = text.slice(open, close).replace(/\n/g, ' ');
    const padded = raw.length > 1 && raw.startsWith(' ') && raw.endsWith(' ');
    const value = padded && raw.trim() !== '' ? raw.slice(1, -1) : raw;
    return { value, end: close + ticks };
}
234
/**
 * Scan an inline link `[label](href "title")` whose `[` is at `start`.
 *
 * @returns `{ label, href, title?, end }` (`title` only when non-empty), or
 *          `null` when the bracket is unclosed, is not followed by `(`, or the
 *          destination does not parse.
 */
function scanLink(text, start) {
    const closeIdx = findClosingBracket(text, start);
    if (closeIdx === -1 || text[closeIdx + 1] !== '(') {
        return null;
    }
    const dest = scanLinkDest(text, closeIdx + 2);
    if (!dest) {
        return null;
    }
    const label = text.slice(start + 1, closeIdx);
    const titlePart = dest.title ? { title: dest.title } : {};
    return { label, href: dest.href, ...titlePart, end: dest.end };
}
247
/**
 * Find the `]` matching the `[` at `start`, honoring nesting and escapes.
 *
 * Backslash-escaped characters and the contents of code spans are skipped, so
 * brackets inside them do not affect nesting. A backtick run with no matching
 * closer is skipped as a whole: re-scanning its remaining backticks as a
 * shorter opener could wrongly pair them with a later, shorter run.
 *
 * @param text  The inline source.
 * @param start Index of the opening `[`.
 * @returns Index of the matching `]`, or -1 when there is none.
 */
function findClosingBracket(text, start) {
    let depth = 0;
    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (ch === '\\') {
            // Skip the escaped character.
            i++;
            continue;
        }
        if (ch === '`') {
            const span = scanCodeSpan(text, i);
            if (span) {
                // Jump to the last character of the span (loop adds 1).
                i = span.end - 1;
                continue;
            }
            // Unmatched run: step over every backtick in it.
            while (text[i + 1] === '`')
                i++;
            continue;
        }
        if (ch === '[')
            depth++;
        else if (ch === ']') {
            depth--;
            if (depth === 0)
                return i;
        }
    }
    return -1;
}
273
/**
 * Parse the destination part of an inline link — `href "title")` — beginning
 * just after the opening `(`.
 *
 * The href is either `<...>` (no newlines) or a bare run ending at a space,
 * newline or unbalanced `)`, with backslash escapes honoured. An optional
 * title may follow, delimited by `"…"`, `'…'` or `(…)`.
 *
 * @returns `{ href, title?, end }` with `end` just past the closing `)`, or
 *          `null` when the destination is malformed or unterminated.
 */
function scanLinkDest(text, start) {
    const isGap = (c) => c === ' ' || c === '\n';
    let pos = start;
    while (isGap(text[pos])) {
        pos++;
    }
    let href = '';
    if (text[pos] === '<') {
        // Angle-bracketed destination: everything up to `>` on the same line.
        const begin = pos + 1;
        let stop = begin;
        while (stop < text.length && text[stop] !== '>' && text[stop] !== '\n') {
            stop++;
        }
        if (text[stop] !== '>') {
            return null;
        }
        href = text.slice(begin, stop);
        pos = stop + 1;
    }
    else {
        // Bare destination: balanced parentheses are part of the href.
        let nesting = 0;
        while (pos < text.length) {
            const c = text[pos];
            if (c === '\\' && text[pos + 1] !== undefined) {
                href += text[pos + 1];
                pos += 2;
                continue;
            }
            if (isGap(c)) {
                break;
            }
            if (c === '(') {
                nesting++;
            }
            else if (c === ')') {
                if (nesting === 0) {
                    break;
                }
                nesting--;
            }
            href += c;
            pos++;
        }
    }
    // Optional title.
    while (isGap(text[pos])) {
        pos++;
    }
    let title;
    const opener = text[pos];
    if (opener === '"' || opener === "'" || opener === '(') {
        const closer = opener === '(' ? ')' : opener;
        let acc = '';
        pos++;
        while (pos < text.length && text[pos] !== closer) {
            const escaped = text[pos] === '\\' && text[pos + 1] !== undefined;
            acc += escaped ? text[pos + 1] : text[pos];
            pos += escaped ? 2 : 1;
        }
        if (text[pos] !== closer) {
            return null;
        }
        title = acc;
        pos++;
    }
    while (isGap(text[pos])) {
        pos++;
    }
    if (text[pos] !== ')') {
        return null;
    }
    const end = pos + 1;
    return title === undefined ? { href, end } : { href, title, end };
}
340
/**
 * Scan an angle-bracket autolink (`<scheme:...>` or `<user@host.tld>`) whose
 * `<` is at `start`.
 *
 * The bracketed text must be non-empty and contain no whitespace. Email
 * addresses get a `mailto:` href.
 *
 * @returns `{ href, value, end }` with `end` just past `>`, or `null`.
 */
function scanAngleAutolink(text, start) {
    const gt = text.indexOf('>', start);
    if (gt === -1) {
        return null;
    }
    const inner = text.slice(start + 1, gt);
    if (inner === '' || /\s/.test(inner)) {
        return null;
    }
    let href = null;
    if (/^[a-z][a-z0-9+.-]*:/i.test(inner)) {
        href = inner;
    }
    else if (/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(inner)) {
        href = `mailto:${inner}`;
    }
    return href === null ? null : { href, value: inner, end: gt + 1 };
}
355
/**
 * Scan a bare GFM autolink (`http://`, `https://` or `www.` prefix, matched
 * case-insensitively) beginning at `start`.
 *
 * The match runs to the next whitespace or `<`, then `trimAutolinkTail`
 * removes trailing characters. `www.` links get an `http://` href.
 *
 * @returns `{ href, value, end }`, or `null` when there is no match or
 *          nothing remains after the prefix once the tail is trimmed.
 */
function scanBareAutolink(text, start) {
    const match = /^(https?:\/\/|www\.)[^\s<]+/i.exec(text.slice(start));
    if (match === null) {
        return null;
    }
    const [raw, prefix] = match;
    const url = trimAutolinkTail(raw).url;
    // Require something after the scheme/host prefix.
    if (url.length <= prefix.length) {
        return null;
    }
    const needsScheme = url.toLowerCase().startsWith('www.');
    return {
        href: needsScheme ? `http://${url}` : url,
        value: url,
        end: start + url.length,
    };
}
368
+ // ---------------------------------------------------------------------------
369
+ // Emphasis resolution (delimiter stack)
370
+ // ---------------------------------------------------------------------------
371
/**
 * Pair emphasis delimiter runs in `tokens` (from index `lo` onward) and
 * replace each matched region with an `em`, `strong` or `del` node, in place.
 *
 * Closers are visited left to right; each is matched with the nearest
 * preceding opener of the same character that still has delimiters left.
 * Two delimiters are consumed (→ `strong`) when both runs have at least two,
 * otherwise one (→ `em`); `~` runs always produce `del`. Runs keep their
 * leftover counts, so a closer can match again and leftovers are rendered as
 * literal text later by `tokensToNodes`.
 *
 * Unmatched delimiter runs that sit INSIDE a matched region are converted to
 * literal text rather than discarded, so no source characters are lost
 * (e.g. the `_` in `**foo _bar baz**`).
 *
 * @param tokens Token list produced by `tokenize`; mutated in place.
 * @param lo     Lowest index an opener may be taken from.
 */
function resolveEmphasis(tokens, lo) {
    // Walk closers left→right; for each, find the nearest compatible opener.
    let i = lo;
    while (i < tokens.length) {
        const closer = tokens[i];
        if (!isDelim(closer) || !closer.canClose || closer.count === 0) {
            i++;
            continue;
        }
        let j = i - 1;
        let matched = false;
        while (j >= lo) {
            const opener = tokens[j];
            if (isDelim(opener) && opener.ch === closer.ch && opener.canOpen && opener.count > 0) {
                // "Rule of 3": when sum is a multiple of 3, both must be too —
                // approximated by allowing the match (good enough for our scope).
                const use = closer.count >= 2 && opener.count >= 2 ? 2 : 1;
                // Leftover delimiter runs between the pair become literal text;
                // exhausted (count 0) runs vanish.
                const children = coalesce(tokensToNodes(tokens.slice(j + 1, i)));
                const type = closer.ch === '~' ? 'del' : use === 2 ? 'strong' : 'em';
                const node = { type, children };
                // Keep leftover opener/closer counts for further matching.
                opener.count -= use;
                closer.count -= use;
                // Replace everything strictly between opener and closer.
                const removeStart = j + 1;
                tokens.splice(removeStart, i - removeStart, node);
                // The closer now sits right after the new node; revisit it in
                // case it still has delimiters left.
                i = removeStart + 1;
                matched = true;
                break;
            }
            j--;
        }
        if (!matched)
            i++;
    }
}
414
+ // ---------------------------------------------------------------------------
415
+ // Finalization
416
+ // ---------------------------------------------------------------------------
417
/**
 * Convert a resolved token list to nodes: non-delimiter tokens pass through
 * unchanged, delimiter runs with a remaining count become literal text, and
 * exhausted (count 0) runs are dropped.
 */
function tokensToNodes(tokens) {
    return tokens.flatMap((tok) => {
        if (!isDelim(tok)) {
            return [tok];
        }
        // Unmatched / leftover delimiter run → literal text.
        return tok.count > 0 ? [{ type: 'text', value: tok.ch.repeat(tok.count) }] : [];
    });
}
431
/**
 * Collapse runs of adjacent text nodes into one and discard empty text
 * nodes. Non-text nodes are kept as-is. The surviving text node of a run is
 * extended in place (its `value` is appended to).
 */
function coalesce(nodes) {
    const merged = [];
    for (let k = 0; k < nodes.length; k++) {
        const node = nodes[k];
        if (node.type !== 'text') {
            merged.push(node);
            continue;
        }
        if (node.value === '') {
            continue;
        }
        const prev = merged[merged.length - 1];
        if (prev && prev.type === 'text') {
            prev.value += node.value;
        }
        else {
            merged.push(node);
        }
    }
    return merged;
}
448
/**
 * Reduce a markdown label string to plain visible text (used for image alt)
 * by deleting every `*`, `_`, `~`, backtick, `[` and `]` character.
 */
export function stripFormatting(text) {
    const markers = /[*_~`[\]]/g;
    return text.replace(markers, '');
}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Shared low-level scanning helpers: source normalization, line classification,
3
+ * escape handling, and URL scanning. Pure functions, no AST/JSX dependency.
4
+ */
5
+ import type { HeadingLevel, TableAlign } from '../ast.js';
6
+ /**
7
+ * Normalize source for uniform parsing: CRLF/CR → LF, and expand leading tabs
8
+ * to four spaces so indentation logic is consistent. Tabs after the first
9
+ * non-space content are left alone (they only matter for block indentation).
 *
 * @param src Raw markdown source text.
 * @returns The normalized source text.
10
+ */
11
+ export declare function normalize(src: string): string;
12
+ /**
 * Count leading spaces (indentation) of a line.
 *
 * @param line A single source line.
 * @returns The number of leading spaces.
 */
13
+ export declare function indentOf(line: string): number;
14
+ /**
 * True when a line is blank (empty or whitespace-only).
 *
 * @param line A single source line.
 */
15
+ export declare function isBlank(line: string): boolean;
16
/**
 * Description of a fenced-code-block marker line. Returned by `matchFence`
 * and passed to `isFenceClose` as the opening fence to compare against.
 */
+ export interface FenceInfo {
17
+ /** The fence character: '`' or '~'. */
18
+ char: string;
19
+ /** Length of the fence run (≥ 3). */
20
+ length: number;
21
+ /** Indentation of the fence (stripped from content lines). */
22
+ indent: number;
23
+ /** Trimmed info string (language) after the opening fence, if any. */
24
+ info: string;
25
+ }
26
+ /**
 * Match an opening (or closing) fenced-code-block marker.
 *
 * @param line A single source line.
 * @returns The fence description, or `null` when the line is not a fence.
 */
27
+ export declare function matchFence(line: string): FenceInfo | null;
28
+ /**
 * True when `line` closes a fence opened by `open`.
 *
 * @param line Candidate closing line.
 * @param open The fence that opened the block.
 */
29
+ export declare function isFenceClose(line: string, open: FenceInfo): boolean;
30
+ /**
 * Match an ATX heading (`#`..`######`).
 *
 * @param line A single source line.
 * @returns The heading `level` and its `text`, or `null` when the line is
 *          not a heading.
 */
31
+ export declare function matchHeading(line: string): {
32
+ level: HeadingLevel;
33
+ text: string;
34
+ } | null;
35
+ /**
 * Match a thematic break (`---`, `***`, `___`, optionally spaced).
 *
 * @param line A single source line.
 */
36
+ export declare function isThematicBreak(line: string): boolean;
37
/**
 * Description of a list-item marker line. Returned by `matchListMarker` and
 * `matchEmptyListMarker`.
 */
+ export interface ListMarkerInfo {
/** Whether the marker is an ordered (numbered) one rather than a bullet. */
38
+ ordered: boolean;
39
+ /** Start number (ordered) or 1 (unordered). */
40
+ start: number;
41
+ /** The bullet/delimiter char: '-', '*', '+', '.', or ')'. */
42
+ delimiter: string;
43
+ /** Column where the item content begins (used for continuation indent). */
44
+ contentIndent: number;
45
+ /** Indentation of the marker itself. */
46
+ markerIndent: number;
47
+ /** The content following the marker on the same line. */
48
+ content: string;
49
+ }
50
+ /**
 * Match a list-item marker (`- `, `* `, `+ `, `1. `, `1) `).
 *
 * @param line A single source line.
 * @returns The marker description, or `null` when the line is not a list item.
 */
51
+ export declare function matchListMarker(line: string): ListMarkerInfo | null;
52
+ /**
 * Match an empty list item (`-` / `1.` with nothing after).
 *
 * @param line A single source line.
 * @returns The marker description, or `null` when the line does not match.
 */
53
+ export declare function matchEmptyListMarker(line: string): ListMarkerInfo | null;
54
+ /**
55
+ * Parse a GFM table delimiter row (`| --- | :--: | --: |`) into per-column
56
+ * alignment, or return null if the line is not a valid delimiter row.
 *
 * @param line A single source line.
 * @returns One entry per column (`null` entries are part of the declared
 *          element type — presumably "no alignment specified"; confirm
 *          against the implementation), or `null` for a non-delimiter line.
57
+ */
58
+ export declare function matchTableDelimiter(line: string): (TableAlign | null)[] | null;
59
+ /**
 * Split a table row into raw cell strings, honoring `\|` escapes.
 *
 * @param line A single table row line.
 * @returns The raw cell strings, in column order.
 */
60
+ export declare function splitTableRow(line: string): string[];
61
+ /**
 * True when `ch` is a backslash-escapable punctuation character.
 *
 * @param ch The character following a backslash.
 */
62
+ export declare function isEscapable(ch: string): boolean;
63
+ /**
64
+ * Return a link href if it is safe to expose to handlers, else `'#'`. Blocks
65
+ * `javascript:`/`data:` and other unexpected schemes. Relative/anchor links and
66
+ * the recognized safe schemes pass through unchanged.
 *
 * @param href The raw link destination.
 * @returns `href` itself when safe, otherwise `'#'`.
67
+ */
68
+ export declare function sanitizeHref(href: string): string;
69
+ /**
 * Trailing punctuation trimmed from bare-URL autolinks (GFM behavior).
 *
 * @param url The raw matched URL text.
 * @returns `url`: the URL to link; `tail`: presumably the characters that
 *          were trimmed off the end — confirm against the implementation.
 */
70
+ export declare function trimAutolinkTail(url: string): {
71
+ url: string;
72
+ tail: string;
73
+ };