@markuplint/markdown-parser 5.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +190 -0
- package/CHANGELOG.md +10 -0
- package/LICENSE +21 -0
- package/README.ja.md +47 -0
- package/README.md +47 -0
- package/lib/index.d.ts +6 -0
- package/lib/index.js +6 -0
- package/lib/markdown-aware-parser.d.ts +179 -0
- package/lib/markdown-aware-parser.js +529 -0
- package/lib/parser.d.ts +40 -0
- package/lib/parser.js +91 -0
- package/package.json +39 -0
- package/src/index.spec.ts +747 -0
- package/src/index.ts +7 -0
- package/src/markdown-aware-parser.ts +656 -0
- package/src/parser.ts +109 -0
- package/tsconfig.build.json +9 -0
- package/tsconfig.build.tsbuildinfo +1 -0
- package/tsconfig.json +17 -0
package/src/markdown-aware-parser.ts
ADDED
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
MLASTAttr,
|
|
3
|
+
MLASTElement,
|
|
4
|
+
MLASTHTMLAttr,
|
|
5
|
+
MLASTNodeTreeItem,
|
|
6
|
+
MLASTParentNode,
|
|
7
|
+
MLASTText,
|
|
8
|
+
} from '@markuplint/ml-ast';
|
|
9
|
+
import type { ParserOptions, Token } from '@markuplint/parser-utils';
|
|
10
|
+
import type {
|
|
11
|
+
Code,
|
|
12
|
+
Definition,
|
|
13
|
+
Image,
|
|
14
|
+
ImageReference,
|
|
15
|
+
InlineCode,
|
|
16
|
+
Link,
|
|
17
|
+
LinkReference,
|
|
18
|
+
List,
|
|
19
|
+
RootContent,
|
|
20
|
+
Table,
|
|
21
|
+
} from 'mdast';
|
|
22
|
+
|
|
23
|
+
import { Parser, getNamespace } from '@markuplint/parser-utils';
|
|
24
|
+
|
|
25
|
+
type MdastNode = RootContent;
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Abstract base class for parsers that handle Markdown content.
|
|
29
|
+
*
|
|
30
|
+
* Provides shared logic for converting mdast nodes (headings, links, images,
|
|
31
|
+
* lists, code, tables, etc.) into markuplint's AST. Both MarkdownParser and
|
|
32
|
+
* MDXParser extend this class to avoid code duplication.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
export abstract class MarkdownAwareParser extends Parser<MdastNode> {
	/**
	 * Link/image reference definitions (`[id]: url "title"`), keyed by the
	 * definition node's `identifier`. Populated by `collectDefinitions` and
	 * used to resolve `linkReference` / `imageReference` nodes.
	 */
	protected definitions = new Map<string, Definition>();

	/**
	 * Start offsets of table rows that are header rows (first row of each table).
	 * Written by `visitTableElement`, consumed by the `tableRow` branch of
	 * `nodeizeMarkdownNode`.
	 */
	readonly #headerRowOffsets = new Set<number>();

	/**
	 * Element name used for the cells of the row currently being processed.
	 * Switched to `'th'` while a header row is visited and put back to `'td'`
	 * afterwards.
	 */
	#currentCellName: 'th' | 'td' = 'td';

	constructor(options?: ParserOptions) {
		super(options);
	}

	/**
	 * Clears the per-document state (definitions, header-row offsets and the
	 * current cell name).
	 *
	 * Intended to be called at the start of every `tokenize()` so that nothing
	 * leaks between successive `parse()` calls on the same parser instance.
	 */
	protected resetMarkdownState() {
		this.definitions.clear();
		this.#headerRowOffsets.clear();
		this.#currentCellName = 'td';
	}

	/**
	 * Post-processes the flattened node list with whitespace and invalid-node
	 * exposure turned off, because the elements produced from Markdown are
	 * synthetic and carry no real HTML whitespace tokens.
	 *
	 * @param nodeList - The flattened node tree produced by the base class.
	 * @returns The adjusted node list returned by the base implementation.
	 */
	afterFlattenNodes(nodeList: readonly MLASTNodeTreeItem[]) {
		return super.afterFlattenNodes(nodeList, {
			exposeWhiteSpace: false,
			exposeInvalidNode: false,
		});
	}

	/**
	 * Creates a synthetic HTML attribute for a Markdown-derived element.
	 *
	 * Every sub-token (name, `=`, quotes, value) is positioned at the start of
	 * `token`, because Markdown syntax has no discrete attribute positions.
	 *
	 * @param name - The attribute name (e.g., `"href"`, `"alt"`).
	 * @param value - The attribute value extracted from Markdown syntax.
	 * @param token - The source token whose position is reused for the attribute.
	 * @returns A fully-formed HTML attribute node.
	 */
	protected createSyntheticAttr(name: string, value: string, token: Token): MLASTHTMLAttr {
		const { offset, line, col } = token;

		// One empty token is shared by all three "spaces" slots.
		const emptyToken = this.createToken('', offset, line, col);
		const nameToken = this.createToken(name, offset, line, col);
		const valueToken = this.createToken(value, offset, line, col);
		const attrToken = this.createToken(`${name}="${value}"`, offset, line, col);

		return {
			...attrToken,
			type: 'attr',
			nodeName: name,
			spacesBeforeName: emptyToken,
			name: nameToken,
			spacesBeforeEqual: emptyToken,
			equal: this.createToken('=', offset, line, col),
			spacesAfterEqual: emptyToken,
			startQuote: this.createToken('"', offset, line, col),
			value: valueToken,
			endQuote: this.createToken('"', offset, line, col),
			isDuplicatable: false,
		};
	}

	/**
	 * Builds a generic HTML element node from a Markdown construct and
	 * recurses into its mdast children.
	 *
	 * @param token - The source token covering the entire construct.
	 * @param nodeName - The HTML element name (e.g., `"p"`, `"h1"`, `"li"`).
	 * @param childNodes - The mdast children to recurse into.
	 * @param depth - Current nesting depth in the AST.
	 * @param parentNode - Parent AST node, or `null` for top-level nodes.
	 * @param attributes - Optional pre-built attributes to attach.
	 * @returns The element node followed by whatever `visitChildren` returns.
	 */
	protected visitMarkdownElement(
		token: Token,
		nodeName: string,
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		childNodes: readonly MdastNode[],
		depth: number,
		parentNode: MLASTParentNode | null,
		attributes: readonly MLASTAttr[] = [],
	): readonly MLASTNodeTreeItem[] {
		const startTag = this.#createStartTag(token, nodeName, depth, parentNode, attributes);

		// Safe cast: childNodes are always subtypes of RootContent (= MdastNode)
		const siblings = this.visitChildren([...childNodes] as MdastNode[], startTag);

		return [startTag, ...siblings];
	}

	/**
	 * Builds an `<a>` element with `href` (and optionally `title`) attributes.
	 *
	 * @param originNode - The mdast `link` node.
	 * @param token - The source token covering the link.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<a>` element node and its descendants.
	 */
	protected visitLinkElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Link,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const attrs = this.#createLinkAttrs(originNode.url, originNode.title, token);
		return this.visitMarkdownElement(token, 'a', originNode.children, depth, parentNode, attrs);
	}

	/**
	 * Builds an `<img>` element with `src`, `alt`, and optionally `title` attributes.
	 * A missing `alt` becomes an empty string.
	 *
	 * @param originNode - The mdast `image` node.
	 * @param token - The source token covering the image.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<img>` element node.
	 */
	protected visitImageElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Image,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const attrs = this.#createImageAttrs(originNode.url, originNode.alt, originNode.title, token);
		return this.visitMarkdownElement(token, 'img', [], depth, parentNode, attrs);
	}

	/**
	 * Builds a `<ul>` or `<ol>` element. Adds a `start` attribute when the
	 * ordered list begins at a number other than 1.
	 *
	 * @param originNode - The mdast `list` node.
	 * @param token - The source token covering the list.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The list element node and its descendants.
	 */
	protected visitListElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: List,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const { ordered, start } = originNode;
		const needsStart = Boolean(ordered) && start != null && start !== 1;
		const attrs: MLASTHTMLAttr[] = needsStart ? [this.createSyntheticAttr('start', String(start), token)] : [];

		return this.visitMarkdownElement(token, ordered ? 'ol' : 'ul', originNode.children, depth, parentNode, attrs);
	}

	/**
	 * Builds a `<code>` element for inline code spans (backtick-delimited).
	 *
	 * The text child is located by searching for the node's `value` in the raw
	 * source range. The search starts after the opening backtick run so that a
	 * value which itself consists of backticks is never matched against the
	 * delimiter.
	 *
	 * @param originNode - The mdast `inlineCode` node.
	 * @param token - The source token covering the code span.
	 * @param offset - Start offset in the original source.
	 * @param endOffset - End offset in the original source.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<code>` element node (with a text child when content is found).
	 */
	protected visitInlineCode(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: InlineCode,
		token: Token,
		offset: number,
		endOffset: number,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const startTag = this.#createStartTag(token, 'code', depth, parentNode);
		const { value } = originNode;

		// Nothing to attach for an empty span (and `indexOf('')` would match at 0).
		if (value.length === 0) {
			return [startTag];
		}

		const raw = this.rawCode.slice(offset, endOffset);
		const delimiterLength = /^`+/.exec(raw)?.[0].length ?? 0;
		const valueStart = raw.indexOf(value, delimiterLength);

		// Defensive guard: the parsed value may not appear verbatim in the raw source.
		if (valueStart === -1) {
			return [startTag];
		}

		const valueOffset = offset + valueStart;
		this.#appendTextChild(startTag, valueOffset, valueOffset + value.length, depth + 1);

		return [startTag];
	}

	/**
	 * Builds a `<pre><code>` structure for code blocks.
	 * When a language is specified, adds `class="language-{lang}"` to the `<code>` element.
	 *
	 * The text child is located by searching for the node's `value` in the raw
	 * source range. For fenced blocks the search starts after the opening fence
	 * line, so content that happens to equal (part of) the info string — e.g.
	 * a `js` block whose body is `js` — is not matched against the fence line.
	 * When the value cannot be found verbatim, no text node is attached.
	 *
	 * @param originNode - The mdast `code` node.
	 * @param token - The source token covering the code block.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<pre>` and `<code>` element nodes.
	 */
	protected visitCodeBlock(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Code,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const preTag = this.#createStartTag(token, 'pre', depth, parentNode);

		const codeAttrs: MLASTHTMLAttr[] = originNode.lang
			? [this.createSyntheticAttr('class', `language-${originNode.lang}`, token)]
			: [];
		const codeTag = this.#createStartTag(token, 'code', depth + 1, preTag, codeAttrs);

		const { value, position } = originNode;
		if (value.length > 0 && position) {
			const startOffset = position.start.offset ?? 0;
			const raw = this.rawCode.slice(startOffset, position.end.offset ?? 0);

			// Skip the opening fence line (``` or ~~~ plus info string) when there is one.
			const openingFence = /^ {0,3}(?:`{3,}|~{3,})[^\n]*\n/.exec(raw);
			const valueStart = raw.indexOf(value, openingFence?.[0].length ?? 0);

			if (valueStart !== -1) {
				const valueOffset = startOffset + valueStart;
				this.#appendTextChild(codeTag, valueOffset, valueOffset + value.length, depth + 2);
			}
		}

		this.appendChild(preTag, codeTag);

		return [preTag, codeTag];
	}

	/**
	 * Builds a `<table>` element from a GFM table node.
	 * Registers the first row's start offset as a header row so that its cells
	 * become `<th>`.
	 *
	 * @param originNode - The mdast `table` node (GFM extension).
	 * @param token - The source token covering the table.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<table>` element node and its descendants.
	 */
	protected visitTableElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Table,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const headerOffset = originNode.children[0]?.position?.start.offset;
		if (headerOffset != null) {
			this.#headerRowOffsets.add(headerOffset);
		}

		return this.visitMarkdownElement(token, 'table', originNode.children as MdastNode[], depth, parentNode);
	}

	/**
	 * Dispatches a single mdast node to the appropriate visit method.
	 *
	 * @param originNode - The mdast node to convert.
	 * @param token - The source token covering the node's range.
	 * @param offset - Start offset in the original source.
	 * @param endOffset - End offset in the original source.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level nodes.
	 * @returns An array of AST nodes for recognized Markdown constructs,
	 *   or `null` when the node type is not handled here (the caller is
	 *   responsible for handling it — typically `text`, `html`, or
	 *   parser-specific node types).
	 */
	protected nodeizeMarkdownNode(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: MdastNode,
		token: Token,
		offset: number,
		endOffset: number,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] | null {
		switch (originNode.type) {
			// --- Constructs that become a plain element wrapping their children ---
			case 'heading': {
				return this.visitMarkdownElement(token, `h${originNode.depth}`, originNode.children, depth, parentNode);
			}
			case 'paragraph': {
				return this.visitMarkdownElement(token, 'p', originNode.children, depth, parentNode);
			}
			case 'emphasis': {
				return this.visitMarkdownElement(token, 'em', originNode.children, depth, parentNode);
			}
			case 'strong': {
				return this.visitMarkdownElement(token, 'strong', originNode.children, depth, parentNode);
			}
			case 'listItem': {
				return this.visitMarkdownElement(token, 'li', originNode.children, depth, parentNode);
			}
			case 'blockquote': {
				return this.visitMarkdownElement(token, 'blockquote', originNode.children, depth, parentNode);
			}
			case 'delete': {
				return this.visitMarkdownElement(token, 'del', originNode.children as MdastNode[], depth, parentNode);
			}

			// --- Childless elements ---
			case 'thematicBreak': {
				return this.visitMarkdownElement(token, 'hr', [], depth, parentNode);
			}
			case 'break': {
				return this.visitMarkdownElement(token, 'br', [], depth, parentNode);
			}

			// --- Constructs with dedicated visitors ---
			case 'link': {
				return this.visitLinkElement(originNode, token, depth, parentNode);
			}
			case 'image': {
				return this.visitImageElement(originNode, token, depth, parentNode);
			}
			case 'list': {
				return this.visitListElement(originNode, token, depth, parentNode);
			}
			case 'inlineCode': {
				return this.visitInlineCode(originNode, token, offset, endOffset, depth, parentNode);
			}
			case 'code': {
				return this.visitCodeBlock(originNode, token, depth, parentNode);
			}
			case 'linkReference': {
				return this.visitLinkReference(originNode, token, depth, parentNode);
			}
			case 'imageReference': {
				return this.visitImageReference(originNode, token, depth, parentNode);
			}
			case 'table': {
				return this.visitTableElement(originNode, token, depth, parentNode);
			}

			// --- Table internals ---
			case 'tableRow': {
				// `delete` reports whether this row's offset was registered as a header row.
				if (this.#headerRowOffsets.delete(offset)) {
					this.#currentCellName = 'th';
				}
				try {
					// tableRow.children is TableCell[] — safely widens to MdastNode[]
					return this.visitMarkdownElement(token, 'tr', originNode.children as MdastNode[], depth, parentNode);
				} finally {
					// Always fall back to body cells, even if visiting the row throws.
					this.#currentCellName = 'td';
				}
			}
			case 'tableCell': {
				return this.visitMarkdownElement(
					token,
					this.#currentCellName,
					originNode.children as MdastNode[],
					depth,
					parentNode,
				);
			}

			// --- Constructs without an element equivalent: kept as psblocks ---
			case 'yaml':
			case 'definition':
			case 'footnoteReference':
			case 'footnoteDefinition': {
				return this.#visitAsPsBlock(token, originNode.type, depth, parentNode);
			}

			// `text` and every other node type: null = the caller is responsible
			// for handling this node type.
			case 'text':
			default: {
				return null;
			}
		}
	}

	/**
	 * Resolves a linkReference using collected definitions, producing an `<a>` element.
	 * Falls back to a psblock when the definition is not found.
	 */
	private visitLinkReference(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: LinkReference,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const def = this.definitions.get(originNode.identifier);
		if (!def) {
			return this.#visitAsPsBlock(token, 'linkReference', depth, parentNode);
		}

		const attrs = this.#createLinkAttrs(def.url, def.title, token);
		return this.visitMarkdownElement(token, 'a', originNode.children as MdastNode[], depth, parentNode, attrs);
	}

	/**
	 * Resolves an imageReference using collected definitions, producing an `<img>` element.
	 * Falls back to a psblock when the definition is not found.
	 */
	private visitImageReference(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: ImageReference,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const def = this.definitions.get(originNode.identifier);
		if (!def) {
			return this.#visitAsPsBlock(token, 'imageReference', depth, parentNode);
		}

		const attrs = this.#createImageAttrs(def.url, originNode.alt, def.title, token);
		return this.visitMarkdownElement(token, 'img', [], depth, parentNode, attrs);
	}

	/**
	 * Extracts definition nodes from an mdast subtree and populates `this.definitions`.
	 *
	 * Per CommonMark spec, the first definition for a given identifier takes
	 * precedence, so duplicates are skipped via `Map.has`. Definitions may also
	 * sit inside container nodes (block quotes, list items, …) while still
	 * applying to the whole document, so nodes with `children` are traversed
	 * depth-first, which visits definitions in source order.
	 *
	 * @param children - The mdast nodes to scan (typically the root's children).
	 */
	// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
	protected collectDefinitions(children: readonly RootContent[]) {
		for (const child of children) {
			if (child.type === 'definition') {
				if (!this.definitions.has(child.identifier)) {
					this.definitions.set(child.identifier, child);
				}
				continue;
			}

			if ('children' in child && Array.isArray(child.children)) {
				this.collectDefinitions(child.children as readonly RootContent[]);
			}
		}
	}

	/**
	 * Creates the start-tag element shared by every synthetic element.
	 * The attribute list is copied so the caller's array is never aliased.
	 */
	#createStartTag(
		token: Token,
		nodeName: string,
		depth: number,
		parentNode: MLASTParentNode | null,
		attributes: readonly MLASTAttr[] = [],
	): MLASTElement {
		return {
			...token,
			...this.createToken(token),
			attributes: [...attributes],
			type: 'starttag',
			elementType: this.detectElementType(nodeName),
			namespace: getNamespace(nodeName, parentNode),
			childNodes: [],
			blockBehavior: null,
			depth,
			parentNode,
			pairNode: null,
			tagOpenChar: '',
			tagCloseChar: '',
			isGhost: false,
			isFragment: false,
			nodeName,
		};
	}

	/** Slices `[start, end)` from the source and appends it to `parent` as a text node. */
	#appendTextChild(parent: MLASTElement, start: number, end: number, depth: number): void {
		const fragment = this.sliceFragment(start, end);
		const textNode: MLASTText = {
			...fragment,
			...this.createToken(fragment),
			type: 'text',
			depth,
			nodeName: '#text',
			parentNode: parent,
		};

		this.appendChild(parent, textNode);
	}

	/** Emits a psblock named `nodeName` covering `token`. */
	#visitAsPsBlock(
		token: Token,
		nodeName: string,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		return this.visitPsBlock({
			...token,
			depth,
			parentNode,
			nodeName,
			isFragment: false,
		});
	}

	/** Builds the `href` (+ optional `title`) attributes of an `<a>` element. */
	#createLinkAttrs(url: string, title: string | null | undefined, token: Token): MLASTHTMLAttr[] {
		const attrs: MLASTHTMLAttr[] = [this.createSyntheticAttr('href', url, token)];
		if (title != null) {
			attrs.push(this.createSyntheticAttr('title', title, token));
		}
		return attrs;
	}

	/** Builds the `src`, `alt` (+ optional `title`) attributes of an `<img>` element. */
	#createImageAttrs(
		url: string,
		alt: string | null | undefined,
		title: string | null | undefined,
		token: Token,
	): MLASTHTMLAttr[] {
		const attrs: MLASTHTMLAttr[] = [
			this.createSyntheticAttr('src', url, token),
			this.createSyntheticAttr('alt', alt ?? '', token),
		];
		if (title != null) {
			attrs.push(this.createSyntheticAttr('title', title, token));
		}
		return attrs;
	}
}
|
|
632
|
+
|
|
633
|
+
/**
 * Computes the 1-based line number and 1-based column for a given offset.
 *
 * Only `\n` starts a new line; every other character (including `\r`)
 * advances the column by one.
 *
 * Equivalent to `getPosition()` in `@markuplint/parser-utils`, but that
 * function is not exported from the package. Kept as a standalone utility
 * to avoid coupling to parser-utils internals.
 *
 * @param source - The full source string.
 * @param offset - The 0-based character offset to resolve. Offsets past the
 *   end of `source` are treated as the end of `source`; offsets that are
 *   zero, negative, or `NaN` resolve to line 1, column 1.
 * @returns An object with 1-based `line` and `col` values.
 */
export function getLineAndColumn(source: string, offset: number): { line: number; col: number } {
	// Never scan past the end of the source: characters that do not exist
	// must not advance the column.
	const end = Math.min(offset, source.length);

	let line = 1;
	let col = 1;
	for (let i = 0; i < end; i++) {
		if (source[i] === '\n') {
			line++;
			col = 1;
		} else {
			col++;
		}
	}

	return { line, col };
}
|