@markuplint/markdown-parser 5.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,7 @@
1
+ /**
2
+ * @module @markuplint/markdown-parser
3
+ */
4
+
5
+ /** @internal Exported for subclass use by mdx-parser; not part of the public API. */
6
+ export { MarkdownAwareParser } from './markdown-aware-parser.js';
7
+ export { parser } from './parser.js';
@@ -0,0 +1,656 @@
1
+ import type {
2
+ MLASTAttr,
3
+ MLASTElement,
4
+ MLASTHTMLAttr,
5
+ MLASTNodeTreeItem,
6
+ MLASTParentNode,
7
+ MLASTText,
8
+ } from '@markuplint/ml-ast';
9
+ import type { ParserOptions, Token } from '@markuplint/parser-utils';
10
+ import type {
11
+ Code,
12
+ Definition,
13
+ Image,
14
+ ImageReference,
15
+ InlineCode,
16
+ Link,
17
+ LinkReference,
18
+ List,
19
+ RootContent,
20
+ Table,
21
+ } from 'mdast';
22
+
23
+ import { Parser, getNamespace } from '@markuplint/parser-utils';
24
+
25
+ type MdastNode = RootContent;
26
+
27
/**
 * Abstract base class for parsers that handle Markdown content.
 *
 * Provides shared logic for converting mdast nodes (headings, links, images,
 * lists, code, tables, etc.) into markuplint's AST. Both MarkdownParser and
 * MDXParser extend this class to avoid code duplication.
 */
export abstract class MarkdownAwareParser extends Parser<MdastNode> {
	/**
	 * Stores link/image reference definitions (`[id]: url "title"`)
	 * extracted during tokenization for resolving linkReference/imageReference nodes.
	 * Keyed by the mdast `identifier` of each definition.
	 */
	protected definitions = new Map<string, Definition>();

	/**
	 * Offsets of table rows that are header rows (first row of each table).
	 * Set by visitTableElement, read (and consumed) by nodeizeMarkdownNode
	 * for tableRow dispatch.
	 */
	readonly #headerRowOffsets = new Set<number>();

	/**
	 * Current cell element name ('th' or 'td').
	 * Set by tableRow processing, read by tableCell processing.
	 * Reset to 'td' after each row.
	 */
	#currentCellName: 'th' | 'td' = 'td';

	constructor(options?: ParserOptions) {
		super(options);
	}

	/**
	 * Resets mutable state accumulated during a previous `parse()` call.
	 *
	 * Must be called at the beginning of every `tokenize()` invocation to
	 * prevent definitions, header-row offsets, and cell-name state from
	 * leaking across successive `parse()` calls on the same parser instance.
	 */
	protected resetMarkdownState() {
		this.definitions.clear();
		this.#headerRowOffsets.clear();
		this.#currentCellName = 'td';
	}

	/**
	 * Adjusts the flattened node list for Markdown output.
	 *
	 * Disables whitespace and invalid-node exposure because Markdown
	 * generates only synthetic elements with no real HTML whitespace tokens.
	 *
	 * @param nodeList - The flattened node tree produced by the base class.
	 * @returns The adjusted node list.
	 */
	afterFlattenNodes(nodeList: readonly MLASTNodeTreeItem[]) {
		return super.afterFlattenNodes(nodeList, {
			exposeWhiteSpace: false,
			exposeInvalidNode: false,
		});
	}

	/**
	 * Creates a synthetic HTML attribute token for Markdown-derived elements.
	 *
	 * The attribute positions point to the element's own token range because
	 * Markdown syntax does not have discrete attribute source positions.
	 * Every sub-token (name, `=`, quotes, value) therefore shares the
	 * offset/line/col of `token`.
	 *
	 * @param name - The attribute name (e.g., `"href"`, `"alt"`).
	 * @param value - The attribute value extracted from Markdown syntax.
	 * @param token - The source token whose position is reused for the attribute.
	 * @returns A fully-formed HTML attribute node.
	 */
	protected createSyntheticAttr(name: string, value: string, token: Token): MLASTHTMLAttr {
		const emptyToken = this.createToken('', token.offset, token.line, token.col);
		const nameToken = this.createToken(name, token.offset, token.line, token.col);
		const valueToken = this.createToken(value, token.offset, token.line, token.col);
		const attrToken = this.createToken(`${name}="${value}"`, token.offset, token.line, token.col);

		return {
			...attrToken,
			type: 'attr',
			nodeName: name,
			spacesBeforeName: emptyToken,
			name: nameToken,
			spacesBeforeEqual: emptyToken,
			equal: this.createToken('=', token.offset, token.line, token.col),
			spacesAfterEqual: emptyToken,
			startQuote: this.createToken('"', token.offset, token.line, token.col),
			value: valueToken,
			endQuote: this.createToken('"', token.offset, token.line, token.col),
			isDuplicatable: false,
		};
	}

	/**
	 * Builds the start-tag node shared by every synthetic element this class emits.
	 *
	 * Markdown constructs have no literal `<` / `>` characters, so the tag
	 * open/close characters are empty and no pair (end tag) node is attached.
	 *
	 * @param token - The source token covering the construct.
	 * @param nodeName - The HTML element name to synthesize.
	 * @param depth - Nesting depth of the element in the AST.
	 * @param parentNode - Parent AST node, or `null` for top-level nodes.
	 * @param attributes - Attributes to attach (copied, never mutated).
	 * @returns A childless start-tag element node.
	 */
	#createSyntheticElement(
		token: Token,
		nodeName: string,
		depth: number,
		parentNode: MLASTParentNode | null,
		attributes: readonly MLASTAttr[] = [],
	): MLASTElement {
		return {
			...token,
			...this.createToken(token),
			attributes: [...attributes],
			type: 'starttag',
			elementType: this.detectElementType(nodeName),
			namespace: getNamespace(nodeName, parentNode),
			childNodes: [],
			blockBehavior: null,
			depth,
			parentNode,
			pairNode: null,
			tagOpenChar: '',
			tagCloseChar: '',
			isGhost: false,
			isFragment: false,
			nodeName,
		};
	}

	/**
	 * Builds a generic HTML element node from a Markdown construct.
	 *
	 * @param token - The source token covering the entire construct.
	 * @param nodeName - The HTML element name (e.g., `"p"`, `"h1"`, `"li"`).
	 * @param childNodes - The mdast children to recurse into.
	 * @param depth - Current nesting depth in the AST.
	 * @param parentNode - Parent AST node, or `null` for top-level nodes.
	 * @param attributes - Optional pre-built attributes to attach.
	 * @returns The element node followed by its descendants.
	 */
	protected visitMarkdownElement(
		token: Token,
		nodeName: string,
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		childNodes: readonly MdastNode[],
		depth: number,
		parentNode: MLASTParentNode | null,
		attributes: readonly MLASTAttr[] = [],
	): readonly MLASTNodeTreeItem[] {
		const startTag = this.#createSyntheticElement(token, nodeName, depth, parentNode, attributes);

		// Safe cast: childNodes are always subtypes of RootContent (= MdastNode)
		const siblings = this.visitChildren([...childNodes] as MdastNode[], startTag);

		return [startTag, ...siblings];
	}

	/**
	 * Builds an `<a>` element with `href` (and optionally `title`) attributes.
	 *
	 * @param originNode - The mdast `link` node.
	 * @param token - The source token covering the link.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<a>` element node and its descendants.
	 */
	protected visitLinkElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Link,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const attrs: MLASTHTMLAttr[] = [this.createSyntheticAttr('href', originNode.url, token)];

		if (originNode.title != null) {
			attrs.push(this.createSyntheticAttr('title', originNode.title, token));
		}

		return this.visitMarkdownElement(token, 'a', originNode.children, depth, parentNode, attrs);
	}

	/**
	 * Builds an `<img>` element with `src`, `alt`, and optionally `title` attributes.
	 * A missing `alt` is emitted as an empty `alt=""` attribute.
	 *
	 * @param originNode - The mdast `image` node.
	 * @param token - The source token covering the image.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<img>` element node.
	 */
	protected visitImageElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Image,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const attrs: MLASTHTMLAttr[] = [
			this.createSyntheticAttr('src', originNode.url, token),
			this.createSyntheticAttr('alt', originNode.alt ?? '', token),
		];

		if (originNode.title != null) {
			attrs.push(this.createSyntheticAttr('title', originNode.title, token));
		}

		return this.visitMarkdownElement(token, 'img', [], depth, parentNode, attrs);
	}

	/**
	 * Builds a `<ul>` or `<ol>` element. Adds a `start` attribute when the
	 * ordered list begins at a number other than 1.
	 *
	 * @param originNode - The mdast `list` node.
	 * @param token - The source token covering the list.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The list element node and its descendants.
	 */
	protected visitListElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: List,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const nodeName = originNode.ordered ? 'ol' : 'ul';
		const attrs: MLASTHTMLAttr[] = [];

		if (originNode.ordered && originNode.start != null && originNode.start !== 1) {
			attrs.push(this.createSyntheticAttr('start', String(originNode.start), token));
		}

		return this.visitMarkdownElement(token, nodeName, originNode.children, depth, parentNode, attrs);
	}

	/**
	 * Builds a `<code>` element for inline code spans (backtick-delimited).
	 *
	 * @param originNode - The mdast `inlineCode` node.
	 * @param token - The source token covering the code span.
	 * @param offset - Start offset in the original source.
	 * @param endOffset - End offset in the original source.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<code>` element node (with a text child when content is found).
	 */
	protected visitInlineCode(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: InlineCode,
		token: Token,
		offset: number,
		endOffset: number,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const startTag = this.#createSyntheticElement(token, 'code', depth, parentNode);

		const raw = this.rawCode.slice(offset, endOffset);
		const valueStart = raw.indexOf(originNode.value);
		// Defensive guard: if value cannot be found in raw source (e.g., whitespace-only code spans) or is empty
		if (valueStart === -1 || originNode.value.length === 0) {
			return [startTag];
		}

		const valueOffset = offset + valueStart;
		const valueEndOffset = valueOffset + originNode.value.length;
		const textToken = this.sliceFragment(valueOffset, valueEndOffset);

		const textNode: MLASTText = {
			...textToken,
			...this.createToken(textToken),
			type: 'text',
			depth: depth + 1,
			nodeName: '#text',
			parentNode: startTag,
		};

		this.appendChild(startTag, textNode);

		return [startTag];
	}

	/**
	 * Builds a `<pre><code>` structure for an mdast `code` node (fenced or indented block).
	 * When a language is specified, adds `class="language-{lang}"` to the `<code>` element.
	 *
	 * The code content becomes a text child of `<code>` when its exact value can
	 * be located in the raw source; otherwise `<code>` is left without children.
	 *
	 * @param originNode - The mdast `code` node.
	 * @param token - The source token covering the code block.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<pre>` and `<code>` element nodes.
	 */
	protected visitCodeBlock(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Code,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		// Build <pre> element
		const preTag = this.#createSyntheticElement(token, 'pre', depth, parentNode);

		// Build <code> element as child of <pre>
		const codeAttrs: MLASTHTMLAttr[] = [];
		if (originNode.lang) {
			codeAttrs.push(this.createSyntheticAttr('class', `language-${originNode.lang}`, token));
		}

		const codeTag = this.#createSyntheticElement(token, 'code', depth + 1, preTag, codeAttrs);

		// Add code content as text node if present
		const position = originNode.position;
		if (originNode.value.length > 0 && position) {
			const startOffset = position.start.offset ?? 0;
			const rawContent = this.rawCode.slice(startOffset, position.end.offset ?? 0);

			// A fenced block opens with an info line (e.g. "```js title=x"). Start the
			// search on the following line so that content which also appears in the
			// info string (such as a block containing just "js") is not matched there.
			const isFenced = /^ {0,3}(?:`{3,}|~{3,})/.test(rawContent);
			const searchFrom = isFenced ? rawContent.indexOf('\n') + 1 : 0;

			const valueStart = rawContent.indexOf(originNode.value, searchFrom);
			if (valueStart !== -1) {
				const valueOffset = startOffset + valueStart;
				const valueEndOffset = valueOffset + originNode.value.length;
				const textToken = this.sliceFragment(valueOffset, valueEndOffset);

				const textNode: MLASTText = {
					...textToken,
					...this.createToken(textToken),
					type: 'text',
					depth: depth + 2,
					nodeName: '#text',
					parentNode: codeTag,
				};

				this.appendChild(codeTag, textNode);
			}
		}

		this.appendChild(preTag, codeTag);

		return [preTag, codeTag];
	}

	/**
	 * Builds a `<table>` element from a GFM table node.
	 * Marks the first row's offset as a header row so that its cells become `<th>`.
	 *
	 * @param originNode - The mdast `table` node (GFM extension).
	 * @param token - The source token covering the table.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level.
	 * @returns The `<table>` element node and its descendants.
	 */
	protected visitTableElement(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: Table,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const firstRow = originNode.children[0];
		if (firstRow?.position?.start.offset != null) {
			this.#headerRowOffsets.add(firstRow.position.start.offset);
		}

		return this.visitMarkdownElement(token, 'table', originNode.children as MdastNode[], depth, parentNode);
	}

	/**
	 * Dispatches a single mdast node to the appropriate visit method.
	 *
	 * @param originNode - The mdast node to convert.
	 * @param token - The source token covering the node's range.
	 * @param offset - Start offset in the original source.
	 * @param endOffset - End offset in the original source.
	 * @param depth - Current nesting depth.
	 * @param parentNode - Parent AST node, or `null` for top-level nodes.
	 * @returns An array of AST nodes for recognized Markdown constructs,
	 *   or `null` when the node type is not handled here (the caller is
	 *   responsible for handling it — typically `text`, `html`, or
	 *   parser-specific node types).
	 */
	protected nodeizeMarkdownNode(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: MdastNode,
		token: Token,
		offset: number,
		endOffset: number,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] | null {
		switch (originNode.type) {
			case 'heading': {
				const nodeName = `h${originNode.depth}`;
				return this.visitMarkdownElement(token, nodeName, originNode.children, depth, parentNode);
			}
			case 'paragraph': {
				return this.visitMarkdownElement(token, 'p', originNode.children, depth, parentNode);
			}
			case 'emphasis': {
				return this.visitMarkdownElement(token, 'em', originNode.children, depth, parentNode);
			}
			case 'strong': {
				return this.visitMarkdownElement(token, 'strong', originNode.children, depth, parentNode);
			}
			case 'link': {
				return this.visitLinkElement(originNode, token, depth, parentNode);
			}
			case 'image': {
				return this.visitImageElement(originNode, token, depth, parentNode);
			}
			case 'list': {
				return this.visitListElement(originNode, token, depth, parentNode);
			}
			case 'listItem': {
				return this.visitMarkdownElement(token, 'li', originNode.children, depth, parentNode);
			}
			case 'blockquote': {
				return this.visitMarkdownElement(token, 'blockquote', originNode.children, depth, parentNode);
			}
			case 'thematicBreak': {
				return this.visitMarkdownElement(token, 'hr', [], depth, parentNode);
			}
			case 'break': {
				return this.visitMarkdownElement(token, 'br', [], depth, parentNode);
			}
			case 'inlineCode': {
				return this.visitInlineCode(originNode, token, offset, endOffset, depth, parentNode);
			}
			case 'code': {
				return this.visitCodeBlock(originNode, token, depth, parentNode);
			}
			case 'linkReference': {
				return this.visitLinkReference(originNode, token, depth, parentNode);
			}
			case 'imageReference': {
				return this.visitImageReference(originNode, token, depth, parentNode);
			}
			case 'table': {
				return this.visitTableElement(originNode, token, depth, parentNode);
			}
			case 'tableRow': {
				// `delete` both tests for and consumes the header marker registered by visitTableElement
				const isHeader = this.#headerRowOffsets.delete(offset);
				if (isHeader) {
					this.#currentCellName = 'th';
				}
				// tableRow.children is TableCell[] — safely widens to MdastNode[]
				const result = this.visitMarkdownElement(
					token,
					'tr',
					originNode.children as MdastNode[],
					depth,
					parentNode,
				);
				// Cells of this row have been visited; subsequent rows are body rows
				this.#currentCellName = 'td';
				return result;
			}
			case 'tableCell': {
				return this.visitMarkdownElement(
					token,
					this.#currentCellName,
					originNode.children as MdastNode[],
					depth,
					parentNode,
				);
			}
			case 'delete': {
				return this.visitMarkdownElement(token, 'del', originNode.children as MdastNode[], depth, parentNode);
			}
			case 'yaml':
			case 'definition':
			case 'footnoteReference':
			case 'footnoteDefinition': {
				// No HTML element equivalent is synthesized: keep the raw range as a preprocessor-specific block
				return this.visitPsBlock({
					...token,
					depth,
					parentNode,
					nodeName: originNode.type,
					isFragment: false,
				});
			}
			case 'text': {
				// Caller handles text nodes directly
				return null;
			}
			default: {
				// null = the caller is responsible for handling this node type
				return null;
			}
		}
	}

	/**
	 * Resolves a linkReference using collected definitions, producing an `<a>` element.
	 * Falls back to a psblock when the definition is not found.
	 */
	private visitLinkReference(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: LinkReference,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const def = this.definitions.get(originNode.identifier);
		if (!def) {
			return this.visitPsBlock({
				...token,
				depth,
				parentNode,
				nodeName: 'linkReference',
				isFragment: false,
			});
		}

		const attrs: MLASTHTMLAttr[] = [this.createSyntheticAttr('href', def.url, token)];
		if (def.title != null) {
			attrs.push(this.createSyntheticAttr('title', def.title, token));
		}

		return this.visitMarkdownElement(token, 'a', originNode.children as MdastNode[], depth, parentNode, attrs);
	}

	/**
	 * Resolves an imageReference using collected definitions, producing an `<img>` element.
	 * Falls back to a psblock when the definition is not found.
	 */
	private visitImageReference(
		// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
		originNode: ImageReference,
		token: Token,
		depth: number,
		parentNode: MLASTParentNode | null,
	): readonly MLASTNodeTreeItem[] {
		const def = this.definitions.get(originNode.identifier);
		if (!def) {
			return this.visitPsBlock({
				...token,
				depth,
				parentNode,
				nodeName: 'imageReference',
				isFragment: false,
			});
		}

		const attrs: MLASTHTMLAttr[] = [
			this.createSyntheticAttr('src', def.url, token),
			this.createSyntheticAttr('alt', originNode.alt ?? '', token),
		];
		if (def.title != null) {
			attrs.push(this.createSyntheticAttr('title', def.title, token));
		}

		return this.visitMarkdownElement(token, 'img', [], depth, parentNode, attrs);
	}

	/**
	 * Extracts definition nodes from mdast children and populates `this.definitions`.
	 *
	 * Per CommonMark spec, the first definition for a given identifier takes
	 * precedence. The tree is walked depth-first in source order, so
	 * duplicates are skipped via `Map.has` to honour the first-wins rule.
	 *
	 * Definitions nested inside container nodes (blockquotes, list items, ...)
	 * are collected too, because CommonMark lets a definition placed inside a
	 * container be referenced from anywhere in the document.
	 *
	 * @param children - The mdast nodes to scan (typically the root's children);
	 *   descendants are searched recursively.
	 */
	// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
	protected collectDefinitions(children: readonly RootContent[]) {
		for (const child of children) {
			if (child.type === 'definition') {
				if (!this.definitions.has(child.identifier)) {
					this.definitions.set(child.identifier, child);
				}
				continue;
			}

			if ('children' in child) {
				// Safe cast: every mdast child node type is a member of RootContent
				this.collectDefinitions(child.children as readonly RootContent[]);
			}
		}
	}
}
632
+
633
/**
 * Computes the 1-based line number and 1-based column for a given offset.
 *
 * Equivalent to `getPosition()` in `@markuplint/parser-utils`, but that
 * function is not exported from the package. Kept as a standalone utility
 * to avoid coupling to parser-utils internals.
 *
 * Only `\n` is treated as a line terminator; a `\r` preceding it counts as
 * an ordinary column on the line it ends. An offset beyond the end of
 * `source` resolves to the position just past the last character, and an
 * offset of zero or less resolves to line 1, column 1.
 *
 * @param source - The full source string.
 * @param offset - The 0-based character offset to resolve.
 * @returns An object with 1-based `line` and `col` values.
 */
export function getLineAndColumn(source: string, offset: number): { line: number; col: number } {
	// Never scan past the end of the source: characters that do not exist
	// must not advance the column.
	const end = Math.min(offset, source.length);

	let line = 1;
	let col = 1;
	for (let i = 0; i < end; i++) {
		if (source[i] === '\n') {
			line++;
			col = 1;
		} else {
			col++;
		}
	}
	return { line, col };
}