npm - @markuplint/parser-utils - Versions diffs - 4.8.10 → 5.0.0-alpha.0 - Mend

@markuplint/parser-utils 4.8.10 → 5.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/lib/parser.d.ts CHANGED Viewed

@@ -1,11 +1,30 @@
 import type { Token, ChildToken, QuoteSet, ParseOptions, ParserOptions, Tokenized, ValueType } from './types.js';
-import type { EndTagType, MLASTDocument, MLASTParentNode, MLParser, ParserAuthoredElementNameDistinguishing, MLASTElement, MLASTElementCloseTag, MLASTToken, MLASTNodeTreeItem, MLASTTag, MLASTText, MLASTAttr, MLASTChildNode, MLASTSpreadAttr, ElementType, Walker, MLASTHTMLAttr, MLASTPreprocessorSpecificBlockConditionalType } from '@markuplint/ml-ast';
+import type { EndTagType, MLASTDocument, MLASTParentNode, MLParser, ParserAuthoredElementNameDistinguishing, MLASTElement, MLASTElementCloseTag, MLASTToken, MLASTNodeTreeItem, MLASTTag, MLASTText, MLASTAttr, MLASTChildNode, MLASTSpreadAttr, ElementType, Walker, MLASTHTMLAttr, MLASTBlockBehavior } from '@markuplint/ml-ast';
 import { AttrState } from './enums.js';
 import { ParserError } from './parser-error.js';
+/**
+ * Abstract base class for all markuplint parsers. Provides the core parsing pipeline
+ * including tokenization, tree traversal, node flattening, and error handling.
+ * Subclasses must implement `nodeize` to convert language-specific AST nodes
+ * into the markuplint AST format.
+ *
+ * @template Node - The language-specific AST node type produced by the tokenizer
+ * @template State - An optional parser state type that persists across tokenization
+ */
 export declare abstract class Parser<Node extends {} = {}, State extends unknown = null> implements MLParser {
     #private;
     state: State;
+    /**
+     * Creates a new Parser instance with the given options and initial state.
+     *
+     * @param options - Configuration options controlling tag handling, whitespace, and quoting behavior
+     * @param defaultState - The initial parser state, cloned and restored after each parse call
+     */
     constructor(options?: ParserOptions, defaultState?: State);
+    /**
+     * The pattern used to distinguish authored (component) element names
+     * from native HTML elements, as specified by the parse options.
+     */
     get authoredElementName(): ParserAuthoredElementNameDistinguishing | undefined;
     /**
      * Treat the value as `true` when the attribute is booleanish and its value is omitted.
@@ -26,55 +45,224 @@ export declare abstract class Parser<Node extends {} = {}, State extends unknown
      * - `"never"`: Never need
      */
     get endTag(): EndTagType;
+    /**
+     * The current raw source code being parsed, which may have been
+     * preprocessed (e.g., ignore blocks masked, front matter removed).
+     */
     get rawCode(): string;
+    /**
+     * Whether tag names should be compared in a case-sensitive manner.
+     * When false (the default), tag name comparisons are case-insensitive (HTML behavior).
+     */
     get tagNameCaseSensitive(): boolean;
+    /**
+     * Tokenizes the raw source code into language-specific AST nodes.
+     * Subclasses should override this method to provide actual tokenization logic.
+     *
+     * @param options - Parse options controlling offset, depth, and other parse-time settings
+     * @returns The tokenized result containing the AST node array and fragment flag
+     */
     tokenize(options?: ParseOptions): Tokenized<Node, State>;
+    /**
+     * Hook called before parsing begins, allowing subclasses to preprocess
+     * the raw source code. The default implementation prepends offset spaces
+     * based on the parse options.
+     *
+     * @param rawCode - The raw source code about to be parsed
+     * @param options - Parse options that may specify offset positioning
+     * @returns The preprocessed source code to be used for tokenization
+     */
     beforeParse(rawCode: string, options?: ParseOptions): string;
+    /**
+     * Parses raw source code through the full pipeline: preprocessing, tokenization,
+     * traversal, flattening, ignore-block restoration, and post-processing.
+     * Returns the complete markuplint AST document.
+     *
+     * @param rawCode - The raw source code to parse
+     * @param options - Parse options controlling offsets, depth, front matter, and authored element names
+     * @returns The parsed AST document containing the node list and fragment flag
+     */
     parse(rawCode: string, options?: ParseOptions): MLASTDocument;
+    /**
+     * Hook called after the main parse pipeline completes, allowing subclasses
+     * to perform final transformations on the node list. The default implementation
+     * removes any offset spaces that were prepended during preprocessing.
+     *
+     * @param nodeList - The fully parsed and flattened node list
+     * @param options - The parse options used for this parse invocation
+     * @returns The post-processed node list
+     */
     afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[];
+    /**
+     * Wraps an arbitrary error into a ParserError with source location information.
+     * Extracts line and column numbers from common error formats.
+     *
+     * @param error - The original error to wrap
+     * @returns A ParserError containing the original error's message and location data
+     */
     parseError(error: any): ParserError;
+    /**
+     * Recursively traverses language-specific AST nodes by calling `nodeize` on each,
+     * filtering duplicates, and separating child nodes from ancestor-level siblings.
+     *
+     * @param originNodes - The language-specific AST nodes to traverse
+     * @param parentNode - The parent markuplint AST node, or null for top-level nodes
+     * @param depth - The current nesting depth in the tree
+     * @returns An object containing `childNodes` at the current depth and `siblings` that belong to ancestor levels
+     */
     traverse(originNodes: readonly Node[], parentNode: (MLASTParentNode | null) | undefined, depth: number): {
         childNodes: readonly MLASTChildNode[];
         siblings: readonly MLASTNodeTreeItem[];
     };
+    /**
+     * Hook called after traversal completes, used to sort the resulting node tree
+     * by source position. Subclasses may override for custom post-traversal logic.
+     *
+     * @param nodeTree - The unsorted node tree produced by traversal
+     * @returns The node tree sorted by source position
+     */
     afterTraverse(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[];
+    /**
+     * Converts a single language-specific AST node into one or more markuplint AST nodes.
+     * Subclasses must override this method to provide actual node conversion logic
+     * using visitor methods like `visitElement`, `visitText`, `visitComment`, etc.
+     *
+     * @param originNode - The language-specific AST node to convert
+     * @param parentNode - The parent markuplint AST node, or null for top-level nodes
+     * @param depth - The current nesting depth in the tree
+     * @returns An array of markuplint AST nodes produced from the origin node
+     */
     nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[];
+    /**
+     * Post-processes the nodes produced by `nodeize`, separating them into siblings
+     * at the current depth and ancestors that belong to a shallower depth level.
+     * Doctype nodes at depth 0 are promoted to ancestors.
+     *
+     * @param siblings - The nodes produced by `nodeize` for a single origin node
+     * @param parentNode - The parent markuplint AST node, or null for top-level nodes
+     * @param depth - The current nesting depth
+     * @returns An object with `siblings` at the current depth and `ancestors` at shallower depths
+     */
     afterNodeize(siblings: readonly MLASTNodeTreeItem[], parentNode: MLASTParentNode | null, depth: number): {
         siblings: MLASTChildNode[];
         ancestors: MLASTNodeTreeItem[];
     };
+    /**
+     * Flattens a hierarchical node tree into a flat, sorted list by walking
+     * the tree depth-first and removing duplicated nodes.
+     *
+     * @param nodeTree - The hierarchical node tree to flatten
+     * @returns A flat array of all nodes in source order
+     */
     flattenNodes(nodeTree: readonly MLASTNodeTreeItem[]): readonly MLASTNodeTreeItem[];
+    /**
+     * Post-processes the flattened node list by exposing remnant whitespace and
+     * invalid nodes between known nodes, converting orphan end tags to bogus markers,
+     * concatenating adjacent text nodes, and trimming overlapping text.
+     *
+     * @param nodeList - The flat node list to post-process
+     * @param options - Controls which post-processing steps are applied
+     * @returns The cleaned-up flat node list
+     */
     afterFlattenNodes(nodeList: readonly MLASTNodeTreeItem[], options?: {
         readonly exposeInvalidNode?: boolean;
         readonly exposeWhiteSpace?: boolean;
         readonly concatText?: boolean;
     }): readonly MLASTNodeTreeItem[];
+    /**
+     * Creates an AST doctype node from a token containing the doctype
+     * name, public ID, and system ID.
+     *
+     * @param token - The child token with doctype-specific properties
+     * @returns An array containing the single doctype AST node
+     */
     visitDoctype(token: ChildToken & {
         readonly name: string;
         readonly publicId: string;
         readonly systemId: string;
     }): readonly MLASTNodeTreeItem[];
+    /**
+     * Creates an AST comment node from a token. Automatically detects whether
+     * the comment is a bogus comment (not starting with `<!--`).
+     *
+     * @param token - The child token containing the comment's raw text and position
+     * @param options - Optional settings to override the bogus detection
+     * @returns An array containing the single comment AST node
+     */
     visitComment(token: ChildToken, options?: {
         readonly isBogus?: boolean;
     }): readonly MLASTNodeTreeItem[];
+    /**
+     * Creates AST text node(s) from a token. Optionally re-parses the text content
+     * to discover embedded HTML tags within it.
+     *
+     * @param token - The child token containing the text content and position
+     * @param options - Controls whether to search for embedded tags and how to handle invalid ones
+     * @returns An array of AST nodes; a single text node or multiple tag/text nodes if tags were found
+     */
     visitText(token: ChildToken, options?: {
         readonly researchTags?: boolean;
         readonly invalidTagAsText?: boolean;
     }): readonly MLASTNodeTreeItem[];
+    /**
+     * Creates AST element node(s) from a token, including the start tag, optional end tag,
+     * and recursively traversed child nodes. Handles ghost elements (empty raw),
+     * self-closing tags, and nameless fragments (e.g., JSX `<>`).
+     *
+     * @param token - The child token with the element's node name; namespace is auto-detected from tag name and parent node
+     * @param childNodes - The language-specific child AST nodes to traverse
+     * @param options - Controls end tag creation, fragment handling, and property overrides
+     * @returns An array of AST nodes including the start tag, optional end tag, and any sibling nodes
+     */
     visitElement(token: ChildToken & {
         readonly nodeName: string;
-        readonly namespace: string;
     }, childNodes?: readonly Node[], options?: {
         readonly createEndTagToken?: (startTag: MLASTElement) => ChildToken | null;
         readonly namelessFragment?: boolean;
         readonly overwriteProps?: Partial<MLASTElement>;
     }): readonly MLASTNodeTreeItem[];
+    /**
+     * Creates an AST preprocessor-specific block node (e.g., for template directives
+     * like `{#if}`, `{#each}`, or front matter). Recursively traverses child nodes.
+     *
+     * @param token - The child token with the block's node name and fragment flag
+     * @param childNodes - The language-specific child AST nodes to traverse
+     * @param blockBehavior - The block behavior if this is a control-flow block (e.g., "if", "each")
+     * @param originBlockNode - The original language-specific block node for reference
+     * @returns An array of AST nodes including the block node and any sibling nodes
+     */
     visitPsBlock(token: ChildToken & {
         readonly nodeName: string;
         readonly isFragment: boolean;
-    }, childNodes?: readonly Node[], conditionalType?: MLASTPreprocessorSpecificBlockConditionalType, originBlockNode?: Node): readonly MLASTNodeTreeItem[];
+    }, childNodes?: readonly Node[], blockBehavior?: MLASTBlockBehavior | null, originBlockNode?: Node): readonly MLASTNodeTreeItem[];
+    /**
+     * Traverses a list of child nodes under the given parent, appending the resulting
+     * child AST nodes to the parent and returning any sibling nodes that belong
+     * to ancestor levels. Skips traversal for raw text elements (e.g., `<script>`, `<style>`).
+     *
+     * @param children - The language-specific child AST nodes to traverse
+     * @param parentNode - The parent markuplint AST node to which children will be appended
+     * @returns An array of sibling nodes that belong to ancestor depth levels
+     */
     visitChildren(children: readonly Node[], parentNode: MLASTParentNode | null): readonly MLASTNodeTreeItem[];
+    /**
+     * Attempts to parse a token as a JSX spread attribute (e.g., `{...props}`).
+     * Returns null if the token does not match the spread attribute pattern.
+     *
+     * @param token - The token to inspect for spread attribute syntax
+     * @returns A spread attribute AST node, or null if the token is not a spread attribute
+     */
     visitSpreadAttr(token: Token): MLASTSpreadAttr | null;
+    /**
+     * Parses a token into a fully structured attribute AST node, breaking it down
+     * into its constituent parts: spaces, name, equal sign, quotes, and value.
+     * Also detects spread attributes. If there is leftover text after the attribute,
+     * it is returned in the `__rightText` property for further processing.
+     *
+     * @param token - The token containing the raw attribute text and position
+     * @param options - Controls quoting behavior, value types, and the initial parser state
+     * @returns The parsed attribute AST node with an optional `__rightText` for remaining unparsed content
+     */
     visitAttr(token: Token, options?: {
         readonly quoteSet?: readonly QuoteSet[];
         readonly noQuoteValueType?: ValueType;
@@ -83,31 +271,121 @@ export declare abstract class Parser<Node extends {} = {}, State extends unknown
     }): MLASTAttr & {
         __rightText?: string;
     };
+    /**
+     * Re-parses a text token to discover embedded HTML/XML tags within it,
+     * splitting the content into a sequence of tag and text AST nodes.
+     * Handles self-closing detection, depth tracking, and void element recognition.
+     *
+     * @param token - The child token containing the code fragment to re-parse
+     * @param options - Controls whether nameless fragments (JSX `<>`) are recognized
+     * @returns An array of tag and text AST nodes discovered in the code fragment
+     */
     parseCodeFragment(token: ChildToken, options?: {
         readonly namelessFragment?: boolean;
     }): (MLASTTag | MLASTText)[];
-    updateLocation(node: MLASTNodeTreeItem, props: Partial<Pick<MLASTNodeTreeItem, 'startOffset' | 'startLine' | 'startCol' | 'depth'>>): void;
     /**
-     * Set new raw code to target node.
+     * Updates the position and depth properties of an AST node.
      *
-     * Replace the raw code and update the start/end offset/line/column.
+     * @param node - The AST node whose location should be updated
+     * @param props - The new position and depth values to apply (only provided values are changed)
+     */
+    updateLocation(node: MLASTNodeTreeItem, props: Partial<Pick<MLASTNodeTreeItem, 'offset' | 'line' | 'col' | 'depth'>>): void;
+    /**
+     * Sets new raw code on the target node.
      *
      * @param node - The target node whose raw code is replaced
      * @param raw - The new raw code
      */
     updateRaw(node: MLASTToken, raw: string): void;
+    /**
+     * Updates the node name and/or element type of an element or close tag AST node.
+     * Useful for renaming elements or changing their classification after initial parsing.
+     *
+     * @param el - The element or close tag AST node to update
+     * @param props - The properties to overwrite on the element
+     */
     updateElement(el: MLASTElement, props: Partial<Pick<MLASTElement, 'nodeName' | 'elementType'>>): void;
     updateElement(el: MLASTElementCloseTag, props: Partial<Pick<MLASTElementCloseTag, 'nodeName'>>): void;
+    /**
+     * Updates metadata properties on an HTML attribute AST node, such as marking
+     * it as a directive, dynamic value, or setting its potential name/value
+     * for preprocessor-specific attribute transformations.
+     *
+     * @param attr - The HTML attribute AST node to update
+     * @param props - The metadata properties to overwrite on the attribute
+     */
     updateAttr(attr: MLASTHTMLAttr, props: Partial<Pick<MLASTHTMLAttr, 'isDynamicValue' | 'isDirective' | 'potentialName' | 'potentialValue' | 'valueType' | 'candidate' | 'isDuplicatable'>>): void;
+    /**
+     * Determines the element type (e.g., "html", "web-component", "authored") for a
+     * given tag name, using the parser's authored element name distinguishing pattern.
+     *
+     * @param nodeName - The tag name to classify
+     * @param defaultPattern - A fallback pattern if no authored element name pattern is set
+     * @returns The element type classification
+     */
     detectElementType(nodeName: string, defaultPattern?: ParserAuthoredElementNameDistinguishing): ElementType;
+    /**
+     * Creates a new MLASTToken with a generated UUID.
+     * Accepts either a Token object or a raw string with explicit start coordinates.
+     *
+     * @param token - A Token object or raw string to create the AST token from
+     * @param offset - The zero-based byte offset where the token starts (required when token is a string)
+     * @param line - The one-based line number where the token starts (required when token is a string)
+     * @param col - The one-based column number where the token starts (required when token is a string)
+     * @returns An AST token with UUID, start position, and raw content
+     */
     createToken(token: Token): MLASTToken;
-    createToken(token: string, startOffset: number, startLine: number, startCol: number): MLASTToken;
+    createToken(token: string, offset: number, line: number, col: number): MLASTToken;
+    /**
+     * Extracts a Token from the current raw code at the given byte offset range,
+     * computing the line and column from the source position.
+     *
+     * @param start - The starting byte offset (inclusive) in the raw code
+     * @param end - The ending byte offset (exclusive) in the raw code; if omitted, slices to the end
+     * @returns A Token containing the sliced raw content and its start position
+     */
     sliceFragment(start: number, end?: number): Token;
+    /**
+     * Calculates start and end byte offsets from line/column positions
+     * within the current raw source code.
+     *
+     * @param startLine - The starting line number (1-based)
+     * @param startCol - The starting column number (1-based)
+     * @param endLine - The ending line number (1-based)
+     * @param endCol - The ending column number (1-based)
+     * @returns The computed start and end byte offsets
+     */
     getOffsetsFromCode(startLine: number, startCol: number, endLine: number, endCol: number): {
         offset: number;
         endOffset: number;
     };
+    /**
+     * Walks through a node list depth-first, invoking the walker callback for each node.
+     * The walker receives the current node, the sequentially previous node, and the depth.
+     * Automatically recurses into child nodes of parent elements and preprocessor blocks.
+     *
+     * @template Node - The specific AST node type being walked
+     * @param nodeList - The list of nodes to walk
+     * @param walker - The callback invoked for each node during the walk
+     * @param depth - The current depth (starts at 0 for top-level calls)
+     */
     walk<Node extends MLASTNodeTreeItem>(nodeList: readonly Node[], walker: Walker<Node>, depth?: number): void;
+    /**
+     * Appends child nodes to a parent node, updating parent references and
+     * maintaining sorted order by source position. If a child already exists
+     * in the parent (by UUID), it is replaced in place rather than duplicated.
+     *
+     * @param parentNode - The parent node to append children to, or null (no-op)
+     * @param childNodes - The child nodes to append
+     */
     appendChild(parentNode: MLASTParentNode | null, ...childNodes: readonly MLASTChildNode[]): void;
+    /**
+     * Replaces a child node within a parent's child list with one or more replacement nodes.
+     * If the old child is not found in the parent, the operation is a no-op.
+     *
+     * @param parentNode - The parent node containing the child to replace
+     * @param oldChildNode - The existing child node to be replaced
+     * @param replacementChildNodes - The replacement nodes to insert at the old child's position
+     */
     replaceChild(parentNode: MLASTParentNode, oldChildNode: MLASTChildNode, ...replacementChildNodes: readonly MLASTChildNode[]): void;
 }