npm - @wdprlib/parser - Versions diffs - 3.1.2 → 3.2.0 - Mend

@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

package/dist/index.cjs +295 -118
package/dist/index.js +272 -95
package/package.json +5 -3
package/src/index.ts +163 -0
package/src/lexer/index.ts +20 -0
package/src/lexer/lexer.ts +687 -0
package/src/lexer/tokens.ts +141 -0
package/src/parser/constants.ts +173 -0
package/src/parser/depth.ts +251 -0
package/src/parser/index.ts +18 -0
package/src/parser/parse.ts +315 -0
package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
package/src/parser/postprocess/index.ts +15 -0
package/src/parser/postprocess/spanStrip.ts +697 -0
package/src/parser/preprocess/expr.ts +265 -0
package/src/parser/preprocess/index.ts +38 -0
package/src/parser/preprocess/typography.ts +67 -0
package/src/parser/preprocess/utils.ts +250 -0
package/src/parser/preprocess/whitespace.ts +111 -0
package/src/parser/rules/block/align.ts +282 -0
package/src/parser/rules/block/bibliography.ts +359 -0
package/src/parser/rules/block/block-list.ts +689 -0
package/src/parser/rules/block/blockquote.ts +238 -0
package/src/parser/rules/block/center.ts +87 -0
package/src/parser/rules/block/clear-float.ts +75 -0
package/src/parser/rules/block/code.ts +187 -0
package/src/parser/rules/block/collapsible.ts +337 -0
package/src/parser/rules/block/comment.ts +73 -0
package/src/parser/rules/block/content-separator.ts +79 -0
package/src/parser/rules/block/definition-list.ts +270 -0
package/src/parser/rules/block/div.ts +400 -0
package/src/parser/rules/block/embed-block.ts +153 -0
package/src/parser/rules/block/footnoteblock.ts +200 -0
package/src/parser/rules/block/heading.ts +142 -0
package/src/parser/rules/block/horizontal-rule.ts +61 -0
package/src/parser/rules/block/html.ts +222 -0
package/src/parser/rules/block/iframe.ts +239 -0
package/src/parser/rules/block/iftags.ts +150 -0
package/src/parser/rules/block/include.ts +179 -0
package/src/parser/rules/block/index.ts +127 -0
package/src/parser/rules/block/list.ts +244 -0
package/src/parser/rules/block/math.ts +183 -0
package/src/parser/rules/block/module/backlinks/index.ts +31 -0
package/src/parser/rules/block/module/backlinks/types.ts +21 -0
package/src/parser/rules/block/module/categories/index.ts +34 -0
package/src/parser/rules/block/module/categories/types.ts +21 -0
package/src/parser/rules/block/module/css/index.ts +37 -0
package/src/parser/rules/block/module/iftags/condition.ts +109 -0
package/src/parser/rules/block/module/iftags/index.ts +26 -0
package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
package/src/parser/rules/block/module/iftags/types.ts +63 -0
package/src/parser/rules/block/module/include/index.ts +20 -0
package/src/parser/rules/block/module/include/resolve.ts +556 -0
package/src/parser/rules/block/module/index.ts +122 -0
package/src/parser/rules/block/module/join/index.ts +34 -0
package/src/parser/rules/block/module/join/types.ts +23 -0
package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
package/src/parser/rules/block/module/listpages/extract.ts +410 -0
package/src/parser/rules/block/module/listpages/index.ts +83 -0
package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
package/src/parser/rules/block/module/listpages/parser.ts +106 -0
package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
package/src/parser/rules/block/module/listpages/types.ts +513 -0
package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
package/src/parser/rules/block/module/listusers/extract.ts +45 -0
package/src/parser/rules/block/module/listusers/index.ts +36 -0
package/src/parser/rules/block/module/listusers/parser.ts +54 -0
package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
package/src/parser/rules/block/module/listusers/types.ts +93 -0
package/src/parser/rules/block/module/mapping.ts +61 -0
package/src/parser/rules/block/module/page-tree/index.ts +38 -0
package/src/parser/rules/block/module/page-tree/types.ts +29 -0
package/src/parser/rules/block/module/rate/index.ts +28 -0
package/src/parser/rules/block/module/rate/types.ts +19 -0
package/src/parser/rules/block/module/resolve.ts +411 -0
package/src/parser/rules/block/module/types-common.ts +59 -0
package/src/parser/rules/block/module/types.ts +61 -0
package/src/parser/rules/block/module/utils.ts +43 -0
package/src/parser/rules/block/module/walk.ts +380 -0
package/src/parser/rules/block/module.ts +164 -0
package/src/parser/rules/block/orphan-li.ts +177 -0
package/src/parser/rules/block/paragraph.ts +157 -0
package/src/parser/rules/block/table-block.ts +726 -0
package/src/parser/rules/block/table.ts +441 -0
package/src/parser/rules/block/tabview.ts +331 -0
package/src/parser/rules/block/toc.ts +129 -0
package/src/parser/rules/block/utils.ts +615 -0
package/src/parser/rules/index.ts +49 -0
package/src/parser/rules/inline/anchor-name.ts +154 -0
package/src/parser/rules/inline/anchor.ts +327 -0
package/src/parser/rules/inline/bibcite.ts +153 -0
package/src/parser/rules/inline/bold.ts +86 -0
package/src/parser/rules/inline/color.ts +140 -0
package/src/parser/rules/inline/comment.ts +90 -0
package/src/parser/rules/inline/equation-ref.ts +115 -0
package/src/parser/rules/inline/expr.ts +526 -0
package/src/parser/rules/inline/footnote.ts +223 -0
package/src/parser/rules/inline/guillemet.ts +64 -0
package/src/parser/rules/inline/html.ts +132 -0
package/src/parser/rules/inline/image.ts +328 -0
package/src/parser/rules/inline/index.ts +150 -0
package/src/parser/rules/inline/italic.ts +74 -0
package/src/parser/rules/inline/line-break.ts +326 -0
package/src/parser/rules/inline/link-anchor.ts +147 -0
package/src/parser/rules/inline/link-single.ts +164 -0
package/src/parser/rules/inline/link-star.ts +134 -0
package/src/parser/rules/inline/link-triple.ts +267 -0
package/src/parser/rules/inline/math-inline.ts +126 -0
package/src/parser/rules/inline/monospace.ts +78 -0
package/src/parser/rules/inline/raw.ts +262 -0
package/src/parser/rules/inline/size.ts +244 -0
package/src/parser/rules/inline/span.ts +424 -0
package/src/parser/rules/inline/strikethrough.ts +115 -0
package/src/parser/rules/inline/subscript.ts +84 -0
package/src/parser/rules/inline/superscript.ts +84 -0
package/src/parser/rules/inline/text.ts +84 -0
package/src/parser/rules/inline/underline.ts +127 -0
package/src/parser/rules/inline/user.ts +147 -0
package/src/parser/rules/inline/utils.ts +344 -0
package/src/parser/rules/types.ts +252 -0
package/src/parser/rules/utils.ts +155 -0
package/src/parser/toc.ts +130 -0

package/src/parser/rules/inline/footnote.ts ADDED Viewed

@@ -0,0 +1,223 @@
+/**
+ *
+ * Parses the Wikidot footnote syntax: `[[footnote]]content[[/footnote]]`.
+ *
+ * Footnotes work in two parts: the inline `[[footnote]]` block produces
+ * a numbered superscript reference marker at the point of use, while the
+ * actual footnote content is collected separately and rendered by a
+ * `[[footnoteblock]]` element (typically at the bottom of the page).
+ *
+ * Footnote content supports multiple paragraphs:
+ * - The first paragraph is rendered as inline content (no wrapping `<p>` tag)
+ * - Subsequent paragraphs (separated by blank lines) are each wrapped
+ *   in `<p>` tags, matching Wikidot's rendering behavior
+ * - Single newlines within a paragraph become `<br />` elements
+ *
+ * The parsed footnote content is pushed into `ctx.footnotes` (an array
+ * of Element arrays) so the renderer can later assign sequential numbers
+ * and generate the footnote block.
+ *
+ * Produces a simple `"footnote"` AST element (a marker with no data)
+ * at the inline reference point.
+ *
+ * @module
+ */
+import type { Element } from "@wdprlib/ast";
+import type { InlineRule, ParseContext, RuleResult } from "../types";
+import { currentToken } from "../types";
+import { parseBlockName } from "../utils";
+import { parseInlineUntil } from "./utils";
+/**
+ * Inline rule for parsing `[[footnote]]content[[/footnote]]`.
+ *
+ * Triggered by a `BLOCK_OPEN` (`[[`) token. Verifies the block name
+ * is `footnote`, then parses multiline inline content until the matching
+ * `[[/footnote]]` closing tag is found.
+ *
+ * Side effect: appends the parsed footnote content to `ctx.footnotes`.
+ */
+export const footnoteRule: InlineRule = {
+  name: "footnote",
+  startTokens: ["BLOCK_OPEN"],
+  /**
+   * Attempts to parse a footnote block at the current position.
+   *
+   * Matches `[[footnote]]` (whitespace is allowed before `]]`), then
+   * collects the body into paragraphs until `[[/footnote]]` or the end of
+   * the token stream. A blank line starts a new paragraph; a single
+   * newline becomes a `line-break` element. The collected content is
+   * appended to `ctx.footnotes`; an `unclosed-block` warning is pushed to
+   * `ctx.diagnostics` when no closing tag was found.
+   *
+   * @param ctx - Parse context with token stream and current position
+   * @returns A successful result with a `"footnote"` marker element,
+   *          or `{ success: false }` if this is not a valid footnote
+   */
+  parse(ctx: ParseContext): RuleResult<Element> {
+    const openToken = currentToken(ctx);
+    if (openToken.type !== "BLOCK_OPEN") {
+      return { success: false };
+    }
+    // `pos` is the single cursor; the consumed-token count reported to the
+    // caller is always `pos - ctx.pos`, so it is derived on return instead
+    // of being tracked in a second counter.
+    let pos = ctx.pos + 1;
+    // Parse block name
+    const nameResult = parseBlockName(ctx, pos);
+    if (!nameResult || nameResult.name !== "footnote") {
+      return { success: false };
+    }
+    pos += nameResult.consumed;
+    // Expect ]] (optionally preceded by whitespace)
+    while (ctx.tokens[pos]?.type === "WHITESPACE") {
+      pos++;
+    }
+    if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
+      return { success: false };
+    }
+    pos++;
+    // Parse content until [[/footnote]]
+    // Wikidot footnote behavior:
+    // - First paragraph: inline content (no <p> tag)
+    // - After blank line: content wrapped in <p> tag
+    const paragraphs: Element[][] = [[]];
+    let currentParagraph = 0;
+    let foundClose = false;
+    while (pos < ctx.tokens.length) {
+      const token = ctx.tokens[pos];
+      if (!token || token.type === "EOF") {
+        break;
+      }
+      // Check for [[/footnote]]
+      if (token.type === "BLOCK_END_OPEN") {
+        const closeNameResult = parseBlockName(ctx, pos + 1);
+        if (closeNameResult && closeNameResult.name === "footnote") {
+          foundClose = true;
+          // Skip `[[/` and the block name
+          pos += 1 + closeNameResult.consumed;
+          // Skip whitespace
+          while (ctx.tokens[pos]?.type === "WHITESPACE") {
+            pos++;
+          }
+          // Skip ]] when present (a missing `]]` is tolerated here)
+          if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
+            pos++;
+          }
+          break;
+        }
+      }
+      // Handle NEWLINE - check if it's a paragraph break (blank line)
+      if (token.type === "NEWLINE") {
+        pos++;
+        // Peek ahead: skip whitespace to check for blank line (e.g. "\n  \n")
+        let peekPos = pos;
+        while (ctx.tokens[peekPos]?.type === "WHITESPACE") {
+          peekPos++;
+        }
+        if (ctx.tokens[peekPos]?.type === "NEWLINE") {
+          // Blank line: commit the whitespace skip, then swallow every
+          // consecutive newline and open a new paragraph.
+          pos = peekPos;
+          while (ctx.tokens[pos]?.type === "NEWLINE") {
+            pos++;
+          }
+          currentParagraph++;
+          paragraphs[currentParagraph] = [];
+        } else {
+          // Single newline: for Wikidot compatibility it becomes <br />.
+          // The peeked whitespace is NOT skipped; it is left for the
+          // inline parser on the next iteration.
+          paragraphs[currentParagraph]!.push({ element: "line-break" });
+        }
+        continue;
+      }
+      // Parse inline content up to the next closing-tag opener
+      const inlineCtx: ParseContext = { ...ctx, pos };
+      const inlineResult = parseInlineUntil(inlineCtx, "BLOCK_END_OPEN");
+      if (inlineResult.consumed > 0) {
+        // Advance by whatever the inline parser consumed, even when it
+        // produced no elements. An inline rule may consume tokens while
+        // intentionally emitting nothing; falling back to raw text in
+        // that case would re-emit the suppressed content token by token.
+        paragraphs[currentParagraph]!.push(...inlineResult.elements);
+        pos += inlineResult.consumed;
+      } else {
+        // Fallback: nothing was consumed, so emit this token as plain
+        // text and step over it to guarantee forward progress.
+        paragraphs[currentParagraph]!.push({ element: "text", data: token.value });
+        pos++;
+      }
+    }
+    // Build children: first paragraph inline, subsequent paragraphs wrapped in <p>
+    const children: Element[] = [];
+    for (let i = 0; i < paragraphs.length; i++) {
+      const para = paragraphs[i]!;
+      // Remove leading/trailing line-breaks
+      while (para.length > 0 && para[0]?.element === "line-break") {
+        para.shift();
+      }
+      while (para.length > 0 && para[para.length - 1]?.element === "line-break") {
+        para.pop();
+      }
+      if (para.length === 0) continue;
+      if (i === 0) {
+        // First paragraph: inline
+        children.push(...para);
+      } else {
+        // Subsequent paragraphs: wrapped in <p>
+        children.push({
+          element: "container",
+          data: {
+            type: "paragraph",
+            attributes: {},
+            elements: para,
+          },
+        });
+      }
+    }
+    if (!foundClose) {
+      ctx.diagnostics.push({
+        severity: "warning",
+        code: "unclosed-block",
+        message: "Missing closing tag [[/footnote]] for [[footnote]]",
+        position: openToken.position,
+      });
+    }
+    // Store footnote content in context
+    ctx.footnotes.push(children);
+    // Return simple footnote marker
+    return {
+      success: true,
+      elements: [
+        {
+          element: "footnote",
+        },
+      ],
+      consumed: pos - ctx.pos,
+    };
+  },
+};

package/src/parser/rules/inline/guillemet.ts ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ *
+ * Parses Wikidot's guillemet (angle quotation mark) syntax.
+ *
+ * Converts ASCII double-angle-bracket sequences into their Unicode
+ * typographic equivalents:
+ * - `<<` becomes `\u00AB` (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
+ * - `>>` becomes `\u00BB` (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
+ *
+ * These typographic characters are commonly used in European languages
+ * (particularly French and Russian) as quotation marks. Wikidot provides
+ * this shorthand so authors do not need to type the Unicode characters
+ * directly.
+ *
+ * Produces a `"text"` AST element containing the Unicode character.
+ *
+ * @module
+ */
+import type { Element } from "@wdprlib/ast";
+import type { InlineRule, ParseContext, RuleResult } from "../types";
+/**
+ * Inline rule for converting `<<` and `>>` to typographic guillemets.
+ *
+ * Triggered by `LEFT_DOUBLE_ANGLE` or `RIGHT_DOUBLE_ANGLE` tokens.
+ * This is a simple one-to-one token replacement with no content
+ * parsing or nesting.
+ */
+export const guillemetRule: InlineRule = {
+  name: "guillemet",
+  startTokens: ["LEFT_DOUBLE_ANGLE", "RIGHT_DOUBLE_ANGLE"],
+  /**
+   * Replaces the double-angle token at `ctx.pos` with its guillemet glyph.
+   *
+   * @param ctx - Parse context with token stream and current position
+   * @returns A one-token successful result holding a `"text"` element
+   *          with `\u00AB` (for `<<`) or `\u00BB` (for `>>`), or
+   *          `{ success: false }` for any other or missing token
+   */
+  parse(ctx: ParseContext): RuleResult<Element> {
+    let glyph: string;
+    switch (ctx.tokens[ctx.pos]?.type) {
+      case "LEFT_DOUBLE_ANGLE":
+        // << → «
+        glyph = "\u00AB";
+        break;
+      case "RIGHT_DOUBLE_ANGLE":
+        // >> → »
+        glyph = "\u00BB";
+        break;
+      default:
+        return { success: false };
+    }
+    return {
+      success: true,
+      elements: [{ element: "text", data: glyph }],
+      consumed: 1,
+    };
+  },
+};

package/src/parser/rules/inline/html.ts ADDED Viewed

@@ -0,0 +1,132 @@
+/**
+ *
+ * Inline-position gate for `[[html]]...[[/html]]` when the parser is
+ * configured with `allowHtmlBlocks: false`.
+ *
+ * The block-level {@link htmlBlockRule} already removes `[[html]]` blocks
+ * that sit at the start of a line, but the block dispatcher never
+ * reaches a `[[html]]` that appears mid-paragraph. Without this inline
+ * rule, the body of a disabled-but-inline-positioned `[[html]]` would
+ * end up parsed as paragraph text and leak into the output as escaped
+ * HTML.
+ *
+ * When enabled (`allowHtmlBlocks !== false`), the rule does nothing
+ * (returns `success: false`) so the existing paragraph behaviour is
+ * preserved: a stray inline `[[html]]` renders as text. The block-level
+ * rule handles the proper case where `[[html]]` is on its own line.
+ *
+ * When disabled (`allowHtmlBlocks === false`):
+ * - A well-formed `[[html ...]]...[[/html]]` is fully consumed and
+ *   produces no AST element, emitting an `html-block-disabled` info
+ *   diagnostic.
+ * - An unclosed `[[html ...]]` is consumed up to the next blank line
+ *   (or to the end of the token stream if there is none) so the body
+ *   cannot leak as inline text, emitting both `unclosed-block`
+ *   (warning) and `html-block-disabled` (info).
+ *
+ * @module
+ */
+import type { Element } from "@wdprlib/ast";
+import type { InlineRule, ParseContext, RuleResult } from "../types";
+import { currentToken } from "../types";
+import { parseBlockName, parseAttributesRaw } from "../block/utils";
+import { lookaheadHasHtmlClose } from "../block/html";
+/**
+ * Inline rule that gates `[[html]]` when the setting disallows it.
+ */
+export const htmlInlineRule: InlineRule = {
+  name: "html",
+  startTokens: ["BLOCK_OPEN"],
+  /**
+   * Swallows an inline `[[html ...]]` block when HTML blocks are disabled.
+   *
+   * Declines (`{ success: false }`) when the tokens at `ctx.pos` are not a
+   * well-formed `[[html ...]]` opener, or when
+   * `ctx.settings.allowHtmlBlocks` is anything other than `false`.
+   * Otherwise consumes the body and returns no elements, pushing an
+   * `html-block-disabled` info diagnostic (plus an `unclosed-block`
+   * warning if no `[[/html]]` was found).
+   *
+   * @param ctx - Parse context with token stream and current position
+   * @returns An empty successful result covering the consumed tokens,
+   *          or `{ success: false }` when the rule does not apply
+   */
+  parse(ctx: ParseContext): RuleResult<Element> {
+    const opener = currentToken(ctx);
+    if (opener.type !== "BLOCK_OPEN") {
+      return { success: false };
+    }
+    // Single cursor; the consumed count is `cursor - ctx.pos` throughout.
+    let cursor = ctx.pos + 1;
+    const openName = parseBlockName(ctx, cursor);
+    if (!openName || openName.name.toLowerCase() !== "html") {
+      return { success: false };
+    }
+    cursor += openName.consumed;
+    cursor += parseAttributesRaw(ctx, cursor).consumed;
+    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
+      return { success: false };
+    }
+    cursor++;
+    // Enabled: do not claim the tokens, so a stray inline `[[html]]`
+    // keeps falling through to plain-text rendering.
+    if (ctx.settings.allowHtmlBlocks !== false) {
+      return { success: false };
+    }
+    // Disabled: skip the body up to a real `[[/html]]` (BLOCK_END_OPEN,
+    // name, optional whitespace, BLOCK_CLOSE). The blank-line stop only
+    // applies when no such close exists ahead, so a properly closed body
+    // spanning several paragraphs is still consumed in full.
+    const closeExistsAhead = lookaheadHasHtmlClose(ctx, cursor);
+    let closed = false;
+    for (; cursor < ctx.tokens.length; cursor++) {
+      const tok = ctx.tokens[cursor];
+      if (!tok || tok.type === "EOF") {
+        break;
+      }
+      // An unclosed block ends at the first blank line so it does not
+      // swallow the paragraphs that follow.
+      const atBlankLine = tok.type === "NEWLINE" && ctx.tokens[cursor + 1]?.type === "NEWLINE";
+      if (atBlankLine && !closeExistsAhead) {
+        break;
+      }
+      if (tok.type !== "BLOCK_END_OPEN") {
+        continue;
+      }
+      const closeName = parseBlockName(ctx, cursor + 1);
+      if (!closeName || closeName.name.toLowerCase() !== "html") {
+        continue;
+      }
+      let afterName = cursor + 1 + closeName.consumed;
+      while (ctx.tokens[afterName]?.type === "WHITESPACE") {
+        afterName++;
+      }
+      if (ctx.tokens[afterName]?.type !== "BLOCK_CLOSE") {
+        continue;
+      }
+      // Step past `[[/html]]` and one optional trailing newline.
+      closed = true;
+      cursor = afterName + 1;
+      if (ctx.tokens[cursor]?.type === "NEWLINE") {
+        cursor++;
+      }
+      break;
+    }
+    if (!closed) {
+      ctx.diagnostics.push({
+        severity: "warning",
+        code: "unclosed-block",
+        message: "Missing closing tag [[/html]] for [[html]]",
+        position: opener.position,
+      });
+    }
+    ctx.diagnostics.push({
+      severity: "info",
+      code: "html-block-disabled",
+      message: "[[html]] block ignored: disabled by settings",
+      position: opener.position,
+    });
+    return { success: true, elements: [], consumed: cursor - ctx.pos };
+  },
+};