npm - @wdprlib/parser - Versions diffs - 3.1.1 → 3.2.0 - Mend

@wdprlib/parser 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

package/dist/index.cjs +312 -121
package/dist/index.js +289 -98
package/package.json +5 -3
package/src/index.ts +163 -0
package/src/lexer/index.ts +20 -0
package/src/lexer/lexer.ts +687 -0
package/src/lexer/tokens.ts +141 -0
package/src/parser/constants.ts +173 -0
package/src/parser/depth.ts +251 -0
package/src/parser/index.ts +18 -0
package/src/parser/parse.ts +315 -0
package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
package/src/parser/postprocess/index.ts +15 -0
package/src/parser/postprocess/spanStrip.ts +697 -0
package/src/parser/preprocess/expr.ts +265 -0
package/src/parser/preprocess/index.ts +38 -0
package/src/parser/preprocess/typography.ts +67 -0
package/src/parser/preprocess/utils.ts +250 -0
package/src/parser/preprocess/whitespace.ts +111 -0
package/src/parser/rules/block/align.ts +282 -0
package/src/parser/rules/block/bibliography.ts +359 -0
package/src/parser/rules/block/block-list.ts +689 -0
package/src/parser/rules/block/blockquote.ts +238 -0
package/src/parser/rules/block/center.ts +87 -0
package/src/parser/rules/block/clear-float.ts +75 -0
package/src/parser/rules/block/code.ts +187 -0
package/src/parser/rules/block/collapsible.ts +337 -0
package/src/parser/rules/block/comment.ts +73 -0
package/src/parser/rules/block/content-separator.ts +79 -0
package/src/parser/rules/block/definition-list.ts +270 -0
package/src/parser/rules/block/div.ts +400 -0
package/src/parser/rules/block/embed-block.ts +153 -0
package/src/parser/rules/block/footnoteblock.ts +200 -0
package/src/parser/rules/block/heading.ts +142 -0
package/src/parser/rules/block/horizontal-rule.ts +61 -0
package/src/parser/rules/block/html.ts +222 -0
package/src/parser/rules/block/iframe.ts +239 -0
package/src/parser/rules/block/iftags.ts +150 -0
package/src/parser/rules/block/include.ts +179 -0
package/src/parser/rules/block/index.ts +127 -0
package/src/parser/rules/block/list.ts +244 -0
package/src/parser/rules/block/math.ts +183 -0
package/src/parser/rules/block/module/backlinks/index.ts +31 -0
package/src/parser/rules/block/module/backlinks/types.ts +21 -0
package/src/parser/rules/block/module/categories/index.ts +34 -0
package/src/parser/rules/block/module/categories/types.ts +21 -0
package/src/parser/rules/block/module/css/index.ts +37 -0
package/src/parser/rules/block/module/iftags/condition.ts +109 -0
package/src/parser/rules/block/module/iftags/index.ts +26 -0
package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
package/src/parser/rules/block/module/iftags/types.ts +63 -0
package/src/parser/rules/block/module/include/index.ts +20 -0
package/src/parser/rules/block/module/include/resolve.ts +556 -0
package/src/parser/rules/block/module/index.ts +122 -0
package/src/parser/rules/block/module/join/index.ts +34 -0
package/src/parser/rules/block/module/join/types.ts +23 -0
package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
package/src/parser/rules/block/module/listpages/extract.ts +410 -0
package/src/parser/rules/block/module/listpages/index.ts +83 -0
package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
package/src/parser/rules/block/module/listpages/parser.ts +106 -0
package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
package/src/parser/rules/block/module/listpages/types.ts +513 -0
package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
package/src/parser/rules/block/module/listusers/extract.ts +45 -0
package/src/parser/rules/block/module/listusers/index.ts +36 -0
package/src/parser/rules/block/module/listusers/parser.ts +54 -0
package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
package/src/parser/rules/block/module/listusers/types.ts +93 -0
package/src/parser/rules/block/module/mapping.ts +61 -0
package/src/parser/rules/block/module/page-tree/index.ts +38 -0
package/src/parser/rules/block/module/page-tree/types.ts +29 -0
package/src/parser/rules/block/module/rate/index.ts +28 -0
package/src/parser/rules/block/module/rate/types.ts +19 -0
package/src/parser/rules/block/module/resolve.ts +411 -0
package/src/parser/rules/block/module/types-common.ts +59 -0
package/src/parser/rules/block/module/types.ts +61 -0
package/src/parser/rules/block/module/utils.ts +43 -0
package/src/parser/rules/block/module/walk.ts +380 -0
package/src/parser/rules/block/module.ts +164 -0
package/src/parser/rules/block/orphan-li.ts +177 -0
package/src/parser/rules/block/paragraph.ts +157 -0
package/src/parser/rules/block/table-block.ts +726 -0
package/src/parser/rules/block/table.ts +441 -0
package/src/parser/rules/block/tabview.ts +331 -0
package/src/parser/rules/block/toc.ts +129 -0
package/src/parser/rules/block/utils.ts +615 -0
package/src/parser/rules/index.ts +49 -0
package/src/parser/rules/inline/anchor-name.ts +154 -0
package/src/parser/rules/inline/anchor.ts +327 -0
package/src/parser/rules/inline/bibcite.ts +153 -0
package/src/parser/rules/inline/bold.ts +86 -0
package/src/parser/rules/inline/color.ts +140 -0
package/src/parser/rules/inline/comment.ts +90 -0
package/src/parser/rules/inline/equation-ref.ts +115 -0
package/src/parser/rules/inline/expr.ts +526 -0
package/src/parser/rules/inline/footnote.ts +223 -0
package/src/parser/rules/inline/guillemet.ts +64 -0
package/src/parser/rules/inline/html.ts +132 -0
package/src/parser/rules/inline/image.ts +328 -0
package/src/parser/rules/inline/index.ts +150 -0
package/src/parser/rules/inline/italic.ts +74 -0
package/src/parser/rules/inline/line-break.ts +326 -0
package/src/parser/rules/inline/link-anchor.ts +147 -0
package/src/parser/rules/inline/link-single.ts +164 -0
package/src/parser/rules/inline/link-star.ts +134 -0
package/src/parser/rules/inline/link-triple.ts +267 -0
package/src/parser/rules/inline/math-inline.ts +126 -0
package/src/parser/rules/inline/monospace.ts +78 -0
package/src/parser/rules/inline/raw.ts +262 -0
package/src/parser/rules/inline/size.ts +244 -0
package/src/parser/rules/inline/span.ts +424 -0
package/src/parser/rules/inline/strikethrough.ts +115 -0
package/src/parser/rules/inline/subscript.ts +84 -0
package/src/parser/rules/inline/superscript.ts +84 -0
package/src/parser/rules/inline/text.ts +84 -0
package/src/parser/rules/inline/underline.ts +127 -0
package/src/parser/rules/inline/user.ts +147 -0
package/src/parser/rules/inline/utils.ts +344 -0
package/src/parser/rules/types.ts +252 -0
package/src/parser/rules/utils.ts +155 -0
package/src/parser/toc.ts +130 -0

package/src/parser/preprocess/whitespace.ts ADDED Viewed

@@ -0,0 +1,111 @@
+/**
+ *
+ * Whitespace normalization preprocessing for Wikidot markup.
+ *
+ * This module ensures the lexer and parser receive input with consistent
+ * whitespace conventions. It handles platform differences (DOS/Mac newlines),
+ * normalizes exotic whitespace characters that users may paste from external
+ * sources, and applies Wikidot-specific behaviors like backslash line continuation.
+ *
+ * Substitutions are applied in a deliberate order:
+ * 1. Newline normalization (DOS `\r\n` and legacy Mac `\r` to Unix `\n`)
+ * 2. Non-standard leading whitespace replacement (nbsp, figure space to regular space)
+ * 3. Whitespace-only line stripping (collapse to empty lines)
+ * 4. Backslash line continuation (`\\\n` to line-break marker U+E000)
+ * 5. Tab expansion (tab to four spaces)
+ * 6. Null character replacement (NUL to space)
+ * 7. Leading/trailing newline removal
+ *
+ * @module
+ */
+/**
+ * Matches a run of non-standard whitespace characters (non-breaking space
+ * U+00A0, figure space U+2007) at the start of lines. These are replaced with
+ * regular ASCII spaces so the parser's indentation logic works correctly.
+ *
+ * The `m` flag makes `^` anchor at every line start and the `g` flag makes
+ * the replacement apply to every line. Only a run that begins exactly at the
+ * line start is matched; such characters appearing after any other character
+ * (including an ASCII space) are left untouched.
+ */
+const LEADING_NONSTANDARD_WHITESPACE = /^[\u00a0\u2007]+/gm;
+/**
+ * Matches lines containing only whitespace (collapsed to empty lines).
+ *
+ * Because `\s` also matches `\n`, a single match can span several consecutive
+ * blank or whitespace-only lines, so such a run is reduced to one empty line
+ * rather than one empty line per input line. A lone empty line between two
+ * non-empty lines is not matched and is kept as is.
+ */
+const WHITESPACE_ONLY_LINE = /^\s+$/gm;
+/**
+ * Matches one or more newlines at the very start of the text.
+ * There is no `m` flag, so `^` anchors only at the beginning of the input.
+ */
+const LEADING_NEWLINES = /^\n+/;
+/**
+ * Matches one or more newlines at the very end of the text.
+ * There is no `m` flag, so `$` anchors only at the end of the input.
+ */
+const TRAILING_NEWLINES = /\n+$/;
+/**
+ * Matches DOS (`\r\n`) and legacy Mac (`\r`) line endings.
+ * A `\r\n` pair is consumed as one match, so it yields a single replacement.
+ */
+const DOS_MAC_NEWLINES = /\r\n?/g;
+/**
+ * Matches a backslash immediately followed by a newline.
+ * In Wikidot, `\` at end of line acts as an explicit line break (`<br />`).
+ * Both characters are part of the match, so replacing it removes the newline
+ * as well as the backslash.
+ */
+const CONCAT_LINES = /\\\n/g;
+/** Matches every tab character (each one is expanded to four spaces). */
+const TABS = /\t/g;
+/** Matches every null (NUL, U+0000) character (each one is replaced with a space). */
+const NULL_CHARS = /\0/g;
+/**
+ * Replace non-standard whitespace characters at the start of each line
+ * with the same number of regular ASCII spaces.
+ *
+ * This ensures indentation-sensitive constructs (like nested lists) work
+ * correctly regardless of whether the user typed regular spaces, non-breaking
+ * spaces, or figure spaces.
+ *
+ * Only the run matched by `LEADING_NONSTANDARD_WHITESPACE` is rewritten; the
+ * rest of each line is returned unchanged. The replacement width is taken
+ * from the length of the matched run, so the column position of the text
+ * that follows is preserved.
+ *
+ * @param text - Input text with potentially non-standard leading whitespace
+ * @returns Text with leading non-standard whitespace replaced by ASCII spaces
+ */
+function replaceLeadingSpaces(text: string): string {
+  return text.replace(LEADING_NONSTANDARD_WHITESPACE, (match) => {
+    return " ".repeat(match.length); // one ASCII space per matched character
+  });
+}
+/**
+ * Apply all whitespace normalization substitutions to the given text.
+ *
+ * Substitutions are applied in a specific order that avoids interference
+ * between steps (e.g., DOS newlines must be normalized before backslash
+ * continuation can be detected).
+ *
+ * The backslash continuation step converts `\\\n` to the Private Use Area
+ * character U+E000, which the lexer later recognizes as an explicit line break.
+ * This approach avoids ambiguity with other uses of the backslash character.
+ *
+ * Consequences of the ordering that are visible in this function:
+ * - Whitespace-only lines are stripped before the backslash step, so a
+ *   backslash is only converted when it sits directly before a `\n`.
+ * - Tabs and NUL characters are converted to spaces after whitespace-only
+ *   lines have been stripped, so a line made up solely of NUL characters
+ *   ends up as a line of spaces and is not collapsed.
+ * - Leading and trailing newlines are trimmed last, after every other step
+ *   has had the chance to produce or remove newlines.
+ *
+ * The input string is not modified; a new string is returned.
+ *
+ * @param text - Raw input text
+ * @returns Text with normalized whitespace, ready for typography preprocessing
+ */
+export function substitute(text: string): string {
+  let result = text;
+  // Normalize DOS (\r\n) and legacy Mac (\r) line endings to \n first; later patterns only look for \n
+  result = result.replace(DOS_MAC_NEWLINES, "\n");
+  // Replace leading non-breaking / figure spaces with the same number of regular spaces
+  result = replaceLeadingSpaces(result);
+  // Strip lines with only whitespace (the line's content is removed, leaving an empty line)
+  result = result.replace(WHITESPACE_ONLY_LINE, "");
+  // Backslash at end of line → line break marker (U+E000)
+  // Wikidot treats \ at end of line as <br />; the backslash and the newline are both replaced by the marker
+  result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));
+  // Tabs to spaces (four per tab, regardless of column)
+  result = result.replace(TABS, "    ");
+  // Null characters to spaces (one space per NUL)
+  result = result.replace(NULL_CHARS, " ");
+  // Remove all newlines at the very start and very end of the text
+  result = result.replace(LEADING_NEWLINES, "");
+  result = result.replace(TRAILING_NEWLINES, "");
+  return result;
+}

package/src/parser/rules/block/align.ts ADDED Viewed

@@ -0,0 +1,282 @@
+/**
+ *
+ * Block rule for Wikidot alignment containers.
+ *
+ * Wikidot provides a shorthand bracket syntax for wrapping content in a
+ * directional alignment container:
+ *
+ * ```
+ * [[>]]        ... [[/>]]        right-aligned
+ * [[<]]        ... [[/<]]        left-aligned
+ * [[=]]        ... [[/=]]        center-aligned
+ * [[==]]       ... [[/==]]       justify-aligned
+ * ```
+ *
+ * Each pair acts as a block-level wrapper. The opening tag must appear at
+ * the start of a line and be followed by a newline. Body content is parsed
+ * recursively as block-level markup, and the matching closing tag terminates
+ * the container.
+ *
+ * The resulting AST node is a generic container element whose `type` field
+ * carries the alignment direction (e.g. `{ align: "right" }`).
+ *
+ * @module
+ */
+import type { Element } from "@wdprlib/ast";
+import type { BlockRule, ParseContext, RuleResult } from "../types";
+import { currentToken } from "../types";
+import { parseBlocksUntil } from "./utils";
+/**
+ * The four text-alignment directions Wikidot supports.
+ *
+ * Markup symbols used for each direction in this module:
+ * `<` = left, `>` = right, `=` = center, `==` = justify.
+ */
+type AlignDirection = "left" | "right" | "center" | "justify";
+/**
+ * Attempts to parse the interior of an align opening tag starting after
+ * the BLOCK_OPEN (`[[`) token.
+ *
+ * The function inspects the token(s) immediately following `[[` to determine
+ * which alignment direction is requested:
+ *
+ * | Tokens after `[[`    | Direction   |
+ * |----------------------|-------------|
+ * | `>` `]]`             | right       |
+ * | `<` `]]`             | left        |
+ * | `=` `]]`             | center      |
+ * | `=` `=` `]]`         | justify     |
+ *
+ * The `>` character may arrive as either a BLOCKQUOTE_MARKER (when the
+ * line starts with `[[`) or as a TEXT token (when it does not).
+ *
+ * The function only reads `ctx.tokens`; it does not advance `ctx.pos` or
+ * modify the context. The returned `consumed` count covers the symbol
+ * token(s) plus the BLOCK_CLOSE token (2 or 3), and does not include the
+ * BLOCK_OPEN token that precedes `pos`.
+ *
+ * For the `=` forms only the token type (EQUALS) is checked, not its value.
+ * The center and justify checks cannot both succeed, because they require
+ * different token types at `pos + 1`.
+ *
+ * @param ctx - Current parse context.
+ * @param pos - Token index right after the BLOCK_OPEN token.
+ * @returns The detected direction and how many tokens were consumed,
+ *          or `null` if the tokens do not form a valid align open tag.
+ */
+function parseAlignOpen(
+  ctx: ParseContext,
+  pos: number,
+): { direction: AlignDirection; consumed: number } | null {
+  const tokens = ctx.tokens;
+  // After BLOCK_OPEN, expect specific patterns; running past the end of the token list is not a match
+  const firstToken = tokens[pos];
+  if (!firstToken) return null;
+  // [[>]] - right, with ">" tokenized as a blockquote marker
+  if (
+    firstToken.type === "BLOCKQUOTE_MARKER" &&
+    firstToken.value === ">" &&
+    tokens[pos + 1]?.type === "BLOCK_CLOSE"
+  ) {
+    return { direction: "right", consumed: 2 };
+  }
+  // Also handle TEXT ">" for non-line-start cases
+  if (
+    firstToken.type === "TEXT" &&
+    firstToken.value === ">" &&
+    tokens[pos + 1]?.type === "BLOCK_CLOSE"
+  ) {
+    return { direction: "right", consumed: 2 };
+  }
+  // [[<]] - left; only a TEXT "<" is accepted. NOTE(review): the earlier comment hinted at a LEFT_DOUBLE_ANGLE token, which would not match here - confirm against the lexer
+  if (
+    firstToken.type === "TEXT" &&
+    firstToken.value === "<" &&
+    tokens[pos + 1]?.type === "BLOCK_CLOSE"
+  ) {
+    return { direction: "left", consumed: 2 };
+  }
+  // [[=]] - center (single =); presumably the lexer emits one EQUALS token per "=" - TODO confirm
+  if (firstToken.type === "EQUALS" && tokens[pos + 1]?.type === "BLOCK_CLOSE") {
+    return { direction: "center", consumed: 2 };
+  }
+  // [[==]] - justify (double =): two EQUALS tokens followed by BLOCK_CLOSE
+  if (
+    firstToken.type === "EQUALS" &&
+    tokens[pos + 1]?.type === "EQUALS" &&
+    tokens[pos + 2]?.type === "BLOCK_CLOSE"
+  ) {
+    return { direction: "justify", consumed: 3 };
+  }
+  return null;
+}
+/**
+ * Tests whether the tokens at the current position form a closing align
+ * tag (`[[/>]]`, `[[/<]]`, `[[/=]]`, or `[[/==]]`) that matches
+ * the given direction.
+ *
+ * The closing tag always starts with a BLOCK_END_OPEN token (`[[/`)
+ * followed by the same symbol(s) as the opening tag plus BLOCK_CLOSE.
+ *
+ * Only the pattern for `direction` is tested, so a closing tag that belongs
+ * to a different alignment direction is reported as no match. The function
+ * reads `ctx.tokens` starting at `ctx.pos` and does not modify the context.
+ *
+ * On a match, `consumed` counts every token of the closing tag including
+ * BLOCK_END_OPEN and BLOCK_CLOSE (3, or 4 for justify). Without a match it
+ * is 0.
+ *
+ * @param ctx       - Current parse context (reads from `ctx.pos`).
+ * @param direction - The alignment direction of the currently open block,
+ *                    used to select the expected closing pattern.
+ * @returns An object with `match` (whether the close tag was found) and
+ *          `consumed` (number of tokens the closing tag occupies).
+ */
+function isAlignClose(
+  ctx: ParseContext,
+  direction: AlignDirection,
+): { match: boolean; consumed: number } {
+  const tokens = ctx.tokens;
+  let pos = ctx.pos;
+  if (tokens[pos]?.type !== "BLOCK_END_OPEN") {
+    return { match: false, consumed: 0 };
+  }
+  pos++; // step past BLOCK_END_OPEN; pos now indexes the direction symbol
+  // [[/>]] - right; ">" is accepted as either BLOCKQUOTE_MARKER or TEXT, mirroring the opening tag
+  if (direction === "right") {
+    if (
+      (tokens[pos]?.type === "BLOCKQUOTE_MARKER" || tokens[pos]?.type === "TEXT") &&
+      tokens[pos]?.value === ">" &&
+      tokens[pos + 1]?.type === "BLOCK_CLOSE"
+    ) {
+      return { match: true, consumed: 3 };
+    }
+  }
+  // [[/<]] - left; only a TEXT "<" is accepted
+  if (direction === "left") {
+    if (
+      tokens[pos]?.type === "TEXT" &&
+      tokens[pos]?.value === "<" &&
+      tokens[pos + 1]?.type === "BLOCK_CLOSE"
+    ) {
+      return { match: true, consumed: 3 };
+    }
+  }
+  // [[/=]] - center; only the EQUALS token type is checked, not its value
+  if (direction === "center") {
+    if (tokens[pos]?.type === "EQUALS" && tokens[pos + 1]?.type === "BLOCK_CLOSE") {
+      return { match: true, consumed: 3 };
+    }
+  }
+  // [[/==]] - justify; two EQUALS tokens, hence one more token consumed than the other forms
+  if (direction === "justify") {
+    if (
+      tokens[pos]?.type === "EQUALS" &&
+      tokens[pos + 1]?.type === "EQUALS" &&
+      tokens[pos + 2]?.type === "BLOCK_CLOSE"
+    ) {
+      return { match: true, consumed: 4 };
+    }
+  }
+  return { match: false, consumed: 0 };
+}
+/**
+ * Block rule that matches Wikidot directional alignment containers.
+ *
+ * Parsing strategy:
+ * 1. Verify the first token is BLOCK_OPEN at line start.
+ * 2. Delegate to `parseAlignOpen()` to identify direction and consume
+ *    the opening tag interior.
+ * 3. Require a NEWLINE immediately after the opening tag.
+ * 4. Recursively parse body blocks via `parseBlocksUntil()`, stopping
+ *    when `isAlignClose()` finds the matching closing tag.
+ * 5. Consume the closing tag and optional trailing newline.
+ * 6. Emit a container element with `type: { align: direction }`.
+ *
+ * `preservesPrecedingLineBreak` is `true` because, unlike most block
+ * constructs, an alignment block does not suppress a preceding `\n` from
+ * becoming a `<br />` in Wikidot's output.
+ *
+ * Behaviour of `parse()` worth knowing:
+ * - It fails (`{ success: false }`) only when the opening tag is malformed
+ *   or is not followed by a NEWLINE.
+ * - A missing closing tag is not a failure. An `unclosed-block` warning is
+ *   pushed to `ctx.diagnostics`, positioned at the opening token, and the
+ *   container is still returned with whatever body was parsed.
+ * - `consumed` is the total number of tokens covered, counted from the
+ *   BLOCK_OPEN token: opening tag, the NEWLINE after it, the body, and,
+ *   when present, the closing tag plus one trailing NEWLINE.
+ * - `ctx.pos` itself is not advanced here; positions are tracked in a local
+ *   variable and passed on through copies of the context.
+ */
+export const alignRule: BlockRule = {
+  name: "align",
+  startTokens: ["BLOCK_OPEN"],
+  requiresLineStart: true,
+  preservesPrecedingLineBreak: true,
+  isStartPattern(ctx: ParseContext, pos: number): boolean {
+    if (ctx.tokens[pos]?.type !== "BLOCK_OPEN") return false;
+    return parseAlignOpen(ctx, pos + 1) !== null; // the newline requirement is checked later, in parse()
+  },
+  parse(ctx: ParseContext): RuleResult<Element> {
+    const openToken = currentToken(ctx);
+    if (openToken.type !== "BLOCK_OPEN") {
+      return { success: false };
+    }
+    let pos = ctx.pos + 1; // index of the token being examined
+    let consumed = 1; // tokens consumed so far; starts at 1 for BLOCK_OPEN
+    // Parse align open syntax (symbol token(s) plus BLOCK_CLOSE)
+    const alignResult = parseAlignOpen(ctx, pos);
+    if (!alignResult) {
+      return { success: false };
+    }
+    const { direction } = alignResult;
+    pos += alignResult.consumed;
+    consumed += alignResult.consumed;
+    // Must be followed by newline; content on the same line as the opening tag rejects the rule
+    if (ctx.tokens[pos]?.type !== "NEWLINE") {
+      return { success: false };
+    }
+    pos++;
+    consumed++;
+    // Close condition: body parsing stops at a closing tag for this same direction
+    const closeCondition = (checkCtx: ParseContext): boolean => {
+      return isAlignClose(checkCtx, direction).match;
+    };
+    // Parse body on a shallow copy of the context positioned at the first body token
+    const bodyCtx: ParseContext = { ...ctx, pos };
+    const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
+    consumed += bodyResult.consumed;
+    pos += bodyResult.consumed;
+    // Check for missing close tag at the position where body parsing stopped; the symbol is only used in the warning text
+    const directionSymbol = { left: "<", right: ">", center: "=", justify: "==" }[direction];
+    const closeCheck = isAlignClose({ ...ctx, pos }, direction);
+    if (!closeCheck.match) {
+      ctx.diagnostics.push({
+        severity: "warning",
+        code: "unclosed-block",
+        message: `Missing closing tag [[/${directionSymbol}]] for [[${directionSymbol}]]`,
+        position: openToken.position,
+      });
+    }
+    // Consume closing tag (skipped when the block was left unclosed)
+    if (closeCheck.match) {
+      consumed += closeCheck.consumed;
+      pos += closeCheck.consumed;
+      // Consume trailing newline, if there is one directly after the closing tag
+      if (ctx.tokens[pos]?.type === "NEWLINE") {
+        pos++;
+        consumed++;
+      }
+    }
+    return {
+      success: true,
+      elements: [
+        {
+          element: "container",
+          data: {
+            type: { align: direction },
+            attributes: {},
+            elements: bodyResult.elements,
+          },
+        },
+      ],
+      consumed,
+    };
+  },
+};