npm - @wdprlib/parser - Versions diffs - 3.1.2 → 3.2.0 - Mend

@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

package/dist/index.cjs +295 -118
package/dist/index.js +272 -95
package/package.json +5 -3
package/src/index.ts +163 -0
package/src/lexer/index.ts +20 -0
package/src/lexer/lexer.ts +687 -0
package/src/lexer/tokens.ts +141 -0
package/src/parser/constants.ts +173 -0
package/src/parser/depth.ts +251 -0
package/src/parser/index.ts +18 -0
package/src/parser/parse.ts +315 -0
package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
package/src/parser/postprocess/index.ts +15 -0
package/src/parser/postprocess/spanStrip.ts +697 -0
package/src/parser/preprocess/expr.ts +265 -0
package/src/parser/preprocess/index.ts +38 -0
package/src/parser/preprocess/typography.ts +67 -0
package/src/parser/preprocess/utils.ts +250 -0
package/src/parser/preprocess/whitespace.ts +111 -0
package/src/parser/rules/block/align.ts +282 -0
package/src/parser/rules/block/bibliography.ts +359 -0
package/src/parser/rules/block/block-list.ts +689 -0
package/src/parser/rules/block/blockquote.ts +238 -0
package/src/parser/rules/block/center.ts +87 -0
package/src/parser/rules/block/clear-float.ts +75 -0
package/src/parser/rules/block/code.ts +187 -0
package/src/parser/rules/block/collapsible.ts +337 -0
package/src/parser/rules/block/comment.ts +73 -0
package/src/parser/rules/block/content-separator.ts +79 -0
package/src/parser/rules/block/definition-list.ts +270 -0
package/src/parser/rules/block/div.ts +400 -0
package/src/parser/rules/block/embed-block.ts +153 -0
package/src/parser/rules/block/footnoteblock.ts +200 -0
package/src/parser/rules/block/heading.ts +142 -0
package/src/parser/rules/block/horizontal-rule.ts +61 -0
package/src/parser/rules/block/html.ts +222 -0
package/src/parser/rules/block/iframe.ts +239 -0
package/src/parser/rules/block/iftags.ts +150 -0
package/src/parser/rules/block/include.ts +179 -0
package/src/parser/rules/block/index.ts +127 -0
package/src/parser/rules/block/list.ts +244 -0
package/src/parser/rules/block/math.ts +183 -0
package/src/parser/rules/block/module/backlinks/index.ts +31 -0
package/src/parser/rules/block/module/backlinks/types.ts +21 -0
package/src/parser/rules/block/module/categories/index.ts +34 -0
package/src/parser/rules/block/module/categories/types.ts +21 -0
package/src/parser/rules/block/module/css/index.ts +37 -0
package/src/parser/rules/block/module/iftags/condition.ts +109 -0
package/src/parser/rules/block/module/iftags/index.ts +26 -0
package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
package/src/parser/rules/block/module/iftags/types.ts +63 -0
package/src/parser/rules/block/module/include/index.ts +20 -0
package/src/parser/rules/block/module/include/resolve.ts +556 -0
package/src/parser/rules/block/module/index.ts +122 -0
package/src/parser/rules/block/module/join/index.ts +34 -0
package/src/parser/rules/block/module/join/types.ts +23 -0
package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
package/src/parser/rules/block/module/listpages/extract.ts +410 -0
package/src/parser/rules/block/module/listpages/index.ts +83 -0
package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
package/src/parser/rules/block/module/listpages/parser.ts +106 -0
package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
package/src/parser/rules/block/module/listpages/types.ts +513 -0
package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
package/src/parser/rules/block/module/listusers/extract.ts +45 -0
package/src/parser/rules/block/module/listusers/index.ts +36 -0
package/src/parser/rules/block/module/listusers/parser.ts +54 -0
package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
package/src/parser/rules/block/module/listusers/types.ts +93 -0
package/src/parser/rules/block/module/mapping.ts +61 -0
package/src/parser/rules/block/module/page-tree/index.ts +38 -0
package/src/parser/rules/block/module/page-tree/types.ts +29 -0
package/src/parser/rules/block/module/rate/index.ts +28 -0
package/src/parser/rules/block/module/rate/types.ts +19 -0
package/src/parser/rules/block/module/resolve.ts +411 -0
package/src/parser/rules/block/module/types-common.ts +59 -0
package/src/parser/rules/block/module/types.ts +61 -0
package/src/parser/rules/block/module/utils.ts +43 -0
package/src/parser/rules/block/module/walk.ts +380 -0
package/src/parser/rules/block/module.ts +164 -0
package/src/parser/rules/block/orphan-li.ts +177 -0
package/src/parser/rules/block/paragraph.ts +157 -0
package/src/parser/rules/block/table-block.ts +726 -0
package/src/parser/rules/block/table.ts +441 -0
package/src/parser/rules/block/tabview.ts +331 -0
package/src/parser/rules/block/toc.ts +129 -0
package/src/parser/rules/block/utils.ts +615 -0
package/src/parser/rules/index.ts +49 -0
package/src/parser/rules/inline/anchor-name.ts +154 -0
package/src/parser/rules/inline/anchor.ts +327 -0
package/src/parser/rules/inline/bibcite.ts +153 -0
package/src/parser/rules/inline/bold.ts +86 -0
package/src/parser/rules/inline/color.ts +140 -0
package/src/parser/rules/inline/comment.ts +90 -0
package/src/parser/rules/inline/equation-ref.ts +115 -0
package/src/parser/rules/inline/expr.ts +526 -0
package/src/parser/rules/inline/footnote.ts +223 -0
package/src/parser/rules/inline/guillemet.ts +64 -0
package/src/parser/rules/inline/html.ts +132 -0
package/src/parser/rules/inline/image.ts +328 -0
package/src/parser/rules/inline/index.ts +150 -0
package/src/parser/rules/inline/italic.ts +74 -0
package/src/parser/rules/inline/line-break.ts +326 -0
package/src/parser/rules/inline/link-anchor.ts +147 -0
package/src/parser/rules/inline/link-single.ts +164 -0
package/src/parser/rules/inline/link-star.ts +134 -0
package/src/parser/rules/inline/link-triple.ts +267 -0
package/src/parser/rules/inline/math-inline.ts +126 -0
package/src/parser/rules/inline/monospace.ts +78 -0
package/src/parser/rules/inline/raw.ts +262 -0
package/src/parser/rules/inline/size.ts +244 -0
package/src/parser/rules/inline/span.ts +424 -0
package/src/parser/rules/inline/strikethrough.ts +115 -0
package/src/parser/rules/inline/subscript.ts +84 -0
package/src/parser/rules/inline/superscript.ts +84 -0
package/src/parser/rules/inline/text.ts +84 -0
package/src/parser/rules/inline/underline.ts +127 -0
package/src/parser/rules/inline/user.ts +147 -0
package/src/parser/rules/inline/utils.ts +344 -0
package/src/parser/rules/types.ts +252 -0
package/src/parser/rules/utils.ts +155 -0
package/src/parser/toc.ts +130 -0

package/src/parser/rules/inline/image.ts ADDED Viewed

@@ -0,0 +1,328 @@
+/**
+ *
+ * Parses the Wikidot image block syntax: `[[image source attributes]]`.
+ *
+ * Images support several alignment/float prefixes that modify how the
+ * image is positioned on the page:
+ * - `[[image src]]` -- default (no alignment)
+ * - `[[=image src]]` -- centered
+ * - `[[<image src]]` -- left-aligned
+ * - `[[>image src]]` -- right-aligned
+ * - `[[f<image src]]` -- float left
+ * - `[[f>image src]]` -- float right
+ * - `[[f=image src]]` -- float center
+ *
+ * Image sources can be:
+ * - URLs: absolute (`http://...`, `https://...`) or any source starting with `/` (e.g. `/path`)
+ * - Local file references in three formats:
+ *   - `file.ext` (file on current page, type `file1`)
+ *   - `page/file.ext` (file on another page, type `file2`)
+ *   - `site:page/file.ext` or `site/page/file.ext` (cross-site file, type `file3`)
+ *
+ * Optional attributes follow the source (e.g. `alt`, `title`, `width`,
+ * `height`, `style`, `class`, `link`). The `link` attribute is treated
+ * specially: it wraps the image in a hyperlink rather than being applied
+ * as an HTML attribute. Unsafe attributes are filtered out.
+ *
+ * Produces an `"image"` AST element with source, alignment, link, and
+ * attribute data.
+ *
+ * @module
+ */
+import type { Element, ImageSource, FloatAlignment, Alignment, AttributeMap } from "@wdprlib/ast";
+import type { InlineRule, ParseContext, RuleResult } from "../types";
+import { currentToken } from "../types";
+import { filterUnsafeAttributes } from "../utils";
+import { parseAttributesRaw } from "../block/utils";
+/**
+ * Parses the block name portion of an image tag, including alignment
+ * prefix characters.
+ *
+ * The alignment prefix may consist of `=`, `<`, `>`, `f<`, `f>`, or `f=`,
+ * each tokenized differently depending on the lexer's context (e.g. `>`
+ * may appear as either a `TEXT` token or a `BLOCKQUOTE_MARKER`).
+ *
+ * Scanning proceeds in three steps:
+ * 1. Leading `WHITESPACE` tokens are skipped; each one is counted as consumed.
+ * 2. An optional prefix is recognised: an `EQUALS` token, a `TEXT` token
+ *    whose value is `<` or `>`, a `BLOCKQUOTE_MARKER` whose value is `>`,
+ *    or an `IDENTIFIER` `f` (case-insensitive) followed by one of those.
+ *    If `f` is not followed by `<`, `>`, or `=`, no prefix is consumed and
+ *    the `f` token itself is used as the name token in step 3.
+ * 3. The next token must be `TEXT` or `IDENTIFIER`; its lowercased value is
+ *    appended to the prefix to form the returned name.
+ *
+ * This function does not verify that the name is an image variant; it
+ * returns whatever name it finds, and `imageRule.parse` checks it against
+ * the list of accepted image block names.
+ *
+ * @param ctx - The current parse context
+ * @param startPos - Token index at which to begin scanning
+ * @returns An object with the combined lowercased name (prefix + "image")
+ *          and the number of tokens consumed (skipped whitespace + prefix
+ *          tokens + the name token), or `null` if no valid image
+ *          block name was found
+ */
+function parseImageBlockName(
+  ctx: ParseContext,
+  startPos: number,
+): { name: string; consumed: number } | null {
+  let pos = startPos;
+  let consumed = 0;
+  while (ctx.tokens[pos]?.type === "WHITESPACE") {
+    pos++;
+    consumed++;
+  }
+  let prefix = "";
+  const token = ctx.tokens[pos];
+  // Handle prefix characters for image variants: =image, <image, >image, f<image, f>image, f=image
+  // These are tokenized as separate tokens: EQUALS/TEXT/BLOCKQUOTE_MARKER (optionally after IDENTIFIER "f") + the name token
+  if (token?.type === "EQUALS") {
+    // =image (center)
+    prefix = "=";
+    pos++;
+    consumed++;
+  } else if (token?.type === "TEXT" && token.value === "<") {
+    // <image (left align)
+    prefix = "<";
+    pos++;
+    consumed++;
+  } else if (token?.type === "TEXT" && token.value === ">") {
+    // >image (right align)
+    prefix = ">";
+    pos++;
+    consumed++;
+  } else if (token?.type === "BLOCKQUOTE_MARKER" && token.value === ">") {
+    // >image (right align) - may also be tokenized as BLOCKQUOTE_MARKER
+    prefix = ">";
+    pos++;
+    consumed++;
+  } else if (token?.type === "IDENTIFIER" && token.value.toLowerCase() === "f") {
+    // Check for f<, f>, or f= (each consumes two tokens: the "f" and the symbol)
+    const nextToken = ctx.tokens[pos + 1];
+    if (nextToken?.type === "TEXT" && nextToken.value === "<") {
+      prefix = "f<";
+      pos += 2;
+      consumed += 2;
+    } else if (nextToken?.type === "TEXT" && nextToken.value === ">") {
+      prefix = "f>";
+      pos += 2;
+      consumed += 2;
+    } else if (nextToken?.type === "BLOCKQUOTE_MARKER" && nextToken.value === ">") {
+      prefix = "f>";
+      pos += 2;
+      consumed += 2;
+    } else if (nextToken?.type === "EQUALS") {
+      // f=image (float center)
+      prefix = "f=";
+      pos += 2;
+      consumed += 2;
+    }
+  }
+  const nameToken = ctx.tokens[pos];
+  if (!nameToken || (nameToken.type !== "TEXT" && nameToken.type !== "IDENTIFIER")) {
+    return null;
+  }
+  return { name: prefix + nameToken.value.toLowerCase(), consumed: consumed + 1 }; // +1 for the name token itself
+}
+/**
+ * Determines the {@link ImageSource} type and data from a raw source string.
+ *
+ * Classification logic (checks run in this order; the first match wins):
+ * - Strings starting with `http://`, `https://`, or `/` are URL sources.
+ *   The prefix comparison is case-sensitive, so a source such as
+ *   `HTTP://...` falls through to the file-reference checks below.
+ * - Strings containing a colon before a slash (e.g. `site:page/file`) are
+ *   `file3` (cross-site) references. The colon must not be the first
+ *   character, and the first slash must come after the first colon.
+ *   `site` is the text before the colon, `file` is the text after the last
+ *   slash, and `page` is everything in between (it may itself contain
+ *   slashes).
+ * - Strings with 2+ slashes (e.g. `site/page/file`) are also `file3`:
+ *   `site` is the text before the first slash, `file` the text after the
+ *   last slash, and `page` everything between them.
+ * - Strings with exactly 1 slash (e.g. `page/file`) are `file2` references.
+ * - Strings with no slashes are `file1` (current-page file) references.
+ *
+ * @param src - The raw image source string from the markup
+ * @returns An {@link ImageSource} object describing the source type and data
+ */
+function parseImageSource(src: string): ImageSource {
+  // URL sources
+  if (src.startsWith("http://") || src.startsWith("https://") || src.startsWith("/")) {
+    return { type: "url", data: src };
+  }
+  // File references - determine type based on format
+  // file3: site:page/file or site/page/file (2+ slashes)
+  // file2: page/file (1 slash)
+  // file1: file (no slash)
+  const colonIdx = src.indexOf(":"); // first colon, or -1
+  const slashIdx = src.indexOf("/"); // first slash, or -1
+  if (colonIdx > 0 && slashIdx > colonIdx) {
+    // site:page/file format (colon-based)
+    const site = src.substring(0, colonIdx);
+    const rest = src.substring(colonIdx + 1);
+    const lastSlash = rest.lastIndexOf("/"); // always >= 0 here: a slash exists after the colon
+    const page = rest.substring(0, lastSlash);
+    const file = rest.substring(lastSlash + 1);
+    return { type: "file3", data: { site, page, file } };
+  }
+  // Count slashes to determine format
+  const slashes = src.split("/").length - 1;
+  if (slashes >= 2) {
+    // site/page/file format (2+ slashes = file3)
+    const firstSlash = src.indexOf("/");
+    const lastSlash = src.lastIndexOf("/");
+    const site = src.substring(0, firstSlash);
+    const page = src.substring(firstSlash + 1, lastSlash);
+    const file = src.substring(lastSlash + 1);
+    return { type: "file3", data: { site, page, file } };
+  }
+  if (slashIdx > 0) { // index 0 cannot occur here: a leading "/" already returned as a URL above
+    // page/file format (1 slash = file2)
+    const page = src.substring(0, slashIdx);
+    const file = src.substring(slashIdx + 1);
+    return { type: "file2", data: { page, file } };
+  }
+  // Just file
+  return { type: "file1", data: { file: src } };
+}
+/**
+ * Converts the image block name (including its alignment prefix) into a
+ * {@link FloatAlignment} descriptor.
+ *
+ * The prefix portion of the block name determines both the alignment
+ * direction and whether the image should float. A plain `"image"` name
+ * (no prefix) returns `null`, indicating no explicit alignment.
+ *
+ * Mapping:
+ * - `"=image"`  -> `{ align: "center", float: false }`
+ * - `"<image"`  -> `{ align: "left",   float: false }`
+ * - `">image"`  -> `{ align: "right",  float: false }`
+ * - `"f<image"` -> `{ align: "left",   float: true }`
+ * - `"f>image"` -> `{ align: "right",  float: true }`
+ * - `"f=image"` -> `{ align: "center", float: true }`
+ * - `"image"`   -> `null`
+ *
+ * Any other string falls through every branch and yields the initial
+ * values `{ align: "left", float: false }`. Within this file the only
+ * caller (`imageRule.parse`) validates the name first, so that path is
+ * not expected to be reached from there.
+ *
+ * @param blockName - The lowercased block name (e.g. `"f>image"`, `"=image"`, `"image"`)
+ * @returns A {@link FloatAlignment} object with `align` and `float` fields,
+ *          or `null` for the unprefixed `"image"` form
+ */
+function parseAlignment(blockName: string): FloatAlignment | null {
+  let align: Alignment = "left"; // initial value; also the result for unrecognised names
+  let float = false;
+  if (blockName === "=image") {
+    align = "center";
+  } else if (blockName === "<image") {
+    align = "left"; // same as the initial value; kept explicit for symmetry
+  } else if (blockName === ">image") {
+    align = "right";
+  } else if (blockName === "f<image") {
+    align = "left";
+    float = true;
+  } else if (blockName === "f>image") {
+    align = "right";
+    float = true;
+  } else if (blockName === "f=image") {
+    align = "center";
+    float = true;
+  } else if (blockName === "image") {
+    return null;
+  }
+  return { align, float };
+}
+/**
+ * Inline rule for parsing `[[image source attributes]]` and its alignment variants.
+ *
+ * Triggered by a `BLOCK_OPEN` (`[[`) token. The rule identifies the image
+ * block name (with optional alignment prefix), extracts the image source,
+ * parses remaining attributes, filters unsafe attributes, and extracts the
+ * `link` attribute for special handling.
+ *
+ * Fails if the block name is not an image variant, if no source is provided,
+ * or if `]]` is not found.
+ *
+ * On success the result contains a single `"image"` element whose `data`
+ * holds `source`, `link` (`null` when no `link` attribute was given),
+ * `alignment` (`null` for the unprefixed form) and the filtered
+ * `attributes`. `consumed` is the number of tokens from the opening `[[`
+ * through the closing `]]`, inclusive.
+ */
+export const imageRule: InlineRule = {
+  name: "image",
+  startTokens: ["BLOCK_OPEN"],
+  /**
+   * Attempts to parse an image block at the current position.
+   *
+   * Steps: verify `[[`, read and validate the block name, skip whitespace,
+   * collect the source, parse raw attributes, require `]]`, reject an empty
+   * source, then build the element. `ctx` itself is only read; positions
+   * are tracked in local variables.
+   *
+   * @param ctx - Parse context with token stream and current position
+   * @returns A successful result with an `"image"` element, or `{ success: false }`
+   */
+  parse(ctx: ParseContext): RuleResult<Element> {
+    const openToken = currentToken(ctx);
+    if (openToken.type !== "BLOCK_OPEN") {
+      return { success: false };
+    }
+    let pos = ctx.pos + 1;
+    let consumed = 1; // the BLOCK_OPEN token
+    const nameResult = parseImageBlockName(ctx, pos);
+    if (!nameResult) {
+      return { success: false };
+    }
+    // Check for image, =image, <image, >image, f<image, f>image, f=image
+    const blockName = nameResult.name;
+    const imageNames = ["image", "=image", "<image", ">image", "f<image", "f>image", "f=image"];
+    if (!imageNames.includes(blockName)) {
+      return { success: false };
+    }
+    pos += nameResult.consumed;
+    consumed += nameResult.consumed;
+    // Skip whitespace
+    while (ctx.tokens[pos]?.type === "WHITESPACE") {
+      pos++;
+      consumed++;
+    }
+    // Get image source (concatenate token values until whitespace, ]], newline, or EOF)
+    let src = "";
+    while (pos < ctx.tokens.length) {
+      const srcToken = ctx.tokens[pos];
+      if (
+        !srcToken ||
+        srcToken.type === "WHITESPACE" ||
+        srcToken.type === "BLOCK_CLOSE" ||
+        srcToken.type === "NEWLINE" ||
+        srcToken.type === "EOF"
+      ) {
+        break;
+      }
+      src += srcToken.value;
+      pos++;
+      consumed++;
+    }
+    // Parse remaining attributes (raw, before safety filtering). NOTE(review): the meaning of the `false` argument is defined by parseAttributesRaw in ../block/utils - confirm there
+    const attrResultRaw = parseAttributesRaw(ctx, pos, false);
+    pos += attrResultRaw.consumed;
+    consumed += attrResultRaw.consumed;
+    // Expect ]]
+    if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
+      return { success: false };
+    }
+    pos++; // not read again after this point; kept in step with `consumed`
+    consumed++;
+    // Wikidot requires image source - [[image]] without source fails (checked only after ]] was found)
+    if (!src) {
+      return { success: false };
+    }
+    // Parse source and alignment
+    const source = parseImageSource(src);
+    const alignment = parseAlignment(blockName);
+    // Extract link before filtering (link is image-specific, not an HTML attribute)
+    const linkUrl = attrResultRaw.attrs.link;
+    const { link: _link, ...restAttrs } = attrResultRaw.attrs; // `_link` is unused; destructured only to drop `link` from restAttrs
+    const cleanAttrs = filterUnsafeAttributes(restAttrs);
+    return {
+      success: true,
+      elements: [
+        {
+          element: "image",
+          data: {
+            source,
+            link: linkUrl ?? null,
+            alignment,
+            attributes: cleanAttrs as AttributeMap,
+          },
+        },
+      ],
+      consumed,
+    };
+  },
+};

package/src/parser/rules/inline/index.ts ADDED Viewed

@@ -0,0 +1,150 @@
+/**
+ *
+ * Central registry and priority-ordered list of all inline parsing rules.
+ *
+ * This module imports every inline rule, re-exports them for individual use,
+ * and assembles them into the {@link inlineRules} array, which defines the
+ * order in which rules are attempted during inline parsing.
+ *
+ * Rule ordering matters: earlier rules take priority when multiple rules
+ * could match the same token. For example, formatting rules (bold, italic,
+ * etc.) are tried before link rules, and the text rule is placed last in
+ * the array as a catch-all.
+ *
+ * The `fallbackRule` is exported separately as `inlineFallbackRule` because
+ * it matches any token type and is used as a last resort when no other rule
+ * succeeds. It is NOT included in the `inlineRules` array to prevent it
+ * from short-circuiting more specific rules.
+ *
+ * @module
+ */
+import type { InlineRule } from "../types";
+import { boldRule } from "./bold";
+import { italicRule } from "./italic";
+import { underlineRule } from "./underline";
+import { strikethroughRule } from "./strikethrough";
+import { superscriptRule } from "./superscript";
+import { subscriptRule } from "./subscript";
+import { monospaceRule } from "./monospace";
+import { linkTripleRule } from "./link-triple";
+import { linkSingleRule } from "./link-single";
+import { linkAnchorRule } from "./link-anchor";
+import { linkStarRule } from "./link-star";
+import { colorRule } from "./color";
+import {
+  backslashLineBreakRule,
+  newlineLineBreakRule,
+  underscoreLineBreakRule,
+} from "./line-break";
+import { commentRule } from "./comment";
+import { htmlInlineRule } from "./html";
+import { rawRule } from "./raw";
+import { spanRule, closeSpanRule } from "./span";
+import { sizeRule } from "./size";
+import { footnoteRule } from "./footnote";
+import { imageRule } from "./image";
+import { guillemetRule } from "./guillemet";
+import { userRule } from "./user";
+import { anchorNameRule } from "./anchor-name";
+import { anchorRule } from "./anchor";
+import { mathInlineRule } from "./math-inline";
+import { equationRefRule } from "./equation-ref";
+import { exprRule, ifRule, ifExprRule } from "./expr";
+import { bibciteRule } from "./bibcite";
+import { textRule, fallbackRule } from "./text";
+export { boldRule } from "./bold";
+export { italicRule } from "./italic";
+export { underlineRule } from "./underline";
+export { strikethroughRule } from "./strikethrough";
+export { superscriptRule } from "./superscript";
+export { subscriptRule } from "./subscript";
+export { monospaceRule } from "./monospace";
+export { linkTripleRule } from "./link-triple";
+export { linkSingleRule } from "./link-single";
+export { linkAnchorRule } from "./link-anchor";
+export { linkStarRule } from "./link-star";
+export { colorRule } from "./color";
+export {
+  backslashLineBreakRule,
+  newlineLineBreakRule,
+  underscoreLineBreakRule,
+} from "./line-break";
+export { commentRule } from "./comment";
+export { htmlInlineRule } from "./html";
+export { rawRule } from "./raw";
+export { spanRule, closeSpanRule } from "./span";
+export { sizeRule } from "./size";
+export { footnoteRule } from "./footnote";
+export { imageRule } from "./image";
+export { guillemetRule } from "./guillemet";
+export { userRule } from "./user";
+export { anchorNameRule } from "./anchor-name";
+export { anchorRule } from "./anchor";
+export { mathInlineRule } from "./math-inline";
+export { equationRefRule } from "./equation-ref";
+export { exprRule, ifRule, ifExprRule } from "./expr";
+export { bibciteRule } from "./bibcite";
+export { textRule, fallbackRule } from "./text";
+/**
+ * All inline rules in priority order.
+ *
+ * Rules are tried top-to-bottom against the current token. The first
+ * rule whose `startTokens` match the token type and whose `parse()`
+ * returns `{ success: true }` wins.
+ *
+ * Ordering rationale (groups appear in the array in this order):
+ * 1. Paired formatting markers (bold, italic, underline, strikethrough,
+ *    superscript, subscript, monospace) -- most common inline syntax
+ * 2. Link rules (triple, single, anchor, star) -- order matters because
+ *    `[[[` must be tried before `[`
+ * 3. Color, line-break (backslash, underscore, newline), comment, and
+ *    `htmlInlineRule`
+ * 4. Raw (verbatim) text
+ * 5. Block-open-triggered rules (image, size, footnote, span, user,
+ *    expr/if/ifexpr, anchor-name, anchor, math-inline, equation-ref);
+ *    `closeSpanRule` sits directly after `spanRule` in this group
+ * 6. Bibcite (double-parenthesis syntax)
+ * 7. Guillemet (typographic angle quotes)
+ * 8. Text rule (catch-all for TEXT and WHITESPACE tokens)
+ *
+ * The `fallbackRule` is intentionally excluded; it is used as a
+ * separate last-resort handler.
+ */
+export const inlineRules: InlineRule[] = [
+  boldRule,
+  italicRule,
+  underlineRule,
+  strikethroughRule,
+  superscriptRule,
+  subscriptRule,
+  monospaceRule,
+  linkTripleRule,
+  linkSingleRule,
+  linkAnchorRule,
+  linkStarRule,
+  colorRule,
+  backslashLineBreakRule,
+  underscoreLineBreakRule,
+  newlineLineBreakRule,
+  commentRule,
+  htmlInlineRule,
+  rawRule,
+  imageRule,
+  sizeRule,
+  footnoteRule,
+  spanRule,
+  closeSpanRule,
+  userRule,
+  exprRule,
+  ifRule,
+  ifExprRule,
+  anchorNameRule,
+  anchorRule,
+  mathInlineRule,
+  equationRefRule,
+  bibciteRule,
+  guillemetRule,
+  textRule,
+];
+export { fallbackRule as inlineFallbackRule };

package/src/parser/rules/inline/italic.ts ADDED Viewed

@@ -0,0 +1,74 @@
+/**
+ *
+ * Parses the Wikidot italic formatting syntax: `//text//`.
+ *
+ * Italic text is delimited by double forward slashes. The opening and
+ * closing markers must appear on the same line. If no closing `//` is
+ * found before a newline, the opening marker is emitted as literal text.
+ *
+ * Unlike bold (which discards empty markers), italic markers with empty
+ * content (`////`) still produce an italic container, matching Wikidot's
+ * behavior.
+ *
+ * Italic may nest other inline formatting within its body.
+ *
+ * Produces a `"container"` AST element with `type: "italics"`.
+ *
+ * @module
+ */
+import type { Element } from "@wdprlib/ast";
+import type { InlineRule, ParseContext, RuleResult } from "../types";
+import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
+import { parseInlineUntil } from "./utils";
+/**
+ * Inline rule for parsing `//italic//` formatting.
+ *
+ * Triggered by an `ITALIC_MARKER` token (`//`). Checks for a matching
+ * closing marker on the same line, then recursively parses inline content.
+ *
+ * When no closing marker is found, the opening `//` is treated as
+ * literal text.
+ *
+ * This rule always returns `success: true`: either an italics container
+ * or a single text element holding the opening marker's value.
+ */
+export const italicRule: InlineRule = {
+  name: "italic",
+  startTokens: ["ITALIC_MARKER"],
+  /**
+   * Attempts to parse italic formatting at the current position.
+   *
+   * Both helper calls receive a shallow copy of `ctx` positioned one token
+   * past the opening marker; `ctx` itself is not modified here.
+   *
+   * @param ctx - Parse context with token stream and current position
+   * @returns A successful result containing either a `"container"` element
+   *          with `type: "italics"`, or a text fallback for unmatched markers
+   *          (the fallback consumes exactly one token, the opening marker)
+   */
+  parse(ctx: ParseContext): RuleResult<Element> {
+    const startToken = currentToken(ctx);
+    // Check if closing marker exists, searching from the token after the opening marker
+    if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: ctx.pos + 1 }, "ITALIC_MARKER")) {
+      return {
+        success: true,
+        elements: [{ element: "text", data: startToken.value }],
+        consumed: 1,
+      };
+    }
+    // Parse content between markers (starting just after the opening marker)
+    const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "ITALIC_MARKER");
+    return {
+      success: true,
+      elements: [
+        {
+          element: "container",
+          data: {
+            type: "italics",
+            attributes: {},
+            elements: result.elements,
+          },
+        },
+      ],
+      consumed: 1 + result.consumed + 1, // open + content + close; NOTE(review): assumes parseInlineUntil's `consumed` excludes the closing marker - confirm in ./utils
+    };
+  },
+};