@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,84 @@
1
+ /**
2
+ *
3
+ * Provides the two lowest-priority inline rules: `textRule` and `fallbackRule`.
4
+ *
5
+ * These rules act as catch-alls that convert unrecognized tokens into
6
+ * plain `"text"` AST elements, ensuring no token is ever silently dropped
7
+ * during inline parsing.
8
+ *
9
+ * `textRule` handles `TEXT` and `WHITESPACE` tokens specifically and is
10
+ * included in the main `inlineRules` array as the last entry before
11
+ * the fallback.
12
+ *
13
+ * `fallbackRule` has an empty `startTokens` array, which means it matches
14
+ * ANY token type. It is exported separately as `inlineFallbackRule` and
15
+ * is NOT included in the `inlineRules` array to prevent it from
16
+ * short-circuiting more specific rules. Instead, it is invoked explicitly
17
+ * by the parser when no other rule matches.
18
+ *
19
+ * @module
20
+ */
21
+ import type { Element } from "@wdprlib/ast";
22
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
23
+ import { currentToken } from "../types";
24
+
25
+ /**
26
+ * Inline rule for plain text and whitespace tokens.
27
+ *
28
+ * Matches `TEXT` and `WHITESPACE` token types and converts them
29
+ * directly to `"text"` AST elements. This rule always succeeds.
30
+ *
31
+ * Placed last (before the fallback) in the inline rules array so
32
+ * that all formatting and structural rules are tried first.
33
+ */
34
export const textRule: InlineRule = {
  name: "text",
  startTokens: ["TEXT", "WHITESPACE"],

  /**
   * Emits the current token verbatim as a single `"text"` element.
   *
   * @param ctx - Parse context; the token at `ctx.pos` is read via `currentToken`
   * @returns A successful result that consumes exactly one token
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { value } = currentToken(ctx);
    const textElement: Element = { element: "text", data: value };

    return { success: true, elements: [textElement], consumed: 1 };
  },
};
54
+
55
+ /**
56
+ * Universal fallback rule for any token type not matched by other rules.
57
+ *
58
+ * The empty `startTokens` array signals to the parser that this rule
59
+ * can match any token. It converts the token's value to a `"text"`
60
+ * element, ensuring no token is silently dropped.
61
+ *
62
+ * This rule is used as a last-resort handler and is intentionally
63
+ * excluded from the main `inlineRules` array.
64
+ */
65
export const fallbackRule: InlineRule = {
  name: "fallback",
  // An empty list is the "matches every token type" signal (see canApplyInlineRule).
  startTokens: [],

  /**
   * Turns whatever token sits at the current position into literal text.
   *
   * @param ctx - Parse context; the token at `ctx.pos` is read via `currentToken`
   * @returns A successful result with one `"text"` element, consuming one token
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const unmatched = currentToken(ctx);
    const elements: Element[] = [{ element: "text", data: unmatched.value }];

    return {
      success: true,
      elements,
      consumed: 1,
    };
  },
};
@@ -0,0 +1,127 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot underline formatting syntax: `__text__`.
4
+ *
5
+ * Underline text is delimited by double underscores. Unlike most
6
+ * inline formatting markers (bold, italic, etc.) which require the
7
+ * closing marker on the same line, underline markers can span
8
+ * multiple lines within the same paragraph. The closing marker
9
+ * must appear before a paragraph break (blank line).
10
+ *
11
+ * Single newlines within underlined content are converted to
12
+ * `<br />` elements, matching Wikidot's multiline underline behavior.
13
+ *
14
+ * If no closing `__` is found before a paragraph break, the opening
15
+ * marker is emitted as literal text.
16
+ *
17
+ * Empty underline (`____`) is silently discarded by Wikidot (produces
18
+ * no output).
19
+ *
20
+ * Renders as a `<u>` element in HTML.
21
+ *
22
+ * Produces a `"container"` AST element with `type: "underline"`.
23
+ *
24
+ * @module
25
+ */
26
+ import type { Element } from "@wdprlib/ast";
27
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
28
+ import { currentToken, hasClosingMarkerBeforeParagraphBreak } from "../types";
29
+ import { parseInlineUntil } from "./utils";
30
+
31
+ /**
32
+ * Inline rule for parsing `__underline__` formatting.
33
+ *
34
+ * Triggered by an `UNDERLINE_MARKER` token (`__`). Uses
35
+ * {@link hasClosingMarkerBeforeParagraphBreak} instead of the
36
+ * single-line variant because Wikidot allows underline to span
37
+ * multiple lines within a paragraph.
38
+ *
39
+ * When no closing marker is found before a paragraph break, the
40
+ * opening `__` is treated as literal text.
41
+ */
42
export const underlineRule: InlineRule = {
  name: "underline",
  startTokens: ["UNDERLINE_MARKER"],

  /**
   * Attempts to parse underline formatting at the current position.
   *
   * The rule always succeeds and yields one of three shapes:
   * - a `"container"` element with `type: "underline"` wrapping the content,
   * - an empty element list when nothing was collected between the markers
   *   (e.g. `____`), or
   * - the opening marker as literal text when no closing marker is found
   *   before a paragraph break.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result; `consumed` covers the opening marker, the
   *   content, and the closing marker when one was reached
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const startToken = currentToken(ctx);

    // Without a closing marker in this paragraph the opener is plain text.
    if (!hasClosingMarkerBeforeParagraphBreak({ ...ctx, pos: ctx.pos + 1 }, "UNDERLINE_MARKER")) {
      return {
        success: true,
        elements: [{ element: "text", data: startToken.value }],
        consumed: 1,
      };
    }

    const children: Element[] = [];
    let pos = ctx.pos + 1;
    let consumed = 1; // opening marker

    while (pos < ctx.tokens.length) {
      const token = ctx.tokens[pos];
      if (!token || token.type === "EOF") break;

      // Closing marker: count it and stop.
      if (token.type === "UNDERLINE_MARKER") {
        consumed++;
        break;
      }

      // A single newline inside the underline becomes a line-break element.
      if (token.type === "NEWLINE") {
        children.push({ element: "line-break" });
        pos++;
        consumed++;
        continue;
      }

      // Parse one line's worth of inline content (stops at NEWLINE or the
      // closing marker).
      const result = parseInlineUntil({ ...ctx, pos }, "UNDERLINE_MARKER");
      if (result.consumed > 0) {
        // Progress is measured by tokens consumed, not by elements produced:
        // an inline rule may consume tokens and emit nothing (this rule does
        // so itself for `____`). Testing `elements.length` here would re-emit
        // the first already-consumed token as literal text and then re-parse
        // the remainder of that construct.
        children.push(...result.elements);
        pos += result.consumed;
        consumed += result.consumed;
      } else {
        // No progress at all: emit the token as text so the loop advances.
        children.push({ element: "text", data: token.value });
        pos++;
        consumed++;
      }
    }

    // Nothing collected between the markers: produce no output.
    if (children.length === 0) {
      return {
        success: true,
        elements: [],
        consumed,
      };
    }

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "underline",
            attributes: {},
            elements: children,
          },
        },
      ],
      consumed,
    };
  },
};
@@ -0,0 +1,147 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot user reference syntax: `[[user name]]` and
4
+ * `[[*user name]]`.
5
+ *
6
+ * A user reference displays a linked username (typically linking to
7
+ * the user's profile page). The variant with a star prefix (`[[*user]]`)
8
+ * also displays the user's avatar alongside the username.
9
+ *
10
+ * Wikidot syntax:
11
+ * - `[[user some-user]]` -- displays username as a link
12
+ * - `[[*user some-user]]` -- displays avatar and username
13
+ *
14
+ * Note: Wikidot requires no whitespace immediately after `[[`. This
15
+ * means `[[ user name]]` is invalid, but `[[user name]]` and
16
+ * `[[*user name]]` are valid.
17
+ *
18
+ * The username may contain any characters except `]]` and newlines.
19
+ * Leading/trailing whitespace around the username is trimmed.
20
+ *
21
+ * Produces a `"user"` AST element with `data.name` (the username)
22
+ * and `data["show-avatar"]` (boolean).
23
+ *
24
+ * @module
25
+ */
26
+ import type { Element } from "@wdprlib/ast";
27
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
28
+ import { currentToken } from "../types";
29
+
30
+ /**
31
+ * Inline rule for parsing `[[user name]]` and `[[*user name]]` references.
32
+ *
33
+ * Triggered by a `BLOCK_OPEN` (`[[`) token. Optionally detects a `*`
34
+ * prefix for avatar display, then verifies the keyword `user`, and
35
+ * collects the username until `]]`.
36
+ *
37
+ * Fails if:
38
+ * - Whitespace immediately follows `[[` (Wikidot requires no leading space)
39
+ * - The keyword is not `user`
40
+ * - The username is empty
41
+ * - No closing `]]` is found
42
+ */
43
export const userRule: InlineRule = {
  name: "user",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to read `[[user name]]` / `[[*user name]]` starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A `"user"` element on success, otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;
    // `cursor` always points at the next unread token; the number of tokens
    // consumed so far is therefore `cursor - ctx.pos`.
    let cursor = ctx.pos + 1;

    // `[[ user]]` is rejected: nothing may separate `[[` from what follows.
    if (tokens[cursor]?.type === "WHITESPACE") {
      return { success: false };
    }

    // Optional `*` requests the avatar.
    const showAvatar = tokens[cursor]?.type === "STAR";
    if (showAvatar) {
      cursor++;
    }

    // Whitespace can only be present here when a star was just read.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // The keyword must be a TEXT/IDENTIFIER token spelling `user` (any case).
    const keyword = tokens[cursor];
    if (!keyword) {
      return { success: false };
    }
    if (keyword.type !== "TEXT" && keyword.type !== "IDENTIFIER") {
      return { success: false };
    }
    if (keyword.value.toLowerCase() !== "user") {
      return { success: false };
    }
    cursor++;

    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Everything up to `]]`, a newline, or end of input forms the username.
    const parts: string[] = [];
    for (; cursor < tokens.length; cursor++) {
      const piece = tokens[cursor];
      if (!piece) break;
      if (piece.type === "BLOCK_CLOSE" || piece.type === "NEWLINE" || piece.type === "EOF") {
        break;
      }
      parts.push(piece.value);
    }

    const username = parts.join("").trim();
    if (username === "") {
      return { success: false };
    }

    // The collection loop must have stopped on the closing `]]`.
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }

    return {
      success: true,
      elements: [
        {
          element: "user",
          data: {
            name: username,
            "show-avatar": showAvatar,
          },
        },
      ],
      // +1 accounts for the closing `]]` that `cursor` still points at.
      consumed: cursor + 1 - ctx.pos,
    };
  },
};
@@ -0,0 +1,344 @@
1
+ import type { TokenType, Token } from "../../../lexer";
2
+ import type { Element } from "@wdprlib/ast";
3
+ import type { ParseContext, InlineRule } from "../types";
4
+ import {
5
+ BLOCK_START_TOKENS,
6
+ INDENT_ACCEPTING_BLOCK_NAMES,
7
+ KNOWN_BLOCK_NAMES,
8
+ } from "../../constants";
9
+ import { parseBlockName } from "../utils";
10
+
11
+ /**
12
+ * Checks whether the block token at `tokenPos` (BLOCK_OPEN or BLOCK_END_OPEN)
13
+ * names a block in the excluded set.
14
+ */
15
function isExcludedBlockToken(ctx: ParseContext, tokenPos: number): boolean {
  // No exclusion set (or an empty one) means nothing can be excluded.
  const excludedNames = ctx.scope.excludedBlockNames;
  if (!excludedNames || !excludedNames.size) {
    return false;
  }

  // Only `[[` and `[[/` tokens can name a block.
  const kind = ctx.tokens[tokenPos]?.type;
  const isBlockBracket = kind === "BLOCK_OPEN" || kind === "BLOCK_END_OPEN";
  if (!isBlockBracket) {
    return false;
  }

  const parsed = parseBlockName(ctx, tokenPos + 1);
  if (parsed === null) {
    return false;
  }
  return excludedNames.has(parsed.name);
}
23
+
24
+ /**
25
+ * Checks whether the block token at `tokenPos` names a block that no rule
26
+ * recognizes (e.g. `[[foo]]`). Wikidot leaves such tokens inside paragraphs
27
+ * rather than treating them as paragraph boundaries.
28
+ *
29
+ * Align blocks (`[[=]]`, `[[==]]`) are recognized as a special case: their
30
+ * marker tokens are `EQUALS`, not `TEXT`/`IDENTIFIER`, so `parseBlockName`
31
+ * cannot extract a name. They are still real block boundaries.
32
+ */
33
function isUnknownBlockToken(ctx: ParseContext, tokenPos: number): boolean {
  const kind = ctx.tokens[tokenPos]?.type;
  if (!(kind === "BLOCK_OPEN" || kind === "BLOCK_END_OPEN")) {
    return false;
  }

  const parsed = parseBlockName(ctx, tokenPos + 1);
  if (parsed !== null) {
    // A name was read: it is unknown exactly when it is not in the known set.
    return !KNOWN_BLOCK_NAMES.has(parsed.name);
  }

  // No name could be read. Align markers (`[[=]]`, `[[==]]`) tokenize as
  // EQUALS and still count as real blocks; anything else is treated as inline.
  return ctx.tokens[tokenPos + 1]?.type !== "EQUALS";
}
47
+
48
+ /**
49
+ * Checks whether the block token at `tokenPos` names a block whose rule
50
+ * accepts leading whitespace before the opener (`requiresLineStart: false`).
51
+ *
52
+ * Used to decide whether a `\n<indent>[[name]]` sequence should end a
53
+ * paragraph: only when the matching block rule would actually consume
54
+ * the indented token. Otherwise the boundary check would split the
55
+ * paragraph for tokens that the block dispatcher then refuses, leaving
56
+ * literal `[[toc]]` text in a fresh paragraph.
57
+ */
58
function isIndentAcceptingBlock(ctx: ParseContext, tokenPos: number): boolean {
  const kind = ctx.tokens[tokenPos]?.type;
  const isBlockBracket = kind === "BLOCK_OPEN" || kind === "BLOCK_END_OPEN";

  // The name is only looked up for `[[` / `[[/` tokens.
  const parsed = isBlockBracket ? parseBlockName(ctx, tokenPos + 1) : null;
  return parsed !== null && INDENT_ACCEPTING_BLOCK_NAMES.has(parsed.name);
}
65
+
66
+ /**
67
+ * Result of parsing inline content
68
+ */
69
export interface InlineParseResult {
  /** AST elements produced, in source order. */
  elements: Array<Element>;
  /** Number of tokens the parse advanced past, counted from the starting position. */
  consumed: number;
}
73
+
74
+ /**
75
+ * Check if an inline rule can be applied
76
+ */
77
export function canApplyInlineRule(rule: InlineRule, token: { type: TokenType }): boolean {
  const { startTokens } = rule;
  // An empty start-token list marks the fallback rule, which applies to everything.
  return startTokens.length === 0 || startTokens.includes(token.type);
}
83
+
84
+ /**
85
+ * Parse inline content until a specific token type
86
+ *
87
+ * When endType is "PARAGRAPH_BREAK", handles NEWLINEs and stops at:
88
+ * - Double NEWLINE (paragraph break)
89
+ * - NEWLINE followed by block-start token
90
+ * - EOF
91
+ */
92
export function parseInlineUntil(ctx: ParseContext, endType: TokenType): InlineParseResult {
  const nodes: Element[] = [];
  let consumed = 0;
  let pos = ctx.pos;

  // Paragraph mode handles NEWLINEs inline instead of stopping at the first one.
  const paragraphMode = endType === ("PARAGRAPH_BREAK" as TokenType);

  // Rules that start on the end token are never tried (this avoids re-entering
  // the rule that called us). The set does not depend on the current token,
  // so it is computed once rather than on every loop iteration.
  const candidateRules = ctx.inlineRules.filter((rule) => !rule.startTokens.includes(endType));

  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "EOF") {
      break;
    }

    // Let the paragraph parser respect the enclosing block's close condition.
    if (paragraphMode && ctx.scope.blockCloseCondition) {
      const checkCtx: ParseContext = { ...ctx, pos };
      if (ctx.scope.blockCloseCondition(checkCtx)) {
        break;
      }
    }

    if (token.type === "NEWLINE") {
      // Standard mode: a NEWLINE always ends the run and is not consumed.
      if (!paragraphMode) {
        break;
      }

      // Paragraph mode: look past the newline and any indentation to decide
      // whether the paragraph ends here.
      const nextPos = skipWhitespaceFrom(ctx, pos + 1);
      const nextMeaningfulToken = ctx.tokens[nextPos];
      const isBlockStart = startsParagraphEndingBlock(ctx, nextPos);

      if (
        !nextMeaningfulToken ||
        nextMeaningfulToken.type === "NEWLINE" ||
        nextMeaningfulToken.type === "EOF" ||
        isBlockStart
      ) {
        // A block rule flagged `preservesPrecedingLineBreak` keeps the newline
        // before it as an explicit line-break element.
        if (isBlockStart && nodes.length > 0) {
          const shouldPreserve = ctx.blockRules.some(
            (rule) => rule.preservesPrecedingLineBreak && rule.isStartPattern?.(ctx, nextPos),
          );
          if (shouldPreserve) {
            // Tag the element without resorting to `any`: build a regular
            // line-break element, then attach the marker property.
            const preservedBreak: Element = { element: "line-break" };
            Object.assign(preservedBreak, { _preservedTrailingBreak: true });
            nodes.push(preservedBreak);
          }
        }

        // Consume the NEWLINE, plus the second NEWLINE of a paragraph break.
        // NOTE(review): WHITESPACE tokens skipped between the two newlines are
        // not added to `consumed`; this is unchanged from the previous
        // behaviour — confirm the preprocessor never leaves such tokens.
        consumed++;
        if (nextMeaningfulToken?.type === "NEWLINE") {
          consumed++;
        }
        break;
      }
      // Otherwise the NEWLINE is ordinary inline content and falls through
      // to the rules below.
    }

    if (token.type === endType) {
      break;
    }

    const inlineCtx: ParseContext = { ...ctx, pos };

    let matched = false;
    for (const rule of candidateRules) {
      if (!canApplyInlineRule(rule, token)) {
        continue;
      }
      const result = rule.parse(inlineCtx);
      if (result.success) {
        nodes.push(...result.elements);
        consumed += result.consumed;
        pos += result.consumed;
        matched = true;
        break;
      }
    }

    if (!matched) {
      // No rule accepted the token: keep it as literal text.
      nodes.push({ element: "text", data: token.value });
      consumed++;
      pos++;
    }
  }

  return { elements: nodes, consumed };
}

/** Returns the first index at or after `from` whose token is not WHITESPACE. */
function skipWhitespaceFrom(ctx: ParseContext, from: number): number {
  let index = from;
  while (ctx.tokens[index]?.type === "WHITESPACE") {
    index++;
  }
  return index;
}

/** True when the token at `nextPos` opens `[[/span]]`, which is handled inline. */
function startsOrphanSpanClose(ctx: ParseContext, nextPos: number): boolean {
  if (ctx.tokens[nextPos]?.type !== "BLOCK_END_OPEN") {
    return false;
  }
  const nameToken = ctx.tokens[skipWhitespaceFrom(ctx, nextPos + 1)];
  return nameToken?.type === "IDENTIFIER" && nameToken.value.toLowerCase() === "span";
}

/** True when the token at `nextPos` opens an inline anchor name (`[[# name]]`). */
function startsAnchorName(ctx: ParseContext, nextPos: number): boolean {
  if (ctx.tokens[nextPos]?.type !== "BLOCK_OPEN") {
    return false;
  }
  const hashToken = ctx.tokens[skipWhitespaceFrom(ctx, nextPos + 1)];
  return hashToken?.type === "HASH" || (hashToken?.type === "TEXT" && hashToken.value === "#");
}

/**
 * True when the `[[` at `nextPos` cannot open a block: either `[[>X` / `[[<X`
 * where X is not `]]`, or a `[[footnoteblock]]` seen after one was already
 * parsed in this scope.
 */
function startsInvalidBlockOpen(ctx: ParseContext, nextPos: number): boolean {
  if (ctx.tokens[nextPos]?.type !== "BLOCK_OPEN") {
    return false;
  }
  const afterOpen = nextPos + 1;

  // `[[>]]` and `[[<]]` are valid align blocks; `[[>toc]]` and similar are not.
  const firstAfter = ctx.tokens[afterOpen];
  if (firstAfter?.type === "TEXT" && (firstAfter.value === ">" || firstAfter.value === "<")) {
    const secondAfter = ctx.tokens[afterOpen + 1];
    if (secondAfter && secondAfter.type !== "BLOCK_CLOSE") {
      return true;
    }
  }

  const blockNameToken = ctx.tokens[skipWhitespaceFrom(ctx, afterOpen)];
  if (
    blockNameToken &&
    (blockNameToken.type === "TEXT" || blockNameToken.type === "IDENTIFIER") &&
    blockNameToken.value.toLowerCase() === "footnoteblock" &&
    ctx.scope.footnoteBlockParsed
  ) {
    return true;
  }
  return false;
}

/**
 * True when the HEADING_MARKER at `nextPos` would not form a heading: more
 * than six plus signs, or no WHITESPACE after the marker (or after the
 * optional `*` that follows it).
 */
function startsInvalidHeading(ctx: ParseContext, nextPos: number): boolean {
  const marker = ctx.tokens[nextPos];
  if (marker?.type !== "HEADING_MARKER") {
    return false;
  }
  if (marker.value.length > 6) {
    return true;
  }
  const afterMarker = ctx.tokens[nextPos + 1];
  if (afterMarker?.type === "STAR") {
    return ctx.tokens[nextPos + 2]?.type !== "WHITESPACE";
  }
  return afterMarker?.type !== "WHITESPACE";
}

/**
 * Decides whether the token at `nextPos` (the first non-whitespace token
 * after a NEWLINE) starts a block that ends the current paragraph.
 *
 * The token must be a block-start token that either carries the `lineStart`
 * flag or is a `[[` / `[[/` whose block name accepts indentation. Orphan
 * `[[/span]]`, anchor names, invalid `[[>` / `[[<` openers, repeated
 * footnote blocks, invalid headings, excluded block names and unknown block
 * names never end the paragraph.
 */
function startsParagraphEndingBlock(ctx: ParseContext, nextPos: number): boolean {
  const next = ctx.tokens[nextPos];
  if (!next || !BLOCK_START_TOKENS.includes(next.type)) {
    return false;
  }

  const isBlockBracket = next.type === "BLOCK_OPEN" || next.type === "BLOCK_END_OPEN";

  // Strict line start is required unless the named block accepts indentation.
  if (!next.lineStart && !(isBlockBracket && isIndentAcceptingBlock(ctx, nextPos))) {
    return false;
  }

  if (
    startsOrphanSpanClose(ctx, nextPos) ||
    startsAnchorName(ctx, nextPos) ||
    startsInvalidBlockOpen(ctx, nextPos) ||
    startsInvalidHeading(ctx, nextPos)
  ) {
    return false;
  }

  if (
    isBlockBracket &&
    (isExcludedBlockToken(ctx, nextPos) || isUnknownBlockToken(ctx, nextPos))
  ) {
    return false;
  }

  return true;
}
324
+
325
+ /**
326
+ * Collect tokens until newline or EOF
327
+ */
328
export function collectUntilNewline(ctx: ParseContext): { tokens: Token[]; consumed: number } {
  const collected: Token[] = [];

  // Walk forward from the current position; the terminating NEWLINE/EOF
  // token itself is never included.
  for (let index = ctx.pos; index < ctx.tokens.length; index++) {
    const current = ctx.tokens[index];
    if (!current) break;
    if (current.type === "NEWLINE" || current.type === "EOF") break;
    collected.push(current);
  }

  // Every visited token is collected, so the two counts always agree.
  return { tokens: collected, consumed: collected.length };
}