@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,177 @@
1
+ /**
2
+ *
3
+ * Block rule for `[[li]]...[[/li]]` appearing outside of any `[[ul]]`/`[[ol]]` block.
4
+ *
5
+ * When `[[li]]` is used without an enclosing list block, Wikidot does NOT
6
+ * create a list item. Instead, it treats the tags as literal text and
7
+ * renders the body content without `<p>` wrapping, using `<br />` for
8
+ * newlines.
9
+ *
10
+ * Example input:
11
+ * ```
12
+ * [[li]]
13
+ * Baz
14
+ * [[/li]]
15
+ * ```
16
+ *
17
+ * Rendered output:
18
+ * ```
19
+ * [[li]]<br />Baz<br />[[/li]]
20
+ * ```
21
+ *
22
+ * This rule exists to correctly consume and reproduce that output. Without
23
+ * it, the parser would try to match `[[li]]` against other block rules
24
+ * and potentially produce incorrect results.
25
+ *
26
+ * If no `[[/li]]` closing tag is found, the rule fails.
27
+ *
28
+ * @module
29
+ */
30
+ import type { Element } from "@wdprlib/ast";
31
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
32
+ import { currentToken } from "../types";
33
+ import { parseBlockName } from "./utils";
34
+
/**
 * Checks whether a `[[li` opening tag begins at token index `pos`.
 *
 * The token at `pos` must be `BLOCK_OPEN`, and the block name parsed right
 * after it must be exactly `"li"`; any other name (for example `li_`) is
 * rejected. The closing `]]` is not examined here — the caller checks it.
 *
 * @param ctx - Parse context.
 * @param pos - Token index to inspect.
 * @returns `{ consumed }` counting `BLOCK_OPEN` plus the name tokens, or
 *   `null` when the tokens do not form a `[[li` opener.
 */
function isLiOpen(ctx: ParseContext, pos: number): { consumed: number } | null {
  const opener = ctx.tokens[pos];
  if (opener?.type !== "BLOCK_OPEN") return null;

  const parsedName = parseBlockName(ctx, pos + 1);
  return parsedName?.name === "li" ? { consumed: 1 + parsedName.consumed } : null;
}
52
+
/**
 * Checks whether a `[[/li]]` closing tag begins at token index `pos`.
 *
 * The token at `pos` must be `BLOCK_END_OPEN` and the block name that follows
 * must be exactly `"li"`. A trailing `BLOCK_CLOSE` token is counted when it is
 * present, but it is not required for the match to succeed.
 *
 * @param ctx - Parse context.
 * @param pos - Token index to inspect.
 * @returns `{ consumed }` counting `BLOCK_END_OPEN`, the name tokens and the
 *   `BLOCK_CLOSE` token if there is one, or `null` when there is no match.
 */
function isLiClose(ctx: ParseContext, pos: number): { consumed: number } | null {
  if (ctx.tokens[pos]?.type !== "BLOCK_END_OPEN") return null;

  const parsedName = parseBlockName(ctx, pos + 1);
  if (parsedName?.name !== "li") return null;

  const withoutCloser = 1 + parsedName.consumed;
  const hasCloser = ctx.tokens[pos + withoutCloser]?.type === "BLOCK_CLOSE";
  return { consumed: hasCloser ? withoutCloser + 1 : withoutCloser };
}
68
+
/**
 * Block rule for an orphaned `[[li]]...[[/li]]` pair (one that is not inside
 * a list block).
 *
 * Both tags are reproduced as literal text elements (three text elements
 * each), every `NEWLINE` token in the body becomes a `line-break` element,
 * line-leading `WHITESPACE` tokens are dropped, and every other token is
 * emitted as a text element carrying the token's raw value.
 *
 * The rule fails when the opener is not exactly `[[li]]`, and also when no
 * closing tag is found before EOF; in the latter case an `unclosed-block`
 * warning is pushed onto `ctx.diagnostics` before failing.
 */
export const orphanLiRule: BlockRule = {
  name: "orphan-li",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // Only the exact name `li` qualifies (not `li_`).
    const opening = isLiOpen(ctx, ctx.pos);
    if (!opening) {
      return { success: false };
    }

    // `cursor` walks the token stream; the number of consumed tokens is
    // always `cursor - ctx.pos`, so no separate counter is kept.
    let cursor = ctx.pos + opening.consumed;

    // The opener must be terminated by `]]`.
    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor += 1;

    // The opening tag is reproduced verbatim as three text elements.
    const elements: Element[] = [];
    elements.push(
      { element: "text", data: "[[" },
      { element: "text", data: "li" },
      { element: "text", data: "]]" },
    );

    // Wikidot renders `[[li]]\nBaz\n[[/li]]` as `[[li]]<br />Baz<br />[[/li]]`:
    // each newline in the body becomes one <br />, and nothing extra is
    // inserted in front of the closing tag.
    let closed = false;
    while (cursor < ctx.tokens.length) {
      const token = ctx.tokens[cursor];
      if (!token || token.type === "EOF") break;

      // The closing tag is tested first so that it ends the body.
      const closing = isLiClose(ctx, cursor);
      if (closing) {
        elements.push(
          { element: "text", data: "[[/" },
          { element: "text", data: "li" },
          { element: "text", data: "]]" },
        );
        cursor += closing.consumed;
        // A single newline directly after the closing tag is swallowed.
        if (ctx.tokens[cursor]?.type === "NEWLINE") {
          cursor += 1;
        }
        closed = true;
        break;
      }

      if (token.type === "NEWLINE") {
        // Newlines in the body render as <br />.
        elements.push({ element: "line-break" });
      } else if (!(token.type === "WHITESPACE" && token.lineStart)) {
        // Everything except line-leading whitespace is kept as raw text.
        elements.push({ element: "text", data: token.value });
      }
      cursor += 1;
    }

    // Without a closing tag the rule must fail; otherwise it would swallow
    // the rest of the document.
    if (!closed) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/li]] for [[li]]",
        position: openToken.position,
      });
      return { success: false };
    }

    return {
      success: true,
      elements,
      consumed: cursor - ctx.pos,
    };
  },
};
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Paragraph rule
3
+ *
4
+ * Collects inline content until paragraph break (double newline) or end of input.
5
+ * Line breaks within paragraphs are handled by the newlineLineBreakRule.
6
+ */
7
+ import type { Element } from "@wdprlib/ast";
8
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
9
+ import { parseInlineUntil } from "../inline/utils";
10
+
/**
 * Tells whether `elem` is a closeSpan marker: a `container` element whose
 * data is a `span` and whose attributes object carries a `_closeSpan` key
 * (the key's value is not inspected).
 *
 * @param elem - Element to test.
 * @returns `true` when the element is a closeSpan marker.
 */
function isCloseSpanMarker(elem: Element): boolean {
  return Boolean(
    elem.element === "container" &&
      elem.data &&
      typeof elem.data === "object" &&
      "type" in elem.data &&
      elem.data.type === "span" &&
      "attributes" in elem.data &&
      typeof elem.data.attributes === "object" &&
      elem.data.attributes &&
      "_closeSpan" in elem.data.attributes,
  );
}

/**
 * Resolves closeSpan markers in a list of inline elements.
 *
 * Elements are copied to the output in order. Whenever a closeSpan marker is
 * met, everything collected so far is replaced by a single attribute-less
 * `span` container holding that content; the marker itself is never emitted.
 * A marker with nothing before it is simply dropped. Because the wrap covers
 * the whole output so far, a later marker also wraps spans created by earlier
 * markers, producing nested spans.
 *
 * @param elements - Inline elements, possibly containing closeSpan markers.
 * @returns A new array with the markers resolved; the input array is not modified.
 */
function processCloseSpanMarkers(elements: Element[]): Element[] {
  const result: Element[] = [];

  for (const elem of elements) {
    // Skip holes / undefined entries.
    if (!elem) continue;

    if (!isCloseSpanMarker(elem)) {
      result.push(elem);
      continue;
    }

    // Marker: move everything collected so far into a fresh span.
    if (result.length > 0) {
      const spanContent = result.splice(0, result.length);
      result.push({
        element: "container",
        data: {
          type: "span",
          attributes: {},
          elements: spanContent,
        },
      });
    }
  }

  return result;
}
56
+
/**
 * Paragraph rule — the fallback block rule.
 *
 * Collects inline content up to a paragraph break or end of input (via
 * `parseInlineContent`), resolves closeSpan markers, trims the edges of the
 * collected content and wraps what is left in a `paragraph` container.
 *
 * Wikidot behavior:
 * - Single newline → <br> (handled by newlineLineBreakRule)
 * - Blank line (double newline) → new paragraph
 *
 * The rule fails when there is no inline content, or when nothing remains
 * after trimming. When the token following the consumed content is a
 * line-start `COLON`, the content is returned unwrapped with one trailing
 * `line-break` instead of a paragraph container.
 */
export const paragraphRule: BlockRule = {
  name: "paragraph",
  startTokens: [], // matches anything not matched by other rules
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    // Inline content including NEWLINEs, up to a paragraph break or EOF.
    const result = parseInlineContent(ctx);

    // Nothing parsed: there is no paragraph to emit.
    if (result.elements.length === 0) {
      return { success: false };
    }

    // Resolve closeSpan markers (used for split spans). The returned array is
    // trimmed in place below.
    const elements = processCloseSpanMarkers(result.elements);

    // Remove trailing line-breaks (they shouldn't appear at end of paragraph).
    // Exception: a line-break carrying the `_preservedTrailingBreak` flag is
    // kept; the flag is removed from it and trimming stops there.
    while (elements.length > 0 && elements[elements.length - 1]?.element === "line-break") {
      const lastEl = elements[elements.length - 1] as Element & {
        _preservedTrailingBreak?: unknown;
      };
      if (lastEl._preservedTrailingBreak) {
        delete lastEl._preservedTrailingBreak;
        break;
      }
      elements.pop();
    }

    // Remove trailing whitespace-only text nodes.
    while (elements.length > 0) {
      const last = elements[elements.length - 1];
      if (
        last?.element === "text" &&
        "data" in last &&
        typeof last.data === "string" &&
        last.data.trim() === ""
      ) {
        elements.pop();
      } else {
        break;
      }
    }

    // Remove leading line-breaks.
    while (elements.length > 0 && elements[0]?.element === "line-break") {
      elements.shift();
    }

    if (elements.length === 0) {
      return { success: false };
    }

    // Wikidot: text lines immediately before a definition list are not
    // wrapped in <p>. A line-start COLON right after the consumed content is
    // taken as the start of a definition list.
    const nextPos = ctx.pos + result.consumed;
    const nextToken = ctx.tokens[nextPos];
    if (nextToken?.type === "COLON" && nextToken.lineStart) {
      return {
        success: true,
        elements: [...elements, { element: "line-break" }],
        consumed: result.consumed,
      };
    }

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "paragraph",
            attributes: {},
            elements,
          },
        },
      ],
      consumed: result.consumed,
    };
  },
};
146
+
/**
 * Parses inline content for a paragraph.
 *
 * Delegates to `parseInlineUntil` with the `"PARAGRAPH_BREAK"` stop marker
 * and returns its result unchanged.
 *
 * @param ctx - Parse context positioned at the start of the paragraph.
 * @returns The parsed inline elements and the number of tokens consumed.
 */
function parseInlineContent(ctx: ParseContext): {
  elements: Element[];
  consumed: number;
} {
  // NOTE(review): the cast suggests "PARAGRAPH_BREAK" is not part of the
  // declared type of this parameter — confirm against parseInlineUntil.
  const stopAt = "PARAGRAPH_BREAK" as any;
  return parseInlineUntil(ctx, stopAt);
}