@wdprlib/parser 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +312 -121
  2. package/dist/index.js +289 -98
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,86 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot bold formatting syntax: `**text**`.
4
+ *
5
+ * Bold text is delimited by double asterisks. The opening and closing
6
+ * markers must appear on the same line; if no closing `**` is found
7
+ * before a newline, the opening marker is emitted as literal text.
8
+ *
9
+ * Wikidot behavior for empty bold (`****`): the markers and their
10
+ * (empty) content are discarded entirely, producing no output.
11
+ *
12
+ * Bold may nest other inline formatting (italic, underline, etc.)
13
+ * within its body.
14
+ *
15
+ * Produces a `"container"` AST element with `type: "bold"`.
16
+ *
17
+ * @module
18
+ */
19
+ import type { Element } from "@wdprlib/ast";
20
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
21
+ import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
22
+ import { parseInlineUntil } from "./utils";
23
+
24
/**
 * Inline rule that recognizes Wikidot bold markup (`**text**`).
 *
 * The rule fires on a `BOLD_MARKER` token. It first asks
 * `hasClosingMarkerBeforeNewline` whether a second `BOLD_MARKER` exists
 * later on the same line; only then does it hand the tokens after the
 * opener to `parseInlineUntil` to build the nested inline content.
 *
 * An opener without a partner never fails the rule: it is returned as a
 * plain text element so the surrounding parse keeps going.
 */
export const boldRule: InlineRule = {
  name: "bold",
  startTokens: ["BOLD_MARKER"],

  /**
   * Tries to parse a bold span starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns Always a successful result: a `"container"` element with
   *   `type: "bold"`, no elements at all when the body is empty (`****`),
   *   or a single text element holding the opener's value when no closing
   *   marker is found on the line
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);

    // A fresh context copy positioned just past the opening marker.
    const pastOpener = (): ParseContext => ({ ...ctx, pos: ctx.pos + 1 });

    // Unmatched opener: emit it verbatim as text and consume only that token.
    if (!hasClosingMarkerBeforeNewline(pastOpener(), "BOLD_MARKER")) {
      return {
        success: true,
        elements: [{ element: "text", data: opener.value }],
        consumed: 1,
      };
    }

    const body = parseInlineUntil(pastOpener(), "BOLD_MARKER");

    // NOTE(review): this assumes the token right after the parsed body is the
    // closing marker; unlike the color rule, it is not re-checked here.
    // Confirm against parseInlineUntil.
    const span = 1 + body.consumed + 1; // opener + body + closer

    return {
      success: true,
      // An empty body (`****`) yields no output at all.
      elements:
        body.elements.length === 0
          ? []
          : [
              {
                element: "container",
                data: {
                  type: "bold",
                  attributes: {},
                  elements: body.elements,
                },
              },
            ],
      consumed: span,
    };
  },
};
@@ -0,0 +1,140 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot inline color syntax: `##color|text##`.
4
+ *
5
+ * This syntax applies a CSS color to inline text. The color specifier
6
+ * and the text content are separated by a pipe (`|`). Both parts are
7
+ * required; an empty color or empty content causes the parse to fail.
8
+ *
9
+ * Supported color formats:
10
+ * - 3-digit hex (e.g. `c00`) -- automatically prefixed with `#`
11
+ * - 6-digit hex (e.g. `cc0000`) -- automatically prefixed with `#`
12
+ * - Named CSS colors (e.g. `blue`, `red`)
13
+ * - CSS color functions (e.g. `rgb(255,0,0)`)
14
+ *
15
+ * Wikidot syntax examples:
16
+ * - `##c00|Apple##` -- red text reading "Apple"
17
+ * - `##blue|Ocean##` -- blue text reading "Ocean"
18
+ * - `##rgb(0,128,0)|Green text##` -- CSS function color
19
+ *
20
+ * Produces a `"color"` AST element with the resolved color value and
21
+ * nested inline elements.
22
+ *
23
+ * @module
24
+ */
25
+ import type { Element } from "@wdprlib/ast";
26
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
27
+ import { hasClosingMarkerBeforeNewline } from "../types";
28
+ import { parseInlineUntil } from "./utils";
29
+
30
/**
 * Inline rule that recognizes Wikidot colored text (`##color|text##`).
 *
 * The rule fires on a `COLOR_MARKER` token. Everything up to the first
 * `PIPE` token is concatenated into the color specifier; the tokens after
 * the pipe are parsed as nested inline content up to the closing
 * `COLOR_MARKER`.
 *
 * The rule reports failure when:
 * - no closing `COLOR_MARKER` exists before the end of the line,
 * - the specifier is not followed by a `PIPE` token,
 * - the token after the parsed content is not a `COLOR_MARKER`, or
 * - the trimmed specifier or the content turns out to be empty.
 */
export const colorRule: InlineRule = {
  name: "color",
  startTokens: ["COLOR_MARKER"],

  /**
   * Tries to parse a colored span starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result holding one `"color"` element, or
   *   `{ success: false }` when any of the conditions above is not met
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const closerOnLine = hasClosingMarkerBeforeNewline(
      { ...ctx, pos: ctx.pos + 1 },
      "COLOR_MARKER",
    );
    if (!closerOnLine) {
      return { success: false };
    }

    // Single cursor into the token stream; the consumed count is derived
    // from it at the end (cursor - ctx.pos).
    let cursor = ctx.pos + 1; // step over the opening ##

    // Gather the raw color text; stop at the pipe or at anything that ends
    // the construct prematurely.
    let rawColor = "";
    while (cursor < ctx.tokens.length) {
      const tok = ctx.tokens[cursor];
      if (!tok) {
        break;
      }
      const endsSpec =
        tok.type === "PIPE" ||
        tok.type === "COLOR_MARKER" ||
        tok.type === "NEWLINE" ||
        tok.type === "EOF";
      if (endsSpec) {
        break;
      }
      rawColor += tok.value;
      cursor += 1;
    }

    // The specifier must be terminated by a pipe.
    if (ctx.tokens[cursor]?.type !== "PIPE") {
      return { success: false };
    }
    cursor += 1;

    // Nested inline content runs up to the closing ##.
    const inner = parseInlineUntil({ ...ctx, pos: cursor }, "COLOR_MARKER");
    cursor += inner.consumed;

    // The closing ## has to sit exactly where the content stopped.
    if (ctx.tokens[cursor]?.type !== "COLOR_MARKER") {
      return { success: false };
    }
    cursor += 1;

    const children = inner.elements;
    const colorName = rawColor.trim();

    // Both halves are mandatory: a blank color or empty content is rejected.
    if (colorName === "" || children.length === 0) {
      return { success: false };
    }

    return {
      success: true,
      elements: [
        {
          element: "color",
          data: {
            color: hexifyColor(colorName),
            elements: children,
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
123
+
124
/** Matches a string made of exactly three or exactly six hexadecimal digits. */
const BARE_HEX_COLOR = /^[0-9a-fA-F]{3}(?:[0-9a-fA-F]{3})?$/;

/**
 * Adds the leading `#` to a hex color written without one.
 *
 * A value consisting solely of 3 or 6 hexadecimal digits (for example
 * `c00` or `ff0000`) is returned with `#` in front. Every other value,
 * such as a color name or a CSS function like `rgb(255,0,0)`, is passed
 * through untouched.
 *
 * @param color - The trimmed color string from the markup
 * @returns `#` + `color` for bare 3-/6-digit hex input, otherwise `color` as given
 */
function hexifyColor(color: string): string {
  return BARE_HEX_COLOR.test(color) ? `#${color}` : color;
}
@@ -0,0 +1,90 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot comment syntax: `[!-- text --]`.
4
+ *
5
+ * Comments are completely removed from the rendered output. Any content
6
+ * between the opening `[!--` and closing `--]` markers is consumed and
7
+ * discarded. Comments may contain arbitrary text, including markup
8
+ * characters that would otherwise be interpreted.
9
+ *
10
+ * If the closing `--]` is never found (unterminated comment), the rule
11
+ * fails and the opening `[!--` falls through to other rules or the
12
+ * text fallback.
13
+ *
14
+ * Unlike HTML comments (`<!-- -->`), Wikidot comments are delimited by
15
+ * square brackets (`[!-- --]`) rather than angle brackets.
16
+ *
17
+ * @module
18
+ */
19
+ import type { Element } from "@wdprlib/ast";
20
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
21
+ import { currentToken } from "../types";
22
+
23
/**
 * Inline rule that recognizes Wikidot comments (`[!-- comment --]`).
 *
 * The rule fires on a `COMMENT_OPEN` token and scans forward for the first
 * `COMMENT_CLOSE` token. Everything in between, including newline tokens,
 * is skipped, and the successful result carries no elements, so the
 * comment leaves nothing behind in the output.
 *
 * If an `EOF` token or the end of the token array is reached first, a
 * warning diagnostic with code `unclosed-comment` is recorded and the
 * rule reports failure.
 */
export const commentRule: InlineRule = {
  name: "comment",
  startTokens: ["COMMENT_OPEN"],

  /**
   * Tries to parse a comment starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with no elements and a `consumed` count
   *   covering the opener, the skipped tokens and the closer, or
   *   `{ success: false }` when the comment is never closed
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);

    // Walk the tokens after the opener looking for the closing --].
    for (let idx = ctx.pos + 1; idx < ctx.tokens.length; idx++) {
      const tok = ctx.tokens[idx];

      // A hole in the stream or EOF both mean the comment never closes.
      if (!tok || tok.type === "EOF") {
        break;
      }

      if (tok.type === "COMMENT_CLOSE") {
        // Everything from the opener through the closer is swallowed.
        return {
          success: true,
          elements: [],
          consumed: idx - ctx.pos + 1,
        };
      }
    }

    // Unterminated: report it at the opener's position and give up.
    ctx.diagnostics.push({
      severity: "warning",
      code: "unclosed-comment",
      message: "Unterminated comment: missing closing --]",
      position: opener.position,
    });
    return { success: false };
  },
};
@@ -0,0 +1,115 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot equation reference syntax: `[[eref name]]`.
4
+ *
5
+ * An equation reference creates a clickable link that points to a
6
+ * named equation block defined elsewhere on the page (via
7
+ * `[[equation name]]` block syntax in the block-level parser). The
8
+ * reference is rendered as the equation's assigned number.
9
+ *
10
+ * Only the short form `eref` is recognized as a valid keyword.
11
+ * The long form `[[equation name]]` is NOT supported by Wikidot for
12
+ * inline references and is rendered as plain text.
13
+ *
14
+ * Produces an `"equation-reference"` AST element whose `data` field
15
+ * contains the reference name string.
16
+ *
17
+ * Wikidot syntax example:
18
+ * - `[[eref myEquation]]` -- references equation named "myEquation"
19
+ *
20
+ * @module
21
+ */
22
+ import type { Element } from "@wdprlib/ast";
23
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
24
+ import { currentToken } from "../types";
25
+ import { parseBlockName } from "../utils";
26
+
27
/**
 * Inline rule that recognizes equation references (`[[eref name]]`).
 *
 * The rule fires on a `BLOCK_OPEN` token. It reads the block name via
 * `parseBlockName`, accepts it only if it equals `eref` when lower-cased,
 * skips any `WHITESPACE` tokens, and then concatenates token values up to
 * the closing `BLOCK_CLOSE` to form the reference name.
 *
 * The rule reports failure when the block name cannot be read or is not
 * `eref`, when a `NEWLINE` or the end of the tokens is hit before
 * `BLOCK_CLOSE`, or when the trimmed reference name is empty.
 */
export const equationRefRule: InlineRule = {
  name: "equation-ref",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to parse an `[[eref name]]` reference starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result holding one `"equation-reference"` element
   *   whose `data` is the trimmed reference name, or `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const afterOpen = ctx.pos + 1;

    // The keyword must read "eref" (compared in lower case).
    const keyword = parseBlockName(ctx, afterOpen);
    if (!keyword || keyword.name.toLowerCase() !== "eref") {
      return { success: false };
    }

    // Single cursor; the consumed count is cursor - ctx.pos at the end.
    let cursor = afterOpen + keyword.consumed;

    // Drop the whitespace separating the keyword from the reference name.
    while (ctx.tokens[cursor]?.type === "WHITESPACE") {
      cursor += 1;
    }

    // Accumulate the reference name up to ]] or the end of the line.
    let label = "";
    for (; cursor < ctx.tokens.length; cursor += 1) {
      const tok = ctx.tokens[cursor];
      if (!tok || tok.type === "BLOCK_CLOSE" || tok.type === "NEWLINE") {
        break;
      }
      label += tok.value;
    }

    // Anything other than ]] at this point means the construct is broken.
    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor += 1;

    const target = label.trim();

    // A reference without a name is rejected.
    if (!target) {
      return { success: false };
    }

    return {
      success: true,
      elements: [
        {
          element: "equation-reference",
          data: target,
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};