@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124):
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,615 @@
1
+ /**
2
+ *
3
+ * Shared utilities used by block-level parser rules.
4
+ *
5
+ * This module provides the core building blocks that most block rules
6
+ * depend on:
7
+ *
8
+ * - {@link canApplyBlockRule} -- fast pre-check for whether a rule's start
9
+ * tokens match the current token.
10
+ * - {@link parseBlocksUntil} -- the main block-level content parser that
11
+ * iterates rules until a close condition is met (used by div, collapsible,
12
+ * tabview, iftags, align, etc.).
13
+ * - {@link parseInlineContentUntil} -- similar to `parseBlocksUntil` but
14
+ * without paragraph wrapping, used for `div_` paragraph-strip mode.
15
+ * - {@link parseAttributes} / {@link parseAttributesRaw} -- attribute
16
+ * parsers for block opening tags (with and without safety filtering).
17
+ * - {@link createBlockEndCondition} -- factory for close-condition predicates.
18
+ *
19
+ * Re-exports {@link filterUnsafeAttributes} and {@link parseBlockName} from
20
+ * the shared `../utils` module for backward compatibility.
21
+ *
22
+ * @module
23
+ */
24
+ import type { Token } from "../../../lexer";
25
+ import type { Element } from "@wdprlib/ast";
26
+ import type { ParseContext, BlockRule } from "../types";
27
+ import { KNOWN_BLOCK_NAMES } from "../../constants";
28
+ import { canApplyInlineRule } from "../inline/utils";
29
+ import { filterUnsafeAttributes, parseBlockName } from "../utils";
30
+
/**
 * Reports whether the block tag starting at `pos` should be treated as inline
 * content, i.e. it must NOT terminate the surrounding paragraph / inline run.
 *
 * Used by paragraph-strip mode (`div_`) to decide whether a newline in front
 * of a `[[...]]` / `[[/...]]` tag still produces a line-break.
 *
 * Decision table:
 * - token at `pos` is not BLOCK_OPEN / BLOCK_END_OPEN -> `false`
 * - no block name can be parsed and the next token is EQUALS -> `false`
 * - no block name can be parsed otherwise -> `true`
 * - name is listed in `ctx.scope.excludedBlockNames` -> `true`
 * - otherwise -> `true` only when the name is not in `KNOWN_BLOCK_NAMES`
 *
 * @param ctx - Parse context supplying the token stream and scope.
 * @param pos - Index of the candidate BLOCK_OPEN / BLOCK_END_OPEN token.
 * @returns `true` when the tag is inline-treated (not a block boundary).
 */
function isNonBoundaryBlockToken(ctx: ParseContext, pos: number): boolean {
  const openerType = ctx.tokens[pos]?.type;
  if (openerType !== "BLOCK_OPEN" && openerType !== "BLOCK_END_OPEN") {
    return false;
  }

  const parsedName = parseBlockName(ctx, pos + 1);
  if (parsedName === null) {
    // `[[=]]` / `[[==]]` align markers tokenize as EQUALS rather than a name;
    // they are genuine block boundaries. Anything else without a recognizable
    // identifier after `[[` is handled as inline text.
    return ctx.tokens[pos + 1]?.type !== "EQUALS";
  }

  const { name } = parsedName;
  // Excluded names are inline by request of the enclosing container; unknown
  // names are inline because no block rule is registered for them.
  return ctx.scope.excludedBlockNames?.has(name) || !KNOWN_BLOCK_NAMES.has(name);
}
57
+
58
+ // Re-export for backwards compatibility
59
+ export { filterUnsafeAttributes, parseBlockName } from "../utils";
60
+
/**
 * Outcome of parsing a run of block-level (or mixed inline/block) content.
 *
 * Returned by {@link parseBlocksUntil} and {@link parseInlineContentUntil}.
 */
export interface BlockParseResult {
  /** How many tokens were taken from the stream, counted from the start position. */
  consumed: number;
  /** AST elements produced by the parse. */
  elements: Element[];
}
70
+
/**
 * Cheap pre-check deciding whether a block rule is worth attempting on a token.
 *
 * The rule is rejected when it demands a line-start position and the token is
 * not at one. Otherwise it is accepted when its `startTokens` list is empty
 * (a universal fallback rule) or contains the token's type.
 *
 * @param rule - Block rule under consideration.
 * @param token - Token at the current parse position.
 * @returns `true` when `rule.parse` may be attempted for this token.
 */
export function canApplyBlockRule(rule: BlockRule, token: Token): boolean {
  const misplaced = Boolean(rule.requiresLineStart) && !token.lineStart;
  if (misplaced) {
    return false;
  }

  const { startTokens } = rule;
  // An empty list marks a fallback rule that accepts any token type.
  return startTokens.length === 0 || startTokens.includes(token.type);
}
92
+
/**
 * Parses block-level elements until `closeCondition` reports the end of the
 * enclosing body, the token stream runs out, or an EOF token is reached.
 *
 * For every position the function:
 * 1. stops if `closeCondition` returns `true` for a context snapshot at that
 *    position (the closing tag itself is left for the caller to consume);
 * 2. silently consumes WHITESPACE and NEWLINE tokens;
 * 3. tries each applicable block rule in list order and takes the first
 *    successful result;
 * 4. otherwise runs `ctx.blockFallbackRule`; if that fails or yields no
 *    elements, the single token is skipped so the loop always advances.
 *
 * Rules receive a context whose `scope.blockCloseCondition` is set to
 * `closeCondition` and whose `scope.excludedBlockNames` is set to
 * `options.excludedBlockNames`. Note that the latter is assigned even when no
 * options are given, in which case it becomes `undefined` for nested rules.
 *
 * @param ctx - Parse context positioned at the start of the body.
 * @param closeCondition - Predicate that signals the end of the block body.
 * @param options - Optional settings.
 * @param options.excludedBlockNames - Names of block rules to leave out of
 *   rule dispatch; the same set is exposed to nested rules through
 *   `scope.excludedBlockNames`.
 * @returns Parsed elements and the total number of tokens consumed.
 */
export function parseBlocksUntil(
  ctx: ParseContext,
  closeCondition: (ctx: ParseContext) => boolean,
  options?: { excludedBlockNames?: ReadonlySet<string> },
): BlockParseResult {
  const excludedNames = options?.excludedBlockNames;
  const activeRules = excludedNames
    ? ctx.blockRules.filter((rule) => !excludedNames.has(rule.name))
    : ctx.blockRules;
  const paragraphRule = ctx.blockFallbackRule;
  const { tokens } = ctx;

  const elements: Element[] = [];
  const start = ctx.pos;
  // Every consumed token advances the cursor by exactly one, so the consumed
  // count is always `cursor - start`.
  let cursor = start;

  while (cursor < tokens.length) {
    const token = tokens[cursor];
    if (!token || token.type === "EOF") {
      break;
    }

    // Ask the enclosing block whether its body ends here.
    if (closeCondition({ ...ctx, pos: cursor })) {
      break;
    }

    // Whitespace and newlines between blocks carry no content.
    if (token.type === "WHITESPACE" || token.type === "NEWLINE") {
      cursor += 1;
      continue;
    }

    // Context handed to the rules: current position, filtered rule list, and
    // the enclosing block's close condition / exclusions in the scope.
    const blockCtx: ParseContext = {
      ...ctx,
      pos: cursor,
      blockRules: activeRules,
      scope: {
        ...ctx.scope,
        blockCloseCondition: closeCondition,
        excludedBlockNames: excludedNames,
      },
    };

    let handled = false;
    for (const rule of activeRules) {
      if (!canApplyBlockRule(rule, token)) {
        continue;
      }
      const attempt = rule.parse(blockCtx);
      if (!attempt.success) {
        continue;
      }
      elements.push(...attempt.elements);
      cursor += attempt.consumed;
      handled = true;
      break;
    }
    if (handled) {
      continue;
    }

    // No dedicated rule matched: fall back to the paragraph rule.
    const fallback = paragraphRule.parse(blockCtx);
    if (fallback.success && fallback.elements.length > 0) {
      elements.push(...fallback.elements);
      cursor += fallback.consumed;
    } else {
      // Drop the token so the loop cannot spin on the same position.
      cursor += 1;
    }
  }

  return { elements, consumed: cursor - start };
}
207
+
/**
 * Parses mixed inline/block content until `closeCondition` is met, without
 * wrapping anything in paragraphs (paragraph-strip mode, `div_`).
 *
 * Behaviour per token:
 * - WHITESPACE at the start of a line is dropped.
 * - A run of one or more NEWLINE tokens becomes a single `line-break`
 *   element, unless the run is followed by the end of the stream / EOF, or by
 *   a BLOCK_OPEN / BLOCK_END_OPEN token that is a real block boundary
 *   (inline-treated tags such as excluded or unknown names still get the
 *   line-break).
 * - Block rules are tried first and their elements are mixed directly into
 *   the output; inline rules are tried next.
 * - If nothing matches, the token's raw value is emitted as a `text` element.
 *
 * Trailing `line-break` elements are removed before returning.
 *
 * @param ctx - Parse context positioned at the start of the body.
 * @param closeCondition - Predicate that signals the end of the content.
 * @returns Parsed elements and the total number of tokens consumed.
 */
export function parseInlineContentUntil(
  ctx: ParseContext,
  closeCondition: (ctx: ParseContext) => boolean,
): BlockParseResult {
  const { tokens, blockRules, inlineRules } = ctx;
  const output: Element[] = [];
  const origin = ctx.pos;
  // Each consumed token moves the cursor by one, so consumed === cursor - origin.
  let cursor = origin;

  while (cursor < tokens.length) {
    const token = tokens[cursor];
    if (!token || token.type === "EOF") {
      break;
    }

    if (closeCondition({ ...ctx, pos: cursor })) {
      break;
    }

    // Indentation at the start of a line is dropped; whitespace between
    // words is left for the inline rules / text fallback.
    if (token.type === "WHITESPACE" && token.lineStart) {
      cursor += 1;
      continue;
    }

    if (token.type === "NEWLINE") {
      // Collapse the whole run of newlines (blank lines included).
      do {
        cursor += 1;
      } while (tokens[cursor]?.type === "NEWLINE");

      const upcoming = tokens[cursor];
      if (!upcoming || upcoming.type === "EOF") {
        continue;
      }

      // A newline directly in front of a real block tag only separates text
      // from the block and yields no line-break. Tags that the paragraph
      // parser treats as inline (excluded or unknown names) still do.
      const isBlockTag = upcoming.type === "BLOCK_OPEN" || upcoming.type === "BLOCK_END_OPEN";
      if (isBlockTag && !isNonBoundaryBlockToken({ ...ctx, pos: cursor }, cursor)) {
        continue;
      }

      output.push({ element: "line-break" });
      continue;
    }

    let advanced = false;

    // Block rules first (nested div, collapsible, ...); their elements go
    // straight into the inline stream.
    const blockCtx: ParseContext = { ...ctx, pos: cursor };
    for (const rule of blockRules) {
      if (!canApplyBlockRule(rule, token)) {
        continue;
      }
      const attempt = rule.parse(blockCtx);
      if (!attempt.success) {
        continue;
      }
      output.push(...attempt.elements);
      cursor += attempt.consumed;
      advanced = true;
      break;
    }
    if (advanced) {
      continue;
    }

    // Then the inline rules, with a fresh context object.
    const inlineCtx: ParseContext = { ...ctx, pos: cursor };
    for (const rule of inlineRules) {
      if (!canApplyInlineRule(rule, token)) {
        continue;
      }
      const attempt = rule.parse(inlineCtx);
      if (!attempt.success) {
        continue;
      }
      output.push(...attempt.elements);
      cursor += attempt.consumed;
      advanced = true;
      break;
    }

    if (!advanced) {
      // Nothing recognized the token: keep its raw value as text.
      output.push({ element: "text", data: token.value });
      cursor += 1;
    }
  }

  // Strip trailing line-breaks by truncating the array in place.
  let keep = output.length;
  while (keep > 0 && output[keep - 1]?.element === "line-break") {
    keep -= 1;
  }
  output.length = keep;

  return { elements: output, consumed: cursor - origin };
}
341
+
/**
 * Reads HTML-style attributes from a block opening tag and returns them after
 * passing the result through {@link filterUnsafeAttributes}.
 *
 * Recognized forms:
 * - `name="value"` -- QUOTED_STRING value; one pair of surrounding double
 *   quotes is removed when present.
 * - `name=value` -- a single TEXT or IDENTIFIER token as the value.
 * - `name` -- no `=` follows; stored with the string value `"true"`.
 * - Hyphenated names such as `data-paragraph`, assembled from
 *   name, TEXT `-`, name token sequences.
 *
 * Names are lowercased. Scanning stops at BLOCK_CLOSE, NEWLINE, EOF, or the
 * end of the token array; any other token that cannot start a name is skipped.
 * When `=` is followed by a token that is neither a quoted string nor
 * TEXT / IDENTIFIER, no entry is stored for that name.
 *
 * @param ctx - Parse context.
 * @param startPos - Token index to begin scanning.
 * @returns Filtered attributes and the number of tokens consumed.
 */
export function parseAttributes(
  ctx: ParseContext,
  startPos: number,
): { attrs: Record<string, string>; consumed: number } {
  const { tokens } = ctx;
  const collected: Record<string, string> = {};
  // The cursor advances once per consumed token, so consumed === cursor - startPos.
  let cursor = startPos;

  while (cursor < tokens.length) {
    const current = tokens[cursor];
    if (!current) {
      break;
    }
    const kind = current.type;
    if (kind === "BLOCK_CLOSE" || kind === "NEWLINE" || kind === "EOF") {
      break;
    }

    // Whitespace and any token that cannot begin an attribute name are skipped.
    if (kind !== "TEXT" && kind !== "IDENTIFIER") {
      cursor += 1;
      continue;
    }

    // Attribute name, possibly hyphenated: e.g. "data-paragraph" tokenizes as
    // IDENTIFIER "data", TEXT "-", IDENTIFIER "paragraph".
    let name = current.value;
    cursor += 1;
    for (;;) {
      const dash = tokens[cursor];
      const segment = tokens[cursor + 1];
      const isDash = dash?.type === "TEXT" && dash.value === "-";
      const hasSegment = segment?.type === "IDENTIFIER" || segment?.type === "TEXT";
      if (!isDash || !hasSegment) {
        break;
      }
      name += `-${segment?.value ?? ""}`;
      cursor += 2;
    }

    // Wikidot attribute names are case-insensitive.
    name = name.toLowerCase();

    if (tokens[cursor]?.type !== "EQUALS") {
      // Bare name: boolean attribute.
      collected[name] = "true";
      continue;
    }
    cursor += 1; // the `=`

    const valueToken = tokens[cursor];
    if (valueToken?.type === "QUOTED_STRING") {
      const raw = valueToken.value;
      const isWrapped = raw.startsWith('"') && raw.endsWith('"');
      collected[name] = isWrapped ? raw.slice(1, -1) : raw;
      cursor += 1;
    } else if (valueToken?.type === "TEXT" || valueToken?.type === "IDENTIFIER") {
      collected[name] = valueToken.value;
      cursor += 1;
    }
  }

  return { attrs: filterUnsafeAttributes(collected), consumed: cursor - startPos };
}
447
+
/**
 * Reads attributes from a block opening tag and returns them unfiltered.
 *
 * Intended for block-specific parameters (such as `type` on `[[code]]`) that
 * are not emitted as HTML attributes. The value handling matches
 * {@link parseAttributes} (`name="value"`, `name=value`, bare `name` stored as
 * `"true"`), but name assembly is more permissive:
 *
 * - both TEXT `-` and STRIKE_MARKER (`--`) tokens count as hyphens, and runs
 *   of consecutive hyphen tokens are accepted (e.g. `data--something`);
 * - with `hyphenatedNames = true` the hyphens and following segments are
 *   appended to the name (`data-src`);
 * - with `hyphenatedNames = false` they are consumed but discarded, so only
 *   the first segment is used (`data-src=x` is stored under `data`) instead
 *   of `src` being read as a separate attribute.
 *
 * Names are lowercased. Scanning stops at BLOCK_CLOSE, NEWLINE, EOF, or the
 * end of the token array.
 *
 * @param ctx - Parse context.
 * @param startPos - Token index to begin scanning.
 * @param hyphenatedNames - Whether hyphenated segments become part of the
 *   attribute name (default `true`).
 * @returns Unfiltered attributes and the number of tokens consumed.
 */
export function parseAttributesRaw(
  ctx: ParseContext,
  startPos: number,
  hyphenatedNames = true,
): { attrs: Record<string, string>; consumed: number } {
  const { tokens } = ctx;

  // Token classifiers used while assembling a (possibly hyphenated) name.
  const isHyphenToken = (t: (typeof ctx.tokens)[0] | undefined) =>
    (t?.type === "TEXT" && t.value === "-") || t?.type === "STRIKE_MARKER";
  const isNamePartToken = (t: (typeof ctx.tokens)[0] | undefined) =>
    t?.type === "IDENTIFIER" || t?.type === "TEXT";

  const collected: Record<string, string> = {};
  // The cursor advances once per consumed token, so consumed === cursor - startPos.
  let cursor = startPos;

  while (cursor < tokens.length) {
    const current = tokens[cursor];
    if (!current) {
      break;
    }
    const kind = current.type;
    if (kind === "BLOCK_CLOSE" || kind === "NEWLINE" || kind === "EOF") {
      break;
    }

    // Whitespace and tokens that cannot begin a name are skipped.
    if (kind !== "TEXT" && kind !== "IDENTIFIER") {
      cursor += 1;
      continue;
    }

    let name = current.value;
    cursor += 1;

    while (isHyphenToken(tokens[cursor])) {
      // Take the whole run of hyphen tokens (`-`, `--`, `----`, ...).
      do {
        if (hyphenatedNames) {
          name += tokens[cursor]?.value ?? "-";
        }
        cursor += 1;
      } while (isHyphenToken(tokens[cursor]));

      // A name segment must follow, otherwise the name ends here.
      const segment = tokens[cursor];
      if (!isNamePartToken(segment)) {
        break;
      }
      if (hyphenatedNames) {
        name += segment?.value ?? "";
      }
      cursor += 1;
    }

    // Wikidot attribute names are case-insensitive.
    name = name.toLowerCase();

    if (tokens[cursor]?.type !== "EQUALS") {
      // Bare name: boolean attribute.
      collected[name] = "true";
      continue;
    }
    cursor += 1; // the `=`

    const valueToken = tokens[cursor];
    if (valueToken?.type === "QUOTED_STRING") {
      const raw = valueToken.value;
      const isWrapped = raw.startsWith('"') && raw.endsWith('"');
      collected[name] = isWrapped ? raw.slice(1, -1) : raw;
      cursor += 1;
    } else if (valueToken?.type === "TEXT" || valueToken?.type === "IDENTIFIER") {
      collected[name] = valueToken.value;
      cursor += 1;
    }
  }

  return { attrs: collected, consumed: cursor - startPos };
}
567
+
/**
 * Creates a reusable matcher for block end tags (`[[/name]]`) of one or more
 * block names.
 *
 * The returned function inspects the tokens at `ctx.pos` and reports whether
 * an end tag for one of `blockNames` starts there and how many tokens that
 * tag occupies.
 *
 * The matcher returns an object, not a boolean. `parseBlocksUntil` and
 * `parseInlineContentUntil` expect a boolean predicate, so adapt it when
 * passing it on, e.g. `(c) => isEnd(c).matched`.
 *
 * `consumed` always describes a contiguous span starting at `ctx.pos`:
 * `[[/`, the name tokens, then `]]` if it directly follows the name, then a
 * NEWLINE if it directly follows that `]]`. A NEWLINE is not counted when the
 * `]]` is missing, so advancing by `consumed` never skips an unrelated token.
 *
 * @param blockNames - Block names to match (e.g. `["div"]`).
 * @returns A function returning `{ matched, consumed }` for a given context;
 *   `consumed` is `0` whenever `matched` is `false`.
 */
export function createBlockEndCondition(
  blockNames: string[],
): (ctx: ParseContext) => { matched: boolean; consumed: number } {
  return (ctx: ParseContext) => {
    const { tokens, pos } = ctx;

    if (tokens[pos]?.type !== "BLOCK_END_OPEN") {
      return { matched: false, consumed: 0 };
    }

    const nameResult = parseBlockName(ctx, pos + 1);
    if (!nameResult || !blockNames.includes(nameResult.name)) {
      return { matched: false, consumed: 0 };
    }

    // `[[/` plus the tokens that make up the name.
    let consumed = 1 + nameResult.consumed;

    // The closing `]]` and the newline that ends the tag's line are only
    // counted when they are adjacent to what has been consumed so far.
    if (tokens[pos + consumed]?.type === "BLOCK_CLOSE") {
      consumed += 1;
      if (tokens[pos + consumed]?.type === "NEWLINE") {
        consumed += 1;
      }
    }

    return { matched: true, consumed };
  };
}
@@ -0,0 +1,49 @@
1
+ /**
2
+ *
3
+ * Aggregated exports for all parser rules (block and inline).
4
+ *
5
+ * The parser uses a rule-based architecture where each syntactic construct
6
+ * (heading, list, bold, link, etc.) is defined as a separate rule. Rules are
7
+ * organized into two categories:
8
+ *
9
+ * - **Block rules**: Match constructs that occupy one or more full lines
10
+ * (headings, lists, blockquotes, horizontal rules, paragraphs, etc.)
11
+ * - **Inline rules**: Match constructs within a line of text
12
+ * (bold, italic, links, raw/code spans, etc.)
13
+ *
14
+ * Each category also has a fallback rule that is used when no other rule matches,
15
+ * ensuring that all input is consumed.
16
+ *
17
+ * @module
18
+ */
19
+
20
+ // Types
21
+ export type { ParseContext, ScopeContext, RuleResult, BlockRule, InlineRule } from "./types";
22
+ export {
23
+ currentToken,
24
+ peekToken,
25
+ checkToken,
26
+ isAtEnd,
27
+ hasClosingMarkerBeforeNewline,
28
+ } from "./types";
29
+
30
+ // Block rules
31
+ export { blockRules, blockFallbackRule } from "./block";
32
+ export { headingRule, horizontalRuleRule, listRule, blockquoteRule, paragraphRule } from "./block"; // TODO: verify that all required rules are exported
33
+
34
+ // Inline rules
35
+ export { inlineRules, inlineFallbackRule } from "./inline";
36
+ export {
37
+ boldRule,
38
+ italicRule,
39
+ underlineRule,
40
+ strikethroughRule,
41
+ superscriptRule,
42
+ subscriptRule,
43
+ monospaceRule,
44
+ linkTripleRule,
45
+ linkSingleRule,
46
+ linkAnchorRule,
47
+ rawRule,
48
+ textRule,
49
+ } from "./inline"; // TODO: verify that all required rules are exported