@wdprlib/parser 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433) hide show
  1. package/dist/index.cjs +10451 -8402
  2. package/dist/index.d.cts +313 -337
  3. package/dist/index.d.ts +313 -337
  4. package/dist/index.js +10438 -8389
  5. package/package.json +1 -1
  6. package/src/index.ts +7 -0
  7. package/src/lexer/anchor.ts +48 -0
  8. package/src/lexer/index.ts +3 -2
  9. package/src/lexer/lexer.ts +73 -559
  10. package/src/lexer/options.ts +19 -0
  11. package/src/lexer/punctuation.ts +70 -0
  12. package/src/lexer/quoted-string.ts +16 -0
  13. package/src/lexer/runs.ts +85 -0
  14. package/src/lexer/spacing-actions.ts +24 -0
  15. package/src/lexer/state.ts +103 -0
  16. package/src/lexer/syntax-actions.ts +80 -0
  17. package/src/lexer/text-actions.ts +41 -0
  18. package/src/lexer/token-actions.ts +136 -0
  19. package/src/lexer/token-factory.ts +62 -0
  20. package/src/lexer/tokenize.ts +18 -0
  21. package/src/parser/constants.ts +2 -0
  22. package/src/parser/depth/index.ts +111 -0
  23. package/src/parser/depth/stack.ts +82 -0
  24. package/src/parser/parse/block.ts +42 -0
  25. package/src/parser/parse/context.ts +26 -0
  26. package/src/parser/parse/footnotes.ts +25 -0
  27. package/src/parser/parse/index.ts +42 -0
  28. package/src/parser/parse/options.ts +34 -0
  29. package/src/parser/parse/parser.ts +79 -0
  30. package/src/parser/parse/plain-non-ascii.ts +129 -0
  31. package/src/parser/parse/result.ts +57 -0
  32. package/src/parser/parse/source.ts +11 -0
  33. package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
  34. package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
  35. package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
  36. package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
  37. package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
  38. package/src/parser/postprocess/spanStrip/factory.ts +23 -0
  39. package/src/parser/postprocess/spanStrip/index.ts +8 -0
  40. package/src/parser/postprocess/spanStrip/merge.ts +117 -0
  41. package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
  42. package/src/parser/postprocess/spanStrip/split.ts +67 -0
  43. package/src/parser/preprocess/expr/chars.ts +15 -0
  44. package/src/parser/preprocess/expr/evaluate.ts +22 -0
  45. package/src/parser/preprocess/expr/index.ts +45 -0
  46. package/src/parser/preprocess/expr/kind.ts +19 -0
  47. package/src/parser/preprocess/expr/parse.ts +103 -0
  48. package/src/parser/preprocess/expr/scan.ts +34 -0
  49. package/src/parser/preprocess/expr/types.ts +14 -0
  50. package/src/parser/preprocess/typography.ts +70 -5
  51. package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
  52. package/src/parser/preprocess/utils/index.ts +13 -0
  53. package/src/parser/preprocess/utils/raw-regions.ts +153 -0
  54. package/src/parser/preprocess/whitespace/detection.ts +39 -0
  55. package/src/parser/preprocess/whitespace/index.ts +79 -0
  56. package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
  57. package/src/parser/preprocess/whitespace/patterns.ts +23 -0
  58. package/src/parser/rules/block/align/body.ts +46 -0
  59. package/src/parser/rules/block/align/element.ts +13 -0
  60. package/src/parser/rules/block/align/index.ts +90 -0
  61. package/src/parser/rules/block/align/syntax.ts +113 -0
  62. package/src/parser/rules/block/bibliography/body.ts +81 -0
  63. package/src/parser/rules/block/bibliography/entries.ts +49 -0
  64. package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
  65. package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
  66. package/src/parser/rules/block/bibliography/index.ts +90 -0
  67. package/src/parser/rules/block/bibliography/open.ts +53 -0
  68. package/src/parser/rules/block/block-list/bare-content.ts +105 -0
  69. package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
  70. package/src/parser/rules/block/block-list/index.ts +51 -0
  71. package/src/parser/rules/block/block-list/item-content.ts +132 -0
  72. package/src/parser/rules/block/block-list/li-content.ts +107 -0
  73. package/src/parser/rules/block/block-list/li-item.ts +77 -0
  74. package/src/parser/rules/block/block-list/list-block.ts +100 -0
  75. package/src/parser/rules/block/block-list/open.ts +51 -0
  76. package/src/parser/rules/block/block-list/tags.ts +50 -0
  77. package/src/parser/rules/block/blockquote/build.ts +62 -0
  78. package/src/parser/rules/block/blockquote/index.ts +80 -0
  79. package/src/parser/rules/block/blockquote/line.ts +79 -0
  80. package/src/parser/rules/block/blockquote/lines.ts +39 -0
  81. package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
  82. package/src/parser/rules/block/center/open.ts +27 -0
  83. package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
  84. package/src/parser/rules/block/clear-float/syntax.ts +43 -0
  85. package/src/parser/rules/block/code/attributes.ts +30 -0
  86. package/src/parser/rules/block/code/content.ts +57 -0
  87. package/src/parser/rules/block/code/index.ts +100 -0
  88. package/src/parser/rules/block/collapsible/attributes.ts +95 -0
  89. package/src/parser/rules/block/collapsible/body.ts +69 -0
  90. package/src/parser/rules/block/collapsible/index.ts +117 -0
  91. package/src/parser/rules/block/collapsible/open.ts +51 -0
  92. package/src/parser/rules/block/collapsible/orphans.ts +31 -0
  93. package/src/parser/rules/block/collapsible/tags.ts +17 -0
  94. package/src/parser/rules/block/comment/consume.ts +37 -0
  95. package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
  96. package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
  97. package/src/parser/rules/block/content-separator/syntax.ts +33 -0
  98. package/src/parser/rules/block/definition-list/collect.ts +40 -0
  99. package/src/parser/rules/block/definition-list/index.ts +63 -0
  100. package/src/parser/rules/block/definition-list/item-key.ts +95 -0
  101. package/src/parser/rules/block/definition-list/item-value.ts +56 -0
  102. package/src/parser/rules/block/definition-list/items.ts +54 -0
  103. package/src/parser/rules/block/div/body.ts +41 -0
  104. package/src/parser/rules/block/div/close.ts +41 -0
  105. package/src/parser/rules/block/div/failed.ts +117 -0
  106. package/src/parser/rules/block/div/index.ts +112 -0
  107. package/src/parser/rules/block/div/nesting.ts +37 -0
  108. package/src/parser/rules/block/div/open.ts +59 -0
  109. package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
  110. package/src/parser/rules/block/embed-block/content.ts +53 -0
  111. package/src/parser/rules/block/embed-block/index.ts +91 -0
  112. package/src/parser/rules/block/embed-block/open.ts +52 -0
  113. package/src/parser/rules/block/embed-block/tags.ts +5 -0
  114. package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
  115. package/src/parser/rules/block/footnoteblock/index.ts +82 -0
  116. package/src/parser/rules/block/footnoteblock/open.ts +53 -0
  117. package/src/parser/rules/block/heading/index.ts +87 -0
  118. package/src/parser/rules/block/heading/open.ts +50 -0
  119. package/src/parser/rules/block/heading/toc-text.ts +26 -0
  120. package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
  121. package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
  122. package/src/parser/rules/block/html/body.ts +114 -0
  123. package/src/parser/rules/block/html/diagnostics.ts +11 -0
  124. package/src/parser/rules/block/html/index.ts +95 -0
  125. package/src/parser/rules/block/html/open.ts +36 -0
  126. package/src/parser/rules/block/iframe/attributes.ts +106 -0
  127. package/src/parser/rules/block/iframe/index.ts +73 -0
  128. package/src/parser/rules/block/iframe/open.ts +58 -0
  129. package/src/parser/rules/block/iframe/source.ts +24 -0
  130. package/src/parser/rules/block/iframe/url.ts +38 -0
  131. package/src/parser/rules/block/iftags/body.ts +48 -0
  132. package/src/parser/rules/block/iftags/condition.ts +24 -0
  133. package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
  134. package/src/parser/rules/block/include/arguments.ts +48 -0
  135. package/src/parser/rules/block/include/index.ts +75 -0
  136. package/src/parser/rules/block/include/location.ts +24 -0
  137. package/src/parser/rules/block/include/variables.ts +37 -0
  138. package/src/parser/rules/block/list/index.ts +73 -0
  139. package/src/parser/rules/block/list/line.ts +77 -0
  140. package/src/parser/rules/block/list/native.ts +89 -0
  141. package/src/parser/rules/block/math/content.ts +54 -0
  142. package/src/parser/rules/block/math/index.ts +106 -0
  143. package/src/parser/rules/block/math/name.ts +35 -0
  144. package/src/parser/rules/block/module/body.ts +92 -0
  145. package/src/parser/rules/block/module/element.ts +33 -0
  146. package/src/parser/rules/block/module/include/directive.ts +91 -0
  147. package/src/parser/rules/block/module/include/index.ts +11 -2
  148. package/src/parser/rules/block/module/include/references.ts +42 -0
  149. package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
  150. package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
  151. package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
  152. package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
  153. package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
  154. package/src/parser/rules/block/module/include/scanner.ts +121 -0
  155. package/src/parser/rules/block/module/index.ts +14 -2
  156. package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
  157. package/src/parser/rules/block/module/listpages/extract.ts +25 -359
  158. package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
  159. package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
  160. package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
  161. package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
  162. package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
  163. package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
  164. package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
  165. package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
  166. package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
  167. package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
  168. package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
  169. package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
  170. package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
  171. package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
  172. package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
  173. package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
  174. package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
  175. package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
  176. package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
  177. package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
  178. package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
  179. package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
  180. package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
  181. package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
  182. package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
  183. package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
  184. package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
  185. package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
  186. package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
  187. package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
  188. package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
  189. package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
  190. package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
  191. package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
  192. package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
  193. package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
  194. package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
  195. package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
  196. package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
  197. package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
  198. package/src/parser/rules/block/module/listusers/extract.ts +4 -9
  199. package/src/parser/rules/block/module/listusers/getters.ts +21 -0
  200. package/src/parser/rules/block/module/listusers/variables.ts +15 -0
  201. package/src/parser/rules/block/module/open.ts +57 -0
  202. package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
  203. package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
  204. package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
  205. package/src/parser/rules/block/module/resolution/styles.ts +53 -0
  206. package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
  207. package/src/parser/rules/block/module/resolve.ts +79 -292
  208. package/src/parser/rules/block/module/rule.ts +56 -0
  209. package/src/parser/rules/block/module/types-common.ts +11 -0
  210. package/src/parser/rules/block/module/walk/children.ts +35 -0
  211. package/src/parser/rules/block/module/walk/index.ts +9 -0
  212. package/src/parser/rules/block/module/walk/map/index.ts +2 -0
  213. package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
  214. package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
  215. package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
  216. package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
  217. package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
  218. package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
  219. package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
  220. package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
  221. package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
  222. package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
  223. package/src/parser/rules/block/module/walk/map/types.ts +6 -0
  224. package/src/parser/rules/block/module/walk/traverse.ts +65 -0
  225. package/src/parser/rules/block/orphan-li/content.ts +60 -0
  226. package/src/parser/rules/block/orphan-li/index.ts +75 -0
  227. package/src/parser/rules/block/orphan-li/open.ts +25 -0
  228. package/src/parser/rules/block/orphan-li/tags.ts +40 -0
  229. package/src/parser/rules/block/paragraph/content.ts +12 -0
  230. package/src/parser/rules/block/paragraph/index.ts +60 -0
  231. package/src/parser/rules/block/paragraph/normalize.ts +52 -0
  232. package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
  233. package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
  234. package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
  235. package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
  236. package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
  237. package/src/parser/rules/block/parsing/block-item.ts +29 -0
  238. package/src/parser/rules/block/parsing/content.ts +127 -0
  239. package/src/parser/rules/block/parsing/end-condition.ts +51 -0
  240. package/src/parser/rules/block/parsing/inline-content.ts +105 -0
  241. package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
  242. package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
  243. package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
  244. package/src/parser/rules/block/table/index.ts +80 -0
  245. package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
  246. package/src/parser/rules/block/table/pipe/cell.ts +106 -0
  247. package/src/parser/rules/block/table/pipe/index.ts +2 -0
  248. package/src/parser/rules/block/table/pipe/row.ts +88 -0
  249. package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
  250. package/src/parser/rules/block/table/pipe/trim.ts +50 -0
  251. package/src/parser/rules/block/table-block/body.ts +79 -0
  252. package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
  253. package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
  254. package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
  255. package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
  256. package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
  257. package/src/parser/rules/block/table-block/cell.ts +64 -0
  258. package/src/parser/rules/block/table-block/index.ts +113 -0
  259. package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
  260. package/src/parser/rules/block/table-block/structure.ts +80 -0
  261. package/src/parser/rules/block/tabview/body.ts +64 -0
  262. package/src/parser/rules/block/tabview/index.ts +90 -0
  263. package/src/parser/rules/block/tabview/open.ts +50 -0
  264. package/src/parser/rules/block/tabview/tab.ts +92 -0
  265. package/src/parser/rules/block/tabview/tags.ts +30 -0
  266. package/src/parser/rules/block/toc/element.ts +11 -0
  267. package/src/parser/rules/block/toc/index.ts +44 -0
  268. package/src/parser/rules/block/toc/open.ts +84 -0
  269. package/src/parser/rules/block/utils.ts +10 -610
  270. package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
  271. package/src/parser/rules/common/block-name.ts +33 -0
  272. package/src/parser/rules/common/index.ts +2 -0
  273. package/src/parser/rules/contracts/index.ts +3 -0
  274. package/src/parser/rules/contracts/parse-context.ts +38 -0
  275. package/src/parser/rules/contracts/rule.ts +43 -0
  276. package/src/parser/rules/contracts/scope.ts +31 -0
  277. package/src/parser/rules/inline/anchor/attributes.ts +54 -0
  278. package/src/parser/rules/inline/anchor/child.ts +26 -0
  279. package/src/parser/rules/inline/anchor/close.ts +34 -0
  280. package/src/parser/rules/inline/anchor/content.ts +59 -0
  281. package/src/parser/rules/inline/anchor/index.ts +103 -0
  282. package/src/parser/rules/inline/anchor/newline.ts +26 -0
  283. package/src/parser/rules/inline/anchor/open.ts +47 -0
  284. package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
  285. package/src/parser/rules/inline/anchor/syntax.ts +40 -0
  286. package/src/parser/rules/inline/anchor-name/index.ts +38 -0
  287. package/src/parser/rules/inline/anchor-name/name.ts +39 -0
  288. package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
  289. package/src/parser/rules/inline/bibcite/element.ts +14 -0
  290. package/src/parser/rules/inline/bibcite/index.ts +34 -0
  291. package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
  292. package/src/parser/rules/inline/bold.ts +2 -39
  293. package/src/parser/rules/inline/color/index.ts +35 -0
  294. package/src/parser/rules/inline/color/syntax.ts +69 -0
  295. package/src/parser/rules/inline/comment/consume.ts +31 -0
  296. package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
  297. package/src/parser/rules/inline/equation-ref/element.ts +8 -0
  298. package/src/parser/rules/inline/equation-ref/index.ts +34 -0
  299. package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
  300. package/src/parser/rules/inline/expr/branch.ts +104 -0
  301. package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
  302. package/src/parser/rules/inline/expr/conditional.ts +80 -0
  303. package/src/parser/rules/inline/expr/depth.ts +25 -0
  304. package/src/parser/rules/inline/expr/elements.ts +39 -0
  305. package/src/parser/rules/inline/expr/index.ts +84 -0
  306. package/src/parser/rules/inline/expr/syntax.ts +45 -0
  307. package/src/parser/rules/inline/footnote/child.ts +22 -0
  308. package/src/parser/rules/inline/footnote/close.ts +33 -0
  309. package/src/parser/rules/inline/footnote/content.ts +54 -0
  310. package/src/parser/rules/inline/footnote/elements.ts +38 -0
  311. package/src/parser/rules/inline/footnote/index.ts +54 -0
  312. package/src/parser/rules/inline/footnote/newline.ts +27 -0
  313. package/src/parser/rules/inline/footnote/open.ts +38 -0
  314. package/src/parser/rules/inline/formatting/container.ts +50 -0
  315. package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
  316. package/src/parser/rules/inline/guillemet/text.ts +11 -0
  317. package/src/parser/rules/inline/html/gate.ts +64 -0
  318. package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
  319. package/src/parser/rules/inline/html/open.ts +37 -0
  320. package/src/parser/rules/inline/image/attributes.ts +22 -0
  321. package/src/parser/rules/inline/image/body.ts +36 -0
  322. package/src/parser/rules/inline/image/index.ts +89 -0
  323. package/src/parser/rules/inline/image/open.ts +56 -0
  324. package/src/parser/rules/inline/image/source.ts +62 -0
  325. package/src/parser/rules/inline/image/syntax.ts +76 -0
  326. package/src/parser/rules/inline/italic.ts +2 -30
  327. package/src/parser/rules/inline/line-break/backslash.ts +58 -0
  328. package/src/parser/rules/inline/line-break/elements.ts +9 -0
  329. package/src/parser/rules/inline/line-break/index.ts +3 -0
  330. package/src/parser/rules/inline/line-break/newline.ts +82 -0
  331. package/src/parser/rules/inline/line-break/underscore.ts +45 -0
  332. package/src/parser/rules/inline/link-anchor.ts +6 -81
  333. package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
  334. package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
  335. package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
  336. package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
  337. package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
  338. package/src/parser/rules/inline/link-single.ts +7 -98
  339. package/src/parser/rules/inline/link-star.ts +7 -69
  340. package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
  341. package/src/parser/rules/inline/link-triple/index.ts +62 -0
  342. package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
  343. package/src/parser/rules/inline/link-triple/label.ts +35 -0
  344. package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
  345. package/src/parser/rules/inline/link-triple/target.ts +36 -0
  346. package/src/parser/rules/inline/math-inline/index.ts +40 -0
  347. package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
  348. package/src/parser/rules/inline/monospace.ts +2 -30
  349. package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
  350. package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
  351. package/src/parser/rules/inline/parsing/collect.ts +23 -0
  352. package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
  353. package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
  354. package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
  355. package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
  356. package/src/parser/rules/inline/parsing/rules.ts +34 -0
  357. package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
  358. package/src/parser/rules/inline/raw/angle.ts +40 -0
  359. package/src/parser/rules/inline/raw/double-at.ts +78 -0
  360. package/src/parser/rules/inline/raw/index.ts +26 -0
  361. package/src/parser/rules/inline/raw/result.ts +26 -0
  362. package/src/parser/rules/inline/size/content.ts +65 -0
  363. package/src/parser/rules/inline/size/index.ts +55 -0
  364. package/src/parser/rules/inline/size/open.ts +43 -0
  365. package/src/parser/rules/inline/size/value.ts +45 -0
  366. package/src/parser/rules/inline/span/content.ts +97 -0
  367. package/src/parser/rules/inline/span/elements.ts +108 -0
  368. package/src/parser/rules/inline/span/index.ts +79 -0
  369. package/src/parser/rules/inline/span/newline.ts +50 -0
  370. package/src/parser/rules/inline/span/syntax.ts +70 -0
  371. package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
  372. package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
  373. package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
  374. package/src/parser/rules/inline/subscript.ts +2 -39
  375. package/src/parser/rules/inline/superscript.ts +4 -39
  376. package/src/parser/rules/inline/text/element.ts +5 -0
  377. package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
  378. package/src/parser/rules/inline/underline/child.ts +26 -0
  379. package/src/parser/rules/inline/underline/content.ts +29 -0
  380. package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
  381. package/src/parser/rules/inline/user/element.ts +11 -0
  382. package/src/parser/rules/inline/user/index.ts +34 -0
  383. package/src/parser/rules/inline/user/syntax.ts +67 -0
  384. package/src/parser/rules/inline/utils.ts +4 -344
  385. package/src/parser/rules/tokens.ts +106 -0
  386. package/src/parser/rules/types.ts +9 -252
  387. package/src/parser/depth.ts +0 -251
  388. package/src/parser/parse.ts +0 -315
  389. package/src/parser/postprocess/spanStrip.ts +0 -697
  390. package/src/parser/preprocess/expr.ts +0 -265
  391. package/src/parser/preprocess/utils.ts +0 -250
  392. package/src/parser/preprocess/whitespace.ts +0 -111
  393. package/src/parser/rules/block/align.ts +0 -282
  394. package/src/parser/rules/block/bibliography.ts +0 -359
  395. package/src/parser/rules/block/block-list.ts +0 -689
  396. package/src/parser/rules/block/blockquote.ts +0 -238
  397. package/src/parser/rules/block/code.ts +0 -187
  398. package/src/parser/rules/block/collapsible.ts +0 -337
  399. package/src/parser/rules/block/definition-list.ts +0 -270
  400. package/src/parser/rules/block/div.ts +0 -400
  401. package/src/parser/rules/block/embed-block.ts +0 -153
  402. package/src/parser/rules/block/footnoteblock.ts +0 -200
  403. package/src/parser/rules/block/heading.ts +0 -142
  404. package/src/parser/rules/block/html.ts +0 -222
  405. package/src/parser/rules/block/iframe.ts +0 -239
  406. package/src/parser/rules/block/include.ts +0 -179
  407. package/src/parser/rules/block/list.ts +0 -244
  408. package/src/parser/rules/block/math.ts +0 -183
  409. package/src/parser/rules/block/module/include/resolve.ts +0 -556
  410. package/src/parser/rules/block/module/listpages/types.ts +0 -513
  411. package/src/parser/rules/block/module/walk.ts +0 -380
  412. package/src/parser/rules/block/module.ts +0 -164
  413. package/src/parser/rules/block/orphan-li.ts +0 -177
  414. package/src/parser/rules/block/paragraph.ts +0 -157
  415. package/src/parser/rules/block/table-block.ts +0 -726
  416. package/src/parser/rules/block/table.ts +0 -441
  417. package/src/parser/rules/block/tabview.ts +0 -331
  418. package/src/parser/rules/block/toc.ts +0 -129
  419. package/src/parser/rules/inline/anchor-name.ts +0 -154
  420. package/src/parser/rules/inline/anchor.ts +0 -327
  421. package/src/parser/rules/inline/bibcite.ts +0 -153
  422. package/src/parser/rules/inline/color.ts +0 -140
  423. package/src/parser/rules/inline/equation-ref.ts +0 -115
  424. package/src/parser/rules/inline/expr.ts +0 -526
  425. package/src/parser/rules/inline/footnote.ts +0 -223
  426. package/src/parser/rules/inline/image.ts +0 -328
  427. package/src/parser/rules/inline/line-break.ts +0 -326
  428. package/src/parser/rules/inline/link-triple.ts +0 -267
  429. package/src/parser/rules/inline/math-inline.ts +0 -126
  430. package/src/parser/rules/inline/raw.ts +0 -262
  431. package/src/parser/rules/inline/size.ts +0 -244
  432. package/src/parser/rules/inline/span.ts +0 -424
  433. package/src/parser/rules/inline/user.ts +0 -147
@@ -1,697 +0,0 @@
1
- /**
2
- *
3
- * Post-processing pass for paragraph merging and cleanup in the parsed AST.
4
- *
5
- * This module handles two related Wikidot behaviors:
6
- *
7
- * 1. **Paragraph strip (`span_`)**: In Wikidot, the `[[span_]]` inline element
8
- * removes paragraph boundaries around it. When a paragraph contains a `span_`
9
- * marker, it becomes a "merge anchor" that absorbs adjacent paragraphs. The
10
- * merged content is unwrapped (no `<p>` tag), matching Wikidot's rendering.
11
- * Content after a blank line inside `span_` becomes "escaped" and is placed
12
- * outside the merged paragraph.
13
- *
14
- * 2. **Empty `[[#expr ]]` paragraph splitting**: An `[[#expr ]]` element with
15
- * an empty expression acts as a paragraph break, splitting the containing
16
- * paragraph into separate paragraphs.
17
- *
18
- * Additionally, this module recursively cleans internal flags (`_paragraphStrip`,
19
- * `_emptyParagraphStrip`, `_escapedFromParagraph`, `_splitByBlankLine`) from AST
20
- * elements. These flags are used during parsing as inter-pass communication and
21
- * must not appear in the final output.
22
- *
23
- * @module
24
- */
25
- import type { Element, ContainerData, ExprData } from "@wdprlib/ast";
26
-
27
/**
 * Tell whether `el` is a container element of the given container type.
 *
 * @param el - Element to inspect
 * @param type - Expected container type (for example "paragraph", "span" or "div")
 * @returns `true` only when `el` is a container and its `data.type` equals `type`
 */
function isContainer(el: Element, type: string): boolean {
  return el.element === "container" && (el.data as ContainerData).type === type;
}
39
-
40
/**
 * Return the `data` payload of a container element.
 *
 * @param el - Element to read from
 * @returns The element's `data` viewed as ContainerData, or `null` when `el`
 *   is not a container
 */
function getContainerData(el: Element): ContainerData | null {
  return el.element === "container" ? (el.data as ContainerData) : null;
}
50
-
51
/**
 * Detect a `span_` marker: a span container carrying one of the internal
 * paragraph-strip flags (`_paragraphStrip` or `_emptyParagraphStrip`).
 *
 * Accepts `undefined` so callers can pass an out-of-range neighbour
 * (e.g. `children[i - 1]`) without checking bounds first.
 *
 * @param el - Element to inspect, or `undefined`
 * @returns `true` when `el` is a span whose strip flag is strictly `true`
 */
function isSpanStripMarker(el: Element | undefined): boolean {
  if (!el) return false;
  if (el.element !== "container") return false;

  const { type, _paragraphStrip, _emptyParagraphStrip } = el.data as ContainerData & {
    _paragraphStrip?: boolean;
    _emptyParagraphStrip?: boolean;
  };
  if (type !== "span") return false;
  return _paragraphStrip === true || _emptyParagraphStrip === true;
}
71
-
72
/**
 * Report whether a paragraph has a `span_` marker among its direct children.
 *
 * Only direct children are examined; nested descendants are not searched.
 *
 * @param para - Element expected to be a paragraph container
 * @returns `true` when `para` is a paragraph and at least one direct child is
 *   a paragraph-strip marker; `false` for anything that is not a paragraph
 */
function hasParagraphStripSpan(para: Element): boolean {
  const paragraph = getContainerData(para);
  if (paragraph?.type !== "paragraph") return false;

  for (const child of paragraph.elements) {
    if (isSpanStripMarker(child)) return true;
  }
  return false;
}
86
-
87
/**
 * Detect an "escaped" span: a span container flagged `_escapedFromParagraph`.
 *
 * The parser sets this flag on span content that follows a blank line inside
 * a `[[span_]]` block; such content is rendered outside the paragraph.
 *
 * @param el - Element to inspect
 * @returns `true` when `el` is a span whose `_escapedFromParagraph` is strictly `true`
 */
function isEscapedSpan(el: Element): boolean {
  if (el.element === "container") {
    const spanData = el.data as ContainerData & { _escapedFromParagraph?: boolean };
    return spanData.type === "span" && spanData._escapedFromParagraph === true;
  }
  return false;
}
102
-
103
/**
 * Split off the first escaped span and everything after it from `children`.
 *
 * Starting at the first child flagged as an escaped span, every remaining
 * child is moved into the returned list, preserving document order:
 *
 * - escaped spans are moved as-is;
 * - `span_` markers are rebuilt as plain spans (same attributes and children,
 *   internal flags dropped);
 * - any other content is buffered and wrapped in an attribute-less span.
 *
 * The buffer is flushed before every span that is appended. The previous
 * implementation skipped the flush in front of a `span_` marker, which moved
 * the buffered content behind that marker and fused it with later content.
 *
 * @param children - Paragraph children; truncated in place at the first
 *   escaped span (left untouched when there is none)
 * @returns Spans to render outside the paragraph, in source order; empty when
 *   `children` contains no escaped span
 */
function extractEscapedSpans(children: Element[]): Element[] {
  const firstEscapedIndex = children.findIndex((child) => Boolean(child) && isEscapedSpan(child));
  if (firstEscapedIndex === -1) {
    return [];
  }

  const escaped: Element[] = [];
  let pending: Element[] = [];

  const makeSpan = (attributes: ContainerData["attributes"], elements: Element[]): Element => ({
    element: "container",
    data: { type: "span", attributes, elements },
  });

  // Emit buffered non-span content as one anonymous span, keeping its position.
  const flushPending = (): void => {
    if (pending.length === 0) return;
    escaped.push(makeSpan({}, pending));
    pending = [];
  };

  for (const child of children.slice(firstEscapedIndex)) {
    if (!child) continue;

    if (isEscapedSpan(child)) {
      flushPending();
      escaped.push(child);
    } else if (isSpanStripMarker(child)) {
      // Flush first so content that precedes this marker stays in front of it.
      flushPending();
      const markerData = getContainerData(child);
      if (markerData) {
        escaped.push(makeSpan(markerData.attributes, markerData.elements));
      }
    } else {
      pending.push(child);
    }
  }
  flushPending();

  // Everything from the first escaped span onwards now lives in `escaped`.
  children.splice(firstEscapedIndex);

  return escaped;
}
193
-
194
/**
 * Drop line-breaks that touch a `span_` marker, and drop empty `span_` markers.
 *
 * The array is walked once from the last index to the first and edited in
 * place, so removing an entry never shifts an index that is still to be
 * visited. Neighbours are looked up in the array as it stands at that moment:
 * entries to the right may already have been removed, entries to the left
 * have not been visited yet.
 *
 * @param children - Merged paragraph children; mutated in place
 */
function removeLineBreaksAroundSpanStrip(children: Element[]): void {
  let index = children.length;
  while (index-- > 0) {
    const current = children[index];
    if (!current) continue;

    if (current.element === "line-break") {
      // A line-break goes away when either current neighbour is a span_ marker.
      const touchesMarker =
        isSpanStripMarker(children[index - 1]) || isSpanStripMarker(children[index + 1]);
      if (touchesMarker) {
        children.splice(index, 1);
      }
      continue;
    }

    if (current.element !== "container") continue;

    // Empty span_ markers carry no content and are removed outright.
    const spanData = current.data as ContainerData & { _emptyParagraphStrip?: boolean };
    if (spanData.type === "span" && spanData._emptyParagraphStrip) {
      children.splice(index, 1);
    }
  }
}
230
-
231
/**
 * Detect a span that the parser flagged with `_splitByBlankLine`.
 *
 * Such spans mark the points where the enclosing paragraph is later split
 * into separate paragraphs.
 *
 * @param el - Element to inspect
 * @returns `true` when `el` is a span whose `_splitByBlankLine` is strictly `true`
 */
function isSplitSpan(el: Element): boolean {
  return (
    el.element === "container" &&
    (el.data as ContainerData).type === "span" &&
    (el.data as ContainerData)._splitByBlankLine === true
  );
}
246
-
247
/**
 * Break a paragraph apart in front of every `_splitByBlankLine` span.
 *
 * Children are grouped in order; each split-marked span opens a new group and
 * stays as that group's first child. Every non-empty group becomes a fresh
 * paragraph container with empty attributes.
 *
 * @param para - Element expected to be a paragraph container
 * @returns The resulting paragraphs in order. `[para]` is returned unchanged
 *   when `para` is not a paragraph or has no children.
 */
function splitParagraphAtBlankLineSpans(para: Element): Element[] {
  const source = getContainerData(para);
  if (source?.type !== "paragraph") return [para];

  const groups: Element[][] = [];
  let open: Element[] = [];

  for (const child of source.elements) {
    // A split span closes the group collected so far (if any) and starts the next.
    if (isSplitSpan(child) && open.length > 0) {
      groups.push(open);
      open = [];
    }
    open.push(child);
  }
  if (open.length > 0) {
    groups.push(open);
  }

  if (groups.length === 0) return [para];

  return groups.map(
    (elements): Element => ({
      element: "container",
      data: { type: "paragraph", attributes: {}, elements },
    }),
  );
}
299
-
300
/**
 * Detect an `expr` element whose expression is the empty string.
 *
 * An empty `[[#expr ]]` is treated as a paragraph break by the paragraph
 * splitting pass.
 *
 * @param el - Element to inspect
 * @returns `true` when `el` is an `expr` element and `data.expression === ""`
 */
function isEmptyExpr(el: Element): boolean {
  return el.element === "expr" && (el.data as ExprData).expression === "";
}
314
-
315
/**
 * Break a paragraph apart at every empty `[[#expr ]]` child.
 *
 * The empty expr itself is discarded. A line-break directly in front of it
 * (the last child collected so far) and a line-break directly behind it are
 * discarded as well. Each non-empty run of remaining children becomes a new
 * paragraph container with empty attributes.
 *
 * @param para - Element expected to be a paragraph container
 * @returns `[para]` unchanged when `para` is not a paragraph or contains no
 *   empty expr; otherwise the new paragraphs in order, which may be an empty
 *   array when nothing is left after the split
 */
function splitParagraphAtEmptyExpr(para: Element): Element[] {
  const source = getContainerData(para);
  if (source?.type !== "paragraph") return [para];

  const items = source.elements;
  if (!items.some(isEmptyExpr)) return [para];

  const paragraphs: Element[] = [];
  let open: Element[] = [];

  // Turn the children collected so far into a paragraph, unless there are none.
  const closeParagraph = (): void => {
    if (open.length === 0) return;
    paragraphs.push({
      element: "container",
      data: { type: "paragraph", attributes: {}, elements: open },
    });
    open = [];
  };

  let index = 0;
  while (index < items.length) {
    const child = items[index];
    index += 1; // `index` now points at the element after `child`
    if (!child) continue;

    if (!isEmptyExpr(child)) {
      open.push(child);
      continue;
    }

    // Trailing line-break before the break point is dropped.
    if (open[open.length - 1]?.element === "line-break") {
      open.pop();
    }
    closeParagraph();

    // Leading line-break after the break point is skipped.
    if (items[index]?.element === "line-break") {
      index += 1;
    }
  }
  closeParagraph();

  return paragraphs;
}
383
-
384
/**
 * Restructure top-level paragraphs for `span_` markers and split markers.
 *
 * Two passes are made over `children`:
 *
 * 1. **Split** — each paragraph is split at `_splitByBlankLine` spans and then
 *    at empty `[[#expr ]]` elements (see {@link splitMarkedParagraphs}).
 * 2. **Merge** — a paragraph containing a `span_` marker starts a merge run.
 *    The paragraph that directly follows is always absorbed; the run keeps
 *    going while the paragraph just absorbed contains a marker, or the next
 *    paragraph does. Any non-paragraph element ends the run.
 *
 * After a run is collected, escaped spans are extracted and line-breaks next
 * to markers are removed. Without escaped spans the merged children are
 * emitted directly at top level (no paragraph wrapper). With escaped spans the
 * remaining children, if any, are wrapped in one paragraph and the escaped
 * spans follow it at top level.
 *
 * @param children - Top-level elements produced by the parser
 * @returns A new array with paragraphs split and merged as described
 */
export function mergeSpanStripParagraphs(children: Element[]): Element[] {
  const expandedChildren = splitMarkedParagraphs(children);

  const result: Element[] = [];
  let i = 0;

  while (i < expandedChildren.length) {
    const node = expandedChildren[i];
    i++;
    if (!node) continue;

    // Anything that is not a paragraph with a span_ marker passes through untouched.
    const paraData = isContainer(node, "paragraph") ? getContainerData(node) : null;
    if (!paraData || !hasParagraphStripSpan(node)) {
      result.push(node);
      continue;
    }

    // Start a merge run seeded with this paragraph's children.
    const mergedChildren: Element[] = [...paraData.elements];

    while (i < expandedChildren.length) {
      const nextPara = expandedChildren[i];
      if (!nextPara || !isContainer(nextPara, "paragraph")) break;

      const nextParaData = getContainerData(nextPara);
      if (!nextParaData) break;

      mergedChildren.push(...nextParaData.elements);
      i++;

      // A marker-less paragraph only keeps the run alive when the paragraph
      // after it carries a marker.
      if (!hasParagraphStripSpan(nextPara)) {
        const peekNext = expandedChildren[i];
        if (!peekNext || !isContainer(peekNext, "paragraph") || !hasParagraphStripSpan(peekNext)) {
          break;
        }
      }
    }

    // Escaped spans are pulled out first; this truncates `mergedChildren`.
    const escapedSpans = extractEscapedSpans(mergedChildren);
    removeLineBreaksAroundSpanStrip(mergedChildren);

    if (escapedSpans.length === 0) {
      // No escaped content: emit the merged children without a paragraph wrapper.
      for (const child of mergedChildren) {
        result.push(child);
      }
      continue;
    }

    // Escaped content present: the main part is wrapped in a paragraph and the
    // escaped spans follow it at top level.
    if (mergedChildren.length > 0) {
      result.push({
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements: mergedChildren,
        },
      });
    }
    for (const span of escapedSpans) {
      result.push(span);
    }
  }

  return result;
}

/**
 * Split every paragraph at `_splitByBlankLine` spans and empty `[[#expr ]]`s.
 *
 * Both kinds of split are applied to the same paragraph: the pieces produced
 * by the blank-line split are each split again at empty exprs. The previous
 * implementation chose one or the other, so a paragraph containing both kinds
 * of marker was never split at its empty exprs.
 *
 * @param children - Top-level elements produced by the parser
 * @returns A new array in which each paragraph is replaced by its pieces;
 *   non-paragraph elements are passed through unchanged
 */
function splitMarkedParagraphs(children: Element[]): Element[] {
  const expanded: Element[] = [];

  for (const child of children) {
    const data = isContainer(child, "paragraph") ? getContainerData(child) : null;
    if (!data) {
      expanded.push(child);
      continue;
    }

    // Only rebuild the paragraph when a split span is actually present, so
    // untouched paragraphs keep their identity.
    const pieces = data.elements.some(isSplitSpan) ? splitParagraphAtBlankLineSpans(child) : [child];

    for (const piece of pieces) {
      // Returns `[piece]` unchanged when the piece has no empty expr.
      expanded.push(...splitParagraphAtEmptyExpr(piece));
    }
  }

  return expanded;
}
521
-
522
/**
 * Produce a cleaned copy of an element list for the final AST.
 *
 * Each element is first passed through `cleanElement`, which rebuilds
 * containers without the parser's internal flags and recurses into nested
 * content. The cleaned list is then filtered so that empty spans and the
 * whitespace-only text next to them are dropped.
 *
 * @param elements - Elements to clean
 * @returns A new array; the input array itself is not modified
 */
export function cleanInternalFlags(elements: Element[]): Element[] {
  return removeEmptySpansAndAdjacentWhitespace(elements.map(cleanElement));
}
541
-
542
/**
 * Detect a span container that has no child elements.
 *
 * @param el - Element to inspect
 * @returns `true` when `el` is a span whose `elements` array has length 0
 */
function isEmptySpan(el: Element): boolean {
  if (el.element !== "container") return false;
  const { type, elements } = el.data as ContainerData;
  return type === "span" && elements.length === 0;
}
556
-
557
/**
 * Detect a text element that consists solely of whitespace.
 *
 * An empty string does not count: at least one whitespace character is required.
 *
 * @param el - Element to inspect
 * @returns `true` when `el` is a text element whose string data matches `^\s+$`
 */
function isWhitespaceText(el: Element): boolean {
  if (el.element !== "text") return false;
  const content: unknown = el.data;
  return typeof content === "string" && /^\s+$/.test(content);
}
566
-
567
/**
 * Filter out empty spans together with the whitespace text around them.
 *
 * For each empty span: if the last element kept so far is whitespace-only
 * text, that one element is dropped; every whitespace-only text element that
 * directly follows the span is skipped; the span itself is not kept.
 *
 * @param elements - Elements to filter (not modified)
 * @returns A new array without empty spans and their adjacent whitespace
 */
function removeEmptySpansAndAdjacentWhitespace(elements: Element[]): Element[] {
  const kept: Element[] = [];
  let cursor = 0;

  while (cursor < elements.length) {
    const el = elements[cursor];
    cursor += 1; // `cursor` now points at the element after `el`
    if (!el) continue;

    if (!isEmptySpan(el)) {
      kept.push(el);
      continue;
    }

    // Drop a single whitespace text kept immediately before the empty span.
    const last = kept[kept.length - 1];
    if (last !== undefined && isWhitespaceText(last)) {
      kept.pop();
    }

    // Skip the whole run of whitespace text that follows the empty span.
    while (cursor < elements.length) {
      const following = elements[cursor];
      if (!following || !isWhitespaceText(following)) break;
      cursor += 1;
    }
  }

  return kept;
}
602
-
603
/**
 * Return a cleaned version of one element, recursing into nested content.
 *
 * Handling per element kind:
 * - `line-break`: replaced by a bare `{ element: "line-break" }`.
 * - `container`: rebuilt from `type`, `attributes` and cleaned `elements`
 *   only, so any other property on the data (the internal flags) is dropped.
 * - `collapsible`, `color`: data copied with `elements` cleaned.
 * - `list`: data copied with every item cleaned; `"elements"` items get their
 *   `elements` cleaned, `"sub-list"` items are rebuilt around the recursively
 *   cleaned nested list, other items are kept as they are.
 * - `definition-list`: every entry copied with `key` and `value` cleaned.
 * - anything else: returned unchanged (same object).
 *
 * @param el - Element to clean
 * @returns The cleaned element
 */
function cleanElement(el: Element): Element {
  switch (el.element) {
    case "line-break":
      return { element: "line-break" };

    case "container": {
      const { type, attributes, elements } = el.data as ContainerData;
      // Only the three public fields are copied; flags are left behind.
      const cleanedData: ContainerData = {
        type,
        attributes,
        elements: cleanInternalFlags(elements),
      };
      return { element: "container", data: cleanedData };
    }

    case "collapsible":
      return {
        element: "collapsible",
        data: { ...el.data, elements: cleanInternalFlags(el.data.elements) },
      };

    case "color":
      return {
        element: "color",
        data: { ...el.data, elements: cleanInternalFlags(el.data.elements) },
      };

    case "list": {
      const listData = el.data as any;

      const cleanItem = (item: any): any => {
        switch (item["item-type"]) {
          case "elements":
            return { ...item, elements: cleanInternalFlags(item.elements) };
          case "sub-list": {
            // Reuse the list branch above by wrapping the nested data as a list element.
            const nested = cleanElement({ element: "list", data: item.data } as Element);
            return {
              "item-type": "sub-list",
              element: "list",
              data: "data" in nested ? nested.data : item.data,
            };
          }
          default:
            return item;
        }
      };

      return {
        element: "list",
        data: {
          ...listData,
          items: listData.items.map((item: any) => cleanItem(item)),
        },
      };
    }

    case "definition-list": {
      const entries = el.data as any[];
      return {
        element: "definition-list",
        data: entries.map((entry: any) => ({
          ...entry,
          key: cleanInternalFlags(entry.key),
          value: cleanInternalFlags(entry.value),
        })),
      };
    }

    default:
      return el;
  }
}