@wdprlib/parser 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433):
  1. package/dist/index.cjs +10451 -8402
  2. package/dist/index.d.cts +313 -337
  3. package/dist/index.d.ts +313 -337
  4. package/dist/index.js +10438 -8389
  5. package/package.json +1 -1
  6. package/src/index.ts +7 -0
  7. package/src/lexer/anchor.ts +48 -0
  8. package/src/lexer/index.ts +3 -2
  9. package/src/lexer/lexer.ts +73 -559
  10. package/src/lexer/options.ts +19 -0
  11. package/src/lexer/punctuation.ts +70 -0
  12. package/src/lexer/quoted-string.ts +16 -0
  13. package/src/lexer/runs.ts +85 -0
  14. package/src/lexer/spacing-actions.ts +24 -0
  15. package/src/lexer/state.ts +103 -0
  16. package/src/lexer/syntax-actions.ts +80 -0
  17. package/src/lexer/text-actions.ts +41 -0
  18. package/src/lexer/token-actions.ts +136 -0
  19. package/src/lexer/token-factory.ts +62 -0
  20. package/src/lexer/tokenize.ts +18 -0
  21. package/src/parser/constants.ts +2 -0
  22. package/src/parser/depth/index.ts +111 -0
  23. package/src/parser/depth/stack.ts +82 -0
  24. package/src/parser/parse/block.ts +42 -0
  25. package/src/parser/parse/context.ts +26 -0
  26. package/src/parser/parse/footnotes.ts +25 -0
  27. package/src/parser/parse/index.ts +42 -0
  28. package/src/parser/parse/options.ts +34 -0
  29. package/src/parser/parse/parser.ts +79 -0
  30. package/src/parser/parse/plain-non-ascii.ts +129 -0
  31. package/src/parser/parse/result.ts +57 -0
  32. package/src/parser/parse/source.ts +11 -0
  33. package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
  34. package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
  35. package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
  36. package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
  37. package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
  38. package/src/parser/postprocess/spanStrip/factory.ts +23 -0
  39. package/src/parser/postprocess/spanStrip/index.ts +8 -0
  40. package/src/parser/postprocess/spanStrip/merge.ts +117 -0
  41. package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
  42. package/src/parser/postprocess/spanStrip/split.ts +67 -0
  43. package/src/parser/preprocess/expr/chars.ts +15 -0
  44. package/src/parser/preprocess/expr/evaluate.ts +22 -0
  45. package/src/parser/preprocess/expr/index.ts +45 -0
  46. package/src/parser/preprocess/expr/kind.ts +19 -0
  47. package/src/parser/preprocess/expr/parse.ts +103 -0
  48. package/src/parser/preprocess/expr/scan.ts +34 -0
  49. package/src/parser/preprocess/expr/types.ts +14 -0
  50. package/src/parser/preprocess/typography.ts +70 -5
  51. package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
  52. package/src/parser/preprocess/utils/index.ts +13 -0
  53. package/src/parser/preprocess/utils/raw-regions.ts +153 -0
  54. package/src/parser/preprocess/whitespace/detection.ts +39 -0
  55. package/src/parser/preprocess/whitespace/index.ts +79 -0
  56. package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
  57. package/src/parser/preprocess/whitespace/patterns.ts +23 -0
  58. package/src/parser/rules/block/align/body.ts +46 -0
  59. package/src/parser/rules/block/align/element.ts +13 -0
  60. package/src/parser/rules/block/align/index.ts +90 -0
  61. package/src/parser/rules/block/align/syntax.ts +113 -0
  62. package/src/parser/rules/block/bibliography/body.ts +81 -0
  63. package/src/parser/rules/block/bibliography/entries.ts +49 -0
  64. package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
  65. package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
  66. package/src/parser/rules/block/bibliography/index.ts +90 -0
  67. package/src/parser/rules/block/bibliography/open.ts +53 -0
  68. package/src/parser/rules/block/block-list/bare-content.ts +105 -0
  69. package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
  70. package/src/parser/rules/block/block-list/index.ts +51 -0
  71. package/src/parser/rules/block/block-list/item-content.ts +132 -0
  72. package/src/parser/rules/block/block-list/li-content.ts +107 -0
  73. package/src/parser/rules/block/block-list/li-item.ts +77 -0
  74. package/src/parser/rules/block/block-list/list-block.ts +100 -0
  75. package/src/parser/rules/block/block-list/open.ts +51 -0
  76. package/src/parser/rules/block/block-list/tags.ts +50 -0
  77. package/src/parser/rules/block/blockquote/build.ts +62 -0
  78. package/src/parser/rules/block/blockquote/index.ts +80 -0
  79. package/src/parser/rules/block/blockquote/line.ts +79 -0
  80. package/src/parser/rules/block/blockquote/lines.ts +39 -0
  81. package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
  82. package/src/parser/rules/block/center/open.ts +27 -0
  83. package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
  84. package/src/parser/rules/block/clear-float/syntax.ts +43 -0
  85. package/src/parser/rules/block/code/attributes.ts +30 -0
  86. package/src/parser/rules/block/code/content.ts +57 -0
  87. package/src/parser/rules/block/code/index.ts +100 -0
  88. package/src/parser/rules/block/collapsible/attributes.ts +95 -0
  89. package/src/parser/rules/block/collapsible/body.ts +69 -0
  90. package/src/parser/rules/block/collapsible/index.ts +117 -0
  91. package/src/parser/rules/block/collapsible/open.ts +51 -0
  92. package/src/parser/rules/block/collapsible/orphans.ts +31 -0
  93. package/src/parser/rules/block/collapsible/tags.ts +17 -0
  94. package/src/parser/rules/block/comment/consume.ts +37 -0
  95. package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
  96. package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
  97. package/src/parser/rules/block/content-separator/syntax.ts +33 -0
  98. package/src/parser/rules/block/definition-list/collect.ts +40 -0
  99. package/src/parser/rules/block/definition-list/index.ts +63 -0
  100. package/src/parser/rules/block/definition-list/item-key.ts +95 -0
  101. package/src/parser/rules/block/definition-list/item-value.ts +56 -0
  102. package/src/parser/rules/block/definition-list/items.ts +54 -0
  103. package/src/parser/rules/block/div/body.ts +41 -0
  104. package/src/parser/rules/block/div/close.ts +41 -0
  105. package/src/parser/rules/block/div/failed.ts +117 -0
  106. package/src/parser/rules/block/div/index.ts +112 -0
  107. package/src/parser/rules/block/div/nesting.ts +37 -0
  108. package/src/parser/rules/block/div/open.ts +59 -0
  109. package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
  110. package/src/parser/rules/block/embed-block/content.ts +53 -0
  111. package/src/parser/rules/block/embed-block/index.ts +91 -0
  112. package/src/parser/rules/block/embed-block/open.ts +52 -0
  113. package/src/parser/rules/block/embed-block/tags.ts +5 -0
  114. package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
  115. package/src/parser/rules/block/footnoteblock/index.ts +82 -0
  116. package/src/parser/rules/block/footnoteblock/open.ts +53 -0
  117. package/src/parser/rules/block/heading/index.ts +87 -0
  118. package/src/parser/rules/block/heading/open.ts +50 -0
  119. package/src/parser/rules/block/heading/toc-text.ts +26 -0
  120. package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
  121. package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
  122. package/src/parser/rules/block/html/body.ts +114 -0
  123. package/src/parser/rules/block/html/diagnostics.ts +11 -0
  124. package/src/parser/rules/block/html/index.ts +95 -0
  125. package/src/parser/rules/block/html/open.ts +36 -0
  126. package/src/parser/rules/block/iframe/attributes.ts +106 -0
  127. package/src/parser/rules/block/iframe/index.ts +73 -0
  128. package/src/parser/rules/block/iframe/open.ts +58 -0
  129. package/src/parser/rules/block/iframe/source.ts +24 -0
  130. package/src/parser/rules/block/iframe/url.ts +38 -0
  131. package/src/parser/rules/block/iftags/body.ts +48 -0
  132. package/src/parser/rules/block/iftags/condition.ts +24 -0
  133. package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
  134. package/src/parser/rules/block/include/arguments.ts +48 -0
  135. package/src/parser/rules/block/include/index.ts +75 -0
  136. package/src/parser/rules/block/include/location.ts +24 -0
  137. package/src/parser/rules/block/include/variables.ts +37 -0
  138. package/src/parser/rules/block/list/index.ts +73 -0
  139. package/src/parser/rules/block/list/line.ts +77 -0
  140. package/src/parser/rules/block/list/native.ts +89 -0
  141. package/src/parser/rules/block/math/content.ts +54 -0
  142. package/src/parser/rules/block/math/index.ts +106 -0
  143. package/src/parser/rules/block/math/name.ts +35 -0
  144. package/src/parser/rules/block/module/body.ts +92 -0
  145. package/src/parser/rules/block/module/element.ts +33 -0
  146. package/src/parser/rules/block/module/include/directive.ts +91 -0
  147. package/src/parser/rules/block/module/include/index.ts +11 -2
  148. package/src/parser/rules/block/module/include/references.ts +42 -0
  149. package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
  150. package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
  151. package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
  152. package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
  153. package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
  154. package/src/parser/rules/block/module/include/scanner.ts +121 -0
  155. package/src/parser/rules/block/module/index.ts +14 -2
  156. package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
  157. package/src/parser/rules/block/module/listpages/extract.ts +25 -359
  158. package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
  159. package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
  160. package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
  161. package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
  162. package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
  163. package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
  164. package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
  165. package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
  166. package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
  167. package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
  168. package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
  169. package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
  170. package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
  171. package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
  172. package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
  173. package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
  174. package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
  175. package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
  176. package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
  177. package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
  178. package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
  179. package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
  180. package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
  181. package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
  182. package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
  183. package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
  184. package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
  185. package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
  186. package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
  187. package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
  188. package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
  189. package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
  190. package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
  191. package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
  192. package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
  193. package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
  194. package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
  195. package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
  196. package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
  197. package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
  198. package/src/parser/rules/block/module/listusers/extract.ts +4 -9
  199. package/src/parser/rules/block/module/listusers/getters.ts +21 -0
  200. package/src/parser/rules/block/module/listusers/variables.ts +15 -0
  201. package/src/parser/rules/block/module/open.ts +57 -0
  202. package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
  203. package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
  204. package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
  205. package/src/parser/rules/block/module/resolution/styles.ts +53 -0
  206. package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
  207. package/src/parser/rules/block/module/resolve.ts +79 -292
  208. package/src/parser/rules/block/module/rule.ts +56 -0
  209. package/src/parser/rules/block/module/types-common.ts +11 -0
  210. package/src/parser/rules/block/module/walk/children.ts +35 -0
  211. package/src/parser/rules/block/module/walk/index.ts +9 -0
  212. package/src/parser/rules/block/module/walk/map/index.ts +2 -0
  213. package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
  214. package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
  215. package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
  216. package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
  217. package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
  218. package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
  219. package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
  220. package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
  221. package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
  222. package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
  223. package/src/parser/rules/block/module/walk/map/types.ts +6 -0
  224. package/src/parser/rules/block/module/walk/traverse.ts +65 -0
  225. package/src/parser/rules/block/orphan-li/content.ts +60 -0
  226. package/src/parser/rules/block/orphan-li/index.ts +75 -0
  227. package/src/parser/rules/block/orphan-li/open.ts +25 -0
  228. package/src/parser/rules/block/orphan-li/tags.ts +40 -0
  229. package/src/parser/rules/block/paragraph/content.ts +12 -0
  230. package/src/parser/rules/block/paragraph/index.ts +60 -0
  231. package/src/parser/rules/block/paragraph/normalize.ts +52 -0
  232. package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
  233. package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
  234. package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
  235. package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
  236. package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
  237. package/src/parser/rules/block/parsing/block-item.ts +29 -0
  238. package/src/parser/rules/block/parsing/content.ts +127 -0
  239. package/src/parser/rules/block/parsing/end-condition.ts +51 -0
  240. package/src/parser/rules/block/parsing/inline-content.ts +105 -0
  241. package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
  242. package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
  243. package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
  244. package/src/parser/rules/block/table/index.ts +80 -0
  245. package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
  246. package/src/parser/rules/block/table/pipe/cell.ts +106 -0
  247. package/src/parser/rules/block/table/pipe/index.ts +2 -0
  248. package/src/parser/rules/block/table/pipe/row.ts +88 -0
  249. package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
  250. package/src/parser/rules/block/table/pipe/trim.ts +50 -0
  251. package/src/parser/rules/block/table-block/body.ts +79 -0
  252. package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
  253. package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
  254. package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
  255. package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
  256. package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
  257. package/src/parser/rules/block/table-block/cell.ts +64 -0
  258. package/src/parser/rules/block/table-block/index.ts +113 -0
  259. package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
  260. package/src/parser/rules/block/table-block/structure.ts +80 -0
  261. package/src/parser/rules/block/tabview/body.ts +64 -0
  262. package/src/parser/rules/block/tabview/index.ts +90 -0
  263. package/src/parser/rules/block/tabview/open.ts +50 -0
  264. package/src/parser/rules/block/tabview/tab.ts +92 -0
  265. package/src/parser/rules/block/tabview/tags.ts +30 -0
  266. package/src/parser/rules/block/toc/element.ts +11 -0
  267. package/src/parser/rules/block/toc/index.ts +44 -0
  268. package/src/parser/rules/block/toc/open.ts +84 -0
  269. package/src/parser/rules/block/utils.ts +10 -610
  270. package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
  271. package/src/parser/rules/common/block-name.ts +33 -0
  272. package/src/parser/rules/common/index.ts +2 -0
  273. package/src/parser/rules/contracts/index.ts +3 -0
  274. package/src/parser/rules/contracts/parse-context.ts +38 -0
  275. package/src/parser/rules/contracts/rule.ts +43 -0
  276. package/src/parser/rules/contracts/scope.ts +31 -0
  277. package/src/parser/rules/inline/anchor/attributes.ts +54 -0
  278. package/src/parser/rules/inline/anchor/child.ts +26 -0
  279. package/src/parser/rules/inline/anchor/close.ts +34 -0
  280. package/src/parser/rules/inline/anchor/content.ts +59 -0
  281. package/src/parser/rules/inline/anchor/index.ts +103 -0
  282. package/src/parser/rules/inline/anchor/newline.ts +26 -0
  283. package/src/parser/rules/inline/anchor/open.ts +47 -0
  284. package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
  285. package/src/parser/rules/inline/anchor/syntax.ts +40 -0
  286. package/src/parser/rules/inline/anchor-name/index.ts +38 -0
  287. package/src/parser/rules/inline/anchor-name/name.ts +39 -0
  288. package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
  289. package/src/parser/rules/inline/bibcite/element.ts +14 -0
  290. package/src/parser/rules/inline/bibcite/index.ts +34 -0
  291. package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
  292. package/src/parser/rules/inline/bold.ts +2 -39
  293. package/src/parser/rules/inline/color/index.ts +35 -0
  294. package/src/parser/rules/inline/color/syntax.ts +69 -0
  295. package/src/parser/rules/inline/comment/consume.ts +31 -0
  296. package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
  297. package/src/parser/rules/inline/equation-ref/element.ts +8 -0
  298. package/src/parser/rules/inline/equation-ref/index.ts +34 -0
  299. package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
  300. package/src/parser/rules/inline/expr/branch.ts +104 -0
  301. package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
  302. package/src/parser/rules/inline/expr/conditional.ts +80 -0
  303. package/src/parser/rules/inline/expr/depth.ts +25 -0
  304. package/src/parser/rules/inline/expr/elements.ts +39 -0
  305. package/src/parser/rules/inline/expr/index.ts +84 -0
  306. package/src/parser/rules/inline/expr/syntax.ts +45 -0
  307. package/src/parser/rules/inline/footnote/child.ts +22 -0
  308. package/src/parser/rules/inline/footnote/close.ts +33 -0
  309. package/src/parser/rules/inline/footnote/content.ts +54 -0
  310. package/src/parser/rules/inline/footnote/elements.ts +38 -0
  311. package/src/parser/rules/inline/footnote/index.ts +54 -0
  312. package/src/parser/rules/inline/footnote/newline.ts +27 -0
  313. package/src/parser/rules/inline/footnote/open.ts +38 -0
  314. package/src/parser/rules/inline/formatting/container.ts +50 -0
  315. package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
  316. package/src/parser/rules/inline/guillemet/text.ts +11 -0
  317. package/src/parser/rules/inline/html/gate.ts +64 -0
  318. package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
  319. package/src/parser/rules/inline/html/open.ts +37 -0
  320. package/src/parser/rules/inline/image/attributes.ts +22 -0
  321. package/src/parser/rules/inline/image/body.ts +36 -0
  322. package/src/parser/rules/inline/image/index.ts +89 -0
  323. package/src/parser/rules/inline/image/open.ts +56 -0
  324. package/src/parser/rules/inline/image/source.ts +62 -0
  325. package/src/parser/rules/inline/image/syntax.ts +76 -0
  326. package/src/parser/rules/inline/italic.ts +2 -30
  327. package/src/parser/rules/inline/line-break/backslash.ts +58 -0
  328. package/src/parser/rules/inline/line-break/elements.ts +9 -0
  329. package/src/parser/rules/inline/line-break/index.ts +3 -0
  330. package/src/parser/rules/inline/line-break/newline.ts +82 -0
  331. package/src/parser/rules/inline/line-break/underscore.ts +45 -0
  332. package/src/parser/rules/inline/link-anchor.ts +6 -81
  333. package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
  334. package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
  335. package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
  336. package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
  337. package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
  338. package/src/parser/rules/inline/link-single.ts +7 -98
  339. package/src/parser/rules/inline/link-star.ts +7 -69
  340. package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
  341. package/src/parser/rules/inline/link-triple/index.ts +62 -0
  342. package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
  343. package/src/parser/rules/inline/link-triple/label.ts +35 -0
  344. package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
  345. package/src/parser/rules/inline/link-triple/target.ts +36 -0
  346. package/src/parser/rules/inline/math-inline/index.ts +40 -0
  347. package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
  348. package/src/parser/rules/inline/monospace.ts +2 -30
  349. package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
  350. package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
  351. package/src/parser/rules/inline/parsing/collect.ts +23 -0
  352. package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
  353. package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
  354. package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
  355. package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
  356. package/src/parser/rules/inline/parsing/rules.ts +34 -0
  357. package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
  358. package/src/parser/rules/inline/raw/angle.ts +40 -0
  359. package/src/parser/rules/inline/raw/double-at.ts +78 -0
  360. package/src/parser/rules/inline/raw/index.ts +26 -0
  361. package/src/parser/rules/inline/raw/result.ts +26 -0
  362. package/src/parser/rules/inline/size/content.ts +65 -0
  363. package/src/parser/rules/inline/size/index.ts +55 -0
  364. package/src/parser/rules/inline/size/open.ts +43 -0
  365. package/src/parser/rules/inline/size/value.ts +45 -0
  366. package/src/parser/rules/inline/span/content.ts +97 -0
  367. package/src/parser/rules/inline/span/elements.ts +108 -0
  368. package/src/parser/rules/inline/span/index.ts +79 -0
  369. package/src/parser/rules/inline/span/newline.ts +50 -0
  370. package/src/parser/rules/inline/span/syntax.ts +70 -0
  371. package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
  372. package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
  373. package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
  374. package/src/parser/rules/inline/subscript.ts +2 -39
  375. package/src/parser/rules/inline/superscript.ts +4 -39
  376. package/src/parser/rules/inline/text/element.ts +5 -0
  377. package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
  378. package/src/parser/rules/inline/underline/child.ts +26 -0
  379. package/src/parser/rules/inline/underline/content.ts +29 -0
  380. package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
  381. package/src/parser/rules/inline/user/element.ts +11 -0
  382. package/src/parser/rules/inline/user/index.ts +34 -0
  383. package/src/parser/rules/inline/user/syntax.ts +67 -0
  384. package/src/parser/rules/inline/utils.ts +4 -344
  385. package/src/parser/rules/tokens.ts +106 -0
  386. package/src/parser/rules/types.ts +9 -252
  387. package/src/parser/depth.ts +0 -251
  388. package/src/parser/parse.ts +0 -315
  389. package/src/parser/postprocess/spanStrip.ts +0 -697
  390. package/src/parser/preprocess/expr.ts +0 -265
  391. package/src/parser/preprocess/utils.ts +0 -250
  392. package/src/parser/preprocess/whitespace.ts +0 -111
  393. package/src/parser/rules/block/align.ts +0 -282
  394. package/src/parser/rules/block/bibliography.ts +0 -359
  395. package/src/parser/rules/block/block-list.ts +0 -689
  396. package/src/parser/rules/block/blockquote.ts +0 -238
  397. package/src/parser/rules/block/code.ts +0 -187
  398. package/src/parser/rules/block/collapsible.ts +0 -337
  399. package/src/parser/rules/block/definition-list.ts +0 -270
  400. package/src/parser/rules/block/div.ts +0 -400
  401. package/src/parser/rules/block/embed-block.ts +0 -153
  402. package/src/parser/rules/block/footnoteblock.ts +0 -200
  403. package/src/parser/rules/block/heading.ts +0 -142
  404. package/src/parser/rules/block/html.ts +0 -222
  405. package/src/parser/rules/block/iframe.ts +0 -239
  406. package/src/parser/rules/block/include.ts +0 -179
  407. package/src/parser/rules/block/list.ts +0 -244
  408. package/src/parser/rules/block/math.ts +0 -183
  409. package/src/parser/rules/block/module/include/resolve.ts +0 -556
  410. package/src/parser/rules/block/module/listpages/types.ts +0 -513
  411. package/src/parser/rules/block/module/walk.ts +0 -380
  412. package/src/parser/rules/block/module.ts +0 -164
  413. package/src/parser/rules/block/orphan-li.ts +0 -177
  414. package/src/parser/rules/block/paragraph.ts +0 -157
  415. package/src/parser/rules/block/table-block.ts +0 -726
  416. package/src/parser/rules/block/table.ts +0 -441
  417. package/src/parser/rules/block/tabview.ts +0 -331
  418. package/src/parser/rules/block/toc.ts +0 -129
  419. package/src/parser/rules/inline/anchor-name.ts +0 -154
  420. package/src/parser/rules/inline/anchor.ts +0 -327
  421. package/src/parser/rules/inline/bibcite.ts +0 -153
  422. package/src/parser/rules/inline/color.ts +0 -140
  423. package/src/parser/rules/inline/equation-ref.ts +0 -115
  424. package/src/parser/rules/inline/expr.ts +0 -526
  425. package/src/parser/rules/inline/footnote.ts +0 -223
  426. package/src/parser/rules/inline/image.ts +0 -328
  427. package/src/parser/rules/inline/line-break.ts +0 -326
  428. package/src/parser/rules/inline/link-triple.ts +0 -267
  429. package/src/parser/rules/inline/math-inline.ts +0 -126
  430. package/src/parser/rules/inline/raw.ts +0 -262
  431. package/src/parser/rules/inline/size.ts +0 -244
  432. package/src/parser/rules/inline/span.ts +0 -424
  433. package/src/parser/rules/inline/user.ts +0 -147
@@ -0,0 +1,103 @@
1
+ import { isWhitespace } from "./chars";
2
+ import { matchDirectiveKind } from "./kind";
3
+ import type { DirectiveKind, DirectiveMatch } from "./types";
4
+
5
/**
 * Attempt to read one `[[#kind ...]]` directive whose opening `[[` sits at
 * `start`, returning its raw (trimmed, unevaluated) pieces.
 *
 * The scan tracks two nesting counters so that only top-level `|` characters
 * split the body: `[[[ ... ]]]` spans are skipped wholesale, and `[[ ... ]]`
 * pairs opened inside the body must be closed before a `]]` can terminate the
 * directive itself.
 *
 * @param source Full text being scanned.
 * @param start  Index of the directive's opening `[[#`.
 * @param kind   Directive keyword already matched at `start`.
 * @returns The parsed pieces, or `null` when
 *   - `matchDirectiveKind` matches anywhere in the body before the close
 *     (the directive is not innermost, so the caller should keep descending),
 *   - no terminating `]]` is found, or
 *   - the kind is `if` / `ifexpr` and the body has no top-level `|`.
 */
export function tryParseInnermostDirective(
  source: string,
  start: number,
  kind: DirectiveKind,
): DirectiveMatch | null {
  let keywordLen: number;
  switch (kind) {
    case "ifexpr":
      keywordLen = 6;
      break;
    case "expr":
      keywordLen = 4;
      break;
    default:
      keywordLen = 2;
      break;
  }

  // Step over `[[#` (3 characters), the keyword, and any whitespace after it.
  const length = source.length;
  let cursor = start + 3 + keywordLen;
  while (cursor < length && isWhitespace(source[cursor])) cursor += 1;

  const headStart = cursor;
  const pipes: number[] = [];
  let nestedBlocks = 0;
  let openLinks = 0;

  while (cursor < length) {
    // Another directive opener inside the body: this one is not innermost.
    if (matchDirectiveKind(source, cursor) !== null) return null;

    if (source.startsWith("[[[", cursor)) {
      openLinks += 1;
      cursor += 3;
    } else if (openLinks > 0) {
      // Inside a triple-bracket span nothing counts except its own closer.
      if (source.startsWith("]]]", cursor)) {
        openLinks -= 1;
        cursor += 3;
      } else {
        cursor += 1;
      }
    } else if (source.startsWith("[[", cursor)) {
      nestedBlocks += 1;
      cursor += 2;
    } else if (source.startsWith("]]", cursor)) {
      if (nestedBlocks === 0) {
        // This `]]` terminates the directive itself.
        const hasPipe = pipes.length > 0;
        const needsBranch = kind === "if" || kind === "ifexpr";
        if (needsBranch && !hasPipe) return null;
        return buildDirectiveMatch(source, headStart, cursor, pipes, hasPipe);
      }
      nestedBlocks -= 1;
      cursor += 2;
    } else {
      // Only pipes outside every nested bracket pair separate the branches.
      if (nestedBlocks === 0 && source[cursor] === "|") pipes.push(cursor);
      cursor += 1;
    }
  }

  // Ran off the end of the source without finding the closing `]]`.
  return null;
}
71
+
72
/**
 * Cut the directive body into its trimmed head / then / else substrings.
 *
 * @param source     Full text the indices refer to.
 * @param headStart  Index of the first character of the directive body.
 * @param closeStart Index of the terminating `]]`.
 * @param pipes      Indices of the top-level `|` separators, in order.
 * @param hasPipe    Whether `pipes` is non-empty.
 * @returns A match whose `end` is `closeStart + 2`. Without a pipe the whole
 *   body becomes `head`; with one pipe the remainder becomes `thenText`; with
 *   two or more, everything after the second pipe (further pipes included)
 *   becomes `elseText`.
 */
function buildDirectiveMatch(
  source: string,
  headStart: number,
  closeStart: number,
  pipes: number[],
  hasPipe: boolean,
): DirectiveMatch {
  const end = closeStart + 2;
  const cut = (from: number, to: number): string => source.slice(from, to).trim();

  if (!hasPipe) {
    return { end, head: cut(headStart, closeStart), thenText: "", elseText: "", hasPipe };
  }

  const firstPipe = pipes[0]!;
  const head = cut(headStart, firstPipe);

  if (pipes.length < 2) {
    // Only a `then` branch was supplied.
    return { end, head, thenText: cut(firstPipe + 1, closeStart), elseText: "", hasPipe };
  }

  const secondPipe = pipes[1]!;
  return {
    end,
    head,
    thenText: cut(firstPipe + 1, secondPipe),
    elseText: cut(secondPipe + 1, closeStart),
    hasPipe,
  };
}
@@ -0,0 +1,34 @@
1
+ import { computeBracketDepths } from "../utils";
2
+ import { evaluateDirective } from "./evaluate";
3
+ import { matchDirectiveKind } from "./kind";
4
+ import { tryParseInnermostDirective } from "./parse";
5
+
6
/**
 * Scan `source` once and substitute every innermost `[[#if]]` /
 * `[[#ifexpr]]` / `[[#expr]]` directive found at a position whose bracket
 * depth (per `computeBracketDepths`) is greater than zero with the string
 * produced by `evaluateDirective`.
 *
 * @param source - Text to expand.
 * @returns The expanded text, or `source` itself when nothing was replaced.
 */
export function expandInnermost(source: string): string {
  const depths = computeBracketDepths(source);
  let output = "";
  // Offset up to which `source` has already been copied into `output`.
  let flushed = 0;
  let replaced = false;
  let cursor = 0;

  while (cursor < source.length) {
    const kind = matchDirectiveKind(source, cursor);
    const match =
      kind !== null && depths[cursor]! > 0
        ? tryParseInnermostDirective(source, cursor, kind)
        : null;

    if (kind === null || match === null) {
      // Not a directive here (or it did not parse): keep the character as-is.
      cursor++;
      continue;
    }

    output += source.slice(flushed, cursor);
    output += evaluateDirective(kind, match);
    cursor = match.end;
    flushed = cursor;
    replaced = true;
  }

  if (!replaced) return source;
  return output + source.slice(flushed);
}
@@ -0,0 +1,14 @@
1
/** The directive names recognised by the expansion pass. */
export type DirectiveKind = "if" | "ifexpr" | "expr";

/** Result of scanning a single directive out of the source text. */
export interface DirectiveMatch {
  /** Offset immediately after the directive's closing `]]`. */
  end: number;
  /** Unevaluated condition or expression text. */
  head: string;
  /** Unevaluated text of the `then` branch. */
  thenText: string;
  /** Unevaluated text of the `else` branch. */
  elseText: string;
  /** `true` when the directive contained at least one `|` separator. */
  hasPipe: boolean;
}
@@ -32,6 +32,61 @@ const LOW_DOUBLE_QUOTE = "\u201e"; // „
32
32
  /** Unicode horizontal ellipsis (U+2026) */
33
33
  const ELLIPSIS = "\u2026"; // …
34
34
 
35
/**
 * Replace each occurrence of `pattern` with the ellipsis character, skipping
 * occurrences that have a `.` directly before or directly after them.
 *
 * @param text - Text to scan.
 * @param pattern - Literal dot sequence to look for.
 * @returns The rewritten text, or `text` itself when nothing was replaced.
 */
function replaceExactEllipsisPattern(text: string, pattern: string): string {
  const width = pattern.length;
  const pieces: string[] = [];
  let flushed = 0;
  let cursor = 0;

  while (cursor < text.length) {
    const hit = text.indexOf(pattern, cursor);
    if (hit === -1) break;

    const tail = hit + width;
    const dotBefore = hit > 0 && text[hit - 1] === ".";
    const dotAfter = tail < text.length && text[tail] === ".";

    if (dotBefore || dotAfter) {
      // Part of a longer dot run: retry one character further on.
      cursor = hit + 1;
      continue;
    }

    pieces.push(text.slice(flushed, hit), ELLIPSIS);
    flushed = tail;
    cursor = tail;
  }

  return flushed === 0 ? text : pieces.join("") + text.slice(flushed);
}
58
+
59
/**
 * Replace every `opener … closer` pair with `leftQuote … rightQuote`.
 *
 * The content between the delimiters is matched lazily (the first closer
 * after the opener wins) and must stay on one line: like `.` in the regular
 * expressions this helper replaces, it may not contain a line terminator
 * (`\n`, `\r`, U+2028, U+2029). An opener whose nearest closer sits on a
 * later line is left untouched and scanning resumes on the next line, so a
 * stray delimiter cannot swallow a valid pair further down.
 *
 * @param text - Text to scan.
 * @param opener - Literal opening delimiter (non-empty, no line terminators).
 * @param closer - Literal closing delimiter (non-empty).
 * @param leftQuote - Replacement for the opener.
 * @param rightQuote - Replacement for the closer.
 * @returns The rewritten text, or `text` itself when nothing was replaced.
 */
function replaceDelimitedTypography(
  text: string,
  opener: string,
  closer: string,
  leftQuote: string,
  rightQuote: string,
): string {
  let searchFrom = 0;
  let result = "";
  let lastCopied = 0;
  // First closer at or after the current content start. Cached across
  // iterations so a far-away closer is not re-searched once per line.
  let closeIndex = -1;

  while (searchFrom < text.length) {
    const openIndex = text.indexOf(opener, searchFrom);
    if (openIndex === -1) break;

    const contentStart = openIndex + opener.length;
    if (closeIndex < contentStart) {
      closeIndex = text.indexOf(closer, contentStart);
      // No closer left anywhere: no later opener can match either.
      if (closeIndex === -1) break;
    }

    // Look for a line terminator inside the would-be content.
    let lineBreak = -1;
    for (let k = contentStart; k < closeIndex; k++) {
      const code = text.charCodeAt(k);
      if (code === 0x0a || code === 0x0d || code === 0x2028 || code === 0x2029) {
        lineBreak = k;
        break;
      }
    }

    if (lineBreak !== -1) {
      // Every opener before this line break has the same problem, so skip
      // straight past it.
      searchFrom = lineBreak + 1;
      continue;
    }

    result += text.slice(lastCopied, openIndex);
    result += leftQuote;
    result += text.slice(contentStart, closeIndex);
    result += rightQuote;

    lastCopied = closeIndex + closer.length;
    searchFrom = lastCopied;
  }

  return lastCopied === 0 ? text : result + text.slice(lastCopied);
}
89
+
35
90
  /**
36
91
  * Apply all typographic substitutions to the given text.
37
92
  *
@@ -47,21 +102,31 @@ export function substitute(text: string): string {
47
102
  let result = text;
48
103
 
49
104
  // Double quotes: ``...'' -> "..."
50
- result = result.replace(/``(.*?)''/g, `${LEFT_DOUBLE_QUOTE}$1${RIGHT_DOUBLE_QUOTE}`);
105
+ if (result.includes("``") && result.includes("''")) {
106
+ result = replaceDelimitedTypography(result, "``", "''", LEFT_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE);
107
+ }
51
108
 
52
109
  // Low double quotes: ,,..'' -> „..."
53
- result = result.replace(/,,(.*?)''/g, `${LOW_DOUBLE_QUOTE}$1${RIGHT_DOUBLE_QUOTE}`);
110
+ if (result.includes(",,") && result.includes("''")) {
111
+ result = replaceDelimitedTypography(result, ",,", "''", LOW_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE);
112
+ }
54
113
 
55
114
  // Single quotes: `...' -> '...'
56
- result = result.replace(/`(.*?)'/g, `${LEFT_SINGLE_QUOTE}$1${RIGHT_SINGLE_QUOTE}`);
115
+ if (result.includes("`") && result.includes("'")) {
116
+ result = replaceDelimitedTypography(result, "`", "'", LEFT_SINGLE_QUOTE, RIGHT_SINGLE_QUOTE);
117
+ }
57
118
 
58
119
  // Ellipsis: ... or . . . -> …
59
120
  // Must be exactly 3 dots, not preceded or followed by more dots
60
121
  // Handle continuous dots: ...
61
- result = result.replace(/(?<![.])\.\.\.(?![.])/g, ELLIPSIS);
122
+ if (result.includes("...")) {
123
+ result = replaceExactEllipsisPattern(result, "...");
124
+ }
62
125
 
63
126
  // Handle spaced dots: . . .
64
- result = result.replace(/(?<![.])\. \. \.(?![.])/g, ELLIPSIS);
127
+ if (result.includes(". . .")) {
128
+ result = replaceExactEllipsisPattern(result, ". . .");
129
+ }
65
130
 
66
131
  return result;
67
132
  }
@@ -0,0 +1,98 @@
1
/**
 * Compute, for every offset of `source`, how many `[[` openers are still
 * unmatched just before the character at that offset is consumed.
 *
 * Rules applied while scanning:
 * - inside an open `[[`, a `"` that follows `=` (optionally separated by
 *   spaces/tabs) starts a quoted run that is skipped without changing depth;
 * - a `[[[` run is skipped up to the offset reported by `findTripleLinkEnd`
 *   without changing depth;
 * - `[[` increments the depth, `]]` decrements it (never below zero);
 * - a newline resets the depth to zero.
 *
 * @param source - Text to analyse.
 * @returns An `Int32Array` of length `source.length + 1`; the last entry is
 *   the depth at end of input.
 */
export function computeBracketDepths(source: string): Int32Array {
  const length = source.length;
  const depths = new Int32Array(length + 1);
  let open = 0;
  let at = 0;

  while (at < length) {
    depths[at] = open;
    const ch = source[at];

    if (open > 0 && ch === '"' && precededByEqualsAttr(source, at)) {
      // NOTE(review): when the quoted run ends at a newline, that newline is
      // skipped here and does not reset the depth — confirm this matches the
      // lexer behaviour this function is meant to mirror.
      const last = findQuoteEnd(source, at + 1);
      depths.fill(open, at, last + 1);
      at = last + 1;
      continue;
    }

    if (source.startsWith("[[[", at)) {
      const last = findTripleLinkEnd(source, at + 3);
      depths.fill(open, at, last + 1);
      at = last + 1;
      continue;
    }

    if (source.startsWith("[[", at)) {
      open += 1;
      depths[at + 1] = open;
      at += 2;
      continue;
    }

    if (source.startsWith("]]", at)) {
      open = open > 0 ? open - 1 : 0;
      depths[at + 1] = open;
      at += 2;
      continue;
    }

    if (ch === "\n") {
      open = 0;
    }

    at += 1;
  }

  depths[length] = open;
  return depths;
}
57
+
58
/**
 * Report whether the nearest character before `pos` that is neither a space
 * nor a tab is `=`.
 *
 * @param source - Text to inspect.
 * @param pos - Offset of the character whose predecessor is examined.
 * @returns `false` when only spaces/tabs (or nothing) precede `pos`.
 */
function precededByEqualsAttr(source: string, pos: number): boolean {
  for (let back = pos - 1; back >= 0; back--) {
    const ch = source[back];
    if (ch === " " || ch === "\t") continue;
    return ch === "=";
  }
  return false;
}
70
+
71
/**
 * Find where a quoted run starting at `from` ends.
 *
 * @param source - Text to scan.
 * @param from - Offset of the first character after the opening quote.
 * @returns The offset of the first `"` or newline at or after `from`, or
 *   `source.length - 1` when neither occurs.
 */
function findQuoteEnd(source: string, from: number): number {
  let at = from;
  while (at < source.length) {
    const ch = source[at];
    if (ch === '"' || ch === "\n") {
      return at;
    }
    at += 1;
  }
  return source.length - 1;
}
78
+
79
/**
 * Find where a `[[[` run starting at `from` ends.
 *
 * @param source - Text to scan.
 * @param from - Offset of the first character after the opening `[[[`.
 * @returns The offset of the last `]` of the first `]]]`, or the offset of
 *   the first newline of a blank line (`\n\n`), whichever comes first; or
 *   `source.length - 1` when neither occurs.
 */
function findTripleLinkEnd(source: string, from: number): number {
  let at = from;
  while (at < source.length) {
    if (source.startsWith("]]]", at)) {
      return at + 2;
    }
    if (source.startsWith("\n\n", at)) {
      return at;
    }
    at += 1;
  }
  return source.length - 1;
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ *
3
+ * Shared helper facade for source-level preprocess passes.
4
+ *
5
+ * Raw-region masking and bracket-depth tracking are implemented in separate
6
+ * files so preprocess passes can depend on a small stable import surface.
7
+ *
8
+ * @module
9
+ */
10
+
11
+ export type { Sentinels } from "./raw-regions";
12
+ export { makeUniqueSentinels, maskRawRegions, restorePlaceholders } from "./raw-regions";
13
+ export { computeBracketDepths } from "./bracket-depths";
@@ -0,0 +1,153 @@
1
/** Base character repeated to build the opening sentinel of a placeholder. */
const BASE_PLACEHOLDER_OPEN = "\uE000";
/** Base character repeated to build the closing sentinel of a placeholder. */
const BASE_PLACEHOLDER_CLOSE = "\uE001";

/**
 * Opening tag of a raw block (`[[code …]]` or `[[html …]]`), case-insensitive.
 * Sticky: callers set `lastIndex` to the offset to test before calling `exec`.
 */
const RAW_BLOCK_OPEN_PATTERN = /\[\[\s*(code|html)\b[^\]]*\]\]/iy;
5
+
6
/** Pair of delimiter strings that wrap the index of a raw-region placeholder. */
export interface Sentinels {
  /** String emitted immediately before the placeholder index. */
  open: string;
  /** String emitted immediately after the placeholder index. */
  close: string;
}
11
+
12
/**
 * Pick an open/close sentinel pair such that neither string occurs in
 * `source`.
 *
 * Placeholders spliced into the masked source look like
 * `<open><digits><close>`. Both sentinels start as a single base character
 * and are lengthened together, one base character per step, until neither of
 * them is found in `source`.
 *
 * @param source - Text the sentinels must not collide with.
 * @returns The chosen sentinel pair.
 */
export function makeUniqueSentinels(source: string): Sentinels {
  const chosen: Sentinels = {
    open: BASE_PLACEHOLDER_OPEN,
    close: BASE_PLACEHOLDER_CLOSE,
  };

  for (;;) {
    const collides = source.includes(chosen.open) || source.includes(chosen.close);
    if (!collides) {
      return chosen;
    }
    chosen.open += BASE_PLACEHOLDER_OPEN;
    chosen.close += BASE_PLACEHOLDER_CLOSE;
  }
}
27
+
28
/**
 * Copy `source`, swapping every raw region for a placeholder token so later
 * passes leave the region's body alone.
 *
 * At each offset a raw block is tried first, then a raw inline span; when
 * neither applies the character is copied unchanged.
 *
 * @param source - Text to mask.
 * @param sentinels - Delimiters used to build placeholder tokens.
 * @returns The masked text and the original text of each masked region,
 *   indexed by placeholder number.
 */
export function maskRawRegions(
  source: string,
  sentinels: Sentinels,
): { masked: string; placeholders: string[] } {
  const placeholders: string[] = [];
  let masked = "";

  for (let cursor = 0; cursor < source.length; ) {
    const region =
      tryMaskRawBlock(source, cursor, placeholders, sentinels) ??
      tryMaskRawInline(source, cursor, placeholders, sentinels);

    if (region === null) {
      masked += source[cursor];
      cursor += 1;
    } else {
      masked += region.placeholder;
      cursor = region.end;
    }
  }

  return { masked, placeholders };
}
61
+
62
/**
 * Undo {@link maskRawRegions}: every `<open><digits><close>` token in
 * `source` is replaced by the placeholder text stored at that index.
 *
 * @param source - Masked text.
 * @param placeholders - Original region texts, indexed by placeholder number.
 * @param sentinels - The delimiters that were used when masking.
 * @returns The text with placeholders expanded; a token whose index has no
 *   stored entry becomes the empty string.
 */
export function restorePlaceholders(
  source: string,
  placeholders: string[],
  sentinels: Sentinels,
): string {
  const openPart = escapeRegex(sentinels.open);
  const closePart = escapeRegex(sentinels.close);
  const tokenPattern = new RegExp(`${openPart}(\\d+)${closePart}`, "g");

  return source.replace(tokenPattern, (_token, digits: string) => {
    const original = placeholders[Number(digits)];
    return original ?? "";
  });
}
74
+
75
/**
 * Try to mask a `[[code]]` / `[[html]]` block that opens exactly at `pos`.
 *
 * The region runs from the opening tag through the first matching closing
 * tag after it. An unclosed `code` block is masked through the end of
 * `source`; an unclosed `html` block is not masked.
 *
 * @param source - Text being masked.
 * @param pos - Offset to test.
 * @param placeholders - Store that receives the masked text.
 * @param sentinels - Delimiters used to build the placeholder token.
 * @returns The placeholder token and the offset just past the masked region,
 *   or `null` when no raw block starts at `pos`.
 */
function tryMaskRawBlock(
  source: string,
  pos: number,
  placeholders: string[],
  sentinels: Sentinels,
): { placeholder: string; end: number } | null {
  // Cheap rejection before touching the regular expression.
  if (!(source[pos] === "[" && source[pos + 1] === "[")) return null;

  RAW_BLOCK_OPEN_PATTERN.lastIndex = pos;
  const opening = RAW_BLOCK_OPEN_PATTERN.exec(source);
  if (opening === null) return null;

  const name = opening[1]!.toLowerCase();
  const bodyStart = pos + opening[0].length;

  const closePattern = new RegExp(`\\[\\[\\/\\s*${name}\\s*\\]\\]`, "ig");
  closePattern.lastIndex = bodyStart;
  const closing = closePattern.exec(source);

  let end: number;
  if (closing !== null) {
    end = closing.index + closing[0].length;
  } else if (name === "code") {
    // Unterminated code block: everything up to end of input is raw.
    end = source.length;
  } else {
    return null;
  }

  const raw = closing !== null ? source.slice(pos, end) : source.slice(pos);
  return { placeholder: pushPlaceholder(placeholders, raw, sentinels), end };
}
108
+
109
/**
 * Try to mask an inline raw span (`@< … >@` or `@@ … @@`) that opens exactly
 * at `pos`.
 *
 * @param source - Text being masked.
 * @param pos - Offset to test.
 * @param placeholders - Store that receives the masked text.
 * @param sentinels - Delimiters used to build the placeholder token.
 * @returns The placeholder token and end offset, or `null` when no inline
 *   raw span is recognised at `pos`.
 */
function tryMaskRawInline(
  source: string,
  pos: number,
  placeholders: string[],
  sentinels: Sentinels,
): { placeholder: string; end: number } | null {
  if (source[pos] !== "@") return null;

  switch (source[pos + 1]) {
    case "<":
      return tryMaskSingleLineRaw(source, pos, 2, ">@", placeholders, sentinels);
    case "@":
      return tryMaskSingleLineRaw(source, pos, 2, "@@", placeholders, sentinels);
    default:
      return null;
  }
}
125
+
126
/**
 * Mask a raw span whose closing delimiter must appear before the next
 * newline.
 *
 * @param source - Text being masked.
 * @param pos - Offset of the opening delimiter.
 * @param openerLength - Length of the opening delimiter.
 * @param close - Closing delimiter to look for.
 * @param placeholders - Store that receives the masked text.
 * @param sentinels - Delimiters used to build the placeholder token.
 * @returns The placeholder token and the offset just past the closing
 *   delimiter, or `null` when there is no closer or a newline precedes it.
 */
function tryMaskSingleLineRaw(
  source: string,
  pos: number,
  openerLength: number,
  close: string,
  placeholders: string[],
  sentinels: Sentinels,
): { placeholder: string; end: number } | null {
  const bodyStart = pos + openerLength;

  const closeAt = source.indexOf(close, bodyStart);
  if (closeAt === -1) return null;

  const lineEnd = source.indexOf("\n", bodyStart);
  if (lineEnd !== -1 && lineEnd < closeAt) return null;

  const end = closeAt + close.length;
  const raw = source.slice(pos, end);
  return { placeholder: pushPlaceholder(placeholders, raw, sentinels), end };
}
144
+
145
/**
 * Append `text` to `placeholders` and build the token that refers to it.
 *
 * @param placeholders - Store to append to (mutated).
 * @param text - Original text of the masked region.
 * @param sentinels - Delimiters wrapped around the index.
 * @returns `<open><index><close>`, where `index` is the slot `text` was stored in.
 */
function pushPlaceholder(placeholders: string[], text: string, sentinels: Sentinels): string {
  // `push` returns the new length, so the slot just written is one below it.
  const slot = placeholders.push(text) - 1;
  return sentinels.open + String(slot) + sentinels.close;
}
150
+
151
/**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be embedded in a `RegExp` source and match `str` literally.
 */
function escapeRegex(str: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (ch) => `\\${ch}`);
}
@@ -0,0 +1,39 @@
1
/**
 * Whitespace other than `\n` at the start of a line, or a U+2028 / U+2029
 * line separator anywhere. Covers whitespace that the cheap literal checks in
 * {@link needsWhitespaceSubstitution} do not list (form feed, vertical tab,
 * U+3000, …) but that `\s` matches.
 */
const UNLISTED_LINE_WHITESPACE = /^[^\S\n]|[\u2028\u2029]/m;

/** Any whitespace character sitting at the start of a line. */
const LINE_START_WHITESPACE = /^\s/m;

/**
 * Fast pre-check: can whitespace normalization change `text` at all?
 *
 * Returns `true` when the text starts with a newline or space, ends with a
 * newline, contains a carriage return, tab, NUL, U+00A0, U+2007, a
 * backslash-newline, three consecutive newlines, or a newline followed by a
 * space. As a fallback it also returns `true` when any other `\s` character
 * starts a line (or a U+2028 / U+2029 separator is present), so a line made
 * only of such whitespace is handled the same way whether or not one of the
 * listed triggers appears elsewhere in the text.
 *
 * @param text - Text about to be normalized.
 * @returns `false` only when no normalization step can apply.
 */
export function needsWhitespaceSubstitution(text: string): boolean {
  if (text.length === 0) return false;

  if (text[0] === "\n" || text[0] === " " || text[text.length - 1] === "\n") {
    return true;
  }

  if (
    text.indexOf("\r") !== -1 ||
    text.indexOf("\t") !== -1 ||
    text.indexOf("\0") !== -1 ||
    text.indexOf("\u00a0") !== -1 ||
    text.indexOf("\u2007") !== -1 ||
    text.indexOf("\\\n") !== -1 ||
    text.indexOf("\n\n\n") !== -1 ||
    text.indexOf("\n ") !== -1
  ) {
    return true;
  }

  // Slow path, reached only when none of the literal triggers is present.
  return UNLISTED_LINE_WHITESPACE.test(text);
}

/**
 * Fast pre-check: might `text` contain a line made only of whitespace?
 *
 * A whitespace-only line necessarily begins with a whitespace character at a
 * line start, so that is what is tested: first through cheap literal checks
 * for the common characters, then through a `\s`-based fallback for every
 * other whitespace character.
 *
 * @param text - Text to inspect.
 * @returns `true` when a whitespace-only line is possible; `false` when it is
 *   impossible.
 */
export function mayContainWhitespaceOnlyLine(text: string): boolean {
  const first = text[0];
  if (
    first === " " ||
    first === "\t" ||
    first === "\n" ||
    first === "\u00a0" ||
    first === "\u2007"
  ) {
    return true;
  }

  if (
    text.indexOf("\n ") !== -1 ||
    text.indexOf("\n\n") !== -1 ||
    text.indexOf("\n\t") !== -1 ||
    text.indexOf("\n\u00a0") !== -1 ||
    text.indexOf("\n\u2007") !== -1
  ) {
    return true;
  }

  // Whitespace not listed above (form feed, U+3000, …) at a line start.
  return LINE_START_WHITESPACE.test(text);
}
@@ -0,0 +1,79 @@
1
+ /**
2
+ *
3
+ * Whitespace normalization preprocessing for Wikidot markup.
4
+ *
5
+ * This module ensures the lexer and parser receive input with consistent
6
+ * whitespace conventions. It handles platform differences (DOS/Mac newlines),
7
+ * normalizes exotic whitespace characters that users may paste from external
8
+ * sources, and applies Wikidot-specific behaviors like backslash line continuation.
9
+ *
10
+ * @module
11
+ */
12
+
13
+ import { needsWhitespaceSubstitution, mayContainWhitespaceOnlyLine } from "./detection";
14
+ import { replaceLeadingSpaces } from "./leading-spaces";
15
+ import { CONCAT_LINES, DOS_MAC_NEWLINES, NULL_CHARS, TABS, WHITESPACE_ONLY_LINE } from "./patterns";
16
+
17
/** Replacement for one tab character: four ASCII spaces. */
const TAB_EXPANSION = " ".repeat(4);

/**
 * Apply all whitespace normalization substitutions to the given text.
 *
 * Each step is guarded by a cheap containment check so untouched text is
 * returned without running any regular expression. Steps run in this order:
 *
 * 1. DOS / legacy Mac line endings become `\n`.
 * 2. Leading U+00A0 / U+2007 runs become ASCII spaces.
 * 3. Whitespace-only lines are emptied.
 * 4. Backslash-newline is replaced by the U+E000 marker character.
 * 5. Tabs are expanded to four spaces.
 * 6. NUL characters become spaces.
 * 7. Leading and trailing newlines are trimmed.
 *
 * The order matters: for example, line endings must be normalized before a
 * backslash-newline can be recognised.
 *
 * @param text - Raw source text.
 * @returns The normalized text, or `text` itself when nothing applies.
 */
export function substitute(text: string): string {
  if (!needsWhitespaceSubstitution(text)) {
    return text;
  }

  let result = text;

  if (result.indexOf("\r") !== -1) {
    result = result.replace(DOS_MAC_NEWLINES, "\n");
  }

  if (result.indexOf("\u00a0") !== -1 || result.indexOf("\u2007") !== -1) {
    result = replaceLeadingSpaces(result);
  }

  if (mayContainWhitespaceOnlyLine(result)) {
    result = result.replace(WHITESPACE_ONLY_LINE, "");
  }

  if (result.indexOf("\\\n") !== -1) {
    result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));
  }

  if (result.indexOf("\t") !== -1) {
    // Four spaces per tab, as the TABS pattern is documented to expand.
    result = result.replace(TABS, TAB_EXPANSION);
  }

  if (result.indexOf("\0") !== -1) {
    result = result.replace(NULL_CHARS, " ");
  }

  if (result[0] === "\n") {
    result = trimLeadingNewlines(result);
  }
  if (result[result.length - 1] === "\n") {
    result = trimTrailingNewlines(result);
  }

  return result;
}
64
+
65
/**
 * Drop every `\n` at the start of `text`.
 *
 * @returns `text` itself when it does not start with a newline.
 */
function trimLeadingNewlines(text: string): string {
  let start = 0;
  for (; text[start] === "\n"; start++) {
    // Advance past the run of leading newlines.
  }
  return start > 0 ? text.slice(start) : text;
}
72
+
73
/**
 * Drop every `\n` at the end of `text`.
 *
 * @returns `text` itself when it does not end with a newline.
 */
function trimTrailingNewlines(text: string): string {
  let keep = text.length;
  for (; keep > 0 && text[keep - 1] === "\n"; keep--) {
    // Step back over the run of trailing newlines.
  }
  return keep < text.length ? text.slice(0, keep) : text;
}
@@ -0,0 +1,11 @@
1
+ import { LEADING_NONSTANDARD_WHITESPACE } from "./patterns";
2
+
3
/**
 * Turn each run of non-standard whitespace found by
 * `LEADING_NONSTANDARD_WHITESPACE` at the start of a line into a run of
 * regular ASCII spaces of the same length.
 *
 * @param text - Text to normalize.
 * @returns The text with those leading runs replaced.
 */
export function replaceLeadingSpaces(text: string): string {
  const toAsciiSpaces = (run: string): string => " ".repeat(run.length);
  return text.replace(LEADING_NONSTANDARD_WHITESPACE, toAsciiSpaces);
}
@@ -0,0 +1,23 @@
1
/**
 * One or more non-standard whitespace characters (non-breaking space U+00A0,
 * figure space U+2007) at the start of a line.
 */
export const LEADING_NONSTANDARD_WHITESPACE: RegExp = /^[\u00a0\u2007]+/gm;

/** A line that consists of whitespace only (collapsed to an empty line). */
export const WHITESPACE_ONLY_LINE: RegExp = /^\s+$/gm;

/** A DOS (`\r\n`) or legacy Mac (`\r`) line ending. */
export const DOS_MAC_NEWLINES: RegExp = /\r\n?/g;

/**
 * A backslash immediately followed by a newline.
 * In Wikidot, `\` at end of line acts as an explicit line break (`<br />`).
 */
export const CONCAT_LINES: RegExp = /\\\n/g;

/** A tab character (expanded to four spaces). */
export const TABS: RegExp = /\t/g;

/** A null (NUL) character (replaced with a space). */
export const NULL_CHARS: RegExp = new RegExp(String.fromCharCode(0), "g");