@wdprlib/parser 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433)
  1. package/dist/index.cjs +10451 -8402
  2. package/dist/index.d.cts +313 -337
  3. package/dist/index.d.ts +313 -337
  4. package/dist/index.js +10438 -8389
  5. package/package.json +1 -1
  6. package/src/index.ts +7 -0
  7. package/src/lexer/anchor.ts +48 -0
  8. package/src/lexer/index.ts +3 -2
  9. package/src/lexer/lexer.ts +73 -559
  10. package/src/lexer/options.ts +19 -0
  11. package/src/lexer/punctuation.ts +70 -0
  12. package/src/lexer/quoted-string.ts +16 -0
  13. package/src/lexer/runs.ts +85 -0
  14. package/src/lexer/spacing-actions.ts +24 -0
  15. package/src/lexer/state.ts +103 -0
  16. package/src/lexer/syntax-actions.ts +80 -0
  17. package/src/lexer/text-actions.ts +41 -0
  18. package/src/lexer/token-actions.ts +136 -0
  19. package/src/lexer/token-factory.ts +62 -0
  20. package/src/lexer/tokenize.ts +18 -0
  21. package/src/parser/constants.ts +2 -0
  22. package/src/parser/depth/index.ts +111 -0
  23. package/src/parser/depth/stack.ts +82 -0
  24. package/src/parser/parse/block.ts +42 -0
  25. package/src/parser/parse/context.ts +26 -0
  26. package/src/parser/parse/footnotes.ts +25 -0
  27. package/src/parser/parse/index.ts +42 -0
  28. package/src/parser/parse/options.ts +34 -0
  29. package/src/parser/parse/parser.ts +79 -0
  30. package/src/parser/parse/plain-non-ascii.ts +129 -0
  31. package/src/parser/parse/result.ts +57 -0
  32. package/src/parser/parse/source.ts +11 -0
  33. package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
  34. package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
  35. package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
  36. package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
  37. package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
  38. package/src/parser/postprocess/spanStrip/factory.ts +23 -0
  39. package/src/parser/postprocess/spanStrip/index.ts +8 -0
  40. package/src/parser/postprocess/spanStrip/merge.ts +117 -0
  41. package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
  42. package/src/parser/postprocess/spanStrip/split.ts +67 -0
  43. package/src/parser/preprocess/expr/chars.ts +15 -0
  44. package/src/parser/preprocess/expr/evaluate.ts +22 -0
  45. package/src/parser/preprocess/expr/index.ts +45 -0
  46. package/src/parser/preprocess/expr/kind.ts +19 -0
  47. package/src/parser/preprocess/expr/parse.ts +103 -0
  48. package/src/parser/preprocess/expr/scan.ts +34 -0
  49. package/src/parser/preprocess/expr/types.ts +14 -0
  50. package/src/parser/preprocess/typography.ts +70 -5
  51. package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
  52. package/src/parser/preprocess/utils/index.ts +13 -0
  53. package/src/parser/preprocess/utils/raw-regions.ts +153 -0
  54. package/src/parser/preprocess/whitespace/detection.ts +39 -0
  55. package/src/parser/preprocess/whitespace/index.ts +79 -0
  56. package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
  57. package/src/parser/preprocess/whitespace/patterns.ts +23 -0
  58. package/src/parser/rules/block/align/body.ts +46 -0
  59. package/src/parser/rules/block/align/element.ts +13 -0
  60. package/src/parser/rules/block/align/index.ts +90 -0
  61. package/src/parser/rules/block/align/syntax.ts +113 -0
  62. package/src/parser/rules/block/bibliography/body.ts +81 -0
  63. package/src/parser/rules/block/bibliography/entries.ts +49 -0
  64. package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
  65. package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
  66. package/src/parser/rules/block/bibliography/index.ts +90 -0
  67. package/src/parser/rules/block/bibliography/open.ts +53 -0
  68. package/src/parser/rules/block/block-list/bare-content.ts +105 -0
  69. package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
  70. package/src/parser/rules/block/block-list/index.ts +51 -0
  71. package/src/parser/rules/block/block-list/item-content.ts +132 -0
  72. package/src/parser/rules/block/block-list/li-content.ts +107 -0
  73. package/src/parser/rules/block/block-list/li-item.ts +77 -0
  74. package/src/parser/rules/block/block-list/list-block.ts +100 -0
  75. package/src/parser/rules/block/block-list/open.ts +51 -0
  76. package/src/parser/rules/block/block-list/tags.ts +50 -0
  77. package/src/parser/rules/block/blockquote/build.ts +62 -0
  78. package/src/parser/rules/block/blockquote/index.ts +80 -0
  79. package/src/parser/rules/block/blockquote/line.ts +79 -0
  80. package/src/parser/rules/block/blockquote/lines.ts +39 -0
  81. package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
  82. package/src/parser/rules/block/center/open.ts +27 -0
  83. package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
  84. package/src/parser/rules/block/clear-float/syntax.ts +43 -0
  85. package/src/parser/rules/block/code/attributes.ts +30 -0
  86. package/src/parser/rules/block/code/content.ts +57 -0
  87. package/src/parser/rules/block/code/index.ts +100 -0
  88. package/src/parser/rules/block/collapsible/attributes.ts +95 -0
  89. package/src/parser/rules/block/collapsible/body.ts +69 -0
  90. package/src/parser/rules/block/collapsible/index.ts +117 -0
  91. package/src/parser/rules/block/collapsible/open.ts +51 -0
  92. package/src/parser/rules/block/collapsible/orphans.ts +31 -0
  93. package/src/parser/rules/block/collapsible/tags.ts +17 -0
  94. package/src/parser/rules/block/comment/consume.ts +37 -0
  95. package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
  96. package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
  97. package/src/parser/rules/block/content-separator/syntax.ts +33 -0
  98. package/src/parser/rules/block/definition-list/collect.ts +40 -0
  99. package/src/parser/rules/block/definition-list/index.ts +63 -0
  100. package/src/parser/rules/block/definition-list/item-key.ts +95 -0
  101. package/src/parser/rules/block/definition-list/item-value.ts +56 -0
  102. package/src/parser/rules/block/definition-list/items.ts +54 -0
  103. package/src/parser/rules/block/div/body.ts +41 -0
  104. package/src/parser/rules/block/div/close.ts +41 -0
  105. package/src/parser/rules/block/div/failed.ts +117 -0
  106. package/src/parser/rules/block/div/index.ts +112 -0
  107. package/src/parser/rules/block/div/nesting.ts +37 -0
  108. package/src/parser/rules/block/div/open.ts +59 -0
  109. package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
  110. package/src/parser/rules/block/embed-block/content.ts +53 -0
  111. package/src/parser/rules/block/embed-block/index.ts +91 -0
  112. package/src/parser/rules/block/embed-block/open.ts +52 -0
  113. package/src/parser/rules/block/embed-block/tags.ts +5 -0
  114. package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
  115. package/src/parser/rules/block/footnoteblock/index.ts +82 -0
  116. package/src/parser/rules/block/footnoteblock/open.ts +53 -0
  117. package/src/parser/rules/block/heading/index.ts +87 -0
  118. package/src/parser/rules/block/heading/open.ts +50 -0
  119. package/src/parser/rules/block/heading/toc-text.ts +26 -0
  120. package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
  121. package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
  122. package/src/parser/rules/block/html/body.ts +114 -0
  123. package/src/parser/rules/block/html/diagnostics.ts +11 -0
  124. package/src/parser/rules/block/html/index.ts +95 -0
  125. package/src/parser/rules/block/html/open.ts +36 -0
  126. package/src/parser/rules/block/iframe/attributes.ts +106 -0
  127. package/src/parser/rules/block/iframe/index.ts +73 -0
  128. package/src/parser/rules/block/iframe/open.ts +58 -0
  129. package/src/parser/rules/block/iframe/source.ts +24 -0
  130. package/src/parser/rules/block/iframe/url.ts +38 -0
  131. package/src/parser/rules/block/iftags/body.ts +48 -0
  132. package/src/parser/rules/block/iftags/condition.ts +24 -0
  133. package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
  134. package/src/parser/rules/block/include/arguments.ts +48 -0
  135. package/src/parser/rules/block/include/index.ts +75 -0
  136. package/src/parser/rules/block/include/location.ts +24 -0
  137. package/src/parser/rules/block/include/variables.ts +37 -0
  138. package/src/parser/rules/block/list/index.ts +73 -0
  139. package/src/parser/rules/block/list/line.ts +77 -0
  140. package/src/parser/rules/block/list/native.ts +89 -0
  141. package/src/parser/rules/block/math/content.ts +54 -0
  142. package/src/parser/rules/block/math/index.ts +106 -0
  143. package/src/parser/rules/block/math/name.ts +35 -0
  144. package/src/parser/rules/block/module/body.ts +92 -0
  145. package/src/parser/rules/block/module/element.ts +33 -0
  146. package/src/parser/rules/block/module/include/directive.ts +91 -0
  147. package/src/parser/rules/block/module/include/index.ts +11 -2
  148. package/src/parser/rules/block/module/include/references.ts +42 -0
  149. package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
  150. package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
  151. package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
  152. package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
  153. package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
  154. package/src/parser/rules/block/module/include/scanner.ts +121 -0
  155. package/src/parser/rules/block/module/index.ts +14 -2
  156. package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
  157. package/src/parser/rules/block/module/listpages/extract.ts +25 -359
  158. package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
  159. package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
  160. package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
  161. package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
  162. package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
  163. package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
  164. package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
  165. package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
  166. package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
  167. package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
  168. package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
  169. package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
  170. package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
  171. package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
  172. package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
  173. package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
  174. package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
  175. package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
  176. package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
  177. package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
  178. package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
  179. package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
  180. package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
  181. package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
  182. package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
  183. package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
  184. package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
  185. package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
  186. package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
  187. package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
  188. package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
  189. package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
  190. package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
  191. package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
  192. package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
  193. package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
  194. package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
  195. package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
  196. package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
  197. package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
  198. package/src/parser/rules/block/module/listusers/extract.ts +4 -9
  199. package/src/parser/rules/block/module/listusers/getters.ts +21 -0
  200. package/src/parser/rules/block/module/listusers/variables.ts +15 -0
  201. package/src/parser/rules/block/module/open.ts +57 -0
  202. package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
  203. package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
  204. package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
  205. package/src/parser/rules/block/module/resolution/styles.ts +53 -0
  206. package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
  207. package/src/parser/rules/block/module/resolve.ts +79 -292
  208. package/src/parser/rules/block/module/rule.ts +56 -0
  209. package/src/parser/rules/block/module/types-common.ts +11 -0
  210. package/src/parser/rules/block/module/walk/children.ts +35 -0
  211. package/src/parser/rules/block/module/walk/index.ts +9 -0
  212. package/src/parser/rules/block/module/walk/map/index.ts +2 -0
  213. package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
  214. package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
  215. package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
  216. package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
  217. package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
  218. package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
  219. package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
  220. package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
  221. package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
  222. package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
  223. package/src/parser/rules/block/module/walk/map/types.ts +6 -0
  224. package/src/parser/rules/block/module/walk/traverse.ts +65 -0
  225. package/src/parser/rules/block/orphan-li/content.ts +60 -0
  226. package/src/parser/rules/block/orphan-li/index.ts +75 -0
  227. package/src/parser/rules/block/orphan-li/open.ts +25 -0
  228. package/src/parser/rules/block/orphan-li/tags.ts +40 -0
  229. package/src/parser/rules/block/paragraph/content.ts +12 -0
  230. package/src/parser/rules/block/paragraph/index.ts +60 -0
  231. package/src/parser/rules/block/paragraph/normalize.ts +52 -0
  232. package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
  233. package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
  234. package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
  235. package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
  236. package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
  237. package/src/parser/rules/block/parsing/block-item.ts +29 -0
  238. package/src/parser/rules/block/parsing/content.ts +127 -0
  239. package/src/parser/rules/block/parsing/end-condition.ts +51 -0
  240. package/src/parser/rules/block/parsing/inline-content.ts +105 -0
  241. package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
  242. package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
  243. package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
  244. package/src/parser/rules/block/table/index.ts +80 -0
  245. package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
  246. package/src/parser/rules/block/table/pipe/cell.ts +106 -0
  247. package/src/parser/rules/block/table/pipe/index.ts +2 -0
  248. package/src/parser/rules/block/table/pipe/row.ts +88 -0
  249. package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
  250. package/src/parser/rules/block/table/pipe/trim.ts +50 -0
  251. package/src/parser/rules/block/table-block/body.ts +79 -0
  252. package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
  253. package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
  254. package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
  255. package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
  256. package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
  257. package/src/parser/rules/block/table-block/cell.ts +64 -0
  258. package/src/parser/rules/block/table-block/index.ts +113 -0
  259. package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
  260. package/src/parser/rules/block/table-block/structure.ts +80 -0
  261. package/src/parser/rules/block/tabview/body.ts +64 -0
  262. package/src/parser/rules/block/tabview/index.ts +90 -0
  263. package/src/parser/rules/block/tabview/open.ts +50 -0
  264. package/src/parser/rules/block/tabview/tab.ts +92 -0
  265. package/src/parser/rules/block/tabview/tags.ts +30 -0
  266. package/src/parser/rules/block/toc/element.ts +11 -0
  267. package/src/parser/rules/block/toc/index.ts +44 -0
  268. package/src/parser/rules/block/toc/open.ts +84 -0
  269. package/src/parser/rules/block/utils.ts +10 -610
  270. package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
  271. package/src/parser/rules/common/block-name.ts +33 -0
  272. package/src/parser/rules/common/index.ts +2 -0
  273. package/src/parser/rules/contracts/index.ts +3 -0
  274. package/src/parser/rules/contracts/parse-context.ts +38 -0
  275. package/src/parser/rules/contracts/rule.ts +43 -0
  276. package/src/parser/rules/contracts/scope.ts +31 -0
  277. package/src/parser/rules/inline/anchor/attributes.ts +54 -0
  278. package/src/parser/rules/inline/anchor/child.ts +26 -0
  279. package/src/parser/rules/inline/anchor/close.ts +34 -0
  280. package/src/parser/rules/inline/anchor/content.ts +59 -0
  281. package/src/parser/rules/inline/anchor/index.ts +103 -0
  282. package/src/parser/rules/inline/anchor/newline.ts +26 -0
  283. package/src/parser/rules/inline/anchor/open.ts +47 -0
  284. package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
  285. package/src/parser/rules/inline/anchor/syntax.ts +40 -0
  286. package/src/parser/rules/inline/anchor-name/index.ts +38 -0
  287. package/src/parser/rules/inline/anchor-name/name.ts +39 -0
  288. package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
  289. package/src/parser/rules/inline/bibcite/element.ts +14 -0
  290. package/src/parser/rules/inline/bibcite/index.ts +34 -0
  291. package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
  292. package/src/parser/rules/inline/bold.ts +2 -39
  293. package/src/parser/rules/inline/color/index.ts +35 -0
  294. package/src/parser/rules/inline/color/syntax.ts +69 -0
  295. package/src/parser/rules/inline/comment/consume.ts +31 -0
  296. package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
  297. package/src/parser/rules/inline/equation-ref/element.ts +8 -0
  298. package/src/parser/rules/inline/equation-ref/index.ts +34 -0
  299. package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
  300. package/src/parser/rules/inline/expr/branch.ts +104 -0
  301. package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
  302. package/src/parser/rules/inline/expr/conditional.ts +80 -0
  303. package/src/parser/rules/inline/expr/depth.ts +25 -0
  304. package/src/parser/rules/inline/expr/elements.ts +39 -0
  305. package/src/parser/rules/inline/expr/index.ts +84 -0
  306. package/src/parser/rules/inline/expr/syntax.ts +45 -0
  307. package/src/parser/rules/inline/footnote/child.ts +22 -0
  308. package/src/parser/rules/inline/footnote/close.ts +33 -0
  309. package/src/parser/rules/inline/footnote/content.ts +54 -0
  310. package/src/parser/rules/inline/footnote/elements.ts +38 -0
  311. package/src/parser/rules/inline/footnote/index.ts +54 -0
  312. package/src/parser/rules/inline/footnote/newline.ts +27 -0
  313. package/src/parser/rules/inline/footnote/open.ts +38 -0
  314. package/src/parser/rules/inline/formatting/container.ts +50 -0
  315. package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
  316. package/src/parser/rules/inline/guillemet/text.ts +11 -0
  317. package/src/parser/rules/inline/html/gate.ts +64 -0
  318. package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
  319. package/src/parser/rules/inline/html/open.ts +37 -0
  320. package/src/parser/rules/inline/image/attributes.ts +22 -0
  321. package/src/parser/rules/inline/image/body.ts +36 -0
  322. package/src/parser/rules/inline/image/index.ts +89 -0
  323. package/src/parser/rules/inline/image/open.ts +56 -0
  324. package/src/parser/rules/inline/image/source.ts +62 -0
  325. package/src/parser/rules/inline/image/syntax.ts +76 -0
  326. package/src/parser/rules/inline/italic.ts +2 -30
  327. package/src/parser/rules/inline/line-break/backslash.ts +58 -0
  328. package/src/parser/rules/inline/line-break/elements.ts +9 -0
  329. package/src/parser/rules/inline/line-break/index.ts +3 -0
  330. package/src/parser/rules/inline/line-break/newline.ts +82 -0
  331. package/src/parser/rules/inline/line-break/underscore.ts +45 -0
  332. package/src/parser/rules/inline/link-anchor.ts +6 -81
  333. package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
  334. package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
  335. package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
  336. package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
  337. package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
  338. package/src/parser/rules/inline/link-single.ts +7 -98
  339. package/src/parser/rules/inline/link-star.ts +7 -69
  340. package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
  341. package/src/parser/rules/inline/link-triple/index.ts +62 -0
  342. package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
  343. package/src/parser/rules/inline/link-triple/label.ts +35 -0
  344. package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
  345. package/src/parser/rules/inline/link-triple/target.ts +36 -0
  346. package/src/parser/rules/inline/math-inline/index.ts +40 -0
  347. package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
  348. package/src/parser/rules/inline/monospace.ts +2 -30
  349. package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
  350. package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
  351. package/src/parser/rules/inline/parsing/collect.ts +23 -0
  352. package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
  353. package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
  354. package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
  355. package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
  356. package/src/parser/rules/inline/parsing/rules.ts +34 -0
  357. package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
  358. package/src/parser/rules/inline/raw/angle.ts +40 -0
  359. package/src/parser/rules/inline/raw/double-at.ts +78 -0
  360. package/src/parser/rules/inline/raw/index.ts +26 -0
  361. package/src/parser/rules/inline/raw/result.ts +26 -0
  362. package/src/parser/rules/inline/size/content.ts +65 -0
  363. package/src/parser/rules/inline/size/index.ts +55 -0
  364. package/src/parser/rules/inline/size/open.ts +43 -0
  365. package/src/parser/rules/inline/size/value.ts +45 -0
  366. package/src/parser/rules/inline/span/content.ts +97 -0
  367. package/src/parser/rules/inline/span/elements.ts +108 -0
  368. package/src/parser/rules/inline/span/index.ts +79 -0
  369. package/src/parser/rules/inline/span/newline.ts +50 -0
  370. package/src/parser/rules/inline/span/syntax.ts +70 -0
  371. package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
  372. package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
  373. package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
  374. package/src/parser/rules/inline/subscript.ts +2 -39
  375. package/src/parser/rules/inline/superscript.ts +4 -39
  376. package/src/parser/rules/inline/text/element.ts +5 -0
  377. package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
  378. package/src/parser/rules/inline/underline/child.ts +26 -0
  379. package/src/parser/rules/inline/underline/content.ts +29 -0
  380. package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
  381. package/src/parser/rules/inline/user/element.ts +11 -0
  382. package/src/parser/rules/inline/user/index.ts +34 -0
  383. package/src/parser/rules/inline/user/syntax.ts +67 -0
  384. package/src/parser/rules/inline/utils.ts +4 -344
  385. package/src/parser/rules/tokens.ts +106 -0
  386. package/src/parser/rules/types.ts +9 -252
  387. package/src/parser/depth.ts +0 -251
  388. package/src/parser/parse.ts +0 -315
  389. package/src/parser/postprocess/spanStrip.ts +0 -697
  390. package/src/parser/preprocess/expr.ts +0 -265
  391. package/src/parser/preprocess/utils.ts +0 -250
  392. package/src/parser/preprocess/whitespace.ts +0 -111
  393. package/src/parser/rules/block/align.ts +0 -282
  394. package/src/parser/rules/block/bibliography.ts +0 -359
  395. package/src/parser/rules/block/block-list.ts +0 -689
  396. package/src/parser/rules/block/blockquote.ts +0 -238
  397. package/src/parser/rules/block/code.ts +0 -187
  398. package/src/parser/rules/block/collapsible.ts +0 -337
  399. package/src/parser/rules/block/definition-list.ts +0 -270
  400. package/src/parser/rules/block/div.ts +0 -400
  401. package/src/parser/rules/block/embed-block.ts +0 -153
  402. package/src/parser/rules/block/footnoteblock.ts +0 -200
  403. package/src/parser/rules/block/heading.ts +0 -142
  404. package/src/parser/rules/block/html.ts +0 -222
  405. package/src/parser/rules/block/iframe.ts +0 -239
  406. package/src/parser/rules/block/include.ts +0 -179
  407. package/src/parser/rules/block/list.ts +0 -244
  408. package/src/parser/rules/block/math.ts +0 -183
  409. package/src/parser/rules/block/module/include/resolve.ts +0 -556
  410. package/src/parser/rules/block/module/listpages/types.ts +0 -513
  411. package/src/parser/rules/block/module/walk.ts +0 -380
  412. package/src/parser/rules/block/module.ts +0 -164
  413. package/src/parser/rules/block/orphan-li.ts +0 -177
  414. package/src/parser/rules/block/paragraph.ts +0 -157
  415. package/src/parser/rules/block/table-block.ts +0 -726
  416. package/src/parser/rules/block/table.ts +0 -441
  417. package/src/parser/rules/block/tabview.ts +0 -331
  418. package/src/parser/rules/block/toc.ts +0 -129
  419. package/src/parser/rules/inline/anchor-name.ts +0 -154
  420. package/src/parser/rules/inline/anchor.ts +0 -327
  421. package/src/parser/rules/inline/bibcite.ts +0 -153
  422. package/src/parser/rules/inline/color.ts +0 -140
  423. package/src/parser/rules/inline/equation-ref.ts +0 -115
  424. package/src/parser/rules/inline/expr.ts +0 -526
  425. package/src/parser/rules/inline/footnote.ts +0 -223
  426. package/src/parser/rules/inline/image.ts +0 -328
  427. package/src/parser/rules/inline/line-break.ts +0 -326
  428. package/src/parser/rules/inline/link-triple.ts +0 -267
  429. package/src/parser/rules/inline/math-inline.ts +0 -126
  430. package/src/parser/rules/inline/raw.ts +0 -262
  431. package/src/parser/rules/inline/size.ts +0 -244
  432. package/src/parser/rules/inline/span.ts +0 -424
  433. package/src/parser/rules/inline/user.ts +0 -147
@@ -1,265 +0,0 @@
1
- /**
2
- *
3
- * Text-level expansion of `[[#if ...]]`, `[[#ifexpr ...]]`, and
4
- * `[[#expr ...]]` directives that sit *inside* another block's opener.
5
- *
6
- * The inline rules in `rules/inline/expr.ts` parse these forms as regular
7
- * inline elements, but that only works when the directive appears in
8
- * parseable inline text. When one is embedded inside a block opener's
9
- * attribute string, e.g.
10
- *
11
- * ```wikitext
12
- * [[div class="x [[#if 1 | a | b ]]"]]
13
- * [[li class="[[#if 1 | folded | unfolded ]] [[#ifexpr 1>0 | hot | cold ]]"]]
14
- * [[div col="[[#expr 1+1]]"]]
15
- * ```
16
- *
17
- * the lexer cannot recover a well-formed opener from the input. The
18
- * embedded directive has to collapse to a plain string before the parser
19
- * sees the outer tag.
20
- *
21
- * This pass only resolves directives whose `[[#` sits inside an unclosed
22
- * `[[` (depth > 0). Top-level directives are left untouched so the inline
23
- * parser / AST renderer keeps its full evaluator + element support.
24
- *
25
- * Truthiness rules match the inline `ifRule` / `ifExprRule`: an empty
26
- * string, `"0"`, `"false"`, `"null"` (case-insensitive) are falsy.
27
- *
28
- * @module
29
- */
30
-
31
- import { evaluateExpression, formatExprValue, isTruthy } from "@wdprlib/ast";
32
- import {
33
- computeBracketDepths,
34
- makeUniqueSentinels,
35
- maskRawRegions,
36
- restorePlaceholders,
37
- } from "./utils";
38
-
39
- /**
40
- * Resolve every `[[#if]]` / `[[#ifexpr]]` / `[[#expr]]` that sits inside
41
- * another block's opener (depth > 0). Top-level directives are left for
42
- * the inline parser. Innermost-first reduction lets an outer directive
43
- * re-process the flattened body on the next pass. Unmatched / malformed
44
- * directives are left untouched.
45
- */
46
- export function preprocessExpr(source: string): string {
47
- if (!source.includes("[[#")) return source;
48
-
49
- const sentinels = makeUniqueSentinels(source);
50
- const { masked, placeholders } = maskRawRegions(source, sentinels);
51
- const reduced = reduceExpr(masked);
52
- return restorePlaceholders(reduced, placeholders, sentinels);
53
- }
54
-
55
- /**
56
- * Backwards-compatible alias for the older `preprocessIf` name (used by
57
- * external callers that target the previous, `[[#if]]`-only behaviour).
58
- * Both names point at the same implementation, which now also resolves
59
- * `[[#ifexpr]]` and `[[#expr]]` in opener context.
60
- */
61
- export const preprocessIf: (source: string) => string = preprocessExpr;
62
-
63
- function reduceExpr(source: string): string {
64
- let current = source;
65
- const maxIterations = source.length + 1;
66
- for (let i = 0; i < maxIterations; i++) {
67
- const next = expandInnermost(current);
68
- if (next === current) return current;
69
- current = next;
70
- }
71
- return current;
72
- }
73
-
74
- /**
75
- * Walk `source`, locate every innermost `[[#if]]` / `[[#ifexpr]]` /
76
- * `[[#expr]]` directive that sits inside an unclosed `[[`, and replace
77
- * it with its evaluated string. Returns the source unchanged when no
78
- * replacements were made.
79
- */
80
- function expandInnermost(source: string): string {
81
- const depths = computeBracketDepths(source);
82
- let result = "";
83
- let i = 0;
84
- let replaced = false;
85
-
86
- while (i < source.length) {
87
- const kind = matchDirectiveKind(source, i);
88
- if (kind !== null && depths[i]! > 0) {
89
- const match = tryParseInnermostDirective(source, i, kind);
90
- if (match !== null) {
91
- result += evaluateDirective(kind, match);
92
- i = match.end;
93
- replaced = true;
94
- continue;
95
- }
96
- }
97
- result += source[i];
98
- i++;
99
- }
100
-
101
- return replaced ? result : source;
102
- }
103
-
104
- type DirectiveKind = "if" | "ifexpr" | "expr";
105
-
106
- /** Return the kind of `[[#xxx` directive at `i`, or null if none matches. */
107
- function matchDirectiveKind(source: string, i: number): DirectiveKind | null {
108
- if (!source.startsWith("[[#", i)) return null;
109
- // Order matters: `ifexpr` must be checked before `if` because the
110
- // shorter `if` prefix would otherwise consume `ifexpr` openings.
111
- if (source.startsWith("ifexpr", i + 3) && !isIdentChar(source[i + 9])) {
112
- return "ifexpr";
113
- }
114
- if (source.startsWith("if", i + 3) && !isIdentChar(source[i + 5])) {
115
- return "if";
116
- }
117
- if (source.startsWith("expr", i + 3) && !isIdentChar(source[i + 7])) {
118
- return "expr";
119
- }
120
- return null;
121
- }
122
-
123
- interface DirectiveMatch {
124
- /** Position just past the closing `]]`. */
125
- end: number;
126
- /** Raw condition / expression (everything between the keyword and the first top-level `|` or `]]`). */
127
- head: string;
128
- /** Raw `then` branch (empty when no `|` appeared). */
129
- thenText: string;
130
- /** Raw `else` branch (empty when only one `|` appeared). */
131
- elseText: string;
132
- /** Whether the directive supplied a `|` at all. */
133
- hasPipe: boolean;
134
- }
135
-
136
- /**
137
- * Try to parse a single `[[#kind ...]]` directive starting at `start`.
138
- * Returns `null` when the directive is malformed (no closing `]]`) or
139
- * when its body contains another `[[#kind]]` of the same family
140
- * (so the caller should keep descending). The substrings are returned
141
- * raw; callers decide how to evaluate them.
142
- */
143
- function tryParseInnermostDirective(
144
- source: string,
145
- start: number,
146
- kind: DirectiveKind,
147
- ): DirectiveMatch | null {
148
- const keywordLen = kind === "ifexpr" ? 6 : kind === "expr" ? 4 : 2;
149
- // start + 3 ("[[#") + keywordLen → first char after the keyword.
150
- let pos = start + 3 + keywordLen;
151
- // The inline rule does not require a whitespace separator here — it
152
- // accepts e.g. `[[#expr(1+1)]]` and `[[#ifexpr(1)|yes|no]]`. Skip any
153
- // optional leading whitespace and let the body scan handle the rest.
154
- while (pos < source.length && isWhitespace(source[pos])) pos++;
155
-
156
- const headStart = pos;
157
- let blockDepth = 0;
158
- let linkDepth = 0;
159
- const pipes: number[] = [];
160
- let closeStart = -1;
161
-
162
- while (pos < source.length) {
163
- // Reject any nested directive of the same family so we resolve
164
- // innermost-first.
165
- if (matchDirectiveKind(source, pos) !== null) {
166
- return null;
167
- }
168
- if (source.startsWith("[[[", pos)) {
169
- linkDepth++;
170
- pos += 3;
171
- continue;
172
- }
173
- if (linkDepth > 0 && source.startsWith("]]]", pos)) {
174
- linkDepth--;
175
- pos += 3;
176
- continue;
177
- }
178
- if (linkDepth > 0) {
179
- pos++;
180
- continue;
181
- }
182
- if (source.startsWith("[[", pos)) {
183
- blockDepth++;
184
- pos += 2;
185
- continue;
186
- }
187
- if (source.startsWith("]]", pos)) {
188
- if (blockDepth === 0) {
189
- closeStart = pos;
190
- break;
191
- }
192
- blockDepth--;
193
- pos += 2;
194
- continue;
195
- }
196
- if (source[pos] === "|" && blockDepth === 0 && linkDepth === 0) {
197
- pipes.push(pos);
198
- }
199
- pos++;
200
- }
201
-
202
- if (closeStart === -1) return null;
203
- const hasPipe = pipes.length > 0;
204
- // `[[#if]]` / `[[#ifexpr]]` require a `then` branch separated by `|`.
205
- // A directive without a pipe is malformed; leave it for the inline
206
- // parser to report rather than silently dropping it.
207
- if (!hasPipe && (kind === "if" || kind === "ifexpr")) return null;
208
-
209
- let head: string;
210
- let thenText = "";
211
- let elseText = "";
212
-
213
- if (!hasPipe) {
214
- head = source.slice(headStart, closeStart).trim();
215
- } else {
216
- head = source.slice(headStart, pipes[0]!).trim();
217
- if (pipes.length >= 2) {
218
- thenText = source.slice(pipes[0]! + 1, pipes[1]!).trim();
219
- elseText = source.slice(pipes[1]! + 1, closeStart).trim();
220
- } else {
221
- thenText = source.slice(pipes[0]! + 1, closeStart).trim();
222
- }
223
- }
224
-
225
- return {
226
- end: closeStart + 2,
227
- head,
228
- thenText,
229
- elseText,
230
- hasPipe,
231
- };
232
- }
233
-
234
- /** Evaluate a parsed directive into its replacement string. */
235
- function evaluateDirective(kind: DirectiveKind, m: DirectiveMatch): string {
236
- if (kind === "expr") {
237
- const result = evaluateExpression(m.head);
238
- if (result.success) return formatExprValue(result.value);
239
- // The inline renderer emits nothing for an empty `[[#expr ]]`; mirror
240
- // that so an opener-embedded empty expr collapses to an empty
241
- // attribute value rather than the literal "ERROR" placeholder.
242
- if (result.error === "empty expression") return "";
243
- return "ERROR";
244
- }
245
- if (kind === "if") {
246
- if (!m.hasPipe) return "";
247
- return isTruthy(m.head) ? m.thenText : m.elseText;
248
- }
249
- // ifexpr — the inline renderer treats every error (including empty
250
- // expression) as a "run-time error" string, so we keep the placeholder
251
- // here to avoid silently swallowing a malformed conditional.
252
- if (!m.hasPipe) return "";
253
- const result = evaluateExpression(m.head);
254
- if (!result.success) return "ERROR";
255
- return result.value !== 0 && !Number.isNaN(result.value) ? m.thenText : m.elseText;
256
- }
257
-
258
- function isWhitespace(ch: string | undefined): boolean {
259
- return ch === " " || ch === "\t" || ch === "\n" || ch === "\r";
260
- }
261
-
262
- function isIdentChar(ch: string | undefined): boolean {
263
- if (!ch) return false;
264
- return /[a-z0-9_-]/i.test(ch);
265
- }
@@ -1,250 +0,0 @@
1
- /**
2
- *
3
- * Shared helpers for text-level preprocess passes that run before
4
- * tokenization (e.g. `[[iftags]]` collapse, opener-embedded `[[#if]]`
5
- * collapse).
6
- *
7
- * Each pass needs to:
8
- * - mask raw regions (`[[code]]`, `[[html]]`, `@@..@@`, `@<..>@`) so a
9
- * pattern they enclose is not transformed
10
- * - know the bracket-opener depth at every offset so it can distinguish
11
- * directives at the top level from ones nested inside another block's
12
- * opener attribute string
13
- *
14
- * The depth tracking mirrors the lexer's `blockOpenerDepth`:
15
- * - `[[` increments, `]]` decrements (clamped at 0)
16
- * - `[[[ ... ]]]` triple links do not affect block depth
17
- * - quoted attribute values (`= "..."`) are skipped to the next `"` /
18
- * newline, matching the lexer's `QUOTED_STRING` recognition
19
- * - newlines reset depth to 0 (block openers are single-line constructs)
20
- *
21
- * @module
22
- */
23
-
24
- const BASE_PLACEHOLDER_OPEN = "\uE000";
25
- const BASE_PLACEHOLDER_CLOSE = "\uE001";
26
-
27
- const RAW_BLOCK_OPEN_PATTERN = /\[\[\s*(code|html)\b[^\]]*\]\]/iy;
28
-
29
- /** Unique sentinel characters used to wrap raw-region placeholders. */
30
- export interface Sentinels {
31
- open: string;
32
- close: string;
33
- }
34
-
35
- /**
36
- * Choose sentinel strings that are guaranteed not to appear in `source`.
37
- * The placeholders we splice into the masked source have the form
38
- * `<open><digits><close>`, so the restore pass must not confuse them
39
- * with content. Extends both sentinel characters until neither appears.
40
- */
41
- export function makeUniqueSentinels(source: string): Sentinels {
42
- let open = BASE_PLACEHOLDER_OPEN;
43
- let close = BASE_PLACEHOLDER_CLOSE;
44
- while (source.includes(open) || source.includes(close)) {
45
- open += BASE_PLACEHOLDER_OPEN;
46
- close += BASE_PLACEHOLDER_CLOSE;
47
- }
48
- return { open, close };
49
- }
50
-
51
- /**
52
- * Walk `source` and replace each raw region with a placeholder token so
53
- * downstream passes (regex / scan) do not transform their bodies. The
54
- * original substrings are kept in `placeholders` for {@link restorePlaceholders}
55
- * to splice back at the end.
56
- *
57
- * Raw regions handled:
58
- * - `[[code ...]]...[[/code]]` — consumes to EOF when the closing tag
59
- * is missing (mirroring the block parser's behaviour for unclosed
60
- * code blocks).
61
- * - `[[html ...]]...[[/html]]` — only masked when the closing tag is
62
- * present; an unclosed `[[html]]` is left in place so a later directive
63
- * is not incorrectly hidden behind the mask.
64
- * - `@<...>@` (single-line balanced raw — `>@` must be on the same line).
65
- * - `@@...@@` (single-line inline raw — must not span newlines).
66
- *
67
- * Genuinely unclosed `@@` / `@<` are left in place (the parser treats
68
- * them as literal text anyway). Comments `[!-- ... --]` are intentionally
69
- * not masked: Wikidot's legacy Text_Wiki evaluates `[[iftags]]` before
70
- * comments, so masking here would invert that order.
71
- */
72
- export function maskRawRegions(
73
- source: string,
74
- sentinels: Sentinels,
75
- ): { masked: string; placeholders: string[] } {
76
- const placeholders: string[] = [];
77
- let masked = "";
78
- let i = 0;
79
-
80
- while (i < source.length) {
81
- if (source[i] === "[" && source[i + 1] === "[") {
82
- RAW_BLOCK_OPEN_PATTERN.lastIndex = i;
83
- const openMatch = RAW_BLOCK_OPEN_PATTERN.exec(source);
84
- if (openMatch) {
85
- const name = openMatch[1]!.toLowerCase();
86
- const openLen = openMatch[0].length;
87
- const closePattern = new RegExp(`\\[\\[\\/\\s*${name}\\s*\\]\\]`, "ig");
88
- closePattern.lastIndex = i + openLen;
89
- const closeMatch = closePattern.exec(source);
90
- if (closeMatch) {
91
- const regionEnd = closeMatch.index + closeMatch[0].length;
92
- masked += pushPlaceholder(placeholders, source.slice(i, regionEnd), sentinels);
93
- i = regionEnd;
94
- continue;
95
- }
96
- if (name === "code") {
97
- masked += pushPlaceholder(placeholders, source.slice(i), sentinels);
98
- i = source.length;
99
- continue;
100
- }
101
- }
102
- }
103
-
104
- if (source[i] === "@" && source[i + 1] === "<") {
105
- const close = source.indexOf(">@", i + 2);
106
- const newline = source.indexOf("\n", i + 2);
107
- if (close !== -1 && (newline === -1 || close < newline)) {
108
- const regionEnd = close + 2;
109
- masked += pushPlaceholder(placeholders, source.slice(i, regionEnd), sentinels);
110
- i = regionEnd;
111
- continue;
112
- }
113
- }
114
-
115
- if (source[i] === "@" && source[i + 1] === "@") {
116
- const close = source.indexOf("@@", i + 2);
117
- const newline = source.indexOf("\n", i + 2);
118
- if (close !== -1 && (newline === -1 || close < newline)) {
119
- const regionEnd = close + 2;
120
- masked += pushPlaceholder(placeholders, source.slice(i, regionEnd), sentinels);
121
- i = regionEnd;
122
- continue;
123
- }
124
- }
125
-
126
- masked += source[i];
127
- i++;
128
- }
129
-
130
- return { masked, placeholders };
131
- }
132
-
133
- function pushPlaceholder(placeholders: string[], text: string, sentinels: Sentinels): string {
134
- const idx = placeholders.length;
135
- placeholders.push(text);
136
- return `${sentinels.open}${idx}${sentinels.close}`;
137
- }
138
-
139
- function escapeRegex(str: string): string {
140
- return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
141
- }
142
-
143
- /** Inverse of {@link maskRawRegions}: replace placeholders with originals. */
144
- export function restorePlaceholders(
145
- source: string,
146
- placeholders: string[],
147
- sentinels: Sentinels,
148
- ): string {
149
- const pattern = new RegExp(
150
- `${escapeRegex(sentinels.open)}(\\d+)${escapeRegex(sentinels.close)}`,
151
- "g",
152
- );
153
- return source.replace(pattern, (_, idx: string) => placeholders[Number(idx)] ?? "");
154
- }
155
-
156
- /**
157
- * Compute the unmatched-`[[` depth at each character offset of `source`.
158
- * Mirrors the lexer's `blockOpenerDepth`. Returns `Int32Array` of length
159
- * `source.length + 1`; `depths[k]` is the depth immediately before the
160
- * character at offset `k` is consumed.
161
- */
162
- export function computeBracketDepths(source: string): Int32Array {
163
- const n = source.length;
164
- const depths = new Int32Array(n + 1);
165
- let depth = 0;
166
- let i = 0;
167
- while (i < n) {
168
- depths[i] = depth;
169
- const c = source.charCodeAt(i);
170
- const c1 = i + 1 < n ? source.charCodeAt(i + 1) : -1;
171
- const c2 = i + 2 < n ? source.charCodeAt(i + 2) : -1;
172
-
173
- if (depth > 0 && c === 0x22 /* " */ && precededByEqualsAttr(source, i)) {
174
- const end = findQuoteEnd(source, i + 1);
175
- for (let k = i; k <= end; k++) depths[k] = depth;
176
- i = end + 1;
177
- continue;
178
- }
179
-
180
- if (c === 0x5b /* [ */ && c1 === 0x5b && c2 === 0x5b) {
181
- const end = findTripleLinkEnd(source, i + 3);
182
- for (let k = i; k <= end; k++) depths[k] = depth;
183
- i = end + 1;
184
- continue;
185
- }
186
-
187
- if (c === 0x5b && c1 === 0x5b) {
188
- depth++;
189
- depths[i + 1] = depth;
190
- i += 2;
191
- continue;
192
- }
193
-
194
- if (c === 0x5d /* ] */ && c1 === 0x5d) {
195
- depth = Math.max(0, depth - 1);
196
- depths[i + 1] = depth;
197
- i += 2;
198
- continue;
199
- }
200
-
201
- if (c === 0x0a /* \n */) {
202
- // Block openers are single-line; reset depth at line boundaries so
203
- // an unterminated `[[xxx` does not keep subsequent directives
204
- // inside its (imaginary) opener context.
205
- depth = 0;
206
- }
207
-
208
- i++;
209
- }
210
- depths[n] = depth;
211
- return depths;
212
- }
213
-
214
- function precededByEqualsAttr(s: string, i: number): boolean {
215
- let j = i - 1;
216
- while (j >= 0) {
217
- const ch = s.charCodeAt(j);
218
- if (ch === 0x20 /* space */ || ch === 0x09 /* tab */) {
219
- j--;
220
- continue;
221
- }
222
- return ch === 0x3d; /* = */
223
- }
224
- return false;
225
- }
226
-
227
- function findQuoteEnd(s: string, from: number): number {
228
- for (let i = from; i < s.length; i++) {
229
- const ch = s.charCodeAt(i);
230
- if (ch === 0x22 /* " */ || ch === 0x0a /* \n */) return i;
231
- }
232
- return s.length - 1;
233
- }
234
-
235
- function findTripleLinkEnd(s: string, from: number): number {
236
- for (let i = from; i < s.length; i++) {
237
- if (
238
- s.charCodeAt(i) === 0x5d &&
239
- i + 2 < s.length &&
240
- s.charCodeAt(i + 1) === 0x5d &&
241
- s.charCodeAt(i + 2) === 0x5d
242
- ) {
243
- return i + 2;
244
- }
245
- if (s.charCodeAt(i) === 0x0a && i + 1 < s.length && s.charCodeAt(i + 1) === 0x0a) {
246
- return i;
247
- }
248
- }
249
- return s.length - 1;
250
- }
@@ -1,111 +0,0 @@
1
- /**
2
- *
3
- * Whitespace normalization preprocessing for Wikidot markup.
4
- *
5
- * This module ensures the lexer and parser receive input with consistent
6
- * whitespace conventions. It handles platform differences (DOS/Mac newlines),
7
- * normalizes exotic whitespace characters that users may paste from external
8
- * sources, and applies Wikidot-specific behaviors like backslash line continuation.
9
- *
10
- * Substitutions are applied in a deliberate order:
11
- * 1. Newline normalization (DOS `\r\n` and legacy Mac `\r` to Unix `\n`)
12
- * 2. Non-standard leading whitespace replacement (nbsp, figure space to regular space)
13
- * 3. Whitespace-only line stripping (collapse to empty lines)
14
- * 4. Backslash line continuation (`\\\n` to line-break marker U+E000)
15
- * 5. Tab expansion (tab to four spaces)
16
- * 6. Null character replacement (NUL to space)
17
- * 7. Leading/trailing newline removal
18
- *
19
- * @module
20
- */
21
-
22
- /**
23
- * Matches non-standard whitespace characters (non-breaking space U+00A0,
24
- * figure space U+2007) at the start of lines. These are replaced with
25
- * regular ASCII spaces so the parser's indentation logic works correctly.
26
- */
27
- const LEADING_NONSTANDARD_WHITESPACE = /^[\u00a0\u2007]+/gm;
28
-
29
- /** Matches lines containing only whitespace (collapsed to empty lines). */
30
- const WHITESPACE_ONLY_LINE = /^\s+$/gm;
31
-
32
- /** Matches one or more newlines at the very start of the text. */
33
- const LEADING_NEWLINES = /^\n+/;
34
-
35
- /** Matches one or more newlines at the very end of the text. */
36
- const TRAILING_NEWLINES = /\n+$/;
37
-
38
- /** Matches DOS (`\r\n`) and legacy Mac (`\r`) line endings. */
39
- const DOS_MAC_NEWLINES = /\r\n?/g;
40
-
41
- /**
42
- * Matches a backslash immediately followed by a newline.
43
- * In Wikidot, `\` at end of line acts as an explicit line break (`<br />`).
44
- */
45
- const CONCAT_LINES = /\\\n/g;
46
-
47
- /** Matches tab characters (expanded to four spaces). */
48
- const TABS = /\t/g;
49
-
50
- /** Matches null (NUL) characters (replaced with spaces). */
51
- const NULL_CHARS = /\0/g;
52
-
53
- /**
54
- * Replace non-standard whitespace characters at the start of each line
55
- * with the same number of regular ASCII spaces.
56
- *
57
- * This ensures indentation-sensitive constructs (like nested lists) work
58
- * correctly regardless of whether the user typed regular spaces, non-breaking
59
- * spaces, or figure spaces.
60
- *
61
- * @param text - Input text with potentially non-standard leading whitespace
62
- * @returns Text with leading non-standard whitespace replaced by ASCII spaces
63
- */
64
- function replaceLeadingSpaces(text: string): string {
65
- return text.replace(LEADING_NONSTANDARD_WHITESPACE, (match) => {
66
- return " ".repeat(match.length);
67
- });
68
- }
69
-
70
- /**
71
- * Apply all whitespace normalization substitutions to the given text.
72
- *
73
- * Substitutions are applied in a specific order that avoids interference
74
- * between steps (e.g., DOS newlines must be normalized before backslash
75
- * continuation can be detected).
76
- *
77
- * The backslash continuation step converts `\\\n` to the Private Use Area
78
- * character U+E000, which the lexer later recognizes as an explicit line break.
79
- * This approach avoids ambiguity with other uses of the backslash character.
80
- *
81
- * @param text - Raw input text
82
- * @returns Text with normalized whitespace, ready for typography preprocessing
83
- */
84
- export function substitute(text: string): string {
85
- let result = text;
86
-
87
- // Replace DOS and Mac newlines
88
- result = result.replace(DOS_MAC_NEWLINES, "\n");
89
-
90
- // Replace leading non-standard spaces with regular spaces
91
- result = replaceLeadingSpaces(result);
92
-
93
- // Strip lines with only whitespace
94
- result = result.replace(WHITESPACE_ONLY_LINE, "");
95
-
96
- // Backslash at end of line → line break marker (U+E000)
97
- // Wikidot treats \ at end of line as <br />
98
- result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));
99
-
100
- // Tabs to spaces
101
- result = result.replace(TABS, " ");
102
-
103
- // Null characters to spaces
104
- result = result.replace(NULL_CHARS, " ");
105
-
106
- // Remove leading and trailing newlines
107
- result = result.replace(LEADING_NEWLINES, "");
108
- result = result.replace(TRAILING_NEWLINES, "");
109
-
110
- return result;
111
- }