@wdprlib/parser 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433)
  1. package/dist/index.cjs +10451 -8402
  2. package/dist/index.d.cts +313 -337
  3. package/dist/index.d.ts +313 -337
  4. package/dist/index.js +10438 -8389
  5. package/package.json +1 -1
  6. package/src/index.ts +7 -0
  7. package/src/lexer/anchor.ts +48 -0
  8. package/src/lexer/index.ts +3 -2
  9. package/src/lexer/lexer.ts +73 -559
  10. package/src/lexer/options.ts +19 -0
  11. package/src/lexer/punctuation.ts +70 -0
  12. package/src/lexer/quoted-string.ts +16 -0
  13. package/src/lexer/runs.ts +85 -0
  14. package/src/lexer/spacing-actions.ts +24 -0
  15. package/src/lexer/state.ts +103 -0
  16. package/src/lexer/syntax-actions.ts +80 -0
  17. package/src/lexer/text-actions.ts +41 -0
  18. package/src/lexer/token-actions.ts +136 -0
  19. package/src/lexer/token-factory.ts +62 -0
  20. package/src/lexer/tokenize.ts +18 -0
  21. package/src/parser/constants.ts +2 -0
  22. package/src/parser/depth/index.ts +111 -0
  23. package/src/parser/depth/stack.ts +82 -0
  24. package/src/parser/parse/block.ts +42 -0
  25. package/src/parser/parse/context.ts +26 -0
  26. package/src/parser/parse/footnotes.ts +25 -0
  27. package/src/parser/parse/index.ts +42 -0
  28. package/src/parser/parse/options.ts +34 -0
  29. package/src/parser/parse/parser.ts +79 -0
  30. package/src/parser/parse/plain-non-ascii.ts +129 -0
  31. package/src/parser/parse/result.ts +57 -0
  32. package/src/parser/parse/source.ts +11 -0
  33. package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
  34. package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
  35. package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
  36. package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
  37. package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
  38. package/src/parser/postprocess/spanStrip/factory.ts +23 -0
  39. package/src/parser/postprocess/spanStrip/index.ts +8 -0
  40. package/src/parser/postprocess/spanStrip/merge.ts +117 -0
  41. package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
  42. package/src/parser/postprocess/spanStrip/split.ts +67 -0
  43. package/src/parser/preprocess/expr/chars.ts +15 -0
  44. package/src/parser/preprocess/expr/evaluate.ts +22 -0
  45. package/src/parser/preprocess/expr/index.ts +45 -0
  46. package/src/parser/preprocess/expr/kind.ts +19 -0
  47. package/src/parser/preprocess/expr/parse.ts +103 -0
  48. package/src/parser/preprocess/expr/scan.ts +34 -0
  49. package/src/parser/preprocess/expr/types.ts +14 -0
  50. package/src/parser/preprocess/typography.ts +70 -5
  51. package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
  52. package/src/parser/preprocess/utils/index.ts +13 -0
  53. package/src/parser/preprocess/utils/raw-regions.ts +153 -0
  54. package/src/parser/preprocess/whitespace/detection.ts +39 -0
  55. package/src/parser/preprocess/whitespace/index.ts +79 -0
  56. package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
  57. package/src/parser/preprocess/whitespace/patterns.ts +23 -0
  58. package/src/parser/rules/block/align/body.ts +46 -0
  59. package/src/parser/rules/block/align/element.ts +13 -0
  60. package/src/parser/rules/block/align/index.ts +90 -0
  61. package/src/parser/rules/block/align/syntax.ts +113 -0
  62. package/src/parser/rules/block/bibliography/body.ts +81 -0
  63. package/src/parser/rules/block/bibliography/entries.ts +49 -0
  64. package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
  65. package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
  66. package/src/parser/rules/block/bibliography/index.ts +90 -0
  67. package/src/parser/rules/block/bibliography/open.ts +53 -0
  68. package/src/parser/rules/block/block-list/bare-content.ts +105 -0
  69. package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
  70. package/src/parser/rules/block/block-list/index.ts +51 -0
  71. package/src/parser/rules/block/block-list/item-content.ts +132 -0
  72. package/src/parser/rules/block/block-list/li-content.ts +107 -0
  73. package/src/parser/rules/block/block-list/li-item.ts +77 -0
  74. package/src/parser/rules/block/block-list/list-block.ts +100 -0
  75. package/src/parser/rules/block/block-list/open.ts +51 -0
  76. package/src/parser/rules/block/block-list/tags.ts +50 -0
  77. package/src/parser/rules/block/blockquote/build.ts +62 -0
  78. package/src/parser/rules/block/blockquote/index.ts +80 -0
  79. package/src/parser/rules/block/blockquote/line.ts +79 -0
  80. package/src/parser/rules/block/blockquote/lines.ts +39 -0
  81. package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
  82. package/src/parser/rules/block/center/open.ts +27 -0
  83. package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
  84. package/src/parser/rules/block/clear-float/syntax.ts +43 -0
  85. package/src/parser/rules/block/code/attributes.ts +30 -0
  86. package/src/parser/rules/block/code/content.ts +57 -0
  87. package/src/parser/rules/block/code/index.ts +100 -0
  88. package/src/parser/rules/block/collapsible/attributes.ts +95 -0
  89. package/src/parser/rules/block/collapsible/body.ts +69 -0
  90. package/src/parser/rules/block/collapsible/index.ts +117 -0
  91. package/src/parser/rules/block/collapsible/open.ts +51 -0
  92. package/src/parser/rules/block/collapsible/orphans.ts +31 -0
  93. package/src/parser/rules/block/collapsible/tags.ts +17 -0
  94. package/src/parser/rules/block/comment/consume.ts +37 -0
  95. package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
  96. package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
  97. package/src/parser/rules/block/content-separator/syntax.ts +33 -0
  98. package/src/parser/rules/block/definition-list/collect.ts +40 -0
  99. package/src/parser/rules/block/definition-list/index.ts +63 -0
  100. package/src/parser/rules/block/definition-list/item-key.ts +95 -0
  101. package/src/parser/rules/block/definition-list/item-value.ts +56 -0
  102. package/src/parser/rules/block/definition-list/items.ts +54 -0
  103. package/src/parser/rules/block/div/body.ts +41 -0
  104. package/src/parser/rules/block/div/close.ts +41 -0
  105. package/src/parser/rules/block/div/failed.ts +117 -0
  106. package/src/parser/rules/block/div/index.ts +112 -0
  107. package/src/parser/rules/block/div/nesting.ts +37 -0
  108. package/src/parser/rules/block/div/open.ts +59 -0
  109. package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
  110. package/src/parser/rules/block/embed-block/content.ts +53 -0
  111. package/src/parser/rules/block/embed-block/index.ts +91 -0
  112. package/src/parser/rules/block/embed-block/open.ts +52 -0
  113. package/src/parser/rules/block/embed-block/tags.ts +5 -0
  114. package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
  115. package/src/parser/rules/block/footnoteblock/index.ts +82 -0
  116. package/src/parser/rules/block/footnoteblock/open.ts +53 -0
  117. package/src/parser/rules/block/heading/index.ts +87 -0
  118. package/src/parser/rules/block/heading/open.ts +50 -0
  119. package/src/parser/rules/block/heading/toc-text.ts +26 -0
  120. package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
  121. package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
  122. package/src/parser/rules/block/html/body.ts +114 -0
  123. package/src/parser/rules/block/html/diagnostics.ts +11 -0
  124. package/src/parser/rules/block/html/index.ts +95 -0
  125. package/src/parser/rules/block/html/open.ts +36 -0
  126. package/src/parser/rules/block/iframe/attributes.ts +106 -0
  127. package/src/parser/rules/block/iframe/index.ts +73 -0
  128. package/src/parser/rules/block/iframe/open.ts +58 -0
  129. package/src/parser/rules/block/iframe/source.ts +24 -0
  130. package/src/parser/rules/block/iframe/url.ts +38 -0
  131. package/src/parser/rules/block/iftags/body.ts +48 -0
  132. package/src/parser/rules/block/iftags/condition.ts +24 -0
  133. package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
  134. package/src/parser/rules/block/include/arguments.ts +48 -0
  135. package/src/parser/rules/block/include/index.ts +75 -0
  136. package/src/parser/rules/block/include/location.ts +24 -0
  137. package/src/parser/rules/block/include/variables.ts +37 -0
  138. package/src/parser/rules/block/list/index.ts +73 -0
  139. package/src/parser/rules/block/list/line.ts +77 -0
  140. package/src/parser/rules/block/list/native.ts +89 -0
  141. package/src/parser/rules/block/math/content.ts +54 -0
  142. package/src/parser/rules/block/math/index.ts +106 -0
  143. package/src/parser/rules/block/math/name.ts +35 -0
  144. package/src/parser/rules/block/module/body.ts +92 -0
  145. package/src/parser/rules/block/module/element.ts +33 -0
  146. package/src/parser/rules/block/module/include/directive.ts +91 -0
  147. package/src/parser/rules/block/module/include/index.ts +11 -2
  148. package/src/parser/rules/block/module/include/references.ts +42 -0
  149. package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
  150. package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
  151. package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
  152. package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
  153. package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
  154. package/src/parser/rules/block/module/include/scanner.ts +121 -0
  155. package/src/parser/rules/block/module/index.ts +14 -2
  156. package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
  157. package/src/parser/rules/block/module/listpages/extract.ts +25 -359
  158. package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
  159. package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
  160. package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
  161. package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
  162. package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
  163. package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
  164. package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
  165. package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
  166. package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
  167. package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
  168. package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
  169. package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
  170. package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
  171. package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
  172. package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
  173. package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
  174. package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
  175. package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
  176. package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
  177. package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
  178. package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
  179. package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
  180. package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
  181. package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
  182. package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
  183. package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
  184. package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
  185. package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
  186. package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
  187. package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
  188. package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
  189. package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
  190. package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
  191. package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
  192. package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
  193. package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
  194. package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
  195. package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
  196. package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
  197. package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
  198. package/src/parser/rules/block/module/listusers/extract.ts +4 -9
  199. package/src/parser/rules/block/module/listusers/getters.ts +21 -0
  200. package/src/parser/rules/block/module/listusers/variables.ts +15 -0
  201. package/src/parser/rules/block/module/open.ts +57 -0
  202. package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
  203. package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
  204. package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
  205. package/src/parser/rules/block/module/resolution/styles.ts +53 -0
  206. package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
  207. package/src/parser/rules/block/module/resolve.ts +79 -292
  208. package/src/parser/rules/block/module/rule.ts +56 -0
  209. package/src/parser/rules/block/module/types-common.ts +11 -0
  210. package/src/parser/rules/block/module/walk/children.ts +35 -0
  211. package/src/parser/rules/block/module/walk/index.ts +9 -0
  212. package/src/parser/rules/block/module/walk/map/index.ts +2 -0
  213. package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
  214. package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
  215. package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
  216. package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
  217. package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
  218. package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
  219. package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
  220. package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
  221. package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
  222. package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
  223. package/src/parser/rules/block/module/walk/map/types.ts +6 -0
  224. package/src/parser/rules/block/module/walk/traverse.ts +65 -0
  225. package/src/parser/rules/block/orphan-li/content.ts +60 -0
  226. package/src/parser/rules/block/orphan-li/index.ts +75 -0
  227. package/src/parser/rules/block/orphan-li/open.ts +25 -0
  228. package/src/parser/rules/block/orphan-li/tags.ts +40 -0
  229. package/src/parser/rules/block/paragraph/content.ts +12 -0
  230. package/src/parser/rules/block/paragraph/index.ts +60 -0
  231. package/src/parser/rules/block/paragraph/normalize.ts +52 -0
  232. package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
  233. package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
  234. package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
  235. package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
  236. package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
  237. package/src/parser/rules/block/parsing/block-item.ts +29 -0
  238. package/src/parser/rules/block/parsing/content.ts +127 -0
  239. package/src/parser/rules/block/parsing/end-condition.ts +51 -0
  240. package/src/parser/rules/block/parsing/inline-content.ts +105 -0
  241. package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
  242. package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
  243. package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
  244. package/src/parser/rules/block/table/index.ts +80 -0
  245. package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
  246. package/src/parser/rules/block/table/pipe/cell.ts +106 -0
  247. package/src/parser/rules/block/table/pipe/index.ts +2 -0
  248. package/src/parser/rules/block/table/pipe/row.ts +88 -0
  249. package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
  250. package/src/parser/rules/block/table/pipe/trim.ts +50 -0
  251. package/src/parser/rules/block/table-block/body.ts +79 -0
  252. package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
  253. package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
  254. package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
  255. package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
  256. package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
  257. package/src/parser/rules/block/table-block/cell.ts +64 -0
  258. package/src/parser/rules/block/table-block/index.ts +113 -0
  259. package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
  260. package/src/parser/rules/block/table-block/structure.ts +80 -0
  261. package/src/parser/rules/block/tabview/body.ts +64 -0
  262. package/src/parser/rules/block/tabview/index.ts +90 -0
  263. package/src/parser/rules/block/tabview/open.ts +50 -0
  264. package/src/parser/rules/block/tabview/tab.ts +92 -0
  265. package/src/parser/rules/block/tabview/tags.ts +30 -0
  266. package/src/parser/rules/block/toc/element.ts +11 -0
  267. package/src/parser/rules/block/toc/index.ts +44 -0
  268. package/src/parser/rules/block/toc/open.ts +84 -0
  269. package/src/parser/rules/block/utils.ts +10 -610
  270. package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
  271. package/src/parser/rules/common/block-name.ts +33 -0
  272. package/src/parser/rules/common/index.ts +2 -0
  273. package/src/parser/rules/contracts/index.ts +3 -0
  274. package/src/parser/rules/contracts/parse-context.ts +38 -0
  275. package/src/parser/rules/contracts/rule.ts +43 -0
  276. package/src/parser/rules/contracts/scope.ts +31 -0
  277. package/src/parser/rules/inline/anchor/attributes.ts +54 -0
  278. package/src/parser/rules/inline/anchor/child.ts +26 -0
  279. package/src/parser/rules/inline/anchor/close.ts +34 -0
  280. package/src/parser/rules/inline/anchor/content.ts +59 -0
  281. package/src/parser/rules/inline/anchor/index.ts +103 -0
  282. package/src/parser/rules/inline/anchor/newline.ts +26 -0
  283. package/src/parser/rules/inline/anchor/open.ts +47 -0
  284. package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
  285. package/src/parser/rules/inline/anchor/syntax.ts +40 -0
  286. package/src/parser/rules/inline/anchor-name/index.ts +38 -0
  287. package/src/parser/rules/inline/anchor-name/name.ts +39 -0
  288. package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
  289. package/src/parser/rules/inline/bibcite/element.ts +14 -0
  290. package/src/parser/rules/inline/bibcite/index.ts +34 -0
  291. package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
  292. package/src/parser/rules/inline/bold.ts +2 -39
  293. package/src/parser/rules/inline/color/index.ts +35 -0
  294. package/src/parser/rules/inline/color/syntax.ts +69 -0
  295. package/src/parser/rules/inline/comment/consume.ts +31 -0
  296. package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
  297. package/src/parser/rules/inline/equation-ref/element.ts +8 -0
  298. package/src/parser/rules/inline/equation-ref/index.ts +34 -0
  299. package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
  300. package/src/parser/rules/inline/expr/branch.ts +104 -0
  301. package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
  302. package/src/parser/rules/inline/expr/conditional.ts +80 -0
  303. package/src/parser/rules/inline/expr/depth.ts +25 -0
  304. package/src/parser/rules/inline/expr/elements.ts +39 -0
  305. package/src/parser/rules/inline/expr/index.ts +84 -0
  306. package/src/parser/rules/inline/expr/syntax.ts +45 -0
  307. package/src/parser/rules/inline/footnote/child.ts +22 -0
  308. package/src/parser/rules/inline/footnote/close.ts +33 -0
  309. package/src/parser/rules/inline/footnote/content.ts +54 -0
  310. package/src/parser/rules/inline/footnote/elements.ts +38 -0
  311. package/src/parser/rules/inline/footnote/index.ts +54 -0
  312. package/src/parser/rules/inline/footnote/newline.ts +27 -0
  313. package/src/parser/rules/inline/footnote/open.ts +38 -0
  314. package/src/parser/rules/inline/formatting/container.ts +50 -0
  315. package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
  316. package/src/parser/rules/inline/guillemet/text.ts +11 -0
  317. package/src/parser/rules/inline/html/gate.ts +64 -0
  318. package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
  319. package/src/parser/rules/inline/html/open.ts +37 -0
  320. package/src/parser/rules/inline/image/attributes.ts +22 -0
  321. package/src/parser/rules/inline/image/body.ts +36 -0
  322. package/src/parser/rules/inline/image/index.ts +89 -0
  323. package/src/parser/rules/inline/image/open.ts +56 -0
  324. package/src/parser/rules/inline/image/source.ts +62 -0
  325. package/src/parser/rules/inline/image/syntax.ts +76 -0
  326. package/src/parser/rules/inline/italic.ts +2 -30
  327. package/src/parser/rules/inline/line-break/backslash.ts +58 -0
  328. package/src/parser/rules/inline/line-break/elements.ts +9 -0
  329. package/src/parser/rules/inline/line-break/index.ts +3 -0
  330. package/src/parser/rules/inline/line-break/newline.ts +82 -0
  331. package/src/parser/rules/inline/line-break/underscore.ts +45 -0
  332. package/src/parser/rules/inline/link-anchor.ts +6 -81
  333. package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
  334. package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
  335. package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
  336. package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
  337. package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
  338. package/src/parser/rules/inline/link-single.ts +7 -98
  339. package/src/parser/rules/inline/link-star.ts +7 -69
  340. package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
  341. package/src/parser/rules/inline/link-triple/index.ts +62 -0
  342. package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
  343. package/src/parser/rules/inline/link-triple/label.ts +35 -0
  344. package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
  345. package/src/parser/rules/inline/link-triple/target.ts +36 -0
  346. package/src/parser/rules/inline/math-inline/index.ts +40 -0
  347. package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
  348. package/src/parser/rules/inline/monospace.ts +2 -30
  349. package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
  350. package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
  351. package/src/parser/rules/inline/parsing/collect.ts +23 -0
  352. package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
  353. package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
  354. package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
  355. package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
  356. package/src/parser/rules/inline/parsing/rules.ts +34 -0
  357. package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
  358. package/src/parser/rules/inline/raw/angle.ts +40 -0
  359. package/src/parser/rules/inline/raw/double-at.ts +78 -0
  360. package/src/parser/rules/inline/raw/index.ts +26 -0
  361. package/src/parser/rules/inline/raw/result.ts +26 -0
  362. package/src/parser/rules/inline/size/content.ts +65 -0
  363. package/src/parser/rules/inline/size/index.ts +55 -0
  364. package/src/parser/rules/inline/size/open.ts +43 -0
  365. package/src/parser/rules/inline/size/value.ts +45 -0
  366. package/src/parser/rules/inline/span/content.ts +97 -0
  367. package/src/parser/rules/inline/span/elements.ts +108 -0
  368. package/src/parser/rules/inline/span/index.ts +79 -0
  369. package/src/parser/rules/inline/span/newline.ts +50 -0
  370. package/src/parser/rules/inline/span/syntax.ts +70 -0
  371. package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
  372. package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
  373. package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
  374. package/src/parser/rules/inline/subscript.ts +2 -39
  375. package/src/parser/rules/inline/superscript.ts +4 -39
  376. package/src/parser/rules/inline/text/element.ts +5 -0
  377. package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
  378. package/src/parser/rules/inline/underline/child.ts +26 -0
  379. package/src/parser/rules/inline/underline/content.ts +29 -0
  380. package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
  381. package/src/parser/rules/inline/user/element.ts +11 -0
  382. package/src/parser/rules/inline/user/index.ts +34 -0
  383. package/src/parser/rules/inline/user/syntax.ts +67 -0
  384. package/src/parser/rules/inline/utils.ts +4 -344
  385. package/src/parser/rules/tokens.ts +106 -0
  386. package/src/parser/rules/types.ts +9 -252
  387. package/src/parser/depth.ts +0 -251
  388. package/src/parser/parse.ts +0 -315
  389. package/src/parser/postprocess/spanStrip.ts +0 -697
  390. package/src/parser/preprocess/expr.ts +0 -265
  391. package/src/parser/preprocess/utils.ts +0 -250
  392. package/src/parser/preprocess/whitespace.ts +0 -111
  393. package/src/parser/rules/block/align.ts +0 -282
  394. package/src/parser/rules/block/bibliography.ts +0 -359
  395. package/src/parser/rules/block/block-list.ts +0 -689
  396. package/src/parser/rules/block/blockquote.ts +0 -238
  397. package/src/parser/rules/block/code.ts +0 -187
  398. package/src/parser/rules/block/collapsible.ts +0 -337
  399. package/src/parser/rules/block/definition-list.ts +0 -270
  400. package/src/parser/rules/block/div.ts +0 -400
  401. package/src/parser/rules/block/embed-block.ts +0 -153
  402. package/src/parser/rules/block/footnoteblock.ts +0 -200
  403. package/src/parser/rules/block/heading.ts +0 -142
  404. package/src/parser/rules/block/html.ts +0 -222
  405. package/src/parser/rules/block/iframe.ts +0 -239
  406. package/src/parser/rules/block/include.ts +0 -179
  407. package/src/parser/rules/block/list.ts +0 -244
  408. package/src/parser/rules/block/math.ts +0 -183
  409. package/src/parser/rules/block/module/include/resolve.ts +0 -556
  410. package/src/parser/rules/block/module/listpages/types.ts +0 -513
  411. package/src/parser/rules/block/module/walk.ts +0 -380
  412. package/src/parser/rules/block/module.ts +0 -164
  413. package/src/parser/rules/block/orphan-li.ts +0 -177
  414. package/src/parser/rules/block/paragraph.ts +0 -157
  415. package/src/parser/rules/block/table-block.ts +0 -726
  416. package/src/parser/rules/block/table.ts +0 -441
  417. package/src/parser/rules/block/tabview.ts +0 -331
  418. package/src/parser/rules/block/toc.ts +0 -129
  419. package/src/parser/rules/inline/anchor-name.ts +0 -154
  420. package/src/parser/rules/inline/anchor.ts +0 -327
  421. package/src/parser/rules/inline/bibcite.ts +0 -153
  422. package/src/parser/rules/inline/color.ts +0 -140
  423. package/src/parser/rules/inline/equation-ref.ts +0 -115
  424. package/src/parser/rules/inline/expr.ts +0 -526
  425. package/src/parser/rules/inline/footnote.ts +0 -223
  426. package/src/parser/rules/inline/image.ts +0 -328
  427. package/src/parser/rules/inline/line-break.ts +0 -326
  428. package/src/parser/rules/inline/link-triple.ts +0 -267
  429. package/src/parser/rules/inline/math-inline.ts +0 -126
  430. package/src/parser/rules/inline/raw.ts +0 -262
  431. package/src/parser/rules/inline/size.ts +0 -244
  432. package/src/parser/rules/inline/span.ts +0 -424
  433. package/src/parser/rules/inline/user.ts +0 -147
@@ -1,615 +1,15 @@
1
1
  /**
2
+ * Shared block parser utility facade.
2
3
  *
3
- * Shared utilities used by block-level parser rules.
4
- *
5
- * This module provides the core building blocks that most block rules
6
- * depend on:
7
- *
8
- * - {@link canApplyBlockRule} -- fast pre-check for whether a rule's start
9
- * tokens match the current token.
10
- * - {@link parseBlocksUntil} -- the main block-level content parser that
11
- * iterates rules until a close condition is met (used by div, collapsible,
12
- * tabview, iftags, align, etc.).
13
- * - {@link parseInlineContentUntil} -- similar to `parseBlocksUntil` but
14
- * without paragraph wrapping, used for `div_` paragraph-strip mode.
15
- * - {@link parseAttributes} / {@link parseAttributesRaw} -- attribute
16
- * parsers for block opening tags (with and without safety filtering).
17
- * - {@link createBlockEndCondition} -- factory for close-condition predicates.
18
- *
19
- * Re-exports {@link filterUnsafeAttributes} and {@link parseBlockName} from
20
- * the shared `../utils` module for backward compatibility.
4
+ * Implementation lives in ./parsing/* so dispatch, content parsing,
5
+ * attributes, and close-condition handling can evolve independently.
21
6
  *
22
7
  * @module
23
8
  */
24
- import type { Token } from "../../../lexer";
25
- import type { Element } from "@wdprlib/ast";
26
- import type { ParseContext, BlockRule } from "../types";
27
- import { KNOWN_BLOCK_NAMES } from "../../constants";
28
- import { canApplyInlineRule } from "../inline/utils";
29
- import { filterUnsafeAttributes, parseBlockName } from "../utils";
30
-
31
- /**
32
- * Whether the BLOCK_OPEN / BLOCK_END_OPEN token at `pos` opens a block name
33
- * that should *not* end the surrounding paragraph / inline run. Mirrors the
34
- * logic used by `parseInlineUntil` so that paragraph-strip mode (`div_`)
35
- * agrees with regular paragraph parsing about which block names are inline.
36
- */
37
- function isNonBoundaryBlockToken(ctx: ParseContext, pos: number): boolean {
38
- const token = ctx.tokens[pos];
39
- if (token?.type !== "BLOCK_OPEN" && token?.type !== "BLOCK_END_OPEN") {
40
- return false;
41
- }
42
- const nameResult = parseBlockName(ctx, pos + 1);
43
- if (nameResult === null) {
44
- // `[[=]]` / `[[==]]` align markers tokenize as EQUALS, not TEXT/IDENTIFIER —
45
- // those are real block boundaries.
46
- if (ctx.tokens[pos + 1]?.type === "EQUALS") {
47
- return false;
48
- }
49
- // `[[` followed by no recognizable identifier -- treat as inline.
50
- return true;
51
- }
52
- if (ctx.scope.excludedBlockNames?.has(nameResult.name)) {
53
- return true;
54
- }
55
- return !KNOWN_BLOCK_NAMES.has(nameResult.name);
56
- }
57
-
58
- // Re-export for backwards compatibility
59
- export { filterUnsafeAttributes, parseBlockName } from "../utils";
60
-
61
- /**
62
- * Result of parsing a sequence of block-level content.
63
- */
64
- export interface BlockParseResult {
65
- /** The parsed AST elements. */
66
- elements: Element[];
67
- /** Total number of tokens consumed from the stream. */
68
- consumed: number;
69
- }
70
-
71
- /**
72
- * Determines whether a block rule is eligible for the current token.
73
- *
74
- * A rule is eligible if:
75
- * 1. The token is at line start (when `rule.requiresLineStart` is true).
76
- * 2. The token's type is in the rule's `startTokens` list (or the list
77
- * is empty, meaning the rule is a universal fallback).
78
- *
79
- * @param rule - The block rule to check.
80
- * @param token - The current token.
81
- * @returns `true` if the rule may be attempted.
82
- */
83
- export function canApplyBlockRule(rule: BlockRule, token: Token): boolean {
84
- if (rule.requiresLineStart && !token.lineStart) {
85
- return false;
86
- }
87
- if (rule.startTokens.length === 0) {
88
- return true; // fallback rule
89
- }
90
- return rule.startTokens.includes(token.type);
91
- }
92
-
93
- /**
94
- * Parses block-level elements from the token stream until a close
95
- * condition is satisfied.
96
- *
97
- * This is the workhorse parser used by container blocks (div, collapsible,
98
- * tabview, iftags, align, etc.) to parse their body content. It loops
99
- * through tokens, trying each block rule in priority order, and falls back
100
- * to the paragraph rule when nothing else matches.
101
- *
102
- * Whitespace and newline tokens between blocks are silently consumed.
103
- * The close condition receives a ParseContext snapshot at the current
104
- * position and should return `true` to stop parsing (the close tag
105
- * itself is NOT consumed here -- the caller handles that).
106
- *
107
- * The close condition is also injected into `blockCloseCondition` on
108
- * the context so that the paragraph parser can respect the enclosing
109
- * block's boundary.
110
- *
111
- * @param ctx - Parse context positioned at the start of the body.
112
- * @param closeCondition - Predicate that signals the end of the block body.
113
- * @param options - Optional settings.
114
- * @param options.excludedBlockNames - Block names that should be excluded
115
- * from both rule dispatch and paragraph-boundary detection. The named
116
- * rules are filtered out of `blockRules`, and the set is propagated to
117
- * the inline parser via `ParseContext.excludedBlockNames` so that
118
- * `BLOCK_OPEN` / `BLOCK_END_OPEN` tokens for these names do not trigger
119
- * paragraph breaks.
120
- * @returns Parsed elements and total tokens consumed.
121
- */
122
- export function parseBlocksUntil(
123
- ctx: ParseContext,
124
- closeCondition: (ctx: ParseContext) => boolean,
125
- options?: { excludedBlockNames?: ReadonlySet<string> },
126
- ): BlockParseResult {
127
- const elements: Element[] = [];
128
- let consumed = 0;
129
- let pos = ctx.pos;
130
-
131
- const excluded = options?.excludedBlockNames;
132
- const blockRules = excluded
133
- ? ctx.blockRules.filter((r) => !excluded.has(r.name))
134
- : ctx.blockRules;
135
- const { blockFallbackRule } = ctx;
136
-
137
- while (pos < ctx.tokens.length) {
138
- const token = ctx.tokens[pos];
139
- if (!token || token.type === "EOF") {
140
- break;
141
- }
142
-
143
- // Check close condition
144
- const checkCtx: ParseContext = { ...ctx, pos };
145
- if (closeCondition(checkCtx)) {
146
- break;
147
- }
148
-
149
- // Skip whitespace
150
- if (token.type === "WHITESPACE") {
151
- pos++;
152
- consumed++;
153
- continue;
154
- }
155
-
156
- // Skip newlines
157
- if (token.type === "NEWLINE") {
158
- pos++;
159
- consumed++;
160
- continue;
161
- }
162
-
163
- // Try each block rule
164
- let matched = false;
165
- // Pass close condition and excluded names to context
166
- const blockCtx: ParseContext = {
167
- ...ctx,
168
- pos,
169
- blockRules,
170
- scope: {
171
- ...ctx.scope,
172
- blockCloseCondition: closeCondition,
173
- excludedBlockNames: excluded,
174
- },
175
- };
176
-
177
- for (const rule of blockRules) {
178
- if (canApplyBlockRule(rule, token)) {
179
- const result = rule.parse(blockCtx);
180
- if (result.success) {
181
- elements.push(...result.elements);
182
- consumed += result.consumed;
183
- pos += result.consumed;
184
- matched = true;
185
- break;
186
- }
187
- }
188
- }
189
-
190
- if (!matched) {
191
- // Fallback to paragraph
192
- const result = blockFallbackRule.parse(blockCtx);
193
- if (result.success && result.elements.length > 0) {
194
- elements.push(...result.elements);
195
- consumed += result.consumed;
196
- pos += result.consumed;
197
- } else {
198
- // Skip token to avoid infinite loop
199
- pos++;
200
- consumed++;
201
- }
202
- }
203
- }
204
-
205
- return { elements, consumed };
206
- }
207
-
208
- /**
209
- * Parses mixed inline/block content until a close condition is met,
210
- * WITHOUT paragraph wrapping.
211
- *
212
- * This is used for `div_` (paragraph strip mode) where newlines become
213
- * `<br />` elements rather than paragraph separators. Blank lines
214
- * (multiple consecutive newlines) are collapsed into a single `<br />`.
215
- *
216
- * Block-level elements (nested div, collapsible, etc.) are mixed directly
217
- * into the inline element stream. Newlines immediately before a BLOCK_OPEN
218
- * or BLOCK_END_OPEN are silently consumed (no `<br />` generated).
219
- *
220
- * Trailing line-break elements are stripped from the result.
221
- *
222
- * @param ctx - Parse context positioned at the start of the body.
223
- * @param closeCondition - Predicate that signals the end of the content.
224
- * @returns Parsed elements and total tokens consumed.
225
- */
226
- export function parseInlineContentUntil(
227
- ctx: ParseContext,
228
- closeCondition: (ctx: ParseContext) => boolean,
229
- ): BlockParseResult {
230
- const elements: Element[] = [];
231
- let consumed = 0;
232
- let pos = ctx.pos;
233
-
234
- const { blockRules, inlineRules } = ctx;
235
-
236
- while (pos < ctx.tokens.length) {
237
- const token = ctx.tokens[pos];
238
- if (!token || token.type === "EOF") {
239
- break;
240
- }
241
-
242
- // Check close condition
243
- const checkCtx: ParseContext = { ...ctx, pos };
244
- if (closeCondition(checkCtx)) {
245
- break;
246
- }
247
-
248
- // Skip whitespace at beginning of lines (but not between words)
249
- if (token.type === "WHITESPACE" && token.lineStart) {
250
- pos++;
251
- consumed++;
252
- continue;
253
- }
254
-
255
- // Handle newlines - convert to line-breaks
256
- // In paragraph strip mode, blank lines (double newline) become single line-break
257
- // But newlines before block elements are not converted to line-breaks
258
- if (token.type === "NEWLINE") {
259
- pos++;
260
- consumed++;
261
- // Skip additional blank lines
262
- while (ctx.tokens[pos]?.type === "NEWLINE") {
263
- pos++;
264
- consumed++;
265
- }
266
-
267
- // Check if next token starts a block element (BLOCK_OPEN, BLOCK_END_OPEN)
268
- // If so, don't add line-break - the newline just separates text from block.
269
- // But: excluded block names (e.g. nested collapsible inside div_) and
270
- // unknown block names (e.g. `[[foo]]`) are treated as inline by the
271
- // paragraph parser, so they must still produce a `<br />` here.
272
- const nextToken = ctx.tokens[pos];
273
- if (!nextToken || nextToken.type === "EOF") {
274
- continue;
275
- }
276
- if (nextToken.type === "BLOCK_OPEN" || nextToken.type === "BLOCK_END_OPEN") {
277
- const peekCtx: ParseContext = { ...ctx, pos };
278
- if (!isNonBoundaryBlockToken(peekCtx, pos)) {
279
- continue;
280
- }
281
- // Fall through and emit a line-break for inline-treated `[[name]]`.
282
- }
283
-
284
- // Otherwise, add line-break
285
- elements.push({ element: "line-break" });
286
- continue;
287
- }
288
-
289
- // Try block rules first (for nested div, collapsible, etc.)
290
- // In paragraph strip mode, blocks are mixed into the inline stream
291
- let matched = false;
292
- const blockCtx: ParseContext = { ...ctx, pos };
293
-
294
- for (const rule of blockRules) {
295
- if (canApplyBlockRule(rule, token)) {
296
- const result = rule.parse(blockCtx);
297
- if (result.success) {
298
- // Add block elements directly (mixed into inline stream)
299
- elements.push(...result.elements);
300
- consumed += result.consumed;
301
- pos += result.consumed;
302
- matched = true;
303
- break;
304
- }
305
- }
306
- }
307
-
308
- if (matched) continue;
309
-
310
- // Try each inline rule
311
- const inlineCtx: ParseContext = { ...ctx, pos };
312
-
313
- for (const rule of inlineRules) {
314
- if (canApplyInlineRule(rule, token)) {
315
- const result = rule.parse(inlineCtx);
316
- if (result.success) {
317
- elements.push(...result.elements);
318
- consumed += result.consumed;
319
- pos += result.consumed;
320
- matched = true;
321
- break;
322
- }
323
- }
324
- }
325
-
326
- if (!matched) {
327
- // Fallback to text
328
- elements.push({ element: "text", data: token.value });
329
- consumed++;
330
- pos++;
331
- }
332
- }
333
-
334
- // Remove trailing line-breaks
335
- while (elements.length > 0 && elements[elements.length - 1]?.element === "line-break") {
336
- elements.pop();
337
- }
338
-
339
- return { elements, consumed };
340
- }
341
-
342
- /**
343
- * Parses HTML-style attributes from block opening tags.
344
- *
345
- * Supports:
346
- * - `name="value"` (quoted string)
347
- * - `name=value` (unquoted single-token value)
348
- * - `name` (boolean attribute, stored as `"true"`)
349
- * - Hyphenated names like `data-paragraph` or `aria-label` (composed
350
- * from TEXT `-` IDENTIFIER token sequences).
351
- *
352
- * Attribute names are lowercased (Wikidot is case-insensitive).
353
- * The result is filtered through {@link filterUnsafeAttributes} to strip
354
- * potentially dangerous attributes (e.g. `onload`, `onclick`).
355
- *
356
- * Stops at BLOCK_CLOSE, NEWLINE, or EOF.
357
- *
358
- * @param ctx - Parse context.
359
- * @param startPos - Token index to begin scanning.
360
- * @returns Parsed (filtered) attributes and total tokens consumed.
361
- */
362
- export function parseAttributes(
363
- ctx: ParseContext,
364
- startPos: number,
365
- ): { attrs: Record<string, string>; consumed: number } {
366
- const attrs: Record<string, string> = {};
367
- let pos = startPos;
368
- let consumed = 0;
369
-
370
- while (pos < ctx.tokens.length) {
371
- const token = ctx.tokens[pos];
372
- if (
373
- !token ||
374
- token.type === "BLOCK_CLOSE" ||
375
- token.type === "NEWLINE" ||
376
- token.type === "EOF"
377
- ) {
378
- break;
379
- }
380
-
381
- // Skip whitespace
382
- if (token.type === "WHITESPACE") {
383
- pos++;
384
- consumed++;
385
- continue;
386
- }
387
-
388
- // Attribute name (TEXT or IDENTIFIER token)
389
- // May include hyphens like "data-paragraph" which tokenizes as: IDENTIFIER "data", TEXT "-", IDENTIFIER "paragraph"
390
- if (token.type === "TEXT" || token.type === "IDENTIFIER") {
391
- let name = token.value;
392
- pos++;
393
- consumed++;
394
-
395
- // Collect hyphenated parts (e.g., data-paragraph, aria-label)
396
- while (
397
- ctx.tokens[pos]?.type === "TEXT" &&
398
- ctx.tokens[pos]?.value === "-" &&
399
- (ctx.tokens[pos + 1]?.type === "IDENTIFIER" || ctx.tokens[pos + 1]?.type === "TEXT")
400
- ) {
401
- name += "-";
402
- pos++;
403
- consumed++;
404
- name += ctx.tokens[pos]?.value ?? "";
405
- pos++;
406
- consumed++;
407
- }
408
-
409
- // Normalize attribute name to lowercase (Wikidot is case-insensitive)
410
- name = name.toLowerCase();
411
-
412
- // Check for =
413
- const eqToken = ctx.tokens[pos];
414
- if (eqToken?.type === "EQUALS") {
415
- pos++;
416
- consumed++;
417
-
418
- // Get value (quoted string or text)
419
- const valueToken = ctx.tokens[pos];
420
- if (valueToken?.type === "QUOTED_STRING") {
421
- // Remove quotes
422
- let value = valueToken.value;
423
- if (value.startsWith('"') && value.endsWith('"')) {
424
- value = value.slice(1, -1);
425
- }
426
- attrs[name] = value;
427
- pos++;
428
- consumed++;
429
- } else if (valueToken?.type === "TEXT" || valueToken?.type === "IDENTIFIER") {
430
- attrs[name] = valueToken.value;
431
- pos++;
432
- consumed++;
433
- }
434
- } else {
435
- // Boolean attribute
436
- attrs[name] = "true";
437
- }
438
- } else {
439
- // Unknown token, skip
440
- pos++;
441
- consumed++;
442
- }
443
- }
444
-
445
- return { attrs: filterUnsafeAttributes(attrs), consumed };
446
- }
447
-
448
- /**
449
- * Parses attributes from block opening tags WITHOUT safety filtering.
450
- *
451
- * Use this for block-specific parameters (like `type` on `[[code]]`) that
452
- * are not emitted as HTML attributes and therefore do not need XSS
453
- * protection. The parsing logic is identical to {@link parseAttributes}
454
- * except the result is returned as-is.
455
- *
456
- * Hyphenated name handling is configurable because some contexts (e.g.
457
- * code block with `data-src`) should treat hyphens as part of the name,
458
- * while others should not.
459
- *
460
- * Also handles STRIKE_MARKER tokens (`--`) in attribute name positions,
461
- * which can appear when a double hyphen is used in names like
462
- * `data--something`.
463
- *
464
- * @param ctx - Parse context.
465
- * @param startPos - Token index to begin scanning.
466
- * @param hyphenatedNames - When `true` (default), hyphens are collected
467
- * into the attribute name. When `false`, only
468
- * the first segment before a hyphen is used.
469
- * @returns Parsed (unfiltered) attributes and total tokens consumed.
470
- */
471
- export function parseAttributesRaw(
472
- ctx: ParseContext,
473
- startPos: number,
474
- hyphenatedNames = true,
475
- ): { attrs: Record<string, string>; consumed: number } {
476
- const attrs: Record<string, string> = {};
477
- let pos = startPos;
478
- let consumed = 0;
479
-
480
- while (pos < ctx.tokens.length) {
481
- const token = ctx.tokens[pos];
482
- if (
483
- !token ||
484
- token.type === "BLOCK_CLOSE" ||
485
- token.type === "NEWLINE" ||
486
- token.type === "EOF"
487
- ) {
488
- break;
489
- }
490
-
491
- if (token.type === "WHITESPACE") {
492
- pos++;
493
- consumed++;
494
- continue;
495
- }
496
-
497
- if (token.type === "TEXT" || token.type === "IDENTIFIER") {
498
- let name = token.value;
499
- pos++;
500
- consumed++;
501
-
502
- // Handle hyphenated attribute names (e.g., data-paragraph, aria-label)
503
- // When hyphenatedNames=true: collect full name (data-paragraph)
504
- // When hyphenatedNames=false: skip hyphen parts, use first segment only (data)
505
- // This prevents data-src from becoming separate "data" and "src" attributes
506
- // Also handles STRIKE_MARKER (--) and multiple hyphens (----, etc.)
507
- const isHyphen = (t: (typeof ctx.tokens)[0] | undefined) =>
508
- (t?.type === "TEXT" && t.value === "-") || t?.type === "STRIKE_MARKER";
509
- const isNamePart = (t: (typeof ctx.tokens)[0] | undefined) =>
510
- t?.type === "IDENTIFIER" || t?.type === "TEXT";
511
-
512
- while (isHyphen(ctx.tokens[pos])) {
513
- // Skip consecutive hyphens first
514
- while (isHyphen(ctx.tokens[pos])) {
515
- if (hyphenatedNames) {
516
- name += ctx.tokens[pos]?.value ?? "-";
517
- }
518
- pos++;
519
- consumed++;
520
- }
521
- // Then check if followed by name part
522
- if (isNamePart(ctx.tokens[pos])) {
523
- if (hyphenatedNames) {
524
- name += ctx.tokens[pos]?.value ?? "";
525
- }
526
- pos++;
527
- consumed++;
528
- } else {
529
- // No name part after hyphens, stop
530
- break;
531
- }
532
- }
533
-
534
- // Normalize attribute name to lowercase (Wikidot is case-insensitive)
535
- name = name.toLowerCase();
536
-
537
- const eqToken = ctx.tokens[pos];
538
- if (eqToken?.type === "EQUALS") {
539
- pos++;
540
- consumed++;
541
-
542
- const valueToken = ctx.tokens[pos];
543
- if (valueToken?.type === "QUOTED_STRING") {
544
- let value = valueToken.value;
545
- if (value.startsWith('"') && value.endsWith('"')) {
546
- value = value.slice(1, -1);
547
- }
548
- attrs[name] = value;
549
- pos++;
550
- consumed++;
551
- } else if (valueToken?.type === "TEXT" || valueToken?.type === "IDENTIFIER") {
552
- attrs[name] = valueToken.value;
553
- pos++;
554
- consumed++;
555
- }
556
- } else {
557
- attrs[name] = "true";
558
- }
559
- } else {
560
- pos++;
561
- consumed++;
562
- }
563
- }
564
-
565
- return { attrs, consumed };
566
- }
567
-
568
- /**
569
- * Creates a reusable close-condition function that matches block end tags
570
- * (`[[/name]]`) for one or more block names.
571
- *
572
- * The returned function inspects the tokens at `ctx.pos` and returns both
573
- * whether a match was found and how many tokens the closing tag occupies
574
- * (including the optional trailing NEWLINE).
575
- *
576
- * @param blockNames - Array of block names to match (e.g. `["div"]`).
577
- * @returns A function suitable for use as a `closeCondition` argument,
578
- * returning `{ matched, consumed }`.
579
- */
580
- export function createBlockEndCondition(
581
- blockNames: string[],
582
- ): (ctx: ParseContext) => { matched: boolean; consumed: number } {
583
- return (ctx: ParseContext) => {
584
- const token = ctx.tokens[ctx.pos];
585
- if (token?.type !== "BLOCK_END_OPEN") {
586
- return { matched: false, consumed: 0 };
587
- }
588
-
589
- const nameResult = parseBlockName(ctx, ctx.pos + 1);
590
- if (!nameResult) {
591
- return { matched: false, consumed: 0 };
592
- }
593
-
594
- if (!blockNames.includes(nameResult.name)) {
595
- return { matched: false, consumed: 0 };
596
- }
597
-
598
- // Calculate consumed: [[/ + name + ]]
599
- let consumed = 1 + nameResult.consumed;
600
-
601
- // Check for closing ]]
602
- const closePos = ctx.pos + 1 + nameResult.consumed;
603
- if (ctx.tokens[closePos]?.type === "BLOCK_CLOSE") {
604
- consumed++;
605
- }
606
-
607
- // Check for trailing newline
608
- const newlinePos = closePos + 1;
609
- if (ctx.tokens[newlinePos]?.type === "NEWLINE") {
610
- consumed++;
611
- }
612
-
613
- return { matched: true, consumed };
614
- };
615
- }
9
+ export { filterUnsafeAttributes, parseBlockName } from "../common";
10
+ export { canApplyBlockRule, getCandidateBlockRules } from "./parsing/rule-dispatch";
11
+ export type { BlockParseResult } from "./parsing/content";
12
+ export { parseBlocksUntil } from "./parsing/content";
13
+ export { parseInlineContentUntil } from "./parsing/inline-content";
14
+ export { parseAttributes, parseAttributesRaw } from "./parsing/attributes";
15
+ export { createBlockEndCondition } from "./parsing/end-condition";
@@ -1,14 +1,4 @@
1
- /**
2
- * Common utilities shared between block and inline rules
3
- */
4
-
5
- import type { ParseContext } from "./types";
6
-
7
- // =============================================================================
8
- // Attribute Safety
9
- // =============================================================================
10
-
11
- // Event handler attributes (on*) are blocked entirely
1
+ // Event handler attributes (on*) are blocked entirely.
12
2
  const SAFE_ATTRIBUTES = new Set([
13
3
  "accept",
14
4
  "align",
@@ -96,7 +86,7 @@ const SAFE_ATTRIBUTES = new Set([
96
86
  ]);
97
87
 
98
88
  /**
99
- * Filter unsafe HTML attributes (blocks event handlers, allows safe attributes + aria-* / data-*)
89
+ * Filter unsafe HTML attributes (blocks event handlers, allows safe attributes + aria-* / data-*).
100
90
  */
101
91
  export function filterUnsafeAttributes(attrs: Record<string, string>): Record<string, string> {
102
92
  const result: Record<string, string> = {};
@@ -108,7 +98,7 @@ export function filterUnsafeAttributes(attrs: Record<string, string>): Record<st
108
98
  continue;
109
99
  }
110
100
  if (!SAFE_ATTRIBUTES.has(lower)) continue;
111
- // Wikidot prefixes user-set IDs with "u-"
101
+ // Wikidot prefixes user-set IDs with "u-".
112
102
  if (lower === "id") {
113
103
  result[key] = value.startsWith("u-") ? value : `u-${value}`;
114
104
  continue;
@@ -117,39 +107,3 @@ export function filterUnsafeAttributes(attrs: Record<string, string>): Record<st
117
107
  }
118
108
  return result;
119
109
  }
120
-
121
- // =============================================================================
122
- // Block Name Parsing
123
- // =============================================================================
124
-
125
- /**
126
- * Parse block name from tokens (handles [[name or [[/name)
127
- * Handles underscore suffix like "div_" which may be tokenized as [IDENTIFIER "div"] [UNDERSCORE "_"]
128
- */
129
- export function parseBlockName(
130
- ctx: ParseContext,
131
- startPos: number,
132
- ): { name: string; consumed: number } | null {
133
- let pos = startPos;
134
- let consumed = 0;
135
-
136
- // Wikidot does NOT allow whitespace between [[ and block name
137
- // e.g. [[ code ]] is treated as plain text, not a code block
138
- const token = ctx.tokens[pos];
139
- if (!token || (token.type !== "TEXT" && token.type !== "IDENTIFIER")) {
140
- return null;
141
- }
142
-
143
- // Base name
144
- let name = token.value.toLowerCase();
145
- consumed++;
146
- pos++;
147
-
148
- // Check for underscore suffix (e.g., "div_" -> "div" + "_")
149
- if (ctx.tokens[pos]?.type === "UNDERSCORE") {
150
- name += "_";
151
- consumed++;
152
- }
153
-
154
- return { name, consumed };
155
- }