@wdprlib/parser 3.2.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433)
  1. package/dist/index.cjs +10471 -8406
  2. package/dist/index.d.cts +313 -337
  3. package/dist/index.d.ts +313 -337
  4. package/dist/index.js +10457 -8392
  5. package/package.json +1 -1
  6. package/src/index.ts +7 -0
  7. package/src/lexer/anchor.ts +48 -0
  8. package/src/lexer/index.ts +3 -2
  9. package/src/lexer/lexer.ts +73 -559
  10. package/src/lexer/options.ts +19 -0
  11. package/src/lexer/punctuation.ts +70 -0
  12. package/src/lexer/quoted-string.ts +16 -0
  13. package/src/lexer/runs.ts +85 -0
  14. package/src/lexer/spacing-actions.ts +24 -0
  15. package/src/lexer/state.ts +103 -0
  16. package/src/lexer/syntax-actions.ts +80 -0
  17. package/src/lexer/text-actions.ts +41 -0
  18. package/src/lexer/token-actions.ts +136 -0
  19. package/src/lexer/token-factory.ts +62 -0
  20. package/src/lexer/tokenize.ts +18 -0
  21. package/src/parser/constants.ts +2 -0
  22. package/src/parser/depth/index.ts +111 -0
  23. package/src/parser/depth/stack.ts +82 -0
  24. package/src/parser/parse/block.ts +42 -0
  25. package/src/parser/parse/context.ts +26 -0
  26. package/src/parser/parse/footnotes.ts +25 -0
  27. package/src/parser/parse/index.ts +42 -0
  28. package/src/parser/parse/options.ts +34 -0
  29. package/src/parser/parse/parser.ts +79 -0
  30. package/src/parser/parse/plain-non-ascii.ts +129 -0
  31. package/src/parser/parse/result.ts +57 -0
  32. package/src/parser/parse/source.ts +11 -0
  33. package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
  34. package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
  35. package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
  36. package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
  37. package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
  38. package/src/parser/postprocess/spanStrip/factory.ts +23 -0
  39. package/src/parser/postprocess/spanStrip/index.ts +8 -0
  40. package/src/parser/postprocess/spanStrip/merge.ts +117 -0
  41. package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
  42. package/src/parser/postprocess/spanStrip/split.ts +67 -0
  43. package/src/parser/preprocess/expr/chars.ts +15 -0
  44. package/src/parser/preprocess/expr/evaluate.ts +22 -0
  45. package/src/parser/preprocess/expr/index.ts +45 -0
  46. package/src/parser/preprocess/expr/kind.ts +19 -0
  47. package/src/parser/preprocess/expr/parse.ts +103 -0
  48. package/src/parser/preprocess/expr/scan.ts +34 -0
  49. package/src/parser/preprocess/expr/types.ts +14 -0
  50. package/src/parser/preprocess/typography.ts +70 -5
  51. package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
  52. package/src/parser/preprocess/utils/index.ts +13 -0
  53. package/src/parser/preprocess/utils/raw-regions.ts +153 -0
  54. package/src/parser/preprocess/whitespace/detection.ts +39 -0
  55. package/src/parser/preprocess/whitespace/index.ts +79 -0
  56. package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
  57. package/src/parser/preprocess/whitespace/patterns.ts +23 -0
  58. package/src/parser/rules/block/align/body.ts +46 -0
  59. package/src/parser/rules/block/align/element.ts +13 -0
  60. package/src/parser/rules/block/align/index.ts +90 -0
  61. package/src/parser/rules/block/align/syntax.ts +113 -0
  62. package/src/parser/rules/block/bibliography/body.ts +81 -0
  63. package/src/parser/rules/block/bibliography/entries.ts +49 -0
  64. package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
  65. package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
  66. package/src/parser/rules/block/bibliography/index.ts +90 -0
  67. package/src/parser/rules/block/bibliography/open.ts +53 -0
  68. package/src/parser/rules/block/block-list/bare-content.ts +105 -0
  69. package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
  70. package/src/parser/rules/block/block-list/index.ts +51 -0
  71. package/src/parser/rules/block/block-list/item-content.ts +132 -0
  72. package/src/parser/rules/block/block-list/li-content.ts +107 -0
  73. package/src/parser/rules/block/block-list/li-item.ts +77 -0
  74. package/src/parser/rules/block/block-list/list-block.ts +100 -0
  75. package/src/parser/rules/block/block-list/open.ts +51 -0
  76. package/src/parser/rules/block/block-list/tags.ts +50 -0
  77. package/src/parser/rules/block/blockquote/build.ts +62 -0
  78. package/src/parser/rules/block/blockquote/index.ts +80 -0
  79. package/src/parser/rules/block/blockquote/line.ts +79 -0
  80. package/src/parser/rules/block/blockquote/lines.ts +39 -0
  81. package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
  82. package/src/parser/rules/block/center/open.ts +27 -0
  83. package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
  84. package/src/parser/rules/block/clear-float/syntax.ts +43 -0
  85. package/src/parser/rules/block/code/attributes.ts +30 -0
  86. package/src/parser/rules/block/code/content.ts +57 -0
  87. package/src/parser/rules/block/code/index.ts +100 -0
  88. package/src/parser/rules/block/collapsible/attributes.ts +95 -0
  89. package/src/parser/rules/block/collapsible/body.ts +69 -0
  90. package/src/parser/rules/block/collapsible/index.ts +117 -0
  91. package/src/parser/rules/block/collapsible/open.ts +51 -0
  92. package/src/parser/rules/block/collapsible/orphans.ts +31 -0
  93. package/src/parser/rules/block/collapsible/tags.ts +17 -0
  94. package/src/parser/rules/block/comment/consume.ts +37 -0
  95. package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
  96. package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
  97. package/src/parser/rules/block/content-separator/syntax.ts +33 -0
  98. package/src/parser/rules/block/definition-list/collect.ts +40 -0
  99. package/src/parser/rules/block/definition-list/index.ts +63 -0
  100. package/src/parser/rules/block/definition-list/item-key.ts +95 -0
  101. package/src/parser/rules/block/definition-list/item-value.ts +56 -0
  102. package/src/parser/rules/block/definition-list/items.ts +54 -0
  103. package/src/parser/rules/block/div/body.ts +41 -0
  104. package/src/parser/rules/block/div/close.ts +41 -0
  105. package/src/parser/rules/block/div/failed.ts +117 -0
  106. package/src/parser/rules/block/div/index.ts +112 -0
  107. package/src/parser/rules/block/div/nesting.ts +37 -0
  108. package/src/parser/rules/block/div/open.ts +59 -0
  109. package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
  110. package/src/parser/rules/block/embed-block/content.ts +53 -0
  111. package/src/parser/rules/block/embed-block/index.ts +91 -0
  112. package/src/parser/rules/block/embed-block/open.ts +52 -0
  113. package/src/parser/rules/block/embed-block/tags.ts +5 -0
  114. package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
  115. package/src/parser/rules/block/footnoteblock/index.ts +82 -0
  116. package/src/parser/rules/block/footnoteblock/open.ts +53 -0
  117. package/src/parser/rules/block/heading/index.ts +87 -0
  118. package/src/parser/rules/block/heading/open.ts +50 -0
  119. package/src/parser/rules/block/heading/toc-text.ts +26 -0
  120. package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
  121. package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
  122. package/src/parser/rules/block/html/body.ts +114 -0
  123. package/src/parser/rules/block/html/diagnostics.ts +11 -0
  124. package/src/parser/rules/block/html/index.ts +95 -0
  125. package/src/parser/rules/block/html/open.ts +36 -0
  126. package/src/parser/rules/block/iframe/attributes.ts +106 -0
  127. package/src/parser/rules/block/iframe/index.ts +73 -0
  128. package/src/parser/rules/block/iframe/open.ts +58 -0
  129. package/src/parser/rules/block/iframe/source.ts +24 -0
  130. package/src/parser/rules/block/iframe/url.ts +38 -0
  131. package/src/parser/rules/block/iftags/body.ts +48 -0
  132. package/src/parser/rules/block/iftags/condition.ts +24 -0
  133. package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
  134. package/src/parser/rules/block/include/arguments.ts +48 -0
  135. package/src/parser/rules/block/include/index.ts +75 -0
  136. package/src/parser/rules/block/include/location.ts +24 -0
  137. package/src/parser/rules/block/include/variables.ts +37 -0
  138. package/src/parser/rules/block/list/index.ts +73 -0
  139. package/src/parser/rules/block/list/line.ts +77 -0
  140. package/src/parser/rules/block/list/native.ts +89 -0
  141. package/src/parser/rules/block/math/content.ts +54 -0
  142. package/src/parser/rules/block/math/index.ts +106 -0
  143. package/src/parser/rules/block/math/name.ts +35 -0
  144. package/src/parser/rules/block/module/body.ts +92 -0
  145. package/src/parser/rules/block/module/element.ts +33 -0
  146. package/src/parser/rules/block/module/include/directive.ts +91 -0
  147. package/src/parser/rules/block/module/include/index.ts +11 -2
  148. package/src/parser/rules/block/module/include/references.ts +42 -0
  149. package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
  150. package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
  151. package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
  152. package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
  153. package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
  154. package/src/parser/rules/block/module/include/scanner.ts +121 -0
  155. package/src/parser/rules/block/module/index.ts +14 -2
  156. package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
  157. package/src/parser/rules/block/module/listpages/extract.ts +25 -359
  158. package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
  159. package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
  160. package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
  161. package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
  162. package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
  163. package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
  164. package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
  165. package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
  166. package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
  167. package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
  168. package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
  169. package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
  170. package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
  171. package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
  172. package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
  173. package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
  174. package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
  175. package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
  176. package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
  177. package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
  178. package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
  179. package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
  180. package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
  181. package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
  182. package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
  183. package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
  184. package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
  185. package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
  186. package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
  187. package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
  188. package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
  189. package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
  190. package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
  191. package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
  192. package/src/parser/rules/block/module/listpages/url-resolution/params.ts +30 -0
  193. package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
  194. package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +62 -0
  195. package/src/parser/rules/block/module/listpages/url-resolution/value.ts +34 -0
  196. package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
  197. package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
  198. package/src/parser/rules/block/module/listusers/extract.ts +4 -9
  199. package/src/parser/rules/block/module/listusers/getters.ts +21 -0
  200. package/src/parser/rules/block/module/listusers/variables.ts +15 -0
  201. package/src/parser/rules/block/module/open.ts +57 -0
  202. package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
  203. package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
  204. package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
  205. package/src/parser/rules/block/module/resolution/styles.ts +53 -0
  206. package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
  207. package/src/parser/rules/block/module/resolve.ts +79 -292
  208. package/src/parser/rules/block/module/rule.ts +56 -0
  209. package/src/parser/rules/block/module/types-common.ts +11 -0
  210. package/src/parser/rules/block/module/walk/children.ts +35 -0
  211. package/src/parser/rules/block/module/walk/index.ts +9 -0
  212. package/src/parser/rules/block/module/walk/map/index.ts +2 -0
  213. package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
  214. package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
  215. package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
  216. package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
  217. package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
  218. package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
  219. package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
  220. package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
  221. package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
  222. package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
  223. package/src/parser/rules/block/module/walk/map/types.ts +6 -0
  224. package/src/parser/rules/block/module/walk/traverse.ts +65 -0
  225. package/src/parser/rules/block/orphan-li/content.ts +60 -0
  226. package/src/parser/rules/block/orphan-li/index.ts +75 -0
  227. package/src/parser/rules/block/orphan-li/open.ts +25 -0
  228. package/src/parser/rules/block/orphan-li/tags.ts +40 -0
  229. package/src/parser/rules/block/paragraph/content.ts +12 -0
  230. package/src/parser/rules/block/paragraph/index.ts +60 -0
  231. package/src/parser/rules/block/paragraph/normalize.ts +52 -0
  232. package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
  233. package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
  234. package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
  235. package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
  236. package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
  237. package/src/parser/rules/block/parsing/block-item.ts +29 -0
  238. package/src/parser/rules/block/parsing/content.ts +127 -0
  239. package/src/parser/rules/block/parsing/end-condition.ts +51 -0
  240. package/src/parser/rules/block/parsing/inline-content.ts +105 -0
  241. package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
  242. package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
  243. package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
  244. package/src/parser/rules/block/table/index.ts +80 -0
  245. package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
  246. package/src/parser/rules/block/table/pipe/cell.ts +106 -0
  247. package/src/parser/rules/block/table/pipe/index.ts +2 -0
  248. package/src/parser/rules/block/table/pipe/row.ts +88 -0
  249. package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
  250. package/src/parser/rules/block/table/pipe/trim.ts +50 -0
  251. package/src/parser/rules/block/table-block/body.ts +79 -0
  252. package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
  253. package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
  254. package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
  255. package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
  256. package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
  257. package/src/parser/rules/block/table-block/cell.ts +64 -0
  258. package/src/parser/rules/block/table-block/index.ts +113 -0
  259. package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
  260. package/src/parser/rules/block/table-block/structure.ts +80 -0
  261. package/src/parser/rules/block/tabview/body.ts +64 -0
  262. package/src/parser/rules/block/tabview/index.ts +90 -0
  263. package/src/parser/rules/block/tabview/open.ts +50 -0
  264. package/src/parser/rules/block/tabview/tab.ts +92 -0
  265. package/src/parser/rules/block/tabview/tags.ts +30 -0
  266. package/src/parser/rules/block/toc/element.ts +11 -0
  267. package/src/parser/rules/block/toc/index.ts +44 -0
  268. package/src/parser/rules/block/toc/open.ts +84 -0
  269. package/src/parser/rules/block/utils.ts +10 -610
  270. package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
  271. package/src/parser/rules/common/block-name.ts +33 -0
  272. package/src/parser/rules/common/index.ts +2 -0
  273. package/src/parser/rules/contracts/index.ts +3 -0
  274. package/src/parser/rules/contracts/parse-context.ts +38 -0
  275. package/src/parser/rules/contracts/rule.ts +43 -0
  276. package/src/parser/rules/contracts/scope.ts +31 -0
  277. package/src/parser/rules/inline/anchor/attributes.ts +54 -0
  278. package/src/parser/rules/inline/anchor/child.ts +26 -0
  279. package/src/parser/rules/inline/anchor/close.ts +34 -0
  280. package/src/parser/rules/inline/anchor/content.ts +59 -0
  281. package/src/parser/rules/inline/anchor/index.ts +103 -0
  282. package/src/parser/rules/inline/anchor/newline.ts +26 -0
  283. package/src/parser/rules/inline/anchor/open.ts +47 -0
  284. package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
  285. package/src/parser/rules/inline/anchor/syntax.ts +40 -0
  286. package/src/parser/rules/inline/anchor-name/index.ts +38 -0
  287. package/src/parser/rules/inline/anchor-name/name.ts +39 -0
  288. package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
  289. package/src/parser/rules/inline/bibcite/element.ts +14 -0
  290. package/src/parser/rules/inline/bibcite/index.ts +34 -0
  291. package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
  292. package/src/parser/rules/inline/bold.ts +2 -39
  293. package/src/parser/rules/inline/color/index.ts +35 -0
  294. package/src/parser/rules/inline/color/syntax.ts +69 -0
  295. package/src/parser/rules/inline/comment/consume.ts +31 -0
  296. package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
  297. package/src/parser/rules/inline/equation-ref/element.ts +8 -0
  298. package/src/parser/rules/inline/equation-ref/index.ts +34 -0
  299. package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
  300. package/src/parser/rules/inline/expr/branch.ts +104 -0
  301. package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
  302. package/src/parser/rules/inline/expr/conditional.ts +80 -0
  303. package/src/parser/rules/inline/expr/depth.ts +25 -0
  304. package/src/parser/rules/inline/expr/elements.ts +39 -0
  305. package/src/parser/rules/inline/expr/index.ts +84 -0
  306. package/src/parser/rules/inline/expr/syntax.ts +45 -0
  307. package/src/parser/rules/inline/footnote/child.ts +22 -0
  308. package/src/parser/rules/inline/footnote/close.ts +33 -0
  309. package/src/parser/rules/inline/footnote/content.ts +54 -0
  310. package/src/parser/rules/inline/footnote/elements.ts +38 -0
  311. package/src/parser/rules/inline/footnote/index.ts +54 -0
  312. package/src/parser/rules/inline/footnote/newline.ts +27 -0
  313. package/src/parser/rules/inline/footnote/open.ts +38 -0
  314. package/src/parser/rules/inline/formatting/container.ts +50 -0
  315. package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
  316. package/src/parser/rules/inline/guillemet/text.ts +11 -0
  317. package/src/parser/rules/inline/html/gate.ts +64 -0
  318. package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
  319. package/src/parser/rules/inline/html/open.ts +37 -0
  320. package/src/parser/rules/inline/image/attributes.ts +22 -0
  321. package/src/parser/rules/inline/image/body.ts +36 -0
  322. package/src/parser/rules/inline/image/index.ts +89 -0
  323. package/src/parser/rules/inline/image/open.ts +56 -0
  324. package/src/parser/rules/inline/image/source.ts +62 -0
  325. package/src/parser/rules/inline/image/syntax.ts +76 -0
  326. package/src/parser/rules/inline/italic.ts +2 -30
  327. package/src/parser/rules/inline/line-break/backslash.ts +58 -0
  328. package/src/parser/rules/inline/line-break/elements.ts +9 -0
  329. package/src/parser/rules/inline/line-break/index.ts +3 -0
  330. package/src/parser/rules/inline/line-break/newline.ts +82 -0
  331. package/src/parser/rules/inline/line-break/underscore.ts +45 -0
  332. package/src/parser/rules/inline/link-anchor.ts +6 -81
  333. package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
  334. package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
  335. package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
  336. package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
  337. package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
  338. package/src/parser/rules/inline/link-single.ts +7 -98
  339. package/src/parser/rules/inline/link-star.ts +7 -69
  340. package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
  341. package/src/parser/rules/inline/link-triple/index.ts +62 -0
  342. package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
  343. package/src/parser/rules/inline/link-triple/label.ts +35 -0
  344. package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
  345. package/src/parser/rules/inline/link-triple/target.ts +36 -0
  346. package/src/parser/rules/inline/math-inline/index.ts +40 -0
  347. package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
  348. package/src/parser/rules/inline/monospace.ts +2 -30
  349. package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
  350. package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
  351. package/src/parser/rules/inline/parsing/collect.ts +23 -0
  352. package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
  353. package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
  354. package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
  355. package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
  356. package/src/parser/rules/inline/parsing/rules.ts +34 -0
  357. package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
  358. package/src/parser/rules/inline/raw/angle.ts +40 -0
  359. package/src/parser/rules/inline/raw/double-at.ts +78 -0
  360. package/src/parser/rules/inline/raw/index.ts +26 -0
  361. package/src/parser/rules/inline/raw/result.ts +26 -0
  362. package/src/parser/rules/inline/size/content.ts +65 -0
  363. package/src/parser/rules/inline/size/index.ts +55 -0
  364. package/src/parser/rules/inline/size/open.ts +43 -0
  365. package/src/parser/rules/inline/size/value.ts +45 -0
  366. package/src/parser/rules/inline/span/content.ts +97 -0
  367. package/src/parser/rules/inline/span/elements.ts +108 -0
  368. package/src/parser/rules/inline/span/index.ts +79 -0
  369. package/src/parser/rules/inline/span/newline.ts +50 -0
  370. package/src/parser/rules/inline/span/syntax.ts +70 -0
  371. package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
  372. package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
  373. package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
  374. package/src/parser/rules/inline/subscript.ts +2 -39
  375. package/src/parser/rules/inline/superscript.ts +4 -39
  376. package/src/parser/rules/inline/text/element.ts +5 -0
  377. package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
  378. package/src/parser/rules/inline/underline/child.ts +26 -0
  379. package/src/parser/rules/inline/underline/content.ts +29 -0
  380. package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
  381. package/src/parser/rules/inline/user/element.ts +11 -0
  382. package/src/parser/rules/inline/user/index.ts +34 -0
  383. package/src/parser/rules/inline/user/syntax.ts +67 -0
  384. package/src/parser/rules/inline/utils.ts +4 -344
  385. package/src/parser/rules/tokens.ts +106 -0
  386. package/src/parser/rules/types.ts +9 -252
  387. package/src/parser/depth.ts +0 -251
  388. package/src/parser/parse.ts +0 -315
  389. package/src/parser/postprocess/spanStrip.ts +0 -697
  390. package/src/parser/preprocess/expr.ts +0 -265
  391. package/src/parser/preprocess/utils.ts +0 -250
  392. package/src/parser/preprocess/whitespace.ts +0 -111
  393. package/src/parser/rules/block/align.ts +0 -282
  394. package/src/parser/rules/block/bibliography.ts +0 -359
  395. package/src/parser/rules/block/block-list.ts +0 -689
  396. package/src/parser/rules/block/blockquote.ts +0 -238
  397. package/src/parser/rules/block/code.ts +0 -187
  398. package/src/parser/rules/block/collapsible.ts +0 -337
  399. package/src/parser/rules/block/definition-list.ts +0 -270
  400. package/src/parser/rules/block/div.ts +0 -400
  401. package/src/parser/rules/block/embed-block.ts +0 -153
  402. package/src/parser/rules/block/footnoteblock.ts +0 -200
  403. package/src/parser/rules/block/heading.ts +0 -142
  404. package/src/parser/rules/block/html.ts +0 -222
  405. package/src/parser/rules/block/iframe.ts +0 -239
  406. package/src/parser/rules/block/include.ts +0 -179
  407. package/src/parser/rules/block/list.ts +0 -244
  408. package/src/parser/rules/block/math.ts +0 -183
  409. package/src/parser/rules/block/module/include/resolve.ts +0 -556
  410. package/src/parser/rules/block/module/listpages/types.ts +0 -513
  411. package/src/parser/rules/block/module/walk.ts +0 -380
  412. package/src/parser/rules/block/module.ts +0 -164
  413. package/src/parser/rules/block/orphan-li.ts +0 -177
  414. package/src/parser/rules/block/paragraph.ts +0 -157
  415. package/src/parser/rules/block/table-block.ts +0 -726
  416. package/src/parser/rules/block/table.ts +0 -441
  417. package/src/parser/rules/block/tabview.ts +0 -331
  418. package/src/parser/rules/block/toc.ts +0 -129
  419. package/src/parser/rules/inline/anchor-name.ts +0 -154
  420. package/src/parser/rules/inline/anchor.ts +0 -327
  421. package/src/parser/rules/inline/bibcite.ts +0 -153
  422. package/src/parser/rules/inline/color.ts +0 -140
  423. package/src/parser/rules/inline/equation-ref.ts +0 -115
  424. package/src/parser/rules/inline/expr.ts +0 -526
  425. package/src/parser/rules/inline/footnote.ts +0 -223
  426. package/src/parser/rules/inline/image.ts +0 -328
  427. package/src/parser/rules/inline/line-break.ts +0 -326
  428. package/src/parser/rules/inline/link-triple.ts +0 -267
  429. package/src/parser/rules/inline/math-inline.ts +0 -126
  430. package/src/parser/rules/inline/raw.ts +0 -262
  431. package/src/parser/rules/inline/size.ts +0 -244
  432. package/src/parser/rules/inline/span.ts +0 -424
  433. package/src/parser/rules/inline/user.ts +0 -147
@@ -1,31 +1,25 @@
1
- import { createPoint, createPosition } from "@wdprlib/ast";
2
- import { createToken, type Token, type TokenType } from "./tokens";
3
-
4
- /**
5
- * Configuration for the {@link Lexer}.
6
- *
7
- * @group Lexer
8
- */
9
- export interface LexerOptions {
10
- /**
11
- * When `true` (default), every token carries accurate line/column/offset
12
- * data. Set to `false` to skip position tracking for faster tokenisation
13
- * when source-map information is not needed.
14
- */
15
- trackPositions?: boolean;
16
- }
17
-
18
- /**
19
- * Internal mutable state carried through a single tokenisation pass.
20
- */
21
- interface LexerState {
22
- source: string;
23
- pos: number;
24
- line: number;
25
- column: number;
26
- lineStart: boolean;
27
- tokens: Token[];
28
- }
1
+ import type { Token, TokenType } from "./tokens";
2
+ import type { LexerOptions } from "./options";
3
+ import {
4
+ createLexerToken,
5
+ nextBlockOpenerDepth,
6
+ updateLastNonWhitespaceType,
7
+ } from "./token-factory";
8
+ import {
9
+ advance,
10
+ advanceByToken,
11
+ createInitialLexerState,
12
+ current,
13
+ isAtEnd,
14
+ type LexerState,
15
+ } from "./state";
16
+ import { findInvalidAnchorNameEnd } from "./anchor";
17
+ import { scanQuotedString } from "./quoted-string";
18
+ import { scanSimpleSyntaxToken } from "./syntax-actions";
19
+ import type { TokenAction } from "./token-actions";
20
+ import { scanPunctuationToken } from "./punctuation";
21
+ import { scanCompactTextToken, scanTextToken } from "./text-actions";
22
+ import { scanSpacingToken } from "./spacing-actions";
29
23
 
30
24
  /**
31
25
  * Converts a Wikidot markup source string into a flat array of {@link Token}s.
@@ -45,6 +39,7 @@ export class Lexer {
45
39
  private options: Required<LexerOptions>;
46
40
  // Positions where ]] should be split into ] + ] (for invalid anchor names)
47
41
  private splitBlockClosePositions: Set<number> = new Set();
42
+ private lastNonWhitespaceType: TokenType | null = null;
48
43
  /**
49
44
  * Nesting depth of block-opener context (between `[[` / `[[/` and the
50
45
  * matching `]]`). Used to scope `QUOTED_STRING` recognition so that
@@ -58,15 +53,9 @@ export class Lexer {
58
53
  constructor(source: string, options: LexerOptions = {}) {
59
54
  this.options = {
60
55
  trackPositions: options.trackPositions ?? true,
56
+ compactTextRuns: options.compactTextRuns ?? false,
61
57
  };
62
- this.state = {
63
- source,
64
- pos: 0,
65
- line: 1,
66
- column: 1,
67
- lineStart: true,
68
- tokens: [],
69
- };
58
+ this.state = createInitialLexerState(source);
70
59
  }
71
60
 
72
61
  /**
@@ -85,14 +74,14 @@ export class Lexer {
85
74
  * Check if at end of source
86
75
  */
87
76
  private isAtEnd(): boolean {
88
- return this.state.pos >= this.state.source.length;
77
+ return isAtEnd(this.state);
89
78
  }
90
79
 
91
80
  /**
92
81
  * Get current character
93
82
  */
94
83
  private current(): string {
95
- return this.state.source[this.state.pos] ?? "";
84
+ return current(this.state);
96
85
  }
97
86
 
98
87
  /**
@@ -103,126 +92,45 @@ export class Lexer {
103
92
  * emit tokens that allow the inner [# text] to be parsed as a described link.
104
93
  */
105
94
  private findInvalidAnchorNameEnd(): number | null {
106
- const src = this.state.source;
107
- const pos = this.state.pos;
108
-
109
- // Must start with [[#
110
- if (src[pos] !== "[" || src[pos + 1] !== "[" || src[pos + 2] !== "#") {
111
- return null;
112
- }
113
-
114
- // Must have space after #
115
- if (src[pos + 3] !== " ") {
116
- return null;
117
- }
118
-
119
- // Skip spaces after #
120
- let i = pos + 4;
121
- while (i < src.length && src[i] === " ") {
122
- i++;
123
- }
124
-
125
- // Scan for invalid characters
126
- let foundInvalid = false;
127
- while (i < src.length) {
128
- const ch = src[i]!;
129
- if (ch === "\n") return null;
130
- if (ch === "]" && src[i + 1] === "]") {
131
- // Reached ]] - if we found invalid chars, this is an invalid anchor name
132
- return foundInvalid ? i : null;
133
- }
134
- const code = ch.charCodeAt(0);
135
- const isValid =
136
- (code >= 48 && code <= 57) || // 0-9
137
- (code >= 65 && code <= 90) || // A-Z
138
- (code >= 97 && code <= 122) || // a-z
139
- code === 45 || // -
140
- code === 95 || // _
141
- code === 46 || // .
142
- code === 37; // %
143
- if (!isValid) {
144
- foundInvalid = true;
145
- }
146
- i++;
147
- }
148
-
149
- return null;
150
- }
151
-
152
- /**
153
- * Check if source matches pattern at current position
154
- */
155
- private match(pattern: string): boolean {
156
- for (let i = 0; i < pattern.length; i++) {
157
- if (this.state.source[this.state.pos + i] !== pattern[i]) {
158
- return false;
159
- }
160
- }
161
- return true;
95
+ return findInvalidAnchorNameEnd(this.state.source, this.state.pos);
162
96
  }
163
97
 
164
98
  /**
165
99
  * Advance position by n characters
166
100
  */
167
101
  private advance(n = 1): string {
168
- let result = "";
169
- for (let i = 0; i < n && !this.isAtEnd(); i++) {
170
- const char = this.current();
171
- result += char;
172
- this.state.pos++;
173
-
174
- if (char === "\n") {
175
- this.state.line++;
176
- this.state.column = 1;
177
- this.state.lineStart = true;
178
- } else {
179
- this.state.column++;
180
- if (char !== " " && char !== "\t") {
181
- this.state.lineStart = false;
182
- }
183
- }
184
- }
185
- return result;
102
+ return advance(this.state, n);
186
103
  }
187
104
 
188
105
  /**
189
106
  * Returns the type of the last non-whitespace token, or null if none.
190
107
  */
191
108
  private lastNonWhitespaceTokenType(): TokenType | null {
192
- for (let i = this.state.tokens.length - 1; i >= 0; i--) {
193
- const t = this.state.tokens[i]!;
194
- if (t.type !== "WHITESPACE") return t.type;
195
- }
196
- return null;
109
+ return this.lastNonWhitespaceType;
197
110
  }
198
111
 
199
112
  /**
200
113
  * Add token
201
114
  */
202
115
  private addToken(type: TokenType, value: string): void {
203
- const startPos = createPoint(
204
- this.state.line,
205
- this.state.column - value.length,
206
- this.state.pos - value.length,
207
- );
208
- const endPos = createPoint(this.state.line, this.state.column, this.state.pos);
209
- const position = this.options.trackPositions
210
- ? createPosition(startPos, endPos)
211
- : createPosition(createPoint(0, 0, 0), createPoint(0, 0, 0));
212
-
213
- const lineStart =
214
- this.state.tokens.length === 0 ||
215
- this.state.tokens[this.state.tokens.length - 1]?.type === "NEWLINE";
116
+ this.state.tokens.push(createLexerToken(this.state, type, value, this.options.trackPositions));
117
+ this.lastNonWhitespaceType = updateLastNonWhitespaceType(this.lastNonWhitespaceType, type);
118
+ this.blockOpenerDepth = nextBlockOpenerDepth(this.blockOpenerDepth, type);
119
+ }
216
120
 
217
- this.state.tokens.push(createToken(type, value, position, lineStart));
121
+ private emitTokenAction(action: TokenAction): void {
122
+ advanceByToken(this.state, action.type, action.length);
123
+ this.addToken(action.type, action.value);
124
+ }
218
125
 
219
- // Track block-opener nesting so `"` after `=` is only recognised as a
220
- // quoted attribute value while we are actually inside `[[ ... ]]`.
221
- if (type === "BLOCK_OPEN" || type === "BLOCK_END_OPEN") {
222
- this.blockOpenerDepth++;
223
- } else if (type === "BLOCK_CLOSE" && this.blockOpenerDepth > 0) {
224
- this.blockOpenerDepth--;
126
+ private emitTokenActions(actions: TokenAction | TokenAction[]): void {
127
+ if (Array.isArray(actions)) {
128
+ for (const action of actions) {
129
+ this.emitTokenAction(action);
130
+ }
131
+ return;
225
132
  }
133
+ this.emitTokenAction(actions);
226
134
  }
227
135
 
228
136
  /**
@@ -231,343 +139,30 @@ export class Lexer {
231
139
  private scanToken(): void {
232
140
  const char = this.current();
233
141
  const isLineStart = this.state.lineStart;
142
+ const src = this.state.source;
234
143
 
235
- // Newline
236
- if (char === "\n") {
237
- this.advance();
238
- this.addToken("NEWLINE", "\n");
239
- return;
240
- }
241
-
242
- // Whitespace (non-newline)
243
- if (char === " " || char === "\t") {
244
- let ws = "";
245
- while (!this.isAtEnd() && (this.current() === " " || this.current() === "\t")) {
246
- ws += this.advance();
247
- }
248
- this.addToken("WHITESPACE", ws);
249
- return;
250
- }
251
-
252
- // Comment open [!-- (must check before [[[)
253
- if (this.match("[!--")) {
254
- this.advance(4);
255
- this.addToken("COMMENT_OPEN", "[!--");
256
- return;
257
- }
258
-
259
- // Link open [[[ (must check before [[)
260
- if (this.match("[[[")) {
261
- this.advance(3);
262
- this.addToken("LINK_OPEN", "[[[");
263
- return;
264
- }
265
-
266
- // Block end open [[/
267
- if (this.match("[[/")) {
268
- this.advance(3);
269
- this.addToken("BLOCK_END_OPEN", "[[/");
270
- return;
271
- }
272
-
273
- // Block open [[
274
- if (this.match("[[")) {
275
- // Check for invalid anchor name pattern: [[# name-with-spaces]]
276
- // Wikidot's Anchor regex requires [-_A-Za-z0-9.%] only after [[# .
277
- // If [[# is followed by invalid anchor name, decompose into
278
- // TEXT "[" so the inner [# text] is parsed as a described anchor link.
279
- // The closing ]] will also be split: ] (BRACKET_CLOSE) + ] (TEXT).
280
- const invalidEnd = this.findInvalidAnchorNameEnd();
281
- if (invalidEnd !== null) {
282
- this.splitBlockClosePositions.add(invalidEnd);
283
- this.advance(1);
284
- this.addToken("TEXT", "[");
285
- return;
286
- }
287
- this.advance(2);
288
- this.addToken("BLOCK_OPEN", "[[");
289
- return;
290
- }
291
-
292
- // Link close ]]] (must check before ]])
293
- if (this.match("]]]")) {
294
- this.advance(3);
295
- this.addToken("LINK_CLOSE", "]]]");
296
- return;
297
- }
298
-
299
- // Block close ]]
300
- if (this.match("]]")) {
301
- // For invalid anchor names, split ]] into ] (BRACKET_CLOSE) + ] (TEXT)
302
- if (this.splitBlockClosePositions.has(this.state.pos)) {
303
- this.splitBlockClosePositions.delete(this.state.pos);
304
- this.advance(1);
305
- this.addToken("BRACKET_CLOSE", "]");
306
- this.advance(1);
307
- this.addToken("TEXT", "]");
308
- return;
309
- }
310
- this.advance(2);
311
- this.addToken("BLOCK_CLOSE", "]]");
312
- return;
313
- }
314
-
315
- // Raw/escape @@
316
- if (this.match("@@")) {
317
- this.advance(2);
318
- this.addToken("RAW_OPEN", "@@");
319
- return;
320
- }
321
-
322
- // Raw block @<
323
- if (this.match("@<")) {
324
- this.advance(2);
325
- this.addToken("RAW_BLOCK_OPEN", "@<");
326
- return;
327
- }
328
-
329
- // Raw block close >@
330
- if (this.match(">@")) {
331
- this.advance(2);
332
- this.addToken("RAW_BLOCK_CLOSE", ">@");
333
- return;
334
- }
335
-
336
- // Monospace open {{
337
- if (this.match("{{")) {
338
- this.advance(2);
339
- this.addToken("MONO_MARKER", "{{");
340
- return;
341
- }
342
-
343
- // Monospace close }}
344
- if (this.match("}}")) {
345
- this.advance(2);
346
- this.addToken("MONO_CLOSE", "}}");
347
- return;
348
- }
349
-
350
- // Bold **
351
- if (this.match("**")) {
352
- this.advance(2);
353
- this.addToken("BOLD_MARKER", "**");
354
- return;
355
- }
356
-
357
- // Horizontal rule ---- or more (4+ hyphens, check before --)
358
- if (isLineStart && this.match("----")) {
359
- let dashes = "";
360
- while (this.current() === "-") {
361
- dashes += this.advance();
362
- }
363
- this.addToken("HR_MARKER", dashes);
364
- return;
365
- }
366
-
367
- // Comment close --] (must check before --)
368
- if (this.match("--]")) {
369
- this.advance(3);
370
- this.addToken("COMMENT_CLOSE", "--]");
371
- return;
372
- }
373
-
374
- // Strikethrough -- (Wikidot only uses --)
375
- if (this.match("--")) {
376
- this.advance(2);
377
- this.addToken("STRIKE_MARKER", "--");
378
- return;
379
- }
380
-
381
- // Left double angle << (guillemet)
382
- if (this.match("<<")) {
383
- this.advance(2);
384
- this.addToken("LEFT_DOUBLE_ANGLE", "<<");
385
- return;
386
- }
387
-
388
- // Clear float ~~~~ or more (at line start only, Wikidot requires 4+)
389
- if (isLineStart && this.match("~~~~")) {
390
- let tildes = "";
391
- while (this.current() === "~") {
392
- tildes += this.advance();
393
- }
394
- // Check for directional clear float
395
- if (this.current() === "<") {
396
- this.advance();
397
- this.addToken("CLEAR_FLOAT_LEFT", `${tildes}<`);
398
- return;
399
- }
400
- if (this.current() === ">") {
401
- this.advance();
402
- this.addToken("CLEAR_FLOAT_RIGHT", `${tildes}>`);
403
- return;
404
- }
405
- this.addToken("CLEAR_FLOAT", `${tildes}`);
406
- return;
407
- }
408
-
409
- // Single hyphen (not part of --)
410
- if (char === "-") {
411
- this.advance();
412
- this.addToken("TEXT", "-");
413
- return;
414
- }
415
-
416
- // Underline __ (check before single _)
417
- if (this.match("__")) {
418
- this.advance(2);
419
- this.addToken("UNDERLINE_MARKER", "__");
420
- return;
421
- }
422
-
423
- // Single underscore _ (for line break)
424
- if (char === "_") {
425
- this.advance();
426
- this.addToken("UNDERSCORE", "_");
427
- return;
428
- }
429
-
430
- // Superscript ^^
431
- if (this.match("^^")) {
432
- this.advance(2);
433
- this.addToken("SUPER_MARKER", "^^");
434
- return;
435
- }
436
-
437
- // Subscript ,,
438
- if (this.match(",,")) {
439
- this.advance(2);
440
- this.addToken("SUB_MARKER", ",,");
441
- return;
442
- }
443
-
444
- // Italic //
445
- if (this.match("//")) {
446
- this.advance(2);
447
- this.addToken("ITALIC_MARKER", "//");
448
- return;
449
- }
450
-
451
- // Table markers
452
- // ||~ (header), ||< (left), ||= (center), ||> (right), || (normal)
453
- if (this.match("||~")) {
454
- this.advance(3);
455
- this.addToken("TABLE_HEADER", "||~");
456
- return;
457
- }
458
- if (this.match("||<")) {
459
- this.advance(3);
460
- this.addToken("TABLE_LEFT", "||<");
461
- return;
462
- }
463
- if (this.match("||=")) {
464
- this.advance(3);
465
- this.addToken("TABLE_CENTER", "||=");
466
- return;
467
- }
468
- if (this.match("||>")) {
469
- this.advance(3);
470
- this.addToken("TABLE_RIGHT", "||>");
471
- return;
472
- }
473
- if (this.match("||")) {
474
- this.advance(2);
475
- this.addToken("TABLE_MARKER", "||");
476
- return;
477
- }
478
-
479
- // Heading + (at line start)
480
- if (isLineStart && char === "+") {
481
- let plusCount = 0;
482
- while (this.current() === "+") {
483
- plusCount++;
484
- this.advance();
485
- }
486
- this.addToken("HEADING_MARKER", "+".repeat(plusCount));
487
- return;
488
- }
489
-
490
- // List bullet * (at line start)
491
- if (isLineStart && char === "*") {
492
- this.advance();
493
- this.addToken("LIST_BULLET", "*");
494
- return;
495
- }
496
-
497
- // Color marker ## (check before LIST_NUMBER)
498
- if (this.match("##")) {
499
- this.advance(2);
500
- this.addToken("COLOR_MARKER", "##");
501
- return;
502
- }
503
-
504
- // List number # (at line start)
505
- if (isLineStart && char === "#") {
506
- this.advance();
507
- this.addToken("LIST_NUMBER", "#");
144
+ const spacingAction = scanSpacingToken(src, this.state.pos);
145
+ if (spacingAction) {
146
+ this.emitTokenAction(spacingAction);
508
147
  return;
509
148
  }
510
149
 
511
- // Blockquote > or >>> (at line start only for blockquote)
512
- if (char === ">") {
513
- if (isLineStart) {
514
- // At line start: consume all consecutive > as a single blockquote marker
515
- let depth = "";
516
- while (this.current() === ">") {
517
- depth += this.advance();
518
- }
519
- this.addToken("BLOCKQUOTE_MARKER", depth);
520
- return;
150
+ const punctuation = scanPunctuationToken({
151
+ char,
152
+ source: src,
153
+ pos: this.state.pos,
154
+ lineStart: isLineStart,
155
+ splitBlockClose: this.splitBlockClosePositions.has(this.state.pos),
156
+ findInvalidAnchorNameEnd: () => this.findInvalidAnchorNameEnd(),
157
+ });
158
+ if (punctuation.handled) {
159
+ if (punctuation.clearSplitBlockCloseAt !== undefined) {
160
+ this.splitBlockClosePositions.delete(punctuation.clearSplitBlockCloseAt);
521
161
  }
522
- // Not at line start
523
- if (this.match(">>")) {
524
- // >> not at line start - guillemet
525
- this.advance(2);
526
- this.addToken("RIGHT_DOUBLE_ANGLE", ">>");
527
- return;
162
+ if (punctuation.splitBlockCloseAt !== undefined) {
163
+ this.splitBlockClosePositions.add(punctuation.splitBlockCloseAt);
528
164
  }
529
- // Single > not at line start - just text
530
- this.advance();
531
- this.addToken("TEXT", ">");
532
- return;
533
- }
534
-
535
- // Bracket anchor [#
536
- if (this.match("[#")) {
537
- this.advance(2);
538
- this.addToken("BRACKET_ANCHOR", "[#");
539
- return;
540
- }
541
-
542
- // Bracket star [* (for new tab links)
543
- if (this.match("[*")) {
544
- this.advance(2);
545
- this.addToken("BRACKET_STAR", "[*");
546
- return;
547
- }
548
-
549
- // Single characters
550
- if (char === "[") {
551
- this.advance();
552
- this.addToken("BRACKET_OPEN", "[");
553
- return;
554
- }
555
-
556
- if (char === "]") {
557
- this.advance();
558
- this.addToken("BRACKET_CLOSE", "]");
559
- return;
560
- }
561
-
562
- if (char === "|") {
563
- this.advance();
564
- this.addToken("PIPE", "|");
565
- return;
566
- }
567
-
568
- if (char === "=") {
569
- this.advance();
570
- this.addToken("EQUALS", "=");
165
+ this.emitTokenActions(punctuation.actions);
571
166
  return;
572
167
  }
573
168
 
@@ -579,14 +174,7 @@ export class Lexer {
579
174
  if (char === '"') {
580
175
  const lastNonWs = this.lastNonWhitespaceTokenType();
581
176
  if (this.blockOpenerDepth > 0 && lastNonWs === "EQUALS") {
582
- let quoted = this.advance(); // opening "
583
- while (!this.isAtEnd() && this.current() !== '"' && this.current() !== "\n") {
584
- quoted += this.advance();
585
- }
586
- if (this.current() === '"') {
587
- quoted += this.advance(); // closing "
588
- }
589
- this.addToken("QUOTED_STRING", quoted);
177
+ this.addToken("QUOTED_STRING", scanQuotedString(this.state));
590
178
  return;
591
179
  }
592
180
  this.advance();
@@ -594,94 +182,20 @@ export class Lexer {
594
182
  return;
595
183
  }
596
184
 
597
- if (char === ":") {
598
- this.advance();
599
- this.addToken("COLON", ":");
600
- return;
601
- }
602
-
603
- if (char === "/") {
604
- this.advance();
605
- this.addToken("SLASH", "/");
606
- return;
607
- }
608
-
609
- if (char === "*") {
610
- this.advance();
611
- this.addToken("STAR", "*");
612
- return;
613
- }
614
-
615
- if (char === "#") {
616
- this.advance();
617
- this.addToken("HASH", "#");
185
+ const simpleAction = scanSimpleSyntaxToken(src, this.state.pos, isLineStart);
186
+ if (simpleAction) {
187
+ this.emitTokenAction(simpleAction);
618
188
  return;
619
189
  }
620
190
 
621
- if (char === "@") {
622
- this.advance();
623
- this.addToken("AT", "@");
624
- return;
625
- }
626
-
627
- if (char === "&") {
628
- this.advance();
629
- this.addToken("AMPERSAND", "&");
630
- return;
631
- }
632
-
633
- if (char === "\\") {
634
- this.advance();
635
- this.addToken("BACKSLASH", "\\");
636
- return;
637
- }
638
-
639
- // Backslash line break marker (U+E000, inserted by preproc)
640
- if (char.charCodeAt(0) === 0xe000) {
641
- this.advance();
642
- this.addToken("BACKSLASH_BREAK", char);
643
- return;
644
- }
645
-
646
- // Identifier: alphanumeric sequence
647
- if (this.isAlphanumeric(char)) {
648
- let ident = "";
649
- while (!this.isAtEnd() && this.isAlphanumeric(this.current())) {
650
- ident += this.advance();
191
+ if (this.options.compactTextRuns && this.blockOpenerDepth === 0) {
192
+ const compactTextAction = scanCompactTextToken(src, this.state.pos);
193
+ if (compactTextAction) {
194
+ this.emitTokenAction(compactTextAction);
195
+ return;
651
196
  }
652
- this.addToken("IDENTIFIER", ident);
653
- return;
654
197
  }
655
198
 
656
- // Default: single character as text
657
- const text = this.advance();
658
- this.addToken("TEXT", text);
199
+ this.emitTokenAction(scanTextToken(src, this.state.pos));
659
200
  }
660
-
661
- /**
662
- * Check if character is alphanumeric (for identifier tokens)
663
- */
664
- private isAlphanumeric(char: string): boolean {
665
- const code = char.charCodeAt(0);
666
- return (
667
- (code >= 48 && code <= 57) || // 0-9
668
- (code >= 65 && code <= 90) || // A-Z
669
- (code >= 97 && code <= 122) // a-z
670
- );
671
- }
672
- }
673
-
674
- /**
675
- * Tokenise a Wikidot markup source string in one call.
676
- *
677
- * Shorthand for `new Lexer(source, options).tokenize()`.
678
- *
679
- * @param source - Raw Wikidot markup
680
- * @param options - Optional lexer configuration
681
- * @returns A flat array of tokens, ending with an `EOF` token
682
- *
683
- * @group Lexer
684
- */
685
- export function tokenize(source: string, options?: LexerOptions): Token[] {
686
- return new Lexer(source, options).tokenize();
687
201
  }