@wdprlib/parser 3.2.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433)
  1. package/dist/index.cjs +10471 -8406
  2. package/dist/index.d.cts +313 -337
  3. package/dist/index.d.ts +313 -337
  4. package/dist/index.js +10457 -8392
  5. package/package.json +1 -1
  6. package/src/index.ts +7 -0
  7. package/src/lexer/anchor.ts +48 -0
  8. package/src/lexer/index.ts +3 -2
  9. package/src/lexer/lexer.ts +73 -559
  10. package/src/lexer/options.ts +19 -0
  11. package/src/lexer/punctuation.ts +70 -0
  12. package/src/lexer/quoted-string.ts +16 -0
  13. package/src/lexer/runs.ts +85 -0
  14. package/src/lexer/spacing-actions.ts +24 -0
  15. package/src/lexer/state.ts +103 -0
  16. package/src/lexer/syntax-actions.ts +80 -0
  17. package/src/lexer/text-actions.ts +41 -0
  18. package/src/lexer/token-actions.ts +136 -0
  19. package/src/lexer/token-factory.ts +62 -0
  20. package/src/lexer/tokenize.ts +18 -0
  21. package/src/parser/constants.ts +2 -0
  22. package/src/parser/depth/index.ts +111 -0
  23. package/src/parser/depth/stack.ts +82 -0
  24. package/src/parser/parse/block.ts +42 -0
  25. package/src/parser/parse/context.ts +26 -0
  26. package/src/parser/parse/footnotes.ts +25 -0
  27. package/src/parser/parse/index.ts +42 -0
  28. package/src/parser/parse/options.ts +34 -0
  29. package/src/parser/parse/parser.ts +79 -0
  30. package/src/parser/parse/plain-non-ascii.ts +129 -0
  31. package/src/parser/parse/result.ts +57 -0
  32. package/src/parser/parse/source.ts +11 -0
  33. package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
  34. package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
  35. package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
  36. package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
  37. package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
  38. package/src/parser/postprocess/spanStrip/factory.ts +23 -0
  39. package/src/parser/postprocess/spanStrip/index.ts +8 -0
  40. package/src/parser/postprocess/spanStrip/merge.ts +117 -0
  41. package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
  42. package/src/parser/postprocess/spanStrip/split.ts +67 -0
  43. package/src/parser/preprocess/expr/chars.ts +15 -0
  44. package/src/parser/preprocess/expr/evaluate.ts +22 -0
  45. package/src/parser/preprocess/expr/index.ts +45 -0
  46. package/src/parser/preprocess/expr/kind.ts +19 -0
  47. package/src/parser/preprocess/expr/parse.ts +103 -0
  48. package/src/parser/preprocess/expr/scan.ts +34 -0
  49. package/src/parser/preprocess/expr/types.ts +14 -0
  50. package/src/parser/preprocess/typography.ts +70 -5
  51. package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
  52. package/src/parser/preprocess/utils/index.ts +13 -0
  53. package/src/parser/preprocess/utils/raw-regions.ts +153 -0
  54. package/src/parser/preprocess/whitespace/detection.ts +39 -0
  55. package/src/parser/preprocess/whitespace/index.ts +79 -0
  56. package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
  57. package/src/parser/preprocess/whitespace/patterns.ts +23 -0
  58. package/src/parser/rules/block/align/body.ts +46 -0
  59. package/src/parser/rules/block/align/element.ts +13 -0
  60. package/src/parser/rules/block/align/index.ts +90 -0
  61. package/src/parser/rules/block/align/syntax.ts +113 -0
  62. package/src/parser/rules/block/bibliography/body.ts +81 -0
  63. package/src/parser/rules/block/bibliography/entries.ts +49 -0
  64. package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
  65. package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
  66. package/src/parser/rules/block/bibliography/index.ts +90 -0
  67. package/src/parser/rules/block/bibliography/open.ts +53 -0
  68. package/src/parser/rules/block/block-list/bare-content.ts +105 -0
  69. package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
  70. package/src/parser/rules/block/block-list/index.ts +51 -0
  71. package/src/parser/rules/block/block-list/item-content.ts +132 -0
  72. package/src/parser/rules/block/block-list/li-content.ts +107 -0
  73. package/src/parser/rules/block/block-list/li-item.ts +77 -0
  74. package/src/parser/rules/block/block-list/list-block.ts +100 -0
  75. package/src/parser/rules/block/block-list/open.ts +51 -0
  76. package/src/parser/rules/block/block-list/tags.ts +50 -0
  77. package/src/parser/rules/block/blockquote/build.ts +62 -0
  78. package/src/parser/rules/block/blockquote/index.ts +80 -0
  79. package/src/parser/rules/block/blockquote/line.ts +79 -0
  80. package/src/parser/rules/block/blockquote/lines.ts +39 -0
  81. package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
  82. package/src/parser/rules/block/center/open.ts +27 -0
  83. package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
  84. package/src/parser/rules/block/clear-float/syntax.ts +43 -0
  85. package/src/parser/rules/block/code/attributes.ts +30 -0
  86. package/src/parser/rules/block/code/content.ts +57 -0
  87. package/src/parser/rules/block/code/index.ts +100 -0
  88. package/src/parser/rules/block/collapsible/attributes.ts +95 -0
  89. package/src/parser/rules/block/collapsible/body.ts +69 -0
  90. package/src/parser/rules/block/collapsible/index.ts +117 -0
  91. package/src/parser/rules/block/collapsible/open.ts +51 -0
  92. package/src/parser/rules/block/collapsible/orphans.ts +31 -0
  93. package/src/parser/rules/block/collapsible/tags.ts +17 -0
  94. package/src/parser/rules/block/comment/consume.ts +37 -0
  95. package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
  96. package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
  97. package/src/parser/rules/block/content-separator/syntax.ts +33 -0
  98. package/src/parser/rules/block/definition-list/collect.ts +40 -0
  99. package/src/parser/rules/block/definition-list/index.ts +63 -0
  100. package/src/parser/rules/block/definition-list/item-key.ts +95 -0
  101. package/src/parser/rules/block/definition-list/item-value.ts +56 -0
  102. package/src/parser/rules/block/definition-list/items.ts +54 -0
  103. package/src/parser/rules/block/div/body.ts +41 -0
  104. package/src/parser/rules/block/div/close.ts +41 -0
  105. package/src/parser/rules/block/div/failed.ts +117 -0
  106. package/src/parser/rules/block/div/index.ts +112 -0
  107. package/src/parser/rules/block/div/nesting.ts +37 -0
  108. package/src/parser/rules/block/div/open.ts +59 -0
  109. package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
  110. package/src/parser/rules/block/embed-block/content.ts +53 -0
  111. package/src/parser/rules/block/embed-block/index.ts +91 -0
  112. package/src/parser/rules/block/embed-block/open.ts +52 -0
  113. package/src/parser/rules/block/embed-block/tags.ts +5 -0
  114. package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
  115. package/src/parser/rules/block/footnoteblock/index.ts +82 -0
  116. package/src/parser/rules/block/footnoteblock/open.ts +53 -0
  117. package/src/parser/rules/block/heading/index.ts +87 -0
  118. package/src/parser/rules/block/heading/open.ts +50 -0
  119. package/src/parser/rules/block/heading/toc-text.ts +26 -0
  120. package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
  121. package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
  122. package/src/parser/rules/block/html/body.ts +114 -0
  123. package/src/parser/rules/block/html/diagnostics.ts +11 -0
  124. package/src/parser/rules/block/html/index.ts +95 -0
  125. package/src/parser/rules/block/html/open.ts +36 -0
  126. package/src/parser/rules/block/iframe/attributes.ts +106 -0
  127. package/src/parser/rules/block/iframe/index.ts +73 -0
  128. package/src/parser/rules/block/iframe/open.ts +58 -0
  129. package/src/parser/rules/block/iframe/source.ts +24 -0
  130. package/src/parser/rules/block/iframe/url.ts +38 -0
  131. package/src/parser/rules/block/iftags/body.ts +48 -0
  132. package/src/parser/rules/block/iftags/condition.ts +24 -0
  133. package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
  134. package/src/parser/rules/block/include/arguments.ts +48 -0
  135. package/src/parser/rules/block/include/index.ts +75 -0
  136. package/src/parser/rules/block/include/location.ts +24 -0
  137. package/src/parser/rules/block/include/variables.ts +37 -0
  138. package/src/parser/rules/block/list/index.ts +73 -0
  139. package/src/parser/rules/block/list/line.ts +77 -0
  140. package/src/parser/rules/block/list/native.ts +89 -0
  141. package/src/parser/rules/block/math/content.ts +54 -0
  142. package/src/parser/rules/block/math/index.ts +106 -0
  143. package/src/parser/rules/block/math/name.ts +35 -0
  144. package/src/parser/rules/block/module/body.ts +92 -0
  145. package/src/parser/rules/block/module/element.ts +33 -0
  146. package/src/parser/rules/block/module/include/directive.ts +91 -0
  147. package/src/parser/rules/block/module/include/index.ts +11 -2
  148. package/src/parser/rules/block/module/include/references.ts +42 -0
  149. package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
  150. package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
  151. package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
  152. package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
  153. package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
  154. package/src/parser/rules/block/module/include/scanner.ts +121 -0
  155. package/src/parser/rules/block/module/index.ts +14 -2
  156. package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
  157. package/src/parser/rules/block/module/listpages/extract.ts +25 -359
  158. package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
  159. package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
  160. package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
  161. package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
  162. package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
  163. package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
  164. package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
  165. package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
  166. package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
  167. package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
  168. package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
  169. package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
  170. package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
  171. package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
  172. package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
  173. package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
  174. package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
  175. package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
  176. package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
  177. package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
  178. package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
  179. package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
  180. package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
  181. package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
  182. package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
  183. package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
  184. package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
  185. package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
  186. package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
  187. package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
  188. package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
  189. package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
  190. package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
  191. package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
  192. package/src/parser/rules/block/module/listpages/url-resolution/params.ts +30 -0
  193. package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
  194. package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +62 -0
  195. package/src/parser/rules/block/module/listpages/url-resolution/value.ts +34 -0
  196. package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
  197. package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
  198. package/src/parser/rules/block/module/listusers/extract.ts +4 -9
  199. package/src/parser/rules/block/module/listusers/getters.ts +21 -0
  200. package/src/parser/rules/block/module/listusers/variables.ts +15 -0
  201. package/src/parser/rules/block/module/open.ts +57 -0
  202. package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
  203. package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
  204. package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
  205. package/src/parser/rules/block/module/resolution/styles.ts +53 -0
  206. package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
  207. package/src/parser/rules/block/module/resolve.ts +79 -292
  208. package/src/parser/rules/block/module/rule.ts +56 -0
  209. package/src/parser/rules/block/module/types-common.ts +11 -0
  210. package/src/parser/rules/block/module/walk/children.ts +35 -0
  211. package/src/parser/rules/block/module/walk/index.ts +9 -0
  212. package/src/parser/rules/block/module/walk/map/index.ts +2 -0
  213. package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
  214. package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
  215. package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
  216. package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
  217. package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
  218. package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
  219. package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
  220. package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
  221. package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
  222. package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
  223. package/src/parser/rules/block/module/walk/map/types.ts +6 -0
  224. package/src/parser/rules/block/module/walk/traverse.ts +65 -0
  225. package/src/parser/rules/block/orphan-li/content.ts +60 -0
  226. package/src/parser/rules/block/orphan-li/index.ts +75 -0
  227. package/src/parser/rules/block/orphan-li/open.ts +25 -0
  228. package/src/parser/rules/block/orphan-li/tags.ts +40 -0
  229. package/src/parser/rules/block/paragraph/content.ts +12 -0
  230. package/src/parser/rules/block/paragraph/index.ts +60 -0
  231. package/src/parser/rules/block/paragraph/normalize.ts +52 -0
  232. package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
  233. package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
  234. package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
  235. package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
  236. package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
  237. package/src/parser/rules/block/parsing/block-item.ts +29 -0
  238. package/src/parser/rules/block/parsing/content.ts +127 -0
  239. package/src/parser/rules/block/parsing/end-condition.ts +51 -0
  240. package/src/parser/rules/block/parsing/inline-content.ts +105 -0
  241. package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
  242. package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
  243. package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
  244. package/src/parser/rules/block/table/index.ts +80 -0
  245. package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
  246. package/src/parser/rules/block/table/pipe/cell.ts +106 -0
  247. package/src/parser/rules/block/table/pipe/index.ts +2 -0
  248. package/src/parser/rules/block/table/pipe/row.ts +88 -0
  249. package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
  250. package/src/parser/rules/block/table/pipe/trim.ts +50 -0
  251. package/src/parser/rules/block/table-block/body.ts +79 -0
  252. package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
  253. package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
  254. package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
  255. package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
  256. package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
  257. package/src/parser/rules/block/table-block/cell.ts +64 -0
  258. package/src/parser/rules/block/table-block/index.ts +113 -0
  259. package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
  260. package/src/parser/rules/block/table-block/structure.ts +80 -0
  261. package/src/parser/rules/block/tabview/body.ts +64 -0
  262. package/src/parser/rules/block/tabview/index.ts +90 -0
  263. package/src/parser/rules/block/tabview/open.ts +50 -0
  264. package/src/parser/rules/block/tabview/tab.ts +92 -0
  265. package/src/parser/rules/block/tabview/tags.ts +30 -0
  266. package/src/parser/rules/block/toc/element.ts +11 -0
  267. package/src/parser/rules/block/toc/index.ts +44 -0
  268. package/src/parser/rules/block/toc/open.ts +84 -0
  269. package/src/parser/rules/block/utils.ts +10 -610
  270. package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
  271. package/src/parser/rules/common/block-name.ts +33 -0
  272. package/src/parser/rules/common/index.ts +2 -0
  273. package/src/parser/rules/contracts/index.ts +3 -0
  274. package/src/parser/rules/contracts/parse-context.ts +38 -0
  275. package/src/parser/rules/contracts/rule.ts +43 -0
  276. package/src/parser/rules/contracts/scope.ts +31 -0
  277. package/src/parser/rules/inline/anchor/attributes.ts +54 -0
  278. package/src/parser/rules/inline/anchor/child.ts +26 -0
  279. package/src/parser/rules/inline/anchor/close.ts +34 -0
  280. package/src/parser/rules/inline/anchor/content.ts +59 -0
  281. package/src/parser/rules/inline/anchor/index.ts +103 -0
  282. package/src/parser/rules/inline/anchor/newline.ts +26 -0
  283. package/src/parser/rules/inline/anchor/open.ts +47 -0
  284. package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
  285. package/src/parser/rules/inline/anchor/syntax.ts +40 -0
  286. package/src/parser/rules/inline/anchor-name/index.ts +38 -0
  287. package/src/parser/rules/inline/anchor-name/name.ts +39 -0
  288. package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
  289. package/src/parser/rules/inline/bibcite/element.ts +14 -0
  290. package/src/parser/rules/inline/bibcite/index.ts +34 -0
  291. package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
  292. package/src/parser/rules/inline/bold.ts +2 -39
  293. package/src/parser/rules/inline/color/index.ts +35 -0
  294. package/src/parser/rules/inline/color/syntax.ts +69 -0
  295. package/src/parser/rules/inline/comment/consume.ts +31 -0
  296. package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
  297. package/src/parser/rules/inline/equation-ref/element.ts +8 -0
  298. package/src/parser/rules/inline/equation-ref/index.ts +34 -0
  299. package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
  300. package/src/parser/rules/inline/expr/branch.ts +104 -0
  301. package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
  302. package/src/parser/rules/inline/expr/conditional.ts +80 -0
  303. package/src/parser/rules/inline/expr/depth.ts +25 -0
  304. package/src/parser/rules/inline/expr/elements.ts +39 -0
  305. package/src/parser/rules/inline/expr/index.ts +84 -0
  306. package/src/parser/rules/inline/expr/syntax.ts +45 -0
  307. package/src/parser/rules/inline/footnote/child.ts +22 -0
  308. package/src/parser/rules/inline/footnote/close.ts +33 -0
  309. package/src/parser/rules/inline/footnote/content.ts +54 -0
  310. package/src/parser/rules/inline/footnote/elements.ts +38 -0
  311. package/src/parser/rules/inline/footnote/index.ts +54 -0
  312. package/src/parser/rules/inline/footnote/newline.ts +27 -0
  313. package/src/parser/rules/inline/footnote/open.ts +38 -0
  314. package/src/parser/rules/inline/formatting/container.ts +50 -0
  315. package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
  316. package/src/parser/rules/inline/guillemet/text.ts +11 -0
  317. package/src/parser/rules/inline/html/gate.ts +64 -0
  318. package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
  319. package/src/parser/rules/inline/html/open.ts +37 -0
  320. package/src/parser/rules/inline/image/attributes.ts +22 -0
  321. package/src/parser/rules/inline/image/body.ts +36 -0
  322. package/src/parser/rules/inline/image/index.ts +89 -0
  323. package/src/parser/rules/inline/image/open.ts +56 -0
  324. package/src/parser/rules/inline/image/source.ts +62 -0
  325. package/src/parser/rules/inline/image/syntax.ts +76 -0
  326. package/src/parser/rules/inline/italic.ts +2 -30
  327. package/src/parser/rules/inline/line-break/backslash.ts +58 -0
  328. package/src/parser/rules/inline/line-break/elements.ts +9 -0
  329. package/src/parser/rules/inline/line-break/index.ts +3 -0
  330. package/src/parser/rules/inline/line-break/newline.ts +82 -0
  331. package/src/parser/rules/inline/line-break/underscore.ts +45 -0
  332. package/src/parser/rules/inline/link-anchor.ts +6 -81
  333. package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
  334. package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
  335. package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
  336. package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
  337. package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
  338. package/src/parser/rules/inline/link-single.ts +7 -98
  339. package/src/parser/rules/inline/link-star.ts +7 -69
  340. package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
  341. package/src/parser/rules/inline/link-triple/index.ts +62 -0
  342. package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
  343. package/src/parser/rules/inline/link-triple/label.ts +35 -0
  344. package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
  345. package/src/parser/rules/inline/link-triple/target.ts +36 -0
  346. package/src/parser/rules/inline/math-inline/index.ts +40 -0
  347. package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
  348. package/src/parser/rules/inline/monospace.ts +2 -30
  349. package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
  350. package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
  351. package/src/parser/rules/inline/parsing/collect.ts +23 -0
  352. package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
  353. package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
  354. package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
  355. package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
  356. package/src/parser/rules/inline/parsing/rules.ts +34 -0
  357. package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
  358. package/src/parser/rules/inline/raw/angle.ts +40 -0
  359. package/src/parser/rules/inline/raw/double-at.ts +78 -0
  360. package/src/parser/rules/inline/raw/index.ts +26 -0
  361. package/src/parser/rules/inline/raw/result.ts +26 -0
  362. package/src/parser/rules/inline/size/content.ts +65 -0
  363. package/src/parser/rules/inline/size/index.ts +55 -0
  364. package/src/parser/rules/inline/size/open.ts +43 -0
  365. package/src/parser/rules/inline/size/value.ts +45 -0
  366. package/src/parser/rules/inline/span/content.ts +97 -0
  367. package/src/parser/rules/inline/span/elements.ts +108 -0
  368. package/src/parser/rules/inline/span/index.ts +79 -0
  369. package/src/parser/rules/inline/span/newline.ts +50 -0
  370. package/src/parser/rules/inline/span/syntax.ts +70 -0
  371. package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
  372. package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
  373. package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
  374. package/src/parser/rules/inline/subscript.ts +2 -39
  375. package/src/parser/rules/inline/superscript.ts +4 -39
  376. package/src/parser/rules/inline/text/element.ts +5 -0
  377. package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
  378. package/src/parser/rules/inline/underline/child.ts +26 -0
  379. package/src/parser/rules/inline/underline/content.ts +29 -0
  380. package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
  381. package/src/parser/rules/inline/user/element.ts +11 -0
  382. package/src/parser/rules/inline/user/index.ts +34 -0
  383. package/src/parser/rules/inline/user/syntax.ts +67 -0
  384. package/src/parser/rules/inline/utils.ts +4 -344
  385. package/src/parser/rules/tokens.ts +106 -0
  386. package/src/parser/rules/types.ts +9 -252
  387. package/src/parser/depth.ts +0 -251
  388. package/src/parser/parse.ts +0 -315
  389. package/src/parser/postprocess/spanStrip.ts +0 -697
  390. package/src/parser/preprocess/expr.ts +0 -265
  391. package/src/parser/preprocess/utils.ts +0 -250
  392. package/src/parser/preprocess/whitespace.ts +0 -111
  393. package/src/parser/rules/block/align.ts +0 -282
  394. package/src/parser/rules/block/bibliography.ts +0 -359
  395. package/src/parser/rules/block/block-list.ts +0 -689
  396. package/src/parser/rules/block/blockquote.ts +0 -238
  397. package/src/parser/rules/block/code.ts +0 -187
  398. package/src/parser/rules/block/collapsible.ts +0 -337
  399. package/src/parser/rules/block/definition-list.ts +0 -270
  400. package/src/parser/rules/block/div.ts +0 -400
  401. package/src/parser/rules/block/embed-block.ts +0 -153
  402. package/src/parser/rules/block/footnoteblock.ts +0 -200
  403. package/src/parser/rules/block/heading.ts +0 -142
  404. package/src/parser/rules/block/html.ts +0 -222
  405. package/src/parser/rules/block/iframe.ts +0 -239
  406. package/src/parser/rules/block/include.ts +0 -179
  407. package/src/parser/rules/block/list.ts +0 -244
  408. package/src/parser/rules/block/math.ts +0 -183
  409. package/src/parser/rules/block/module/include/resolve.ts +0 -556
  410. package/src/parser/rules/block/module/listpages/types.ts +0 -513
  411. package/src/parser/rules/block/module/walk.ts +0 -380
  412. package/src/parser/rules/block/module.ts +0 -164
  413. package/src/parser/rules/block/orphan-li.ts +0 -177
  414. package/src/parser/rules/block/paragraph.ts +0 -157
  415. package/src/parser/rules/block/table-block.ts +0 -726
  416. package/src/parser/rules/block/table.ts +0 -441
  417. package/src/parser/rules/block/tabview.ts +0 -331
  418. package/src/parser/rules/block/toc.ts +0 -129
  419. package/src/parser/rules/inline/anchor-name.ts +0 -154
  420. package/src/parser/rules/inline/anchor.ts +0 -327
  421. package/src/parser/rules/inline/bibcite.ts +0 -153
  422. package/src/parser/rules/inline/color.ts +0 -140
  423. package/src/parser/rules/inline/equation-ref.ts +0 -115
  424. package/src/parser/rules/inline/expr.ts +0 -526
  425. package/src/parser/rules/inline/footnote.ts +0 -223
  426. package/src/parser/rules/inline/image.ts +0 -328
  427. package/src/parser/rules/inline/line-break.ts +0 -326
  428. package/src/parser/rules/inline/link-triple.ts +0 -267
  429. package/src/parser/rules/inline/math-inline.ts +0 -126
  430. package/src/parser/rules/inline/raw.ts +0 -262
  431. package/src/parser/rules/inline/size.ts +0 -244
  432. package/src/parser/rules/inline/span.ts +0 -424
  433. package/src/parser/rules/inline/user.ts +0 -147
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Configuration for the {@link Lexer}.
3
+ *
4
+ * @group Lexer
5
+ */
6
+ export interface LexerOptions {
7
+ /**
8
+ * When `true` (default), every token carries accurate line/column/offset
9
+ * data. Set to `false` to skip position tracking for faster tokenization
10
+ * when source-map information is not needed.
11
+ */
12
+ trackPositions?: boolean;
13
+ /**
14
+ * When enabled, coalesce ordinary text outside `[[...]]` openers into larger TEXT tokens.
15
+ * This keeps block names and attributes tokenized normally while reducing
16
+ * token volume for large documents. NOTE(review): the default is not stated here - confirm in the lexer.
17
+ */
18
+ compactTextRuns?: boolean;
19
+ }
@@ -0,0 +1,70 @@
1
+ import {
2
+ scanAtToken,
3
+ scanClosingBracketToken,
4
+ scanDashToken,
5
+ scanGreaterToken,
6
+ scanOpeningBracketToken,
7
+ scanPipeToken,
8
+ scanTildeToken,
9
+ type TokenAction,
10
+ } from "./token-actions";
11
+
12
+ export interface PunctuationScanInput { // Everything scanPunctuationToken needs to classify one character.
13
+ char: string; // character that scanPunctuationToken switches on; presumably source[pos] - TODO confirm with caller
14
+ source: string; // text forwarded to every scan helper
15
+ pos: number; // position forwarded to every scan helper alongside source
16
+ lineStart: boolean; // forwarded to the greater-than, dash and tilde scanners only
17
+ splitBlockClose: boolean; // forwarded to the closing-bracket scanner; also decides whether clearSplitBlockCloseAt is reported
18
+ findInvalidAnchorNameEnd: () => number | null; // invoked only for an opening bracket; its result goes to the opening-bracket scanner
19
+ }
20
+
21
+ export type PunctuationScanResult = // Outcome of scanPunctuationToken, discriminated by the handled flag.
22
+ | { handled: false } // no case matched the character, or the tilde scanner returned a falsy value
23
+ | {
24
+ handled: true;
25
+ actions: TokenAction | TokenAction[]; // whatever the matching scan helper returned
26
+ splitBlockCloseAt?: number; // set only by the opening-bracket case, copied from the helper result
27
+ clearSplitBlockCloseAt?: number; // set only by the closing-bracket case: pos when input.splitBlockClose is true, otherwise undefined
28
+ };
29
+
30
+ export function scanPunctuationToken(input: PunctuationScanInput): PunctuationScanResult { // Dispatch on input.char to the matching scan helper and wrap its result.
31
+ const { char, source, pos, lineStart } = input;
32
+
33
+ switch (char) {
34
+ case "[": {
35
+ const action = scanOpeningBracketToken(source, pos, input.findInvalidAnchorNameEnd()); // the callback is invoked only in this case
36
+ return {
37
+ handled: true,
38
+ actions: action,
39
+ splitBlockCloseAt: action.splitBlockCloseAt, // forwarded unchanged from the helper result
40
+ };
41
+ }
42
+
43
+ case "]":
44
+ return {
45
+ handled: true,
46
+ actions: scanClosingBracketToken(source, pos, input.splitBlockClose),
47
+ clearSplitBlockCloseAt: input.splitBlockClose ? pos : undefined, // reports pos only when the split-close flag was set on input
48
+ };
49
+
50
+ case "@":
51
+ return { handled: true, actions: scanAtToken(source, pos) };
52
+
53
+ case ">":
54
+ return { handled: true, actions: scanGreaterToken(source, pos, lineStart) };
55
+
56
+ case "-":
57
+ return { handled: true, actions: scanDashToken(source, pos, lineStart) };
58
+
59
+ case "~": {
60
+ const action = scanTildeToken(source, pos, lineStart);
61
+ return action ? { handled: true, actions: action } : { handled: false }; // a falsy helper result leaves the tilde unhandled
62
+ }
63
+
64
+ case "|":
65
+ return { handled: true, actions: scanPipeToken(source, pos) };
66
+
67
+ default:
68
+ return { handled: false }; // every other character is left to the caller
69
+ }
70
+ }
@@ -0,0 +1,16 @@
1
+ import { advance, current, isAtEnd, type LexerState } from "./state";
2
+
3
+ /**
4
+ * Scan a quoted block-attribute value, including the opening quote and optional
5
+ * closing quote. A newline or end of input terminates the token; the newline is not consumed. Returns the scanned text.
6
+ */
7
+ export function scanQuotedString(state: LexerState): string {
8
+ let quoted = advance(state); // consumes one character unconditionally; assumes the caller is positioned on the opening quote - TODO confirm
9
+ while (!isAtEnd(state) && current(state) !== '"' && current(state) !== "\n") {
10
+ quoted += advance(state); // accumulate the value body one character at a time
11
+ }
12
+ if (current(state) === '"') {
13
+ quoted += advance(state); // include the closing quote only when one is actually present
14
+ }
15
+ return quoted;
16
+ }
@@ -0,0 +1,85 @@
1
+ const MIN_PLAIN_TEXT_RUN_LENGTH = 32;
2
+
3
/**
 * Find the end (exclusive) of a run of spaces and tabs.
 *
 * The character at `pos` itself is not inspected; scanning starts at
 * `pos + 1`, so the result is always at least `pos + 1`.
 */
export function findWhitespaceRunEnd(src: string, pos: number): number {
  let cursor = pos + 1;
  for (; cursor < src.length; cursor++) {
    const unit = src.charCodeAt(cursor);
    if (unit !== 0x20 && unit !== 0x09) {
      break;
    }
  }
  return cursor;
}
10
+
11
/**
 * Find the end (exclusive) of a run of `char` repetitions.
 *
 * The character at `pos` is not inspected; scanning starts at `pos + 1`, so
 * the result is always at least `pos + 1`.
 */
export function findRepeatedCharRunEnd(src: string, pos: number, char: string): number {
  let cursor = pos + 1;
  for (; cursor < src.length; cursor++) {
    if (src[cursor] !== char) {
      break;
    }
  }
  return cursor;
}
18
+
19
/**
 * Measure the run of code units starting at `pos` that are neither ASCII
 * (`<= 0x7f`) nor the U+E000 marker.
 *
 * @returns The exclusive end index when the run is at least
 *   `MIN_PLAIN_TEXT_RUN_LENGTH` code units long, otherwise `null`.
 */
export function findLongPlainTextRunEnd(src: string, pos: number): number | null {
  let cursor = pos;
  for (; cursor < src.length; cursor++) {
    const unit = src.charCodeAt(cursor);
    const stopsRun = unit <= 0x7f || unit === 0xe000;
    if (stopsRun) {
      break;
    }
  }

  if (cursor - pos >= MIN_PLAIN_TEXT_RUN_LENGTH) {
    return cursor;
  }
  return null;
}
31
+
32
/**
 * Find the end (exclusive) of a run of ASCII letters and digits.
 *
 * The character at `pos` is not inspected; scanning starts at `pos + 1`.
 */
export function findAsciiIdentifierEnd(src: string, pos: number): number {
  let cursor = pos + 1;
  for (; cursor < src.length; cursor++) {
    if (!isAsciiAlphanumericCode(src.charCodeAt(cursor))) {
      break;
    }
  }
  return cursor;
}
39
+
40
/**
 * Report whether a UTF-16 code unit is an ASCII digit or ASCII letter.
 */
export function isAsciiAlphanumericCode(code: number): boolean {
  if (code >= 48 && code <= 57) return true; // 0-9
  if (code >= 65 && code <= 90) return true; // A-Z
  return code >= 97 && code <= 122; // a-z
}
43
+
44
/**
 * Find the index of the first compact-plain-text boundary at or after `pos`.
 *
 * @returns That index, or the scan position reached when no boundary is found
 *   before the end of `src`. The result equals `pos` when the character at
 *   `pos` is itself a boundary.
 */
export function findCompactPlainTextRunEnd(src: string, pos: number): number {
  let cursor = pos;
  for (; cursor < src.length; cursor++) {
    if (isCompactPlainTextBoundary(src.charCodeAt(cursor))) {
      break;
    }
  }
  return cursor;
}
55
+
56
/** Code units that terminate a compact plain-text run. */
const COMPACT_PLAIN_TEXT_BOUNDARY_CODES: ReadonlySet<number> = new Set<number>([
  0x0a, // \n
  0xe000, // preprocessed backslash break marker
  0x5b, // [
  0x5d, // ]
  0x40, // @
  0x3e, // >
  0x2d, // -
  0x7e, // ~
  0x7c, // |
  0x7b, // {
  0x7d, // }
  0x2a, // *
  0x3c, // <
  0x5f, // _
  0x5e, // ^
  0x2c, // ,
  0x2f, // /
  0x2b, // +
  0x23, // #
  0x3d, // =
  0x3a, // :
  0x26, // &
  0x5c, // \
]);

/**
 * Report whether a code unit ends a compact plain-text run, i.e. whether it
 * is a newline, the U+E000 marker, or one of the listed punctuation characters.
 */
function isCompactPlainTextBoundary(code: number): boolean {
  return COMPACT_PLAIN_TEXT_BOUNDARY_CODES.has(code);
}
@@ -0,0 +1,24 @@
1
+ import { findWhitespaceRunEnd } from "./runs";
2
+ import type { TokenAction } from "./token-actions";
3
+
4
/**
 * Scan a newline or a run of spaces/tabs at `pos`.
 *
 * @returns A `NEWLINE` action for `"\n"`, a `WHITESPACE` action covering the
 *   whole space/tab run, or `null` for any other character.
 */
export function scanSpacingToken(src: string, pos: number): TokenAction | null {
  switch (src[pos]) {
    case "\n":
      return token("NEWLINE", "\n");
    case " ":
    case "\t": {
      const runEnd = findWhitespaceRunEnd(src, pos);
      return runToken(src, pos, runEnd, "WHITESPACE");
    }
    default:
      return null;
  }
}
17
+
18
/** Build an action for a fixed literal; `length` is the literal's length. */
function token(type: TokenAction["type"], value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}

/** Build an action for the source slice `[pos, end)`; `length` is `end - pos`. */
function runToken(src: string, pos: number, end: number, type: TokenAction["type"]): TokenAction {
  const value = src.slice(pos, end);
  return { type, value, length: end - pos };
}
@@ -0,0 +1,103 @@
1
+ import type { Token } from "./tokens";
2
+ import type { TokenType } from "./tokens";
3
+
4
/**
 * Mutable cursor state carried through a single tokenisation pass.
 */
export interface LexerState {
  /** The text being tokenised. */
  source: string;
  /** Index into `source` of the next character to read. */
  pos: number;
  /** Current line counter; the initial state starts it at 1. */
  line: number;
  /** Current column counter; reset to 1 after a newline. */
  column: number;
  /** True while only spaces/tabs have been consumed since the last newline. */
  lineStart: boolean;
  /** Tokens collected so far. */
  tokens: Token[];
}
15
+
16
/**
 * Create the state for a fresh tokenisation pass over `source`: offset 0,
 * line 1, column 1, at line start, with no tokens collected.
 */
export function createInitialLexerState(source: string): LexerState {
  const initial: LexerState = {
    source,
    pos: 0,
    line: 1,
    column: 1,
    lineStart: true,
    tokens: [],
  };
  return initial;
}
26
+
27
/** Report whether the cursor has reached or passed the end of the source. */
export function isAtEnd(state: LexerState): boolean {
  const { pos, source } = state;
  return pos >= source.length;
}
30
+
31
/**
 * Return the character at the cursor, or the empty string when the cursor is
 * outside the source.
 */
export function current(state: LexerState): string {
  const ch = state.source[state.pos];
  return ch === undefined ? "" : ch;
}
34
+
35
/**
 * Move the cursor forward by up to `n` characters (clamped to the end of the
 * source) and update line/column tracking.
 *
 * @returns The consumed text.
 */
export function advance(state: LexerState, n = 1): string {
  const from = state.pos;
  const to = Math.min(from + n, state.source.length);
  const consumed = state.source.slice(from, to);
  updatePosition(state, from, to);
  return consumed;
}
42
+
43
/**
 * Move the cursor forward by up to `n` characters (clamped to the end of the
 * source) and update line/column tracking, without returning the text.
 */
export function advanceBy(state: LexerState, n = 1): void {
  const from = state.pos;
  const to = Math.min(from + n, state.source.length);
  updatePosition(state, from, to);
}
48
+
49
/**
 * Advance the cursor past a token of known type and length without
 * re-reading the source text.
 *
 * A `NEWLINE` token bumps the line counter, resets the column to 1 and marks
 * the line start. Any other token adds `length` to the column, and every type
 * except `WHITESPACE` clears the line-start flag.
 */
export function advanceByToken(state: LexerState, type: TokenType, length: number): void {
  state.pos += length;

  if (type === "NEWLINE") {
    state.line += 1;
    state.column = 1;
    state.lineStart = true;
  } else {
    state.column += length;
    // Leading whitespace keeps the cursor logically "at line start".
    const keepsLineStart = type === "WHITESPACE";
    if (!keepsLineStart) {
      state.lineStart = false;
    }
  }
}
64
+
65
/**
 * Set the cursor to `end` and fold the consumed slice `[start, end)` into the
 * line/column/line-start tracking.
 */
function updatePosition(state: LexerState, start: number, end: number): void {
  const consumed = state.source.slice(start, end);
  state.pos = end;
  updatePositionFromValue(state, consumed);
}
69
+
70
/**
 * Update line, column and line-start tracking for consumed text `value`.
 *
 * Without a newline the column grows by the text length, and the line-start
 * flag is cleared once anything other than space/tab has been consumed. With
 * newlines the line counter grows by their count, the column becomes one plus
 * the number of characters after the last newline, and the line-start flag
 * reflects whether only spaces/tabs follow it.
 */
function updatePositionFromValue(state: LexerState, value: string): void {
  const lastBreak = value.lastIndexOf("\n");

  if (lastBreak === -1) {
    state.column += value.length;
    if (state.lineStart && hasNonLineStartSpacing(value, 0)) {
      state.lineStart = false;
    }
    return;
  }

  let breakCount = 0;
  for (let at = value.indexOf("\n"); at !== -1; at = value.indexOf("\n", at + 1)) {
    breakCount++;
  }

  state.line += breakCount;
  state.column = value.length - lastBreak;
  state.lineStart = !hasNonLineStartSpacing(value, lastBreak + 1);
}
94
+
95
/**
 * Report whether `value` contains, at or after index `start`, any character
 * other than a space or a tab.
 */
function hasNonLineStartSpacing(value: string, start: number): boolean {
  let index = start;
  while (index < value.length) {
    const unit = value.charCodeAt(index);
    if (unit !== 0x20 && unit !== 0x09) {
      return true;
    }
    index += 1;
  }
  return false;
}
@@ -0,0 +1,80 @@
1
+ import type { TokenAction } from "./token-actions";
2
+ import { findRepeatedCharRunEnd } from "./runs";
3
+ import type { TokenType } from "./tokens";
4
+
5
/**
 * Scan the inline/line-level syntax marker at `pos`, if any.
 *
 * @param src - Source text.
 * @param pos - Index of the character to classify.
 * @param isLineStart - Whether `pos` is at the start of a line; selects list
 *   bullets/numbers and heading markers.
 * @returns The token action, or `null` when the character starts no marker
 *   (for example a lone `{`, or a `+` that is not at line start).
 */
export function scanSimpleSyntaxToken(
  src: string,
  pos: number,
  isLineStart: boolean,
): TokenAction | null {
  const ch = src[pos];

  // Markers that exist only in doubled form.
  if (ch === "{") return pairedToken(src, pos, "{", "MONO_MARKER", "{{");
  if (ch === "}") return pairedToken(src, pos, "}", "MONO_CLOSE", "}}");
  if (ch === "<") return pairedToken(src, pos, "<", "LEFT_DOUBLE_ANGLE", "<<");
  if (ch === "^") return pairedToken(src, pos, "^", "SUPER_MARKER", "^^");
  if (ch === ",") return pairedToken(src, pos, ",", "SUB_MARKER", ",,");

  // Doubled markers that fall back to a single-character token.
  if (ch === "_") {
    const underline = pairedToken(src, pos, "_", "UNDERLINE_MARKER", "__");
    return underline ?? token("UNDERSCORE", "_");
  }
  if (ch === "/") {
    const italic = pairedToken(src, pos, "/", "ITALIC_MARKER", "//");
    return italic ?? token("SLASH", "/");
  }

  // Markers whose meaning depends on the line-start flag.
  if (ch === "*") return scanStarToken(src, pos, isLineStart);
  if (ch === "+") return scanHeadingToken(src, pos, isLineStart);
  if (ch === "#") return scanHashToken(src, pos, isLineStart);

  // Plain single-character tokens.
  if (ch === "=") return token("EQUALS", "=");
  if (ch === ":") return token("COLON", ":");
  if (ch === "&") return token("AMPERSAND", "&");
  if (ch === "\\") return token("BACKSLASH", "\\");

  return null;
}
43
+
44
/**
 * Classify a `*`: `**` is a bold marker; a single star is a list bullet at
 * line start and a plain star elsewhere.
 */
function scanStarToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  const isDoubled = src[pos + 1] === "*";
  if (isDoubled) {
    return token("BOLD_MARKER", "**");
  }
  const singleType: TokenType = isLineStart ? "LIST_BULLET" : "STAR";
  return token(singleType, "*");
}
50
+
51
/**
 * Scan a run of `+` as a heading marker. Only recognised at line start;
 * otherwise returns `null`.
 */
function scanHeadingToken(src: string, pos: number, isLineStart: boolean): TokenAction | null {
  if (!isLineStart) {
    return null;
  }
  const runEnd = findRepeatedCharRunEnd(src, pos, "+");
  return runToken(src, pos, runEnd, "HEADING_MARKER");
}
56
+
57
/**
 * Classify a `#`: `##` is a colour marker; a single hash is a numbered-list
 * marker at line start and a plain hash elsewhere.
 */
function scanHashToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  const isDoubled = src[pos + 1] === "#";
  if (isDoubled) {
    return token("COLOR_MARKER", "##");
  }
  const singleType: TokenType = isLineStart ? "LIST_NUMBER" : "HASH";
  return token(singleType, "#");
}
63
+
64
/**
 * Produce a token for a two-character marker when the character after `pos`
 * equals `secondChar`; otherwise return `null`.
 */
function pairedToken(
  src: string,
  pos: number,
  secondChar: string,
  type: TokenType,
  value: string,
): TokenAction | null {
  if (src[pos + 1] !== secondChar) {
    return null;
  }
  return token(type, value);
}
73
+
74
/** Build an action for a fixed literal; `length` is the literal's length. */
function token(type: TokenType, value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}

/** Build an action for the source slice `[pos, end)`; `length` is `end - pos`. */
function runToken(src: string, pos: number, end: number, type: TokenType): TokenAction {
  const value = src.slice(pos, end);
  return { type, value, length: end - pos };
}
@@ -0,0 +1,41 @@
1
+ import {
2
+ findAsciiIdentifierEnd,
3
+ findCompactPlainTextRunEnd,
4
+ findLongPlainTextRunEnd,
5
+ isAsciiAlphanumericCode,
6
+ } from "./runs";
7
+ import type { TokenAction } from "./token-actions";
8
+
9
/**
 * Fallback text scanner for the character at `pos`.
 *
 * Checks, in order: the U+E000 backslash-break marker (`BACKSLASH_BREAK`), a
 * long non-ASCII run (`TEXT`), an ASCII alphanumeric run (`IDENTIFIER`), and
 * finally a single-character `TEXT` token.
 */
export function scanTextToken(src: string, pos: number): TokenAction {
  const char = src[pos] ?? "";
  const code = char.charCodeAt(0);

  // Backslash line break marker (U+E000, inserted by preproc)
  if (code === 0xe000) {
    return token("BACKSLASH_BREAK", char);
  }

  const longRunEnd = findLongPlainTextRunEnd(src, pos);
  if (longRunEnd !== null) {
    return runToken(src, pos, longRunEnd, "TEXT");
  }

  return isAsciiAlphanumericCode(code)
    ? runToken(src, pos, findAsciiIdentifierEnd(src, pos), "IDENTIFIER")
    : token("TEXT", char);
}
29
+
30
/**
 * Scan a run of characters up to the next compact-plain-text boundary as one
 * `TEXT` token. Returns `null` when the run is empty.
 */
export function scanCompactTextToken(src: string, pos: number): TokenAction | null {
  const runEnd = findCompactPlainTextRunEnd(src, pos);
  if (runEnd > pos) {
    return runToken(src, pos, runEnd, "TEXT");
  }
  return null;
}
34
+
35
/** Build an action for a fixed literal; `length` is the literal's length. */
function token(type: TokenAction["type"], value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}

/** Build an action for the source slice `[pos, end)`; `length` is `end - pos`. */
function runToken(src: string, pos: number, end: number, type: TokenAction["type"]): TokenAction {
  const value = src.slice(pos, end);
  return { type, value, length: end - pos };
}
@@ -0,0 +1,136 @@
1
+ import type { TokenType } from "./tokens";
2
+ import { findRepeatedCharRunEnd } from "./runs";
3
+
4
/**
 * Description of a token produced by a scanner function.
 */
export interface TokenAction {
  /** Token type to emit. */
  type: TokenType;
  /** Token text. */
  value: string;
  /** Length recorded for the token: the literal's length, or `end - pos` for run tokens. */
  length: number;
  /** Set by the opening-bracket scanner to the invalid-anchor end index it was given. */
  splitBlockCloseAt?: number;
}
10
+
11
/**
 * Classify the token that starts with `[` at `pos`.
 *
 * Recognises `[!--`, `[[[`, `[[/`, `[[`, `[#` and `[*`, falling back to a
 * single `[`. For a plain `[[`, a non-null `invalidAnchorEnd` turns the first
 * bracket into a `TEXT` token carrying `splitBlockCloseAt`.
 *
 * @param invalidAnchorEnd - Value stored in `splitBlockCloseAt`, or `null`
 *   to emit a normal `BLOCK_OPEN`.
 */
export function scanOpeningBracketToken(
  src: string,
  pos: number,
  invalidAnchorEnd: number | null,
): TokenAction {
  const second = src[pos + 1];

  if (second === "!" && src[pos + 2] === "-" && src[pos + 3] === "-") {
    return token("COMMENT_OPEN", "[!--");
  }

  if (second === "[") {
    const third = src[pos + 2];
    if (third === "[") {
      return token("LINK_OPEN", "[[[");
    }
    if (third === "/") {
      return token("BLOCK_END_OPEN", "[[/");
    }
    if (invalidAnchorEnd !== null) {
      const literalBracket = token("TEXT", "[");
      return { ...literalBracket, splitBlockCloseAt: invalidAnchorEnd };
    }
    return token("BLOCK_OPEN", "[[");
  }

  if (second === "#") {
    return token("BRACKET_ANCHOR", "[#");
  }
  if (second === "*") {
    return token("BRACKET_STAR", "[*");
  }
  return token("BRACKET_OPEN", "[");
}
39
+
40
/**
 * Classify the token that starts with `]` at `pos`.
 *
 * `]]]` is a link close. `]]` is a block close, unless `splitBlockClose` is
 * set, in which case it is emitted as a `BRACKET_CLOSE` followed by a literal
 * `]` text token. A single `]` is a bracket close.
 */
export function scanClosingBracketToken(
  src: string,
  pos: number,
  splitBlockClose: boolean,
): TokenAction | TokenAction[] {
  if (src[pos + 1] !== "]") {
    return token("BRACKET_CLOSE", "]");
  }
  if (src[pos + 2] === "]") {
    return token("LINK_CLOSE", "]]]");
  }
  return splitBlockClose
    ? [token("BRACKET_CLOSE", "]"), token("TEXT", "]")]
    : token("BLOCK_CLOSE", "]]");
}
56
+
57
/**
 * Classify the token that starts with `@` at `pos`: `@@` opens raw text,
 * `@<` opens a raw block, anything else is a lone `@`.
 */
export function scanAtToken(src: string, pos: number): TokenAction {
  switch (src[pos + 1]) {
    case "@":
      return token("RAW_OPEN", "@@");
    case "<":
      return token("RAW_BLOCK_OPEN", "@<");
    default:
      return token("AT", "@");
  }
}
66
+
67
/**
 * Classify the token that starts with `>` at `pos`.
 *
 * `>@` (raw block close) takes priority. At line start the whole run of `>`
 * becomes a blockquote marker. Elsewhere `>>` is a right double angle and a
 * single `>` is plain text.
 */
export function scanGreaterToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  const next = src[pos + 1];

  if (next === "@") {
    return token("RAW_BLOCK_CLOSE", ">@");
  }
  if (isLineStart) {
    const runEnd = findRepeatedCharRunEnd(src, pos, ">");
    return runToken(src, pos, runEnd, "BLOCKQUOTE_MARKER");
  }
  return next === ">" ? token("RIGHT_DOUBLE_ANGLE", ">>") : token("TEXT", ">");
}
79
+
80
/**
 * Classify the token that starts with `-` at `pos`.
 *
 * Four or more dashes at line start form a horizontal-rule marker covering
 * the whole run. Otherwise `--]` closes a comment, `--` is a strike marker,
 * and a single `-` is plain text.
 */
export function scanDashToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  // Every multi-character form needs a second dash.
  if (src[pos + 1] !== "-") {
    return token("TEXT", "-");
  }

  const third = src[pos + 2];
  if (isLineStart && third === "-" && src[pos + 3] === "-") {
    const runEnd = findRepeatedCharRunEnd(src, pos, "-");
    return runToken(src, pos, runEnd, "HR_MARKER");
  }
  if (third === "]") {
    return token("COMMENT_CLOSE", "--]");
  }
  return token("STRIKE_MARKER", "--");
}
92
+
93
/**
 * Scan a clear-float marker: four or more `~` at line start, optionally
 * followed by `<` (left) or `>` (right), which is included in the token.
 *
 * @returns The clear-float action, or `null` when not at line start or when
 *   fewer than four tildes are present.
 */
export function scanTildeToken(src: string, pos: number, isLineStart: boolean): TokenAction | null {
  const hasFourTildes = src[pos + 1] === "~" && src[pos + 2] === "~" && src[pos + 3] === "~";
  if (!(isLineStart && hasFourTildes)) {
    return null;
  }

  const runEnd = findRepeatedCharRunEnd(src, pos, "~");
  switch (src[runEnd]) {
    case "<":
      return runToken(src, pos, runEnd + 1, "CLEAR_FLOAT_LEFT");
    case ">":
      return runToken(src, pos, runEnd + 1, "CLEAR_FLOAT_RIGHT");
    default:
      return runToken(src, pos, runEnd, "CLEAR_FLOAT");
  }
}
108
+
109
/**
 * Classify the token that starts with `|` at `pos`.
 *
 * A single `|` is a pipe. `||` is a table cell marker, specialised by the
 * third character: `~` header, `<` left, `=` centre, `>` right.
 */
export function scanPipeToken(src: string, pos: number): TokenAction {
  const isDoubled = src[pos + 1] === "|";
  if (!isDoubled) {
    return token("PIPE", "|");
  }

  switch (src[pos + 2]) {
    case "~":
      return token("TABLE_HEADER", "||~");
    case "<":
      return token("TABLE_LEFT", "||<");
    case "=":
      return token("TABLE_CENTER", "||=");
    case ">":
      return token("TABLE_RIGHT", "||>");
    default:
      return token("TABLE_MARKER", "||");
  }
}
129
+
130
/** Build an action for a fixed literal; `length` is the literal's length. */
function token(type: TokenType, value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}

/** Build an action for the source slice `[pos, end)`; `length` is `end - pos`. */
function runToken(src: string, pos: number, end: number, type: TokenType): TokenAction {
  const value = src.slice(pos, end);
  return { type, value, length: end - pos };
}
@@ -0,0 +1,62 @@
1
+ import type { Position } from "@wdprlib/ast";
2
+ import type { LexerState } from "./state";
3
+ import type { Token, TokenType } from "./tokens";
4
+
5
+ const ZERO_POSITION: Position = {
6
+ start: { line: 0, column: 0, offset: 0 },
7
+ end: { line: 0, column: 0, offset: 0 },
8
+ };
9
+
10
/**
 * Build a `Token` for the given type and text.
 *
 * @param state - Lexer state used for position and line-start information.
 * @param trackPositions - When false, the shared zero position is attached
 *   instead of a computed one.
 */
export function createLexerToken(
  state: LexerState,
  type: TokenType,
  value: string,
  trackPositions: boolean,
): Token {
  const position = trackPositions ? currentTokenPosition(state, value) : ZERO_POSITION;
  const lineStart = isTokenAtLineStart(state);
  return { type, value, position, lineStart };
}
23
+
24
/**
 * Return the new "last non-whitespace token type": unchanged for a
 * `WHITESPACE` token, otherwise the given type.
 */
export function updateLastNonWhitespaceType(
  current: TokenType | null,
  type: TokenType,
): TokenType | null {
  if (type === "WHITESPACE") {
    return current;
  }
  return type;
}
30
+
31
/**
 * Compute the block-opener nesting depth after a token of the given type.
 *
 * The depth lets `"` after `=` be recognised as a quoted attribute value only
 * while inside `[[ ... ]]`. `BLOCK_OPEN` and `BLOCK_END_OPEN` increase it,
 * `BLOCK_CLOSE` decreases it (never below zero), and other types leave it
 * unchanged.
 */
export function nextBlockOpenerDepth(current: number, type: TokenType): number {
  switch (type) {
    case "BLOCK_OPEN":
    case "BLOCK_END_OPEN":
      return current + 1;
    case "BLOCK_CLOSE":
      return current > 0 ? current - 1 : current;
    default:
      return current;
  }
}
44
+
45
/**
 * Derive a token's source position from the current state.
 *
 * The end is the state's current line/column/offset; the start is obtained by
 * subtracting the token text length from the column and offset on the same
 * line.
 */
function currentTokenPosition(state: LexerState, value: string): Position {
  const { line, column, pos } = state;
  const width = value.length;

  const start = { line, column: column - width, offset: pos - width };
  const end = { line, column, offset: pos };
  return { start, end };
}
59
+
60
/**
 * A token is at line start when no tokens have been collected yet, or when
 * the most recently collected token is a `NEWLINE`.
 */
function isTokenAtLineStart(state: LexerState): boolean {
  const count = state.tokens.length;
  if (count === 0) {
    return true;
  }
  return state.tokens[count - 1]?.type === "NEWLINE";
}
@@ -0,0 +1,18 @@
1
+ import type { Token } from "./tokens";
2
+ import type { LexerOptions } from "./options";
3
+ import { Lexer } from "./lexer";
4
+
5
/**
 * Tokenise a Wikidot markup source string in a single call.
 *
 * Convenience wrapper equivalent to `new Lexer(source, options).tokenize()`.
 *
 * @param source - Raw Wikidot markup
 * @param options - Optional lexer configuration
 * @returns A flat array of tokens, ending with an `EOF` token
 *
 * @group Lexer
 */
export function tokenize(source: string, options?: LexerOptions): Token[] {
  const lexer = new Lexer(source, options);
  return lexer.tokenize();
}
@@ -37,6 +37,8 @@ export const BLOCK_START_TOKENS: TokenType[] = [
37
37
  "CLEAR_FLOAT_RIGHT", // ~~~~>
38
38
  ];
39
39
 
40
+ export const BLOCK_START_TOKEN_SET: ReadonlySet<TokenType> = new Set(BLOCK_START_TOKENS);
41
+
40
42
  /**
41
43
  * Set of block names recognized by the parser at `[[name]]` / `[[/name]]`.
42
44
  *