@wdprlib/parser 3.1.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10456 -8230
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10460 -8234
- package/package.json +5 -3
- package/src/index.ts +170 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +21 -0
- package/src/lexer/lexer.ts +201 -0
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +175 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +132 -0
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/center/index.ts +72 -0
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/clear-float/index.ts +51 -0
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/comment/index.ts +47 -0
- package/src/parser/rules/block/content-separator/index.ts +49 -0
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/horizontal-rule/index.ts +44 -0
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/iftags/index.ts +108 -0
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +29 -0
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +134 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +73 -0
- package/src/parser/rules/block/module/listpages/extract.ts +76 -0
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +74 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +29 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +56 -0
- package/src/parser/rules/block/module/listusers/extract.ts +40 -0
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +198 -0
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +70 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +15 -0
- package/src/parser/rules/common/attribute-safety.ts +109 -0
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +49 -0
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/comment/index.ts +64 -0
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/guillemet/index.ts +56 -0
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/html/index.ts +81 -0
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +46 -0
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +72 -0
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +73 -0
- package/src/parser/rules/inline/link-star.ts +72 -0
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +50 -0
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/strikethrough/index.ts +60 -0
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +47 -0
- package/src/parser/rules/inline/superscript.ts +49 -0
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/text/index.ts +85 -0
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/underline/index.ts +84 -0
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -0
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import type { Token } from "./tokens";
|
|
2
|
+
import type { TokenType } from "./tokens";
|
|
3
|
+
|
|
4
|
+
/**
 * Internal mutable state carried through a single tokenisation pass.
 *
 * The cursor helpers in this module mutate these fields in place as input is
 * consumed.
 */
export interface LexerState {
  /** The text being tokenised. */
  source: string;
  /** Index into `source` of the next unread character. */
  pos: number;
  /** Line counter; starts at 1 and grows by one for each consumed newline. */
  line: number;
  /** Column counter; starts at 1 and is reset to 1 right after a newline. */
  column: number;
  /** True while nothing but spaces/tabs has been consumed on the current line. */
  lineStart: boolean;
  /**
   * Token list, created empty.
   * NOTE(review): presumably appended to as tokens are emitted — the code
   * that writes to it is not part of this module; confirm against the lexer.
   */
  tokens: Token[];
}
|
|
15
|
+
|
|
16
|
+
/**
 * Builds the starting state for tokenising `source`.
 *
 * The cursor sits at offset 0, the reported position is line 1 / column 1,
 * the lexer is flagged as being at the start of a line, and the token list
 * is a fresh empty array.
 *
 * @param source - Text that will be tokenised.
 * @returns A new, independent state object.
 */
export function createInitialLexerState(source: string): LexerState {
  const initial: LexerState = {
    source,
    pos: 0,
    line: 1,
    column: 1,
    lineStart: true,
    tokens: [],
  };
  return initial;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Reports whether the cursor has reached (or moved past) the end of the source.
 *
 * @param state - Lexer state to inspect; it is not modified.
 * @returns `true` when `pos` is at or beyond `source.length`.
 */
export function isAtEnd(state: LexerState): boolean {
  const { pos, source } = state;
  return source.length <= pos;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Returns the character under the cursor without consuming it.
 *
 * @param state - Lexer state to inspect; it is not modified.
 * @returns The single character at `pos`, or `""` when `pos` does not index
 *   a character of the source (for example at end of input).
 */
export function current(state: LexerState): string {
  const ch = state.source[state.pos];
  return ch ?? "";
}
|
|
34
|
+
|
|
35
|
+
/**
 * Consumes up to `n` characters and returns the consumed text.
 *
 * The end offset is clamped to the source length, so fewer than `n`
 * characters are returned near the end of input. `pos`, `line`, `column`
 * and `lineStart` are updated to reflect the consumed text.
 *
 * @param state - Lexer state to mutate.
 * @param n - Maximum number of characters to consume (default 1).
 * @returns The consumed substring.
 */
export function advance(state: LexerState, n = 1): string {
  const end = Math.min(state.pos + n, state.source.length);
  const value = state.source.slice(state.pos, end);

  // Reuse the substring we already have rather than going through
  // updatePosition, which would slice the same range a second time.
  state.pos = end;
  updatePositionFromValue(state, value);
  return value;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Consumes up to `n` characters without returning them.
 *
 * The end offset is clamped to the source length; position bookkeeping is
 * delegated to `updatePosition`.
 *
 * @param state - Lexer state to mutate.
 * @param n - Maximum number of characters to consume (default 1).
 */
export function advanceBy(state: LexerState, n = 1): void {
  const from = state.pos;
  const to = Math.min(from + n, state.source.length);
  updatePosition(state, from, to);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Moves the cursor past a token whose type and length are already known,
 * updating the position fields from the token type alone (the source text is
 * not inspected).
 *
 * - `NEWLINE`: bumps the line by one, resets the column to 1 and marks the
 *   lexer as being at the start of a line.
 * - `WHITESPACE`: advances the column and leaves `lineStart` untouched.
 * - anything else: advances the column and clears `lineStart`.
 *
 * @param state - Lexer state to mutate.
 * @param type - Type of the token being consumed.
 * @param length - Number of characters the token spans; added to `pos` as-is.
 */
export function advanceByToken(state: LexerState, type: TokenType, length: number): void {
  state.pos += length;

  switch (type) {
    case "NEWLINE":
      state.line += 1;
      state.column = 1;
      state.lineStart = true;
      break;
    case "WHITESPACE":
      state.column += length;
      break;
    default:
      state.column += length;
      state.lineStart = false;
      break;
  }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Moves the cursor to `end` and updates line/column/line-start tracking for
 * the text between `start` and `end`.
 *
 * @param state - Lexer state to mutate.
 * @param start - Offset of the first consumed character.
 * @param end - Offset just past the last consumed character; becomes `pos`.
 */
function updatePosition(state: LexerState, start: number, end: number): void {
  const consumed = state.source.slice(start, end);
  state.pos = end;
  updatePositionFromValue(state, consumed);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Updates `line`, `column` and `lineStart` for a piece of consumed text.
 * `pos` is not touched here.
 *
 * Without a newline in `value`, the column grows by its length and
 * `lineStart` is cleared if the text holds anything other than spaces/tabs.
 * With one or more newlines, the line grows by the number of newlines, the
 * column becomes one plus the number of characters after the last newline,
 * and `lineStart` reflects whether that trailing part is only spaces/tabs.
 *
 * @param state - Lexer state to mutate.
 * @param value - The text that was just consumed.
 */
function updatePositionFromValue(state: LexerState, value: string): void {
  const lastBreak = value.lastIndexOf("\n");

  if (lastBreak === -1) {
    state.column += value.length;
    if (state.lineStart && hasNonLineStartSpacing(value, 0)) {
      state.lineStart = false;
    }
    return;
  }

  // Count every "\n" in the consumed text.
  let breaks = 0;
  for (let at = value.indexOf("\n"); at !== -1; at = value.indexOf("\n", at + 1)) {
    breaks += 1;
  }

  state.line += breaks;
  state.column = value.length - lastBreak;
  state.lineStart = !hasNonLineStartSpacing(value, lastBreak + 1);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Tells whether `value`, scanned from index `start` to its end, contains any
 * character other than a space or a tab.
 *
 * @param value - Text to scan.
 * @param start - Index at which scanning begins.
 * @returns `true` as soon as a non-space, non-tab character is found;
 *   `false` if the scanned range is empty or holds only spaces/tabs.
 */
function hasNonLineStartSpacing(value: string, start: number): boolean {
  let index = start;
  while (index < value.length) {
    const ch = value[index];
    const isBlank = ch === " " || ch === "\t";
    if (!isBlank) {
      return true;
    }
    index += 1;
  }
  return false;
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type { TokenAction } from "./token-actions";
|
|
2
|
+
import { findRepeatedCharRunEnd } from "./runs";
|
|
3
|
+
import type { TokenType } from "./tokens";
|
|
4
|
+
|
|
5
|
+
/**
 * Recognises a fixed-shape syntax token starting at `src[pos]`.
 *
 * The character at `pos` selects the handling:
 * - `*`, `+`, `#` are delegated to dedicated scanners that take the
 *   line-start flag into account;
 * - `{`, `}`, `<`, `^`, `,` only form a token when doubled;
 * - `_` and `/` form a marker when doubled and a single-character token
 *   otherwise;
 * - `=`, `:`, `&`, `\` always form a single-character token.
 *
 * @param src - Full source text.
 * @param pos - Index of the character to examine.
 * @param isLineStart - Whether the lexer is currently at the start of a line.
 * @returns The matched token action, or `null` when nothing matches here.
 */
export function scanSimpleSyntaxToken(
  src: string,
  pos: number,
  isLineStart: boolean,
): TokenAction | null {
  const head = src[pos];

  // Characters whose token depends on line position or needs a dedicated scanner.
  if (head === "*") return scanStarToken(src, pos, isLineStart);
  if (head === "+") return scanHeadingToken(src, pos, isLineStart);
  if (head === "#") return scanHashToken(src, pos, isLineStart);

  // Characters that only count when doubled.
  if (head === "{") return pairedToken(src, pos, "{", "MONO_MARKER", "{{");
  if (head === "}") return pairedToken(src, pos, "}", "MONO_CLOSE", "}}");
  if (head === "<") return pairedToken(src, pos, "<", "LEFT_DOUBLE_ANGLE", "<<");
  if (head === "^") return pairedToken(src, pos, "^", "SUPER_MARKER", "^^");
  if (head === ",") return pairedToken(src, pos, ",", "SUB_MARKER", ",,");

  // Doubled markers that fall back to a single-character token.
  if (head === "_") {
    return pairedToken(src, pos, "_", "UNDERLINE_MARKER", "__") ?? token("UNDERSCORE", "_");
  }
  if (head === "/") {
    return pairedToken(src, pos, "/", "ITALIC_MARKER", "//") ?? token("SLASH", "/");
  }

  // Unconditional single-character tokens.
  if (head === "=") return token("EQUALS", "=");
  if (head === ":") return token("COLON", ":");
  if (head === "&") return token("AMPERSAND", "&");
  if (head === "\\") return token("BACKSLASH", "\\");

  return null;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Classifies a `*` at `src[pos]`.
 *
 * A following `*` always wins and yields `BOLD_MARKER` (`**`). Otherwise a
 * lone `*` is a `LIST_BULLET` at the start of a line and a `STAR` elsewhere.
 *
 * @param src - Full source text.
 * @param pos - Index of the `*` (the caller has already matched it).
 * @param isLineStart - Whether the lexer is at the start of a line.
 */
function scanStarToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  const doubled = src[pos + 1] === "*";
  if (doubled) {
    return token("BOLD_MARKER", "**");
  }
  if (isLineStart) {
    return token("LIST_BULLET", "*");
  }
  return token("STAR", "*");
}
|
|
50
|
+
|
|
51
|
+
/**
 * Classifies a `+` at `src[pos]`.
 *
 * Away from the start of a line no token is produced. At the start of a line
 * the span from `pos` to the offset reported by `findRepeatedCharRunEnd`
 * becomes a `HEADING_MARKER`.
 * NOTE(review): that helper presumably returns the offset just past the run
 * of `+` characters — it is defined in another module; confirm there.
 *
 * @param src - Full source text.
 * @param pos - Index of the `+` (the caller has already matched it).
 * @param isLineStart - Whether the lexer is at the start of a line.
 * @returns The heading marker token, or `null` when not at line start.
 */
function scanHeadingToken(src: string, pos: number, isLineStart: boolean): TokenAction | null {
  if (!isLineStart) {
    return null;
  }
  const runEnd = findRepeatedCharRunEnd(src, pos, "+");
  return runToken(src, pos, runEnd, "HEADING_MARKER");
}
|
|
56
|
+
|
|
57
|
+
/**
 * Classifies a `#` at `src[pos]`.
 *
 * A following `#` always wins and yields `COLOR_MARKER` (`##`). Otherwise a
 * lone `#` is a `LIST_NUMBER` at the start of a line and a `HASH` elsewhere.
 *
 * @param src - Full source text.
 * @param pos - Index of the `#` (the caller has already matched it).
 * @param isLineStart - Whether the lexer is at the start of a line.
 */
function scanHashToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  const doubled = src[pos + 1] === "#";
  if (doubled) {
    return token("COLOR_MARKER", "##");
  }
  if (isLineStart) {
    return token("LIST_NUMBER", "#");
  }
  return token("HASH", "#");
}
|
|
63
|
+
|
|
64
|
+
/**
 * Emit a fixed two-character token when the character after `pos` matches.
 *
 * @param src - Text being scanned
 * @param pos - Index of the first character of the candidate pair
 * @param secondChar - Character required at `pos + 1`
 * @param type - Token type to emit on a match
 * @param value - Literal value to emit on a match
 * @returns The token on a match, otherwise `null`
 */
function pairedToken(
  src: string,
  pos: number,
  secondChar: string,
  type: TokenType,
  value: string,
): TokenAction | null {
  if (src[pos + 1] !== secondChar) {
    return null;
  }
  return token(type, value);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build a token action for a fixed lexeme.
 *
 * @param type - Token type to emit
 * @param value - Literal lexeme text
 * @returns An action whose `length` equals `value.length`
 */
function token(type: TokenType, value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Build a token action for the slice `src[pos, end)`.
 *
 * @param src - Text being scanned
 * @param pos - Inclusive start index
 * @param end - Exclusive end index
 * @param type - Token type to emit
 * @returns An action carrying the slice; `length` is `end - pos`
 */
function runToken(src: string, pos: number, end: number, type: TokenType): TokenAction {
  const value = src.slice(pos, end);
  // length is derived from the indices, not from the slice itself.
  return { type, value, length: end - pos };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import {
|
|
2
|
+
findAsciiIdentifierEnd,
|
|
3
|
+
findCompactPlainTextRunEnd,
|
|
4
|
+
findLongPlainTextRunEnd,
|
|
5
|
+
isAsciiAlphanumericCode,
|
|
6
|
+
} from "./runs";
|
|
7
|
+
import type { TokenAction } from "./token-actions";
|
|
8
|
+
|
|
9
|
+
/**
 * Scan a text-like token starting at `pos`.
 *
 * Checks are tried in this order: the U+E000 line-break marker, a long
 * plain-text run (when `findLongPlainTextRunEnd` reports one), an ASCII
 * alphanumeric identifier, and finally a single-character `TEXT` token.
 *
 * @param src - Text being scanned
 * @param pos - Index to scan from
 * @returns The token action for the text found at `pos`
 */
export function scanTextToken(src: string, pos: number): TokenAction {
  const char = src[pos] ?? "";
  const code = char.charCodeAt(0);

  // Backslash line break marker (U+E000, inserted by preproc)
  if (code === 0xe000) {
    return token("BACKSLASH_BREAK", char);
  }

  const longRunEnd = findLongPlainTextRunEnd(src, pos);
  if (longRunEnd !== null) {
    return runToken(src, pos, longRunEnd, "TEXT");
  }

  if (isAsciiAlphanumericCode(code)) {
    const identifierEnd = findAsciiIdentifierEnd(src, pos);
    return runToken(src, pos, identifierEnd, "IDENTIFIER");
  }

  // Fallback: emit the single code unit as plain text.
  return token("TEXT", char);
}
|
|
29
|
+
|
|
30
|
+
/**
 * Scan a compact plain-text run starting at `pos`.
 *
 * @param src - Text being scanned
 * @param pos - Index to scan from
 * @returns A `TEXT` token for `src[pos, end)` when `findCompactPlainTextRunEnd`
 *   reports an end beyond `pos`, otherwise `null`
 */
export function scanCompactTextToken(src: string, pos: number): TokenAction | null {
  const runEnd = findCompactPlainTextRunEnd(src, pos);
  if (!(runEnd > pos)) {
    return null;
  }
  return runToken(src, pos, runEnd, "TEXT");
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build a token action for a fixed lexeme.
 *
 * @param type - Token type to emit
 * @param value - Literal lexeme text
 * @returns An action whose `length` equals `value.length`
 */
function token(type: TokenAction["type"], value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build a token action for the slice `src[pos, end)`.
 *
 * @param src - Text being scanned
 * @param pos - Inclusive start index
 * @param end - Exclusive end index
 * @param type - Token type to emit
 * @returns An action carrying the slice; `length` is `end - pos`
 */
function runToken(src: string, pos: number, end: number, type: TokenAction["type"]): TokenAction {
  const value = src.slice(pos, end);
  // length is derived from the indices, not from the slice itself.
  return { type, value, length: end - pos };
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import type { TokenType } from "./tokens";
|
|
2
|
+
import { findRepeatedCharRunEnd } from "./runs";
|
|
3
|
+
|
|
4
|
+
/**
 * Result of a single scan step: which token to emit and how long it is.
 */
export interface TokenAction {
  /** Token type to emit */
  type: TokenType;
  /** Lexeme text carried by the token */
  value: string;
  /**
   * Lexeme length in UTF-16 code units: `value.length` for fixed lexemes,
   * `end - pos` for sliced runs.
   */
  length: number;
  /**
   * Set by `scanOpeningBracketToken` when `[[` is emitted as a plain `[`
   * `TEXT` token; carries the `invalidAnchorEnd` value it was given.
   * NOTE(review): how the lexer consumes this offset is not visible here.
   */
  splitBlockCloseAt?: number;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Classify a `[` located at `src[pos]`.
 *
 * Candidates, in priority order: `[!--`, `[[[`, `[[/`, `[[`, `[#`, `[*`, `[`.
 *
 * @param src - Text being scanned
 * @param pos - Index of the `[`
 * @param invalidAnchorEnd - When non-null, a plain `[[` is emitted as a
 *   one-character `TEXT` token carrying this value in `splitBlockCloseAt`
 *   instead of `BLOCK_OPEN`
 * @returns The token action for the bracket sequence
 */
export function scanOpeningBracketToken(
  src: string,
  pos: number,
  invalidAnchorEnd: number | null,
): TokenAction {
  const next = src[pos + 1];

  switch (next) {
    case "!":
      if (src[pos + 2] === "-" && src[pos + 3] === "-") {
        return token("COMMENT_OPEN", "[!--");
      }
      break;
    case "[": {
      const third = src[pos + 2];
      if (third === "[") {
        return token("LINK_OPEN", "[[[");
      }
      if (third === "/") {
        return token("BLOCK_END_OPEN", "[[/");
      }
      if (invalidAnchorEnd !== null) {
        // Emit only the first bracket as text and record the supplied offset.
        return { ...token("TEXT", "["), splitBlockCloseAt: invalidAnchorEnd };
      }
      return token("BLOCK_OPEN", "[[");
    }
    case "#":
      return token("BRACKET_ANCHOR", "[#");
    case "*":
      return token("BRACKET_STAR", "[*");
    default:
      break;
  }

  return token("BRACKET_OPEN", "[");
}
|
|
39
|
+
|
|
40
|
+
/**
 * Classify a `]` located at `src[pos]`.
 *
 * @param src - Text being scanned
 * @param pos - Index of the `]`
 * @param splitBlockClose - When `true`, a `]]` is emitted as two tokens
 *   (`BRACKET_CLOSE` then `TEXT`) instead of a single `BLOCK_CLOSE`
 * @returns `LINK_CLOSE` for `]]]`, `BLOCK_CLOSE` (or the split pair) for `]]`,
 *   otherwise `BRACKET_CLOSE`
 */
export function scanClosingBracketToken(
  src: string,
  pos: number,
  splitBlockClose: boolean,
): TokenAction | TokenAction[] {
  if (src[pos + 1] !== "]") {
    return token("BRACKET_CLOSE", "]");
  }
  if (src[pos + 2] === "]") {
    return token("LINK_CLOSE", "]]]");
  }
  return splitBlockClose
    ? [token("BRACKET_CLOSE", "]"), token("TEXT", "]")]
    : token("BLOCK_CLOSE", "]]");
}
|
|
56
|
+
|
|
57
|
+
/**
 * Classify an `@` located at `src[pos]`.
 *
 * @param src - Text being scanned
 * @param pos - Index of the `@`
 * @returns `RAW_OPEN` for `@@`, `RAW_BLOCK_OPEN` for `@<`, otherwise `AT`
 */
export function scanAtToken(src: string, pos: number): TokenAction {
  switch (src[pos + 1]) {
    case "@":
      return token("RAW_OPEN", "@@");
    case "<":
      return token("RAW_BLOCK_OPEN", "@<");
    default:
      return token("AT", "@");
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Classify a `>` located at `src[pos]`.
 *
 * `>@` is checked first. At line start anything else becomes a
 * `BLOCKQUOTE_MARKER` run, so `>>` is only a `RIGHT_DOUBLE_ANGLE` away
 * from line start.
 *
 * @param src - Text being scanned
 * @param pos - Index of the `>`
 * @param isLineStart - Whether the caller treats `pos` as the start of a line
 * @returns The token action for the `>` sequence
 */
export function scanGreaterToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  const next = src[pos + 1];
  if (next === "@") {
    return token("RAW_BLOCK_CLOSE", ">@");
  }
  if (isLineStart) {
    const runEnd = findRepeatedCharRunEnd(src, pos, ">");
    return runToken(src, pos, runEnd, "BLOCKQUOTE_MARKER");
  }
  return next === ">" ? token("RIGHT_DOUBLE_ANGLE", ">>") : token("TEXT", ">");
}
|
|
79
|
+
|
|
80
|
+
/**
 * Classify a `-` located at `src[pos]`.
 *
 * @param src - Text being scanned
 * @param pos - Index of the `-`
 * @param isLineStart - Whether the caller treats `pos` as the start of a line
 * @returns `HR_MARKER` for four or more dashes at line start, `COMMENT_CLOSE`
 *   for `--]`, `STRIKE_MARKER` for `--`, otherwise a one-character `TEXT`
 */
export function scanDashToken(src: string, pos: number, isLineStart: boolean): TokenAction {
  // Every multi-character form needs a second dash.
  if (src[pos + 1] !== "-") {
    return token("TEXT", "-");
  }

  const third = src[pos + 2];
  if (isLineStart && third === "-" && src[pos + 3] === "-") {
    const runEnd = findRepeatedCharRunEnd(src, pos, "-");
    return runToken(src, pos, runEnd, "HR_MARKER");
  }
  if (third === "]") {
    return token("COMMENT_CLOSE", "--]");
  }
  return token("STRIKE_MARKER", "--");
}
|
|
92
|
+
|
|
93
|
+
/**
 * Classify a `~` located at `src[pos]` as a clear-float marker.
 *
 * A marker needs line start and at least four consecutive tildes. One
 * trailing `<` or `>` directly after the run is included in the token and
 * selects the left/right variant.
 *
 * @param src - Text being scanned
 * @param pos - Index of the first `~`
 * @param isLineStart - Whether the caller treats `pos` as the start of a line
 * @returns The clear-float token, or `null` when the preconditions fail
 */
export function scanTildeToken(src: string, pos: number, isLineStart: boolean): TokenAction | null {
  const followedByThreeTildes =
    src[pos + 1] === "~" && src[pos + 2] === "~" && src[pos + 3] === "~";
  if (!(isLineStart && followedByThreeTildes)) {
    return null;
  }

  const runEnd = findRepeatedCharRunEnd(src, pos, "~");
  switch (src[runEnd]) {
    case "<":
      return runToken(src, pos, runEnd + 1, "CLEAR_FLOAT_LEFT");
    case ">":
      return runToken(src, pos, runEnd + 1, "CLEAR_FLOAT_RIGHT");
    default:
      return runToken(src, pos, runEnd, "CLEAR_FLOAT");
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Classify a `|` located at `src[pos]`.
 *
 * @param src - Text being scanned
 * @param pos - Index of the `|`
 * @returns `PIPE` for a lone `|`; for `||`, the table token selected by the
 *   third character (`~` header, `<` left, `=` center, `>` right), or
 *   `TABLE_MARKER` when none of those follows
 */
export function scanPipeToken(src: string, pos: number): TokenAction {
  const isDoubled = src[pos + 1] === "|";
  if (!isDoubled) {
    return token("PIPE", "|");
  }

  switch (src[pos + 2]) {
    case "~":
      return token("TABLE_HEADER", "||~");
    case "<":
      return token("TABLE_LEFT", "||<");
    case "=":
      return token("TABLE_CENTER", "||=");
    case ">":
      return token("TABLE_RIGHT", "||>");
    default:
      return token("TABLE_MARKER", "||");
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Build a token action for a fixed lexeme.
 *
 * @param type - Token type to emit
 * @param value - Literal lexeme text
 * @returns An action whose `length` equals `value.length`
 */
function token(type: TokenType, value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Build a token action for the slice `src[pos, end)`.
 *
 * @param src - Text being scanned
 * @param pos - Inclusive start index
 * @param end - Exclusive end index
 * @param type - Token type to emit
 * @returns An action carrying the slice; `length` is `end - pos`
 */
function runToken(src: string, pos: number, end: number, type: TokenType): TokenAction {
  const value = src.slice(pos, end);
  // length is derived from the indices, not from the slice itself.
  return { type, value, length: end - pos };
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { Position } from "@wdprlib/ast";
|
|
2
|
+
import type { LexerState } from "./state";
|
|
3
|
+
import type { Token, TokenType } from "./tokens";
|
|
4
|
+
|
|
5
|
+
/**
 * All-zero placeholder position that `createLexerToken` attaches when
 * position tracking is disabled. The same object instance is shared by
 * every such token.
 */
const ZERO_POSITION: Position = {
  start: { line: 0, column: 0, offset: 0 },
  end: { line: 0, column: 0, offset: 0 },
};
|
|
9
|
+
|
|
10
|
+
/**
 * Build a {@link Token} from the current lexer state.
 *
 * @param state - Lexer state, read for position and line-start information
 * @param type - Token type to emit
 * @param value - Lexeme text
 * @param trackPositions - When `false`, the shared `ZERO_POSITION` is used
 *   instead of computing a position from `state`
 * @returns The new token
 */
export function createLexerToken(
  state: LexerState,
  type: TokenType,
  value: string,
  trackPositions: boolean,
): Token {
  const position = trackPositions ? currentTokenPosition(state, value) : ZERO_POSITION;
  const lineStart = isTokenAtLineStart(state);
  return { type, value, position, lineStart };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Compute the "last non-whitespace token type" after emitting a token.
 *
 * @param current - Previously remembered type, or `null` if none
 * @param type - Type of the token just emitted
 * @returns `current` unchanged when `type` is `WHITESPACE`, otherwise `type`
 */
export function updateLastNonWhitespaceType(
  current: TokenType | null,
  type: TokenType,
): TokenType | null {
  if (type === "WHITESPACE") {
    return current;
  }
  return type;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Track block-opener nesting so `"` after `=` is only recognised as a quoted
 * attribute value while inside `[[ ... ]]`.
 *
 * @param current - Nesting depth before the token
 * @param type - Type of the token just emitted
 * @returns `current + 1` for `BLOCK_OPEN` / `BLOCK_END_OPEN`, `current - 1`
 *   for `BLOCK_CLOSE` when the depth is positive, otherwise `current`
 */
export function nextBlockOpenerDepth(current: number, type: TokenType): number {
  switch (type) {
    case "BLOCK_OPEN":
    case "BLOCK_END_OPEN":
      return current + 1;
    case "BLOCK_CLOSE":
      // Never drop below the depth we already have when it is not positive.
      return current > 0 ? current - 1 : current;
    default:
      return current;
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Derive a token's position from the lexer state.
 *
 * The state's `line`, `column` and `pos` are used as the token's end; the
 * start is computed by stepping back `value.length` on the same line.
 * NOTE(review): this presumes the state has already advanced past the token
 * and that the token does not span lines — confirm against the caller.
 *
 * @param state - Lexer state supplying `line`, `column` and `pos`
 * @param value - Lexeme text of the token
 * @returns The start/end position pair
 */
function currentTokenPosition(state: LexerState, value: string): Position {
  const span = value.length;
  const { line, column, pos } = state;
  const start = { line, column: column - span, offset: pos - span };
  const end = { line, column, offset: pos };
  return { start, end };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Report whether the next token to be emitted starts a line.
 *
 * True when no token has been emitted yet or the most recent token is a
 * `NEWLINE`. Preceding `WHITESPACE` tokens are not skipped here.
 *
 * @param state - Lexer state whose `tokens` list is inspected
 * @returns Whether the next token is at line start
 */
function isTokenAtLineStart(state: LexerState): boolean {
  const { tokens } = state;
  if (tokens.length === 0) {
    return true;
  }
  return tokens[tokens.length - 1]?.type === "NEWLINE";
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { Token } from "./tokens";
|
|
2
|
+
import type { LexerOptions } from "./options";
|
|
3
|
+
import { Lexer } from "./lexer";
|
|
4
|
+
|
|
5
|
+
/**
 * Tokenise a Wikidot markup source string in one call.
 *
 * Convenience wrapper that constructs a `Lexer` for `source` and runs it,
 * i.e. `new Lexer(source, options).tokenize()`.
 *
 * @param source - Raw Wikidot markup
 * @param options - Optional lexer configuration
 * @returns A flat array of tokens, ending with an `EOF` token
 *
 * @group Lexer
 */
export function tokenize(source: string, options?: LexerOptions): Token[] {
  const lexer = new Lexer(source, options);
  return lexer.tokenize();
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import type { Position } from "@wdprlib/ast";
|
|
2
|
+
|
|
3
|
+
/**
 * Every distinct lexeme the Wikidot lexer can produce.
 *
 * Each value corresponds to a fixed character sequence (or class of
 * sequences) in Wikidot markup. The inline comments show the literal
 * text that produces each token type.
 *
 * @group Lexer
 */
export type TokenType =
  // Special
  | "EOF"
  | "TEXT"
  | "IDENTIFIER" // alphanumeric word
  | "NEWLINE"
  | "WHITESPACE"

  // Block delimiters
  | "BLOCK_OPEN" // [[
  | "BLOCK_CLOSE" // ]]
  | "BLOCK_END_OPEN" // [[/

  // Inline formatting
  | "BOLD_MARKER" // **
  | "ITALIC_MARKER" // //
  | "UNDERLINE_MARKER" // __
  | "STRIKE_MARKER" // --
  | "SUPER_MARKER" // ^^
  | "SUB_MARKER" // ,,
  | "MONO_MARKER" // {{
  | "MONO_CLOSE" // }}

  // Special syntax
  | "HEADING_MARKER" // run of + (at line start)
  | "HR_MARKER" // ---- or longer run of - (at line start)
  | "LIST_BULLET" // * (at line start)
  | "LIST_NUMBER" // # (at line start)
  | "BLOCKQUOTE_MARKER" // run of > (at line start)
  | "TABLE_MARKER" // ||
  | "TABLE_HEADER" // ||~ (header cell)
  | "TABLE_LEFT" // ||< (left align)
  | "TABLE_CENTER" // ||= (center align)
  | "TABLE_RIGHT" // ||> (right align)

  // Code blocks
  | "CODE_OPEN" // [[code]]
  | "CODE_CLOSE" // [[/code]]

  // Links
  | "LINK_OPEN" // [[[
  | "LINK_CLOSE" // ]]]
  | "BRACKET_OPEN" // [
  | "BRACKET_CLOSE" // ]
  | "BRACKET_ANCHOR" // [#
  | "BRACKET_STAR" // [*

  // Special characters
  | "PIPE" // |
  | "EQUALS" // =
  | "COLON" // :
  | "SLASH" // /
  | "STAR" // *
  | "HASH" // #
  | "AT" // @
  | "AMPERSAND" // &
  | "BACKSLASH" // \
  | "QUOTED_STRING" // "..."

  // Raw/Escape
  | "RAW_OPEN" // @@
  | "RAW_CLOSE" // @@
  | "RAW_BLOCK_OPEN" // @<
  | "RAW_BLOCK_CLOSE" // >@

  // Color
  | "COLOR_MARKER" // ##

  // Other
  | "UNDERSCORE" // _ (single underscore, for line break)
  | "BACKSLASH_BREAK" // U+E000 (preproc marker for \ at end of line)

  // Comment
  | "COMMENT_OPEN" // [!--
  | "COMMENT_CLOSE" // --]

  // Clear float (four or more tildes, at line start)
  | "CLEAR_FLOAT" // ~~~~
  | "CLEAR_FLOAT_LEFT" // ~~~~<
  | "CLEAR_FLOAT_RIGHT" // ~~~~>

  // Double angle (guillemet)
  | "LEFT_DOUBLE_ANGLE" // <<
  | "RIGHT_DOUBLE_ANGLE"; // >> (non-line-start)
|
|
96
|
+
|
|
97
|
+
/**
 * A single lexical token produced by the `Lexer`.
 *
 * The parser stage consumes these. A token bundles its category (`type`),
 * its literal text (`value`), where it came from (`position`), and a
 * line-start flag — needed because several Wikidot constructs (headings,
 * lists, blockquotes, horizontal rules) are only valid at line start.
 *
 * @group Lexer
 */
export interface Token {
  /** The lexeme category */
  type: TokenType;
  /** The literal source text that produced this token */
  value: string;
  /** Start/end location in the original source string */
  position: Position;
  /**
   * `true` when this token is the first non-whitespace token on its
   * line. Block-level rules (headings, lists, blockquotes) check this
   * flag before attempting to match.
   *
   * NOTE(review): the token factory sets this flag only when no token
   * precedes it or the preceding token is `NEWLINE` — confirm how leading
   * whitespace is meant to be treated.
   */
  lineStart: boolean;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Construct a {@link Token} value.
 *
 * The arguments are stored as given; nothing is copied or validated.
 *
 * @param type - The lexeme category
 * @param value - Literal source text
 * @param position - Source location range
 * @param lineStart - Whether the token starts a new line (defaults to `false`)
 * @returns A new token object
 *
 * @group Lexer
 */
export function createToken(
  type: TokenType,
  value: string,
  position: Position,
  lineStart = false,
): Token {
  const created: Token = { type, value, position, lineStart };
  return created;
}
|