@wdprlib/parser 3.1.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10456 -8230
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10460 -8234
- package/package.json +5 -3
- package/src/index.ts +170 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +21 -0
- package/src/lexer/lexer.ts +201 -0
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +175 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +132 -0
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/center/index.ts +72 -0
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/clear-float/index.ts +51 -0
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/comment/index.ts +47 -0
- package/src/parser/rules/block/content-separator/index.ts +49 -0
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/horizontal-rule/index.ts +44 -0
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/iftags/index.ts +108 -0
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +29 -0
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +134 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +73 -0
- package/src/parser/rules/block/module/listpages/extract.ts +76 -0
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +74 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +29 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +56 -0
- package/src/parser/rules/block/module/listusers/extract.ts +40 -0
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +198 -0
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +70 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +15 -0
- package/src/parser/rules/common/attribute-safety.ts +109 -0
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +49 -0
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/comment/index.ts +64 -0
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/guillemet/index.ts +56 -0
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/html/index.ts +81 -0
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +46 -0
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +72 -0
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +73 -0
- package/src/parser/rules/inline/link-star.ts +72 -0
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +50 -0
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/strikethrough/index.ts +60 -0
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +47 -0
- package/src/parser/rules/inline/superscript.ts +49 -0
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/text/index.ts +85 -0
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/underline/index.ts +84 -0
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -0
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -0
- package/src/parser/toc.ts +130 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wdprlib/parser",
|
|
3
|
-
"version": "3.1.2",
|
|
3
|
+
"version": "4.0.0",
|
|
4
4
|
"description": "Parser for Wikidot markup",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ast",
|
|
@@ -15,7 +15,8 @@
|
|
|
15
15
|
"directory": "packages/parser"
|
|
16
16
|
},
|
|
17
17
|
"files": [
|
|
18
|
-
"dist"
|
|
18
|
+
"dist",
|
|
19
|
+
"src"
|
|
19
20
|
],
|
|
20
21
|
"type": "module",
|
|
21
22
|
"sideEffects": false,
|
|
@@ -24,6 +25,7 @@
|
|
|
24
25
|
"types": "./dist/index.d.ts",
|
|
25
26
|
"exports": {
|
|
26
27
|
".": {
|
|
28
|
+
"bun": "./src/index.ts",
|
|
27
29
|
"import": {
|
|
28
30
|
"types": "./dist/index.d.ts",
|
|
29
31
|
"default": "./dist/index.js"
|
|
@@ -39,6 +41,6 @@
|
|
|
39
41
|
},
|
|
40
42
|
"dependencies": {
|
|
41
43
|
"@braintree/sanitize-url": "^7.1.1",
|
|
42
|
-
"@wdprlib/ast": "2.
|
|
44
|
+
"@wdprlib/ast": "2.1.0"
|
|
43
45
|
}
|
|
44
46
|
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wikidot markup parser.
|
|
3
|
+
*
|
|
4
|
+
* This package converts Wikidot wikitext source into an abstract syntax
|
|
5
|
+
* tree (AST) defined by `@wdprlib/ast`. It also provides module-resolution
|
|
6
|
+
* utilities for dynamic constructs such as `[[module ListPages]]`,
|
|
7
|
+
* `[[module ListUsers]]`, `[[include]]`, and `[[iftags]]`.
|
|
8
|
+
*
|
|
9
|
+
* Typical usage:
|
|
10
|
+
*
|
|
11
|
+
* ```ts
|
|
12
|
+
* import { parse } from "@wdprlib/parser";
|
|
13
|
+
*
|
|
14
|
+
* const { ast, diagnostics } = parse("**bold** and //italic//");
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* For server-side module resolution, see {@link extractDataRequirements},
|
|
18
|
+
* {@link resolveModules}, and {@link resolveIncludes}.
|
|
19
|
+
*
|
|
20
|
+
* @packageDocumentation
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
// Re-export AST types and utilities from @wdprlib/ast
|
|
24
|
+
export type {
|
|
25
|
+
Position,
|
|
26
|
+
Point,
|
|
27
|
+
Version,
|
|
28
|
+
Element,
|
|
29
|
+
SyntaxTree,
|
|
30
|
+
ContainerType,
|
|
31
|
+
ContainerData,
|
|
32
|
+
AttributeMap,
|
|
33
|
+
VariableMap,
|
|
34
|
+
Alignment,
|
|
35
|
+
LinkType,
|
|
36
|
+
LinkLocation,
|
|
37
|
+
LinkLabel,
|
|
38
|
+
PageRef,
|
|
39
|
+
ImageSource,
|
|
40
|
+
FloatAlignment,
|
|
41
|
+
ListType,
|
|
42
|
+
ListItem,
|
|
43
|
+
ListData,
|
|
44
|
+
CodeBlockData,
|
|
45
|
+
TabData,
|
|
46
|
+
TableCell,
|
|
47
|
+
TableRow,
|
|
48
|
+
TableData,
|
|
49
|
+
DefinitionListItem,
|
|
50
|
+
Module,
|
|
51
|
+
CollapsibleData,
|
|
52
|
+
ClearFloat,
|
|
53
|
+
AnchorTarget,
|
|
54
|
+
HeaderType,
|
|
55
|
+
AlignType,
|
|
56
|
+
HeadingLevel,
|
|
57
|
+
Heading,
|
|
58
|
+
DateItem,
|
|
59
|
+
Embed,
|
|
60
|
+
TocEntry,
|
|
61
|
+
// Diagnostics
|
|
62
|
+
Diagnostic,
|
|
63
|
+
DiagnosticSeverity,
|
|
64
|
+
ParseResult,
|
|
65
|
+
} from "@wdprlib/ast";
|
|
66
|
+
export {
|
|
67
|
+
createPoint,
|
|
68
|
+
createPosition,
|
|
69
|
+
text,
|
|
70
|
+
container,
|
|
71
|
+
paragraph,
|
|
72
|
+
bold,
|
|
73
|
+
italics,
|
|
74
|
+
heading,
|
|
75
|
+
lineBreak,
|
|
76
|
+
horizontalRule,
|
|
77
|
+
link,
|
|
78
|
+
list,
|
|
79
|
+
listItemElements,
|
|
80
|
+
listItemSubList,
|
|
81
|
+
} from "@wdprlib/ast";
|
|
82
|
+
|
|
83
|
+
// Wikitext settings (re-exported from @wdprlib/ast)
|
|
84
|
+
export type { WikitextMode, WikitextSettings } from "@wdprlib/ast";
|
|
85
|
+
export { createSettings, DEFAULT_SETTINGS } from "@wdprlib/ast";
|
|
86
|
+
|
|
87
|
+
// Lexer
|
|
88
|
+
export type { TokenType, Token, LexerOptions } from "./lexer";
|
|
89
|
+
export { Lexer, tokenize, createToken } from "./lexer";
|
|
90
|
+
|
|
91
|
+
// Parser
|
|
92
|
+
export type { ParserOptions } from "./parser";
|
|
93
|
+
export { Parser, parse } from "./parser";
|
|
94
|
+
|
|
95
|
+
// Modules (ListPages, ListUsers, IfTags, Include, etc.)
|
|
96
|
+
export type {
|
|
97
|
+
// ListPages query types
|
|
98
|
+
ListPagesQuery,
|
|
99
|
+
ListPagesVariable,
|
|
100
|
+
// Data requirement types
|
|
101
|
+
ListPagesDataRequirement,
|
|
102
|
+
DataRequirements,
|
|
103
|
+
// External data types
|
|
104
|
+
UserInfo,
|
|
105
|
+
PageData,
|
|
106
|
+
SiteContext,
|
|
107
|
+
ListPagesExternalData,
|
|
108
|
+
// Callback types
|
|
109
|
+
ListPagesDataFetcher,
|
|
110
|
+
DataProvider,
|
|
111
|
+
// Template types
|
|
112
|
+
VariableContext,
|
|
113
|
+
CompiledTemplate,
|
|
114
|
+
// Extraction types
|
|
115
|
+
ExtractionResult,
|
|
116
|
+
// Resolution types
|
|
117
|
+
ParseFunction,
|
|
118
|
+
ModuleSourceTransform,
|
|
119
|
+
ResolveOptions,
|
|
120
|
+
// Include resolution
|
|
121
|
+
IncludeFetcher,
|
|
122
|
+
AsyncIncludeFetcher,
|
|
123
|
+
ResolveIncludesOptions,
|
|
124
|
+
IncludeReference,
|
|
125
|
+
IncludeDependency,
|
|
126
|
+
IncludeIterationTrace,
|
|
127
|
+
ResolveIncludesTraceResult,
|
|
128
|
+
// ListUsers types
|
|
129
|
+
ListUsersVariable,
|
|
130
|
+
ListUsersUserData,
|
|
131
|
+
ListUsersDataRequirement,
|
|
132
|
+
ListUsersExternalData,
|
|
133
|
+
ListUsersDataFetcher,
|
|
134
|
+
ListUsersVariableContext,
|
|
135
|
+
ListUsersCompiledTemplate,
|
|
136
|
+
// Normalized query types
|
|
137
|
+
NormalizedListPagesQuery,
|
|
138
|
+
NormalizedTags,
|
|
139
|
+
NormalizedCategory,
|
|
140
|
+
NormalizedOrder,
|
|
141
|
+
NormalizedParent,
|
|
142
|
+
NormalizedDateSelector,
|
|
143
|
+
NormalizedNumericSelector,
|
|
144
|
+
} from "./parser/rules/block/module/index";
|
|
145
|
+
export {
|
|
146
|
+
extractDataRequirements,
|
|
147
|
+
resolveModules,
|
|
148
|
+
STYLE_SLOT_PREFIX,
|
|
149
|
+
compileTemplate,
|
|
150
|
+
// Include resolution
|
|
151
|
+
extractIncludeReferences,
|
|
152
|
+
resolveIncludes,
|
|
153
|
+
resolveIncludesAsync,
|
|
154
|
+
resolveIncludesWithTrace,
|
|
155
|
+
// IfTags source-level preprocessing (run between include expansion and parse)
|
|
156
|
+
preprocessIftags,
|
|
157
|
+
// Query normalization (for advanced use cases)
|
|
158
|
+
normalizeQuery,
|
|
159
|
+
parseTags,
|
|
160
|
+
parseCategory,
|
|
161
|
+
parseOrder,
|
|
162
|
+
parseParent,
|
|
163
|
+
parseDateSelector,
|
|
164
|
+
parseNumericSelector,
|
|
165
|
+
// ListUsers
|
|
166
|
+
extractListUsersVariables,
|
|
167
|
+
compileListUsersTemplate,
|
|
168
|
+
isListUsersModule,
|
|
169
|
+
resolveListUsers,
|
|
170
|
+
} from "./parser/rules/block/module/index";
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Locate the `]]` that closes a `[[# name]]` construct whose name is not a
 * legal anchor name.
 *
 * A legal anchor name consists solely of characters from `[-_A-Za-z0-9.%]`.
 * When the name contains anything else, the caller can use the returned
 * offset to decompose the construct so the inner `[# text]` is handled as a
 * described anchor link.
 *
 * @param src - Full source text.
 * @param pos - Offset at which `[[# ` is expected to start.
 * @returns Offset of the first `]` of the closing `]]` when at least one
 *   invalid character precedes it on the same line; `null` when the opener
 *   does not match, the name is entirely valid, a newline is reached first,
 *   or no closing `]]` exists.
 */
export function findInvalidAnchorNameEnd(src: string, pos: number): number | null {
  // The opener must be exactly `[[#` followed by one mandatory space.
  const opener = "[[# ";
  for (let offset = 0; offset < opener.length; offset++) {
    if (src[pos + offset] !== opener[offset]) {
      return null;
    }
  }

  // Any further spaces before the name are skipped and not judged.
  let cursor = pos + opener.length;
  while (src[cursor] === " ") {
    cursor++;
  }

  let sawInvalidChar = false;
  for (; cursor < src.length; cursor++) {
    const code = src.charCodeAt(cursor);

    // An anchor never spans lines.
    if (code === 0x0a) {
      return null;
    }

    // `]]` ends the scan; it only counts when something invalid preceded it.
    if (code === 0x5d && src.charCodeAt(cursor + 1) === 0x5d) {
      return sawInvalidChar ? cursor : null;
    }

    if (!sawInvalidChar && !isValidAnchorNameChar(code)) {
      sawInvalidChar = true;
    }
  }

  return null;
}

/**
 * Whether a UTF-16 code unit belongs to the anchor-name alphabet
 * `[-_A-Za-z0-9.%]`.
 */
function isValidAnchorNameChar(code: number): boolean {
  switch (code) {
    case 0x2d: // -
    case 0x5f: // _
    case 0x2e: // .
    case 0x25: // %
      return true;
    default: {
      const isDigit = code >= 0x30 && code <= 0x39;
      const isUpper = code >= 0x41 && code <= 0x5a;
      const isLower = code >= 0x61 && code <= 0x7a;
      return isDigit || isUpper || isLower;
    }
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Lexer (tokenizer) for Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* The lexer converts preprocessed wikitext into a flat sequence of tokens
|
|
6
|
+
* that the parser consumes. Each token has a type (e.g., `HEADING_MARKER`,
|
|
7
|
+
* `BOLD`, `TEXT`) and a string value. The lexer tracks only local scanning state and does
|
|
8
|
+
* not build any tree structure; that is the parser's responsibility.
|
|
9
|
+
*
|
|
10
|
+
* The main entry points are:
|
|
11
|
+
* - `tokenize()` - convenience function that tokenizes a string in one call
|
|
12
|
+
* - `Lexer` class - for more control over tokenization options
|
|
13
|
+
*
|
|
14
|
+
* @module
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
export type { TokenType, Token } from "./tokens";
|
|
18
|
+
export { createToken } from "./tokens";
|
|
19
|
+
export type { LexerOptions } from "./options";
|
|
20
|
+
export { Lexer } from "./lexer";
|
|
21
|
+
export { tokenize } from "./tokenize";
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import type { Token, TokenType } from "./tokens";
|
|
2
|
+
import type { LexerOptions } from "./options";
|
|
3
|
+
import {
|
|
4
|
+
createLexerToken,
|
|
5
|
+
nextBlockOpenerDepth,
|
|
6
|
+
updateLastNonWhitespaceType,
|
|
7
|
+
} from "./token-factory";
|
|
8
|
+
import {
|
|
9
|
+
advance,
|
|
10
|
+
advanceByToken,
|
|
11
|
+
createInitialLexerState,
|
|
12
|
+
current,
|
|
13
|
+
isAtEnd,
|
|
14
|
+
type LexerState,
|
|
15
|
+
} from "./state";
|
|
16
|
+
import { findInvalidAnchorNameEnd } from "./anchor";
|
|
17
|
+
import { scanQuotedString } from "./quoted-string";
|
|
18
|
+
import { scanSimpleSyntaxToken } from "./syntax-actions";
|
|
19
|
+
import type { TokenAction } from "./token-actions";
|
|
20
|
+
import { scanPunctuationToken } from "./punctuation";
|
|
21
|
+
import { scanCompactTextToken, scanTextToken } from "./text-actions";
|
|
22
|
+
import { scanSpacingToken } from "./spacing-actions";
|
|
23
|
+
|
|
24
|
+
/**
 * Single-pass tokenizer that turns a Wikidot markup string into a flat list
 * of {@link Token}s.
 *
 * Scanning is greedy: the longest multi-character pattern is preferred
 * (e.g. `[[[` before `[[`, `**` before `*`). Constructs whose meaning depends
 * on their position (line-start headings, blockquote markers) are resolved
 * through the `lineStart` flag kept in the lexer state.
 *
 * Most callers should use the standalone {@link tokenize} helper rather than
 * instantiating this class themselves.
 *
 * @group Lexer
 */
export class Lexer {
  private state: LexerState;
  private options: Required<LexerOptions>;
  /** Source offsets where a `]]` must be emitted as `]` + `]` (invalid anchor names). */
  private splitBlockClosePositions: Set<number> = new Set();
  /** Last token type as maintained by `updateLastNonWhitespaceType`; `null` before any update. */
  private lastNonWhitespaceType: TokenType | null = null;
  /**
   * How deep we currently are inside block-opener context, i.e. between a
   * `[[` / `[[/` and its matching `]]`. It scopes `QUOTED_STRING`
   * recognition: a `"` following `=` is only treated as a quoted attribute
   * value while block attributes are being scanned. Without this, an inline
   * `=` followed by `"` (e.g. inside `[[footnote]]="[[/footnote]]`) would
   * wrongly swallow everything up to the next `"` or newline.
   */
  private blockOpenerDepth = 0;

  /**
   * @param source - Markup text to tokenize.
   * @param options - Optional settings; `trackPositions` defaults to `true`
   *   and `compactTextRuns` defaults to `false`.
   */
  constructor(source: string, options: LexerOptions = {}) {
    const trackPositions = options.trackPositions ?? true;
    const compactTextRuns = options.compactTextRuns ?? false;
    this.options = { trackPositions, compactTextRuns };
    this.state = createInitialLexerState(source);
  }

  /**
   * Consume the whole source and return the accumulated tokens, terminated
   * by an `EOF` token with an empty value.
   */
  tokenize(): Token[] {
    for (;;) {
      if (this.isAtEnd()) {
        break;
      }
      this.scanToken();
    }

    this.addToken("EOF", "");
    return this.state.tokens;
  }

  /** Whether the scan position has reached the end of the source. */
  private isAtEnd(): boolean {
    return isAtEnd(this.state);
  }

  /** The character at the current scan position. */
  private current(): string {
    return current(this.state);
  }

  /**
   * Probe for `[[# name]]` with an invalid anchor name at the current
   * position.
   *
   * Valid:   `[[# valid-name]]`, where the name matches `[-_A-Za-z0-9.%]+`.
   * Invalid: `[[# name with spaces]]` or `[[# name$special]]`.
   *
   * For the invalid form this yields the offset of the closing `]]`, letting
   * the lexer emit tokens from which the inner `[# text]` can be parsed as a
   * described link. Otherwise it yields `null`.
   */
  private findInvalidAnchorNameEnd(): number | null {
    const { source, pos } = this.state;
    return findInvalidAnchorNameEnd(source, pos);
  }

  /** Move the scan position forward by `n` characters. */
  private advance(n = 1): string {
    return advance(this.state, n);
  }

  /** Type of the last non-whitespace token, or `null` when there is none yet. */
  private lastNonWhitespaceTokenType(): TokenType | null {
    return this.lastNonWhitespaceType;
  }

  /**
   * Append a token and refresh the bookkeeping derived from its type
   * (last non-whitespace type and block-opener depth).
   */
  private addToken(type: TokenType, value: string): void {
    const created = createLexerToken(this.state, type, value, this.options.trackPositions);
    this.state.tokens.push(created);
    this.lastNonWhitespaceType = updateLastNonWhitespaceType(this.lastNonWhitespaceType, type);
    this.blockOpenerDepth = nextBlockOpenerDepth(this.blockOpenerDepth, type);
  }

  /** Advance past the text described by `action`, then record its token. */
  private emitTokenAction(action: TokenAction): void {
    advanceByToken(this.state, action.type, action.length);
    this.addToken(action.type, action.value);
  }

  /** Emit one action, or every action of a list in order. */
  private emitTokenActions(actions: TokenAction | TokenAction[]): void {
    const queue = Array.isArray(actions) ? actions : [actions];
    for (const entry of queue) {
      this.emitTokenAction(entry);
    }
  }

  /**
   * Scan exactly one token (or one group of tokens) at the current position.
   *
   * Scanners are tried in a fixed order: spacing, punctuation, double quote,
   * simple syntax, optional compact text run, and finally plain text.
   */
  private scanToken(): void {
    const char = this.current();
    const { source, pos, lineStart } = this.state;

    const spacing = scanSpacingToken(source, pos);
    if (spacing) {
      this.emitTokenAction(spacing);
      return;
    }

    if (this.scanPunctuation(char, source, pos, lineStart)) {
      return;
    }

    if (char === '"') {
      this.scanDoubleQuote();
      return;
    }

    const simple = scanSimpleSyntaxToken(source, pos, lineStart);
    if (simple) {
      this.emitTokenAction(simple);
      return;
    }

    // Compact runs are only attempted outside `[[...]]` openers.
    if (this.options.compactTextRuns && this.blockOpenerDepth === 0) {
      const compactRun = scanCompactTextToken(source, pos);
      if (compactRun) {
        this.emitTokenAction(compactRun);
        return;
      }
    }

    this.emitTokenAction(scanTextToken(source, pos));
  }

  /**
   * Try the punctuation scanner at `pos`. When it handles the character,
   * apply its split-`]]` bookkeeping, emit its tokens and report `true`.
   */
  private scanPunctuation(char: string, source: string, pos: number, lineStart: boolean): boolean {
    const outcome = scanPunctuationToken({
      char,
      source,
      pos,
      lineStart,
      splitBlockClose: this.splitBlockClosePositions.has(pos),
      findInvalidAnchorNameEnd: () => this.findInvalidAnchorNameEnd(),
    });
    if (!outcome.handled) {
      return false;
    }

    if (outcome.clearSplitBlockCloseAt !== undefined) {
      this.splitBlockClosePositions.delete(outcome.clearSplitBlockCloseAt);
    }
    if (outcome.splitBlockCloseAt !== undefined) {
      this.splitBlockClosePositions.add(outcome.splitBlockCloseAt);
    }
    this.emitTokenActions(outcome.actions);
    return true;
  }

  /**
   * Handle a `"` at the current position.
   *
   * It starts a `QUOTED_STRING` only when it follows `EQUALS` inside a block
   * opener (a block attribute value). In inline context `"` is an ordinary
   * text character (typographic quote); without the depth gate an inline `=`
   * followed by `"` (e.g. `[[footnote]]="[[/footnote]]`) would eat the
   * closing tag.
   */
  private scanDoubleQuote(): void {
    const insideBlockOpener = this.blockOpenerDepth > 0;
    if (insideBlockOpener && this.lastNonWhitespaceTokenType() === "EQUALS") {
      this.addToken("QUOTED_STRING", scanQuotedString(this.state));
      return;
    }

    this.advance();
    this.addToken("TEXT", '"');
  }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Options accepted by the {@link Lexer} constructor.
 *
 * @group Lexer
 */
export interface LexerOptions {
  /**
   * Attach accurate line/column/offset data to every token. Defaults to
   * `true`; pass `false` to skip position tracking and tokenise faster when
   * source-map information is not required.
   */
  trackPositions?: boolean;
  /**
   * Merge ordinary text found outside `[[...]]` openers into larger TEXT
   * tokens. Defaults to `false`. Block names and attributes are still
   * tokenized normally, so this only lowers the token count of large
   * documents.
   */
  compactTextRuns?: boolean;
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import {
|
|
2
|
+
scanAtToken,
|
|
3
|
+
scanClosingBracketToken,
|
|
4
|
+
scanDashToken,
|
|
5
|
+
scanGreaterToken,
|
|
6
|
+
scanOpeningBracketToken,
|
|
7
|
+
scanPipeToken,
|
|
8
|
+
scanTildeToken,
|
|
9
|
+
type TokenAction,
|
|
10
|
+
} from "./token-actions";
|
|
11
|
+
|
|
12
|
+
/** Everything `scanPunctuationToken` needs to scan one punctuation character. */
export interface PunctuationScanInput {
  /** Character to dispatch on. */
  char: string;
  /** Full source text being tokenized. */
  source: string;
  /** Scan offset within `source`. */
  pos: number;
  /** Line-start flag, forwarded to the `>`, `-` and `~` scanners. */
  lineStart: boolean;
  /**
   * Forwarded to the `]` scanner; when `true`, `pos` is reported back as
   * `clearSplitBlockCloseAt`.
   */
  splitBlockClose: boolean;
  /**
   * Invoked when `char` is `[`; its result is handed to the opening-bracket
   * scanner.
   */
  findInvalidAnchorNameEnd: () => number | null;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Outcome of `scanPunctuationToken`.
 *
 * `handled: false` means the character is not punctuation this scanner
 * deals with. Otherwise `actions` carries the token action(s) to emit, with
 * optional offsets to add to (`splitBlockCloseAt`) or remove from
 * (`clearSplitBlockCloseAt`) the caller's set of split `]]` positions.
 */
export type PunctuationScanResult =
  | { handled: false }
  | {
      handled: true;
      actions: TokenAction | TokenAction[];
      splitBlockCloseAt?: number;
      clearSplitBlockCloseAt?: number;
    };
|
|
29
|
+
|
|
30
|
+
/**
 * Dispatch a single punctuation character to its dedicated scanner.
 *
 * Handles `[`, `]`, `@`, `>`, `-`, `~` and `|`. Any other character, and a
 * `~` for which the tilde scanner yields nothing, is reported as unhandled.
 *
 * @param input - Character, source, offset and flags for this scan.
 * @returns The token action(s) to emit plus optional split-`]]` bookkeeping,
 *   or `{ handled: false }`.
 */
export function scanPunctuationToken(input: PunctuationScanInput): PunctuationScanResult {
  const { char, source, pos, lineStart, splitBlockClose } = input;

  if (char === "[") {
    const opening = scanOpeningBracketToken(source, pos, input.findInvalidAnchorNameEnd());
    return {
      handled: true,
      actions: opening,
      splitBlockCloseAt: opening.splitBlockCloseAt,
    };
  }

  if (char === "]") {
    const closing = scanClosingBracketToken(source, pos, splitBlockClose);
    return {
      handled: true,
      actions: closing,
      // Echo the offset back so the caller can drop it from its split set.
      clearSplitBlockCloseAt: splitBlockClose ? pos : undefined,
    };
  }

  if (char === "@") {
    return { handled: true, actions: scanAtToken(source, pos) };
  }

  if (char === ">") {
    return { handled: true, actions: scanGreaterToken(source, pos, lineStart) };
  }

  if (char === "-") {
    return { handled: true, actions: scanDashToken(source, pos, lineStart) };
  }

  if (char === "~") {
    const tilde = scanTildeToken(source, pos, lineStart);
    if (tilde) {
      return { handled: true, actions: tilde };
    }
    return { handled: false };
  }

  if (char === "|") {
    return { handled: true, actions: scanPipeToken(source, pos) };
  }

  return { handled: false };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { advance, current, isAtEnd, type LexerState } from "./state";
|
|
2
|
+
|
|
3
|
+
/**
 * Consume a quoted block-attribute value from the lexer state.
 *
 * The returned text starts with the opening quote and ends with the closing
 * quote when one is present. A newline or the end of input stops the scan;
 * the newline itself is left unconsumed.
 *
 * @param state - Lexer state positioned on the opening quote.
 * @returns The raw text that was consumed.
 */
export function scanQuotedString(state: LexerState): string {
  // The first character is taken unconditionally (the opening quote).
  let text = advance(state);

  for (;;) {
    if (isAtEnd(state)) {
      break;
    }
    const upcoming = current(state);
    if (upcoming === '"' || upcoming === "\n") {
      break;
    }
    text += advance(state);
  }

  // Include the closing quote only when the scan actually stopped on one.
  if (current(state) === '"') {
    text += advance(state);
  }
  return text;
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/** Minimum length, in UTF-16 code units, a run must reach before `findLongPlainTextRunEnd` reports it. */
const MIN_PLAIN_TEXT_RUN_LENGTH = 32;
|
|
2
|
+
|
|
3
|
+
/**
 * Find where a run of spaces and tabs ends.
 *
 * The character at `pos` is not inspected; scanning starts at `pos + 1`.
 *
 * @param src - Source text.
 * @param pos - Offset of the first character of the run.
 * @returns Offset just past the last consecutive space or tab.
 */
export function findWhitespaceRunEnd(src: string, pos: number): number {
  let cursor = pos + 1;
  for (; cursor < src.length; cursor++) {
    const ch = src[cursor];
    if (ch !== " " && ch !== "\t") {
      break;
    }
  }
  return cursor;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Find where a run of one repeated character ends.
 *
 * The character at `pos` is not inspected; scanning starts at `pos + 1`.
 *
 * @param src - Source text.
 * @param pos - Offset of the first character of the run.
 * @param char - The character the run consists of.
 * @returns Offset just past the last consecutive occurrence of `char`.
 */
export function findRepeatedCharRunEnd(src: string, pos: number, char: string): number {
  let cursor = pos + 1;
  for (; cursor < src.length; cursor++) {
    if (src[cursor] !== char) {
      break;
    }
  }
  return cursor;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Detect a long run of non-ASCII code units starting at `pos`.
 *
 * The run stops at the first code unit that is ASCII (`<= 0x7f`) or equal to
 * `0xe000`.
 *
 * @param src - Source text.
 * @param pos - Offset at which the run starts.
 * @returns Offset just past the run when it spans at least
 *   `MIN_PLAIN_TEXT_RUN_LENGTH` code units, otherwise `null`.
 */
export function findLongPlainTextRunEnd(src: string, pos: number): number | null {
  let cursor = pos;
  for (; cursor < src.length; cursor++) {
    const unit = src.charCodeAt(cursor);
    const isAscii = unit <= 0x7f;
    if (isAscii || unit === 0xe000) {
      break;
    }
  }

  const runLength = cursor - pos;
  if (runLength >= MIN_PLAIN_TEXT_RUN_LENGTH) {
    return cursor;
  }
  return null;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Find where a run of ASCII letters and digits ends.
 *
 * The character at `pos` is not inspected; scanning starts at `pos + 1`.
 *
 * @param src - Source text.
 * @param pos - Offset of the first character of the identifier.
 * @returns Offset just past the last consecutive ASCII alphanumeric.
 */
export function findAsciiIdentifierEnd(src: string, pos: number): number {
  let cursor = pos + 1;
  for (; cursor < src.length; cursor++) {
    if (!isAsciiAlphanumericCode(src.charCodeAt(cursor))) {
      break;
    }
  }
  return cursor;
}

/**
 * Whether a UTF-16 code unit is an ASCII digit or letter (`0-9`, `A-Z`, `a-z`).
 */
export function isAsciiAlphanumericCode(code: number): boolean {
  if (code >= 0x30 && code <= 0x39) {
    return true; // 0-9
  }
  if (code >= 0x41 && code <= 0x5a) {
    return true; // A-Z
  }
  return code >= 0x61 && code <= 0x7a; // a-z
}
|
|
43
|
+
|
|
44
|
+
/**
 * Find where a compact plain-text run starting at `pos` ends.
 *
 * Unlike the other run finders, the character at `pos` itself is tested, so
 * the result equals `pos` when that character is already a boundary.
 *
 * @param src - Source text.
 * @param pos - Offset at which the run starts.
 * @returns Offset of the first boundary character, or `src.length` when
 *   none is found.
 */
export function findCompactPlainTextRunEnd(src: string, pos: number): number {
  let cursor = pos;
  for (; cursor < src.length; cursor++) {
    if (isCompactPlainTextBoundary(src.charCodeAt(cursor))) {
      break;
    }
  }
  return cursor;
}

/**
 * ASCII lookup table: entry `1` marks a code unit that ends a compact text
 * run. Covers newline and the characters `[ ] @ > - ~ | { } * < _ ^ , / + # = : & \`.
 */
const COMPACT_BOUNDARY_ASCII: Uint8Array = (() => {
  const table = new Uint8Array(0x80);
  const boundaryChars = "\n[]@>-~|{}*<_^,/+#=:&\\";
  for (let index = 0; index < boundaryChars.length; index++) {
    table[boundaryChars.charCodeAt(index)] = 1;
  }
  return table;
})();

/**
 * Whether a UTF-16 code unit terminates a compact plain-text run.
 */
function isCompactPlainTextBoundary(code: number): boolean {
  // 0xe000 is the preprocessed backslash break marker.
  if (code === 0xe000) {
    return true;
  }
  return code < 0x80 && COMPACT_BOUNDARY_ASCII[code] === 1;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { findWhitespaceRunEnd } from "./runs";
|
|
2
|
+
import type { TokenAction } from "./token-actions";
|
|
3
|
+
|
|
4
|
+
/**
 * Recognise a newline or a run of spaces/tabs at `pos`.
 *
 * @param src - Source text.
 * @param pos - Offset to inspect.
 * @returns A `NEWLINE` action for `\n`, a `WHITESPACE` action covering the
 *   whole run of spaces and tabs, or `null` for any other character.
 */
export function scanSpacingToken(src: string, pos: number): TokenAction | null {
  switch (src[pos]) {
    case "\n":
      return token("NEWLINE", "\n");
    case " ":
    case "\t":
      return runToken(src, pos, findWhitespaceRunEnd(src, pos), "WHITESPACE");
    default:
      return null;
  }
}

/** Build an action whose length is the length of its literal value. */
function token(type: TokenAction["type"], value: string): TokenAction {
  const length = value.length;
  return { type, value, length };
}

/** Build an action covering `src` from `pos` up to (not including) `end`. */
function runToken(src: string, pos: number, end: number, type: TokenAction["type"]): TokenAction {
  const value = src.slice(pos, end);
  return { type, value, length: end - pos };
}
|