@wdprlib/parser 3.1.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10456 -8230
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10460 -8234
- package/package.json +5 -3
- package/src/index.ts +170 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +21 -0
- package/src/lexer/lexer.ts +201 -0
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +175 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +132 -0
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/center/index.ts +72 -0
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/clear-float/index.ts +51 -0
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/comment/index.ts +47 -0
- package/src/parser/rules/block/content-separator/index.ts +49 -0
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/horizontal-rule/index.ts +44 -0
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/iftags/index.ts +108 -0
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +29 -0
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +134 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +73 -0
- package/src/parser/rules/block/module/listpages/extract.ts +76 -0
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +74 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +29 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +56 -0
- package/src/parser/rules/block/module/listusers/extract.ts +40 -0
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +198 -0
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +70 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +15 -0
- package/src/parser/rules/common/attribute-safety.ts +109 -0
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +49 -0
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/comment/index.ts +64 -0
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/guillemet/index.ts +56 -0
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/html/index.ts +81 -0
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +46 -0
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +72 -0
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +73 -0
- package/src/parser/rules/inline/link-star.ts +72 -0
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +50 -0
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/strikethrough/index.ts +60 -0
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +47 -0
- package/src/parser/rules/inline/superscript.ts +49 -0
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/text/index.ts +85 -0
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/underline/index.ts +84 -0
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -0
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parser constants that define structural boundaries in Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* These constants are used by the paragraph rule to determine when a new
|
|
6
|
+
* block-level construct begins, which terminates the current paragraph.
|
|
7
|
+
* When any of these token types appear at the start of a line, the parser
|
|
8
|
+
* stops collecting inline content for the current paragraph and begins
|
|
9
|
+
* processing the new block element.
|
|
10
|
+
*
|
|
11
|
+
* @module
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { TokenType } from "../lexer";
|
|
15
|
+
|
|
16
|
+
/**
 * Token types that open a block-level construct in Wikidot markup.
 *
 * While a paragraph is being built, any of these tokens found at the
 * beginning of a line ends that paragraph so the matching block rule can
 * take over. The Wikidot syntax behind the less obvious entries is noted
 * directly above each of them.
 */
export const BLOCK_START_TOKENS: TokenType[] = [
  "BLOCKQUOTE_MARKER",
  "LIST_BULLET",
  "LIST_NUMBER",
  "HEADING_MARKER",
  "HR_MARKER",
  "TABLE_MARKER",
  // Definition list.
  "COLON",
  // Opening block tags: [[footnoteblock]], [[div]], etc.
  "BLOCK_OPEN",
  // Closing block tags: [[/div]], [[/collapsible]], etc.
  "BLOCK_END_OPEN",
  // Center align (= text) or content separator (====).
  "EQUALS",
  // ~~~~
  "CLEAR_FLOAT",
  // ~~~~<
  "CLEAR_FLOAT_LEFT",
  // ~~~~>
  "CLEAR_FLOAT_RIGHT",
];
|
|
39
|
+
|
|
40
|
+
/**
 * Set form of `BLOCK_START_TOKENS`, for membership checks via `has`.
 *
 * Built once when this module is evaluated; changes made to the
 * `BLOCK_START_TOKENS` array afterwards are not reflected here.
 */
export const BLOCK_START_TOKEN_SET: ReadonlySet<TokenType> = new Set<TokenType>(
  BLOCK_START_TOKENS,
);
|
|
41
|
+
|
|
42
|
+
/**
 * Block names the parser recognizes inside `[[name]]` / `[[/name]]`.
 *
 * Inline-parser logic consults this set to tell a real block boundary apart
 * from an unknown token such as `[[foo]]`, which Wikidot treats as inline
 * text rather than as a paragraph-breaking block.
 *
 * This set must stay in sync with the block rules registered in
 * `packages/parser/src/parser/rules/block/index.ts`. The align-style markers
 * (`<`, `>`, `=`, `==`) are listed on purpose: `[[<]]` and friends open
 * `alignRule`.
 */
export const KNOWN_BLOCK_NAMES: ReadonlySet<string> = new Set<string>([
  // Structural containers.
  "collapsible", "div", "div_", "code",
  // List blocks.
  "ul", "ol", "li",
  // Table blocks.
  "table", "row", "cell", "hcell",
  // Tabview / module.
  "tabview", "tabs", "module", "module654",
  // Miscellaneous named blocks.
  "bibliography", "footnoteblock", "toc", "iframe",
  "math", "html", "iftags", "include",
  // Float TOC prefix: `[[f<toc]]`, `[[f>toc]]` (see the toc rule).
  "f",
  // Embed family.
  "embed", "embedvideo", "embedaudio",
  // Align markers.
  "<", ">", "=", "==",
  // Inline-level constructs that are lexed with BLOCK_OPEN tokens. They are
  // recognized here so the paragraph parser keeps its existing
  // block-boundary behavior for `[[span]]`, `[[user ...]]`, `[[$ ... $]]`,
  // etc. when they appear at the start of a line.
  "span", "span_", "user", "a", "anchor", "size",
  "footnote", "eref", "$", "image", "gallery", "file",
]);
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Block names whose rule sets `requiresLineStart: false`, i.e. they can
|
|
113
|
+
* legitimately start a block even when the `[[...]]` opener is preceded
|
|
114
|
+
* by leading whitespace on its line.
|
|
115
|
+
*
|
|
116
|
+
* Used by the inline parser to decide whether a `\n<indent>[[name]]`
|
|
117
|
+
* sequence ends the current paragraph. Without this list, the inline
|
|
118
|
+
* parser would either:
|
|
119
|
+
* - keep `lineStart` strict and miss legitimately-indented container
|
|
120
|
+
* blocks (Wikidot accepts e.g. `\n [[div_]]`); the inner block
|
|
121
|
+
* gets absorbed into the parent paragraph as literal text, or
|
|
122
|
+
* - drop the `lineStart` check entirely and prematurely break out of
|
|
123
|
+
* paragraphs for `\n [[toc]]` — a rule with `requiresLineStart: true`
|
|
124
|
+
* would refuse the indented token, leaving the paragraph split but
|
|
125
|
+
* the block unconsumed (literal `[[toc]]` text in a new paragraph).
|
|
126
|
+
*
|
|
127
|
+
* Each entry corresponds to a name handled by a block rule whose
|
|
128
|
+
* `requiresLineStart` is `false`. Keep this list in sync when adding or
|
|
129
|
+
* changing such rules; the inline-level constructs that happen to share
|
|
130
|
+
* `BLOCK_OPEN` (`[[span]]`, `[[image]]`, `[[user]]`, etc.) are
|
|
131
|
+
* intentionally excluded — they remain inline and should not split
|
|
132
|
+
* paragraphs based on indentation alone.
|
|
133
|
+
*
|
|
134
|
+
* Sources (block rule → handled names):
|
|
135
|
+
* - `bibliographyRule` → bibliography
|
|
136
|
+
* - `blockListRule` → ul, ol, li
|
|
137
|
+
* - `codeRule` → code
|
|
138
|
+
* - `collapsibleRule` → collapsible
|
|
139
|
+
* - `divRule` → div, div_
|
|
140
|
+
* - `embedBlockRule` → embed, embedvideo, embedaudio
|
|
141
|
+
* - `htmlRule` → html
|
|
142
|
+
* - `iframeRule` → iframe
|
|
143
|
+
* - `iftagsRule` → iftags
|
|
144
|
+
* - `mathRule` → math
|
|
145
|
+
* - `moduleRule` → module, module654
|
|
146
|
+
* - `orphanLiRule` → li (also under blockListRule)
|
|
147
|
+
* - `tableBlockRule` → table (row, cell, hcell are private to the in-table
|
|
148
|
+
* parser, never accepted by the top-level dispatcher)
|
|
149
|
+
* - `tabviewRule` → tabview, tabs (tab is private to the in-tabview parser)
|
|
150
|
+
*
|
|
151
|
+
* `includeRule` is omitted because `[[include ...]]` is expanded as a
|
|
152
|
+
* text-level macro by `resolveIncludes` before the parser sees it.
|
|
153
|
+
*/
|
|
154
|
+
export const INDENT_ACCEPTING_BLOCK_NAMES: ReadonlySet<string> = new Set<string>([
  // bibliographyRule
  "bibliography",
  // blockListRule / orphanLiRule
  "ul", "ol", "li",
  // codeRule
  "code",
  // collapsibleRule
  "collapsible",
  // divRule
  "div", "div_",
  // embedBlockRule
  "embed", "embedvideo", "embedaudio",
  // htmlRule
  "html",
  // iframeRule
  "iframe",
  // iftagsRule
  "iftags",
  // mathRule
  "math",
  // moduleRule
  "module", "module654",
  // tableBlockRule
  "table",
  // tabviewRule
  "tabview", "tabs",
]);
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Depth processing module for converting flat lists into nested tree structures.
|
|
4
|
+
*
|
|
5
|
+
* This is a TypeScript port of Wikidot's `depth.rs`. It handles the conversion
|
|
6
|
+
* of flat depth-annotated items (such as bullet/numbered list entries at various
|
|
7
|
+
* indentation levels) into properly nested tree structures. The algorithm uses an
|
|
8
|
+
* internal stack to track open nesting levels and collapses them as depth decreases.
|
|
9
|
+
*
|
|
10
|
+
* Used primarily by the list parser and the table-of-contents builder to transform
|
|
11
|
+
* flat sequences of items with depth annotations into hierarchical AST structures.
|
|
12
|
+
*
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
15
|
+
import { DepthStack } from "./stack";
|
|
16
|
+
|
|
17
|
+
/**
 * One node of a depth tree.
 *
 * Nodes come in two shapes, discriminated by `kind`:
 * - `"item"`: a leaf that carries a single `value`;
 * - `"list"`: a nested list tagged with its `ltype` and holding further
 *   nodes in `children`.
 *
 * Because `"list"` nodes contain nodes themselves, trees may nest to any depth.
 *
 * @typeParam L - The list type discriminator (e.g., "bullet" vs "number" for lists,
 *   or `null` when list type distinction is not needed)
 * @typeParam T - The type of leaf item values
 */
export type DepthItem<L, T> =
  | { kind: "item"; value: T }
  | { kind: "list"; ltype: L; children: DepthItem<L, T>[] };
|
|
30
|
+
|
|
31
|
+
/**
 * The sibling nodes that make up one level of a depth tree, in order.
 *
 * @typeParam L - The list type discriminator
 * @typeParam T - The type of leaf item values
 */
export type DepthList<L, T> = Array<DepthItem<L, T>>;
|
|
38
|
+
|
|
39
|
+
/**
 * Turn a flat, depth-annotated sequence into one or more nested trees.
 *
 * Items are consumed in order while a {@link DepthStack} tracks the nesting
 * levels that are currently open. For every item:
 *
 * 1. if it is deeper than the previous item, one level is opened per step of
 *    difference, each tagged with the item's list type;
 * 2. if it is shallower, one level is closed per step of difference;
 * 3. if the innermost open level has a different list type than the item,
 *    that level is replaced by a fresh one of the item's type;
 * 4. the item's value is appended to the innermost open level.
 *
 * A type change at the root level (depth 0) finishes the current tree and
 * starts another, which is why several trees may be returned.
 *
 * @typeParam L - The list type discriminator
 * @typeParam T - The type of leaf item values
 * @param topLtype - The default list type for the root level
 * @param items - Flat sequence of depth-annotated items, where each item has:
 *   - `depth`: the 0-based nesting level
 *   - `ltype`: the list type for grouping (e.g., "bullet" vs "number")
 *   - `value`: the actual item content
 * @param ltypeEquals - Equality comparator for list types (defaults to `===`)
 * @returns Array of finished trees, each with an `ltype` and a `list` of nested items.
 *   Multiple trees are returned when the list type changes at depth 0.
 */
export function processDepths<L, T>(
  topLtype: L,
  items: Array<{ depth: number; ltype: L; value: T }>,
  ltypeEquals: (a: L, b: L) => boolean = (a, b) => a === b,
): Array<{ ltype: L; list: DepthList<L, T> }> {
  const builder = new DepthStack<L, T>(topLtype);

  // Depth of the item handled in the previous iteration (the root is 0).
  let lastDepth = 0;

  for (const entry of items) {
    const { depth, ltype, value } = entry;

    // At most one of the two loops below does any work:
    //   lastDepth <  depth -> only the "open" loop runs
    //   lastDepth >  depth -> only the "close" loop runs
    //   lastDepth == depth -> neither runs

    // Open the levels needed to reach this item's depth.
    let opening = lastDepth;
    while (opening < depth) {
      builder.increaseDepth(ltype);
      opening++;
    }

    // Close the levels that are deeper than this item.
    let closing = depth;
    while (closing < lastDepth) {
      builder.decreaseDepth();
      closing++;
    }

    // A differing list type at this depth ends the current layer and starts
    // a new one of the item's type; subsequent items keep landing in that
    // new layer until the depth or the type changes again.
    const sameType = ltypeEquals(builder.lastType(), ltype);
    if (!sameType) {
      builder.newList(ltype);
    }

    builder.pushItem(value);
    lastDepth = depth;
  }

  return builder.intoTrees();
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import type { DepthItem, DepthList } from "../depth";
|
|
2
|
+
|
|
3
|
+
/**
 * Internal stack-based builder for constructing depth trees incrementally.
 *
 * The bottom stack entry is the root list currently being built and is never
 * removed; every entry above it is an open nested list. Closing a level wraps
 * its items in a `"list"` node and appends that node to the level beneath it.
 * Completed root lists are collected in `finished` and returned by
 * {@link DepthStack.intoTrees}.
 *
 * @typeParam L - The list type discriminator (may itself be `null`)
 * @typeParam T - The type of leaf item values
 */
export class DepthStack<L, T> {
  private finished: Array<{ ltype: L; list: DepthList<L, T> }> = [];
  private stack: Array<{ ltype: L; items: DepthItem<L, T>[] }>;

  /**
   * @param topLtype - List type assigned to the initial root level
   */
  constructor(topLtype: L) {
    this.stack = [{ ltype: topLtype, items: [] }];
  }

  /** Innermost open level. */
  private get last(): { ltype: L; items: DepthItem<L, T>[] } {
    return this.stack[this.stack.length - 1]!;
  }

  /** Root level. */
  private get first(): { ltype: L; items: DepthItem<L, T>[] } {
    return this.stack[0]!;
  }

  /** `true` when only the root level is open. */
  private isSingle(): boolean {
    return this.stack.length === 1;
  }

  /** Open a new, empty nested level of the given list type. */
  increaseDepth(ltype: L): void {
    this.stack.push({ ltype, items: [] });
  }

  /**
   * Close the innermost nested level and append it, as a `"list"` node, to
   * the level beneath it.
   *
   * @throws Error when only the root level is open. The check happens before
   *   anything is popped so the builder stays usable after the error.
   */
  decreaseDepth(): void {
    if (this.stack.length <= 1) {
      throw new Error("No depth to pop off!");
    }

    const popped = this.stack.pop();
    if (!popped) {
      throw new Error("No depth to pop off!");
    }
    this.push({ kind: "list", ltype: popped.ltype, children: popped.items });
  }

  /**
   * Replace the innermost level with a fresh one of type `ltype`.
   *
   * At the root this finishes the current tree and retags the root; at a
   * nested level the current level is closed and a sibling level is opened.
   */
  newList(ltype: L): void {
    if (this.isSingle()) {
      this.finishDepthList({ ltype });
      return;
    }

    this.decreaseDepth();
    this.increaseDepth(ltype);
  }

  /** Append a leaf item to the innermost open level. */
  pushItem(item: T): void {
    this.push({ kind: "item", value: item });
  }

  /** List type of the innermost open level. */
  lastType(): L {
    return this.last.ltype;
  }

  /**
   * Close every open level, finish the current tree and return all finished
   * trees. The returned array is the builder's own `finished` array.
   */
  intoTrees(): Array<{ ltype: L; list: DepthList<L, T> }> {
    this.finishDepthList();
    return this.finished;
  }

  private push(item: DepthItem<L, T>): void {
    this.last.items.push(item);
  }

  /**
   * Collapse all nested levels into the root, move the root's items into
   * `finished` (unless there are none) and reset the root to an empty list.
   *
   * @param replacement - When given, the root is retagged with
   *   `replacement.ltype`; when omitted, the root keeps its current type.
   *   The wrapper object keeps "no replacement" distinct from a replacement
   *   type that is itself `null` or `undefined`.
   */
  private finishDepthList(replacement?: { ltype: L }): void {
    while (this.stack.length > 1) {
      this.decreaseDepth();
    }

    const first = this.first;
    const ltype = first.ltype;
    const list = first.items;

    if (replacement !== undefined) {
      first.ltype = replacement.ltype;
    }
    first.items = [];

    if (list.length > 0) {
      this.finished.push({ ltype, list });
    }
  }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Main parser for Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* The parser consumes a token stream from the lexer and produces an AST
|
|
6
|
+
* (Abstract Syntax Tree) conforming to the `@wdprlib/ast` package types.
|
|
7
|
+
* It applies block rules and inline rules in a recursive-descent fashion,
|
|
8
|
+
* followed by post-processing passes for paragraph merging and cleanup.
|
|
9
|
+
*
|
|
10
|
+
* The main entry points are:
|
|
11
|
+
* - `parse()` - convenience function that parses a string in one call
|
|
12
|
+
* - `Parser` class - for more control over parsing options
|
|
13
|
+
*
|
|
14
|
+
* @module
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
export type { ParserOptions } from "./parse";
|
|
18
|
+
export { Parser, parse } from "./parse";
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { Element } from "@wdprlib/ast";
|
|
2
|
+
import type { ParseContext } from "../rules";
|
|
3
|
+
import { getCandidateBlockRules } from "../rules/block/utils";
|
|
4
|
+
|
|
5
|
+
/**
 * Parse the next block starting at `ctx.pos` and advance `ctx.pos` past it.
 *
 * Steps, in order:
 * 1. `skipWhitespace()` is invoked, then `isAtEnd()` is consulted; at the end
 *    of input (or when there is no token at `ctx.pos`) nothing is consumed.
 * 2. A `NEWLINE` token is consumed on its own and produces no elements.
 * 3. Each candidate from `getCandidateBlockRules` is tried; the first rule
 *    that succeeds determines the consumed token count and the result.
 * 4. Otherwise `ctx.blockFallbackRule` is tried and accepted only when it
 *    succeeds with at least one element.
 * 5. If nothing matched, a single token is skipped.
 *
 * @param ctx - Parse context; `pos` is mutated
 * @param skipWhitespace - Callback invoked once before inspecting the token
 * @param isAtEnd - Callback reporting whether the input is exhausted
 * @returns The elements produced for this block (possibly empty)
 */
export function parseNextBlock(
  ctx: ParseContext,
  skipWhitespace: () => void,
  isAtEnd: () => boolean,
): Element[] {
  skipWhitespace();
  if (isAtEnd()) return [];

  const current = ctx.tokens[ctx.pos];
  if (!current) return [];

  if (current.type !== "NEWLINE") {
    // Dedicated block rules first, in the order the candidate list yields them.
    for (const candidate of getCandidateBlockRules(ctx.blockRules, current)) {
      const attempt = candidate.parse(ctx);
      if (!attempt.success) continue;

      ctx.pos += attempt.consumed;
      return attempt.elements;
    }

    // Fallback rule; an empty success is treated like a failure.
    const fallback = ctx.blockFallbackRule.parse(ctx);
    if (fallback.success && fallback.elements.length > 0) {
      ctx.pos += fallback.consumed;
      return fallback.elements;
    }
  }

  // Either a bare NEWLINE or a token nothing could handle: step over it.
  ctx.pos++;
  return [];
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { DEFAULT_SETTINGS } from "@wdprlib/ast";
|
|
2
|
+
import type { Token } from "../../lexer";
|
|
3
|
+
import { blockFallbackRule, blockRules, inlineRules, type ParseContext } from "../rules";
|
|
4
|
+
import type { ParserOptions } from "./options";
|
|
5
|
+
|
|
6
|
+
/**
 * Build the initial {@link ParseContext} for a token stream.
 *
 * Option defaults applied here: `version` falls back to `"wikidot"`,
 * `trackPositions` to `true`, and `settings` to `DEFAULT_SETTINGS`. All
 * collector arrays start empty, the position starts at 0, and the rule
 * tables are the module-level `blockRules`, `blockFallbackRule` and
 * `inlineRules`.
 *
 * @param tokens - Token stream to parse
 * @param options - Parser options; every field is optional
 * @returns A fresh context object
 */
export function createParseContext(tokens: Token[], options: ParserOptions = {}): ParseContext {
  const version = options.version ?? "wikidot";
  const trackPositions = options.trackPositions ?? true;
  const settings = options.settings ?? DEFAULT_SETTINGS;

  const context: ParseContext = {
    tokens,
    pos: 0,
    version,
    trackPositions,
    settings,
    footnotes: [],
    tocEntries: [],
    codeBlocks: [],
    htmlBlocks: [],
    bibcites: [],
    diagnostics: [],
    blockRules,
    blockFallbackRule,
    inlineRules,
    scope: { footnoteBlockParsed: false },
  };
  return context;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Element } from "@wdprlib/ast";
|
|
2
|
+
import { walkElements } from "../rules/block/module/walk";
|
|
3
|
+
|
|
4
|
+
/**
 * Report whether the tree contains an explicit `footnote-block` element at
 * any depth.
 *
 * Traversal is delegated to the parser's general-purpose {@link walkElements},
 * so which containers are descended into is decided there and stays
 * consistent with the rest of the parser.
 *
 * Note on `[[iftags]]`: the walker also descends into iftags bodies, so
 * a `[[footnoteblock]]` that only renders conditionally still suppresses
 * the auto-append. This matches the previous behaviour for unresolved
 * iftags but means the implicit block does not reappear if the iftags
 * condition evaluates to false during `resolveModules`. Tracked as a
 * known limitation; iftags preprocessing would fix it.
 *
 * @param elements - Root elements of the tree to inspect
 * @returns `true` if at least one visited element is a `footnote-block`
 */
export function containsFootnoteBlock(elements: Element[]): boolean {
  let sawFootnoteBlock = false;

  walkElements(elements, (node) => {
    if (node.element !== "footnote-block") {
      return;
    }
    sawFootnoteBlock = true;
  });

  return sawFootnoteBlock;
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { ParseResult } from "@wdprlib/ast";
|
|
2
|
+
import { tokenize } from "../../lexer";
|
|
3
|
+
import { Parser } from "./parser";
|
|
4
|
+
import { parseLargePlainTextDocument, parsePlainNonAsciiDocument } from "./plain-non-ascii";
|
|
5
|
+
import { prepareSourceForParse } from "./source";
|
|
6
|
+
import type { ParserOptions } from "./options";
|
|
7
|
+
|
|
8
|
+
/**
 * Preprocessed-source length (as measured by `String.prototype.length`) at or
 * above which `parse` asks the lexer for `compactTextRuns`.
 */
const COMPACT_TEXT_RUN_SOURCE_LENGTH = 1e5;
|
|
9
|
+
|
|
10
|
+
export type { ParserOptions } from "./options";
|
|
11
|
+
export { Parser } from "./parser";
|
|
12
|
+
|
|
13
|
+
/**
 * Parse a Wikidot markup string into an AST with diagnostics.
 *
 * The function tries two shortcuts before running the full pipeline:
 * the raw source is offered to `parsePlainNonAsciiDocument`, and the
 * preprocessed source to `parseLargePlainTextDocument`; whichever yields a
 * result first is returned as-is. Otherwise the preprocessed source is
 * tokenized (with `compactTextRuns` enabled once it reaches
 * `COMPACT_TEXT_RUN_SOURCE_LENGTH` characters) and handed to {@link Parser}.
 *
 * @example
 * ```ts
 * import { parse } from "@wdprlib/parser";
 *
 * const { ast, diagnostics } = parse("**bold** and //italic//");
 * ```
 *
 * @param source - Wikidot markup to parse
 * @param options - Optional parser configuration
 * @returns The parse result
 *
 * @since 2.0.0
 */
export function parse(source: string, options?: ParserOptions): ParseResult {
  // Shortcut 1: works on the untouched source.
  const plainShortcut = parsePlainNonAsciiDocument(source);
  if (plainShortcut) return plainShortcut;

  // Shortcut 2: works on the preprocessed source.
  const prepared = prepareSourceForParse(source, options);
  const largeShortcut = parseLargePlainTextDocument(prepared);
  if (largeShortcut) return largeShortcut;

  // Full pipeline: lex, then parse.
  const lexerOptions = {
    trackPositions: options?.trackPositions,
    compactTextRuns: prepared.length >= COMPACT_TEXT_RUN_SOURCE_LENGTH,
  };
  const parser = new Parser(tokenize(prepared, lexerOptions), options);
  return parser.parse();
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { WikitextSettings } from "@wdprlib/ast";
|
|
2
|
+
|
|
3
|
+
/**
 * Options accepted by the {@link Parser} constructor and the {@link parse}
 * function.
 *
 * Every field may be left out; defaults are filled in when the option is
 * omitted.
 *
 * @group Parser
 */
export interface ParserOptions {
  /** Markup dialect to parse. `"wikidot"` is the only supported value. */
  version?: "wikidot";
  /**
   * Whether source-position data is propagated into every AST node.
   * Enabled (`true`) by default; pass `false` to get smaller output when
   * positions are not needed.
   */
  trackPositions?: boolean;
  /**
   * Feature flags that depend on where the markup is used (page vs.
   * forum-post, etc.). When omitted, {@link DEFAULT_SETTINGS} (full page
   * mode) is used.
   */
  settings?: WikitextSettings;
  /**
   * Page tags used to expand `[[iftags]]` directives that are embedded
   * inside another block's opener.
   *
   * Accepted values:
   * - omitted / `undefined`: no preprocess pass.
   * - `null`: opener-embedded iftags only, evaluated as if the page has no tags.
   * - `string[]`: every iftags block is evaluated against the given tags eagerly.
   */
  pageTags?: string[] | null;
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import type { Element, ParseResult } from "@wdprlib/ast";
|
|
2
|
+
import type { Token } from "../../lexer";
|
|
3
|
+
import type { ParseContext } from "../rules";
|
|
4
|
+
import type { ParserOptions } from "./options";
|
|
5
|
+
import { parseNextBlock } from "./block";
|
|
6
|
+
import { createParseContext } from "./context";
|
|
7
|
+
import { finalizeParseResult } from "./result";
|
|
8
|
+
|
|
9
|
+
/**
 * Turns a token stream into a {@link ParseResult}.
 *
 * The parser walks the tokens block by block via `parseNextBlock`, which
 * tries the block-level rules and then the fallback rule, and collects the
 * resulting {@link Element} nodes. Once the input is exhausted the collected
 * elements are passed to `finalizeParseResult` together with the context.
 *
 * For most use-cases the standalone {@link parse} function is simpler than
 * constructing a `Parser` directly.
 *
 * @group Parser
 */
export class Parser {
  private ctx: ParseContext;

  /**
   * @param tokens - Token stream to parse
   * @param options - Parser options; defaults are applied by `createParseContext`
   */
  constructor(tokens: Token[], options: ParserOptions = {}) {
    this.ctx = createParseContext(tokens, options);
  }

  /**
   * Parse tokens into a {@link ParseResult} containing the AST and
   * any diagnostics emitted during parsing.
   *
   * @since 2.0.0
   */
  parse(): ParseResult {
    const collected: Element[] = [];

    for (;;) {
      if (this.isAtEnd()) break;
      collected.push(...this.parseBlock());
    }

    return finalizeParseResult(this.ctx, collected);
  }

  /** `true` once the position is past the last token or sits on an `EOF` token. */
  private isAtEnd(): boolean {
    if (this.ctx.pos >= this.ctx.tokens.length) {
      return true;
    }
    return this.currentToken().type === "EOF";
  }

  /** Token at the current position, or a synthetic `EOF` token when there is none. */
  private currentToken(): Token {
    const found = this.ctx.tokens[this.ctx.pos];
    return found ?? this.eofToken();
  }

  /** Build a zero-width placeholder `EOF` token positioned at line 0, column 0, offset 0. */
  private eofToken(): Token {
    const placeholder: Token = {
      type: "EOF",
      value: "",
      position: {
        start: { line: 0, column: 0, offset: 0 },
        end: { line: 0, column: 0, offset: 0 },
      },
      lineStart: false,
    };
    return placeholder;
  }

  /** Advance past any run of consecutive `WHITESPACE` tokens. */
  private skipWhitespace(): void {
    for (; this.currentToken().type === "WHITESPACE"; this.ctx.pos++) {
      // the position advance in the loop header does all the work
    }
  }

  /** Parse one block at the current position by delegating to `parseNextBlock`. */
  private parseBlock(): Element[] {
    const skip = (): void => this.skipWhitespace();
    const atEnd = (): boolean => this.isAtEnd();
    return parseNextBlock(this.ctx, skip, atEnd);
  }
}
|