@wdprlib/parser 3.1.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10456 -8230
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10460 -8234
- package/package.json +5 -3
- package/src/index.ts +170 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +21 -0
- package/src/lexer/lexer.ts +201 -0
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +175 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +132 -0
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/center/index.ts +72 -0
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/clear-float/index.ts +51 -0
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/comment/index.ts +47 -0
- package/src/parser/rules/block/content-separator/index.ts +49 -0
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/horizontal-rule/index.ts +44 -0
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/iftags/index.ts +108 -0
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +29 -0
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +134 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +73 -0
- package/src/parser/rules/block/module/listpages/extract.ts +76 -0
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +74 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +29 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +56 -0
- package/src/parser/rules/block/module/listusers/extract.ts +40 -0
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +198 -0
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +70 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +15 -0
- package/src/parser/rules/common/attribute-safety.ts +109 -0
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +49 -0
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/comment/index.ts +64 -0
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/guillemet/index.ts +56 -0
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/html/index.ts +81 -0
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +46 -0
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +72 -0
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +73 -0
- package/src/parser/rules/inline/link-star.ts +72 -0
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +50 -0
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/strikethrough/index.ts +60 -0
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +47 -0
- package/src/parser/rules/inline/superscript.ts +49 -0
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/text/index.ts +85 -0
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/underline/index.ts +84 -0
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -0
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import type { Element } from "@wdprlib/ast";
|
|
2
|
+
import { extractEscapedSpans, removeLineBreaksAroundSpanStrip } from "./escaped";
|
|
3
|
+
import { paragraphElement } from "./factory";
|
|
4
|
+
import {
|
|
5
|
+
getContainerData,
|
|
6
|
+
hasParagraphStripSpan,
|
|
7
|
+
isContainer,
|
|
8
|
+
isEmptyExpr,
|
|
9
|
+
isSplitSpan,
|
|
10
|
+
} from "./predicates";
|
|
11
|
+
import { splitParagraphAtBlankLineSpans, splitParagraphAtEmptyExpr } from "./split";
|
|
12
|
+
|
|
13
|
+
/**
 * Merge and split paragraphs according to Wikidot's `span_` and expr behaviors.
 *
 * The input is first run through `expandSplitParagraphs`. The expanded list is
 * then scanned left to right:
 *
 * - Anything that is not a paragraph container, and any paragraph for which
 *   `hasParagraphStripSpan` is false, is copied to the output unchanged.
 * - A paragraph that does contain a strip marker starts a run. Following
 *   paragraphs are absorbed into the run; a paragraph without a marker is
 *   still absorbed, but it ends the run unless the paragraph right after it
 *   carries a marker again.
 * - The run's collected elements are handed to `extractEscapedSpans` and then
 *   to `removeLineBreaksAroundSpanStrip`, in that order (both receive the same
 *   array; presumably they edit it in place — confirm in `./escaped`).
 * - When escaped spans were returned, the remaining elements (if any) are
 *   wrapped in a new paragraph; otherwise they are emitted without a wrapper.
 *   The escaped spans are appended after them.
 *
 * @param children Sibling elements to process.
 * @returns A new array of elements; `children` itself is not reassigned here.
 */
export function mergeSpanStripParagraphs(children: Element[]): Element[] {
  const queue = expandSplitParagraphs(children);
  const output: Element[] = [];
  let cursor = 0;

  while (cursor < queue.length) {
    const current = queue[cursor];
    cursor += 1;

    // Missing slots are skipped without emitting anything.
    if (!current) continue;

    // Only a paragraph that carries a strip marker (and exposes container data)
    // starts a merge run; everything else passes straight through.
    const seed =
      isContainer(current, "paragraph") && hasParagraphStripSpan(current)
        ? getContainerData(current)
        : null;
    if (!seed) {
      output.push(current);
      continue;
    }

    const merged: Element[] = [...seed.elements];

    while (cursor < queue.length) {
      const candidate = queue[cursor];
      const candidateData =
        candidate && isContainer(candidate, "paragraph") ? getContainerData(candidate) : null;
      if (!candidate || !candidateData) break;

      // Evaluate the marker check before consuming the candidate.
      const continuesStrip = hasParagraphStripSpan(candidate);
      merged.push(...candidateData.elements);
      cursor += 1;

      if (continuesStrip) continue;

      // A marker-less paragraph was absorbed: keep going only when the very
      // next element is a paragraph that carries a marker itself.
      const lookahead = queue[cursor];
      if (
        !(lookahead && isContainer(lookahead, "paragraph") && hasParagraphStripSpan(lookahead))
      ) {
        break;
      }
    }

    // Order matters: escaped spans are pulled out before line breaks are removed.
    const escaped = extractEscapedSpans(merged);
    removeLineBreaksAroundSpanStrip(merged);

    if (escaped.length === 0) {
      output.push(...merged);
    } else if (merged.length > 0) {
      output.push(paragraphElement(merged));
    }

    output.push(...escaped);
  }

  return output;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Replace every paragraph that needs splitting with the pieces produced by the
 * matching splitter, leaving all other elements in place and in order.
 *
 * The split kind is decided by `getParagraphSplitKind` on the paragraph's
 * elements: "blank-line-span" delegates to `splitParagraphAtBlankLineSpans`,
 * "empty-expr" delegates to `splitParagraphAtEmptyExpr`, and anything else
 * keeps the original element.
 */
function expandSplitParagraphs(children: Element[]): Element[] {
  const pieces: Element[] = [];

  for (const child of children) {
    // Only paragraph containers with readable container data are candidates.
    const data = isContainer(child, "paragraph") ? getContainerData(child) : null;

    switch (data ? getParagraphSplitKind(data.elements) : null) {
      case "blank-line-span":
        pieces.push(...splitParagraphAtBlankLineSpans(child));
        break;
      case "empty-expr":
        pieces.push(...splitParagraphAtEmptyExpr(child));
        break;
      default:
        pieces.push(child);
    }
  }

  return pieces;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Decide how a paragraph's elements should be split.
 *
 * Returns "blank-line-span" as soon as any element satisfies `isSplitSpan`,
 * even if an empty expr was seen earlier. Otherwise returns "empty-expr" when
 * at least one element satisfies `isEmptyExpr`, and `null` when neither occurs.
 */
function getParagraphSplitKind(elements: Element[]): "blank-line-span" | "empty-expr" | null {
  let fallback: "empty-expr" | null = null;

  for (const candidate of elements) {
    if (isSplitSpan(candidate)) return "blank-line-span";
    // Once an empty expr has been found there is no need to test further ones.
    if (fallback === null && isEmptyExpr(candidate)) fallback = "empty-expr";
  }

  return fallback;
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import type { ContainerData, Element, ExprData } from "@wdprlib/ast";
|
|
2
|
+
|
|
3
|
+
/**
 * `ContainerData` extended with optional internal boolean flags that the
 * predicates in this file read:
 * - `_paragraphStrip` / `_emptyParagraphStrip`: either one being `true` on a
 *   span makes `isSpanStripMarker` return true.
 * - `_escapedFromParagraph`: `true` on a span makes `isEscapedSpan` return true.
 * - `_splitByBlankLine`: `true` on a span makes `isSplitSpan` return true.
 */
export type InternalContainerData = ContainerData & {
|
|
4
|
+
_paragraphStrip?: boolean;
|
|
5
|
+
_emptyParagraphStrip?: boolean;
|
|
6
|
+
_escapedFromParagraph?: boolean;
|
|
7
|
+
_splitByBlankLine?: boolean;
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
/**
 * Report whether `el` is a container element whose data `type` equals `type`.
 *
 * @param el Element to inspect.
 * @param type Container type name to compare against (for example "paragraph").
 * @returns `true` only for container elements with a matching type.
 */
export function isContainer(el: Element, type: string): boolean {
  return el.element === "container" && el.data.type === type;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Return the data object of a container element, or `null` when `el` is any
 * other kind of element.
 */
export function getContainerData(el: Element): ContainerData | null {
  return el.element === "container" ? el.data : null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Report whether `el` is a span container flagged as a paragraph-strip marker,
 * i.e. its data has `_paragraphStrip === true` or `_emptyParagraphStrip === true`.
 *
 * @param el Element to inspect; a missing element yields `false`.
 */
export function isSpanStripMarker(el: Element | undefined): boolean {
  if (el == null || el.element !== "container") return false;

  const flags = el.data as InternalContainerData;
  if (flags.type !== "span") return false;

  // The flags must be exactly `true`; other truthy values do not count.
  return flags._paragraphStrip === true || flags._emptyParagraphStrip === true;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Report whether `para` is a paragraph container with at least one direct
 * child that `isSpanStripMarker` accepts. Nested descendants are not searched.
 */
export function hasParagraphStripSpan(para: Element): boolean {
  const data = getContainerData(para);
  if (!data) return false;
  if (data.type !== "paragraph") return false;

  for (const child of data.elements) {
    if (isSpanStripMarker(child)) return true;
  }
  return false;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Report whether `el` is a span container whose data carries
 * `_escapedFromParagraph === true`.
 */
export function isEscapedSpan(el: Element): boolean {
  if (el.element !== "container") return false;

  const flags = el.data as InternalContainerData;
  if (flags.type !== "span") return false;
  return flags._escapedFromParagraph === true;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Report whether `el` is a span container whose data carries
 * `_splitByBlankLine === true`.
 */
export function isSplitSpan(el: Element): boolean {
  if (el.element !== "container") return false;

  const flags = el.data as InternalContainerData;
  if (flags.type !== "span") return false;
  return flags._splitByBlankLine === true;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Report whether `el` is an `expr` element whose `expression` is the empty string.
 */
export function isEmptyExpr(el: Element): boolean {
  return el.element === "expr" && (el.data as ExprData).expression === "";
}
|
|
51
|
+
|
|
52
|
+
/**
 * Report whether `el` is a `span` container with no child elements.
 */
export function isEmptySpan(el: Element): boolean {
  if (el.element !== "container") return false;

  const container = el.data;
  if (container.type !== "span") return false;
  return container.elements.length === 0;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Report whether `el` is a `text` element whose string data is non-empty and
 * consists solely of characters matched by the regex class `\s`.
 */
export function isWhitespaceText(el: Element): boolean {
  if (el.element !== "text") return false;

  const content: unknown = el.data;
  if (typeof content !== "string") return false;
  return /^\s+$/.test(content);
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import type { Element } from "@wdprlib/ast";
|
|
2
|
+
import { paragraphElement } from "./factory";
|
|
3
|
+
import { getContainerData, isEmptyExpr, isSplitSpan } from "./predicates";
|
|
4
|
+
|
|
5
|
+
/**
 * Split a paragraph into several paragraphs, starting a new one at every
 * direct child accepted by `isSplitSpan`. The split span itself is kept and
 * becomes the first child of the paragraph that follows the cut.
 *
 * @param para - Element expected to be a paragraph container
 * @returns The rebuilt paragraphs, or `[para]` when `para` is not a
 *   paragraph container or has no children
 */
export function splitParagraphAtBlankLineSpans(para: Element): Element[] {
  const data = getContainerData(para);
  if (data === null || data.type !== "paragraph") return [para];

  const pieces: Element[] = [];
  let pending: Element[] = [];

  // Emit the children gathered so far as one paragraph, if there are any.
  const flush = (): void => {
    if (pending.length === 0) return;
    pieces.push(paragraphElement(pending));
    pending = [];
  };

  for (const child of data.elements) {
    // A split span closes the current group before joining the next one.
    if (isSplitSpan(child)) flush();
    pending.push(child);
  }
  flush();

  return pieces.length === 0 ? [para] : pieces;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Split a paragraph at every direct child accepted by `isEmptyExpr`. The
 * empty expression is dropped, together with a `line-break` element directly
 * before it and one directly after it.
 *
 * @param para - Element expected to be a paragraph container
 * @returns `[para]` when `para` is not a paragraph container or holds no
 *   empty expression; otherwise the rebuilt paragraphs, which may be an empty
 *   array when nothing remains after the removals
 */
export function splitParagraphAtEmptyExpr(para: Element): Element[] {
  const data = getContainerData(para);
  if (data === null || data.type !== "paragraph") return [para];

  const { elements } = data;
  if (!elements.some(isEmptyExpr)) return [para];

  const paragraphs: Element[] = [];
  let buffer: Element[] = [];
  let index = 0;

  while (index < elements.length) {
    const child = elements[index];
    index += 1; // from here on `index` addresses the element after `child`
    if (!child) continue;

    if (!isEmptyExpr(child)) {
      buffer.push(child);
      continue;
    }

    // Drop the line break that immediately precedes the empty expression.
    if (buffer[buffer.length - 1]?.element === "line-break") {
      buffer.pop();
    }
    if (buffer.length > 0) {
      paragraphs.push(paragraphElement(buffer));
      buffer = [];
    }
    // Skip the line break that immediately follows the empty expression.
    if (elements[index]?.element === "line-break") {
      index += 1;
    }
  }

  if (buffer.length > 0) {
    paragraphs.push(paragraphElement(buffer));
  }

  return paragraphs;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Report whether `ch` is exactly one of: space, tab, line feed, carriage
 * return. Any other value, including `undefined`, yields `false`.
 */
export function isWhitespace(ch: string | undefined): boolean {
  switch (ch) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
      return true;
    default:
      return false;
  }
}
|
|
4
|
+
|
|
5
|
+
/**
 * Report whether the first UTF-16 code unit of `ch` is an ASCII digit, an
 * ASCII letter, `_` or `-`. `undefined` and the empty string yield `false`.
 */
export function isIdentChar(ch: string | undefined): boolean {
  if (ch === undefined || ch === "") return false;

  const code = ch.charCodeAt(0);
  const isDigit = code >= 48 && code <= 57; // "0".."9"
  const isUpper = code >= 65 && code <= 90; // "A".."Z"
  const isLower = code >= 97 && code <= 122; // "a".."z"
  const isUnderscore = code === 95;
  const isHyphen = code === 45;
  return isDigit || isUpper || isLower || isUnderscore || isHyphen;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { evaluateExpression, formatExprValue, isTruthy } from "@wdprlib/ast";
|
|
2
|
+
import type { DirectiveKind, DirectiveMatch } from "./types";
|
|
3
|
+
|
|
4
|
+
/**
 * Turn a parsed directive into its replacement string.
 *
 * - `expr`: the formatted value of `match.head`; `""` when evaluation fails
 *   with the error `"empty expression"`; `"ERROR"` for any other failure.
 * - `if`: `""` without a pipe; otherwise `thenText` or `elseText` depending
 *   on `isTruthy(match.head)`.
 * - any other kind (i.e. `ifexpr`): `""` without a pipe; `"ERROR"` when
 *   evaluation fails; otherwise `thenText` when the value is neither `0` nor
 *   `NaN`, else `elseText`.
 *
 * @param kind - Directive family
 * @param match - Raw substrings extracted from the directive
 */
export function evaluateDirective(kind: DirectiveKind, match: DirectiveMatch): string {
  switch (kind) {
    case "expr": {
      const outcome = evaluateExpression(match.head);
      if (outcome.success) return formatExprValue(outcome.value);
      return outcome.error === "empty expression" ? "" : "ERROR";
    }
    case "if": {
      if (!match.hasPipe) return "";
      return isTruthy(match.head) ? match.thenText : match.elseText;
    }
    default: {
      if (!match.hasPipe) return "";
      const outcome = evaluateExpression(match.head);
      if (!outcome.success) return "ERROR";
      const conditionHolds = outcome.value !== 0 && !Number.isNaN(outcome.value);
      return conditionHolds ? match.thenText : match.elseText;
    }
  }
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Text-level expansion of `[[#if ...]]`, `[[#ifexpr ...]]`, and
|
|
4
|
+
* `[[#expr ...]]` directives that sit *inside* another block's opener.
|
|
5
|
+
*
|
|
6
|
+
* @module
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { makeUniqueSentinels, maskRawRegions, restorePlaceholders } from "../utils";
|
|
10
|
+
import { expandInnermost } from "./scan";
|
|
11
|
+
|
|
12
|
+
/**
 * Expand `[[#if]]`, `[[#ifexpr]]` and `[[#expr]]` directives that are nested
 * inside another block's opener.
 *
 * The source is returned untouched when it contains no `[[#` at all.
 * Otherwise raw regions are masked with sentinels unique to this source, the
 * masked text is reduced until it stops changing, and the masked regions are
 * restored afterwards.
 *
 * @param source - Wikitext to preprocess
 * @returns The source with nested directives replaced by their evaluation
 */
export function preprocessExpr(source: string): string {
  if (!source.includes("[[#")) return source;

  const sentinels = makeUniqueSentinels(source);
  const maskedSource = maskRawRegions(source, sentinels);
  return restorePlaceholders(
    reduceExpr(maskedSource.masked),
    maskedSource.placeholders,
    sentinels,
  );
}
|
|
27
|
+
|
|
28
|
+
/**
 * Legacy name kept for callers written against the earlier `preprocessIf`
 * export. It is the very same function object as `preprocessExpr`, so it
 * also expands `[[#ifexpr]]` and `[[#expr]]`, not only `[[#if]]`.
 */
export const preprocessIf: typeof preprocessExpr = preprocessExpr;
|
|
35
|
+
|
|
36
|
+
/**
 * Apply `expandInnermost` repeatedly until the text stops changing. The
 * number of passes is capped at `source.length + 1` so the loop always ends.
 */
function reduceExpr(source: string): string {
  let text = source;
  let passesLeft = source.length + 1;

  while (passesLeft > 0) {
    passesLeft -= 1;
    const expanded = expandInnermost(text);
    if (expanded === text) break;
    text = expanded;
  }

  return text;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { DirectiveKind } from "./types";
|
|
2
|
+
import { isIdentChar } from "./chars";
|
|
3
|
+
|
|
4
|
+
/**
 * Directive keywords in the order they are tried. `ifexpr` comes before `if`
 * so the longer keyword is tested first.
 */
const DIRECTIVE_KEYWORDS = ["ifexpr", "if", "expr"] as const;

/**
 * Identify the `[[#keyword` directive opening at index `i`.
 *
 * A keyword matches only when the character right after it is not an
 * identifier character.
 *
 * @param source - Text being scanned
 * @param i - Index at which `[[#` is expected
 * @returns The directive kind, or `null` when no directive opens at `i`
 */
export function matchDirectiveKind(source: string, i: number): DirectiveKind | null {
  if (!source.startsWith("[[#", i)) return null;

  const keywordStart = i + 3;
  for (const keyword of DIRECTIVE_KEYWORDS) {
    const keywordEnd = keywordStart + keyword.length;
    if (source.startsWith(keyword, keywordStart) && !isIdentChar(source[keywordEnd])) {
      return keyword;
    }
  }
  return null;
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { isWhitespace } from "./chars";
|
|
2
|
+
import { matchDirectiveKind } from "./kind";
|
|
3
|
+
import type { DirectiveKind, DirectiveMatch } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
 * Parse one `[[#kind ...]]` directive that opens at `start`, provided it is
 * innermost.
 *
 * Scanning begins after the keyword and any whitespace that follows it.
 * `[[[ ... ]]]` link spans are skipped, nested `[[ ... ]]` pairs are counted,
 * and `|` characters outside both are recorded as branch separators.
 *
 * @param source - Text being scanned
 * @param start - Index of the directive's opening `[[#`
 * @param kind - Directive family already detected at `start`
 * @returns The raw pieces of the directive, or `null` when
 *   - another directive opener occurs before the closing `]]`,
 *   - no closing `]]` is found, or
 *   - the kind is `if` / `ifexpr` and no separator `|` was seen.
 */
export function tryParseInnermostDirective(
  source: string,
  start: number,
  kind: DirectiveKind,
): DirectiveMatch | null {
  let keywordLength = 2;
  if (kind === "ifexpr") keywordLength = 6;
  else if (kind === "expr") keywordLength = 4;

  let cursor = start + 3 + keywordLength;
  while (cursor < source.length && isWhitespace(source[cursor])) cursor += 1;
  const headStart = cursor;

  const pipePositions: number[] = [];
  let nestedBlocks = 0;
  let openLinks = 0;

  while (cursor < source.length) {
    // A directive opener inside the body means this one is not innermost.
    if (matchDirectiveKind(source, cursor) !== null) return null;

    if (source.startsWith("[[[", cursor)) {
      openLinks += 1;
      cursor += 3;
    } else if (openLinks > 0) {
      // Inside a link only its closing `]]]` is significant.
      if (source.startsWith("]]]", cursor)) {
        openLinks -= 1;
        cursor += 3;
      } else {
        cursor += 1;
      }
    } else if (source.startsWith("[[", cursor)) {
      nestedBlocks += 1;
      cursor += 2;
    } else if (source.startsWith("]]", cursor)) {
      if (nestedBlocks === 0) {
        // This `]]` closes the directive itself.
        const hasPipe = pipePositions.length > 0;
        if (!hasPipe && (kind === "if" || kind === "ifexpr")) return null;
        return buildDirectiveMatch(source, headStart, cursor, pipePositions, hasPipe);
      }
      nestedBlocks -= 1;
      cursor += 2;
    } else {
      // No link is open on this path, so only the block depth needs checking.
      if (source[cursor] === "|" && nestedBlocks === 0) {
        pipePositions.push(cursor);
      }
      cursor += 1;
    }
  }

  // Ran off the end without meeting the closing `]]`.
  return null;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Slice a directive body into head / then / else pieces, each trimmed.
 *
 * Without a pipe the whole body is the head. With one pipe everything after
 * it is the `then` branch. With two or more pipes the second one starts the
 * `else` branch, which keeps any further pipes verbatim.
 *
 * @param source - Text containing the directive
 * @param headStart - Index where the head begins
 * @param closeStart - Index of the closing `]]`
 * @param pipes - Indices of the separator `|` characters, in ascending order
 * @param hasPipe - Whether any separator was seen
 */
function buildDirectiveMatch(
  source: string,
  headStart: number,
  closeStart: number,
  pipes: number[],
  hasPipe: boolean,
): DirectiveMatch {
  const end = closeStart + 2;
  const segment = (from: number, to: number): string => source.slice(from, to).trim();

  if (!hasPipe) {
    return { end, head: segment(headStart, closeStart), thenText: "", elseText: "", hasPipe };
  }

  const firstPipe = pipes[0]!;
  const head = segment(headStart, firstPipe);

  if (pipes.length >= 2) {
    const secondPipe = pipes[1]!;
    return {
      end,
      head,
      thenText: segment(firstPipe + 1, secondPipe),
      elseText: segment(secondPipe + 1, closeStart),
      hasPipe,
    };
  }

  return { end, head, thenText: segment(firstPipe + 1, closeStart), elseText: "", hasPipe };
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { computeBracketDepths } from "../utils";
|
|
2
|
+
import { evaluateDirective } from "./evaluate";
|
|
3
|
+
import { matchDirectiveKind } from "./kind";
|
|
4
|
+
import { tryParseInnermostDirective } from "./parse";
|
|
5
|
+
|
|
6
|
+
/**
 * Perform one expansion pass over `source`.
 *
 * Every directive opener found at a position whose bracket depth is greater
 * than zero, and which `tryParseInnermostDirective` accepts, is replaced by
 * the result of `evaluateDirective`. Everything else is copied verbatim.
 *
 * @param source - Text to scan
 * @returns The expanded text, or `source` itself when nothing was replaced
 */
export function expandInnermost(source: string): string {
  const depths = computeBracketDepths(source);
  const parts: string[] = [];
  let copiedUpTo = 0;
  let cursor = 0;

  while (cursor < source.length) {
    const kind = matchDirectiveKind(source, cursor);
    const match =
      kind !== null && depths[cursor]! > 0
        ? tryParseInnermostDirective(source, cursor, kind)
        : null;

    if (kind === null || match === null) {
      cursor += 1;
      continue;
    }

    // Copy the untouched text before the directive, then its replacement.
    parts.push(source.slice(copiedUpTo, cursor), evaluateDirective(kind, match));
    cursor = match.end;
    copiedUpTo = match.end;
  }

  if (parts.length === 0) return source;

  parts.push(source.slice(copiedUpTo));
  return parts.join("");
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** The directive families handled by the expression preprocessor. */
export type DirectiveKind = "if" | "ifexpr" | "expr";

/** Raw, trimmed substrings extracted from a single directive. */
export interface DirectiveMatch {
  /** Index of the first character after the directive's closing `]]`. */
  end: number;
  /** Text before the first separator `|`: the condition or expression. */
  head: string;
  /** Text of the `then` branch; empty when the directive has no `|`. */
  thenText: string;
  /** Text of the `else` branch; empty when fewer than two `|` were found. */
  elseText: string;
  /** `true` when at least one separator `|` was found. */
  hasPipe: boolean;
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Preprocessing pipeline that transforms raw wikitext before tokenization.
|
|
4
|
+
*
|
|
5
|
+
* Wikidot applies two categories of text substitutions before the main parser
|
|
6
|
+
* sees the input. This module orchestrates those substitutions in the correct
|
|
7
|
+
* order: whitespace normalization first (to establish consistent line structure),
|
|
8
|
+
* then typographic transformations (to convert ASCII quote/ellipsis patterns
|
|
9
|
+
* into Unicode equivalents).
|
|
10
|
+
*
|
|
11
|
+
* The preprocessing step is essential because the lexer and parser assume
|
|
12
|
+
* normalized input (Unix newlines, no tabs, consistent whitespace).
|
|
13
|
+
*
|
|
14
|
+
* @module
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { substitute as whitespaceSubstitute } from "./whitespace";
|
|
18
|
+
import { substitute as typographySubstitute } from "./typography";
|
|
19
|
+
|
|
20
|
+
export { substitute as whitespace } from "./whitespace";
|
|
21
|
+
export { substitute as typography } from "./typography";
|
|
22
|
+
|
|
23
|
+
/**
 * Run the preprocessing pipeline on raw wikitext.
 *
 * The whitespace substitution runs first and its output is then passed to
 * the typography substitution.
 *
 * @param text - Raw wikitext input
 * @returns The text after both substitutions
 */
export function preprocess(text: string): string {
  const normalized = whitespaceSubstitute(text);
  return typographySubstitute(normalized);
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Typographic preprocessing for Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* Wikidot converts certain ASCII character sequences into their Unicode
|
|
6
|
+
* typographic equivalents before parsing. This module handles the following
|
|
7
|
+
* conversions:
|
|
8
|
+
*
|
|
9
|
+
* - ` `` ... '' ` becomes left/right double curly quotes (U+201C / U+201D)
|
|
10
|
+
* - ` ,, ... '' ` becomes low-9 double quote + right double quote (U+201E / U+201D)
|
|
11
|
+
* - `` ` ... ' `` becomes left/right single curly quotes (U+2018 / U+2019)
|
|
12
|
+
* - `...` (three dots) and `. . .` (spaced dots) become an ellipsis (U+2026)
|
|
13
|
+
*
|
|
14
|
+
* Em dash conversion (`--` to U+2014) is intentionally NOT handled here.
|
|
15
|
+
* It is performed in the parser instead, because the `--` sequence also appears
|
|
16
|
+
* in HTML comment markers (`[!--` and `--]`), and converting it during
|
|
17
|
+
* preprocessing would break comment detection.
|
|
18
|
+
*
|
|
19
|
+
* @module
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/** U+2018 LEFT SINGLE QUOTATION MARK (‘). */
const LEFT_SINGLE_QUOTE = "\u2018";
/** U+2019 RIGHT SINGLE QUOTATION MARK (’). */
const RIGHT_SINGLE_QUOTE = "\u2019";
/** U+201C LEFT DOUBLE QUOTATION MARK (“). */
const LEFT_DOUBLE_QUOTE = "\u201c";
/** U+201D RIGHT DOUBLE QUOTATION MARK (”). */
const RIGHT_DOUBLE_QUOTE = "\u201d";
/** U+201E DOUBLE LOW-9 QUOTATION MARK („), used in German/Polish typography. */
const LOW_DOUBLE_QUOTE = "\u201e";
/** U+2026 HORIZONTAL ELLIPSIS (…). */
const ELLIPSIS = "\u2026";
|
|
34
|
+
|
|
35
|
+
/**
 * Replace each occurrence of `pattern` with the ellipsis character, skipping
 * occurrences that have a `.` directly before or directly after them.
 *
 * @param text - Text to scan
 * @param pattern - Literal dot sequence to look for
 * @returns The rewritten text, or `text` itself when nothing was replaced
 */
function replaceExactEllipsisPattern(text: string, pattern: string): string {
  const width = pattern.length;
  const chunks: string[] = [];
  let copied = 0;
  let cursor = 0;

  while (cursor < text.length) {
    const hit = text.indexOf(pattern, cursor);
    if (hit === -1) break;

    // charAt yields "" for out-of-range indices, i.e. at either end of text.
    const before = text.charAt(hit - 1);
    const after = text.charAt(hit + width);
    if (before === "." || after === ".") {
      // Part of a longer dot run: retry one character further on.
      cursor = hit + 1;
      continue;
    }

    chunks.push(text.slice(copied, hit), ELLIPSIS);
    copied = hit + width;
    cursor = copied;
  }

  return copied === 0 ? text : chunks.join("") + text.slice(copied);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Replace each `opener ... closer` pair, scanning left to right, with
 * `leftQuote ... rightQuote`. Scanning stops at the first opener that has no
 * closer after it; the remaining text is copied unchanged.
 *
 * @param text - Text to scan
 * @param opener - Literal opening delimiter
 * @param closer - Literal closing delimiter
 * @param leftQuote - Replacement for the opener
 * @param rightQuote - Replacement for the closer
 * @returns The rewritten text, or `text` itself when no pair was found
 */
function replaceDelimitedTypography(
  text: string,
  opener: string,
  closer: string,
  leftQuote: string,
  rightQuote: string,
): string {
  let output = "";
  // Both "where to search next" and "what has been copied so far".
  let cursor = 0;

  while (cursor < text.length) {
    const openAt = text.indexOf(opener, cursor);
    const bodyStart = openAt + opener.length;
    const closeAt = openAt === -1 ? -1 : text.indexOf(closer, bodyStart);
    if (closeAt === -1) break;

    output += `${text.slice(cursor, openAt)}${leftQuote}${text.slice(bodyStart, closeAt)}${rightQuote}`;
    cursor = closeAt + closer.length;
  }

  return cursor === 0 ? text : output + text.slice(cursor);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Apply every typographic substitution to `text`.
 *
 * Rules run in a fixed order: double quotes, low double quotes, single
 * quotes, three consecutive dots, three spaced dots. The two-character
 * delimiters are handled before the single backtick / apostrophe pair
 * because they are built from the same characters. Each rule is skipped
 * when its delimiters are absent from the text.
 *
 * @param text - Text to transform
 * @returns Text with the ASCII patterns replaced by their Unicode equivalents
 */
export function substitute(text: string): string {
  const quoteRules = [
    ["``", "''", LEFT_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE],
    [",,", "''", LOW_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE],
    ["`", "'", LEFT_SINGLE_QUOTE, RIGHT_SINGLE_QUOTE],
  ] as const;
  const ellipsisPatterns = ["...", ". . ."] as const;

  let output = text;

  for (const [opener, closer, left, right] of quoteRules) {
    if (output.includes(opener) && output.includes(closer)) {
      output = replaceDelimitedTypography(output, opener, closer, left, right);
    }
  }

  // Only exact three-dot runs are converted; longer runs are left alone.
  for (const dots of ellipsisPatterns) {
    if (output.includes(dots)) {
      output = replaceExactEllipsisPattern(output, dots);
    }
  }

  return output;
}
|