@wdprlib/parser 3.2.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10471 -8406
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10457 -8392
- package/package.json +1 -1
- package/src/index.ts +7 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +3 -2
- package/src/lexer/lexer.ts +73 -559
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/parser/constants.ts +2 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/typography.ts +70 -5
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
- package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +11 -2
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +14 -2
- package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
- package/src/parser/rules/block/module/listpages/extract.ts +25 -359
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +30 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +62 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +34 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
- package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
- package/src/parser/rules/block/module/listusers/extract.ts +4 -9
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +79 -292
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +11 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +10 -610
- package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +2 -39
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/italic.ts +2 -30
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +6 -81
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +7 -98
- package/src/parser/rules/inline/link-star.ts +7 -69
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +2 -30
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +2 -39
- package/src/parser/rules/inline/superscript.ts +4 -39
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -344
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -252
- package/src/parser/depth.ts +0 -251
- package/src/parser/parse.ts +0 -315
- package/src/parser/postprocess/spanStrip.ts +0 -697
- package/src/parser/preprocess/expr.ts +0 -265
- package/src/parser/preprocess/utils.ts +0 -250
- package/src/parser/preprocess/whitespace.ts +0 -111
- package/src/parser/rules/block/align.ts +0 -282
- package/src/parser/rules/block/bibliography.ts +0 -359
- package/src/parser/rules/block/block-list.ts +0 -689
- package/src/parser/rules/block/blockquote.ts +0 -238
- package/src/parser/rules/block/code.ts +0 -187
- package/src/parser/rules/block/collapsible.ts +0 -337
- package/src/parser/rules/block/definition-list.ts +0 -270
- package/src/parser/rules/block/div.ts +0 -400
- package/src/parser/rules/block/embed-block.ts +0 -153
- package/src/parser/rules/block/footnoteblock.ts +0 -200
- package/src/parser/rules/block/heading.ts +0 -142
- package/src/parser/rules/block/html.ts +0 -222
- package/src/parser/rules/block/iframe.ts +0 -239
- package/src/parser/rules/block/include.ts +0 -179
- package/src/parser/rules/block/list.ts +0 -244
- package/src/parser/rules/block/math.ts +0 -183
- package/src/parser/rules/block/module/include/resolve.ts +0 -556
- package/src/parser/rules/block/module/listpages/types.ts +0 -513
- package/src/parser/rules/block/module/walk.ts +0 -380
- package/src/parser/rules/block/module.ts +0 -164
- package/src/parser/rules/block/orphan-li.ts +0 -177
- package/src/parser/rules/block/paragraph.ts +0 -157
- package/src/parser/rules/block/table-block.ts +0 -726
- package/src/parser/rules/block/table.ts +0 -441
- package/src/parser/rules/block/tabview.ts +0 -331
- package/src/parser/rules/block/toc.ts +0 -129
- package/src/parser/rules/inline/anchor-name.ts +0 -154
- package/src/parser/rules/inline/anchor.ts +0 -327
- package/src/parser/rules/inline/bibcite.ts +0 -153
- package/src/parser/rules/inline/color.ts +0 -140
- package/src/parser/rules/inline/equation-ref.ts +0 -115
- package/src/parser/rules/inline/expr.ts +0 -526
- package/src/parser/rules/inline/footnote.ts +0 -223
- package/src/parser/rules/inline/image.ts +0 -328
- package/src/parser/rules/inline/line-break.ts +0 -326
- package/src/parser/rules/inline/link-triple.ts +0 -267
- package/src/parser/rules/inline/math-inline.ts +0 -126
- package/src/parser/rules/inline/raw.ts +0 -262
- package/src/parser/rules/inline/size.ts +0 -244
- package/src/parser/rules/inline/span.ts +0 -424
- package/src/parser/rules/inline/user.ts +0 -147
|
@@ -1,265 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
*
|
|
3
|
-
* Text-level expansion of `[[#if ...]]`, `[[#ifexpr ...]]`, and
|
|
4
|
-
* `[[#expr ...]]` directives that sit *inside* another block's opener.
|
|
5
|
-
*
|
|
6
|
-
* The inline rules in `rules/inline/expr.ts` parse these forms as regular
|
|
7
|
-
* inline elements, but that only works when the directive appears in
|
|
8
|
-
* parseable inline text. When one is embedded inside a block opener's
|
|
9
|
-
* attribute string, e.g.
|
|
10
|
-
*
|
|
11
|
-
* ```wikitext
|
|
12
|
-
* [[div class="x [[#if 1 | a | b ]]"]]
|
|
13
|
-
* [[li class="[[#if 1 | folded | unfolded ]] [[#ifexpr 1>0 | hot | cold ]]"]]
|
|
14
|
-
* [[div col="[[#expr 1+1]]"]]
|
|
15
|
-
* ```
|
|
16
|
-
*
|
|
17
|
-
* the lexer cannot recover a well-formed opener from the input. The
|
|
18
|
-
* embedded directive has to collapse to a plain string before the parser
|
|
19
|
-
* sees the outer tag.
|
|
20
|
-
*
|
|
21
|
-
* This pass only resolves directives whose `[[#` sits inside an unclosed
|
|
22
|
-
* `[[` (depth > 0). Top-level directives are left untouched so the inline
|
|
23
|
-
* parser / AST renderer keeps its full evaluator + element support.
|
|
24
|
-
*
|
|
25
|
-
* Truthiness rules match the inline `ifRule` / `ifExprRule`: an empty
|
|
26
|
-
* string, `"0"`, `"false"`, `"null"` (case-insensitive) are falsy.
|
|
27
|
-
*
|
|
28
|
-
* @module
|
|
29
|
-
*/
|
|
30
|
-
|
|
31
|
-
import { evaluateExpression, formatExprValue, isTruthy } from "@wdprlib/ast";
|
|
32
|
-
import {
|
|
33
|
-
computeBracketDepths,
|
|
34
|
-
makeUniqueSentinels,
|
|
35
|
-
maskRawRegions,
|
|
36
|
-
restorePlaceholders,
|
|
37
|
-
} from "./utils";
|
|
38
|
-
|
|
39
|
-
/**
 * Expand `[[#if]]`, `[[#ifexpr]]`, and `[[#expr]]` directives that are
 * nested inside another block's opener (bracket depth > 0).
 *
 * Directives at the top level are deliberately not touched; they are left
 * for the inline parser. Reduction proceeds innermost-first and repeats, so
 * an outer directive can be re-evaluated once its inner directives have been
 * flattened. Unmatched or malformed directives are left as-is.
 *
 * Raw regions are masked with unique sentinels before reduction and restored
 * afterwards (presumably so directive-like text inside them is not expanded
 * — confirm against `./utils`).
 *
 * @param source - Wikitext to preprocess.
 * @returns The text with opener-embedded directives replaced by their
 *   evaluated strings, or `source` itself when it contains no `[[#`.
 */
export function preprocessExpr(source: string): string {
  // Fast path: without a `[[#` there is no directive to expand.
  if (source.indexOf("[[#") === -1) {
    return source;
  }

  const sentinels = makeUniqueSentinels(source);
  const maskResult = maskRawRegions(source, sentinels);
  const expanded = reduceExpr(maskResult.masked);
  return restorePlaceholders(expanded, maskResult.placeholders, sentinels);
}
|
|
54
|
-
|
|
55
|
-
/**
 * Legacy name for {@link preprocessExpr}, kept so external callers written
 * against the earlier `[[#if]]`-only preprocessing keep working.
 *
 * This is the very same function, not a wrapper: calling it also resolves
 * `[[#ifexpr]]` and `[[#expr]]` directives in opener context.
 */
export const preprocessIf: typeof preprocessExpr = preprocessExpr;
|
|
62
|
-
|
|
63
|
-
/**
 * Repeatedly apply {@link expandInnermost} until the text stops changing.
 *
 * The number of passes is capped at `source.length + 1` so the loop always
 * terminates, even if an expansion pass never reaches a fixed point.
 *
 * @param source - Text whose raw regions have already been masked.
 * @returns The text after the last pass that was performed.
 */
function reduceExpr(source: string): string {
  let text = source;
  let passesLeft = source.length + 1;

  while (passesLeft > 0) {
    const expanded = expandInnermost(text);
    // Fixed point reached: nothing left to expand.
    if (expanded === text) {
      break;
    }
    text = expanded;
    passesLeft -= 1;
  }

  return text;
}
|
|
73
|
-
|
|
74
|
-
/**
 * Perform one left-to-right expansion pass over `source`.
 *
 * At every position that starts a `[[#if` / `[[#ifexpr` / `[[#expr`
 * directive and whose bracket depth is greater than zero (i.e. it sits
 * inside an unclosed `[[`), the directive is parsed; when parsing succeeds
 * it is replaced by its evaluated string and scanning resumes just past the
 * directive's end. Every other character is copied through unchanged.
 *
 * @param source - Text to scan.
 * @returns The rewritten text, or the original `source` reference when no
 *   directive was replaced (callers compare by identity/equality to detect
 *   a fixed point).
 */
function expandInnermost(source: string): string {
  const depths = computeBracketDepths(source);
  let output = "";
  let changed = false;

  for (let cursor = 0; cursor < source.length; ) {
    const kind = matchDirectiveKind(source, cursor);
    // Only attempt a parse for directives nested inside an open `[[`.
    const match =
      kind !== null && depths[cursor]! > 0
        ? tryParseInnermostDirective(source, cursor, kind)
        : null;

    if (kind === null || match === null) {
      // Not an expandable directive here: copy one character and move on.
      output += source[cursor];
      cursor += 1;
      continue;
    }

    output += evaluateDirective(kind, match);
    cursor = match.end;
    changed = true;
  }

  return changed ? output : source;
}
|
|
103
|
-
|
|
104
|
-
/** Keyword of a preprocessor directive opener: `[[#if`, `[[#ifexpr`, or `[[#expr`. */
type DirectiveKind = "if" | "ifexpr" | "expr";
|
|
105
|
-
|
|
106
|
-
/**
 * Identify the directive that opens at index `i`, if any.
 *
 * A directive opener is `[[#` immediately followed by one of the keywords
 * `ifexpr`, `if`, or `expr`, where the character right after the keyword is
 * not an identifier character (so e.g. a longer word that merely starts
 * with `if` is not mistaken for the `if` directive).
 *
 * @param source - Text being scanned.
 * @param i - Index at which the `[[#` is expected to start.
 * @returns The matched directive kind, or `null` when no directive opens at `i`.
 */
function matchDirectiveKind(source: string, i: number): DirectiveKind | null {
  if (!source.startsWith("[[#", i)) {
    return null;
  }

  const keywordStart = i + 3;
  // Checked in this order on purpose: `ifexpr` is tried before its own
  // prefix `if`, mirroring the longest-keyword-first rule.
  const keywords: readonly DirectiveKind[] = ["ifexpr", "if", "expr"];

  for (const keyword of keywords) {
    if (!source.startsWith(keyword, keywordStart)) {
      continue;
    }
    if (isIdentChar(source[keywordStart + keyword.length])) {
      continue;
    }
    return keyword;
  }

  return null;
}
|
|
122
|
-
|
|
123
|
-
/** Raw pieces of one successfully scanned `[[#kind ...]]` directive. */
interface DirectiveMatch {
  /** Offset of the first character after the directive's closing `]]`. */
  end: number;
  /**
   * Trimmed condition / expression text: from just after the keyword up to
   * the first top-level `|`, or up to the closing `]]` when there is no `|`.
   */
  head: string;
  /** Trimmed `then` branch; the empty string when the directive has no `|`. */
  thenText: string;
  /** Trimmed `else` branch; the empty string when fewer than two `|` were found. */
  elseText: string;
  /** `true` when at least one top-level `|` was present. */
  hasPipe: boolean;
}
|
|
135
|
-
|
|
136
|
-
/**
 * Scan the `[[#kind ...]]` directive that opens at `start` and split it into
 * its raw head / then / else parts.
 *
 * Returns `null` (meaning "do not replace this one") when
 *   - another `[[#if` / `[[#ifexpr` / `[[#expr` opener appears before the
 *     closing `]]`, so this directive is not innermost,
 *   - no closing `]]` is found, or
 *   - the kind is `if` / `ifexpr` and the body has no top-level `|`.
 *
 * `[[ ... ]]` blocks and `[[[ ... ]]]` links inside the body are tracked so
 * that their `]]` and `|` characters are not mistaken for the directive's
 * own. The returned substrings are trimmed but otherwise unevaluated.
 */
function tryParseInnermostDirective(
  source: string,
  start: number,
  kind: DirectiveKind,
): DirectiveMatch | null {
  // Skip "[[#" (3 chars) plus the keyword; each kind's name is its keyword.
  let cursor = start + 3 + kind.length;
  // No separator is required after the keyword (`[[#expr(1+1)]]` is valid),
  // but optional whitespace before the body is skipped.
  while (cursor < source.length && isWhitespace(source[cursor])) cursor += 1;

  const bodyStart = cursor;
  const pipeOffsets: number[] = [];
  let nestedBlocks = 0;
  let openLinks = 0;
  let closeAt = -1;

  while (cursor < source.length && closeAt === -1) {
    // A nested directive means this one is not innermost: bail out.
    if (matchDirectiveKind(source, cursor) !== null) return null;

    if (source.startsWith("[[[", cursor)) {
      openLinks += 1;
      cursor += 3;
    } else if (openLinks > 0) {
      // Inside a triple-bracket link only its terminator is significant.
      if (source.startsWith("]]]", cursor)) {
        openLinks -= 1;
        cursor += 3;
      } else {
        cursor += 1;
      }
    } else if (source.startsWith("[[", cursor)) {
      nestedBlocks += 1;
      cursor += 2;
    } else if (source.startsWith("]]", cursor)) {
      if (nestedBlocks === 0) {
        // This `]]` closes the directive itself; the loop condition ends the scan.
        closeAt = cursor;
      } else {
        nestedBlocks -= 1;
        cursor += 2;
      }
    } else {
      // Only pipes outside nested blocks and links split the directive.
      if (source[cursor] === "|" && nestedBlocks === 0 && openLinks === 0) {
        pipeOffsets.push(cursor);
      }
      cursor += 1;
    }
  }

  if (closeAt === -1) return null;

  const end = closeAt + 2;
  const [firstPipe, secondPipe] = pipeOffsets;

  if (firstPipe === undefined) {
    // `if` / `ifexpr` need a `then` branch; without one the directive is
    // left untouched rather than silently dropped.
    if (kind !== "expr") return null;
    return {
      end,
      head: source.slice(bodyStart, closeAt).trim(),
      thenText: "",
      elseText: "",
      hasPipe: false,
    };
  }

  const head = source.slice(bodyStart, firstPipe).trim();
  const thenText = source.slice(firstPipe + 1, secondPipe ?? closeAt).trim();
  // Everything after the second pipe (further pipes included) is the else branch.
  const elseText = secondPipe === undefined ? "" : source.slice(secondPipe + 1, closeAt).trim();

  return {
    end,
    head,
    thenText,
    elseText,
    hasPipe: true,
  };
}
|
|
233
|
-
|
|
234
|
-
/**
 * Turn a scanned directive into the text that replaces it.
 *
 * - `expr`: the formatted value of the expression; `""` for an empty
 *   expression; `"ERROR"` for any other evaluation failure.
 * - `if`: the `then` text when the head is truthy, otherwise the `else` text.
 * - `ifexpr`: `"ERROR"` when the expression fails to evaluate (an empty
 *   expression included); otherwise the `then` text for a value that is
 *   neither `0` nor `NaN`, and the `else` text for the rest.
 *
 * `if` / `ifexpr` without any `|` yield `""`.
 */
function evaluateDirective(kind: DirectiveKind, m: DirectiveMatch): string {
  switch (kind) {
    case "expr": {
      const outcome = evaluateExpression(m.head);
      if (outcome.success) return formatExprValue(outcome.value);
      // An empty `[[#expr ]]` collapses to nothing instead of the error
      // placeholder, so an opener-embedded empty expr gives an empty value.
      return outcome.error === "empty expression" ? "" : "ERROR";
    }
    case "if": {
      if (!m.hasPipe) return "";
      return isTruthy(m.head) ? m.thenText : m.elseText;
    }
    default: {
      // ifexpr: every evaluation error keeps the placeholder so a malformed
      // conditional is not silently swallowed.
      if (!m.hasPipe) return "";
      const outcome = evaluateExpression(m.head);
      if (!outcome.success) return "ERROR";
      const falsy = outcome.value === 0 || Number.isNaN(outcome.value);
      return falsy ? m.elseText : m.thenText;
    }
  }
}
|
|
257
|
-
|
|
258
|
-
/** True only for a single space, tab, line feed or carriage return. */
function isWhitespace(ch: string | undefined): boolean {
  switch (ch) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
      return true;
    default:
      return false;
  }
}
|
|
261
|
-
|
|
262
|
-
/**
 * True when `ch` contains an ASCII letter, digit, `_` or `-`.
 * `undefined` and the empty string are never identifier characters.
 */
function isIdentChar(ch: string | undefined): boolean {
  if (ch === undefined || ch === "") return false;
  return ch.search(/[a-z0-9_-]/i) !== -1;
}
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
*
|
|
3
|
-
* Shared helpers for text-level preprocess passes that run before
|
|
4
|
-
* tokenization (e.g. `[[iftags]]` collapse, opener-embedded `[[#if]]`
|
|
5
|
-
* collapse).
|
|
6
|
-
*
|
|
7
|
-
* Each pass needs to:
|
|
8
|
-
* - mask raw regions (`[[code]]`, `[[html]]`, `@@..@@`, `@<..>@`) so a
|
|
9
|
-
* pattern they enclose is not transformed
|
|
10
|
-
* - know the bracket-opener depth at every offset so it can distinguish
|
|
11
|
-
* directives at the top level from ones nested inside another block's
|
|
12
|
-
* opener attribute string
|
|
13
|
-
*
|
|
14
|
-
* The depth tracking mirrors the lexer's `blockOpenerDepth`:
|
|
15
|
-
* - `[[` increments, `]]` decrements (clamped at 0)
|
|
16
|
-
* - `[[[ ... ]]]` triple links do not affect block depth
|
|
17
|
-
* - quoted attribute values (`= "..."`) are skipped to the next `"` /
|
|
18
|
-
* newline, matching the lexer's `QUOTED_STRING` recognition
|
|
19
|
-
* - newlines reset depth to 0 (block openers are single-line constructs)
|
|
20
|
-
*
|
|
21
|
-
* @module
|
|
22
|
-
*/
|
|
23
|
-
|
|
24
|
-
/** Base character (Private Use Area U+E000) from which the opening sentinel is built. */
const BASE_PLACEHOLDER_OPEN = "\uE000";
/** Base character (Private Use Area U+E001) from which the closing sentinel is built. */
const BASE_PLACEHOLDER_CLOSE = "\uE001";

/**
 * Opening tag of a raw block: `[[code ...]]` or `[[html ...]]`, in any letter
 * case and with optional whitespace after `[[`. Capture group 1 is the block
 * name as written. The pattern is sticky (`y`), so it only matches exactly at
 * `lastIndex`; set `lastIndex` before every `exec` call.
 */
const RAW_BLOCK_OPEN_PATTERN = /\[\[\s*(code|html)\b[^\]]*\]\]/iy;
|
|
28
|
-
|
|
29
|
-
/**
 * Pair of marker strings that bracket a raw-region placeholder. A placeholder
 * is spelled `<open><index><close>`.
 */
export interface Sentinels {
  /** Marker placed immediately before the placeholder index. */
  open: string;
  /** Marker placed immediately after the placeholder index. */
  close: string;
}
|
|
34
|
-
|
|
35
|
-
/**
 * Build an open/close sentinel pair of which neither string occurs in
 * `source`.
 *
 * Both sentinels are the base placeholder character repeated `width` times.
 * `width` starts at 1 and grows until `source` contains neither string, so
 * a `<open><digits><close>` placeholder cannot be confused with text that
 * was already in the input.
 */
export function makeUniqueSentinels(source: string): Sentinels {
  for (let width = 1; ; width += 1) {
    const open = BASE_PLACEHOLDER_OPEN.repeat(width);
    const close = BASE_PLACEHOLDER_CLOSE.repeat(width);
    const collides = source.includes(open) || source.includes(close);
    if (!collides) return { open, close };
  }
}
|
|
50
|
-
|
|
51
|
-
/**
 * Locate the raw region that starts exactly at offset `at`.
 *
 * @returns the offset just past the region, or `-1` when no maskable raw
 *          region starts at `at`.
 */
function findRawRegionEnd(source: string, at: number): number {
  const first = source[at];
  const second = source[at + 1];

  if (first === "[" && second === "[") {
    RAW_BLOCK_OPEN_PATTERN.lastIndex = at;
    const opener = RAW_BLOCK_OPEN_PATTERN.exec(source);
    if (!opener) return -1;

    const name = opener[1]!.toLowerCase();
    const closer = new RegExp(`\\[\\[\\/\\s*${name}\\s*\\]\\]`, "ig");
    closer.lastIndex = at + opener[0].length;
    const closing = closer.exec(source);
    if (closing) return closing.index + closing[0].length;
    // An unclosed `[[code]]` swallows the rest of the input; an unclosed
    // `[[html]]` is not treated as a raw region at all.
    return name === "code" ? source.length : -1;
  }

  if (first === "@" && (second === "<" || second === "@")) {
    const terminator = second === "<" ? ">@" : "@@";
    const close = source.indexOf(terminator, at + 2);
    const newline = source.indexOf("\n", at + 2);
    // Inline raw spans must be closed on the line they start on.
    if (close !== -1 && (newline === -1 || close < newline)) return close + 2;
  }

  return -1;
}

/**
 * Replace every raw region of `source` with a `<open><index><close>`
 * placeholder so that later text passes cannot rewrite raw content. The
 * removed substrings are collected, in order, in `placeholders`; pass them to
 * {@link restorePlaceholders} to splice them back.
 *
 * Regions that are masked:
 * - `[[code ...]] ... [[/code]]`; without a closing tag the region runs to
 *   the end of the input.
 * - `[[html ...]] ... [[/html]]`; only when the closing tag exists, so an
 *   unclosed `[[html]]` does not hide later directives.
 * - `@< ... >@` and `@@ ... @@`; only when the terminator is on the same line.
 *
 * Unterminated `@@` / `@<` and `[!-- ... --]` comments are copied through
 * unchanged (comments are deliberately not masked by this pass).
 */
export function maskRawRegions(
  source: string,
  sentinels: Sentinels,
): { masked: string; placeholders: string[] } {
  const placeholders: string[] = [];
  const chunks: string[] = [];
  let offset = 0;

  while (offset < source.length) {
    const regionEnd = findRawRegionEnd(source, offset);
    if (regionEnd === -1) {
      chunks.push(source.charAt(offset));
      offset += 1;
      continue;
    }
    chunks.push(pushPlaceholder(placeholders, source.slice(offset, regionEnd), sentinels));
    offset = regionEnd;
  }

  return { masked: chunks.join(""), placeholders };
}
|
|
132
|
-
|
|
133
|
-
/**
 * Append `text` to `placeholders` and return the token that stands in for it:
 * the open sentinel, the index `text` was stored at, then the close sentinel.
 */
function pushPlaceholder(placeholders: string[], text: string, sentinels: Sentinels): string {
  // `push` returns the new length, so the stored slot is one less.
  const slot = placeholders.push(text) - 1;
  return sentinels.open + String(slot) + sentinels.close;
}
|
|
138
|
-
|
|
139
|
-
/** Backslash-escape every regular-expression metacharacter in `str`. */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, (special) => "\\" + special);
}
|
|
142
|
-
|
|
143
|
-
/**
 * Undo {@link maskRawRegions}: every `<open><index><close>` token in `source`
 * is replaced by `placeholders[index]`. A token whose index has no stored
 * entry is replaced by the empty string.
 */
export function restorePlaceholders(
  source: string,
  placeholders: string[],
  sentinels: Sentinels,
): string {
  const opener = escapeRegex(sentinels.open);
  const closer = escapeRegex(sentinels.close);
  const token = new RegExp(`${opener}(\\d+)${closer}`, "g");
  // A replacer function is used so `$` sequences inside the restored text
  // are inserted literally.
  return source.replace(token, (_whole, digits: string) => {
    const original = placeholders[Number(digits)];
    return original ?? "";
  });
}
|
|
155
|
-
|
|
156
|
-
/**
 * Compute the unmatched-`[[` depth at each character offset of `source`.
 *
 * The result has `source.length + 1` entries; `depths[k]` is the depth in
 * effect immediately before the character at offset `k` is consumed, and the
 * final entry is the depth at end of input.
 *
 * Rules:
 * - `[[` increments the depth, `]]` decrements it (never below 0).
 * - `[[[ ... ]]]` links are skipped as a unit and leave the depth unchanged.
 * - Inside an opener, a `"` that follows `=` starts a quoted attribute value
 *   that is skipped up to the closing `"` on the same line.
 * - A newline resets the depth to 0. This also holds when a quoted value is
 *   left unterminated: the scan stops in front of the newline so that the
 *   reset still happens. (Previously the newline was swallowed together with
 *   the unterminated value and the opener depth leaked into the next line.)
 */
export function computeBracketDepths(source: string): Int32Array {
  const n = source.length;
  const depths = new Int32Array(n + 1);
  let depth = 0;
  let i = 0;
  while (i < n) {
    depths[i] = depth;
    const c = source.charCodeAt(i);
    const c1 = i + 1 < n ? source.charCodeAt(i + 1) : -1;
    const c2 = i + 2 < n ? source.charCodeAt(i + 2) : -1;

    if (depth > 0 && c === 0x22 /* " */ && precededByEqualsAttr(source, i)) {
      const end = findQuoteEnd(source, i + 1);
      // When the value is unterminated `end` points at the newline; leave
      // that newline to the reset branch below instead of skipping over it.
      const last = source.charCodeAt(end) === 0x0a /* \n */ ? end - 1 : end;
      depths.fill(depth, i, last + 1);
      i = last + 1;
      continue;
    }

    if (c === 0x5b /* [ */ && c1 === 0x5b && c2 === 0x5b) {
      const end = findTripleLinkEnd(source, i + 3);
      depths.fill(depth, i, end + 1);
      i = end + 1;
      continue;
    }

    if (c === 0x5b /* [ */ && c1 === 0x5b) {
      depth++;
      depths[i + 1] = depth;
      i += 2;
      continue;
    }

    if (c === 0x5d /* ] */ && c1 === 0x5d) {
      depth = Math.max(0, depth - 1);
      depths[i + 1] = depth;
      i += 2;
      continue;
    }

    if (c === 0x0a /* \n */) {
      // Block openers are single-line; reset depth at line boundaries so
      // an unterminated `[[xxx` does not keep subsequent directives
      // inside its (imaginary) opener context.
      depth = 0;
    }

    i++;
  }
  depths[n] = depth;
  return depths;
}

/**
 * True when the nearest character before offset `i`, ignoring spaces and
 * tabs, is `=`; that is, the character at `i` begins an attribute value.
 */
function precededByEqualsAttr(s: string, i: number): boolean {
  let j = i - 1;
  while (j >= 0) {
    const ch = s.charCodeAt(j);
    if (ch === 0x20 /* space */ || ch === 0x09 /* tab */) {
      j--;
      continue;
    }
    return ch === 0x3d; /* = */
  }
  return false;
}

/**
 * Offset of the first `"` or newline at or after `from`, or the last offset
 * of `s` when neither occurs.
 */
function findQuoteEnd(s: string, from: number): number {
  for (let i = from; i < s.length; i++) {
    const ch = s.charCodeAt(i);
    if (ch === 0x22 /* " */ || ch === 0x0a /* \n */) return i;
  }
  return s.length - 1;
}

/**
 * Offset at which a `[[[` link whose content starts at `from` ends: the last
 * `]` of the closing `]]]`, or the first newline of a blank-line break
 * (`\n\n`), whichever comes first. Falls back to the last offset of `s`.
 */
function findTripleLinkEnd(s: string, from: number): number {
  for (let i = from; i < s.length; i++) {
    if (
      s.charCodeAt(i) === 0x5d &&
      i + 2 < s.length &&
      s.charCodeAt(i + 1) === 0x5d &&
      s.charCodeAt(i + 2) === 0x5d
    ) {
      return i + 2;
    }
    if (s.charCodeAt(i) === 0x0a && i + 1 < s.length && s.charCodeAt(i + 1) === 0x0a) {
      return i;
    }
  }
  return s.length - 1;
}
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
*
|
|
3
|
-
* Whitespace normalization preprocessing for Wikidot markup.
|
|
4
|
-
*
|
|
5
|
-
* This module ensures the lexer and parser receive input with consistent
|
|
6
|
-
* whitespace conventions. It handles platform differences (DOS/Mac newlines),
|
|
7
|
-
* normalizes exotic whitespace characters that users may paste from external
|
|
8
|
-
* sources, and applies Wikidot-specific behaviors like backslash line continuation.
|
|
9
|
-
*
|
|
10
|
-
* Substitutions are applied in a deliberate order:
|
|
11
|
-
* 1. Newline normalization (DOS `\r\n` and legacy Mac `\r` to Unix `\n`)
|
|
12
|
-
* 2. Non-standard leading whitespace replacement (nbsp, figure space to regular space)
|
|
13
|
-
* 3. Whitespace-only line stripping (collapse to empty lines)
|
|
14
|
-
* 4. Backslash line continuation (`\\\n` to line-break marker U+E000)
|
|
15
|
-
* 5. Tab expansion (tab to four spaces)
|
|
16
|
-
* 6. Null character replacement (NUL to space)
|
|
17
|
-
* 7. Leading/trailing newline removal
|
|
18
|
-
*
|
|
19
|
-
* @module
|
|
20
|
-
*/
|
|
21
|
-
|
|
22
|
-
/**
 * Run of non-breaking spaces (U+00A0) and/or figure spaces (U+2007) at the
 * very start of a line. Replaced by the same number of ASCII spaces.
 */
const LEADING_NONSTANDARD_WHITESPACE = /^[\u00a0\u2007]+/gm;

/**
 * A line that consists of whitespace only; it is collapsed to an empty line.
 * NOTE(review): `\s` also matches `\n`, so several consecutive
 * whitespace-only lines can be matched, and collapsed, as a single span.
 */
const WHITESPACE_ONLY_LINE = /^\s+$/gm;

/** One or more newlines at the very start of the text. */
const LEADING_NEWLINES = /^\n+/;

/** One or more newlines at the very end of the text. */
const TRAILING_NEWLINES = /\n+$/;

/** DOS (`\r\n`) and legacy Mac (`\r`) line endings. */
const DOS_MAC_NEWLINES = /\r\n?/g;

/**
 * A backslash immediately followed by a newline. In Wikidot a trailing `\`
 * is an explicit line break (`<br />`).
 */
const CONCAT_LINES = /\\\n/g;

/** Tab characters (each expanded to four spaces). */
const TABS = /\t/g;

/** NUL characters (each replaced with a space). */
const NULL_CHARS = /\0/g;

/** Replacement for one tab character: four ASCII spaces. */
const TAB_EXPANSION = " ".repeat(4);

/**
 * Replace non-standard whitespace characters at the start of each line with
 * the same number of regular ASCII spaces, so indentation-sensitive
 * constructs see ordinary spaces whichever space character was typed.
 *
 * @param text - Input text with potentially non-standard leading whitespace
 * @returns Text with leading non-standard whitespace replaced by ASCII spaces
 */
function replaceLeadingSpaces(text: string): string {
  return text.replace(LEADING_NONSTANDARD_WHITESPACE, (run) => " ".repeat(run.length));
}

/**
 * Apply all whitespace normalization substitutions to the given text.
 *
 * Steps, in order (the order avoids interference between steps; e.g. DOS
 * newlines must be normalized before a trailing backslash can be detected):
 *   1. `\r\n` and `\r` become `\n`
 *   2. leading U+00A0 / U+2007 become ASCII spaces
 *   3. whitespace-only lines become empty lines
 *   4. backslash + newline becomes the line-break marker U+E000
 *   5. each tab becomes four spaces
 *   6. each NUL becomes a space
 *   7. leading and trailing newlines are removed
 *
 * Fix: step 5 now really inserts four spaces, as documented; it used to
 * insert a single space.
 *
 * @param text - Raw input text
 * @returns Text with normalized whitespace
 */
export function substitute(text: string): string {
  let result = text;

  // Replace DOS and Mac newlines
  result = result.replace(DOS_MAC_NEWLINES, "\n");

  // Replace leading non-standard spaces with regular spaces
  result = replaceLeadingSpaces(result);

  // Strip lines with only whitespace
  result = result.replace(WHITESPACE_ONLY_LINE, "");

  // Backslash at end of line -> line break marker (U+E000);
  // Wikidot treats `\` at end of line as <br />
  result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));

  // Tabs to four spaces
  result = result.replace(TABS, TAB_EXPANSION);

  // Null characters to spaces
  result = result.replace(NULL_CHARS, " ");

  // Remove leading and trailing newlines
  result = result.replace(LEADING_NEWLINES, "");
  result = result.replace(TRAILING_NEWLINES, "");

  return result;
}
|