@wdprlib/parser 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10451 -8402
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10438 -8389
- package/package.json +1 -1
- package/src/index.ts +7 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +3 -2
- package/src/lexer/lexer.ts +73 -559
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/parser/constants.ts +2 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/typography.ts +70 -5
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
- package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +11 -2
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +14 -2
- package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
- package/src/parser/rules/block/module/listpages/extract.ts +25 -359
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
- package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
- package/src/parser/rules/block/module/listusers/extract.ts +4 -9
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +79 -292
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +11 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +10 -610
- package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +2 -39
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/italic.ts +2 -30
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +6 -81
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +7 -98
- package/src/parser/rules/inline/link-star.ts +7 -69
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +2 -30
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +2 -39
- package/src/parser/rules/inline/superscript.ts +4 -39
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -344
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -252
- package/src/parser/depth.ts +0 -251
- package/src/parser/parse.ts +0 -315
- package/src/parser/postprocess/spanStrip.ts +0 -697
- package/src/parser/preprocess/expr.ts +0 -265
- package/src/parser/preprocess/utils.ts +0 -250
- package/src/parser/preprocess/whitespace.ts +0 -111
- package/src/parser/rules/block/align.ts +0 -282
- package/src/parser/rules/block/bibliography.ts +0 -359
- package/src/parser/rules/block/block-list.ts +0 -689
- package/src/parser/rules/block/blockquote.ts +0 -238
- package/src/parser/rules/block/code.ts +0 -187
- package/src/parser/rules/block/collapsible.ts +0 -337
- package/src/parser/rules/block/definition-list.ts +0 -270
- package/src/parser/rules/block/div.ts +0 -400
- package/src/parser/rules/block/embed-block.ts +0 -153
- package/src/parser/rules/block/footnoteblock.ts +0 -200
- package/src/parser/rules/block/heading.ts +0 -142
- package/src/parser/rules/block/html.ts +0 -222
- package/src/parser/rules/block/iframe.ts +0 -239
- package/src/parser/rules/block/include.ts +0 -179
- package/src/parser/rules/block/list.ts +0 -244
- package/src/parser/rules/block/math.ts +0 -183
- package/src/parser/rules/block/module/include/resolve.ts +0 -556
- package/src/parser/rules/block/module/listpages/types.ts +0 -513
- package/src/parser/rules/block/module/walk.ts +0 -380
- package/src/parser/rules/block/module.ts +0 -164
- package/src/parser/rules/block/orphan-li.ts +0 -177
- package/src/parser/rules/block/paragraph.ts +0 -157
- package/src/parser/rules/block/table-block.ts +0 -726
- package/src/parser/rules/block/table.ts +0 -441
- package/src/parser/rules/block/tabview.ts +0 -331
- package/src/parser/rules/block/toc.ts +0 -129
- package/src/parser/rules/inline/anchor-name.ts +0 -154
- package/src/parser/rules/inline/anchor.ts +0 -327
- package/src/parser/rules/inline/bibcite.ts +0 -153
- package/src/parser/rules/inline/color.ts +0 -140
- package/src/parser/rules/inline/equation-ref.ts +0 -115
- package/src/parser/rules/inline/expr.ts +0 -526
- package/src/parser/rules/inline/footnote.ts +0 -223
- package/src/parser/rules/inline/image.ts +0 -328
- package/src/parser/rules/inline/line-break.ts +0 -326
- package/src/parser/rules/inline/link-triple.ts +0 -267
- package/src/parser/rules/inline/math-inline.ts +0 -126
- package/src/parser/rules/inline/raw.ts +0 -262
- package/src/parser/rules/inline/size.ts +0 -244
- package/src/parser/rules/inline/span.ts +0 -424
- package/src/parser/rules/inline/user.ts +0 -147
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { isWhitespace } from "./chars";
|
|
2
|
+
import { matchDirectiveKind } from "./kind";
|
|
3
|
+
import type { DirectiveKind, DirectiveMatch } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
 * Scan one `[[#kind ...]]` directive whose opener sits at `start` and
 * locate its top-level pipes and closing `]]`.
 *
 * The scan gives up and returns `null` when:
 * - `matchDirectiveKind` recognises another directive opener anywhere
 *   before the closer (this directive is not innermost, so the caller
 *   should keep descending),
 * - no closing `]]` is found at block depth zero, or
 * - the directive is an `if` / `ifexpr` with no top-level `|`.
 *
 * `[[[ ... ]]]` links and nested `[[ ... ]]` blocks are stepped over, so
 * pipes and closers inside them do not count.
 *
 * @param source Full text being scanned.
 * @param start Index of the directive's `[[#` opener.
 * @param kind Directive keyword found at `start`.
 * @returns The raw, unevaluated pieces of the directive, or `null`.
 */
export function tryParseInnermostDirective(
  source: string,
  start: number,
  kind: DirectiveKind,
): DirectiveMatch | null {
  // Keyword length that follows the three-character `[[#` opener.
  let keywordLen = 2;
  if (kind === "ifexpr") {
    keywordLen = 6;
  } else if (kind === "expr") {
    keywordLen = 4;
  }

  let cursor = start + 3 + keywordLen;
  while (cursor < source.length && isWhitespace(source[cursor])) {
    cursor += 1;
  }

  const headStart = cursor;
  const pipePositions: number[] = [];
  let openBlocks = 0;
  let openLinks = 0;

  while (cursor < source.length) {
    // Any directive opener inside the body means this one is not innermost.
    if (matchDirectiveKind(source, cursor) !== null) return null;

    if (source.startsWith("[[[", cursor)) {
      openLinks += 1;
      cursor += 3;
    } else if (openLinks > 0) {
      // Inside a link only its own `]]]` closer matters; skip everything else.
      if (source.startsWith("]]]", cursor)) {
        openLinks -= 1;
        cursor += 3;
      } else {
        cursor += 1;
      }
    } else if (source.startsWith("[[", cursor)) {
      openBlocks += 1;
      cursor += 2;
    } else if (source.startsWith("]]", cursor)) {
      if (openBlocks === 0) {
        // This is the directive's own closer.
        const hasPipe = pipePositions.length > 0;
        if (!hasPipe && (kind === "if" || kind === "ifexpr")) return null;
        return buildDirectiveMatch(source, headStart, cursor, pipePositions, hasPipe);
      }
      openBlocks -= 1;
      cursor += 2;
    } else {
      // Links were handled above, so only the block depth needs checking here.
      if (source[cursor] === "|" && openBlocks === 0) {
        pipePositions.push(cursor);
      }
      cursor += 1;
    }
  }

  // Ran off the end of the source without finding the closing `]]`.
  return null;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Cut the raw head / then / else substrings out of a directive whose
 * closing `]]` starts at `closeStart`.
 *
 * Without a pipe the whole body is the head. With pipes, the head runs up
 * to the first pipe, the `then` branch up to the second pipe (or up to
 * the closer when there is only one), and the `else` branch is everything
 * after the second pipe, including any further pipes. Every piece is
 * trimmed.
 *
 * @param source Full text the offsets refer to.
 * @param headStart Index of the first character of the head.
 * @param closeStart Index of the closing `]]`.
 * @param pipes Indices of the top-level `|` characters, in order.
 * @param hasPipe Whether `pipes` is non-empty.
 */
function buildDirectiveMatch(
  source: string,
  headStart: number,
  closeStart: number,
  pipes: number[],
  hasPipe: boolean,
): DirectiveMatch {
  const end = closeStart + 2;
  const piece = (from: number, to: number): string => source.slice(from, to).trim();

  if (!hasPipe) {
    return { end, head: piece(headStart, closeStart), thenText: "", elseText: "", hasPipe };
  }

  const firstPipe = pipes[0]!;
  const head = piece(headStart, firstPipe);

  if (pipes.length < 2) {
    // Single pipe: the `then` branch extends to the closer and there is no `else`.
    return { end, head, thenText: piece(firstPipe + 1, closeStart), elseText: "", hasPipe };
  }

  const secondPipe = pipes[1]!;
  return {
    end,
    head,
    thenText: piece(firstPipe + 1, secondPipe),
    elseText: piece(secondPipe + 1, closeStart),
    hasPipe,
  };
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { computeBracketDepths } from "../utils";
|
|
2
|
+
import { evaluateDirective } from "./evaluate";
|
|
3
|
+
import { matchDirectiveKind } from "./kind";
|
|
4
|
+
import { tryParseInnermostDirective } from "./parse";
|
|
5
|
+
|
|
6
|
+
/**
 * Run one expansion pass over `source`.
 *
 * At every position where `matchDirectiveKind` recognises a directive
 * opener and the bracket depth reported by `computeBracketDepths` is
 * greater than zero, the directive is handed to
 * `tryParseInnermostDirective`; when that succeeds, the whole directive
 * is replaced by the result of `evaluateDirective`. Everything else is
 * copied through unchanged.
 *
 * @param source Text to scan.
 * @returns The rewritten text, or `source` itself when nothing was replaced.
 */
export function expandInnermost(source: string): string {
  const depths = computeBracketDepths(source);
  let output = "";
  let copiedUpTo = 0;
  let cursor = 0;
  let changed = false;

  while (cursor < source.length) {
    const kind = matchDirectiveKind(source, cursor);
    const match =
      kind !== null && depths[cursor]! > 0
        ? tryParseInnermostDirective(source, cursor, kind)
        : null;

    if (kind === null || match === null) {
      // Nothing to expand here; this character is copied later as part of a slice.
      cursor += 1;
      continue;
    }

    // Flush the untouched text before the directive, then its evaluated form.
    output += source.slice(copiedUpTo, cursor) + evaluateDirective(kind, match);
    cursor = match.end;
    copiedUpTo = cursor;
    changed = true;
  }

  return changed ? output + source.slice(copiedUpTo) : source;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Keyword of a `[[#if ...]]`, `[[#ifexpr ...]]` or `[[#expr ...]]` directive. */
export type DirectiveKind = "if" | "ifexpr" | "expr";
|
|
2
|
+
|
|
3
|
+
/** Raw, unevaluated pieces of a single parsed directive. */
export interface DirectiveMatch {
  /** Index immediately after the directive's closing `]]`. */
  end: number;
  /** Condition or expression text, exactly as written. */
  head: string;
  /** Text of the `then` branch, exactly as written. */
  thenText: string;
  /** Text of the `else` branch, exactly as written. */
  elseText: string;
  /** `true` when the directive contained at least one `|` separator. */
  hasPipe: boolean;
}
|
|
@@ -32,6 +32,61 @@ const LOW_DOUBLE_QUOTE = "\u201e"; // „
|
|
|
32
32
|
/** Horizontal ellipsis character (U+2026). */
const ELLIPSIS = "\u2026"; // …

/**
 * Replace each occurrence of `pattern` in `text` with a single ellipsis
 * character, but only when the occurrence is not directly preceded or
 * followed by another `.`.
 *
 * @param text - Text to scan.
 * @param pattern - Literal dot sequence to look for (e.g. `...`).
 * @returns The rewritten text, or `text` itself when nothing was replaced.
 */
function replaceExactEllipsisPattern(text: string, pattern: string): string {
  const width = pattern.length;
  const pieces: string[] = [];
  let consumed = 0;
  let cursor = 0;

  while (cursor < text.length) {
    const hit = text.indexOf(pattern, cursor);
    if (hit === -1) break;

    const after = hit + width;
    const dotBefore = hit > 0 && text[hit - 1] === ".";
    const dotAfter = after < text.length && text[after] === ".";

    if (dotBefore || dotAfter) {
      // Part of a longer dot run: retry one character further along.
      cursor = hit + 1;
      continue;
    }

    pieces.push(text.slice(consumed, hit), ELLIPSIS);
    consumed = after;
    cursor = after;
  }

  if (consumed === 0) return text;
  pieces.push(text.slice(consumed));
  return pieces.join("");
}
|
|
58
|
+
|
|
59
|
+
/**
 * Rewrite every `opener … closer` pair in `text`, scanning left to right
 * without overlap: the opener becomes `leftQuote`, the closer becomes
 * `rightQuote`, and the enclosed text is kept verbatim.
 *
 * Scanning stops at the first opener that has no closer after it.
 *
 * @param text - Text to scan.
 * @param opener - Literal opening delimiter.
 * @param closer - Literal closing delimiter.
 * @param leftQuote - Replacement for each matched opener.
 * @param rightQuote - Replacement for each matched closer.
 * @returns The rewritten text, or `text` itself when no pair was found.
 */
function replaceDelimitedTypography(
  text: string,
  opener: string,
  closer: string,
  leftQuote: string,
  rightQuote: string,
): string {
  const pieces: string[] = [];
  let consumed = 0;
  let cursor = 0;

  while (cursor < text.length) {
    const openAt = text.indexOf(opener, cursor);
    if (openAt < 0) break;

    const bodyStart = openAt + opener.length;
    const closeAt = text.indexOf(closer, bodyStart);
    if (closeAt < 0) break;

    pieces.push(
      text.slice(consumed, openAt),
      leftQuote,
      text.slice(bodyStart, closeAt),
      rightQuote,
    );

    consumed = closeAt + closer.length;
    cursor = consumed;
  }

  return consumed === 0 ? text : pieces.join("") + text.slice(consumed);
}
|
|
89
|
+
|
|
35
90
|
/**
|
|
36
91
|
* Apply all typographic substitutions to the given text.
|
|
37
92
|
*
|
|
@@ -47,21 +102,31 @@ export function substitute(text: string): string {
|
|
|
47
102
|
let result = text;
|
|
48
103
|
|
|
49
104
|
// Double quotes: ``...'' -> "..."
|
|
50
|
-
result
|
|
105
|
+
if (result.includes("``") && result.includes("''")) {
|
|
106
|
+
result = replaceDelimitedTypography(result, "``", "''", LEFT_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE);
|
|
107
|
+
}
|
|
51
108
|
|
|
52
109
|
// Low double quotes: ,,..'' -> „..."
|
|
53
|
-
result
|
|
110
|
+
if (result.includes(",,") && result.includes("''")) {
|
|
111
|
+
result = replaceDelimitedTypography(result, ",,", "''", LOW_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE);
|
|
112
|
+
}
|
|
54
113
|
|
|
55
114
|
// Single quotes: `...' -> '...'
|
|
56
|
-
result
|
|
115
|
+
if (result.includes("`") && result.includes("'")) {
|
|
116
|
+
result = replaceDelimitedTypography(result, "`", "'", LEFT_SINGLE_QUOTE, RIGHT_SINGLE_QUOTE);
|
|
117
|
+
}
|
|
57
118
|
|
|
58
119
|
// Ellipsis: ... or . . . -> …
|
|
59
120
|
// Must be exactly 3 dots, not preceded or followed by more dots
|
|
60
121
|
// Handle continuous dots: ...
|
|
61
|
-
|
|
122
|
+
if (result.includes("...")) {
|
|
123
|
+
result = replaceExactEllipsisPattern(result, "...");
|
|
124
|
+
}
|
|
62
125
|
|
|
63
126
|
// Handle spaced dots: . . .
|
|
64
|
-
|
|
127
|
+
if (result.includes(". . .")) {
|
|
128
|
+
result = replaceExactEllipsisPattern(result, ". . .");
|
|
129
|
+
}
|
|
65
130
|
|
|
66
131
|
return result;
|
|
67
132
|
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
 * Compute, for every character offset of `source`, how many `[[` openers
 * are still unmatched at that point.
 *
 * The original note states this mirrors the lexer's `blockOpenerDepth`;
 * that cannot be confirmed from this file alone.
 *
 * Rules applied while scanning:
 * - `[[` raises the depth and `]]` lowers it (never below zero);
 * - a newline resets the depth to zero;
 * - a `"` that follows `=` while the depth is positive starts a quoted
 *   span, and `[[[` starts a triple-bracket span; every offset of such a
 *   span receives the current depth and the span is skipped as a whole.
 *
 * NOTE(review): when a quoted span ends on a newline, that newline is
 * skipped along with the span, so it does not reset the depth. Confirm
 * this matches the lexer.
 *
 * @param source - Text to analyse.
 * @returns An `Int32Array` of length `source.length + 1`; entry `k` is the
 *   depth immediately before the character at offset `k` is consumed.
 */
export function computeBracketDepths(source: string): Int32Array {
  const QUOTE = 0x22; // "
  const OPEN = 0x5b; // [
  const CLOSE = 0x5d; // ]
  const NEWLINE = 0x0a; // \n

  const length = source.length;
  const depths = new Int32Array(length + 1);
  let depth = 0;
  let pos = 0;

  // Record the current depth on every offset from `pos` through `last`
  // and report the offset just past the span.
  const stampThrough = (last: number): number => {
    for (let k = pos; k <= last; k++) depths[k] = depth;
    return last + 1;
  };

  while (pos < length) {
    depths[pos] = depth;
    const code = source.charCodeAt(pos);
    const following = pos + 1 < length ? source.charCodeAt(pos + 1) : -1;

    if (code === QUOTE) {
      if (depth > 0 && precededByEqualsAttr(source, pos)) {
        pos = stampThrough(findQuoteEnd(source, pos + 1));
        continue;
      }
    } else if (code === OPEN && following === OPEN) {
      const third = pos + 2 < length ? source.charCodeAt(pos + 2) : -1;
      if (third === OPEN) {
        pos = stampThrough(findTripleLinkEnd(source, pos + 3));
        continue;
      }
      depth++;
      depths[pos + 1] = depth;
      pos += 2;
      continue;
    } else if (code === CLOSE && following === CLOSE) {
      depth = Math.max(0, depth - 1);
      depths[pos + 1] = depth;
      pos += 2;
      continue;
    } else if (code === NEWLINE) {
      depth = 0;
    }

    pos++;
  }

  depths[length] = depth;
  return depths;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Report whether the nearest character before `pos` that is neither a
 * space nor a tab is `=`.
 *
 * @param source - Text being scanned.
 * @param pos - Offset whose left-hand context is inspected.
 * @returns `true` when that character is `=`; `false` otherwise, including
 *   when only spaces/tabs (or nothing) precede `pos`.
 */
function precededByEqualsAttr(source: string, pos: number): boolean {
  for (let back = pos - 1; back >= 0; back--) {
    const code = source.charCodeAt(back);
    const isBlank = code === 0x20 /* space */ || code === 0x09; /* tab */
    if (!isBlank) {
      return code === 0x3d; /* = */
    }
  }
  return false;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Find where a quoted span that began before `from` ends.
 *
 * @param source - Text being scanned.
 * @param from - First offset to inspect.
 * @returns The offset of the first `"` or newline at or after `from`, or
 *   `source.length - 1` when neither occurs.
 */
function findQuoteEnd(source: string, from: number): number {
  let pos = from;
  while (pos < source.length) {
    const code = source.charCodeAt(pos);
    if (code === 0x22 /* " */ || code === 0x0a /* \n */) {
      return pos;
    }
    pos++;
  }
  return source.length - 1;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Find where a span opened by `[[[` ends.
 *
 * @param source - Text being scanned.
 * @param from - First offset to inspect (just past the `[[[`).
 * @returns The offset of the last `]` of the first `]]]` found; or the
 *   offset of the first newline of a blank-line break (`\n\n`) if that
 *   comes first; or `source.length - 1` when neither occurs.
 */
function findTripleLinkEnd(source: string, from: number): number {
  let pos = from;
  while (pos < source.length) {
    if (source.startsWith("]]]", pos)) {
      return pos + 2;
    }
    if (source.startsWith("\n\n", pos)) {
      return pos;
    }
    pos++;
  }
  return source.length - 1;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Shared helper facade for source-level preprocess passes.
|
|
4
|
+
*
|
|
5
|
+
* Raw-region masking and bracket-depth tracking are implemented in separate
|
|
6
|
+
* files so preprocess passes can depend on a small stable import surface.
|
|
7
|
+
*
|
|
8
|
+
* @module
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
export type { Sentinels } from "./raw-regions";
|
|
12
|
+
export { makeUniqueSentinels, maskRawRegions, restorePlaceholders } from "./raw-regions";
|
|
13
|
+
export { computeBracketDepths } from "./bracket-depths";
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
// Private-use code points that seed the opening / closing sentinels.
const BASE_PLACEHOLDER_OPEN = "\uE000";
const BASE_PLACEHOLDER_CLOSE = "\uE001";

// Sticky, case-insensitive matcher for a `[[code …]]` / `[[html …]]` opener.
const RAW_BLOCK_OPEN_PATTERN = /\[\[\s*(code|html)\b[^\]]*\]\]/iy;

/** Pair of marker strings that bracket a raw-region placeholder. */
export interface Sentinels {
  open: string;
  close: string;
}

/**
 * Pick sentinel strings that do not occur anywhere in `source`.
 *
 * Placeholders spliced into the masked text look like
 * `<open><digits><close>`, so neither marker may already be present in the
 * content. Both markers are lengthened in lockstep (one base character per
 * round) until neither is found in `source`.
 *
 * @param source - Text that will later be masked.
 * @returns The chosen `open` / `close` marker strings.
 */
export function makeUniqueSentinels(source: string): Sentinels {
  for (let width = 1; ; width++) {
    const open = BASE_PLACEHOLDER_OPEN.repeat(width);
    const close = BASE_PLACEHOLDER_CLOSE.repeat(width);
    if (!source.includes(open) && !source.includes(close)) {
      return { open, close };
    }
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Walk `source` and swap each raw region (raw block or raw inline span)
 * for a placeholder token, so later passes leave the region's body alone.
 *
 * At every offset a raw block is tried first, then a raw inline span; if
 * neither starts there the character is copied through unchanged.
 *
 * @param source - Text to mask.
 * @param sentinels - Marker strings used to build the placeholder tokens.
 * @returns The masked text and the list of original region texts, indexed
 *   by the number embedded in each placeholder.
 */
export function maskRawRegions(
  source: string,
  sentinels: Sentinels,
): { masked: string; placeholders: string[] } {
  const placeholders: string[] = [];
  let masked = "";

  for (let pos = 0; pos < source.length; ) {
    const region =
      tryMaskRawBlock(source, pos, placeholders, sentinels) ??
      tryMaskRawInline(source, pos, placeholders, sentinels);

    if (region === null) {
      masked += source[pos];
      pos++;
    } else {
      masked += region.placeholder;
      pos = region.end;
    }
  }

  return { masked, placeholders };
}
|
|
61
|
+
|
|
62
|
+
/**
 * Inverse of {@link maskRawRegions}: put the original raw-region text back
 * wherever a `<open><digits><close>` placeholder appears.
 *
 * @param source - Masked text.
 * @param placeholders - Original region texts, indexed by placeholder number.
 * @param sentinels - The marker strings that were used when masking.
 * @returns The text with every placeholder replaced; a placeholder whose
 *   number has no entry in `placeholders` is replaced by an empty string.
 */
export function restorePlaceholders(
  source: string,
  placeholders: string[],
  sentinels: Sentinels,
): string {
  const openPart = escapeRegex(sentinels.open);
  const closePart = escapeRegex(sentinels.close);
  const token = new RegExp(`${openPart}(\\d+)${closePart}`, "g");

  const lookUp = (_whole: string, digits: string): string => placeholders[Number(digits)] ?? "";
  return source.replace(token, lookUp);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Try to mask a `[[code]]` / `[[html]]` block that starts exactly at `pos`.
 *
 * The region runs from the opener through the first matching
 * `[[/name]]` closer after it. When no closer exists, a `code` block
 * swallows the rest of `source`, while an `html` block is not masked.
 *
 * @param source - Text being masked.
 * @param pos - Offset to test.
 * @param placeholders - Collector that receives the region's original text.
 * @param sentinels - Marker strings used to build the placeholder token.
 * @returns The placeholder token and the offset just past the region, or
 *   `null` when no raw block starts at `pos`.
 */
function tryMaskRawBlock(
  source: string,
  pos: number,
  placeholders: string[],
  sentinels: Sentinels,
): { placeholder: string; end: number } | null {
  if (!source.startsWith("[[", pos)) return null;

  // The opener pattern is sticky, so it only matches right at `pos`.
  RAW_BLOCK_OPEN_PATTERN.lastIndex = pos;
  const opening = RAW_BLOCK_OPEN_PATTERN.exec(source);
  if (opening === null) return null;

  const tag = opening[1]!.toLowerCase();
  const closing = new RegExp(`\\[\\[\\/\\s*${tag}\\s*\\]\\]`, "ig");
  closing.lastIndex = pos + opening[0].length;
  const closeHit = closing.exec(source);

  let end: number;
  if (closeHit !== null) {
    end = closeHit.index + closeHit[0].length;
  } else if (tag === "code") {
    // Unterminated code block: everything up to the end of input is raw.
    end = source.length;
  } else {
    return null;
  }

  return {
    placeholder: pushPlaceholder(placeholders, source.slice(pos, end), sentinels),
    end,
  };
}
|
|
108
|
+
|
|
109
|
+
/**
 * Try to mask an inline raw span starting at `pos`: `@< … >@` or `@@ … @@`.
 *
 * @param source - Text being masked.
 * @param pos - Offset to test.
 * @param placeholders - Collector that receives the span's original text.
 * @param sentinels - Marker strings used to build the placeholder token.
 * @returns The placeholder token and the offset just past the span, or
 *   `null` when no inline raw span starts at `pos`.
 */
function tryMaskRawInline(
  source: string,
  pos: number,
  placeholders: string[],
  sentinels: Sentinels,
): { placeholder: string; end: number } | null {
  if (source[pos] !== "@") return null;

  const second = source[pos + 1];
  const closer = second === "<" ? ">@" : second === "@" ? "@@" : null;
  if (closer === null) return null;

  return tryMaskSingleLineRaw(source, pos, 2, closer, placeholders, sentinels);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Mask a raw span whose opener sits at `pos` and whose closer must appear
 * before the next newline.
 *
 * @param source - Text being masked.
 * @param pos - Offset of the opener.
 * @param openerLength - Length of the opener in characters.
 * @param close - Literal closing delimiter.
 * @param placeholders - Collector that receives the span's original text.
 * @param sentinels - Marker strings used to build the placeholder token.
 * @returns The placeholder token and the offset just past the closer, or
 *   `null` when no closer is found or a newline comes before it.
 */
function tryMaskSingleLineRaw(
  source: string,
  pos: number,
  openerLength: number,
  close: string,
  placeholders: string[],
  sentinels: Sentinels,
): { placeholder: string; end: number } | null {
  const bodyStart = pos + openerLength;

  const closeAt = source.indexOf(close, bodyStart);
  if (closeAt === -1) return null;

  const lineBreak = source.indexOf("\n", bodyStart);
  if (lineBreak !== -1 && lineBreak < closeAt) return null;

  const end = closeAt + close.length;
  const original = source.slice(pos, end);
  return { placeholder: pushPlaceholder(placeholders, original, sentinels), end };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Store `text` at the end of `placeholders` and build the token that
 * refers to it: `<open><index><close>`.
 *
 * @param placeholders - Collector that is appended to (mutated).
 * @param text - Original region text to remember.
 * @param sentinels - Marker strings wrapped around the index.
 * @returns The placeholder token for the stored text.
 */
function pushPlaceholder(placeholders: string[], text: string, sentinels: Sentinels): string {
  // `push` returns the new length, so the stored entry sits one below it.
  const index = placeholders.push(text) - 1;
  return sentinels.open + String(index) + sentinels.close;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be embedded in a `RegExp` source and match `str` literally.
 */
function escapeRegex(str: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, "\\$&");
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Characters matched by the regex class `\s` that the literal probes in
 * this file do not look for (`\n`, space, tab, U+00A0 and U+2007 are
 * probed explicitly). `\r` is listed too so that
 * {@link mayContainWhitespaceOnlyLine} stays conservative on text that has
 * not had its line endings normalised.
 *
 * Without this check, a line made only of such characters (for example
 * U+3000 IDEOGRAPHIC SPACE) was invisible to both gates below, so whether
 * a `^\s+$` clean-up touched it depended on unrelated content elsewhere in
 * the text.
 */
const UNPROBED_WHITESPACE =
  /[\v\f\r\u1680\u2000-\u2006\u2008-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]/;

/**
 * Cheap pre-check: could whitespace normalisation change `text` at all?
 *
 * This is a conservative gate. It may return `true` for text that ends up
 * unchanged, but it is meant never to return `false` for text containing
 * something the normalisation steps act on: leading/trailing newlines, a
 * leading space, `\r`, tabs, NUL, U+00A0 / U+2007, backslash-newline,
 * three consecutive newlines, a newline followed by a space, or any other
 * `\s` whitespace character.
 *
 * @param text - Text about to be normalised.
 * @returns `false` only when `text` contains none of the triggers above.
 */
export function needsWhitespaceSubstitution(text: string): boolean {
  if (text.length === 0) return false;

  if (text[0] === "\n" || text[0] === " " || text[text.length - 1] === "\n") {
    return true;
  }

  return (
    text.includes("\r") ||
    text.includes("\t") ||
    text.includes("\0") ||
    text.includes("\u00a0") ||
    text.includes("\u2007") ||
    text.includes("\\\n") ||
    text.includes("\n\n\n") ||
    text.includes("\n ") ||
    // Any remaining `\s` character could form a whitespace-only line.
    UNPROBED_WHITESPACE.test(text)
  );
}

/**
 * Cheap pre-check: could `text` contain a line that starts with
 * whitespace, and therefore possibly a whitespace-only line matched by
 * `^\s+$` in multiline mode?
 *
 * Like {@link needsWhitespaceSubstitution}, this is conservative: `true`
 * means "worth running the regex", `false` means no such line can exist.
 *
 * @param text - Text to inspect.
 * @returns `true` when the text starts with whitespace, has whitespace
 *   directly after a newline, or contains any other `\s` character.
 */
export function mayContainWhitespaceOnlyLine(text: string): boolean {
  const first = text[0];
  if (
    first === " " ||
    first === "\t" ||
    first === "\n" ||
    first === "\u00a0" ||
    first === "\u2007"
  ) {
    return true;
  }

  return (
    text.includes("\n ") ||
    text.includes("\n\n") ||
    text.includes("\n\t") ||
    text.includes("\n\u00a0") ||
    text.includes("\n\u2007") ||
    // Whitespace the literal probes above cannot see.
    UNPROBED_WHITESPACE.test(text)
  );
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Whitespace normalization preprocessing for Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* This module ensures the lexer and parser receive input with consistent
|
|
6
|
+
* whitespace conventions. It handles platform differences (DOS/Mac newlines),
|
|
7
|
+
* normalizes exotic whitespace characters that users may paste from external
|
|
8
|
+
* sources, and applies Wikidot-specific behaviors like backslash line continuation.
|
|
9
|
+
*
|
|
10
|
+
* @module
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { needsWhitespaceSubstitution, mayContainWhitespaceOnlyLine } from "./detection";
|
|
14
|
+
import { replaceLeadingSpaces } from "./leading-spaces";
|
|
15
|
+
import { CONCAT_LINES, DOS_MAC_NEWLINES, NULL_CHARS, TABS, WHITESPACE_ONLY_LINE } from "./patterns";
|
|
16
|
+
|
|
17
|
+
/**
 * Apply all whitespace normalization substitutions to the given text.
 *
 * Returns `text` untouched when the cheap pre-check finds nothing to do.
 * Otherwise the steps run in this fixed order, each guarded by its own
 * quick probe so untouched input skips the regex work:
 *
 * 1. `\r\n` and lone `\r` become `\n` (must precede the backslash-newline
 *    step, which looks for `\` directly before `\n`).
 * 2. Leading U+00A0 / U+2007 runs on each line become ASCII spaces.
 * 3. Whitespace-only lines are emptied.
 * 4. Backslash-newline is replaced by the single character U+E000.
 * 5. Each tab becomes four spaces.
 * 6. Each NUL becomes a space.
 * 7. Leading and trailing newlines are removed.
 *
 * @param text - Raw markup text.
 * @returns The normalized text.
 */
export function substitute(text: string): string {
  if (!needsWhitespaceSubstitution(text)) {
    return text;
  }

  let result = text;

  if (result.indexOf("\r") !== -1) {
    result = result.replace(DOS_MAC_NEWLINES, "\n");
  }

  if (result.indexOf("\u00a0") !== -1 || result.indexOf("\u2007") !== -1) {
    result = replaceLeadingSpaces(result);
  }

  if (mayContainWhitespaceOnlyLine(result)) {
    result = result.replace(WHITESPACE_ONLY_LINE, "");
  }

  if (result.indexOf("\\\n") !== -1) {
    // NOTE(review): U+E000 looks like an internal marker that a later
    // stage interprets; that stage is not visible here — confirm.
    result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));
  }

  if (result.indexOf("\t") !== -1) {
    // The TABS pattern is documented as expanding to four spaces; the
    // previous replacement emitted only one.
    result = result.replace(TABS, "    ");
  }

  if (result.indexOf("\0") !== -1) {
    result = result.replace(NULL_CHARS, " ");
  }

  if (result[0] === "\n") {
    result = trimLeadingNewlines(result);
  }
  if (result[result.length - 1] === "\n") {
    result = trimTrailingNewlines(result);
  }

  return result;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Drop every `\n` at the start of `text`.
 *
 * @returns `text` itself when it does not start with a newline, otherwise
 *   the remainder after the leading newline run.
 */
function trimLeadingNewlines(text: string): string {
  let start = 0;
  for (; text[start] === "\n"; start++) {
    // advance past the leading newline run
  }
  return start > 0 ? text.slice(start) : text;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Drop every `\n` at the end of `text`.
 *
 * @returns `text` itself when it does not end with a newline, otherwise
 *   the part before the trailing newline run.
 */
function trimTrailingNewlines(text: string): string {
  let keep = text.length;
  for (; keep > 0 && text[keep - 1] === "\n"; keep--) {
    // step back over the trailing newline run
  }
  return keep < text.length ? text.slice(0, keep) : text;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { LEADING_NONSTANDARD_WHITESPACE } from "./patterns";
|
|
2
|
+
|
|
3
|
+
/**
 * Swap each run of non-standard whitespace found at the start of a line
 * for an equally long run of plain ASCII spaces.
 *
 * @param text - Text to process.
 * @returns The text with every matched leading run replaced.
 */
export function replaceLeadingSpaces(text: string): string {
  const toAsciiSpaces = (run: string): string => " ".repeat(run.length);
  return text.replace(LEADING_NONSTANDARD_WHITESPACE, toAsciiSpaces);
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Run of non-breaking spaces (U+00A0) and/or figure spaces (U+2007) at the
 * beginning of a line (global, multiline).
 */
export const LEADING_NONSTANDARD_WHITESPACE: RegExp = /^[\u00a0\u2007]+/gm;

/** A line made up entirely of whitespace characters (global, multiline). */
export const WHITESPACE_ONLY_LINE: RegExp = /^\s+$/gm;

/** DOS (`\r\n`) or legacy Mac (`\r`) line ending. */
export const DOS_MAC_NEWLINES: RegExp = /\r\n?/g;

/**
 * A backslash immediately followed by a newline.
 * In Wikidot, `\` at end of line acts as an explicit line break (`<br />`).
 */
export const CONCAT_LINES: RegExp = /\\\n/g;

/** A tab character (expanded to four spaces). */
export const TABS: RegExp = /\t/g;

/** A NUL character (replaced with a space). */
export const NULL_CHARS: RegExp = new RegExp(String.fromCharCode(0), "g");
|