@wdprlib/parser 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10451 -8402
- package/dist/index.d.cts +313 -337
- package/dist/index.d.ts +313 -337
- package/dist/index.js +10438 -8389
- package/package.json +1 -1
- package/src/index.ts +7 -0
- package/src/lexer/anchor.ts +48 -0
- package/src/lexer/index.ts +3 -2
- package/src/lexer/lexer.ts +73 -559
- package/src/lexer/options.ts +19 -0
- package/src/lexer/punctuation.ts +70 -0
- package/src/lexer/quoted-string.ts +16 -0
- package/src/lexer/runs.ts +85 -0
- package/src/lexer/spacing-actions.ts +24 -0
- package/src/lexer/state.ts +103 -0
- package/src/lexer/syntax-actions.ts +80 -0
- package/src/lexer/text-actions.ts +41 -0
- package/src/lexer/token-actions.ts +136 -0
- package/src/lexer/token-factory.ts +62 -0
- package/src/lexer/tokenize.ts +18 -0
- package/src/parser/constants.ts +2 -0
- package/src/parser/depth/index.ts +111 -0
- package/src/parser/depth/stack.ts +82 -0
- package/src/parser/parse/block.ts +42 -0
- package/src/parser/parse/context.ts +26 -0
- package/src/parser/parse/footnotes.ts +25 -0
- package/src/parser/parse/index.ts +42 -0
- package/src/parser/parse/options.ts +34 -0
- package/src/parser/parse/parser.ts +79 -0
- package/src/parser/parse/plain-non-ascii.ts +129 -0
- package/src/parser/parse/result.ts +57 -0
- package/src/parser/parse/source.ts +11 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +1 -1
- package/src/parser/postprocess/spanStrip/clean-element.ts +168 -0
- package/src/parser/postprocess/spanStrip/cleanup.ts +25 -0
- package/src/parser/postprocess/spanStrip/empty-spans.ts +36 -0
- package/src/parser/postprocess/spanStrip/escaped.ts +78 -0
- package/src/parser/postprocess/spanStrip/factory.ts +23 -0
- package/src/parser/postprocess/spanStrip/index.ts +8 -0
- package/src/parser/postprocess/spanStrip/merge.ts +117 -0
- package/src/parser/postprocess/spanStrip/predicates.ts +59 -0
- package/src/parser/postprocess/spanStrip/split.ts +67 -0
- package/src/parser/preprocess/expr/chars.ts +15 -0
- package/src/parser/preprocess/expr/evaluate.ts +22 -0
- package/src/parser/preprocess/expr/index.ts +45 -0
- package/src/parser/preprocess/expr/kind.ts +19 -0
- package/src/parser/preprocess/expr/parse.ts +103 -0
- package/src/parser/preprocess/expr/scan.ts +34 -0
- package/src/parser/preprocess/expr/types.ts +14 -0
- package/src/parser/preprocess/typography.ts +70 -5
- package/src/parser/preprocess/utils/bracket-depths.ts +98 -0
- package/src/parser/preprocess/utils/index.ts +13 -0
- package/src/parser/preprocess/utils/raw-regions.ts +153 -0
- package/src/parser/preprocess/whitespace/detection.ts +39 -0
- package/src/parser/preprocess/whitespace/index.ts +79 -0
- package/src/parser/preprocess/whitespace/leading-spaces.ts +11 -0
- package/src/parser/preprocess/whitespace/patterns.ts +23 -0
- package/src/parser/rules/block/align/body.ts +46 -0
- package/src/parser/rules/block/align/element.ts +13 -0
- package/src/parser/rules/block/align/index.ts +90 -0
- package/src/parser/rules/block/align/syntax.ts +113 -0
- package/src/parser/rules/block/bibliography/body.ts +81 -0
- package/src/parser/rules/block/bibliography/entries.ts +49 -0
- package/src/parser/rules/block/bibliography/entry-content.ts +73 -0
- package/src/parser/rules/block/bibliography/entry-key.ts +83 -0
- package/src/parser/rules/block/bibliography/index.ts +90 -0
- package/src/parser/rules/block/bibliography/open.ts +53 -0
- package/src/parser/rules/block/block-list/bare-content.ts +105 -0
- package/src/parser/rules/block/block-list/bare-paragraph.ts +60 -0
- package/src/parser/rules/block/block-list/index.ts +51 -0
- package/src/parser/rules/block/block-list/item-content.ts +132 -0
- package/src/parser/rules/block/block-list/li-content.ts +107 -0
- package/src/parser/rules/block/block-list/li-item.ts +77 -0
- package/src/parser/rules/block/block-list/list-block.ts +100 -0
- package/src/parser/rules/block/block-list/open.ts +51 -0
- package/src/parser/rules/block/block-list/tags.ts +50 -0
- package/src/parser/rules/block/blockquote/build.ts +62 -0
- package/src/parser/rules/block/blockquote/index.ts +80 -0
- package/src/parser/rules/block/blockquote/line.ts +79 -0
- package/src/parser/rules/block/blockquote/lines.ts +39 -0
- package/src/parser/rules/block/{center.ts → center/index.ts} +7 -22
- package/src/parser/rules/block/center/open.ts +27 -0
- package/src/parser/rules/block/{clear-float.ts → clear-float/index.ts} +6 -30
- package/src/parser/rules/block/clear-float/syntax.ts +43 -0
- package/src/parser/rules/block/code/attributes.ts +30 -0
- package/src/parser/rules/block/code/content.ts +57 -0
- package/src/parser/rules/block/code/index.ts +100 -0
- package/src/parser/rules/block/collapsible/attributes.ts +95 -0
- package/src/parser/rules/block/collapsible/body.ts +69 -0
- package/src/parser/rules/block/collapsible/index.ts +117 -0
- package/src/parser/rules/block/collapsible/open.ts +51 -0
- package/src/parser/rules/block/collapsible/orphans.ts +31 -0
- package/src/parser/rules/block/collapsible/tags.ts +17 -0
- package/src/parser/rules/block/comment/consume.ts +37 -0
- package/src/parser/rules/block/{comment.ts → comment/index.ts} +12 -38
- package/src/parser/rules/block/{content-separator.ts → content-separator/index.ts} +5 -35
- package/src/parser/rules/block/content-separator/syntax.ts +33 -0
- package/src/parser/rules/block/definition-list/collect.ts +40 -0
- package/src/parser/rules/block/definition-list/index.ts +63 -0
- package/src/parser/rules/block/definition-list/item-key.ts +95 -0
- package/src/parser/rules/block/definition-list/item-value.ts +56 -0
- package/src/parser/rules/block/definition-list/items.ts +54 -0
- package/src/parser/rules/block/div/body.ts +41 -0
- package/src/parser/rules/block/div/close.ts +41 -0
- package/src/parser/rules/block/div/failed.ts +117 -0
- package/src/parser/rules/block/div/index.ts +112 -0
- package/src/parser/rules/block/div/nesting.ts +37 -0
- package/src/parser/rules/block/div/open.ts +59 -0
- package/src/parser/rules/block/div/paragraph-strip.ts +44 -0
- package/src/parser/rules/block/embed-block/content.ts +53 -0
- package/src/parser/rules/block/embed-block/index.ts +91 -0
- package/src/parser/rules/block/embed-block/open.ts +52 -0
- package/src/parser/rules/block/embed-block/tags.ts +5 -0
- package/src/parser/rules/block/footnoteblock/attributes.ts +73 -0
- package/src/parser/rules/block/footnoteblock/index.ts +82 -0
- package/src/parser/rules/block/footnoteblock/open.ts +53 -0
- package/src/parser/rules/block/heading/index.ts +87 -0
- package/src/parser/rules/block/heading/open.ts +50 -0
- package/src/parser/rules/block/heading/toc-text.ts +26 -0
- package/src/parser/rules/block/{horizontal-rule.ts → horizontal-rule/index.ts} +4 -21
- package/src/parser/rules/block/horizontal-rule/syntax.ts +21 -0
- package/src/parser/rules/block/html/body.ts +114 -0
- package/src/parser/rules/block/html/diagnostics.ts +11 -0
- package/src/parser/rules/block/html/index.ts +95 -0
- package/src/parser/rules/block/html/open.ts +36 -0
- package/src/parser/rules/block/iframe/attributes.ts +106 -0
- package/src/parser/rules/block/iframe/index.ts +73 -0
- package/src/parser/rules/block/iframe/open.ts +58 -0
- package/src/parser/rules/block/iframe/source.ts +24 -0
- package/src/parser/rules/block/iframe/url.ts +38 -0
- package/src/parser/rules/block/iftags/body.ts +48 -0
- package/src/parser/rules/block/iftags/condition.ts +24 -0
- package/src/parser/rules/block/{iftags.ts → iftags/index.ts} +16 -58
- package/src/parser/rules/block/include/arguments.ts +48 -0
- package/src/parser/rules/block/include/index.ts +75 -0
- package/src/parser/rules/block/include/location.ts +24 -0
- package/src/parser/rules/block/include/variables.ts +37 -0
- package/src/parser/rules/block/list/index.ts +73 -0
- package/src/parser/rules/block/list/line.ts +77 -0
- package/src/parser/rules/block/list/native.ts +89 -0
- package/src/parser/rules/block/math/content.ts +54 -0
- package/src/parser/rules/block/math/index.ts +106 -0
- package/src/parser/rules/block/math/name.ts +35 -0
- package/src/parser/rules/block/module/body.ts +92 -0
- package/src/parser/rules/block/module/element.ts +33 -0
- package/src/parser/rules/block/module/include/directive.ts +91 -0
- package/src/parser/rules/block/module/include/index.ts +11 -2
- package/src/parser/rules/block/module/include/references.ts +42 -0
- package/src/parser/rules/block/module/include/resolve/cache.ts +44 -0
- package/src/parser/rules/block/module/include/resolve/index.ts +106 -0
- package/src/parser/rules/block/module/include/resolve/iterate.ts +202 -0
- package/src/parser/rules/block/module/include/resolve/replace.ts +31 -0
- package/src/parser/rules/block/module/include/resolve/types.ts +105 -0
- package/src/parser/rules/block/module/include/scanner.ts +121 -0
- package/src/parser/rules/block/module/index.ts +14 -2
- package/src/parser/rules/block/module/listpages/compiler.ts +12 -392
- package/src/parser/rules/block/module/listpages/extract.ts +25 -359
- package/src/parser/rules/block/module/listpages/extraction/listpages.ts +42 -0
- package/src/parser/rules/block/module/listpages/extraction/listusers.ts +30 -0
- package/src/parser/rules/block/module/listpages/extraction/query.ts +51 -0
- package/src/parser/rules/block/module/listpages/extraction/result.ts +18 -0
- package/src/parser/rules/block/module/listpages/extraction/template.ts +96 -0
- package/src/parser/rules/block/module/listpages/extraction/variables.ts +58 -0
- package/src/parser/rules/block/module/listpages/normalization/date-selector.ts +53 -0
- package/src/parser/rules/block/module/listpages/normalization/numeric-selector.ts +32 -0
- package/src/parser/rules/block/module/listpages/normalization/order-parent.ts +82 -0
- package/src/parser/rules/block/module/listpages/normalization/selectors.ts +2 -0
- package/src/parser/rules/block/module/listpages/normalization/tags-category.ts +86 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +8 -324
- package/src/parser/rules/block/module/listpages/resolution/items.ts +43 -0
- package/src/parser/rules/block/module/listpages/resolution/wrapper.ts +42 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +5 -75
- package/src/parser/rules/block/module/listpages/template/format/content.ts +41 -0
- package/src/parser/rules/block/module/listpages/template/format/date.ts +116 -0
- package/src/parser/rules/block/module/listpages/template/format/index.ts +4 -0
- package/src/parser/rules/block/module/listpages/template/format/tags.ts +7 -0
- package/src/parser/rules/block/module/listpages/template/format/user.ts +9 -0
- package/src/parser/rules/block/module/listpages/template/getters/index.ts +36 -0
- package/src/parser/rules/block/module/listpages/template/getters/parameterized.ts +60 -0
- package/src/parser/rules/block/module/listpages/template/getters/simple.ts +65 -0
- package/src/parser/rules/block/module/listpages/template/getters/types.ts +3 -0
- package/src/parser/rules/block/module/listpages/template/syntax.ts +97 -0
- package/src/parser/rules/block/module/listpages/types/data-fetcher.ts +15 -0
- package/src/parser/rules/block/module/listpages/types/data-requirements.ts +52 -0
- package/src/parser/rules/block/module/listpages/types/external-data.ts +77 -0
- package/src/parser/rules/block/module/listpages/types/index.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/normalized-query.ts +120 -0
- package/src/parser/rules/block/module/listpages/types/query.ts +67 -0
- package/src/parser/rules/block/module/listpages/types/template.ts +17 -0
- package/src/parser/rules/block/module/listpages/types/variables.ts +69 -0
- package/src/parser/rules/block/module/listpages/url-resolution/fields.ts +48 -0
- package/src/parser/rules/block/module/listpages/url-resolution/params.ts +19 -0
- package/src/parser/rules/block/module/listpages/url-resolution/query.ts +24 -0
- package/src/parser/rules/block/module/listpages/url-resolution/resolve.ts +53 -0
- package/src/parser/rules/block/module/listpages/url-resolution/value.ts +25 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +3 -160
- package/src/parser/rules/block/module/listusers/compiler.ts +4 -25
- package/src/parser/rules/block/module/listusers/extract.ts +4 -9
- package/src/parser/rules/block/module/listusers/getters.ts +21 -0
- package/src/parser/rules/block/module/listusers/variables.ts +15 -0
- package/src/parser/rules/block/module/open.ts +57 -0
- package/src/parser/rules/block/module/resolution/contexts.ts +78 -0
- package/src/parser/rules/block/module/resolution/data-maps.ts +39 -0
- package/src/parser/rules/block/module/resolution/dynamic-modules.ts +93 -0
- package/src/parser/rules/block/module/resolution/styles.ts +53 -0
- package/src/parser/rules/block/module/resolution/walk-resolve.ts +107 -0
- package/src/parser/rules/block/module/resolve.ts +79 -292
- package/src/parser/rules/block/module/rule.ts +56 -0
- package/src/parser/rules/block/module/types-common.ts +11 -0
- package/src/parser/rules/block/module/walk/children.ts +35 -0
- package/src/parser/rules/block/module/walk/index.ts +9 -0
- package/src/parser/rules/block/module/walk/map/index.ts +2 -0
- package/src/parser/rules/block/module/walk/map/stateful-definition-list.ts +25 -0
- package/src/parser/rules/block/module/walk/map/stateful-list.ts +40 -0
- package/src/parser/rules/block/module/walk/map/stateful-table.ts +23 -0
- package/src/parser/rules/block/module/walk/map/stateful-tabs.ts +19 -0
- package/src/parser/rules/block/module/walk/map/stateful.ts +71 -0
- package/src/parser/rules/block/module/walk/map/stateless-definition-list.ts +12 -0
- package/src/parser/rules/block/module/walk/map/stateless-list.ts +29 -0
- package/src/parser/rules/block/module/walk/map/stateless-table.ts +11 -0
- package/src/parser/rules/block/module/walk/map/stateless-tabs.ts +5 -0
- package/src/parser/rules/block/module/walk/map/stateless.ts +51 -0
- package/src/parser/rules/block/module/walk/map/types.ts +6 -0
- package/src/parser/rules/block/module/walk/traverse.ts +65 -0
- package/src/parser/rules/block/orphan-li/content.ts +60 -0
- package/src/parser/rules/block/orphan-li/index.ts +75 -0
- package/src/parser/rules/block/orphan-li/open.ts +25 -0
- package/src/parser/rules/block/orphan-li/tags.ts +40 -0
- package/src/parser/rules/block/paragraph/content.ts +12 -0
- package/src/parser/rules/block/paragraph/index.ts +60 -0
- package/src/parser/rules/block/paragraph/normalize.ts +52 -0
- package/src/parser/rules/block/paragraph/span-markers.ts +52 -0
- package/src/parser/rules/block/parsing/attributes/index.ts +32 -0
- package/src/parser/rules/block/parsing/attributes/names.ts +93 -0
- package/src/parser/rules/block/parsing/attributes/scanner.ts +75 -0
- package/src/parser/rules/block/parsing/attributes/values.ts +26 -0
- package/src/parser/rules/block/parsing/block-item.ts +29 -0
- package/src/parser/rules/block/parsing/content.ts +127 -0
- package/src/parser/rules/block/parsing/end-condition.ts +51 -0
- package/src/parser/rules/block/parsing/inline-content.ts +105 -0
- package/src/parser/rules/block/parsing/inline-newline.ts +41 -0
- package/src/parser/rules/block/parsing/non-boundary.ts +24 -0
- package/src/parser/rules/block/parsing/rule-dispatch.ts +44 -0
- package/src/parser/rules/block/table/index.ts +80 -0
- package/src/parser/rules/block/table/pipe/cell-start.ts +69 -0
- package/src/parser/rules/block/table/pipe/cell.ts +106 -0
- package/src/parser/rules/block/table/pipe/index.ts +2 -0
- package/src/parser/rules/block/table/pipe/row.ts +88 -0
- package/src/parser/rules/block/table/pipe/tokens.ts +14 -0
- package/src/parser/rules/block/table/pipe/trim.ts +50 -0
- package/src/parser/rules/block/table-block/body.ts +79 -0
- package/src/parser/rules/block/table-block/cell-attributes.ts +33 -0
- package/src/parser/rules/block/table-block/cell-boundary.ts +99 -0
- package/src/parser/rules/block/table-block/cell-content/index.ts +88 -0
- package/src/parser/rules/block/table-block/cell-content/segments.ts +134 -0
- package/src/parser/rules/block/table-block/cell-newline.ts +47 -0
- package/src/parser/rules/block/table-block/cell.ts +64 -0
- package/src/parser/rules/block/table-block/index.ts +113 -0
- package/src/parser/rules/block/table-block/row-boundary.ts +75 -0
- package/src/parser/rules/block/table-block/structure.ts +80 -0
- package/src/parser/rules/block/tabview/body.ts +64 -0
- package/src/parser/rules/block/tabview/index.ts +90 -0
- package/src/parser/rules/block/tabview/open.ts +50 -0
- package/src/parser/rules/block/tabview/tab.ts +92 -0
- package/src/parser/rules/block/tabview/tags.ts +30 -0
- package/src/parser/rules/block/toc/element.ts +11 -0
- package/src/parser/rules/block/toc/index.ts +44 -0
- package/src/parser/rules/block/toc/open.ts +84 -0
- package/src/parser/rules/block/utils.ts +10 -610
- package/src/parser/rules/{utils.ts → common/attribute-safety.ts} +3 -49
- package/src/parser/rules/common/block-name.ts +33 -0
- package/src/parser/rules/common/index.ts +2 -0
- package/src/parser/rules/contracts/index.ts +3 -0
- package/src/parser/rules/contracts/parse-context.ts +38 -0
- package/src/parser/rules/contracts/rule.ts +43 -0
- package/src/parser/rules/contracts/scope.ts +31 -0
- package/src/parser/rules/inline/anchor/attributes.ts +54 -0
- package/src/parser/rules/inline/anchor/child.ts +26 -0
- package/src/parser/rules/inline/anchor/close.ts +34 -0
- package/src/parser/rules/inline/anchor/content.ts +59 -0
- package/src/parser/rules/inline/anchor/index.ts +103 -0
- package/src/parser/rules/inline/anchor/newline.ts +26 -0
- package/src/parser/rules/inline/anchor/open.ts +47 -0
- package/src/parser/rules/inline/anchor/paragraph-strip.ts +14 -0
- package/src/parser/rules/inline/anchor/syntax.ts +40 -0
- package/src/parser/rules/inline/anchor-name/index.ts +38 -0
- package/src/parser/rules/inline/anchor-name/name.ts +39 -0
- package/src/parser/rules/inline/anchor-name/syntax.ts +46 -0
- package/src/parser/rules/inline/bibcite/element.ts +14 -0
- package/src/parser/rules/inline/bibcite/index.ts +34 -0
- package/src/parser/rules/inline/bibcite/syntax.ts +64 -0
- package/src/parser/rules/inline/bold.ts +2 -39
- package/src/parser/rules/inline/color/index.ts +35 -0
- package/src/parser/rules/inline/color/syntax.ts +69 -0
- package/src/parser/rules/inline/comment/consume.ts +31 -0
- package/src/parser/rules/inline/{comment.ts → comment/index.ts} +10 -36
- package/src/parser/rules/inline/equation-ref/element.ts +8 -0
- package/src/parser/rules/inline/equation-ref/index.ts +34 -0
- package/src/parser/rules/inline/equation-ref/syntax.ts +45 -0
- package/src/parser/rules/inline/expr/branch.ts +104 -0
- package/src/parser/rules/inline/expr/conditional-branch.ts +27 -0
- package/src/parser/rules/inline/expr/conditional.ts +80 -0
- package/src/parser/rules/inline/expr/depth.ts +25 -0
- package/src/parser/rules/inline/expr/elements.ts +39 -0
- package/src/parser/rules/inline/expr/index.ts +84 -0
- package/src/parser/rules/inline/expr/syntax.ts +45 -0
- package/src/parser/rules/inline/footnote/child.ts +22 -0
- package/src/parser/rules/inline/footnote/close.ts +33 -0
- package/src/parser/rules/inline/footnote/content.ts +54 -0
- package/src/parser/rules/inline/footnote/elements.ts +38 -0
- package/src/parser/rules/inline/footnote/index.ts +54 -0
- package/src/parser/rules/inline/footnote/newline.ts +27 -0
- package/src/parser/rules/inline/footnote/open.ts +38 -0
- package/src/parser/rules/inline/formatting/container.ts +50 -0
- package/src/parser/rules/inline/{guillemet.ts → guillemet/index.ts} +5 -13
- package/src/parser/rules/inline/guillemet/text.ts +11 -0
- package/src/parser/rules/inline/html/gate.ts +64 -0
- package/src/parser/rules/inline/{html.ts → html/index.ts} +9 -60
- package/src/parser/rules/inline/html/open.ts +37 -0
- package/src/parser/rules/inline/image/attributes.ts +22 -0
- package/src/parser/rules/inline/image/body.ts +36 -0
- package/src/parser/rules/inline/image/index.ts +89 -0
- package/src/parser/rules/inline/image/open.ts +56 -0
- package/src/parser/rules/inline/image/source.ts +62 -0
- package/src/parser/rules/inline/image/syntax.ts +76 -0
- package/src/parser/rules/inline/italic.ts +2 -30
- package/src/parser/rules/inline/line-break/backslash.ts +58 -0
- package/src/parser/rules/inline/line-break/elements.ts +9 -0
- package/src/parser/rules/inline/line-break/index.ts +3 -0
- package/src/parser/rules/inline/line-break/newline.ts +82 -0
- package/src/parser/rules/inline/line-break/underscore.ts +45 -0
- package/src/parser/rules/inline/link-anchor.ts +6 -81
- package/src/parser/rules/inline/link-bracket/anchor.ts +3 -0
- package/src/parser/rules/inline/link-bracket/direct-url.ts +5 -0
- package/src/parser/rules/inline/link-bracket/parsed.ts +81 -0
- package/src/parser/rules/inline/link-bracket/parts.ts +64 -0
- package/src/parser/rules/inline/link-bracket/prefix.ts +15 -0
- package/src/parser/rules/inline/link-single.ts +7 -98
- package/src/parser/rules/inline/link-star.ts +7 -69
- package/src/parser/rules/inline/link-triple/fallback.ts +10 -0
- package/src/parser/rules/inline/link-triple/index.ts +62 -0
- package/src/parser/rules/inline/link-triple/interwiki.ts +11 -0
- package/src/parser/rules/inline/link-triple/label.ts +35 -0
- package/src/parser/rules/inline/link-triple/syntax.ts +72 -0
- package/src/parser/rules/inline/link-triple/target.ts +36 -0
- package/src/parser/rules/inline/math-inline/index.ts +40 -0
- package/src/parser/rules/inline/math-inline/syntax.ts +55 -0
- package/src/parser/rules/inline/monospace.ts +2 -30
- package/src/parser/rules/inline/parsing/block-boundary.ts +42 -0
- package/src/parser/rules/inline/parsing/block-start-predicates.ts +117 -0
- package/src/parser/rules/inline/parsing/collect.ts +23 -0
- package/src/parser/rules/inline/parsing/inline-content.ts +115 -0
- package/src/parser/rules/inline/parsing/paragraph-boundary.ts +47 -0
- package/src/parser/rules/inline/parsing/plain-text.ts +69 -0
- package/src/parser/rules/inline/parsing/preserved-line-break.ts +11 -0
- package/src/parser/rules/inline/parsing/rules.ts +34 -0
- package/src/parser/rules/inline/parsing/simple-token.ts +26 -0
- package/src/parser/rules/inline/raw/angle.ts +40 -0
- package/src/parser/rules/inline/raw/double-at.ts +78 -0
- package/src/parser/rules/inline/raw/index.ts +26 -0
- package/src/parser/rules/inline/raw/result.ts +26 -0
- package/src/parser/rules/inline/size/content.ts +65 -0
- package/src/parser/rules/inline/size/index.ts +55 -0
- package/src/parser/rules/inline/size/open.ts +43 -0
- package/src/parser/rules/inline/size/value.ts +45 -0
- package/src/parser/rules/inline/span/content.ts +97 -0
- package/src/parser/rules/inline/span/elements.ts +108 -0
- package/src/parser/rules/inline/span/index.ts +79 -0
- package/src/parser/rules/inline/span/newline.ts +50 -0
- package/src/parser/rules/inline/span/syntax.ts +70 -0
- package/src/parser/rules/inline/{strikethrough.ts → strikethrough/index.ts} +5 -60
- package/src/parser/rules/inline/strikethrough/parse.ts +14 -0
- package/src/parser/rules/inline/strikethrough/syntax.ts +24 -0
- package/src/parser/rules/inline/subscript.ts +2 -39
- package/src/parser/rules/inline/superscript.ts +4 -39
- package/src/parser/rules/inline/text/element.ts +5 -0
- package/src/parser/rules/inline/{text.ts → text/index.ts} +5 -4
- package/src/parser/rules/inline/underline/child.ts +26 -0
- package/src/parser/rules/inline/underline/content.ts +29 -0
- package/src/parser/rules/inline/{underline.ts → underline/index.ts} +6 -49
- package/src/parser/rules/inline/user/element.ts +11 -0
- package/src/parser/rules/inline/user/index.ts +34 -0
- package/src/parser/rules/inline/user/syntax.ts +67 -0
- package/src/parser/rules/inline/utils.ts +4 -344
- package/src/parser/rules/tokens.ts +106 -0
- package/src/parser/rules/types.ts +9 -252
- package/src/parser/depth.ts +0 -251
- package/src/parser/parse.ts +0 -315
- package/src/parser/postprocess/spanStrip.ts +0 -697
- package/src/parser/preprocess/expr.ts +0 -265
- package/src/parser/preprocess/utils.ts +0 -250
- package/src/parser/preprocess/whitespace.ts +0 -111
- package/src/parser/rules/block/align.ts +0 -282
- package/src/parser/rules/block/bibliography.ts +0 -359
- package/src/parser/rules/block/block-list.ts +0 -689
- package/src/parser/rules/block/blockquote.ts +0 -238
- package/src/parser/rules/block/code.ts +0 -187
- package/src/parser/rules/block/collapsible.ts +0 -337
- package/src/parser/rules/block/definition-list.ts +0 -270
- package/src/parser/rules/block/div.ts +0 -400
- package/src/parser/rules/block/embed-block.ts +0 -153
- package/src/parser/rules/block/footnoteblock.ts +0 -200
- package/src/parser/rules/block/heading.ts +0 -142
- package/src/parser/rules/block/html.ts +0 -222
- package/src/parser/rules/block/iframe.ts +0 -239
- package/src/parser/rules/block/include.ts +0 -179
- package/src/parser/rules/block/list.ts +0 -244
- package/src/parser/rules/block/math.ts +0 -183
- package/src/parser/rules/block/module/include/resolve.ts +0 -556
- package/src/parser/rules/block/module/listpages/types.ts +0 -513
- package/src/parser/rules/block/module/walk.ts +0 -380
- package/src/parser/rules/block/module.ts +0 -164
- package/src/parser/rules/block/orphan-li.ts +0 -177
- package/src/parser/rules/block/paragraph.ts +0 -157
- package/src/parser/rules/block/table-block.ts +0 -726
- package/src/parser/rules/block/table.ts +0 -441
- package/src/parser/rules/block/tabview.ts +0 -331
- package/src/parser/rules/block/toc.ts +0 -129
- package/src/parser/rules/inline/anchor-name.ts +0 -154
- package/src/parser/rules/inline/anchor.ts +0 -327
- package/src/parser/rules/inline/bibcite.ts +0 -153
- package/src/parser/rules/inline/color.ts +0 -140
- package/src/parser/rules/inline/equation-ref.ts +0 -115
- package/src/parser/rules/inline/expr.ts +0 -526
- package/src/parser/rules/inline/footnote.ts +0 -223
- package/src/parser/rules/inline/image.ts +0 -328
- package/src/parser/rules/inline/line-break.ts +0 -326
- package/src/parser/rules/inline/link-triple.ts +0 -267
- package/src/parser/rules/inline/math-inline.ts +0 -126
- package/src/parser/rules/inline/raw.ts +0 -262
- package/src/parser/rules/inline/size.ts +0 -244
- package/src/parser/rules/inline/span.ts +0 -424
- package/src/parser/rules/inline/user.ts +0 -147
|
@@ -1,326 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
*
|
|
3
|
-
* Parses the various Wikidot line-break syntaxes.
|
|
4
|
-
*
|
|
5
|
-
* Wikidot supports three distinct mechanisms for producing `<br />` elements:
|
|
6
|
-
*
|
|
7
|
-
* 1. Implicit newline: a single `NEWLINE` token within a paragraph
|
|
8
|
-
* becomes a `<br />`, unless it precedes a block-level element
|
|
9
|
-
* (heading, list, blockquote, etc.) or another newline (paragraph break).
|
|
10
|
-
*
|
|
11
|
-
* 2. Backslash at end of line: `\` followed by newline. The preprocessor
|
|
12
|
-
* converts `\\\n` to a `BACKSLASH_BREAK` token (U+E000), which this
|
|
13
|
-
* rule then handles. Wikidot preserves a space after the line break
|
|
14
|
-
* in this case.
|
|
15
|
-
*
|
|
16
|
-
* 3. Underscore at end of line: ` _` followed by newline, or `_` at the
|
|
17
|
-
* start of a line followed by newline. This is a more explicit
|
|
18
|
-
* line-break syntax.
|
|
19
|
-
*
|
|
20
|
-
* All three rules mark their line-break elements with `_preservedTrailingBreak`
|
|
21
|
-
* when the break was explicitly requested (backslash or underscore syntax),
|
|
22
|
-
* so the paragraph postprocessor knows not to strip trailing breaks.
|
|
23
|
-
*
|
|
24
|
-
* The newline rule suppresses line-breaks in several situations to avoid
|
|
25
|
-
* spurious `<br />` elements before block-level constructs.
|
|
26
|
-
*
|
|
27
|
-
* @module
|
|
28
|
-
*/
|
|
29
|
-
import type { Element } from "@wdprlib/ast";
|
|
30
|
-
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
31
|
-
import type { TokenType } from "../../../lexer";
|
|
32
|
-
|
|
33
|
-
/**
|
|
34
|
-
* Token types that indicate the start of a block-level element.
|
|
35
|
-
*
|
|
36
|
-
* When a NEWLINE is followed (after optional whitespace) by one of
|
|
37
|
-
* these token types, the newline line-break rule suppresses the
|
|
38
|
-
* `<br />` to prevent extra whitespace before block elements.
|
|
39
|
-
*/
|
|
40
|
-
const BLOCK_START_TOKENS: TokenType[] = [
|
|
41
|
-
"BLOCKQUOTE_MARKER", // >
|
|
42
|
-
"LIST_BULLET", // *
|
|
43
|
-
"LIST_NUMBER", // #
|
|
44
|
-
"HEADING_MARKER", // + ++ +++
|
|
45
|
-
"HR_MARKER", // ----
|
|
46
|
-
"TABLE_MARKER", // ||
|
|
47
|
-
];
|
|
48
|
-
|
|
49
|
-
/**
|
|
50
|
-
* Checks whether a token type represents the start of a block-level element.
|
|
51
|
-
*
|
|
52
|
-
* @param type - The token type to check
|
|
53
|
-
* @returns `true` if the token type is in the {@link BLOCK_START_TOKENS} list
|
|
54
|
-
*/
|
|
55
|
-
function isBlockStartToken(type: TokenType): boolean {
|
|
56
|
-
return BLOCK_START_TOKENS.includes(type);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* Inline rule for implicit newline-to-line-break conversion.
|
|
61
|
-
*
|
|
62
|
-
* A single `NEWLINE` token within inline content typically becomes a
|
|
63
|
-
* `<br />` element. However, the line break is suppressed in several
|
|
64
|
-
* situations to match Wikidot's behavior:
|
|
65
|
-
*
|
|
66
|
-
* - End of input (no meaningful token follows)
|
|
67
|
-
* - Another NEWLINE follows (this is a paragraph break, not a line break)
|
|
68
|
-
* - A valid block-start token follows at line start (heading, list, etc.)
|
|
69
|
-
* - A `BACKSLASH_BREAK` token follows (the backslash rule handles the break)
|
|
70
|
-
*
|
|
71
|
-
* Additional validation is performed for heading and list markers to ensure
|
|
72
|
-
* they actually form valid block structures (e.g. a heading marker of 7+
|
|
73
|
-
* characters is not a valid heading).
|
|
74
|
-
*/
|
|
75
|
-
export const newlineLineBreakRule: InlineRule = {
  name: "newlineLineBreak",
  startTokens: ["NEWLINE"],

  /**
   * Attempts to convert the NEWLINE token at `ctx.pos` into a line-break.
   *
   * The NEWLINE is always consumed (exactly one token). Whether a
   * `"line-break"` element is emitted depends on the first token after the
   * newline that is not WHITESPACE:
   *
   * - nothing / `EOF` / another `NEWLINE` / `BACKSLASH_BREAK`: no element
   * - a block-start token flagged `lineStart` that passes the heading
   *   validation below: no element
   * - anything else: one `"line-break"` element
   *
   * @param ctx - Parse context with token stream and current position
   * @returns `{ success: false }` when the current token is not a NEWLINE;
   *   otherwise a successful result consuming one token, with either a
   *   single `"line-break"` element or an empty element list
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (ctx.tokens[ctx.pos]?.type !== "NEWLINE") {
      return { success: false };
    }

    // Longest heading marker that is still accepted as a heading.
    const maxHeadingMarkerLength = 6;

    // A suppressed break still consumes the NEWLINE but emits nothing.
    const suppressed: RuleResult<Element> = { success: true, elements: [], consumed: 1 };

    // Single scan: locate the first non-whitespace token after the newline.
    let nextIndex = ctx.pos + 1;
    while (ctx.tokens[nextIndex]?.type === "WHITESPACE") {
      nextIndex++;
    }
    const next = ctx.tokens[nextIndex];

    // Cheap suppressors first: end of input, paragraph break (second
    // NEWLINE), or a BACKSLASH_BREAK whose own rule emits the line-break.
    if (
      !next ||
      next.type === "EOF" ||
      next.type === "NEWLINE" ||
      next.type === "BACKSLASH_BREAK"
    ) {
      return suppressed;
    }

    // Block-start tokens only count when they sit at an actual line start.
    let startsBlock = isBlockStartToken(next.type as TokenType) && Boolean(next.lineStart);

    // A heading marker must be short enough and be followed by whitespace;
    // when a STAR token directly follows the marker, the whitespace is
    // required after that STAR instead.
    if (startsBlock && next.type === "HEADING_MARKER") {
      const afterMarkerIndex = nextIndex + 1;
      const afterMarker = ctx.tokens[afterMarkerIndex];
      const spacer = afterMarker?.type === "STAR" ? ctx.tokens[afterMarkerIndex + 1] : afterMarker;
      startsBlock =
        next.value.length <= maxHeadingMarkerLength && spacer?.type === "WHITESPACE";
    }

    if (startsBlock) {
      return suppressed;
    }

    return {
      success: true,
      elements: [{ element: "line-break" }],
      consumed: 1,
    };
  },
};
|
|
162
|
-
|
|
163
|
-
/**
 * Inline rule for line breaks written as a backslash at the end of a line.
 *
 * In Wikidot, `\` followed by a newline creates a line break. The
 * preprocessor turns that sequence into a `BACKSLASH_BREAK` token (U+E000),
 * which is what this rule looks for.
 *
 * Recognized token shapes:
 * - `WHITESPACE + BACKSLASH_BREAK`: a line-break followed by a single-space
 *   text element (Wikidot keeps the space after the break). The space is
 *   left out when the next tokens form an underscore line break
 *   (`WHITESPACE + UNDERSCORE + NEWLINE/EOF`), to avoid doubled spacing.
 * - `BACKSLASH_BREAK` on its own: just a line-break.
 *
 * Every line-break produced here carries `_preservedTrailingBreak = true`
 * so the paragraph postprocessor does not strip it.
 */
export const backslashLineBreakRule: InlineRule = {
  name: "backslashLineBreak",
  startTokens: ["WHITESPACE", "BACKSLASH_BREAK"],

  /**
   * Tries to match a backslash line break starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns On a match, a successful result holding the line-break (plus a
   *   `" "` text element where applicable) and the number of tokens consumed
   *   (1 for a bare break, 2 when a leading WHITESPACE is included);
   *   otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const typeAt = (offset: number) => ctx.tokens[ctx.pos + offset]?.type;

    // Builds a line-break flagged as explicit so it survives at paragraph end.
    const makePreservedBreak = (): Element => {
      const node: any = { element: "line-break" };
      node._preservedTrailingBreak = true;
      return node;
    };

    switch (typeAt(0)) {
      case "BACKSLASH_BREAK":
        return { success: true, elements: [makePreservedBreak()], consumed: 1 };

      case "WHITESPACE": {
        if (typeAt(1) !== "BACKSLASH_BREAK") {
          return { success: false };
        }

        // Is the break immediately followed by " _" and then NEWLINE/EOF?
        const tailType = typeAt(4);
        const underscoreBreakFollows =
          typeAt(2) === "WHITESPACE" &&
          typeAt(3) === "UNDERSCORE" &&
          (tailType === "NEWLINE" || tailType === "EOF");

        // When it is, skip the space and let the underscore rule take over.
        const elements: Element[] = underscoreBreakFollows
          ? [makePreservedBreak()]
          : [makePreservedBreak(), { element: "text", data: " " }];

        return { success: true, elements, consumed: 2 };
      }

      default:
        return { success: false };
    }
  },
};
|
|
254
|
-
|
|
255
|
-
/**
 * Inline rule for underscore-at-end-of-line line breaks.
 *
 * Wikidot syntax: ` _` followed by a newline (space + underscore + newline),
 * or `_` at the start of a line followed by a newline.
 *
 * Recognized token patterns:
 * - Pattern 1: `WHITESPACE + UNDERSCORE + NEWLINE/EOF`
 * - Pattern 2: `UNDERSCORE (at lineStart) + NEWLINE/EOF`
 *
 * A terminating NEWLINE is consumed together with the marker so the newline
 * rule cannot emit a second break. A terminating EOF is NOT consumed: it is
 * the end-of-stream sentinel, and swallowing it would leave the caller's
 * position past the end of the token stream.
 *
 * All line-break elements are marked with `_preservedTrailingBreak = true`
 * so the paragraph postprocessor does not strip them.
 */
export const underscoreLineBreakRule: InlineRule = {
  name: "underscoreLineBreak",
  startTokens: ["WHITESPACE", "UNDERSCORE"],

  /**
   * Attempts to parse an underscore line break at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with one `"line-break"` element and the
   *   number of tokens consumed (marker tokens, plus the NEWLINE when the
   *   terminator is a NEWLINE), or `{ success: false }` if neither pattern
   *   matches
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const first = ctx.tokens[ctx.pos];
    if (!first) {
      return { success: false };
    }

    // Number of marker tokens that precede the NEWLINE/EOF terminator.
    let markerLength: number;
    if (first.type === "WHITESPACE" && ctx.tokens[ctx.pos + 1]?.type === "UNDERSCORE") {
      markerLength = 2; // Pattern 1: WHITESPACE + UNDERSCORE
    } else if (first.type === "UNDERSCORE" && first.lineStart) {
      markerLength = 1; // Pattern 2: UNDERSCORE at line start
    } else {
      return { success: false };
    }

    const terminator = ctx.tokens[ctx.pos + markerLength];
    if (!terminator || (terminator.type !== "NEWLINE" && terminator.type !== "EOF")) {
      return { success: false };
    }

    // Mark as an explicit line-break so it is preserved at paragraph end.
    // `any` is kept because `_preservedTrailingBreak` is an internal marker.
    const lb: any = { element: "line-break" };
    lb._preservedTrailingBreak = true;

    return {
      success: true,
      elements: [lb],
      // Swallow the NEWLINE, but leave the EOF sentinel for the caller.
      consumed: terminator.type === "NEWLINE" ? markerLength + 1 : markerLength,
    };
  },
};
|
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
*
|
|
3
|
-
* Parses the Wikidot triple-bracket link syntax: `[[[target | label]]]`.
|
|
4
|
-
*
|
|
5
|
-
* Triple-bracket links are Wikidot's primary page-linking mechanism.
|
|
6
|
-
* They support several target formats:
|
|
7
|
-
*
|
|
8
|
-
* - Page links: `[[[page-name]]]` or `[[[page-name | Label]]]`
|
|
9
|
-
* - Category pages: `[[[category:page-name]]]` (display shows text after colon)
|
|
10
|
-
* - Anchor links: `[[[#anchor-name]]]`
|
|
11
|
-
* - External URLs: `[[[https://example.com | Label]]]`
|
|
12
|
-
* - Interwiki links: `[[[wikipedia:Article]]]` (for known prefixes)
|
|
13
|
-
*
|
|
14
|
-
* Special syntax:
|
|
15
|
-
* - `[[[*target]]]` -- `*` prefix is stripped from target; for external URLs,
|
|
16
|
-
* adds `target="_blank"` (new tab)
|
|
17
|
-
* - `[[[*|label]]]` -- links to root `/` with the given label
|
|
18
|
-
* - `[[[page|]]]` -- empty label after pipe defaults to the page name
|
|
19
|
-
*
|
|
20
|
-
* Multi-line support: a single newline within the link is converted to
|
|
21
|
-
* a space (in both target and label portions), but a double newline
|
|
22
|
-
* (paragraph break) or a newline directly before `]]]` invalidates the link.
|
|
23
|
-
*
|
|
24
|
-
* When the opening `[[[` has no valid closing `]]]`, it falls through
|
|
25
|
-
* as literal text rather than failing.
|
|
26
|
-
*
|
|
27
|
-
* Produces a `"link"` AST element with an appropriate `type` field
|
|
28
|
-
* (`"page"`, `"anchor"`, `"direct"`, or `"interwiki"`).
|
|
29
|
-
*
|
|
30
|
-
* @module
|
|
31
|
-
*/
|
|
32
|
-
import type { Element, LinkType, LinkLocation, LinkLabel } from "@wdprlib/ast";
|
|
33
|
-
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
34
|
-
import { currentToken } from "../types";
|
|
35
|
-
|
|
36
|
-
/**
 * Looks ahead from `startPos` for a `LINK_CLOSE` (`]]]`) token that can
 * legally terminate a triple-bracket link.
 *
 * Isolated newlines inside the link are tolerated. The scan gives up and
 * reports no closing marker when:
 * - two NEWLINE tokens appear back to back (a paragraph break),
 * - a NEWLINE is immediately followed by `LINK_CLOSE`,
 * - an `EOF` token or the end of the token array is reached first.
 *
 * @param ctx - The current parse context
 * @param startPos - Token index at which to begin scanning (after `[[[`)
 * @returns `true` if a valid closing `]]]` is found
 */
function hasClosingLinkMarker(ctx: ParseContext, startPos: number): boolean {
  const { tokens } = ctx;

  for (let index = startPos; index < tokens.length; index++) {
    const current = tokens[index];
    if (!current) {
      return false;
    }

    switch (current.type) {
      case "EOF":
        return false;

      case "LINK_CLOSE":
        return true;

      case "NEWLINE": {
        // A newline may not be followed by another newline or by `]]]`.
        const followingType = tokens[index + 1]?.type;
        if (followingType === "NEWLINE" || followingType === "LINK_CLOSE") {
          return false;
        }
        break;
      }
    }
  }

  return false;
}
|
|
75
|
-
|
|
76
|
-
/**
 * Inline rule for `[[[target | label]]]` triple-bracket links.
 *
 * Fires on a `LINK_OPEN` (`[[[`) token. It gathers the target text and an
 * optional label (everything after the first pipe), then classifies the
 * target as a page, anchor, direct URL, or interwiki link.
 *
 * If the link cannot be formed, only the opening `[[[` is consumed and it is
 * emitted as literal text. That happens when:
 * - no valid closing `]]]` is found,
 * - the target is empty but a pipe is present (`[[[|text]]]`),
 * - the target contains two or more consecutive `#` (`[[[page##anchor]]]`).
 *
 * Other behaviors:
 * - `[[[*|label]]]` links to root `/`
 * - `[[[*target]]]` strips the `*`; for direct (URL) links it also sets the
 *   link target to `"new-tab"`
 * - Without a label, category-style targets display only the text after the
 *   first colon
 */
export const linkTripleRule: InlineRule = {
  name: "linkTriple",
  startTokens: ["LINK_OPEN"],

  /**
   * Attempts to parse a triple-bracket link at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result holding either a `"link"` element (consuming
   *   the whole link) or a text element for the literal `[[[` (consuming one
   *   token) when the syntax is invalid
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);

    // Fallback shared by every rejection path: emit `[[[` as plain text.
    const literalOpener = (): RuleResult<Element> => ({
      success: true,
      elements: [{ element: "text", data: opener.value }],
      consumed: 1,
    });

    if (!hasClosingLinkMarker(ctx, ctx.pos + 1)) {
      return literalOpener();
    }

    // Walk the tokens up to the closing marker, routing text to the target
    // until the first pipe is seen and to the label afterwards.
    let rawTarget = "";
    let rawLabel = "";
    let sawPipe = false;
    let cursor = ctx.pos + 1;

    for (; cursor < ctx.tokens.length; cursor++) {
      const token = ctx.tokens[cursor];
      if (!token || token.type === "LINK_CLOSE" || token.type === "EOF") {
        break;
      }

      // Only the first pipe separates target from label; later pipes are text.
      if (token.type === "PIPE" && !sawPipe) {
        sawPipe = true;
        continue;
      }

      // Newlines inside the link count as a single space.
      const piece = token.type === "NEWLINE" ? " " : token.value;
      if (sawPipe) {
        rawLabel += piece;
      } else {
        rawTarget += piece;
      }
    }

    // Opening `[[[` plus every collected token, plus `]]]` when present.
    let consumed = cursor - ctx.pos;
    if (ctx.tokens[cursor]?.type === "LINK_CLOSE") {
      consumed++;
    }

    const trimmedTarget = rawTarget.trim();

    // Invalid: empty target with a pipe, e.g. [[[|some-page]]]
    if (sawPipe && trimmedTarget === "") {
      return literalOpener();
    }

    // Invalid: consecutive `#` in the target, e.g. [[[home###|Home]]];
    // Wikidot rejects these as invalid link syntax.
    if (trimmedTarget.includes("##")) {
      return literalOpener();
    }

    // A leading `*` is not part of the target itself.
    const hasStar = trimmedTarget.startsWith("*");
    const finalTarget = hasStar ? trimmedTarget.slice(1) : trimmedTarget;

    const { linkType, link } = determineLinkTypeAndLocation(finalTarget);

    let displayText: string;
    if (sawPipe) {
      // An empty label (e.g. [[[page|]]]) falls back to the page name.
      displayText = rawLabel.trim() || finalTarget;
    } else {
      // Without a pipe the display text derives from the untrimmed-of-star
      // target, so a `*` prefix stays visible.
      // NOTE(review): the "http" prefix test also matches page names such as
      // `httpd:config`, which then keep their full text — confirm intended.
      const colonAt = trimmedTarget.indexOf(":");
      const showTextAfterColon =
        colonAt !== -1 && !trimmedTarget.startsWith("http") && !hasStar;
      displayText = showTextAfterColon
        ? trimmedTarget.slice(colonAt + 1).trim()
        : trimmedTarget;
    }

    const label: LinkLabel = { text: displayText };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: linkType,
            link,
            extra: null,
            label,
            target: hasStar && linkType === "direct" ? "new-tab" : null,
          },
        },
      ],
      consumed,
    };
  },
};
|
|
225
|
-
|
|
226
|
-
/**
 * Interwiki prefixes that Wikidot recognizes.
 *
 * A link target of the form `<prefix>:<rest>` whose prefix is in this set
 * (for example `wikipedia:Article`) is treated as an interwiki link instead
 * of a category page link.
 */
const INTERWIKI_PREFIXES = new Set(["wikipedia", "google", "dictionary", "wikidot"]);

/**
 * Classifies a raw triple-bracket link target and builds its location data.
 *
 * Checks are applied in this order:
 * 1. Leading `#`: anchor link
 * 2. Leading `http://` or `https://`: direct (external) link
 * 3. A colon that is not the first character, no `/` anywhere, and a known
 *    interwiki prefix before the colon (compared case-insensitively):
 *    interwiki link
 * 4. Anything else: page link with `site: null` (this includes category
 *    pages such as `system:Recent Changes`)
 *
 * @param target - The trimmed, processed link target string
 * @returns An object with `linkType` and `link` (the structured location data)
 */
function determineLinkTypeAndLocation(target: string): { linkType: LinkType; link: LinkLocation } {
  if (target.startsWith("#")) {
    return { linkType: "anchor", link: target };
  }

  if (/^https?:\/\//.test(target)) {
    return { linkType: "direct", link: target };
  }

  // Only known prefixes make an interwiki link; targets with slashes never do.
  const separatorAt = target.indexOf(":");
  const isInterwiki =
    separatorAt > 0 &&
    !target.includes("/") &&
    INTERWIKI_PREFIXES.has(target.slice(0, separatorAt).toLowerCase());

  if (isInterwiki) {
    return { linkType: "interwiki", link: target };
  }

  return { linkType: "page", link: { site: null, page: target } };
}
|