npm - @yozora/tokenizer-definition - Versions diffs - 2.0.4 → 2.0.5-alpha.0 - Mend

@yozora/tokenizer-definition 2.0.4 → 2.0.5-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/lib/cjs/{index.js → index.cjs} +6 -8
package/lib/esm/{index.js → index.mjs} +5 -7
package/lib/types/index.d.ts +4 -4
package/package.json +18 -14
package/src/index.ts +12 -0
package/src/match.ts +340 -0
package/src/parse.ts +50 -0
package/src/tokenizer.ts +31 -0
package/src/types.ts +58 -0
package/src/util/link-destination.ts +160 -0
package/src/util/link-label.ts +116 -0
package/src/util/link-title.ts +143 -0

package/lib/cjs/{index.js → index.cjs} RENAMED Viewed

@@ -297,7 +297,6 @@ const match = function (api) {
         return { token, nextIndex: endIndex };
     }
     function eatContinuationText(line, token) {
-        var _a;
         if (token.title != null && token.title.saturated)
             return { status: 'notMatched' };
         const { nodePoints, startIndex, firstNonWhitespaceIndex, endIndex } = line;
@@ -357,7 +356,7 @@ const match = function (api) {
             };
         }
         token.lines.push(line);
-        const saturated = (_a = token.title) === null || _a === void 0 ? void 0 : _a.saturated;
+        const saturated = token.title?.saturated;
         return { status: saturated ? 'closing' : 'opening', nextIndex: endIndex };
     }
     function onClose(token) {
@@ -415,19 +414,18 @@ const uniqueName = '@yozora/tokenizer-definition';
 class DefinitionTokenizer extends coreTokenizer.BaseBlockTokenizer {
     constructor(props = {}) {
-        var _a, _b;
         super({
-            name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
-            priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.ATOMIC,
+            name: props.name ?? uniqueName,
+            priority: props.priority ?? coreTokenizer.TokenizerPriority.ATOMIC,
         });
-        this.match = match;
-        this.parse = parse;
     }
+    match = match;
+    parse = parse;
 }
 exports.DefinitionTokenizer = DefinitionTokenizer;
 exports.DefinitionTokenizerName = uniqueName;
-exports["default"] = DefinitionTokenizer;
+exports.default = DefinitionTokenizer;
 exports.definitionMatch = match;
 exports.definitionParse = parse;
 exports.eatAndCollectLinkDestination = eatAndCollectLinkDestination;

package/lib/esm/{index.js → index.mjs} RENAMED Viewed

@@ -293,7 +293,6 @@ const match = function (api) {
         return { token, nextIndex: endIndex };
     }
     function eatContinuationText(line, token) {
-        var _a;
         if (token.title != null && token.title.saturated)
             return { status: 'notMatched' };
         const { nodePoints, startIndex, firstNonWhitespaceIndex, endIndex } = line;
@@ -353,7 +352,7 @@ const match = function (api) {
             };
         }
         token.lines.push(line);
-        const saturated = (_a = token.title) === null || _a === void 0 ? void 0 : _a.saturated;
+        const saturated = token.title?.saturated;
         return { status: saturated ? 'closing' : 'opening', nextIndex: endIndex };
     }
     function onClose(token) {
@@ -411,14 +410,13 @@ const uniqueName = '@yozora/tokenizer-definition';
 class DefinitionTokenizer extends BaseBlockTokenizer {
     constructor(props = {}) {
-        var _a, _b;
         super({
-            name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
-            priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.ATOMIC,
+            name: props.name ?? uniqueName,
+            priority: props.priority ?? TokenizerPriority.ATOMIC,
         });
-        this.match = match;
-        this.parse = parse;
     }
+    match = match;
+    parse = parse;
 }
 export { DefinitionTokenizer, uniqueName as DefinitionTokenizerName, DefinitionTokenizer as default, match as definitionMatch, parse as definitionParse, eatAndCollectLinkDestination, eatAndCollectLinkLabel, eatAndCollectLinkTitle };

package/lib/types/index.d.ts CHANGED Viewed

@@ -116,8 +116,8 @@ declare function eatAndCollectLinkTitle(nodePoints: ReadonlyArray<INodePoint>, s
     state: ILinkTitleCollectingState;
 };
-declare type T = DefinitionType;
-declare type INode = Definition;
+type T = DefinitionType;
+type INode = Definition;
 declare const uniqueName = "@yozora/tokenizer-definition";
 interface IToken extends IPartialYastBlockToken<T> {
     /**
@@ -158,8 +158,8 @@ interface IToken extends IPartialYastBlockToken<T> {
      */
     _identifier?: string;
 }
-declare type IThis = ITokenizer;
-declare type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
+type IThis = ITokenizer;
+type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
 /**
  * A link reference definition consists of a link label, indented up to three

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yozora/tokenizer-definition",
-  "version": "2.0.4",
+  "version": "2.0.5-alpha.0",
   "author": {
     "name": "guanghechen",
     "url": "https://github.com/guanghechen/"
@@ -11,33 +11,37 @@
     "directory": "tokenizers/definition"
   },
   "homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/definition",
-  "main": "lib/cjs/index.js",
-  "module": "lib/esm/index.js",
-  "types": "lib/types/index.d.ts",
-  "source": "src/index.ts",
+  "type": "module",
+  "exports": {
+    "types": "./lib/types/index.d.ts",
+    "import": "./lib/esm/index.mjs",
+    "require": "./lib/cjs/index.cjs"
+  },
+  "source": "./src/index.ts",
+  "types": "./lib/types/index.d.ts",
+  "main": "./lib/cjs/index.cjs",
+  "module": "./lib/esm/index.mjs",
   "license": "MIT",
   "engines": {
     "node": ">= 16.0.0"
   },
   "files": [
     "lib/",
-    "!lib/**/*.js.map",
-    "!lib/**/*.d.ts.map",
+    "src/",
     "package.json",
     "CHANGELOG.md",
     "LICENSE",
     "README.md"
   ],
   "scripts": {
-    "build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.js",
-    "prebuild": "rimraf lib/",
+    "build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
     "prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
-    "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
+    "test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
   },
   "dependencies": {
-    "@yozora/ast": "^2.0.4",
-    "@yozora/character": "^2.0.4",
-    "@yozora/core-tokenizer": "^2.0.4"
+    "@yozora/ast": "^2.0.5-alpha.0",
+    "@yozora/character": "^2.0.5-alpha.0",
+    "@yozora/core-tokenizer": "^2.0.5-alpha.0"
   },
-  "gitHead": "c980b95254394dcacba0cbb4bea251350b09397c"
+  "gitHead": "8bf941fe4ef82947165b0f3cc123cd493665e13b"
 }

package/src/index.ts ADDED Viewed

@@ -0,0 +1,12 @@
+export * from './util/link-destination'
+export * from './util/link-label'
+export * from './util/link-title'
+export { match as definitionMatch } from './match'
+export { parse as definitionParse } from './parse'
+export { DefinitionTokenizer, DefinitionTokenizer as default } from './tokenizer'
+export { uniqueName as DefinitionTokenizerName } from './types'
+export type {
+  IThis as IDefinitionHookContext,
+  IToken as IDefinitionToken,
+  ITokenizerProps as IDefinitionTokenizerProps,
+} from './types'

package/src/match.ts ADDED Viewed

@@ -0,0 +1,340 @@
+import { DefinitionType } from '@yozora/ast'
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, calcStringFromNodePoints } from '@yozora/character'
+import type {
+  IMatchBlockHookCreator,
+  IPhrasingContentLine,
+  IResultOfEatContinuationText,
+  IResultOfEatOpener,
+  IResultOfOnClose,
+} from '@yozora/core-tokenizer'
+import {
+  calcEndPoint,
+  calcStartPoint,
+  eatOptionalWhitespaces,
+  resolveLabelToIdentifier,
+} from '@yozora/core-tokenizer'
+import type { IThis, IToken, T } from './types'
+import { eatAndCollectLinkDestination } from './util/link-destination'
+import { eatAndCollectLinkLabel } from './util/link-label'
+import { eatAndCollectLinkTitle } from './util/link-title'
+/**
+ * A link reference definition consists of a link label, indented up to three
+ * spaces, followed by a colon (:), optional whitespace (including up to one
+ * line ending), a link destination, optional whitespace (including up to one
+ * line ending), and an optional link title, which if it is present must be
+ * separated from the link destination by whitespace. No further non-whitespace
+ * characters may occur on the line.
+ *
+ * A link reference definition does not correspond to a structural element of
+ * a document. Instead, it defines a label which can be used in reference
+ * links and reference-style images elsewhere in the document. Link reference
+ * definitions can come either before or after the links that use them.
+ *
+ * @see https://github.github.com/gfm/#link-reference-definition
+ */
+export const match: IMatchBlockHookCreator<T, IToken, IThis> = function (api) {
+  return {
+    isContainingBlock: false,
+    eatOpener,
+    eatContinuationText,
+    onClose,
+  }
+  function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
+    /**
+     * Four spaces are too much
+     * @see https://github.github.com/gfm/#example-180
+     */
+    if (line.countOfPrecedeSpaces >= 4) return null
+    const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
+    if (firstNonWhitespaceIndex >= endIndex) return null
+    // Try to match link label
+    let i = firstNonWhitespaceIndex
+    const { nextIndex: labelEndIndex, state: labelState } = eatAndCollectLinkLabel(
+      nodePoints,
+      i,
+      endIndex,
+      null,
+    )
+    if (labelEndIndex < 0) return null
+    const lineNo = nodePoints[startIndex].line
+    // Optimization: lazy calculation
+    const createInitState = (): IToken => {
+      const token: IToken = {
+        nodeType: DefinitionType,
+        position: {
+          start: calcStartPoint(nodePoints, startIndex),
+          end: calcEndPoint(nodePoints, endIndex - 1),
+        },
+        label: labelState,
+        destination: null,
+        title: null,
+        lineNoOfLabel: lineNo,
+        lineNoOfDestination: -1,
+        lineNoOfTitle: -1,
+        lines: [line],
+      }
+      return token
+    }
+    if (!labelState.saturated) {
+      const token = createInitState()
+      return { token, nextIndex: endIndex }
+    }
+    // Saturated but no following colon exists.
+    if (
+      labelEndIndex < 0 ||
+      labelEndIndex + 1 >= endIndex ||
+      nodePoints[labelEndIndex].codePoint !== AsciiCodePoint.COLON
+    )
+      return null
+    /**
+     * At most one line break can be used between link destination and link label
+     * @see https://github.github.com/gfm/#example-162
+     * @see https://github.github.com/gfm/#example-164
+     * @see https://github.github.com/gfm/#link-reference-definition
+     */
+    i = eatOptionalWhitespaces(nodePoints, labelEndIndex + 1, endIndex)
+    if (i >= endIndex) {
+      const token = createInitState()
+      return { token, nextIndex: endIndex }
+    }
+    // Try to match link destination
+    const { nextIndex: destinationEndIndex, state: destinationState } =
+      eatAndCollectLinkDestination(nodePoints, i, endIndex, null)
+    /**
+     * The link destination may not be omitted
+     * @see https://github.github.com/gfm/#example-168
+     */
+    if (destinationEndIndex < 0) return null
+    // Link destination not saturated
+    if (!destinationState.saturated && destinationEndIndex !== endIndex) return null
+    /**
+     * At most one line break can be used between link title and link destination
+     * @see https://github.github.com/gfm/#example-162
+     * @see https://github.github.com/gfm/#example-164
+     * @see https://github.github.com/gfm/#link-reference-definition
+     */
+    i = eatOptionalWhitespaces(nodePoints, destinationEndIndex, endIndex)
+    if (i >= endIndex) {
+      const token = createInitState()
+      token.destination = destinationState
+      token.lineNoOfDestination = lineNo
+      return { token, nextIndex: endIndex }
+    }
+    /**
+     * The title must be separated from the link destination by whitespace.
+     * @see https://github.github.com/gfm/#example-170
+     */
+    if (i === destinationEndIndex) return null
+    // Try to match link-title
+    const { nextIndex: titleEndIndex, state: titleState } = eatAndCollectLinkTitle(
+      nodePoints,
+      i,
+      endIndex,
+      null,
+    )
+    /**
+     * Non-whitespace characters after the title are not allowed
+     * @see https://github.github.com/gfm/#example-178
+     */
+    if (titleEndIndex >= 0) i = titleEndIndex
+    if (i < endIndex) {
+      const k = eatOptionalWhitespaces(nodePoints, i, endIndex)
+      if (k < endIndex) return null
+    }
+    const token = createInitState()
+    token.destination = destinationState
+    token.title = titleState
+    token.lineNoOfDestination = lineNo
+    token.lineNoOfTitle = lineNo
+    return { token, nextIndex: endIndex }
+  }
+  function eatContinuationText(
+    line: Readonly<IPhrasingContentLine>,
+    token: IToken,
+  ): IResultOfEatContinuationText {
+    // All parts of Definition have been matched
+    if (token.title != null && token.title.saturated) return { status: 'notMatched' }
+    const { nodePoints, startIndex, firstNonWhitespaceIndex, endIndex } = line
+    const lineNo = nodePoints[startIndex].line
+    let i = firstNonWhitespaceIndex
+    if (!token.label.saturated) {
+      const { nextIndex: labelEndIndex, state: labelState } = eatAndCollectLinkLabel(
+        nodePoints,
+        i,
+        endIndex,
+        token.label,
+      )
+      if (labelEndIndex < 0) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      if (!labelState.saturated) {
+        token.lines.push(line)
+        return { status: 'opening', nextIndex: endIndex }
+      }
+      // Saturated but no following colon exists.
+      if (
+        labelEndIndex + 1 >= endIndex ||
+        nodePoints[labelEndIndex].codePoint !== AsciiCodePoint.COLON
+      ) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      i = labelEndIndex + 1
+    }
+    if (token.destination == null) {
+      i = eatOptionalWhitespaces(nodePoints, i, endIndex)
+      if (i >= endIndex) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      // Try to match link destination
+      const { nextIndex: destinationEndIndex, state: destinationState } =
+        eatAndCollectLinkDestination(nodePoints, i, endIndex, null)
+      /**
+       * At most one line break can be used between link destination and link label,
+       * therefore, this line must match a complete link destination
+       */
+      if (destinationEndIndex < 0 || !destinationState.saturated) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      /**
+       * At most one line break can be used between link title and link destination
+       * @see https://github.github.com/gfm/#example-162
+       * @see https://github.github.com/gfm/#example-164
+       * @see https://github.github.com/gfm/#link-reference-definition
+       */
+      i = eatOptionalWhitespaces(nodePoints, destinationEndIndex, endIndex)
+      if (i >= endIndex) {
+        // eslint-disable-next-line no-param-reassign
+        token.destination = destinationState
+        token.lines.push(line)
+        return { status: 'opening', nextIndex: endIndex }
+      }
+      // eslint-disable-next-line no-param-reassign
+      token.lineNoOfDestination = lineNo
+      // eslint-disable-next-line no-param-reassign
+      token.lineNoOfTitle = lineNo
+    }
+    if (token.lineNoOfTitle < 0) {
+      // eslint-disable-next-line no-param-reassign
+      token.lineNoOfTitle = lineNo
+    }
+    const { nextIndex: titleEndIndex, state: titleState } = eatAndCollectLinkTitle(
+      nodePoints,
+      i,
+      endIndex,
+      token.title,
+    )
+    // eslint-disable-next-line no-param-reassign
+    token.title = titleState
+    if (
+      titleEndIndex < 0 ||
+      titleState.nodePoints.length <= 0 ||
+      (titleState.saturated &&
+        eatOptionalWhitespaces(nodePoints, titleEndIndex, endIndex) < endIndex)
+    ) {
+      // check if there exists a valid title
+      if (token.lineNoOfDestination === token.lineNoOfTitle) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      const lastLine = token.lines[token.lines.length - 1]
+      // eslint-disable-next-line no-param-reassign
+      token.title = null
+      // eslint-disable-next-line no-param-reassign
+      token.position.end = calcEndPoint(lastLine.nodePoints, lastLine.endIndex - 1)
+      return {
+        status: 'closingAndRollback',
+        lines: token.lines.slice(token.lineNoOfTitle - 1),
+      }
+    }
+    token.lines.push(line)
+    const saturated: boolean = token.title?.saturated
+    return { status: saturated ? 'closing' : 'opening', nextIndex: endIndex }
+  }
+  function onClose(token: IToken): IResultOfOnClose {
+    let result: IResultOfOnClose
+    // Not all parts of Definition have been matched.
+    if (token.title == null || !token.title.saturated) {
+      // No valid label matched.
+      if (!token.label.saturated) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      // No valid destination matched.
+      if (token.destination == null || !token.destination.saturated) {
+        return { status: 'failedAndRollback', lines: token.lines }
+      }
+      // No valid title matched.
+      if (token.title != null && !token.title.saturated) {
+        if (token.lineNoOfDestination === token.lineNoOfTitle) {
+          return { status: 'failedAndRollback', lines: token.lines }
+        }
+        const lines = token.lines.splice(token.lineNoOfTitle - 1)
+        const lastLine = token.lines[token.lines.length - 1]
+        // eslint-disable-next-line no-param-reassign
+        token.title = null
+        // eslint-disable-next-line no-param-reassign
+        token.position.end = calcEndPoint(lastLine.nodePoints, lastLine.endIndex - 1)
+        result = { status: 'closingAndRollback', lines }
+      }
+    }
+    /**
+     * Labels are trimmed and case-insensitive
+     * @see https://github.github.com/gfm/#example-174
+     * @see https://github.github.com/gfm/#example-175
+     */
+    const labelPoints: INodePoint[] = token.label.nodePoints
+    const label = calcStringFromNodePoints(labelPoints, 1, labelPoints.length - 1)
+    const identifier = resolveLabelToIdentifier(label)
+    // Register definition identifier.
+    api.registerDefinitionIdentifier(identifier)
+    // Cache label and identifier for performance.
+    // eslint-disable-next-line no-param-reassign
+    token._label = label
+    // eslint-disable-next-line no-param-reassign
+    token._identifier = identifier
+    return result
+  }
+}

package/src/parse.ts ADDED Viewed

@@ -0,0 +1,50 @@
+import { DefinitionType } from '@yozora/ast'
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, calcEscapedStringFromNodePoints } from '@yozora/character'
+import type { IParseBlockHookCreator } from '@yozora/core-tokenizer'
+import { encodeLinkDestination } from '@yozora/core-tokenizer'
+import type { INode, IThis, IToken, T } from './types'
+export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
+  return {
+    parse: tokens =>
+      tokens.map(token => {
+        const label: string = token._label!
+        const identifier: string = token._identifier!
+        /**
+         * Resolve link destination
+         * @see https://github.github.com/gfm/#link-destination
+         */
+        const destinationPoints: INodePoint[] = token.destination!.nodePoints
+        const destination: string =
+          destinationPoints[0].codePoint === AsciiCodePoint.OPEN_ANGLE
+            ? calcEscapedStringFromNodePoints(
+                destinationPoints,
+                1,
+                destinationPoints.length - 1,
+                true,
+              )
+            : calcEscapedStringFromNodePoints(destinationPoints, 0, destinationPoints.length, true)
+        const url = encodeLinkDestination(destination)
+        /**
+         * Resolve link title
+         * @see https://github.github.com/gfm/#link-title
+         */
+        const title: string | undefined =
+          token.title == null
+            ? undefined
+            : calcEscapedStringFromNodePoints(
+                token.title.nodePoints,
+                1,
+                token.title.nodePoints.length - 1,
+              )
+        const node: INode = api.shouldReservePosition
+          ? { type: DefinitionType, position: token.position, identifier, label, url, title }
+          : { type: DefinitionType, identifier, label, url, title }
+        return node
+      }),
+  }
+}

package/src/tokenizer.ts ADDED Viewed

@@ -0,0 +1,31 @@
+import type {
+  IBlockTokenizer,
+  IMatchBlockHookCreator,
+  IParseBlockHookCreator,
+} from '@yozora/core-tokenizer'
+import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
+import { match } from './match'
+import { parse } from './parse'
+import { uniqueName } from './types'
+import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
+/**
+ * Lexical Analyzer for Definition.
+ * @see https://github.github.com/gfm/#link-reference-definition
+ */
+export class DefinitionTokenizer
+  extends BaseBlockTokenizer<T, IToken, INode, IThis>
+  implements IBlockTokenizer<T, IToken, INode, IThis>
+{
+  /* istanbul ignore next */
+  constructor(props: ITokenizerProps = {}) {
+    super({
+      name: props.name ?? uniqueName,
+      priority: props.priority ?? TokenizerPriority.ATOMIC,
+    })
+  }
+  public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
+  public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
+}

package/src/types.ts ADDED Viewed

@@ -0,0 +1,58 @@
+import type { Definition, DefinitionType } from '@yozora/ast'
+import type {
+  IBaseBlockTokenizerProps,
+  IPartialYastBlockToken,
+  IPhrasingContentLine,
+  ITokenizer,
+} from '@yozora/core-tokenizer'
+import type { ILinkDestinationCollectingState } from './util/link-destination'
+import type { ILinkLabelCollectingState } from './util/link-label'
+import type { ILinkTitleCollectingState } from './util/link-title'
+export type T = DefinitionType
+export type INode = Definition
+export const uniqueName = '@yozora/tokenizer-definition'
+export interface IToken extends IPartialYastBlockToken<T> {
+  /**
+   * Lines consumed so far while matching this definition.
+   */
+  lines: Array<Readonly<IPhrasingContentLine>>
+  /**
+   * Link label
+   * Trimmed, Case-Insensitive
+   */
+  label: ILinkLabelCollectingState
+  /**
+   * Link destination
+   */
+  destination: ILinkDestinationCollectingState | null
+  /**
+   * Link title
+   */
+  title: ILinkTitleCollectingState | null
+  /**
+   * The line number of the first matched character of the link label
+   */
+  lineNoOfLabel: number
+  /**
+   * The line number of the first matched character of the link destination
+   */
+  lineNoOfDestination: number
+  /**
+   * The line number of the first matched character of the link title
+   */
+  lineNoOfTitle: number
+  /**
+   * Resolved definition label.
+   */
+  _label?: string
+  /**
+   * Resolved definition identifier.
+   */
+  _identifier?: string
+}
+export type IThis = ITokenizer
+export type ITokenizerProps = Partial<IBaseBlockTokenizerProps>

package/src/util/link-destination.ts ADDED Viewed

@@ -0,0 +1,160 @@
+import type { INodePoint } from '@yozora/character'
+import {
+  AsciiCodePoint,
+  VirtualCodePoint,
+  isAsciiControlCharacter,
+  isWhitespaceCharacter,
+} from '@yozora/character'
+import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
+/**
+ * The processing token of eatAndCollectLinkDestination, used to save
+ * intermediate data to support multiple codePosition fragment processing
+ *
+ * @see https://github.github.com/gfm/#link-destination
+ */
+export interface ILinkDestinationCollectingState {
+  /**
+   * Whether the current token has collected a legal LinkDestination
+   */
+  saturated: boolean
+  /**
+   * Collected token points
+   */
+  nodePoints: INodePoint[]
+  /**
+   * Whether an opening angle bracket has been matched
+   */
+  hasOpenAngleBracket: boolean
+  /**
+   * Number of opening parentheses that have not been closed yet
+   */
+  openParensCount: number
+}
+/**
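+ * Collect a link destination from `nodePoints` within [startIndex, endIndex).
+ *
+ * @param nodePoints  the node points to scan
+ * @param startIndex  index at which scanning starts
+ * @param endIndex    index at which scanning stops (exclusive)
+ * @param state       state to continue from, or null to start a new one
+ * @returns the index at which scanning stopped (-1 if no link destination
+ *          can be matched) together with the updated collecting state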
+ * @see https://github.github.com/gfm/#link-destination
+ */
+export function eatAndCollectLinkDestination(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+  state: ILinkDestinationCollectingState | null,
+): { nextIndex: number; state: ILinkDestinationCollectingState } {
+  let i = startIndex
+  // init token
+  if (state == null) {
+    // eslint-disable-next-line no-param-reassign
+    state = {
+      saturated: false,
+      nodePoints: [],
+      hasOpenAngleBracket: false,
+      openParensCount: 0,
+    }
+  }
+  /**
+   * Although link destination may span multiple lines,
+   * they may not contain a blank line.
+   */
+  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
+  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state: state }
+  if (state.nodePoints.length <= 0) {
+    i = firstNonWhitespaceIndex
+    // check whether in pointy brackets
+    const p = nodePoints[i]
+    if (p.codePoint === AsciiCodePoint.OPEN_ANGLE) {
+      i += 1
+      // eslint-disable-next-line no-param-reassign
+      state.hasOpenAngleBracket = true
+      state.nodePoints.push(p)
+    }
+  }
+  /**
+   * In pointy brackets:
+   *  - A sequence of zero or more characters between an opening '<' and
+   *    a closing '>' that contains no line breaks or unescaped '<' or '>' characters
+   */
+  if (state.hasOpenAngleBracket) {
+    for (; i < endIndex; ++i) {
+      const p = nodePoints[i]
+      switch (p.codePoint) {
+        case AsciiCodePoint.BACKSLASH:
+          if (i + 1 < endIndex) {
+            state.nodePoints.push(p)
+            state.nodePoints.push(nodePoints[i + 1])
+          }
+          i += 1
+          break
+        case AsciiCodePoint.OPEN_ANGLE:
+        case VirtualCodePoint.LINE_END:
+          return { nextIndex: -1, state: state }
+        case AsciiCodePoint.CLOSE_ANGLE:
+          // eslint-disable-next-line no-param-reassign
+          state.saturated = true
+          state.nodePoints.push(p)
+          return { nextIndex: i + 1, state: state }
+        default:
+          state.nodePoints.push(p)
+      }
+    }
+    return { nextIndex: i, state: state }
+  }
+  /**
+   * Not in pointy brackets:
+   *  - A nonempty sequence of characters that does not start with '<', does not include
+   *    ASCII space or control characters, and includes parentheses only if
+   *
+   *    a) they are backslash-escaped or
+   *    b) they are part of a balanced pair of unescaped parentheses. (Implementations
+   *       may impose limits on parentheses nesting to avoid performance issues,
+   *       but at least three levels of nesting should be supported.)
+   */
+  for (; i < endIndex; ++i) {
+    const p = nodePoints[i]
+    switch (p.codePoint) {
+      case AsciiCodePoint.BACKSLASH:
+        if (i + 1 < endIndex) {
+          state.nodePoints.push(p)
+          state.nodePoints.push(nodePoints[i + 1])
+        }
+        i += 1
+        break
+      case AsciiCodePoint.OPEN_PARENTHESIS:
+        // eslint-disable-next-line no-param-reassign
+        state.openParensCount += 1
+        state.nodePoints.push(p)
+        break
+      case AsciiCodePoint.CLOSE_PARENTHESIS:
+        // eslint-disable-next-line no-param-reassign
+        state.openParensCount -= 1
+        state.nodePoints.push(p)
+        if (state.openParensCount < 0) {
+          return { nextIndex: i, state: state }
+        }
+        break
+      default:
+        if (isWhitespaceCharacter(p.codePoint) || isAsciiControlCharacter(p.codePoint)) {
+          // eslint-disable-next-line no-param-reassign
+          state.saturated = true
+          return { nextIndex: i, state: state }
+        }
+        state.nodePoints.push(p)
+        break
+    }
+  }
+  // eslint-disable-next-line no-param-reassign
+  state.saturated = true
+  return { nextIndex: i, state: state }
+}

package/src/util/link-label.ts ADDED Viewed

@@ -0,0 +1,116 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
+import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
+/**
+ * The processing token of eatAndCollectLinkLabel, used to save
+ * intermediate data to support multiple codePosition fragment processing
+ *
+ * @see https://github.github.com/gfm/#link-label
+ */
+export interface ILinkLabelCollectingState {
+  /**
+   * Whether the current token has collected a legal LinkLabel
+   */
+  saturated: boolean
+  /**
+   * Collected token points
+   */
+  nodePoints: INodePoint[]
+  /**
+   * Whether a non-whitespace character has been collected
+   */
+  hasNonWhitespaceCharacter: boolean
+}
+/**
+ * A link label begins with a left bracket '[' and ends with the first right bracket ']'
+ * that is not backslash-escaped. Between these brackets there must be at least one
+ * non-whitespace character. Unescaped square bracket characters are not allowed inside
+ * the opening and closing square brackets of link labels. A link label can have at most
+ * 999 characters inside the square brackets.
+ *
+ * One label matches another just in case their normalized forms are equal. To normalize
+ * a label, strip off the opening and closing brackets, perform the Unicode case fold,
+ * strip leading and trailing whitespace and collapse consecutive internal whitespace to
+ * a single space. If there are multiple matching reference link definitions, the one that
+ * comes first in the document is used. (It is desirable in such cases to emit a warning.)
+ *
+ * @param nodePoints
+ * @param startIndex
+ * @param endIndex
+ * @param state
+ * @see https://github.github.com/gfm/#link-label
+ */
+export function eatAndCollectLinkLabel(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+  state: ILinkLabelCollectingState | null,
+): { nextIndex: number; state: ILinkLabelCollectingState } {
+  let i = startIndex
+  // init token
+  if (state == null) {
+    // eslint-disable-next-line no-param-reassign
+    state = {
+      saturated: false,
+      nodePoints: [],
+      hasNonWhitespaceCharacter: false,
+    }
+  }
+  /**
+   * Although link label may span multiple lines,
+   * they may not contain a blank line.
+   */
+  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
+  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state: state }
+  if (state.nodePoints.length <= 0) {
+    i = firstNonWhitespaceIndex
+    // check whether in brackets
+    const p = nodePoints[i]
+    if (p.codePoint !== AsciiCodePoint.OPEN_BRACKET) {
+      return { nextIndex: -1, state: state }
+    }
+    i += 1
+    // eslint-disable-next-line no-param-reassign
+    state.nodePoints.push(p)
+  }
+  for (; i < endIndex; ++i) {
+    const p = nodePoints[i]
+    switch (p.codePoint) {
+      case AsciiCodePoint.BACKSLASH:
+        // eslint-disable-next-line no-param-reassign
+        state.hasNonWhitespaceCharacter = true
+        if (i + 1 < endIndex) {
+          state.nodePoints.push(p)
+          state.nodePoints.push(nodePoints[i + 1])
+        }
+        i += 1
+        break
+      case AsciiCodePoint.OPEN_BRACKET:
+        return { nextIndex: -1, state: state }
+      case AsciiCodePoint.CLOSE_BRACKET:
+        state.nodePoints.push(p)
+        if (state.hasNonWhitespaceCharacter) {
+          // eslint-disable-next-line no-param-reassign
+          state.saturated = true
+          return { nextIndex: i + 1, state: state }
+        }
+        return { nextIndex: -1, state: state }
+      default:
+        if (!isWhitespaceCharacter(p.codePoint)) {
+          // eslint-disable-next-line no-param-reassign
+          state.hasNonWhitespaceCharacter = true
+        }
+        state.nodePoints.push(p)
+    }
+  }
+  return { nextIndex: 1, state: state }
+}

package/src/util/link-title.ts ADDED Viewed

@@ -0,0 +1,143 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, VirtualCodePoint } from '@yozora/character'
+import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
+/**
+ * The processing state of eatAndCollectLinkTitle, used to save
+ * intermediate data so that a link title can be collected from
+ * several fragments across successive calls.
+ *
+ * @see https://github.github.com/gfm/#link-title
+ */
+export interface ILinkTitleCollectingState {
+  /**
+   * Whether a complete link title (up to and including its closing
+   * delimiter) has been collected.
+   */
+  saturated: boolean
+  /**
+   * Collected node points, starting with the opening delimiter.
+   */
+  nodePoints: INodePoint[]
+  /**
+   * Code point of the character that opened the link title
+   * (`"`, `'` or `(`), or null when no opening delimiter has been seen yet.
+   */
+  wrapSymbol: number | null
+}
+/**
+ * Try to collect a link title from `nodePoints[startIndex, endIndex)`.
+ *
+ * The function can be called repeatedly with the state it returned earlier,
+ * so that a title spanning several fragments is accumulated piece by piece.
+ * A non-null `state` passed in is mutated in place and returned.
+ *
+ * @param nodePoints  node points of the text being scanned
+ * @param startIndex  index at which scanning starts (inclusive)
+ * @param endIndex    index at which scanning stops (exclusive)
+ * @param state       state returned by a previous call for the same title,
+ *                    or `null` to start collecting a new title
+ * @returns `nextIndex` is `-1` when the fragment cannot be (part of) a link
+ *          title. For a quoted title that gets closed it is the index just
+ *          after the closing quote; in every other successful case it is
+ *          `endIndex`. `state.saturated` tells whether the closing delimiter
+ *          has been reached.
+ * @see https://github.github.com/gfm/#link-title
+ */
+export function eatAndCollectLinkTitle(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+  state: ILinkTitleCollectingState | null,
+): { nextIndex: number; state: ILinkTitleCollectingState } {
+  let i = startIndex
+  // init token
+  if (state == null) {
+    // eslint-disable-next-line no-param-reassign
+    state = {
+      saturated: false,
+      nodePoints: [],
+      wrapSymbol: null,
+    }
+  }
+  /**
+   * Although link titles may span multiple lines,
+   * they may not contain a blank line.
+   */
+  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
+  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state: state }
+  // Nothing collected yet: the first non-whitespace point must open the title.
+  if (state.nodePoints.length <= 0) {
+    i = firstNonWhitespaceIndex
+    const p = nodePoints[i]
+    switch (p.codePoint) {
+      case AsciiCodePoint.DOUBLE_QUOTE:
+      case AsciiCodePoint.SINGLE_QUOTE:
+      case AsciiCodePoint.OPEN_PARENTHESIS:
+        // eslint-disable-next-line no-param-reassign
+        state.wrapSymbol = p.codePoint
+        state.nodePoints.push(p)
+        i += 1
+        break
+      default:
+        return { nextIndex: -1, state: state }
+    }
+  }
+  if (state.wrapSymbol == null) return { nextIndex: -1, state: state }
+  switch (state.wrapSymbol) {
+    /**
+     *  - a sequence of zero or more characters between straight double-quote characters '"',
+     *    including a '"' character only if it is backslash-escaped, or
+     *  - a sequence of zero or more characters between straight single-quote characters '\'',
+     *    including a '\'' character only if it is backslash-escaped,
+     */
+    case AsciiCodePoint.DOUBLE_QUOTE:
+    case AsciiCodePoint.SINGLE_QUOTE: {
+      for (; i < endIndex; ++i) {
+        const p = nodePoints[i]
+        switch (p.codePoint) {
+          case AsciiCodePoint.BACKSLASH:
+            // Keep the backslash together with the point it escapes; a
+            // backslash that is the last point of the range is not collected.
+            if (i + 1 < endIndex) {
+              state.nodePoints.push(p)
+              state.nodePoints.push(nodePoints[i + 1])
+            }
+            i += 1
+            break
+          case state.wrapSymbol:
+            // Unescaped closing quote: the title is complete.
+            // eslint-disable-next-line no-param-reassign
+            state.saturated = true
+            state.nodePoints.push(p)
+            return { nextIndex: i + 1, state: state }
+          default:
+            state.nodePoints.push(p)
+        }
+      }
+      break
+    }
+    /**
+     * a sequence of zero or more characters between matching parentheses '(...)',
+     * including a '(' or ')' character only if it is backslash-escaped.
+     */
+    case AsciiCodePoint.OPEN_PARENTHESIS: {
+      for (; i < endIndex; ++i) {
+        const p = nodePoints[i]
+        switch (p.codePoint) {
+          case AsciiCodePoint.BACKSLASH:
+            // Keep the backslash together with the point it escapes; a
+            // backslash that is the last point of the range is not collected.
+            if (i + 1 < endIndex) {
+              state.nodePoints.push(p)
+              state.nodePoints.push(nodePoints[i + 1])
+            }
+            i += 1
+            break
+          case AsciiCodePoint.OPEN_PARENTHESIS:
+            // An unescaped '(' inside the title is not allowed.
+            return { nextIndex: -1, state: state }
+          case AsciiCodePoint.CLOSE_PARENTHESIS:
+            // The closing ')' is only accepted as the last point of the
+            // range or directly before a line end.
+            if (i + 1 >= endIndex || nodePoints[i + 1].codePoint === VirtualCodePoint.LINE_END) {
+              state.nodePoints.push(p)
+              // eslint-disable-next-line no-param-reassign
+              state.saturated = true
+              // Return right away: a bare `break` would only leave the inner
+              // `switch`, letting the loop append the trailing line end to
+              // the already saturated title.
+              return { nextIndex: endIndex, state: state }
+            }
+            return { nextIndex: -1, state: state }
+          default:
+            state.nodePoints.push(p)
+        }
+      }
+      break
+    }
+  }
+  // The range was consumed without reaching the closing delimiter.
+  return { nextIndex: endIndex, state: state }
+}