npm - @yozora/tokenizer-html-block - Versions diffs - 2.0.4 → 2.0.5 - Mend

@yozora/tokenizer-html-block 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/lib/cjs/{index.js → index.cjs} +5 -6
package/lib/esm/{index.js → index.mjs} +4 -5
package/lib/types/index.d.ts +5 -5
package/package.json +18 -14
package/src/conditions/c1.ts +79 -0
package/src/conditions/c2.ts +55 -0
package/src/conditions/c3.ts +48 -0
package/src/conditions/c4.ts +48 -0
package/src/conditions/c5.ts +59 -0
package/src/conditions/c6.ts +109 -0
package/src/conditions/c7.ts +54 -0
package/src/index.ts +11 -0
package/src/match.ts +231 -0
package/src/parse.ts +18 -0
package/src/tokenizer.ts +32 -0
package/src/types.ts +74 -0
package/src/util/eat-html-attribute.ts +170 -0
package/src/util/eat-html-tagname.ts +27 -0

package/lib/cjs/{index.js → index.cjs} RENAMED Viewed

@@ -496,19 +496,18 @@ const uniqueName = '@yozora/tokenizer-html-block';
 class HtmlBlockTokenizer extends coreTokenizer.BaseBlockTokenizer {
     constructor(props = {}) {
-        var _a, _b;
         super({
-            name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
-            priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.ATOMIC,
+            name: props.name ?? uniqueName,
+            priority: props.priority ?? coreTokenizer.TokenizerPriority.ATOMIC,
         });
-        this.match = match;
-        this.parse = parse;
     }
+    match = match;
+    parse = parse;
 }
 exports.HtmlBlockTokenizer = HtmlBlockTokenizer;
 exports.HtmlBlockTokenizerName = uniqueName;
-exports["default"] = HtmlBlockTokenizer;
+exports.default = HtmlBlockTokenizer;
 exports.eatHTMLAttribute = eatHTMLAttribute;
 exports.eatHTMLTagName = eatHTMLTagName;
 exports.htmlBlockMatch = match;

package/lib/esm/{index.js → index.mjs} RENAMED Viewed

@@ -492,14 +492,13 @@ const uniqueName = '@yozora/tokenizer-html-block';
 class HtmlBlockTokenizer extends BaseBlockTokenizer {
     constructor(props = {}) {
-        var _a, _b;
         super({
-            name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
-            priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.ATOMIC,
+            name: props.name ?? uniqueName,
+            priority: props.priority ?? TokenizerPriority.ATOMIC,
         });
-        this.match = match;
-        this.parse = parse;
     }
+    match = match;
+    parse = parse;
 }
 export { HtmlBlockTokenizer, uniqueName as HtmlBlockTokenizerName, HtmlBlockTokenizer as default, eatHTMLAttribute, eatHTMLTagName, match as htmlBlockMatch, parse as htmlBlockParse };

package/lib/types/index.d.ts CHANGED Viewed

@@ -37,10 +37,10 @@ declare function eatHTMLAttribute(nodePoints: ReadonlyArray<INodePoint>, startIn
  */
 declare function eatHTMLTagName(nodePoints: ReadonlyArray<INodePoint>, startIndex: number, endIndex: number): number | null;
-declare type T = HtmlType;
-declare type INode = Html;
+type T = HtmlType;
+type INode = Html;
 declare const uniqueName = "@yozora/tokenizer-html-block";
-declare type HtmlBlockConditionType = 1 | 2 | 3 | 4 | 5 | 6 | 7;
+type HtmlBlockConditionType = 1 | 2 | 3 | 4 | 5 | 6 | 7;
 /**
  * Middle state during the whole match and parse phase.
  */
@@ -97,8 +97,8 @@ interface IToken extends IPartialYastBlockToken<T> {
      */
     lines: Array<Readonly<IPhrasingContentLine>>;
 }
-declare type IThis = ITokenizer;
-declare type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
+type IThis = ITokenizer;
+type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
 /**
  * An HTML block is a group of lines that is treated as raw HTML (and will not

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yozora/tokenizer-html-block",
-  "version": "2.0.4",
+  "version": "2.0.5",
   "author": {
     "name": "guanghechen",
     "url": "https://github.com/guanghechen/"
@@ -11,33 +11,37 @@
     "directory": "tokenizers/html-block"
   },
   "homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/html-block",
-  "main": "lib/cjs/index.js",
-  "module": "lib/esm/index.js",
-  "types": "lib/types/index.d.ts",
-  "source": "src/index.ts",
+  "type": "module",
+  "exports": {
+    "types": "./lib/types/index.d.ts",
+    "import": "./lib/esm/index.mjs",
+    "require": "./lib/cjs/index.cjs"
+  },
+  "source": "./src/index.ts",
+  "types": "./lib/types/index.d.ts",
+  "main": "./lib/cjs/index.cjs",
+  "module": "./lib/esm/index.mjs",
   "license": "MIT",
   "engines": {
     "node": ">= 16.0.0"
   },
   "files": [
     "lib/",
-    "!lib/**/*.js.map",
-    "!lib/**/*.d.ts.map",
+    "src/",
     "package.json",
     "CHANGELOG.md",
     "LICENSE",
     "README.md"
   ],
   "scripts": {
-    "build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.js",
-    "prebuild": "rimraf lib/",
+    "build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
     "prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
-    "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
+    "test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
   },
   "dependencies": {
-    "@yozora/ast": "^2.0.4",
-    "@yozora/character": "^2.0.4",
-    "@yozora/core-tokenizer": "^2.0.4"
+    "@yozora/ast": "^2.0.5",
+    "@yozora/character": "^2.0.5",
+    "@yozora/core-tokenizer": "^2.0.5"
   },
-  "gitHead": "c980b95254394dcacba0cbb4bea251350b09397c"
+  "gitHead": "7ba3bab49fe65cf2f57082c0503af73da9356cf0"
 }

package/src/conditions/c1.ts ADDED Viewed

@@ -0,0 +1,79 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, calcStringFromNodePoints, isWhitespaceCharacter } from '@yozora/character'
+import { eatHTMLTagName } from '../util/eat-html-tagname'
+const includedTags = ['pre', 'script', 'style']
+/**
+ * Eat block html start condition 1:
+ *
+ *    line begins with the string `<script`, `<pre`, or
+ *    `<style` (case-insensitive), followed by whitespace, the string `>`,
+ *    or the end of the line.
+ *
+ * Only the part after the tag name is inspected here. The tag name itself is
+ * passed in and compared as-is against the lowercase names `pre`, `script`
+ * and `style`, so the caller is expected to lowercase it first.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  index of the character right after the tag name
+ * @param endIndex    exclusive end of the range to inspect
+ * @param tagName     tag name that was found right after the leading `<`
+ * @returns `endIndex` when nothing follows the tag name, `startIndex + 1` when
+ *          it is followed by a whitespace or `>`, otherwise null (also null
+ *          when `tagName` is not one of the three names)
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition1(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+  tagName: string,
+): number | null {
+  if (!includedTags.includes(tagName)) return null
+  if (startIndex >= endIndex) return endIndex // the tag name is the last thing in the range
+  const c = nodePoints[startIndex].codePoint
+  if (isWhitespaceCharacter(c) || c === AsciiCodePoint.CLOSE_ANGLE) {
+    return startIndex + 1
+  }
+  return null
+}
+/**
+ * Eat block html end condition 1:
+ *
+ *    line contains an end tag `</script>`, `</pre>`,
+ *    or `</style>` (case-insensitive; it need not match the start tag).
+ *
+ * The range is scanned left to right for `</`, a tag name and an immediately
+ * following `>` (no whitespace is accepted between the name and the `>`).
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  inclusive start of the range to scan
+ * @param endIndex    exclusive end of the range to scan
+ * @returns the index of the `>` that closes the first matching end tag (not
+ *          the index after it), or null when the range holds no such end tag
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatEndCondition1(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  for (let i = startIndex; i < endIndex; ++i) {
+    if (
+      nodePoints[i].codePoint === AsciiCodePoint.OPEN_ANGLE &&
+      i + 3 < endIndex && // the shortest possible end tag, `</x>`, needs four characters
+      nodePoints[i + 1].codePoint === AsciiCodePoint.SLASH
+    ) {
+      const tagNameStartIndex = i + 2
+      const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
+      if (
+        tagNameEndIndex == null ||
+        tagNameEndIndex >= endIndex ||
+        nodePoints[tagNameEndIndex].codePoint !== AsciiCodePoint.CLOSE_ANGLE
+      ) {
+        i += 1 // step onto the `/`; the loop's ++i then resumes right after it
+        continue
+      }
+      const rawTagName = calcStringFromNodePoints(
+        nodePoints,
+        tagNameStartIndex,
+        tagNameEndIndex,
+        true,
+      )
+      const tagName = rawTagName.toLowerCase()
+      if (includedTags.includes(tagName)) return tagNameEndIndex
+    }
+  }
+  return null
+}

package/src/conditions/c2.ts ADDED Viewed

@@ -0,0 +1,55 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint } from '@yozora/character'
+/**
+ * Eat block html start condition 2:
+ *
+ *    Line begins with the string `<!--`.
+ *
+ * The leading `<` is not checked here; only `!--` is matched, starting at
+ * `startIndex`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  index of the character right after the leading `<`
+ * @param endIndex    exclusive end of the range to inspect
+ * @returns the index right after `!--`, or null when the range does not start with it
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition2(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  const i = startIndex
+  if (
+    i + 2 < endIndex &&
+    nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
+    nodePoints[i + 1].codePoint === AsciiCodePoint.MINUS_SIGN &&
+    nodePoints[i + 2].codePoint === AsciiCodePoint.MINUS_SIGN
+  )
+    return i + 3
+  return null
+}
+/**
+ * Eat block html end condition 2:
+ *
+ *    line contains the string `-->`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  inclusive start of the range to scan
+ * @param endIndex    exclusive end of the range to scan
+ * @returns the index right after the first `-->` in the range, or null when there is none
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatEndCondition2(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  for (let i = startIndex; i < endIndex; ++i) {
+    if (
+      nodePoints[i].codePoint === AsciiCodePoint.MINUS_SIGN &&
+      i + 2 < endIndex &&
+      nodePoints[i + 1].codePoint === AsciiCodePoint.MINUS_SIGN &&
+      nodePoints[i + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
+    )
+      return i + 3
+  }
+  return null
+}

package/src/conditions/c3.ts ADDED Viewed

@@ -0,0 +1,48 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint } from '@yozora/character'
+/**
+ * Eat block html start condition 3:
+ *
+ *    line begins with the string `<?`.
+ *
+ * The leading `<` is not checked here; only the `?` at `startIndex` is matched.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  index of the character right after the leading `<`
+ * @param endIndex    exclusive end of the range to inspect
+ * @returns the index right after the `?`, or null when the range does not start with it
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition3(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  const i = startIndex
+  if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.QUESTION_MARK) return i + 1
+  return null
+}
+/**
+ * Eat block html end condition 3:
+ *
+ *    line contains the string `?>`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  inclusive start of the range to scan
+ * @param endIndex    exclusive end of the range to scan
+ * @returns the index right after the first `?>` in the range, or null when there is none
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatEndCondition3(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  for (let i = startIndex; i < endIndex; ++i) {
+    if (
+      nodePoints[i].codePoint === AsciiCodePoint.QUESTION_MARK &&
+      i + 1 < endIndex &&
+      nodePoints[i + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
+    )
+      return i + 2
+  }
+  return null
+}

package/src/conditions/c4.ts ADDED Viewed

@@ -0,0 +1,48 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, isAsciiUpperLetter } from '@yozora/character'
+/**
+ * Eat block html start condition 4:
+ *
+ *    line begins with the string `<!` followed by an uppercase ASCII letter.
+ *
+ * The leading `<` is not checked here; only `!` plus one uppercase ASCII
+ * letter is matched, starting at `startIndex`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  index of the character right after the leading `<`
+ * @param endIndex    exclusive end of the range to inspect
+ * @returns the index right after the uppercase letter, or null when the range
+ *          does not start with `!` followed by such a letter
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition4(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  const i = startIndex
+  if (
+    i + 1 < endIndex &&
+    nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
+    isAsciiUpperLetter(nodePoints[i + 1].codePoint)
+  )
+    return i + 2
+  return null
+}
+/**
+ * Eat block html end condition 4:
+ *
+ *    line contains the character `>`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  inclusive start of the range to scan
+ * @param endIndex    exclusive end of the range to scan
+ * @returns the index right after the first `>` in the range, or null when there is none
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatEndCondition4(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  for (let i = startIndex; i < endIndex; ++i) {
+    if (nodePoints[i].codePoint === AsciiCodePoint.CLOSE_ANGLE) return i + 1
+  }
+  return null
+}

package/src/conditions/c5.ts ADDED Viewed

@@ -0,0 +1,59 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint } from '@yozora/character'
+/**
+ * Eat block html start condition 5:
+ *
+ *    line begins with the string `<![CDATA[`.
+ *
+ * The leading `<` is not checked here; only `![CDATA[` is matched, starting
+ * at `startIndex`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  index of the character right after the leading `<`
+ * @param endIndex    exclusive end of the range to inspect
+ * @returns the index right after `![CDATA[`, or null when the range does not start with it
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition5(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  const i = startIndex
+  if (
+    // `![CDATA[` is eight characters long, so index i + 7 must lie inside the range.
+    i + 7 < endIndex &&
+    nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
+    nodePoints[i + 1].codePoint === AsciiCodePoint.OPEN_BRACKET &&
+    nodePoints[i + 2].codePoint === AsciiCodePoint.UPPERCASE_C &&
+    nodePoints[i + 3].codePoint === AsciiCodePoint.UPPERCASE_D &&
+    nodePoints[i + 4].codePoint === AsciiCodePoint.UPPERCASE_A &&
+    nodePoints[i + 5].codePoint === AsciiCodePoint.UPPERCASE_T &&
+    nodePoints[i + 6].codePoint === AsciiCodePoint.UPPERCASE_A &&
+    // The opener is only complete with its second `[`; without this check a
+    // line such as `<![CDATAx` would wrongly start a CDATA block.
+    nodePoints[i + 7].codePoint === AsciiCodePoint.OPEN_BRACKET
+  )
+    return i + 8
+  return null
+}
+/**
+ * Eat block html end condition 5:
+ *
+ *    line contains the string `]]>`.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  inclusive start of the range to scan
+ * @param endIndex    exclusive end of the range to scan
+ * @returns the index right after the first `]]>` in the range, or null when there is none
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatEndCondition5(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  for (let i = startIndex; i < endIndex; ++i) {
+    if (
+      nodePoints[i].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
+      i + 2 < endIndex &&
+      nodePoints[i + 1].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
+      nodePoints[i + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
+    )
+      return i + 3
+  }
+  return null
+}

package/src/conditions/c6.ts ADDED Viewed

@@ -0,0 +1,109 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
+const includedTags = [
+  'address',
+  'article',
+  'aside',
+  'base',
+  'basefont',
+  'blockquote',
+  'body',
+  'caption',
+  'center',
+  'col',
+  'colgroup',
+  'dd',
+  'details',
+  'dialog',
+  'dir',
+  'div',
+  'dl',
+  'dt',
+  'fieldset',
+  'figcaption',
+  'figure',
+  'footer',
+  'form',
+  'frame',
+  'frameset',
+  'h1',
+  'h2',
+  'h3',
+  'h4',
+  'h5',
+  'h6',
+  'head',
+  'header',
+  'hr',
+  'html',
+  'iframe',
+  'legend',
+  'li',
+  'link',
+  'main',
+  'menu',
+  'menuitem',
+  'nav',
+  'noframes',
+  'ol',
+  'optgroup',
+  'option',
+  'p',
+  'param',
+  'section',
+  'source',
+  'summary',
+  'table',
+  'tbody',
+  'td',
+  'tfoot',
+  'th',
+  'thead',
+  'title',
+  'tr',
+  'track',
+  'ul',
+]
+/**
+ * Eat block html start condition 6:
+ *
+ *    line begins with the string `<` or `</` followed by one of
+ *    the strings (case-insensitive) `address`, `article`, `aside`, `base`,
+ *    `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`,
+ *    `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`,
+ *    `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`,
+ *    `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`,
+ *    `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`,
+ *    `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`,
+ *    `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`,
+ *    `ul`, followed by whitespace, the end of the line, the string `>`,
+ *    or the string `/>`.
+ *
+ * Only the part after the tag name is inspected here. The tag name itself is
+ * passed in and compared as-is against the lowercase list above, so the
+ * caller is expected to lowercase it first.
+ *
+ * @param nodePoints  code points of the current line
+ * @param startIndex  index of the character right after the tag name
+ * @param endIndex    exclusive end of the range to inspect
+ * @param tagName     tag name that was found after the leading `<` or `</`
+ * @returns `endIndex` when nothing follows the tag name, `startIndex + 1` when
+ *          it is followed by a whitespace or `>`, `startIndex + 2` when it is
+ *          followed by `/>`, otherwise null (also null when `tagName` is not
+ *          in the list)
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition6(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+  tagName: string,
+): number | null {
+  if (!includedTags.includes(tagName)) return null
+  if (startIndex >= endIndex) return endIndex
+  const c = nodePoints[startIndex].codePoint
+  if (isWhitespaceCharacter(c) || c === AsciiCodePoint.CLOSE_ANGLE) return startIndex + 1
+  if (
+    c === AsciiCodePoint.SLASH &&
+    startIndex + 1 < endIndex &&
+    nodePoints[startIndex + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
+  )
+    return startIndex + 2
+  return null
+}

package/src/conditions/c7.ts ADDED Viewed

@@ -0,0 +1,54 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
+import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
+import { eatHTMLAttribute } from '../util/eat-html-attribute'
+const excludedTags = ['pre', 'script', 'style']
+/**
+ * Eat block html start condition 7:
+ *
+ *    line begins with a complete open tag (with any tag name
+ *    other than `script`, `style`, or `pre`) or a complete closing tag,
+ *    followed only by whitespace or the end of the line
+ *
+ * Only the part after the tag name is handled here: for an open tag, zero or
+ * more attributes, optional whitespace and an optional `/`; for a closing
+ * tag, optional whitespace. In both cases a `>` must follow, and everything
+ * after it up to `endIndex` must be whitespace.
+ *
+ * @param nodePoints        code points of the current line
+ * @param startIndex        index of the character right after the tag name
+ * @param endIndex          exclusive end of the range to inspect
+ * @param tagName           tag name, compared as-is against the lowercase
+ *                          names `pre`, `script` and `style`
+ * @param potentialOpenTag  true to match the rest of an open tag, false to
+ *                          match the rest of a closing tag
+ * @returns `endIndex` when the condition is met, otherwise null (also null
+ *          for an excluded tag name or an empty range)
+ * @see https://github.github.com/gfm/#start-condition
+ */
+export function eatStartCondition7(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+  tagName: string,
+  potentialOpenTag: boolean,
+): number | null {
+  if (excludedTags.includes(tagName) || startIndex >= endIndex) return null
+  let i = startIndex
+  if (potentialOpenTag) {
+    // Try to resolve an open tag: consume attributes until one fails to match.
+    for (; i < endIndex; ) {
+      const result = eatHTMLAttribute(nodePoints, i, endIndex)
+      if (result == null) break
+      i = result.nextIndex
+    }
+    i = eatOptionalWhitespaces(nodePoints, i, endIndex)
+    if (i >= endIndex) return null
+    if (nodePoints[i].codePoint === AsciiCodePoint.SLASH) i += 1 // optional `/` of a self-closing tag
+  } else {
+    // Try to resolve a closing tag.
+    i = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
+  }
+  if (i >= endIndex || nodePoints[i].codePoint !== AsciiCodePoint.CLOSE_ANGLE) return null
+  for (i += 1; i < endIndex; ++i) {
+    if (!isWhitespaceCharacter(nodePoints[i].codePoint)) return null
+  }
+  return endIndex
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,11 @@
+export * from './util/eat-html-attribute'
+export * from './util/eat-html-tagname'
+export { match as htmlBlockMatch } from './match'
+export { parse as htmlBlockParse } from './parse'
+export { HtmlBlockTokenizer, HtmlBlockTokenizer as default } from './tokenizer'
+export { uniqueName as HtmlBlockTokenizerName } from './types'
+export type {
+  IThis as IHtmlBlockHookContext,
+  IToken as IHtmlBlockToken,
+  ITokenizerProps as IHtmlBlockTokenizerProps,
+} from './types'

package/src/match.ts ADDED Viewed

@@ -0,0 +1,231 @@
+import { HtmlType } from '@yozora/ast'
+import type { INodeInterval, INodePoint } from '@yozora/character'
+import { AsciiCodePoint, calcStringFromNodePoints } from '@yozora/character'
+import type {
+  IMatchBlockHookCreator,
+  IPhrasingContentLine,
+  IResultOfEatAndInterruptPreviousSibling,
+  IResultOfEatContinuationText,
+  IResultOfEatOpener,
+  IYastBlockToken,
+} from '@yozora/core-tokenizer'
+import { calcEndPoint, calcStartPoint, eatOptionalWhitespaces } from '@yozora/core-tokenizer'
+import { eatEndCondition1, eatStartCondition1 } from './conditions/c1'
+import { eatEndCondition2, eatStartCondition2 } from './conditions/c2'
+import { eatEndCondition3, eatStartCondition3 } from './conditions/c3'
+import { eatEndCondition4, eatStartCondition4 } from './conditions/c4'
+import { eatEndCondition5, eatStartCondition5 } from './conditions/c5'
+import { eatStartCondition6 } from './conditions/c6'
+import { eatStartCondition7 } from './conditions/c7'
+import type { HtmlBlockConditionType, IThis, IToken, T } from './types'
+import { eatHTMLTagName } from './util/eat-html-tagname'
+/**
+ * An HTML block is a group of lines that is treated as raw HTML (and will not
+ * be escaped in HTML output).
+ *
+ * Creates the match-phase hooks of the html-block tokenizer:
+ *
+ *  - `eatOpener` accepts a line that is indented by fewer than four spaces,
+ *    whose first non-whitespace character is `<` and that meets one of the
+ *    seven start conditions. The token always covers the whole line. For
+ *    conditions 1-5 the rest of the line is also checked for the end
+ *    condition, and the result is reported as `saturated`.
+ *  - `eatAndInterruptPreviousSibling` behaves like `eatOpener`, except that a
+ *    condition 7 block is never allowed to interrupt the previous sibling.
+ *  - `eatContinuationText` appends the line to the token unless the block is
+ *    of condition 6 or 7 and the line is blank. It reports `closing` when the
+ *    line meets the end condition and `opening` otherwise.
+ *
+ * Start conditions are tried in the order 2, 3, 4, 5 and then, depending on
+ * whether the tag is an open or a closing one, 1, 6, 7 or 6, 7.
+ *
+ * @see https://github.com/syntax-tree/mdast#html
+ * @see https://github.github.com/gfm/#html-blocks
+ */
+export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
+  return {
+    isContainingBlock: false,
+    eatOpener,
+    eatAndInterruptPreviousSibling,
+    eatContinuationText,
+  }
+  function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
+    /**
+     * The opening tag can be indented 1-3 spaces, but not 4.
+     * @see https://github.github.com/gfm/#example-152
+     */
+    if (line.countOfPrecedeSpaces >= 4) return null
+    const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
+    if (
+      firstNonWhitespaceIndex >= endIndex ||
+      nodePoints[firstNonWhitespaceIndex].codePoint !== AsciiCodePoint.OPEN_ANGLE
+    )
+      return null
+    const i = firstNonWhitespaceIndex + 1
+    const startResult = eatStartCondition(nodePoints, i, endIndex)
+    if (startResult == null) return null
+    const { condition } = startResult
+    /**
+     * The end tag can occur on the same line as the start tag.
+     * @see https://github.github.com/gfm/#example-145
+     * @see https://github.github.com/gfm/#example-146
+     */
+    let saturated = false
+    if (condition !== 6 && condition !== 7) {
+      const endResult = eatEndCondition(nodePoints, startResult.nextIndex, endIndex, condition)
+      if (endResult != null) saturated = true
+    }
+    const nextIndex = endIndex
+    const token: IToken = {
+      nodeType: HtmlType,
+      position: {
+        start: calcStartPoint(nodePoints, startIndex),
+        end: calcEndPoint(nodePoints, nextIndex - 1),
+      },
+      condition,
+      lines: [line],
+    }
+    return { token, nextIndex, saturated }
+  }
+  function eatAndInterruptPreviousSibling(
+    line: Readonly<IPhrasingContentLine>,
+    prevSiblingToken: Readonly<IYastBlockToken>,
+  ): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
+    const result = eatOpener(line)
+    if (result == null || result.token.condition === 7) return null
+    const { token, nextIndex } = result
+    return {
+      token,
+      nextIndex,
+      remainingSibling: prevSiblingToken,
+    }
+  }
+  function eatContinuationText(
+    line: Readonly<IPhrasingContentLine>,
+    token: IToken,
+  ): IResultOfEatContinuationText {
+    const { nodePoints, endIndex, firstNonWhitespaceIndex } = line
+    const nextIndex = eatEndCondition(
+      nodePoints,
+      firstNonWhitespaceIndex,
+      endIndex,
+      token.condition,
+    )
+    if (nextIndex === -1) return { status: 'notMatched' } // blank line after a condition 6/7 block
+    token.lines.push(line)
+    if (nextIndex != null) return { status: 'closing', nextIndex: endIndex }
+    return { status: 'opening', nextIndex: endIndex }
+  }
+  function eatStartCondition(
+    nodePoints: ReadonlyArray<INodePoint>,
+    startIndex: number,
+    endIndex: number,
+  ): { condition: HtmlBlockConditionType; nextIndex: number } | null {
+    let nextIndex: number | null = null
+    if (startIndex >= endIndex) return null
+    // condition 2
+    nextIndex = eatStartCondition2(nodePoints, startIndex, endIndex)
+    if (nextIndex != null) return { nextIndex, condition: 2 }
+    // condition 3
+    nextIndex = eatStartCondition3(nodePoints, startIndex, endIndex)
+    if (nextIndex != null) return { nextIndex, condition: 3 }
+    // condition 4
+    nextIndex = eatStartCondition4(nodePoints, startIndex, endIndex)
+    if (nextIndex != null) return { nextIndex, condition: 4 }
+    // condition 5
+    nextIndex = eatStartCondition5(nodePoints, startIndex, endIndex)
+    if (nextIndex != null) return { nextIndex, condition: 5 }
+    if (nodePoints[startIndex].codePoint !== AsciiCodePoint.SLASH) {
+      const tagNameStartIndex = startIndex
+      const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
+      if (tagNameEndIndex == null) return null
+      const tagNameInterval: INodeInterval = {
+        startIndex: tagNameStartIndex,
+        endIndex: tagNameEndIndex,
+      }
+      const rawTagName = calcStringFromNodePoints(
+        nodePoints,
+        tagNameInterval.startIndex,
+        tagNameInterval.endIndex,
+      )
+      const tagName = rawTagName.toLowerCase()
+      // condition 1
+      nextIndex = eatStartCondition1(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
+      if (nextIndex != null) return { nextIndex, condition: 1 }
+      // condition 6
+      nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
+      if (nextIndex != null) return { nextIndex, condition: 6 }
+      // condition 7 (open tag)
+      nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, true)
+      if (nextIndex != null) return { nextIndex, condition: 7 }
+      // fallback
+      return null
+    }
+    const tagNameStartIndex = startIndex + 1
+    const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
+    if (tagNameEndIndex == null) return null
+    const tagNameInterval: INodeInterval = {
+      startIndex: tagNameStartIndex,
+      endIndex: tagNameEndIndex,
+    }
+    const rawTagName = calcStringFromNodePoints(
+      nodePoints,
+      tagNameInterval.startIndex,
+      tagNameInterval.endIndex,
+    )
+    const tagName = rawTagName.toLowerCase()
+    // condition 6
+    nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
+    if (nextIndex != null) return { nextIndex, condition: 6 }
+    // condition 7 (closing tag)
+    nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, false)
+    if (nextIndex != null) return { nextIndex, condition: 7 }
+    // fallback
+    return null
+  }
+  function eatEndCondition(
+    nodePoints: ReadonlyArray<INodePoint>,
+    startIndex: number,
+    endIndex: number,
+    condition: HtmlBlockConditionType,
+  ): -1 | number | null {
+    switch (condition) {
+      case 1: {
+        const nextIndex = eatEndCondition1(nodePoints, startIndex, endIndex)
+        return nextIndex == null ? null : endIndex
+      }
+      case 2: {
+        const nextIndex = eatEndCondition2(nodePoints, startIndex, endIndex)
+        return nextIndex == null ? null : endIndex
+      }
+      case 3: {
+        const nextIndex = eatEndCondition3(nodePoints, startIndex, endIndex)
+        return nextIndex == null ? null : endIndex
+      }
+      case 4: {
+        const nextIndex = eatEndCondition4(nodePoints, startIndex, endIndex)
+        return nextIndex == null ? null : endIndex
+      }
+      case 5: {
+        const nextIndex = eatEndCondition5(nodePoints, startIndex, endIndex)
+        return nextIndex == null ? null : endIndex
+      }
+      case 6:
+      case 7: {
+        const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
+        return firstNonWhitespaceIndex >= endIndex ? -1 : null // -1 marks a blank line
+      }
+    }
+  }
+}

package/src/parse.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import { calcStringFromNodePoints } from '@yozora/character'
+import type { IParseBlockHookCreator } from '@yozora/core-tokenizer'
+import { mergeContentLinesFaithfully } from '@yozora/core-tokenizer'
+import type { INode, IThis, IToken, T } from './types'
+export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
+  return {
+    parse: tokens =>
+      tokens.map(token => {
+        // Merge the lines collected for the token; their text becomes the html node's value.
+        const contents = mergeContentLinesFaithfully(token.lines)
+        const node: INode = api.shouldReservePosition // attach the position only when requested
+          ? { type: 'html', position: token.position, value: calcStringFromNodePoints(contents) }
+          : { type: 'html', value: calcStringFromNodePoints(contents) }
+        return node
+      }),
+  }
+}

package/src/tokenizer.ts ADDED Viewed

@@ -0,0 +1,32 @@
+import type {
+  IBlockTokenizer,
+  IMatchBlockHookCreator,
+  IParseBlockHookCreator,
+} from '@yozora/core-tokenizer'
+import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
+import { match } from './match'
+import { parse } from './parse'
+import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
+import { uniqueName } from './types'
+/**
+ * Lexical Analyzer for HtmlBlock.
+ *
+ * When `props.name` or `props.priority` is not provided (null or undefined),
+ * the tokenizer falls back to `uniqueName` and `TokenizerPriority.ATOMIC`
+ * respectively. The `match` and `parse` fields hold the hook creators
+ * imported from `./match` and `./parse`.
+ *
+ * @see https://github.com/syntax-tree/mdast#html
+ * @see https://github.github.com/gfm/#html-blocks
+ */
+export class HtmlBlockTokenizer
+  extends BaseBlockTokenizer<T, IToken, INode, IThis>
+  implements IBlockTokenizer<T, IToken, INode, IThis>
+{
+  /* istanbul ignore next */
+  constructor(props: ITokenizerProps = {}) {
+    super({
+      name: props.name ?? uniqueName,
+      priority: props.priority ?? TokenizerPriority.ATOMIC,
+    })
+  }
+  public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
+  public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
+}

package/src/types.ts ADDED Viewed

@@ -0,0 +1,74 @@
+import type { Html, HtmlType } from '@yozora/ast'
+import type {
+  IBaseBlockTokenizerProps,
+  IPartialYastBlockToken,
+  IPhrasingContentLine,
+  ITokenizer,
+} from '@yozora/core-tokenizer'
+export type T = HtmlType
+export type INode = Html
+export const uniqueName = '@yozora/tokenizer-html-block'
+export type HtmlBlockConditionType = 1 | 2 | 3 | 4 | 5 | 6 | 7
+/**
+ * Middle state during the whole match and parse phase.
+ */
+export interface IToken extends IPartialYastBlockToken<T> {
+  /**
+   * Number (1-7) of the GFM start/end condition pair that this block matched:
+   *
+   * 1. Start condition: line begins with the string `<script`, `<pre`, or
+   *    `<style` (case-insensitive), followed by whitespace, the string `>`,
+   *    or the end of the line.
+   *
+   *    End condition: line contains an end tag `</script>`, `</pre>`,
+   *    or `</style>` (case-insensitive; it need not match the start tag).
+   *
+   * 2. Start condition: line begins with the string `<!--`.
+   *    End condition: line contains the string `-->`.
+   *
+   * 3. Start condition: line begins with the string `<?`.
+   *    End condition: line contains the string `?>`.
+   *
+   * 4. Start condition: line begins with the string `<!` followed by an
+   *    uppercase ASCII letter.
+   *
+   *    End condition: line contains the character >.
+   *
+   * 5. Start condition: line begins with the string `<![CDATA[`.
+   *    End condition: line contains the string `]]>`.
+   *
+   * 6. Start condition: line begins with the string `<` or `</` followed by one of
+   *    the strings (case-insensitive) `address`, `article`, `aside`, `base`,
+   *    `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`,
+   *    `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`,
+   *    `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`,
+   *    `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`,
+   *    `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`,
+   *    `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`,
+   *    `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`,
+   *    `ul`, followed by whitespace, the end of the line, the string `>`,
+   *    or the string `/>`.
+   *
+   *    End condition: line is followed by a blank line.
+   *
+   * 7. Start condition: line begins with a complete open tag (with any tag name
+   *    other than `script`, `style`, or `pre`) or a complete closing tag,
+   *    followed only by whitespace or the end of the line.
+   *
+   *    End condition: line is followed by a blank line.
+   *
+   * @see https://github.github.com/gfm/#start-condition
+   */
+  condition: HtmlBlockConditionType
+  /**
+   * Contents
+   */
+  lines: Array<Readonly<IPhrasingContentLine>>
+}
+export type IThis = ITokenizer
+export type ITokenizerProps = Partial<IBaseBlockTokenizerProps>

package/src/util/eat-html-attribute.ts ADDED Viewed

@@ -0,0 +1,170 @@
+import type { INodeInterval, INodePoint } from '@yozora/character'
+import {
+  AsciiCodePoint,
+  isAsciiDigitCharacter,
+  isAsciiLetter,
+  isWhitespaceCharacter,
+} from '@yozora/character'
+import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
+export interface RawHTMLAttribute {
+  /**
+   * Interval of the attribute name. As filled in by `eatHTMLAttribute`,
+   * `startIndex` is the index of the first name character and `endIndex` is
+   * the index just past the last one.
+   */
+  name: INodeInterval
+  /**
+   * Interval of the attribute value. As filled in by `eatHTMLAttribute`, the
+   * surrounding quotes of a quoted value are not part of the interval.
+   * Left unset when no well-formed value specification follows the name.
+   */
+  value?: INodeInterval
+}
+/**
+ * Try to consume a single HTML attribute: mandatory leading whitespace, an
+ * attribute name, and an optional attribute value specification.
+ *
+ * When a `=` follows the name but no well-formed value can be read after it,
+ * the attribute is reported without a value and `nextIndex` points right after
+ * the name, so the caller sees the unconsumed `=` again.
+ *
+ * @param nodePoints  code points being scanned
+ * @param startIndex  index where the scan begins
+ * @param endIndex    exclusive upper bound of the scan
+ * @returns the located attribute plus the index just past it, or `null` when
+ *          no attribute starts at `startIndex`
+ * @see https://github.github.com/gfm/#attribute
+ */
+export function eatHTMLAttribute(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): { attribute: RawHTMLAttribute; nextIndex: number } | null {
+  // At least one whitespace must precede the name, and the name must not be empty.
+  const nameStart = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
+  if (nameStart <= startIndex || nameStart >= endIndex) return null
+  /**
+   * Attribute name: an ASCII letter, `_`, or `:`, followed by zero or more
+   * ASCII letters, digits, `_`, `.`, `:`, or `-`.
+   * @see https://github.github.com/gfm/#attribute-name
+   */
+  const head = nodePoints[nameStart].codePoint
+  const isNameHead =
+    isAsciiLetter(head) || head === AsciiCodePoint.UNDERSCORE || head === AsciiCodePoint.COLON
+  if (!isNameHead) return null
+  let nameEnd = nameStart + 1
+  while (nameEnd < endIndex) {
+    const p = nodePoints[nameEnd].codePoint
+    const isNameTail =
+      isAsciiLetter(p) ||
+      isAsciiDigitCharacter(p) ||
+      p === AsciiCodePoint.UNDERSCORE ||
+      p === AsciiCodePoint.DOT ||
+      p === AsciiCodePoint.COLON ||
+      p === AsciiCodePoint.MINUS_SIGN
+    if (!isNameTail) break
+    nameEnd += 1
+  }
+  const attribute: RawHTMLAttribute = {
+    name: { startIndex: nameStart, endIndex: nameEnd },
+  }
+  // Fallback result used whenever no valid value specification follows the name.
+  const nameOnly = { attribute, nextIndex: nameEnd }
+  /**
+   * Attribute value specification: optional whitespace, `=`, optional
+   * whitespace, and an attribute value (unquoted, single-quoted, or
+   * double-quoted).
+   * @see https://github.github.com/gfm/#attribute-value-specification
+   * @see https://github.github.com/gfm/#attribute-value
+   */
+  const equalsIndex = eatOptionalWhitespaces(nodePoints, nameEnd, endIndex)
+  if (equalsIndex >= endIndex) return nameOnly
+  if (nodePoints[equalsIndex].codePoint !== AsciiCodePoint.EQUALS_SIGN) return nameOnly
+  const valueHead = eatOptionalWhitespaces(nodePoints, equalsIndex + 1, endIndex)
+  if (valueHead >= endIndex) return nameOnly
+  const opener = nodePoints[valueHead].codePoint
+  /**
+   * Quoted value: an opening `"` (or `'`), zero or more characters other than
+   * that quote, and the matching closing quote.
+   * @see https://github.github.com/gfm/#double-quoted-attribute-value
+   * @see https://github.github.com/gfm/#single-quoted-attribute-value
+   */
+  if (opener === AsciiCodePoint.DOUBLE_QUOTE || opener === AsciiCodePoint.SINGLE_QUOTE) {
+    const valueStart = valueHead + 1
+    let closer = valueStart
+    while (closer < endIndex && nodePoints[closer].codePoint !== opener) closer += 1
+    // No closing quote inside the range: the value specification is invalid.
+    if (closer >= endIndex) return nameOnly
+    attribute.value = { startIndex: valueStart, endIndex: closer }
+    return { attribute, nextIndex: closer + 1 }
+  }
+  /**
+   * Unquoted value: a nonempty run of characters not including whitespace,
+   * `"`, `'`, `=`, `<`, `>`, or `\``.
+   * @see https://github.github.com/gfm/#unquoted-attribute-value
+   */
+  let valueEnd = valueHead
+  for (; valueEnd < endIndex; valueEnd += 1) {
+    const p = nodePoints[valueEnd].codePoint
+    const isTerminator =
+      isWhitespaceCharacter(p) ||
+      p === AsciiCodePoint.DOUBLE_QUOTE ||
+      p === AsciiCodePoint.SINGLE_QUOTE ||
+      p === AsciiCodePoint.EQUALS_SIGN ||
+      p === AsciiCodePoint.OPEN_ANGLE ||
+      p === AsciiCodePoint.CLOSE_ANGLE ||
+      p === AsciiCodePoint.BACKTICK
+    if (isTerminator) break
+  }
+  // An empty unquoted value is not allowed.
+  if (valueEnd === valueHead) return nameOnly
+  attribute.value = { startIndex: valueHead, endIndex: valueEnd }
+  return { attribute, nextIndex: valueEnd }
+}

package/src/util/eat-html-tagname.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import type { INodePoint } from '@yozora/character'
+import { AsciiCodePoint, isAsciiDigitCharacter, isAsciiLetter } from '@yozora/character'
+/**
+ * Consume an HTML tag name: one ASCII letter followed by any number of ASCII
+ * letters, digits, or hyphens (`-`).
+ *
+ * @param nodePoints  code points being scanned
+ * @param startIndex  index at which the tag name must begin
+ * @param endIndex    exclusive upper bound of the scan
+ * @returns the index just past the tag name, or `null` when the range is
+ *          empty or does not begin with an ASCII letter
+ * @see https://github.github.com/gfm/#tag-name
+ */
+export function eatHTMLTagName(
+  nodePoints: ReadonlyArray<INodePoint>,
+  startIndex: number,
+  endIndex: number,
+): number | null {
+  if (startIndex >= endIndex) return null
+  if (!isAsciiLetter(nodePoints[startIndex].codePoint)) return null
+  // The leading letter is already accepted; scan the remaining name characters.
+  let cursor = startIndex + 1
+  while (cursor < endIndex) {
+    const codePoint = nodePoints[cursor].codePoint
+    const isNameChar =
+      isAsciiLetter(codePoint) ||
+      isAsciiDigitCharacter(codePoint) ||
+      codePoint === AsciiCodePoint.MINUS_SIGN
+    if (!isNameChar) break
+    cursor += 1
+  }
+  return cursor
+}