npm - @yozora/tokenizer-html-block - Versions diffs - 2.1.3 → 2.1.5 - Mend

@yozora/tokenizer-html-block 2.1.3 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/CHANGELOG.md +671 -0
package/README.md +42 -42
package/package.json +6 -6
package/src/conditions/c1.ts +0 -79
package/src/conditions/c2.ts +0 -55
package/src/conditions/c3.ts +0 -48
package/src/conditions/c4.ts +0 -48
package/src/conditions/c5.ts +0 -59
package/src/conditions/c6.ts +0 -109
package/src/conditions/c7.ts +0 -54
package/src/index.ts +0 -11
package/src/match.ts +0 -231
package/src/parse.ts +0 -18
package/src/tokenizer.ts +0 -32
package/src/types.ts +0 -74
package/src/util/eat-html-attribute.ts +0 -170
package/src/util/eat-html-tagname.ts +0 -27

package/src/match.ts DELETED Viewed

@@ -1,231 +0,0 @@
-import { HtmlType } from '@yozora/ast'
-import type { INodeInterval, INodePoint } from '@yozora/character'
-import { AsciiCodePoint, calcStringFromNodePoints } from '@yozora/character'
-import type {
-  IBlockToken,
-  IMatchBlockHookCreator,
-  IPhrasingContentLine,
-  IResultOfEatAndInterruptPreviousSibling,
-  IResultOfEatContinuationText,
-  IResultOfEatOpener,
-} from '@yozora/core-tokenizer'
-import { calcEndPoint, calcStartPoint, eatOptionalWhitespaces } from '@yozora/core-tokenizer'
-import { eatEndCondition1, eatStartCondition1 } from './conditions/c1'
-import { eatEndCondition2, eatStartCondition2 } from './conditions/c2'
-import { eatEndCondition3, eatStartCondition3 } from './conditions/c3'
-import { eatEndCondition4, eatStartCondition4 } from './conditions/c4'
-import { eatEndCondition5, eatStartCondition5 } from './conditions/c5'
-import { eatStartCondition6 } from './conditions/c6'
-import { eatStartCondition7 } from './conditions/c7'
-import type { HtmlBlockConditionType, IThis, IToken, T } from './types'
-import { eatHTMLTagName } from './util/eat-html-tagname'
-/**
- * An HTML block is a group of lines that is treated as raw HTML (and will not
- * be escaped in HTML output).
- *
- * @see https://github.com/syntax-tree/mdast#html
- * @see https://github.github.com/gfm/#html-blocks
- */
-export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
-  return {
-    isContainingBlock: false,
-    eatOpener,
-    eatAndInterruptPreviousSibling,
-    eatContinuationText,
-  }
-  function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
-    /**
-     * Try to open an HTML block on `line`. Returns null when the line is
-     * indented too far, does not start with `<`, or matches no start
-     * condition; otherwise returns a token that consumes the whole line.
-     *
-     * The opening tag can be indented 1-3 spaces, but not 4.
-     * @see https://github.github.com/gfm/#example-152
-     */
-    if (line.countOfPrecedeSpaces >= 4) return null
-    const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
-    if (
-      firstNonWhitespaceIndex >= endIndex ||
-      nodePoints[firstNonWhitespaceIndex].codePoint !== AsciiCodePoint.OPEN_ANGLE
-    )
-      return null // no non-whitespace character before endIndex, or the first one is not '<'
-    const i = firstNonWhitespaceIndex + 1 // index just past the '<'
-    const startResult = eatStartCondition(nodePoints, i, endIndex)
-    if (startResult == null) return null
-    const { condition } = startResult
-    /**
-     * The end tag can occur on the same line as the start tag; `saturated`
-     * records that the end condition was already met on the opening line.
-     * Conditions 6 and 7 are excluded from this check, so a block opened by
-     * them is never reported as saturated here.
-     * @see https://github.github.com/gfm/#example-145
-     * @see https://github.github.com/gfm/#example-146
-     */
-    let saturated = false
-    if (condition !== 6 && condition !== 7) {
-      const endResult = eatEndCondition(nodePoints, startResult.nextIndex, endIndex, condition)
-      if (endResult != null) saturated = true
-    }
-    const nextIndex = endIndex // the whole line belongs to the block
-    const token: IToken = {
-      nodeType: HtmlType,
-      position: {
-        start: calcStartPoint(nodePoints, startIndex),
-        end: calcEndPoint(nodePoints, nextIndex - 1),
-      },
-      condition,
-      lines: [line],
-    }
-    return { token, nextIndex, saturated }
-  }
-  function eatAndInterruptPreviousSibling( // same matching as eatOpener, except that a condition-7 opener is rejected
-    line: Readonly<IPhrasingContentLine>,
-    prevSiblingToken: Readonly<IBlockToken>,
-  ): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
-    const result = eatOpener(line)
-    if (result == null || result.token.condition === 7) return null // a condition-7 block does not interrupt the previous sibling
-    const { token, nextIndex } = result // note: result.saturated is not forwarded to the caller
-    return {
-      token,
-      nextIndex,
-      remainingSibling: prevSiblingToken, // the previous sibling is handed back as-is
-    }
-  }
-  function eatContinuationText( // decides whether `line` continues, closes, or does not belong to the open HTML block
-    line: Readonly<IPhrasingContentLine>,
-    token: IToken,
-  ): IResultOfEatContinuationText {
-    const { nodePoints, endIndex, firstNonWhitespaceIndex } = line
-    const nextIndex = eatEndCondition( // -1: only whitespace left (conditions 6/7); number: end condition met; null: not met
-      nodePoints,
-      firstNonWhitespaceIndex,
-      endIndex,
-      token.condition,
-    )
-    if (nextIndex === -1) return { status: 'notMatched' } // the line is not added to the token
-    token.lines.push(line) // mutates the token: the line becomes part of the block
-    if (nextIndex != null) return { status: 'closing', nextIndex: endIndex } // end condition found on this line
-    return { status: 'opening', nextIndex: endIndex } // the block stays open
-  }
-  function eatStartCondition( // returns the first start condition that matches at startIndex, or null when none does
-    nodePoints: ReadonlyArray<INodePoint>,
-    startIndex: number,
-    endIndex: number,
-  ): { condition: HtmlBlockConditionType; nextIndex: number } | null {
-    let nextIndex: number | null = null
-    if (startIndex >= endIndex) return null // nothing left to inspect
-    // condition 2 (conditions 2-5 are tried first, before any tag name is read)
-    nextIndex = eatStartCondition2(nodePoints, startIndex, endIndex)
-    if (nextIndex != null) return { nextIndex, condition: 2 }
-    // condition 3
-    nextIndex = eatStartCondition3(nodePoints, startIndex, endIndex)
-    if (nextIndex != null) return { nextIndex, condition: 3 }
-    // condition 4
-    nextIndex = eatStartCondition4(nodePoints, startIndex, endIndex)
-    if (nextIndex != null) return { nextIndex, condition: 4 }
-    // condition 5
-    nextIndex = eatStartCondition5(nodePoints, startIndex, endIndex)
-    if (nextIndex != null) return { nextIndex, condition: 5 }
-    if (nodePoints[startIndex].codePoint !== AsciiCodePoint.SLASH) { // no leading '/': conditions 1, 6 and 7 are tried, in that order
-      const tagNameStartIndex = startIndex
-      const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
-      if (tagNameEndIndex == null) return null
-      const tagNameInterval: INodeInterval = {
-        startIndex: tagNameStartIndex,
-        endIndex: tagNameEndIndex,
-      }
-      const rawTagName = calcStringFromNodePoints(
-        nodePoints,
-        tagNameInterval.startIndex,
-        tagNameInterval.endIndex,
-      )
-      const tagName = rawTagName.toLowerCase() // the condition helpers receive the lower-cased name
-      // condition 1
-      nextIndex = eatStartCondition1(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
-      if (nextIndex != null) return { nextIndex, condition: 1 }
-      // condition 6
-      nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
-      if (nextIndex != null) return { nextIndex, condition: 6 }
-      // condition 7 (last argument is true on this path and false on the '/' path; presumably it marks an open tag, confirm in ./conditions/c7)
-      nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, true)
-      if (nextIndex != null) return { nextIndex, condition: 7 }
-      // fallback
-      return null
-    }
-    const tagNameStartIndex = startIndex + 1 // leading '/': the tag name starts right after it
-    const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
-    if (tagNameEndIndex == null) return null
-    const tagNameInterval: INodeInterval = {
-      startIndex: tagNameStartIndex,
-      endIndex: tagNameEndIndex,
-    }
-    const rawTagName = calcStringFromNodePoints(
-      nodePoints,
-      tagNameInterval.startIndex,
-      tagNameInterval.endIndex,
-    )
-    const tagName = rawTagName.toLowerCase()
-    // condition 6 (condition 1 is not tried on the '/' path)
-    nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
-    if (nextIndex != null) return { nextIndex, condition: 6 }
-    // condition 7.
-    nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, false)
-    if (nextIndex != null) return { nextIndex, condition: 7 }
-    // fallback
-    return null
-  }
-  function eatEndCondition( // checks whether the end condition of `condition` is met within [startIndex, endIndex)
-    nodePoints: ReadonlyArray<INodePoint>,
-    startIndex: number,
-    endIndex: number,
-    condition: HtmlBlockConditionType,
-  ): -1 | number | null { // -1: only whitespace left (conditions 6/7 only); number: end condition met; null: not met
-    switch (condition) {
-      case 1: {
-        const nextIndex = eatEndCondition1(nodePoints, startIndex, endIndex)
-        return nextIndex == null ? null : endIndex // on a match, endIndex is returned rather than the helper's own index (same for cases 2-5)
-      }
-      case 2: {
-        const nextIndex = eatEndCondition2(nodePoints, startIndex, endIndex)
-        return nextIndex == null ? null : endIndex
-      }
-      case 3: {
-        const nextIndex = eatEndCondition3(nodePoints, startIndex, endIndex)
-        return nextIndex == null ? null : endIndex
-      }
-      case 4: {
-        const nextIndex = eatEndCondition4(nodePoints, startIndex, endIndex)
-        return nextIndex == null ? null : endIndex
-      }
-      case 5: {
-        const nextIndex = eatEndCondition5(nodePoints, startIndex, endIndex)
-        return nextIndex == null ? null : endIndex
-      }
-      case 6:
-      case 7: {
-        const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
-        return firstNonWhitespaceIndex >= endIndex ? -1 : null // these cases never return an index: -1 when no non-whitespace character remains, otherwise null
-      }
-    }
-  }
-}

package/src/parse.ts DELETED Viewed

@@ -1,18 +0,0 @@
-import { calcStringFromNodePoints } from '@yozora/character'
-import type { IParseBlockHookCreator } from '@yozora/core-tokenizer'
-import { mergeContentLinesFaithfully } from '@yozora/core-tokenizer'
-import type { INode, IThis, IToken, T } from './types'
-export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) { // hook creator: maps each matched token to one 'html' node
-  return {
-    parse: tokens =>
-      tokens.map(token => {
-        // Merge the token's lines (mergeContentLinesFaithfully) and use the resulting string as the node value.
-        const contents = mergeContentLinesFaithfully(token.lines)
-        const node: INode = api.shouldReservePosition // the position is copied onto the node only when this flag is truthy
-          ? { type: 'html', position: token.position, value: calcStringFromNodePoints(contents) }
-          : { type: 'html', value: calcStringFromNodePoints(contents) }
-        return node
-      }),
-  }
-}

package/src/tokenizer.ts DELETED Viewed

@@ -1,32 +0,0 @@
-import type {
-  IBlockTokenizer,
-  IMatchBlockHookCreator,
-  IParseBlockHookCreator,
-} from '@yozora/core-tokenizer'
-import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
-import { match } from './match'
-import { parse } from './parse'
-import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
-import { uniqueName } from './types'
-/**
- * Lexical Analyzer for HtmlBlock.
- *
- * Wires the `match` and `parse` hook creators into a block tokenizer. The
- * constructor takes optional `name` and `priority` props; when omitted they
- * default to `uniqueName` and `TokenizerPriority.ATOMIC` respectively.
- *
- * @see https://github.com/syntax-tree/mdast#html
- * @see https://github.github.com/gfm/#html-blocks
- */
-export class HtmlBlockTokenizer
-  extends BaseBlockTokenizer<T, IToken, INode, IThis>
-  implements IBlockTokenizer<T, IToken, INode, IThis>
-{
-  /* istanbul ignore next */
-  constructor(props: ITokenizerProps = {}) {
-    super({
-      name: props.name ?? uniqueName,
-      priority: props.priority ?? TokenizerPriority.ATOMIC,
-    })
-  }
-  public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
-  public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
-}

package/src/types.ts DELETED Viewed

@@ -1,74 +0,0 @@
-import type { Html, HtmlType } from '@yozora/ast'
-import type {
-  IBaseBlockTokenizerProps,
-  IPartialBlockToken,
-  IPhrasingContentLine,
-  ITokenizer,
-} from '@yozora/core-tokenizer'
-export type T = HtmlType // node type produced by this tokenizer
-export type INode = Html // AST node type produced by this tokenizer
-export const uniqueName = '@yozora/tokenizer-html-block' // exported name constant; equals the package name
-export type HtmlBlockConditionType = 1 | 2 | 3 | 4 | 5 | 6 | 7 // one value per GFM HTML-block start condition
-/**
- * Middle state during the whole match and parse phase.
- */
-export interface IToken extends IPartialBlockToken<T> {
-  /**
-   * Number (1-7) of the GFM start condition that this block matched. The
-   * conditions defined in GFM are:
-   *
-   * 1. Start condition: line begins with the string `<script`, `<pre`, or
-   *    `<style` (case-insensitive), followed by whitespace, the string `>`,
-   *    or the end of the line.
-   *
-   *    End condition: line contains an end tag `</script>`, `</pre>`,
-   *    or `</style>` (case-insensitive; it need not match the start tag).
-   *
-   * 2. Start condition: line begins with the string `<!--`.
-   *    End condition: line contains the string `-->`.
-   *
-   * 3. Start condition: line begins with the string `<?`.
-   *    End condition: line contains the string `?>`.
-   *
-   * 4. Start condition: line begins with the string `<!` followed by an
-   *    uppercase ASCII letter.
-   *
-   *    End condition: line contains the character `>`.
-   *
-   * 5. Start condition: line begins with the string `<![CDATA[`.
-   *    End condition: line contains the string `]]>`.
-   *
-   * 6. Start condition: line begins with the string `<` or `</` followed by
-   *    one of the strings (case-insensitive) `address`, `article`, `aside`, `base`,
-   *    `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`,
-   *    `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`,
-   *    `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`,
-   *    `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`,
-   *    `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`,
-   *    `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`,
-   *    `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`,
-   *    `ul`, followed by whitespace, the end of the line, the string `>`,
-   *    or the string `/>`.
-   *
-   *    End condition: line is followed by a blank line.
-   *
-   * 7. Start condition: line begins with a complete open tag (with any tag name
-   *    other than `script`, `style`, or `pre`) or a complete closing tag,
-   *    followed only by whitespace or the end of the line.
-   *
-   *    End condition: line is followed by a blank line.
-   *
-   * @see https://github.github.com/gfm/#start-condition
-   */
-  condition: HtmlBlockConditionType
-  /**
-   * Source lines that make up this HTML block, in order.
-   */
-  lines: Array<Readonly<IPhrasingContentLine>>
-}
-export type IThis = ITokenizer // NOTE(review): presumably the `this` context of the hook creators; confirm in @yozora/core-tokenizer
-export type ITokenizerProps = Partial<IBaseBlockTokenizerProps> // every base block-tokenizer prop is optional

package/src/util/eat-html-attribute.ts DELETED Viewed

@@ -1,170 +0,0 @@
-import type { INodeInterval, INodePoint } from '@yozora/character'
-import {
-  AsciiCodePoint,
-  isAsciiDigitCharacter,
-  isAsciiLetter,
-  isWhitespaceCharacter,
-} from '@yozora/character'
-import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
-export interface RawHTMLAttribute {
-  /**
-   * Interval of the attribute name; `endIndex` is exclusive.
-   */
-  name: INodeInterval
-  /**
-   * Interval of the attribute value (`endIndex` exclusive), without the
-   * surrounding quotes for quoted values. Absent when the attribute has no
-   * valid value specification.
-   */
-  value?: INodeInterval
-}
-/**
- * An attribute consists of whitespace, an attribute name, and an optional
- * attribute value specification.
- *
- * Returns null when no leading whitespace is consumed, when nothing follows
- * the whitespace, or when the next character cannot start an attribute name.
- * If a `=` is present but no valid value follows it, only the name is
- * reported and `nextIndex` points just past the name.
- *
- * @param nodePoints  code points being scanned
- * @param startIndex  index at which scanning starts (the leading whitespace is expected here)
- * @param endIndex    exclusive upper bound of the scan
- * @returns the name/value intervals and the index at which scanning can resume, or null
- * @see https://github.github.com/gfm/#attribute
- */
-export function eatHTMLAttribute(
-  nodePoints: ReadonlyArray<INodePoint>,
-  startIndex: number,
-  endIndex: number,
-): { attribute: RawHTMLAttribute; nextIndex: number } | null {
-  // eat whitespace.
-  let i = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
-  if (i <= startIndex || i >= endIndex) return null // the index must have advanced, and something must follow the whitespace
-  /**
-   * Eat attribute name.
-   *
-   * An attribute name consists of an ASCII letter, `_`, or `:`, followed by
-   * zero or more ASCII letters, digits, `_`, `.`, `:`, or `-`.
-   * @see https://github.github.com/gfm/#attribute-name
-   */
-  const attrNameStartIndex = i
-  let c = nodePoints[i].codePoint
-  if (!isAsciiLetter(c) && c !== AsciiCodePoint.UNDERSCORE && c !== AsciiCodePoint.COLON)
-    return null
-  for (i = attrNameStartIndex + 1; i < endIndex; ++i) {
-    c = nodePoints[i].codePoint
-    if (
-      isAsciiLetter(c) ||
-      isAsciiDigitCharacter(c) ||
-      c === AsciiCodePoint.UNDERSCORE ||
-      c === AsciiCodePoint.DOT ||
-      c === AsciiCodePoint.COLON ||
-      c === AsciiCodePoint.MINUS_SIGN
-    )
-      continue
-    break
-  }
-  const attrNameEndIndex = i // exclusive end of the name
-  const attribute: RawHTMLAttribute = {
-    name: {
-      startIndex: attrNameStartIndex,
-      endIndex: attrNameEndIndex,
-    },
-  }
-  /**
-   * Eat attribute value.
-   *
-   * An attribute value specification consists of optional whitespace, a `=`
-   * character, optional whitespace, and an attribute value.
-   *
-   * An attribute value consists of an unquoted attribute value, a single-quoted
-   * attribute value, or a double-quoted attribute value.
-   *
-   * @see https://github.github.com/gfm/#attribute-value-specification
-   * @see https://github.github.com/gfm/#attribute-value
-   */
-  i = eatOptionalWhitespaces(nodePoints, attrNameEndIndex, endIndex)
-  if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.EQUALS_SIGN) {
-    i = eatOptionalWhitespaces(nodePoints, i + 1, endIndex)
-    if (i < endIndex) {
-      const mark = nodePoints[i].codePoint
-      switch (mark) {
-        /**
-         * A double-quoted attribute value consists of `"`, zero or more
-         * characters not including `"`, and a final `"`.
-         * @see https://github.github.com/gfm/#double-quoted-attribute-value
-         */
-        case AsciiCodePoint.DOUBLE_QUOTE: {
-          const attrValueStartIndex = i + 1 // skip the opening quote
-          for (i = attrValueStartIndex; i < endIndex; ++i) {
-            c = nodePoints[i].codePoint
-            if (c === AsciiCodePoint.DOUBLE_QUOTE) break
-          }
-          const attrValueEndIndex = i
-          if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.DOUBLE_QUOTE) { // the value is recorded only when the closing quote was found
-            attribute.value = {
-              startIndex: attrValueStartIndex,
-              endIndex: attrValueEndIndex,
-            }
-            i += 1 // step past the closing quote
-          }
-          break
-        }
-        /**
-         * A single-quoted attribute value consists of `'`, zero or more
-         * characters not including `'`, and a final `'`.
-         * @see https://github.github.com/gfm/#single-quoted-attribute-value
-         */
-        case AsciiCodePoint.SINGLE_QUOTE: {
-          const attrValueStartIndex = i + 1 // skip the opening quote
-          for (i = attrValueStartIndex; i < endIndex; ++i) {
-            c = nodePoints[i].codePoint
-            if (c === AsciiCodePoint.SINGLE_QUOTE) break
-          }
-          const attrValueEndIndex = i
-          if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.SINGLE_QUOTE) { // the value is recorded only when the closing quote was found
-            attribute.value = {
-              startIndex: attrValueStartIndex,
-              endIndex: attrValueEndIndex,
-            }
-            i += 1 // step past the closing quote
-          }
-          break
-        }
-        /**
-         * An unquoted attribute value is a nonempty string of characters not
-         * including whitespace, `"`, `'`, `=`, `<`, `>`, or `\``.
-         * @see https://github.github.com/gfm/#unquoted-attribute-value
-         */
-        default: {
-          const attrValueStartIndex = i
-          for (; i < endIndex; ++i) {
-            c = nodePoints[i].codePoint
-            if (
-              isWhitespaceCharacter(c) ||
-              c === AsciiCodePoint.DOUBLE_QUOTE ||
-              c === AsciiCodePoint.SINGLE_QUOTE ||
-              c === AsciiCodePoint.EQUALS_SIGN ||
-              c === AsciiCodePoint.OPEN_ANGLE ||
-              c === AsciiCodePoint.CLOSE_ANGLE ||
-              c === AsciiCodePoint.BACKTICK
-            )
-              break
-          }
-          const attrValueEndIndex = i
-          if (attrValueEndIndex > attrValueStartIndex) { // an empty unquoted value is not recorded
-            attribute.value = {
-              startIndex: attrValueStartIndex,
-              endIndex: attrValueEndIndex,
-            }
-          }
-          break
-        }
-      }
-      if (attribute.value != null) {
-        return { attribute, nextIndex: i }
-      }
-    }
-  }
-  return { attribute, nextIndex: attrNameEndIndex } // no valid value: resume right after the name
-}

package/src/util/eat-html-tagname.ts DELETED Viewed

@@ -1,27 +0,0 @@
-import type { INodePoint } from '@yozora/character'
-import { AsciiCodePoint, isAsciiDigitCharacter, isAsciiLetter } from '@yozora/character'
-/**
- * A tag name consists of an ASCII letter followed by zero or more ASCII
- * letters, digits, or hyphens (-).
- *
- * @param nodePoints  code points being scanned
- * @param startIndex  index at which the tag name is expected to start
- * @param endIndex    exclusive upper bound of the scan
- * @returns the index just past the tag name, or null when `startIndex` is
- *          not below `endIndex` or the first character is not an ASCII letter
- * @see https://github.github.com/gfm/#tag-name
- */
-export function eatHTMLTagName(
-  nodePoints: ReadonlyArray<INodePoint>,
-  startIndex: number,
-  endIndex: number,
-): number | null {
-  if (startIndex >= endIndex || !isAsciiLetter(nodePoints[startIndex].codePoint)) return null
-  let i = startIndex
-  for (; i < endIndex; ++i) {
-    const c = nodePoints[i].codePoint
-    if (isAsciiLetter(c) || isAsciiDigitCharacter(c) || c === AsciiCodePoint.MINUS_SIGN) continue
-    return i // first character that cannot be part of a tag name
-  }
-  return i // the tag name runs up to endIndex
-}