@yozora/tokenizer-html-block 2.1.2 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yozora/tokenizer-html-block",
3
- "version": "2.1.2",
3
+ "version": "2.1.4",
4
4
  "author": {
5
5
  "name": "guanghechen",
6
6
  "url": "https://github.com/guanghechen/"
@@ -27,7 +27,7 @@
27
27
  },
28
28
  "files": [
29
29
  "lib/",
30
- "src/",
30
+ "lib/**/*.map",
31
31
  "package.json",
32
32
  "CHANGELOG.md",
33
33
  "LICENSE",
@@ -39,9 +39,9 @@
39
39
  "test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
40
40
  },
41
41
  "dependencies": {
42
- "@yozora/ast": "^2.1.2",
43
- "@yozora/character": "^2.1.2",
44
- "@yozora/core-tokenizer": "^2.1.2"
42
+ "@yozora/ast": "^2.1.4",
43
+ "@yozora/character": "^2.1.4",
44
+ "@yozora/core-tokenizer": "^2.1.4"
45
45
  },
46
- "gitHead": "992bacafd173e7788e99fed34ce8b45f6ed24cfe"
46
+ "gitHead": "aa464ed1e3cd84892773a833910cfc53a556bf5f"
47
47
  }
@@ -1,79 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, calcStringFromNodePoints, isWhitespaceCharacter } from '@yozora/character'
3
- import { eatHTMLTagName } from '../util/eat-html-tagname'
4
-
5
- const includedTags = ['pre', 'script', 'style']
6
-
7
- /**
8
- * Eat block html start condition 1:
9
- *
10
- * line begins with the string `<script`, `<pre`, or
11
- * `<style` (case-insensitive), followed by whitespace, the string `>`,
12
- * or the end of the line.
13
- *
14
- * @param nodePoints
15
- * @param startIndex
16
- * @param endIndex
17
- * @see https://github.github.com/gfm/#start-condition
18
- */
19
- export function eatStartCondition1(
20
- nodePoints: ReadonlyArray<INodePoint>,
21
- startIndex: number,
22
- endIndex: number,
23
- tagName: string,
24
- ): number | null {
25
- if (!includedTags.includes(tagName)) return null
26
- if (startIndex >= endIndex) return endIndex
27
-
28
- const c = nodePoints[startIndex].codePoint
29
- if (isWhitespaceCharacter(c) || c === AsciiCodePoint.CLOSE_ANGLE) {
30
- return startIndex + 1
31
- }
32
- return null
33
- }
34
-
35
- /**
36
- * Eat block html end condition 1:
37
- *
38
- * line contains an end tag `</script>`, `</pre>`,
39
- * or `</style>` (case-insensitive; it need not match the start tag).
40
- *
41
- * @param nodePoints
42
- * @param startIndex
43
- * @param endIndex
44
- * @see https://github.github.com/gfm/#start-condition
45
- */
46
- export function eatEndCondition1(
47
- nodePoints: ReadonlyArray<INodePoint>,
48
- startIndex: number,
49
- endIndex: number,
50
- ): number | null {
51
- for (let i = startIndex; i < endIndex; ++i) {
52
- if (
53
- nodePoints[i].codePoint === AsciiCodePoint.OPEN_ANGLE &&
54
- i + 3 < endIndex &&
55
- nodePoints[i + 1].codePoint === AsciiCodePoint.SLASH
56
- ) {
57
- const tagNameStartIndex = i + 2
58
- const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
59
- if (
60
- tagNameEndIndex == null ||
61
- tagNameEndIndex >= endIndex ||
62
- nodePoints[tagNameEndIndex].codePoint !== AsciiCodePoint.CLOSE_ANGLE
63
- ) {
64
- i += 1
65
- continue
66
- }
67
-
68
- const rawTagName = calcStringFromNodePoints(
69
- nodePoints,
70
- tagNameStartIndex,
71
- tagNameEndIndex,
72
- true,
73
- )
74
- const tagName = rawTagName.toLowerCase()
75
- if (includedTags.includes(tagName)) return tagNameEndIndex
76
- }
77
- }
78
- return null
79
- }
@@ -1,55 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint } from '@yozora/character'
3
-
4
- /**
5
- * Eat block html start condition 2:
6
- *
7
- * Line begins with the string `<!--`.
8
- *
9
- * @param nodePoints
10
- * @param startIndex
11
- * @param endIndex
12
- * @see https://github.github.com/gfm/#start-condition
13
- */
14
- export function eatStartCondition2(
15
- nodePoints: ReadonlyArray<INodePoint>,
16
- startIndex: number,
17
- endIndex: number,
18
- ): number | null {
19
- const i = startIndex
20
- if (
21
- i + 2 < endIndex &&
22
- nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
23
- nodePoints[i + 1].codePoint === AsciiCodePoint.MINUS_SIGN &&
24
- nodePoints[i + 2].codePoint === AsciiCodePoint.MINUS_SIGN
25
- )
26
- return i + 3
27
- return null
28
- }
29
-
30
- /**
31
- * Eat block html end condition 2:
32
- *
33
- * line contains the string `-->`.
34
- *
35
- * @param nodePoints
36
- * @param startIndex
37
- * @param endIndex
38
- * @see https://github.github.com/gfm/#start-condition
39
- */
40
- export function eatEndCondition2(
41
- nodePoints: ReadonlyArray<INodePoint>,
42
- startIndex: number,
43
- endIndex: number,
44
- ): number | null {
45
- for (let i = startIndex; i < endIndex; ++i) {
46
- if (
47
- nodePoints[i].codePoint === AsciiCodePoint.MINUS_SIGN &&
48
- i + 2 < endIndex &&
49
- nodePoints[i + 1].codePoint === AsciiCodePoint.MINUS_SIGN &&
50
- nodePoints[i + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
51
- )
52
- return i + 3
53
- }
54
- return null
55
- }
@@ -1,48 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint } from '@yozora/character'
3
-
4
- /**
5
- * Eat block html start condition 3:
6
- *
7
- * line begins with the string `<?`.
8
- *
9
- * @param nodePoints
10
- * @param startIndex
11
- * @param endIndex
12
- * @see https://github.github.com/gfm/#start-condition
13
- */
14
- export function eatStartCondition3(
15
- nodePoints: ReadonlyArray<INodePoint>,
16
- startIndex: number,
17
- endIndex: number,
18
- ): number | null {
19
- const i = startIndex
20
- if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.QUESTION_MARK) return i + 1
21
- return null
22
- }
23
-
24
- /**
25
- * Eat block html end condition 3:
26
- *
27
- * line contains the string `?>`.
28
- *
29
- * @param nodePoints
30
- * @param startIndex
31
- * @param endIndex
32
- * @see https://github.github.com/gfm/#start-condition
33
- */
34
- export function eatEndCondition3(
35
- nodePoints: ReadonlyArray<INodePoint>,
36
- startIndex: number,
37
- endIndex: number,
38
- ): number | null {
39
- for (let i = startIndex; i < endIndex; ++i) {
40
- if (
41
- nodePoints[i].codePoint === AsciiCodePoint.QUESTION_MARK &&
42
- i + 1 < endIndex &&
43
- nodePoints[i + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
44
- )
45
- return i + 2
46
- }
47
- return null
48
- }
@@ -1,48 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, isAsciiUpperLetter } from '@yozora/character'
3
-
4
- /**
5
- * Eat block html start condition 4:
6
- *
7
- * line begins with the string `<!` followed by an uppercase ASCII letter.
8
- *
9
- * @param nodePoints
10
- * @param startIndex
11
- * @param endIndex
12
- * @see https://github.github.com/gfm/#start-condition
13
- */
14
- export function eatStartCondition4(
15
- nodePoints: ReadonlyArray<INodePoint>,
16
- startIndex: number,
17
- endIndex: number,
18
- ): number | null {
19
- const i = startIndex
20
- if (
21
- i + 1 < endIndex &&
22
- nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
23
- isAsciiUpperLetter(nodePoints[i + 1].codePoint)
24
- )
25
- return i + 2
26
- return null
27
- }
28
-
29
- /**
30
- * Eat block html end condition 4:
31
- *
32
- * line contains the character >.
33
- *
34
- * @param nodePoints
35
- * @param startIndex
36
- * @param endIndex
37
- * @see https://github.github.com/gfm/#start-condition
38
- */
39
- export function eatEndCondition4(
40
- nodePoints: ReadonlyArray<INodePoint>,
41
- startIndex: number,
42
- endIndex: number,
43
- ): number | null {
44
- for (let i = startIndex; i < endIndex; ++i) {
45
- if (nodePoints[i].codePoint === AsciiCodePoint.CLOSE_ANGLE) return i + 1
46
- }
47
- return null
48
- }
@@ -1,59 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint } from '@yozora/character'
3
-
4
- /**
5
- * Eat block html start condition 5:
6
- *
7
- * line begins with the string `<![CDATA[`.
8
- *
9
- * @param nodePoints
10
- * @param startIndex
11
- * @param endIndex
12
- * @see https://github.github.com/gfm/#start-condition
13
- */
14
- export function eatStartCondition5(
15
- nodePoints: ReadonlyArray<INodePoint>,
16
- startIndex: number,
17
- endIndex: number,
18
- ): number | null {
19
- const i = startIndex
20
- if (
21
- i + 6 < endIndex &&
22
- nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
23
- nodePoints[i + 1].codePoint === AsciiCodePoint.OPEN_BRACKET &&
24
- nodePoints[i + 2].codePoint === AsciiCodePoint.UPPERCASE_C &&
25
- nodePoints[i + 3].codePoint === AsciiCodePoint.UPPERCASE_D &&
26
- nodePoints[i + 4].codePoint === AsciiCodePoint.UPPERCASE_A &&
27
- nodePoints[i + 5].codePoint === AsciiCodePoint.UPPERCASE_T &&
28
- nodePoints[i + 6].codePoint === AsciiCodePoint.UPPERCASE_A
29
- )
30
- return i + 7
31
- return null
32
- }
33
-
34
- /**
35
- * Eat block html end condition 5:
36
- *
37
- * line contains the string `]]>`.
38
- *
39
- * @param nodePoints
40
- * @param startIndex
41
- * @param endIndex
42
- * @see https://github.github.com/gfm/#start-condition
43
- */
44
- export function eatEndCondition5(
45
- nodePoints: ReadonlyArray<INodePoint>,
46
- startIndex: number,
47
- endIndex: number,
48
- ): number | null {
49
- for (let i = startIndex; i < endIndex; ++i) {
50
- if (
51
- nodePoints[i].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
52
- i + 2 < endIndex &&
53
- nodePoints[i + 1].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
54
- nodePoints[i + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
55
- )
56
- return i + 3
57
- }
58
- return null
59
- }
@@ -1,109 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
3
-
4
- const includedTags = [
5
- 'address',
6
- 'article',
7
- 'aside',
8
- 'base',
9
- 'basefont',
10
- 'blockquote',
11
- 'body',
12
- 'caption',
13
- 'center',
14
- 'col',
15
- 'colgroup',
16
- 'dd',
17
- 'details',
18
- 'dialog',
19
- 'dir',
20
- 'div',
21
- 'dl',
22
- 'dt',
23
- 'fieldset',
24
- 'figcaption',
25
- 'figure',
26
- 'footer',
27
- 'form',
28
- 'frame',
29
- 'frameset',
30
- 'h1',
31
- 'h2',
32
- 'h3',
33
- 'h4',
34
- 'h5',
35
- 'h6',
36
- 'head',
37
- 'header',
38
- 'hr',
39
- 'html',
40
- 'iframe',
41
- 'legend',
42
- 'li',
43
- 'link',
44
- 'main',
45
- 'menu',
46
- 'menuitem',
47
- 'nav',
48
- 'noframes',
49
- 'ol',
50
- 'optgroup',
51
- 'option',
52
- 'p',
53
- 'param',
54
- 'section',
55
- 'source',
56
- 'summary',
57
- 'table',
58
- 'tbody',
59
- 'td',
60
- 'tfoot',
61
- 'th',
62
- 'thead',
63
- 'title',
64
- 'tr',
65
- 'track',
66
- 'ul',
67
- ]
68
-
69
- /**
70
- * Eat block html start condition 6:
71
- *
72
- * line begins the string `<` or `</` followed by one of
73
- * the strings (case-insensitive) `address`, `article`, `aside`, `base`,
74
- * `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`,
75
- * `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`,
76
- * `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`,
77
- * `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`,
78
- * `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`,
79
- * `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`,
80
- * `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`,
81
- * `ul`, followed by whitespace, the end of the line, the string `>`,
82
- * or the string `/>`.
83
- *
84
- * @param nodePoints
85
- * @param startIndex
86
- * @param endIndex
87
- * @see https://github.github.com/gfm/#start-condition
88
- */
89
- export function eatStartCondition6(
90
- nodePoints: ReadonlyArray<INodePoint>,
91
- startIndex: number,
92
- endIndex: number,
93
- tagName: string,
94
- ): number | null {
95
- if (!includedTags.includes(tagName)) return null
96
- if (startIndex >= endIndex) return endIndex
97
-
98
- const c = nodePoints[startIndex].codePoint
99
- if (isWhitespaceCharacter(c) || c === AsciiCodePoint.CLOSE_ANGLE) return startIndex + 1
100
-
101
- if (
102
- c === AsciiCodePoint.SLASH &&
103
- startIndex + 1 < endIndex &&
104
- nodePoints[startIndex + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
105
- )
106
- return startIndex + 2
107
-
108
- return null
109
- }
@@ -1,54 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
3
- import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
4
- import { eatHTMLAttribute } from '../util/eat-html-attribute'
5
-
6
- const excludedTags = ['pre', 'script', 'style']
7
-
8
- /**
9
- * Eat block html start condition 7:
10
- *
11
- * line begins with a complete open tag (with any tag name
12
- * other than `script`, `style`, or `pre`) or a complete closing tag,
13
- * followed only by whitespace or the end of the line
14
- *
15
- * @param nodePoints
16
- * @param startIndex
17
- * @param endIndex
18
- * @see https://github.github.com/gfm/#start-condition
19
- */
20
- export function eatStartCondition7(
21
- nodePoints: ReadonlyArray<INodePoint>,
22
- startIndex: number,
23
- endIndex: number,
24
- tagName: string,
25
- potentialOpenTag: boolean,
26
- ): number | null {
27
- if (excludedTags.includes(tagName) || startIndex >= endIndex) return null
28
-
29
- let i = startIndex
30
-
31
- if (potentialOpenTag) {
32
- // Try to resolve an open tag.
33
- for (; i < endIndex; ) {
34
- const result = eatHTMLAttribute(nodePoints, i, endIndex)
35
- if (result == null) break
36
- i = result.nextIndex
37
- }
38
-
39
- i = eatOptionalWhitespaces(nodePoints, i, endIndex)
40
- if (i >= endIndex) return null
41
-
42
- if (nodePoints[i].codePoint === AsciiCodePoint.SLASH) i += 1
43
- } else {
44
- // Try to resolve a closing tag.
45
- i = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
46
- }
47
-
48
- if (i >= endIndex || nodePoints[i].codePoint !== AsciiCodePoint.CLOSE_ANGLE) return null
49
-
50
- for (i += 1; i < endIndex; ++i) {
51
- if (!isWhitespaceCharacter(nodePoints[i].codePoint)) return null
52
- }
53
- return endIndex
54
- }
package/src/index.ts DELETED
@@ -1,11 +0,0 @@
1
- export * from './util/eat-html-attribute'
2
- export * from './util/eat-html-tagname'
3
- export { match as htmlBlockMatch } from './match'
4
- export { parse as htmlBlockParse } from './parse'
5
- export { HtmlBlockTokenizer, HtmlBlockTokenizer as default } from './tokenizer'
6
- export { uniqueName as HtmlBlockTokenizerName } from './types'
7
- export type {
8
- IThis as IHtmlBlockHookContext,
9
- IToken as IHtmlBlockToken,
10
- ITokenizerProps as IHtmlBlockTokenizerProps,
11
- } from './types'
package/src/match.ts DELETED
@@ -1,231 +0,0 @@
1
- import { HtmlType } from '@yozora/ast'
2
- import type { INodeInterval, INodePoint } from '@yozora/character'
3
- import { AsciiCodePoint, calcStringFromNodePoints } from '@yozora/character'
4
- import type {
5
- IBlockToken,
6
- IMatchBlockHookCreator,
7
- IPhrasingContentLine,
8
- IResultOfEatAndInterruptPreviousSibling,
9
- IResultOfEatContinuationText,
10
- IResultOfEatOpener,
11
- } from '@yozora/core-tokenizer'
12
- import { calcEndPoint, calcStartPoint, eatOptionalWhitespaces } from '@yozora/core-tokenizer'
13
- import { eatEndCondition1, eatStartCondition1 } from './conditions/c1'
14
- import { eatEndCondition2, eatStartCondition2 } from './conditions/c2'
15
- import { eatEndCondition3, eatStartCondition3 } from './conditions/c3'
16
- import { eatEndCondition4, eatStartCondition4 } from './conditions/c4'
17
- import { eatEndCondition5, eatStartCondition5 } from './conditions/c5'
18
- import { eatStartCondition6 } from './conditions/c6'
19
- import { eatStartCondition7 } from './conditions/c7'
20
- import type { HtmlBlockConditionType, IThis, IToken, T } from './types'
21
- import { eatHTMLTagName } from './util/eat-html-tagname'
22
-
23
- /**
24
- * An HTML block is a group of lines that is treated as raw HTML (and will not
25
- * be escaped in HTML output).
26
- *
27
- * @see https://github.com/syntax-tree/mdast#html
28
- * @see https://github.github.com/gfm/#html-blocks
29
- */
30
- export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
31
- return {
32
- isContainingBlock: false,
33
- eatOpener,
34
- eatAndInterruptPreviousSibling,
35
- eatContinuationText,
36
- }
37
-
38
- function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
39
- /**
40
- * The opening tag can be indented 1-3 spaces, but not 4.
41
- * @see https://github.github.com/gfm/#example-152
42
- */
43
- if (line.countOfPrecedeSpaces >= 4) return null
44
-
45
- const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
46
- if (
47
- firstNonWhitespaceIndex >= endIndex ||
48
- nodePoints[firstNonWhitespaceIndex].codePoint !== AsciiCodePoint.OPEN_ANGLE
49
- )
50
- return null
51
-
52
- const i = firstNonWhitespaceIndex + 1
53
- const startResult = eatStartCondition(nodePoints, i, endIndex)
54
- if (startResult == null) return null
55
-
56
- const { condition } = startResult
57
-
58
- /**
59
- * The end tag can occur on the same line as the start tag.
60
- * @see https://github.github.com/gfm/#example-145
61
- * @see https://github.github.com/gfm/#example-146
62
- */
63
- let saturated = false
64
- if (condition !== 6 && condition !== 7) {
65
- const endResult = eatEndCondition(nodePoints, startResult.nextIndex, endIndex, condition)
66
- if (endResult != null) saturated = true
67
- }
68
-
69
- const nextIndex = endIndex
70
- const token: IToken = {
71
- nodeType: HtmlType,
72
- position: {
73
- start: calcStartPoint(nodePoints, startIndex),
74
- end: calcEndPoint(nodePoints, nextIndex - 1),
75
- },
76
- condition,
77
- lines: [line],
78
- }
79
- return { token, nextIndex, saturated }
80
- }
81
-
82
- function eatAndInterruptPreviousSibling(
83
- line: Readonly<IPhrasingContentLine>,
84
- prevSiblingToken: Readonly<IBlockToken>,
85
- ): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
86
- const result = eatOpener(line)
87
- if (result == null || result.token.condition === 7) return null
88
- const { token, nextIndex } = result
89
- return {
90
- token,
91
- nextIndex,
92
- remainingSibling: prevSiblingToken,
93
- }
94
- }
95
-
96
- function eatContinuationText(
97
- line: Readonly<IPhrasingContentLine>,
98
- token: IToken,
99
- ): IResultOfEatContinuationText {
100
- const { nodePoints, endIndex, firstNonWhitespaceIndex } = line
101
- const nextIndex = eatEndCondition(
102
- nodePoints,
103
- firstNonWhitespaceIndex,
104
- endIndex,
105
- token.condition,
106
- )
107
- if (nextIndex === -1) return { status: 'notMatched' }
108
-
109
- token.lines.push(line)
110
- if (nextIndex != null) return { status: 'closing', nextIndex: endIndex }
111
- return { status: 'opening', nextIndex: endIndex }
112
- }
113
-
114
- function eatStartCondition(
115
- nodePoints: ReadonlyArray<INodePoint>,
116
- startIndex: number,
117
- endIndex: number,
118
- ): { condition: HtmlBlockConditionType; nextIndex: number } | null {
119
- let nextIndex: number | null = null
120
- if (startIndex >= endIndex) return null
121
-
122
- // condition 2
123
- nextIndex = eatStartCondition2(nodePoints, startIndex, endIndex)
124
- if (nextIndex != null) return { nextIndex, condition: 2 }
125
-
126
- // condition 3
127
- nextIndex = eatStartCondition3(nodePoints, startIndex, endIndex)
128
- if (nextIndex != null) return { nextIndex, condition: 3 }
129
-
130
- // condition 4
131
- nextIndex = eatStartCondition4(nodePoints, startIndex, endIndex)
132
- if (nextIndex != null) return { nextIndex, condition: 4 }
133
-
134
- // condition 5
135
- nextIndex = eatStartCondition5(nodePoints, startIndex, endIndex)
136
- if (nextIndex != null) return { nextIndex, condition: 5 }
137
-
138
- if (nodePoints[startIndex].codePoint !== AsciiCodePoint.SLASH) {
139
- const tagNameStartIndex = startIndex
140
- const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
141
- if (tagNameEndIndex == null) return null
142
-
143
- const tagNameInterval: INodeInterval = {
144
- startIndex: tagNameStartIndex,
145
- endIndex: tagNameEndIndex,
146
- }
147
- const rawTagName = calcStringFromNodePoints(
148
- nodePoints,
149
- tagNameInterval.startIndex,
150
- tagNameInterval.endIndex,
151
- )
152
- const tagName = rawTagName.toLowerCase()
153
-
154
- // condition1
155
- nextIndex = eatStartCondition1(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
156
- if (nextIndex != null) return { nextIndex, condition: 1 }
157
-
158
- // condition 6
159
- nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
160
- if (nextIndex != null) return { nextIndex, condition: 6 }
161
-
162
- // condition 7
163
- nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, true)
164
- if (nextIndex != null) return { nextIndex, condition: 7 }
165
-
166
- // fallback
167
- return null
168
- }
169
-
170
- const tagNameStartIndex = startIndex + 1
171
- const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
172
- if (tagNameEndIndex == null) return null
173
-
174
- const tagNameInterval: INodeInterval = {
175
- startIndex: tagNameStartIndex,
176
- endIndex: tagNameEndIndex,
177
- }
178
- const rawTagName = calcStringFromNodePoints(
179
- nodePoints,
180
- tagNameInterval.startIndex,
181
- tagNameInterval.endIndex,
182
- )
183
- const tagName = rawTagName.toLowerCase()
184
-
185
- // condition 6
186
- nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
187
- if (nextIndex != null) return { nextIndex, condition: 6 }
188
-
189
- // condition 7.
190
- nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, false)
191
- if (nextIndex != null) return { nextIndex, condition: 7 }
192
-
193
- // fallback
194
- return null
195
- }
196
-
197
- function eatEndCondition(
198
- nodePoints: ReadonlyArray<INodePoint>,
199
- startIndex: number,
200
- endIndex: number,
201
- condition: HtmlBlockConditionType,
202
- ): -1 | number | null {
203
- switch (condition) {
204
- case 1: {
205
- const nextIndex = eatEndCondition1(nodePoints, startIndex, endIndex)
206
- return nextIndex == null ? null : endIndex
207
- }
208
- case 2: {
209
- const nextIndex = eatEndCondition2(nodePoints, startIndex, endIndex)
210
- return nextIndex == null ? null : endIndex
211
- }
212
- case 3: {
213
- const nextIndex = eatEndCondition3(nodePoints, startIndex, endIndex)
214
- return nextIndex == null ? null : endIndex
215
- }
216
- case 4: {
217
- const nextIndex = eatEndCondition4(nodePoints, startIndex, endIndex)
218
- return nextIndex == null ? null : endIndex
219
- }
220
- case 5: {
221
- const nextIndex = eatEndCondition5(nodePoints, startIndex, endIndex)
222
- return nextIndex == null ? null : endIndex
223
- }
224
- case 6:
225
- case 7: {
226
- const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
227
- return firstNonWhitespaceIndex >= endIndex ? -1 : null
228
- }
229
- }
230
- }
231
- }
package/src/parse.ts DELETED
@@ -1,18 +0,0 @@
1
- import { calcStringFromNodePoints } from '@yozora/character'
2
- import type { IParseBlockHookCreator } from '@yozora/core-tokenizer'
3
- import { mergeContentLinesFaithfully } from '@yozora/core-tokenizer'
4
- import type { INode, IThis, IToken, T } from './types'
5
-
6
- export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
7
- return {
8
- parse: tokens =>
9
- tokens.map(token => {
10
- // Try to build phrasingContent
11
- const contents = mergeContentLinesFaithfully(token.lines)
12
- const node: INode = api.shouldReservePosition
13
- ? { type: 'html', position: token.position, value: calcStringFromNodePoints(contents) }
14
- : { type: 'html', value: calcStringFromNodePoints(contents) }
15
- return node
16
- }),
17
- }
18
- }
package/src/tokenizer.ts DELETED
@@ -1,32 +0,0 @@
1
- import type {
2
- IBlockTokenizer,
3
- IMatchBlockHookCreator,
4
- IParseBlockHookCreator,
5
- } from '@yozora/core-tokenizer'
6
- import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
7
- import { match } from './match'
8
- import { parse } from './parse'
9
- import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
10
- import { uniqueName } from './types'
11
-
12
- /**
13
- * Lexical Analyzer for HtmlBlock.
14
- * @see https://github.com/syntax-tree/mdast#html
15
- * @see https://github.github.com/gfm/#html-blocks
16
- */
17
- export class HtmlBlockTokenizer
18
- extends BaseBlockTokenizer<T, IToken, INode, IThis>
19
- implements IBlockTokenizer<T, IToken, INode, IThis>
20
- {
21
- /* istanbul ignore next */
22
- constructor(props: ITokenizerProps = {}) {
23
- super({
24
- name: props.name ?? uniqueName,
25
- priority: props.priority ?? TokenizerPriority.ATOMIC,
26
- })
27
- }
28
-
29
- public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
30
-
31
- public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
32
- }
package/src/types.ts DELETED
@@ -1,74 +0,0 @@
1
- import type { Html, HtmlType } from '@yozora/ast'
2
- import type {
3
- IBaseBlockTokenizerProps,
4
- IPartialBlockToken,
5
- IPhrasingContentLine,
6
- ITokenizer,
7
- } from '@yozora/core-tokenizer'
8
-
9
- export type T = HtmlType
10
- export type INode = Html
11
- export const uniqueName = '@yozora/tokenizer-html-block'
12
-
13
- export type HtmlBlockConditionType = 1 | 2 | 3 | 4 | 5 | 6 | 7
14
-
15
- /**
16
- * Middle state during the whole match and parse phase.
17
- */
18
- export interface IToken extends IPartialBlockToken<T> {
19
- /**
20
- * Number of conditions defined in GFM:
21
- *
22
- * 1. Start condition: line begins with the string `<script`, `<pre`, or
23
- * `<style` (case-insensitive), followed by whitespace, the string `>`,
24
- * or the end of the line.
25
- *
26
- * End condition: line contains an end tag `</script>`, `</pre>`,
27
- * or `</style>` (case-insensitive; it need not match the start tag).
28
- *
29
- * 2. Start condition: line begins with the string `<!--`.
30
- * End condition: line contains the string `-->`.
31
- *
32
- * 3. Start condition: line begins with the string `<?`.
33
- * End condition: line contains the string `?>`.
34
- *
35
- * 4. Start condition: line begins with the string `<!` followed by an
36
- * uppercase ASCII letter.
37
- *
38
- * End condition: line contains the character >.
39
- *
40
- * 5. Start condition: line begins with the string `<![CDATA[`.
41
- * End condition: line contains the string `]]>`.
42
- *
43
- * 6. Start condition: line begins the string `<` or `</` followed by one of
44
- * the strings (case-insensitive) `address`, `article`, `aside`, `base`,
45
- * `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`,
46
- * `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`,
47
- * `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`,
48
- * `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`,
49
- * `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`,
50
- * `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`,
51
- * `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`,
52
- * `ul`, followed by whitespace, the end of the line, the string `>`,
53
- * or the string `/>`.
54
- *
55
- * End condition: line is followed by a blank line.
56
- *
57
- * 7. Start condition: line begins with a complete open tag (with any tag name
58
- * other than `script`, `style`, or `pre`) or a complete closing tag,
59
- * followed only by whitespace or the end of the line.
60
- *
61
- * End condition: line is followed by a blank line.
62
- *
63
- * @see https://github.github.com/gfm/#start-condition
64
- */
65
- condition: HtmlBlockConditionType
66
- /**
67
- * Contents
68
- */
69
- lines: Array<Readonly<IPhrasingContentLine>>
70
- }
71
-
72
- export type IThis = ITokenizer
73
-
74
- export type ITokenizerProps = Partial<IBaseBlockTokenizerProps>
@@ -1,170 +0,0 @@
1
- import type { INodeInterval, INodePoint } from '@yozora/character'
2
- import {
3
- AsciiCodePoint,
4
- isAsciiDigitCharacter,
5
- isAsciiLetter,
6
- isWhitespaceCharacter,
7
- } from '@yozora/character'
8
- import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
9
-
10
- export interface RawHTMLAttribute {
11
- /**
12
- * Attribute name.
13
- */
14
- name: INodeInterval
15
- /**
16
- * Attribute value.
17
- */
18
- value?: INodeInterval
19
- }
20
-
21
- /**
22
- * An attribute consists of whitespace, an attribute name, and an optional
23
- * attribute value specification.
24
- *
25
- * @param nodePoints
26
- * @param startIndex
27
- * @param endIndex
28
- * @see https://github.github.com/gfm/#attribute
29
- */
30
- export function eatHTMLAttribute(
31
- nodePoints: ReadonlyArray<INodePoint>,
32
- startIndex: number,
33
- endIndex: number,
34
- ): { attribute: RawHTMLAttribute; nextIndex: number } | null {
35
- // eat whitespace.
36
- let i = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
37
- if (i <= startIndex || i >= endIndex) return null
38
-
39
- /**
40
- * Eat attribute name.
41
- *
42
- * An attribute name consists of an ASCII letter, `_`, or `:`, followed by
43
- * zero or more ASCII letters, digits, `_`, `.`, `:`, or `-`.
44
- * @see https://github.github.com/gfm/#attribute-name
45
- */
46
- const attrNameStartIndex = i
47
- let c = nodePoints[i].codePoint
48
- if (!isAsciiLetter(c) && c !== AsciiCodePoint.UNDERSCORE && c !== AsciiCodePoint.COLON)
49
- return null
50
- for (i = attrNameStartIndex + 1; i < endIndex; ++i) {
51
- c = nodePoints[i].codePoint
52
- if (
53
- isAsciiLetter(c) ||
54
- isAsciiDigitCharacter(c) ||
55
- c === AsciiCodePoint.UNDERSCORE ||
56
- c === AsciiCodePoint.DOT ||
57
- c === AsciiCodePoint.COLON ||
58
- c === AsciiCodePoint.MINUS_SIGN
59
- )
60
- continue
61
- break
62
- }
63
- const attrNameEndIndex = i
64
-
65
- const attribute: RawHTMLAttribute = {
66
- name: {
67
- startIndex: attrNameStartIndex,
68
- endIndex: attrNameEndIndex,
69
- },
70
- }
71
-
72
- /**
73
- * Eat attribute value.
74
- *
75
- * An attribute value specification consists of optional whitespace, a `=`
76
- * character, optional whitespace, and an attribute value.
77
- *
78
- * An attribute value consists of an unquoted attribute value, a single-quoted
79
- * attribute value, or a double-quoted attribute value.
80
- *
81
- * @see https://github.github.com/gfm/#attribute-value-specification
82
- * @see https://github.github.com/gfm/#attribute-value
83
- */
84
- i = eatOptionalWhitespaces(nodePoints, attrNameEndIndex, endIndex)
85
- if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.EQUALS_SIGN) {
86
- i = eatOptionalWhitespaces(nodePoints, i + 1, endIndex)
87
- if (i < endIndex) {
88
- const mark = nodePoints[i].codePoint
89
- switch (mark) {
90
- /**
91
- * A double-quoted attribute value consists of `"`, zero or more
92
- * characters not including `"`, and a final `"`.
93
- * @see https://github.github.com/gfm/#double-quoted-attribute-value
94
- */
95
- case AsciiCodePoint.DOUBLE_QUOTE: {
96
- const attrValueStartIndex = i + 1
97
- for (i = attrValueStartIndex; i < endIndex; ++i) {
98
- c = nodePoints[i].codePoint
99
- if (c === AsciiCodePoint.DOUBLE_QUOTE) break
100
- }
101
- const attrValueEndIndex = i
102
- if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.DOUBLE_QUOTE) {
103
- attribute.value = {
104
- startIndex: attrValueStartIndex,
105
- endIndex: attrValueEndIndex,
106
- }
107
- i += 1
108
- }
109
- break
110
- }
111
- /**
112
- * A single-quoted attribute value consists of `'`, zero or more
113
- * characters not including `'`, and a final `'`.
114
- * @see https://github.github.com/gfm/#single-quoted-attribute-value
115
- */
116
- case AsciiCodePoint.SINGLE_QUOTE: {
117
- const attrValueStartIndex = i + 1
118
- for (i = attrValueStartIndex; i < endIndex; ++i) {
119
- c = nodePoints[i].codePoint
120
- if (c === AsciiCodePoint.SINGLE_QUOTE) break
121
- }
122
- const attrValueEndIndex = i
123
- if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.SINGLE_QUOTE) {
124
- attribute.value = {
125
- startIndex: attrValueStartIndex,
126
- endIndex: attrValueEndIndex,
127
- }
128
- i += 1
129
- }
130
- break
131
- }
132
- /**
133
- * An unquoted attribute value is a nonempty string of characters not
134
- * including whitespace, `"`, `'`, `=`, `<`, `>`, or `\``.
135
- * @see https://github.github.com/gfm/#unquoted-attribute-value
136
- */
137
- default: {
138
- const attrValueStartIndex = i
139
- for (; i < endIndex; ++i) {
140
- c = nodePoints[i].codePoint
141
- if (
142
- isWhitespaceCharacter(c) ||
143
- c === AsciiCodePoint.DOUBLE_QUOTE ||
144
- c === AsciiCodePoint.SINGLE_QUOTE ||
145
- c === AsciiCodePoint.EQUALS_SIGN ||
146
- c === AsciiCodePoint.OPEN_ANGLE ||
147
- c === AsciiCodePoint.CLOSE_ANGLE ||
148
- c === AsciiCodePoint.BACKTICK
149
- )
150
- break
151
- }
152
- const attrValueEndIndex = i
153
- if (attrValueEndIndex > attrValueStartIndex) {
154
- attribute.value = {
155
- startIndex: attrValueStartIndex,
156
- endIndex: attrValueEndIndex,
157
- }
158
- }
159
- break
160
- }
161
- }
162
-
163
- if (attribute.value != null) {
164
- return { attribute, nextIndex: i }
165
- }
166
- }
167
- }
168
-
169
- return { attribute, nextIndex: attrNameEndIndex }
170
- }
@@ -1,27 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, isAsciiDigitCharacter, isAsciiLetter } from '@yozora/character'
3
-
4
- /**
5
- * A tag name consists of an ASCII letter followed by zero or more ASCII
6
- * letters, digits, or hyphens (-).
7
- *
8
- * @param nodePoints
9
- * @param startIndex
10
- * @param endIndex
11
- * @see https://github.github.com/gfm/#tag-name
12
- */
13
- export function eatHTMLTagName(
14
- nodePoints: ReadonlyArray<INodePoint>,
15
- startIndex: number,
16
- endIndex: number,
17
- ): number | null {
18
- if (startIndex >= endIndex || !isAsciiLetter(nodePoints[startIndex].codePoint)) return null
19
-
20
- let i = startIndex
21
- for (; i < endIndex; ++i) {
22
- const c = nodePoints[i].codePoint
23
- if (isAsciiLetter(c) || isAsciiDigitCharacter(c) || c === AsciiCodePoint.MINUS_SIGN) continue
24
- return i
25
- }
26
- return i
27
- }