@yozora/tokenizer-html-block 2.1.2 → 2.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/conditions/c1.ts +0 -79
- package/src/conditions/c2.ts +0 -55
- package/src/conditions/c3.ts +0 -48
- package/src/conditions/c4.ts +0 -48
- package/src/conditions/c5.ts +0 -59
- package/src/conditions/c6.ts +0 -109
- package/src/conditions/c7.ts +0 -54
- package/src/index.ts +0 -11
- package/src/match.ts +0 -231
- package/src/parse.ts +0 -18
- package/src/tokenizer.ts +0 -32
- package/src/types.ts +0 -74
- package/src/util/eat-html-attribute.ts +0 -170
- package/src/util/eat-html-tagname.ts +0 -27
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-html-block",
|
|
3
|
-
"version": "2.1.2",
|
|
3
|
+
"version": "2.1.4",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
},
|
|
28
28
|
"files": [
|
|
29
29
|
"lib/",
|
|
30
|
-
"
|
|
30
|
+
"lib/**/*.map",
|
|
31
31
|
"package.json",
|
|
32
32
|
"CHANGELOG.md",
|
|
33
33
|
"LICENSE",
|
|
@@ -39,9 +39,9 @@
|
|
|
39
39
|
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
|
-
"@yozora/ast": "^2.1.2",
|
|
43
|
-
"@yozora/character": "^2.1.2",
|
|
44
|
-
"@yozora/core-tokenizer": "^2.1.2"
|
|
42
|
+
"@yozora/ast": "^2.1.4",
|
|
43
|
+
"@yozora/character": "^2.1.4",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.1.4"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "aa464ed1e3cd84892773a833910cfc53a556bf5f"
|
|
47
47
|
}
|
package/src/conditions/c1.ts
DELETED
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, calcStringFromNodePoints, isWhitespaceCharacter } from '@yozora/character'
|
|
3
|
-
import { eatHTMLTagName } from '../util/eat-html-tagname'
|
|
4
|
-
|
|
5
|
-
const includedTags = ['pre', 'script', 'style']
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Eat block html start condition 1:
|
|
9
|
-
*
|
|
10
|
-
* line begins with the string `<script`, `<pre`, or
|
|
11
|
-
* `<style` (case-insensitive), followed by whitespace, the string `>`,
|
|
12
|
-
* or the end of the line.
|
|
13
|
-
*
|
|
14
|
-
* @param nodePoints
|
|
15
|
-
* @param startIndex
|
|
16
|
-
* @param endIndex
|
|
17
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
18
|
-
*/
|
|
19
|
-
export function eatStartCondition1(
|
|
20
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
21
|
-
startIndex: number,
|
|
22
|
-
endIndex: number,
|
|
23
|
-
tagName: string,
|
|
24
|
-
): number | null {
|
|
25
|
-
if (!includedTags.includes(tagName)) return null
|
|
26
|
-
if (startIndex >= endIndex) return endIndex
|
|
27
|
-
|
|
28
|
-
const c = nodePoints[startIndex].codePoint
|
|
29
|
-
if (isWhitespaceCharacter(c) || c === AsciiCodePoint.CLOSE_ANGLE) {
|
|
30
|
-
return startIndex + 1
|
|
31
|
-
}
|
|
32
|
-
return null
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
/**
|
|
36
|
-
* Eat block html end condition 1:
|
|
37
|
-
*
|
|
38
|
-
* line contains an end tag `</script>`, `</pre>`,
|
|
39
|
-
* or `</style>` (case-insensitive; it need not match the start tag).
|
|
40
|
-
*
|
|
41
|
-
* @param nodePoints
|
|
42
|
-
* @param startIndex
|
|
43
|
-
* @param endIndex
|
|
44
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
45
|
-
*/
|
|
46
|
-
export function eatEndCondition1(
|
|
47
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
48
|
-
startIndex: number,
|
|
49
|
-
endIndex: number,
|
|
50
|
-
): number | null {
|
|
51
|
-
for (let i = startIndex; i < endIndex; ++i) {
|
|
52
|
-
if (
|
|
53
|
-
nodePoints[i].codePoint === AsciiCodePoint.OPEN_ANGLE &&
|
|
54
|
-
i + 3 < endIndex &&
|
|
55
|
-
nodePoints[i + 1].codePoint === AsciiCodePoint.SLASH
|
|
56
|
-
) {
|
|
57
|
-
const tagNameStartIndex = i + 2
|
|
58
|
-
const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
|
|
59
|
-
if (
|
|
60
|
-
tagNameEndIndex == null ||
|
|
61
|
-
tagNameEndIndex >= endIndex ||
|
|
62
|
-
nodePoints[tagNameEndIndex].codePoint !== AsciiCodePoint.CLOSE_ANGLE
|
|
63
|
-
) {
|
|
64
|
-
i += 1
|
|
65
|
-
continue
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
const rawTagName = calcStringFromNodePoints(
|
|
69
|
-
nodePoints,
|
|
70
|
-
tagNameStartIndex,
|
|
71
|
-
tagNameEndIndex,
|
|
72
|
-
true,
|
|
73
|
-
)
|
|
74
|
-
const tagName = rawTagName.toLowerCase()
|
|
75
|
-
if (includedTags.includes(tagName)) return tagNameEndIndex
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
return null
|
|
79
|
-
}
|
package/src/conditions/c2.ts
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Eat block html start condition 2:
|
|
6
|
-
*
|
|
7
|
-
* Line begins with the string `<!--`.
|
|
8
|
-
*
|
|
9
|
-
* @param nodePoints
|
|
10
|
-
* @param startIndex
|
|
11
|
-
* @param endIndex
|
|
12
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
13
|
-
*/
|
|
14
|
-
export function eatStartCondition2(
|
|
15
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
16
|
-
startIndex: number,
|
|
17
|
-
endIndex: number,
|
|
18
|
-
): number | null {
|
|
19
|
-
const i = startIndex
|
|
20
|
-
if (
|
|
21
|
-
i + 2 < endIndex &&
|
|
22
|
-
nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
|
|
23
|
-
nodePoints[i + 1].codePoint === AsciiCodePoint.MINUS_SIGN &&
|
|
24
|
-
nodePoints[i + 2].codePoint === AsciiCodePoint.MINUS_SIGN
|
|
25
|
-
)
|
|
26
|
-
return i + 3
|
|
27
|
-
return null
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Eat block html end condition 2:
|
|
32
|
-
*
|
|
33
|
-
* line contains the string `-->`.
|
|
34
|
-
*
|
|
35
|
-
* @param nodePoints
|
|
36
|
-
* @param startIndex
|
|
37
|
-
* @param endIndex
|
|
38
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
39
|
-
*/
|
|
40
|
-
export function eatEndCondition2(
|
|
41
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
42
|
-
startIndex: number,
|
|
43
|
-
endIndex: number,
|
|
44
|
-
): number | null {
|
|
45
|
-
for (let i = startIndex; i < endIndex; ++i) {
|
|
46
|
-
if (
|
|
47
|
-
nodePoints[i].codePoint === AsciiCodePoint.MINUS_SIGN &&
|
|
48
|
-
i + 2 < endIndex &&
|
|
49
|
-
nodePoints[i + 1].codePoint === AsciiCodePoint.MINUS_SIGN &&
|
|
50
|
-
nodePoints[i + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
|
|
51
|
-
)
|
|
52
|
-
return i + 3
|
|
53
|
-
}
|
|
54
|
-
return null
|
|
55
|
-
}
|
package/src/conditions/c3.ts
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Eat block html start condition 3:
|
|
6
|
-
*
|
|
7
|
-
* line begins with the string `<?`.
|
|
8
|
-
*
|
|
9
|
-
* @param nodePoints
|
|
10
|
-
* @param startIndex
|
|
11
|
-
* @param endIndex
|
|
12
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
13
|
-
*/
|
|
14
|
-
export function eatStartCondition3(
|
|
15
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
16
|
-
startIndex: number,
|
|
17
|
-
endIndex: number,
|
|
18
|
-
): number | null {
|
|
19
|
-
const i = startIndex
|
|
20
|
-
if (i < endIndex && nodePoints[i].codePoint === AsciiCodePoint.QUESTION_MARK) return i + 1
|
|
21
|
-
return null
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
/**
|
|
25
|
-
* Eat block html end condition 3:
|
|
26
|
-
*
|
|
27
|
-
* line contains the string `?>`.
|
|
28
|
-
*
|
|
29
|
-
* @param nodePoints
|
|
30
|
-
* @param startIndex
|
|
31
|
-
* @param endIndex
|
|
32
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
33
|
-
*/
|
|
34
|
-
export function eatEndCondition3(
|
|
35
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
36
|
-
startIndex: number,
|
|
37
|
-
endIndex: number,
|
|
38
|
-
): number | null {
|
|
39
|
-
for (let i = startIndex; i < endIndex; ++i) {
|
|
40
|
-
if (
|
|
41
|
-
nodePoints[i].codePoint === AsciiCodePoint.QUESTION_MARK &&
|
|
42
|
-
i + 1 < endIndex &&
|
|
43
|
-
nodePoints[i + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
|
|
44
|
-
)
|
|
45
|
-
return i + 2
|
|
46
|
-
}
|
|
47
|
-
return null
|
|
48
|
-
}
|
package/src/conditions/c4.ts
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, isAsciiUpperLetter } from '@yozora/character'
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Eat block html start condition 4:
|
|
6
|
-
*
|
|
7
|
-
* line begins with the string `<!` followed by an uppercase ASCII letter.
|
|
8
|
-
*
|
|
9
|
-
* @param nodePoints
|
|
10
|
-
* @param startIndex
|
|
11
|
-
* @param endIndex
|
|
12
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
13
|
-
*/
|
|
14
|
-
export function eatStartCondition4(
|
|
15
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
16
|
-
startIndex: number,
|
|
17
|
-
endIndex: number,
|
|
18
|
-
): number | null {
|
|
19
|
-
const i = startIndex
|
|
20
|
-
if (
|
|
21
|
-
i + 1 < endIndex &&
|
|
22
|
-
nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
|
|
23
|
-
isAsciiUpperLetter(nodePoints[i + 1].codePoint)
|
|
24
|
-
)
|
|
25
|
-
return i + 2
|
|
26
|
-
return null
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* Eat block html end condition 4:
|
|
31
|
-
*
|
|
32
|
-
* line contains the character >.
|
|
33
|
-
*
|
|
34
|
-
* @param nodePoints
|
|
35
|
-
* @param startIndex
|
|
36
|
-
* @param endIndex
|
|
37
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
38
|
-
*/
|
|
39
|
-
export function eatEndCondition4(
|
|
40
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
41
|
-
startIndex: number,
|
|
42
|
-
endIndex: number,
|
|
43
|
-
): number | null {
|
|
44
|
-
for (let i = startIndex; i < endIndex; ++i) {
|
|
45
|
-
if (nodePoints[i].codePoint === AsciiCodePoint.CLOSE_ANGLE) return i + 1
|
|
46
|
-
}
|
|
47
|
-
return null
|
|
48
|
-
}
|
package/src/conditions/c5.ts
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Eat block html start condition 5:
|
|
6
|
-
*
|
|
7
|
-
* line begins with the string `<![CDATA[`.
|
|
8
|
-
*
|
|
9
|
-
* @param nodePoints
|
|
10
|
-
* @param startIndex
|
|
11
|
-
* @param endIndex
|
|
12
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
13
|
-
*/
|
|
14
|
-
export function eatStartCondition5(
|
|
15
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
16
|
-
startIndex: number,
|
|
17
|
-
endIndex: number,
|
|
18
|
-
): number | null {
|
|
19
|
-
const i = startIndex
|
|
20
|
-
if (
|
|
21
|
-
i + 6 < endIndex &&
|
|
22
|
-
nodePoints[i].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
|
|
23
|
-
nodePoints[i + 1].codePoint === AsciiCodePoint.OPEN_BRACKET &&
|
|
24
|
-
nodePoints[i + 2].codePoint === AsciiCodePoint.UPPERCASE_C &&
|
|
25
|
-
nodePoints[i + 3].codePoint === AsciiCodePoint.UPPERCASE_D &&
|
|
26
|
-
nodePoints[i + 4].codePoint === AsciiCodePoint.UPPERCASE_A &&
|
|
27
|
-
nodePoints[i + 5].codePoint === AsciiCodePoint.UPPERCASE_T &&
|
|
28
|
-
nodePoints[i + 6].codePoint === AsciiCodePoint.UPPERCASE_A
|
|
29
|
-
)
|
|
30
|
-
return i + 7
|
|
31
|
-
return null
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Eat block html end condition 5:
|
|
36
|
-
*
|
|
37
|
-
* line contains the string `]]>`.
|
|
38
|
-
*
|
|
39
|
-
* @param nodePoints
|
|
40
|
-
* @param startIndex
|
|
41
|
-
* @param endIndex
|
|
42
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
43
|
-
*/
|
|
44
|
-
export function eatEndCondition5(
|
|
45
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
46
|
-
startIndex: number,
|
|
47
|
-
endIndex: number,
|
|
48
|
-
): number | null {
|
|
49
|
-
for (let i = startIndex; i < endIndex; ++i) {
|
|
50
|
-
if (
|
|
51
|
-
nodePoints[i].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
|
|
52
|
-
i + 2 < endIndex &&
|
|
53
|
-
nodePoints[i + 1].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
|
|
54
|
-
nodePoints[i + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
|
|
55
|
-
)
|
|
56
|
-
return i + 3
|
|
57
|
-
}
|
|
58
|
-
return null
|
|
59
|
-
}
|
package/src/conditions/c6.ts
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
|
|
3
|
-
|
|
4
|
-
const includedTags = [
|
|
5
|
-
'address',
|
|
6
|
-
'article',
|
|
7
|
-
'aside',
|
|
8
|
-
'base',
|
|
9
|
-
'basefont',
|
|
10
|
-
'blockquote',
|
|
11
|
-
'body',
|
|
12
|
-
'caption',
|
|
13
|
-
'center',
|
|
14
|
-
'col',
|
|
15
|
-
'colgroup',
|
|
16
|
-
'dd',
|
|
17
|
-
'details',
|
|
18
|
-
'dialog',
|
|
19
|
-
'dir',
|
|
20
|
-
'div',
|
|
21
|
-
'dl',
|
|
22
|
-
'dt',
|
|
23
|
-
'fieldset',
|
|
24
|
-
'figcaption',
|
|
25
|
-
'figure',
|
|
26
|
-
'footer',
|
|
27
|
-
'form',
|
|
28
|
-
'frame',
|
|
29
|
-
'frameset',
|
|
30
|
-
'h1',
|
|
31
|
-
'h2',
|
|
32
|
-
'h3',
|
|
33
|
-
'h4',
|
|
34
|
-
'h5',
|
|
35
|
-
'h6',
|
|
36
|
-
'head',
|
|
37
|
-
'header',
|
|
38
|
-
'hr',
|
|
39
|
-
'html',
|
|
40
|
-
'iframe',
|
|
41
|
-
'legend',
|
|
42
|
-
'li',
|
|
43
|
-
'link',
|
|
44
|
-
'main',
|
|
45
|
-
'menu',
|
|
46
|
-
'menuitem',
|
|
47
|
-
'nav',
|
|
48
|
-
'noframes',
|
|
49
|
-
'ol',
|
|
50
|
-
'optgroup',
|
|
51
|
-
'option',
|
|
52
|
-
'p',
|
|
53
|
-
'param',
|
|
54
|
-
'section',
|
|
55
|
-
'source',
|
|
56
|
-
'summary',
|
|
57
|
-
'table',
|
|
58
|
-
'tbody',
|
|
59
|
-
'td',
|
|
60
|
-
'tfoot',
|
|
61
|
-
'th',
|
|
62
|
-
'thead',
|
|
63
|
-
'title',
|
|
64
|
-
'tr',
|
|
65
|
-
'track',
|
|
66
|
-
'ul',
|
|
67
|
-
]
|
|
68
|
-
|
|
69
|
-
/**
|
|
70
|
-
* Eat block html start condition 6:
|
|
71
|
-
*
|
|
72
|
-
* line begins the string `<` or `</` followed by one of
|
|
73
|
-
* the strings (case-insensitive) `address`, `article`, `aside`, `base`,
|
|
74
|
-
* `basefont`, `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`,
|
|
75
|
-
* `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`,
|
|
76
|
-
* `figcaption`, `figure`, `footer`, `form`, `frame`, `frameset`, `h1`,
|
|
77
|
-
* `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`,
|
|
78
|
-
* `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`,
|
|
79
|
-
* `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`,
|
|
80
|
-
* `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`,
|
|
81
|
-
* `ul`, followed by whitespace, the end of the line, the string `>`,
|
|
82
|
-
* or the string `/>`.
|
|
83
|
-
*
|
|
84
|
-
* @param nodePoints
|
|
85
|
-
* @param startIndex
|
|
86
|
-
* @param endIndex
|
|
87
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
88
|
-
*/
|
|
89
|
-
export function eatStartCondition6(
|
|
90
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
91
|
-
startIndex: number,
|
|
92
|
-
endIndex: number,
|
|
93
|
-
tagName: string,
|
|
94
|
-
): number | null {
|
|
95
|
-
if (!includedTags.includes(tagName)) return null
|
|
96
|
-
if (startIndex >= endIndex) return endIndex
|
|
97
|
-
|
|
98
|
-
const c = nodePoints[startIndex].codePoint
|
|
99
|
-
if (isWhitespaceCharacter(c) || c === AsciiCodePoint.CLOSE_ANGLE) return startIndex + 1
|
|
100
|
-
|
|
101
|
-
if (
|
|
102
|
-
c === AsciiCodePoint.SLASH &&
|
|
103
|
-
startIndex + 1 < endIndex &&
|
|
104
|
-
nodePoints[startIndex + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
|
|
105
|
-
)
|
|
106
|
-
return startIndex + 2
|
|
107
|
-
|
|
108
|
-
return null
|
|
109
|
-
}
|
package/src/conditions/c7.ts
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
|
|
3
|
-
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
4
|
-
import { eatHTMLAttribute } from '../util/eat-html-attribute'
|
|
5
|
-
|
|
6
|
-
const excludedTags = ['pre', 'script', 'style']
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Eat block html start condition 7:
|
|
10
|
-
*
|
|
11
|
-
* line begins with a complete open tag (with any tag name
|
|
12
|
-
* other than `script`, `style`, or `pre`) or a complete closing tag,
|
|
13
|
-
* followed only by whitespace or the end of the line
|
|
14
|
-
*
|
|
15
|
-
* @param nodePoints
|
|
16
|
-
* @param startIndex
|
|
17
|
-
* @param endIndex
|
|
18
|
-
* @see https://github.github.com/gfm/#start-condition
|
|
19
|
-
*/
|
|
20
|
-
export function eatStartCondition7(
|
|
21
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
22
|
-
startIndex: number,
|
|
23
|
-
endIndex: number,
|
|
24
|
-
tagName: string,
|
|
25
|
-
potentialOpenTag: boolean,
|
|
26
|
-
): number | null {
|
|
27
|
-
if (excludedTags.includes(tagName) || startIndex >= endIndex) return null
|
|
28
|
-
|
|
29
|
-
let i = startIndex
|
|
30
|
-
|
|
31
|
-
if (potentialOpenTag) {
|
|
32
|
-
// Try to resolve an open tag.
|
|
33
|
-
for (; i < endIndex; ) {
|
|
34
|
-
const result = eatHTMLAttribute(nodePoints, i, endIndex)
|
|
35
|
-
if (result == null) break
|
|
36
|
-
i = result.nextIndex
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
i = eatOptionalWhitespaces(nodePoints, i, endIndex)
|
|
40
|
-
if (i >= endIndex) return null
|
|
41
|
-
|
|
42
|
-
if (nodePoints[i].codePoint === AsciiCodePoint.SLASH) i += 1
|
|
43
|
-
} else {
|
|
44
|
-
// Try to resolve a closing tag.
|
|
45
|
-
i = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
if (i >= endIndex || nodePoints[i].codePoint !== AsciiCodePoint.CLOSE_ANGLE) return null
|
|
49
|
-
|
|
50
|
-
for (i += 1; i < endIndex; ++i) {
|
|
51
|
-
if (!isWhitespaceCharacter(nodePoints[i].codePoint)) return null
|
|
52
|
-
}
|
|
53
|
-
return endIndex
|
|
54
|
-
}
|
package/src/index.ts
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
export * from './util/eat-html-attribute'
|
|
2
|
-
export * from './util/eat-html-tagname'
|
|
3
|
-
export { match as htmlBlockMatch } from './match'
|
|
4
|
-
export { parse as htmlBlockParse } from './parse'
|
|
5
|
-
export { HtmlBlockTokenizer, HtmlBlockTokenizer as default } from './tokenizer'
|
|
6
|
-
export { uniqueName as HtmlBlockTokenizerName } from './types'
|
|
7
|
-
export type {
|
|
8
|
-
IThis as IHtmlBlockHookContext,
|
|
9
|
-
IToken as IHtmlBlockToken,
|
|
10
|
-
ITokenizerProps as IHtmlBlockTokenizerProps,
|
|
11
|
-
} from './types'
|
package/src/match.ts
DELETED
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
import { HtmlType } from '@yozora/ast'
|
|
2
|
-
import type { INodeInterval, INodePoint } from '@yozora/character'
|
|
3
|
-
import { AsciiCodePoint, calcStringFromNodePoints } from '@yozora/character'
|
|
4
|
-
import type {
|
|
5
|
-
IBlockToken,
|
|
6
|
-
IMatchBlockHookCreator,
|
|
7
|
-
IPhrasingContentLine,
|
|
8
|
-
IResultOfEatAndInterruptPreviousSibling,
|
|
9
|
-
IResultOfEatContinuationText,
|
|
10
|
-
IResultOfEatOpener,
|
|
11
|
-
} from '@yozora/core-tokenizer'
|
|
12
|
-
import { calcEndPoint, calcStartPoint, eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
13
|
-
import { eatEndCondition1, eatStartCondition1 } from './conditions/c1'
|
|
14
|
-
import { eatEndCondition2, eatStartCondition2 } from './conditions/c2'
|
|
15
|
-
import { eatEndCondition3, eatStartCondition3 } from './conditions/c3'
|
|
16
|
-
import { eatEndCondition4, eatStartCondition4 } from './conditions/c4'
|
|
17
|
-
import { eatEndCondition5, eatStartCondition5 } from './conditions/c5'
|
|
18
|
-
import { eatStartCondition6 } from './conditions/c6'
|
|
19
|
-
import { eatStartCondition7 } from './conditions/c7'
|
|
20
|
-
import type { HtmlBlockConditionType, IThis, IToken, T } from './types'
|
|
21
|
-
import { eatHTMLTagName } from './util/eat-html-tagname'
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* An HTML block is a group of lines that is treated as raw HTML (and will not
|
|
25
|
-
* be escaped in HTML output).
|
|
26
|
-
*
|
|
27
|
-
* @see https://github.com/syntax-tree/mdast#html
|
|
28
|
-
* @see https://github.github.com/gfm/#html-blocks
|
|
29
|
-
*/
|
|
30
|
-
export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
|
|
31
|
-
return {
|
|
32
|
-
isContainingBlock: false,
|
|
33
|
-
eatOpener,
|
|
34
|
-
eatAndInterruptPreviousSibling,
|
|
35
|
-
eatContinuationText,
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
|
|
39
|
-
/**
|
|
40
|
-
* The opening tag can be indented 1-3 spaces, but not 4.
|
|
41
|
-
* @see https://github.github.com/gfm/#example-152
|
|
42
|
-
*/
|
|
43
|
-
if (line.countOfPrecedeSpaces >= 4) return null
|
|
44
|
-
|
|
45
|
-
const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
|
|
46
|
-
if (
|
|
47
|
-
firstNonWhitespaceIndex >= endIndex ||
|
|
48
|
-
nodePoints[firstNonWhitespaceIndex].codePoint !== AsciiCodePoint.OPEN_ANGLE
|
|
49
|
-
)
|
|
50
|
-
return null
|
|
51
|
-
|
|
52
|
-
const i = firstNonWhitespaceIndex + 1
|
|
53
|
-
const startResult = eatStartCondition(nodePoints, i, endIndex)
|
|
54
|
-
if (startResult == null) return null
|
|
55
|
-
|
|
56
|
-
const { condition } = startResult
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* The end tag can occur on the same line as the start tag.
|
|
60
|
-
* @see https://github.github.com/gfm/#example-145
|
|
61
|
-
* @see https://github.github.com/gfm/#example-146
|
|
62
|
-
*/
|
|
63
|
-
let saturated = false
|
|
64
|
-
if (condition !== 6 && condition !== 7) {
|
|
65
|
-
const endResult = eatEndCondition(nodePoints, startResult.nextIndex, endIndex, condition)
|
|
66
|
-
if (endResult != null) saturated = true
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
const nextIndex = endIndex
|
|
70
|
-
const token: IToken = {
|
|
71
|
-
nodeType: HtmlType,
|
|
72
|
-
position: {
|
|
73
|
-
start: calcStartPoint(nodePoints, startIndex),
|
|
74
|
-
end: calcEndPoint(nodePoints, nextIndex - 1),
|
|
75
|
-
},
|
|
76
|
-
condition,
|
|
77
|
-
lines: [line],
|
|
78
|
-
}
|
|
79
|
-
return { token, nextIndex, saturated }
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
function eatAndInterruptPreviousSibling(
|
|
83
|
-
line: Readonly<IPhrasingContentLine>,
|
|
84
|
-
prevSiblingToken: Readonly<IBlockToken>,
|
|
85
|
-
): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
|
|
86
|
-
const result = eatOpener(line)
|
|
87
|
-
if (result == null || result.token.condition === 7) return null
|
|
88
|
-
const { token, nextIndex } = result
|
|
89
|
-
return {
|
|
90
|
-
token,
|
|
91
|
-
nextIndex,
|
|
92
|
-
remainingSibling: prevSiblingToken,
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
function eatContinuationText(
|
|
97
|
-
line: Readonly<IPhrasingContentLine>,
|
|
98
|
-
token: IToken,
|
|
99
|
-
): IResultOfEatContinuationText {
|
|
100
|
-
const { nodePoints, endIndex, firstNonWhitespaceIndex } = line
|
|
101
|
-
const nextIndex = eatEndCondition(
|
|
102
|
-
nodePoints,
|
|
103
|
-
firstNonWhitespaceIndex,
|
|
104
|
-
endIndex,
|
|
105
|
-
token.condition,
|
|
106
|
-
)
|
|
107
|
-
if (nextIndex === -1) return { status: 'notMatched' }
|
|
108
|
-
|
|
109
|
-
token.lines.push(line)
|
|
110
|
-
if (nextIndex != null) return { status: 'closing', nextIndex: endIndex }
|
|
111
|
-
return { status: 'opening', nextIndex: endIndex }
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
function eatStartCondition(
|
|
115
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
116
|
-
startIndex: number,
|
|
117
|
-
endIndex: number,
|
|
118
|
-
): { condition: HtmlBlockConditionType; nextIndex: number } | null {
|
|
119
|
-
let nextIndex: number | null = null
|
|
120
|
-
if (startIndex >= endIndex) return null
|
|
121
|
-
|
|
122
|
-
// condition 2
|
|
123
|
-
nextIndex = eatStartCondition2(nodePoints, startIndex, endIndex)
|
|
124
|
-
if (nextIndex != null) return { nextIndex, condition: 2 }
|
|
125
|
-
|
|
126
|
-
// condition 3
|
|
127
|
-
nextIndex = eatStartCondition3(nodePoints, startIndex, endIndex)
|
|
128
|
-
if (nextIndex != null) return { nextIndex, condition: 3 }
|
|
129
|
-
|
|
130
|
-
// condition 4
|
|
131
|
-
nextIndex = eatStartCondition4(nodePoints, startIndex, endIndex)
|
|
132
|
-
if (nextIndex != null) return { nextIndex, condition: 4 }
|
|
133
|
-
|
|
134
|
-
// condition 5
|
|
135
|
-
nextIndex = eatStartCondition5(nodePoints, startIndex, endIndex)
|
|
136
|
-
if (nextIndex != null) return { nextIndex, condition: 5 }
|
|
137
|
-
|
|
138
|
-
if (nodePoints[startIndex].codePoint !== AsciiCodePoint.SLASH) {
|
|
139
|
-
const tagNameStartIndex = startIndex
|
|
140
|
-
const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
|
|
141
|
-
if (tagNameEndIndex == null) return null
|
|
142
|
-
|
|
143
|
-
const tagNameInterval: INodeInterval = {
|
|
144
|
-
startIndex: tagNameStartIndex,
|
|
145
|
-
endIndex: tagNameEndIndex,
|
|
146
|
-
}
|
|
147
|
-
const rawTagName = calcStringFromNodePoints(
|
|
148
|
-
nodePoints,
|
|
149
|
-
tagNameInterval.startIndex,
|
|
150
|
-
tagNameInterval.endIndex,
|
|
151
|
-
)
|
|
152
|
-
const tagName = rawTagName.toLowerCase()
|
|
153
|
-
|
|
154
|
-
// condition1
|
|
155
|
-
nextIndex = eatStartCondition1(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
|
|
156
|
-
if (nextIndex != null) return { nextIndex, condition: 1 }
|
|
157
|
-
|
|
158
|
-
// condition 6
|
|
159
|
-
nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
|
|
160
|
-
if (nextIndex != null) return { nextIndex, condition: 6 }
|
|
161
|
-
|
|
162
|
-
// condition 7
|
|
163
|
-
nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, true)
|
|
164
|
-
if (nextIndex != null) return { nextIndex, condition: 7 }
|
|
165
|
-
|
|
166
|
-
// fallback
|
|
167
|
-
return null
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
const tagNameStartIndex = startIndex + 1
|
|
171
|
-
const tagNameEndIndex = eatHTMLTagName(nodePoints, tagNameStartIndex, endIndex)
|
|
172
|
-
if (tagNameEndIndex == null) return null
|
|
173
|
-
|
|
174
|
-
const tagNameInterval: INodeInterval = {
|
|
175
|
-
startIndex: tagNameStartIndex,
|
|
176
|
-
endIndex: tagNameEndIndex,
|
|
177
|
-
}
|
|
178
|
-
const rawTagName = calcStringFromNodePoints(
|
|
179
|
-
nodePoints,
|
|
180
|
-
tagNameInterval.startIndex,
|
|
181
|
-
tagNameInterval.endIndex,
|
|
182
|
-
)
|
|
183
|
-
const tagName = rawTagName.toLowerCase()
|
|
184
|
-
|
|
185
|
-
// condition 6
|
|
186
|
-
nextIndex = eatStartCondition6(nodePoints, tagNameInterval.endIndex, endIndex, tagName)
|
|
187
|
-
if (nextIndex != null) return { nextIndex, condition: 6 }
|
|
188
|
-
|
|
189
|
-
// condition 7.
|
|
190
|
-
nextIndex = eatStartCondition7(nodePoints, tagNameInterval.endIndex, endIndex, tagName, false)
|
|
191
|
-
if (nextIndex != null) return { nextIndex, condition: 7 }
|
|
192
|
-
|
|
193
|
-
// fallback
|
|
194
|
-
return null
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
function eatEndCondition(
|
|
198
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
199
|
-
startIndex: number,
|
|
200
|
-
endIndex: number,
|
|
201
|
-
condition: HtmlBlockConditionType,
|
|
202
|
-
): -1 | number | null {
|
|
203
|
-
switch (condition) {
|
|
204
|
-
case 1: {
|
|
205
|
-
const nextIndex = eatEndCondition1(nodePoints, startIndex, endIndex)
|
|
206
|
-
return nextIndex == null ? null : endIndex
|
|
207
|
-
}
|
|
208
|
-
case 2: {
|
|
209
|
-
const nextIndex = eatEndCondition2(nodePoints, startIndex, endIndex)
|
|
210
|
-
return nextIndex == null ? null : endIndex
|
|
211
|
-
}
|
|
212
|
-
case 3: {
|
|
213
|
-
const nextIndex = eatEndCondition3(nodePoints, startIndex, endIndex)
|
|
214
|
-
return nextIndex == null ? null : endIndex
|
|
215
|
-
}
|
|
216
|
-
case 4: {
|
|
217
|
-
const nextIndex = eatEndCondition4(nodePoints, startIndex, endIndex)
|
|
218
|
-
return nextIndex == null ? null : endIndex
|
|
219
|
-
}
|
|
220
|
-
case 5: {
|
|
221
|
-
const nextIndex = eatEndCondition5(nodePoints, startIndex, endIndex)
|
|
222
|
-
return nextIndex == null ? null : endIndex
|
|
223
|
-
}
|
|
224
|
-
case 6:
|
|
225
|
-
case 7: {
|
|
226
|
-
const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
|
|
227
|
-
return firstNonWhitespaceIndex >= endIndex ? -1 : null
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
}
|
package/src/parse.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import { calcStringFromNodePoints } from '@yozora/character'
|
|
2
|
-
import type { IParseBlockHookCreator } from '@yozora/core-tokenizer'
|
|
3
|
-
import { mergeContentLinesFaithfully } from '@yozora/core-tokenizer'
|
|
4
|
-
import type { INode, IThis, IToken, T } from './types'
|
|
5
|
-
|
|
6
|
-
export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
|
|
7
|
-
return {
|
|
8
|
-
parse: tokens =>
|
|
9
|
-
tokens.map(token => {
|
|
10
|
-
// Try to build phrasingContent
|
|
11
|
-
const contents = mergeContentLinesFaithfully(token.lines)
|
|
12
|
-
const node: INode = api.shouldReservePosition
|
|
13
|
-
? { type: 'html', position: token.position, value: calcStringFromNodePoints(contents) }
|
|
14
|
-
: { type: 'html', value: calcStringFromNodePoints(contents) }
|
|
15
|
-
return node
|
|
16
|
-
}),
|
|
17
|
-
}
|
|
18
|
-
}
|
package/src/tokenizer.ts
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
IBlockTokenizer,
|
|
3
|
-
IMatchBlockHookCreator,
|
|
4
|
-
IParseBlockHookCreator,
|
|
5
|
-
} from '@yozora/core-tokenizer'
|
|
6
|
-
import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
-
import { match } from './match'
|
|
8
|
-
import { parse } from './parse'
|
|
9
|
-
import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
10
|
-
import { uniqueName } from './types'
|
|
11
|
-
|
|
12
|
-
/**
|
|
13
|
-
* Lexical Analyzer for HtmlBlock.
|
|
14
|
-
* @see https://github.com/syntax-tree/mdast#html
|
|
15
|
-
* @see https://github.github.com/gfm/#html-blocks
|
|
16
|
-
*/
|
|
17
|
-
export class HtmlBlockTokenizer
|
|
18
|
-
extends BaseBlockTokenizer<T, IToken, INode, IThis>
|
|
19
|
-
implements IBlockTokenizer<T, IToken, INode, IThis>
|
|
20
|
-
{
|
|
21
|
-
/* istanbul ignore next */
|
|
22
|
-
constructor(props: ITokenizerProps = {}) {
|
|
23
|
-
super({
|
|
24
|
-
name: props.name ?? uniqueName,
|
|
25
|
-
priority: props.priority ?? TokenizerPriority.ATOMIC,
|
|
26
|
-
})
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
|
|
30
|
-
|
|
31
|
-
public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
|
|
32
|
-
}
|
package/src/types.ts
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import type { Html, HtmlType } from '@yozora/ast'
|
|
2
|
-
import type {
|
|
3
|
-
IBaseBlockTokenizerProps,
|
|
4
|
-
IPartialBlockToken,
|
|
5
|
-
IPhrasingContentLine,
|
|
6
|
-
ITokenizer,
|
|
7
|
-
} from '@yozora/core-tokenizer'
|
|
8
|
-
|
|
9
|
-
export type T = HtmlType
|
|
10
|
-
export type INode = Html
|
|
11
|
-
export const uniqueName = '@yozora/tokenizer-html-block'
|
|
12
|
-
|
|
13
|
-
export type HtmlBlockConditionType = 1 | 2 | 3 | 4 | 5 | 6 | 7
|
|
14
|
-
|
|
15
|
-
/**
 * Intermediate state carried through the match and parse phases.
 */
export interface IToken extends IPartialBlockToken<T> {
  /**
   * Which of the seven GFM HTML-block conditions opened this block.
   *
   * 1. Start: the line begins with `<script`, `<pre`, or `<style`
   *    (case-insensitive), followed by whitespace, `>`, or the end of the
   *    line.
   *
   *    End: the line contains an end tag `</script>`, `</pre>`, or
   *    `</style>` (case-insensitive; it need not match the start tag).
   *
   * 2. Start: the line begins with `<!--`.
   *
   *    End: the line contains `-->`.
   *
   * 3. Start: the line begins with `<?`.
   *
   *    End: the line contains `?>`.
   *
   * 4. Start: the line begins with `<!` followed by an uppercase ASCII
   *    letter.
   *
   *    End: the line contains the character `>`.
   *
   * 5. Start: the line begins with `<![CDATA[`.
   *
   *    End: the line contains `]]>`.
   *
   * 6. Start: the line begins with `<` or `</` followed by one of the
   *    strings (case-insensitive) `address`, `article`, `aside`, `base`,
   *    `basefont`, `blockquote`, `body`, `caption`, `center`, `col`,
   *    `colgroup`, `dd`, `details`, `dialog`, `dir`, `div`, `dl`, `dt`,
   *    `fieldset`, `figcaption`, `figure`, `footer`, `form`, `frame`,
   *    `frameset`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`,
   *    `hr`, `html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`,
   *    `menuitem`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`,
   *    `param`, `section`, `source`, `summary`, `table`, `tbody`, `td`,
   *    `tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed by
   *    whitespace, the end of the line, `>`, or `/>`.
   *
   *    End: the line is followed by a blank line.
   *
   * 7. Start: the line begins with a complete open tag (with any tag name
   *    other than `script`, `style`, or `pre`) or a complete closing tag,
   *    followed only by whitespace or the end of the line.
   *
   *    End: the line is followed by a blank line.
   *
   * @see https://github.github.com/gfm/#start-condition
   */
  condition: HtmlBlockConditionType
  /**
   * Lines collected for this block.
   */
  lines: Readonly<IPhrasingContentLine>[]
}
|
|
71
|
-
|
|
72
|
-
/** Type used as the tokenizer's `IThis` type parameter. */
export type IThis = ITokenizer

/** Constructor options of the tokenizer; every base property is optional. */
export type ITokenizerProps = Partial<IBaseBlockTokenizerProps>
|
|
@@ -1,170 +0,0 @@
|
|
|
1
|
-
import type { INodeInterval, INodePoint } from '@yozora/character'
|
|
2
|
-
import {
|
|
3
|
-
AsciiCodePoint,
|
|
4
|
-
isAsciiDigitCharacter,
|
|
5
|
-
isAsciiLetter,
|
|
6
|
-
isWhitespaceCharacter,
|
|
7
|
-
} from '@yozora/character'
|
|
8
|
-
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
9
|
-
|
|
10
|
-
/**
 * Position information of one HTML attribute, expressed as index intervals
 * into the node-point array it was read from.
 */
export interface RawHTMLAttribute {
  /**
   * Interval covering the attribute name.
   */
  name: INodeInterval
  /**
   * Interval covering the attribute value (quotes excluded); absent when the
   * attribute has no value.
   */
  value?: INodeInterval
}
|
|
20
|
-
|
|
21
|
-
/**
 * Try to read one HTML attribute (leading whitespace, a name, and an optional
 * value specification) from `nodePoints[startIndex, endIndex)`.
 *
 * An attribute name consists of an ASCII letter, `_`, or `:`, followed by zero
 * or more ASCII letters, digits, `_`, `.`, `:`, or `-`. A value specification
 * is optional whitespace, `=`, optional whitespace, and then an unquoted,
 * single-quoted, or double-quoted value.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index at which the attribute's leading whitespace starts
 * @param endIndex    exclusive upper bound of the scan
 * @returns `null` when no whitespace was consumed, nothing follows the
 *          whitespace, or the first non-whitespace character cannot start an
 *          attribute name. Otherwise the attribute plus the index right after
 *          it; when the value specification is missing or malformed, only the
 *          name is reported and `nextIndex` points just past the name.
 * @see https://github.github.com/gfm/#attribute
 * @see https://github.github.com/gfm/#attribute-name
 * @see https://github.github.com/gfm/#attribute-value-specification
 * @see https://github.github.com/gfm/#attribute-value
 */
export function eatHTMLAttribute(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): { attribute: RawHTMLAttribute; nextIndex: number } | null {
  // At least one whitespace must be consumed, and it must not reach the end.
  const nameStart = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
  if (nameStart <= startIndex || nameStart >= endIndex) return null

  const head = nodePoints[nameStart].codePoint
  const headIsValid =
    isAsciiLetter(head) || head === AsciiCodePoint.UNDERSCORE || head === AsciiCodePoint.COLON
  if (!headIsValid) return null

  let nameEnd = nameStart + 1
  while (nameEnd < endIndex && isAttributeNameTail(nodePoints[nameEnd].codePoint)) {
    nameEnd += 1
  }

  const attribute: RawHTMLAttribute = {
    name: { startIndex: nameStart, endIndex: nameEnd },
  }

  // Without a `=` after the name there is no value specification at all.
  let cursor = eatOptionalWhitespaces(nodePoints, nameEnd, endIndex)
  if (cursor >= endIndex || nodePoints[cursor].codePoint !== AsciiCodePoint.EQUALS_SIGN) {
    return { attribute, nextIndex: nameEnd }
  }

  cursor = eatOptionalWhitespaces(nodePoints, cursor + 1, endIndex)
  if (cursor >= endIndex) return { attribute, nextIndex: nameEnd }

  const mark = nodePoints[cursor].codePoint
  if (mark === AsciiCodePoint.DOUBLE_QUOTE || mark === AsciiCodePoint.SINGLE_QUOTE) {
    // Quoted value: everything up to (not including) the next identical quote.
    const valueStart = cursor + 1
    let valueEnd = valueStart
    while (valueEnd < endIndex && nodePoints[valueEnd].codePoint !== mark) valueEnd += 1

    // An unterminated quote invalidates the value; fall back to the bare name.
    if (valueEnd >= endIndex) return { attribute, nextIndex: nameEnd }

    attribute.value = { startIndex: valueStart, endIndex: valueEnd }
    return { attribute, nextIndex: valueEnd + 1 }
  }

  // Unquoted value: a non-empty run of characters up to the first terminator.
  const valueStart = cursor
  let valueEnd = valueStart
  while (valueEnd < endIndex && !isUnquotedValueTerminator(nodePoints[valueEnd].codePoint)) {
    valueEnd += 1
  }
  if (valueEnd <= valueStart) return { attribute, nextIndex: nameEnd }

  attribute.value = { startIndex: valueStart, endIndex: valueEnd }
  return { attribute, nextIndex: valueEnd }
}

/** Whether `c` may appear after the first character of an attribute name. */
function isAttributeNameTail(c: number): boolean {
  return (
    isAsciiLetter(c) ||
    isAsciiDigitCharacter(c) ||
    c === AsciiCodePoint.UNDERSCORE ||
    c === AsciiCodePoint.DOT ||
    c === AsciiCodePoint.COLON ||
    c === AsciiCodePoint.MINUS_SIGN
  )
}

/** Whether `c` ends an unquoted attribute value (whitespace, `"`, `'`, `=`, `<`, `>`, or a backtick). */
function isUnquotedValueTerminator(c: number): boolean {
  return (
    isWhitespaceCharacter(c) ||
    c === AsciiCodePoint.DOUBLE_QUOTE ||
    c === AsciiCodePoint.SINGLE_QUOTE ||
    c === AsciiCodePoint.EQUALS_SIGN ||
    c === AsciiCodePoint.OPEN_ANGLE ||
    c === AsciiCodePoint.CLOSE_ANGLE ||
    c === AsciiCodePoint.BACKTICK
  )
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, isAsciiDigitCharacter, isAsciiLetter } from '@yozora/character'
|
|
3
|
-
|
|
4
|
-
/**
 * Read an HTML tag name starting at `startIndex`.
 *
 * A tag name is an ASCII letter followed by zero or more ASCII letters,
 * digits, or hyphens (`-`).
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the expected first character of the tag name
 * @param endIndex    exclusive upper bound of the scan
 * @returns the index just past the tag name, or `null` when the range is empty
 *          or does not start with an ASCII letter
 * @see https://github.github.com/gfm/#tag-name
 */
export function eatHTMLTagName(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): number | null {
  const startsWithLetter =
    startIndex < endIndex && isAsciiLetter(nodePoints[startIndex].codePoint)
  if (!startsWithLetter) return null

  // The first character is already validated, so continue from the second.
  let cursor = startIndex + 1
  while (cursor < endIndex) {
    const codePoint = nodePoints[cursor].codePoint
    const isNameChar =
      isAsciiLetter(codePoint) ||
      isAsciiDigitCharacter(codePoint) ||
      codePoint === AsciiCodePoint.MINUS_SIGN
    if (!isNameChar) break
    cursor += 1
  }
  return cursor
}
|