@yozora/tokenizer-html-inline 2.0.4 → 2.0.5-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/{index.js → index.cjs} +9 -7
- package/lib/esm/{index.js → index.mjs} +8 -6
- package/lib/types/index.d.ts +6 -6
- package/package.json +19 -15
- package/src/index.ts +15 -0
- package/src/match.ts +101 -0
- package/src/parse.ts +20 -0
- package/src/tokenizer.ts +37 -0
- package/src/types.ts +55 -0
- package/src/util/cdata.ts +64 -0
- package/src/util/closing.ts +58 -0
- package/src/util/comment.ts +89 -0
- package/src/util/declaration.ts +77 -0
- package/src/util/instruction.ts +56 -0
- package/src/util/open.ts +88 -0
|
@@ -230,7 +230,10 @@ const match = function (api) {
|
|
|
230
230
|
return null;
|
|
231
231
|
}
|
|
232
232
|
function processSingleDelimiter(delimiter) {
|
|
233
|
-
const token =
|
|
233
|
+
const token = {
|
|
234
|
+
...delimiter,
|
|
235
|
+
nodeType: ast.HtmlType,
|
|
236
|
+
};
|
|
234
237
|
return [token];
|
|
235
238
|
}
|
|
236
239
|
};
|
|
@@ -273,19 +276,18 @@ const uniqueName = '@yozora/tokenizer-html-inline';
|
|
|
273
276
|
|
|
274
277
|
class HtmlInlineTokenizer extends coreTokenizer.BaseInlineTokenizer {
|
|
275
278
|
constructor(props = {}) {
|
|
276
|
-
var _a, _b;
|
|
277
279
|
super({
|
|
278
|
-
name:
|
|
279
|
-
priority:
|
|
280
|
+
name: props.name ?? uniqueName,
|
|
281
|
+
priority: props.priority ?? coreTokenizer.TokenizerPriority.ATOMIC,
|
|
280
282
|
});
|
|
281
|
-
this.match = match;
|
|
282
|
-
this.parse = parse;
|
|
283
283
|
}
|
|
284
|
+
match = match;
|
|
285
|
+
parse = parse;
|
|
284
286
|
}
|
|
285
287
|
|
|
286
288
|
exports.HtmlInlineTokenizer = HtmlInlineTokenizer;
|
|
287
289
|
exports.HtmlInlineTokenizerName = uniqueName;
|
|
288
|
-
exports
|
|
290
|
+
exports.default = HtmlInlineTokenizer;
|
|
289
291
|
exports.eatHtmlInlineCDataDelimiter = eatHtmlInlineCDataDelimiter;
|
|
290
292
|
exports.eatHtmlInlineClosingDelimiter = eatHtmlInlineClosingDelimiter;
|
|
291
293
|
exports.eatHtmlInlineCommentDelimiter = eatHtmlInlineCommentDelimiter;
|
|
@@ -226,7 +226,10 @@ const match = function (api) {
|
|
|
226
226
|
return null;
|
|
227
227
|
}
|
|
228
228
|
function processSingleDelimiter(delimiter) {
|
|
229
|
-
const token =
|
|
229
|
+
const token = {
|
|
230
|
+
...delimiter,
|
|
231
|
+
nodeType: HtmlType,
|
|
232
|
+
};
|
|
230
233
|
return [token];
|
|
231
234
|
}
|
|
232
235
|
};
|
|
@@ -269,14 +272,13 @@ const uniqueName = '@yozora/tokenizer-html-inline';
|
|
|
269
272
|
|
|
270
273
|
class HtmlInlineTokenizer extends BaseInlineTokenizer {
|
|
271
274
|
constructor(props = {}) {
|
|
272
|
-
var _a, _b;
|
|
273
275
|
super({
|
|
274
|
-
name:
|
|
275
|
-
priority:
|
|
276
|
+
name: props.name ?? uniqueName,
|
|
277
|
+
priority: props.priority ?? TokenizerPriority.ATOMIC,
|
|
276
278
|
});
|
|
277
|
-
this.match = match;
|
|
278
|
-
this.parse = parse;
|
|
279
279
|
}
|
|
280
|
+
match = match;
|
|
281
|
+
parse = parse;
|
|
280
282
|
}
|
|
281
283
|
|
|
282
284
|
export { HtmlInlineTokenizer, uniqueName as HtmlInlineTokenizerName, HtmlInlineTokenizer as default, eatHtmlInlineCDataDelimiter, eatHtmlInlineClosingDelimiter, eatHtmlInlineCommentDelimiter, eatHtmlInlineDeclarationDelimiter, eatHtmlInlineInstructionDelimiter, eatHtmlInlineTokenOpenDelimiter, match as htmlInlineMatch, parse as htmlInlineParse };
|
package/lib/types/index.d.ts
CHANGED
|
@@ -149,8 +149,8 @@ interface IHtmlInlineOpenDelimiter extends IYastTokenDelimiter, IHtmlInlineOpenT
|
|
|
149
149
|
*/
|
|
150
150
|
declare function eatHtmlInlineTokenOpenDelimiter(nodePoints: ReadonlyArray<INodePoint>, startIndex: number, endIndex: number): IHtmlInlineOpenDelimiter | null;
|
|
151
151
|
|
|
152
|
-
|
|
153
|
-
|
|
152
|
+
type T = HtmlType;
|
|
153
|
+
type INode = Html;
|
|
154
154
|
declare const uniqueName = "@yozora/tokenizer-html-inline";
|
|
155
155
|
/**
|
|
156
156
|
* Text between '<' and '>' that looks like an HTML tag is parsed as a raw
|
|
@@ -160,10 +160,10 @@ declare const uniqueName = "@yozora/tokenizer-html-inline";
|
|
|
160
160
|
*
|
|
161
161
|
* @see https://github.github.com/gfm/#raw-html
|
|
162
162
|
*/
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
163
|
+
type IToken = IPartialYastInlineToken<T> & (IHtmlInlineOpenTokenData | IHtmlInlineClosingTokenData | IHtmlInlineCommentTokenData | IHtmlInlineInstructionTokenData | IHtmlInlineDeclarationTokenData | IHtmlInlineCDataTokenData);
|
|
164
|
+
type IDelimiter = IHtmlInlineOpenDelimiter | IHtmlInlineClosingDelimiter | IHtmlInlineCommentDelimiter | IHtmlInlineInstructionDelimiter | IHtmlInlineDeclarationDelimiter | IHtmlInlineCDataDelimiter;
|
|
165
|
+
type IThis = ITokenizer;
|
|
166
|
+
type ITokenizerProps = Partial<IBaseInlineTokenizerProps>;
|
|
167
167
|
|
|
168
168
|
/**
|
|
169
169
|
* Text between '<' and '>' that looks like an HTML tag is parsed as a raw HTML
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-html-inline",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.5-alpha.0",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -11,34 +11,38 @@
|
|
|
11
11
|
"directory": "tokenizers/html-inline"
|
|
12
12
|
},
|
|
13
13
|
"homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/html-inline",
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
"types": "./lib/types/index.d.ts",
|
|
17
|
+
"import": "./lib/esm/index.mjs",
|
|
18
|
+
"require": "./lib/cjs/index.cjs"
|
|
19
|
+
},
|
|
20
|
+
"source": "./src/index.ts",
|
|
21
|
+
"types": "./lib/types/index.d.ts",
|
|
22
|
+
"main": "./lib/cjs/index.cjs",
|
|
23
|
+
"module": "./lib/esm/index.mjs",
|
|
18
24
|
"license": "MIT",
|
|
19
25
|
"engines": {
|
|
20
26
|
"node": ">= 16.0.0"
|
|
21
27
|
},
|
|
22
28
|
"files": [
|
|
23
29
|
"lib/",
|
|
24
|
-
"
|
|
25
|
-
"!lib/**/*.d.ts.map",
|
|
30
|
+
"src/",
|
|
26
31
|
"package.json",
|
|
27
32
|
"CHANGELOG.md",
|
|
28
33
|
"LICENSE",
|
|
29
34
|
"README.md"
|
|
30
35
|
],
|
|
31
36
|
"scripts": {
|
|
32
|
-
"build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.
|
|
33
|
-
"prebuild": "rimraf lib/",
|
|
37
|
+
"build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
|
|
34
38
|
"prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
|
|
35
|
-
"test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.
|
|
39
|
+
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
36
40
|
},
|
|
37
41
|
"dependencies": {
|
|
38
|
-
"@yozora/ast": "^2.0.
|
|
39
|
-
"@yozora/character": "^2.0.
|
|
40
|
-
"@yozora/core-tokenizer": "^2.0.
|
|
41
|
-
"@yozora/tokenizer-html-block": "^2.0.
|
|
42
|
+
"@yozora/ast": "^2.0.5-alpha.0",
|
|
43
|
+
"@yozora/character": "^2.0.5-alpha.0",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.0.5-alpha.0",
|
|
45
|
+
"@yozora/tokenizer-html-block": "^2.0.5-alpha.0"
|
|
42
46
|
},
|
|
43
|
-
"gitHead": "
|
|
47
|
+
"gitHead": "8bf941fe4ef82947165b0f3cc123cd493665e13b"
|
|
44
48
|
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export * from './util/cdata'
|
|
2
|
+
export * from './util/closing'
|
|
3
|
+
export * from './util/comment'
|
|
4
|
+
export * from './util/declaration'
|
|
5
|
+
export * from './util/instruction'
|
|
6
|
+
export * from './util/open'
|
|
7
|
+
export { match as htmlInlineMatch } from './match'
|
|
8
|
+
export { parse as htmlInlineParse } from './parse'
|
|
9
|
+
export { HtmlInlineTokenizer, HtmlInlineTokenizer as default } from './tokenizer'
|
|
10
|
+
export { uniqueName as HtmlInlineTokenizerName } from './types'
|
|
11
|
+
export type {
|
|
12
|
+
IThis as IHtmlInlineHookContext,
|
|
13
|
+
IToken as IHtmlInlineToken,
|
|
14
|
+
ITokenizerProps as IHtmlInlineTokenizerProps,
|
|
15
|
+
} from './types'
|
package/src/match.ts
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { HtmlType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
4
|
+
import type {
|
|
5
|
+
IMatchInlineHookCreator,
|
|
6
|
+
IResultOfProcessSingleDelimiter,
|
|
7
|
+
} from '@yozora/core-tokenizer'
|
|
8
|
+
import { eatOptionalWhitespaces, genFindDelimiter } from '@yozora/core-tokenizer'
|
|
9
|
+
import type { IDelimiter, IThis, IToken, T } from './types'
|
|
10
|
+
import { eatHtmlInlineCDataDelimiter } from './util/cdata'
|
|
11
|
+
import { eatHtmlInlineClosingDelimiter } from './util/closing'
|
|
12
|
+
import { eatHtmlInlineCommentDelimiter } from './util/comment'
|
|
13
|
+
import { eatHtmlInlineDeclarationDelimiter } from './util/declaration'
|
|
14
|
+
import { eatHtmlInlineInstructionDelimiter } from './util/instruction'
|
|
15
|
+
import { eatHtmlInlineTokenOpenDelimiter } from './util/open'
|
|
16
|
+
|
|
17
|
+
/**
 * Match-phase hook creator for inline raw HTML.
 *
 * Text between '<' and '>' that looks like an HTML tag is parsed as a raw HTML
 * tag and will be rendered in HTML without escaping. Tag and attribute names
 * are not limited to current HTML tags, so custom tags (and even, say, DocBook
 * tags) may be used.
 *
 * The returned hook exposes:
 * - `findDelimiter`: a finder built by `genFindDelimiter` around the scanner
 *   defined below.
 * - `processSingleDelimiter`: converts one delimiter into a single token.
 *
 * @see https://github.github.com/gfm/#raw-html
 */
export const match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = function (api) {
  /**
   * Scan `[startIndex, endIndex)` for the first inline HTML delimiter.
   * Returns `null` when the range contains none.
   */
  const scanForDelimiter = (startIndex: number, endIndex: number): IDelimiter | null => {
    const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()

    let cursor = startIndex
    while (cursor < endIndex) {
      cursor = eatOptionalWhitespaces(nodePoints, cursor, endIndex)
      if (cursor >= endIndex) return null

      const codePoint = nodePoints[cursor].codePoint

      // A backslash consumes itself and the code point right after it.
      if (codePoint === AsciiCodePoint.BACKSLASH) {
        cursor += 2
        continue
      }

      if (codePoint === AsciiCodePoint.OPEN_ANGLE) {
        const found: IDelimiter | null = tryToEatDelimiter(nodePoints, cursor, endIndex)
        if (found != null) return found
      }

      cursor += 1
    }
    return null
  }

  /**
   * Turn a delimiter into a one-element token list: the token is a shallow
   * copy of the delimiter with `nodeType` set to `HtmlType`.
   */
  const delimiterToTokens = (
    delimiter: IDelimiter,
  ): IResultOfProcessSingleDelimiter<T, IToken> => {
    const token: IToken = { ...delimiter, nodeType: HtmlType }
    return [token]
  }

  return {
    findDelimiter: () => genFindDelimiter<IDelimiter>(scanForDelimiter),
    processSingleDelimiter: delimiterToTokens,
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Try each inline HTML construct at `startIndex` and return the first
 * delimiter produced, or `null` when none of them matches.
 *
 * The constructs are attempted in a fixed order: open tag, closing tag,
 * HTML comment, processing instruction, declaration, CDATA section. Later
 * eaters are not invoked once an earlier one has succeeded.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index handed to every eater as the start of the candidate
 * @param endIndex    exclusive upper bound of the scan
 */
function tryToEatDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IDelimiter | null {
  return (
    eatHtmlInlineTokenOpenDelimiter(nodePoints, startIndex, endIndex) ??
    eatHtmlInlineClosingDelimiter(nodePoints, startIndex, endIndex) ??
    eatHtmlInlineCommentDelimiter(nodePoints, startIndex, endIndex) ??
    eatHtmlInlineInstructionDelimiter(nodePoints, startIndex, endIndex) ??
    eatHtmlInlineDeclarationDelimiter(nodePoints, startIndex, endIndex) ??
    eatHtmlInlineCDataDelimiter(nodePoints, startIndex, endIndex)
  )
}
|
package/src/parse.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { HtmlType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import { calcStringFromNodePoints } from '@yozora/character'
|
|
4
|
+
import type { IParseInlineHookCreator } from '@yozora/core-tokenizer'
|
|
5
|
+
import type { INode, IThis, IToken, T } from './types'
|
|
6
|
+
|
|
7
|
+
/**
 * Parse-phase hook creator for inline raw HTML.
 *
 * The returned `parse` callback maps every token to an `Html` node whose
 * `value` is the source text covered by `[token.startIndex, token.endIndex)`.
 * A `position` is attached only when `api.shouldReservePosition` is truthy.
 */
export const parse: IParseInlineHookCreator<T, IToken, INode, IThis> = function (api) {
  return {
    parse: tokens => {
      // The node points do not depend on the token, so fetch them once per
      // call instead of once per token.
      const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()

      return tokens.map(token => {
        const { startIndex, endIndex } = token
        const value = calcStringFromNodePoints(nodePoints, startIndex, endIndex)
        const node: INode = api.shouldReservePosition
          ? { type: HtmlType, position: api.calcPosition(token), value }
          : { type: HtmlType, value }
        return node
      })
    },
  }
}
|
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IInlineTokenizer,
|
|
3
|
+
IMatchInlineHookCreator,
|
|
4
|
+
IParseInlineHookCreator,
|
|
5
|
+
} from '@yozora/core-tokenizer'
|
|
6
|
+
import { BaseInlineTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
+
import { match } from './match'
|
|
8
|
+
import { parse } from './parse'
|
|
9
|
+
import type { IDelimiter, INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
10
|
+
import { uniqueName } from './types'
|
|
11
|
+
|
|
12
|
+
/**
 * Lexical analyzer for inline raw HTML.
 *
 * Text between '<' and '>' that looks like an HTML tag is parsed as a raw HTML
 * tag and will be rendered in HTML without escaping. Tag and attribute names
 * are not limited to current HTML tags, so custom tags (and even, say, DocBook
 * tags) may be used.
 *
 * @see https://github.github.com/gfm/#raw-html
 */
export class HtmlInlineTokenizer
  extends BaseInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
  implements IInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
{
  /** Match-phase hook creator (the module-level `match`). */
  public override readonly match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = match

  /** Parse-phase hook creator (the module-level `parse`). */
  public override readonly parse: IParseInlineHookCreator<T, IToken, INode, IThis> = parse

  /**
   * @param props optional overrides; `name` falls back to `uniqueName` and
   *              `priority` falls back to `TokenizerPriority.ATOMIC` when
   *              they are `null` or `undefined`.
   */
  /* istanbul ignore next */
  constructor(props: ITokenizerProps = {}) {
    super({
      name: props.name ?? uniqueName,
      priority: props.priority ?? TokenizerPriority.ATOMIC,
    })
  }
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import type { Html, HtmlType } from '@yozora/ast'
|
|
2
|
+
import type {
|
|
3
|
+
IBaseInlineTokenizerProps,
|
|
4
|
+
IPartialYastInlineToken,
|
|
5
|
+
ITokenizer,
|
|
6
|
+
} from '@yozora/core-tokenizer'
|
|
7
|
+
import type { IHtmlInlineCDataDelimiter, IHtmlInlineCDataTokenData } from './util/cdata'
|
|
8
|
+
import type { IHtmlInlineClosingDelimiter, IHtmlInlineClosingTokenData } from './util/closing'
|
|
9
|
+
import type { IHtmlInlineCommentDelimiter, IHtmlInlineCommentTokenData } from './util/comment'
|
|
10
|
+
import type {
|
|
11
|
+
IHtmlInlineDeclarationDelimiter,
|
|
12
|
+
IHtmlInlineDeclarationTokenData,
|
|
13
|
+
} from './util/declaration'
|
|
14
|
+
import type {
|
|
15
|
+
IHtmlInlineInstructionDelimiter,
|
|
16
|
+
IHtmlInlineInstructionTokenData,
|
|
17
|
+
} from './util/instruction'
|
|
18
|
+
import type {
|
|
19
|
+
IHtmlInlineOpenDelimiter,
|
|
20
|
+
IHtmlInlineOpenTokenData as IHtmlInlineOpenTokenData,
|
|
21
|
+
} from './util/open'
|
|
22
|
+
|
|
23
|
+
/** Node-type discriminant used by this tokenizer's tokens and nodes. */
export type T = HtmlType

/** AST node type produced by this tokenizer. */
export type INode = Html

/** Package name, exported as the tokenizer's unique name. */
export const uniqueName = '@yozora/tokenizer-html-inline'

/**
 * Token for a piece of inline raw HTML.
 *
 * Text between '<' and '>' that looks like an HTML tag is parsed as a raw
 * HTML tag and will be rendered in HTML without escaping. Tag and attribute
 * names are not limited to current HTML tags, so custom tags (and even, say,
 * DocBook tags) may be used.
 *
 * The token combines the common inline-token fields with exactly one of the
 * per-construct data shapes (open tag, closing tag, comment, processing
 * instruction, declaration, CDATA section).
 *
 * @see https://github.github.com/gfm/#raw-html
 */
export type IToken = IPartialYastInlineToken<T> &
  (
    | IHtmlInlineOpenTokenData
    | IHtmlInlineClosingTokenData
    | IHtmlInlineCommentTokenData
    | IHtmlInlineInstructionTokenData
    | IHtmlInlineDeclarationTokenData
    | IHtmlInlineCDataTokenData
  )

/** Any delimiter that the inline HTML eaters can produce. */
export type IDelimiter =
  | IHtmlInlineOpenDelimiter
  | IHtmlInlineClosingDelimiter
  | IHtmlInlineCommentDelimiter
  | IHtmlInlineInstructionDelimiter
  | IHtmlInlineDeclarationDelimiter
  | IHtmlInlineCDataDelimiter

/** `this` context type of the match / parse hook creators. */
export type IThis = ITokenizer

/** Constructor options; every base inline-tokenizer prop is optional. */
export type ITokenizerProps = Partial<IBaseInlineTokenizerProps>
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
+
import type { IYastTokenDelimiter } from '@yozora/core-tokenizer'
|
|
4
|
+
|
|
5
|
+
/** Data shape for a CDATA section; carries only the `'cdata'` discriminant. */
export interface IHtmlInlineCDataData {
  htmlType: 'cdata'
}

/** Token-level data for a CDATA section; carries only the `'cdata'` discriminant. */
export interface IHtmlInlineCDataTokenData {
  htmlType: 'cdata'
}

/** Delimiter describing a CDATA section; its `type` is always `'full'`. */
export interface IHtmlInlineCDataDelimiter
  extends IYastTokenDelimiter,
    IHtmlInlineCDataTokenData {
  type: 'full'
}
|
|
16
|
+
|
|
17
|
+
/**
 * Try to consume a CDATA section starting at `startIndex`.
 *
 * A CDATA section consists of the string `<![CDATA[`, a string of characters
 * not including the string `]]>`, and the string `]]>`.
 *
 * The code point at `startIndex` itself is not inspected; only the eight
 * code points after it are compared against `![CDATA[`.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the candidate's first code point
 * @param endIndex    exclusive upper bound of the scan
 * @returns a `'full'` delimiter ending right after the first `]]>`, or `null`
 *          when the opener is missing or no terminator lies inside the range
 * @see https://github.github.com/gfm/#cdata-section
 */
export function eatHtmlInlineCDataDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IHtmlInlineCDataDelimiter | null {
  // The shortest section, `<![CDATA[]]>`, already spans twelve code points.
  if (startIndex + 11 >= endIndex) return null

  // Code points expected at offsets 1..8 from startIndex: `![CDATA[`.
  const openerTail = [
    AsciiCodePoint.EXCLAMATION_MARK,
    AsciiCodePoint.OPEN_BRACKET,
    AsciiCodePoint.UPPERCASE_C,
    AsciiCodePoint.UPPERCASE_D,
    AsciiCodePoint.UPPERCASE_A,
    AsciiCodePoint.UPPERCASE_T,
    AsciiCodePoint.UPPERCASE_A,
    AsciiCodePoint.OPEN_BRACKET,
  ]
  const openerMatches = openerTail.every(
    (expected, offset) => nodePoints[startIndex + 1 + offset].codePoint === expected,
  )
  if (!openerMatches) return null

  for (let cursor = startIndex + 9; cursor < endIndex; cursor += 1) {
    if (nodePoints[cursor].codePoint !== AsciiCodePoint.CLOSE_BRACKET) continue

    // Not enough room left for a complete `]]>`.
    if (cursor + 2 >= endIndex) return null

    const terminatesHere =
      nodePoints[cursor + 1].codePoint === AsciiCodePoint.CLOSE_BRACKET &&
      nodePoints[cursor + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
    if (!terminatesHere) continue

    const delimiter: IHtmlInlineCDataDelimiter = {
      type: 'full',
      startIndex,
      endIndex: cursor + 3,
      htmlType: 'cdata',
    }
    return delimiter
  }
  return null
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { INodeInterval, INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
+
import type { IYastTokenDelimiter } from '@yozora/core-tokenizer'
|
|
4
|
+
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
5
|
+
import { eatHTMLTagName } from '@yozora/tokenizer-html-block'
|
|
6
|
+
|
|
7
|
+
/** Data shape for a closing tag, with the tag name held as a string. */
export interface IHtmlInlineClosingTagData {
  htmlType: 'closing'
  tagName: string
}

/** Token-level data for a closing tag, with the tag name held as an index interval. */
export interface IHtmlInlineClosingTokenData {
  htmlType: 'closing'
  tagName: INodeInterval
}

/** Delimiter describing a closing tag; its `type` is always `'full'`. */
export interface IHtmlInlineClosingDelimiter
  extends IYastTokenDelimiter,
    IHtmlInlineClosingTokenData {
  type: 'full'
}
|
|
22
|
+
|
|
23
|
+
/**
 * Try to consume a closing tag starting at `startIndex`.
 *
 * A closing tag consists of the string '</', a tag name, optional whitespace,
 * and the character '>'.
 *
 * The code point at `startIndex` itself is not inspected; only the one after
 * it is compared against '/'.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the candidate's first code point
 * @param endIndex    exclusive upper bound of the scan
 * @returns a `'full'` delimiter ending right after '>', with `tagName` set to
 *          the interval of the tag name, or `null` when the input does not match
 * @see https://github.github.com/gfm/#closing-tag
 */
export function eatHtmlInlineClosingDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IHtmlInlineClosingDelimiter | null {
  // The shortest closing tag, `</a>`, spans four code points.
  if (startIndex + 3 >= endIndex) return null
  if (nodePoints[startIndex + 1].codePoint !== AsciiCodePoint.SLASH) return null

  const nameStart = startIndex + 2
  const nameEnd = eatHTMLTagName(nodePoints, nameStart, endIndex)
  if (nameEnd == null) return null

  // Only whitespace may sit between the tag name and the closing '>'.
  const closeAt = eatOptionalWhitespaces(nodePoints, nameEnd, endIndex)
  if (closeAt >= endIndex) return null
  if (nodePoints[closeAt].codePoint !== AsciiCodePoint.CLOSE_ANGLE) return null

  const delimiter: IHtmlInlineClosingDelimiter = {
    type: 'full',
    startIndex,
    endIndex: closeAt + 1,
    htmlType: 'closing',
    tagName: { startIndex: nameStart, endIndex: nameEnd },
  }
  return delimiter
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
+
import type { IYastTokenDelimiter } from '@yozora/core-tokenizer'
|
|
4
|
+
|
|
5
|
+
/** Data shape for an HTML comment; carries only the `'comment'` discriminant. */
export interface IHtmlInlineCommentData {
  htmlType: 'comment'
}

/** Token-level data for an HTML comment; carries only the `'comment'` discriminant. */
export interface IHtmlInlineCommentTokenData {
  htmlType: 'comment'
}

/** Delimiter describing an HTML comment; its `type` is always `'full'`. */
export interface IHtmlInlineCommentDelimiter
  extends IYastTokenDelimiter,
    IHtmlInlineCommentTokenData {
  type: 'full'
}
|
|
18
|
+
|
|
19
|
+
/**
 * Try to consume an HTML comment starting at `startIndex`.
 *
 * An HTML comment consists of `<!--` + text + `-->`, where text does not start
 * with `>` or `->`, does not end with `-`, and does not contain `--`.
 *
 * The code point at `startIndex` itself is not inspected; only the three
 * code points after it are compared against `!--`.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the candidate's first code point
 * @param endIndex    exclusive upper bound of the scan
 * @returns a `'full'` delimiter ending right after `-->`, or `null` when the
 *          input is not a valid comment within the range
 * @see https://github.github.com/gfm/#html-comment
 */
export function eatHtmlInlineCommentDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IHtmlInlineCommentDelimiter | null {
  // The shortest comment, `<!---->`, spans seven code points.
  if (startIndex + 6 >= endIndex) return null

  const hasOpener =
    nodePoints[startIndex + 1].codePoint === AsciiCodePoint.EXCLAMATION_MARK &&
    nodePoints[startIndex + 2].codePoint === AsciiCodePoint.MINUS_SIGN &&
    nodePoints[startIndex + 3].codePoint === AsciiCodePoint.MINUS_SIGN
  if (!hasOpener) return null

  const textStart = startIndex + 4
  const firstOfText = nodePoints[textStart].codePoint

  // The text does not start with '>'.
  if (firstOfText === AsciiCodePoint.CLOSE_ANGLE) return null

  // The text does not start with '->'.
  if (
    firstOfText === AsciiCodePoint.MINUS_SIGN &&
    nodePoints[textStart + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE
  ) {
    return null
  }

  let cursor = textStart
  while (cursor < endIndex) {
    if (nodePoints[cursor].codePoint !== AsciiCodePoint.MINUS_SIGN) {
      cursor += 1
      continue
    }

    // Measure the run of consecutive hyphens that begins at `cursor`.
    let runEnd = cursor + 1
    while (runEnd < endIndex && nodePoints[runEnd].codePoint === AsciiCodePoint.MINUS_SIGN) {
      runEnd += 1
    }
    const hyphenCount = runEnd - cursor

    /**
     * Single hyphen is allowed.
     * @see https://github.github.com/gfm/#example-644
     */
    if (hyphenCount < 2) {
      cursor += 1
      continue
    }

    /**
     * The text does not contain '--' and does not end with '-', so the first
     * run of two or more hyphens must be exactly `--` followed by '>'.
     * @see https://github.github.com/gfm/#example-645
     */
    const closesComment =
      hyphenCount === 2 &&
      cursor + 2 < endIndex &&
      nodePoints[cursor + 2].codePoint === AsciiCodePoint.CLOSE_ANGLE
    if (!closesComment) return null

    const delimiter: IHtmlInlineCommentDelimiter = {
      type: 'full',
      startIndex,
      endIndex: cursor + 3,
      htmlType: 'comment',
    }
    return delimiter
  }
  return null
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { INodeInterval, INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint, isAsciiUpperLetter, isWhitespaceCharacter } from '@yozora/character'
|
|
3
|
+
import type { IYastTokenDelimiter } from '@yozora/core-tokenizer'
|
|
4
|
+
|
|
5
|
+
/** Data shape for a declaration; carries only the `'declaration'` discriminant. */
export interface IHtmlInlineDeclarationData {
  htmlType: 'declaration'
}

/** Token-level data for a declaration, with the name held as an index interval. */
export interface IHtmlInlineDeclarationTokenData {
  htmlType: 'declaration'
  tagName: INodeInterval
}

/** Delimiter describing a declaration; its `type` is always `'full'`. */
export interface IHtmlInlineDeclarationDelimiter
  extends IYastTokenDelimiter,
    IHtmlInlineDeclarationTokenData {
  type: 'full'
}
|
|
19
|
+
|
|
20
|
+
/**
 * Try to consume a declaration starting at `startIndex`.
 *
 * A declaration consists of the string `<!`, a name consisting of one or more
 * uppercase ASCII letters, whitespace, a string of characters not including
 * the character `>`, and the character `>`.
 *
 * The code point at `startIndex` itself is not inspected; only the one after
 * it is compared against '!'.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the candidate's first code point
 * @param endIndex    exclusive upper bound of the scan
 * @returns a `'full'` delimiter ending right after the first '>', with
 *          `tagName` set to the interval of the uppercase name, or `null`
 *          when the input does not match
 * @see https://github.github.com/gfm/#declaration
 */
export function eatHtmlInlineDeclarationDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IHtmlInlineDeclarationDelimiter | null {
  // The shortest declaration, `<!A >`, spans five code points.
  if (startIndex + 4 >= endIndex) return null
  if (nodePoints[startIndex + 1].codePoint !== AsciiCodePoint.EXCLAMATION_MARK) return null

  // Consume the declaration name: a run of uppercase ASCII letters.
  const nameStart = startIndex + 2
  let nameEnd = nameStart
  while (nameEnd < endIndex && isAsciiUpperLetter(nodePoints[nameEnd].codePoint)) {
    nameEnd += 1
  }

  // A non-empty name must be followed by whitespace, with at least one more
  // code point after that whitespace.
  if (nameEnd - nameStart <= 0) return null
  if (nameEnd + 1 >= endIndex) return null
  if (!isWhitespaceCharacter(nodePoints[nameEnd].codePoint)) return null

  // The declaration ends at the first '>' after the whitespace.
  for (let cursor = nameEnd + 1; cursor < endIndex; cursor += 1) {
    if (nodePoints[cursor].codePoint !== AsciiCodePoint.CLOSE_ANGLE) continue

    const delimiter: IHtmlInlineDeclarationDelimiter = {
      type: 'full',
      startIndex,
      endIndex: cursor + 1,
      htmlType: 'declaration',
      tagName: { startIndex: nameStart, endIndex: nameEnd },
    }
    return delimiter
  }
  return null
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
+
import type { IYastTokenDelimiter } from '@yozora/core-tokenizer'
|
|
4
|
+
|
|
5
|
+
/**
 * Data shape for a processing instruction; carries only the `'instruction'`
 * discriminant.
 *
 * @see https://github.github.com/gfm/#processing-instruction
 */
export interface IHtmlInlineInstructionData {
  htmlType: 'instruction'
}

/**
 * Token-level data for a processing instruction; carries only the
 * `'instruction'` discriminant.
 */
export interface IHtmlInlineInstructionTokenData {
  htmlType: 'instruction'
}

/** Delimiter describing a processing instruction; its `type` is always `'full'`. */
export interface IHtmlInlineInstructionDelimiter
  extends IYastTokenDelimiter,
    IHtmlInlineInstructionTokenData {
  type: 'full'
}
|
|
22
|
+
|
|
23
|
+
/**
 * Try to consume a processing instruction starting at `startIndex`.
 *
 * A processing instruction consists of the string `<?`, a string of characters
 * not including the string `?>`, and the string `?>`.
 *
 * The code point at `startIndex` itself is not inspected; only the one after
 * it is compared against '?'.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the candidate's first code point
 * @param endIndex    exclusive upper bound of the scan
 * @returns a `'full'` delimiter ending right after the first `?>`, or `null`
 *          when the opener is missing or no terminator lies inside the range
 * @see https://github.github.com/gfm/#processing-instruction
 */
export function eatHtmlInlineInstructionDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IHtmlInlineInstructionDelimiter | null {
  // The shortest instruction, `<??>`, spans four code points.
  if (startIndex + 3 >= endIndex) return null
  if (nodePoints[startIndex + 1].codePoint !== AsciiCodePoint.QUESTION_MARK) return null

  let cursor = startIndex + 2
  while (cursor < endIndex) {
    if (nodePoints[cursor].codePoint === AsciiCodePoint.QUESTION_MARK) {
      // A '?' in the last position cannot be followed by '>'.
      if (cursor + 1 >= endIndex) return null

      if (nodePoints[cursor + 1].codePoint === AsciiCodePoint.CLOSE_ANGLE) {
        const delimiter: IHtmlInlineInstructionDelimiter = {
          type: 'full',
          startIndex,
          endIndex: cursor + 2,
          htmlType: 'instruction',
        }
        return delimiter
      }
    }
    cursor += 1
  }
  return null
}
|
package/src/util/open.ts
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { INodeInterval, INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
3
|
+
import type { IYastTokenDelimiter } from '@yozora/core-tokenizer'
|
|
4
|
+
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
5
|
+
import type { RawHTMLAttribute } from '@yozora/tokenizer-html-block'
|
|
6
|
+
import { eatHTMLAttribute, eatHTMLTagName } from '@yozora/tokenizer-html-block'
|
|
7
|
+
|
|
8
|
+
/** Data shape for an open tag, with names and values held as strings. */
export interface IHtmlInlineOpenTagData {
  htmlType: 'open'
  /** HTML tag name. */
  tagName: string
  /** HTML attributes; `value` is optional. */
  attributes: Array<{ name: string; value?: string }>
  /** Whether the HTML tag is self-closed. */
  selfClosed: boolean
}

/** Token-level data for an open tag, with the tag name held as an index interval. */
export interface IHtmlInlineOpenTokenData {
  htmlType: 'open'
  /** Interval covering the tag name. */
  tagName: INodeInterval
  /** Raw attributes in the order they were consumed. */
  attributes: RawHTMLAttribute[]
  /** Whether a '/' was consumed right before the closing '>'. */
  selfClosed: boolean
}

/** Delimiter describing an open tag; its `type` is always `'full'`. */
export interface IHtmlInlineOpenDelimiter
  extends IYastTokenDelimiter,
    IHtmlInlineOpenTokenData {
  type: 'full'
}
|
|
34
|
+
|
|
35
|
+
/**
 * Try to consume an open tag starting at `startIndex`.
 *
 * An open tag consists of a '<' character, a tag name, zero or more attributes,
 * optional whitespace, an optional '/' character, and a '>' character.
 *
 * The code point at `startIndex` itself is not inspected; tag-name matching
 * begins at the code point after it.
 *
 * @param nodePoints  code points being scanned
 * @param startIndex  index of the candidate's first code point
 * @param endIndex    exclusive upper bound of the scan
 * @returns a `'full'` delimiter ending right after '>', carrying the tag-name
 *          interval, the consumed attributes and the self-closed flag, or
 *          `null` when the input does not match
 * @see https://github.github.com/gfm/#open-tag
 */
export function eatHtmlInlineTokenOpenDelimiter(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IHtmlInlineOpenDelimiter | null {
  // The shortest open tag, `<a>`, spans three code points.
  if (startIndex + 2 >= endIndex) return null

  const nameStart = startIndex + 1
  const nameEnd = eatHTMLTagName(nodePoints, nameStart, endIndex)
  if (nameEnd == null) return null

  // Consume attributes one after another until none can be eaten.
  const attributes: RawHTMLAttribute[] = []
  let cursor = nameEnd
  while (cursor < endIndex) {
    const eaten = eatHTMLAttribute(nodePoints, cursor, endIndex)
    if (eaten == null) break
    attributes.push(eaten.attribute)
    cursor = eaten.nextIndex
  }

  cursor = eatOptionalWhitespaces(nodePoints, cursor, endIndex)
  if (cursor >= endIndex) return null

  // An optional '/' right before '>' marks the tag as self-closed.
  const selfClosed = nodePoints[cursor].codePoint === AsciiCodePoint.SLASH
  if (selfClosed) cursor += 1

  if (cursor >= endIndex) return null
  if (nodePoints[cursor].codePoint !== AsciiCodePoint.CLOSE_ANGLE) return null

  const delimiter: IHtmlInlineOpenDelimiter = {
    type: 'full',
    startIndex,
    endIndex: cursor + 1,
    htmlType: 'open',
    tagName: { startIndex: nameStart, endIndex: nameEnd },
    attributes,
    selfClosed,
  }
  return delimiter
}
|