@yozora/tokenizer-heading 2.0.4 → 2.0.5-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/{index.js → index.cjs} +5 -6
- package/lib/esm/{index.js → index.mjs} +4 -5
- package/lib/types/index.d.ts +4 -4
- package/package.json +18 -14
- package/src/index.ts +9 -0
- package/src/match.ts +101 -0
- package/src/parse.ts +79 -0
- package/src/tokenizer.ts +32 -0
- package/src/types.ts +26 -0
|
@@ -97,18 +97,17 @@ const uniqueName = '@yozora/tokenizer-heading';
|
|
|
97
97
|
|
|
98
98
|
class HeadingTokenizer extends coreTokenizer.BaseBlockTokenizer {
|
|
99
99
|
constructor(props = {}) {
|
|
100
|
-
var _a, _b;
|
|
101
100
|
super({
|
|
102
|
-
name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
|
|
103
|
-
priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.ATOMIC,
|
|
101
|
+
name: props.name ?? uniqueName,
|
|
102
|
+
priority: props.priority ?? coreTokenizer.TokenizerPriority.ATOMIC,
|
|
104
103
|
});
|
|
105
|
-
this.match = match;
|
|
106
|
-
this.parse = parse;
|
|
107
104
|
}
|
|
105
|
+
match = match;
|
|
106
|
+
parse = parse;
|
|
108
107
|
}
|
|
109
108
|
|
|
110
109
|
exports.HeadingTokenizer = HeadingTokenizer;
|
|
111
110
|
exports.HeadingTokenizerName = uniqueName;
|
|
112
|
-
exports["default"] = HeadingTokenizer;
|
|
111
|
+
exports.default = HeadingTokenizer;
|
|
113
112
|
exports.headingMatch = match;
|
|
114
113
|
exports.headingParse = parse;
|
|
@@ -93,14 +93,13 @@ const uniqueName = '@yozora/tokenizer-heading';
|
|
|
93
93
|
|
|
94
94
|
class HeadingTokenizer extends BaseBlockTokenizer {
|
|
95
95
|
constructor(props = {}) {
|
|
96
|
-
var _a, _b;
|
|
97
96
|
super({
|
|
98
|
-
name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
|
|
99
|
-
priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.ATOMIC,
|
|
97
|
+
name: props.name ?? uniqueName,
|
|
98
|
+
priority: props.priority ?? TokenizerPriority.ATOMIC,
|
|
100
99
|
});
|
|
101
|
-
this.match = match;
|
|
102
|
-
this.parse = parse;
|
|
103
100
|
}
|
|
101
|
+
match = match;
|
|
102
|
+
parse = parse;
|
|
104
103
|
}
|
|
105
104
|
|
|
106
105
|
export { HeadingTokenizer, uniqueName as HeadingTokenizerName, HeadingTokenizer as default, match as headingMatch, parse as headingParse };
|
package/lib/types/index.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { IPartialYastBlockToken, IPhrasingContentLine, ITokenizer, IBaseBlockTokenizerProps, IMatchBlockHookCreator, IParseBlockHookCreator, BaseBlockTokenizer, IBlockTokenizer } from '@yozora/core-tokenizer';
|
|
2
2
|
import { HeadingType, Heading } from '@yozora/ast';
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
type T = HeadingType;
|
|
5
|
+
type INode = Heading;
|
|
6
6
|
declare const uniqueName = "@yozora/tokenizer-heading";
|
|
7
7
|
interface IToken extends IPartialYastBlockToken<T> {
|
|
8
8
|
/**
|
|
@@ -14,8 +14,8 @@ interface IToken extends IPartialYastBlockToken<T> {
|
|
|
14
14
|
*/
|
|
15
15
|
line: Readonly<IPhrasingContentLine>;
|
|
16
16
|
}
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
type IThis = ITokenizer;
|
|
18
|
+
type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
|
|
19
19
|
|
|
20
20
|
/**
|
|
21
21
|
* An ATX heading consists of a string of characters, parsed as inline content,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-heading",
|
|
3
|
-
"version": "2.0.4",
|
|
3
|
+
"version": "2.0.5-alpha.0",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -11,33 +11,37 @@
|
|
|
11
11
|
"directory": "tokenizers/heading"
|
|
12
12
|
},
|
|
13
13
|
"homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/heading",
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
"types": "./lib/types/index.d.ts",
|
|
17
|
+
"import": "./lib/esm/index.mjs",
|
|
18
|
+
"require": "./lib/cjs/index.cjs"
|
|
19
|
+
},
|
|
20
|
+
"source": "./src/index.ts",
|
|
21
|
+
"types": "./lib/types/index.d.ts",
|
|
22
|
+
"main": "./lib/cjs/index.cjs",
|
|
23
|
+
"module": "./lib/esm/index.mjs",
|
|
18
24
|
"license": "MIT",
|
|
19
25
|
"engines": {
|
|
20
26
|
"node": ">= 16.0.0"
|
|
21
27
|
},
|
|
22
28
|
"files": [
|
|
23
29
|
"lib/",
|
|
24
|
-
"
|
|
25
|
-
"!lib/**/*.d.ts.map",
|
|
30
|
+
"src/",
|
|
26
31
|
"package.json",
|
|
27
32
|
"CHANGELOG.md",
|
|
28
33
|
"LICENSE",
|
|
29
34
|
"README.md"
|
|
30
35
|
],
|
|
31
36
|
"scripts": {
|
|
32
|
-
"build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.js",
|
|
33
|
-
"prebuild": "rimraf lib/",
|
|
37
|
+
"build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
|
|
34
38
|
"prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
|
|
35
|
-
"test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
|
|
39
|
+
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
36
40
|
},
|
|
37
41
|
"dependencies": {
|
|
38
|
-
"@yozora/ast": "^2.0.
|
|
39
|
-
"@yozora/character": "^2.0.
|
|
40
|
-
"@yozora/core-tokenizer": "^2.0.
|
|
42
|
+
"@yozora/ast": "^2.0.5-alpha.0",
|
|
43
|
+
"@yozora/character": "^2.0.5-alpha.0",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.0.5-alpha.0"
|
|
41
45
|
},
|
|
42
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "8bf941fe4ef82947165b0f3cc123cd493665e13b"
|
|
43
47
|
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export { match as headingMatch } from './match'
|
|
2
|
+
export { parse as headingParse } from './parse'
|
|
3
|
+
export { HeadingTokenizer, HeadingTokenizer as default } from './tokenizer'
|
|
4
|
+
export { uniqueName as HeadingTokenizerName } from './types'
|
|
5
|
+
export type {
|
|
6
|
+
IThis as IHeadingHookContext,
|
|
7
|
+
IToken as IHeadingToken,
|
|
8
|
+
ITokenizerProps as IHeadingTokenizerProps,
|
|
9
|
+
} from './types'
|
package/src/match.ts
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { HeadingType } from '@yozora/ast'
|
|
2
|
+
import { AsciiCodePoint, isSpaceCharacter } from '@yozora/character'
|
|
3
|
+
import type {
|
|
4
|
+
IMatchBlockHookCreator,
|
|
5
|
+
IPhrasingContentLine,
|
|
6
|
+
IResultOfEatAndInterruptPreviousSibling,
|
|
7
|
+
IResultOfEatOpener,
|
|
8
|
+
IYastBlockToken,
|
|
9
|
+
} from '@yozora/core-tokenizer'
|
|
10
|
+
import { calcEndPoint, calcStartPoint, eatOptionalCharacters } from '@yozora/core-tokenizer'
|
|
11
|
+
import type { IThis, IToken, T } from './types'
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* An ATX heading consists of a string of characters, parsed as inline content,
|
|
15
|
+
* between an opening sequence of 1–6 unescaped '#' characters and an optional
|
|
16
|
+
* closing sequence of any number of unescaped '#' characters. The opening
|
|
17
|
+
* sequence of '#' characters must be followed by a space or by the end of line.
|
|
18
|
+
* The optional closing sequence of #s must be preceded by a space and may be
|
|
19
|
+
* followed by spaces only. The opening # character may be indented 0-3 spaces.
|
|
20
|
+
* The raw contents of the heading are stripped of leading and trailing spaces
|
|
21
|
+
* before being parsed as inline content. The heading level is equal to the
|
|
22
|
+
* number of '#' characters in the opening sequence.
|
|
23
|
+
*
|
|
24
|
+
* @see https://github.com/syntax-tree/mdast#heading
|
|
25
|
+
* @see https://github.github.com/gfm/#atx-heading
|
|
26
|
+
*/
|
|
27
|
+
export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
|
|
28
|
+
return {
|
|
29
|
+
isContainingBlock: false,
|
|
30
|
+
eatOpener,
|
|
31
|
+
eatAndInterruptPreviousSibling,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
|
|
35
|
+
/**
|
|
36
|
+
* Four spaces are too much
|
|
37
|
+
* @see https://github.github.com/gfm/#example-39
|
|
38
|
+
* @see https://github.github.com/gfm/#example-40
|
|
39
|
+
*/
|
|
40
|
+
if (line.countOfPrecedeSpaces >= 4) return null
|
|
41
|
+
|
|
42
|
+
const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
|
|
43
|
+
if (
|
|
44
|
+
firstNonWhitespaceIndex >= endIndex ||
|
|
45
|
+
nodePoints[firstNonWhitespaceIndex].codePoint !== AsciiCodePoint.NUMBER_SIGN
|
|
46
|
+
) {
|
|
47
|
+
return null
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const i = eatOptionalCharacters(
|
|
51
|
+
nodePoints,
|
|
52
|
+
firstNonWhitespaceIndex + 1,
|
|
53
|
+
endIndex,
|
|
54
|
+
AsciiCodePoint.NUMBER_SIGN,
|
|
55
|
+
)
|
|
56
|
+
const depth: number = i - firstNonWhitespaceIndex
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* More than six '#' characters is not a heading
|
|
60
|
+
* @see https://github.github.com/gfm/#example-33
|
|
61
|
+
*/
|
|
62
|
+
if (depth > 6) return null
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* At least one space is required between the '#' characters and the
|
|
66
|
+
* heading’s contents, unless the heading is empty. Note that many
|
|
67
|
+
* implementations currently do not require the space. However, the space
|
|
68
|
+
* was required by the original ATX implementation, and it helps prevent
|
|
69
|
+
* things like the following from being parsed as headings:
|
|
70
|
+
*
|
|
71
|
+
* ATX headings can be empty
|
|
72
|
+
* @see https://github.github.com/gfm/#example-49
|
|
73
|
+
*/
|
|
74
|
+
if (i + 1 < endIndex && !isSpaceCharacter(nodePoints[i].codePoint)) return null
|
|
75
|
+
|
|
76
|
+
const nextIndex = endIndex
|
|
77
|
+
const token: IToken = {
|
|
78
|
+
nodeType: HeadingType,
|
|
79
|
+
position: {
|
|
80
|
+
start: calcStartPoint(nodePoints, startIndex),
|
|
81
|
+
end: calcEndPoint(nodePoints, nextIndex - 1),
|
|
82
|
+
},
|
|
83
|
+
depth: depth as IToken['depth'],
|
|
84
|
+
line,
|
|
85
|
+
}
|
|
86
|
+
return { token, nextIndex, saturated: true }
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
function eatAndInterruptPreviousSibling(
|
|
90
|
+
line: Readonly<IPhrasingContentLine>,
|
|
91
|
+
prevSiblingToken: Readonly<IYastBlockToken>,
|
|
92
|
+
): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
|
|
93
|
+
const result = eatOpener(line)
|
|
94
|
+
if (result == null) return null
|
|
95
|
+
return {
|
|
96
|
+
token: result.token,
|
|
97
|
+
nextIndex: result.nextIndex,
|
|
98
|
+
remainingSibling: prevSiblingToken,
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
package/src/parse.ts
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import type { Node } from '@yozora/ast'
|
|
2
|
+
import { HeadingType } from '@yozora/ast'
|
|
3
|
+
import type { INodePoint } from '@yozora/character'
|
|
4
|
+
import {
|
|
5
|
+
AsciiCodePoint,
|
|
6
|
+
calcTrimBoundaryOfCodePoints,
|
|
7
|
+
isWhitespaceCharacter,
|
|
8
|
+
} from '@yozora/character'
|
|
9
|
+
import type { IParseBlockHookCreator, IPhrasingContentLine } from '@yozora/core-tokenizer'
|
|
10
|
+
import { mergeAndStripContentLines } from '@yozora/core-tokenizer'
|
|
11
|
+
import type { INode, IThis, IToken, T } from './types'
|
|
12
|
+
|
|
13
|
+
export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
|
|
14
|
+
return {
|
|
15
|
+
parse: tokens =>
|
|
16
|
+
tokens.map(token => {
|
|
17
|
+
const { nodePoints, firstNonWhitespaceIndex, endIndex } = token.line
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Leading and trailing whitespace is ignored in parsing inline content
|
|
21
|
+
* Spaces are allowed after the closing sequence
|
|
22
|
+
* @see https://github.github.com/gfm/#example-37
|
|
23
|
+
* @see https://github.github.com/gfm/#example-43
|
|
24
|
+
*/
|
|
25
|
+
// eslint-disable-next-line prefer-const
|
|
26
|
+
let [leftIndex, rightIndex] = calcTrimBoundaryOfCodePoints(
|
|
27
|
+
nodePoints,
|
|
28
|
+
firstNonWhitespaceIndex + token.depth,
|
|
29
|
+
endIndex,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* A closing sequence of '#' characters is optional
|
|
34
|
+
* It need not be the same length as the opening sequence
|
|
35
|
+
* @see https://github.github.com/gfm/#example-41
|
|
36
|
+
* @see https://github.github.com/gfm/#example-42
|
|
37
|
+
* @see https://github.github.com/gfm/#example-44
|
|
38
|
+
*/
|
|
39
|
+
let closeCharCount = 0
|
|
40
|
+
for (let j = rightIndex - 1; j >= leftIndex; --j) {
|
|
41
|
+
const c = nodePoints[j].codePoint
|
|
42
|
+
if (c !== AsciiCodePoint.NUMBER_SIGN) break
|
|
43
|
+
closeCharCount += 1
|
|
44
|
+
}
|
|
45
|
+
if (closeCharCount > 0) {
|
|
46
|
+
let spaceCount = 0,
|
|
47
|
+
j = rightIndex - 1 - closeCharCount
|
|
48
|
+
for (; j >= leftIndex; --j) {
|
|
49
|
+
const c = nodePoints[j].codePoint
|
|
50
|
+
if (!isWhitespaceCharacter(c)) break
|
|
51
|
+
spaceCount += 1
|
|
52
|
+
}
|
|
53
|
+
if (spaceCount > 0 || j < leftIndex) {
|
|
54
|
+
rightIndex -= closeCharCount + spaceCount
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Resolve phrasing content.
|
|
59
|
+
const lines: IPhrasingContentLine[] = [
|
|
60
|
+
{
|
|
61
|
+
nodePoints,
|
|
62
|
+
startIndex: leftIndex,
|
|
63
|
+
endIndex: rightIndex,
|
|
64
|
+
firstNonWhitespaceIndex: leftIndex,
|
|
65
|
+
countOfPrecedeSpaces: 0,
|
|
66
|
+
},
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
// Resolve phrasing content.
|
|
70
|
+
const contents: INodePoint[] = mergeAndStripContentLines(lines)
|
|
71
|
+
const children: Node[] = api.processInlines(contents)
|
|
72
|
+
|
|
73
|
+
const node: INode = api.shouldReservePosition
|
|
74
|
+
? { type: HeadingType, position: token.position, depth: token.depth, children }
|
|
75
|
+
: { type: HeadingType, depth: token.depth, children }
|
|
76
|
+
return node
|
|
77
|
+
}),
|
|
78
|
+
}
|
|
79
|
+
}
|
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IBlockTokenizer,
|
|
3
|
+
IMatchBlockHookCreator,
|
|
4
|
+
IParseBlockHookCreator,
|
|
5
|
+
} from '@yozora/core-tokenizer'
|
|
6
|
+
import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
+
import { match } from './match'
|
|
8
|
+
import { parse } from './parse'
|
|
9
|
+
import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
10
|
+
import { uniqueName } from './types'
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Lexical Analyzer for Heading.
|
|
14
|
+
* @see https://github.com/syntax-tree/mdast#heading
|
|
15
|
+
* @see https://github.github.com/gfm/#atx-heading
|
|
16
|
+
*/
|
|
17
|
+
export class HeadingTokenizer
|
|
18
|
+
extends BaseBlockTokenizer<T, IToken, INode, IThis>
|
|
19
|
+
implements IBlockTokenizer<T, IToken, INode, IThis>
|
|
20
|
+
{
|
|
21
|
+
/* istanbul ignore next */
|
|
22
|
+
constructor(props: ITokenizerProps = {}) {
|
|
23
|
+
super({
|
|
24
|
+
name: props.name ?? uniqueName,
|
|
25
|
+
priority: props.priority ?? TokenizerPriority.ATOMIC,
|
|
26
|
+
})
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
|
|
30
|
+
|
|
31
|
+
public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
|
|
32
|
+
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { Heading, HeadingType } from '@yozora/ast'
|
|
2
|
+
import type {
|
|
3
|
+
IBaseBlockTokenizerProps,
|
|
4
|
+
IPartialYastBlockToken,
|
|
5
|
+
IPhrasingContentLine,
|
|
6
|
+
ITokenizer,
|
|
7
|
+
} from '@yozora/core-tokenizer'
|
|
8
|
+
|
|
9
|
+
export type T = HeadingType
|
|
10
|
+
export type INode = Heading
|
|
11
|
+
export const uniqueName = '@yozora/tokenizer-heading'
|
|
12
|
+
|
|
13
|
+
export interface IToken extends IPartialYastBlockToken<T> {
|
|
14
|
+
/**
|
|
15
|
+
* Level of heading
|
|
16
|
+
*/
|
|
17
|
+
depth: 1 | 2 | 3 | 4 | 5 | 6
|
|
18
|
+
/**
|
|
19
|
+
* Contents
|
|
20
|
+
*/
|
|
21
|
+
line: Readonly<IPhrasingContentLine>
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export type IThis = ITokenizer
|
|
25
|
+
|
|
26
|
+
export type ITokenizerProps = Partial<IBaseBlockTokenizerProps>
|