@yozora/tokenizer-definition 2.1.3 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +598 -0
- package/README.md +42 -42
- package/package.json +6 -6
- package/src/index.ts +0 -12
- package/src/match.ts +0 -340
- package/src/parse.ts +0 -50
- package/src/tokenizer.ts +0 -31
- package/src/types.ts +0 -58
- package/src/util/link-destination.ts +0 -160
- package/src/util/link-label.ts +0 -116
- package/src/util/link-title.ts +0 -143
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import {
|
|
3
|
-
AsciiCodePoint,
|
|
4
|
-
VirtualCodePoint,
|
|
5
|
-
isAsciiControlCharacter,
|
|
6
|
-
isWhitespaceCharacter,
|
|
7
|
-
} from '@yozora/character'
|
|
8
|
-
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
9
|
-
|
|
10
|
-
/**
 * Intermediate state carried by eatAndCollectLinkDestination from one call to
 * the next, so that a link destination can be collected out of several
 * successive fragments of node points.
 *
 * @see https://github.github.com/gfm/#link-destination
 */
export interface ILinkDestinationCollectingState {
  /** True once a complete, legal link destination has been collected. */
  saturated: boolean
  /** Node points gathered so far; the angle brackets, if any, are included. */
  nodePoints: Array<INodePoint>
  /** True when the destination started with an opening angle bracket '<'. */
  hasOpenAngleBracket: boolean
  /** Running balance of unescaped parentheses: +1 per '(' and -1 per ')'. */
  openParensCount: number
}
|
|
34
|
-
|
|
35
|
-
/**
 * Try to consume a link destination from `nodePoints` within the range
 * `[startIndex, endIndex)`, appending every accepted point to the collecting
 * state.
 *
 * Two forms are recognised:
 *  - pointy-bracket form: zero or more characters between '<' and '>' with no
 *    line break and no unescaped '<' or '>';
 *  - bare form: a nonempty run of characters that does not start with '<',
 *    holds no whitespace or ASCII control character, and holds parentheses
 *    only when they are backslash-escaped or balanced.
 *
 * @param nodePoints  points to scan
 * @param startIndex  index at which scanning starts (inclusive)
 * @param endIndex    index at which scanning must stop (exclusive)
 * @param state       state returned by a previous call, or `null` to start a
 *                    new destination; a passed-in state is mutated in place
 * @returns `nextIndex` is -1 when the range holds only whitespace or when an
 *          illegal character is met inside pointy brackets; otherwise it is
 *          the index at which scanning stopped. `state.saturated` tells whether
 *          a complete destination has been collected.
 * @see https://github.github.com/gfm/#link-destination
 */
export function eatAndCollectLinkDestination(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkDestinationCollectingState | null,
): { nextIndex: number; state: ILinkDestinationCollectingState } {
  let i = startIndex

  // Start from a blank state when the caller has nothing to resume.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      hasOpenAngleBracket: false,
      openParensCount: 0,
    }
  }

  /**
   * Although link destination may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state }

  // Nothing collected yet: skip leading whitespace and detect the '<' form.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex

    const p = nodePoints[i]
    if (p.codePoint === AsciiCodePoint.OPEN_ANGLE) {
      i += 1
      // eslint-disable-next-line no-param-reassign
      state.hasOpenAngleBracket = true
      state.nodePoints.push(p)
    }
  }

  /**
   * In pointy brackets:
   *  - A sequence of zero or more characters between an opening '<' and
   *    a closing '>' that contains no line breaks or unescaped '<' or '>' characters
   */
  if (state.hasOpenAngleBracket) {
    for (; i < endIndex; ++i) {
      const p = nodePoints[i]
      switch (p.codePoint) {
        case AsciiCodePoint.BACKSLASH: {
          // The backslash itself is always kept, even as the last point of
          // the range, where it is a literal character.
          state.nodePoints.push(p)
          const next = i + 1 < endIndex ? nodePoints[i + 1] : undefined
          // A line end cannot be escaped: leave it to the next iteration so
          // the destination is rejected as containing a line break.
          if (next !== undefined && next.codePoint !== VirtualCodePoint.LINE_END) {
            state.nodePoints.push(next)
            i += 1
          }
          break
        }
        case AsciiCodePoint.OPEN_ANGLE:
        case VirtualCodePoint.LINE_END:
          return { nextIndex: -1, state }
        case AsciiCodePoint.CLOSE_ANGLE:
          // eslint-disable-next-line no-param-reassign
          state.saturated = true
          state.nodePoints.push(p)
          return { nextIndex: i + 1, state }
        default:
          state.nodePoints.push(p)
      }
    }
    // Range exhausted before the closing '>': not saturated yet.
    return { nextIndex: i, state }
  }

  /**
   * Not in pointy brackets:
   *  - A nonempty sequence of characters that does not start with '<', does not include
   *    ASCII space or control characters, and includes parentheses only if
   *
   *    a) they are backslash-escaped or
   *    b) they are part of a balanced pair of unescaped parentheses. (Implementations
   *       may impose limits on parentheses nesting to avoid performance issues,
   *       but at least three levels of nesting should be supported.)
   */
  for (; i < endIndex; ++i) {
    const p = nodePoints[i]
    switch (p.codePoint) {
      case AsciiCodePoint.BACKSLASH: {
        // Keep the backslash in every case (a trailing one is literal).
        state.nodePoints.push(p)
        const next = i + 1 < endIndex ? nodePoints[i + 1] : undefined
        // Whitespace and control characters cannot be escaped into the
        // destination: leave them to the next iteration, which ends the
        // destination exactly as it would without the backslash.
        if (
          next !== undefined &&
          !isWhitespaceCharacter(next.codePoint) &&
          !isAsciiControlCharacter(next.codePoint)
        ) {
          state.nodePoints.push(next)
          i += 1
        }
        break
      }
      case AsciiCodePoint.OPEN_PARENTHESIS:
        // eslint-disable-next-line no-param-reassign
        state.openParensCount += 1
        state.nodePoints.push(p)
        break
      case AsciiCodePoint.CLOSE_PARENTHESIS:
        // eslint-disable-next-line no-param-reassign
        state.openParensCount -= 1
        state.nodePoints.push(p)
        // An unbalanced ')' stops the scan at that ')' without saturating.
        // NOTE(review): the ')' has already been pushed although nextIndex
        // still points at it; kept as-is because callers are not visible here.
        if (state.openParensCount < 0) {
          return { nextIndex: i, state }
        }
        break
      default:
        // Whitespace or a control character ends the destination.
        if (isWhitespaceCharacter(p.codePoint) || isAsciiControlCharacter(p.codePoint)) {
          // eslint-disable-next-line no-param-reassign
          state.saturated = true
          return { nextIndex: i, state }
        }
        state.nodePoints.push(p)
        break
    }
  }

  // The whole range belongs to the destination.
  // eslint-disable-next-line no-param-reassign
  state.saturated = true
  return { nextIndex: i, state }
}
|
package/src/util/link-label.ts
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
|
|
3
|
-
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
4
|
-
|
|
5
|
-
/**
 * Intermediate state carried by eatAndCollectLinkLabel from one call to the
 * next, so that a link label can be collected out of several successive
 * fragments of node points.
 *
 * @see https://github.github.com/gfm/#link-label
 */
export interface ILinkLabelCollectingState {
  /** True once a complete, legal link label has been collected. */
  saturated: boolean
  /** Node points gathered so far; the square brackets are included. */
  nodePoints: Array<INodePoint>
  /** True once a non-whitespace character (or a backslash) has been seen. */
  hasNonWhitespaceCharacter: boolean
}
|
|
25
|
-
|
|
26
|
-
/**
 * Try to consume a link label from `nodePoints` within the range
 * `[startIndex, endIndex)`, appending every accepted point to the collecting
 * state.
 *
 * A link label begins with a left bracket '[' and ends with the first right bracket ']'
 * that is not backslash-escaped. Between these brackets there must be at least one
 * non-whitespace character. Unescaped square bracket characters are not allowed inside
 * the opening and closing square brackets of link labels. A link label can have at most
 * 999 characters inside the square brackets (this limit is not enforced here).
 *
 * One label matches another just in case their normalized forms are equal. To normalize
 * a label, strip off the opening and closing brackets, perform the Unicode case fold,
 * strip leading and trailing whitespace and collapse consecutive internal whitespace to
 * a single space. If there are multiple matching reference link definitions, the one that
 * comes first in the document is used. (It is desirable in such cases to emit a warning.)
 *
 * @param nodePoints  points to scan
 * @param startIndex  index at which scanning starts (inclusive)
 * @param endIndex    index at which scanning must stop (exclusive)
 * @param state       state returned by a previous call, or `null` to start a
 *                    new label; a passed-in state is mutated in place
 * @returns `nextIndex` is -1 when no label can be formed (blank range, missing
 *          '[', unescaped '[' inside, or a label without any non-whitespace
 *          character); it is the index just after ']' when the label is
 *          complete (`state.saturated` is true); it is `endIndex` when the
 *          range ran out before the closing ']'.
 * @see https://github.github.com/gfm/#link-label
 */
export function eatAndCollectLinkLabel(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkLabelCollectingState | null,
): { nextIndex: number; state: ILinkLabelCollectingState } {
  let i = startIndex

  // Start from a blank state when the caller has nothing to resume.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      hasNonWhitespaceCharacter: false,
    }
  }

  /**
   * Although link label may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state }

  // Nothing collected yet: the first non-whitespace point must be '['.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex

    const p = nodePoints[i]
    if (p.codePoint !== AsciiCodePoint.OPEN_BRACKET) {
      return { nextIndex: -1, state }
    }

    i += 1
    state.nodePoints.push(p)
  }

  for (; i < endIndex; ++i) {
    const p = nodePoints[i]
    switch (p.codePoint) {
      case AsciiCodePoint.BACKSLASH:
        // eslint-disable-next-line no-param-reassign
        state.hasNonWhitespaceCharacter = true
        // Keep the backslash even when it is the last point of the range,
        // then take the escaped point along with it when there is one.
        state.nodePoints.push(p)
        if (i + 1 < endIndex) {
          state.nodePoints.push(nodePoints[i + 1])
          i += 1
        }
        break
      case AsciiCodePoint.OPEN_BRACKET:
        // Unescaped '[' is not allowed inside a label.
        return { nextIndex: -1, state }
      case AsciiCodePoint.CLOSE_BRACKET:
        state.nodePoints.push(p)
        // A label made only of whitespace is not a label.
        if (!state.hasNonWhitespaceCharacter) {
          return { nextIndex: -1, state }
        }
        // eslint-disable-next-line no-param-reassign
        state.saturated = true
        return { nextIndex: i + 1, state }
      default:
        if (!isWhitespaceCharacter(p.codePoint)) {
          // eslint-disable-next-line no-param-reassign
          state.hasNonWhitespaceCharacter = true
        }
        state.nodePoints.push(p)
    }
  }

  // Range exhausted before the closing ']': report the real stop position
  // (the end of the range) so the caller can resume with the next fragment.
  return { nextIndex: endIndex, state }
}
|
package/src/util/link-title.ts
DELETED
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
import type { INodePoint } from '@yozora/character'
|
|
2
|
-
import { AsciiCodePoint, VirtualCodePoint } from '@yozora/character'
|
|
3
|
-
import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
|
|
4
|
-
|
|
5
|
-
/**
 * Intermediate state carried by eatAndCollectLinkTitle from one call to the
 * next, so that a link title can be collected out of several successive
 * fragments of node points.
 *
 * @see https://github.github.com/gfm/#link-title
 */
export interface ILinkTitleCollectingState {
  /** True once a complete, legal link title has been collected. */
  saturated: boolean
  /** Node points gathered so far; the wrapping delimiters are included. */
  nodePoints: Array<INodePoint>
  /**
   * Code point of the character that opened the title ('"', '\'' or '('),
   * or `null` while no opening delimiter has been matched.
   */
  wrapSymbol: number | null
}
|
|
25
|
-
|
|
26
|
-
/**
 * Try to consume a link title from `nodePoints` within the range
 * `[startIndex, endIndex)`, appending every accepted point to the collecting
 * state.
 *
 * A link title is one of:
 *  - a sequence of zero or more characters between straight double-quote characters '"',
 *    including a '"' character only if it is backslash-escaped, or
 *  - a sequence of zero or more characters between straight single-quote characters '\'',
 *    including a '\'' character only if it is backslash-escaped, or
 *  - a sequence of zero or more characters between matching parentheses '(...)',
 *    including a '(' or ')' character only if it is backslash-escaped.
 *
 * @param nodePoints  points to scan
 * @param startIndex  index at which scanning starts (inclusive)
 * @param endIndex    index at which scanning must stop (exclusive)
 * @param state       state returned by a previous call, or `null` to start a
 *                    new title; a passed-in state is mutated in place
 * @returns `nextIndex` is -1 when no title can be formed (blank range, no
 *          opening delimiter, or an unescaped '(' inside a parenthesised
 *          title); it is the index just after the closing delimiter when the
 *          title is complete (`state.saturated` is true); it is `endIndex`
 *          when the range ran out before the closing delimiter.
 * @see https://github.github.com/gfm/#link-title
 */
export function eatAndCollectLinkTitle(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkTitleCollectingState | null,
): { nextIndex: number; state: ILinkTitleCollectingState } {
  let i = startIndex

  // Start from a blank state when the caller has nothing to resume.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      wrapSymbol: null,
    }
  }

  /**
   * Although link titles may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state }

  // Nothing collected yet: the first non-whitespace point must open a title.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex
    const p = nodePoints[i]

    switch (p.codePoint) {
      case AsciiCodePoint.DOUBLE_QUOTE:
      case AsciiCodePoint.SINGLE_QUOTE:
      case AsciiCodePoint.OPEN_PARENTHESIS:
        // eslint-disable-next-line no-param-reassign
        state.wrapSymbol = p.codePoint
        state.nodePoints.push(p)
        i += 1
        break
      default:
        return { nextIndex: -1, state }
    }
  }

  if (state.wrapSymbol == null) return { nextIndex: -1, state }

  // Work out which code point closes the title.
  const isParenthesized = state.wrapSymbol === AsciiCodePoint.OPEN_PARENTHESIS
  let closingSymbol: number
  switch (state.wrapSymbol) {
    case AsciiCodePoint.DOUBLE_QUOTE:
    case AsciiCodePoint.SINGLE_QUOTE:
      closingSymbol = state.wrapSymbol
      break
    case AsciiCodePoint.OPEN_PARENTHESIS:
      closingSymbol = AsciiCodePoint.CLOSE_PARENTHESIS
      break
    default:
      // Unknown wrapper in a caller-supplied state: nothing is scanned.
      return { nextIndex: endIndex, state }
  }

  for (; i < endIndex; ++i) {
    const p = nodePoints[i]

    if (p.codePoint === AsciiCodePoint.BACKSLASH) {
      // Keep the backslash even when it is the last point of the range,
      // then take the escaped point along with it when there is one.
      state.nodePoints.push(p)
      if (i + 1 < endIndex) {
        state.nodePoints.push(nodePoints[i + 1])
        i += 1
      }
      continue
    }

    if (p.codePoint === closingSymbol) {
      // Stop right after the closing delimiter for every wrapper kind, so
      // nothing that follows it (trailing spaces, line end) is collected;
      // whatever comes next is left for the caller to examine.
      // eslint-disable-next-line no-param-reassign
      state.saturated = true
      state.nodePoints.push(p)
      return { nextIndex: i + 1, state }
    }

    // Inside '(...)' an unescaped '(' is not allowed.
    if (isParenthesized && p.codePoint === AsciiCodePoint.OPEN_PARENTHESIS) {
      return { nextIndex: -1, state }
    }

    state.nodePoints.push(p)
  }

  // Range exhausted before the closing delimiter: not saturated yet.
  return { nextIndex: endIndex, state }
}
|