@yozora/tokenizer-definition 2.1.3 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,160 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import {
3
- AsciiCodePoint,
4
- VirtualCodePoint,
5
- isAsciiControlCharacter,
6
- isWhitespaceCharacter,
7
- } from '@yozora/character'
8
- import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
9
-
10
/**
 * Intermediate state of `eatAndCollectLinkDestination`. It is returned by each call
 * and can be passed back in, so that a link destination can be collected from
 * several consecutive fragments of node points.
 *
 * @see https://github.github.com/gfm/#link-destination
 */
export interface ILinkDestinationCollectingState {
  /**
   * Whether a complete, legal link destination has been collected.
   */
  saturated: boolean
  /**
   * Node points collected so far (for the pointy-bracket form this includes
   * the enclosing '<' and '>').
   */
  nodePoints: INodePoint[]
  /**
   * Whether the destination started with an opening angle bracket '<'.
   */
  hasOpenAngleBracket: boolean
  /**
   * Current nesting depth of unescaped parentheses: incremented on '(' and
   * decremented on ')'. It becomes negative when an unmatched ')' is met.
   */
  openParensCount: number
}
34
-
35
/**
 * Collect a link destination from `nodePoints` within `[startIndex, endIndex)`.
 *
 * The collecting is resumable: pass `null` as `state` for the first fragment and
 * the state returned by the previous call for each following fragment.
 *
 * A backslash is always kept. It additionally keeps (and skips) the point that
 * follows it, so an escaped delimiter is never interpreted -- except when there
 * is no following point in this fragment, or the following point is one that
 * must still be examined on its own: a line end in the pointy-bracket form, or
 * a whitespace / ASCII control character in the bare form.
 *
 * @param nodePoints  the node points to scan
 * @param startIndex  index of the first point of the fragment (inclusive)
 * @param endIndex    index just past the last point of the fragment (exclusive)
 * @param state       state returned by a previous call, or `null` to start afresh
 * @returns `nextIndex` is -1 when the fragment is blank, or when a pointy-bracket
 *          destination meets an unescaped '<' or a line end; otherwise it is the
 *          index at which scanning stopped (never greater than `endIndex`).
 *          `state.saturated` tells whether a complete destination was collected.
 * @see https://github.github.com/gfm/#link-destination
 */
export function eatAndCollectLinkDestination(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkDestinationCollectingState | null,
): { nextIndex: number; state: ILinkDestinationCollectingState } {
  let i = startIndex

  // Start with a fresh state when the caller has none to resume from.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      hasOpenAngleBracket: false,
      openParensCount: 0,
    }
  }

  /**
   * Although link destination may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state: state }

  // Nothing collected yet: skip the leading whitespaces and detect the form.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex

    const p = nodePoints[i]
    if (p.codePoint === AsciiCodePoint.OPEN_ANGLE) {
      i += 1
      // eslint-disable-next-line no-param-reassign
      state.hasOpenAngleBracket = true
      state.nodePoints.push(p)
    }
  }

  /**
   * In pointy brackets:
   *  - A sequence of zero or more characters between an opening '<' and
   *    a closing '>' that contains no line breaks or unescaped '<' or '>' characters
   */
  if (state.hasOpenAngleBracket) {
    for (; i < endIndex; ++i) {
      const p = nodePoints[i]
      switch (p.codePoint) {
        case AsciiCodePoint.BACKSLASH: {
          state.nodePoints.push(p)
          const next = i + 1 < endIndex ? nodePoints[i + 1] : undefined
          // A line end must not be swallowed: it invalidates the destination.
          if (next !== undefined && next.codePoint !== VirtualCodePoint.LINE_END) {
            state.nodePoints.push(next)
            i += 1
          }
          break
        }
        case AsciiCodePoint.OPEN_ANGLE:
        case VirtualCodePoint.LINE_END:
          return { nextIndex: -1, state: state }
        case AsciiCodePoint.CLOSE_ANGLE:
          // eslint-disable-next-line no-param-reassign
          state.saturated = true
          state.nodePoints.push(p)
          return { nextIndex: i + 1, state: state }
        default:
          state.nodePoints.push(p)
      }
    }
    // The closing '>' was not met within this fragment.
    return { nextIndex: i, state: state }
  }

  /**
   * Not in pointy brackets:
   *  - A nonempty sequence of characters that does not start with '<', does not include
   *    ASCII space or control characters, and includes parentheses only if
   *
   *    a) they are backslash-escaped or
   *    b) they are part of a balanced pair of unescaped parentheses. (Implementations
   *       may impose limits on parentheses nesting to avoid performance issues,
   *       but at least three levels of nesting should be supported.)
   */
  for (; i < endIndex; ++i) {
    const p = nodePoints[i]
    switch (p.codePoint) {
      case AsciiCodePoint.BACKSLASH: {
        state.nodePoints.push(p)
        const next = i + 1 < endIndex ? nodePoints[i + 1] : undefined
        // Whitespace / control characters still terminate the destination.
        if (
          next !== undefined &&
          !isWhitespaceCharacter(next.codePoint) &&
          !isAsciiControlCharacter(next.codePoint)
        ) {
          state.nodePoints.push(next)
          i += 1
        }
        break
      }
      case AsciiCodePoint.OPEN_PARENTHESIS:
        // eslint-disable-next-line no-param-reassign
        state.openParensCount += 1
        state.nodePoints.push(p)
        break
      case AsciiCodePoint.CLOSE_PARENTHESIS:
        // eslint-disable-next-line no-param-reassign
        state.openParensCount -= 1
        state.nodePoints.push(p)
        // Unmatched ')': stop here without marking the state as saturated.
        if (state.openParensCount < 0) {
          return { nextIndex: i, state: state }
        }
        break
      default:
        if (isWhitespaceCharacter(p.codePoint) || isAsciiControlCharacter(p.codePoint)) {
          // eslint-disable-next-line no-param-reassign
          state.saturated = true
          return { nextIndex: i, state: state }
        }
        state.nodePoints.push(p)
        break
    }
  }

  // The whole fragment belongs to the destination.
  // eslint-disable-next-line no-param-reassign
  state.saturated = true
  return { nextIndex: i, state: state }
}
@@ -1,116 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
3
- import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
4
-
5
/**
 * Intermediate state of `eatAndCollectLinkLabel`. It is returned by each call
 * and can be passed back in, so that a link label can be collected from
 * several consecutive fragments of node points.
 *
 * @see https://github.github.com/gfm/#link-label
 */
export interface ILinkLabelCollectingState {
  /**
   * Whether a complete, legal link label has been collected.
   */
  saturated: boolean
  /**
   * Node points collected so far (including the enclosing '[' and ']').
   */
  nodePoints: INodePoint[]
  /**
   * Whether at least one non-whitespace character has been met between the brackets.
   */
  hasNonWhitespaceCharacter: boolean
}
25
-
26
/**
 * Collect a link label from `nodePoints` within `[startIndex, endIndex)`.
 *
 * A link label begins with a left bracket '[' and ends with the first right bracket ']'
 * that is not backslash-escaped. Between these brackets there must be at least one
 * non-whitespace character. Unescaped square bracket characters are not allowed inside
 * the opening and closing square brackets of link labels. A link label can have at most
 * 999 characters inside the square brackets (this limit is not checked here).
 *
 * One label matches another just in case their normalized forms are equal. To normalize
 * a label, strip off the opening and closing brackets, perform the Unicode case fold,
 * strip leading and trailing whitespace and collapse consecutive internal whitespace to
 * a single space. If there are multiple matching reference link definitions, the one that
 * comes first in the document is used. (It is desirable in such cases to emit a warning.)
 *
 * The collecting is resumable: pass `null` as `state` for the first fragment and
 * the state returned by the previous call for each following fragment.
 *
 * @param nodePoints  the node points to scan
 * @param startIndex  index of the first point of the fragment (inclusive)
 * @param endIndex    index just past the last point of the fragment (exclusive)
 * @param state       state returned by a previous call, or `null` to start afresh
 * @returns `nextIndex` is -1 when the fragment is blank, does not start with '['
 *          (on the first fragment), contains an unescaped '[' or closes a label
 *          without any non-whitespace character; it is the index just past the
 *          closing ']' when the label is complete (`state.saturated` is true);
 *          and it is `endIndex` when the fragment was fully consumed but the
 *          label is not closed yet.
 * @see https://github.github.com/gfm/#link-label
 */
export function eatAndCollectLinkLabel(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkLabelCollectingState | null,
): { nextIndex: number; state: ILinkLabelCollectingState } {
  let i = startIndex

  // Start with a fresh state when the caller has none to resume from.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      hasNonWhitespaceCharacter: false,
    }
  }

  /**
   * Although link label may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state: state }

  // Nothing collected yet: the first non-whitespace point must be '['.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex

    const p = nodePoints[i]
    if (p.codePoint !== AsciiCodePoint.OPEN_BRACKET) {
      return { nextIndex: -1, state: state }
    }

    i += 1
    state.nodePoints.push(p)
  }

  for (; i < endIndex; ++i) {
    const p = nodePoints[i]
    switch (p.codePoint) {
      case AsciiCodePoint.BACKSLASH:
        // eslint-disable-next-line no-param-reassign
        state.hasNonWhitespaceCharacter = true
        // The backslash itself is always kept, even as the last point of the fragment.
        state.nodePoints.push(p)
        if (i + 1 < endIndex) {
          // Keep the escaped point and skip it, so it is never interpreted.
          state.nodePoints.push(nodePoints[i + 1])
          i += 1
        }
        break
      case AsciiCodePoint.OPEN_BRACKET:
        return { nextIndex: -1, state: state }
      case AsciiCodePoint.CLOSE_BRACKET:
        state.nodePoints.push(p)
        if (state.hasNonWhitespaceCharacter) {
          // eslint-disable-next-line no-param-reassign
          state.saturated = true
          return { nextIndex: i + 1, state: state }
        }
        return { nextIndex: -1, state: state }
      default:
        if (!isWhitespaceCharacter(p.codePoint)) {
          // eslint-disable-next-line no-param-reassign
          state.hasNonWhitespaceCharacter = true
        }
        state.nodePoints.push(p)
    }
  }

  // The fragment is exhausted and the closing ']' has not been met yet.
  return { nextIndex: endIndex, state: state }
}
@@ -1,143 +0,0 @@
1
- import type { INodePoint } from '@yozora/character'
2
- import { AsciiCodePoint, VirtualCodePoint } from '@yozora/character'
3
- import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
4
-
5
/**
 * Intermediate state of `eatAndCollectLinkTitle`. It is returned by each call
 * and can be passed back in, so that a link title can be collected from
 * several consecutive fragments of node points.
 *
 * @see https://github.github.com/gfm/#link-title
 */
export interface ILinkTitleCollectingState {
  /**
   * Whether a complete, legal link title has been collected.
   */
  saturated: boolean
  /**
   * Node points collected so far (including the wrapping characters).
   */
  nodePoints: INodePoint[]
  /**
   * Code point of the character that opened the link title ('"', '\'' or '('),
   * or `null` if no opening character has been met yet.
   */
  wrapSymbol: number | null
}
25
-
26
/**
 * Collect a link title from `nodePoints` within `[startIndex, endIndex)`.
 *
 * The title must be wrapped in '"', '\'' or a pair of parentheses. The
 * collecting is resumable: pass `null` as `state` for the first fragment and
 * the state returned by the previous call for each following fragment.
 *
 * A backslash is always kept; it also keeps (and skips) the point following
 * it when there is one in this fragment, so an escaped wrapping character
 * never closes the title.
 *
 * @param nodePoints  the node points to scan
 * @param startIndex  index of the first point of the fragment (inclusive)
 * @param endIndex    index just past the last point of the fragment (exclusive)
 * @param state       state returned by a previous call, or `null` to start afresh
 * @returns `nextIndex` is -1 when the fragment is blank, the title does not
 *          start with a wrapping character, or a parenthesized title contains
 *          an unescaped '(' or a ')' that is not directly followed by the end
 *          of the fragment or a line end; it is the index just past the
 *          closing character when the title is complete (`state.saturated` is
 *          true); and it is `endIndex` when the fragment was fully consumed
 *          but the title is not closed yet.
 * @see https://github.github.com/gfm/#link-title
 */
export function eatAndCollectLinkTitle(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkTitleCollectingState | null,
): { nextIndex: number; state: ILinkTitleCollectingState } {
  let i = startIndex

  // Start with a fresh state when the caller has none to resume from.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      wrapSymbol: null,
    }
  }

  /**
   * Although link titles may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state: state }

  // Nothing collected yet: the first non-whitespace point decides the wrapping character.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex
    const p = nodePoints[i]

    switch (p.codePoint) {
      case AsciiCodePoint.DOUBLE_QUOTE:
      case AsciiCodePoint.SINGLE_QUOTE:
      case AsciiCodePoint.OPEN_PARENTHESIS:
        // eslint-disable-next-line no-param-reassign
        state.wrapSymbol = p.codePoint
        state.nodePoints.push(p)
        i += 1
        break
      default:
        return { nextIndex: -1, state: state }
    }
  }

  const wrapSymbol = state.wrapSymbol
  if (wrapSymbol == null) return { nextIndex: -1, state: state }

  switch (wrapSymbol) {
    /**
     *  - a sequence of zero or more characters between straight double-quote characters '"',
     *    including a '"' character only if it is backslash-escaped, or
     *  - a sequence of zero or more characters between straight single-quote characters '\'',
     *    including a '\'' character only if it is backslash-escaped,
     */
    case AsciiCodePoint.DOUBLE_QUOTE:
    case AsciiCodePoint.SINGLE_QUOTE: {
      for (; i < endIndex; ++i) {
        const p = nodePoints[i]
        switch (p.codePoint) {
          case AsciiCodePoint.BACKSLASH:
            // The backslash itself is always kept, even as the last point of the fragment.
            state.nodePoints.push(p)
            if (i + 1 < endIndex) {
              state.nodePoints.push(nodePoints[i + 1])
              i += 1
            }
            break
          case wrapSymbol:
            // eslint-disable-next-line no-param-reassign
            state.saturated = true
            state.nodePoints.push(p)
            return { nextIndex: i + 1, state: state }
          default:
            state.nodePoints.push(p)
        }
      }
      break
    }
    /**
     * a sequence of zero or more characters between matching parentheses '((...))',
     * including a '(' or ')' character only if it is backslash-escaped.
     */
    case AsciiCodePoint.OPEN_PARENTHESIS: {
      for (; i < endIndex; ++i) {
        const p = nodePoints[i]
        switch (p.codePoint) {
          case AsciiCodePoint.BACKSLASH:
            // The backslash itself is always kept, even as the last point of the fragment.
            state.nodePoints.push(p)
            if (i + 1 < endIndex) {
              state.nodePoints.push(nodePoints[i + 1])
              i += 1
            }
            break
          case AsciiCodePoint.OPEN_PARENTHESIS:
            return { nextIndex: -1, state: state }
          case AsciiCodePoint.CLOSE_PARENTHESIS:
            // The closing ')' is only accepted as the last thing on the line.
            if (i + 1 >= endIndex || nodePoints[i + 1].codePoint === VirtualCodePoint.LINE_END) {
              state.nodePoints.push(p)
              // eslint-disable-next-line no-param-reassign
              state.saturated = true
              // Stop right here so that nothing after the closing ')' is
              // appended to the already complete title.
              return { nextIndex: i + 1, state: state }
            }
            return { nextIndex: -1, state: state }
          default:
            state.nodePoints.push(p)
        }
      }
      break
    }
  }

  // The fragment is exhausted and the closing character has not been met yet.
  return { nextIndex: endIndex, state: state }
}