@yozora/tokenizer-list 2.1.3 → 2.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/index.ts +0 -3
- package/src/match.ts +0 -368
- package/src/parse.ts +0 -130
- package/src/tokenizer.ts +0 -45
- package/src/types.ts +0 -76
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-list",
|
|
3
|
-
"version": "2.1.3",
|
|
3
|
+
"version": "2.1.4",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
},
|
|
28
28
|
"files": [
|
|
29
29
|
"lib/",
|
|
30
|
-
"
|
|
30
|
+
"lib/**/*.map",
|
|
31
31
|
"package.json",
|
|
32
32
|
"CHANGELOG.md",
|
|
33
33
|
"LICENSE",
|
|
@@ -39,9 +39,9 @@
|
|
|
39
39
|
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
|
-
"@yozora/ast": "^2.1.3",
|
|
43
|
-
"@yozora/character": "^2.1.3",
|
|
44
|
-
"@yozora/core-tokenizer": "^2.1.3"
|
|
42
|
+
"@yozora/ast": "^2.1.4",
|
|
43
|
+
"@yozora/character": "^2.1.4",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.1.4"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "aa464ed1e3cd84892773a833910cfc53a556bf5f"
|
|
47
47
|
}
|
package/src/index.ts
DELETED
package/src/match.ts
DELETED
|
@@ -1,368 +0,0 @@
|
|
|
1
|
-
import { ListType, TaskStatus } from '@yozora/ast'
|
|
2
|
-
import type { INodePoint } from '@yozora/character'
|
|
3
|
-
import {
|
|
4
|
-
AsciiCodePoint,
|
|
5
|
-
VirtualCodePoint,
|
|
6
|
-
isAsciiDigitCharacter,
|
|
7
|
-
isAsciiLowerLetter,
|
|
8
|
-
isAsciiUpperLetter,
|
|
9
|
-
isSpaceCharacter,
|
|
10
|
-
isWhitespaceCharacter,
|
|
11
|
-
} from '@yozora/character'
|
|
12
|
-
import type {
|
|
13
|
-
IBlockToken,
|
|
14
|
-
IMatchBlockHookCreator,
|
|
15
|
-
IPhrasingContentLine,
|
|
16
|
-
IResultOfEatAndInterruptPreviousSibling,
|
|
17
|
-
IResultOfEatContinuationText,
|
|
18
|
-
IResultOfEatOpener,
|
|
19
|
-
} from '@yozora/core-tokenizer'
|
|
20
|
-
import { calcEndPoint, calcStartPoint } from '@yozora/core-tokenizer'
|
|
21
|
-
import type { IThis, IToken, T } from './types'
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* The following rules define list items:
|
|
25
|
-
* - Basic case. If a sequence of lines Ls constitute a sequence of blocks Bs
|
|
26
|
-
* starting with a non-whitespace character, and M is a list marker of width
|
|
27
|
-
* W followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
|
|
28
|
-
* following spaces to the first line of Ls, and indenting subsequent lines
|
|
29
|
-
* of Ls by W + N spaces, is a list item with Bs as its contents. The type
|
|
30
|
-
* of the list item (bullet or ordered) is determined by the type of its
|
|
31
|
-
* list marker. If the list item is ordered, then it is also assigned a
|
|
32
|
-
* start number, based on the ordered list marker.
|
|
33
|
-
*
|
|
34
|
-
* Exceptions:
|
|
35
|
-
* - When the first list item in a list interrupts a paragraph—that is,
|
|
36
|
-
* when it starts on a line that would otherwise count as paragraph
|
|
37
|
-
* continuation text—then
|
|
38
|
-
* (a) the lines Ls must not begin with a blank line, and
|
|
39
|
-
* (b) if the list item is ordered, the start number must be 1.
|
|
40
|
-
* - If any line is a thematic break then that line is not a list item.
|
|
41
|
-
*
|
|
42
|
-
* @see https://github.com/syntax-tree/mdast#listitem
|
|
43
|
-
* @see https://github.github.com/gfm/#list-items
|
|
44
|
-
*/
|
|
45
|
-
|
|
46
|
-
export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
|
|
47
|
-
const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this
|
|
48
|
-
|
|
49
|
-
return {
|
|
50
|
-
isContainingBlock: true,
|
|
51
|
-
eatOpener,
|
|
52
|
-
eatAndInterruptPreviousSibling,
|
|
53
|
-
eatContinuationText,
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
|
|
57
|
-
/**
|
|
58
|
-
* Four spaces are too much.
|
|
59
|
-
* @see https://github.github.com/gfm/#example-253
|
|
60
|
-
*/
|
|
61
|
-
if (line.countOfPrecedeSpaces >= 4) return null
|
|
62
|
-
|
|
63
|
-
const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
|
|
64
|
-
if (firstNonWhitespaceIndex >= endIndex) return null
|
|
65
|
-
|
|
66
|
-
let ordered = false
|
|
67
|
-
let marker: number | null = null
|
|
68
|
-
let orderType: '1' | 'a' | 'A' | 'i' | 'I' | undefined
|
|
69
|
-
let order: number | undefined
|
|
70
|
-
let i = firstNonWhitespaceIndex
|
|
71
|
-
let c = nodePoints[i].codePoint
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
* Try to resolve an ordered list-item.
|
|
75
|
-
*
|
|
76
|
-
* An ordered list marker is a sequence of 1–9 arabic digits (0-9),
|
|
77
|
-
* followed by either a . character or a ) character. (The reason
|
|
78
|
-
* for the length limit is that with 10 digits we start seeing integer
|
|
79
|
-
* overflows in some browsers.)
|
|
80
|
-
* @see https://github.github.com/gfm/#ordered-list-marker
|
|
81
|
-
*
|
|
82
|
-
* Extension: /[a-z]/ and /[A-Z]/ and [iv]+ also could be consisted the
|
|
83
|
-
* marker of an ordered list.
|
|
84
|
-
*/
|
|
85
|
-
if (i + 1 < endIndex) {
|
|
86
|
-
// TODO Support roman numerals.
|
|
87
|
-
const c0 = c
|
|
88
|
-
if (isAsciiDigitCharacter(c0)) {
|
|
89
|
-
orderType = '1'
|
|
90
|
-
let v = c0 - AsciiCodePoint.DIGIT0
|
|
91
|
-
for (i += 1; i < endIndex; ++i) {
|
|
92
|
-
c = nodePoints[i].codePoint
|
|
93
|
-
if (!isAsciiDigitCharacter(c)) break
|
|
94
|
-
v = v * 10 + c - AsciiCodePoint.DIGIT0
|
|
95
|
-
}
|
|
96
|
-
order = v
|
|
97
|
-
orderType = '1'
|
|
98
|
-
} else if (isAsciiLowerLetter(c0)) {
|
|
99
|
-
i += 1
|
|
100
|
-
c = nodePoints[i].codePoint
|
|
101
|
-
order = c0 - AsciiCodePoint.LOWERCASE_A + 1
|
|
102
|
-
orderType = 'a'
|
|
103
|
-
} else if (isAsciiUpperLetter(c0)) {
|
|
104
|
-
i += 1
|
|
105
|
-
c = nodePoints[i].codePoint
|
|
106
|
-
order = c0 - AsciiCodePoint.UPPERCASE_A + 1
|
|
107
|
-
orderType = 'A'
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
// eat '.' / ')'
|
|
111
|
-
if (
|
|
112
|
-
i > firstNonWhitespaceIndex &&
|
|
113
|
-
i - firstNonWhitespaceIndex <= 9 &&
|
|
114
|
-
(c === AsciiCodePoint.DOT || c === AsciiCodePoint.CLOSE_PARENTHESIS)
|
|
115
|
-
) {
|
|
116
|
-
i += 1
|
|
117
|
-
ordered = true
|
|
118
|
-
marker = c
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
/**
|
|
123
|
-
* Try to resolve a bullet list-item.
|
|
124
|
-
*
|
|
125
|
-
* A bullet list marker is a -, +, or * character.
|
|
126
|
-
* @see https://github.github.com/gfm/#bullet-list-marker
|
|
127
|
-
*/
|
|
128
|
-
if (!ordered) {
|
|
129
|
-
if (
|
|
130
|
-
c === AsciiCodePoint.PLUS_SIGN ||
|
|
131
|
-
c === AsciiCodePoint.MINUS_SIGN ||
|
|
132
|
-
c === AsciiCodePoint.ASTERISK
|
|
133
|
-
) {
|
|
134
|
-
i += 1
|
|
135
|
-
marker = c
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
if (marker == null) return null
|
|
140
|
-
|
|
141
|
-
/**
|
|
142
|
-
* When the list-item mark followed by a tab, it is treated as if it were
|
|
143
|
-
* expanded into three spaces.
|
|
144
|
-
*
|
|
145
|
-
* @see https://github.github.com/gfm/#example-7
|
|
146
|
-
*/
|
|
147
|
-
let countOfSpaces = 0,
|
|
148
|
-
nextIndex = i
|
|
149
|
-
if (nextIndex < endIndex) {
|
|
150
|
-
c = nodePoints[nextIndex].codePoint
|
|
151
|
-
if (c === VirtualCodePoint.SPACE) nextIndex += 1
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
/**
|
|
155
|
-
* #Rule1 Basic case
|
|
156
|
-
*
|
|
157
|
-
* If a sequence of lines Ls constitute a sequence of blocks Bs starting
|
|
158
|
-
* with a non-whitespace character, and M is a list marker of width W
|
|
159
|
-
* followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
|
|
160
|
-
* following spaces to the first line of Ls, and indenting subsequent
|
|
161
|
-
* lines of Ls by W + N spaces, is a list item with Bs as its contents.
|
|
162
|
-
* The type of the list item (bullet or ordered) is determined by the
|
|
163
|
-
* type of its list marker. If the list item is ordered, then it is also
|
|
164
|
-
* assigned a start number, based on the ordered list marker.
|
|
165
|
-
* @see https://github.github.com/gfm/#list-items Basic case
|
|
166
|
-
*/
|
|
167
|
-
for (; nextIndex < endIndex; ++nextIndex) {
|
|
168
|
-
c = nodePoints[nextIndex].codePoint
|
|
169
|
-
if (!isSpaceCharacter(c)) break
|
|
170
|
-
countOfSpaces += 1
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
/**
|
|
174
|
-
* Rule#2 Item starting with indented code.
|
|
175
|
-
*
|
|
176
|
-
* If a sequence of lines Ls constitute a sequence of blocks Bs starting
|
|
177
|
-
* with an indented code block, and M is a list marker of width W followed
|
|
178
|
-
* by one space, then the result of prepending M and the following space to
|
|
179
|
-
* the first line of Ls, and indenting subsequent lines of Ls by W + 1 spaces,
|
|
180
|
-
* is a list item with Bs as its contents. If a line is empty, then it need
|
|
181
|
-
* not be indented. The type of the list item (bullet or ordered) is
|
|
182
|
-
* determined by the type of its list marker. If the list item is ordered,
|
|
183
|
-
* then it is also assigned a start number, based on the ordered list marker.
|
|
184
|
-
* @see https://github.github.com/gfm/#list-items Item starting with indented code.
|
|
185
|
-
*/
|
|
186
|
-
if (countOfSpaces > 4) {
|
|
187
|
-
nextIndex -= countOfSpaces - 1
|
|
188
|
-
countOfSpaces = 1
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
/**
|
|
192
|
-
* Rule#3 Item starting with a blank line.
|
|
193
|
-
*
|
|
194
|
-
* If a sequence of lines Ls starting with a single blank line constitute
|
|
195
|
-
* a (possibly empty) sequence of blocks Bs, not separated from each other
|
|
196
|
-
* by more than one blank line, and M is a list marker of width W, then the
|
|
197
|
-
* result of prepending M to the first line of Ls, and indenting subsequent
|
|
198
|
-
* lines of Ls by W + 1 spaces, is a list item with Bs as its contents.
|
|
199
|
-
* If a line is empty, then it need not be indented. The type of the list
|
|
200
|
-
* item (bullet or ordered) is determined by the type of its list marker.
|
|
201
|
-
* If the list item is ordered, then it is also assigned a start number,
|
|
202
|
-
* based on the ordered list marker.
|
|
203
|
-
* @see https://github.github.com/gfm/#list-items Item starting with a blank line
|
|
204
|
-
*/
|
|
205
|
-
if (countOfSpaces === 0 && nextIndex < endIndex && c !== VirtualCodePoint.LINE_END) return null
|
|
206
|
-
|
|
207
|
-
const countOfTopBlankLine = c === VirtualCodePoint.LINE_END ? 1 : -1
|
|
208
|
-
if (c === VirtualCodePoint.LINE_END) {
|
|
209
|
-
nextIndex -= countOfSpaces - 1
|
|
210
|
-
countOfSpaces = 1
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
/**
|
|
214
|
-
* Rule#4 Indentation.
|
|
215
|
-
*
|
|
216
|
-
* If a sequence of lines Ls constitutes a list item according to rule #1,
|
|
217
|
-
* #2, or #3, then the result of indenting each line of Ls by 1-3 spaces
|
|
218
|
-
* (the same for each line) also constitutes a list item with the same
|
|
219
|
-
* contents and attributes. If a line is empty, then it need not be indented.
|
|
220
|
-
*/
|
|
221
|
-
const indent = i - startIndex + countOfSpaces
|
|
222
|
-
|
|
223
|
-
// Try to resolve task status.
|
|
224
|
-
let status: TaskStatus | null = null
|
|
225
|
-
if (enableTaskListItem) {
|
|
226
|
-
;({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex))
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
const token: IToken = {
|
|
230
|
-
nodeType: ListType,
|
|
231
|
-
position: {
|
|
232
|
-
start: calcStartPoint(nodePoints, startIndex),
|
|
233
|
-
end: calcEndPoint(nodePoints, nextIndex - 1),
|
|
234
|
-
},
|
|
235
|
-
ordered,
|
|
236
|
-
marker,
|
|
237
|
-
orderType: ordered ? orderType : undefined,
|
|
238
|
-
order: ordered ? order : undefined,
|
|
239
|
-
indent,
|
|
240
|
-
countOfTopBlankLine,
|
|
241
|
-
children: [],
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
if (status != null) token.status = status
|
|
245
|
-
return { token, nextIndex }
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
function eatAndInterruptPreviousSibling(
|
|
249
|
-
line: Readonly<IPhrasingContentLine>,
|
|
250
|
-
prevSiblingToken: Readonly<IBlockToken>,
|
|
251
|
-
): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
|
|
252
|
-
/**
|
|
253
|
-
* ListItem can interrupt Paragraph
|
|
254
|
-
* @see https://github.github.com/gfm/#list-items Basic case Exceptions 1
|
|
255
|
-
*/
|
|
256
|
-
const result = eatOpener(line)
|
|
257
|
-
if (result == null) return null
|
|
258
|
-
const { token, nextIndex } = result
|
|
259
|
-
|
|
260
|
-
/**
|
|
261
|
-
* But an empty list item cannot interrupt a paragraph
|
|
262
|
-
* @see https://github.github.com/gfm/#example-263
|
|
263
|
-
*/
|
|
264
|
-
if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
|
|
265
|
-
if (token.indent === line.endIndex - line.startIndex) {
|
|
266
|
-
return null
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
/**
|
|
270
|
-
* In order to solve of unwanted lists in paragraphs with hard-wrapped
|
|
271
|
-
* numerals, we allow only lists starting with 1 to interrupt paragraphs
|
|
272
|
-
* @see https://github.github.com/gfm/#example-284
|
|
273
|
-
*/
|
|
274
|
-
if (token.ordered && token.order !== 1) return null
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
return { token, nextIndex, remainingSibling: prevSiblingToken }
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
function eatContinuationText(
|
|
281
|
-
line: Readonly<IPhrasingContentLine>,
|
|
282
|
-
token: IToken,
|
|
283
|
-
): IResultOfEatContinuationText {
|
|
284
|
-
const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line
|
|
285
|
-
|
|
286
|
-
/**
|
|
287
|
-
* A list item can begin with at most one blank line
|
|
288
|
-
* @see https://github.github.com/gfm/#example-258
|
|
289
|
-
*/
|
|
290
|
-
if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
|
|
291
|
-
return { status: 'notMatched' }
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
/**
|
|
295
|
-
* When encountering a blank line, it consumes at most indent characters
|
|
296
|
-
* and cannot exceed the newline character
|
|
297
|
-
* @see https://github.github.com/gfm/#example-242
|
|
298
|
-
* @see https://github.github.com/gfm/#example-298
|
|
299
|
-
*/
|
|
300
|
-
if (firstNonWhitespaceIndex >= endIndex) {
|
|
301
|
-
if (token.countOfTopBlankLine >= 0) {
|
|
302
|
-
// eslint-disable-next-line no-param-reassign
|
|
303
|
-
token.countOfTopBlankLine += 1
|
|
304
|
-
if (token.countOfTopBlankLine > 1) {
|
|
305
|
-
return { status: 'notMatched' }
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
} else {
|
|
309
|
-
// eslint-disable-next-line no-param-reassign
|
|
310
|
-
token.countOfTopBlankLine = -1
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
const nextIndex = Math.min(startIndex + token.indent, endIndex - 1)
|
|
314
|
-
return { status: 'opening', nextIndex }
|
|
315
|
-
}
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
/**
|
|
319
|
-
* A task list item is a list item where the first block in it is a paragraph
|
|
320
|
-
* which begins with a task list item marker and at least one whitespace
|
|
321
|
-
* character before any other content.
|
|
322
|
-
*
|
|
323
|
-
* A task list item marker consists of an optional number of spaces, a left
|
|
324
|
-
* bracket ([), either a whitespace character or the letter x in either
|
|
325
|
-
* lowercase or uppercase, and then a right bracket (]).
|
|
326
|
-
*
|
|
327
|
-
* @param nodePoints
|
|
328
|
-
* @param startIndex
|
|
329
|
-
* @param endIndex
|
|
330
|
-
* @see https://github.github.com/gfm/#task-list-item
|
|
331
|
-
*/
|
|
332
|
-
function eatTaskStatus(
|
|
333
|
-
nodePoints: ReadonlyArray<INodePoint>,
|
|
334
|
-
startIndex: number,
|
|
335
|
-
endIndex: number,
|
|
336
|
-
): { status: TaskStatus | null; nextIndex: number } {
|
|
337
|
-
let i = startIndex
|
|
338
|
-
for (; i < endIndex; ++i) {
|
|
339
|
-
const c = nodePoints[i].codePoint
|
|
340
|
-
if (!isSpaceCharacter(c)) break
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
if (
|
|
344
|
-
i + 3 >= endIndex ||
|
|
345
|
-
nodePoints[i].codePoint !== AsciiCodePoint.OPEN_BRACKET ||
|
|
346
|
-
nodePoints[i + 2].codePoint !== AsciiCodePoint.CLOSE_BRACKET ||
|
|
347
|
-
!isWhitespaceCharacter(nodePoints[i + 3].codePoint)
|
|
348
|
-
)
|
|
349
|
-
return { status: null, nextIndex: startIndex }
|
|
350
|
-
|
|
351
|
-
let status: TaskStatus | undefined
|
|
352
|
-
const c = nodePoints[i + 1].codePoint
|
|
353
|
-
switch (c) {
|
|
354
|
-
case AsciiCodePoint.SPACE:
|
|
355
|
-
status = TaskStatus.TODO
|
|
356
|
-
break
|
|
357
|
-
case AsciiCodePoint.MINUS_SIGN:
|
|
358
|
-
status = TaskStatus.DOING
|
|
359
|
-
break
|
|
360
|
-
case AsciiCodePoint.LOWERCASE_X:
|
|
361
|
-
case AsciiCodePoint.UPPERCASE_X:
|
|
362
|
-
status = TaskStatus.DONE
|
|
363
|
-
break
|
|
364
|
-
default:
|
|
365
|
-
return { status: null, nextIndex: startIndex }
|
|
366
|
-
}
|
|
367
|
-
return { status, nextIndex: i + 4 }
|
|
368
|
-
}
|
package/src/parse.ts
DELETED
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
import type { ListItem, Node, Paragraph, Position } from '@yozora/ast'
|
|
2
|
-
import { ListItemType, ListType, ParagraphType } from '@yozora/ast'
|
|
3
|
-
import type { IParseBlockHookCreator, IParseBlockPhaseApi } from '@yozora/core-tokenizer'
|
|
4
|
-
import type { INode, IThis, IToken, T } from './types'
|
|
5
|
-
|
|
6
|
-
export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
|
|
7
|
-
return {
|
|
8
|
-
parse: tokens => {
|
|
9
|
-
const results: INode[] = []
|
|
10
|
-
let listItemTokens: IToken[] = []
|
|
11
|
-
for (let i = 0; i < tokens.length; ++i) {
|
|
12
|
-
const originalToken = tokens[i]
|
|
13
|
-
if (
|
|
14
|
-
listItemTokens.length <= 0 ||
|
|
15
|
-
listItemTokens[0].ordered !== originalToken.ordered ||
|
|
16
|
-
listItemTokens[0].orderType !== originalToken.orderType ||
|
|
17
|
-
listItemTokens[0].marker !== originalToken.marker
|
|
18
|
-
) {
|
|
19
|
-
const node: INode | null = resolveList(listItemTokens, api)
|
|
20
|
-
if (node) results.push(node)
|
|
21
|
-
|
|
22
|
-
listItemTokens = [originalToken]
|
|
23
|
-
continue
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Otherwise the current item should be a child of the originalToken,
|
|
28
|
-
* and the originalToken should be removed from the
|
|
29
|
-
* BlockTokenizerPostMatchPhaseStateTree
|
|
30
|
-
*/
|
|
31
|
-
listItemTokens.push(originalToken)
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
const node: INode | null = resolveList(listItemTokens, api)
|
|
35
|
-
if (node) results.push(node)
|
|
36
|
-
return results
|
|
37
|
-
},
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
/**
|
|
42
|
-
* A list is loose if any of its constituent list items are separated by
|
|
43
|
-
* blank lines, or if any of its constituent list items directly contain
|
|
44
|
-
* two block-level elements with a blank line between them. Otherwise a
|
|
45
|
-
* list is tight. (The difference in HTML output is that paragraphs in a
|
|
46
|
-
* loose list are wrapped in <p> tags, while paragraphs in a tight list
|
|
47
|
-
* are not.)
|
|
48
|
-
*
|
|
49
|
-
* If list is not loose, traverse the list-items, for the list-item whose
|
|
50
|
-
* first child node is Paragraph, convert the first node in this list-item
|
|
51
|
-
* to Phrasing content
|
|
52
|
-
* @see https://github.com/syntax-tree/mdast#phrasingcontent
|
|
53
|
-
*/
|
|
54
|
-
const resolveList = (tokens: IToken[], api: IParseBlockPhaseApi): INode | null => {
|
|
55
|
-
if (tokens.length <= 0) return null
|
|
56
|
-
|
|
57
|
-
let spread = tokens.some((item): boolean => {
|
|
58
|
-
if (item.children == null || item.children.length <= 1) return false
|
|
59
|
-
|
|
60
|
-
let previousPosition: Position = item.children[0].position
|
|
61
|
-
for (let j = 1; j < item.children.length; ++j) {
|
|
62
|
-
const currentPosition: Position = item.children[j].position
|
|
63
|
-
if (previousPosition.end.line + 1 < currentPosition.start.line) {
|
|
64
|
-
return true
|
|
65
|
-
}
|
|
66
|
-
previousPosition = currentPosition
|
|
67
|
-
}
|
|
68
|
-
return false
|
|
69
|
-
})
|
|
70
|
-
|
|
71
|
-
if (!spread && tokens.length > 1) {
|
|
72
|
-
let previousItem = tokens[0]
|
|
73
|
-
for (let i = 1; i < tokens.length; ++i) {
|
|
74
|
-
const currentItem = tokens[i]
|
|
75
|
-
|
|
76
|
-
// If there exists blank line between list items, then the list is loose.
|
|
77
|
-
if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
|
|
78
|
-
spread = true
|
|
79
|
-
break
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
previousItem = currentItem
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
const children: ListItem[] = tokens.map((listItemToken): ListItem => {
|
|
87
|
-
// Make list tighter if spread is false.
|
|
88
|
-
const nodes: Node[] = api.parseBlockTokens(listItemToken.children)
|
|
89
|
-
const children: Node[] = spread
|
|
90
|
-
? nodes
|
|
91
|
-
: nodes
|
|
92
|
-
.map(node => (node.type === ParagraphType ? (node as Paragraph).children : node))
|
|
93
|
-
.flat()
|
|
94
|
-
|
|
95
|
-
const listItem: ListItem = api.shouldReservePosition
|
|
96
|
-
? {
|
|
97
|
-
type: ListItemType,
|
|
98
|
-
position: listItemToken.position,
|
|
99
|
-
status: listItemToken.status,
|
|
100
|
-
children,
|
|
101
|
-
}
|
|
102
|
-
: { type: ListItemType, status: listItemToken.status, children }
|
|
103
|
-
return listItem
|
|
104
|
-
})
|
|
105
|
-
|
|
106
|
-
const node: INode = api.shouldReservePosition
|
|
107
|
-
? {
|
|
108
|
-
type: ListType,
|
|
109
|
-
position: {
|
|
110
|
-
start: { ...tokens[0].position.start },
|
|
111
|
-
end: { ...tokens[tokens.length - 1].position.end },
|
|
112
|
-
},
|
|
113
|
-
ordered: tokens[0].ordered,
|
|
114
|
-
orderType: tokens[0].orderType,
|
|
115
|
-
start: tokens[0].order,
|
|
116
|
-
marker: tokens[0].marker,
|
|
117
|
-
spread,
|
|
118
|
-
children,
|
|
119
|
-
}
|
|
120
|
-
: {
|
|
121
|
-
type: ListType,
|
|
122
|
-
ordered: tokens[0].ordered,
|
|
123
|
-
orderType: tokens[0].orderType,
|
|
124
|
-
start: tokens[0].order,
|
|
125
|
-
marker: tokens[0].marker,
|
|
126
|
-
spread,
|
|
127
|
-
children,
|
|
128
|
-
}
|
|
129
|
-
return node
|
|
130
|
-
}
|
package/src/tokenizer.ts
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import type { NodeType } from '@yozora/ast'
|
|
2
|
-
import { ParagraphType } from '@yozora/ast'
|
|
3
|
-
import type {
|
|
4
|
-
IBlockTokenizer,
|
|
5
|
-
IMatchBlockHookCreator,
|
|
6
|
-
IParseBlockHookCreator,
|
|
7
|
-
} from '@yozora/core-tokenizer'
|
|
8
|
-
import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
9
|
-
import { match } from './match'
|
|
10
|
-
import { parse } from './parse'
|
|
11
|
-
import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
12
|
-
import { uniqueName } from './types'
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* Lexical Analyzer for List.
|
|
16
|
-
*
|
|
17
|
-
* A list is a sequence of one or more list items of the same type.
|
|
18
|
-
* The list items may be separated by any number of blank lines.
|
|
19
|
-
*
|
|
20
|
-
* @see https://github.com/syntax-tree/mdast#list
|
|
21
|
-
* @see https://github.github.com/gfm/#list
|
|
22
|
-
*/
|
|
23
|
-
export class ListTokenizer
|
|
24
|
-
extends BaseBlockTokenizer<T, IToken, INode, IThis>
|
|
25
|
-
implements IBlockTokenizer<T, IToken, INode, IThis>
|
|
26
|
-
{
|
|
27
|
-
/* istanbul ignore next */
|
|
28
|
-
constructor(props: ITokenizerProps = {}) {
|
|
29
|
-
super({
|
|
30
|
-
name: props.name ?? uniqueName,
|
|
31
|
-
priority: props.priority ?? TokenizerPriority.CONTAINING_BLOCK,
|
|
32
|
-
})
|
|
33
|
-
this.enableTaskListItem = props.enableTaskListItem ?? false
|
|
34
|
-
this.emptyItemCouldNotInterruptedTypes = props.emptyItemCouldNotInterruptedTypes ?? [
|
|
35
|
-
ParagraphType,
|
|
36
|
-
]
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
public readonly enableTaskListItem: boolean
|
|
40
|
-
public readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<NodeType>
|
|
41
|
-
|
|
42
|
-
public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
|
|
43
|
-
|
|
44
|
-
public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
|
|
45
|
-
}
|
package/src/types.ts
DELETED
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
import type { List, ListType, NodeType, TaskStatus } from '@yozora/ast'
|
|
2
|
-
import type {
|
|
3
|
-
IBaseBlockTokenizerProps,
|
|
4
|
-
IBlockToken,
|
|
5
|
-
IPartialBlockToken,
|
|
6
|
-
ITokenizer,
|
|
7
|
-
} from '@yozora/core-tokenizer'
|
|
8
|
-
|
|
9
|
-
export type T = ListType
|
|
10
|
-
export type INode = List
|
|
11
|
-
export const uniqueName = '@yozora/tokenizer-list'
|
|
12
|
-
|
|
13
|
-
export interface IToken extends IPartialBlockToken<T> {
|
|
14
|
-
/**
|
|
15
|
-
* Is it an ordered list item.
|
|
16
|
-
*/
|
|
17
|
-
ordered: boolean
|
|
18
|
-
/**
|
|
19
|
-
* Marker of bullet list-item, or a delimiter of ordered list-item.
|
|
20
|
-
*/
|
|
21
|
-
marker: number
|
|
22
|
-
/**
|
|
23
|
-
* Marker type of the list.
|
|
24
|
-
* @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
|
|
25
|
-
*/
|
|
26
|
-
orderType?: '1' | 'a' | 'A' | 'i' | 'I'
|
|
27
|
-
/**
|
|
28
|
-
* Serial number of ordered list-item.
|
|
29
|
-
*/
|
|
30
|
-
order?: number
|
|
31
|
-
/**
|
|
32
|
-
* Status of a todo task.
|
|
33
|
-
*/
|
|
34
|
-
status?: TaskStatus
|
|
35
|
-
/**
|
|
36
|
-
* Indent of a list item.
|
|
37
|
-
*/
|
|
38
|
-
indent: number
|
|
39
|
-
/**
|
|
40
|
-
* list-item 起始的空行数量
|
|
41
|
-
* The number of blank lines at the beginning of a list-item
|
|
42
|
-
*/
|
|
43
|
-
countOfTopBlankLine: number
|
|
44
|
-
/**
|
|
45
|
-
* Child token nodes.
|
|
46
|
-
*/
|
|
47
|
-
children: IBlockToken[]
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
export interface IThis extends ITokenizer {
|
|
51
|
-
/**
|
|
52
|
-
* Specify an array of Node types that could not be interrupted
|
|
53
|
-
* by this ITokenizer if the current list-item is empty.
|
|
54
|
-
* @see https://github.github.com/gfm/#example-263
|
|
55
|
-
*/
|
|
56
|
-
readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<NodeType>
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* Should enable task list item (extension).
|
|
60
|
-
*/
|
|
61
|
-
readonly enableTaskListItem: boolean
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
export interface ITokenizerProps extends Partial<IBaseBlockTokenizerProps> {
|
|
65
|
-
/**
|
|
66
|
-
* Specify an array of Node types that could not be interrupted
|
|
67
|
-
* by this ITokenizer if the current list-item is empty.
|
|
68
|
-
* @see https://github.github.com/gfm/#example-263
|
|
69
|
-
*/
|
|
70
|
-
readonly emptyItemCouldNotInterruptedTypes?: NodeType[]
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* Should enable task list item (extension).
|
|
74
|
-
*/
|
|
75
|
-
readonly enableTaskListItem?: boolean
|
|
76
|
-
}
|