@yozora/tokenizer-list 2.0.3 → 2.0.5-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -255,8 +255,8 @@ const resolveList = (tokens, api) => {
255
255
  ? {
256
256
  type: ast.ListType,
257
257
  position: {
258
- start: Object.assign({}, tokens[0].position.start),
259
- end: Object.assign({}, tokens[tokens.length - 1].position.end),
258
+ start: { ...tokens[0].position.start },
259
+ end: { ...tokens[tokens.length - 1].position.end },
260
260
  },
261
261
  ordered: tokens[0].ordered,
262
262
  orderType: tokens[0].orderType,
@@ -281,20 +281,21 @@ const uniqueName = '@yozora/tokenizer-list';
281
281
 
282
282
  class ListTokenizer extends coreTokenizer.BaseBlockTokenizer {
283
283
  constructor(props = {}) {
284
- var _a, _b, _c, _d;
285
284
  super({
286
- name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
287
- priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.CONTAINING_BLOCK,
285
+ name: props.name ?? uniqueName,
286
+ priority: props.priority ?? coreTokenizer.TokenizerPriority.CONTAINING_BLOCK,
288
287
  });
289
- this.match = match;
290
- this.parse = parse;
291
- this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
292
- this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
288
+ this.enableTaskListItem = props.enableTaskListItem ?? false;
289
+ this.emptyItemCouldNotInterruptedTypes = props.emptyItemCouldNotInterruptedTypes ?? [
293
290
  ast.ParagraphType,
294
291
  ];
295
292
  }
293
+ enableTaskListItem;
294
+ emptyItemCouldNotInterruptedTypes;
295
+ match = match;
296
+ parse = parse;
296
297
  }
297
298
 
298
299
  exports.ListTokenizer = ListTokenizer;
299
300
  exports.ListTokenizerName = uniqueName;
300
- exports["default"] = ListTokenizer;
301
+ exports.default = ListTokenizer;
@@ -251,8 +251,8 @@ const resolveList = (tokens, api) => {
251
251
  ? {
252
252
  type: ListType,
253
253
  position: {
254
- start: Object.assign({}, tokens[0].position.start),
255
- end: Object.assign({}, tokens[tokens.length - 1].position.end),
254
+ start: { ...tokens[0].position.start },
255
+ end: { ...tokens[tokens.length - 1].position.end },
256
256
  },
257
257
  ordered: tokens[0].ordered,
258
258
  orderType: tokens[0].orderType,
@@ -277,18 +277,19 @@ const uniqueName = '@yozora/tokenizer-list';
277
277
 
278
278
  class ListTokenizer extends BaseBlockTokenizer {
279
279
  constructor(props = {}) {
280
- var _a, _b, _c, _d;
281
280
  super({
282
- name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
283
- priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.CONTAINING_BLOCK,
281
+ name: props.name ?? uniqueName,
282
+ priority: props.priority ?? TokenizerPriority.CONTAINING_BLOCK,
284
283
  });
285
- this.match = match;
286
- this.parse = parse;
287
- this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
288
- this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
284
+ this.enableTaskListItem = props.enableTaskListItem ?? false;
285
+ this.emptyItemCouldNotInterruptedTypes = props.emptyItemCouldNotInterruptedTypes ?? [
289
286
  ParagraphType,
290
287
  ];
291
288
  }
289
+ enableTaskListItem;
290
+ emptyItemCouldNotInterruptedTypes;
291
+ match = match;
292
+ parse = parse;
292
293
  }
293
294
 
294
295
  export { ListTokenizer, uniqueName as ListTokenizerName, ListTokenizer as default };
@@ -1,8 +1,8 @@
1
1
  import { TaskStatus, NodeType, ListType, List } from '@yozora/ast';
2
2
  import { IPartialYastBlockToken, IYastBlockToken, IBaseBlockTokenizerProps, ITokenizer, BaseBlockTokenizer, IBlockTokenizer, IMatchBlockHookCreator, IParseBlockHookCreator } from '@yozora/core-tokenizer';
3
3
 
4
- declare type T = ListType;
5
- declare type INode = List;
4
+ type T = ListType;
5
+ type INode = List;
6
6
  declare const uniqueName = "@yozora/tokenizer-list";
7
7
  interface IToken extends IPartialYastBlockToken<T> {
8
8
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yozora/tokenizer-list",
3
- "version": "2.0.3",
3
+ "version": "2.0.5-alpha.0",
4
4
  "author": {
5
5
  "name": "guanghechen",
6
6
  "url": "https://github.com/guanghechen/"
@@ -11,33 +11,37 @@
11
11
  "directory": "tokenizers/list"
12
12
  },
13
13
  "homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/list",
14
- "main": "lib/cjs/index.js",
15
- "module": "lib/esm/index.js",
16
- "types": "lib/types/index.d.ts",
17
- "source": "src/index.ts",
14
+ "type": "module",
15
+ "exports": {
16
+ "types": "./lib/types/index.d.ts",
17
+ "import": "./lib/esm/index.mjs",
18
+ "require": "./lib/cjs/index.cjs"
19
+ },
20
+ "source": "./src/index.ts",
21
+ "types": "./lib/types/index.d.ts",
22
+ "main": "./lib/cjs/index.cjs",
23
+ "module": "./lib/esm/index.mjs",
18
24
  "license": "MIT",
19
25
  "engines": {
20
26
  "node": ">= 16.0.0"
21
27
  },
22
28
  "files": [
23
29
  "lib/",
24
- "!lib/**/*.js.map",
25
- "!lib/**/*.d.ts.map",
30
+ "src/",
26
31
  "package.json",
27
32
  "CHANGELOG.md",
28
33
  "LICENSE",
29
34
  "README.md"
30
35
  ],
31
36
  "scripts": {
32
- "build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.js",
33
- "prebuild": "rimraf lib/",
37
+ "build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
34
38
  "prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
35
- "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
39
+ "test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
36
40
  },
37
41
  "dependencies": {
38
- "@yozora/ast": "^2.0.3",
39
- "@yozora/character": "^2.0.3",
40
- "@yozora/core-tokenizer": "^2.0.3"
42
+ "@yozora/ast": "^2.0.5-alpha.0",
43
+ "@yozora/character": "^2.0.5-alpha.0",
44
+ "@yozora/core-tokenizer": "^2.0.5-alpha.0"
41
45
  },
42
- "gitHead": "8cc8f95cfebc8d752bc3272cdd24965f540c130b"
46
+ "gitHead": "8bf941fe4ef82947165b0f3cc123cd493665e13b"
43
47
  }
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
+ export { ListTokenizer, ListTokenizer as default } from './tokenizer'
2
+ export { uniqueName as ListTokenizerName } from './types'
3
+ export type { IToken as IListToken, ITokenizerProps as IListTokenizerProps } from './types'
package/src/match.ts ADDED
@@ -0,0 +1,368 @@
1
+ import { ListType, TaskStatus } from '@yozora/ast'
2
+ import type { INodePoint } from '@yozora/character'
3
+ import {
4
+ AsciiCodePoint,
5
+ VirtualCodePoint,
6
+ isAsciiDigitCharacter,
7
+ isAsciiLowerLetter,
8
+ isAsciiUpperLetter,
9
+ isSpaceCharacter,
10
+ isWhitespaceCharacter,
11
+ } from '@yozora/character'
12
+ import type {
13
+ IMatchBlockHookCreator,
14
+ IPhrasingContentLine,
15
+ IResultOfEatAndInterruptPreviousSibling,
16
+ IResultOfEatContinuationText,
17
+ IResultOfEatOpener,
18
+ IYastBlockToken,
19
+ } from '@yozora/core-tokenizer'
20
+ import { calcEndPoint, calcStartPoint } from '@yozora/core-tokenizer'
21
+ import type { IThis, IToken, T } from './types'
22
+
23
+ /**
24
+ * The following rules define list items:
25
+ * - Basic case. If a sequence of lines Ls constitute a sequence of blocks Bs
26
+ * starting with a non-whitespace character, and M is a list marker of width
27
+ * W followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
28
+ * following spaces to the first line of Ls, and indenting subsequent lines
29
+ * of Ls by W + N spaces, is a list item with Bs as its contents. The type
30
+ * of the list item (bullet or ordered) is determined by the type of its
31
+ * list marker. If the list item is ordered, then it is also assigned a
32
+ * start number, based on the ordered list marker.
33
+ *
34
+ * Exceptions:
35
+ * - When the first list item in a list interrupts a paragraph—that is,
36
+ * when it starts on a line that would otherwise count as paragraph
37
+ * continuation text—then
38
+ * (a) the lines Ls must not begin with a blank line, and
39
+ * (b) if the list item is ordered, the start number must be 1.
40
+ * - If any line is a thematic break then that line is not a list item.
41
+ *
42
+ * @see https://github.com/syntax-tree/mdast#listitem
43
+ * @see https://github.github.com/gfm/#list-items
44
+ */
45
+
46
+ export const match: IMatchBlockHookCreator<T, IToken, IThis> = function () {
47
+ const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this
48
+
49
+ return {
50
+ isContainingBlock: true,
51
+ eatOpener,
52
+ eatAndInterruptPreviousSibling,
53
+ eatContinuationText,
54
+ }
55
+
56
+ function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
57
+ /**
58
+ * Four spaces are too much.
59
+ * @see https://github.github.com/gfm/#example-253
60
+ */
61
+ if (line.countOfPrecedeSpaces >= 4) return null
62
+
63
+ const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
64
+ if (firstNonWhitespaceIndex >= endIndex) return null
65
+
66
+ let ordered = false
67
+ let marker: number | null = null
68
+ let orderType: '1' | 'a' | 'A' | 'i' | 'I' | undefined
69
+ let order: number | undefined
70
+ let i = firstNonWhitespaceIndex
71
+ let c = nodePoints[i].codePoint
72
+
73
+ /**
74
+ * Try to resolve an ordered list-item.
75
+ *
76
+ * An ordered list marker is a sequence of 1–9 arabic digits (0-9),
77
+ * followed by either a . character or a ) character. (The reason
78
+ * for the length limit is that with 10 digits we start seeing integer
79
+ * overflows in some browsers.)
80
+ * @see https://github.github.com/gfm/#ordered-list-marker
81
+ *
82
+ * Extension: a single letter /[a-z]/ or /[A-Z]/ may also serve as the
83
+ * marker of an ordered list (roman numerals are not supported yet).
84
+ */
85
+ if (i + 1 < endIndex) {
86
+ // TODO Support roman numerals.
87
+ const c0 = c
88
+ if (isAsciiDigitCharacter(c0)) {
89
+ orderType = '1'
90
+ let v = c0 - AsciiCodePoint.DIGIT0
91
+ for (i += 1; i < endIndex; ++i) {
92
+ c = nodePoints[i].codePoint
93
+ if (!isAsciiDigitCharacter(c)) break
94
+ v = v * 10 + c - AsciiCodePoint.DIGIT0
95
+ }
96
+ order = v
97
+ orderType = '1'
98
+ } else if (isAsciiLowerLetter(c0)) {
99
+ i += 1
100
+ c = nodePoints[i].codePoint
101
+ order = c0 - AsciiCodePoint.LOWERCASE_A + 1
102
+ orderType = 'a'
103
+ } else if (isAsciiUpperLetter(c0)) {
104
+ i += 1
105
+ c = nodePoints[i].codePoint
106
+ order = c0 - AsciiCodePoint.UPPERCASE_A + 1
107
+ orderType = 'A'
108
+ }
109
+
110
+ // eat '.' / ')'
111
+ if (
112
+ i > firstNonWhitespaceIndex &&
113
+ i - firstNonWhitespaceIndex <= 9 &&
114
+ (c === AsciiCodePoint.DOT || c === AsciiCodePoint.CLOSE_PARENTHESIS)
115
+ ) {
116
+ i += 1
117
+ ordered = true
118
+ marker = c
119
+ }
120
+ }
121
+
122
+ /**
123
+ * Try to resolve a bullet list-item.
124
+ *
125
+ * A bullet list marker is a -, +, or * character.
126
+ * @see https://github.github.com/gfm/#bullet-list-marker
127
+ */
128
+ if (!ordered) {
129
+ if (
130
+ c === AsciiCodePoint.PLUS_SIGN ||
131
+ c === AsciiCodePoint.MINUS_SIGN ||
132
+ c === AsciiCodePoint.ASTERISK
133
+ ) {
134
+ i += 1
135
+ marker = c
136
+ }
137
+ }
138
+
139
+ if (marker == null) return null
140
+
141
+ /**
142
+ * When the list-item marker is followed by a tab, the tab is treated as if
143
+ * it were expanded into three spaces.
144
+ *
145
+ * @see https://github.github.com/gfm/#example-7
146
+ */
147
+ let countOfSpaces = 0,
148
+ nextIndex = i
149
+ if (nextIndex < endIndex) {
150
+ c = nodePoints[nextIndex].codePoint
151
+ if (c === VirtualCodePoint.SPACE) nextIndex += 1
152
+ }
153
+
154
+ /**
155
+ * #Rule1 Basic case
156
+ *
157
+ * If a sequence of lines Ls constitute a sequence of blocks Bs starting
158
+ * with a non-whitespace character, and M is a list marker of width W
159
+ * followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
160
+ * following spaces to the first line of Ls, and indenting subsequent
161
+ * lines of Ls by W + N spaces, is a list item with Bs as its contents.
162
+ * The type of the list item (bullet or ordered) is determined by the
163
+ * type of its list marker. If the list item is ordered, then it is also
164
+ * assigned a start number, based on the ordered list marker.
165
+ * @see https://github.github.com/gfm/#list-items Basic case
166
+ */
167
+ for (; nextIndex < endIndex; ++nextIndex) {
168
+ c = nodePoints[nextIndex].codePoint
169
+ if (!isSpaceCharacter(c)) break
170
+ countOfSpaces += 1
171
+ }
172
+
173
+ /**
174
+ * Rule#2 Item starting with indented code.
175
+ *
176
+ * If a sequence of lines Ls constitute a sequence of blocks Bs starting
177
+ * with an indented code block, and M is a list marker of width W followed
178
+ * by one space, then the result of prepending M and the following space to
179
+ * the first line of Ls, and indenting subsequent lines of Ls by W + 1 spaces,
180
+ * is a list item with Bs as its contents. If a line is empty, then it need
181
+ * not be indented. The type of the list item (bullet or ordered) is
182
+ * determined by the type of its list marker. If the list item is ordered,
183
+ * then it is also assigned a start number, based on the ordered list marker.
184
+ * @see https://github.github.com/gfm/#list-items Item starting with indented code.
185
+ */
186
+ if (countOfSpaces > 4) {
187
+ nextIndex -= countOfSpaces - 1
188
+ countOfSpaces = 1
189
+ }
190
+
191
+ /**
192
+ * Rule#3 Item starting with a blank line.
193
+ *
194
+ * If a sequence of lines Ls starting with a single blank line constitute
195
+ * a (possibly empty) sequence of blocks Bs, not separated from each other
196
+ * by more than one blank line, and M is a list marker of width W, then the
197
+ * result of prepending M to the first line of Ls, and indenting subsequent
198
+ * lines of Ls by W + 1 spaces, is a list item with Bs as its contents.
199
+ * If a line is empty, then it need not be indented. The type of the list
200
+ * item (bullet or ordered) is determined by the type of its list marker.
201
+ * If the list item is ordered, then it is also assigned a start number,
202
+ * based on the ordered list marker.
203
+ * @see https://github.github.com/gfm/#list-items Item starting with a blank line
204
+ */
205
+ if (countOfSpaces === 0 && nextIndex < endIndex && c !== VirtualCodePoint.LINE_END) return null
206
+
207
+ const countOfTopBlankLine = c === VirtualCodePoint.LINE_END ? 1 : -1
208
+ if (c === VirtualCodePoint.LINE_END) {
209
+ nextIndex -= countOfSpaces - 1
210
+ countOfSpaces = 1
211
+ }
212
+
213
+ /**
214
+ * Rule#4 Indentation.
215
+ *
216
+ * If a sequence of lines Ls constitutes a list item according to rule #1,
217
+ * #2, or #3, then the result of indenting each line of Ls by 1-3 spaces
218
+ * (the same for each line) also constitutes a list item with the same
219
+ * contents and attributes. If a line is empty, then it need not be indented.
220
+ */
221
+ const indent = i - startIndex + countOfSpaces
222
+
223
+ // Try to resolve task status.
224
+ let status: TaskStatus | null = null
225
+ if (enableTaskListItem) {
226
+ ;({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex))
227
+ }
228
+
229
+ const token: IToken = {
230
+ nodeType: ListType,
231
+ position: {
232
+ start: calcStartPoint(nodePoints, startIndex),
233
+ end: calcEndPoint(nodePoints, nextIndex - 1),
234
+ },
235
+ ordered,
236
+ marker,
237
+ orderType: ordered ? orderType : undefined,
238
+ order: ordered ? order : undefined,
239
+ indent,
240
+ countOfTopBlankLine,
241
+ children: [],
242
+ }
243
+
244
+ if (status != null) token.status = status
245
+ return { token, nextIndex }
246
+ }
247
+
248
+ function eatAndInterruptPreviousSibling(
249
+ line: Readonly<IPhrasingContentLine>,
250
+ prevSiblingToken: Readonly<IYastBlockToken>,
251
+ ): IResultOfEatAndInterruptPreviousSibling<T, IToken> {
252
+ /**
253
+ * ListItem can interrupt Paragraph
254
+ * @see https://github.github.com/gfm/#list-items Basic case Exceptions 1
255
+ */
256
+ const result = eatOpener(line)
257
+ if (result == null) return null
258
+ const { token, nextIndex } = result
259
+
260
+ /**
261
+ * But an empty list item cannot interrupt a paragraph
262
+ * @see https://github.github.com/gfm/#example-263
263
+ */
264
+ if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
265
+ if (token.indent === line.endIndex - line.startIndex) {
266
+ return null
267
+ }
268
+
269
+ /**
270
+ * To avoid unwanted lists in paragraphs with hard-wrapped numerals,
271
+ * only ordered lists starting with 1 are allowed to interrupt paragraphs.
272
+ * @see https://github.github.com/gfm/#example-284
273
+ */
274
+ if (token.ordered && token.order !== 1) return null
275
+ }
276
+
277
+ return { token, nextIndex, remainingSibling: prevSiblingToken }
278
+ }
279
+
280
+ function eatContinuationText(
281
+ line: Readonly<IPhrasingContentLine>,
282
+ token: IToken,
283
+ ): IResultOfEatContinuationText {
284
+ const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line
285
+
286
+ /**
287
+ * A list item can begin with at most one blank line
288
+ * @see https://github.github.com/gfm/#example-258
289
+ */
290
+ if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
291
+ return { status: 'notMatched' }
292
+ }
293
+
294
+ /**
295
+ * When encountering a blank line, it consumes at most indent characters
296
+ * and cannot exceed the newline character
297
+ * @see https://github.github.com/gfm/#example-242
298
+ * @see https://github.github.com/gfm/#example-298
299
+ */
300
+ if (firstNonWhitespaceIndex >= endIndex) {
301
+ if (token.countOfTopBlankLine >= 0) {
302
+ // eslint-disable-next-line no-param-reassign
303
+ token.countOfTopBlankLine += 1
304
+ if (token.countOfTopBlankLine > 1) {
305
+ return { status: 'notMatched' }
306
+ }
307
+ }
308
+ } else {
309
+ // eslint-disable-next-line no-param-reassign
310
+ token.countOfTopBlankLine = -1
311
+ }
312
+
313
+ const nextIndex = Math.min(startIndex + token.indent, endIndex - 1)
314
+ return { status: 'opening', nextIndex }
315
+ }
316
+ }
317
+
318
+ /**
319
+ * A task list item is a list item where the first block in it is a paragraph
320
+ * which begins with a task list item marker and at least one whitespace
321
+ * character before any other content.
322
+ *
323
+ * A task list item marker consists of an optional number of spaces, a left
324
+ * bracket ([), either a whitespace character, a minus sign (-, an extension
325
+ * marking an in-progress task) or the letter x in either lowercase or uppercase, and then a right bracket (]).
326
+ *
327
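+ * @param nodePoints  code points of the line being matched
328
+ * @param startIndex  index at which to start looking for the marker
329
+ * @param endIndex    exclusive end index of the search range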
330
+ * @see https://github.github.com/gfm/#task-list-item
331
+ */
332
+ function eatTaskStatus(
333
+ nodePoints: ReadonlyArray<INodePoint>,
334
+ startIndex: number,
335
+ endIndex: number,
336
+ ): { status: TaskStatus | null; nextIndex: number } {
337
+ let i = startIndex
338
+ for (; i < endIndex; ++i) {
339
+ const c = nodePoints[i].codePoint
340
+ if (!isSpaceCharacter(c)) break
341
+ }
342
+
343
+ if (
344
+ i + 3 >= endIndex ||
345
+ nodePoints[i].codePoint !== AsciiCodePoint.OPEN_BRACKET ||
346
+ nodePoints[i + 2].codePoint !== AsciiCodePoint.CLOSE_BRACKET ||
347
+ !isWhitespaceCharacter(nodePoints[i + 3].codePoint)
348
+ )
349
+ return { status: null, nextIndex: startIndex }
350
+
351
+ let status: TaskStatus | undefined
352
+ const c = nodePoints[i + 1].codePoint
353
+ switch (c) {
354
+ case AsciiCodePoint.SPACE:
355
+ status = TaskStatus.TODO
356
+ break
357
+ case AsciiCodePoint.MINUS_SIGN:
358
+ status = TaskStatus.DOING
359
+ break
360
+ case AsciiCodePoint.LOWERCASE_X:
361
+ case AsciiCodePoint.UPPERCASE_X:
362
+ status = TaskStatus.DONE
363
+ break
364
+ default:
365
+ return { status: null, nextIndex: startIndex }
366
+ }
367
+ return { status, nextIndex: i + 4 }
368
+ }
package/src/parse.ts ADDED
@@ -0,0 +1,130 @@
1
+ import type { ListItem, Node, Paragraph, Position } from '@yozora/ast'
2
+ import { ListItemType, ListType, ParagraphType } from '@yozora/ast'
3
+ import type { IParseBlockHookCreator, IParseBlockPhaseApi } from '@yozora/core-tokenizer'
4
+ import type { INode, IThis, IToken, T } from './types'
5
+
6
+ export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
7
+ return {
8
+ parse: tokens => {
9
+ const results: INode[] = []
10
+ let listItemTokens: IToken[] = []
11
+ for (let i = 0; i < tokens.length; ++i) {
12
+ const originalToken = tokens[i]
13
+ if (
14
+ listItemTokens.length <= 0 ||
15
+ listItemTokens[0].ordered !== originalToken.ordered ||
16
+ listItemTokens[0].orderType !== originalToken.orderType ||
17
+ listItemTokens[0].marker !== originalToken.marker
18
+ ) {
19
+ const node: INode | null = resolveList(listItemTokens, api)
20
+ if (node) results.push(node)
21
+
22
+ listItemTokens = [originalToken]
23
+ continue
24
+ }
25
+
26
+ /**
27
+ * Otherwise the current token has the same ordered / orderType / marker
28
+ * as the tokens collected so far, so it belongs to the same list and is
29
+ * appended to the pending group.
30
+ */
31
+ listItemTokens.push(originalToken)
32
+ }
33
+
34
+ const node: INode | null = resolveList(listItemTokens, api)
35
+ if (node) results.push(node)
36
+ return results
37
+ },
38
+ }
39
+ }
40
+
41
+ /**
42
+ * A list is loose if any of its constituent list items are separated by
43
+ * blank lines, or if any of its constituent list items directly contain
44
+ * two block-level elements with a blank line between them. Otherwise a
45
+ * list is tight. (The difference in HTML output is that paragraphs in a
46
+ * loose list are wrapped in <p> tags, while paragraphs in a tight list
47
+ * are not.)
48
+ *
49
+ * If the list is not loose (i.e. tight), traverse the list-items and replace
50
+ * every Paragraph that is a direct child of a list-item with its
51
+ * phrasing content children.
52
+ * @see https://github.com/syntax-tree/mdast#phrasingcontent
53
+ */
54
+ const resolveList = (tokens: IToken[], api: IParseBlockPhaseApi): INode | null => {
55
+ if (tokens.length <= 0) return null
56
+
57
+ let spread = tokens.some((item): boolean => {
58
+ if (item.children == null || item.children.length <= 1) return false
59
+
60
+ let previousPosition: Position = item.children[0].position
61
+ for (let j = 1; j < item.children.length; ++j) {
62
+ const currentPosition: Position = item.children[j].position
63
+ if (previousPosition.end.line + 1 < currentPosition.start.line) {
64
+ return true
65
+ }
66
+ previousPosition = currentPosition
67
+ }
68
+ return false
69
+ })
70
+
71
+ if (!spread && tokens.length > 1) {
72
+ let previousItem = tokens[0]
73
+ for (let i = 1; i < tokens.length; ++i) {
74
+ const currentItem = tokens[i]
75
+
76
+ // If there is a blank line between two adjacent list items, then the list is loose.
77
+ if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
78
+ spread = true
79
+ break
80
+ }
81
+
82
+ previousItem = currentItem
83
+ }
84
+ }
85
+
86
+ const children: ListItem[] = tokens.map((listItemToken): ListItem => {
87
+ // Make list tighter if spread is false.
88
+ const nodes: Node[] = api.parseBlockTokens(listItemToken.children)
89
+ const children: Node[] = spread
90
+ ? nodes
91
+ : nodes
92
+ .map(node => (node.type === ParagraphType ? (node as Paragraph).children : node))
93
+ .flat()
94
+
95
+ const listItem: ListItem = api.shouldReservePosition
96
+ ? {
97
+ type: ListItemType,
98
+ position: listItemToken.position,
99
+ status: listItemToken.status,
100
+ children,
101
+ }
102
+ : { type: ListItemType, status: listItemToken.status, children }
103
+ return listItem
104
+ })
105
+
106
+ const node: INode = api.shouldReservePosition
107
+ ? {
108
+ type: ListType,
109
+ position: {
110
+ start: { ...tokens[0].position.start },
111
+ end: { ...tokens[tokens.length - 1].position.end },
112
+ },
113
+ ordered: tokens[0].ordered,
114
+ orderType: tokens[0].orderType,
115
+ start: tokens[0].order,
116
+ marker: tokens[0].marker,
117
+ spread,
118
+ children,
119
+ }
120
+ : {
121
+ type: ListType,
122
+ ordered: tokens[0].ordered,
123
+ orderType: tokens[0].orderType,
124
+ start: tokens[0].order,
125
+ marker: tokens[0].marker,
126
+ spread,
127
+ children,
128
+ }
129
+ return node
130
+ }
@@ -0,0 +1,45 @@
1
+ import type { NodeType } from '@yozora/ast'
2
+ import { ParagraphType } from '@yozora/ast'
3
+ import type {
4
+ IBlockTokenizer,
5
+ IMatchBlockHookCreator,
6
+ IParseBlockHookCreator,
7
+ } from '@yozora/core-tokenizer'
8
+ import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
9
+ import { match } from './match'
10
+ import { parse } from './parse'
11
+ import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
12
+ import { uniqueName } from './types'
13
+
14
+ /**
15
+ * Lexical Analyzer for List.
16
+ *
17
+ * A list is a sequence of one or more list items of the same type.
18
+ * The list items may be separated by any number of blank lines.
19
+ *
20
+ * @see https://github.com/syntax-tree/mdast#list
21
+ * @see https://github.github.com/gfm/#list
22
+ */
23
+ export class ListTokenizer
24
+ extends BaseBlockTokenizer<T, IToken, INode, IThis>
25
+ implements IBlockTokenizer<T, IToken, INode, IThis>
26
+ {
27
+ /* istanbul ignore next */
28
+ constructor(props: ITokenizerProps = {}) {
29
+ super({
30
+ name: props.name ?? uniqueName,
31
+ priority: props.priority ?? TokenizerPriority.CONTAINING_BLOCK,
32
+ })
33
+ this.enableTaskListItem = props.enableTaskListItem ?? false
34
+ this.emptyItemCouldNotInterruptedTypes = props.emptyItemCouldNotInterruptedTypes ?? [
35
+ ParagraphType,
36
+ ]
37
+ }
38
+
39
+ public readonly enableTaskListItem: boolean
40
+ public readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<NodeType>
41
+
42
+ public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match
43
+
44
+ public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse
45
+ }
package/src/types.ts ADDED
@@ -0,0 +1,76 @@
1
+ import type { List, ListType, NodeType, TaskStatus } from '@yozora/ast'
2
+ import type {
3
+ IBaseBlockTokenizerProps,
4
+ IPartialYastBlockToken,
5
+ ITokenizer,
6
+ IYastBlockToken,
7
+ } from '@yozora/core-tokenizer'
8
+
9
+ export type T = ListType
10
+ export type INode = List
11
+ export const uniqueName = '@yozora/tokenizer-list'
12
+
13
+ export interface IToken extends IPartialYastBlockToken<T> {
14
+ /**
15
+ * Is it an ordered list item.
16
+ */
17
+ ordered: boolean
18
+ /**
19
+ * Marker of bullet list-item, or a delimiter of ordered list-item.
20
+ */
21
+ marker: number
22
+ /**
23
+ * Marker type of the list.
24
+ * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
25
+ */
26
+ orderType?: '1' | 'a' | 'A' | 'i' | 'I'
27
+ /**
28
+ * Serial number of ordered list-item.
29
+ */
30
+ order?: number
31
+ /**
32
+ * Status of a todo task.
33
+ */
34
+ status?: TaskStatus
35
+ /**
36
+ * Indent of a list item.
37
+ */
38
+ indent: number
39
+ /**
40
+ * The number of blank lines at the beginning of a list-item,
41
+ * or -1 once the list-item is known to contain non-blank content.
42
+ */
43
+ countOfTopBlankLine: number
44
+ /**
45
+ * Child token nodes.
46
+ */
47
+ children: IYastBlockToken[]
48
+ }
49
+
50
+ export interface IThis extends ITokenizer {
51
+ /**
52
+ * Specify an array of Node types that cannot be interrupted
53
+ * by this tokenizer when the list-item being opened is empty.
54
+ * @see https://github.github.com/gfm/#example-263
55
+ */
56
+ readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<NodeType>
57
+
58
+ /**
59
+ * Should enable task list item (extension).
60
+ */
61
+ readonly enableTaskListItem: boolean
62
+ }
63
+
64
+ export interface ITokenizerProps extends Partial<IBaseBlockTokenizerProps> {
65
+ /**
66
+ * Specify an array of Node types that cannot be interrupted
67
+ * by this tokenizer when the list-item being opened is empty.
68
+ * @see https://github.github.com/gfm/#example-263
69
+ */
70
+ readonly emptyItemCouldNotInterruptedTypes?: NodeType[]
71
+
72
+ /**
73
+ * Should enable task list item (extension).
74
+ */
75
+ readonly enableTaskListItem?: boolean
76
+ }