@yozora/tokenizer-list 1.3.0 → 2.0.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -53,7 +53,7 @@
53
53
 
54
54
  <!-- :end -->
55
55
 
56
- [@yozora/tokenizer-list] produce [List][node-type] type nodes.
56
+ [@yozora/tokenizer-list] produce [List][node-type] (with [ListItem][node-type:list-item] as children) type nodes.
57
57
  See [documentation][docpage] for details.
58
58
 
59
59
  <!-- :begin use tokenizer/usage -->
@@ -84,14 +84,14 @@ so you can use `YozoraParser` / `GfmExParser` / `GfmParser` directly.
84
84
  registered in *YastParser* as a plugin-in before it can be used.
85
85
 
86
86
  ```typescript {4,9}
87
- import { DefaultYastParser } from '@yozora/core-parser'
87
+ import { DefaultParser } from '@yozora/core-parser'
88
88
  import ParagraphTokenizer from '@yozora/tokenizer-paragraph'
89
89
  import TextTokenizer from '@yozora/tokenizer-text'
90
90
  import ListTokenizer from '@yozora/tokenizer-list'
91
91
 
92
- const parser = new DefaultYastParser()
93
- .useBlockFallbackTokenizer(new ParagraphTokenizer())
94
- .useInlineFallbackTokenizer(new TextTokenizer())
92
+ const parser = new DefaultParser()
93
+ .useFallbackTokenizer(new ParagraphTokenizer())
94
+ .useFallbackTokenizer(new TextTokenizer())
95
95
  .useTokenizer(new ListTokenizer())
96
96
 
97
97
  // parse source markdown content
@@ -213,10 +213,12 @@ a. This is an another type of ordered list item
213
213
 
214
214
  ### Options
215
215
 
216
- Name | Type | Required | Default
217
- :----------------:|:-----------:|:---------:|:--------------:
218
- `name` | `string` | `false` | `"@yozora/tokenizer-list"`
219
- `priority` | `number` | `false` | `TokenizerPriority.CONTAINING_BLOCK`
216
+ Name | Type | Required | Default
217
+ :----------------------------------:|:-----------------:|:---------:|:--------------:
218
+ `name` | `string` | `false` | `"@yozora/tokenizer-list"`
219
+ `priority` | `number` | `false` | `TokenizerPriority.CONTAINING_BLOCK`
220
+ `enableTaskListItem` | `boolean` | `false` | `false`
221
+ `emptyItemCouldNotInterruptedTypes` | `NodeType[]` | `false` | `[ParagraphType]`
220
222
 
221
223
  * `name`: The unique name of the tokenizer, used to bind the token it generates,
222
224
  to determine the tokenizer that should be called in each life cycle of the
@@ -227,6 +229,14 @@ Name | Type | Required | Default
227
229
  stage, a high-priority tokenizer can interrupt the matching process of a
228
230
  low-priority tokenizer.
229
231
 
232
+ * `enableTaskListItem`: Whether to enable the task list item extension.
233
+
234
+ * `emptyItemCouldNotInterruptedTypes`: Specify an array of Node types that cannot be
235
+ interrupted by this tokenizer when the current list-item is empty.
236
+
237
+ @see https://github.github.com/gfm/#example-263
238
+
239
+
230
240
  <!-- :end -->
231
241
 
232
242
  ## Related
@@ -236,15 +246,17 @@ Name | Type | Required | Default
236
246
  * [@yozora/parser][]
237
247
  * [@yozora/parser-gfm][]
238
248
  * [@yozora/parser-gfm-ex][]
239
- * [@yozora/tokenizer-list-item][]
240
249
  * [@yozora/react-list][]
241
250
  * [@yozora/react-list-item][]
242
251
  * [@yozora/react-markdown][]
243
252
  * [Live Examples][live-examples]
244
253
  * [List | Yozora AST][node-type]
245
254
  * [Documentation][docpage]
255
+ * [List | Mdast][mdast-homepage]
256
+ * [ListItem | Documentation][mdast-homepage:list-item]
246
257
 
247
258
  [node-type]: http://yozora.guanghechen.com/docs/package/ast#list
259
+ [node-type:list-item]: http://yozora.guanghechen.com/docs/package/ast#listitem
248
260
 
249
261
  <!-- :begin use tokenizer/definitions -->
250
262
 
@@ -252,7 +264,8 @@ Name | Type | Required | Default
252
264
  [docpage]: https://yozora.guanghechen.com/docs/package/tokenizer-list
253
265
  [homepage]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list#readme
254
266
  [gfm-spec]: https://github.github.com/gfm
255
- [mdast-homepage]: https://github.com/syntax-tree/mdast
267
+ [mdast-homepage]: https://github.com/syntax-tree/mdast#list
268
+ [mdast-homepage:list-item]: https://github.com/syntax-tree/mdast#listitem
256
269
 
257
270
  [@yozora/ast]: https://github.com/yozorajs/yozora/tree/main/packages/ast#readme
258
271
  [@yozora/ast-util]: https://github.com/yozorajs/yozora/tree/main/packages/ast-util#readme
@@ -291,7 +304,6 @@ Name | Type | Required | Default
291
304
  [@yozora/tokenizer-link]: https://github.com/yozorajs/yozora/tree/main/tokenizers/link#readme
292
305
  [@yozora/tokenizer-link-reference]: https://github.com/yozorajs/yozora/tree/main/tokenizers/link-reference#readme
293
306
  [@yozora/tokenizer-list]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list#readme
294
- [@yozora/tokenizer-list-item]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list-item#readme
295
307
  [@yozora/tokenizer-math]: https://github.com/yozorajs/yozora/tree/main/tokenizers/math#readme
296
308
  [@yozora/tokenizer-paragraph]: https://github.com/yozorajs/yozora/tree/main/tokenizers/paragraph#readme
297
309
  [@yozora/tokenizer-setext-heading]: https://github.com/yozorajs/yozora/tree/main/tokenizers/setext-heading#readme
@@ -351,7 +363,6 @@ Name | Type | Required | Default
351
363
  [doc-@yozora/tokenizer-definition]: https://yozora.guanghechen.com/docs/package/tokenizer-definition
352
364
  [doc-@yozora/tokenizer-link-reference]: https://yozora.guanghechen.com/docs/package/tokenizer-link-reference
353
365
  [doc-@yozora/tokenizer-list]: https://yozora.guanghechen.com/docs/package/tokenizer-list
354
- [doc-@yozora/tokenizer-list-item]: https://yozora.guanghechen.com/docs/package/tokenizer-list-item
355
366
  [doc-@yozora/tokenizer-math]: https://yozora.guanghechen.com/docs/package/tokenizer-math
356
367
  [doc-@yozora/tokenizer-paragraph]: https://yozora.guanghechen.com/docs/package/tokenizer-paragraph
357
368
  [doc-@yozora/tokenizer-setext-heading]: https://yozora.guanghechen.com/docs/package/tokenizer-setext-heading
package/lib/cjs/index.js CHANGED
@@ -4,113 +4,297 @@ Object.defineProperty(exports, '__esModule', { value: true });
4
4
 
5
5
  var ast = require('@yozora/ast');
6
6
  var coreTokenizer = require('@yozora/core-tokenizer');
7
+ var character = require('@yozora/character');
7
8
 
8
- const uniqueName = '@yozora/tokenizer-list';
9
-
10
- class ListTokenizer extends coreTokenizer.BaseBlockTokenizer {
11
- constructor(props = {}) {
12
- var _a, _b;
13
- super({
14
- name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
15
- priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.CONTAINING_BLOCK,
16
- });
17
- this.isContainingBlock = true;
18
- }
19
- transformMatch(tokens, api) {
20
- const results = [];
21
- let listItems = [];
22
- const resolveList = () => {
23
- if (listItems.length <= 0)
24
- return;
25
- let spread = listItems.some((item) => {
26
- if (item.children == null || item.children.length <= 1)
27
- return false;
28
- let previousPosition = item.children[0].position;
29
- for (let j = 1; j < item.children.length; ++j) {
30
- const currentPosition = item.children[j].position;
31
- if (previousPosition.end.line + 1 < currentPosition.start.line) {
32
- return true;
33
- }
34
- previousPosition = currentPosition;
35
- }
36
- return false;
37
- });
38
- if (!spread && listItems.length > 1) {
39
- let previousItem = listItems[0];
40
- for (let i = 1; i < listItems.length; ++i) {
41
- const currentItem = listItems[i];
42
- if (previousItem.position.end.line + 1 <
43
- currentItem.position.start.line) {
44
- spread = true;
9
+ const match = function () {
10
+ const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this;
11
+ return {
12
+ isContainingBlock: true,
13
+ eatOpener,
14
+ eatAndInterruptPreviousSibling,
15
+ eatContinuationText,
16
+ };
17
+ function eatOpener(line) {
18
+ if (line.countOfPrecedeSpaces >= 4)
19
+ return null;
20
+ const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line;
21
+ if (firstNonWhitespaceIndex >= endIndex)
22
+ return null;
23
+ let ordered = false;
24
+ let marker = null;
25
+ let orderType;
26
+ let order;
27
+ let i = firstNonWhitespaceIndex;
28
+ let c = nodePoints[i].codePoint;
29
+ if (i + 1 < endIndex) {
30
+ const c0 = c;
31
+ if (character.isAsciiDigitCharacter(c0)) {
32
+ orderType = '1';
33
+ let v = c0 - character.AsciiCodePoint.DIGIT0;
34
+ for (i += 1; i < endIndex; ++i) {
35
+ c = nodePoints[i].codePoint;
36
+ if (!character.isAsciiDigitCharacter(c))
45
37
  break;
46
- }
47
- previousItem = currentItem;
38
+ v = v * 10 + c - character.AsciiCodePoint.DIGIT0;
48
39
  }
40
+ order = v;
41
+ orderType = '1';
49
42
  }
50
- const list = {
51
- _tokenizer: this.name,
52
- nodeType: ast.ListType,
53
- ordered: listItems[0].ordered,
54
- orderType: listItems[0].orderType,
55
- start: listItems[0].order,
56
- marker: listItems[0].marker,
57
- spread,
58
- position: {
59
- start: Object.assign({}, listItems[0].position.start),
60
- end: Object.assign({}, listItems[listItems.length - 1].position.end),
61
- },
62
- children: [...listItems],
63
- };
64
- results.push(list);
65
- if (list.spread)
66
- return;
67
- for (const listItem of list.children) {
68
- if (listItem.children == null || listItem.children.length <= 0)
69
- continue;
70
- listItem.children = listItem.children.map(child => {
71
- const lines = api.extractPhrasingLines(child);
72
- if (lines == null)
73
- return child;
74
- const token = api.buildPhrasingContentToken(lines);
75
- return token !== null && token !== void 0 ? token : child;
76
- });
43
+ else if (character.isAsciiLowerLetter(c0)) {
44
+ i += 1;
45
+ c = nodePoints[i].codePoint;
46
+ order = c0 - character.AsciiCodePoint.LOWERCASE_A + 1;
47
+ orderType = 'a';
48
+ }
49
+ else if (character.isAsciiUpperLetter(c0)) {
50
+ i += 1;
51
+ c = nodePoints[i].codePoint;
52
+ order = c0 - character.AsciiCodePoint.UPPERCASE_A + 1;
53
+ orderType = 'A';
54
+ }
55
+ if (i > firstNonWhitespaceIndex &&
56
+ i - firstNonWhitespaceIndex <= 9 &&
57
+ (c === character.AsciiCodePoint.DOT || c === character.AsciiCodePoint.CLOSE_PARENTHESIS)) {
58
+ i += 1;
59
+ ordered = true;
60
+ marker = c;
77
61
  }
62
+ }
63
+ if (!ordered) {
64
+ if (c === character.AsciiCodePoint.PLUS_SIGN ||
65
+ c === character.AsciiCodePoint.MINUS_SIGN ||
66
+ c === character.AsciiCodePoint.ASTERISK) {
67
+ i += 1;
68
+ marker = c;
69
+ }
70
+ }
71
+ if (marker == null)
72
+ return null;
73
+ let countOfSpaces = 0, nextIndex = i;
74
+ if (nextIndex < endIndex) {
75
+ c = nodePoints[nextIndex].codePoint;
76
+ if (c === character.VirtualCodePoint.SPACE)
77
+ nextIndex += 1;
78
+ }
79
+ for (; nextIndex < endIndex; ++nextIndex) {
80
+ c = nodePoints[nextIndex].codePoint;
81
+ if (!character.isSpaceCharacter(c))
82
+ break;
83
+ countOfSpaces += 1;
84
+ }
85
+ if (countOfSpaces > 4) {
86
+ nextIndex -= countOfSpaces - 1;
87
+ countOfSpaces = 1;
88
+ }
89
+ if (countOfSpaces === 0 && nextIndex < endIndex && c !== character.VirtualCodePoint.LINE_END)
90
+ return null;
91
+ const countOfTopBlankLine = c === character.VirtualCodePoint.LINE_END ? 1 : -1;
92
+ if (c === character.VirtualCodePoint.LINE_END) {
93
+ nextIndex -= countOfSpaces - 1;
94
+ countOfSpaces = 1;
95
+ }
96
+ const indent = i - startIndex + countOfSpaces;
97
+ let status = null;
98
+ if (enableTaskListItem) {
99
+ ({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex));
100
+ }
101
+ const token = {
102
+ nodeType: ast.ListType,
103
+ position: {
104
+ start: coreTokenizer.calcStartYastNodePoint(nodePoints, startIndex),
105
+ end: coreTokenizer.calcEndYastNodePoint(nodePoints, nextIndex - 1),
106
+ },
107
+ ordered,
108
+ marker,
109
+ orderType: ordered ? orderType : undefined,
110
+ order: ordered ? order : undefined,
111
+ indent,
112
+ countOfTopBlankLine,
113
+ children: [],
78
114
  };
79
- for (let i = 0; i < tokens.length; ++i) {
80
- const originalToken = tokens[i];
81
- if (originalToken.nodeType !== ast.ListItemType) {
82
- resolveList();
83
- listItems = [];
84
- results.push(originalToken);
85
- continue;
115
+ if (status != null)
116
+ token.status = status;
117
+ return { token, nextIndex };
118
+ }
119
+ function eatAndInterruptPreviousSibling(line, prevSiblingToken) {
120
+ const result = eatOpener(line);
121
+ if (result == null)
122
+ return null;
123
+ const { token, nextIndex } = result;
124
+ if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
125
+ if (token.indent === line.endIndex - line.startIndex) {
126
+ return null;
86
127
  }
87
- if (listItems.length <= 0 ||
88
- listItems[0].ordered !== originalToken.ordered ||
89
- listItems[0].orderType !== originalToken.orderType ||
90
- listItems[0].marker !== originalToken.marker) {
91
- resolveList();
92
- listItems = [originalToken];
93
- continue;
128
+ if (token.ordered && token.order !== 1)
129
+ return null;
130
+ }
131
+ return { token, nextIndex, remainingSibling: prevSiblingToken };
132
+ }
133
+ function eatContinuationText(line, token) {
134
+ const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line;
135
+ if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
136
+ return { status: 'notMatched' };
137
+ }
138
+ if (firstNonWhitespaceIndex >= endIndex) {
139
+ if (token.countOfTopBlankLine >= 0) {
140
+ token.countOfTopBlankLine += 1;
141
+ if (token.countOfTopBlankLine > 1) {
142
+ return { status: 'notMatched' };
143
+ }
94
144
  }
95
- listItems.push(originalToken);
96
145
  }
97
- resolveList();
98
- return results;
146
+ else {
147
+ token.countOfTopBlankLine = -1;
148
+ }
149
+ const nextIndex = Math.min(startIndex + token.indent, endIndex - 1);
150
+ return { status: 'opening', nextIndex };
151
+ }
152
+ };
153
+ function eatTaskStatus(nodePoints, startIndex, endIndex) {
154
+ let i = startIndex;
155
+ for (; i < endIndex; ++i) {
156
+ const c = nodePoints[i].codePoint;
157
+ if (!character.isSpaceCharacter(c))
158
+ break;
159
+ }
160
+ if (i + 3 >= endIndex ||
161
+ nodePoints[i].codePoint !== character.AsciiCodePoint.OPEN_BRACKET ||
162
+ nodePoints[i + 2].codePoint !== character.AsciiCodePoint.CLOSE_BRACKET ||
163
+ !character.isWhitespaceCharacter(nodePoints[i + 3].codePoint))
164
+ return { status: null, nextIndex: startIndex };
165
+ let status;
166
+ const c = nodePoints[i + 1].codePoint;
167
+ switch (c) {
168
+ case character.AsciiCodePoint.SPACE:
169
+ status = ast.TaskStatus.TODO;
170
+ break;
171
+ case character.AsciiCodePoint.MINUS_SIGN:
172
+ status = ast.TaskStatus.DOING;
173
+ break;
174
+ case character.AsciiCodePoint.LOWERCASE_X:
175
+ case character.AsciiCodePoint.UPPERCASE_X:
176
+ status = ast.TaskStatus.DONE;
177
+ break;
178
+ default:
179
+ return { status: null, nextIndex: startIndex };
180
+ }
181
+ return { status, nextIndex: i + 4 };
182
+ }
183
+
184
+ const parse = function (api) {
185
+ return {
186
+ parse: tokens => {
187
+ const results = [];
188
+ let listItemTokens = [];
189
+ for (let i = 0; i < tokens.length; ++i) {
190
+ const originalToken = tokens[i];
191
+ if (listItemTokens.length <= 0 ||
192
+ listItemTokens[0].ordered !== originalToken.ordered ||
193
+ listItemTokens[0].orderType !== originalToken.orderType ||
194
+ listItemTokens[0].marker !== originalToken.marker) {
195
+ const node = resolveList(listItemTokens, api);
196
+ if (node)
197
+ results.push(node);
198
+ listItemTokens = [originalToken];
199
+ continue;
200
+ }
201
+ listItemTokens.push(originalToken);
202
+ }
203
+ const node = resolveList(listItemTokens, api);
204
+ if (node)
205
+ results.push(node);
206
+ return results;
207
+ },
208
+ };
209
+ };
210
+ const resolveList = (tokens, api) => {
211
+ if (tokens.length <= 0)
212
+ return null;
213
+ let spread = tokens.some((item) => {
214
+ if (item.children == null || item.children.length <= 1)
215
+ return false;
216
+ let previousPosition = item.children[0].position;
217
+ for (let j = 1; j < item.children.length; ++j) {
218
+ const currentPosition = item.children[j].position;
219
+ if (previousPosition.end.line + 1 < currentPosition.start.line) {
220
+ return true;
221
+ }
222
+ previousPosition = currentPosition;
223
+ }
224
+ return false;
225
+ });
226
+ if (!spread && tokens.length > 1) {
227
+ let previousItem = tokens[0];
228
+ for (let i = 1; i < tokens.length; ++i) {
229
+ const currentItem = tokens[i];
230
+ if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
231
+ spread = true;
232
+ break;
233
+ }
234
+ previousItem = currentItem;
235
+ }
99
236
  }
100
- parseBlock(token, children) {
101
- const node = {
237
+ const children = tokens.map((listItemToken) => {
238
+ const nodes = api.parseBlockTokens(listItemToken.children);
239
+ const children = spread
240
+ ? nodes
241
+ : nodes
242
+ .map(node => (node.type === ast.ParagraphType ? node.children : node))
243
+ .flat();
244
+ const listItem = api.shouldReservePosition
245
+ ? {
246
+ type: ast.ListItemType,
247
+ position: listItemToken.position,
248
+ status: listItemToken.status,
249
+ children,
250
+ }
251
+ : { type: ast.ListItemType, status: listItemToken.status, children };
252
+ return listItem;
253
+ });
254
+ const node = api.shouldReservePosition
255
+ ? {
256
+ type: ast.ListType,
257
+ position: {
258
+ start: Object.assign({}, tokens[0].position.start),
259
+ end: Object.assign({}, tokens[tokens.length - 1].position.end),
260
+ },
261
+ ordered: tokens[0].ordered,
262
+ orderType: tokens[0].orderType,
263
+ start: tokens[0].order,
264
+ marker: tokens[0].marker,
265
+ spread,
266
+ children,
267
+ }
268
+ : {
102
269
  type: ast.ListType,
103
- ordered: token.ordered,
104
- orderType: token.orderType,
105
- start: token.start,
106
- marker: token.marker,
107
- spread: token.spread,
108
- children: children,
270
+ ordered: tokens[0].ordered,
271
+ orderType: tokens[0].orderType,
272
+ start: tokens[0].order,
273
+ marker: tokens[0].marker,
274
+ spread,
275
+ children,
109
276
  };
110
- return node;
277
+ return node;
278
+ };
279
+
280
+ const uniqueName = '@yozora/tokenizer-list';
281
+
282
+ class ListTokenizer extends coreTokenizer.BaseBlockTokenizer {
283
+ constructor(props = {}) {
284
+ var _a, _b, _c, _d;
285
+ super({
286
+ name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
287
+ priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.CONTAINING_BLOCK,
288
+ });
289
+ this.match = match;
290
+ this.parse = parse;
291
+ this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
292
+ this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
293
+ ast.ParagraphType,
294
+ ];
111
295
  }
112
296
  }
113
297
 
114
298
  exports.ListTokenizer = ListTokenizer;
115
299
  exports.ListTokenizerName = uniqueName;
116
- exports['default'] = ListTokenizer;
300
+ exports["default"] = ListTokenizer;
package/lib/esm/index.js CHANGED
@@ -1,109 +1,293 @@
1
- import { ListItemType, ListType } from '@yozora/ast';
2
- import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer';
1
+ import { ListType, TaskStatus, ParagraphType, ListItemType } from '@yozora/ast';
2
+ import { calcStartYastNodePoint, calcEndYastNodePoint, BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer';
3
+ import { isAsciiDigitCharacter, AsciiCodePoint, isAsciiLowerLetter, isAsciiUpperLetter, VirtualCodePoint, isSpaceCharacter, isWhitespaceCharacter } from '@yozora/character';
3
4
 
4
- const uniqueName = '@yozora/tokenizer-list';
5
-
6
- class ListTokenizer extends BaseBlockTokenizer {
7
- constructor(props = {}) {
8
- var _a, _b;
9
- super({
10
- name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
11
- priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.CONTAINING_BLOCK,
12
- });
13
- this.isContainingBlock = true;
14
- }
15
- transformMatch(tokens, api) {
16
- const results = [];
17
- let listItems = [];
18
- const resolveList = () => {
19
- if (listItems.length <= 0)
20
- return;
21
- let spread = listItems.some((item) => {
22
- if (item.children == null || item.children.length <= 1)
23
- return false;
24
- let previousPosition = item.children[0].position;
25
- for (let j = 1; j < item.children.length; ++j) {
26
- const currentPosition = item.children[j].position;
27
- if (previousPosition.end.line + 1 < currentPosition.start.line) {
28
- return true;
29
- }
30
- previousPosition = currentPosition;
31
- }
32
- return false;
33
- });
34
- if (!spread && listItems.length > 1) {
35
- let previousItem = listItems[0];
36
- for (let i = 1; i < listItems.length; ++i) {
37
- const currentItem = listItems[i];
38
- if (previousItem.position.end.line + 1 <
39
- currentItem.position.start.line) {
40
- spread = true;
5
+ const match = function () {
6
+ const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this;
7
+ return {
8
+ isContainingBlock: true,
9
+ eatOpener,
10
+ eatAndInterruptPreviousSibling,
11
+ eatContinuationText,
12
+ };
13
+ function eatOpener(line) {
14
+ if (line.countOfPrecedeSpaces >= 4)
15
+ return null;
16
+ const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line;
17
+ if (firstNonWhitespaceIndex >= endIndex)
18
+ return null;
19
+ let ordered = false;
20
+ let marker = null;
21
+ let orderType;
22
+ let order;
23
+ let i = firstNonWhitespaceIndex;
24
+ let c = nodePoints[i].codePoint;
25
+ if (i + 1 < endIndex) {
26
+ const c0 = c;
27
+ if (isAsciiDigitCharacter(c0)) {
28
+ orderType = '1';
29
+ let v = c0 - AsciiCodePoint.DIGIT0;
30
+ for (i += 1; i < endIndex; ++i) {
31
+ c = nodePoints[i].codePoint;
32
+ if (!isAsciiDigitCharacter(c))
41
33
  break;
42
- }
43
- previousItem = currentItem;
34
+ v = v * 10 + c - AsciiCodePoint.DIGIT0;
44
35
  }
36
+ order = v;
37
+ orderType = '1';
45
38
  }
46
- const list = {
47
- _tokenizer: this.name,
48
- nodeType: ListType,
49
- ordered: listItems[0].ordered,
50
- orderType: listItems[0].orderType,
51
- start: listItems[0].order,
52
- marker: listItems[0].marker,
53
- spread,
54
- position: {
55
- start: Object.assign({}, listItems[0].position.start),
56
- end: Object.assign({}, listItems[listItems.length - 1].position.end),
57
- },
58
- children: [...listItems],
59
- };
60
- results.push(list);
61
- if (list.spread)
62
- return;
63
- for (const listItem of list.children) {
64
- if (listItem.children == null || listItem.children.length <= 0)
65
- continue;
66
- listItem.children = listItem.children.map(child => {
67
- const lines = api.extractPhrasingLines(child);
68
- if (lines == null)
69
- return child;
70
- const token = api.buildPhrasingContentToken(lines);
71
- return token !== null && token !== void 0 ? token : child;
72
- });
39
+ else if (isAsciiLowerLetter(c0)) {
40
+ i += 1;
41
+ c = nodePoints[i].codePoint;
42
+ order = c0 - AsciiCodePoint.LOWERCASE_A + 1;
43
+ orderType = 'a';
44
+ }
45
+ else if (isAsciiUpperLetter(c0)) {
46
+ i += 1;
47
+ c = nodePoints[i].codePoint;
48
+ order = c0 - AsciiCodePoint.UPPERCASE_A + 1;
49
+ orderType = 'A';
50
+ }
51
+ if (i > firstNonWhitespaceIndex &&
52
+ i - firstNonWhitespaceIndex <= 9 &&
53
+ (c === AsciiCodePoint.DOT || c === AsciiCodePoint.CLOSE_PARENTHESIS)) {
54
+ i += 1;
55
+ ordered = true;
56
+ marker = c;
73
57
  }
58
+ }
59
+ if (!ordered) {
60
+ if (c === AsciiCodePoint.PLUS_SIGN ||
61
+ c === AsciiCodePoint.MINUS_SIGN ||
62
+ c === AsciiCodePoint.ASTERISK) {
63
+ i += 1;
64
+ marker = c;
65
+ }
66
+ }
67
+ if (marker == null)
68
+ return null;
69
+ let countOfSpaces = 0, nextIndex = i;
70
+ if (nextIndex < endIndex) {
71
+ c = nodePoints[nextIndex].codePoint;
72
+ if (c === VirtualCodePoint.SPACE)
73
+ nextIndex += 1;
74
+ }
75
+ for (; nextIndex < endIndex; ++nextIndex) {
76
+ c = nodePoints[nextIndex].codePoint;
77
+ if (!isSpaceCharacter(c))
78
+ break;
79
+ countOfSpaces += 1;
80
+ }
81
+ if (countOfSpaces > 4) {
82
+ nextIndex -= countOfSpaces - 1;
83
+ countOfSpaces = 1;
84
+ }
85
+ if (countOfSpaces === 0 && nextIndex < endIndex && c !== VirtualCodePoint.LINE_END)
86
+ return null;
87
+ const countOfTopBlankLine = c === VirtualCodePoint.LINE_END ? 1 : -1;
88
+ if (c === VirtualCodePoint.LINE_END) {
89
+ nextIndex -= countOfSpaces - 1;
90
+ countOfSpaces = 1;
91
+ }
92
+ const indent = i - startIndex + countOfSpaces;
93
+ let status = null;
94
+ if (enableTaskListItem) {
95
+ ({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex));
96
+ }
97
+ const token = {
98
+ nodeType: ListType,
99
+ position: {
100
+ start: calcStartYastNodePoint(nodePoints, startIndex),
101
+ end: calcEndYastNodePoint(nodePoints, nextIndex - 1),
102
+ },
103
+ ordered,
104
+ marker,
105
+ orderType: ordered ? orderType : undefined,
106
+ order: ordered ? order : undefined,
107
+ indent,
108
+ countOfTopBlankLine,
109
+ children: [],
74
110
  };
75
- for (let i = 0; i < tokens.length; ++i) {
76
- const originalToken = tokens[i];
77
- if (originalToken.nodeType !== ListItemType) {
78
- resolveList();
79
- listItems = [];
80
- results.push(originalToken);
81
- continue;
111
+ if (status != null)
112
+ token.status = status;
113
+ return { token, nextIndex };
114
+ }
115
+ function eatAndInterruptPreviousSibling(line, prevSiblingToken) {
116
+ const result = eatOpener(line);
117
+ if (result == null)
118
+ return null;
119
+ const { token, nextIndex } = result;
120
+ if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
121
+ if (token.indent === line.endIndex - line.startIndex) {
122
+ return null;
82
123
  }
83
- if (listItems.length <= 0 ||
84
- listItems[0].ordered !== originalToken.ordered ||
85
- listItems[0].orderType !== originalToken.orderType ||
86
- listItems[0].marker !== originalToken.marker) {
87
- resolveList();
88
- listItems = [originalToken];
89
- continue;
124
+ if (token.ordered && token.order !== 1)
125
+ return null;
126
+ }
127
+ return { token, nextIndex, remainingSibling: prevSiblingToken };
128
+ }
129
+ function eatContinuationText(line, token) {
130
+ const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line;
131
+ if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
132
+ return { status: 'notMatched' };
133
+ }
134
+ if (firstNonWhitespaceIndex >= endIndex) {
135
+ if (token.countOfTopBlankLine >= 0) {
136
+ token.countOfTopBlankLine += 1;
137
+ if (token.countOfTopBlankLine > 1) {
138
+ return { status: 'notMatched' };
139
+ }
90
140
  }
91
- listItems.push(originalToken);
92
141
  }
93
- resolveList();
94
- return results;
142
+ else {
143
+ token.countOfTopBlankLine = -1;
144
+ }
145
+ const nextIndex = Math.min(startIndex + token.indent, endIndex - 1);
146
+ return { status: 'opening', nextIndex };
147
+ }
148
+ };
149
+ function eatTaskStatus(nodePoints, startIndex, endIndex) {
150
+ let i = startIndex;
151
+ for (; i < endIndex; ++i) {
152
+ const c = nodePoints[i].codePoint;
153
+ if (!isSpaceCharacter(c))
154
+ break;
155
+ }
156
+ if (i + 3 >= endIndex ||
157
+ nodePoints[i].codePoint !== AsciiCodePoint.OPEN_BRACKET ||
158
+ nodePoints[i + 2].codePoint !== AsciiCodePoint.CLOSE_BRACKET ||
159
+ !isWhitespaceCharacter(nodePoints[i + 3].codePoint))
160
+ return { status: null, nextIndex: startIndex };
161
+ let status;
162
+ const c = nodePoints[i + 1].codePoint;
163
+ switch (c) {
164
+ case AsciiCodePoint.SPACE:
165
+ status = TaskStatus.TODO;
166
+ break;
167
+ case AsciiCodePoint.MINUS_SIGN:
168
+ status = TaskStatus.DOING;
169
+ break;
170
+ case AsciiCodePoint.LOWERCASE_X:
171
+ case AsciiCodePoint.UPPERCASE_X:
172
+ status = TaskStatus.DONE;
173
+ break;
174
+ default:
175
+ return { status: null, nextIndex: startIndex };
176
+ }
177
+ return { status, nextIndex: i + 4 };
178
+ }
179
+
180
+ const parse = function (api) {
181
+ return {
182
+ parse: tokens => {
183
+ const results = [];
184
+ let listItemTokens = [];
185
+ for (let i = 0; i < tokens.length; ++i) {
186
+ const originalToken = tokens[i];
187
+ if (listItemTokens.length <= 0 ||
188
+ listItemTokens[0].ordered !== originalToken.ordered ||
189
+ listItemTokens[0].orderType !== originalToken.orderType ||
190
+ listItemTokens[0].marker !== originalToken.marker) {
191
+ const node = resolveList(listItemTokens, api);
192
+ if (node)
193
+ results.push(node);
194
+ listItemTokens = [originalToken];
195
+ continue;
196
+ }
197
+ listItemTokens.push(originalToken);
198
+ }
199
+ const node = resolveList(listItemTokens, api);
200
+ if (node)
201
+ results.push(node);
202
+ return results;
203
+ },
204
+ };
205
+ };
206
+ const resolveList = (tokens, api) => {
207
+ if (tokens.length <= 0)
208
+ return null;
209
+ let spread = tokens.some((item) => {
210
+ if (item.children == null || item.children.length <= 1)
211
+ return false;
212
+ let previousPosition = item.children[0].position;
213
+ for (let j = 1; j < item.children.length; ++j) {
214
+ const currentPosition = item.children[j].position;
215
+ if (previousPosition.end.line + 1 < currentPosition.start.line) {
216
+ return true;
217
+ }
218
+ previousPosition = currentPosition;
219
+ }
220
+ return false;
221
+ });
222
+ if (!spread && tokens.length > 1) {
223
+ let previousItem = tokens[0];
224
+ for (let i = 1; i < tokens.length; ++i) {
225
+ const currentItem = tokens[i];
226
+ if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
227
+ spread = true;
228
+ break;
229
+ }
230
+ previousItem = currentItem;
231
+ }
95
232
  }
96
- parseBlock(token, children) {
97
- const node = {
233
+ const children = tokens.map((listItemToken) => {
234
+ const nodes = api.parseBlockTokens(listItemToken.children);
235
+ const children = spread
236
+ ? nodes
237
+ : nodes
238
+ .map(node => (node.type === ParagraphType ? node.children : node))
239
+ .flat();
240
+ const listItem = api.shouldReservePosition
241
+ ? {
242
+ type: ListItemType,
243
+ position: listItemToken.position,
244
+ status: listItemToken.status,
245
+ children,
246
+ }
247
+ : { type: ListItemType, status: listItemToken.status, children };
248
+ return listItem;
249
+ });
250
+ const node = api.shouldReservePosition
251
+ ? {
252
+ type: ListType,
253
+ position: {
254
+ start: Object.assign({}, tokens[0].position.start),
255
+ end: Object.assign({}, tokens[tokens.length - 1].position.end),
256
+ },
257
+ ordered: tokens[0].ordered,
258
+ orderType: tokens[0].orderType,
259
+ start: tokens[0].order,
260
+ marker: tokens[0].marker,
261
+ spread,
262
+ children,
263
+ }
264
+ : {
98
265
  type: ListType,
99
- ordered: token.ordered,
100
- orderType: token.orderType,
101
- start: token.start,
102
- marker: token.marker,
103
- spread: token.spread,
104
- children: children,
266
+ ordered: tokens[0].ordered,
267
+ orderType: tokens[0].orderType,
268
+ start: tokens[0].order,
269
+ marker: tokens[0].marker,
270
+ spread,
271
+ children,
105
272
  };
106
- return node;
273
+ return node;
274
+ };
275
+
276
+ const uniqueName = '@yozora/tokenizer-list';
277
+
278
+ class ListTokenizer extends BaseBlockTokenizer {
279
+ constructor(props = {}) {
280
+ var _a, _b, _c, _d;
281
+ super({
282
+ name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
283
+ priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.CONTAINING_BLOCK,
284
+ });
285
+ this.match = match;
286
+ this.parse = parse;
287
+ this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
288
+ this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
289
+ ParagraphType,
290
+ ];
107
291
  }
108
292
  }
109
293
 
@@ -1,5 +1,3 @@
1
- import { ListTokenizer } from './tokenizer';
2
- export { ListTokenizer } from './tokenizer';
1
+ export { ListTokenizer, ListTokenizer as default } from './tokenizer';
3
2
  export { uniqueName as ListTokenizerName } from './types';
4
- export type { Token as ListToken, TokenizerProps as ListTokenizerProps, } from './types';
5
- export default ListTokenizer;
3
+ export type { IToken as IListToken, ITokenizerProps as IListTokenizerProps } from './types';
@@ -0,0 +1,25 @@
1
+ import type { IMatchBlockHookCreator } from '@yozora/core-tokenizer';
2
+ import type { IThis, IToken, T } from './types';
3
+ /**
4
+ * The following rules define list items:
5
+ * - Basic case. If a sequence of lines Ls constitute a sequence of blocks Bs
6
+ * starting with a non-whitespace character, and M is a list marker of width
7
+ * W followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
8
+ * following spaces to the first line of Ls, and indenting subsequent lines
9
+ * of Ls by W + N spaces, is a list item with Bs as its contents. The type
10
+ * of the list item (bullet or ordered) is determined by the type of its
11
+ * list marker. If the list item is ordered, then it is also assigned a
12
+ * start number, based on the ordered list marker.
13
+ *
14
+ * Exceptions:
15
+ * - When the first list item in a list interrupts a paragraph—that is,
16
+ * when it starts on a line that would otherwise count as paragraph
17
+ * continuation text—then
18
+ * (a) the lines Ls must not begin with a blank line, and
19
+ * (b) if the list item is ordered, the start number must be 1.
20
+ * - If any line is a thematic break then that line is not a list item.
21
+ *
22
+ * @see https://github.com/syntax-tree/mdast#listitem
23
+ * @see https://github.github.com/gfm/#list-items
24
+ */
25
+ export declare const match: IMatchBlockHookCreator<T, IToken, IThis>;
@@ -0,0 +1,3 @@
1
+ import type { IParseBlockHookCreator } from '@yozora/core-tokenizer';
2
+ import type { INode, IThis, IToken, T } from './types';
3
+ export declare const parse: IParseBlockHookCreator<T, IToken, INode, IThis>;
@@ -1,12 +1,7 @@
1
- import type { YastNode } from '@yozora/ast';
2
- import type { PostMatchBlockPhaseApi, ResultOfParse, Tokenizer, TokenizerParseBlockHook, TokenizerPostMatchBlockHook, YastBlockToken } from '@yozora/core-tokenizer';
1
+ import type { NodeType } from '@yozora/ast';
2
+ import type { IBlockTokenizer, IMatchBlockHookCreator, IParseBlockHookCreator } from '@yozora/core-tokenizer';
3
3
  import { BaseBlockTokenizer } from '@yozora/core-tokenizer';
4
- import type { Node, T, Token, TokenizerProps } from './types';
5
- /**
6
- * Params for constructing ListTokenizer
7
- */
8
- export interface ListTokenizerProps {
9
- }
4
+ import type { INode, IThis, IToken, ITokenizerProps, T } from './types';
10
5
  /**
11
6
  * Lexical Analyzer for List.
12
7
  *
@@ -16,17 +11,10 @@ export interface ListTokenizerProps {
16
11
  * @see https://github.com/syntax-tree/mdast#list
17
12
  * @see https://github.github.com/gfm/#list
18
13
  */
19
- export declare class ListTokenizer extends BaseBlockTokenizer implements Tokenizer, TokenizerPostMatchBlockHook, TokenizerParseBlockHook<T, Token, Node> {
20
- readonly isContainingBlock: boolean;
21
- constructor(props?: TokenizerProps);
22
- /**
23
- * @override
24
- * @see TokenizerPostMatchBlockHook
25
- */
26
- transformMatch(tokens: ReadonlyArray<YastBlockToken>, api: PostMatchBlockPhaseApi): YastBlockToken[];
27
- /**
28
- * @override
29
- * @see TokenizerParseBlockHook
30
- */
31
- parseBlock(token: Readonly<Token>, children: YastNode[]): ResultOfParse<T, Node>;
14
+ export declare class ListTokenizer extends BaseBlockTokenizer<T, IToken, INode, IThis> implements IBlockTokenizer<T, IToken, INode, IThis> {
15
+ constructor(props?: ITokenizerProps);
16
+ readonly enableTaskListItem: boolean;
17
+ readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<NodeType>;
18
+ readonly match: IMatchBlockHookCreator<T, IToken, IThis>;
19
+ readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis>;
32
20
  }
@@ -1,17 +1,15 @@
1
- import type { List, ListType } from '@yozora/ast';
2
- import type { BaseBlockTokenizerProps, PartialYastBlockToken, YastBlockToken } from '@yozora/core-tokenizer';
3
- import type { ListItemToken as _ListItemToken } from '@yozora/tokenizer-list-item';
1
+ import type { List, ListType, NodeType, TaskStatus } from '@yozora/ast';
2
+ import type { IBaseBlockTokenizerProps, IPartialYastBlockToken, ITokenizer, IYastBlockToken } from '@yozora/core-tokenizer';
4
3
  export declare type T = ListType;
5
- export declare type Node = List;
4
+ export declare type INode = List;
6
5
  export declare const uniqueName = "@yozora/tokenizer-list";
7
- export declare type ListItemToken = _ListItemToken & YastBlockToken;
8
- export interface Token extends PartialYastBlockToken<T> {
6
+ export interface IToken extends IPartialYastBlockToken<T> {
9
7
  /**
10
8
  * Is it an ordered list item.
11
9
  */
12
10
  ordered: boolean;
13
11
  /**
14
- * Marker of a bullet list-item, or delimiter of an ordered list-item.
12
+ * Marker of bullet list-item, or a delimiter of ordered list-item.
15
13
  */
16
14
  marker: number;
17
15
  /**
@@ -20,16 +18,48 @@ export interface Token extends PartialYastBlockToken<T> {
20
18
  */
21
19
  orderType?: '1' | 'a' | 'A' | 'i' | 'I';
22
20
  /**
23
- * The starting number of a ordered list-item.
21
+ * Serial number of ordered list-item.
24
22
  */
25
- start?: number;
23
+ order?: number;
26
24
  /**
27
- * Whether if the list is loose.
25
+ * Status of a todo task.
28
26
  */
29
- spread: boolean;
27
+ status?: TaskStatus;
30
28
  /**
31
- * List items.
29
+ * Indent of a list item.
32
30
  */
33
- children: ListItemToken[];
31
+ indent: number;
32
+ /**
33
+ * list-item 起始的空行数量
34
+ * The number of blank lines at the beginning of a list-item
35
+ */
36
+ countOfTopBlankLine: number;
37
+ /**
38
+ * Child token nodes.
39
+ */
40
+ children: IYastBlockToken[];
41
+ }
42
+ export interface IThis extends ITokenizer {
43
+ /**
44
+ * Specify an array of Node types that could not be interrupted
45
+ * by this ITokenizer if the current list-item is empty.
46
+ * @see https://github.github.com/gfm/#example-263
47
+ */
48
+ readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<NodeType>;
49
+ /**
50
+ * Should enable task list item (extension).
51
+ */
52
+ readonly enableTaskListItem: boolean;
53
+ }
54
+ export interface ITokenizerProps extends Partial<IBaseBlockTokenizerProps> {
55
+ /**
56
+ * Specify an array of Node types that could not be interrupted
57
+ * by this ITokenizer if the current list-item is empty.
58
+ * @see https://github.github.com/gfm/#example-263
59
+ */
60
+ readonly emptyItemCouldNotInterruptedTypes?: NodeType[];
61
+ /**
62
+ * Should enable task list item (extension).
63
+ */
64
+ readonly enableTaskListItem?: boolean;
34
65
  }
35
- export declare type TokenizerProps = Partial<BaseBlockTokenizerProps>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yozora/tokenizer-list",
3
- "version": "1.3.0",
3
+ "version": "2.0.0-alpha.3",
4
4
  "author": {
5
5
  "name": "guanghechen",
6
6
  "url": "https://github.com/guanghechen/"
@@ -35,9 +35,9 @@
35
35
  "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
36
36
  },
37
37
  "dependencies": {
38
- "@yozora/ast": "^1.3.0",
39
- "@yozora/core-tokenizer": "^1.3.0",
40
- "@yozora/tokenizer-list-item": "^1.3.0"
38
+ "@yozora/ast": "^2.0.0-alpha.3",
39
+ "@yozora/character": "^2.0.0-alpha.3",
40
+ "@yozora/core-tokenizer": "^2.0.0-alpha.3"
41
41
  },
42
- "gitHead": "18c9b167004ad97718b2f94f25139f80598cbf7a"
42
+ "gitHead": "9f274fc7487a8c1dd213405d92508f9a7621f730"
43
43
  }