@yozora/tokenizer-list 2.0.0-alpha.0 → 2.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -53,7 +53,7 @@
53
53
 
54
54
  <!-- :end -->
55
55
 
56
- [@yozora/tokenizer-list] produce [List][node-type] type nodes.
56
+ [@yozora/tokenizer-list] produces [List][node-type] type nodes (with [ListItem][node-type:list-item] as children).
57
57
  See [documentation][docpage] for details.
58
58
 
59
59
  <!-- :begin use tokenizer/usage -->
@@ -84,14 +84,14 @@ so you can use `YozoraParser` / `GfmExParser` / `GfmParser` directly.
84
84
  registered in *YastParser* as a plug-in before it can be used.
85
85
 
86
86
  ```typescript {4,9}
87
- import { DefaultYastParser } from '@yozora/core-parser'
87
+ import { DefaultParser } from '@yozora/core-parser'
88
88
  import ParagraphTokenizer from '@yozora/tokenizer-paragraph'
89
89
  import TextTokenizer from '@yozora/tokenizer-text'
90
90
  import ListTokenizer from '@yozora/tokenizer-list'
91
91
 
92
- const parser = new DefaultYastParser()
93
- .useBlockFallbackTokenizer(new ParagraphTokenizer())
94
- .useInlineFallbackTokenizer(new TextTokenizer())
92
+ const parser = new DefaultParser()
93
+ .useFallbackTokenizer(new ParagraphTokenizer())
94
+ .useFallbackTokenizer(new TextTokenizer())
95
95
  .useTokenizer(new ListTokenizer())
96
96
 
97
97
  // parse source markdown content
@@ -213,10 +213,12 @@ a. This is an another type of ordered list item
213
213
 
214
214
  ### Options
215
215
 
216
- Name | Type | Required | Default
217
- :----------------:|:-----------:|:---------:|:--------------:
218
- `name` | `string` | `false` | `"@yozora/tokenizer-list"`
219
- `priority` | `number` | `false` | `TokenizerPriority.CONTAINING_BLOCK`
216
+ Name | Type | Required | Default
217
+ :----------------------------------:|:-----------------:|:---------:|:--------------:
218
+ `name` | `string` | `false` | `"@yozora/tokenizer-list"`
219
+ `priority` | `number` | `false` | `TokenizerPriority.CONTAINING_BLOCK`
220
+ `enableTaskListItem` | `boolean` | `false` | `false`
221
+ `emptyItemCouldNotInterruptedTypes` | `YastNodeType[]` | `false` | `[PhrasingContentType, ParagraphType]`
220
222
 
221
223
  * `name`: The unique name of the tokenizer, used to bind the token it generates,
222
224
  to determine the tokenizer that should be called in each life cycle of the
@@ -227,6 +229,14 @@ Name | Type | Required | Default
227
229
  stage, a high-priority tokenizer can interrupt the matching process of a
228
230
  low-priority tokenizer.
229
231
 
232
+ * `enableTaskListItem`: Whether to enable task list item (extension).
233
+
234
+ * `emptyItemCouldNotInterruptedTypes`: Specify an array of IYastNode types that cannot be
235
+ interrupted by this ITokenizer if the current list-item is empty.
236
+
237
+ @see https://github.github.com/gfm/#example-263
238
+
239
+
230
240
  <!-- :end -->
231
241
 
232
242
  ## Related
@@ -236,15 +246,17 @@ Name | Type | Required | Default
236
246
  * [@yozora/parser][]
237
247
  * [@yozora/parser-gfm][]
238
248
  * [@yozora/parser-gfm-ex][]
239
- * [@yozora/tokenizer-list-item][]
240
249
  * [@yozora/react-list][]
241
250
  * [@yozora/react-list-item][]
242
251
  * [@yozora/react-markdown][]
243
252
  * [Live Examples][live-examples]
244
253
  * [List | Yozora AST][node-type]
245
254
  * [Documentation][docpage]
255
+ * [List | Mdast][mdast-homepage]
256
+ * [ListItem | Mdast][mdast-homepage:list-item]
246
257
 
247
258
  [node-type]: http://yozora.guanghechen.com/docs/package/ast#list
259
+ [node-type:list-item]: http://yozora.guanghechen.com/docs/package/ast#listitem
248
260
 
249
261
  <!-- :begin use tokenizer/definitions -->
250
262
 
@@ -252,7 +264,8 @@ Name | Type | Required | Default
252
264
  [docpage]: https://yozora.guanghechen.com/docs/package/tokenizer-list
253
265
  [homepage]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list#readme
254
266
  [gfm-spec]: https://github.github.com/gfm
255
- [mdast-homepage]: https://github.com/syntax-tree/mdast
267
+ [mdast-homepage]: https://github.com/syntax-tree/mdast#list
268
+ [mdast-homepage:list-item]: https://github.com/syntax-tree/mdast#listitem
256
269
 
257
270
  [@yozora/ast]: https://github.com/yozorajs/yozora/tree/main/packages/ast#readme
258
271
  [@yozora/ast-util]: https://github.com/yozorajs/yozora/tree/main/packages/ast-util#readme
@@ -291,7 +304,6 @@ Name | Type | Required | Default
291
304
  [@yozora/tokenizer-link]: https://github.com/yozorajs/yozora/tree/main/tokenizers/link#readme
292
305
  [@yozora/tokenizer-link-reference]: https://github.com/yozorajs/yozora/tree/main/tokenizers/link-reference#readme
293
306
  [@yozora/tokenizer-list]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list#readme
294
- [@yozora/tokenizer-list-item]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list-item#readme
295
307
  [@yozora/tokenizer-math]: https://github.com/yozorajs/yozora/tree/main/tokenizers/math#readme
296
308
  [@yozora/tokenizer-paragraph]: https://github.com/yozorajs/yozora/tree/main/tokenizers/paragraph#readme
297
309
  [@yozora/tokenizer-setext-heading]: https://github.com/yozorajs/yozora/tree/main/tokenizers/setext-heading#readme
@@ -351,7 +363,6 @@ Name | Type | Required | Default
351
363
  [doc-@yozora/tokenizer-definition]: https://yozora.guanghechen.com/docs/package/tokenizer-definition
352
364
  [doc-@yozora/tokenizer-link-reference]: https://yozora.guanghechen.com/docs/package/tokenizer-link-reference
353
365
  [doc-@yozora/tokenizer-list]: https://yozora.guanghechen.com/docs/package/tokenizer-list
354
- [doc-@yozora/tokenizer-list-item]: https://yozora.guanghechen.com/docs/package/tokenizer-list-item
355
366
  [doc-@yozora/tokenizer-math]: https://yozora.guanghechen.com/docs/package/tokenizer-math
356
367
  [doc-@yozora/tokenizer-paragraph]: https://yozora.guanghechen.com/docs/package/tokenizer-paragraph
357
368
  [doc-@yozora/tokenizer-setext-heading]: https://yozora.guanghechen.com/docs/package/tokenizer-setext-heading
package/lib/cjs/index.js CHANGED
@@ -2,128 +2,285 @@
2
2
 
3
3
  Object.defineProperty(exports, '__esModule', { value: true });
4
4
 
5
- var coreTokenizer = require('@yozora/core-tokenizer');
6
5
  var ast = require('@yozora/ast');
6
+ var coreTokenizer = require('@yozora/core-tokenizer');
7
+ var character = require('@yozora/character');
7
8
 
8
- const parse = function () {
9
+ const match = function () {
10
+ const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this;
9
11
  return {
10
- parse: (token, children) => {
11
- const node = {
12
- type: ast.ListType,
13
- ordered: token.ordered,
14
- orderType: token.orderType,
15
- start: token.start,
16
- marker: token.marker,
17
- spread: token.spread,
18
- children: children,
19
- };
20
- return node;
21
- },
12
+ isContainingBlock: true,
13
+ eatOpener,
14
+ eatAndInterruptPreviousSibling,
15
+ eatContinuationText,
22
16
  };
23
- };
24
-
25
- const postMatch = function (api) {
26
- const { name: _tokenizer } = this;
27
- return { transformMatch };
28
- function transformMatch(tokens) {
29
- const results = [];
30
- let listItems = [];
31
- const resolveList = () => {
32
- if (listItems.length <= 0)
33
- return;
34
- let spread = listItems.some((item) => {
35
- if (item.children == null || item.children.length <= 1)
36
- return false;
37
- let previousPosition = item.children[0].position;
38
- for (let j = 1; j < item.children.length; ++j) {
39
- const currentPosition = item.children[j].position;
40
- if (previousPosition.end.line + 1 < currentPosition.start.line) {
41
- return true;
42
- }
43
- previousPosition = currentPosition;
44
- }
45
- return false;
46
- });
47
- if (!spread && listItems.length > 1) {
48
- let previousItem = listItems[0];
49
- for (let i = 1; i < listItems.length; ++i) {
50
- const currentItem = listItems[i];
51
- if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
52
- spread = true;
17
+ function eatOpener(line) {
18
+ if (line.countOfPrecedeSpaces >= 4)
19
+ return null;
20
+ const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line;
21
+ if (firstNonWhitespaceIndex >= endIndex)
22
+ return null;
23
+ let ordered = false;
24
+ let marker = null;
25
+ let orderType;
26
+ let order;
27
+ let i = firstNonWhitespaceIndex;
28
+ let c = nodePoints[i].codePoint;
29
+ if (i + 1 < endIndex) {
30
+ const c0 = c;
31
+ if (character.isAsciiDigitCharacter(c0)) {
32
+ orderType = '1';
33
+ let v = c0 - character.AsciiCodePoint.DIGIT0;
34
+ for (i += 1; i < endIndex; ++i) {
35
+ c = nodePoints[i].codePoint;
36
+ if (!character.isAsciiDigitCharacter(c))
53
37
  break;
54
- }
55
- previousItem = currentItem;
38
+ v = v * 10 + c - character.AsciiCodePoint.DIGIT0;
56
39
  }
40
+ order = v;
41
+ orderType = '1';
57
42
  }
58
- const list = {
59
- _tokenizer,
60
- nodeType: ast.ListType,
61
- ordered: listItems[0].ordered,
62
- orderType: listItems[0].orderType,
63
- start: listItems[0].order,
64
- marker: listItems[0].marker,
65
- spread,
66
- position: {
67
- start: Object.assign({}, listItems[0].position.start),
68
- end: Object.assign({}, listItems[listItems.length - 1].position.end),
69
- },
70
- children: [...listItems],
71
- };
72
- results.push(list);
73
- if (list.spread)
74
- return;
75
- for (const listItem of list.children) {
76
- if (listItem.children == null || listItem.children.length <= 0)
77
- continue;
78
- listItem.children = listItem.children.map(child => {
79
- const lines = api.extractPhrasingLines(child);
80
- if (lines == null)
81
- return child;
82
- const token = api.buildPhrasingContentToken(lines);
83
- return token !== null && token !== void 0 ? token : child;
84
- });
43
+ else if (character.isAsciiLowerLetter(c0)) {
44
+ i += 1;
45
+ c = nodePoints[i].codePoint;
46
+ order = c0 - character.AsciiCodePoint.LOWERCASE_A + 1;
47
+ orderType = 'a';
48
+ }
49
+ else if (character.isAsciiUpperLetter(c0)) {
50
+ i += 1;
51
+ c = nodePoints[i].codePoint;
52
+ order = c0 - character.AsciiCodePoint.UPPERCASE_A + 1;
53
+ orderType = 'A';
54
+ }
55
+ if (i > firstNonWhitespaceIndex &&
56
+ i - firstNonWhitespaceIndex <= 9 &&
57
+ (c === character.AsciiCodePoint.DOT || c === character.AsciiCodePoint.CLOSE_PARENTHESIS)) {
58
+ i += 1;
59
+ ordered = true;
60
+ marker = c;
61
+ }
62
+ }
63
+ if (!ordered) {
64
+ if (c === character.AsciiCodePoint.PLUS_SIGN ||
65
+ c === character.AsciiCodePoint.MINUS_SIGN ||
66
+ c === character.AsciiCodePoint.ASTERISK) {
67
+ i += 1;
68
+ marker = c;
85
69
  }
70
+ }
71
+ if (marker == null)
72
+ return null;
73
+ let countOfSpaces = 0, nextIndex = i;
74
+ if (nextIndex < endIndex) {
75
+ c = nodePoints[nextIndex].codePoint;
76
+ if (c === character.VirtualCodePoint.SPACE)
77
+ nextIndex += 1;
78
+ }
79
+ for (; nextIndex < endIndex; ++nextIndex) {
80
+ c = nodePoints[nextIndex].codePoint;
81
+ if (!character.isSpaceCharacter(c))
82
+ break;
83
+ countOfSpaces += 1;
84
+ }
85
+ if (countOfSpaces > 4) {
86
+ nextIndex -= countOfSpaces - 1;
87
+ countOfSpaces = 1;
88
+ }
89
+ if (countOfSpaces === 0 && nextIndex < endIndex && c !== character.VirtualCodePoint.LINE_END)
90
+ return null;
91
+ const countOfTopBlankLine = c === character.VirtualCodePoint.LINE_END ? 1 : -1;
92
+ if (c === character.VirtualCodePoint.LINE_END) {
93
+ nextIndex -= countOfSpaces - 1;
94
+ countOfSpaces = 1;
95
+ }
96
+ const indent = i - startIndex + countOfSpaces;
97
+ let status = null;
98
+ if (enableTaskListItem) {
99
+ ({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex));
100
+ }
101
+ const token = {
102
+ nodeType: ast.ListType,
103
+ position: {
104
+ start: coreTokenizer.calcStartYastNodePoint(nodePoints, startIndex),
105
+ end: coreTokenizer.calcEndYastNodePoint(nodePoints, nextIndex - 1),
106
+ },
107
+ ordered,
108
+ marker,
109
+ orderType: ordered ? orderType : undefined,
110
+ order: ordered ? order : undefined,
111
+ indent,
112
+ countOfTopBlankLine,
113
+ children: [],
86
114
  };
87
- for (let i = 0; i < tokens.length; ++i) {
88
- const originalToken = tokens[i];
89
- if (originalToken.nodeType !== ast.ListItemType) {
90
- resolveList();
91
- listItems = [];
92
- results.push(originalToken);
93
- continue;
115
+ if (status != null)
116
+ token.status = status;
117
+ return { token, nextIndex };
118
+ }
119
+ function eatAndInterruptPreviousSibling(line, prevSiblingToken) {
120
+ const result = eatOpener(line);
121
+ if (result == null)
122
+ return null;
123
+ const { token, nextIndex } = result;
124
+ if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
125
+ if (token.indent === line.endIndex - line.startIndex) {
126
+ return null;
127
+ }
128
+ if (token.ordered && token.order !== 1)
129
+ return null;
130
+ }
131
+ return { token, nextIndex, remainingSibling: prevSiblingToken };
132
+ }
133
+ function eatContinuationText(line, token) {
134
+ const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line;
135
+ if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
136
+ return { status: 'notMatched' };
137
+ }
138
+ if (firstNonWhitespaceIndex >= endIndex) {
139
+ if (token.countOfTopBlankLine >= 0) {
140
+ token.countOfTopBlankLine += 1;
141
+ if (token.countOfTopBlankLine > 1) {
142
+ return { status: 'notMatched' };
143
+ }
144
+ }
145
+ }
146
+ else {
147
+ token.countOfTopBlankLine = -1;
148
+ }
149
+ const nextIndex = Math.min(startIndex + token.indent, endIndex - 1);
150
+ return { status: 'opening', nextIndex };
151
+ }
152
+ };
153
+ function eatTaskStatus(nodePoints, startIndex, endIndex) {
154
+ let i = startIndex;
155
+ for (; i < endIndex; ++i) {
156
+ const c = nodePoints[i].codePoint;
157
+ if (!character.isSpaceCharacter(c))
158
+ break;
159
+ }
160
+ if (i + 3 >= endIndex ||
161
+ nodePoints[i].codePoint !== character.AsciiCodePoint.OPEN_BRACKET ||
162
+ nodePoints[i + 2].codePoint !== character.AsciiCodePoint.CLOSE_BRACKET ||
163
+ !character.isWhitespaceCharacter(nodePoints[i + 3].codePoint))
164
+ return { status: null, nextIndex: startIndex };
165
+ let status;
166
+ const c = nodePoints[i + 1].codePoint;
167
+ switch (c) {
168
+ case character.AsciiCodePoint.SPACE:
169
+ status = ast.TaskStatus.TODO;
170
+ break;
171
+ case character.AsciiCodePoint.MINUS_SIGN:
172
+ status = ast.TaskStatus.DOING;
173
+ break;
174
+ case character.AsciiCodePoint.LOWERCASE_X:
175
+ case character.AsciiCodePoint.UPPERCASE_X:
176
+ status = ast.TaskStatus.DONE;
177
+ break;
178
+ default:
179
+ return { status: null, nextIndex: startIndex };
180
+ }
181
+ return { status, nextIndex: i + 4 };
182
+ }
183
+
184
+ const parse = function (api) {
185
+ return {
186
+ parse: tokens => {
187
+ const results = [];
188
+ let listItemTokens = [];
189
+ for (let i = 0; i < tokens.length; ++i) {
190
+ const originalToken = tokens[i];
191
+ if (listItemTokens.length <= 0 ||
192
+ listItemTokens[0].ordered !== originalToken.ordered ||
193
+ listItemTokens[0].orderType !== originalToken.orderType ||
194
+ listItemTokens[0].marker !== originalToken.marker) {
195
+ const node = resolveList(listItemTokens, api);
196
+ if (node)
197
+ results.push(node);
198
+ listItemTokens = [originalToken];
199
+ continue;
200
+ }
201
+ listItemTokens.push(originalToken);
94
202
  }
95
- if (listItems.length <= 0 ||
96
- listItems[0].ordered !== originalToken.ordered ||
97
- listItems[0].orderType !== originalToken.orderType ||
98
- listItems[0].marker !== originalToken.marker) {
99
- resolveList();
100
- listItems = [originalToken];
101
- continue;
203
+ const node = resolveList(listItemTokens, api);
204
+ if (node)
205
+ results.push(node);
206
+ return results;
207
+ },
208
+ };
209
+ };
210
+ const resolveList = (tokens, api) => {
211
+ if (tokens.length <= 0)
212
+ return null;
213
+ let spread = tokens.some((item) => {
214
+ if (item.children == null || item.children.length <= 1)
215
+ return false;
216
+ let previousPosition = item.children[0].position;
217
+ for (let j = 1; j < item.children.length; ++j) {
218
+ const currentPosition = item.children[j].position;
219
+ if (previousPosition.end.line + 1 < currentPosition.start.line) {
220
+ return true;
102
221
  }
103
- listItems.push(originalToken);
222
+ previousPosition = currentPosition;
223
+ }
224
+ return false;
225
+ });
226
+ if (!spread && tokens.length > 1) {
227
+ let previousItem = tokens[0];
228
+ for (let i = 1; i < tokens.length; ++i) {
229
+ const currentItem = tokens[i];
230
+ if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
231
+ spread = true;
232
+ break;
233
+ }
234
+ previousItem = currentItem;
104
235
  }
105
- resolveList();
106
- return results;
107
236
  }
237
+ const children = tokens.map((listItemToken) => {
238
+ const nodes = api.parseBlockTokens(listItemToken.children);
239
+ const children = spread
240
+ ? nodes
241
+ : nodes
242
+ .map(node => (node.type === ast.ParagraphType ? node.children : node))
243
+ .flat();
244
+ const listItem = {
245
+ type: ast.ListItemType,
246
+ position: listItemToken.position,
247
+ status: listItemToken.status,
248
+ children,
249
+ };
250
+ return listItem;
251
+ });
252
+ const node = {
253
+ type: ast.ListType,
254
+ position: {
255
+ start: Object.assign({}, tokens[0].position.start),
256
+ end: Object.assign({}, tokens[tokens.length - 1].position.end),
257
+ },
258
+ ordered: tokens[0].ordered,
259
+ orderType: tokens[0].orderType,
260
+ start: tokens[0].order,
261
+ marker: tokens[0].marker,
262
+ spread,
263
+ children,
264
+ };
265
+ return node;
108
266
  };
109
267
 
110
268
  const uniqueName = '@yozora/tokenizer-list';
111
269
 
112
270
  class ListTokenizer extends coreTokenizer.BaseBlockTokenizer {
113
271
  constructor(props = {}) {
114
- var _a, _b;
272
+ var _a, _b, _c, _d;
115
273
  super({
116
274
  name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
117
275
  priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.CONTAINING_BLOCK,
118
276
  });
119
- this.match = () => {
120
- return {
121
- isContainingBlock: true,
122
- eatOpener: () => null,
123
- };
124
- };
125
- this.postMatch = postMatch;
277
+ this.match = match;
126
278
  this.parse = parse;
279
+ this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
280
+ this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
281
+ coreTokenizer.PhrasingContentType,
282
+ ast.ParagraphType,
283
+ ];
127
284
  }
128
285
  }
129
286
 
package/lib/esm/index.js CHANGED
@@ -1,125 +1,282 @@
1
- import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer';
2
- import { ListType, ListItemType } from '@yozora/ast';
1
+ import { ListType, TaskStatus, ParagraphType, ListItemType } from '@yozora/ast';
2
+ import { calcStartYastNodePoint, calcEndYastNodePoint, BaseBlockTokenizer, TokenizerPriority, PhrasingContentType } from '@yozora/core-tokenizer';
3
+ import { isAsciiDigitCharacter, AsciiCodePoint, isAsciiLowerLetter, isAsciiUpperLetter, VirtualCodePoint, isSpaceCharacter, isWhitespaceCharacter } from '@yozora/character';
3
4
 
4
- const parse = function () {
5
+ const match = function () {
6
+ const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this;
5
7
  return {
6
- parse: (token, children) => {
7
- const node = {
8
- type: ListType,
9
- ordered: token.ordered,
10
- orderType: token.orderType,
11
- start: token.start,
12
- marker: token.marker,
13
- spread: token.spread,
14
- children: children,
15
- };
16
- return node;
17
- },
8
+ isContainingBlock: true,
9
+ eatOpener,
10
+ eatAndInterruptPreviousSibling,
11
+ eatContinuationText,
18
12
  };
19
- };
20
-
21
- const postMatch = function (api) {
22
- const { name: _tokenizer } = this;
23
- return { transformMatch };
24
- function transformMatch(tokens) {
25
- const results = [];
26
- let listItems = [];
27
- const resolveList = () => {
28
- if (listItems.length <= 0)
29
- return;
30
- let spread = listItems.some((item) => {
31
- if (item.children == null || item.children.length <= 1)
32
- return false;
33
- let previousPosition = item.children[0].position;
34
- for (let j = 1; j < item.children.length; ++j) {
35
- const currentPosition = item.children[j].position;
36
- if (previousPosition.end.line + 1 < currentPosition.start.line) {
37
- return true;
38
- }
39
- previousPosition = currentPosition;
40
- }
41
- return false;
42
- });
43
- if (!spread && listItems.length > 1) {
44
- let previousItem = listItems[0];
45
- for (let i = 1; i < listItems.length; ++i) {
46
- const currentItem = listItems[i];
47
- if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
48
- spread = true;
13
+ function eatOpener(line) {
14
+ if (line.countOfPrecedeSpaces >= 4)
15
+ return null;
16
+ const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line;
17
+ if (firstNonWhitespaceIndex >= endIndex)
18
+ return null;
19
+ let ordered = false;
20
+ let marker = null;
21
+ let orderType;
22
+ let order;
23
+ let i = firstNonWhitespaceIndex;
24
+ let c = nodePoints[i].codePoint;
25
+ if (i + 1 < endIndex) {
26
+ const c0 = c;
27
+ if (isAsciiDigitCharacter(c0)) {
28
+ orderType = '1';
29
+ let v = c0 - AsciiCodePoint.DIGIT0;
30
+ for (i += 1; i < endIndex; ++i) {
31
+ c = nodePoints[i].codePoint;
32
+ if (!isAsciiDigitCharacter(c))
49
33
  break;
50
- }
51
- previousItem = currentItem;
34
+ v = v * 10 + c - AsciiCodePoint.DIGIT0;
52
35
  }
36
+ order = v;
37
+ orderType = '1';
53
38
  }
54
- const list = {
55
- _tokenizer,
56
- nodeType: ListType,
57
- ordered: listItems[0].ordered,
58
- orderType: listItems[0].orderType,
59
- start: listItems[0].order,
60
- marker: listItems[0].marker,
61
- spread,
62
- position: {
63
- start: Object.assign({}, listItems[0].position.start),
64
- end: Object.assign({}, listItems[listItems.length - 1].position.end),
65
- },
66
- children: [...listItems],
67
- };
68
- results.push(list);
69
- if (list.spread)
70
- return;
71
- for (const listItem of list.children) {
72
- if (listItem.children == null || listItem.children.length <= 0)
73
- continue;
74
- listItem.children = listItem.children.map(child => {
75
- const lines = api.extractPhrasingLines(child);
76
- if (lines == null)
77
- return child;
78
- const token = api.buildPhrasingContentToken(lines);
79
- return token !== null && token !== void 0 ? token : child;
80
- });
39
+ else if (isAsciiLowerLetter(c0)) {
40
+ i += 1;
41
+ c = nodePoints[i].codePoint;
42
+ order = c0 - AsciiCodePoint.LOWERCASE_A + 1;
43
+ orderType = 'a';
44
+ }
45
+ else if (isAsciiUpperLetter(c0)) {
46
+ i += 1;
47
+ c = nodePoints[i].codePoint;
48
+ order = c0 - AsciiCodePoint.UPPERCASE_A + 1;
49
+ orderType = 'A';
81
50
  }
51
+ if (i > firstNonWhitespaceIndex &&
52
+ i - firstNonWhitespaceIndex <= 9 &&
53
+ (c === AsciiCodePoint.DOT || c === AsciiCodePoint.CLOSE_PARENTHESIS)) {
54
+ i += 1;
55
+ ordered = true;
56
+ marker = c;
57
+ }
58
+ }
59
+ if (!ordered) {
60
+ if (c === AsciiCodePoint.PLUS_SIGN ||
61
+ c === AsciiCodePoint.MINUS_SIGN ||
62
+ c === AsciiCodePoint.ASTERISK) {
63
+ i += 1;
64
+ marker = c;
65
+ }
66
+ }
67
+ if (marker == null)
68
+ return null;
69
+ let countOfSpaces = 0, nextIndex = i;
70
+ if (nextIndex < endIndex) {
71
+ c = nodePoints[nextIndex].codePoint;
72
+ if (c === VirtualCodePoint.SPACE)
73
+ nextIndex += 1;
74
+ }
75
+ for (; nextIndex < endIndex; ++nextIndex) {
76
+ c = nodePoints[nextIndex].codePoint;
77
+ if (!isSpaceCharacter(c))
78
+ break;
79
+ countOfSpaces += 1;
80
+ }
81
+ if (countOfSpaces > 4) {
82
+ nextIndex -= countOfSpaces - 1;
83
+ countOfSpaces = 1;
84
+ }
85
+ if (countOfSpaces === 0 && nextIndex < endIndex && c !== VirtualCodePoint.LINE_END)
86
+ return null;
87
+ const countOfTopBlankLine = c === VirtualCodePoint.LINE_END ? 1 : -1;
88
+ if (c === VirtualCodePoint.LINE_END) {
89
+ nextIndex -= countOfSpaces - 1;
90
+ countOfSpaces = 1;
91
+ }
92
+ const indent = i - startIndex + countOfSpaces;
93
+ let status = null;
94
+ if (enableTaskListItem) {
95
+ ({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex));
96
+ }
97
+ const token = {
98
+ nodeType: ListType,
99
+ position: {
100
+ start: calcStartYastNodePoint(nodePoints, startIndex),
101
+ end: calcEndYastNodePoint(nodePoints, nextIndex - 1),
102
+ },
103
+ ordered,
104
+ marker,
105
+ orderType: ordered ? orderType : undefined,
106
+ order: ordered ? order : undefined,
107
+ indent,
108
+ countOfTopBlankLine,
109
+ children: [],
82
110
  };
83
- for (let i = 0; i < tokens.length; ++i) {
84
- const originalToken = tokens[i];
85
- if (originalToken.nodeType !== ListItemType) {
86
- resolveList();
87
- listItems = [];
88
- results.push(originalToken);
89
- continue;
111
+ if (status != null)
112
+ token.status = status;
113
+ return { token, nextIndex };
114
+ }
115
+ function eatAndInterruptPreviousSibling(line, prevSiblingToken) {
116
+ const result = eatOpener(line);
117
+ if (result == null)
118
+ return null;
119
+ const { token, nextIndex } = result;
120
+ if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
121
+ if (token.indent === line.endIndex - line.startIndex) {
122
+ return null;
123
+ }
124
+ if (token.ordered && token.order !== 1)
125
+ return null;
126
+ }
127
+ return { token, nextIndex, remainingSibling: prevSiblingToken };
128
+ }
129
+ function eatContinuationText(line, token) {
130
+ const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line;
131
+ if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
132
+ return { status: 'notMatched' };
133
+ }
134
+ if (firstNonWhitespaceIndex >= endIndex) {
135
+ if (token.countOfTopBlankLine >= 0) {
136
+ token.countOfTopBlankLine += 1;
137
+ if (token.countOfTopBlankLine > 1) {
138
+ return { status: 'notMatched' };
139
+ }
90
140
  }
91
- if (listItems.length <= 0 ||
92
- listItems[0].ordered !== originalToken.ordered ||
93
- listItems[0].orderType !== originalToken.orderType ||
94
- listItems[0].marker !== originalToken.marker) {
95
- resolveList();
96
- listItems = [originalToken];
97
- continue;
141
+ }
142
+ else {
143
+ token.countOfTopBlankLine = -1;
144
+ }
145
+ const nextIndex = Math.min(startIndex + token.indent, endIndex - 1);
146
+ return { status: 'opening', nextIndex };
147
+ }
148
+ };
149
+ function eatTaskStatus(nodePoints, startIndex, endIndex) {
150
+ let i = startIndex;
151
+ for (; i < endIndex; ++i) {
152
+ const c = nodePoints[i].codePoint;
153
+ if (!isSpaceCharacter(c))
154
+ break;
155
+ }
156
+ if (i + 3 >= endIndex ||
157
+ nodePoints[i].codePoint !== AsciiCodePoint.OPEN_BRACKET ||
158
+ nodePoints[i + 2].codePoint !== AsciiCodePoint.CLOSE_BRACKET ||
159
+ !isWhitespaceCharacter(nodePoints[i + 3].codePoint))
160
+ return { status: null, nextIndex: startIndex };
161
+ let status;
162
+ const c = nodePoints[i + 1].codePoint;
163
+ switch (c) {
164
+ case AsciiCodePoint.SPACE:
165
+ status = TaskStatus.TODO;
166
+ break;
167
+ case AsciiCodePoint.MINUS_SIGN:
168
+ status = TaskStatus.DOING;
169
+ break;
170
+ case AsciiCodePoint.LOWERCASE_X:
171
+ case AsciiCodePoint.UPPERCASE_X:
172
+ status = TaskStatus.DONE;
173
+ break;
174
+ default:
175
+ return { status: null, nextIndex: startIndex };
176
+ }
177
+ return { status, nextIndex: i + 4 };
178
+ }
179
+
180
+ const parse = function (api) {
181
+ return {
182
+ parse: tokens => {
183
+ const results = [];
184
+ let listItemTokens = [];
185
+ for (let i = 0; i < tokens.length; ++i) {
186
+ const originalToken = tokens[i];
187
+ if (listItemTokens.length <= 0 ||
188
+ listItemTokens[0].ordered !== originalToken.ordered ||
189
+ listItemTokens[0].orderType !== originalToken.orderType ||
190
+ listItemTokens[0].marker !== originalToken.marker) {
191
+ const node = resolveList(listItemTokens, api);
192
+ if (node)
193
+ results.push(node);
194
+ listItemTokens = [originalToken];
195
+ continue;
196
+ }
197
+ listItemTokens.push(originalToken);
98
198
  }
99
- listItems.push(originalToken);
199
+ const node = resolveList(listItemTokens, api);
200
+ if (node)
201
+ results.push(node);
202
+ return results;
203
+ },
204
+ };
205
+ };
206
+ const resolveList = (tokens, api) => {
207
+ if (tokens.length <= 0)
208
+ return null;
209
+ let spread = tokens.some((item) => {
210
+ if (item.children == null || item.children.length <= 1)
211
+ return false;
212
+ let previousPosition = item.children[0].position;
213
+ for (let j = 1; j < item.children.length; ++j) {
214
+ const currentPosition = item.children[j].position;
215
+ if (previousPosition.end.line + 1 < currentPosition.start.line) {
216
+ return true;
217
+ }
218
+ previousPosition = currentPosition;
219
+ }
220
+ return false;
221
+ });
222
+ if (!spread && tokens.length > 1) {
223
+ let previousItem = tokens[0];
224
+ for (let i = 1; i < tokens.length; ++i) {
225
+ const currentItem = tokens[i];
226
+ if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
227
+ spread = true;
228
+ break;
229
+ }
230
+ previousItem = currentItem;
100
231
  }
101
- resolveList();
102
- return results;
103
232
  }
233
+ const children = tokens.map((listItemToken) => {
234
+ const nodes = api.parseBlockTokens(listItemToken.children);
235
+ const children = spread
236
+ ? nodes
237
+ : nodes
238
+ .map(node => (node.type === ParagraphType ? node.children : node))
239
+ .flat();
240
+ const listItem = {
241
+ type: ListItemType,
242
+ position: listItemToken.position,
243
+ status: listItemToken.status,
244
+ children,
245
+ };
246
+ return listItem;
247
+ });
248
+ const node = {
249
+ type: ListType,
250
+ position: {
251
+ start: Object.assign({}, tokens[0].position.start),
252
+ end: Object.assign({}, tokens[tokens.length - 1].position.end),
253
+ },
254
+ ordered: tokens[0].ordered,
255
+ orderType: tokens[0].orderType,
256
+ start: tokens[0].order,
257
+ marker: tokens[0].marker,
258
+ spread,
259
+ children,
260
+ };
261
+ return node;
104
262
  };
105
263
 
106
264
  const uniqueName = '@yozora/tokenizer-list';
107
265
 
108
266
  class ListTokenizer extends BaseBlockTokenizer {
109
267
  constructor(props = {}) {
110
- var _a, _b;
268
+ var _a, _b, _c, _d;
111
269
  super({
112
270
  name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
113
271
  priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.CONTAINING_BLOCK,
114
272
  });
115
- this.match = () => {
116
- return {
117
- isContainingBlock: true,
118
- eatOpener: () => null,
119
- };
120
- };
121
- this.postMatch = postMatch;
273
+ this.match = match;
122
274
  this.parse = parse;
275
+ this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
276
+ this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
277
+ PhrasingContentType,
278
+ ParagraphType,
279
+ ];
123
280
  }
124
281
  }
125
282
 
@@ -0,0 +1,25 @@
1
+ import type { IMatchBlockHookCreator } from '@yozora/core-tokenizer';
2
+ import type { IThis, IToken, T } from './types';
3
+ /**
4
+ * The following rules define list items:
5
+ * - Basic case. If a sequence of lines Ls constitute a sequence of blocks Bs
6
+ * starting with a non-whitespace character, and M is a list marker of width
7
+ * W followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
8
+ * following spaces to the first line of Ls, and indenting subsequent lines
9
+ * of Ls by W + N spaces, is a list item with Bs as its contents. The type
10
+ * of the list item (bullet or ordered) is determined by the type of its
11
+ * list marker. If the list item is ordered, then it is also assigned a
12
+ * start number, based on the ordered list marker.
13
+ *
14
+ * Exceptions:
15
+ * - When the first list item in a list interrupts a paragraph—that is,
16
+ * when it starts on a line that would otherwise count as paragraph
17
+ * continuation text—then
18
+ * (a) the lines Ls must not begin with a blank line, and
19
+ * (b) if the list item is ordered, the start number must be 1.
20
+ * - If any line is a thematic break then that line is not a list item.
21
+ *
22
+ * @see https://github.com/syntax-tree/mdast#listitem
23
+ * @see https://github.github.com/gfm/#list-items
24
+ */
25
+ export declare const match: IMatchBlockHookCreator<T, IToken, IThis>;
@@ -1,3 +1,3 @@
1
1
  import type { IParseBlockHookCreator } from '@yozora/core-tokenizer';
2
- import type { IHookContext, INode, IToken, T } from './types';
3
- export declare const parse: IParseBlockHookCreator<T, IToken, INode, IHookContext>;
2
+ import type { INode, IThis, IToken, T } from './types';
3
+ export declare const parse: IParseBlockHookCreator<T, IToken, INode, IThis>;
@@ -1,11 +1,7 @@
1
- import type { IBlockTokenizer, IMatchBlockHookCreator, IParseBlockHookCreator, IPostMatchBlockHookCreator } from '@yozora/core-tokenizer';
1
+ import type { YastNodeType } from '@yozora/ast';
2
+ import type { IBlockTokenizer, IMatchBlockHookCreator, IParseBlockHookCreator } from '@yozora/core-tokenizer';
2
3
  import { BaseBlockTokenizer } from '@yozora/core-tokenizer';
3
- import type { IHookContext, INode, IToken, ITokenizerProps, T } from './types';
4
- /**
5
- * Params for constructing ListTokenizer
6
- */
7
- export interface ListTokenizerProps {
8
- }
4
+ import type { INode, IThis, IToken, ITokenizerProps, T } from './types';
9
5
  /**
10
6
  * Lexical Analyzer for List.
11
7
  *
@@ -15,9 +11,10 @@ export interface ListTokenizerProps {
15
11
  * @see https://github.com/syntax-tree/mdast#list
16
12
  * @see https://github.github.com/gfm/#list
17
13
  */
18
- export declare class ListTokenizer extends BaseBlockTokenizer<T, IToken, INode, IHookContext> implements IBlockTokenizer<T, IToken, INode, IHookContext> {
14
+ export declare class ListTokenizer extends BaseBlockTokenizer<T, IToken, INode, IThis> implements IBlockTokenizer<T, IToken, INode, IThis> {
19
15
  constructor(props?: ITokenizerProps);
20
- readonly match: IMatchBlockHookCreator<T, IToken, IHookContext>;
21
- readonly postMatch: IPostMatchBlockHookCreator<IHookContext>;
22
- readonly parse: IParseBlockHookCreator<T, IToken, INode, IHookContext>;
16
+ readonly enableTaskListItem: boolean;
17
+ readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<YastNodeType>;
18
+ readonly match: IMatchBlockHookCreator<T, IToken, IThis>;
19
+ readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis>;
23
20
  }
@@ -1,17 +1,15 @@
1
- import type { IList, ListType } from '@yozora/ast';
1
+ import type { IList, ListType, TaskStatus, YastNodeType } from '@yozora/ast';
2
2
  import type { IBaseBlockTokenizerProps, IPartialYastBlockToken, ITokenizer, IYastBlockToken } from '@yozora/core-tokenizer';
3
- import type { IListItemToken as IListItemToken0 } from '@yozora/tokenizer-list-item';
4
3
  export declare type T = ListType;
5
4
  export declare type INode = IList;
6
5
  export declare const uniqueName = "@yozora/tokenizer-list";
7
- export declare type IListItemToken = IListItemToken0 & IYastBlockToken;
8
6
  export interface IToken extends IPartialYastBlockToken<T> {
9
7
  /**
10
8
  * Is it an ordered list item.
11
9
  */
12
10
  ordered: boolean;
13
11
  /**
14
- * Marker of a bullet list-item, or delimiter of an ordered list-item.
12
+ * Marker of a bullet list-item, or the delimiter of an ordered list-item.
15
13
  */
16
14
  marker: number;
17
15
  /**
@@ -20,17 +18,48 @@ export interface IToken extends IPartialYastBlockToken<T> {
20
18
  */
21
19
  orderType?: '1' | 'a' | 'A' | 'i' | 'I';
22
20
  /**
23
- * The starting number of a ordered list-item.
21
+ * Serial number of an ordered list-item.
24
22
  */
25
- start?: number;
23
+ order?: number;
26
24
  /**
27
- * Whether if the list is loose.
25
+ * Status of a todo task.
28
26
  */
29
- spread: boolean;
27
+ status?: TaskStatus;
30
28
  /**
31
- * List items.
29
+ * Indent of a list item.
32
30
  */
33
- children: IListItemToken[];
31
+ indent: number;
32
+ /**
33
+ * list-item 起始的空行数量
34
+ * The number of blank lines at the beginning of a list-item
35
+ */
36
+ countOfTopBlankLine: number;
37
+ /**
38
+ * Child token nodes.
39
+ */
40
+ children: IYastBlockToken[];
41
+ }
42
+ export interface IThis extends ITokenizer {
43
+ /**
44
+ * Specify an array of IYastNode types that cannot be interrupted
45
+ * by this ITokenizer when the current list-item is empty.
46
+ * @see https://github.github.com/gfm/#example-263
47
+ */
48
+ readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<YastNodeType>;
49
+ /**
50
+ * Whether to enable task list items (extension).
51
+ */
52
+ readonly enableTaskListItem: boolean;
53
+ }
54
+ export interface ITokenizerProps extends Partial<IBaseBlockTokenizerProps> {
55
+ /**
56
+ * Specify an array of IYastNode types that cannot be interrupted
57
+ * by this ITokenizer when the current list-item is empty.
58
+ * @see https://github.github.com/gfm/#example-263
59
+ */
60
+ readonly emptyItemCouldNotInterruptedTypes?: YastNodeType[];
61
+ /**
62
+ * Whether to enable task list items (extension).
63
+ */
64
+ readonly enableTaskListItem?: boolean;
34
65
  }
35
- export declare type IHookContext = ITokenizer;
36
- export declare type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yozora/tokenizer-list",
3
- "version": "2.0.0-alpha.0",
3
+ "version": "2.0.0-alpha.1",
4
4
  "author": {
5
5
  "name": "guanghechen",
6
6
  "url": "https://github.com/guanghechen/"
@@ -35,9 +35,9 @@
35
35
  "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
36
36
  },
37
37
  "dependencies": {
38
- "@yozora/ast": "^2.0.0-alpha.0",
39
- "@yozora/core-tokenizer": "^2.0.0-alpha.0",
40
- "@yozora/tokenizer-list-item": "^2.0.0-alpha.0"
38
+ "@yozora/ast": "^2.0.0-alpha.1",
39
+ "@yozora/character": "^2.0.0-alpha.1",
40
+ "@yozora/core-tokenizer": "^2.0.0-alpha.1"
41
41
  },
42
- "gitHead": "0171501339c49ffd02ed16a63447fa20a47a29a7"
42
+ "gitHead": "86202e1d2b03ccfc2ab030517d9d314f7aee7666"
43
43
  }
@@ -1,10 +0,0 @@
1
- import type { IPostMatchBlockHookCreator } from '@yozora/core-tokenizer';
2
- import type { IHookContext } from './types';
3
- /**
4
- * A list is a sequence of one or more list items of the same type.
5
- * The list items may be separated by any number of blank lines.
6
- *
7
- * @see https://github.com/syntax-tree/mdast#list
8
- * @see https://github.github.com/gfm/#list
9
- */
10
- export declare const postMatch: IPostMatchBlockHookCreator<IHookContext>;