npm - @yozora/tokenizer-list - Versions diffs - 2.0.0-alpha.0 → 2.0.0-alpha.1 - Mend

@yozora/tokenizer-list 2.0.0-alpha.0 → 2.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md (changed)

@@ -53,7 +53,7 @@
 <!-- :end -->
-[@yozora/tokenizer-list] produce [List][node-type] type nodes.
+[@yozora/tokenizer-list] produce [List][node-type] (with [ListItem][node-type:list-item] as children) type nodes.
 See [documentation][docpage] for details.
 <!-- :begin use tokenizer/usage -->
@@ -84,14 +84,14 @@ so you can use `YozoraParser` / `GfmExParser` / `GfmParser` directly.
 registered in *YastParser* as a plugin-in before it can be used.
 ```typescript {4,9}
-import { DefaultYastParser } from '@yozora/core-parser'
+import { DefaultParser } from '@yozora/core-parser'
 import ParagraphTokenizer from '@yozora/tokenizer-paragraph'
 import TextTokenizer from '@yozora/tokenizer-text'
 import ListTokenizer from '@yozora/tokenizer-list'
-const parser = new DefaultYastParser()
-  .useBlockFallbackTokenizer(new ParagraphTokenizer())
-  .useInlineFallbackTokenizer(new TextTokenizer())
+const parser = new DefaultParser()
+  .useFallbackTokenizer(new ParagraphTokenizer())
+  .useFallbackTokenizer(new TextTokenizer())
   .useTokenizer(new ListTokenizer())
 // parse source markdown content
@@ -213,10 +213,12 @@ a. This is an another type of ordered list item
 ### Options
-Name              | Type        | Required  | Default
-:----------------:|:-----------:|:---------:|:--------------:
-`name`            | `string`    | `false`   | `"@yozora/tokenizer-list"`
-`priority`        | `number`    | `false`   | `TokenizerPriority.CONTAINING_BLOCK`
+Name                                | Type              | Required  | Default
+:----------------------------------:|:-----------------:|:---------:|:--------------:
+`name`                              | `string`          | `false`   | `"@yozora/tokenizer-list"`
+`priority`                          | `number`          | `false`   | `TokenizerPriority.CONTAINING_BLOCK`
+`enableTaskListItem`                | `boolean`         | `false`   | `false`
+`emptyItemCouldNotInterruptedTypes` | `YastNodeType[]`  | `false`   | `[PhrasingContentType, ParagraphType]`
 * `name`: The unique name of the tokenizer, used to bind the token it generates,
   to determine the tokenizer that should be called in each life cycle of the
@@ -227,6 +229,14 @@ Name              | Type        | Required  | Default
   stage, a high-priority tokenizer can interrupt the matching process of a
   low-priority tokenizer.
+* `enableTaskListItem`: Whether to enable task list item (extension).
+* `emptyItemCouldNotInterruptedTypes`: Specify an array of IYastNode types that could not be
+  interrupted by this ITokenizer if the current list-item is empty.
+  @see https://github.github.com/gfm/#example-263
 <!-- :end -->
 ## Related
@@ -236,15 +246,17 @@ Name              | Type        | Required  | Default
 * [@yozora/parser][]
 * [@yozora/parser-gfm][]
 * [@yozora/parser-gfm-ex][]
-* [@yozora/tokenizer-list-item][]
 * [@yozora/react-list][]
 * [@yozora/react-list-item][]
 * [@yozora/react-markdown][]
 * [Live Examples][live-examples]
 * [List | Yozora AST][node-type]
 * [Documentation][docpage]
+* [List | Mdast][mdast-homepage]
+* [ListItem | Documentation][mdast-homepage:list-item]
 [node-type]: http://yozora.guanghechen.com/docs/package/ast#list
+[node-type:list-item]: http://yozora.guanghechen.com/docs/package/ast#listitem
 <!-- :begin use tokenizer/definitions -->
@@ -252,7 +264,8 @@ Name              | Type        | Required  | Default
 [docpage]: https://yozora.guanghechen.com/docs/package/tokenizer-list
 [homepage]: https://github.com/yozorajs/yozora/tree/main/tokenizers/list#readme
 [gfm-spec]: https://github.github.com/gfm
-[mdast-homepage]: https://github.com/syntax-tree/mdast
+[mdast-homepage]: https://github.com/syntax-tree/mdast#list
+[mdast-homepage:list-item]: https://github.com/syntax-tree/mdast#listitem
 [@yozora/ast]:                                https://github.com/yozorajs/yozora/tree/main/packages/ast#readme
 [@yozora/ast-util]:                           https://github.com/yozorajs/yozora/tree/main/packages/ast-util#readme
@@ -291,7 +304,6 @@ Name              | Type        | Required  | Default
 [@yozora/tokenizer-link]:                     https://github.com/yozorajs/yozora/tree/main/tokenizers/link#readme
 [@yozora/tokenizer-link-reference]:           https://github.com/yozorajs/yozora/tree/main/tokenizers/link-reference#readme
 [@yozora/tokenizer-list]:                     https://github.com/yozorajs/yozora/tree/main/tokenizers/list#readme
-[@yozora/tokenizer-list-item]:                https://github.com/yozorajs/yozora/tree/main/tokenizers/list-item#readme
 [@yozora/tokenizer-math]:                     https://github.com/yozorajs/yozora/tree/main/tokenizers/math#readme
 [@yozora/tokenizer-paragraph]:                https://github.com/yozorajs/yozora/tree/main/tokenizers/paragraph#readme
 [@yozora/tokenizer-setext-heading]:           https://github.com/yozorajs/yozora/tree/main/tokenizers/setext-heading#readme
@@ -351,7 +363,6 @@ Name              | Type        | Required  | Default
 [doc-@yozora/tokenizer-definition]:           https://yozora.guanghechen.com/docs/package/tokenizer-definition
 [doc-@yozora/tokenizer-link-reference]:       https://yozora.guanghechen.com/docs/package/tokenizer-link-reference
 [doc-@yozora/tokenizer-list]:                 https://yozora.guanghechen.com/docs/package/tokenizer-list
-[doc-@yozora/tokenizer-list-item]:            https://yozora.guanghechen.com/docs/package/tokenizer-list-item
 [doc-@yozora/tokenizer-math]:                 https://yozora.guanghechen.com/docs/package/tokenizer-math
 [doc-@yozora/tokenizer-paragraph]:            https://yozora.guanghechen.com/docs/package/tokenizer-paragraph
 [doc-@yozora/tokenizer-setext-heading]:       https://yozora.guanghechen.com/docs/package/tokenizer-setext-heading

package/lib/cjs/index.js (changed)

@@ -2,128 +2,285 @@
 Object.defineProperty(exports, '__esModule', { value: true });
-var coreTokenizer = require('@yozora/core-tokenizer');
 var ast = require('@yozora/ast');
+var coreTokenizer = require('@yozora/core-tokenizer');
+var character = require('@yozora/character');
-const parse = function () {
+const match = function () {
+    const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this;
     return {
-        parse: (token, children) => {
-            const node = {
-                type: ast.ListType,
-                ordered: token.ordered,
-                orderType: token.orderType,
-                start: token.start,
-                marker: token.marker,
-                spread: token.spread,
-                children: children,
-            };
-            return node;
-        },
+        isContainingBlock: true,
+        eatOpener,
+        eatAndInterruptPreviousSibling,
+        eatContinuationText,
     };
-};
-const postMatch = function (api) {
-    const { name: _tokenizer } = this;
-    return { transformMatch };
-    function transformMatch(tokens) {
-        const results = [];
-        let listItems = [];
-        const resolveList = () => {
-            if (listItems.length <= 0)
-                return;
-            let spread = listItems.some((item) => {
-                if (item.children == null || item.children.length <= 1)
-                    return false;
-                let previousPosition = item.children[0].position;
-                for (let j = 1; j < item.children.length; ++j) {
-                    const currentPosition = item.children[j].position;
-                    if (previousPosition.end.line + 1 < currentPosition.start.line) {
-                        return true;
-                    }
-                    previousPosition = currentPosition;
-                }
-                return false;
-            });
-            if (!spread && listItems.length > 1) {
-                let previousItem = listItems[0];
-                for (let i = 1; i < listItems.length; ++i) {
-                    const currentItem = listItems[i];
-                    if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
-                        spread = true;
+    function eatOpener(line) {
+        if (line.countOfPrecedeSpaces >= 4)
+            return null;
+        const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line;
+        if (firstNonWhitespaceIndex >= endIndex)
+            return null;
+        let ordered = false;
+        let marker = null;
+        let orderType;
+        let order;
+        let i = firstNonWhitespaceIndex;
+        let c = nodePoints[i].codePoint;
+        if (i + 1 < endIndex) {
+            const c0 = c;
+            if (character.isAsciiDigitCharacter(c0)) {
+                orderType = '1';
+                let v = c0 - character.AsciiCodePoint.DIGIT0;
+                for (i += 1; i < endIndex; ++i) {
+                    c = nodePoints[i].codePoint;
+                    if (!character.isAsciiDigitCharacter(c))
                         break;
-                    }
-                    previousItem = currentItem;
+                    v = v * 10 + c - character.AsciiCodePoint.DIGIT0;
                 }
+                order = v;
+                orderType = '1';
             }
-            const list = {
-                _tokenizer,
-                nodeType: ast.ListType,
-                ordered: listItems[0].ordered,
-                orderType: listItems[0].orderType,
-                start: listItems[0].order,
-                marker: listItems[0].marker,
-                spread,
-                position: {
-                    start: Object.assign({}, listItems[0].position.start),
-                    end: Object.assign({}, listItems[listItems.length - 1].position.end),
-                },
-                children: [...listItems],
-            };
-            results.push(list);
-            if (list.spread)
-                return;
-            for (const listItem of list.children) {
-                if (listItem.children == null || listItem.children.length <= 0)
-                    continue;
-                listItem.children = listItem.children.map(child => {
-                    const lines = api.extractPhrasingLines(child);
-                    if (lines == null)
-                        return child;
-                    const token = api.buildPhrasingContentToken(lines);
-                    return token !== null && token !== void 0 ? token : child;
-                });
+            else if (character.isAsciiLowerLetter(c0)) {
+                i += 1;
+                c = nodePoints[i].codePoint;
+                order = c0 - character.AsciiCodePoint.LOWERCASE_A + 1;
+                orderType = 'a';
+            }
+            else if (character.isAsciiUpperLetter(c0)) {
+                i += 1;
+                c = nodePoints[i].codePoint;
+                order = c0 - character.AsciiCodePoint.UPPERCASE_A + 1;
+                orderType = 'A';
+            }
+            if (i > firstNonWhitespaceIndex &&
+                i - firstNonWhitespaceIndex <= 9 &&
+                (c === character.AsciiCodePoint.DOT || c === character.AsciiCodePoint.CLOSE_PARENTHESIS)) {
+                i += 1;
+                ordered = true;
+                marker = c;
+            }
+        }
+        if (!ordered) {
+            if (c === character.AsciiCodePoint.PLUS_SIGN ||
+                c === character.AsciiCodePoint.MINUS_SIGN ||
+                c === character.AsciiCodePoint.ASTERISK) {
+                i += 1;
+                marker = c;
             }
+        }
+        if (marker == null)
+            return null;
+        let countOfSpaces = 0, nextIndex = i;
+        if (nextIndex < endIndex) {
+            c = nodePoints[nextIndex].codePoint;
+            if (c === character.VirtualCodePoint.SPACE)
+                nextIndex += 1;
+        }
+        for (; nextIndex < endIndex; ++nextIndex) {
+            c = nodePoints[nextIndex].codePoint;
+            if (!character.isSpaceCharacter(c))
+                break;
+            countOfSpaces += 1;
+        }
+        if (countOfSpaces > 4) {
+            nextIndex -= countOfSpaces - 1;
+            countOfSpaces = 1;
+        }
+        if (countOfSpaces === 0 && nextIndex < endIndex && c !== character.VirtualCodePoint.LINE_END)
+            return null;
+        const countOfTopBlankLine = c === character.VirtualCodePoint.LINE_END ? 1 : -1;
+        if (c === character.VirtualCodePoint.LINE_END) {
+            nextIndex -= countOfSpaces - 1;
+            countOfSpaces = 1;
+        }
+        const indent = i - startIndex + countOfSpaces;
+        let status = null;
+        if (enableTaskListItem) {
+            ({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex));
+        }
+        const token = {
+            nodeType: ast.ListType,
+            position: {
+                start: coreTokenizer.calcStartYastNodePoint(nodePoints, startIndex),
+                end: coreTokenizer.calcEndYastNodePoint(nodePoints, nextIndex - 1),
+            },
+            ordered,
+            marker,
+            orderType: ordered ? orderType : undefined,
+            order: ordered ? order : undefined,
+            indent,
+            countOfTopBlankLine,
+            children: [],
         };
-        for (let i = 0; i < tokens.length; ++i) {
-            const originalToken = tokens[i];
-            if (originalToken.nodeType !== ast.ListItemType) {
-                resolveList();
-                listItems = [];
-                results.push(originalToken);
-                continue;
+        if (status != null)
+            token.status = status;
+        return { token, nextIndex };
+    }
+    function eatAndInterruptPreviousSibling(line, prevSiblingToken) {
+        const result = eatOpener(line);
+        if (result == null)
+            return null;
+        const { token, nextIndex } = result;
+        if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
+            if (token.indent === line.endIndex - line.startIndex) {
+                return null;
+            }
+            if (token.ordered && token.order !== 1)
+                return null;
+        }
+        return { token, nextIndex, remainingSibling: prevSiblingToken };
+    }
+    function eatContinuationText(line, token) {
+        const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line;
+        if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
+            return { status: 'notMatched' };
+        }
+        if (firstNonWhitespaceIndex >= endIndex) {
+            if (token.countOfTopBlankLine >= 0) {
+                token.countOfTopBlankLine += 1;
+                if (token.countOfTopBlankLine > 1) {
+                    return { status: 'notMatched' };
+                }
+            }
+        }
+        else {
+            token.countOfTopBlankLine = -1;
+        }
+        const nextIndex = Math.min(startIndex + token.indent, endIndex - 1);
+        return { status: 'opening', nextIndex };
+    }
+};
+function eatTaskStatus(nodePoints, startIndex, endIndex) {
+    let i = startIndex;
+    for (; i < endIndex; ++i) {
+        const c = nodePoints[i].codePoint;
+        if (!character.isSpaceCharacter(c))
+            break;
+    }
+    if (i + 3 >= endIndex ||
+        nodePoints[i].codePoint !== character.AsciiCodePoint.OPEN_BRACKET ||
+        nodePoints[i + 2].codePoint !== character.AsciiCodePoint.CLOSE_BRACKET ||
+        !character.isWhitespaceCharacter(nodePoints[i + 3].codePoint))
+        return { status: null, nextIndex: startIndex };
+    let status;
+    const c = nodePoints[i + 1].codePoint;
+    switch (c) {
+        case character.AsciiCodePoint.SPACE:
+            status = ast.TaskStatus.TODO;
+            break;
+        case character.AsciiCodePoint.MINUS_SIGN:
+            status = ast.TaskStatus.DOING;
+            break;
+        case character.AsciiCodePoint.LOWERCASE_X:
+        case character.AsciiCodePoint.UPPERCASE_X:
+            status = ast.TaskStatus.DONE;
+            break;
+        default:
+            return { status: null, nextIndex: startIndex };
+    }
+    return { status, nextIndex: i + 4 };
+}
+const parse = function (api) {
+    return {
+        parse: tokens => {
+            const results = [];
+            let listItemTokens = [];
+            for (let i = 0; i < tokens.length; ++i) {
+                const originalToken = tokens[i];
+                if (listItemTokens.length <= 0 ||
+                    listItemTokens[0].ordered !== originalToken.ordered ||
+                    listItemTokens[0].orderType !== originalToken.orderType ||
+                    listItemTokens[0].marker !== originalToken.marker) {
+                    const node = resolveList(listItemTokens, api);
+                    if (node)
+                        results.push(node);
+                    listItemTokens = [originalToken];
+                    continue;
+                }
+                listItemTokens.push(originalToken);
             }
-            if (listItems.length <= 0 ||
-                listItems[0].ordered !== originalToken.ordered ||
-                listItems[0].orderType !== originalToken.orderType ||
-                listItems[0].marker !== originalToken.marker) {
-                resolveList();
-                listItems = [originalToken];
-                continue;
+            const node = resolveList(listItemTokens, api);
+            if (node)
+                results.push(node);
+            return results;
+        },
+    };
+};
+const resolveList = (tokens, api) => {
+    if (tokens.length <= 0)
+        return null;
+    let spread = tokens.some((item) => {
+        if (item.children == null || item.children.length <= 1)
+            return false;
+        let previousPosition = item.children[0].position;
+        for (let j = 1; j < item.children.length; ++j) {
+            const currentPosition = item.children[j].position;
+            if (previousPosition.end.line + 1 < currentPosition.start.line) {
+                return true;
             }
-            listItems.push(originalToken);
+            previousPosition = currentPosition;
+        }
+        return false;
+    });
+    if (!spread && tokens.length > 1) {
+        let previousItem = tokens[0];
+        for (let i = 1; i < tokens.length; ++i) {
+            const currentItem = tokens[i];
+            if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
+                spread = true;
+                break;
+            }
+            previousItem = currentItem;
         }
-        resolveList();
-        return results;
     }
+    const children = tokens.map((listItemToken) => {
+        const nodes = api.parseBlockTokens(listItemToken.children);
+        const children = spread
+            ? nodes
+            : nodes
+                .map(node => (node.type === ast.ParagraphType ? node.children : node))
+                .flat();
+        const listItem = {
+            type: ast.ListItemType,
+            position: listItemToken.position,
+            status: listItemToken.status,
+            children,
+        };
+        return listItem;
+    });
+    const node = {
+        type: ast.ListType,
+        position: {
+            start: Object.assign({}, tokens[0].position.start),
+            end: Object.assign({}, tokens[tokens.length - 1].position.end),
+        },
+        ordered: tokens[0].ordered,
+        orderType: tokens[0].orderType,
+        start: tokens[0].order,
+        marker: tokens[0].marker,
+        spread,
+        children,
+    };
+    return node;
 };
 const uniqueName = '@yozora/tokenizer-list';
 class ListTokenizer extends coreTokenizer.BaseBlockTokenizer {
     constructor(props = {}) {
-        var _a, _b;
+        var _a, _b, _c, _d;
         super({
             name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
             priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.CONTAINING_BLOCK,
         });
-        this.match = () => {
-            return {
-                isContainingBlock: true,
-                eatOpener: () => null,
-            };
-        };
-        this.postMatch = postMatch;
+        this.match = match;
         this.parse = parse;
+        this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
+        this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
+            coreTokenizer.PhrasingContentType,
+            ast.ParagraphType,
+        ];
     }
 }

package/lib/esm/index.js (changed)

@@ -1,125 +1,282 @@
-import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer';
-import { ListType, ListItemType } from '@yozora/ast';
+import { ListType, TaskStatus, ParagraphType, ListItemType } from '@yozora/ast';
+import { calcStartYastNodePoint, calcEndYastNodePoint, BaseBlockTokenizer, TokenizerPriority, PhrasingContentType } from '@yozora/core-tokenizer';
+import { isAsciiDigitCharacter, AsciiCodePoint, isAsciiLowerLetter, isAsciiUpperLetter, VirtualCodePoint, isSpaceCharacter, isWhitespaceCharacter } from '@yozora/character';
-const parse = function () {
+const match = function () {
+    const { emptyItemCouldNotInterruptedTypes, enableTaskListItem } = this;
     return {
-        parse: (token, children) => {
-            const node = {
-                type: ListType,
-                ordered: token.ordered,
-                orderType: token.orderType,
-                start: token.start,
-                marker: token.marker,
-                spread: token.spread,
-                children: children,
-            };
-            return node;
-        },
+        isContainingBlock: true,
+        eatOpener,
+        eatAndInterruptPreviousSibling,
+        eatContinuationText,
     };
-};
-const postMatch = function (api) {
-    const { name: _tokenizer } = this;
-    return { transformMatch };
-    function transformMatch(tokens) {
-        const results = [];
-        let listItems = [];
-        const resolveList = () => {
-            if (listItems.length <= 0)
-                return;
-            let spread = listItems.some((item) => {
-                if (item.children == null || item.children.length <= 1)
-                    return false;
-                let previousPosition = item.children[0].position;
-                for (let j = 1; j < item.children.length; ++j) {
-                    const currentPosition = item.children[j].position;
-                    if (previousPosition.end.line + 1 < currentPosition.start.line) {
-                        return true;
-                    }
-                    previousPosition = currentPosition;
-                }
-                return false;
-            });
-            if (!spread && listItems.length > 1) {
-                let previousItem = listItems[0];
-                for (let i = 1; i < listItems.length; ++i) {
-                    const currentItem = listItems[i];
-                    if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
-                        spread = true;
+    function eatOpener(line) {
+        if (line.countOfPrecedeSpaces >= 4)
+            return null;
+        const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line;
+        if (firstNonWhitespaceIndex >= endIndex)
+            return null;
+        let ordered = false;
+        let marker = null;
+        let orderType;
+        let order;
+        let i = firstNonWhitespaceIndex;
+        let c = nodePoints[i].codePoint;
+        if (i + 1 < endIndex) {
+            const c0 = c;
+            if (isAsciiDigitCharacter(c0)) {
+                orderType = '1';
+                let v = c0 - AsciiCodePoint.DIGIT0;
+                for (i += 1; i < endIndex; ++i) {
+                    c = nodePoints[i].codePoint;
+                    if (!isAsciiDigitCharacter(c))
                         break;
-                    }
-                    previousItem = currentItem;
+                    v = v * 10 + c - AsciiCodePoint.DIGIT0;
                 }
+                order = v;
+                orderType = '1';
             }
-            const list = {
-                _tokenizer,
-                nodeType: ListType,
-                ordered: listItems[0].ordered,
-                orderType: listItems[0].orderType,
-                start: listItems[0].order,
-                marker: listItems[0].marker,
-                spread,
-                position: {
-                    start: Object.assign({}, listItems[0].position.start),
-                    end: Object.assign({}, listItems[listItems.length - 1].position.end),
-                },
-                children: [...listItems],
-            };
-            results.push(list);
-            if (list.spread)
-                return;
-            for (const listItem of list.children) {
-                if (listItem.children == null || listItem.children.length <= 0)
-                    continue;
-                listItem.children = listItem.children.map(child => {
-                    const lines = api.extractPhrasingLines(child);
-                    if (lines == null)
-                        return child;
-                    const token = api.buildPhrasingContentToken(lines);
-                    return token !== null && token !== void 0 ? token : child;
-                });
+            else if (isAsciiLowerLetter(c0)) {
+                i += 1;
+                c = nodePoints[i].codePoint;
+                order = c0 - AsciiCodePoint.LOWERCASE_A + 1;
+                orderType = 'a';
+            }
+            else if (isAsciiUpperLetter(c0)) {
+                i += 1;
+                c = nodePoints[i].codePoint;
+                order = c0 - AsciiCodePoint.UPPERCASE_A + 1;
+                orderType = 'A';
             }
+            if (i > firstNonWhitespaceIndex &&
+                i - firstNonWhitespaceIndex <= 9 &&
+                (c === AsciiCodePoint.DOT || c === AsciiCodePoint.CLOSE_PARENTHESIS)) {
+                i += 1;
+                ordered = true;
+                marker = c;
+            }
+        }
+        if (!ordered) {
+            if (c === AsciiCodePoint.PLUS_SIGN ||
+                c === AsciiCodePoint.MINUS_SIGN ||
+                c === AsciiCodePoint.ASTERISK) {
+                i += 1;
+                marker = c;
+            }
+        }
+        if (marker == null)
+            return null;
+        let countOfSpaces = 0, nextIndex = i;
+        if (nextIndex < endIndex) {
+            c = nodePoints[nextIndex].codePoint;
+            if (c === VirtualCodePoint.SPACE)
+                nextIndex += 1;
+        }
+        for (; nextIndex < endIndex; ++nextIndex) {
+            c = nodePoints[nextIndex].codePoint;
+            if (!isSpaceCharacter(c))
+                break;
+            countOfSpaces += 1;
+        }
+        if (countOfSpaces > 4) {
+            nextIndex -= countOfSpaces - 1;
+            countOfSpaces = 1;
+        }
+        if (countOfSpaces === 0 && nextIndex < endIndex && c !== VirtualCodePoint.LINE_END)
+            return null;
+        const countOfTopBlankLine = c === VirtualCodePoint.LINE_END ? 1 : -1;
+        if (c === VirtualCodePoint.LINE_END) {
+            nextIndex -= countOfSpaces - 1;
+            countOfSpaces = 1;
+        }
+        const indent = i - startIndex + countOfSpaces;
+        let status = null;
+        if (enableTaskListItem) {
+            ({ status, nextIndex } = eatTaskStatus(nodePoints, nextIndex, endIndex));
+        }
+        const token = {
+            nodeType: ListType,
+            position: {
+                start: calcStartYastNodePoint(nodePoints, startIndex),
+                end: calcEndYastNodePoint(nodePoints, nextIndex - 1),
+            },
+            ordered,
+            marker,
+            orderType: ordered ? orderType : undefined,
+            order: ordered ? order : undefined,
+            indent,
+            countOfTopBlankLine,
+            children: [],
         };
-        for (let i = 0; i < tokens.length; ++i) {
-            const originalToken = tokens[i];
-            if (originalToken.nodeType !== ListItemType) {
-                resolveList();
-                listItems = [];
-                results.push(originalToken);
-                continue;
+        if (status != null)
+            token.status = status;
+        return { token, nextIndex };
+    }
+    function eatAndInterruptPreviousSibling(line, prevSiblingToken) {
+        const result = eatOpener(line);
+        if (result == null)
+            return null;
+        const { token, nextIndex } = result;
+        if (emptyItemCouldNotInterruptedTypes.includes(prevSiblingToken.nodeType)) {
+            if (token.indent === line.endIndex - line.startIndex) {
+                return null;
+            }
+            if (token.ordered && token.order !== 1)
+                return null;
+        }
+        return { token, nextIndex, remainingSibling: prevSiblingToken };
+    }
+    function eatContinuationText(line, token) {
+        const { startIndex, endIndex, firstNonWhitespaceIndex, countOfPrecedeSpaces: indent } = line;
+        if (firstNonWhitespaceIndex < endIndex && indent < token.indent) {
+            return { status: 'notMatched' };
+        }
+        if (firstNonWhitespaceIndex >= endIndex) {
+            if (token.countOfTopBlankLine >= 0) {
+                token.countOfTopBlankLine += 1;
+                if (token.countOfTopBlankLine > 1) {
+                    return { status: 'notMatched' };
+                }
             }
-            if (listItems.length <= 0 ||
-                listItems[0].ordered !== originalToken.ordered ||
-                listItems[0].orderType !== originalToken.orderType ||
-                listItems[0].marker !== originalToken.marker) {
-                resolveList();
-                listItems = [originalToken];
-                continue;
+        }
+        else {
+            token.countOfTopBlankLine = -1;
+        }
+        const nextIndex = Math.min(startIndex + token.indent, endIndex - 1);
+        return { status: 'opening', nextIndex };
+    }
+};
+function eatTaskStatus(nodePoints, startIndex, endIndex) {
+    let i = startIndex;
+    for (; i < endIndex; ++i) {
+        const c = nodePoints[i].codePoint;
+        if (!isSpaceCharacter(c))
+            break;
+    }
+    if (i + 3 >= endIndex ||
+        nodePoints[i].codePoint !== AsciiCodePoint.OPEN_BRACKET ||
+        nodePoints[i + 2].codePoint !== AsciiCodePoint.CLOSE_BRACKET ||
+        !isWhitespaceCharacter(nodePoints[i + 3].codePoint))
+        return { status: null, nextIndex: startIndex };
+    let status;
+    const c = nodePoints[i + 1].codePoint;
+    switch (c) {
+        case AsciiCodePoint.SPACE:
+            status = TaskStatus.TODO;
+            break;
+        case AsciiCodePoint.MINUS_SIGN:
+            status = TaskStatus.DOING;
+            break;
+        case AsciiCodePoint.LOWERCASE_X:
+        case AsciiCodePoint.UPPERCASE_X:
+            status = TaskStatus.DONE;
+            break;
+        default:
+            return { status: null, nextIndex: startIndex };
+    }
+    return { status, nextIndex: i + 4 };
+}
+const parse = function (api) {
+    return {
+        parse: tokens => {
+            const results = [];
+            let listItemTokens = [];
+            for (let i = 0; i < tokens.length; ++i) {
+                const originalToken = tokens[i];
+                if (listItemTokens.length <= 0 ||
+                    listItemTokens[0].ordered !== originalToken.ordered ||
+                    listItemTokens[0].orderType !== originalToken.orderType ||
+                    listItemTokens[0].marker !== originalToken.marker) {
+                    const node = resolveList(listItemTokens, api);
+                    if (node)
+                        results.push(node);
+                    listItemTokens = [originalToken];
+                    continue;
+                }
+                listItemTokens.push(originalToken);
             }
-            listItems.push(originalToken);
+            const node = resolveList(listItemTokens, api);
+            if (node)
+                results.push(node);
+            return results;
+        },
+    };
+};
+const resolveList = (tokens, api) => {
+    if (tokens.length <= 0)
+        return null;
+    let spread = tokens.some((item) => {
+        if (item.children == null || item.children.length <= 1)
+            return false;
+        let previousPosition = item.children[0].position;
+        for (let j = 1; j < item.children.length; ++j) {
+            const currentPosition = item.children[j].position;
+            if (previousPosition.end.line + 1 < currentPosition.start.line) {
+                return true;
+            }
+            previousPosition = currentPosition;
+        }
+        return false;
+    });
+    if (!spread && tokens.length > 1) {
+        let previousItem = tokens[0];
+        for (let i = 1; i < tokens.length; ++i) {
+            const currentItem = tokens[i];
+            if (previousItem.position.end.line + 1 < currentItem.position.start.line) {
+                spread = true;
+                break;
+            }
+            previousItem = currentItem;
         }
-        resolveList();
-        return results;
     }
+    const children = tokens.map((listItemToken) => {
+        const nodes = api.parseBlockTokens(listItemToken.children);
+        const children = spread
+            ? nodes
+            : nodes
+                .map(node => (node.type === ParagraphType ? node.children : node))
+                .flat();
+        const listItem = {
+            type: ListItemType,
+            position: listItemToken.position,
+            status: listItemToken.status,
+            children,
+        };
+        return listItem;
+    });
+    const node = {
+        type: ListType,
+        position: {
+            start: Object.assign({}, tokens[0].position.start),
+            end: Object.assign({}, tokens[tokens.length - 1].position.end),
+        },
+        ordered: tokens[0].ordered,
+        orderType: tokens[0].orderType,
+        start: tokens[0].order,
+        marker: tokens[0].marker,
+        spread,
+        children,
+    };
+    return node;
 };
 const uniqueName = '@yozora/tokenizer-list';
 class ListTokenizer extends BaseBlockTokenizer {
     constructor(props = {}) {
-        var _a, _b;
+        var _a, _b, _c, _d;
         super({
             name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
             priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.CONTAINING_BLOCK,
         });
-        this.match = () => {
-            return {
-                isContainingBlock: true,
-                eatOpener: () => null,
-            };
-        };
-        this.postMatch = postMatch;
+        this.match = match;
         this.parse = parse;
+        this.enableTaskListItem = (_c = props.enableTaskListItem) !== null && _c !== void 0 ? _c : false;
+        this.emptyItemCouldNotInterruptedTypes = (_d = props.emptyItemCouldNotInterruptedTypes) !== null && _d !== void 0 ? _d : [
+            PhrasingContentType,
+            ParagraphType,
+        ];
     }
 }

package/lib/types/match.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+import type { IMatchBlockHookCreator } from '@yozora/core-tokenizer';
+import type { IThis, IToken, T } from './types';
+/**
+ * The following rules define list items:
+ *  - Basic case. If a sequence of lines Ls constitute a sequence of blocks Bs
+ *    starting with a non-whitespace character, and M is a list marker of width
+ *    W followed by 1 ≤ N ≤ 4 spaces, then the result of prepending M and the
+ *    following spaces to the first line of Ls, and indenting subsequent lines
+ *    of Ls by W + N spaces, is a list item with Bs as its contents. The type
+ *    of the list item (bullet or ordered) is determined by the type of its
+ *    list marker. If the list item is ordered, then it is also assigned a
+ *    start number, based on the ordered list marker.
+ *
+ *    Exceptions:
+ *      - When the first list item in a list interrupts a paragraph—that is,
+ *        when it starts on a line that would otherwise count as paragraph
+ *        continuation text—then
+ *        (a) the lines Ls must not begin with a blank line, and
+ *        (b) if the list item is ordered, the start number must be 1.
+ *      - If any line is a thematic break then that line is not a list item.
+ *
+ * @see https://github.com/syntax-tree/mdast#listitem
+ * @see https://github.github.com/gfm/#list-items
+ */
+export declare const match: IMatchBlockHookCreator<T, IToken, IThis>;

package/lib/types/parse.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
 import type { IParseBlockHookCreator } from '@yozora/core-tokenizer';
-import type { IHookContext, INode, IToken, T } from './types';
-export declare const parse: IParseBlockHookCreator<T, IToken, INode, IHookContext>;
+import type { INode, IThis, IToken, T } from './types';
+export declare const parse: IParseBlockHookCreator<T, IToken, INode, IThis>;

package/lib/types/tokenizer.d.ts CHANGED Viewed

@@ -1,11 +1,7 @@
-import type { IBlockTokenizer, IMatchBlockHookCreator, IParseBlockHookCreator, IPostMatchBlockHookCreator } from '@yozora/core-tokenizer';
+import type { YastNodeType } from '@yozora/ast';
+import type { IBlockTokenizer, IMatchBlockHookCreator, IParseBlockHookCreator } from '@yozora/core-tokenizer';
 import { BaseBlockTokenizer } from '@yozora/core-tokenizer';
-import type { IHookContext, INode, IToken, ITokenizerProps, T } from './types';
-/**
- * Params for constructing ListTokenizer
- */
-export interface ListTokenizerProps {
-}
+import type { INode, IThis, IToken, ITokenizerProps, T } from './types';
 /**
  * Lexical Analyzer for List.
  *
@@ -15,9 +11,10 @@ export interface ListTokenizerProps {
  * @see https://github.com/syntax-tree/mdast#list
  * @see https://github.github.com/gfm/#list
  */
-export declare class ListTokenizer extends BaseBlockTokenizer<T, IToken, INode, IHookContext> implements IBlockTokenizer<T, IToken, INode, IHookContext> {
+export declare class ListTokenizer extends BaseBlockTokenizer<T, IToken, INode, IThis> implements IBlockTokenizer<T, IToken, INode, IThis> {
     constructor(props?: ITokenizerProps);
-    readonly match: IMatchBlockHookCreator<T, IToken, IHookContext>;
-    readonly postMatch: IPostMatchBlockHookCreator<IHookContext>;
-    readonly parse: IParseBlockHookCreator<T, IToken, INode, IHookContext>;
+    readonly enableTaskListItem: boolean;
+    readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<YastNodeType>;
+    readonly match: IMatchBlockHookCreator<T, IToken, IThis>;
+    readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis>;
 }

package/lib/types/types.d.ts CHANGED Viewed

@@ -1,17 +1,15 @@
-import type { IList, ListType } from '@yozora/ast';
+import type { IList, ListType, TaskStatus, YastNodeType } from '@yozora/ast';
 import type { IBaseBlockTokenizerProps, IPartialYastBlockToken, ITokenizer, IYastBlockToken } from '@yozora/core-tokenizer';
-import type { IListItemToken as IListItemToken0 } from '@yozora/tokenizer-list-item';
 export declare type T = ListType;
 export declare type INode = IList;
 export declare const uniqueName = "@yozora/tokenizer-list";
-export declare type IListItemToken = IListItemToken0 & IYastBlockToken;
 export interface IToken extends IPartialYastBlockToken<T> {
     /**
      * Is it an ordered list item.
      */
     ordered: boolean;
     /**
-     * Marker of a bullet list-item, or delimiter of an ordered list-item.
+     * Marker of a bullet list-item, or the delimiter of an ordered list-item.
      */
     marker: number;
     /**
@@ -20,17 +18,48 @@ export interface IToken extends IPartialYastBlockToken<T> {
      */
     orderType?: '1' | 'a' | 'A' | 'i' | 'I';
     /**
-     * The starting number of a ordered list-item.
+     * Serial number of an ordered list-item.
      */
-    start?: number;
+    order?: number;
     /**
-     * Whether if the list is loose.
+     * Status of a todo task.
      */
-    spread: boolean;
+    status?: TaskStatus;
     /**
-     * List items.
+     * Indent of a list item.
      */
-    children: IListItemToken[];
+    indent: number;
+    /**
+     * list-item 起始的空行数量
+     * The number of blank lines at the beginning of a list-item (set to -1 once a non-blank line is encountered).
+     */
+    countOfTopBlankLine: number;
+    /**
+     * Child token nodes.
+     */
+    children: IYastBlockToken[];
+}
+export interface IThis extends ITokenizer {
+    /**
+     * Types of nodes that an empty list-item produced by this tokenizer
+     * is not allowed to interrupt.
+     * @see https://github.github.com/gfm/#example-263
+     */
+    readonly emptyItemCouldNotInterruptedTypes: ReadonlyArray<YastNodeType>;
+    /**
+     * Whether task list items (extension) are enabled.
+     */
+    readonly enableTaskListItem: boolean;
+}
+export interface ITokenizerProps extends Partial<IBaseBlockTokenizerProps> {
+    /**
+     * Specify the types of nodes that an empty list-item produced by this
+     * tokenizer is not allowed to interrupt.
+     * @see https://github.github.com/gfm/#example-263
+     */
+    readonly emptyItemCouldNotInterruptedTypes?: YastNodeType[];
+    /**
+     * Whether to enable task list items (extension). Defaults to `false`.
+     */
+    readonly enableTaskListItem?: boolean;
 }
-export declare type IHookContext = ITokenizer;
-export declare type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yozora/tokenizer-list",
-  "version": "2.0.0-alpha.0",
+  "version": "2.0.0-alpha.1",
   "author": {
     "name": "guanghechen",
     "url": "https://github.com/guanghechen/"
@@ -35,9 +35,9 @@
     "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
   },
   "dependencies": {
-    "@yozora/ast": "^2.0.0-alpha.0",
-    "@yozora/core-tokenizer": "^2.0.0-alpha.0",
-    "@yozora/tokenizer-list-item": "^2.0.0-alpha.0"
+    "@yozora/ast": "^2.0.0-alpha.1",
+    "@yozora/character": "^2.0.0-alpha.1",
+    "@yozora/core-tokenizer": "^2.0.0-alpha.1"
   },
-  "gitHead": "0171501339c49ffd02ed16a63447fa20a47a29a7"
+  "gitHead": "86202e1d2b03ccfc2ab030517d9d314f7aee7666"
 }

package/lib/types/postMatch.d.ts DELETED Viewed

@@ -1,10 +0,0 @@
-import type { IPostMatchBlockHookCreator } from '@yozora/core-tokenizer';
-import type { IHookContext } from './types';
-/**
- * A list is a sequence of one or more list items of the same type.
- * The list items may be separated by any number of blank lines.
- *
- * @see https://github.com/syntax-tree/mdast#list
- * @see https://github.github.com/gfm/#list
- */
-export declare const postMatch: IPostMatchBlockHookCreator<IHookContext>;