npm - @markuplint/parser-utils - Versions diffs - 4.0.0-alpha.3 → 4.0.0-alpha.5 - Mend

@markuplint/parser-utils 4.0.0-alpha.3 → 4.0.0-alpha.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (26)

package/LICENSE +1 -1
package/lib/attr-parser.d.ts +25 -0
package/lib/attr-parser.js +188 -0
package/lib/attr-tokenizer.d.ts +6 -0
package/lib/attr-tokenizer.js +75 -0
package/lib/const.d.ts +7 -1
package/lib/const.js +8 -3
package/lib/debugger.js +8 -19
package/lib/detect-element-type.js +1 -1
package/lib/flatten-nodes.js +14 -12
package/lib/get-location.js +3 -3
package/lib/idl-attributes.js +2 -2
package/lib/ignore-block.js +16 -4
package/lib/ignore-front-matter.js +2 -2
package/lib/index.d.ts +4 -0
package/lib/index.js +4 -0
package/lib/parse-attr.js +1 -1
package/lib/remove-deprecated-node.js +3 -3
package/lib/script-parser.d.ts +6 -0
package/lib/script-parser.js +22 -0
package/lib/tag-parser.d.ts +10 -0
package/lib/tag-parser.js +152 -0
package/lib/tag-splitter.d.ts +1 -1
package/lib/tag-splitter.js +48 -41
package/lib/types.d.ts +6 -2
package/package.json +7 -7

package/LICENSE CHANGED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2017-2019 Yusuke Hirao
+Copyright (c) 2017-2023 Yusuke Hirao
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

package/lib/attr-parser.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+import type { QuoteSet } from './types.js';
+export declare enum AttrState {
+    BeforeName = 0,
+    Name = 1,
+    Equal = 2,
+    BeforeValue = 3,
+    Value = 4,
+    AfterValue = 5
+}
+/**
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
+ */
+export declare function attrParser(raw: string, quoteSet?: readonly QuoteSet[], startState?: AttrState, quoteInValueChars?: ReadonlyArray<QuoteSet>, spaces?: ReadonlyArray<string>): {
+    spacesBeforeAttrName: string;
+    attrName: string;
+    spacesBeforeEqual: string;
+    equal: string;
+    spacesAfterEqual: string;
+    quoteStart: string;
+    attrValue: string;
+    quoteEnd: string;
+    leftover: string;
+};

package/lib/attr-parser.js ADDED Viewed

@@ -0,0 +1,188 @@
+import { defaultSpaces } from './const.js';
+const defaultQuoteSet = [
+    { start: '"', end: '"' },
+    { start: "'", end: "'" },
+];
+const defaultQuoteInValueChars = [];
+const EQUAL = '=';
+export var AttrState;
+(function (AttrState) {
+    AttrState[AttrState["BeforeName"] = 0] = "BeforeName";
+    AttrState[AttrState["Name"] = 1] = "Name";
+    AttrState[AttrState["Equal"] = 2] = "Equal";
+    AttrState[AttrState["BeforeValue"] = 3] = "BeforeValue";
+    AttrState[AttrState["Value"] = 4] = "Value";
+    AttrState[AttrState["AfterValue"] = 5] = "AfterValue";
+})(AttrState || (AttrState = {}));
+/**
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
+ */
+export function attrParser(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, quoteInValueChars = defaultQuoteInValueChars, spaces = defaultSpaces) {
+    let state = startState;
+    let spacesBeforeAttrName = '';
+    let attrName = '';
+    let spacesBeforeEqual = '';
+    let equal = '';
+    let spacesAfterEqual = '';
+    let quoteTypeIndex = -1;
+    let quoteStart = '';
+    let attrValue = '';
+    let quoteEnd = '';
+    const quoteModeStack = [];
+    const chars = [...raw];
+    while (chars.length > 0) {
+        if (state === AttrState.AfterValue) {
+            break;
+        }
+        const char = chars.shift();
+        switch (state) {
+            case AttrState.BeforeName: {
+                if (char === '>') {
+                    chars.unshift(char);
+                    state = AttrState.AfterValue;
+                    break;
+                }
+                if (char === '/') {
+                    chars.unshift(char);
+                    state = AttrState.AfterValue;
+                    break;
+                }
+                if (spaces.includes(char)) {
+                    spacesBeforeAttrName += char;
+                    break;
+                }
+                attrName += char;
+                state = AttrState.Name;
+                break;
+            }
+            case AttrState.Name: {
+                if (char === '>') {
+                    chars.unshift(char);
+                    state = AttrState.AfterValue;
+                    break;
+                }
+                if (char === '/') {
+                    chars.unshift(char);
+                    state = AttrState.AfterValue;
+                    break;
+                }
+                if (spaces.includes(char)) {
+                    spacesBeforeEqual += char;
+                    state = AttrState.Equal;
+                    break;
+                }
+                if (char === EQUAL) {
+                    equal += char;
+                    state = AttrState.BeforeValue;
+                    break;
+                }
+                attrName += char;
+                break;
+            }
+            case AttrState.Equal: {
+                if (spaces.includes(char)) {
+                    spacesBeforeEqual += char;
+                    break;
+                }
+                if (char === EQUAL) {
+                    equal += char;
+                    state = AttrState.BeforeValue;
+                    break;
+                }
+                // End of attribute
+                chars.unshift(spacesBeforeEqual, char);
+                spacesBeforeEqual = '';
+                state = AttrState.AfterValue;
+                break;
+            }
+            case AttrState.BeforeValue: {
+                if (spaces.includes(char)) {
+                    spacesAfterEqual += char;
+                    break;
+                }
+                quoteTypeIndex = quoteSet.findIndex(quote => quote.start === char);
+                const quote = quoteSet[quoteTypeIndex];
+                if (quote) {
+                    quoteStart = quote.start;
+                    state = AttrState.Value;
+                    break;
+                }
+                const raw = char + chars.join('');
+                const inQuote = quoteInValueChars.find(quote => raw.startsWith(quote.start));
+                if (inQuote) {
+                    quoteModeStack.push(inQuote);
+                    attrValue += inQuote.start;
+                    chars.splice(0, inQuote.start.length - 1);
+                    state = AttrState.Value;
+                    break;
+                }
+                chars.unshift(char);
+                state = AttrState.Value;
+                break;
+            }
+            case AttrState.Value: {
+                // console.log(
+                // 	char,
+                // 	quoteSet[quoteTypeIndex]?.end,
+                // 	quoteModeStack.map(q => `${q.start}${q.end}`),
+                // );
+                if (!quoteSet[quoteTypeIndex]) {
+                    if (spaces.includes(char)) {
+                        chars.unshift(char);
+                        state = AttrState.AfterValue;
+                        break;
+                    }
+                    if (char === '/') {
+                        chars.unshift(char);
+                        state = AttrState.AfterValue;
+                        break;
+                    }
+                    if (char === '>') {
+                        chars.unshift(char);
+                        state = AttrState.AfterValue;
+                        break;
+                    }
+                }
+                if (quoteModeStack.length === 0 && char === quoteSet[quoteTypeIndex]?.end) {
+                    quoteEnd = char;
+                    state = AttrState.AfterValue;
+                    break;
+                }
+                const raw = char + chars.join('');
+                const inQuoteEnd = quoteModeStack.at(-1);
+                if (inQuoteEnd && raw.startsWith(inQuoteEnd.end)) {
+                    quoteModeStack.pop();
+                    attrValue += inQuoteEnd.end;
+                    chars.splice(0, inQuoteEnd.end.length - 1);
+                    break;
+                }
+                const inQuoteStart = quoteInValueChars.find(quote => raw.startsWith(quote.start));
+                if (inQuoteStart) {
+                    quoteModeStack.push(inQuoteStart);
+                    attrValue += inQuoteStart.start;
+                    chars.splice(0, inQuoteStart.start.length - 1);
+                    break;
+                }
+                attrValue += char;
+                break;
+            }
+        }
+    }
+    if (state === AttrState.Value && quoteTypeIndex !== -1) {
+        throw new SyntaxError(`Unclosed attribute value: ${raw}`);
+    }
+    const leftover = chars.join('');
+    return {
+        spacesBeforeAttrName,
+        attrName,
+        spacesBeforeEqual,
+        equal,
+        spacesAfterEqual,
+        quoteStart,
+        attrValue,
+        quoteEnd,
+        leftover,
+    };
+}

package/lib/attr-tokenizer.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import type { QuoteSet } from './types.js';
+import type { MLASTHTMLAttr } from '@markuplint/ml-ast';
+import { AttrState } from './attr-parser.js';
+export declare function attrTokenizer(raw: string, line: number, col: number, startOffset: number, quoteSet?: ReadonlyArray<QuoteSet>, startState?: AttrState, quoteInValueChars?: ReadonlyArray<QuoteSet>, spaces?: ReadonlyArray<string>): MLASTHTMLAttr & {
+    __leftover?: string;
+};

package/lib/attr-tokenizer.js ADDED Viewed

@@ -0,0 +1,75 @@
+import { AttrState, attrParser } from './attr-parser.js';
+import { tokenizer, uuid } from './create-token.js';
+export function attrTokenizer(raw, line, col, startOffset, quoteSet, startState = AttrState.BeforeName, quoteInValueChars, spaces) {
+    const parsed = attrParser(raw, quoteSet, startState, quoteInValueChars, spaces);
+    let offset = startOffset;
+    const spacesBeforeName = tokenizer(parsed.spacesBeforeAttrName, line, col, offset);
+    line = spacesBeforeName.endLine;
+    col = spacesBeforeName.endCol;
+    offset = spacesBeforeName.endOffset;
+    const name = tokenizer(parsed.attrName, line, col, offset);
+    line = name.endLine;
+    col = name.endCol;
+    offset = name.endOffset;
+    const spacesBeforeEqual = tokenizer(parsed.spacesBeforeEqual, line, col, offset);
+    line = spacesBeforeEqual.endLine;
+    col = spacesBeforeEqual.endCol;
+    offset = spacesBeforeEqual.endOffset;
+    const equal = tokenizer(parsed.equal, line, col, offset);
+    line = equal.endLine;
+    col = equal.endCol;
+    offset = equal.endOffset;
+    const spacesAfterEqual = tokenizer(parsed.spacesAfterEqual, line, col, offset);
+    line = spacesAfterEqual.endLine;
+    col = spacesAfterEqual.endCol;
+    offset = spacesAfterEqual.endOffset;
+    const startQuote = tokenizer(parsed.quoteStart, line, col, offset);
+    line = startQuote.endLine;
+    col = startQuote.endCol;
+    offset = startQuote.endOffset;
+    const value = tokenizer(parsed.attrValue, line, col, offset);
+    line = value.endLine;
+    col = value.endCol;
+    offset = value.endOffset;
+    const endQuote = tokenizer(parsed.quoteEnd, line, col, offset);
+    const attrToken = tokenizer(parsed.attrName +
+        parsed.spacesBeforeEqual +
+        parsed.equal +
+        parsed.spacesAfterEqual +
+        parsed.quoteStart +
+        parsed.attrValue +
+        parsed.quoteEnd, name.startLine, name.startCol, name.startOffset);
+    const result = {
+        type: 'html-attr',
+        uuid: uuid(),
+        raw: attrToken.raw,
+        startOffset: attrToken.startOffset,
+        endOffset: attrToken.endOffset,
+        startLine: attrToken.startLine,
+        endLine: attrToken.endLine,
+        startCol: attrToken.startCol,
+        endCol: attrToken.endCol,
+        spacesBeforeName,
+        name,
+        spacesBeforeEqual,
+        equal,
+        spacesAfterEqual,
+        startQuote,
+        value,
+        endQuote,
+        isDuplicatable: false,
+        nodeName: name.raw,
+        parentNode: null,
+        prevNode: null,
+        nextNode: null,
+        isFragment: false,
+        isGhost: false,
+    };
+    if (parsed.leftover) {
+        return {
+            ...result,
+            __leftover: parsed.leftover,
+        };
+    }
+    return result;
+}

package/lib/const.d.ts CHANGED Viewed

@@ -5,6 +5,12 @@ export declare const MASK_CHAR = "\uE000";
  * @see https://developer.mozilla.org/en-US/docs/Web/SVG/Element
  */
 export declare const svgElementList: string[];
-export declare const reTag: RegExp;
 export declare const reTagName: RegExp;
 export declare const reSplitterTag: RegExp;
+/**
+ * - U+0009 CHARACTER TABULATION (tab) => `\t`
+ * - U+000A LINE FEED (LF) => `\n`
+ * - U+000C FORM FEED (FF) => `\f`
+ * - U+0020 SPACE => ` `
+ */
+export declare const defaultSpaces: readonly ["\t", "\n", "\f", " "];

package/lib/const.js CHANGED Viewed

@@ -94,7 +94,12 @@ export const svgElementList = [
     'tref',
     'vkern',
 ];
-export const reTag = /^<((?:.|\s|\n)+)>\s*$/;
-// eslint-disable-next-line no-control-regex
-export const reTagName = /^(?:[a-z][^\u0000\u0009\u000A\u000C\u0020/>]*)/i;
+export const reTagName = /^[a-z][^\0\t\n\f />]*/i;
 export const reSplitterTag = /<[^>]+>/g;
+/**
+ * - U+0009 CHARACTER TABULATION (tab) => `\t`
+ * - U+000A LINE FEED (LF) => `\n`
+ * - U+000C FORM FEED (FF) => `\f`
+ * - U+0020 SPACE => ` `
+ */
+export const defaultSpaces = ['\t', '\n', '\f', ' '];

package/lib/debugger.js CHANGED Viewed

@@ -1,21 +1,19 @@
 export function nodeListToDebugMaps(
 // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
 nodeList, withAttr = false) {
-    return nodeList
-        .map(n => {
+    return nodeList.flatMap(n => {
         const r = [];
-        if (!n.isGhost) {
+        if (n.isGhost) {
+            r.push(`[N/A]>[N/A](N/A)${n.nodeName}: ${visibleWhiteSpace(n.raw)}`);
+        }
+        else {
             r.push(tokenDebug(n));
             if (withAttr && 'attributes' in n) {
                 r.push(...attributesToDebugMaps(n.attributes).flat());
             }
         }
-        else {
-            r.push(`[N/A]>[N/A](N/A)${n.nodeName}: ${visibleWhiteSpace(n.raw)}`);
-        }
         return r;
-    })
-        .flat();
+    });
 }
 export function attributesToDebugMaps(
 // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
@@ -28,16 +26,7 @@ attributes) {
             }),
         ];
         if (n.type === 'html-attr') {
-            r.push(`  ${tokenDebug(n.spacesBeforeName, 'bN')}`);
-            r.push(`  ${tokenDebug(n.name, 'name')}`);
-            r.push(`  ${tokenDebug(n.spacesBeforeEqual, 'bE')}`);
-            r.push(`  ${tokenDebug(n.equal, 'equal')}`);
-            r.push(`  ${tokenDebug(n.spacesAfterEqual, 'aE')}`);
-            r.push(`  ${tokenDebug(n.startQuote, 'sQ')}`);
-            r.push(`  ${tokenDebug(n.value, 'value')}`);
-            r.push(`  ${tokenDebug(n.endQuote, 'eQ')}`);
-            r.push(`  isDirective: ${!!n.isDirective}`);
-            r.push(`  isDynamicValue: ${!!n.isDynamicValue}`);
+            r.push(`  ${tokenDebug(n.spacesBeforeName, 'bN')}`, `  ${tokenDebug(n.name, 'name')}`, `  ${tokenDebug(n.spacesBeforeEqual, 'bE')}`, `  ${tokenDebug(n.equal, 'equal')}`, `  ${tokenDebug(n.spacesAfterEqual, 'aE')}`, `  ${tokenDebug(n.startQuote, 'sQ')}`, `  ${tokenDebug(n.value, 'value')}`, `  ${tokenDebug(n.endQuote, 'eQ')}`, `  isDirective: ${!!n.isDirective}`, `  isDynamicValue: ${!!n.isDynamicValue}`);
         }
         if (n.potentialName != null) {
             r.push(`  potentialName: ${visibleWhiteSpace(n.potentialName)}`);
@@ -54,5 +43,5 @@ function tokenDebug(n, type = '') {
     n.potentialName ?? n.nodeName ?? n.name ?? n.type ?? type}: ${visibleWhiteSpace(n.raw)}`;
 }
 function visibleWhiteSpace(chars) {
-    return chars.replace(/\n/g, '⏎').replace(/\t/g, '→').replace(/\s/g, '␣');
+    return chars.replaceAll('\n', '⏎').replaceAll('\t', '→').replaceAll(/\s/g, '␣');
 }

package/lib/detect-element-type.js CHANGED Viewed

@@ -25,7 +25,7 @@ function _distinguishAuthoredName(name, patterns) {
     });
 }
 function toRegexp(pattern) {
-    const matched = pattern.match(/^\/(.+)\/([ig]*)$/i);
+    const matched = pattern.match(/^\/(.+)\/([gi]*)$/i);
     if (matched && matched[1]) {
         return new RegExp(matched[1], matched[2]);
     }

package/lib/flatten-nodes.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { uuid } from './create-token.js';
 import { getEndCol, getEndLine } from './get-location.js';
 import { removeDeprecatedNode } from './remove-deprecated-node.js';
-import tagSplitter from './tag-splitter.js';
+import { tagSplitter } from './tag-splitter.js';
 import { walk } from './walker.js';
 export function flattenNodes(
 // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
@@ -71,9 +71,9 @@ nodeTree, rawHtml, createLastText = true) {
                  * create Last spaces
                  */
                 let lastOffset = 0;
-                nodeOrders.forEach((node, i) => {
+                for (const node of nodeOrders) {
                     lastOffset = Math.max(node.endOffset, lastOffset);
-                });
+                }
                 // console.log(lastOffset);
                 const lastTextContent = rawHtml.slice(lastOffset);
                 // console.log(`"${lastTextContent}"`);
@@ -110,8 +110,8 @@ nodeTree, rawHtml, createLastText = true) {
      * concat text nodes
      */
     const result = [];
-    nodeOrders.forEach(node => {
-        const prevNode = result[result.length - 1] ?? null;
+    for (const node of nodeOrders) {
+        const prevNode = result.at(-1) ?? null;
         if (node.type === 'text' && prevNode?.type === 'text') {
             prevNode.raw = prevNode.raw + node.raw;
             prevNode.endOffset = node.endOffset;
@@ -132,10 +132,10 @@ nodeTree, rawHtml, createLastText = true) {
             if (node.nextNode) {
                 node.nextNode.prevNode = prevNode;
             }
-            return;
+            continue;
         }
         result.push(node);
-    });
+    }
     {
         /**
          * Correction prev/next/parent
@@ -168,10 +168,12 @@ nodeTree, rawHtml, createLastText = true) {
             // Children
             if (node.type === 'text') {
                 const parent = node.parentNode;
-                if (parent && parent.type === 'starttag' && parent.nodeName.toLowerCase() === 'html') {
-                    if (parent.childNodes && !parent.childNodes.some(n => n.uuid === node.uuid)) {
-                        parent.childNodes.push(node);
-                    }
+                if (parent &&
+                    parent.type === 'starttag' &&
+                    parent.nodeName.toLowerCase() === 'html' &&
+                    parent.childNodes &&
+                    !parent.childNodes.some(n => n.uuid === node.uuid)) {
+                    parent.childNodes.push(node);
                 }
             }
             prevToken = node;
@@ -241,5 +243,5 @@ nodeTree, rawHtml) {
         node.endOffset = node.endOffset ?? currentEndOffset;
         nodeOrders.push(node);
     });
-    return nodeOrders.slice();
+    return [...nodeOrders];
 }

package/lib/get-location.js CHANGED Viewed

@@ -1,9 +1,9 @@
 export function getLine(html, startOffset) {
-    return html.slice(0, startOffset).split(/\n/g).length;
+    return html.slice(0, startOffset).split(/\n/).length;
 }
 export function getCol(html, startOffset) {
-    const lines = html.slice(0, startOffset).split(/\n/g);
-    return (lines[lines.length - 1] ?? '').length + 1;
+    const lines = html.slice(0, startOffset).split(/\n/);
+    return (lines.at(-1) ?? '').length + 1;
 }
 export function getEndLine(html, line) {
     return html.split(/\r?\n/).length - 1 + line;

package/lib/idl-attributes.js CHANGED Viewed

@@ -428,8 +428,8 @@ export function searchIDLAttribute(name) {
     };
 }
 function camelize(str) {
-    return str.replace(/[:-][a-z]/g, $0 => $0[1]?.toUpperCase() ?? '');
+    return str.replaceAll(/[:-][a-z]/g, $0 => $0[1]?.toUpperCase() ?? '');
 }
 function hyphenize(str) {
-    return str.replace(/[A-Z]/g, $0 => `-${$0.toLowerCase()}`);
+    return str.replaceAll(/[A-Z]/g, $0 => `-${$0.toLowerCase()}`);
 }

package/lib/ignore-block.js CHANGED Viewed

@@ -9,7 +9,7 @@ export function ignoreBlock(source, tags, maskChar = MASK_CHAR) {
         // Replace tags in attributes
         const attr = maskText(prepend(tag.start, '(?<=(?:"|\'))'), append(tag.end, '(?=(?:"|\'))'), replaced, (startTag, taggedCode, endTag) => {
             const mask = maskChar.repeat(startTag.length) +
-                taggedCode.replace(/[^\n]/g, maskChar) +
+                taggedCode.replaceAll(/[^\n]/g, maskChar) +
                 maskChar.repeat((endTag ?? '').length);
             return mask;
         });
@@ -18,7 +18,7 @@ export function ignoreBlock(source, tags, maskChar = MASK_CHAR) {
         // Replace tags in other nodes
         const text = maskText(tag.start, tag.end, replaced, (startTag, taggedCode, endTag) => {
             const mask = maskChar.repeat(startTag.length) +
-                taggedCode.replace(/[^\n]/g, maskChar) +
+                taggedCode.replaceAll(/[^\n]/g, maskChar) +
                 maskChar.repeat((endTag ?? '').length);
             const taggedMask = `<!${mask.slice(2).slice(0, -1)}>`;
             return taggedMask;
@@ -63,7 +63,7 @@ function maskText(start, end, replaced, masking) {
 export function restoreNode(
 // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
 nodeList, ignoreBlock) {
-    nodeList = nodeList.slice();
+    nodeList = [...nodeList];
     const { source, stack, maskChar } = ignoreBlock;
     for (const node of nodeList) {
         if (node.type === 'comment' || node.type === 'text' || node.type === 'psblock') {
@@ -71,7 +71,7 @@ nodeList, ignoreBlock) {
                 continue;
             }
             const parentNode = node.parentNode;
-            const index = nodeList.findIndex(n => n === node);
+            const index = nodeList.indexOf(node);
             const insertList = [];
             let text = node.raw;
             let pointer = 0;
@@ -192,14 +192,26 @@ function snap(str, reg) {
     return [index, above, snapPoint, below];
 }
 function removeGlobalOption(reg) {
+    if (typeof reg === 'string') {
+        return new RegExp(escapeRegExpForStr(reg));
+    }
     return new RegExp(reg.source, reg.ignoreCase ? 'i' : '');
 }
 function prepend(reg, str) {
+    if (typeof reg === 'string') {
+        return new RegExp(str + escapeRegExpForStr(reg));
+    }
     return new RegExp(str + reg.source, reg.ignoreCase ? 'i' : '');
 }
 function append(reg, str) {
+    if (typeof reg === 'string') {
+        return new RegExp(escapeRegExpForStr(reg) + str);
+    }
     return new RegExp(reg.source + str, reg.ignoreCase ? 'i' : '');
 }
 function hasIgnoreBlock(textContent, maskChar) {
     return textContent.includes(maskChar);
 }
+function escapeRegExpForStr(str) {
+    return str.replaceAll(/[!$()*+./:=?[\\\]^{|}]/g, '\\$&');
+}

package/lib/ignore-front-matter.js CHANGED Viewed

@@ -1,5 +1,5 @@
 export function ignoreFrontMatter(code) {
-    const reStart = /^(?:\s*\r?\n)?---\r?\n/.exec(code);
+    const reStart = /^(?:\s*\n)?---\r?\n/.exec(code);
     if (!reStart) {
         return code;
     }
@@ -12,6 +12,6 @@ export function ignoreFrontMatter(code) {
     const endPoint = startPoint + reEnd.index + reEnd[0].length;
     const frontMatter = code.slice(0, endPoint);
     const afterCode = code.slice(endPoint);
-    const masked = frontMatter.replace(/[^\r\n]/g, ' ');
+    const masked = frontMatter.replaceAll(/[^\n\r]/g, ' ');
     return masked + afterCode;
 }

package/lib/index.d.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+export * from './attr-parser.js';
+export * from './attr-tokenizer.js';
 export * from './const.js';
 export * from './create-token.js';
 export * from './debugger.js';
@@ -12,5 +14,7 @@ export * from './ignore-front-matter.js';
 export * from './parse-attr.js';
 export * from './parser-error.js';
 export * from './remove-deprecated-node.js';
+export * from './script-parser.js';
+export * from './tag-parser.js';
 export * from './tag-splitter.js';
 export * from './walker.js';

package/lib/index.js CHANGED Viewed

@@ -1,3 +1,5 @@
+export * from './attr-parser.js';
+export * from './attr-tokenizer.js';
 export * from './const.js';
 export * from './create-token.js';
 export * from './debugger.js';
@@ -12,5 +14,7 @@ export * from './ignore-front-matter.js';
 export * from './parse-attr.js';
 export * from './parser-error.js';
 export * from './remove-deprecated-node.js';
+export * from './script-parser.js';
+export * from './tag-parser.js';
 export * from './tag-splitter.js';
 export * from './walker.js';

package/lib/parse-attr.js CHANGED Viewed

@@ -55,7 +55,7 @@ export function tokenize(raw, options) {
     const valueDelimiters = options?.valueDelimiters ?? defaultValueDelimiters;
     const equalDelimiter = options?.equal ?? defaultEqual;
     let state = 'b-name';
-    const charactors = raw.split('');
+    const charactors = [...raw];
     let beforeName = '';
     let name = '';
     let afterName = '';

package/lib/remove-deprecated-node.js CHANGED Viewed

@@ -20,16 +20,16 @@ nodeOrders) {
      */
     const stack = {};
     const removeIndexes = [];
-    nodeOrders.forEach((node, i) => {
+    for (const [i, node] of nodeOrders.entries()) {
         if (node.isGhost) {
-            return;
+            continue;
         }
         const id = `${node.startLine}:${node.startCol}:${node.endLine}:${node.endCol}`;
         if (stack[id] != null) {
             removeIndexes.push(i);
         }
         stack[id] = i;
-    });
+    }
     let r = nodeOrders.length;
     while (r-- > 0) {
         if (removeIndexes.includes(r)) {

package/lib/script-parser.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+export declare function scriptParser(script: string): any;
+export declare function removeQuote(str: string): string;
+export type ScriptTokenType = {
+    type: 'Identifier' | 'Boolean' | 'Numeric' | 'String' | 'Template' | 'Punctuator';
+    value: string;
+};

package/lib/script-parser.js ADDED Viewed

@@ -0,0 +1,22 @@
+// @ts-ignore
+import { tokenize } from 'espree';
+export function scriptParser(script) {
+    const tokens = tokenize(script, {
+        ecmaVersion: 'latest',
+        loc: false,
+    });
+    return tokens.map((token) => ({
+        type: token.type,
+        value: token.value,
+    }));
+}
+export function removeQuote(str) {
+    const quote = str[0];
+    if (quote !== '"' && quote !== "'") {
+        return str;
+    }
+    if (str.at(-1) !== quote) {
+        return str;
+    }
+    return str.slice(1, -1);
+}

package/lib/tag-parser.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import type { MLASTAttr } from '@markuplint/ml-ast';
+export declare function tagParser(raw: string, startLine: number, startCol: number, startOffset: number, offsetOffset?: number, offsetLine?: number, offsetColumn?: number, spaces?: ReadonlyArray<string>): {
+    beforeOpenTag: string;
+    tagName: string;
+    attrs: MLASTAttr[];
+    afterAttrSpaces: import("@markuplint/ml-ast").MLToken;
+    selfClosingSolidus: import("@markuplint/ml-ast").MLToken;
+    isOpenTag: boolean;
+    leftover: string;
+};

package/lib/tag-parser.js ADDED Viewed

@@ -0,0 +1,152 @@
+import { attrTokenizer } from './attr-tokenizer.js';
+import { defaultSpaces } from './const.js';
+import { tokenizer } from './create-token.js';
+var TagState;
+(function (TagState) {
+    TagState[TagState["BeforeOpenTag"] = 0] = "BeforeOpenTag";
+    TagState[TagState["FirstCharOfTagName"] = 1] = "FirstCharOfTagName";
+    TagState[TagState["TagName"] = 2] = "TagName";
+    TagState[TagState["Attrs"] = 3] = "Attrs";
+    TagState[TagState["AfterAttrs"] = 4] = "AfterAttrs";
+    TagState[TagState["AfterOpenTag"] = 5] = "AfterOpenTag";
+})(TagState || (TagState = {}));
+export function tagParser(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0, spaces = defaultSpaces) {
+    let offset = startOffset + offsetOffset;
+    let line = startLine + offsetLine;
+    let col = startCol + (startLine === 1 ? offsetColumn : 0);
+    let state = TagState.BeforeOpenTag;
+    let beforeOpenTagChars = '';
+    let tagName = '';
+    let afterAttrsSpaceChars = '';
+    let selfClosingSolidusChar = '';
+    let isOpenTag = true;
+    const attrs = [];
+    const chars = [...raw];
+    while (chars.length > 0) {
+        if (state === TagState.AfterOpenTag) {
+            break;
+        }
+        const char = chars.shift();
+        stateSwitch: switch (state) {
+            case TagState.BeforeOpenTag: {
+                if (char === '<') {
+                    const beforeOpenTag = tokenizer(beforeOpenTagChars, line, col, offset);
+                    line = beforeOpenTag.endLine;
+                    col = beforeOpenTag.endCol;
+                    offset = beforeOpenTag.endOffset;
+                    // Add `<` length
+                    col += 1;
+                    offset += 1;
+                    state = TagState.FirstCharOfTagName;
+                    break;
+                }
+                beforeOpenTagChars += char;
+                break;
+            }
+            case TagState.FirstCharOfTagName: {
+                if (/[a-z]/i.test(char)) {
+                    tagName += char;
+                    state = TagState.TagName;
+                    break;
+                }
+                if (char === '/') {
+                    isOpenTag = false;
+                    break;
+                }
+                chars.unshift(char);
+                state = TagState.AfterOpenTag;
+                break;
+            }
+            case TagState.TagName: {
+                if (spaces.includes(char)) {
+                    chars.unshift(char);
+                    if (!isOpenTag) {
+                        // Add `/` of `</`(close tag) length
+                        offset += 1;
+                        col += 1;
+                    }
+                    offset += tagName.length;
+                    col += tagName.length;
+                    state = TagState.Attrs;
+                    break;
+                }
+                if (char === '/') {
+                    chars.unshift(char);
+                    state = TagState.AfterAttrs;
+                    break;
+                }
+                if (char === '>') {
+                    state = TagState.AfterOpenTag;
+                    break;
+                }
+                tagName += char;
+                break;
+            }
+            case TagState.Attrs: {
+                let leftover = char + chars.join('');
+                while (leftover.trim()) {
+                    if (leftover.trim().startsWith('/') || leftover.trim().startsWith('>')) {
+                        chars.length = 0;
+                        chars.push(...leftover);
+                        state = TagState.AfterAttrs;
+                        break stateSwitch;
+                    }
+                    const attr = attrTokenizer(leftover, line, col, offset);
+                    line = attr.endLine;
+                    col = attr.endCol;
+                    offset = attr.endOffset;
+                    if (leftover === attr.__leftover) {
+                        throw new SyntaxError(`Invalid attribute syntax: ${leftover}`);
+                    }
+                    leftover = attr.__leftover ?? '';
+                    delete attr.__leftover;
+                    attrs.push(attr);
+                }
+                break;
+            }
+            case TagState.AfterAttrs: {
+                if (char === '>') {
+                    state = TagState.AfterOpenTag;
+                    break;
+                }
+                if (spaces.includes(char)) {
+                    afterAttrsSpaceChars += char;
+                    break;
+                }
+                if (char === '/') {
+                    selfClosingSolidusChar = char;
+                    break;
+                }
+                throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
+            }
+        }
+    }
+    const leftover = chars.join('');
+    if ((!leftover && state === TagState.TagName) || tagName === '') {
+        throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
+    }
+    // console.log({
+    // 	state,
+    // 	leftover,
+    // 	afterAttrsSpaceChars,
+    // 	selfClosingSolidusChar,
+    // 	attrs: attrs.map(a => a.raw),
+    // });
+    const afterAttrSpaces = tokenizer(afterAttrsSpaceChars, line, col, offset);
+    line = afterAttrSpaces.endLine;
+    col = afterAttrSpaces.endCol;
+    offset = afterAttrSpaces.endOffset;
+    const selfClosingSolidus = tokenizer(selfClosingSolidusChar, line, col, offset);
+    line = selfClosingSolidus.endLine;
+    col = selfClosingSolidus.endCol;
+    offset = selfClosingSolidus.endOffset;
+    return {
+        beforeOpenTag: beforeOpenTagChars,
+        tagName,
+        attrs,
+        afterAttrSpaces,
+        selfClosingSolidus,
+        isOpenTag,
+        leftover,
+    };
+}

package/lib/tag-splitter.d.ts CHANGED Viewed

@@ -4,4 +4,4 @@ export interface N {
     line: number;
     col: number;
 }
-export default function tagSplitter(raw: string, line: number, col: number): N[];
+export declare function tagSplitter(raw: string, line: number, col: number): N[];

package/lib/tag-splitter.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { reSplitterTag, reTagName } from './const.js';
 import { getEndCol, getEndLine } from '@markuplint/parser-utils';
-export default function tagSplitter(raw, line, col) {
+export function tagSplitter(raw, line, col) {
     return withLocation(tagSplitterAsString(raw), line, col);
 }
 function tagSplitterAsString(raw) {
@@ -8,7 +8,7 @@ function tagSplitterAsString(raw) {
     if (!tagMatches) {
         return [raw];
     }
-    const tokens = Array.from(tagMatches);
+    const tokens = [...tagMatches];
     tokens.unshift(); // remove all match
     const nodes = [];
     let rest = raw;
@@ -31,15 +31,7 @@ function tagSplitterAsString(raw) {
 function withLocation(nodes, line, col) {
     const result = [];
     for (const node of nodes) {
-        if (node[0] !== '<') {
-            result.push({
-                type: 'text',
-                raw: node,
-                line,
-                col,
-            });
-        }
-        else {
+        if (node[0] === '<') {
             const label = node.slice(1).slice(0, -1);
             if (reTagName.test(label)) {
                 result.push({
@@ -49,39 +41,54 @@ function withLocation(nodes, line, col) {
                     col,
                 });
             }
-            else if (label[0] === '/') {
-                result.push({
-                    type: 'endtag',
-                    raw: node,
-                    line,
-                    col,
-                });
-            }
-            else if (label[0] === '!') {
-                result.push({
-                    type: 'comment',
-                    raw: node,
-                    line,
-                    col,
-                });
-            }
-            else if (label[0] === '?') {
-                result.push({
-                    type: 'boguscomment',
-                    raw: node,
-                    line,
-                    col,
-                });
-            }
             else {
-                result.push({
-                    type: 'text',
-                    raw: node,
-                    line,
-                    col,
-                });
+                switch (label[0]) {
+                    case '/': {
+                        result.push({
+                            type: 'endtag',
+                            raw: node,
+                            line,
+                            col,
+                        });
+                        break;
+                    }
+                    case '!': {
+                        result.push({
+                            type: 'comment',
+                            raw: node,
+                            line,
+                            col,
+                        });
+                        break;
+                    }
+                    case '?': {
+                        result.push({
+                            type: 'boguscomment',
+                            raw: node,
+                            line,
+                            col,
+                        });
+                        break;
+                    }
+                    default: {
+                        result.push({
+                            type: 'text',
+                            raw: node,
+                            line,
+                            col,
+                        });
+                    }
+                }
             }
         }
+        else {
+            result.push({
+                type: 'text',
+                raw: node,
+                line,
+                col,
+            });
+        }
         line = getEndLine(node, line);
         col = getEndCol(node, col);
     }

package/lib/types.d.ts CHANGED Viewed

@@ -7,8 +7,8 @@ export type Code = {
 };
 export type IgnoreTag = {
     readonly type: string;
-    readonly start: Readonly<RegExp>;
-    readonly end: Readonly<RegExp>;
+    readonly start: Readonly<RegExp> | string;
+    readonly end: Readonly<RegExp> | string;
 };
 export type IgnoreBlock = {
     readonly source: string;
@@ -16,3 +16,7 @@ export type IgnoreBlock = {
     readonly stack: readonly Code[];
     readonly maskChar: string;
 };
+export type QuoteSet = {
+    readonly start: string;
+    readonly end: string;
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@markuplint/parser-utils",
-	"version": "4.0.0-alpha.3",
+	"version": "4.0.0-alpha.5",
 	"description": "Utility module for markuplint parser plugin",
 	"repository": "git@github.com:markuplint/markuplint.git",
 	"author": "Yusuke Hirao <yusukehirao@me.com>",
@@ -24,12 +24,12 @@
 		"clean": "tsc --build --clean"
 	},
 	"dependencies": {
-		"@markuplint/ml-ast": "4.0.0-alpha.3",
-		"@markuplint/types": "4.0.0-alpha.3",
-		"@types/uuid": "^9.0.4",
-		"tslib": "^2.6.2",
-		"type-fest": "^4.3.1",
+		"@markuplint/ml-ast": "4.0.0-alpha.5",
+		"@markuplint/types": "4.0.0-alpha.5",
+		"@types/uuid": "^9.0.6",
+		"espree": "^9.6.1",
+		"type-fest": "^4.5.0",
 		"uuid": "^9.0.1"
 	},
-	"gitHead": "380836f7adc1ff7e8eaf9d869e68d29eee8f3b7e"
+	"gitHead": "0c3e4690662edf1765bcc4b6411ec5507c1e2ea3"
 }