npm - @markuplint/html-parser - Versions diffs - 4.0.0-alpha.3 → 4.0.0-alpha.5 - Mend

@markuplint/html-parser 4.0.0-alpha.3 → 4.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/LICENSE +1 -1
package/lib/create-tree.js +3 -4
package/lib/index.d.ts +1 -3
package/lib/index.js +1 -3
package/lib/is-document-fragment.d.ts +1 -1
package/lib/is-document-fragment.js +2 -2
package/lib/optimize-starts-head-or-body.js +5 -7
package/lib/parse.js +1 -1
package/package.json +5 -6
package/lib/attr-tokenizer.d.ts +0 -2
package/lib/attr-tokenizer.js +0 -80
package/lib/parse-raw-tag.d.ts +0 -9
package/lib/parse-raw-tag.js +0 -51

package/LICENSE CHANGED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2017-2019 Yusuke Hirao
+Copyright (c) 2017-2023 Yusuke Hirao
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

package/lib/create-tree.js CHANGED Viewed

@@ -1,7 +1,6 @@
 // @ts-nocheck TODO: Parse5(https://github.com/inikulin/parse5) supports to expose type definitions as submodules.
-import { detectElementType, getEndCol, getEndLine, sliceFragment, uuid } from '@markuplint/parser-utils';
+import { detectElementType, getEndCol, getEndLine, sliceFragment, tagParser, uuid } from '@markuplint/parser-utils';
 import { parse, parseFragment } from 'parse5';
-import parseRawTag from './parse-raw-tag.js';
 const P5_OPTIONS = {
     scriptingEnabled: false,
     sourceCodeLocationInfo: true,
@@ -147,7 +146,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
             const startTagRaw = tagLoc
                 ? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
                 : rawHtml.slice(startOffset, endOffset ?? startOffset);
-            const tagTokens = parseRawTag(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
+            const tagTokens = tagParser(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
             const tagName = tagTokens.tagName;
             let endTag = null;
             let endTagLoc = 'endTag' in location ? location.endTag : null;
@@ -168,7 +167,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
             if (endTagLoc) {
                 const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
                 const endTagRaw = rawHtml.slice(startOffset, endOffset);
-                const endTagTokens = parseRawTag(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
+                const endTagTokens = tagParser(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
                 const endTagName = endTagTokens.tagName;
                 endTag = {
                     uuid: uuid(),

package/lib/index.d.ts CHANGED Viewed

@@ -1,6 +1,4 @@
-export { default as attrTokenizer } from './attr-tokenizer.js';
-export { default as isDocumentFragment } from './is-document-fragment.js';
-export { default as parseRawTag } from './parse-raw-tag.js';
+export { isDocumentFragment } from './is-document-fragment.js';
 export { getNamespace } from './get-namespace.js';
 export { parse } from './parse.js';
 export { createTree } from './create-tree.js';

package/lib/index.js CHANGED Viewed

@@ -1,6 +1,4 @@
-export { default as attrTokenizer } from './attr-tokenizer.js';
-export { default as isDocumentFragment } from './is-document-fragment.js';
-export { default as parseRawTag } from './parse-raw-tag.js';
+export { isDocumentFragment } from './is-document-fragment.js';
 export { getNamespace } from './get-namespace.js';
 export { parse } from './parse.js';
 export { createTree } from './create-tree.js';

package/lib/is-document-fragment.d.ts CHANGED Viewed

@@ -1 +1 @@
-export default function isDocumentFragment(html: string): boolean;
+export declare function isDocumentFragment(html: string): boolean;

package/lib/is-document-fragment.js CHANGED Viewed

@@ -1,3 +1,3 @@
-export default function isDocumentFragment(html) {
-    return !/^\s*(<!doctype html(?:\s*.+)?>|<html(?:\s|>))/im.test(html);
+export function isDocumentFragment(html) {
+    return !/^\s*(?:<!doctype html(?:\s*(?:\S.*|[\t\v\f \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFEFF]))?>|<html[\s>])/im.test(html);
 }

package/lib/optimize-starts-head-or-body.js CHANGED Viewed

@@ -5,9 +5,7 @@ export function isStartsHeadTagOrBodyTag(rawCode) {
 export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
     const heads = [];
     const bodies = [];
-    const code = rawCode.replace(
-    // eslint-disable-next-line no-control-regex
-    /(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C|\u0020|\/|>|\u0000)/gi, tag => {
+    const code = rawCode.replaceAll(/(?<=<\/?)(?:head|body)(?=[\0\t\n\f />])/gi, tag => {
         const prefix = `x-${UNDUPLICATED_CHAR}`;
         let name;
         if (/^head$/i.test(tag)) {
@@ -34,18 +32,18 @@ export function optimizeStartsHeadTagOrBodyTagResume(
 nodeList,
 // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
 replacements) {
-    nodeList.forEach(node => {
+    for (const node of nodeList) {
         if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
-            return;
+            continue;
         }
         const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? replacements.heads.shift() : replacements.bodies.shift();
         if (!realName) {
-            return;
+            continue;
         }
         node.raw = node.raw.replace(node.nodeName, realName);
         node.nodeName = realName;
         if (node.type === 'starttag') {
             node.elementType = 'html';
         }
-    });
+    }
 }

package/lib/parse.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { ignoreFrontMatter, flattenNodes } from '@markuplint/parser-utils';
 import { createTree } from './create-tree.js';
-import isDocumentFragment from './is-document-fragment.js';
+import { isDocumentFragment } from './is-document-fragment.js';
 import { isStartsHeadTagOrBodyTag, optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
 export const parse = (rawCode, options) => {
     if (options?.ignoreFrontMatter) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@markuplint/html-parser",
-	"version": "4.0.0-alpha.3",
+	"version": "4.0.0-alpha.5",
 	"description": "HTML parser for markuplint",
 	"repository": "git@github.com:markuplint/markuplint.git",
 	"author": "Yusuke Hirao <yusukehirao@me.com>",
@@ -25,11 +25,10 @@
 		"clean": "tsc --build --clean"
 	},
 	"dependencies": {
-		"@markuplint/ml-ast": "4.0.0-alpha.3",
-		"@markuplint/parser-utils": "4.0.0-alpha.3",
+		"@markuplint/ml-ast": "4.0.0-alpha.5",
+		"@markuplint/parser-utils": "4.0.0-alpha.5",
 		"parse5": "7.1.2",
-		"tslib": "^2.6.2",
-		"type-fest": "^4.3.1"
+		"type-fest": "^4.5.0"
 	},
-	"gitHead": "380836f7adc1ff7e8eaf9d869e68d29eee8f3b7e"
+	"gitHead": "0c3e4690662edf1765bcc4b6411ec5507c1e2ea3"
 }

package/lib/attr-tokenizer.d.ts DELETED Viewed

@@ -1,2 +0,0 @@
-import type { MLASTHTMLAttr } from '@markuplint/ml-ast';
-export default function attrTokenizer(raw: string, line: number, col: number, startOffset: number): MLASTHTMLAttr;

package/lib/attr-tokenizer.js DELETED Viewed

@@ -1,80 +0,0 @@
-import { tokenizer, uuid } from '@markuplint/parser-utils';
-const reAttrsInStartTag =
-// eslint-disable-next-line no-control-regex
-/(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
-export default function attrTokenizer(raw, line, col, startOffset) {
-    const attrMatchedMap = raw.match(reAttrsInStartTag);
-    if (!attrMatchedMap) {
-        throw new SyntaxError('Illegal attribute token');
-    }
-    const spacesBeforeAttrString = attrMatchedMap[1] ?? '';
-    const nameChars = attrMatchedMap[2] ?? '';
-    const spacesBeforeEqualChars = attrMatchedMap[3] ?? '';
-    const equalChars = attrMatchedMap[4] ?? null;
-    const spacesAfterEqualChars = attrMatchedMap[5] ?? '';
-    const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
-    const valueChars = attrMatchedMap[6] ?? attrMatchedMap[7] ?? attrMatchedMap[8] ?? (quoteChars ? '' : null);
-    let offset = startOffset;
-    const spacesBeforeName = tokenizer(spacesBeforeAttrString, line, col, offset);
-    line = spacesBeforeName.endLine;
-    col = spacesBeforeName.endCol;
-    offset = spacesBeforeName.endOffset;
-    const name = tokenizer(nameChars, line, col, offset);
-    line = name.endLine;
-    col = name.endCol;
-    offset = name.endOffset;
-    const spacesBeforeEqual = tokenizer(spacesBeforeEqualChars, line, col, offset);
-    line = spacesBeforeEqual.endLine;
-    col = spacesBeforeEqual.endCol;
-    offset = spacesBeforeEqual.endOffset;
-    const equal = tokenizer(equalChars, line, col, offset);
-    line = equal.endLine;
-    col = equal.endCol;
-    offset = equal.endOffset;
-    const spacesAfterEqual = tokenizer(spacesAfterEqualChars, line, col, offset);
-    line = spacesAfterEqual.endLine;
-    col = spacesAfterEqual.endCol;
-    offset = spacesAfterEqual.endOffset;
-    const startQuote = tokenizer(quoteChars, line, col, offset);
-    line = startQuote.endLine;
-    col = startQuote.endCol;
-    offset = startQuote.endOffset;
-    const value = tokenizer(valueChars, line, col, offset);
-    line = value.endLine;
-    col = value.endCol;
-    offset = value.endOffset;
-    const endQuote = tokenizer(quoteChars, line, col, offset);
-    const attrToken = tokenizer(nameChars +
-        spacesBeforeEqualChars +
-        (equalChars ?? '') +
-        spacesAfterEqualChars +
-        (quoteChars ?? '') +
-        (valueChars ?? '') +
-        (quoteChars ?? ''), name.startLine, name.startCol, name.startOffset);
-    return {
-        type: 'html-attr',
-        uuid: uuid(),
-        raw: attrToken.raw,
-        startOffset: attrToken.startOffset,
-        endOffset: attrToken.endOffset,
-        startLine: attrToken.startLine,
-        endLine: attrToken.endLine,
-        startCol: attrToken.startCol,
-        endCol: attrToken.endCol,
-        spacesBeforeName,
-        name,
-        spacesBeforeEqual,
-        equal,
-        spacesAfterEqual,
-        startQuote,
-        value,
-        endQuote,
-        isDuplicatable: false,
-        nodeName: name.raw,
-        parentNode: null,
-        prevNode: null,
-        nextNode: null,
-        isFragment: false,
-        isGhost: false,
-    };
-}

package/lib/parse-raw-tag.d.ts DELETED Viewed

@@ -1,9 +0,0 @@
-import type { MLASTAttr, MLToken } from '@markuplint/ml-ast';
-type TagTokens = {
-    tagName: string;
-    attrs: MLASTAttr[];
-    selfClosingSolidus: MLToken;
-    endSpace: MLToken;
-};
-export default function parseRawTag(raw: string, startLine: number, startCol: number, startOffset: number, offsetOffset?: number, offsetLine?: number, offsetColumn?: number): TagTokens;
-export {};

package/lib/parse-raw-tag.js DELETED Viewed

@@ -1,51 +0,0 @@
-import { reTag, reTagName, isPotentialCustomElementName, tokenizer } from '@markuplint/parser-utils';
-import attrTokenizer from './attr-tokenizer.js';
-// eslint-disable-next-line no-control-regex
-const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
-const reEndTokens = /(\s*\/)?(\s*)>$/;
-export default function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
-    let offset = startOffset + offsetOffset;
-    let line = startLine + offsetLine;
-    let col = startCol + (startLine === 1 ? offsetColumn : 0);
-    const matches = raw.match(reTag);
-    const tagWithAttrs = matches?.[1];
-    if (!tagWithAttrs) {
-        throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
-    }
-    // eslint-disable-next-line no-control-regex
-    const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
-    const tagName = tagNameSplitted[0] || tagNameSplitted[1];
-    if (!tagName || (!reTagName.test(tagName) && !isPotentialCustomElementName(tagName))) {
-        throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
-    }
-    const tagStartPos = tagWithAttrs.indexOf(tagName);
-    let rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length);
-    // console.log({ raw, tagStartPos, tagName, rawAttrs });
-    col += tagName.length + 1 + tagStartPos;
-    offset += tagName.length + 1 + tagStartPos;
-    const attrs = [];
-    while (reAttrsInStartTag.test(rawAttrs)) {
-        const attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
-        if (attrMatchedMap && attrMatchedMap[0]) {
-            const rawAttr = attrMatchedMap[0];
-            const attr = attrTokenizer(rawAttr, line, col, offset);
-            line = attr.endLine;
-            col = attr.endCol;
-            offset = attr.endOffset;
-            rawAttrs = rawAttrs.substr(rawAttr.length);
-            attrs.push(attr);
-        }
-    }
-    const endTokens = reEndTokens.exec(raw);
-    const selfClosingSolidus = tokenizer(endTokens?.[1] ?? '', line, col, offset);
-    line = selfClosingSolidus.endLine;
-    col = selfClosingSolidus.endCol;
-    offset = selfClosingSolidus.endOffset;
-    const endSpace = tokenizer(endTokens?.[2] ?? '', line, col, offset);
-    return {
-        tagName,
-        attrs,
-        selfClosingSolidus,
-        endSpace,
-    };
-}