@markuplint/html-parser 4.0.0-alpha.3 → 4.0.0-alpha.4
This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/lib/attr-tokenizer.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { tokenizer, uuid } from '@markuplint/parser-utils';
|
|
2
2
|
const reAttrsInStartTag =
|
|
3
3
|
// eslint-disable-next-line no-control-regex
|
|
4
|
-
/(\s*)([^\
|
|
4
|
+
/(\s*)([^\u0000-\u001F "'/=>\u007F-\u009F]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|(\S*)))?/;
|
|
5
5
|
export default function attrTokenizer(raw, line, col, startOffset) {
|
|
6
6
|
const attrMatchedMap = raw.match(reAttrsInStartTag);
|
|
7
7
|
if (!attrMatchedMap) {
|
|
@@ -12,7 +12,7 @@ export default function attrTokenizer(raw, line, col, startOffset) {
|
|
|
12
12
|
const spacesBeforeEqualChars = attrMatchedMap[3] ?? '';
|
|
13
13
|
const equalChars = attrMatchedMap[4] ?? null;
|
|
14
14
|
const spacesAfterEqualChars = attrMatchedMap[5] ?? '';
|
|
15
|
-
const quoteChars = attrMatchedMap[6]
|
|
15
|
+
const quoteChars = attrMatchedMap[6] == null ? (attrMatchedMap[7] == null ? null : "'") : '"';
|
|
16
16
|
const valueChars = attrMatchedMap[6] ?? attrMatchedMap[7] ?? attrMatchedMap[8] ?? (quoteChars ? '' : null);
|
|
17
17
|
let offset = startOffset;
|
|
18
18
|
const spacesBeforeName = tokenizer(spacesBeforeAttrString, line, col, offset);
|
|
package/lib/optimize-starts-head-or-body.js
CHANGED
|
@@ -5,9 +5,9 @@ export function isStartsHeadTagOrBodyTag(rawCode) {
|
|
|
5
5
|
export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
6
6
|
const heads = [];
|
|
7
7
|
const bodies = [];
|
|
8
|
-
const code = rawCode.
|
|
8
|
+
const code = rawCode.replaceAll(
|
|
9
9
|
// eslint-disable-next-line no-control-regex
|
|
10
|
-
/(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C
|
|
10
|
+
/(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C| |\/|>|\u0000)/gi, tag => {
|
|
11
11
|
const prefix = `x-${UNDUPLICATED_CHAR}`;
|
|
12
12
|
let name;
|
|
13
13
|
if (/^head$/i.test(tag)) {
|
|
@@ -34,18 +34,18 @@ export function optimizeStartsHeadTagOrBodyTagResume(
|
|
|
34
34
|
nodeList,
|
|
35
35
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
36
36
|
replacements) {
|
|
37
|
-
|
|
37
|
+
for (const node of nodeList) {
|
|
38
38
|
if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
|
|
39
|
-
|
|
39
|
+
continue;
|
|
40
40
|
}
|
|
41
41
|
const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? replacements.heads.shift() : replacements.bodies.shift();
|
|
42
42
|
if (!realName) {
|
|
43
|
-
|
|
43
|
+
continue;
|
|
44
44
|
}
|
|
45
45
|
node.raw = node.raw.replace(node.nodeName, realName);
|
|
46
46
|
node.nodeName = realName;
|
|
47
47
|
if (node.type === 'starttag') {
|
|
48
48
|
node.elementType = 'html';
|
|
49
49
|
}
|
|
50
|
-
}
|
|
50
|
+
}
|
|
51
51
|
}
|
package/lib/parse-raw-tag.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { reTag, reTagName, isPotentialCustomElementName, tokenizer } from '@markuplint/parser-utils';
|
|
2
2
|
import attrTokenizer from './attr-tokenizer.js';
|
|
3
3
|
// eslint-disable-next-line no-control-regex
|
|
4
|
-
const reAttrsInStartTag = /\s*[^\
|
|
4
|
+
const reAttrsInStartTag = /\s*[^\u0000-\u001F "'/=>\u007F-\u009F]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|\S*))?/;
|
|
5
5
|
const reEndTokens = /(\s*\/)?(\s*)>$/;
|
|
6
6
|
export default function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
|
|
7
7
|
let offset = startOffset + offsetOffset;
|
|
@@ -13,13 +13,13 @@ export default function parseRawTag(raw, startLine, startCol, startOffset, offse
|
|
|
13
13
|
throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
|
|
14
14
|
}
|
|
15
15
|
// eslint-disable-next-line no-control-regex
|
|
16
|
-
const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C
|
|
16
|
+
const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C />]/);
|
|
17
17
|
const tagName = tagNameSplitted[0] || tagNameSplitted[1];
|
|
18
18
|
if (!tagName || (!reTagName.test(tagName) && !isPotentialCustomElementName(tagName))) {
|
|
19
19
|
throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
|
|
20
20
|
}
|
|
21
21
|
const tagStartPos = tagWithAttrs.indexOf(tagName);
|
|
22
|
-
let rawAttrs = tagWithAttrs.
|
|
22
|
+
let rawAttrs = tagWithAttrs.slice(Math.max(0, tagStartPos + tagName.length));
|
|
23
23
|
// console.log({ raw, tagStartPos, tagName, rawAttrs });
|
|
24
24
|
col += tagName.length + 1 + tagStartPos;
|
|
25
25
|
offset += tagName.length + 1 + tagStartPos;
|
|
@@ -32,7 +32,7 @@ export default function parseRawTag(raw, startLine, startCol, startOffset, offse
|
|
|
32
32
|
line = attr.endLine;
|
|
33
33
|
col = attr.endCol;
|
|
34
34
|
offset = attr.endOffset;
|
|
35
|
-
rawAttrs = rawAttrs.
|
|
35
|
+
rawAttrs = rawAttrs.slice(rawAttr.length);
|
|
36
36
|
attrs.push(attr);
|
|
37
37
|
}
|
|
38
38
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/html-parser",
|
|
3
|
-
"version": "4.0.0-alpha.
|
|
3
|
+
"version": "4.0.0-alpha.4",
|
|
4
4
|
"description": "HTML parser for markuplint",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -25,11 +25,10 @@
|
|
|
25
25
|
"clean": "tsc --build --clean"
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"@markuplint/ml-ast": "4.0.0-alpha.
|
|
29
|
-
"@markuplint/parser-utils": "4.0.0-alpha.
|
|
28
|
+
"@markuplint/ml-ast": "4.0.0-alpha.4",
|
|
29
|
+
"@markuplint/parser-utils": "4.0.0-alpha.4",
|
|
30
30
|
"parse5": "7.1.2",
|
|
31
|
-
"
|
|
32
|
-
"type-fest": "^4.3.1"
|
|
31
|
+
"type-fest": "^4.5.0"
|
|
33
32
|
},
|
|
34
|
-
"gitHead": "
|
|
33
|
+
"gitHead": "991b3aef77fde42c79343ee8c807257a35c589d7"
|
|
35
34
|
}
|