@markuplint/html-parser 4.0.0-alpha.3 → 4.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/lib/create-tree.js +3 -4
- package/lib/index.d.ts +1 -3
- package/lib/index.js +1 -3
- package/lib/is-document-fragment.d.ts +1 -1
- package/lib/is-document-fragment.js +2 -2
- package/lib/optimize-starts-head-or-body.js +5 -7
- package/lib/parse.js +1 -1
- package/package.json +5 -6
- package/lib/attr-tokenizer.d.ts +0 -2
- package/lib/attr-tokenizer.js +0 -80
- package/lib/parse-raw-tag.d.ts +0 -9
- package/lib/parse-raw-tag.js +0 -51
package/LICENSE
CHANGED
package/lib/create-tree.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
// @ts-nocheck TODO: Parse5(https://github.com/inikulin/parse5) supports to expose type definitions as submodules.
|
|
2
|
-
import { detectElementType, getEndCol, getEndLine, sliceFragment, uuid } from '@markuplint/parser-utils';
|
|
2
|
+
import { detectElementType, getEndCol, getEndLine, sliceFragment, tagParser, uuid } from '@markuplint/parser-utils';
|
|
3
3
|
import { parse, parseFragment } from 'parse5';
|
|
4
|
-
import parseRawTag from './parse-raw-tag.js';
|
|
5
4
|
const P5_OPTIONS = {
|
|
6
5
|
scriptingEnabled: false,
|
|
7
6
|
sourceCodeLocationInfo: true,
|
|
@@ -147,7 +146,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
147
146
|
const startTagRaw = tagLoc
|
|
148
147
|
? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
|
|
149
148
|
: rawHtml.slice(startOffset, endOffset ?? startOffset);
|
|
150
|
-
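const tagTokens = parseRawTag(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);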
|
|
149
|
+
const tagTokens = tagParser(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
151
150
|
const tagName = tagTokens.tagName;
|
|
152
151
|
let endTag = null;
|
|
153
152
|
let endTagLoc = 'endTag' in location ? location.endTag : null;
|
|
@@ -168,7 +167,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
168
167
|
if (endTagLoc) {
|
|
169
168
|
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
|
|
170
169
|
const endTagRaw = rawHtml.slice(startOffset, endOffset);
|
|
171
|
-
const endTagTokens = parseRawTag(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
170
|
+
const endTagTokens = tagParser(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
172
171
|
const endTagName = endTagTokens.tagName;
|
|
173
172
|
endTag = {
|
|
174
173
|
uuid: uuid(),
|
package/lib/index.d.ts
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { default as isDocumentFragment } from './is-document-fragment.js';
|
|
3
|
-
export { default as parseRawTag } from './parse-raw-tag.js';
|
|
1
|
+
export { isDocumentFragment } from './is-document-fragment.js';
|
|
4
2
|
export { getNamespace } from './get-namespace.js';
|
|
5
3
|
export { parse } from './parse.js';
|
|
6
4
|
export { createTree } from './create-tree.js';
|
package/lib/index.js
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { default as isDocumentFragment } from './is-document-fragment.js';
|
|
3
|
-
export { default as parseRawTag } from './parse-raw-tag.js';
|
|
1
|
+
export { isDocumentFragment } from './is-document-fragment.js';
|
|
4
2
|
export { getNamespace } from './get-namespace.js';
|
|
5
3
|
export { parse } from './parse.js';
|
|
6
4
|
export { createTree } from './create-tree.js';
|
package/lib/is-document-fragment.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export default function isDocumentFragment(html: string): boolean;
|
|
1
|
+
export declare function isDocumentFragment(html: string): boolean;
|
package/lib/is-document-fragment.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export default function isDocumentFragment(html) {
|
|
2
|
-
return !/^\s*(
|
|
1
|
+
export function isDocumentFragment(html) {
|
|
2
|
+
return !/^\s*(?:<!doctype html(?:\s*(?:\S.*|[\t\v\f \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFEFF]))?>|<html[\s>])/im.test(html);
|
|
3
3
|
}
|
package/lib/optimize-starts-head-or-body.js
CHANGED
|
@@ -5,9 +5,7 @@ export function isStartsHeadTagOrBodyTag(rawCode) {
|
|
|
5
5
|
export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
6
6
|
const heads = [];
|
|
7
7
|
const bodies = [];
|
|
8
|
-
const code = rawCode.replace(
|
|
9
|
-
// eslint-disable-next-line no-control-regex
|
|
10
|
-
/(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C|\u0020|\/|>|\u0000)/gi, tag => {
|
|
8
|
+
const code = rawCode.replaceAll(/(?<=<\/?)(?:head|body)(?=[\0\t\n\f />])/gi, tag => {
|
|
11
9
|
const prefix = `x-${UNDUPLICATED_CHAR}`;
|
|
12
10
|
let name;
|
|
13
11
|
if (/^head$/i.test(tag)) {
|
|
@@ -34,18 +32,18 @@ export function optimizeStartsHeadTagOrBodyTagResume(
|
|
|
34
32
|
nodeList,
|
|
35
33
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
36
34
|
replacements) {
|
|
37
|
-
|
|
35
|
+
for (const node of nodeList) {
|
|
38
36
|
if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
|
|
39
|
-
|
|
37
|
+
continue;
|
|
40
38
|
}
|
|
41
39
|
const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? replacements.heads.shift() : replacements.bodies.shift();
|
|
42
40
|
if (!realName) {
|
|
43
|
-
|
|
41
|
+
continue;
|
|
44
42
|
}
|
|
45
43
|
node.raw = node.raw.replace(node.nodeName, realName);
|
|
46
44
|
node.nodeName = realName;
|
|
47
45
|
if (node.type === 'starttag') {
|
|
48
46
|
node.elementType = 'html';
|
|
49
47
|
}
|
|
50
|
-
}
|
|
48
|
+
}
|
|
51
49
|
}
|
package/lib/parse.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ignoreFrontMatter, flattenNodes } from '@markuplint/parser-utils';
|
|
2
2
|
import { createTree } from './create-tree.js';
|
|
3
|
-
import isDocumentFragment from './is-document-fragment.js';
|
|
3
|
+
import { isDocumentFragment } from './is-document-fragment.js';
|
|
4
4
|
import { isStartsHeadTagOrBodyTag, optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
|
|
5
5
|
export const parse = (rawCode, options) => {
|
|
6
6
|
if (options?.ignoreFrontMatter) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/html-parser",
|
|
3
|
-
"version": "4.0.0-alpha.3",
|
|
3
|
+
"version": "4.0.0-alpha.5",
|
|
4
4
|
"description": "HTML parser for markuplint",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -25,11 +25,10 @@
|
|
|
25
25
|
"clean": "tsc --build --clean"
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"@markuplint/ml-ast": "4.0.0-alpha.3",
|
|
29
|
-
"@markuplint/parser-utils": "4.0.0-alpha.3",
|
|
28
|
+
"@markuplint/ml-ast": "4.0.0-alpha.5",
|
|
29
|
+
"@markuplint/parser-utils": "4.0.0-alpha.5",
|
|
30
30
|
"parse5": "7.1.2",
|
|
31
|
-
"
|
|
32
|
-
"type-fest": "^4.3.1"
|
|
31
|
+
"type-fest": "^4.5.0"
|
|
33
32
|
},
|
|
34
|
-
"gitHead": "
|
|
33
|
+
"gitHead": "0c3e4690662edf1765bcc4b6411ec5507c1e2ea3"
|
|
35
34
|
}
|
package/lib/attr-tokenizer.d.ts
DELETED
package/lib/attr-tokenizer.js
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
import { tokenizer, uuid } from '@markuplint/parser-utils';
|
|
2
|
-
const reAttrsInStartTag =
|
|
3
|
-
// eslint-disable-next-line no-control-regex
|
|
4
|
-
/(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
|
|
5
|
-
export default function attrTokenizer(raw, line, col, startOffset) {
|
|
6
|
-
const attrMatchedMap = raw.match(reAttrsInStartTag);
|
|
7
|
-
if (!attrMatchedMap) {
|
|
8
|
-
throw new SyntaxError('Illegal attribute token');
|
|
9
|
-
}
|
|
10
|
-
const spacesBeforeAttrString = attrMatchedMap[1] ?? '';
|
|
11
|
-
const nameChars = attrMatchedMap[2] ?? '';
|
|
12
|
-
const spacesBeforeEqualChars = attrMatchedMap[3] ?? '';
|
|
13
|
-
const equalChars = attrMatchedMap[4] ?? null;
|
|
14
|
-
const spacesAfterEqualChars = attrMatchedMap[5] ?? '';
|
|
15
|
-
const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
|
|
16
|
-
const valueChars = attrMatchedMap[6] ?? attrMatchedMap[7] ?? attrMatchedMap[8] ?? (quoteChars ? '' : null);
|
|
17
|
-
let offset = startOffset;
|
|
18
|
-
const spacesBeforeName = tokenizer(spacesBeforeAttrString, line, col, offset);
|
|
19
|
-
line = spacesBeforeName.endLine;
|
|
20
|
-
col = spacesBeforeName.endCol;
|
|
21
|
-
offset = spacesBeforeName.endOffset;
|
|
22
|
-
const name = tokenizer(nameChars, line, col, offset);
|
|
23
|
-
line = name.endLine;
|
|
24
|
-
col = name.endCol;
|
|
25
|
-
offset = name.endOffset;
|
|
26
|
-
const spacesBeforeEqual = tokenizer(spacesBeforeEqualChars, line, col, offset);
|
|
27
|
-
line = spacesBeforeEqual.endLine;
|
|
28
|
-
col = spacesBeforeEqual.endCol;
|
|
29
|
-
offset = spacesBeforeEqual.endOffset;
|
|
30
|
-
const equal = tokenizer(equalChars, line, col, offset);
|
|
31
|
-
line = equal.endLine;
|
|
32
|
-
col = equal.endCol;
|
|
33
|
-
offset = equal.endOffset;
|
|
34
|
-
const spacesAfterEqual = tokenizer(spacesAfterEqualChars, line, col, offset);
|
|
35
|
-
line = spacesAfterEqual.endLine;
|
|
36
|
-
col = spacesAfterEqual.endCol;
|
|
37
|
-
offset = spacesAfterEqual.endOffset;
|
|
38
|
-
const startQuote = tokenizer(quoteChars, line, col, offset);
|
|
39
|
-
line = startQuote.endLine;
|
|
40
|
-
col = startQuote.endCol;
|
|
41
|
-
offset = startQuote.endOffset;
|
|
42
|
-
const value = tokenizer(valueChars, line, col, offset);
|
|
43
|
-
line = value.endLine;
|
|
44
|
-
col = value.endCol;
|
|
45
|
-
offset = value.endOffset;
|
|
46
|
-
const endQuote = tokenizer(quoteChars, line, col, offset);
|
|
47
|
-
const attrToken = tokenizer(nameChars +
|
|
48
|
-
spacesBeforeEqualChars +
|
|
49
|
-
(equalChars ?? '') +
|
|
50
|
-
spacesAfterEqualChars +
|
|
51
|
-
(quoteChars ?? '') +
|
|
52
|
-
(valueChars ?? '') +
|
|
53
|
-
(quoteChars ?? ''), name.startLine, name.startCol, name.startOffset);
|
|
54
|
-
return {
|
|
55
|
-
type: 'html-attr',
|
|
56
|
-
uuid: uuid(),
|
|
57
|
-
raw: attrToken.raw,
|
|
58
|
-
startOffset: attrToken.startOffset,
|
|
59
|
-
endOffset: attrToken.endOffset,
|
|
60
|
-
startLine: attrToken.startLine,
|
|
61
|
-
endLine: attrToken.endLine,
|
|
62
|
-
startCol: attrToken.startCol,
|
|
63
|
-
endCol: attrToken.endCol,
|
|
64
|
-
spacesBeforeName,
|
|
65
|
-
name,
|
|
66
|
-
spacesBeforeEqual,
|
|
67
|
-
equal,
|
|
68
|
-
spacesAfterEqual,
|
|
69
|
-
startQuote,
|
|
70
|
-
value,
|
|
71
|
-
endQuote,
|
|
72
|
-
isDuplicatable: false,
|
|
73
|
-
nodeName: name.raw,
|
|
74
|
-
parentNode: null,
|
|
75
|
-
prevNode: null,
|
|
76
|
-
nextNode: null,
|
|
77
|
-
isFragment: false,
|
|
78
|
-
isGhost: false,
|
|
79
|
-
};
|
|
80
|
-
}
|
package/lib/parse-raw-tag.d.ts
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import type { MLASTAttr, MLToken } from '@markuplint/ml-ast';
|
|
2
|
-
type TagTokens = {
|
|
3
|
-
tagName: string;
|
|
4
|
-
attrs: MLASTAttr[];
|
|
5
|
-
selfClosingSolidus: MLToken;
|
|
6
|
-
endSpace: MLToken;
|
|
7
|
-
};
|
|
8
|
-
export default function parseRawTag(raw: string, startLine: number, startCol: number, startOffset: number, offsetOffset?: number, offsetLine?: number, offsetColumn?: number): TagTokens;
|
|
9
|
-
export {};
|
package/lib/parse-raw-tag.js
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import { reTag, reTagName, isPotentialCustomElementName, tokenizer } from '@markuplint/parser-utils';
|
|
2
|
-
import attrTokenizer from './attr-tokenizer.js';
|
|
3
|
-
// eslint-disable-next-line no-control-regex
|
|
4
|
-
const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
|
|
5
|
-
const reEndTokens = /(\s*\/)?(\s*)>$/;
|
|
6
|
-
export default function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
|
|
7
|
-
let offset = startOffset + offsetOffset;
|
|
8
|
-
let line = startLine + offsetLine;
|
|
9
|
-
let col = startCol + (startLine === 1 ? offsetColumn : 0);
|
|
10
|
-
const matches = raw.match(reTag);
|
|
11
|
-
const tagWithAttrs = matches?.[1];
|
|
12
|
-
if (!tagWithAttrs) {
|
|
13
|
-
throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
|
|
14
|
-
}
|
|
15
|
-
// eslint-disable-next-line no-control-regex
|
|
16
|
-
const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
|
|
17
|
-
const tagName = tagNameSplitted[0] || tagNameSplitted[1];
|
|
18
|
-
if (!tagName || (!reTagName.test(tagName) && !isPotentialCustomElementName(tagName))) {
|
|
19
|
-
throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
|
|
20
|
-
}
|
|
21
|
-
const tagStartPos = tagWithAttrs.indexOf(tagName);
|
|
22
|
-
let rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length);
|
|
23
|
-
// console.log({ raw, tagStartPos, tagName, rawAttrs });
|
|
24
|
-
col += tagName.length + 1 + tagStartPos;
|
|
25
|
-
offset += tagName.length + 1 + tagStartPos;
|
|
26
|
-
const attrs = [];
|
|
27
|
-
while (reAttrsInStartTag.test(rawAttrs)) {
|
|
28
|
-
const attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
|
|
29
|
-
if (attrMatchedMap && attrMatchedMap[0]) {
|
|
30
|
-
const rawAttr = attrMatchedMap[0];
|
|
31
|
-
const attr = attrTokenizer(rawAttr, line, col, offset);
|
|
32
|
-
line = attr.endLine;
|
|
33
|
-
col = attr.endCol;
|
|
34
|
-
offset = attr.endOffset;
|
|
35
|
-
rawAttrs = rawAttrs.substr(rawAttr.length);
|
|
36
|
-
attrs.push(attr);
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
const endTokens = reEndTokens.exec(raw);
|
|
40
|
-
const selfClosingSolidus = tokenizer(endTokens?.[1] ?? '', line, col, offset);
|
|
41
|
-
line = selfClosingSolidus.endLine;
|
|
42
|
-
col = selfClosingSolidus.endCol;
|
|
43
|
-
offset = selfClosingSolidus.endOffset;
|
|
44
|
-
const endSpace = tokenizer(endTokens?.[2] ?? '', line, col, offset);
|
|
45
|
-
return {
|
|
46
|
-
tagName,
|
|
47
|
-
attrs,
|
|
48
|
-
selfClosingSolidus,
|
|
49
|
-
endSpace,
|
|
50
|
-
};
|
|
51
|
-
}
|