@markuplint/html-parser 3.9.0 → 4.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/attr-tokenizer.js +22 -26
- package/lib/create-tree.js +25 -31
- package/lib/get-namespace.js +5 -9
- package/lib/index.d.ts +6 -6
- package/lib/index.js +6 -18
- package/lib/is-document-fragment.js +1 -4
- package/lib/optimize-starts-head-or-body.js +3 -9
- package/lib/parse-raw-tag.js +9 -14
- package/lib/parse.js +13 -19
- package/package.json +12 -7
- package/test/attr-tokenizer.spec.js +0 -675
- package/test/get-namespace.spec.js +0 -21
- package/test/index.spec.js +0 -1246
- package/test/optimize-starts-head-or-body.spec.js +0 -25
- package/test/parse-raw-tag.spec.js +0 -483
package/lib/attr-tokenizer.js
CHANGED
|
@@ -1,62 +1,59 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const parser_utils_1 = require("@markuplint/parser-utils");
|
|
1
|
+
import { tokenizer, uuid } from '@markuplint/parser-utils';
|
|
4
2
|
const reAttrsInStartTag =
|
|
5
3
|
// eslint-disable-next-line no-control-regex
|
|
6
4
|
/(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
|
|
7
|
-
function attrTokenizer(raw, line, col, startOffset) {
|
|
8
|
-
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
5
|
+
export default function attrTokenizer(raw, line, col, startOffset) {
|
|
9
6
|
const attrMatchedMap = raw.match(reAttrsInStartTag);
|
|
10
7
|
if (!attrMatchedMap) {
|
|
11
8
|
throw new SyntaxError('Illegal attribute token');
|
|
12
9
|
}
|
|
13
|
-
const spacesBeforeAttrString =
|
|
14
|
-
const nameChars =
|
|
15
|
-
const spacesBeforeEqualChars =
|
|
16
|
-
const equalChars =
|
|
17
|
-
const spacesAfterEqualChars =
|
|
10
|
+
const spacesBeforeAttrString = attrMatchedMap[1] ?? '';
|
|
11
|
+
const nameChars = attrMatchedMap[2] ?? '';
|
|
12
|
+
const spacesBeforeEqualChars = attrMatchedMap[3] ?? '';
|
|
13
|
+
const equalChars = attrMatchedMap[4] ?? null;
|
|
14
|
+
const spacesAfterEqualChars = attrMatchedMap[5] ?? '';
|
|
18
15
|
const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
|
|
19
|
-
const valueChars =
|
|
16
|
+
const valueChars = attrMatchedMap[6] ?? attrMatchedMap[7] ?? attrMatchedMap[8] ?? (quoteChars ? '' : null);
|
|
20
17
|
let offset = startOffset;
|
|
21
|
-
const spacesBeforeName =
|
|
18
|
+
const spacesBeforeName = tokenizer(spacesBeforeAttrString, line, col, offset);
|
|
22
19
|
line = spacesBeforeName.endLine;
|
|
23
20
|
col = spacesBeforeName.endCol;
|
|
24
21
|
offset = spacesBeforeName.endOffset;
|
|
25
|
-
const name =
|
|
22
|
+
const name = tokenizer(nameChars, line, col, offset);
|
|
26
23
|
line = name.endLine;
|
|
27
24
|
col = name.endCol;
|
|
28
25
|
offset = name.endOffset;
|
|
29
|
-
const spacesBeforeEqual =
|
|
26
|
+
const spacesBeforeEqual = tokenizer(spacesBeforeEqualChars, line, col, offset);
|
|
30
27
|
line = spacesBeforeEqual.endLine;
|
|
31
28
|
col = spacesBeforeEqual.endCol;
|
|
32
29
|
offset = spacesBeforeEqual.endOffset;
|
|
33
|
-
const equal =
|
|
30
|
+
const equal = tokenizer(equalChars, line, col, offset);
|
|
34
31
|
line = equal.endLine;
|
|
35
32
|
col = equal.endCol;
|
|
36
33
|
offset = equal.endOffset;
|
|
37
|
-
const spacesAfterEqual =
|
|
34
|
+
const spacesAfterEqual = tokenizer(spacesAfterEqualChars, line, col, offset);
|
|
38
35
|
line = spacesAfterEqual.endLine;
|
|
39
36
|
col = spacesAfterEqual.endCol;
|
|
40
37
|
offset = spacesAfterEqual.endOffset;
|
|
41
|
-
const startQuote =
|
|
38
|
+
const startQuote = tokenizer(quoteChars, line, col, offset);
|
|
42
39
|
line = startQuote.endLine;
|
|
43
40
|
col = startQuote.endCol;
|
|
44
41
|
offset = startQuote.endOffset;
|
|
45
|
-
const value =
|
|
42
|
+
const value = tokenizer(valueChars, line, col, offset);
|
|
46
43
|
line = value.endLine;
|
|
47
44
|
col = value.endCol;
|
|
48
45
|
offset = value.endOffset;
|
|
49
|
-
const endQuote =
|
|
50
|
-
const attrToken =
|
|
46
|
+
const endQuote = tokenizer(quoteChars, line, col, offset);
|
|
47
|
+
const attrToken = tokenizer(nameChars +
|
|
51
48
|
spacesBeforeEqualChars +
|
|
52
|
-
(equalChars
|
|
49
|
+
(equalChars ?? '') +
|
|
53
50
|
spacesAfterEqualChars +
|
|
54
|
-
(quoteChars
|
|
55
|
-
(valueChars
|
|
56
|
-
(quoteChars
|
|
51
|
+
(quoteChars ?? '') +
|
|
52
|
+
(valueChars ?? '') +
|
|
53
|
+
(quoteChars ?? ''), name.startLine, name.startCol, name.startOffset);
|
|
57
54
|
return {
|
|
58
55
|
type: 'html-attr',
|
|
59
|
-
uuid:
|
|
56
|
+
uuid: uuid(),
|
|
60
57
|
raw: attrToken.raw,
|
|
61
58
|
startOffset: attrToken.startOffset,
|
|
62
59
|
endOffset: attrToken.endOffset,
|
|
@@ -81,4 +78,3 @@ function attrTokenizer(raw, line, col, startOffset) {
|
|
|
81
78
|
isGhost: false,
|
|
82
79
|
};
|
|
83
80
|
}
|
|
84
|
-
exports.default = attrTokenizer;
|
package/lib/create-tree.js
CHANGED
|
@@ -1,19 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const parser_utils_1 = require("@markuplint/parser-utils");
|
|
6
|
-
const parse5_1 = require("parse5");
|
|
7
|
-
const parse_raw_tag_1 = tslib_1.__importDefault(require("./parse-raw-tag"));
|
|
1
|
+
// @ts-nocheck TODO: Parse5(https://github.com/inikulin/parse5) supports to expose type definitions as submodules.
|
|
2
|
+
import { detectElementType, getEndCol, getEndLine, sliceFragment, uuid } from '@markuplint/parser-utils';
|
|
3
|
+
import { parse, parseFragment } from 'parse5';
|
|
4
|
+
import parseRawTag from './parse-raw-tag.js';
|
|
8
5
|
const P5_OPTIONS = {
|
|
9
6
|
scriptingEnabled: false,
|
|
10
7
|
sourceCodeLocationInfo: true,
|
|
11
8
|
};
|
|
12
|
-
function createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn) {
|
|
13
|
-
const doc = isFragment ?
|
|
9
|
+
export function createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn) {
|
|
10
|
+
const doc = isFragment ? parseFragment(rawCode, P5_OPTIONS) : parse(rawCode, P5_OPTIONS);
|
|
14
11
|
return createTreeRecursive(doc, null, rawCode, offsetOffset, offsetLine, offsetColumn);
|
|
15
12
|
}
|
|
16
|
-
exports.createTree = createTree;
|
|
17
13
|
function createTreeRecursive(rootNode,
|
|
18
14
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
19
15
|
parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
@@ -41,11 +37,10 @@ function nodeize(originNode,
|
|
|
41
37
|
prevNode,
|
|
42
38
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
43
39
|
parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
44
|
-
var _a, _b, _c;
|
|
45
40
|
const nextNode = null;
|
|
46
41
|
const location = getLocation(originNode);
|
|
47
42
|
if (!location) {
|
|
48
|
-
const prevToken = prevNode
|
|
43
|
+
const prevToken = prevNode ?? parentNode;
|
|
49
44
|
const startOffset = prevToken ? prevToken.endOffset : 0;
|
|
50
45
|
const endOffset = prevToken ? prevToken.endOffset : 0;
|
|
51
46
|
const startLine = prevToken ? prevToken.endLine : 0;
|
|
@@ -53,7 +48,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
53
48
|
const startCol = prevToken ? prevToken.endCol : 0;
|
|
54
49
|
const endCol = prevToken ? prevToken.endCol : 0;
|
|
55
50
|
const node = {
|
|
56
|
-
uuid:
|
|
51
|
+
uuid: uuid(),
|
|
57
52
|
raw: '',
|
|
58
53
|
startOffset: startOffset + offsetOffset,
|
|
59
54
|
endOffset: endOffset + offsetOffset,
|
|
@@ -80,18 +75,18 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
80
75
|
return node;
|
|
81
76
|
}
|
|
82
77
|
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = location;
|
|
83
|
-
const raw = rawHtml.slice(startOffset, endOffset
|
|
78
|
+
const raw = rawHtml.slice(startOffset, endOffset ?? startOffset);
|
|
84
79
|
switch (originNode.nodeName) {
|
|
85
80
|
case '#documentType': {
|
|
86
81
|
return {
|
|
87
|
-
uuid:
|
|
82
|
+
uuid: uuid(),
|
|
88
83
|
raw,
|
|
89
84
|
// @ts-ignore
|
|
90
|
-
name:
|
|
85
|
+
name: originNode.name ?? '',
|
|
91
86
|
// @ts-ignore
|
|
92
|
-
publicId:
|
|
87
|
+
publicId: originNode.publicId ?? '',
|
|
93
88
|
// @ts-ignore
|
|
94
|
-
systemId:
|
|
89
|
+
systemId: originNode.systemId ?? '',
|
|
95
90
|
startOffset: startOffset + offsetOffset,
|
|
96
91
|
endOffset: endOffset + offsetOffset,
|
|
97
92
|
startLine: startLine + offsetLine,
|
|
@@ -110,7 +105,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
110
105
|
}
|
|
111
106
|
case '#text': {
|
|
112
107
|
const node = {
|
|
113
|
-
uuid:
|
|
108
|
+
uuid: uuid(),
|
|
114
109
|
raw,
|
|
115
110
|
startOffset: startOffset + offsetOffset,
|
|
116
111
|
endOffset: endOffset + offsetOffset,
|
|
@@ -130,7 +125,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
130
125
|
}
|
|
131
126
|
case '#comment': {
|
|
132
127
|
return {
|
|
133
|
-
uuid:
|
|
128
|
+
uuid: uuid(),
|
|
134
129
|
raw,
|
|
135
130
|
startOffset: startOffset + offsetOffset,
|
|
136
131
|
endOffset: endOffset + offsetOffset,
|
|
@@ -151,8 +146,8 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
151
146
|
const tagLoc = 'startTag' in location ? location.startTag : null;
|
|
152
147
|
const startTagRaw = tagLoc
|
|
153
148
|
? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
|
|
154
|
-
: rawHtml.slice(startOffset, endOffset
|
|
155
|
-
const tagTokens = (
|
|
149
|
+
: rawHtml.slice(startOffset, endOffset ?? startOffset);
|
|
150
|
+
const tagTokens = parseRawTag(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
156
151
|
const tagName = tagTokens.tagName;
|
|
157
152
|
let endTag = null;
|
|
158
153
|
let endTagLoc = 'endTag' in location ? location.endTag : null;
|
|
@@ -167,16 +162,16 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
167
162
|
const endTagMatched = belowRawHTMLFromStartTagEnd.match(new RegExp(`^</\\s*${tagName}[^>]*>`, 'm'));
|
|
168
163
|
const endTag = endTagMatched && endTagMatched[0];
|
|
169
164
|
if (endTag) {
|
|
170
|
-
endTagLoc =
|
|
165
|
+
endTagLoc = sliceFragment(rawHtml, location.endOffset, location.endOffset + endTag.length);
|
|
171
166
|
}
|
|
172
167
|
}
|
|
173
168
|
if (endTagLoc) {
|
|
174
169
|
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
|
|
175
170
|
const endTagRaw = rawHtml.slice(startOffset, endOffset);
|
|
176
|
-
const endTagTokens = (
|
|
171
|
+
const endTagTokens = parseRawTag(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
177
172
|
const endTagName = endTagTokens.tagName;
|
|
178
173
|
endTag = {
|
|
179
|
-
uuid:
|
|
174
|
+
uuid: uuid(),
|
|
180
175
|
raw: endTagRaw,
|
|
181
176
|
startOffset: startOffset + offsetOffset,
|
|
182
177
|
endOffset: endOffset + offsetOffset,
|
|
@@ -199,10 +194,10 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
199
194
|
};
|
|
200
195
|
}
|
|
201
196
|
const _endOffset = startOffset + startTagRaw.length;
|
|
202
|
-
const _endLine =
|
|
203
|
-
const _endCol =
|
|
197
|
+
const _endLine = getEndLine(startTagRaw, startLine);
|
|
198
|
+
const _endCol = getEndCol(startTagRaw, startCol);
|
|
204
199
|
const startTag = {
|
|
205
|
-
uuid:
|
|
200
|
+
uuid: uuid(),
|
|
206
201
|
raw: startTagRaw,
|
|
207
202
|
startOffset: startOffset + offsetOffset,
|
|
208
203
|
endOffset: _endOffset + offsetOffset,
|
|
@@ -213,7 +208,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
213
208
|
nodeName: tagName,
|
|
214
209
|
type: 'starttag',
|
|
215
210
|
namespace: getNamespace(originNode),
|
|
216
|
-
elementType:
|
|
211
|
+
elementType: detectElementType(tagName),
|
|
217
212
|
attributes: tagTokens.attrs,
|
|
218
213
|
hasSpreadAttr: false,
|
|
219
214
|
parentNode,
|
|
@@ -241,8 +236,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
|
241
236
|
* - If node has "content" property then parse as document fragment.
|
|
242
237
|
*/
|
|
243
238
|
function getChildNodes(rootNode) {
|
|
244
|
-
|
|
245
|
-
return rootNode.content ? rootNode.content.childNodes : (_a = rootNode.childNodes) !== null && _a !== void 0 ? _a : [];
|
|
239
|
+
return rootNode.content ? rootNode.content.childNodes : rootNode.childNodes ?? [];
|
|
246
240
|
}
|
|
247
241
|
function hasLocation(node) {
|
|
248
242
|
return 'sourceCodeLocation' in node;
|
package/lib/get-namespace.js
CHANGED
|
@@ -1,15 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.getNamespace = void 0;
|
|
4
|
-
const parse5_1 = require("parse5");
|
|
1
|
+
import { parse, parseFragment } from 'parse5';
|
|
5
2
|
const DEFAULT_NAMESPACE = 'http://www.w3.org/1999/xhtml';
|
|
6
|
-
function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
3
|
+
export function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
7
4
|
switch (parentNamespace) {
|
|
8
5
|
case 'http://www.w3.org/2000/svg':
|
|
9
6
|
case 'http://www.w3.org/1998/Math/MathML': {
|
|
10
7
|
const parent = parentNamespace === 'http://www.w3.org/2000/svg' ? 'svg' : 'math';
|
|
11
8
|
const tag = `<${parent}><${tagName}></${parent}>`;
|
|
12
|
-
const frag =
|
|
9
|
+
const frag = parseFragment(tag);
|
|
13
10
|
const node = frag.childNodes[0];
|
|
14
11
|
if (!node) {
|
|
15
12
|
return DEFAULT_NAMESPACE;
|
|
@@ -21,10 +18,10 @@ function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
|
21
18
|
}
|
|
22
19
|
}
|
|
23
20
|
const tag = `<${tagName}>`;
|
|
24
|
-
const frag =
|
|
21
|
+
const frag = parseFragment(tag);
|
|
25
22
|
let node = frag.childNodes[0];
|
|
26
23
|
if (!node) {
|
|
27
|
-
const doc =
|
|
24
|
+
const doc = parse(tag);
|
|
28
25
|
node = doc.childNodes[0];
|
|
29
26
|
}
|
|
30
27
|
if (node && 'namespaceURI' in node) {
|
|
@@ -32,4 +29,3 @@ function getNamespace(tagName, parentNamespace = DEFAULT_NAMESPACE) {
|
|
|
32
29
|
}
|
|
33
30
|
return DEFAULT_NAMESPACE;
|
|
34
31
|
}
|
|
35
|
-
exports.getNamespace = getNamespace;
|
package/lib/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export { default as attrTokenizer } from './attr-tokenizer';
|
|
2
|
-
export { default as isDocumentFragment } from './is-document-fragment';
|
|
3
|
-
export { default as parseRawTag } from './parse-raw-tag';
|
|
4
|
-
export { getNamespace } from './get-namespace';
|
|
5
|
-
export { parse } from './parse';
|
|
6
|
-
export { createTree } from './create-tree';
|
|
1
|
+
export { default as attrTokenizer } from './attr-tokenizer.js';
|
|
2
|
+
export { default as isDocumentFragment } from './is-document-fragment.js';
|
|
3
|
+
export { default as parseRawTag } from './parse-raw-tag.js';
|
|
4
|
+
export { getNamespace } from './get-namespace.js';
|
|
5
|
+
export { parse } from './parse.js';
|
|
6
|
+
export { createTree } from './create-tree.js';
|
package/lib/index.js
CHANGED
|
@@ -1,18 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
};
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
var attr_tokenizer_1 = require("./attr-tokenizer");
|
|
8
|
-
Object.defineProperty(exports, "attrTokenizer", { enumerable: true, get: function () { return __importDefault(attr_tokenizer_1).default; } });
|
|
9
|
-
var is_document_fragment_1 = require("./is-document-fragment");
|
|
10
|
-
Object.defineProperty(exports, "isDocumentFragment", { enumerable: true, get: function () { return __importDefault(is_document_fragment_1).default; } });
|
|
11
|
-
var parse_raw_tag_1 = require("./parse-raw-tag");
|
|
12
|
-
Object.defineProperty(exports, "parseRawTag", { enumerable: true, get: function () { return __importDefault(parse_raw_tag_1).default; } });
|
|
13
|
-
var get_namespace_1 = require("./get-namespace");
|
|
14
|
-
Object.defineProperty(exports, "getNamespace", { enumerable: true, get: function () { return get_namespace_1.getNamespace; } });
|
|
15
|
-
var parse_1 = require("./parse");
|
|
16
|
-
Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return parse_1.parse; } });
|
|
17
|
-
var create_tree_1 = require("./create-tree");
|
|
18
|
-
Object.defineProperty(exports, "createTree", { enumerable: true, get: function () { return create_tree_1.createTree; } });
|
|
1
|
+
export { default as attrTokenizer } from './attr-tokenizer.js';
|
|
2
|
+
export { default as isDocumentFragment } from './is-document-fragment.js';
|
|
3
|
+
export { default as parseRawTag } from './parse-raw-tag.js';
|
|
4
|
+
export { getNamespace } from './get-namespace.js';
|
|
5
|
+
export { parse } from './parse.js';
|
|
6
|
+
export { createTree } from './create-tree.js';
|
package/lib/is-document-fragment.js
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
function isDocumentFragment(html) {
|
|
1
|
+
export default function isDocumentFragment(html) {
|
|
4
2
|
return !/^\s*(<!doctype html(?:\s*.+)?>|<html(?:\s|>))/im.test(html);
|
|
5
3
|
}
|
|
6
|
-
exports.default = isDocumentFragment;
|
package/lib/optimize-starts-head-or-body.js
CHANGED
|
@@ -1,12 +1,8 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.optimizeStartsHeadTagOrBodyTagResume = exports.optimizeStartsHeadTagOrBodyTagSetup = exports.isStartsHeadTagOrBodyTag = void 0;
|
|
4
1
|
const UNDUPLICATED_CHAR = '\uFFFD';
|
|
5
|
-
function isStartsHeadTagOrBodyTag(rawCode) {
|
|
2
|
+
export function isStartsHeadTagOrBodyTag(rawCode) {
|
|
6
3
|
return /^\s*<(?:head|body)>/i.test(rawCode);
|
|
7
4
|
}
|
|
8
|
-
|
|
9
|
-
function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
5
|
+
export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
10
6
|
const heads = [];
|
|
11
7
|
const bodies = [];
|
|
12
8
|
const code = rawCode.replace(
|
|
@@ -33,8 +29,7 @@ function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
|
33
29
|
bodies,
|
|
34
30
|
};
|
|
35
31
|
}
|
|
36
|
-
|
|
37
|
-
function optimizeStartsHeadTagOrBodyTagResume(
|
|
32
|
+
export function optimizeStartsHeadTagOrBodyTagResume(
|
|
38
33
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
39
34
|
nodeList,
|
|
40
35
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
@@ -54,4 +49,3 @@ replacements) {
|
|
|
54
49
|
}
|
|
55
50
|
});
|
|
56
51
|
}
|
|
57
|
-
exports.optimizeStartsHeadTagOrBodyTagResume = optimizeStartsHeadTagOrBodyTagResume;
|
package/lib/parse-raw-tag.js
CHANGED
|
@@ -1,25 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
const tslib_1 = require("tslib");
|
|
4
|
-
const parser_utils_1 = require("@markuplint/parser-utils");
|
|
5
|
-
const attr_tokenizer_1 = tslib_1.__importDefault(require("./attr-tokenizer"));
|
|
1
|
+
import { reTag, reTagName, isPotentialCustomElementName, tokenizer } from '@markuplint/parser-utils';
|
|
2
|
+
import attrTokenizer from './attr-tokenizer.js';
|
|
6
3
|
// eslint-disable-next-line no-control-regex
|
|
7
4
|
const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
|
|
8
5
|
const reEndTokens = /(\s*\/)?(\s*)>$/;
|
|
9
|
-
function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
|
|
10
|
-
var _a, _b;
|
|
6
|
+
export default function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
|
|
11
7
|
let offset = startOffset + offsetOffset;
|
|
12
8
|
let line = startLine + offsetLine;
|
|
13
9
|
let col = startCol + (startLine === 1 ? offsetColumn : 0);
|
|
14
|
-
const matches = raw.match(
|
|
15
|
-
const tagWithAttrs = matches
|
|
10
|
+
const matches = raw.match(reTag);
|
|
11
|
+
const tagWithAttrs = matches?.[1];
|
|
16
12
|
if (!tagWithAttrs) {
|
|
17
13
|
throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
|
|
18
14
|
}
|
|
19
15
|
// eslint-disable-next-line no-control-regex
|
|
20
16
|
const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
|
|
21
17
|
const tagName = tagNameSplitted[0] || tagNameSplitted[1];
|
|
22
|
-
if (!tagName || (!
|
|
18
|
+
if (!tagName || (!reTagName.test(tagName) && !isPotentialCustomElementName(tagName))) {
|
|
23
19
|
throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
|
|
24
20
|
}
|
|
25
21
|
const tagStartPos = tagWithAttrs.indexOf(tagName);
|
|
@@ -32,7 +28,7 @@ function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, of
|
|
|
32
28
|
const attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
|
|
33
29
|
if (attrMatchedMap && attrMatchedMap[0]) {
|
|
34
30
|
const rawAttr = attrMatchedMap[0];
|
|
35
|
-
const attr = (
|
|
31
|
+
const attr = attrTokenizer(rawAttr, line, col, offset);
|
|
36
32
|
line = attr.endLine;
|
|
37
33
|
col = attr.endCol;
|
|
38
34
|
offset = attr.endOffset;
|
|
@@ -41,11 +37,11 @@ function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, of
|
|
|
41
37
|
}
|
|
42
38
|
}
|
|
43
39
|
const endTokens = reEndTokens.exec(raw);
|
|
44
|
-
const selfClosingSolidus =
|
|
40
|
+
const selfClosingSolidus = tokenizer(endTokens?.[1] ?? '', line, col, offset);
|
|
45
41
|
line = selfClosingSolidus.endLine;
|
|
46
42
|
col = selfClosingSolidus.endCol;
|
|
47
43
|
offset = selfClosingSolidus.endOffset;
|
|
48
|
-
const endSpace =
|
|
44
|
+
const endSpace = tokenizer(endTokens?.[2] ?? '', line, col, offset);
|
|
49
45
|
return {
|
|
50
46
|
tagName,
|
|
51
47
|
attrs,
|
|
@@ -53,4 +49,3 @@ function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, of
|
|
|
53
49
|
endSpace,
|
|
54
50
|
};
|
|
55
51
|
}
|
|
56
|
-
exports.default = parseRawTag;
|
package/lib/parse.js
CHANGED
|
@@ -1,29 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const optimize_starts_head_or_body_1 = require("./optimize-starts-head-or-body");
|
|
9
|
-
const parse = (rawCode, options) => {
|
|
10
|
-
var _a, _b, _c;
|
|
11
|
-
if (options === null || options === void 0 ? void 0 : options.ignoreFrontMatter) {
|
|
12
|
-
rawCode = (0, parser_utils_1.ignoreFrontMatter)(rawCode);
|
|
1
|
+
import { ignoreFrontMatter, flattenNodes } from '@markuplint/parser-utils';
|
|
2
|
+
import { createTree } from './create-tree.js';
|
|
3
|
+
import isDocumentFragment from './is-document-fragment.js';
|
|
4
|
+
import { isStartsHeadTagOrBodyTag, optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
|
|
5
|
+
export const parse = (rawCode, options) => {
|
|
6
|
+
if (options?.ignoreFrontMatter) {
|
|
7
|
+
rawCode = ignoreFrontMatter(rawCode);
|
|
13
8
|
}
|
|
14
|
-
const isFragment = (
|
|
15
|
-
const data =
|
|
16
|
-
if (data
|
|
9
|
+
const isFragment = isDocumentFragment(rawCode);
|
|
10
|
+
const data = isStartsHeadTagOrBodyTag(rawCode) ? optimizeStartsHeadTagOrBodyTagSetup(rawCode) : null;
|
|
11
|
+
if (data?.code) {
|
|
17
12
|
rawCode = data.code;
|
|
18
13
|
}
|
|
19
|
-
const nodeTree =
|
|
20
|
-
const nodeList =
|
|
14
|
+
const nodeTree = createTree(rawCode, isFragment, options?.offsetOffset ?? 0, options?.offsetLine ?? 0, options?.offsetColumn ?? 0);
|
|
15
|
+
const nodeList = flattenNodes(nodeTree, rawCode);
|
|
21
16
|
if (data) {
|
|
22
|
-
|
|
17
|
+
optimizeStartsHeadTagOrBodyTagResume(nodeList, data);
|
|
23
18
|
}
|
|
24
19
|
return {
|
|
25
20
|
nodeList,
|
|
26
21
|
isFragment,
|
|
27
22
|
};
|
|
28
23
|
};
|
|
29
|
-
exports.parse = parse;
|
package/package.json
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/html-parser",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "4.0.0-alpha.1",
|
|
4
4
|
"description": "HTML parser for markuplint",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
7
7
|
"license": "MIT",
|
|
8
8
|
"private": false,
|
|
9
|
-
"
|
|
9
|
+
"type": "module",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"import": "./lib/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
10
15
|
"types": "lib/index.d.ts",
|
|
11
16
|
"publishConfig": {
|
|
12
17
|
"access": "public"
|
|
@@ -20,11 +25,11 @@
|
|
|
20
25
|
"clean": "tsc --build --clean"
|
|
21
26
|
},
|
|
22
27
|
"dependencies": {
|
|
23
|
-
"@markuplint/ml-ast": "
|
|
24
|
-
"@markuplint/parser-utils": "
|
|
28
|
+
"@markuplint/ml-ast": "4.0.0-alpha.1",
|
|
29
|
+
"@markuplint/parser-utils": "4.0.0-alpha.1",
|
|
25
30
|
"parse5": "7.1.2",
|
|
26
|
-
"tslib": "^2.
|
|
27
|
-
"type-fest": "^
|
|
31
|
+
"tslib": "^2.6.1",
|
|
32
|
+
"type-fest": "^4.1.0"
|
|
28
33
|
},
|
|
29
|
-
"gitHead": "
|
|
34
|
+
"gitHead": "22502ee22a378ae766033d687dbc0443e5ed35dc"
|
|
30
35
|
}
|