@markuplint/html-parser 3.13.0 → 4.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,261 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createTree = void 0;
4
- const tslib_1 = require("tslib");
5
- const parser_utils_1 = require("@markuplint/parser-utils");
6
- const parse5_1 = require("parse5");
7
- const parse_raw_tag_1 = tslib_1.__importDefault(require("./parse-raw-tag"));
8
/**
 * Options handed to every parse5 `parse` / `parseFragment` call made by `createTree`.
 */
const P5_OPTIONS = {
    // Required: the tree builder in this module reads `sourceCodeLocation` from each parse5 node.
    sourceCodeLocationInfo: true,
    // NOTE(review): presumably keeps <noscript> content parsed as markup — confirm against parse5 docs.
    scriptingEnabled: false,
};
12
/**
 * Parses raw HTML with parse5 and converts the result into a tree of AST nodes.
 *
 * @param rawCode      HTML source text.
 * @param isFragment   When truthy the source is parsed with `parseFragment`, otherwise with `parse`.
 * @param offsetOffset Value forwarded to the node converter as the character-offset shift.
 * @param offsetLine   Value forwarded to the node converter as the line shift.
 * @param offsetColumn Value forwarded to the node converter as the column shift.
 * @returns The list of converted top-level nodes.
 */
function createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn) {
    // Pick the parse5 entry point first, then call it detached (no `this`), as the original did.
    const parseWith = isFragment ? parse5_1.parseFragment : parse5_1.parse;
    const p5Root = parseWith(rawCode, P5_OPTIONS);
    // The root has no parent node, hence `null`.
    return createTreeRecursive(p5Root, null, rawCode, offsetOffset, offsetLine, offsetColumn);
}
16
- exports.createTree = createTree;
17
/**
 * Converts every child of a parse5 node and links the results as siblings.
 *
 * Each converted node receives `prevNode`; the previous sibling receives `nextNode`
 * unless the new node is of type `'endtag'`.
 *
 * @param rootNode     parse5 node whose children are converted.
 * @param parentNode   Already-converted parent handed to each child (or `null`).
 * @param rawHtml      Source text, forwarded to the converter.
 * @param offsetOffset Character-offset shift, forwarded to the converter.
 * @param offsetLine   Line shift, forwarded to the converter.
 * @param offsetColumn Column shift, forwarded to the converter.
 * @returns The converted children in document order.
 */
function createTreeRecursive(rootNode,
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
    const siblings = [];
    let previous = null;
    for (const p5Child of getChildNodes(rootNode)) {
        const current = nodeize(p5Child, previous, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn);
        if (current) {
            if (previous) {
                // End tags never become the `nextNode` of their predecessor.
                if (current.type !== 'endtag') {
                    previous.nextNode = current;
                }
                current.prevNode = previous;
            }
            previous = current;
            siblings.push(current);
        }
    }
    return siblings;
}
39
/**
 * Converts one parse5 node into an AST node and recursively converts its children.
 *
 * - A node without source location becomes a zero-width "ghost" start tag
 *   (`isGhost: true`, `raw: ''`) positioned at the end of the previous sibling,
 *   or of the parent when there is no previous sibling.
 * - `#documentType`, `#text` and `#comment` become leaf nodes.
 * - Any other node becomes a start-tag node; when an end tag is located, an
 *   end-tag node is created and the two reference each other through `pearNode`.
 *
 * Positions read from parse5 are shifted by `offsetOffset` / `offsetLine`;
 * `offsetColumn` is added only to columns that lie on line 1 of the parsed source.
 *
 * @param originNode   parse5 node to convert.
 * @param prevNode     Previously converted sibling, or `null`.
 * @param parentNode   Converted parent node, or `null`.
 * @param rawHtml      Source text the parse5 locations refer to.
 * @param offsetOffset Shift added to character offsets.
 * @param offsetLine   Shift added to line numbers.
 * @param offsetColumn Shift added to columns on line 1.
 * @returns The converted node.
 */
function nodeize(originNode,
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
prevNode,
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
    var _a, _b, _c;
    const nextNode = null;
    const location = getLocation(originNode);
    if (!location) {
        // Ghost node: parse5 has no location for it, so anchor it after the nearest converted node.
        const anchor = prevNode !== null && prevNode !== void 0 ? prevNode : parentNode;
        // The anchor was produced by this function, so its positions ALREADY include the
        // offsets; adding them again would shift the ghost node twice. Without an anchor,
        // fall back to the bare offsets (column 0).
        const ghostOffset = anchor ? anchor.endOffset : offsetOffset;
        const ghostLine = anchor ? anchor.endLine : offsetLine;
        const ghostCol = anchor ? anchor.endCol : 0;
        const node = {
            uuid: (0, parser_utils_1.uuid)(),
            raw: '',
            startOffset: ghostOffset,
            endOffset: ghostOffset,
            startLine: ghostLine,
            endLine: ghostLine,
            startCol: ghostCol,
            endCol: ghostCol,
            nodeName: originNode.nodeName,
            type: 'starttag',
            namespace: getNamespace(originNode),
            elementType: 'html',
            attributes: [],
            hasSpreadAttr: false,
            pearNode: null,
            tagCloseChar: '',
            tagOpenChar: '',
            parentNode,
            prevNode,
            nextNode,
            isFragment: false,
            isGhost: true,
        };
        node.childNodes = createTreeRecursive(originNode, node, rawHtml, offsetOffset, offsetLine, offsetColumn);
        return node;
    }
    const { startOffset, endOffset, startLine, endLine, startCol, endCol } = location;
    const raw = rawHtml.slice(startOffset, endOffset !== null && endOffset !== void 0 ? endOffset : startOffset);
    switch (originNode.nodeName) {
        case '#documentType': {
            return {
                uuid: (0, parser_utils_1.uuid)(),
                raw,
                // @ts-ignore
                name: (_a = originNode.name) !== null && _a !== void 0 ? _a : '',
                // @ts-ignore
                publicId: (_b = originNode.publicId) !== null && _b !== void 0 ? _b : '',
                // @ts-ignore
                systemId: (_c = originNode.systemId) !== null && _c !== void 0 ? _c : '',
                startOffset: startOffset + offsetOffset,
                endOffset: endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: endLine + offsetLine,
                startCol: startCol + (startLine === 1 ? offsetColumn : 0),
                endCol: endCol + (endLine === 1 ? offsetColumn : 0),
                nodeName: '#doctype',
                type: 'doctype',
                parentNode,
                prevNode,
                // NOTE(review): the consumer of this field is not visible here; kept unchanged.
                _addPrevNode: 102,
                nextNode,
                isFragment: false,
                isGhost: false,
            };
        }
        case '#text': {
            const node = {
                uuid: (0, parser_utils_1.uuid)(),
                raw,
                startOffset: startOffset + offsetOffset,
                endOffset: endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: endLine + offsetLine,
                startCol: startCol + (startLine === 1 ? offsetColumn : 0),
                endCol: endCol + (endLine === 1 ? offsetColumn : 0),
                nodeName: '#text',
                type: 'text',
                parentNode,
                prevNode,
                nextNode,
                isFragment: false,
                isGhost: false,
            };
            return node;
        }
        case '#comment': {
            return {
                uuid: (0, parser_utils_1.uuid)(),
                raw,
                startOffset: startOffset + offsetOffset,
                endOffset: endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: endLine + offsetLine,
                startCol: startCol + (startLine === 1 ? offsetColumn : 0),
                endCol: endCol + (endLine === 1 ? offsetColumn : 0),
                nodeName: '#comment',
                type: 'comment',
                parentNode,
                prevNode,
                nextNode,
                isFragment: false,
                isGhost: false,
            };
        }
        default: {
            const tagLoc = 'startTag' in location ? location.startTag : null;
            // Without a dedicated start-tag location the whole node text is used as the start tag.
            const startTagRaw = tagLoc ? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset) : raw;
            const tagTokens = (0, parse_raw_tag_1.default)(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
            const tagName = tagTokens.tagName;
            let endTag = null;
            let endTagLoc = 'endTag' in location ? location.endTag : null;
            /**
             * Patch: Create endTag for SVG Element
             * @see https://github.com/inikulin/parse5/issues/352
             */
            if (!endTagLoc &&
                'namespaceURI' in originNode &&
                originNode.namespaceURI === 'http://www.w3.org/2000/svg') {
                const belowRawHTMLFromStartTagEnd = rawHtml.slice(location.endOffset);
                // Escape the tag name so characters such as "." or "(" are matched literally
                // and cannot produce an invalid pattern.
                const escapedTagName = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                // No "m" flag: the end tag must start exactly at `location.endOffset`, because
                // that is where the fragment below is sliced from. With "m", a match on a
                // later line would have been sliced from the wrong position.
                const endTagMatched = belowRawHTMLFromStartTagEnd.match(new RegExp(`^</\\s*${escapedTagName}[^>]*>`));
                const endTagText = endTagMatched && endTagMatched[0];
                if (endTagText) {
                    endTagLoc = (0, parser_utils_1.sliceFragment)(rawHtml, location.endOffset, location.endOffset + endTagText.length);
                }
            }
            if (endTagLoc) {
                const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
                const endTagRaw = rawHtml.slice(startOffset, endOffset);
                const endTagTokens = (0, parse_raw_tag_1.default)(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
                const endTagName = endTagTokens.tagName;
                endTag = {
                    uuid: (0, parser_utils_1.uuid)(),
                    raw: endTagRaw,
                    startOffset: startOffset + offsetOffset,
                    endOffset: endOffset + offsetOffset,
                    startLine: startLine + offsetLine,
                    endLine: endLine + offsetLine,
                    startCol: startCol + (startLine === 1 ? offsetColumn : 0),
                    endCol: endCol + (endLine === 1 ? offsetColumn : 0),
                    nodeName: endTagName,
                    type: 'endtag',
                    namespace: getNamespace(originNode),
                    attributes: endTagTokens.attrs,
                    parentNode,
                    prevNode,
                    nextNode,
                    pearNode: null,
                    isFragment: false,
                    isGhost: false,
                    tagOpenChar: '</',
                    tagCloseChar: '>',
                };
            }
            // The start tag ends where its own raw text ends, not where the element ends.
            const _endOffset = startOffset + startTagRaw.length;
            const _endLine = (0, parser_utils_1.getEndLine)(startTagRaw, startLine);
            const _endCol = (0, parser_utils_1.getEndCol)(startTagRaw, startCol);
            const startTag = {
                uuid: (0, parser_utils_1.uuid)(),
                raw: startTagRaw,
                startOffset: startOffset + offsetOffset,
                endOffset: _endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: _endLine + offsetLine,
                startCol: startCol + (startLine === 1 ? offsetColumn : 0),
                // Same rule as every other node: the column shift applies only on line 1.
                endCol: _endCol + (_endLine === 1 ? offsetColumn : 0),
                nodeName: tagName,
                type: 'starttag',
                namespace: getNamespace(originNode),
                elementType: (0, parser_utils_1.detectElementType)(tagName),
                attributes: tagTokens.attrs,
                hasSpreadAttr: false,
                parentNode,
                prevNode,
                nextNode,
                pearNode: endTag,
                selfClosingSolidus: tagTokens.selfClosingSolidus,
                endSpace: tagTokens.endSpace,
                isFragment: false,
                isGhost: false,
                tagOpenChar: '<',
                tagCloseChar: '>',
            };
            if (endTag) {
                endTag.pearNode = startTag;
            }
            startTag.childNodes = createTreeRecursive(originNode, startTag, rawHtml, offsetOffset, offsetLine, offsetColumn);
            return startTag;
        }
    }
}
238
/**
 * Returns the children to walk for a parse5 node.
 *
 * A node carrying a truthy `content` property yields the children of that
 * content; otherwise its own `childNodes` are used, with an empty array as
 * the fallback when `childNodes` is `null` or `undefined`.
 */
function getChildNodes(rootNode) {
    if (rootNode.content) {
        return rootNode.content.childNodes;
    }
    const ownChildren = rootNode.childNodes;
    if (ownChildren === null || ownChildren === undefined) {
        return [];
    }
    return ownChildren;
}
247
/**
 * Tells whether a node has a `sourceCodeLocation` property (own or inherited),
 * regardless of the property's value.
 */
function hasLocation(node) {
    return Reflect.has(node, 'sourceCodeLocation');
}
250
/**
 * Returns the node's `sourceCodeLocation`, or `null` when the node has no
 * such property or the property is falsy.
 */
function getLocation(node) {
    if (!hasLocation(node)) {
        return null;
    }
    return node.sourceCodeLocation ? node.sourceCodeLocation : null;
}
256
/**
 * Returns the node's `namespaceURI` when the property exists on the node,
 * otherwise an empty string.
 */
function getNamespace(node) {
    return 'namespaceURI' in node ? node.namespaceURI : '';
}
@@ -1,17 +0,0 @@
1
- import type { MLASTAttr, MLToken } from '@markuplint/ml-ast';
2
/**
 * Result of tokenizing one raw tag.
 *
 * - `tagName`: the tag name found in the raw text.
 * - `attrs`: attribute tokens in source order.
 * - `selfClosingSolidus`: token for optional whitespace plus `/` directly before the closing `>`
 *   (empty raw text when absent).
 * - `endSpace`: token for the whitespace directly before the closing `>`.
 */
- type TagTokens = {
3
- tagName: string;
4
- attrs: MLASTAttr[];
5
- selfClosingSolidus: MLToken;
6
- endSpace: MLToken;
7
- };
8
/**
 * Tokenizes the raw text of a single tag into its name, attributes and closing tokens.
 *
 * @param raw          Raw tag text.
 * @param startLine    Line of the tag within the parsed source.
 * @param startCol     Column of the tag within the parsed source.
 * @param startOffset  Character offset of the tag within the parsed source.
 * @param offsetOffset Shift added to offsets (the implementation defaults it to 0).
 * @param offsetLine   Shift added to lines (the implementation defaults it to 0).
 * @param offsetColumn Shift added to the column when `startLine` is 1 (the implementation defaults it to 0).
 * @returns The tag name, attribute tokens, self-closing solidus token and trailing-space token.
 * @throws SyntaxError when the implementation cannot recognize the tag syntax or the tag name.
 */
- export default function parseRawTag(
9
- raw: string,
10
- startLine: number,
11
- startCol: number,
12
- startOffset: number,
13
- offsetOffset?: number,
14
- offsetLine?: number,
15
- offsetColumn?: number,
16
- ): TagTokens;
17
- export {};
@@ -1,56 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const tslib_1 = require("tslib");
4
- const parser_utils_1 = require("@markuplint/parser-utils");
5
- const attr_tokenizer_1 = tslib_1.__importDefault(require("./attr-tokenizer"));
6
/**
 * Matches one attribute, including its leading whitespace: a name, optionally followed by
 * `=` and a double-quoted, single-quoted or unquoted value.
 * The pattern is not anchored, so a match may start after characters it cannot consume.
 */
// eslint-disable-next-line no-control-regex
const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
/** Captures the optional self-closing solidus (group 1) and the whitespace (group 2) before the final `>`. */
const reEndTokens = /(\s*\/)?(\s*)>$/;
/**
 * Tokenizes the raw text of a single tag.
 *
 * @param raw          Raw tag text, e.g. `<div class="a">` or `</div>`.
 * @param startLine    Line of the tag within the parsed source.
 * @param startCol     Column of the tag within the parsed source.
 * @param startOffset  Character offset of the tag within the parsed source.
 * @param offsetOffset Shift added to offsets (default 0).
 * @param offsetLine   Shift added to lines (default 0).
 * @param offsetColumn Shift added to the column when `startLine` is 1 (default 0).
 * @returns `{ tagName, attrs, selfClosingSolidus, endSpace }`.
 * @throws {SyntaxError} When `raw` does not match the tag pattern, or the tag name is neither
 *   a valid tag name nor a potential custom element name.
 */
function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
    var _a, _b;
    let offset = startOffset + offsetOffset;
    let line = startLine + offsetLine;
    let col = startCol + (startLine === 1 ? offsetColumn : 0);
    const matches = raw.match(parser_utils_1.reTag);
    const tagWithAttrs = matches === null || matches === void 0 ? void 0 : matches[1];
    if (!tagWithAttrs) {
        throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
    }
    // eslint-disable-next-line no-control-regex
    const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
    // For an end tag the text starts with "/", so the first piece is empty and the name is the second.
    const tagName = tagNameSplitted[0] || tagNameSplitted[1];
    if (!tagName || (!parser_utils_1.reTagName.test(tagName) && !(0, parser_utils_1.isPotentialCustomElementName)(tagName))) {
        throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
    }
    const tagStartPos = tagWithAttrs.indexOf(tagName);
    let rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length);
    // "+ 1" accounts for the opening "<" that precedes `tagWithAttrs` in `raw`.
    col += tagName.length + 1 + tagStartPos;
    offset += tagName.length + 1 + tagStartPos;
    const attrs = [];
    let attrMatch = reAttrsInStartTag.exec(rawAttrs);
    while (attrMatch !== null) {
        // The pattern is unanchored, so the match may begin after characters it cannot
        // consume (a stray "/" or quote). Advance the position over them so the attribute
        // is located where it really is.
        const skipped = rawAttrs.slice(0, attrMatch.index);
        if (skipped) {
            line = (0, parser_utils_1.getEndLine)(skipped, line);
            col = (0, parser_utils_1.getEndCol)(skipped, col);
            offset += skipped.length;
        }
        const rawAttr = attrMatch[0];
        const attr = (0, attr_tokenizer_1.default)(rawAttr, line, col, offset);
        line = attr.endLine;
        col = attr.endCol;
        offset = attr.endOffset;
        // Continue after the END of the match; cutting only `rawAttr.length` characters from
        // the front would re-scan part of this attribute whenever characters were skipped.
        rawAttrs = rawAttrs.slice(attrMatch.index + rawAttr.length);
        attrs.push(attr);
        attrMatch = reAttrsInStartTag.exec(rawAttrs);
    }
    const endTokens = reEndTokens.exec(raw);
    const selfClosingSolidus = (0, parser_utils_1.tokenizer)((_a = endTokens === null || endTokens === void 0 ? void 0 : endTokens[1]) !== null && _a !== void 0 ? _a : '', line, col, offset);
    line = selfClosingSolidus.endLine;
    col = selfClosingSolidus.endCol;
    offset = selfClosingSolidus.endOffset;
    const endSpace = (0, parser_utils_1.tokenizer)((_b = endTokens === null || endTokens === void 0 ? void 0 : endTokens[2]) !== null && _b !== void 0 ? _b : '', line, col, offset);
    return {
        tagName,
        attrs,
        selfClosingSolidus,
        endSpace,
    };
}
56
- exports.default = parseRawTag;
package/lib/parse.d.ts DELETED
@@ -1,2 +0,0 @@
1
- import type { Parse } from '@markuplint/ml-ast';
2
/** Parser entry point; the implementation returns an object with `nodeList` and `isFragment`. */
- export declare const parse: Parse;
package/lib/parse.js DELETED
@@ -1,29 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.parse = void 0;
4
- const tslib_1 = require("tslib");
5
- const parser_utils_1 = require("@markuplint/parser-utils");
6
- const create_tree_1 = require("./create-tree");
7
- const is_document_fragment_1 = tslib_1.__importDefault(require("./is-document-fragment"));
8
- const optimize_starts_head_or_body_1 = require("./optimize-starts-head-or-body");
9
/**
 * Parses HTML source into a flat node list.
 *
 * Steps, in order: optionally strip front matter, detect whether the source is a
 * fragment, apply the head/body-start optimization when the source qualifies, build
 * the node tree, flatten it, then resume the optimization on the flattened list.
 *
 * @param rawCode HTML source text.
 * @param options Optional settings: `ignoreFrontMatter`, `offsetOffset`, `offsetLine`,
 *   `offsetColumn` (each offset falls back to 0 when `null` or `undefined`).
 * @returns `{ nodeList, isFragment }`.
 */
const parse = (rawCode, options) => {
    const settings = options === null || options === undefined ? {} : options;
    const orZero = (value) => (value === null || value === undefined ? 0 : value);
    let code = rawCode;
    if (settings.ignoreFrontMatter) {
        code = (0, parser_utils_1.ignoreFrontMatter)(code);
    }
    const isFragment = (0, is_document_fragment_1.default)(code);
    let data = null;
    if ((0, optimize_starts_head_or_body_1.isStartsHeadTagOrBodyTag)(code)) {
        data = (0, optimize_starts_head_or_body_1.optimizeStartsHeadTagOrBodyTagSetup)(code);
    }
    // Only swap in the optimized source when the setup step actually produced one.
    if (data !== null && data !== undefined && data.code) {
        code = data.code;
    }
    const nodeTree = (0, create_tree_1.createTree)(code, isFragment, orZero(settings.offsetOffset), orZero(settings.offsetLine), orZero(settings.offsetColumn));
    const nodeList = (0, parser_utils_1.flattenNodes)(nodeTree, code);
    if (data) {
        (0, optimize_starts_head_or_body_1.optimizeStartsHeadTagOrBodyTagResume)(nodeList, data);
    }
    return { nodeList, isFragment };
};
29
- exports.parse = parse;