@markuplint/html-parser 4.0.0-dev.10 → 4.0.0-dev.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/lib/index.d.ts +1 -3
- package/lib/index.js +1 -3
- package/lib/optimize-starts-head-or-body.d.ts +9 -7
- package/lib/optimize-starts-head-or-body.js +26 -8
- package/lib/parser.d.ts +33 -0
- package/lib/parser.js +162 -0
- package/lib/types.d.ts +9 -0
- package/lib/types.js +1 -0
- package/package.json +5 -5
- package/lib/create-tree.d.ts +0 -2
- package/lib/create-tree.js +0 -254
- package/lib/parse.d.ts +0 -2
- package/lib/parse.js +0 -23
package/LICENSE
CHANGED
package/lib/index.d.ts
CHANGED
package/lib/index.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
code: string;
|
|
5
|
-
heads: string[];
|
|
6
|
-
bodies: string[];
|
|
1
|
+
import type { HtmlParser } from './parser.js';
|
|
2
|
+
import type { MLASTNodeTreeItem } from '@markuplint/ml-ast';
|
|
3
|
+
export type Replacements = {
|
|
4
|
+
readonly code: string;
|
|
5
|
+
readonly heads: readonly string[];
|
|
6
|
+
readonly bodies: readonly string[];
|
|
7
7
|
};
|
|
8
|
-
export declare function
|
|
8
|
+
export declare function isStartsHeadTagOrBodyTag(rawCode: string): boolean;
|
|
9
|
+
export declare function optimizeStartsHeadTagOrBodyTagSetup(rawCode: string): Replacements;
|
|
10
|
+
export declare function optimizeStartsHeadTagOrBodyTagResume(parser: HtmlParser, nodeList: readonly MLASTNodeTreeItem[], replacements: Replacements): readonly MLASTNodeTreeItem[];
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
const UNDUPLICATED_CHAR = '\uFFFD';
|
|
2
2
|
export function isStartsHeadTagOrBodyTag(rawCode) {
|
|
3
|
-
return /^\s*<(?:head|body)
|
|
3
|
+
return /^\s*<(?:head|body)[\s>]/i.test(rawCode);
|
|
4
4
|
}
|
|
5
5
|
export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
6
|
+
if (!isStartsHeadTagOrBodyTag(rawCode)) {
|
|
7
|
+
return {
|
|
8
|
+
code: rawCode,
|
|
9
|
+
heads: [],
|
|
10
|
+
bodies: [],
|
|
11
|
+
};
|
|
12
|
+
}
|
|
6
13
|
const heads = [];
|
|
7
14
|
const bodies = [];
|
|
8
15
|
const code = rawCode.replaceAll(/(?<=<\/?)(?:head|body)(?=[\0\t\n\f />])/gi, tag => {
|
|
@@ -29,21 +36,32 @@ export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
|
|
|
29
36
|
}
|
|
30
37
|
export function optimizeStartsHeadTagOrBodyTagResume(
|
|
31
38
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
32
|
-
nodeList,
|
|
33
|
-
|
|
34
|
-
replacements
|
|
39
|
+
parser, nodeList, replacements) {
|
|
40
|
+
const heads = [...replacements.heads];
|
|
41
|
+
const bodies = [...replacements.bodies];
|
|
35
42
|
for (const node of nodeList) {
|
|
36
43
|
if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
|
|
37
44
|
continue;
|
|
38
45
|
}
|
|
39
|
-
const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ?
|
|
46
|
+
const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? heads.shift() : bodies.shift();
|
|
40
47
|
if (!realName) {
|
|
41
48
|
continue;
|
|
42
49
|
}
|
|
43
|
-
node.
|
|
44
|
-
|
|
50
|
+
if (node.type !== 'starttag' && node.type !== 'endtag') {
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
53
|
+
parser.updateRaw(node, node.raw.replace(node.nodeName, realName));
|
|
45
54
|
if (node.type === 'starttag') {
|
|
46
|
-
node
|
|
55
|
+
parser.updateElement(node, {
|
|
56
|
+
nodeName: realName,
|
|
57
|
+
elementType: 'html',
|
|
58
|
+
});
|
|
59
|
+
continue;
|
|
47
60
|
}
|
|
61
|
+
parser.updateElement(node, {
|
|
62
|
+
nodeName: realName,
|
|
63
|
+
});
|
|
64
|
+
continue;
|
|
48
65
|
}
|
|
66
|
+
return nodeList;
|
|
49
67
|
}
|
package/lib/parser.d.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { Replacements } from './optimize-starts-head-or-body.js';
|
|
2
|
+
import type { Node } from './types.js';
|
|
3
|
+
import type { MLASTNodeTreeItem, MLASTParentNode } from '@markuplint/ml-ast';
|
|
4
|
+
import type { ChildToken, ParseOptions, ParserOptions } from '@markuplint/parser-utils';
|
|
5
|
+
import { Parser } from '@markuplint/parser-utils';
|
|
6
|
+
type State = {
|
|
7
|
+
startsHeadTagOrBodyTag: Replacements | null;
|
|
8
|
+
afterPosition: {
|
|
9
|
+
endOffset: number;
|
|
10
|
+
endLine: number;
|
|
11
|
+
endCol: number;
|
|
12
|
+
depth: number;
|
|
13
|
+
};
|
|
14
|
+
};
|
|
15
|
+
type ExtendsOptions = Pick<ParserOptions, 'ignoreTags' | 'maskChar'>;
|
|
16
|
+
export declare class HtmlParser extends Parser<Node, State> {
|
|
17
|
+
constructor(options?: ExtendsOptions);
|
|
18
|
+
tokenize(): {
|
|
19
|
+
ast: import("node_modules/parse5/dist/tree-adapters/default.js").ChildNode[];
|
|
20
|
+
isFragment: boolean;
|
|
21
|
+
};
|
|
22
|
+
beforeParse(rawCode: string, options?: ParseOptions): string;
|
|
23
|
+
afterParse(nodeList: readonly MLASTNodeTreeItem[], options?: ParseOptions): readonly MLASTNodeTreeItem[];
|
|
24
|
+
nodeize(originNode: Node, parentNode: MLASTParentNode | null, depth: number): readonly MLASTNodeTreeItem[];
|
|
25
|
+
afterNodeize(siblings: readonly MLASTNodeTreeItem[], parentNode: MLASTParentNode | null, depth: number): {
|
|
26
|
+
siblings: import("@markuplint/ml-ast").MLASTChildNode[];
|
|
27
|
+
ancestors: MLASTNodeTreeItem[];
|
|
28
|
+
};
|
|
29
|
+
visitText(token: ChildToken): readonly MLASTNodeTreeItem[];
|
|
30
|
+
visitSpreadAttr(): null;
|
|
31
|
+
}
|
|
32
|
+
export declare const parser: HtmlParser;
|
|
33
|
+
export {};
|
package/lib/parser.js
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { Parser } from '@markuplint/parser-utils';
|
|
2
|
+
import { parse, parseFragment } from 'parse5';
|
|
3
|
+
import { isDocumentFragment } from './is-document-fragment.js';
|
|
4
|
+
import { optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
|
|
5
|
+
export class HtmlParser extends Parser {
|
|
6
|
+
constructor(options) {
|
|
7
|
+
super(options, {
|
|
8
|
+
startsHeadTagOrBodyTag: null,
|
|
9
|
+
afterPosition: {
|
|
10
|
+
endOffset: 0,
|
|
11
|
+
endLine: 1,
|
|
12
|
+
endCol: 1,
|
|
13
|
+
depth: 0,
|
|
14
|
+
},
|
|
15
|
+
});
|
|
16
|
+
}
|
|
17
|
+
tokenize() {
|
|
18
|
+
const isFragment = isDocumentFragment(this.rawCode);
|
|
19
|
+
const parseFn = isFragment ? parseFragment : parse;
|
|
20
|
+
const doc = parseFn(this.rawCode, {
|
|
21
|
+
scriptingEnabled: false,
|
|
22
|
+
sourceCodeLocationInfo: true,
|
|
23
|
+
});
|
|
24
|
+
const childNodes = doc.childNodes;
|
|
25
|
+
return {
|
|
26
|
+
ast: childNodes,
|
|
27
|
+
isFragment,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
beforeParse(rawCode, options) {
|
|
31
|
+
rawCode = super.beforeParse(rawCode, options);
|
|
32
|
+
const replacements = optimizeStartsHeadTagOrBodyTagSetup(rawCode);
|
|
33
|
+
if (replacements?.code) {
|
|
34
|
+
this.state.startsHeadTagOrBodyTag = replacements;
|
|
35
|
+
return replacements.code;
|
|
36
|
+
}
|
|
37
|
+
this.state.afterPosition = {
|
|
38
|
+
endOffset: (options?.offsetOffset ?? 0) + this.state.afterPosition.endOffset,
|
|
39
|
+
endLine: (options?.offsetLine ?? 0) + this.state.afterPosition.endLine,
|
|
40
|
+
endCol: (options?.offsetColumn ?? 0) + this.state.afterPosition.endCol,
|
|
41
|
+
depth: this.state.afterPosition.depth,
|
|
42
|
+
};
|
|
43
|
+
return rawCode;
|
|
44
|
+
}
|
|
45
|
+
afterParse(nodeList, options) {
|
|
46
|
+
nodeList = super.afterParse(nodeList, options);
|
|
47
|
+
if (this.state.startsHeadTagOrBodyTag) {
|
|
48
|
+
return optimizeStartsHeadTagOrBodyTagResume(this, nodeList, this.state.startsHeadTagOrBodyTag);
|
|
49
|
+
}
|
|
50
|
+
return nodeList;
|
|
51
|
+
}
|
|
52
|
+
nodeize(
|
|
53
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
54
|
+
originNode, parentNode, depth) {
|
|
55
|
+
const namespace = 'namespaceURI' in originNode ? originNode.namespaceURI : '';
|
|
56
|
+
const location = originNode.sourceCodeLocation;
|
|
57
|
+
if (!location) {
|
|
58
|
+
// Ghost element
|
|
59
|
+
const afterNode = this.state.afterPosition.depth === depth ? this.state.afterPosition : parentNode;
|
|
60
|
+
const startOffset = afterNode?.endOffset ?? 0;
|
|
61
|
+
const startLine = afterNode?.endLine ?? 0;
|
|
62
|
+
const startCol = afterNode?.endCol ?? 0;
|
|
63
|
+
const childNodes = 'childNodes' in originNode ? originNode.childNodes : [];
|
|
64
|
+
return this.visitElement({
|
|
65
|
+
raw: '',
|
|
66
|
+
startOffset,
|
|
67
|
+
startLine,
|
|
68
|
+
startCol,
|
|
69
|
+
depth,
|
|
70
|
+
parentNode,
|
|
71
|
+
nodeName: originNode.nodeName,
|
|
72
|
+
namespace,
|
|
73
|
+
}, childNodes);
|
|
74
|
+
}
|
|
75
|
+
const { startOffset, endOffset } = location;
|
|
76
|
+
const token = this.sliceFragment(startOffset, endOffset ?? startOffset);
|
|
77
|
+
switch (originNode.nodeName) {
|
|
78
|
+
case '#documentType': {
|
|
79
|
+
if (!('name' in originNode)) {
|
|
80
|
+
throw new TypeError("DocumentType doesn't have name");
|
|
81
|
+
}
|
|
82
|
+
return this.visitDoctype({
|
|
83
|
+
...token,
|
|
84
|
+
depth,
|
|
85
|
+
name: originNode.name ?? '',
|
|
86
|
+
publicId: originNode.publicId ?? '',
|
|
87
|
+
systemId: originNode.systemId ?? '',
|
|
88
|
+
parentNode,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
case '#text': {
|
|
92
|
+
return this.visitText({
|
|
93
|
+
...token,
|
|
94
|
+
depth,
|
|
95
|
+
parentNode,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
case '#comment': {
|
|
99
|
+
return this.visitComment({
|
|
100
|
+
...token,
|
|
101
|
+
depth,
|
|
102
|
+
parentNode,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
default: {
|
|
106
|
+
const tagLoc = 'startTag' in location ? location.startTag : null;
|
|
107
|
+
const offset = tagLoc?.startOffset ?? startOffset;
|
|
108
|
+
const endOffset = tagLoc?.endOffset ?? offset;
|
|
109
|
+
const startTagToken = this.sliceFragment(offset, endOffset);
|
|
110
|
+
const childNodes = 'childNodes' in originNode
|
|
111
|
+
? originNode.nodeName === 'template' && 'content' in originNode
|
|
112
|
+
? originNode.content.childNodes
|
|
113
|
+
: originNode.childNodes
|
|
114
|
+
: [];
|
|
115
|
+
return this.visitElement({
|
|
116
|
+
...startTagToken,
|
|
117
|
+
depth,
|
|
118
|
+
parentNode,
|
|
119
|
+
nodeName: originNode.nodeName,
|
|
120
|
+
namespace,
|
|
121
|
+
}, childNodes, {
|
|
122
|
+
createEndTagToken: () => {
|
|
123
|
+
const endTagLoc = 'endTag' in location ? location.endTag : null;
|
|
124
|
+
if (!endTagLoc) {
|
|
125
|
+
return null;
|
|
126
|
+
}
|
|
127
|
+
const { startOffset, endOffset } = endTagLoc;
|
|
128
|
+
const endTagToken = this.sliceFragment(startOffset, endOffset);
|
|
129
|
+
return {
|
|
130
|
+
...endTagToken,
|
|
131
|
+
depth,
|
|
132
|
+
parentNode,
|
|
133
|
+
};
|
|
134
|
+
},
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
afterNodeize(siblings, parentNode, depth) {
|
|
140
|
+
const after = super.afterNodeize(siblings, parentNode, depth);
|
|
141
|
+
const prevNode = after.siblings.at(-1) ?? after.ancestors.findLast(n => n.depth === depth);
|
|
142
|
+
if (prevNode) {
|
|
143
|
+
this.state.afterPosition = {
|
|
144
|
+
endOffset: prevNode.endOffset,
|
|
145
|
+
endLine: prevNode.endLine,
|
|
146
|
+
endCol: prevNode.endCol,
|
|
147
|
+
depth,
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
return after;
|
|
151
|
+
}
|
|
152
|
+
visitText(token) {
|
|
153
|
+
return super.visitText(token, {
|
|
154
|
+
researchTags: true,
|
|
155
|
+
invalidTagAsText: true,
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
visitSpreadAttr() {
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
export const parser = new HtmlParser();
|
package/lib/types.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type { ParserOptions, DefaultTreeAdapterMap } from 'parse5';
|
|
2
|
+
import type { DefaultTreeAdapterMap } from 'parse5';
|
|
3
|
+
export type Node = DefaultTreeAdapterMap['node'];
|
|
4
|
+
export type TextNode = DefaultTreeAdapterMap['textNode'];
|
|
5
|
+
export type Element = DefaultTreeAdapterMap['element'];
|
|
6
|
+
export type CommentNode = DefaultTreeAdapterMap['commentNode'];
|
|
7
|
+
export type Document = DefaultTreeAdapterMap['document'];
|
|
8
|
+
export type DocumentFragment = DefaultTreeAdapterMap['documentFragment'];
|
|
9
|
+
export type ChildNode = DefaultTreeAdapterMap['childNode'];
|
package/lib/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/html-parser",
|
|
3
|
-
"version": "4.0.0-dev.
|
|
3
|
+
"version": "4.0.0-dev.12+2275fbeb0",
|
|
4
4
|
"description": "HTML parser for markuplint",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -25,10 +25,10 @@
|
|
|
25
25
|
"clean": "tsc --build --clean"
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"@markuplint/ml-ast": "4.0.0-dev.
|
|
29
|
-
"@markuplint/parser-utils": "4.0.0-dev.
|
|
28
|
+
"@markuplint/ml-ast": "4.0.0-dev.12+2275fbeb0",
|
|
29
|
+
"@markuplint/parser-utils": "4.0.0-dev.12+2275fbeb0",
|
|
30
30
|
"parse5": "7.1.2",
|
|
31
|
-
"type-fest": "^4.
|
|
31
|
+
"type-fest": "^4.9.0"
|
|
32
32
|
},
|
|
33
|
-
"gitHead": "
|
|
33
|
+
"gitHead": "2275fbeb053605b636f080f4fafd7cd4fc57a9a3"
|
|
34
34
|
}
|
package/lib/create-tree.d.ts
DELETED
package/lib/create-tree.js
DELETED
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
// @ts-nocheck TODO: Parse5(https://github.com/inikulin/parse5) supports to expose type definitions as submodules.
|
|
2
|
-
import { detectElementType, getEndCol, getEndLine, sliceFragment, tagParser, uuid } from '@markuplint/parser-utils';
|
|
3
|
-
import { parse, parseFragment } from 'parse5';
|
|
4
|
-
const P5_OPTIONS = {
|
|
5
|
-
scriptingEnabled: false,
|
|
6
|
-
sourceCodeLocationInfo: true,
|
|
7
|
-
};
|
|
8
|
-
export function createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn) {
|
|
9
|
-
const doc = isFragment ? parseFragment(rawCode, P5_OPTIONS) : parse(rawCode, P5_OPTIONS);
|
|
10
|
-
return createTreeRecursive(doc, null, rawCode, offsetOffset, offsetLine, offsetColumn);
|
|
11
|
-
}
|
|
12
|
-
function createTreeRecursive(rootNode,
|
|
13
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
14
|
-
parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
15
|
-
const nodeList = [];
|
|
16
|
-
const childNodes = getChildNodes(rootNode);
|
|
17
|
-
let prevNode = null;
|
|
18
|
-
for (const p5node of childNodes) {
|
|
19
|
-
const node = nodeize(p5node, prevNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn);
|
|
20
|
-
if (!node) {
|
|
21
|
-
continue;
|
|
22
|
-
}
|
|
23
|
-
if (prevNode) {
|
|
24
|
-
if (node.type !== 'endtag') {
|
|
25
|
-
prevNode.nextNode = node;
|
|
26
|
-
}
|
|
27
|
-
node.prevNode = prevNode;
|
|
28
|
-
}
|
|
29
|
-
prevNode = node;
|
|
30
|
-
nodeList.push(node);
|
|
31
|
-
}
|
|
32
|
-
return nodeList;
|
|
33
|
-
}
|
|
34
|
-
function nodeize(originNode,
|
|
35
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
36
|
-
prevNode,
|
|
37
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
|
38
|
-
parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
|
|
39
|
-
const nextNode = null;
|
|
40
|
-
const location = getLocation(originNode);
|
|
41
|
-
if (!location) {
|
|
42
|
-
const prevToken = prevNode ?? parentNode;
|
|
43
|
-
const startOffset = prevToken ? prevToken.endOffset : 0;
|
|
44
|
-
const endOffset = prevToken ? prevToken.endOffset : 0;
|
|
45
|
-
const startLine = prevToken ? prevToken.endLine : 0;
|
|
46
|
-
const endLine = prevToken ? prevToken.endLine : 0;
|
|
47
|
-
const startCol = prevToken ? prevToken.endCol : 0;
|
|
48
|
-
const endCol = prevToken ? prevToken.endCol : 0;
|
|
49
|
-
const node = {
|
|
50
|
-
uuid: uuid(),
|
|
51
|
-
raw: '',
|
|
52
|
-
startOffset: startOffset + offsetOffset,
|
|
53
|
-
endOffset: endOffset + offsetOffset,
|
|
54
|
-
startLine: startLine + offsetLine,
|
|
55
|
-
endLine: endLine + offsetLine,
|
|
56
|
-
startCol: startCol + (startLine === 1 ? offsetColumn : 0),
|
|
57
|
-
endCol: endCol + (endLine === 1 ? offsetColumn : 0),
|
|
58
|
-
nodeName: originNode.nodeName,
|
|
59
|
-
type: 'starttag',
|
|
60
|
-
namespace: getNamespace(originNode),
|
|
61
|
-
elementType: 'html',
|
|
62
|
-
attributes: [],
|
|
63
|
-
hasSpreadAttr: false,
|
|
64
|
-
pearNode: null,
|
|
65
|
-
tagCloseChar: '',
|
|
66
|
-
tagOpenChar: '',
|
|
67
|
-
parentNode,
|
|
68
|
-
prevNode,
|
|
69
|
-
nextNode,
|
|
70
|
-
isFragment: false,
|
|
71
|
-
isGhost: true,
|
|
72
|
-
};
|
|
73
|
-
node.childNodes = createTreeRecursive(originNode, node, rawHtml, offsetOffset, offsetLine, offsetColumn);
|
|
74
|
-
return node;
|
|
75
|
-
}
|
|
76
|
-
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = location;
|
|
77
|
-
const raw = rawHtml.slice(startOffset, endOffset ?? startOffset);
|
|
78
|
-
switch (originNode.nodeName) {
|
|
79
|
-
case '#documentType': {
|
|
80
|
-
return {
|
|
81
|
-
uuid: uuid(),
|
|
82
|
-
raw,
|
|
83
|
-
// @ts-ignore
|
|
84
|
-
name: originNode.name ?? '',
|
|
85
|
-
// @ts-ignore
|
|
86
|
-
publicId: originNode.publicId ?? '',
|
|
87
|
-
// @ts-ignore
|
|
88
|
-
systemId: originNode.systemId ?? '',
|
|
89
|
-
startOffset: startOffset + offsetOffset,
|
|
90
|
-
endOffset: endOffset + offsetOffset,
|
|
91
|
-
startLine: startLine + offsetLine,
|
|
92
|
-
endLine: endLine + offsetLine,
|
|
93
|
-
startCol: startCol + (startLine === 1 ? offsetColumn : 0),
|
|
94
|
-
endCol: endCol + (endLine === 1 ? offsetColumn : 0),
|
|
95
|
-
nodeName: '#doctype',
|
|
96
|
-
type: 'doctype',
|
|
97
|
-
parentNode,
|
|
98
|
-
prevNode,
|
|
99
|
-
_addPrevNode: 102,
|
|
100
|
-
nextNode,
|
|
101
|
-
isFragment: false,
|
|
102
|
-
isGhost: false,
|
|
103
|
-
};
|
|
104
|
-
}
|
|
105
|
-
case '#text': {
|
|
106
|
-
const node = {
|
|
107
|
-
uuid: uuid(),
|
|
108
|
-
raw,
|
|
109
|
-
startOffset: startOffset + offsetOffset,
|
|
110
|
-
endOffset: endOffset + offsetOffset,
|
|
111
|
-
startLine: startLine + offsetLine,
|
|
112
|
-
endLine: endLine + offsetLine,
|
|
113
|
-
startCol: startCol + (startLine === 1 ? offsetColumn : 0),
|
|
114
|
-
endCol: endCol + (endLine === 1 ? offsetColumn : 0),
|
|
115
|
-
nodeName: '#text',
|
|
116
|
-
type: 'text',
|
|
117
|
-
parentNode,
|
|
118
|
-
prevNode,
|
|
119
|
-
nextNode,
|
|
120
|
-
isFragment: false,
|
|
121
|
-
isGhost: false,
|
|
122
|
-
};
|
|
123
|
-
return node;
|
|
124
|
-
}
|
|
125
|
-
case '#comment': {
|
|
126
|
-
return {
|
|
127
|
-
uuid: uuid(),
|
|
128
|
-
raw,
|
|
129
|
-
startOffset: startOffset + offsetOffset,
|
|
130
|
-
endOffset: endOffset + offsetOffset,
|
|
131
|
-
startLine: startLine + offsetLine,
|
|
132
|
-
endLine: endLine + offsetLine,
|
|
133
|
-
startCol: startCol + (startLine === 1 ? offsetColumn : 0),
|
|
134
|
-
endCol: endCol + (endLine === 1 ? offsetColumn : 0),
|
|
135
|
-
nodeName: '#comment',
|
|
136
|
-
type: 'comment',
|
|
137
|
-
parentNode,
|
|
138
|
-
prevNode,
|
|
139
|
-
nextNode,
|
|
140
|
-
isFragment: false,
|
|
141
|
-
isGhost: false,
|
|
142
|
-
};
|
|
143
|
-
}
|
|
144
|
-
default: {
|
|
145
|
-
const tagLoc = 'startTag' in location ? location.startTag : null;
|
|
146
|
-
const startTagRaw = tagLoc
|
|
147
|
-
? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
|
|
148
|
-
: rawHtml.slice(startOffset, endOffset ?? startOffset);
|
|
149
|
-
const tagTokens = tagParser(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
150
|
-
const tagName = tagTokens.tagName;
|
|
151
|
-
let endTag = null;
|
|
152
|
-
let endTagLoc = 'endTag' in location ? location.endTag : null;
|
|
153
|
-
/**
|
|
154
|
-
* Patch: Create endTag for SVG Element
|
|
155
|
-
* @see https://github.com/inikulin/parse5/issues/352
|
|
156
|
-
*/
|
|
157
|
-
if (!endTagLoc &&
|
|
158
|
-
'namespaceURI' in originNode &&
|
|
159
|
-
originNode.namespaceURI === 'http://www.w3.org/2000/svg') {
|
|
160
|
-
const belowRawHTMLFromStartTagEnd = rawHtml.slice(location.endOffset);
|
|
161
|
-
const endTagMatched = belowRawHTMLFromStartTagEnd.match(new RegExp(`^</\\s*${tagName}[^>]*>`, 'm'));
|
|
162
|
-
const endTag = endTagMatched && endTagMatched[0];
|
|
163
|
-
if (endTag) {
|
|
164
|
-
endTagLoc = sliceFragment(rawHtml, location.endOffset, location.endOffset + endTag.length);
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
if (endTagLoc) {
|
|
168
|
-
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
|
|
169
|
-
const endTagRaw = rawHtml.slice(startOffset, endOffset);
|
|
170
|
-
const endTagTokens = tagParser(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
|
|
171
|
-
const endTagName = endTagTokens.tagName;
|
|
172
|
-
endTag = {
|
|
173
|
-
uuid: uuid(),
|
|
174
|
-
raw: endTagRaw,
|
|
175
|
-
startOffset: startOffset + offsetOffset,
|
|
176
|
-
endOffset: endOffset + offsetOffset,
|
|
177
|
-
startLine: startLine + offsetLine,
|
|
178
|
-
endLine: endLine + offsetLine,
|
|
179
|
-
startCol: startCol + (startLine === 1 ? offsetColumn : 0),
|
|
180
|
-
endCol: endCol + (endLine === 1 ? offsetColumn : 0),
|
|
181
|
-
nodeName: endTagName,
|
|
182
|
-
type: 'endtag',
|
|
183
|
-
namespace: getNamespace(originNode),
|
|
184
|
-
attributes: endTagTokens.attrs,
|
|
185
|
-
parentNode,
|
|
186
|
-
prevNode,
|
|
187
|
-
nextNode,
|
|
188
|
-
pearNode: null,
|
|
189
|
-
isFragment: false,
|
|
190
|
-
isGhost: false,
|
|
191
|
-
tagOpenChar: '</',
|
|
192
|
-
tagCloseChar: '>',
|
|
193
|
-
};
|
|
194
|
-
}
|
|
195
|
-
const _endOffset = startOffset + startTagRaw.length;
|
|
196
|
-
const _endLine = getEndLine(startTagRaw, startLine);
|
|
197
|
-
const _endCol = getEndCol(startTagRaw, startCol);
|
|
198
|
-
const startTag = {
|
|
199
|
-
uuid: uuid(),
|
|
200
|
-
raw: startTagRaw,
|
|
201
|
-
startOffset: startOffset + offsetOffset,
|
|
202
|
-
endOffset: _endOffset + offsetOffset,
|
|
203
|
-
startLine: startLine + offsetLine,
|
|
204
|
-
endLine: _endLine + offsetLine,
|
|
205
|
-
startCol: startCol + (startLine === 1 ? offsetColumn : 0),
|
|
206
|
-
endCol: _endCol + (startLine === _endLine ? offsetColumn : 0),
|
|
207
|
-
nodeName: tagName,
|
|
208
|
-
type: 'starttag',
|
|
209
|
-
namespace: getNamespace(originNode),
|
|
210
|
-
elementType: detectElementType(tagName),
|
|
211
|
-
attributes: tagTokens.attrs,
|
|
212
|
-
hasSpreadAttr: false,
|
|
213
|
-
parentNode,
|
|
214
|
-
prevNode,
|
|
215
|
-
nextNode,
|
|
216
|
-
pearNode: endTag,
|
|
217
|
-
selfClosingSolidus: tagTokens.selfClosingSolidus,
|
|
218
|
-
endSpace: tagTokens.endSpace,
|
|
219
|
-
isFragment: false,
|
|
220
|
-
isGhost: false,
|
|
221
|
-
tagOpenChar: '<',
|
|
222
|
-
tagCloseChar: '>',
|
|
223
|
-
};
|
|
224
|
-
if (endTag) {
|
|
225
|
-
endTag.pearNode = startTag;
|
|
226
|
-
}
|
|
227
|
-
startTag.childNodes = createTreeRecursive(originNode, startTag, rawHtml, offsetOffset, offsetLine, offsetColumn);
|
|
228
|
-
return startTag;
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
/**
|
|
233
|
-
* getChildNodes
|
|
234
|
-
*
|
|
235
|
-
* - If node has "content" property then parse as document fragment.
|
|
236
|
-
*/
|
|
237
|
-
function getChildNodes(rootNode) {
|
|
238
|
-
return rootNode.content ? rootNode.content.childNodes : rootNode.childNodes ?? [];
|
|
239
|
-
}
|
|
240
|
-
function hasLocation(node) {
|
|
241
|
-
return 'sourceCodeLocation' in node;
|
|
242
|
-
}
|
|
243
|
-
function getLocation(node) {
|
|
244
|
-
if (hasLocation(node) && node.sourceCodeLocation) {
|
|
245
|
-
return node.sourceCodeLocation;
|
|
246
|
-
}
|
|
247
|
-
return null;
|
|
248
|
-
}
|
|
249
|
-
function getNamespace(node) {
|
|
250
|
-
if ('namespaceURI' in node) {
|
|
251
|
-
return node.namespaceURI;
|
|
252
|
-
}
|
|
253
|
-
return '';
|
|
254
|
-
}
|
package/lib/parse.d.ts
DELETED
package/lib/parse.js
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { ignoreFrontMatter, flattenNodes } from '@markuplint/parser-utils';
|
|
2
|
-
import { createTree } from './create-tree.js';
|
|
3
|
-
import { isDocumentFragment } from './is-document-fragment.js';
|
|
4
|
-
import { isStartsHeadTagOrBodyTag, optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
|
|
5
|
-
export const parse = (rawCode, options) => {
|
|
6
|
-
if (options?.ignoreFrontMatter) {
|
|
7
|
-
rawCode = ignoreFrontMatter(rawCode);
|
|
8
|
-
}
|
|
9
|
-
const isFragment = isDocumentFragment(rawCode);
|
|
10
|
-
const data = isStartsHeadTagOrBodyTag(rawCode) ? optimizeStartsHeadTagOrBodyTagSetup(rawCode) : null;
|
|
11
|
-
if (data?.code) {
|
|
12
|
-
rawCode = data.code;
|
|
13
|
-
}
|
|
14
|
-
const nodeTree = createTree(rawCode, isFragment, options?.offsetOffset ?? 0, options?.offsetLine ?? 0, options?.offsetColumn ?? 0);
|
|
15
|
-
const nodeList = flattenNodes(nodeTree, rawCode);
|
|
16
|
-
if (data) {
|
|
17
|
-
optimizeStartsHeadTagOrBodyTagResume(nodeList, data);
|
|
18
|
-
}
|
|
19
|
-
return {
|
|
20
|
-
nodeList,
|
|
21
|
-
isFragment,
|
|
22
|
-
};
|
|
23
|
-
};
|