@kksiuda/html-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlNode.d.ts +37 -0
- package/dist/HtmlNode.d.ts.map +1 -0
- package/dist/HtmlNode.js +141 -0
- package/dist/HtmlNode.js.map +1 -0
- package/dist/HtmlParser.d.ts +35 -0
- package/dist/HtmlParser.d.ts.map +1 -0
- package/dist/HtmlParser.js +261 -0
- package/dist/HtmlParser.js.map +1 -0
- package/dist/ParserNode.d.ts +20 -0
- package/dist/ParserNode.d.ts.map +1 -0
- package/dist/ParserNode.js +56 -0
- package/dist/ParserNode.js.map +1 -0
- package/dist/Selector.d.ts +9 -0
- package/dist/Selector.d.ts.map +1 -0
- package/dist/Selector.js +2 -0
- package/dist/Selector.js.map +1 -0
- package/dist/isVoidNode.d.ts +2 -0
- package/dist/isVoidNode.d.ts.map +1 -0
- package/dist/isVoidNode.js +20 -0
- package/dist/isVoidNode.js.map +1 -0
- package/dist/lib.d.ts +4 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +10 -0
- package/dist/lib.js.map +1 -0
- package/dist/parseHtml.d.ts +4 -0
- package/dist/parseHtml.d.ts.map +1 -0
- package/dist/parseHtml.js +5 -0
- package/dist/parseHtml.js.map +1 -0
- package/dist/parseHtml.spec.d.ts +2 -0
- package/dist/parseHtml.spec.d.ts.map +1 -0
- package/dist/parseHtml.spec.js +76 -0
- package/dist/parseHtml.spec.js.map +1 -0
- package/dist/parseSelector.d.ts +3 -0
- package/dist/parseSelector.d.ts.map +1 -0
- package/dist/parseSelector.js +87 -0
- package/dist/parseSelector.js.map +1 -0
- package/dist/parseSelector.spec.d.ts +2 -0
- package/dist/parseSelector.spec.d.ts.map +1 -0
- package/dist/parseSelector.spec.js +72 -0
- package/dist/parseSelector.spec.js.map +1 -0
- package/dist/tagRegex.d.ts +2 -0
- package/dist/tagRegex.d.ts.map +1 -0
- package/dist/tagRegex.js +8 -0
- package/dist/tagRegex.js.map +1 -0
- package/package.json +29 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { type ParserNode } from './ParserNode.js';
|
|
2
|
+
/** Placeholder `Error` used as the default value of `HtmlNode.error` when no real error was supplied. */
export declare const stubError: Error;
|
|
3
|
+
/**
 * Queryable view over a single parsed node.
 *
 * Nodes do not hold direct references to each other: children and parent are
 * stored as indices into the shared `allNodes` array.
 */
export declare class HtmlNode {
    // --- data copied from the ParserNode passed to the constructor ---

    /** Shared array holding every node of the document; child/parent indices point into it. */
    allNodes: HtmlNode[];
    private children;
    /** Class names attached to this node. */
    classes: string[];
    /** Index of the parent node, or `null` when there is none. */
    parent: number | null;
    /** Attribute name to attribute value. */
    attrs: Record<string, string>;
    /** Value of the node id, or `null`. */
    id: string | null;
    /** Tag name. */
    tag: string;
    /** Text carried by this node, or `null`. */
    textContent: string | null;
    /** `true` when this node represents text rather than an element. */
    isTextNode: boolean;

    // --- error state ---

    /** `true` only when an error was passed to the constructor. */
    isError: boolean;
    /** The error passed to the constructor, otherwise the `stubError` placeholder. */
    error: Error;

    /**
     * @param parserNode - Parser-side node whose fields are copied.
     * @param allNodes - Shared node array used to resolve child indices.
     * @param error - When given, marks the node as an error node.
     */
    constructor(parserNode: ParserNode, allNodes: HtmlNode[], error?: Error);

    // --- querying ---

    /** Returns the first node (this node included) matching `selector`, or `null`. */
    select(selector: string): HtmlNode | null;
    /**
     * Same as {@link HtmlNode.select} but never returns `null`.
     * @throws Error when nothing matches the selector.
     */
    selectOrThrow(selector: string): HtmlNode;
    /** Returns every node (this node included) matching `selector`. */
    selectAll(selector: string): HtmlNode[];
    private _select;
    private matchSelector;
    /** Returns the value of `attribute`, or `null` when it is not available. */
    getAttribute(attribute: string): string | null;

    // --- serialization ---

    /**
     * Collects the text of text nodes and joins the chunks with `separator`.
     * @param separator - Glue between text chunks (defaults to `''`).
     * @param recursive - Whether descendants are visited (defaults to `true`).
     */
    getTextContent(separator?: string, recursive?: boolean): string;
    /** Appends this node's text (and, when `recursive`, its descendants' text) to `chunks`. */
    protected _getTextContent(recursive: boolean, chunks: string[]): void;
    /** Returns a plain nested object with children resolved recursively. */
    toJSON(): Node;
    /** Serializes the node and its descendants to an HTML string. */
    toHTML(joinWith?: string): string;
}
|
|
27
|
+
/** Plain-object shape returned by `HtmlNode.toJSON()`; children are nested instead of indexed. */
interface Node {
    /** Tag name. */
    tag: string;
    /** Node id, or `null`. */
    id: string | null;
    /** Class names. */
    classes: string[];
    /** Attribute name to attribute value. */
    attrs: Record<string, string>;
    /** Text carried by the node, or `null`. */
    textContent: string | null;
    /** Child nodes in the same shape. */
    children: Node[];
}
|
|
35
|
+
/** Builds a stand-alone `HtmlNode` flagged as an error, carrying `new Error(errorMessage)`. */
export declare const makeErrorHtmlNode: (errorMessage: string) => HtmlNode;
|
|
36
|
+
export {};
|
|
37
|
+
//# sourceMappingURL=HtmlNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HtmlNode.d.ts","sourceRoot":"","sources":["../src/HtmlNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,UAAU,EAAkB,MAAM,iBAAiB,CAAC;AAIlE,eAAO,MAAM,SAAS,OAAgB,CAAC;AAEvC,qBAAa,QAAQ;IAcZ,QAAQ,EAAE,QAAQ,EAAE;IAb5B,OAAO,CAAC,QAAQ,CAAW;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,EAAE,EAAE,MAAM,GAAG,IAAI,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,UAAS;IAChB,KAAK,EAAE,KAAK,CAAa;IACzB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;IAClC,UAAU,EAAE,OAAO,CAAS;gBAGlC,UAAU,EAAE,UAAU,EACf,QAAQ,EAAE,QAAQ,EAAE,EAC3B,KAAK,CAAC,EAAE,KAAK;IAiBP,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI;IAOzC,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ;IAQzC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,EAAE;IAO9C,OAAO,CAAC,OAAO;IA6Bf,OAAO,CAAC,aAAa;IAsBd,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAI9C,cAAc,CAAC,SAAS,SAAK,EAAE,SAAS,UAAO,GAAG,MAAM;IAM/D,SAAS,CAAC,eAAe,CAAC,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAS9D,MAAM,IAAI,IAAI;IAWd,MAAM,CAAC,QAAQ,SAAK,GAAG,MAAM;CAepC;AACD,UAAU,IAAI;IACb,QAAQ,EAAE,IAAI,EAAE,CAAC;IACjB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,EAAE,EAAE,MAAM,GAAG,IAAI,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,eAAO,MAAM,iBAAiB,GAAI,cAAc,MAAM,aACI,CAAC"}
|
package/dist/HtmlNode.js
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { stubParserNode } from './ParserNode.js';
|
|
2
|
+
import { parseSelector } from './parseSelector.js';
|
|
3
|
+
// Shared placeholder error (empty message) used as the default `HtmlNode.error` value.
export const stubError = new Error('');
|
|
4
|
+
export class HtmlNode {
|
|
5
|
+
allNodes;
|
|
6
|
+
children;
|
|
7
|
+
classes;
|
|
8
|
+
parent;
|
|
9
|
+
attrs;
|
|
10
|
+
id;
|
|
11
|
+
tag;
|
|
12
|
+
isError = false;
|
|
13
|
+
error = stubError;
|
|
14
|
+
textContent = null;
|
|
15
|
+
isTextNode = false;
|
|
16
|
+
constructor(parserNode, allNodes, error) {
|
|
17
|
+
this.allNodes = allNodes;
|
|
18
|
+
this.children = parserNode.children;
|
|
19
|
+
this.classes = parserNode.classes;
|
|
20
|
+
this.attrs = parserNode.attrs;
|
|
21
|
+
this.id = parserNode.id;
|
|
22
|
+
this.tag = parserNode.tag;
|
|
23
|
+
this.parent = parserNode.parent;
|
|
24
|
+
this.textContent = parserNode.textContent;
|
|
25
|
+
this.isTextNode = parserNode.isTextNode;
|
|
26
|
+
if (error) {
|
|
27
|
+
this.isError = true;
|
|
28
|
+
this.error = error;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
select(selector) {
|
|
32
|
+
const selectors = parseSelector(selector);
|
|
33
|
+
const nodes = [];
|
|
34
|
+
this._select(selectors, true, nodes, 0);
|
|
35
|
+
return nodes[0] || null;
|
|
36
|
+
}
|
|
37
|
+
selectOrThrow(selector) {
|
|
38
|
+
const node = this.select(selector);
|
|
39
|
+
if (node) {
|
|
40
|
+
return node;
|
|
41
|
+
}
|
|
42
|
+
throw new Error(`Unable to find element for selector "${selector}"`);
|
|
43
|
+
}
|
|
44
|
+
selectAll(selector) {
|
|
45
|
+
const selectors = parseSelector(selector);
|
|
46
|
+
const nodes = [];
|
|
47
|
+
this._select(selectors, false, nodes, 0);
|
|
48
|
+
return nodes;
|
|
49
|
+
}
|
|
50
|
+
_select(selectors, returnEarly, matchedNodes, depth) {
|
|
51
|
+
let currentDepth = depth;
|
|
52
|
+
const isMatch = this.matchSelector(selectors[currentDepth]);
|
|
53
|
+
if (isMatch) {
|
|
54
|
+
if (currentDepth + 1 === selectors.length) {
|
|
55
|
+
matchedNodes.push(this);
|
|
56
|
+
if (returnEarly)
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
else {
|
|
60
|
+
currentDepth++;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
const l = this.children.length;
|
|
64
|
+
for (let i = 0; i < l; i++) {
|
|
65
|
+
const nodeFound = this.allNodes[this.children[i]]._select(selectors, returnEarly, matchedNodes, currentDepth);
|
|
66
|
+
if (nodeFound && returnEarly)
|
|
67
|
+
return true;
|
|
68
|
+
}
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
71
|
+
matchSelector(selectors) {
|
|
72
|
+
const l = selectors.length;
|
|
73
|
+
for (let i = 0; i < l; i++) {
|
|
74
|
+
const meta = selectors[i];
|
|
75
|
+
switch (meta.type) {
|
|
76
|
+
case 'attr':
|
|
77
|
+
if (this.attrs[meta.key] !== meta.value)
|
|
78
|
+
return false;
|
|
79
|
+
break;
|
|
80
|
+
case 'class':
|
|
81
|
+
if (!this.classes.includes(meta.value))
|
|
82
|
+
return false;
|
|
83
|
+
break;
|
|
84
|
+
case 'id':
|
|
85
|
+
if (this.id !== meta.value)
|
|
86
|
+
return false;
|
|
87
|
+
break;
|
|
88
|
+
case 'tag':
|
|
89
|
+
if (this.tag !== meta.value)
|
|
90
|
+
return false;
|
|
91
|
+
break;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
return true;
|
|
95
|
+
}
|
|
96
|
+
getAttribute(attribute) {
|
|
97
|
+
return this.attrs[attribute] || null;
|
|
98
|
+
}
|
|
99
|
+
getTextContent(separator = '', recursive = true) {
|
|
100
|
+
const chunks = [];
|
|
101
|
+
this._getTextContent(recursive, chunks);
|
|
102
|
+
return chunks.join(separator);
|
|
103
|
+
}
|
|
104
|
+
_getTextContent(recursive, chunks) {
|
|
105
|
+
if (this.isTextNode)
|
|
106
|
+
chunks.push(this.textContent || '');
|
|
107
|
+
if (recursive) {
|
|
108
|
+
for (const child of this.children) {
|
|
109
|
+
this.allNodes[child]._getTextContent(recursive, chunks);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
toJSON() {
|
|
114
|
+
return {
|
|
115
|
+
tag: this.tag,
|
|
116
|
+
id: this.id,
|
|
117
|
+
classes: this.classes,
|
|
118
|
+
attrs: this.attrs,
|
|
119
|
+
textContent: this.textContent,
|
|
120
|
+
children: this.children.map((i) => this.allNodes[i].toJSON()),
|
|
121
|
+
};
|
|
122
|
+
}
|
|
123
|
+
toHTML(joinWith = '') {
|
|
124
|
+
if (this.isTextNode)
|
|
125
|
+
return this.textContent || '';
|
|
126
|
+
return [
|
|
127
|
+
`<${this.tag}`,
|
|
128
|
+
this.id ? ` id="${this.id}"` : '',
|
|
129
|
+
Object.entries(this.attrs)
|
|
130
|
+
.map((key, value) => `${key}="${value}"`)
|
|
131
|
+
.join(' '),
|
|
132
|
+
'>',
|
|
133
|
+
this.children
|
|
134
|
+
.map((index) => this.allNodes[index].toHTML())
|
|
135
|
+
.join(joinWith),
|
|
136
|
+
`</${this.tag}>`,
|
|
137
|
+
].join(joinWith);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
/**
 * Creates an error-flagged HtmlNode backed by the stub parser node and an
 * empty node list; the node's `error` is a new Error with `errorMessage`.
 */
export const makeErrorHtmlNode = (errorMessage) => {
    const failure = new Error(errorMessage);
    return new HtmlNode(stubParserNode, [], failure);
};
|
|
141
|
+
//# sourceMappingURL=HtmlNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HtmlNode.js","sourceRoot":"","sources":["../src/HtmlNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD,MAAM,CAAC,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,EAAE,CAAC,CAAC;AAEvC,MAAM,OAAO,QAAQ;IAcZ;IAbA,QAAQ,CAAW;IACpB,OAAO,CAAW;IAClB,MAAM,CAAgB;IACtB,KAAK,CAAyB;IAC9B,EAAE,CAAgB;IAClB,GAAG,CAAS;IACZ,OAAO,GAAG,KAAK,CAAC;IAChB,KAAK,GAAU,SAAS,CAAC;IACzB,WAAW,GAAkB,IAAI,CAAC;IAClC,UAAU,GAAY,KAAK,CAAC;IAEnC,YACC,UAAsB,EACf,QAAoB,EAC3B,KAAa;QADN,aAAQ,GAAR,QAAQ,CAAY;QAG3B,IAAI,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC;QACpC,IAAI,CAAC,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC;QAClC,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC;QAC9B,IAAI,CAAC,EAAE,GAAG,UAAU,CAAC,EAAE,CAAC;QACxB,IAAI,CAAC,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC;QAC1B,IAAI,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;QAChC,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC,WAAW,CAAC;QAC1C,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC;QAExC,IAAI,KAAK,EAAE,CAAC;YACX,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;YACpB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACpB,CAAC;IACF,CAAC;IAEM,MAAM,CAAC,QAAgB;QAC7B,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,KAAK,GAAe,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACzB,CAAC;IAEM,aAAa,CAAC,QAAgB;QACpC,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACb,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,QAAQ,GAAG,CAAC,CAAC;IACtE,CAAC;IAEM,SAAS,CAAC,QAAgB;QAChC,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,KAAK,GAAe,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;QACzC,OAAO,KAAK,CAAC;IACd,CAAC;IAEO,OAAO,CACd,SAAuB,EACvB,WAAoB,EACpB,YAAwB,EACxB,KAAa;QAEb,IAAI,YAAY,GAAG,KAAK,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,CAAC;QAC5D,IAAI,OAAO,EAAE,CAAC;YACb,IAAI,YAAY,GAAG,CAAC,KAAK,SAAS,CAAC,MAAM,EAAE,CAAC;gBAC3C,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACxB,IAAI,WAAW;oBAAE,OAAO,IAAI,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACP,Y
AAY,EAAE,CAAC;YAChB,CAAC;QACF,CAAC;QACD,MAAM,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CACxD,SAAS,EACT,WAAW,EACX,YAAY,EACZ,YAAY,CACZ,CAAC;YACF,IAAI,SAAS,IAAI,WAAW;gBAAE,OAAO,IAAI,CAAC;QAC3C,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;IAEO,aAAa,CAAC,SAAqB;QAC1C,MAAM,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC1B,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;gBACnB,KAAK,MAAM;oBACV,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,KAAK;wBAAE,OAAO,KAAK,CAAC;oBACtD,MAAM;gBACP,KAAK,OAAO;oBACX,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;wBAAE,OAAO,KAAK,CAAC;oBACrD,MAAM;gBACP,KAAK,IAAI;oBACR,IAAI,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,KAAK;wBAAE,OAAO,KAAK,CAAC;oBACzC,MAAM;gBACP,KAAK,KAAK;oBACT,IAAI,IAAI,CAAC,GAAG,KAAK,IAAI,CAAC,KAAK;wBAAE,OAAO,KAAK,CAAC;oBAC1C,MAAM;YACR,CAAC;QACF,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC;IAEM,YAAY,CAAC,SAAiB;QACpC,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC;IACtC,CAAC;IAEM,cAAc,CAAC,SAAS,GAAG,EAAE,EAAE,SAAS,GAAG,IAAI;QACrD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QACxC,OAAO,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/B,CAAC;IAES,eAAe,CAAC,SAAkB,EAAE,MAAgB;QAC7D,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;QACzD,IAAI,SAAS,EAAE,CAAC;YACf,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACzD,CAAC;QACF,CAAC;IACF,CAAC;IAEM,MAAM;QACZ,OAAO;YACN,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SAC7D,CAAC;IACH,CAAC;IAEM,MAAM,CAAC,QAAQ,GAAG,E
AAE;QAC1B,IAAI,IAAI,CAAC,UAAU;YAAE,OAAO,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;QACnD,OAAO;YACN,IAAI,IAAI,CAAC,GAAG,EAAE;YACd,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE;YACjC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC;iBACxB,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,KAAK,GAAG,CAAC;iBACxC,IAAI,CAAC,GAAG,CAAC;YACX,GAAG;YACH,IAAI,CAAC,QAAQ;iBACX,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;iBAC7C,IAAI,CAAC,QAAQ,CAAC;YAChB,KAAK,IAAI,CAAC,GAAG,GAAG;SAChB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClB,CAAC;CACD;AAUD,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,YAAoB,EAAE,EAAE,CACzD,IAAI,QAAQ,CAAC,cAAc,EAAE,EAAE,EAAE,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { HtmlNode } from './HtmlNode.js';
|
|
2
|
+
/** Optional settings accepted by the `HtmlParser` constructor. */
export interface HtmlParserOptions {
    /** Attribute names recorded by the parser as ignored. */
    ignoreAttributes?: string[];
    /** Tag names whose elements are skipped entirely instead of becoming nodes. */
    skipNodeTypes?: string[];
}
|
|
6
|
+
/**
 * Single-pass HTML parser: construct it with the markup, then call `parse()`
 * to obtain the first parsed node (or an error node when nothing was parsed).
 */
export declare class HtmlParser {
    // --- input and accumulated output ---
    private html;
    private nodes;
    private htmlNodes;

    // --- cursor / current-element state ---
    private parents;
    private currentNode;
    private currentTag;
    private currentTagIsClosed;
    private currentNodeIndex;
    private index;

    // --- options ---
    private skipTags;
    private ignoreAttributes;

    /**
     * @param html - Markup to parse.
     * @param options - Optional tag-skipping and attribute-ignoring settings.
     */
    constructor(html: string, options?: HtmlParserOptions);

    /** Parses the markup and returns the first node, or an error node when no node was produced. */
    parse(): HtmlNode;

    // --- internal parsing steps ---
    private nextNode;
    private parseStyleOrScript;
    private moveCursorToNextTag;
    private parseComment;
    private setTag;
    private enterNode;
    private exitNode;
    private moveCursorAfterCloseBracket;
    private shouldSkipNode;
    private setAttrs;
    private isStyleOrScript;
    private isVoidNode;
    private setError;
    private skipNode;
}
|
|
35
|
+
//# sourceMappingURL=HtmlParser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HtmlParser.d.ts","sourceRoot":"","sources":["../src/HtmlParser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAqB,MAAM,eAAe,CAAC;AAM5D,MAAM,WAAW,iBAAiB;IACjC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC5B;AASD,qBAAa,UAAU;IAarB,OAAO,CAAC,IAAI;IAZb,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,WAAW,CAA2B;IAC9C,OAAO,CAAC,UAAU,CAAc;IAChC,OAAO,CAAC,kBAAkB,CAAiB;IAC3C,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,QAAQ,CAAgB;IAChC,OAAO,CAAC,gBAAgB,CAA+B;gBAG9C,IAAI,EAAE,MAAM,EACpB,OAAO,GAAE,iBAA4C;IAoB/C,KAAK,IAAI,QAAQ;IAexB,OAAO,CAAC,QAAQ;IA0BhB,OAAO,CAAC,kBAAkB;IAkB1B,OAAO,CAAC,mBAAmB;IAoC3B,OAAO,CAAC,YAAY;IAQpB,OAAO,CAAC,MAAM;IAWd,OAAO,CAAC,SAAS;IAiBjB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,2BAA2B;IAOnC,OAAO,CAAC,cAAc;IAGtB,OAAO,CAAC,QAAQ;IAsDhB,OAAO,CAAC,eAAe;IAIvB,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,QAAQ;IAKhB,OAAO,CAAC,QAAQ;CAqBhB"}
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import { HtmlNode, makeErrorHtmlNode } from './HtmlNode.js';
|
|
2
|
+
import { isVoidNode } from './isVoidNode.js';
|
|
3
|
+
import { getLast, hasElements, unquote } from './lib.js';
|
|
4
|
+
import { ParserNode, stubParserNode } from './ParserNode.js';
|
|
5
|
+
import { getTagRegex } from './tagRegex.js';
|
|
6
|
+
// All regexes below carry the `g` flag so callers can position them by
// assigning `lastIndex` before each `exec` call.
// Start of an opening or closing tag: '<', optional '/', lowercase tag name.
const tagRegex = /<\/?[a-z][a-z0-9_-]*/g;
|
|
7
|
+
// Attribute name (lowercase letter followed by lowercase letters, digits, '_' or '-').
const attrRegex = /[a-z][a-z0-9_-]*/g;
|
|
8
|
+
// '=' between an attribute name and its value, with optional surrounding whitespace.
const equalSignRegex = /\s*=\s*/g;
|
|
9
|
+
// Double-quoted attribute value, quotes included.
const quotedAttrValueRegex = /"[^"]*"/g;
|
|
10
|
+
// Unquoted attribute value: a run of characters other than space, '>' and '"'.
const attrValueRegex = /[^ >"]+/g;
|
|
11
|
+
// Default for the `options` constructor parameter of HtmlParser.
const defaultHtmlParserOptions = {};
|
|
12
|
+
export class HtmlParser {
|
|
13
|
+
html;
|
|
14
|
+
nodes = [];
|
|
15
|
+
htmlNodes = [];
|
|
16
|
+
parents = [];
|
|
17
|
+
currentNode = null;
|
|
18
|
+
currentTag = '';
|
|
19
|
+
currentTagIsClosed = true;
|
|
20
|
+
currentNodeIndex = 0;
|
|
21
|
+
index = 0;
|
|
22
|
+
skipTags = [];
|
|
23
|
+
ignoreAttributes = {};
|
|
24
|
+
constructor(html, options = defaultHtmlParserOptions) {
|
|
25
|
+
this.html = html;
|
|
26
|
+
if (options &&
|
|
27
|
+
'skipNodeTypes' in options &&
|
|
28
|
+
Array.isArray(options.skipNodeTypes)) {
|
|
29
|
+
this.skipTags = options.skipNodeTypes;
|
|
30
|
+
}
|
|
31
|
+
if (options &&
|
|
32
|
+
'ignoreAttributes' in options &&
|
|
33
|
+
Array.isArray(options.ignoreAttributes)) {
|
|
34
|
+
for (const attr of options.ignoreAttributes) {
|
|
35
|
+
this.ignoreAttributes[attr] = true;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
parse() {
|
|
40
|
+
while (this.nextNode()) { }
|
|
41
|
+
const nodeCount = this.nodes.length;
|
|
42
|
+
if (nodeCount > 0) {
|
|
43
|
+
const nodes = new Array(nodeCount);
|
|
44
|
+
for (let i = 0; i < nodeCount; i++)
|
|
45
|
+
nodes[i] = new HtmlNode(this.nodes[i], nodes);
|
|
46
|
+
this.htmlNodes = nodes;
|
|
47
|
+
}
|
|
48
|
+
if (this.htmlNodes.length > 0) {
|
|
49
|
+
return this.htmlNodes[0];
|
|
50
|
+
}
|
|
51
|
+
return makeErrorHtmlNode('Unknown error');
|
|
52
|
+
}
|
|
53
|
+
nextNode() {
|
|
54
|
+
if (!this.moveCursorToNextTag())
|
|
55
|
+
return false;
|
|
56
|
+
if (!this.setTag())
|
|
57
|
+
return false;
|
|
58
|
+
if (this.currentTagIsClosed) {
|
|
59
|
+
if (!this.moveCursorAfterCloseBracket())
|
|
60
|
+
return this.setError('Cannot find closing bracket');
|
|
61
|
+
this.exitNode();
|
|
62
|
+
return true;
|
|
63
|
+
}
|
|
64
|
+
if (this.shouldSkipNode())
|
|
65
|
+
return this.skipNode();
|
|
66
|
+
this.enterNode();
|
|
67
|
+
if (!this.setAttrs())
|
|
68
|
+
return false;
|
|
69
|
+
if (this.isStyleOrScript()) {
|
|
70
|
+
const result = this.parseStyleOrScript();
|
|
71
|
+
this.exitNode();
|
|
72
|
+
return result;
|
|
73
|
+
}
|
|
74
|
+
if (this.isVoidNode())
|
|
75
|
+
this.exitNode();
|
|
76
|
+
return true;
|
|
77
|
+
}
|
|
78
|
+
parseStyleOrScript() {
|
|
79
|
+
const endTag = this.currentTag === 'style' ? '</style>' : '</script>';
|
|
80
|
+
const textStartIndex = this.index;
|
|
81
|
+
const textEndIndex = this.html.indexOf(endTag, textStartIndex);
|
|
82
|
+
if (textEndIndex < 0)
|
|
83
|
+
return false;
|
|
84
|
+
if (this.currentNode) {
|
|
85
|
+
const tmpIndex = this.index;
|
|
86
|
+
this.index = textStartIndex;
|
|
87
|
+
this.index = textEndIndex;
|
|
88
|
+
this.index = tmpIndex;
|
|
89
|
+
this.currentNode.addTextContent(this.html.slice(textStartIndex, textEndIndex));
|
|
90
|
+
}
|
|
91
|
+
this.index = textEndIndex + endTag.length;
|
|
92
|
+
return true;
|
|
93
|
+
}
|
|
94
|
+
moveCursorToNextTag() {
|
|
95
|
+
let startIndex = this.index;
|
|
96
|
+
let success = true;
|
|
97
|
+
do {
|
|
98
|
+
this.index = this.html.indexOf('<', this.index);
|
|
99
|
+
if (this.index < 0) {
|
|
100
|
+
success = false;
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
else if (this.html[this.index + 1] !== '!') {
|
|
104
|
+
break;
|
|
105
|
+
}
|
|
106
|
+
else if (this.html.slice(this.index + 2, this.index + 4) === '--') {
|
|
107
|
+
if (this.currentNode)
|
|
108
|
+
this.currentNode.addTextContent(this.html.slice(startIndex, this.index));
|
|
109
|
+
if (!this.parseComment())
|
|
110
|
+
return false;
|
|
111
|
+
startIndex = this.index;
|
|
112
|
+
}
|
|
113
|
+
else {
|
|
114
|
+
this.index++;
|
|
115
|
+
}
|
|
116
|
+
} while (success);
|
|
117
|
+
if (success) {
|
|
118
|
+
if (this.currentNode) {
|
|
119
|
+
const tmpIndex = this.index;
|
|
120
|
+
this.index = startIndex;
|
|
121
|
+
this.index = tmpIndex;
|
|
122
|
+
this.currentNode.addTextContent(this.html.slice(startIndex, this.index));
|
|
123
|
+
}
|
|
124
|
+
this.index++;
|
|
125
|
+
}
|
|
126
|
+
return success;
|
|
127
|
+
}
|
|
128
|
+
parseComment() {
|
|
129
|
+
const textStartIndex = this.index;
|
|
130
|
+
const textEndIndex = this.html.indexOf('-->', textStartIndex);
|
|
131
|
+
if (textEndIndex < 0)
|
|
132
|
+
return false;
|
|
133
|
+
this.index = textEndIndex + 3;
|
|
134
|
+
return true;
|
|
135
|
+
}
|
|
136
|
+
setTag() {
|
|
137
|
+
const startIndex = this.index;
|
|
138
|
+
tagRegex.lastIndex = startIndex - 1;
|
|
139
|
+
const tagMatch = tagRegex.exec(this.html);
|
|
140
|
+
if (!tagMatch)
|
|
141
|
+
return false;
|
|
142
|
+
this.currentTagIsClosed = tagMatch[0][1] === '/';
|
|
143
|
+
this.currentTag = tagMatch[0].slice(this.currentTagIsClosed ? 2 : 1);
|
|
144
|
+
this.index = startIndex + this.currentTag.length;
|
|
145
|
+
return true;
|
|
146
|
+
}
|
|
147
|
+
enterNode() {
|
|
148
|
+
this.currentNodeIndex = this.nodes.length;
|
|
149
|
+
const parentIndex = hasElements(this.parents)
|
|
150
|
+
? this.parents[this.parents.length - 1]
|
|
151
|
+
: null;
|
|
152
|
+
const node = new ParserNode(this.currentTag, this.currentNodeIndex, parentIndex, this.nodes, this.ignoreAttributes);
|
|
153
|
+
this.currentNode = node;
|
|
154
|
+
this.nodes.push(node);
|
|
155
|
+
this.parents.push(this.currentNodeIndex);
|
|
156
|
+
}
|
|
157
|
+
exitNode() {
|
|
158
|
+
this.parents.pop();
|
|
159
|
+
this.currentNode = hasElements(this.parents)
|
|
160
|
+
? this.nodes[getLast(this.parents)]
|
|
161
|
+
: stubParserNode;
|
|
162
|
+
}
|
|
163
|
+
moveCursorAfterCloseBracket() {
|
|
164
|
+
this.index = this.html.indexOf('>', this.index);
|
|
165
|
+
if (this.index < 0)
|
|
166
|
+
return false;
|
|
167
|
+
this.index++;
|
|
168
|
+
return true;
|
|
169
|
+
}
|
|
170
|
+
shouldSkipNode() {
|
|
171
|
+
return this.skipTags.includes(this.currentTag);
|
|
172
|
+
}
|
|
173
|
+
setAttrs() {
|
|
174
|
+
const endIndex = this.html.indexOf('>', this.index);
|
|
175
|
+
if (endIndex < 0)
|
|
176
|
+
return false;
|
|
177
|
+
while (this.index < endIndex) {
|
|
178
|
+
attrRegex.lastIndex = this.index;
|
|
179
|
+
const attrMatch = attrRegex.exec(this.html);
|
|
180
|
+
if (!attrMatch || attrMatch.index > endIndex) {
|
|
181
|
+
this.index = endIndex + 1;
|
|
182
|
+
return true;
|
|
183
|
+
}
|
|
184
|
+
this.index = attrMatch.index + attrMatch[0].length;
|
|
185
|
+
const attr = this.html.slice(attrMatch.index, this.index);
|
|
186
|
+
equalSignRegex.lastIndex = this.index;
|
|
187
|
+
const equalSignMatch = equalSignRegex.exec(this.html);
|
|
188
|
+
if (!equalSignMatch ||
|
|
189
|
+
equalSignMatch.index !== this.index ||
|
|
190
|
+
equalSignMatch.index > endIndex) {
|
|
191
|
+
if (this.currentNode)
|
|
192
|
+
this.currentNode.addAttr(attr, attr);
|
|
193
|
+
this.index++;
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
this.index = equalSignMatch.index + equalSignMatch[0].length;
|
|
197
|
+
const isQuoted = this.html[this.index] === '"';
|
|
198
|
+
if (isQuoted) {
|
|
199
|
+
quotedAttrValueRegex.lastIndex = this.index;
|
|
200
|
+
const valueMatch = quotedAttrValueRegex.exec(this.html);
|
|
201
|
+
if (!valueMatch ||
|
|
202
|
+
valueMatch.index > endIndex ||
|
|
203
|
+
valueMatch.index + valueMatch[0].length > endIndex)
|
|
204
|
+
return false;
|
|
205
|
+
if (this.currentNode)
|
|
206
|
+
this.currentNode.addAttr(attr, unquote(valueMatch[0]));
|
|
207
|
+
this.index = valueMatch.index + valueMatch[0].length + 1;
|
|
208
|
+
}
|
|
209
|
+
else {
|
|
210
|
+
attrValueRegex.lastIndex = this.index;
|
|
211
|
+
const valueMatch = attrValueRegex.exec(this.html);
|
|
212
|
+
if (!valueMatch ||
|
|
213
|
+
valueMatch.index > endIndex ||
|
|
214
|
+
valueMatch.index + valueMatch[0].length > endIndex)
|
|
215
|
+
return false;
|
|
216
|
+
if (this.currentNode)
|
|
217
|
+
this.currentNode.addAttr(attr, valueMatch[0]);
|
|
218
|
+
this.index = valueMatch.index + valueMatch[0].length + 1;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
this.index = endIndex + 1;
|
|
223
|
+
return true;
|
|
224
|
+
}
|
|
225
|
+
isStyleOrScript() {
|
|
226
|
+
return this.currentTag === 'style' || this.currentTag === 'script';
|
|
227
|
+
}
|
|
228
|
+
isVoidNode() {
|
|
229
|
+
return isVoidNode(this.currentTag);
|
|
230
|
+
}
|
|
231
|
+
setError(errorMessage) {
|
|
232
|
+
this.htmlNodes[0] = makeErrorHtmlNode(errorMessage);
|
|
233
|
+
return false;
|
|
234
|
+
}
|
|
235
|
+
skipNode() {
|
|
236
|
+
let depth = 0;
|
|
237
|
+
const tagRegex = getTagRegex(this.currentTag);
|
|
238
|
+
while (true) {
|
|
239
|
+
tagRegex.lastIndex = this.index;
|
|
240
|
+
const match = tagRegex.exec(this.html);
|
|
241
|
+
if (!match)
|
|
242
|
+
return false;
|
|
243
|
+
const matchStr = match[0];
|
|
244
|
+
this.index = tagRegex.lastIndex;
|
|
245
|
+
if (!this.moveCursorAfterCloseBracket())
|
|
246
|
+
return false;
|
|
247
|
+
if (matchStr[1] === '/') {
|
|
248
|
+
if (depth === 0) {
|
|
249
|
+
return true;
|
|
250
|
+
}
|
|
251
|
+
else {
|
|
252
|
+
depth--;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
depth++;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
//# sourceMappingURL=HtmlParser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HtmlParser.js","sourceRoot":"","sources":["../src/HtmlParser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAO5C,MAAM,QAAQ,GAAG,uBAAuB,CAAC;AACzC,MAAM,SAAS,GAAG,mBAAmB,CAAC;AACtC,MAAM,cAAc,GAAG,UAAU,CAAC;AAClC,MAAM,oBAAoB,GAAG,UAAU,CAAC;AACxC,MAAM,cAAc,GAAG,UAAU,CAAC;AAElC,MAAM,wBAAwB,GAAsB,EAAE,CAAC;AACvD,MAAM,OAAO,UAAU;IAab;IAZD,KAAK,GAAiB,EAAE,CAAC;IACzB,SAAS,GAAe,EAAE,CAAC;IAC3B,OAAO,GAAa,EAAE,CAAC;IACvB,WAAW,GAAsB,IAAI,CAAC;IACtC,UAAU,GAAW,EAAE,CAAC;IACxB,kBAAkB,GAAY,IAAI,CAAC;IACnC,gBAAgB,GAAW,CAAC,CAAC;IAC7B,KAAK,GAAW,CAAC,CAAC;IAClB,QAAQ,GAAa,EAAE,CAAC;IACxB,gBAAgB,GAA4B,EAAE,CAAC;IAEvD,YACS,IAAY,EACpB,UAA6B,wBAAwB;QAD7C,SAAI,GAAJ,IAAI,CAAQ;QAGpB,IACC,OAAO;YACP,eAAe,IAAI,OAAO;YAC1B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,EACnC,CAAC;YACF,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,aAAa,CAAC;QACvC,CAAC;QACD,IACC,OAAO;YACP,kBAAkB,IAAI,OAAO;YAC7B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,gBAAgB,CAAC,EACtC,CAAC;YACF,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;gBAC7C,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YACpC,CAAC;QACF,CAAC;IACF,CAAC;IAEM,KAAK;QACX,OAAO,IAAI,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA,CAAC;QAC1B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;QACpC,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YACnB,MAAM,KAAK,GAAe,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;YAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBACjC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YAC/C,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACxB,CAAC;QACD,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,CAAC;QACD,OAAO,iBAAiB,CAAC,eAAe,CAAC,CAAC;IAC3C,CAAC;IAEO,QAAQ;QACf,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE;YAAE,OAAO,KAAK,CAAC;QAC9C,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAAE,OAAO,KAAK,CAAC;QACjC,IAAI,IAAI,C
AAC,kBAAkB,EAAE,CAAC;YAC7B,IAAI,CAAC,IAAI,CAAC,2BAA2B,EAAE;gBACtC,OAAO,IAAI,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC;YACrD,IAAI,CAAC,QAAQ,EAAE,CAAC;YAChB,OAAO,IAAI,CAAC;QACb,CAAC;QACD,IAAI,IAAI,CAAC,cAAc,EAAE;YAAE,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;QAElD,IAAI,CAAC,SAAS,EAAE,CAAC;QAEjB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;YAAE,OAAO,KAAK,CAAC;QAEnC,IAAI,IAAI,CAAC,eAAe,EAAE,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;YACzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YAChB,OAAO,MAAM,CAAC;QACf,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,EAAE;YAAE,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEvC,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,kBAAkB;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,KAAK,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC;QACtE,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC;QAClC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QAC/D,IAAI,YAAY,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACnC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC;YAC5B,IAAI,CAAC,KAAK,GAAG,cAAc,CAAC;YAC5B,IAAI,CAAC,KAAK,GAAG,YAAY,CAAC;YAC1B,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC;YACtB,IAAI,CAAC,WAAW,CAAC,cAAc,CAC9B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,cAAc,EAAE,YAAY,CAAC,CAC7C,CAAC;QACH,CAAC;QACD,IAAI,CAAC,KAAK,GAAG,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC;QAC1C,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,mBAAmB;QAC1B,IAAI,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC;QAC5B,IAAI,OAAO,GAAG,IAAI,CAAC;QAEnB,GAAG,CAAC;YACH,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YAChD,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACpB,OAAO,GAAG,KAAK,CAAC;gBAChB,MAAM;YACP,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBAC9C,MAAM;YACP,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBACrE,IAAI,IAAI,CAAC,WAAW;oBACnB,IAAI,CAAC,WAAW,CAAC,cAAc,CAC9B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CACvC,CAAC;gBACH,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE;oBAAE,OAAO,KAAK,CAAC;gBACvC,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,IAAI,CAAC,KAAK,EAAE,CAAC
;YACd,CAAC;QACF,CAAC,QAAQ,OAAO,EAAE;QAClB,IAAI,OAAO,EAAE,CAAC;YACb,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;gBACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC;gBAC5B,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC;gBACxB,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC;gBACtB,IAAI,CAAC,WAAW,CAAC,cAAc,CAC9B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CACvC,CAAC;YACH,CAAC;YACD,IAAI,CAAC,KAAK,EAAE,CAAC;QACd,CAAC;QACD,OAAO,OAAO,CAAC;IAChB,CAAC;IAEO,YAAY;QACnB,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC;QAClC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAC9D,IAAI,YAAY,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACnC,IAAI,CAAC,KAAK,GAAG,YAAY,GAAG,CAAC,CAAC;QAC9B,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,MAAM;QACb,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC;QAC9B,QAAQ,CAAC,SAAS,GAAG,UAAU,GAAG,CAAC,CAAC;QACpC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,QAAQ;YAAE,OAAO,KAAK,CAAC;QAC5B,IAAI,CAAC,kBAAkB,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC;QACjD,IAAI,CAAC,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrE,IAAI,CAAC,KAAK,GAAG,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;QACjD,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,SAAS;QAChB,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;QAC1C,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;YAC5C,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;YACvC,CAAC,CAAC,IAAI,CAAC;QACR,MAAM,IAAI,GAAG,IAAI,UAAU,CAC1B,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,gBAAgB,EACrB,WAAW,EACX,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,gBAAgB,CACrB,CAAC;QACF,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QACxB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtB,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC1C,CAAC;IAEO,QAAQ;QACf,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;QACnB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;YAC3C,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC,CAAC,cAAc,CAAC;IACnB,CAAC;IAEO,2BAA2B;QAClC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAChD,
IAAI,IAAI,CAAC,KAAK,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACjC,IAAI,CAAC,KAAK,EAAE,CAAC;QACb,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,cAAc;QACrB,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAChD,CAAC;IACO,QAAQ;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QACpD,IAAI,QAAQ,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAC/B,OAAO,IAAI,CAAC,KAAK,GAAG,QAAQ,EAAE,CAAC;YAC9B,SAAS,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC;YACjC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5C,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,KAAK,GAAG,QAAQ,EAAE,CAAC;gBAC9C,IAAI,CAAC,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC;gBAC1B,OAAO,IAAI,CAAC;YACb,CAAC;YACD,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACnD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YAC1D,cAAc,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC;YACtC,MAAM,cAAc,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtD,IACC,CAAC,cAAc;gBACf,cAAc,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK;gBACnC,cAAc,CAAC,KAAK,GAAG,QAAQ,EAC9B,CAAC;gBACF,IAAI,IAAI,CAAC,WAAW;oBAAE,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;gBAC3D,IAAI,CAAC,KAAK,EAAE,CAAC;YACd,CAAC;iBAAM,CAAC;gBACP,IAAI,CAAC,KAAK,GAAG,cAAc,CAAC,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;gBAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC;gBAC/C,IAAI,QAAQ,EAAE,CAAC;oBACd,oBAAoB,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC;oBAC5C,MAAM,UAAU,GAAG,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACxD,IACC,CAAC,UAAU;wBACX,UAAU,CAAC,KAAK,GAAG,QAAQ;wBAC3B,UAAU,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,QAAQ;wBAElD,OAAO,KAAK,CAAC;oBACd,IAAI,IAAI,CAAC,WAAW;wBACnB,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBACxD,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC1D,CAAC;qBAAM,CAAC;oBACP,cAAc,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC;oBACtC,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAClD,IACC,CAAC,UAAU;wBACX,UAAU,CAAC,KA
AK,GAAG,QAAQ;wBAC3B,UAAU,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,QAAQ;wBAElD,OAAO,KAAK,CAAC;oBACd,IAAI,IAAI,CAAC,WAAW;wBAAE,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;oBACpE,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC1D,CAAC;YACF,CAAC;QACF,CAAC;QACD,IAAI,CAAC,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC;QAC1B,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,eAAe;QACtB,OAAO,IAAI,CAAC,UAAU,KAAK,OAAO,IAAI,IAAI,CAAC,UAAU,KAAK,QAAQ,CAAC;IACpE,CAAC;IAEO,UAAU;QACjB,OAAO,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC;IAEO,QAAQ,CAAC,YAAoB;QACpC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAC;QACpD,OAAO,KAAK,CAAC;IACd,CAAC;IAEO,QAAQ;QACf,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC9C,OAAO,IAAI,EAAE,CAAC;YACb,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC;YAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvC,IAAI,CAAC,KAAK;gBAAE,OAAO,KAAK,CAAC;YACzB,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC;YAChC,IAAI,CAAC,IAAI,CAAC,2BAA2B,EAAE;gBAAE,OAAO,KAAK,CAAC;YACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBACzB,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;oBACjB,OAAO,IAAI,CAAC;gBACb,CAAC;qBAAM,CAAC;oBACP,KAAK,EAAE,CAAC;gBACT,CAAC;YACF,CAAC;iBAAM,CAAC;gBACP,KAAK,EAAE,CAAC;YACT,CAAC;QACF,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Node record produced while parsing. Nodes refer to their parent and
 * children by numeric index into a shared node array (`allNodesRef`)
 * rather than by object reference.
 */
export declare class ParserNode {
|
|
2
|
+
/** Tag name passed to the constructor; `addTextContent` creates text nodes with an empty tag. */
tag: string;
|
|
3
|
+
/** Index stored for this node; parents list their children by this number. */
index: number;
|
|
4
|
+
/** Index of the parent node, or `null` when the node has no parent. */
parent: number | null;
|
|
5
|
+
/** Shared node array that the `parent` and `children` indices point into. */
allNodesRef: ParserNode[];
|
|
6
|
+
/** Lookup of lower-cased attribute names that `addAttr` discards. */
private ignoreAttributesHash;
|
|
7
|
+
/** `true` for nodes created through `addTextContent`; defaults to `false`. */
isTextNode: boolean;
|
|
8
|
+
/** Indices of child nodes, in the order they were added. */
children: number[];
|
|
9
|
+
/** Class names collected via `addClass` and the `class` attribute. */
classes: string[];
|
|
10
|
+
/** Attributes other than `class` and `id`, keyed by lower-cased name. */
attrs: Record<string, string>;
|
|
11
|
+
/** Value of the `id` attribute, or `null` if none was added. */
id: string | null;
|
|
12
|
+
/** Text carried by a text node; `null` until set by `addTextContent`. */
textContent: string | null;
|
|
13
|
+
/** Stores the arguments and, when `parent` is not `null`, registers `index` as a child of `allNodesRef[parent]`. */
constructor(tag: string, index: number, parent: number | null, allNodesRef: ParserNode[], ignoreAttributesHash: Record<string, boolean>, isTextNode?: boolean);
|
|
14
|
+
/** Appends `nodeIndex` to `children`. */
addChild(nodeIndex: number): void;
|
|
15
|
+
/** Appends `className` to `classes`. */
addClass(className: string): void;
|
|
16
|
+
/** Records an attribute: `class` feeds `classes`, `id` sets `id`, anything else goes to `attrs`; ignored names are dropped. */
addAttr(attribute: string, value: string): void;
|
|
17
|
+
/** Creates a text node holding `text`, appends it to `allNodesRef` and lists it as a child of this node. */
addTextContent(text: string): void;
|
|
18
|
+
}
|
|
19
|
+
/** Shared placeholder `ParserNode` instance exported alongside the class. */
export declare const stubParserNode: ParserNode;
|
|
20
|
+
//# sourceMappingURL=ParserNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ParserNode.d.ts","sourceRoot":"","sources":["../src/ParserNode.ts"],"names":[],"mappings":"AAAA,qBAAa,UAAU;IAQd,GAAG,EAAE,MAAM;IACX,KAAK,EAAE,MAAM;IACb,MAAM,EAAE,MAAM,GAAG,IAAI;IACrB,WAAW,EAAE,UAAU,EAAE;IAChC,OAAO,CAAC,oBAAoB;IACrB,UAAU,EAAE,OAAO;IAZpB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxB,OAAO,EAAE,MAAM,EAAE,CAAM;IACvB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAM;IACnC,EAAE,EAAE,MAAM,GAAG,IAAI,CAAQ;IACzB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GAAG,IAAI,EACrB,WAAW,EAAE,UAAU,EAAE,EACxB,oBAAoB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC9C,UAAU,GAAE,OAAe;IAO5B,QAAQ,CAAC,SAAS,EAAE,MAAM;IAI1B,QAAQ,CAAC,SAAS,EAAE,MAAM;IAI1B,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IAkBxC,cAAc,CAAC,IAAI,EAAE,MAAM;CAclC;AAED,eAAO,MAAM,cAAc,YAAsC,CAAC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
 * Mutable node record built while parsing.
 *
 * Nodes never hold direct references to each other: `parent` and the entries
 * of `children` are indices into the shared `allNodesRef` array.
 */
export class ParserNode {
    tag;
    index;
    parent;
    allNodesRef;
    ignoreAttributesHash;
    isTextNode;
    children = [];
    classes = [];
    attrs = {};
    id = null;
    textContent = null;
    /**
     * @param tag Tag name; text nodes use the empty string.
     * @param index Index this node has (or is about to have) in `allNodesRef`.
     * @param parent Index of the parent node, or `null` for a node without parent.
     * @param allNodesRef Shared node array that all indices refer to.
     * @param ignoreAttributesHash Lower-cased attribute names that `addAttr` must drop.
     * @param isTextNode Whether this node carries text instead of a tag.
     */
    constructor(tag, index, parent, allNodesRef, ignoreAttributesHash, isTextNode = false) {
        this.tag = tag;
        this.index = index;
        this.parent = parent;
        this.allNodesRef = allNodesRef;
        this.ignoreAttributesHash = ignoreAttributesHash;
        this.isTextNode = isTextNode;
        if (parent !== null) {
            // A node registers itself with its parent on creation.
            this.allNodesRef[parent].addChild(this.index);
        }
    }
    /** Appends the index of a child node. */
    addChild(nodeIndex) {
        this.children.push(nodeIndex);
    }
    /** Appends a single class name as given. */
    addClass(className) {
        this.classes.push(className);
    }
    /**
     * Records one attribute. Names are compared case-insensitively.
     * `class` is split into individual class names, `id` sets `this.id`,
     * everything else is stored in `attrs`; ignored names are dropped.
     */
    addAttr(attribute, value) {
        const attr = attribute.toLowerCase();
        // Own-property check so that names such as "constructor" are not
        // treated as ignored just because they exist on Object.prototype.
        if (Object.prototype.hasOwnProperty.call(this.ignoreAttributesHash, attr) &&
            this.ignoreAttributesHash[attr]) {
            return;
        }
        switch (attr) {
            case 'class':
                // Split on any whitespace run and skip empty pieces, so repeated
                // spaces, tabs, newlines or an empty value never yield '' classes.
                for (const className of value.split(/\s+/)) {
                    if (className !== '')
                        this.classes.push(className);
                }
                return;
            case 'id':
                this.id = value;
                return;
            default:
                this.attrs[attr] = value;
                return;
        }
    }
    /**
     * Creates a text node holding `text`, appends it to the shared node array
     * and makes it a child of this node.
     */
    addTextContent(text) {
        const index = this.allNodesRef.length;
        // The constructor already registers the new node with
        // allNodesRef[this.index], which is normally this very node.
        const textNode = new ParserNode('', index, this.index, this.allNodesRef, this.ignoreAttributesHash, true);
        this.allNodesRef.push(textNode);
        // Only add the index here if the constructor did not, otherwise the
        // text node would be listed twice among the children.
        if (this.children[this.children.length - 1] !== index) {
            this.children.push(index);
        }
        textNode.textContent = text;
    }
}
|
|
55
|
+
// Placeholder node: empty tag, index 0, no parent, its own empty node list and nothing ignored.
export const stubParserNode = new ParserNode('', 0, null, [], {});
|
|
56
|
+
//# sourceMappingURL=ParserNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ParserNode.js","sourceRoot":"","sources":["../src/ParserNode.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,UAAU;IAQd;IACA;IACA;IACA;IACC;IACD;IAZD,QAAQ,GAAa,EAAE,CAAC;IACxB,OAAO,GAAa,EAAE,CAAC;IACvB,KAAK,GAA2B,EAAE,CAAC;IACnC,EAAE,GAAkB,IAAI,CAAC;IACzB,WAAW,GAAkB,IAAI,CAAC;IAEzC,YACQ,GAAW,EACX,KAAa,EACb,MAAqB,EACrB,WAAyB,EACxB,oBAA6C,EAC9C,aAAsB,KAAK;QAL3B,QAAG,GAAH,GAAG,CAAQ;QACX,UAAK,GAAL,KAAK,CAAQ;QACb,WAAM,GAAN,MAAM,CAAe;QACrB,gBAAW,GAAX,WAAW,CAAc;QACxB,yBAAoB,GAApB,oBAAoB,CAAyB;QAC9C,eAAU,GAAV,UAAU,CAAiB;QAElC,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/C,CAAC;IACF,CAAC;IAEM,QAAQ,CAAC,SAAiB;QAChC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/B,CAAC;IAEM,QAAQ,CAAC,SAAiB;QAChC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC9B,CAAC;IAEM,OAAO,CAAC,SAAiB,EAAE,KAAa;QAC9C,MAAM,IAAI,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;QACrC,IAAI,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC;YAAE,OAAO;QAC5C,QAAQ,IAAI,EAAE,CAAC;YACd,KAAK,OAAO;gBACX,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC;oBAC9C,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE9B,OAAO;YACR,KAAK,IAAI;gBACR,IAAI,CAAC,EAAE,GAAG,KAAK,CAAC;gBAChB,OAAO;YACR;gBACC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;gBACzB,OAAO;QACT,CAAC;IACF,CAAC;IAEM,cAAc,CAAC,IAAY;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;QACtC,MAAM,QAAQ,GAAG,IAAI,UAAU,CAC9B,EAAE,EACF,KAAK,EACL,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,oBAAoB,EACzB,IAAI,CACJ,CAAC;QACF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC1B,QAAQ,CAAC,WAAW,GAAG,IAAI,CAAC;IAC7B,CAAC;CACD;AAED,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,UAAU,CAAC,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Selector.d.ts","sourceRoot":"","sources":["../src/Selector.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GACjB;IACA,IAAI,EAAE,IAAI,GAAG,OAAO,GAAG,KAAK,CAAC;IAC7B,KAAK,EAAE,MAAM,CAAC;CACb,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACb,CAAC"}
|
package/dist/Selector.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Selector.js","sourceRoot":"","sources":["../src/Selector.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"isVoidNode.d.ts","sourceRoot":"","sources":["../src/isVoidNode.ts"],"names":[],"mappings":"AAiBA,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/** Tag names that `isVoidNode` reports as void nodes. */
const voidNodes = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
    'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
]);
/**
 * Tells whether `tag` is one of the listed void node names.
 * The comparison is exact, so the caller decides on letter case.
 */
export function isVoidNode(tag) {
    const listed = voidNodes.has(tag);
    return listed;
}
|
|
20
|
+
//# sourceMappingURL=isVoidNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"isVoidNode.js","sourceRoot":"","sources":["../src/isVoidNode.ts"],"names":[],"mappings":"AAAA,MAAM,SAAS,GAAG;IACjB,MAAM;IACN,MAAM;IACN,IAAI;IACJ,KAAK;IACL,OAAO;IACP,IAAI;IACJ,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,OAAO;IACP,KAAK;CACL,CAAC;AAEF,MAAM,UAAU,UAAU,CAAC,GAAW;IACrC,OAAO,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC"}
|
package/dist/lib.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../src/lib.ts"],"names":[],"mappings":"AAAA,wBAAgB,WAAW,CAAC,GAAG,EAAE,OAAO,EAAE,GAAG,OAAO,CAEnD;AAED,wBAAgB,OAAO,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAEtC;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE3C"}
|
package/dist/lib.js
ADDED
package/dist/lib.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lib.js","sourceRoot":"","sources":["../src/lib.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,WAAW,CAAC,GAAc;IACzC,OAAO,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;AACvB,CAAC;AAED,MAAM,UAAU,OAAO,CAAI,GAAQ;IAClC,OAAO,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAC5B,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,GAAW;IAClC,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACrC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseHtml.d.ts","sourceRoot":"","sources":["../src/parseHtml.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAc,KAAK,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAErE,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,QAAQ,CAE7E"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseHtml.js","sourceRoot":"","sources":["../src/parseHtml.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAA0B,MAAM,iBAAiB,CAAC;AAErE,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,OAA2B;IAClE,OAAO,IAAI,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;AAC9C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseHtml.spec.d.ts","sourceRoot":"","sources":["../src/parseHtml.spec.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest';
|
|
2
|
+
import { HtmlNode, stubError } from './HtmlNode.js';
|
|
3
|
+
import { parseHtml } from './parseHtml.js';
|
|
4
|
+
// Fixture document parsed by every test in this file.
const validHtml = `
<!DOCTYPE html>
<html lang="en" id="html" class="html">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
</head>
<!--
multiline comment
-->
<body>
<div id="bob" class="a1 a2 a3">
<span class="span-class">hello</span>
</div>
<script></script>
<script type=defer>
const foo = true;
function bar() {
return !foo;
}
</script>
<style>
.body {
background-color: green;
}
</style>
</body>
</html>

`;
describe('parseHtml', () => {
    test('parses valid html', () => {
        const root = parseHtml(validHtml);
        expect(root).toBeInstanceOf(HtmlNode);
        // A clean parse leaves the stub error in place.
        expect(root.error).toBe(stubError);
    });
    test('gets all node properties', () => {
        const root = parseHtml(validHtml);
        expect(root.id).toBe('html');
        expect(root.classes).toEqual(['html']);
        expect(root.attrs.lang).toBe('en');
    });
    test('node can select itself', () => {
        const root = parseHtml(validHtml);
        const selected = root.select('html');
        expect(selected).toBe(root);
    });
    test('can select deeply nested node by node name', () => {
        const found = parseHtml(validHtml).select('span');
        expect(found).toBeInstanceOf(HtmlNode);
        expect(found?.tag).toBe('span');
    });
    test('can select based on class', () => {
        const found = parseHtml(validHtml).select('.a2');
        expect(found).toBeInstanceOf(HtmlNode);
        expect(found?.tag).toBe('div');
    });
    test('can select based on attribute', () => {
        const found = parseHtml(validHtml).select('script[type=defer]');
        expect(found).toBeInstanceOf(HtmlNode);
        expect(found?.attrs.type).toBe('defer');
        expect(found?.tag).toBe('script');
    });
    test('can skip tags', () => {
        const skipped = ['script', 'style'];
        const root = parseHtml(validHtml, { skipNodeTypes: skipped });
        expect(root.select('script')).toBe(null);
        expect(root.select('style')).toBe(null);
    });
});
|
|
76
|
+
//# sourceMappingURL=parseHtml.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseHtml.spec.js","sourceRoot":"","sources":["../src/parseHtml.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAE3C,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IAC1B,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC9B,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,0BAA0B,EAAE,GAAG,EAAE;QACrC,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QACnC,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACvD,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,CAAC,KAAK,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QACvC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACtC,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACjC,MAAM,CAAC,KAAK,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QACvC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;QAC1C,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAChD,MAAM,CAAC,KAAK,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QACvC,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,eAAe,EAAE,GAAG,EAAE
;QAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,EAAE,EAAE,aAAa,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;QAC1E,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,MAAM,SAAS,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8BjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseSelector.d.ts","sourceRoot":"","sources":["../src/parseSelector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAoB9C,wBAAgB,aAAa,CAAC,cAAc,EAAE,MAAM,GAAG,QAAQ,EAAE,EAAE,CA6ElE"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// All patterns are sticky (`y`): they must match exactly at `lastIndex`
// and never skip ahead to a later position in the selector string.
/** Run of characters forming a tag, id or class name. */
const char = /[^[\]\s.#]+/y;
/** Run of whitespace separating two selector levels. */
const space = /\s+/y;
/** Attribute name: everything up to "=" or "]". */
const nonEquals = /[^=\]]+/y;
/** Attribute value: everything up to the closing "]". */
const nonClosing = /[^\]]+/y;
/**
 * Strips one pair of surrounding double quotes.
 * Strings that do not start with `"` are returned unchanged.
 *
 * @throws Error when the string starts with `"` but has no separate closing `"`.
 */
function unquote(str) {
    if (str[0] !== '"')
        return str;
    const lastIndex = str.length - 1;
    // lastIndex === 0 means the opening quote is the only character.
    if (lastIndex === 0 || str[lastIndex] !== '"') {
        throw new Error('Malformed selector string. Unterminated quote');
    }
    return str.slice(1, lastIndex);
}
/**
 * Parses a selector string into one list of simple selectors per level.
 *
 * Levels are separated by whitespace. Within a level the supported parts are
 * a tag name (lower-cased), `#id`, `.class` and `[key=value]`, where the
 * value may be wrapped in double quotes. A `]` inside an attribute value is
 * not supported, quoted or not.
 *
 * @param selectorString Selector text; surrounding whitespace is ignored.
 * @returns Array of levels, each an array of `{ type, value }` or
 *          `{ type: 'attr', key, value }` objects. An empty selector gives `[[]]`.
 * @throws Error with a "Malformed selector string." message on invalid input.
 */
export function parseSelector(selectorString) {
    const str = selectorString.trim();
    const selectors = [[]];
    let depth = 0;
    let cursor = 0;
    const end = str.length;
    while (cursor < end) {
        const qualifier = str[cursor];
        switch (qualifier) {
            case '#':
            case '.': {
                char.lastIndex = cursor + 1;
                const match = char.exec(str);
                if (!match)
                    throw new Error(`Malformed selector string. expected identifier after "${qualifier}"`);
                selectors[depth].push({
                    type: qualifier === '#' ? 'id' : 'class',
                    value: match[0],
                });
                cursor = char.lastIndex;
                break;
            }
            case '[': {
                nonEquals.lastIndex = cursor + 1;
                const keyMatch = nonEquals.exec(str);
                const key = keyMatch ? keyMatch[0].trim() : '';
                if (key === '')
                    throw new Error('Malformed selector string. expected attribute name after "["');
                cursor = nonEquals.lastIndex;
                if (str[cursor] !== '=')
                    throw new Error('Malformed selector string. expected "=" after attribute name');
                nonClosing.lastIndex = cursor + 1;
                const valueMatch = nonClosing.exec(str);
                const rawValue = valueMatch ? valueMatch[0].trim() : '';
                if (rawValue === '')
                    throw new Error('Malformed selector string. expected attribute value after "="');
                cursor = nonClosing.lastIndex;
                if (str[cursor] !== ']')
                    throw new Error('Malformed selector string. expected "]" after attribute value');
                cursor += 1;
                selectors[depth].push({
                    type: 'attr',
                    key,
                    value: unquote(rawValue),
                });
                break;
            }
            default: {
                // Any whitespace run (not just spaces) starts a new level.
                space.lastIndex = cursor;
                if (space.test(str)) {
                    depth++;
                    selectors[depth] = [];
                    cursor = space.lastIndex;
                    break;
                }
                char.lastIndex = cursor;
                const match = char.exec(str);
                if (!match)
                    throw new Error('Malformed selector string. expected node type');
                selectors[depth].push({ type: 'tag', value: match[0].toLowerCase() });
                cursor = char.lastIndex;
                break;
            }
        }
    }
    return selectors;
}
|
|
87
|
+
//# sourceMappingURL=parseSelector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseSelector.js","sourceRoot":"","sources":["../src/parseSelector.ts"],"names":[],"mappings":"AAEA,MAAM,IAAI,GAAG,aAAa,CAAC;AAC3B,MAAM,KAAK,GAAG,MAAM,CAAC;AACrB,MAAM,SAAS,GAAG,QAAQ,CAAC;AAC3B,MAAM,UAAU,GAAG,SAAS,CAAC;AAE7B,SAAS,OAAO,CAAC,GAAW;IAC3B,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IACjC,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;QACpB,IAAI,GAAG,CAAC,SAAS,CAAC,KAAK,GAAG,EAAE,CAAC;YAC5B,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChC,CAAC;aAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAClE,CAAC;IACF,CAAC;SAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACZ,CAAC;AACF,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,cAAsB;IACnD,MAAM,GAAG,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC;IAClC,MAAM,SAAS,GAAiB,CAAC,EAAE,CAAC,CAAC;IACrC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC;IACvB,OAAO,MAAM,GAAG,GAAG,EAAE,CAAC;QACrB,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QAC9B,QAAQ,SAAS,EAAE,CAAC;YACnB,KAAK,GAAG,CAAC;YACT,KAAK,GAAG,CAAC,CAAC,CAAC;gBACV,IAAI,CAAC,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;gBAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC7B,IAAI,CAAC,KAAK;oBACT,MAAM,IAAI,KAAK,CACd,0DAA0D,CAC1D,CAAC;gBACH,SAAS,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;oBACrB,IAAI,EAAE,SAAS,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO;oBACxC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;iBACf,CAAC,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;gBACxB,MAAM;YACP,CAAC;YACD,KAAK,GAAG,CAAC,CAAC,CAAC;gBACV,KAAK,EAAE,CAAC;gBACR,SAAS,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;gBACtB,KAAK,CAAC,SAAS,GAAG,MAAM,CAAC;gBACzB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC9B,IAAI,KAAK,EAAE,CAAC;oBACX,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC;gBAC1B,CAAC;gBACD,MAAM;YACP,CAAC;YACD,KAAK,GAAG,CAAC,CAAC,CAAC;gBACV,SAAS,CAAC,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;gBACjC,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACrC,IAAI,CAAC,QAAQ;oBACZ,MAAM,IAAI,KAAK,CACd,8DAA8D,CAC9D,CAAC;gBACH,MAAM,GAAG,SAAS,CAAC,SAAS,CAAC;gBAC7B,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG;oBACtB,MAAM,IAAI,KAAK,CACd,8DAA8D,CAC9D,CAAC;gBACH,UAAU,CAAC,
SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;gBAClC,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACxC,IAAI,CAAC,UAAU;oBACd,MAAM,IAAI,KAAK,CACd,+DAA+D,CAC/D,CAAC;gBACH,MAAM,GAAG,UAAU,CAAC,SAAS,CAAC;gBAC9B,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG;oBACtB,MAAM,IAAI,KAAK,CACd,+DAA+D,CAC/D,CAAC;gBACH,MAAM,IAAI,CAAC,CAAC;gBACZ,SAAS,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;oBACrB,IAAI,EAAE,MAAM;oBACZ,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;oBACvB,KAAK,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;iBACpC,CAAC,CAAC;gBACH,MAAM;YACP,CAAC;YACD,OAAO,CAAC,CAAC,CAAC;gBACT,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC;gBACxB,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC7B,IAAI,CAAC,KAAK;oBACT,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;gBAClE,SAAS,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;gBACtE,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;gBACxB,MAAM;YACP,CAAC;QACF,CAAC;IACF,CAAC;IACD,OAAO,SAAS,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseSelector.spec.d.ts","sourceRoot":"","sources":["../src/parseSelector.spec.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest';
|
|
2
|
+
import { parseSelector } from './parseSelector.js';
|
|
3
|
+
describe('parseSelector', () => {
    test('single id', () => {
        expect(parseSelector('#foo')).toEqual([[{ type: 'id', value: 'foo' }]]);
    });
    test('single class', () => {
        expect(parseSelector('.foo')).toEqual([[{ type: 'class', value: 'foo' }]]);
    });
    test('single tag', () => {
        expect(parseSelector('div')).toEqual([[{ type: 'tag', value: 'div' }]]);
    });
    test('single attribute', () => {
        expect(parseSelector('[href=google.com]')).toEqual([
            [{ type: 'attr', key: 'href', value: 'google.com' }],
        ]);
    });
    test('single quoted attribute', () => {
        expect(parseSelector('[href="google.com"]')).toEqual([
            [{ type: 'attr', key: 'href', value: 'google.com' }],
        ]);
    });
    test('single quoted attribute with spaces', () => {
        expect(parseSelector('[ href = "google.com" ]')).toEqual([
            [{ type: 'attr', key: 'href', value: 'google.com' }],
        ]);
    });
    test('tag, class', () => {
        expect(parseSelector('div.foo')).toEqual([
            [
                { type: 'tag', value: 'div' },
                { type: 'class', value: 'foo' },
            ],
        ]);
    });
    test('tag, class, attr', () => {
        expect(parseSelector('div.foo[key="value with spaces ..."]')).toEqual([
            [
                { type: 'tag', value: 'div' },
                { type: 'class', value: 'foo' },
                { type: 'attr', key: 'key', value: 'value with spaces ...' },
            ],
        ]);
    });
    test('two levels', () => {
        expect(parseSelector('div .foo')).toEqual([
            [{ type: 'tag', value: 'div' }],
            [{ type: 'class', value: 'foo' }],
        ]);
    });
    test('three levels', () => {
        expect(parseSelector('div .foo span')).toEqual([
            [{ type: 'tag', value: 'div' }],
            [{ type: 'class', value: 'foo' }],
            [{ type: 'tag', value: 'span' }],
        ]);
    });
    test('throws on invalid character after # or .', () => {
        expect(() => parseSelector('#.')).toThrow();
        expect(() => parseSelector('# foo')).toThrow();
        expect(() => parseSelector('.[]')).toThrow();
        expect(() => parseSelector('. foo')).toThrow();
    });
    test('throws on unterminated quote', () => {
        expect(() => parseSelector('[key="value]')).toThrow();
    });
    test('throws on empty attribute', () => {
        expect(() => parseSelector('[=value]')).toThrow();
    });
    // Error and edge cases beyond the happy path.
    test('empty selector yields a single empty level', () => {
        expect(parseSelector('')).toEqual([[]]);
        expect(parseSelector('   ')).toEqual([[]]);
    });
    test('lowercases tag names', () => {
        expect(parseSelector('DIV')).toEqual([[{ type: 'tag', value: 'div' }]]);
    });
    test('collapses repeated spaces between levels', () => {
        expect(parseSelector('div    .foo')).toEqual([
            [{ type: 'tag', value: 'div' }],
            [{ type: 'class', value: 'foo' }],
        ]);
    });
    test('throws on attribute without "="', () => {
        expect(() => parseSelector('[key]')).toThrow();
    });
    test('throws on attribute without value', () => {
        expect(() => parseSelector('[key=]')).toThrow();
    });
    test('throws on unterminated attribute', () => {
        expect(() => parseSelector('div[')).toThrow();
        expect(() => parseSelector('[key=value')).toThrow();
    });
    test('throws on stray closing bracket', () => {
        expect(() => parseSelector('div]')).toThrow();
    });
});
|
|
72
|
+
//# sourceMappingURL=parseSelector.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseSelector.spec.js","sourceRoot":"","sources":["../src/parseSelector.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,IAAI,CAAC,WAAW,EAAE,GAAG,EAAE;QACtB,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,cAAc,EAAE,GAAG,EAAE;QACzB,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE;QACvB,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,aAAa,CAAC,mBAAmB,CAAC,CAAC,CAAC,OAAO,CAAC;YAClD,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;SACpD,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACpC,MAAM,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC,CAAC,OAAO,CAAC;YACpD,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;SACpD,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAChD,MAAM,CAAC,aAAa,CAAC,yBAAyB,CAAC,CAAC,CAAC,OAAO,CAAC;YACxD,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;SACpD,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE;QACvB,MAAM,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC;YACxC;gBACC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE;gBAC7B,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE;aAC/B;SACD,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,aAAa,CAAC,sCAAsC,CAAC,CAAC,CAAC,OAAO,CAAC;YACrE;gBACC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE;gBAC7B,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE;gBAC/B,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;aAC5D;SACD,CAAC,CA
AC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE;QACvB,MAAM,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC;YACzC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;YAC/B,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;SACjC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,cAAc,EAAE,GAAG,EAAE;QACzB,MAAM,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC;YAC9C,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;YAC/B,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;YACjC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;SAChC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,0CAA0C,EAAE,GAAG,EAAE;QACrD,MAAM,CAAC,GAAG,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;QAC5C,MAAM,CAAC,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;QAC/C,MAAM,CAAC,GAAG,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;QAC7C,MAAM,CAAC,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IAChD,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACzC,MAAM,CAAC,GAAG,EAAE,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IACvD,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACtC,MAAM,CAAC,GAAG,EAAE,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IACH,gDAAgD;AACjD,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tagRegex.d.ts","sourceRoot":"","sources":["../src/tagRegex.ts"],"names":[],"mappings":"AACA,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAK/C"}
|
package/dist/tagRegex.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tagRegex.js","sourceRoot":"","sources":["../src/tagRegex.ts"],"names":[],"mappings":"AAAA,MAAM,KAAK,GAA2B,EAAE,CAAC;AACzC,MAAM,UAAU,WAAW,CAAC,GAAW;IACtC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACjB,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,MAAM,CAAC,MAAM,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IAC3C,CAAC;IACD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC;AACnB,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kksiuda/html-parser",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"private": false,
|
|
5
|
+
"description": "",
|
|
6
|
+
"main": "dist/parseHtml.js",
|
|
7
|
+
"keywords": [],
|
|
8
|
+
"files": [
|
|
9
|
+
"dist"
|
|
10
|
+
],
|
|
11
|
+
"author": "Krzysztof Siuda",
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "https://github.com/p00dle/html-parser"
|
|
15
|
+
},
|
|
16
|
+
"type": "module",
|
|
17
|
+
"license": "MIT",
|
|
18
|
+
"devDependencies": {
|
|
19
|
+
"@biomejs/biome": "^2.4.6",
|
|
20
|
+
"@types/node": "^20.12.7",
|
|
21
|
+
"typescript": "^5.9.3",
|
|
22
|
+
"vitest": "^1.5.2"
|
|
23
|
+
},
|
|
24
|
+
"scripts": {
|
|
25
|
+
"check": "tsc --noEmit && biome check --write && vitest --run",
|
|
26
|
+
"test": "vitest --coverage",
|
|
27
|
+
"build": "tsc"
|
|
28
|
+
}
|
|
29
|
+
}
|