als-document 1.0.5-alpha → 1.0.7-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/document.js +2 -2
- package/index.js +2 -2
- package/index.mjs +2 -2
- package/package.json +1 -1
- package/src/node/node.js +1 -1
- package/src/parse/parser.js +3 -1
- package/tests/index.html +2 -2
- package/tests/parse-real.js +3 -2
package/index.mjs
CHANGED
|
@@ -21,13 +21,13 @@ function buildStyle(attributes){
|
|
|
21
21
|
const styles=attributes.style || "";
|
|
22
22
|
con
|
|
23
23
|
/**
 * Minimal DOMTokenList-style view over a node's `class` attribute.
 *
 * The list is never cached: every read re-parses `node.attributes.class`
 * and every write stores the classes back as a single space-joined string.
 */
class NodeClassList {
  /**
   * @param {{attributes: Object}} node - Node whose `attributes.class` is read and written.
   */
  constructor(node) {
    this.node = node;
  }

  /**
   * Current class names, in attribute order.
   * Splits on any whitespace run (space, tab, newline) and treats a missing
   * or non-string attribute (e.g. a valueless `class` stored as `true`) as empty.
   * @returns {string[]}
   */
  get classes() {
    const raw = this.node.attributes.class;
    if (typeof raw !== "string") return [];
    return raw.split(/\s+/).filter(Boolean);
  }

  /** @param {string[]} val - Class names to store, joined with single spaces. */
  set classes(val) {
    this.node.attributes.class = val.join(" ");
  }

  /**
   * @param {string} className
   * @returns {boolean} True when the class is present.
   */
  contains(className) {
    return this.classes.includes(className);
  }

  /** Adds the class unless it is already present. */
  add(className) {
    const currentClasses = this.classes;
    if (!currentClasses.includes(className)) this.classes = [...currentClasses, className];
  }

  /** Removes every occurrence of the class. */
  remove(className) {
    this.classes = this.classes.filter((cls) => cls !== className);
  }

  /** Removes the class when present, adds it otherwise. */
  toggle(className) {
    if (this.classes.includes(className)) this.remove(className);
    else this.add(className);
  }

  /** Swaps `oldClass` for `newClass`; does nothing when `oldClass` is absent. */
  replace(oldClass, newClass) {
    if (this.classes.includes(oldClass)) {
      this.remove(oldClass);
      this.add(newClass);
    }
  }
}
|
|
44
44
|
/**
 * Inserts `newItem` into `arr` so that it ends up directly before the element
 * that was at `index` when the function was called. The array is mutated.
 *
 * If `newItem` is already in the array it is moved rather than duplicated.
 * Removing an earlier occurrence shifts every later element one slot left,
 * so the insertion index is corrected for that shift.
 *
 * @param {Array} arr - Array to mutate.
 * @param {number} index - Position (before any removal) to insert in front of.
 * @param {*} newItem - Item to insert or move.
 */
function insertBefore(arr, index, newItem) {
  const existingIndex = arr.indexOf(newItem);
  let target = index;
  if (existingIndex !== -1) {
    arr.splice(existingIndex, 1);
    // The reference element moved left by one when an earlier item was removed.
    if (existingIndex < index) target = index - 1;
  }
  arr.splice(target, 0, newItem);
}
|
|
49
48
|
/**
 * Element node of the lightweight document tree.
 *
 * `childNodes` holds every child (element nodes, text nodes and, for raw-text
 * elements, plain strings); `children` is the filtered element-only view.
 * Query, dataset, style and HTML-parsing features delegate to helpers defined
 * elsewhere in this file (Query, find, getDataset, buildStyle, parseHTML).
 */
class Node {
  /**
   * @param {string} tagName - Tag name as written in the source.
   * @param {Object} [attributes={}] - Attribute map (valueless attributes are `true`).
   * @param {Node|null} [parent=null] - When given, the new node is appended to its childNodes.
   */
  constructor(tagName, attributes = {}, parent = null) {
    this.isSingle = false;
    this.tagName = tagName;
    this.attributes = attributes;
    this.childNodes = [];
    if (parent !== null) parent.childNodes.push(this);
    this.parent = parent;
    // Lazily created helper objects, see the classList/style/dataset getters.
    this._classList = null;
    this.__style = null;
    this._dataset = null;
  }

  /** @returns {*} The `id` attribute, or null when it is missing or falsy. */
  get id() {
    return this.attributes.id ? this.attributes.id : null;
  }

  set id(newValue) {
    this.attributes.id = newValue;
  }

  /** @returns {*} The raw `class` attribute, or null when it is missing or falsy. */
  get className() {
    return this.attributes.class || null;
  }

  get parentNode() {
    return this.parent;
  }

  /**
   * Ancestors ordered from the outermost down to the direct parent.
   * The walk stops at a node tagged 'ROOT' or at a detached top, whichever comes first.
   * @returns {Node[]}
   */
  get ancestors() {
    const chain = [];
    let element = this.parent;
    while (element && element.tagName !== "ROOT") {
      chain.push(element);
      element = element.parent;
    }
    return chain.reverse();
  }

  /** @returns {number|null} Position inside parent.childNodes (-1 if absent), null without a parent. */
  get childNodeIndex() {
    if (!this.parent) return null;
    return this.parent.childNodes ? this.parent.childNodes.indexOf(this) : null;
  }

  /** @returns {number|null} Position inside parent.children (-1 if absent), null without a parent. */
  get childIndex() {
    if (!this.parent) return null;
    return this.parent.children ? this.parent.children.indexOf(this) : null;
  }

  get previousElementSibling() {
    return this.prev;
  }

  /** @returns {Node|null} Previous element sibling, or null for the first child / detached node. */
  get prev() {
    const index = this.childIndex;
    if (index === null || index <= 0) return null;
    return this.parent.children[index - 1] || null;
  }

  get nextElementSibling() {
    return this.next;
  }

  /** @returns {Node|null} Next element sibling, or null for the last child / detached node. */
  get next() {
    const index = this.childIndex;
    // Index 0 is a valid position, so only null and -1 mean "no sibling list".
    if (index === null || index < 0) return null;
    return this.parent.children[index + 1] || null;
  }

  get dataset() {
    if (!this._dataset) this._dataset = getDataset(this);
    return this._dataset;
  }

  get classList() {
    if (!this._classList) this._classList = new NodeClassList(this);
    return this._classList;
  }

  get style() {
    if (!this.__style) this.__style = buildStyle(this.attributes);
    return this.__style;
  }

  /**
   * Serialises the node with its attributes and children.
   * Attributes stored as boolean `true` are written bare (`<input disabled>`).
   * Values are not escaped.
   */
  get outerHTML() {
    const attrs = Object.entries(this.attributes)
      .map(([key, val]) => (val === true ? key : `${key}="${val}"`))
      .join(" ");
    return `<${this.tagName}${attrs ? " " + attrs : ""}>${this.innerHTML}</${this.tagName}>`;
  }

  /** @returns {*} The stored attribute value (including ""), or null when the attribute is not set. */
  getAttribute(attrName) {
    const value = this.attributes[attrName];
    return value === undefined ? null : value;
  }

  setAttribute(attrName, value) {
    this.attributes[attrName] = value;
  }

  removeAttribute(attrName) {
    delete this.attributes[attrName];
  }

  /** Detaches this node from its parent's childNodes; no-op when it is not listed there. */
  remove() {
    if (!this.parent) return;
    // childNodes also contains non-element children, so the element-only index would be wrong here.
    const index = this.childNodeIndex;
    if (index !== null && index >= 0) this.parent.childNodes.splice(index, 1);
  }

  get innerHTML() {
    return this.childNodes
      .map((child) => {
        if (child instanceof Node || child instanceof SingleNode) return child.outerHTML;
        else if (child instanceof TextNode) return child.textContent;
        else return child;
      })
      .join("");
  }

  $$(query) {
    return this.querySelectorAll(query);
  }

  querySelectorAll(query) {
    const selectors = Query.get(query);
    return find(selectors, this, new Set());
  }

  $(query) {
    return this.querySelector(query);
  }

  querySelector(query) {
    const selectors = Query.get(query);
    return find(selectors, this, new Set(), true)[0] || null;
  }

  getElementsByClassName(query) {
    return this.querySelectorAll("." + query);
  }

  getElementsByTagName(query) {
    return this.querySelectorAll(query);
  }

  getElementById(query) {
    return this.querySelector("#" + query);
  }

  /** @returns {Node[]} Child Node instances, excluding '#comment' nodes. */
  get children() {
    return this.childNodes.filter((child) => {
      if (!(child instanceof Node)) return false;
      if (child.tagName === "#comment") return false;
      return true;
    });
  }

  /**
   * Inserts `newElement` relative to this node.
   *
   * @param {string} position - 'beforebegin' | 'afterbegin' | 'beforeend' | 'afterend' (case-insensitive).
   * @param {Node} newElement - Node to insert; a non-empty ROOT wrapper is replaced by its first child.
   * @returns {Node} The inserted node. It is returned untouched when the position is
   *   unknown or when a sibling position is requested on a node without a parent.
   */
  insertAdjacentElement(position, newElement) {
    if (newElement.tagName === "ROOT" && newElement.childNodes.length > 0) newElement = newElement.childNodes[0];
    const pos = position.toLowerCase();

    // Child positions: the new node belongs to this node.
    if (pos === "afterbegin" || pos === "beforeend") {
      if (pos === "afterbegin") this.childNodes.unshift(newElement);
      else this.childNodes.push(newElement);
      newElement.parent = this;
      return newElement;
    }

    // Sibling positions: the new node belongs to this node's parent.
    if (!this.parent) return newElement;
    if (pos === "beforebegin") insertBefore(this.parent.childNodes, this.childNodeIndex, newElement);
    else if (pos === "afterend") this.parent.childNodes.splice(this.childNodeIndex + 1, 0, newElement);
    else return newElement;
    newElement.parent = this.parent;
    return newElement;
  }

  /**
   * Parses `html` and inserts the resulting top-level nodes at `position`,
   * keeping their source order.
   * @returns {Node} The ROOT node produced by parseHTML.
   */
  insertAdjacentHTML(position, html) {
    const newNode = parseHTML(html);
    const pos = String(position).toLowerCase();
    const nodes = [...newNode.childNodes];
    // 'afterbegin' and 'afterend' insert at a fixed spot, so feeding the nodes
    // back-to-front keeps them in source order; the other positions append forward.
    if (pos === "afterbegin" || pos === "afterend") nodes.reverse();
    nodes.forEach((node) => {
      this.insertAdjacentElement(position, node);
    });
    return newNode;
  }

  insertAdjacentText(position, text) {
    return this.insertAdjacentElement(position, new TextNode(text));
  }

  /** Replaces all children with the nodes parsed from `html` and re-parents them to this node. */
  set innerHTML(html) {
    const parsed = parseHTML(html);
    parsed.childNodes.forEach((child) => {
      if (child && typeof child === "object") child.parent = this;
    });
    this.childNodes = parsed.childNodes;
  }

  /** Replaces this node inside its parent with the nodes parsed from `html`. */
  set outerHTML(html) {
    const parsed = parseHTML(html);
    if (!this.parent) return console.log('element has no parent node');
    const index = this.childNodeIndex;
    if (index === null || index < 0) return;
    parsed.childNodes.forEach((child) => {
      if (child && typeof child === "object") child.parent = this.parent;
    });
    this.parent.childNodes.splice(index, 1, ...parsed.childNodes);
  }

  /**
   * Appends a node (moving it out of its previous parent) or a string (wrapped in a TextNode).
   * @returns {*} The appended node; any other kind of argument is returned unchanged and not appended.
   */
  appendChild(newChild) {
    if (newChild instanceof Node || newChild instanceof TextNode || newChild instanceof SingleNode) {
      if (newChild.parent) newChild.parent.childNodes = newChild.parent.childNodes.filter((child) => child !== newChild);
    } else if (typeof newChild === "string") newChild = new TextNode(newChild);
    else return newChild;
    this.childNodes.push(newChild);
    newChild.parent = this;
    return newChild;
  }

  /** Text of all descendants; the pieces contributed by each child are joined with a single space. */
  get textContent() {
    if (this.childNodes.length === 0) return this.nodeName === "#text" ? this.nodeValue : "";
    return this.childNodes
      .map((child) => {
        if (child instanceof SingleNode) return "";
        if (child instanceof TextNode) return child.nodeValue;
        if (child instanceof Node) return child.textContent;
        else return child;
      })
      .join(" ");
  }

  /** Replaces all children with the string form of `value` (nothing for null/undefined). */
  set textContent(value) {
    this.childNodes = [];
    if (value !== null && value !== undefined) {
      this.childNodes.push(value.toString());
    }
  }
}
|
|
200
200
|
/**
 * Element node of the lightweight document tree.
 *
 * `childNodes` holds every child (element nodes, text nodes and, for raw-text
 * elements, plain strings); `children` is the filtered element-only view.
 * Query, dataset, style and HTML-parsing features delegate to helpers defined
 * elsewhere in this file (Query, find, getDataset, buildStyle, parseHTML).
 */
class Node {
  /**
   * @param {string} tagName - Tag name as written in the source.
   * @param {Object} [attributes={}] - Attribute map (valueless attributes are `true`).
   * @param {Node|null} [parent=null] - When given, the new node is appended to its childNodes.
   */
  constructor(tagName, attributes = {}, parent = null) {
    this.isSingle = false;
    this.tagName = tagName;
    this.attributes = attributes;
    this.childNodes = [];
    if (parent !== null) parent.childNodes.push(this);
    this.parent = parent;
    // Lazily created helper objects, see the classList/style/dataset getters.
    this._classList = null;
    this.__style = null;
    this._dataset = null;
  }

  /** @returns {*} The `id` attribute, or null when it is missing or falsy. */
  get id() {
    return this.attributes.id ? this.attributes.id : null;
  }

  set id(newValue) {
    this.attributes.id = newValue;
  }

  /** @returns {*} The raw `class` attribute, or null when it is missing or falsy. */
  get className() {
    return this.attributes.class || null;
  }

  get parentNode() {
    return this.parent;
  }

  /**
   * Ancestors ordered from the outermost down to the direct parent.
   * The walk stops at a node tagged 'ROOT' or at a detached top, whichever comes first.
   * @returns {Node[]}
   */
  get ancestors() {
    const chain = [];
    let element = this.parent;
    while (element && element.tagName !== "ROOT") {
      chain.push(element);
      element = element.parent;
    }
    return chain.reverse();
  }

  /** @returns {number|null} Position inside parent.childNodes (-1 if absent), null without a parent. */
  get childNodeIndex() {
    if (!this.parent) return null;
    return this.parent.childNodes ? this.parent.childNodes.indexOf(this) : null;
  }

  /** @returns {number|null} Position inside parent.children (-1 if absent), null without a parent. */
  get childIndex() {
    if (!this.parent) return null;
    return this.parent.children ? this.parent.children.indexOf(this) : null;
  }

  get previousElementSibling() {
    return this.prev;
  }

  /** @returns {Node|null} Previous element sibling, or null for the first child / detached node. */
  get prev() {
    const index = this.childIndex;
    if (index === null || index <= 0) return null;
    return this.parent.children[index - 1] || null;
  }

  get nextElementSibling() {
    return this.next;
  }

  /** @returns {Node|null} Next element sibling, or null for the last child / detached node. */
  get next() {
    const index = this.childIndex;
    // Index 0 is a valid position, so only null and -1 mean "no sibling list".
    if (index === null || index < 0) return null;
    return this.parent.children[index + 1] || null;
  }

  get dataset() {
    if (!this._dataset) this._dataset = getDataset(this);
    return this._dataset;
  }

  get classList() {
    if (!this._classList) this._classList = new NodeClassList(this);
    return this._classList;
  }

  get style() {
    if (!this.__style) this.__style = buildStyle(this.attributes);
    return this.__style;
  }

  /**
   * Serialises the node with its attributes and children.
   * Attributes stored as boolean `true` are written bare (`<input disabled>`).
   * Values are not escaped.
   */
  get outerHTML() {
    const attrs = Object.entries(this.attributes)
      .map(([key, val]) => (val === true ? key : `${key}="${val}"`))
      .join(" ");
    return `<${this.tagName}${attrs ? " " + attrs : ""}>${this.innerHTML}</${this.tagName}>`;
  }

  /** @returns {*} The stored attribute value (including ""), or null when the attribute is not set. */
  getAttribute(attrName) {
    const value = this.attributes[attrName];
    return value === undefined ? null : value;
  }

  setAttribute(attrName, value) {
    this.attributes[attrName] = value;
  }

  removeAttribute(attrName) {
    delete this.attributes[attrName];
  }

  /** Detaches this node from its parent's childNodes; no-op when it is not listed there. */
  remove() {
    if (!this.parent) return;
    const index = this.childNodeIndex;
    // indexOf reports -1 for "not found"; splicing at -1 would delete the last sibling.
    if (index !== null && index >= 0) this.parent.childNodes.splice(index, 1);
  }

  get innerHTML() {
    return this.childNodes
      .map((child) => {
        if (child instanceof Node || child instanceof SingleNode) return child.outerHTML;
        else if (child instanceof TextNode) return child.textContent;
        else return child;
      })
      .join("");
  }

  $$(query) {
    return this.querySelectorAll(query);
  }

  querySelectorAll(query) {
    const selectors = Query.get(query);
    return find(selectors, this, new Set());
  }

  $(query) {
    return this.querySelector(query);
  }

  querySelector(query) {
    const selectors = Query.get(query);
    return find(selectors, this, new Set(), true)[0] || null;
  }

  getElementsByClassName(query) {
    return this.querySelectorAll("." + query);
  }

  getElementsByTagName(query) {
    return this.querySelectorAll(query);
  }

  getElementById(query) {
    return this.querySelector("#" + query);
  }

  /** @returns {Node[]} Child Node instances, excluding '#comment' nodes. */
  get children() {
    return this.childNodes.filter((child) => {
      if (!(child instanceof Node)) return false;
      if (child.tagName === "#comment") return false;
      return true;
    });
  }

  /**
   * Inserts `newElement` relative to this node.
   *
   * @param {string} position - 'beforebegin' | 'afterbegin' | 'beforeend' | 'afterend' (case-insensitive).
   * @param {Node} newElement - Node to insert; a non-empty ROOT wrapper is replaced by its first child.
   * @returns {Node} The inserted node. It is returned untouched when the position is
   *   unknown or when a sibling position is requested on a node without a parent.
   */
  insertAdjacentElement(position, newElement) {
    if (newElement.tagName === "ROOT" && newElement.childNodes.length > 0) newElement = newElement.childNodes[0];
    const pos = position.toLowerCase();

    // Child positions: the new node belongs to this node.
    if (pos === "afterbegin" || pos === "beforeend") {
      if (pos === "afterbegin") this.childNodes.unshift(newElement);
      else this.childNodes.push(newElement);
      newElement.parent = this;
      return newElement;
    }

    // Sibling positions: the new node belongs to this node's parent.
    if (!this.parent) return newElement;
    if (pos === "beforebegin") insertBefore(this.parent.childNodes, this.childNodeIndex, newElement);
    else if (pos === "afterend") this.parent.childNodes.splice(this.childNodeIndex + 1, 0, newElement);
    else return newElement;
    newElement.parent = this.parent;
    return newElement;
  }

  /**
   * Parses `html` and inserts the resulting top-level nodes at `position`,
   * keeping their source order.
   * @returns {Node} The ROOT node produced by parseHTML.
   */
  insertAdjacentHTML(position, html) {
    const newNode = parseHTML(html);
    const pos = String(position).toLowerCase();
    const nodes = [...newNode.childNodes];
    // 'afterbegin' and 'afterend' insert at a fixed spot, so feeding the nodes
    // back-to-front keeps them in source order; the other positions append forward.
    if (pos === "afterbegin" || pos === "afterend") nodes.reverse();
    nodes.forEach((node) => {
      this.insertAdjacentElement(position, node);
    });
    return newNode;
  }

  insertAdjacentText(position, text) {
    return this.insertAdjacentElement(position, new TextNode(text));
  }

  /** Replaces all children with the nodes parsed from `html` and re-parents them to this node. */
  set innerHTML(html) {
    const parsed = parseHTML(html);
    parsed.childNodes.forEach((child) => {
      if (child && typeof child === "object") child.parent = this;
    });
    this.childNodes = parsed.childNodes;
  }

  /** Replaces this node inside its parent with the nodes parsed from `html`. */
  set outerHTML(html) {
    const parsed = parseHTML(html);
    if (!this.parent) return console.log('element has no parent node');
    // childNodes also contains non-element children, so the element-only index would be wrong here.
    const index = this.childNodeIndex;
    if (index === null || index < 0) return;
    parsed.childNodes.forEach((child) => {
      if (child && typeof child === "object") child.parent = this.parent;
    });
    this.parent.childNodes.splice(index, 1, ...parsed.childNodes);
  }

  /**
   * Appends a node (moving it out of its previous parent) or a string (wrapped in a TextNode).
   * @returns {*} The appended node; any other kind of argument is returned unchanged and not appended.
   */
  appendChild(newChild) {
    if (newChild instanceof Node || newChild instanceof TextNode || newChild instanceof SingleNode) {
      if (newChild.parent) newChild.parent.childNodes = newChild.parent.childNodes.filter((child) => child !== newChild);
    } else if (typeof newChild === "string") newChild = new TextNode(newChild);
    else return newChild;
    this.childNodes.push(newChild);
    newChild.parent = this;
    return newChild;
  }

  /** Text of all descendants; the pieces contributed by each child are joined with a single space. */
  get textContent() {
    if (this.childNodes.length === 0) return this.nodeName === "#text" ? this.nodeValue : "";
    return this.childNodes
      .map((child) => {
        if (child instanceof SingleNode) return "";
        if (child instanceof TextNode) return child.nodeValue;
        if (child instanceof Node) return child.textContent;
        else return child;
      })
      .join(" ");
  }

  /** Replaces all children with the string form of `value` (nothing for null/undefined). */
  set textContent(value) {
    this.childNodes = [];
    if (value !== null && value !== undefined) {
      this.childNodes.push(value.toString());
    }
  }
}
|
|
352
352
|
/**
 * Childless node: void/self-closing tags, `<!doctype>`, `<?xml ?>` and CDATA sections.
 * Every child-related accessor and mutator is overridden to be empty / a no-op.
 */
class SingleNode extends Node {
  /**
   * @param {string} tagName
   * @param {Object} [attributes={}] - For '?xml' a parsed '?' entry (the trailing
   *   question mark of the declaration) is removed from this object.
   * @param {Node|null} [parent=null]
   */
  constructor(tagName, attributes = {}, parent = null) {
    if (attributes["?"] && tagName === "?xml") delete attributes["?"];
    super(tagName, attributes, parent);
    this.isSingle = true;
  }

  /**
   * Serialises the tag without a closing tag.
   * Attributes stored as boolean `true` are written bare, so `<!DOCTYPE html>`
   * and `<input disabled>` keep their form, and no stray space is emitted when
   * there are no attributes. CDATA sections are rebuilt from `nodeValue`.
   */
  get outerHTML() {
    if (this.tagName === "#cdata-section") return `<![CDATA[${this.nodeValue}]]>`;
    const attrs = Object.entries(this.attributes)
      .map(([key, val]) => (val === true ? key : `${key}="${val}"`))
      .join(" ");
    const closer = this.tagName === "?xml" ? "?" : "";
    return `<${this.tagName}${attrs ? " " + attrs : ""}${closer}>`;
  }

  get innerHTML() { return ""; }
  set innerHTML(_) { }

  // Queries never match anything because there are no descendants.
  $(_) { return null; }
  $$(_) { return []; }
  querySelectorAll(_) { return []; }
  querySelector(_) { return null; }
  getElementsByClassName(_) { return []; }
  getElementsByTagName(_) { return []; }
  getElementById(_) { return null; }
  get children() { return []; }

  // Insertion methods are deliberately inert and return undefined.
  insertAdjacentElement(_, __) { }
  insertAdjacentHTML(_, __) { }
  insertAdjacentText(_, __) { }
  appendChild(_) { }

  get textContent() { return ""; }
  set textContent(_) { }
}
|
|
380
380
|
/**
 * Parses the attribute portion of a tag into a plain object.
 *
 * Supported forms:
 *   name="value" / name='value'  -> string, trimmed
 *   name=value                   -> string, ends at the next whitespace
 *   name                         -> true
 * Attributes may be separated by any ASCII whitespace (space, tab, newline,
 * carriage return, form feed) and whitespace around '=' is ignored.
 * An unterminated quote takes the rest of the input as its value.
 *
 * @param {string} str - Text between the tag name and the closing '>' (or '/').
 * @returns {Object<string, string|true>} Attribute map; later duplicates overwrite earlier ones.
 */
function parseAttributes(str) {
  const attrs = {};
  const isSpace = (ch) => ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === "\f";
  const len = str.length;
  let i = 0;

  const skipSpaces = () => {
    while (i < len && isSpace(str[i])) i++;
  };

  while (i < len) {
    skipSpaces();
    if (i >= len) break;

    // Attribute name: everything up to whitespace or '='.
    const nameStart = i;
    while (i < len && !isSpace(str[i]) && str[i] !== "=") i++;
    const name = str.slice(nameStart, i);

    skipSpaces();
    if (str[i] !== "=") {
      // No value follows: boolean attribute.
      if (name) attrs[name] = true;
      continue;
    }
    i++; // consume '='
    skipSpaces();

    const quote = str[i];
    if (quote === '"' || quote === "'") {
      const close = str.indexOf(quote, i + 1);
      const end = close === -1 ? len : close;
      attrs[name] = str.slice(i + 1, end).trim();
      i = close === -1 ? len : close + 1;
    } else {
      const valueStart = i;
      while (i < len && !isSpace(str[i])) i++;
      const value = str.slice(valueStart, i);
      // "name=" with nothing after it is treated like a valueless attribute.
      attrs[name] = value ? value : true;
    }
  }
  return attrs;
}
|
|
411
411
|
// Lower-case tag names that parseHTML always emits as SingleNode (no children, no closing tag),
// in addition to any tag written in self-closing form. Looked up with tagName.toLowerCase().
const VOID_TAGS=new Set(["area","base","br","col","command","embed","hr","img","input","keygen","link","meta","param","source","track","wbr","!doctype",'?xml']);
|
|
412
|
-
function parseHTML(html){
|
|
413
412
|
const root=new Node("ROOT");
|
|
414
413
|
const stack=[root];
|
|
415
414
|
let currentText="",i=0;
|
|
416
415
|
let max=0
|
|
417
416
|
function parseScript(){
|
|
418
417
|
if (!html.startsWith("<script",i)) return false;
|
|
419
418
|
const openTagEnd=html.indexOf(">",i);
|
|
420
419
|
if (openTagEnd===-1) return false;
|
|
421
420
|
const attributesString=html.substring(i+7,openTagEnd).trim();
|
|
422
421
|
const attributes=parseAttributes(attributesString);
|
|
423
422
|
let closeTagStart=html.indexOf("</script>",openTagEnd);
|
|
424
423
|
if (closeTagStart===-1) return false;
|
|
425
424
|
const content=html.substring(openTagEnd+1,closeTagStart);
|
|
426
425
|
const scriptNode=new Node('script',attributes,stack[stack.length-1]);
|
|
427
426
|
if(content.length>0) scriptNode.childNodes.push(content);
|
|
428
427
|
i=closeTagStart+9;
|
|
429
428
|
return true;
|
|
430
429
|
}
|
|
431
430
|
function parseSpecial(startStr,endStr,n1,n2,tag){
|
|
432
431
|
if (!html.startsWith(startStr,i)) return false
|
|
433
432
|
const end=html.indexOf(endStr,i+n1);
|
|
434
433
|
const strNode=new Node(tag,{},stack[stack.length-1]);
|
|
435
434
|
strNode.childNodes.push(html.substring(i+n1,end));
|
|
436
435
|
i=end+n2;
|
|
437
436
|
return true
|
|
438
437
|
}
|
|
439
438
|
while (i< html.length){
|
|
440
439
|
if(i>=max) max=i;
|
|
441
440
|
else break;
|
|
442
441
|
if (parseScript()) continue
|
|
443
442
|
if (parseSpecial("<!--","-->",4,3,'#comment')) continue
|
|
444
443
|
if (parseSpecial("<style","</style>",7,8,'style')) continue
|
|
445
444
|
if (html.startsWith("<![CDATA[",i)){
|
|
446
445
|
const end=html.indexOf("]]>",i+9);
|
|
447
446
|
if (end===-1) break;
|
|
448
447
|
const content=html.substring(i+9,end);
|
|
449
448
|
const cdataNode=new SingleNode("#cdata-section",{},stack[stack.length-1]);
|
|
450
449
|
cdataNode.nodeValue=content;
|
|
451
450
|
i=end+3;
|
|
452
451
|
continue;
|
|
453
452
|
}
|
|
454
453
|
if (html.startsWith("<",i)){
|
|
455
454
|
if (currentText && stack[stack.length-1]){
|
|
456
455
|
stack[stack.length-1].childNodes.push(new TextNode(currentText));
|
|
457
456
|
currentText="";
|
|
458
457
|
}
|
|
459
458
|
let tagEnd=i+1;
|
|
460
459
|
let insideQuotes=false;
|
|
461
460
|
let quoteChar=null;
|
|
462
461
|
while (tagEnd< html.length){
|
|
463
462
|
const char=html[tagEnd];
|
|
464
463
|
if (!insideQuotes && (char==='"' || char==="'")){
|
|
465
464
|
insideQuotes=true;
|
|
466
465
|
quoteChar=char;
|
|
467
466
|
} else if (insideQuotes && char===quoteChar){
|
|
468
467
|
insideQuotes=false;
|
|
469
468
|
quoteChar=null;
|
|
470
469
|
}
|
|
471
470
|
if (!insideQuotes && char==='>') break;
|
|
472
471
|
tagEnd++;
|
|
473
472
|
}
|
|
474
473
|
const tagContent=html.substring(i+1,tagEnd);
|
|
475
474
|
if (tagContent.startsWith("/")) stack.pop();
|
|
476
475
|
else{
|
|
477
476
|
let isSelfClosing=tagContent.endsWith('/');
|
|
478
477
|
const tagNameEnd=tagContent.search(/\s|>|\//);
|
|
479
478
|
const tagName=tagContent.substring(0,tagNameEnd>0 ? tagNameEnd : tagEnd-i-1);
|
|
480
479
|
const attributesString=tagContent.substring(tagName.length,isSelfClosing ? tagContent.length-1 : tagContent.length).trim();
|
|
481
480
|
const attributes=parseAttributes(attributesString);
|
|
482
481
|
if (VOID_TAGS.has(tagName.toLowerCase()) || isSelfClosing) new SingleNode(tagName,attributes,stack[stack.length-1])
|
|
483
482
|
else stack.push(new Node(tagName,attributes,stack[stack.length-1]));
|
|
484
483
|
}
|
|
485
484
|
i=tagEnd+1;
|
|
486
485
|
} else{
|
|
487
486
|
currentText+=html[i];
|
|
488
487
|
i++;
|
|
489
488
|
}
|
|
490
489
|
}
|
|
491
490
|
if (currentText.trim() && stack[stack.length-1]) stack[stack.length-1].childNodes.push(new TextNode(currentText));
|
|
492
491
|
return root;
|
|
492
|
+
/**
 * Parses an HTML/XML string into a tree of Node / SingleNode / TextNode objects.
 *
 * The parser is a single forward scan with a stack of open elements:
 *  - `<script>` and `<style>` bodies are stored as one raw string child,
 *  - comments become '#comment' nodes holding their text as a raw string child,
 *  - CDATA sections become '#cdata-section' SingleNodes with `nodeValue`,
 *  - void tags (VOID_TAGS) and self-closing tags become SingleNodes,
 *  - any closing tag pops the innermost open element (names are not matched).
 *
 * Pending text is flushed before every node that is created, so siblings keep
 * their source order, and every text node gets its `parent` set.
 *
 * @param {string} html - Markup to parse.
 * @returns {Node} Synthetic 'ROOT' node whose childNodes are the top-level nodes.
 */
function parseHTML(html) {
  const root = new Node("ROOT");
  const stack = [root];
  let currentText = "";
  let i = 0;

  const top = () => stack[stack.length - 1];

  // Turns the buffered characters into a TextNode under the innermost open element.
  function flushText() {
    if (!currentText) return;
    const textNode = new TextNode(currentText);
    top().childNodes.push(textNode);
    textNode.parent = top();
    currentText = "";
  }

  // Handles <script>/<style>: the body is kept verbatim instead of being parsed as markup.
  // Returns false (consuming nothing) when the opening or closing tag cannot be found.
  function parseRawText(tag) {
    const open = `<${tag}`;
    if (!html.startsWith(open, i)) return false;
    const openTagEnd = html.indexOf(">", i);
    if (openTagEnd === -1) return false;
    const close = `</${tag}>`;
    const closeTagStart = html.indexOf(close, openTagEnd);
    if (closeTagStart === -1) return false;
    const attributes = parseAttributes(html.substring(i + open.length, openTagEnd).trim());
    const content = html.substring(openTagEnd + 1, closeTagStart);
    flushText();
    const rawNode = new Node(tag, attributes, top());
    if (content.length > 0) rawNode.childNodes.push(content);
    i = closeTagStart + close.length;
    return true;
  }

  // Handles <!-- ... -->; an unterminated comment runs to the end of the input.
  function parseComment() {
    if (!html.startsWith("<!--", i)) return false;
    const end = html.indexOf("-->", i + 4);
    const stop = end === -1 ? html.length : end;
    flushText();
    const commentNode = new Node("#comment", {}, top());
    commentNode.childNodes.push(html.substring(i + 4, stop));
    i = end === -1 ? html.length : end + 3;
    return true;
  }

  while (i < html.length) {
    if (parseRawText("script")) continue;
    if (parseComment()) continue;
    if (parseRawText("style")) continue;

    if (html.startsWith("<![CDATA[", i)) {
      const end = html.indexOf("]]>", i + 9);
      if (end === -1) break; // unterminated CDATA: stop parsing, as before
      flushText();
      const cdataNode = new SingleNode("#cdata-section", {}, top());
      cdataNode.nodeValue = html.substring(i + 9, end);
      i = end + 3;
      continue;
    }

    if (!html.startsWith("<", i)) {
      currentText += html[i];
      i++;
      continue;
    }

    flushText();

    // Find the '>' that ends this tag, ignoring any '>' inside quoted attribute values.
    let tagEnd = i + 1;
    let quoteChar = null;
    while (tagEnd < html.length) {
      const char = html[tagEnd];
      if (quoteChar === null && (char === '"' || char === "'")) quoteChar = char;
      else if (quoteChar !== null && char === quoteChar) quoteChar = null;
      if (quoteChar === null && char === ">") break;
      tagEnd++;
    }

    const tagContent = html.substring(i + 1, tagEnd);
    if (tagContent.startsWith("/")) {
      // Never pop ROOT: surplus closing tags would otherwise leave later nodes without a parent.
      if (stack.length > 1) stack.pop();
    } else {
      const isSelfClosing = tagContent.endsWith("/");
      const tagNameEnd = tagContent.search(/\s|>|\//);
      const tagName = tagContent.substring(0, tagNameEnd > 0 ? tagNameEnd : tagEnd - i - 1);
      const attributesString = tagContent
        .substring(tagName.length, isSelfClosing ? tagContent.length - 1 : tagContent.length)
        .trim();
      const attributes = parseAttributes(attributesString);
      if (VOID_TAGS.has(tagName.toLowerCase()) || isSelfClosing) new SingleNode(tagName, attributes, top());
      else stack.push(new Node(tagName, attributes, top()));
    }
    i = tagEnd + 1;
  }

  // Trailing text is kept only when it is not pure whitespace.
  if (currentText.trim()) flushText();
  return root;
}
|
|
576
576
|
export default { parseHTML, Node, Query, TextNode, SingleNode }
|
package/package.json
CHANGED
package/src/node/node.js
CHANGED
package/src/parse/parser.js
CHANGED
|
@@ -52,7 +52,9 @@ function parseHTML(html) {
|
|
|
52
52
|
|
|
53
53
|
if (html.startsWith("<", i)) {
|
|
54
54
|
if (currentText && stack[stack.length - 1]) {
|
|
55
|
-
|
|
55
|
+
const textNode = new TextNode(currentText)
|
|
56
|
+
stack[stack.length - 1].childNodes.push(textNode);
|
|
57
|
+
textNode.parent = stack[stack.length - 1]
|
|
56
58
|
currentText = "";
|
|
57
59
|
}
|
|
58
60
|
|
package/tests/index.html
CHANGED
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
<title>Document</title>
|
|
7
7
|
<script src="/node_modules/als-simple-test/test.js"></script>
|
|
8
8
|
<script src="../document.js"></script>
|
|
9
|
-
<script src="./data/html1.js"></script>
|
|
10
|
-
|
|
9
|
+
<!-- <script src="./data/html1.js"></script> -->
|
|
10
|
+
<script src="./data/html2.js"></script>
|
|
11
11
|
<script src="./data/svg.js"></script>
|
|
12
12
|
<script>
|
|
13
13
|
const { parseHTML, Node, Query, TextNode, SingleNode } = alsDocument
|
package/tests/parse-real.js
CHANGED
|
@@ -32,8 +32,9 @@ describe('Real data html1', async () => {
|
|
|
32
32
|
it('Text nodes check', () => {
|
|
33
33
|
const realParagraph = iframe.querySelector('p');
|
|
34
34
|
const parsedParagraph = parsedHTML.querySelector('p');
|
|
35
|
-
const real = realParagraph.textContent.trim().replace(/\n
|
|
36
|
-
const parsed = parsedParagraph.textContent.trim()
|
|
35
|
+
const real = realParagraph.textContent.trim().replace(/\n|\s/gm,'')
|
|
36
|
+
const parsed = parsedParagraph.textContent.trim().replace(/\n|\s/gm,'')
|
|
37
|
+
console.log({parsed,real})
|
|
37
38
|
assert(real === parsed, 'Text contents are the same');
|
|
38
39
|
});
|
|
39
40
|
|