als-document 1.0.8-alpha → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/document.js +10 -7
- package/index.js +10 -7
- package/index.mjs +10 -7
- package/package.json +1 -1
- package/readme.md +14 -4
- package/src/build.js +2 -2
- package/src/node/node.js +38 -23
- package/src/node/root.js +11 -0
- package/src/node/single-node.js +4 -3
- package/src/parse/cache.js +4 -2
- package/src/parse/parser.js +1 -1
- package/src/query/query.js +13 -6
- package/tests/index.html +3 -3
- package/tests/parser.js +1 -1
package/package.json
CHANGED
package/readme.md
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# als-document: HTML Parser & DOM Manipulation Library
|
|
2
2
|
|
|
3
|
+
|
|
3
4
|
## Overview
|
|
4
5
|
|
|
5
6
|
`als-document` is a powerful library for parsing HTML and manipulating the DOM structure on backend and frontend. It provides a robust and intuitive API for querying and interacting with DOM elements using selectors, making it a valuable tool for web developers.
|
|
@@ -8,7 +9,6 @@
|
|
|
8
9
|
* als-document is still in alpha testing. All tested features work fine, but real-world use keeps uncovering bugs and behaviors that should work differently. For example, this release changes how attributes with an empty value are stored.
|
|
9
10
|
* This release also adds a very powerful feature: caching the DOM tree as JSON and rebuilding the DOM from that cache.
|
|
10
11
|
|
|
11
|
-
|
|
12
12
|
## Installation
|
|
13
13
|
|
|
14
14
|
To install the `als-document` library, use the following npm command:
|
|
@@ -25,13 +25,13 @@ The library provides three different files to cater to different module systems:
|
|
|
25
25
|
1. **index.js**: This file uses the CommonJS module system. It's suitable for projects using Node.js or bundlers like Browserify or Webpack. The entry point in `package.json` for this file is "main".
|
|
26
26
|
|
|
27
27
|
```javascript
|
|
28
|
-
const { parseHTML, Node, Query, TextNode, SingleNode } = require('als-document');
|
|
28
|
+
const { parseHTML, Node, Query, TextNode, SingleNode,Root } = require('als-document');
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
2. **index.mjs**: This file uses the ES Modules (ESM) system. It's suitable for modern JavaScript environments that support ESM. The entry point in `package.json` for this file is "module".
|
|
32
32
|
|
|
33
33
|
```js
|
|
34
|
-
import { parseHTML, Node, Query, TextNode, SingleNode } from 'als-document';
|
|
34
|
+
import { parseHTML, Node, Query, TextNode, SingleNode, Root } from 'als-document';
|
|
35
35
|
```
|
|
36
36
|
|
|
37
37
|
3. **document.js**: By including this file, a constant variable named `alsDocument` is created, which wraps all the exports.
|
|
@@ -39,7 +39,7 @@ import { parseHTML, Node, Query, TextNode, SingleNode } from 'als-document';
|
|
|
39
39
|
```html
|
|
40
40
|
<script src="/node_modules/als-document/document.js"></script>
|
|
41
41
|
<script>
|
|
42
|
-
const { parseHTML, Node, Query, TextNode, SingleNode, buildFromCache, cacheDoc } = alsDocument
|
|
42
|
+
const { parseHTML, Node, Query, TextNode, SingleNode, buildFromCache, cacheDoc, Root } = alsDocument
|
|
43
43
|
</script>
|
|
44
44
|
```
|
|
45
45
|
|
|
@@ -105,6 +105,7 @@ Remember, the actual tree structure will be more complex and detailed, but the p
|
|
|
105
105
|
- **getElementsByClassName, getElementsByTagName, getElementById**: Get elements by class, tag, or id respectively.
|
|
106
106
|
- **insertAdjacentElement, insertAdjacentHTML, insertAdjacentText**: Insert content relative to the element.
|
|
107
107
|
- **appendChild**: Add a child node to the element.
|
|
108
|
+
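- **insert(place, element)**: Insert content relative to the element. `place` is an index 0-3 or the matching position name (`beforebegin`, `afterbegin`, `beforeend`, `afterend`); `element` is a raw HTML string, a plain text string, or an element.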
|
|
108
109
|
|
|
109
110
|
|
|
110
111
|
### SingleNode
|
|
@@ -116,6 +117,15 @@ Remember, the actual tree structure will be more complex and detailed, but the p
|
|
|
116
117
|
`TextNode` is a class that represents text content within the DOM. A TextNode holds raw text data and does not have child nodes.
|
|
117
118
|
|
|
118
119
|
|
|
120
|
+
### Root node (extends Node)
|
|
121
|
+
|
|
122
|
+
Has additional getters and setters:
|
|
123
|
+
* getter root.title
|
|
124
|
+
* setter root.title
|
|
125
|
+
* getter root.body
|
|
126
|
+
* getter root.head
|
|
127
|
+
|
|
128
|
+
|
|
119
129
|
|
|
120
130
|
### Examples:
|
|
121
131
|
|
package/src/build.js
CHANGED
|
@@ -22,7 +22,7 @@ const files = {
|
|
|
22
22
|
'query': ['query','check-element'],
|
|
23
23
|
'node':[
|
|
24
24
|
'dataset','find','text-node',
|
|
25
|
-
'style','class-list','node','single-node',
|
|
25
|
+
'style','class-list','node','single-node','root'
|
|
26
26
|
],
|
|
27
27
|
'parse':['parse-atts','void-tags','parser','cache'],
|
|
28
28
|
}
|
|
@@ -40,7 +40,7 @@ buildFileList()
|
|
|
40
40
|
function build() {
|
|
41
41
|
let content = fileList.map(filePath => readFileSync(filePath, 'utf-8')).join('\n');
|
|
42
42
|
|
|
43
|
-
const toReturn = '{ parseHTML, Node, Query, TextNode, SingleNode, buildFromCache, cacheDoc }'
|
|
43
|
+
const toReturn = '{ parseHTML, Node, Query, TextNode, SingleNode, buildFromCache, cacheDoc, Root }'
|
|
44
44
|
content = optimizeCode(content)
|
|
45
45
|
writeFileSync(join(root, 'document.js'), `const alsDocument = (function(){\n${content}\nreturn ${toReturn}\n})()`)
|
|
46
46
|
writeFileSync(join(root, 'index.js'), content + '\n' + `module.exports = ${toReturn}`)
|
package/src/node/node.js
CHANGED
|
@@ -19,10 +19,10 @@ class Node {
|
|
|
19
19
|
|
|
20
20
|
get id() { return this.attributes.id ? this.attributes.id : null; }
|
|
21
21
|
set id(newValue) { this.attributes.id = newValue; }
|
|
22
|
-
get className() {return this.attributes.class || null}
|
|
22
|
+
get className() { return this.attributes.class || null }
|
|
23
23
|
get parentNode() { return this.parent }
|
|
24
24
|
get ancestors() {
|
|
25
|
-
if(!this.parent) return []
|
|
25
|
+
if (!this.parent) return []
|
|
26
26
|
const ancestors = []
|
|
27
27
|
let element = this.parent
|
|
28
28
|
while (element.tagName !== 'ROOT') {
|
|
@@ -33,13 +33,13 @@ class Node {
|
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
get childNodeIndex() {
|
|
36
|
-
if(!this.parent) return null
|
|
37
|
-
return this.parent.childNodes ? this.parent.childNodes.indexOf(this) : null
|
|
36
|
+
if (!this.parent) return null
|
|
37
|
+
return this.parent.childNodes ? this.parent.childNodes.indexOf(this) : null
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
get childIndex() {
|
|
41
|
-
if(!this.parent) return null
|
|
42
|
-
return this.parent.children ? this.parent.children.indexOf(this) : null
|
|
40
|
+
get childIndex() {
|
|
41
|
+
if (!this.parent) return null
|
|
42
|
+
return this.parent.children ? this.parent.children.indexOf(this) : null
|
|
43
43
|
}
|
|
44
44
|
get previousElementSibling() { return this.prev }
|
|
45
45
|
get prev() {
|
|
@@ -70,11 +70,11 @@ class Node {
|
|
|
70
70
|
|
|
71
71
|
get outerHTML() {
|
|
72
72
|
const attrs = Object.entries(this.attributes).map(([key, val]) => val.length ? `${key}="${val}"` : key).join(" ");
|
|
73
|
-
return `<${this.tagName}${attrs ? ' '+attrs : ''}>${this.innerHTML}</${this.tagName}>`;
|
|
73
|
+
return `<${this.tagName}${attrs ? ' ' + attrs : ''}>${this.innerHTML}</${this.tagName}>`;
|
|
74
74
|
}
|
|
75
75
|
|
|
76
76
|
getAttribute(attrName) { return this.attributes[attrName] || null }
|
|
77
|
-
setAttribute(attrName, value) { this.attributes[attrName] = value }
|
|
77
|
+
setAttribute(attrName, value='') { this.attributes[attrName] = value }
|
|
78
78
|
removeAttribute(attrName) { delete this.attributes[attrName] }
|
|
79
79
|
|
|
80
80
|
remove() {
|
|
@@ -91,12 +91,12 @@ class Node {
|
|
|
91
91
|
}).join("");
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
$$(query) {return this.querySelectorAll(query)}
|
|
94
|
+
$$(query) { return this.querySelectorAll(query) }
|
|
95
95
|
querySelectorAll(query) {
|
|
96
96
|
const selectors = Query.get(query)
|
|
97
97
|
return find(selectors, this, new Set())
|
|
98
98
|
}
|
|
99
|
-
$(query) {return this.querySelector(query)}
|
|
99
|
+
$(query) { return this.querySelector(query) }
|
|
100
100
|
querySelector(query) {
|
|
101
101
|
const selectors = Query.get(query)
|
|
102
102
|
return find(selectors, this, new Set(), true)[0] || null
|
|
@@ -115,12 +115,15 @@ class Node {
|
|
|
115
115
|
}
|
|
116
116
|
|
|
117
117
|
insertAdjacentElement(position, newElement) {
|
|
118
|
-
if(newElement.tagName === 'ROOT' && newElement.childNodes.length > 0) newElement = newElement.childNodes[0]
|
|
118
|
+
if (newElement.tagName === 'ROOT' && newElement.childNodes.length > 0) newElement = newElement.childNodes[0]
|
|
119
119
|
const pos = position.toLowerCase();
|
|
120
|
-
if (pos ===
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
120
|
+
if (pos === 'afterbegin' || pos === 'beforeend') {
|
|
121
|
+
if (pos === "afterbegin") this.childNodes.unshift(newElement);
|
|
122
|
+
else if (pos === "beforeend") this.childNodes.push(newElement);
|
|
123
|
+
newElement.parent = this
|
|
124
|
+
return newElement
|
|
125
|
+
}
|
|
126
|
+
if (!this.parent) throw new Error("Can't insert element to element without parent")
|
|
124
127
|
if (pos === "beforebegin") insertBefore(this.parent.childNodes, this.childNodeIndex, newElement)
|
|
125
128
|
else if (pos === "afterend") this.parent.childNodes.splice(this.childNodeIndex + 1, 0, newElement);
|
|
126
129
|
newElement.parent = this.parent
|
|
@@ -129,7 +132,7 @@ class Node {
|
|
|
129
132
|
|
|
130
133
|
insertAdjacentHTML(position, html) {
|
|
131
134
|
const newNode = parseHTML(html);
|
|
132
|
-
newNode.childNodes.
|
|
135
|
+
newNode.childNodes.forEach(node => {
|
|
133
136
|
this.insertAdjacentElement(position, node);
|
|
134
137
|
});
|
|
135
138
|
return newNode
|
|
@@ -139,9 +142,21 @@ class Node {
|
|
|
139
142
|
return this.insertAdjacentElement(position, new TextNode(text));
|
|
140
143
|
}
|
|
141
144
|
|
|
145
|
+
insert(position, element) {
|
|
146
|
+
const positions = ['beforebegin', 'afterbegin', 'beforeend', 'afterend']
|
|
147
|
+
if (positions[position]) position = positions[position]
|
|
148
|
+
if (typeof element === 'string') {
|
|
149
|
+
element = element.trim()
|
|
150
|
+
if (element.startsWith('<') && element.endsWith('>')) {
|
|
151
|
+
return this.insertAdjacentHTML(position, element)
|
|
152
|
+
}
|
|
153
|
+
return this.insertAdjacentText(position, element)
|
|
154
|
+
}
|
|
155
|
+
return this.insertAdjacentElement(position, element)
|
|
156
|
+
}
|
|
157
|
+
|
|
142
158
|
set innerHTML(html) {
|
|
143
|
-
|
|
144
|
-
this.childNodes = parsed.childNodes;
|
|
159
|
+
this.childNodes = html.trim().startsWith('<') ? parseHTML(html).childNodes : [html]
|
|
145
160
|
}
|
|
146
161
|
|
|
147
162
|
set outerHTML(html) {
|
|
@@ -154,7 +169,7 @@ class Node {
|
|
|
154
169
|
appendChild(newChild) {
|
|
155
170
|
if (newChild instanceof Node || newChild instanceof TextNode || newChild instanceof SingleNode) {
|
|
156
171
|
if (newChild.parent) newChild.parent.childNodes = newChild.parent.childNodes.filter(child => child !== newChild); // Если у newChild уже есть родительский узел, необходимо его удалить оттуда
|
|
157
|
-
} else if(typeof newChild === 'string') newChild = new TextNode(newChild)
|
|
172
|
+
} else if (typeof newChild === 'string') newChild = new TextNode(newChild)
|
|
158
173
|
else return newChild
|
|
159
174
|
this.childNodes.push(newChild);
|
|
160
175
|
newChild.parent = this;
|
|
@@ -164,9 +179,9 @@ class Node {
|
|
|
164
179
|
get textContent() {
|
|
165
180
|
if (this.childNodes.length === 0) return this.nodeName === '#text' ? this.nodeValue : '';
|
|
166
181
|
return this.childNodes.map(child => { // Concatenate text content of this node and all descendants
|
|
167
|
-
if(child instanceof SingleNode) return ''
|
|
168
|
-
if(child instanceof TextNode) return child.nodeValue
|
|
169
|
-
if(child instanceof Node) return child.textContent;
|
|
182
|
+
if (child instanceof SingleNode) return ''
|
|
183
|
+
if (child instanceof TextNode) return child.nodeValue
|
|
184
|
+
if (child instanceof Node) return child.textContent;
|
|
170
185
|
else return child;
|
|
171
186
|
}).join(" ");
|
|
172
187
|
}
|
package/src/node/root.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
class Root extends Node {
|
|
2
|
+
constructor() {
|
|
3
|
+
super('ROOT',{},null);
|
|
4
|
+
this.isSingle = false
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
get body() {return this.$('body')}
|
|
8
|
+
get head() {return this.$('head')}
|
|
9
|
+
get title() {return this.$('title')}
|
|
10
|
+
set title(title) {return this.$('title').innerHTML = title}
|
|
11
|
+
}
|
package/src/node/single-node.js
CHANGED
|
@@ -5,12 +5,12 @@ class SingleNode extends Node {
|
|
|
5
5
|
this.isSingle = true
|
|
6
6
|
}
|
|
7
7
|
|
|
8
|
-
get outerHTML() { //
|
|
8
|
+
get outerHTML() { // outerHTML for single node
|
|
9
9
|
if (this.tagName === "#cdata-section") return `<![CDATA[${this.nodeValue}]]>`;
|
|
10
10
|
const attrs = Object.entries(this.attributes).map(([key, val]) => val.length ? `${key}="${val}"` : key).join(" ");
|
|
11
11
|
return `<${this.tagName} ${attrs}${this.tagName === '?xml' ? '?' : ''}>`;
|
|
12
12
|
}
|
|
13
|
-
//
|
|
13
|
+
// Disable getters, setters and methods that make no sense for a single node
|
|
14
14
|
get innerHTML() { return ""; }
|
|
15
15
|
set innerHTML(_) { }
|
|
16
16
|
$(_) {return null}
|
|
@@ -25,6 +25,7 @@ class SingleNode extends Node {
|
|
|
25
25
|
insertAdjacentHTML(_, __) { }
|
|
26
26
|
insertAdjacentText(_, __) { }
|
|
27
27
|
appendChild(_) { }
|
|
28
|
+
insert(_,__) { }
|
|
28
29
|
get textContent() { return ""; }
|
|
29
30
|
set textContent(_) { }
|
|
30
|
-
}
|
|
31
|
+
}
|
package/src/parse/cache.js
CHANGED
|
@@ -4,7 +4,7 @@ function buildFromCache(cached) {
|
|
|
4
4
|
const {isSingle,tagName,attributes,childNodes,textContent} = cache
|
|
5
5
|
if(textContent) return parent.childNodes.push(new TextNode(textContent))
|
|
6
6
|
if(isSingle) return parent.childNodes.push(new SingleNode(tagName,attributes))
|
|
7
|
-
const newDoc = new Node(tagName,attributes,parent)
|
|
7
|
+
const newDoc = tagName === 'ROOT' ? new Root() : new Node(tagName,attributes,parent)
|
|
8
8
|
childNodes.forEach(childNode => {
|
|
9
9
|
buildNode(childNode,newDoc)
|
|
10
10
|
});
|
|
@@ -20,7 +20,9 @@ function cacheDoc(doc) {
|
|
|
20
20
|
if(typeof element === 'string') return element
|
|
21
21
|
if(element.nodeName === '#text') return {textContent:element.textContent}
|
|
22
22
|
props.forEach(prop => {
|
|
23
|
-
if(element[prop])
|
|
23
|
+
if(element[prop]) {
|
|
24
|
+
cache[prop]= typeof element[prop] === 'object' ? {...element[prop]} : element[prop]
|
|
25
|
+
}
|
|
24
26
|
});
|
|
25
27
|
if(!element.childNodes) return cache
|
|
26
28
|
cache.childNodes = []
|
package/src/parse/parser.js
CHANGED
package/src/query/query.js
CHANGED
|
@@ -31,7 +31,7 @@ class Query {
|
|
|
31
31
|
|
|
32
32
|
splitAndCutLast(string, splitBy) {
|
|
33
33
|
const array = string.split(splitBy);
|
|
34
|
-
const last = array.pop();
|
|
34
|
+
const last = array.pop();
|
|
35
35
|
return [last, array];
|
|
36
36
|
}
|
|
37
37
|
|
|
@@ -104,11 +104,18 @@ class Query {
|
|
|
104
104
|
attribs = attribs.map(attrib => {
|
|
105
105
|
let query = attrib
|
|
106
106
|
attrib = attrib.replace('[', '').replace(']', '')
|
|
107
|
-
let [name
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
if
|
|
107
|
+
let [name,...values] = attrib.split('=')
|
|
108
|
+
const value = values.join('=').trim().replace(/^\"/,'').replace(/\"$/,'')
|
|
109
|
+
let sign
|
|
110
|
+
attrib = {query,name}
|
|
111
|
+
if(value) {
|
|
112
|
+
sign = '='
|
|
113
|
+
attrib.name = attrib.name.replace(/[\~\|\^\$\*]$/,(match => {
|
|
114
|
+
sign = match+sign
|
|
115
|
+
return ''
|
|
116
|
+
}))
|
|
117
|
+
attrib.value = value
|
|
118
|
+
}
|
|
112
119
|
if (sign) {
|
|
113
120
|
attrib.sign = sign
|
|
114
121
|
attrib.check = this.getAttribFn(sign).bind(attrib)
|
package/tests/index.html
CHANGED
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
<title>Document</title>
|
|
7
7
|
<script src="test.js"></script>
|
|
8
8
|
<script src="../document.js"></script>
|
|
9
|
-
<script src="./data/html1.js"></script>
|
|
10
|
-
|
|
9
|
+
<!-- <script src="./data/html1.js"></script> -->
|
|
10
|
+
<script src="./data/html2.js"></script>
|
|
11
11
|
<script src="./data/svg.js"></script>
|
|
12
12
|
<script>
|
|
13
|
-
const { parseHTML, Node, Query, TextNode, SingleNode, buildFromCache, cacheDoc } = alsDocument
|
|
13
|
+
const { parseHTML, Node, Query, TextNode, SingleNode, buildFromCache, cacheDoc,Root } = alsDocument
|
|
14
14
|
let {describe,it,beforeEach,runTests,expect,delay,assert,beforeAll} = SimpleTest
|
|
15
15
|
SimpleTest.showFullError = true
|
|
16
16
|
</script>
|
package/tests/parser.js
CHANGED
|
@@ -70,7 +70,7 @@ describe('Advanced tests', () => {
|
|
|
70
70
|
let time = Date.now() - now
|
|
71
71
|
// console.log(memoryAfter - memoryBefore)
|
|
72
72
|
assert(time < 20, `Big html (${(deepHTML.length / 1024).toFixed(2)}KB) in less then 20ms (${time}ms)`)
|
|
73
|
-
expect(result).instanceof(
|
|
73
|
+
expect(result).instanceof(Root); // or any other validation you see fit
|
|
74
74
|
});
|
|
75
75
|
|
|
76
76
|
it('handles incorrectly closed tags', () => {
|