als-document 0.1.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,14 +1,13 @@
1
1
  {
2
2
  "name": "als-document",
3
- "version": "0.1.1",
4
- "description": "virtual dom",
3
+ "version": "0.5.1",
4
+ "description": "",
5
5
  "main": "document.js",
6
6
  "scripts": {
7
- "test": "echo \"Error: no test specified\" && exit 1"
7
+ "test": "node test/test.js"
8
8
  },
9
- "keywords": [
10
- "virtual DOM"
11
- ],
9
+ "keywords": ["htmlparser","css query","cheerio","virtual dom"],
12
10
  "author": "Alex Sorkin",
13
- "license": "ISC"
11
+ "license": "ISC",
12
+ "dependencies": {}
14
13
  }
@@ -0,0 +1,287 @@
1
/**
 * Regex-driven HTML parser that turns an HTML string into a tree of plain objects.
 *
 * Pipeline (see the constructor): the bodies of <script>/<style> elements and
 * event-handler attributes that embed markup are swapped for `{{{{kind N`
 * placeholders so their `<`/`>` characters cannot confuse the tag scanner,
 * comments are dropped, the remaining string is tokenised into tags and text,
 * and finally open/close tags are paired into a tree (`root`).
 *
 * Known limitations kept from the original design: text before the first tag
 * and after the last tag is discarded, attribute values must be quoted, and
 * tag-name matching between open and close tags is case-sensitive.
 */
class HtmlParser {
  /** Tag names that never have a closing counterpart (compared case-insensitively). */
  singleTags = ['comment','area','base','br','col','command','embed','hr','img','input','keygen','link','meta','param','source','track','wbr']

  /**
   * Parse `html` and return the root node of the resulting tree.
   * @param {string} html
   * @returns {object|undefined} root node, or undefined when `html` is rejected by checkHtml.
   */
  static parse(html) {
    return new HtmlParser(html).root
  }

  /**
   * @param {string} [html=''] markup to parse. When it is empty or not a string a
   *   message is logged and the instance is left without a `root`.
   */
  constructor(html = '') {
    if (!this.checkHtml(html)) return
    this.indexes = []
    this.events = []
    this.html = this.htmlString = html
    this.removeScripts()
    this.removeStyles()
    this.removeEventStrings()
    // Comments are stripped only after scripts/styles are stashed, so comment-like
    // sequences inside script or style bodies survive untouched.
    this.htmlString = this.htmlString.replace(/<!--[\S\s]*?-->/g, '')
    this.root = this.parse()
  }

  /**
   * Validate the constructor input, logging the reason when it is unusable.
   * @param {*} html
   * @param {boolean} [isGood=false] value returned when validation fails
   * @returns {boolean}
   */
  checkHtml(html, isGood = false) {
    // The type check comes first so non-strings that loosely equal '' (0, [])
    // get the accurate message.
    if (typeof html !== 'string') console.log(`html parameter has to be string. Recieved ${typeof html}`)
    else if (html === '') console.log('html parameter is empty')
    else isGood = true
    return isGood
  }

  /**
   * Replace the body of every <tagName>…</tagName> element in `this.htmlString`
   * with a `{{{{tagName N` placeholder and store the raw body at `store[N]`.
   *
   * The substitution is anchored on the element itself: replacing the inner text
   * by value would hit the first occurrence of that text anywhere in the document
   * (and an empty body would prepend the placeholder to the whole document).
   * Bodies that are empty or whitespace-only are recorded but get no placeholder,
   * so such elements end up without a text child.
   * @param {string} tagName 'script' or 'style'
   * @param {string[]} store receives the raw bodies
   * @returns {string[]} `store`
   */
  stashRawContent(tagName, store) {
    const pattern = new RegExp(`(<${tagName}\\b[^>]*>)([\\S\\s]*?)(</${tagName}>)`, 'g')
    this.htmlString = this.htmlString.replace(pattern, (whole, open, inner, close) => {
      store.push(inner)
      if (inner.trim() === '') return whole
      return `${open}{{{{${tagName} ${store.length - 1}${close}`
    })
    return store
  }

  /** Stash all <script> bodies into `this.scripts`. */
  removeScripts(scripts = []) {
    this.scripts = this.stashRawContent('script', scripts)
  }

  /** Stash all <style> bodies (with or without attributes on the tag) into `this.styles`. */
  removeStyles(styles = []) {
    this.styles = this.stashRawContent('style', styles)
  }

  /**
   * Stash the values of on* attributes that contain markup followed by a quote
   * (e.g. onclick="show('<b>x</b>')") into `this.events`, leaving a
   * `{{{{event N` placeholder inside the original quotes.
   */
  removeEventStrings() {
    const eventsWithHtml = this.htmlString.match(/on\w*\s*?\=\s*?["|'|`](.*?(<[^>]*>)(\'|\`).*?)["|'|`]/g)
    if (eventsWithHtml === null) return
    for (const event of eventsWithHtml) {
      // Everything after the first '=' is the (still quoted) value.
      let value = event.slice(event.indexOf('=') + 1).trim()
      const quote = value[0]
      if (quote === '"' || quote === "'") {
        value = value.slice(1)
        if (value.endsWith(quote)) value = value.slice(0, -1)
      }
      this.events.push(value)
      const placeholder = `{{{{event ${this.events.length - 1}`
      // Function replacers keep `$&`-style sequences in handler code literal.
      const newEvent = event.replace(value, () => placeholder)
      this.htmlString = this.htmlString.replace(event, () => newEvent)
    }
  }

  /**
   * Tokenise `htmlString` into `this.elements` (tag objects and trimmed,
   * non-empty text strings in document order) and build the tree.
   * Text before the first tag and after the last tag is ignored.
   * @param {string} [htmlString=this.htmlString]
   * @returns {object} root node ({type:'root', children:[…]}); has no children
   *   when the input contains no tags.
   */
  parse(htmlString = this.htmlString) {
    const elements = []
    let previousEnd = null
    for (const match of htmlString.matchAll(/<[^>]*>/g)) {
      if (previousEnd !== null) {
        const text = htmlString.slice(previousEnd, match.index).trim()
        if (text.length > 0) elements.push(text)
      }
      elements.push(this.parseElement(match[0]))
      previousEnd = match.index + match[0].length
    }
    this.elements = elements
    // Start every parse with a clean "already attached" list.
    this.indexes = []
    return this.getPairs()
  }

  /**
   * Find the closing tag for the open tag at `startIndex`, record it as
   * `element.endIndex` and attach everything in between as children.
   * When no closing tag exists the element gets no children and
   * `endIndex = startIndex + 1`.
   * @returns {object} `element`, with `parent`, `children` and `endIndex` set
   */
  lookForPair(element, startIndex, parent, level) {
    element.parent = parent
    element.children = []
    const { tag } = element
    let depth = 0 // same-named elements opened since startIndex and not yet closed
    let endIndex
    for (let index = startIndex + 1; index < this.elements.length; index++) {
      const el = this.elements[index]
      if (el.tag !== tag) continue
      if (el.status === 'close') {
        if (depth === 0) {
          endIndex = index
          el.level = level
          break
        }
        depth--
      } else if (el.status === 'open') depth++
    }
    if (endIndex === undefined) endIndex = startIndex + 1
    element.endIndex = endIndex
    return this.getPairs(element, startIndex + 1, endIndex, level + 1)
  }

  /**
   * Attach the elements in [startIndex, endIndex) to `parent`, recursing into
   * open tags. Indexes already attached by a nested call are skipped.
   * @param {object} [parent] defaults to a fresh root node
   * @param {number} [startIndex=0]
   * @param {number} [endIndex=this.elements.length]
   * @param {number} [level=0] depth assigned to the attached children
   * @param {number} [childIndex=0] kept for signature compatibility and ignored:
   *   each child's `childIndex` is its position in `parent.children`.
   * @returns {object} `parent`
   */
  getPairs(parent = {type:'root',children:[]}, startIndex = 0, endIndex = this.elements.length, level = 0, childIndex = 0) {
    for (let index = startIndex; index < endIndex; index++) {
      if (this.indexes.includes(index)) continue
      const element = this.elements[index]
      let child
      if (typeof element === 'string') child = element
      else if (element.type === 'tag') {
        if (element.status === 'single') {
          child = element
          child.parent = parent
        } else if (element.status === 'open') {
          child = this.lookForPair(element, index, parent, level)
        }
        // Closing tags leave `child` undefined: they are only marked as visited.
      }
      this.addChild(parent, child, index, level, parent.children.length)
    }
    return parent
  }

  /**
   * Swap a `{{{{script N` / `{{{{style N` placeholder found in the text child of
   * a script/style element back to the stashed raw body.
   * @returns {string} the restored text, or `child` unchanged when it holds no
   *   valid placeholder
   */
  addScriptsAndStyles(parent, child, index) {
    let stashed
    if (parent.tag === 'script') {
      const ref = child.match(/\{\{\{\{script\s(\d+)/)
      if (ref !== null) stashed = this.scripts[Number.parseInt(ref[1], 10)]
    } else if (parent.tag === 'style') {
      const ref = child.match(/\{\{\{\{style\s(\d+)/)
      if (ref !== null) stashed = this.styles[Number.parseInt(ref[1], 10)]
    }
    if (stashed !== undefined) child = stashed
    this.elements[index] = child
    return child
  }

  /**
   * Finalise `child` (wrapping strings into text nodes, installing getters and
   * sibling links) and append it to `parent.children`. `index` is always marked
   * as visited, even when `child` is undefined (closing tags).
   */
  addChild(parent, child, index, level, childIndex) {
    if (typeof child === 'string') {
      child = this.addScriptsAndStyles(parent, child, index)
      child = {type:'text',text:child}
      this.elements[index] = child
    }
    if (child) {
      delete child.status
      this.getElements(child)
      this.innerHTML(child)
      this.outerHTML(child)
      this.innerText(child)
      this.getAncestors(child)
      this.getAttribute(child)
      child.childIndex = childIndex
      child.index = index
      child.level = level
      child.prev = null
      child.next = null
      const previous = parent.children[parent.children.length - 1]
      if (previous !== undefined) {
        previous.next = child
        child.prev = previous
      }
      parent.children.push(child)
    }
    this.indexes.push(index)
  }

  /**
   * Install `getAttribute(name)` on `element`: returns the stored attribute value
   * (undefined for value-less attributes) or null when the attribute is absent.
   * Nodes without `attribs` (text nodes) always return null.
   */
  getAttribute(element) {
    element.getAttribute = function(name) {
      const attribs = this.attribs ?? {}
      return Object.prototype.hasOwnProperty.call(attribs, name) ? attribs[name] : null
    }
  }

  /** Install the `ancestors` getter: outermost-first list of parents, root excluded. */
  getAncestors(element) {
    Object.defineProperty(element, 'ancestors', { get() {
      const ancestors = []
      let parent = this.parent
      if (parent) while (parent.parent) {
        ancestors.unshift(parent)
        parent = parent.parent
      }
      return ancestors
    }})
  }

  /**
   * Install the `elements` getter: the slice of the flat element list from this
   * node's opening tag through its closing tag. Nodes without an `endIndex`
   * (single tags, text nodes) yield an empty array.
   */
  getElements(element, elements = this.elements) {
    Object.defineProperty(element, 'elements', { get() {
      return elements.slice(this.index, this.endIndex + 1)
    }})
  }

  /**
   * Install the `outerHTML` getter: opening tag + innerHTML + closing tag.
   * Single tags and text nodes return their own text; an element whose closing
   * tag was never found gets no closing part.
   */
  outerHTML(element) {
    Object.defineProperty(element, 'outerHTML', { get() {
      const { elements } = this
      if (elements.length === 0) return this.text
      const last = elements[elements.length - 1]
      const hasClosingTag = last !== this && last.status === 'close' && last.tag === this.tag
      return elements[0].text + this.innerHTML + (hasClosingTag ? last.text : '')
    }})
  }

  /** Install the `innerText` getter: concatenated text of the direct text children. */
  innerText(element) {
    Object.defineProperty(element, 'innerText', { get() {
      if (!element.children) return ''
      return element.children.map(child => child.type == 'text' ? child.text : '').join('')
    }})
  }

  /**
   * Install the `innerHTML` getter: the text of every element strictly between
   * this node's opening and closing tag. With more than one inner element each
   * is placed on its own line, indented relative to the first inner element.
   */
  innerHTML(element) {
    Object.defineProperty(element, 'innerHTML', { get() {
      const tab = ' '
      const { elements } = this
      const count = elements.length
      let result = ''
      let firstLevel
      let space = ''
      for (let i = 1; i < count - 1; i++) {
        const { level, text } = elements[i]
        if (firstLevel === undefined) firstLevel = level
        // Elements without a level keep the previous indentation.
        if (level) space = tab.repeat(Math.max(0, level - firstLevel))
        result += space + text
        if (count > 3) result += '\n'
      }
      return result
    }})
  }

  /**
   * Turn one raw tag string (`<div class="a">`, `</div>`, `<br>`) into an
   * element object with `status` 'open', 'close' or 'single'.
   * @param {string} tagString
   * @returns {object}
   */
  parseElement(tagString) {
    const text = tagString
    const type = 'tag'
    if (/^<!doctype\b/i.test(tagString)) {
      return {tag:tagString.slice(1, -1).trim(),status:'single',attribs:{},type,classList:[],text,style:{},id:null}
    }
    let status = 'close'
    let tag = null
    const closing = tagString.match(/^<\/([\w-]*)/)
    if (closing !== null) tag = closing[1]
    else {
      const opening = tagString.match(/^<([\w-]*)/)
      if (opening !== null) {
        tag = opening[1]
        status = this.singleTags.includes(tag.toLowerCase()) ? 'single' : 'open'
      }
    }
    const {classList,attribs,style,id} = this.parseAttributes(tagString)
    return {tag,status,attribs,type,classList,text,style,id}
  }

  /**
   * Extract attributes from a raw tag string. Values may be double- or
   * single-quoted; attributes without a quoted value are stored as undefined.
   * `class`, `style` and `id` are additionally exposed in parsed form.
   * @returns {{classList:string[], attribs:object, style:object, id:(string|null)}}
   */
  parseAttributes(tagString, classList = [], attribs = {}, style = {}, id = null) {
    const pattern = /(?<=\s)([\w-]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'))?/g
    for (const [, name, doubleQuoted, singleQuoted] of tagString.matchAll(pattern)) {
      let value = doubleQuoted ?? singleQuoted
      if (value === undefined) {
        attribs[name] = undefined
        continue
      }
      // Restore an event handler that removeEventStrings stashed earlier.
      const eventRef = value.match(/\{\{\{\{event\s(\d+)/)
      if (eventRef !== null) {
        const stashed = this.events?.[Number.parseInt(eventRef[1], 10)]
        if (stashed !== undefined) value = stashed
      }
      if (name === 'class') classList = value.split(/\s+/).filter(Boolean)
      else if (name === 'style') style = this.parseInlineCss(value)
      else if (name === 'id') id = value
      attribs[name] = value
    }
    return {classList,attribs,style,id}
  }

  /**
   * Parse an inline style string into an object keyed by camelCase property
   * name (`margin-top` → `marginTop`). Custom properties (`--x`) keep their
   * name; rules without a colon or without a property name are skipped.
   * @param {string} textCss
   * @returns {object}
   */
  parseInlineCss(textCss) {
    const styles = {}
    for (const rule of textCss.split(';')) {
      // Split on the first colon only so values like url(http://…) stay intact.
      const separator = rule.indexOf(':')
      if (separator === -1) continue
      let prop = rule.slice(0, separator).trim()
      if (prop === '') continue
      if (!prop.startsWith('--')) prop = prop.replace(/-(\w)/g, (m, letter) => letter.toUpperCase())
      styles[prop] = rule.slice(separator + 1).trim()
    }
    return styles
  }
}
// Export for CommonJS consumers; in a browser <script> there is no `module`.
if (typeof module !== 'undefined' && module.exports) module.exports = HtmlParser
@@ -0,0 +1,121 @@
1
+ ## HtmlParser
2
+
3
+ HtmlParser is a class which builds a DOM tree from an HTML string.
4
+
5
+ * HtmlParser removes all HTML comments; they are not included in the DOM tree.
6
+ * Contrary to the regular DOM, `attribs` includes class, id and style as attributes, in addition to the `classList`, `id` and `style` (an object) properties on the element's object.
7
+
8
+
9
+ ### Syntax
10
+
11
+ ```javascript
12
+ let parsed = new HtmlParser(htmlString:string):instanceof HtmlParser
13
+ parsed.root : circular object
14
+
15
+ // static method
16
+ HtmlParser.parse(html):object // parsed.root
17
+ ```
18
+
19
+ Each element, except root and text elements has:
20
+ * attribs - element's attributes
21
+ * parent - parent element
22
+ * next - next element or null
23
+ * prev - previous element or null
24
+ * children - array of children, including text nodes
25
+ * type - tag or text or root
26
+ * classList - array with classes
27
+ * index - start index of element inside elements list
28
+ * id - element's id or null
29
+ * endIndex - end index of element inside elements list
30
+ * level - level in dom tree
31
+ * text - parsed text for tag and for text element
32
+ * innerText:getter - concats all children's text together | ''
33
+ * innerHTML:getter - return innerHTML for element
34
+ * outerHTML:getter - return outerHTML for element
35
+ * ancestors:getter - return array of ancestors
36
+ * getAttribute(name) - return value of attribute or null
37
+ * style:{} - object of inline styles keyed by camelCase property name
38
+
39
+ Example for parsed.root
40
+
41
+ ```javascript
42
+ {
43
+ type:'root',
44
+ children:[
45
+ {
46
+ attribs: {},
47
+ index: 0,
48
+ prev:null,
49
+ next:{...}
50
+ tag: "!DOCTYPE html",
51
+ type: "tag",
52
+ ...
53
+ },
54
+ {
55
+ attribs: {lang:'en'},
56
+ prev:{...},
57
+ next:null,
58
+ classList:[],
59
+ children:[
60
+ {
61
+ attribs: {},
62
+ children:[...]
63
+ prev:null,
64
+ next:{...}
65
+ classList:[],
66
+ index: 2,
67
+ tag: "head",
68
+ parent:{tag:'html',...} // reference to parent
69
+ type: "tag",
70
+ ...
71
+ },
72
+ {
73
+ attribs: {},
74
+ children:[...]
75
+ index: 10,
76
+ prev:{...},
77
+ next:null,
78
+ classList:[],
79
+ tag: "body",
80
+ parent:{tag:'html',...} // reference to parent
81
+ type: "tag",
82
+ ...
83
+ },
84
+ ]
85
+ index: 1,
86
+ tag: "html",
87
+ parent:{type:'root',...} // reference to parent
88
+ type: "tag",
89
+ ...
90
+ }
91
+ ]
92
+ }
93
+ ```
94
+
95
+
96
+ ### Frontend example
97
+
98
+ ```html
99
+ <script src="/node_modules/als-document/parser/parser.js"></script>
100
+ <script>
101
+ let result = new HtmlParser(htmlString)
102
+ console.log(result.root)
103
+
104
+ // Or with static method
105
+ console.log(HtmlParser.parse(html))
106
+
107
+ </script>
108
+ ```
109
+
110
+ ### Backend example
111
+
112
+ ```javascript
113
+ const HtmlParser = require('als-document/parser/parser') // the module exports the class itself
114
+ let result = new HtmlParser(htmlString)
115
+ console.log(result.root)
116
+
117
+ // Or with static method
118
+ console.log(HtmlParser.parse(html))
119
+ ```
120
+
121
+
package/parser/test.js ADDED
@@ -0,0 +1,233 @@
1
+ let Test = require('als-test')
2
+ let {equal,greater,smaller,$greater, $smaller,mesureTime} = Test
3
+ let Parser = require('./parser')
4
+ const htmlparser2 = require("htmlparser2");
5
+
6
+ module.exports = new Test('HtmlParser tests',[
7
+ {
8
+ title:'Parse html for html1',
9
+ result:function({html1}){
10
+ this.vars.root1 = Parser.parse(html1)
11
+ if(this.vars.root1.type == 'root')
12
+ return `succesfully parsed with Parser.parse(html)`
13
+ else {
14
+ this.terminate = true
15
+ return 'Something went wrong..'
16
+ }
17
+ },
18
+ },
19
+ {
20
+ title:'Compare to htmlparser2',
21
+ expected:mesureTime(function({html2}){htmlparser2.parseDocument(html2);}),
22
+ result:mesureTime(function({html2}){Parser.parse(html2)}),
23
+ action:greater
24
+ },
25
+ {
26
+ title:'Parse html for html2',
27
+ result:function({html2}){
28
+ this.vars.root2 = Parser.parse(html2)
29
+ if(this.vars.root2.type == 'root')
30
+ return `succesfully parsed with Parser.parse(html)`
31
+ else {
32
+ this.terminate = true
33
+ return 'Something went wrong..'
34
+ }
35
+ },
36
+ },
37
+ {
38
+ title:'root1 element has 2 children',
39
+ expected:2,
40
+ result:function({root1}){
41
+ return root1.children.length
42
+ },
43
+ action:equal
44
+ },
45
+ {
46
+ title:'Check if element is right',
47
+ expected:'body',
48
+ result:function({root1}){
49
+ this.vars.body1 = root1.children[1].children[1]
50
+ return this.vars.body1.tag
51
+ },
52
+ action:equal
53
+ },
54
+ {
55
+ title:'Check if element right',
56
+ expected:'head',
57
+ result:function({root1}){
58
+ this.vars.head1 = root1.children[1].children[0]
59
+ return this.vars.head1.tag
60
+ },
61
+ action:equal
62
+ },
63
+ {
64
+ title:'body element has 4 children',
65
+ expected:4,
66
+ result:function({body1}){
67
+ return body1.children.length
68
+ },
69
+ action:equal
70
+ },
71
+ {
72
+ title:'Read title text',
73
+ expected:'Document',
74
+ result:function({head1}){
75
+ return head1.children[3].children[0].text
76
+ },
77
+ action:equal
78
+ },
79
+ {
80
+ title:'innerHTML',
81
+ expected:'tab3 content',
82
+ result:function({body1}){
83
+ return body1.children[3].children[6].innerHTML
84
+ },
85
+ action:equal
86
+ },
87
+ {
88
+ title:'innerHTML',
89
+ expected:669,
90
+ result:function({body1}){
91
+ return body1.children[3].innerHTML.length
92
+ },
93
+ action:equal
94
+ },
95
+ {
96
+ title:'outerHTML',
97
+ expected:'<div class="tab-content p2 transition1">tab3 content</div>'.length,
98
+ result:function({body1}){
99
+ return body1.children[3].children[6].outerHTML.length
100
+ },
101
+ action:equal
102
+ },
103
+ {
104
+ title:'ancestors',
105
+ expected:3,
106
+ result:function({body1}){
107
+ return body1.children[3].children[6].ancestors.length
108
+ },
109
+ action:equal
110
+ },
111
+ {
112
+ title:'elements',
113
+ expected:25,
114
+ result:function({body1}){
115
+ return body1.children[3].elements.length
116
+ },
117
+ action:equal
118
+ },
119
+ {
120
+ title:'children length',
121
+ expected:15,
122
+ result:function({root2}){
123
+ this.vars.body2 = root2.children[0].children[1]
124
+ return this.vars.body2.children.length
125
+ },
126
+ action:equal
127
+ },
128
+ {
129
+ title:'id',
130
+ expected:'tab2',
131
+ result:function({body1}){
132
+ return body1.children[3].children[1].id
133
+ },
134
+ action:equal
135
+ },
136
+ {
137
+ title:'classList',
138
+ expected:5,
139
+ result:function({body2}){
140
+ return body2.children[2].children[0].children[4].classList.length
141
+ },
142
+ action:equal
143
+ },
144
+ {
145
+ title:'style',
146
+ expected:'marginTop',
147
+ result:function({body2}){
148
+ let style = body2.children[2].children[0].children[4].style
149
+ return Object.keys(style)[0]
150
+ },
151
+ action:equal
152
+ },
153
+ {
154
+ title:'style length',
155
+ expected:5,
156
+ result:function({body2}){
157
+ let style = body2.children[4].style
158
+ return Object.keys(style).length
159
+ },
160
+ action:equal
161
+ },
162
+ {
163
+ title:'style with multiple styles',
164
+ expected:'605px',
165
+ result:function({body2}){
166
+ let style = body2.children[4].style
167
+ return style.left
168
+ },
169
+ action:equal
170
+ },
171
+ {
172
+ title:'attributs length',
173
+ expected:6,
174
+ result:function({body2}){
175
+ let attribs = body2.children[2].children[0].children[0].attribs
176
+ return Object.keys(attribs).length
177
+ },
178
+ action:equal
179
+ },
180
+ {
181
+ title:'attribute content',
182
+ expected:'_blank',
183
+ result:function({body2}){
184
+ return body2.children[2].children[0].children[0].attribs.target
185
+ },
186
+ action:equal
187
+ },
188
+ {
189
+ title:'next',
190
+ expected:'a#menuButton',
191
+ result:function({body2}){
192
+ let {tag,id} = body2.children[2].children[0].children[0].next
193
+ return `${tag}#${id}`
194
+ },
195
+ action:equal
196
+ },
197
+ {
198
+ title:'prev',
199
+ expected:'a#tryhome',
200
+ result:function({body2}){
201
+ let {tag,id} = body2.children[2].children[0].children[1].prev
202
+ return `${tag}#${id}`
203
+ },
204
+ action:equal
205
+ },
206
+ {
207
+ title:'parent',
208
+ expected:'div.w3-bar',
209
+ result:function({body2}){
210
+ let {tag,classList} = body2.children[2].children[0].children[1].parent
211
+ return `${tag}.${classList[0]}`
212
+ },
213
+ action:equal
214
+ },
215
+ {
216
+ title:'Single tags doesnt have children',
217
+ expected:undefined,
218
+ result:function({root2}){
219
+ let head = root2.children[0].children[0]
220
+ return head.children[1].children
221
+ },
222
+ action:equal
223
+ },
224
+ {
225
+ title:'Text tag doesnt have children',
226
+ expected:undefined,
227
+ result:function({root2}){
228
+ let head = root2.children[0].children[0]
229
+ return head.children[0].children[0].children
230
+ },
231
+ action:equal
232
+ },
233
+ ])