node-html-parser 1.3.2 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -75,6 +75,12 @@ Parse given data, and return root of the generated DOM.
75
75
  {
76
76
  lowerCaseTagName: false, // convert tag name to lower case (hurts performance heavily)
77
77
  comment: false, // retrieve comments (hurts performance slightly)
78
+ blockTextElements: {
79
+ script: true, // keep text content when parsing
80
+ noscript: true, // keep text content when parsing
81
+ style: true, // keep text content when parsing
82
+ pre: true // keep text content when parsing
83
+ }
78
84
  }
79
85
  ```
80
86
 
@@ -0,0 +1,3 @@
1
/**
 * Return the last element of an indexable, length-bearing value.
 * @param {Array} arr list to read from
 * @return {*} the final element, or undefined when `arr` is empty
 */
export default function arr_back(arr) {
	const lastIndex = arr.length - 1;
	return arr[lastIndex];
}
@@ -0,0 +1,5 @@
1
+ export { default as CommentNode } from './nodes/comment';
2
+ export { default as HTMLElement, parse, parse as default } from './nodes/html';
3
+ export { default as Node } from './nodes/node';
4
+ export { default as TextNode } from './nodes/text';
5
+ export { default as NodeType } from './nodes/type';
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Cache to store generated match functions
3
+ * @type {Object}
4
+ */
5
+ let pMatchFunctionCache = {};
6
/**
 * Case-insensitive comparison of two tag names.
 * Two missing (falsy) names count as equal; a missing name never equals a present one.
 * @param {string} tag1 first tag name
 * @param {string} tag2 second tag name
 * @return {boolean} true when both names denote the same tag
 */
function compare_tagname(tag1, tag2) {
	if (tag1 && tag2) {
		return tag1.toLowerCase() === tag2.toLowerCase();
	}
	// At least one side is missing: they only agree when both are.
	return !tag1 && !tag2;
}
15
/**
 * Check that the element's id equals the id part of an `#id` selector token.
 * @param {HTMLElement} el element under test
 * @param {string} tagName selector token including the leading '#'
 * @return {boolean} true when the ids are identical
 */
function id_matches(el, tagName) {
	return el.id === (tagName || '').substr(1);
}
/**
 * Check that the element carries every class name in `classes`.
 * @param {HTMLElement} el element under test
 * @param {string[]} classes required class names (may be missing)
 * @return {boolean} true when no required class is absent
 */
function has_all_classes(el, classes) {
	return (classes || []).every((cls) => {
		return el.classNames.indexOf(cls) !== -1;
	});
}
/**
 * Check whether the element has an attribute named `attr_key`
 * (name compared case-insensitively) whose value is exactly `value`.
 * @param {HTMLElement} el element under test
 * @param {string} attr_key attribute name
 * @param {string} value expected attribute value
 * @return {boolean} true when such an attribute exists
 */
function attr_equals(el, attr_key, value) {
	const wanted_key = (attr_key || '').toLowerCase();
	const wanted_value = value || '';
	const attrs = el.attributes;
	return Object.keys(attrs).some((key) => {
		return key.toLowerCase() === wanted_key && attrs[key] === wanted_value;
	});
}
/**
 * Pre-built match functions, looked up by the name the Matcher constructor
 * assembles: 'f' followed by '1' (id), '2' (attribute = value), '3' (tag name),
 * '5' after a bare `[attr]` token, '4' (classes) and a final '5'.
 * Every function receives (el, tagName, classes, attr_key, value) and returns a boolean.
 */
const functionCache = {
	/** `#id.class...` */
	f145(el, tagName, classes) {
		return id_matches(el, tagName) && has_all_classes(el, classes);
	},
	/** `.class...` or `*.class...` */
	f45(el, tagName, classes) {
		return has_all_classes(el, classes);
	},
	/** `#id` */
	f15(el, tagName) {
		return id_matches(el, tagName);
	},
	/** `#id`; previously fell through and returned undefined on a successful match. */
	f1(el, tagName) {
		return id_matches(el, tagName);
	},
	/** Empty token or `*`: matches every element. */
	f5() {
		return true;
	},
	/** `[attr]`: the attribute must be present as an own key of el.attributes. */
	f55(el, tagName, classes, attr_key) {
		return Object.prototype.hasOwnProperty.call(el.attributes, attr_key || '');
	},
	/** `[attr=value].class...`; the class list is not checked by this matcher. */
	f245(el, tagName, classes, attr_key, value) {
		return attr_equals(el, attr_key, value);
	},
	/** `[attr=value]` */
	f25(el, tagName, classes, attr_key, value) {
		return attr_equals(el, attr_key, value);
	},
	/** `[attr=value]` */
	f2(el, tagName, classes, attr_key, value) {
		return attr_equals(el, attr_key, value);
	},
	/** `tag.class...` */
	f345(el, tagName, classes) {
		return compare_tagname(el.tagName, tagName || '') && has_all_classes(el, classes);
	},
	/** `tag` */
	f35(el, tagName) {
		return compare_tagname(el.tagName, tagName || '');
	},
	/** `tag` */
	f3(el, tagName) {
		return compare_tagname(el.tagName, tagName || '');
	}
};
138
/**
 * Matcher class to make CSS match.
 *
 * A selector is split on whitespace into tokens; each token is resolved to one
 * entry of `functionCache` plus the arguments it needs. `nextMatch` points at
 * the token that has to match next while a tree is being walked.
 *
 * @class Matcher
 */
export default class Matcher {
	/**
	 * Creates an instance of Matcher.
	 * @param {string} selector simplified CSS selector (descendant combinator only)
	 *
	 * @memberof Matcher
	 */
	constructor(selector) {
		this.nextMatch = 0;
		// Split on runs of whitespace so that repeated or surrounding spaces do not
		// produce empty tokens (an empty token would match every element).
		this.matchers = selector.trim().split(/\s+/).map((matcher) => {
			// Own-property check: a plain lookup would return inherited members
			// (e.g. `constructor`, `toString`) for selectors with those names.
			if (Object.prototype.hasOwnProperty.call(pMatchFunctionCache, matcher)) {
				return pMatchFunctionCache[matcher];
			}
			const parts = matcher.split('.');
			const tagName = parts[0];
			const classes = parts.slice(1).sort();
			let function_name = 'f';
			let attr_key = '';
			let value = '';
			if (tagName && tagName !== '*') {
				if (tagName.startsWith('#')) {
					// id selector
					function_name += '1';
				}
				else {
					let reg = /^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/.exec(tagName);
					if (reg) {
						// [key=value] / [key!=value]
						// NOTE(review): `!=` is accepted by the pattern, but the selected
						// match function compares with equality - confirm intended semantics.
						attr_key = reg[1];
						value = reg[7] || reg[8];
						function_name += '2';
					}
					else if ((reg = /^\[(.*?)\]/.exec(tagName))) {
						// [key]
						attr_key = reg[1];
						function_name += '5';
					}
					else {
						// plain tag name
						function_name += '3';
					}
				}
			}
			if (classes.length > 0) {
				function_name += '4';
			}
			function_name += '5';
			const obj = {
				func: functionCache[function_name],
				tagName: tagName || '',
				classes: classes || '',
				attr_key: attr_key || '',
				value: value || ''
			};
			pMatchFunctionCache[matcher] = obj;
			return obj;
		});
	}
	/**
	 * Trying to advance match pointer
	 * @param {HTMLElement} el element to make the match
	 * @return {bool} true when pointer advanced.
	 */
	advance(el) {
		if (this.nextMatch >= this.matchers.length) {
			return false;
		}
		const current = this.matchers[this.nextMatch];
		if (current.func(el, current.tagName, current.classes, current.attr_key, current.value)) {
			this.nextMatch++;
			return true;
		}
		return false;
	}
	/**
	 * Rewind the match pointer by one token.
	 */
	rewind() {
		this.nextMatch--;
	}
	/**
	 * Trying to determine if match made.
	 * @return {bool} true when every token of the selector has matched
	 */
	get matched() {
		return this.nextMatch === this.matchers.length;
	}
	/**
	 * Reset match pointer to the first token.
	 */
	reset() {
		this.nextMatch = 0;
	}
	/**
	 * Flush the module-level cache of resolved tokens to free memory.
	 */
	flushCache() {
		pMatchFunctionCache = {};
	}
}
@@ -0,0 +1,23 @@
1
+ import Node from './node';
2
+ import NodeType from './type';
3
/**
 * CommentNode holds the body of an HTML comment.
 */
export default class CommentNode extends Node {
	/**
	 * @param {string} rawText comment body, without the surrounding delimiters
	 */
	constructor(rawText) {
		super();
		this.rawText = rawText;
		// Node Type declaration (numeric NodeType value).
		this.nodeType = NodeType.COMMENT_NODE;
	}
	/**
	 * Text of the comment, returned exactly as stored.
	 * @return {string} text content
	 */
	get text() {
		const { rawText } = this;
		return rawText;
	}
	/**
	 * Serialize back to markup by re-adding the comment delimiters.
	 * @return {string} comment markup
	 */
	toString() {
		return '<!--'.concat(this.rawText, '-->');
	}
}
@@ -0,0 +1,787 @@
1
+ import { decode } from 'he';
2
+ import Node from './node';
3
+ import NodeType from './type';
4
+ import TextNode from './text';
5
+ import Matcher from '../matcher';
6
+ import arr_back from '../back';
7
+ import CommentNode from './comment';
8
// Tags that start a new block (line) in `structuredText`. Both letter cases are
// listed because the lookup is done with the raw tag name.
// ul, ol, table and tr are not treated as block elements.
const kBlockElements = new Map([
	['DIV', true],
	['div', true],
	['P', true],
	['p', true],
	['LI', true],
	['li', true],
	['TD', true],
	['td', true],
	['SECTION', true],
	['section', true],
	['BR', true],
	['br', true]
]);
25
/**
 * HTMLElement, which contains a set of children.
 *
 * Note: this is a minimalist implementation; `parentNode` is tracked for
 * element children, but sibling links (nextSibling, previousSibling etc.)
 * are not provided.
 * @class HTMLElement
 * @extends {Node}
 */
export default class HTMLElement extends Node {
	/**
	 * Creates an instance of HTMLElement.
	 * @param tagName tag name as written in the source (null for the root)
	 * @param keyAttrs id and class attribute
	 * @param [rawAttrs] attributes in string
	 * @param [parentNode] parent element, if any
	 *
	 * @memberof HTMLElement
	 */
	constructor(tagName, keyAttrs, rawAttrs = '', parentNode = null) {
		super();
		this.rawAttrs = rawAttrs || '';
		this.parentNode = parentNode || null;
		this.classNames = [];
		/**
		 * Node Type declaration.
		 */
		this.nodeType = NodeType.ELEMENT_NODE;
		this.rawTagName = tagName;
		this.childNodes = [];
		if (keyAttrs.id) {
			this.id = keyAttrs.id;
			if (!rawAttrs) {
				// No attribute string supplied: synthesize one from the key attributes.
				this.rawAttrs = `id="${keyAttrs.id}"`;
			}
		}
		if (keyAttrs.class) {
			this.classNames = keyAttrs.class.split(/\s+/);
			if (!rawAttrs) {
				const cls = `class="${this.classNames.join(' ')}"`;
				if (this.rawAttrs) {
					this.rawAttrs += ` ${cls}`;
				}
				else {
					this.rawAttrs = cls;
				}
			}
		}
	}
	/**
	 * Remove Child element from childNodes array
	 * @param {HTMLElement} node node to remove
	 */
	removeChild(node) {
		this.childNodes = this.childNodes.filter((child) => {
			return (child !== node);
		});
	}
	/**
	 * Exchanges given child with new child.
	 * Does nothing when `oldNode` is not a child of this element.
	 * @param {HTMLElement} oldNode node to exchange
	 * @param {HTMLElement} newNode new node
	 */
	exchangeChild(oldNode, newNode) {
		const idx = this.childNodes.indexOf(oldNode);
		if (idx === -1) {
			// Writing to index -1 would only add a stray "-1" property to the array.
			return;
		}
		this.childNodes[idx] = newNode;
	}
	/**
	 * Upper-cased tag name (undefined when there is no raw tag name).
	 */
	get tagName() {
		return this.rawTagName?.toUpperCase();
	}
	/**
	 * Get escaped (as-is) text value of current node and its children.
	 * @return {string} text content
	 */
	get rawText() {
		return this.childNodes.reduce((pre, cur) => {
			return (pre += cur.rawText);
		}, '');
	}
	/**
	 * Get unescaped text value of current node and its children.
	 * @return {string} text content
	 */
	get text() {
		return decode(this.rawText);
	}
	/**
	 * Get structured Text (with '\n' etc.)
	 * @return {string} structured text
	 */
	get structuredText() {
		let currentBlock = [];
		const blocks = [currentBlock];
		function dfs(node) {
			if (node.nodeType === NodeType.ELEMENT_NODE) {
				if (kBlockElements.get(node.rawTagName)) {
					// A block element closes the running block before and after its content.
					if (currentBlock.length > 0) {
						blocks.push(currentBlock = []);
					}
					node.childNodes.forEach(dfs);
					if (currentBlock.length > 0) {
						blocks.push(currentBlock = []);
					}
				}
				else {
					node.childNodes.forEach(dfs);
				}
			}
			else if (node.nodeType === NodeType.TEXT_NODE) {
				if (node.isWhitespace) {
					// Whitespace node, postponed output
					currentBlock.prependWhitespace = true;
				}
				else {
					let text = node.text;
					if (currentBlock.prependWhitespace) {
						text = ` ${text}`;
						currentBlock.prependWhitespace = false;
					}
					currentBlock.push(text);
				}
			}
		}
		dfs(this);
		return blocks.map((block) => {
			// Normalize each line's whitespace
			return block.join('').trim().replace(/\s{2,}/g, ' ');
		})
			.join('\n').replace(/\s+$/, ''); // trimRight;
	}
	/**
	 * Serialize this element and its subtree to markup.
	 * Void elements are written without content or closing tag; an element
	 * without a tag name (the root) serializes to its children only.
	 * @return {string} markup
	 */
	toString() {
		const tag = this.rawTagName;
		if (tag) {
			const is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
			const attrs = this.rawAttrs ? ` ${this.rawAttrs}` : '';
			if (is_void) {
				return `<${tag}${attrs}>`;
			}
			return `<${tag}${attrs}>${this.innerHTML}</${tag}>`;
		}
		return this.innerHTML;
	}
	/**
	 * Markup of all children, concatenated.
	 */
	get innerHTML() {
		return this.childNodes.map((child) => {
			return child.toString();
		}).join('');
	}
	/**
	 * Replace the children of this element.
	 * @param {Node|Node[]|string} content a node, a list of nodes, or markup to parse
	 * @param {Object} [options] options passed to `parse` when `content` is a string
	 */
	set_content(content, options = {}) {
		if (content instanceof Node) {
			content = [content];
		}
		else if (typeof content == 'string') {
			const r = parse(content, options);
			// Markup that yields no nodes is kept as a single text node.
			content = r.childNodes.length ? r.childNodes : [new TextNode(content)];
		}
		this.childNodes = content;
	}
	/**
	 * Markup of this element including its own tag.
	 */
	get outerHTML() {
		return this.toString();
	}
	/**
	 * Trim element from right (in block) after seeing pattern in a TextNode.
	 * @param {RegExp} pattern pattern to find
	 * @return {HTMLElement} reference to current node
	 */
	trimRight(pattern) {
		for (let i = 0; i < this.childNodes.length; i++) {
			const childNode = this.childNodes[i];
			if (childNode.nodeType === NodeType.ELEMENT_NODE) {
				childNode.trimRight(pattern);
			}
			else {
				const index = childNode.rawText.search(pattern);
				if (index > -1) {
					childNode.rawText = childNode.rawText.substr(0, index);
					// trim all following nodes.
					this.childNodes.length = i + 1;
				}
			}
		}
		return this;
	}
	/**
	 * Get DOM structure
	 * @return {string} structure, one indented line per element / non-blank text node
	 */
	get structure() {
		const res = [];
		let indention = 0;
		function write(str) {
			res.push(' '.repeat(indention) + str);
		}
		function dfs(node) {
			const idStr = node.id ? (`#${node.id}`) : '';
			const classStr = node.classNames.length ? (`.${node.classNames.join('.')}`) : '';
			write(node.rawTagName + idStr + classStr);
			indention++;
			node.childNodes.forEach((childNode) => {
				if (childNode.nodeType === NodeType.ELEMENT_NODE) {
					dfs(childNode);
				}
				else if (childNode.nodeType === NodeType.TEXT_NODE) {
					if (!childNode.isWhitespace) {
						write('#text');
					}
				}
			});
			indention--;
		}
		dfs(this);
		return res.join('\n');
	}
	/**
	 * Remove whitespaces in this sub tree.
	 * @return {HTMLElement} pointer to this
	 */
	removeWhitespace() {
		// `o` is the write index: kept nodes are compacted to the front in place.
		let o = 0;
		this.childNodes.forEach((node) => {
			if (node.nodeType === NodeType.TEXT_NODE) {
				if (node.isWhitespace) {
					return;
				}
				node.rawText = node.rawText.trim();
			}
			else if (node.nodeType === NodeType.ELEMENT_NODE) {
				node.removeWhitespace();
			}
			this.childNodes[o++] = node;
		});
		this.childNodes.length = o;
		return this;
	}
	/**
	 * Query CSS selector to find matching nodes.
	 * @param {string} selector Simplified CSS selector
	 * @param {Matcher} selector A Matcher instance
	 * @return {HTMLElement[]} matching elements
	 */
	querySelectorAll(selector) {
		let matcher;
		if (selector instanceof Matcher) {
			matcher = selector;
			matcher.reset();
		}
		else {
			if (selector.includes(',')) {
				// Selector list: query each part and merge the results without duplicates.
				const selectors = selector.split(',');
				return Array.from(selectors.reduce((pre, cur) => {
					const result = this.querySelectorAll(cur.trim());
					return result.reduce((p, c) => {
						return p.add(c);
					}, pre);
				}, new Set()));
			}
			matcher = new Matcher(selector);
		}
		// Each stack entry is [node, index of next child to visit, whether the matcher advanced here].
		const stack = [];
		return this.childNodes.reduce((res, cur) => {
			stack.push([cur, 0, false]);
			while (stack.length) {
				const state = arr_back(stack); // get last element
				const el = state[0];
				if (state[1] === 0) {
					// Seen for first time.
					if (el.nodeType !== NodeType.ELEMENT_NODE) {
						stack.pop();
						continue;
					}
					const html_el = el;
					state[2] = matcher.advance(html_el);
					if (state[2]) {
						if (matcher.matched) {
							res.push(html_el);
							res.push(...(html_el.querySelectorAll(selector)));
							// no need to go further.
							matcher.rewind();
							stack.pop();
							continue;
						}
					}
				}
				if (state[1] < el.childNodes.length) {
					stack.push([el.childNodes[state[1]++], 0, false]);
				}
				else {
					if (state[2]) {
						matcher.rewind();
					}
					stack.pop();
				}
			}
			return res;
		}, []);
	}
	/**
	 * Query CSS Selector to find matching node.
	 * @param {string} selector Simplified CSS selector
	 * @param {Matcher} selector A Matcher instance
	 * @return {HTMLElement} matching node, or null when nothing matches
	 */
	querySelector(selector) {
		let matcher;
		if (selector instanceof Matcher) {
			matcher = selector;
			matcher.reset();
		}
		else {
			matcher = new Matcher(selector);
		}
		// Each stack entry is [node, index of next child to visit, whether the matcher advanced here].
		const stack = [];
		for (const node of this.childNodes) {
			stack.push([node, 0, false]);
			while (stack.length) {
				const state = arr_back(stack);
				const el = state[0];
				if (state[1] === 0) {
					// Seen for first time.
					if (el.nodeType !== NodeType.ELEMENT_NODE) {
						stack.pop();
						continue;
					}
					state[2] = matcher.advance(el);
					if (state[2]) {
						if (matcher.matched) {
							return el;
						}
					}
				}
				if (state[1] < el.childNodes.length) {
					stack.push([el.childNodes[state[1]++], 0, false]);
				}
				else {
					if (state[2]) {
						matcher.rewind();
					}
					stack.pop();
				}
			}
		}
		return null;
	}
	/**
	 * Append a child node to childNodes
	 * @param {Node} node node to append
	 * @return {Node} node appended
	 */
	appendChild(node) {
		this.childNodes.push(node);
		// Only element children carry a parent link.
		if (node instanceof HTMLElement) {
			node.parentNode = this;
		}
		return node;
	}
	/**
	 * Get first child node
	 * @return {Node} first child node
	 */
	get firstChild() {
		return this.childNodes[0];
	}
	/**
	 * Get last child node
	 * @return {Node} last child node
	 */
	get lastChild() {
		return arr_back(this.childNodes);
	}
	/**
	 * Get attributes
	 * @return {Object} parsed and unescaped attributes (cached after first access)
	 */
	get attributes() {
		if (this._attrs) {
			return this._attrs;
		}
		this._attrs = {};
		const attrs = this.rawAttributes;
		for (const key in attrs) {
			const val = attrs[key] || '';
			this._attrs[key] = decode(val);
		}
		return this._attrs;
	}
	/**
	 * Get escaped (as-is) attributes
	 * @return {Object} parsed attributes (cached after first access); valueless attributes map to null
	 */
	get rawAttributes() {
		if (this._rawAttrs) {
			return this._rawAttrs;
		}
		const attrs = {};
		if (this.rawAttrs) {
			const re = /\b([a-z][a-z0-9-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
			let match;
			while ((match = re.exec(this.rawAttrs))) {
				attrs[match[1]] = match[2] || match[3] || match[4] || null;
			}
		}
		this._rawAttrs = attrs;
		return attrs;
	}
	/**
	 * Remove an attribute and rebuild the raw attribute string.
	 * @param {string} key The attribute name
	 */
	removeAttribute(key) {
		const attrs = this.rawAttributes;
		delete attrs[key];
		// Update this.attribute
		if (this._attrs) {
			delete this._attrs[key];
		}
		// Update rawString
		this.rawAttrs = Object.keys(attrs).map((name) => {
			const val = JSON.stringify(attrs[name]);
			if (val === undefined || val === 'null') {
				return name;
			}
			return `${name}=${val}`;
		}).join(' ');
	}
	/**
	 * Tell whether the element has the given attribute.
	 * Only attributes actually present count; inherited object members such as
	 * `toString` are not reported.
	 * @param {string} key The attribute name
	 * @return {boolean} true when the attribute exists
	 */
	hasAttribute(key) {
		return Object.prototype.hasOwnProperty.call(this.attributes, key);
	}
	/**
	 * Get an attribute
	 * @return {string} value of the attribute
	 */
	getAttribute(key) {
		return this.attributes[key];
	}
	/**
	 * Set an attribute value to the HTMLElement
	 * @param {string} key The attribute name
	 * @param {string} value The value to set; it is converted with String()
	 */
	setAttribute(key, value) {
		if (arguments.length < 2) {
			throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
		}
		const attrs = this.rawAttributes;
		attrs[key] = String(value);
		if (this._attrs) {
			this._attrs[key] = decode(attrs[key]);
		}
		// Update rawString
		this.rawAttrs = Object.keys(attrs).map((name) => {
			const val = JSON.stringify(attrs[name]);
			if (val === 'null' || val === '""') {
				return name;
			}
			return `${name}=${val}`;
		}).join(' ');
	}
	/**
	 * Replace all the attributes of the HTMLElement by the provided attributes
	 * @param {Attributes} attributes the new attribute set
	 */
	setAttributes(attributes) {
		// Invalidate current this.attributes
		if (this._attrs) {
			delete this._attrs;
		}
		// Invalidate current this.rawAttributes
		if (this._rawAttrs) {
			delete this._rawAttrs;
		}
		// Update rawString
		this.rawAttrs = Object.keys(attributes).map((name) => {
			const val = attributes[name];
			if (val === 'null' || val === '""') {
				return name;
			}
			return `${name}=${JSON.stringify(String(val))}`;
		}).join(' ');
	}
	/**
	 * Parse `html` and insert the resulting nodes relative to this element.
	 * Inserted element nodes get their `parentNode` set for every position.
	 * @param {string} where one of 'beforebegin', 'afterbegin', 'beforeend', 'afterend'
	 * @param {string} html markup to insert
	 */
	insertAdjacentHTML(where, html) {
		if (arguments.length < 2) {
			throw new Error('2 arguments required');
		}
		const p = parse(html);
		// Point the parent link of every inserted element at its new parent.
		const adopt = (parent) => {
			p.childNodes.forEach((n) => {
				if (n instanceof HTMLElement) {
					n.parentNode = parent;
				}
			});
		};
		if (where === 'afterend') {
			const idx = this.parentNode.childNodes.findIndex((child) => {
				return child === this;
			});
			this.parentNode.childNodes.splice(idx + 1, 0, ...p.childNodes);
			adopt(this.parentNode);
		}
		else if (where === 'afterbegin') {
			this.childNodes.unshift(...p.childNodes);
			adopt(this);
		}
		else if (where === 'beforeend') {
			p.childNodes.forEach((n) => {
				this.appendChild(n);
			});
		}
		else if (where === 'beforebegin') {
			const idx = this.parentNode.childNodes.findIndex((child) => {
				return child === this;
			});
			this.parentNode.childNodes.splice(idx, 0, ...p.childNodes);
			adopt(this.parentNode);
		}
		else {
			throw new Error(`The value provided ('${where}') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'`);
		}
	}
}
549
+ // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
550
+ const kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
551
+ // <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
552
+ // <([a-z][-.:0-9_a-z]*)\s*\/>
553
+ // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
554
+ // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
555
+ const kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]+)"|'([^']+)'|(\S+))/ig;
556
// Void elements: tags that never have content or a closing tag. The parser
// closes them as soon as they are opened. Keys are listed in lower and upper
// case because lookups use the tag name as written in the source.
// embed, param, track and wbr are included so that this table agrees with the
// void-element list used by HTMLElement.toString().
const kSelfClosingElements = {
	area: true,
	AREA: true,
	base: true,
	BASE: true,
	br: true,
	BR: true,
	col: true,
	COL: true,
	embed: true,
	EMBED: true,
	hr: true,
	HR: true,
	img: true,
	IMG: true,
	input: true,
	INPUT: true,
	link: true,
	LINK: true,
	meta: true,
	META: true,
	param: true,
	PARAM: true,
	source: true,
	SOURCE: true,
	track: true,
	TRACK: true,
	wbr: true,
	WBR: true
};
578
/**
 * Build a lookup object that flags each given lower-case tag name and then its
 * upper-case form.
 */
function tag_set(...names) {
	const set = {};
	names.forEach((name) => {
		set[name] = true;
	});
	names.forEach((name) => {
		set[name.toUpperCase()] = true;
	});
	return set;
}
/**
 * Build a table keyed by tag name (lower case followed by upper case) from
 * rows of the form [tag, ...relatedTags].
 */
function tag_table(rows) {
	const table = {};
	rows.forEach(([tag, ...related]) => {
		table[tag] = tag_set(...related);
		table[tag.toUpperCase()] = tag_set(...related);
	});
	return table;
}
// For an open element (key): the tags whose opening implicitly closes it.
const kElementsClosedByOpening = tag_table([
	['li', 'li'],
	['p', 'p', 'div'],
	['b', 'div'],
	['td', 'td', 'th'],
	['th', 'td', 'th'],
	['h1', 'h1'],
	['h2', 'h2'],
	['h3', 'h3'],
	['h4', 'h4'],
	['h5', 'h5'],
	['h6', 'h6']
]);
// For an open element (key): the tags whose closing implicitly closes it.
const kElementsClosedByClosing = tag_table([
	['li', 'ul', 'ol'],
	['a', 'div'],
	['b', 'div'],
	['i', 'div'],
	['p', 'div'],
	['td', 'tr', 'table'],
	['th', 'tr', 'table']
]);
618
+ const frameflag = 'documentfragmentcontainer';
619
/**
 * Parse markup and return the root of the generated DOM.
 *
 * @param {string} data markup to parse
 * @param {Object} [options]
 * @param {boolean} [options.lowerCaseTagName] convert tag names to lower case
 * @param {boolean} [options.comment] keep comments as CommentNode children
 * @param {Object} [options.blockTextElements] map of tag name -> boolean; the content of
 *   these elements is not parsed as markup, and is kept as a text node only when the
 *   value is truthy (defaults to script, noscript, style and pre, all kept)
 * @param {boolean} [options.noFix] when set, return a TextNode holding the input
 *   instead of the repaired tree
 * @return {HTMLElement|TextNode} result node carrying an extra boolean `valid`
 *   property (true when every opened element was closed)
 */
export function parse(data, options = { lowerCaseTagName: false, comment: false }) {
	const elements = options.blockTextElements || {
		script: true,
		noscript: true,
		style: true,
		pre: true
	};
	const element_names = Object.keys(elements);
	// Anchor the pattern so that e.g. `pre` does not also match `<prefix>` or `<my-pre>`.
	const to_tag_pattern = (name) => {
		return new RegExp(`^(?:${name})$`, 'i');
	};
	const kBlockTextElements = element_names.map((it) => {
		return to_tag_pattern(it);
	});
	const kIgnoreElements = element_names.filter((it) => {
		return elements[it];
	}).map((it) => {
		return to_tag_pattern(it);
	});
	// True when the text content of a block text element has to be kept.
	function element_should_be_ignore(tag) {
		return kIgnoreElements.some((it) => {
			return it.test(tag);
		});
	}
	// True when the content of `tag` must not be parsed as markup.
	function is_block_text_element(tag) {
		return kBlockTextElements.some((it) => {
			return it.test(tag);
		});
	}
	const root = new HTMLElement(null, {});
	let currentParent = root;
	const stack = [root];
	let lastTextPos = -1;
	let match;
	// Wrap the input in a container tag so that leading and trailing text is
	// picked up by the loop below.
	// https://github.com/taoqf/node-html-parser/issues/38
	const html = `<${frameflag}>${data}</${frameflag}>`;
	// kMarkupPattern is a shared global regex: always start scanning from the beginning.
	kMarkupPattern.lastIndex = 0;
	while ((match = kMarkupPattern.exec(html))) {
		if (lastTextPos > -1) {
			if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
				// if has content
				const text = html.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
				currentParent.appendChild(new TextNode(text));
			}
		}
		lastTextPos = kMarkupPattern.lastIndex;
		if (match[2] === frameflag) {
			// ignore container tag we add above
			continue;
		}
		if (match[0][1] === '!') {
			// this is a comment
			if (options.comment) {
				// Only keep what is in between <!-- and -->
				const text = html.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
				currentParent.appendChild(new CommentNode(text));
			}
			continue;
		}
		if (options.lowerCaseTagName) {
			match[2] = match[2].toLowerCase();
		}
		if (!match[1]) {
			// not </ tags
			const attrs = {};
			for (let attMatch; (attMatch = kAttributePattern.exec(match[3]));) {
				attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
			}
			const tagName = currentParent.rawTagName;
			if (!match[4] && kElementsClosedByOpening[tagName]) {
				// The new opening tag implicitly closes the current element.
				if (kElementsClosedByOpening[tagName][match[2]]) {
					stack.pop();
					currentParent = arr_back(stack);
				}
			}
			currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
			stack.push(currentParent);
			if (is_block_text_element(match[2])) {
				// a little test to find next </script> or </style> ...
				const closeMarkup = `</${match[2]}>`;
				const index = (() => {
					if (options.lowerCaseTagName) {
						return html.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex);
					}
					return html.indexOf(closeMarkup, kMarkupPattern.lastIndex);
				})();
				if (element_should_be_ignore(match[2])) {
					let text;
					if (index === -1) {
						// there is no matching ending for the text element.
						text = html.substr(kMarkupPattern.lastIndex);
					}
					else {
						text = html.substring(kMarkupPattern.lastIndex, index);
					}
					if (text.length > 0) {
						currentParent.appendChild(new TextNode(text));
					}
				}
				if (index === -1) {
					// Nothing left to scan: move past the end so the loop terminates.
					lastTextPos = kMarkupPattern.lastIndex = html.length + 1;
				}
				else {
					// Skip the raw content and treat the element as closed right away.
					lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length;
					match[1] = 'true';
				}
			}
		}
		if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
			// </ or /> or <br> etc.
			while (true) {
				if (currentParent.rawTagName === match[2]) {
					stack.pop();
					currentParent = arr_back(stack);
					break;
				}
				const tagName = currentParent.tagName;
				// Trying to close current tag, and move on
				if (kElementsClosedByClosing[tagName] && kElementsClosedByClosing[tagName][match[2]]) {
					stack.pop();
					currentParent = arr_back(stack);
					continue;
				}
				// Use aggressive strategy to handle unmatching markups.
				break;
			}
		}
	}
	const valid = Boolean(stack.length === 1);
	if (!options.noFix) {
		const response = root;
		response.valid = valid;
		while (stack.length > 1) {
			// Handle each error elements.
			const last = stack.pop();
			const oneBefore = arr_back(stack);
			if (last.parentNode && last.parentNode.parentNode) {
				if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
					// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
					oneBefore.removeChild(last);
					last.childNodes.forEach((child) => {
						oneBefore.parentNode.appendChild(child);
					});
					stack.pop();
				}
				else {
					// Single error <div> <h3> </div> handle: Just removes <h3>
					oneBefore.removeChild(last);
					last.childNodes.forEach((child) => {
						oneBefore.appendChild(child);
					});
				}
			}
			// If it's final element just skip.
		}
		response.childNodes.forEach((node) => {
			if (node instanceof HTMLElement) {
				node.parentNode = null;
			}
		});
		return response;
	}
	// noFix: hand back the caller's input untouched, without the internal container tag.
	const response = new TextNode(data);
	response.valid = valid;
	return response;
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Node Class as base class for TextNode and HTMLElement. It only provides an empty `childNodes` array and a default `innerText` getter.
3
+ */
4
+ export default class Node {
5
+ constructor() {
6
+ this.childNodes = []; // every node starts out with no children
7
+ }
8
+ get innerText() { // default innerText is simply the node's raw text
9
+ return this.rawText; // rawText is never assigned in this class; a subclass must set it, otherwise this yields undefined
10
+ }
11
+ }
@@ -0,0 +1,34 @@
1
+ import NodeType from './type';
2
+ import Node from './node';
3
+ /**
4
+ * TextNode to contain a text element in DOM tree.
5
+ * @param {string} rawText text to store; it is kept verbatim in `this.rawText`
6
+ */
7
+ export default class TextNode extends Node {
8
+ constructor(rawText) {
9
+ super();
10
+ this.rawText = rawText;
11
+ /**
12
+ * Node Type declaration.
13
+ * @type {number}
14
+ */
15
+ this.nodeType = NodeType.TEXT_NODE;
16
+ }
17
+ /**
18
+ * Get the text value of this node. This returns `rawText` unchanged; no entity decoding is performed here.
19
+ * @return {string} text content
20
+ */
21
+ get text() {
22
+ return this.rawText;
23
+ }
24
+ /**
25
+ * Detect if the node contains only white space: whitespace characters and/or literal `&nbsp;` entities. An empty string also counts as whitespace.
26
+ * @return {boolean}
27
+ */
28
+ get isWhitespace() {
29
+ return /^(\s|&nbsp;)*$/.test(this.rawText);
30
+ }
31
+ toString() { // string form of the node is the same value the `text` getter returns
32
+ return this.text;
33
+ }
34
+ }
@@ -0,0 +1,7 @@
1
+ var NodeType; // two-way lookup table of node type codes, populated by the function call below
2
+ (function (NodeType) {
3
+ NodeType[NodeType["ELEMENT_NODE"] = 1] = "ELEMENT_NODE"; // maps the name to 1 and 1 back to the name; same numeric value as the DOM Node.ELEMENT_NODE constant
4
+ NodeType[NodeType["TEXT_NODE"] = 3] = "TEXT_NODE"; // name <-> 3, same value as DOM Node.TEXT_NODE
5
+ NodeType[NodeType["COMMENT_NODE"] = 8] = "COMMENT_NODE"; // name <-> 8, same value as DOM Node.COMMENT_NODE
6
+ })(NodeType || (NodeType = {})); // NodeType is still undefined here, so it is initialised to {} and then filled in
7
+ export default NodeType;
package/dist/main.js CHANGED
@@ -1084,19 +1084,34 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1084
1084
  th: { tr: true, table: true, TR: true, TABLE: true },
1085
1085
  TH: { tr: true, table: true, TR: true, TABLE: true }
1086
1086
  };
1087
- var kBlockTextElements = {
1088
- script: true,
1089
- SCRIPT: true,
1090
- noscript: true,
1091
- NOSCRIPT: true,
1092
- style: true,
1093
- STYLE: true,
1094
- pre: true,
1095
- PRE: true
1096
- };
1097
1087
  var frameflag = 'documentfragmentcontainer';
1098
1088
  function parse(data, options) {
1099
- if (options === void 0) { options = {}; }
1089
+ if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1090
+ var elements = options.blockTextElements || {
1091
+ script: true,
1092
+ noscript: true,
1093
+ style: true,
1094
+ pre: true
1095
+ };
1096
+ var element_names = Object.keys(elements);
1097
+ var kBlockTextElements = element_names.map(function (it) {
1098
+ return new RegExp(it, 'i');
1099
+ });
1100
+ var kIgnoreElements = element_names.filter(function (it) {
1101
+ return elements[it];
1102
+ }).map(function (it) {
1103
+ return new RegExp(it, 'i');
1104
+ });
1105
+ function element_should_be_ignore(tag) { // true when tag matches a blockTextElements entry whose value is truthy; parse() then keeps the text up to the close tag as one TextNode
1106
+ return kIgnoreElements.some(function (it) {
1107
+ return it.test(tag); // NOTE(review): each pattern is built from the bare element name with the i flag and no anchors, so this is a case-insensitive substring match (the script pattern also matches noscript) - confirm this is intended
1108
+ });
1109
+ }
1110
+ function is_block_text_element(tag) { // true when tag matches any key of blockTextElements, whatever its value; parse() then looks for the matching close tag instead of parsing the content as markup
1111
+ return kBlockTextElements.some(function (it) {
1112
+ return it.test(tag); // NOTE(review): patterns carry no anchors, so any tag name that merely contains a configured name also matches - confirm this is intended
1113
+ });
1114
+ }
1100
1115
  var root = new HTMLElement(null, {});
1101
1116
  var currentParent = root;
1102
1117
  var stack = [root];
@@ -1145,7 +1160,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1145
1160
  // https://github.com/taoqf/node-html-parser/issues/38
1146
1161
  currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
1147
1162
  stack.push(currentParent);
1148
- if (kBlockTextElements[match[2]]) {
1163
+ if (is_block_text_element(match[2])) {
1149
1164
  // a little test to find next </script> or </style> ...
1150
1165
  var closeMarkup_1 = "</" + match[2] + ">";
1151
1166
  var index = (function () {
@@ -1154,16 +1169,18 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1154
1169
  }
1155
1170
  return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex);
1156
1171
  })();
1157
- var text = void 0;
1158
- if (index === -1) {
1159
- // there is no matching ending for the text element.
1160
- text = data.substr(kMarkupPattern.lastIndex);
1161
- }
1162
- else {
1163
- text = data.substring(kMarkupPattern.lastIndex, index);
1164
- }
1165
- if (text.length > 0) {
1166
- currentParent.appendChild(new text_1.default(text));
1172
+ if (element_should_be_ignore(match[2])) {
1173
+ var text = void 0;
1174
+ if (index === -1) {
1175
+ // there is no matching ending for the text element.
1176
+ text = data.substr(kMarkupPattern.lastIndex);
1177
+ }
1178
+ else {
1179
+ text = data.substring(kMarkupPattern.lastIndex, index);
1180
+ }
1181
+ if (text.length > 0) {
1182
+ currentParent.appendChild(new text_1.default(text));
1183
+ }
1167
1184
  }
1168
1185
  if (index === -1) {
1169
1186
  lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
@@ -150,8 +150,11 @@ export default class HTMLElement extends Node {
150
150
  insertAdjacentHTML(where: InsertPosition, html: string): void;
151
151
  }
152
152
  export interface Options {
153
- lowerCaseTagName?: boolean;
154
- comment?: boolean;
153
+ lowerCaseTagName: boolean;
154
+ comment: boolean;
155
+ blockTextElements: {
156
+ [tag: string]: boolean;
157
+ };
155
158
  }
156
159
  /**
157
160
  * Parses HTML and returns a root element
@@ -159,15 +162,15 @@ export interface Options {
159
162
  * @param {string} data html
160
163
  * @return {HTMLElement} root element
161
164
  */
162
- export declare function parse(data: string, options?: Options): HTMLElement & {
165
+ export declare function parse(data: string, options?: Partial<Options>): HTMLElement & {
163
166
  valid: boolean;
164
167
  };
165
- export declare function parse(data: string, options?: Options & {
168
+ export declare function parse(data: string, options?: Partial<Options> & {
166
169
  noFix: false;
167
170
  }): HTMLElement & {
168
171
  valid: boolean;
169
172
  };
170
- export declare function parse(data: string, options?: Options & {
173
+ export declare function parse(data: string, options?: Partial<Options> & {
171
174
  noFix: true;
172
175
  }): (HTMLElement | TextNode) & {
173
176
  valid: boolean;
@@ -698,19 +698,34 @@ var kElementsClosedByClosing = {
698
698
  th: { tr: true, table: true, TR: true, TABLE: true },
699
699
  TH: { tr: true, table: true, TR: true, TABLE: true }
700
700
  };
701
- var kBlockTextElements = {
702
- script: true,
703
- SCRIPT: true,
704
- noscript: true,
705
- NOSCRIPT: true,
706
- style: true,
707
- STYLE: true,
708
- pre: true,
709
- PRE: true
710
- };
711
701
  var frameflag = 'documentfragmentcontainer';
712
702
  function parse(data, options) {
713
- if (options === void 0) { options = {}; }
703
+ if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
704
+ var elements = options.blockTextElements || {
705
+ script: true,
706
+ noscript: true,
707
+ style: true,
708
+ pre: true
709
+ };
710
+ var element_names = Object.keys(elements);
711
+ var kBlockTextElements = element_names.map(function (it) {
712
+ return new RegExp(it, 'i');
713
+ });
714
+ var kIgnoreElements = element_names.filter(function (it) {
715
+ return elements[it];
716
+ }).map(function (it) {
717
+ return new RegExp(it, 'i');
718
+ });
719
+ function element_should_be_ignore(tag) { // true when tag matches a blockTextElements entry whose value is truthy; parse() then keeps the text up to the close tag as one TextNode
720
+ return kIgnoreElements.some(function (it) {
721
+ return it.test(tag); // NOTE(review): each pattern is built from the bare element name with the i flag and no anchors, so this is a case-insensitive substring match (the script pattern also matches noscript) - confirm this is intended
722
+ });
723
+ }
724
+ function is_block_text_element(tag) { // true when tag matches any key of blockTextElements, whatever its value; parse() then looks for the matching close tag instead of parsing the content as markup
725
+ return kBlockTextElements.some(function (it) {
726
+ return it.test(tag); // NOTE(review): patterns carry no anchors, so any tag name that merely contains a configured name also matches - confirm this is intended
727
+ });
728
+ }
714
729
  var root = new HTMLElement(null, {});
715
730
  var currentParent = root;
716
731
  var stack = [root];
@@ -759,7 +774,7 @@ function parse(data, options) {
759
774
  // https://github.com/taoqf/node-html-parser/issues/38
760
775
  currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
761
776
  stack.push(currentParent);
762
- if (kBlockTextElements[match[2]]) {
777
+ if (is_block_text_element(match[2])) {
763
778
  // a little test to find next </script> or </style> ...
764
779
  var closeMarkup_1 = "</" + match[2] + ">";
765
780
  var index = (function () {
@@ -768,16 +783,18 @@ function parse(data, options) {
768
783
  }
769
784
  return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex);
770
785
  })();
771
- var text = void 0;
772
- if (index === -1) {
773
- // there is no matching ending for the text element.
774
- text = data.substr(kMarkupPattern.lastIndex);
775
- }
776
- else {
777
- text = data.substring(kMarkupPattern.lastIndex, index);
778
- }
779
- if (text.length > 0) {
780
- currentParent.appendChild(new text_1.default(text));
786
+ if (element_should_be_ignore(match[2])) {
787
+ var text = void 0;
788
+ if (index === -1) {
789
+ // there is no matching ending for the text element.
790
+ text = data.substr(kMarkupPattern.lastIndex);
791
+ }
792
+ else {
793
+ text = data.substring(kMarkupPattern.lastIndex, index);
794
+ }
795
+ if (text.length > 0) {
796
+ currentParent.appendChild(new text_1.default(text));
797
+ }
781
798
  }
782
799
  if (index === -1) {
783
800
  lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
package/package.json CHANGED
@@ -1,8 +1,10 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "1.3.2",
3
+ "version": "1.4.2",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
+ "module": "dist/esm/index.js",
7
+ "browser": "dist/main.js",
6
8
  "types": "dist/index.d.ts",
7
9
  "scripts": {
8
10
  "test": "mocha",
@@ -10,7 +12,8 @@
10
12
  "clean": "del-cli ./dist/",
11
13
  "ts:cjs": "tsc -m commonjs",
12
14
  "ts:umd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
13
- "build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:umd",
15
+ "ts:esm": "tsc -t esnext -m esnext -d false --outDir ./dist/esm/",
16
+ "build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:umd && npm run ts:esm",
14
17
  "dev": "tsc -w",
15
18
  "pretest": "tsc -m commonjs"
16
19
  },