html-minifier-next 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,565 @@
1
+ /*!
2
+ * HTML Parser By John Resig (ejohn.org)
3
+ * Modified by Juriy "kangax" Zaytsev
4
+ * Original code by Erik Arvidsson, Mozilla Public License
5
+ * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
6
+ */
7
+
8
+ /*
9
+ * // Use like so:
10
+ * await new HTMLParser(htmlString, {
11
+ * start: function(tag, attrs, unary) {},
12
+ * end: function(tag) {},
13
+ * chars: function(text) {},
14
+ * comment: function(text) {}
15
+ * }).parse();
16
+ *
17
+ * // or to get an XML string:
18
+ * HTMLtoXML(htmlString);
19
+ *
20
+ * // or to get an XML DOM Document
21
+ * HTMLtoDOM(htmlString);
22
+ *
23
+ * // or to inject into an existing document/DOM node
24
+ * HTMLtoDOM(htmlString, document);
25
+ * HTMLtoDOM(htmlString, document.body);
26
+ *
27
+ */
28
+
29
+ /* global ActiveXObject, DOMDocument */
30
+
31
+ import { replaceAsync } from './utils.js';
32
+
33
/**
 * A Set of strings whose membership operations ignore letter case.
 *
 * Every value is folded to lower case on the way in (`add`, and therefore the
 * constructor, which inserts its initial values through `add`) and on lookup
 * (`has`, `delete`). Previously only `has` normalised its argument, so a
 * member inserted with upper-case letters could never be found again.
 *
 * All values must be strings; anything without a `toLowerCase` method throws
 * a TypeError, as before.
 */
class CaseInsensitiveSet extends Set {
  /**
   * @param {string} str - value to insert (stored lower-cased)
   * @returns {this} the set, for chaining
   */
  add(str) {
    return super.add(str.toLowerCase());
  }

  /**
   * @param {string} str - value to look up, in any letter case
   * @returns {boolean} whether the lower-cased value is a member
   */
  has(str) {
    return super.has(str.toLowerCase());
  }

  /**
   * @param {string} str - value to remove, in any letter case
   * @returns {boolean} whether a member was removed
   */
  delete(str) {
    return super.delete(str.toLowerCase());
  }
}
38
+
39
// Regular Expressions for parsing tags and attributes

// Attribute name: one or more characters that are not whitespace, a quote,
// `<`, `>`, `/` or `=`. Captured as a single group.
const singleAttrIdentifier = /([^\s"'<>/=]+)/;
// Built-in name/value separator; joinSingleAttrAssigns() appends the
// handler's `customAttrAssign` entries to this list.
const singleAttrAssigns = [/=/];
// Pattern sources for the three attribute value forms. Each contributes one
// capture group; attrForHandler() joins them with `|`, so the group that
// matched identifies which quoting style was used.
const singleAttrValues = [
  // attr value double quotes
  /"([^"]*)"+/.source,
  // attr value, single quotes
  /'([^']*)'+/.source,
  // attr value, no quotes
  /([^ \t\n\f\r"'`=<>]+)/.source
];
50
// Source text of a capturing group that matches an XML qualified name
// (`prefix:local` or just `local`), see
// https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
const qnameCapture = (() => {
  // Character-class fragments, based on https://www.npmjs.com/package/ncname
  const combiningChar = '\\u0300-\\u0345\\u0360\\u0361\\u0483-\\u0486\\u0591-\\u05A1\\u05A3-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u064B-\\u0652\\u0670\\u06D6-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0901-\\u0903\\u093C\\u093E-\\u094D\\u0951-\\u0954\\u0962\\u0963\\u0981-\\u0983\\u09BC\\u09BE-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CD\\u09D7\\u09E2\\u09E3\\u0A02\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A70\\u0A71\\u0A81-\\u0A83\\u0ABC\\u0ABE-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0B01-\\u0B03\\u0B3C\\u0B3E-\\u0B43\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B82\\u0B83\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C82\\u0C83\\u0CBE-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4D\\u0D57\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E\\u0F3F\\u0F71-\\u0F84\\u0F86-\\u0F8B\\u0F90-\\u0F95\\u0F97\\u0F99-\\u0FAD\\u0FB1-\\u0FB7\\u0FB9\\u20D0-\\u20DC\\u20E1\\u302A-\\u302F\\u3099\\u309A';
  const digit = '0-9\\u0660-\\u0669\\u06F0-\\u06F9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE7-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29';
  const extender = '\\xB7\\u02D0\\u02D1\\u0387\\u0640\\u0E46\\u0EC6\\u3005\\u3031-\\u3035\\u309D\\u309E\\u30FC-\\u30FE';
  const letter = 'A-Za-z\\xC0-\\xD6\\xD8-\\xF6\\xF8-\\u0131\\u0134-\\u013E\\u0141-\\u0148\\u014A-\\u017E\\u0180-\\u01C3\\u01CD-\\u01F0\\u01F4\\u01F5\\u01FA-\\u0217\\u0250-\\u02A8\\u02BB-\\u02C1\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03D0-\\u03D6\\u03DA\\u03DC\\u03DE\\u03E0\\u03E2-\\u03F3\\u0401-\\u040C\\u040E-\\u044F\\u0451-\\u045C\\u045E-\\u0481\\u0490-\\u04C4\\u04C7\\u04C8\\u04CB\\u04CC\\u04D0-\\u04EB\\u04EE-\\u04F5\\u04F8\\u04F9\\u0531-\\u0556\\u0559\\u0561-\\u0586\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u063A\\u0641-\\u064A\\u0671-\\u06B7\\u06BA-\\u06BE\\u06C0-\\u06CE\\u06D0-\\u06D3\\u06D5\\u06E5\\u06E6\\u0905-\\u0939\\u093D\\u0958-\\u0961\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8B\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AE0\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B36-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB5\\u0BB7-\\u0BB9\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D60\\u0D61\\u0E01-\\u0E2E\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E45\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD\\u0EAE\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0F40-\\u0F47\\u0F49-\\u0F69\\u10A0-\\u10C5\\u10D0-\\u10F6\\u1100\\u1102\\u1103\\u1105-\\u1107\\u1109\\u110B\\u110C\\u110E-\\u1112\\u113C\\u113E\\u1140\\u114C\\u114E\\u1150\\u1154\\u1155\\u1159\\u115F-\\u1161\\u1163\\u1165\\u1167\\u1169\\u116D\\u116E\\u1172\\u1173\\u1175\\u119E\\u11A8\\u11AB\\u11AE\\u11AF\\u11B7\\u11B8\\u11BA\\u11BC-\\u11C2\\u11EB\\u11F0\\u11F9\\u1E00-\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2126\\u212A\\u212B\\u212E\\u2180-\\u2182\\u3007\\u3021-\\u3029\\u3041-\\u3094\\u30A1-\\u30FA\\u3105-\\u312C\\u4E00-\\u9FA5\\uAC00-\\uD7A3';

  // NCName: a letter or underscore, followed by any number of name characters.
  const ncname = `[${letter}_][${letter}${digit}\\.\\-_${combiningChar}${extender}]*`;

  // QName: optional `prefix:` (not captured separately) plus the local part.
  return `((?:${ncname}\\:)?${ncname})`;
})();
60
// `<` followed by a qualified tag name (captured) at the start of the input.
const startTagOpen = new RegExp('^<' + qnameCapture);
// End of a start tag: optional whitespace, an optional self-closing slash
// (captured), then `>`.
const startTagClose = /^\s*(\/?)>/;
// Complete end tag `</name ...>`; the tag name is captured and anything up to
// the next `>` is accepted after it.
export const endTag = new RegExp('^<\\/' + qnameCapture + '[^>]*>');
// Case-insensitive `<!DOCTYPE ...>` declaration at the start of the input.
const doctype = /^<!DOCTYPE\s?[^>]+>/i;

// Feature probe: becomes true when the regex engine reports an optional
// capture group that did not participate in the match as '' rather than
// undefined. Attribute parsing consults this flag to undo the bogus empty
// captures.
let IS_REGEX_CAPTURING_BROKEN = false;
'x'.replace(/x(.)?/g, function (m, g) {
  IS_REGEX_CAPTURING_BROKEN = g === '';
});
69
+
70
// Empty Elements
// (start tags for these are always reported to the handler as unary)
const empty = new CaseInsensitiveSet(['area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']);

// Inline Elements
// (when `handler.html5` is falsy, a non-inline start tag closes any open inline elements)
const inline = new CaseInsensitiveSet(['a', 'abbr', 'acronym', 'applet', 'b', 'basefont', 'bdo', 'big', 'br', 'button', 'cite', 'code', 'del', 'dfn', 'em', 'font', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label', 'map', 'noscript', 'object', 'q', 's', 'samp', 'script', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'svg', 'textarea', 'tt', 'u', 'var']);

// Elements that you can, intentionally, leave open
// (and which close themselves)
const closeSelf = new CaseInsensitiveSet(['colgroup', 'dd', 'dt', 'li', 'option', 'p', 'td', 'tfoot', 'th', 'thead', 'tr', 'source']);

// Attributes that have their values filled in disabled='disabled'
// (only applied when the attribute was written without any value)
const fillAttrs = new CaseInsensitiveSet(['checked', 'compact', 'declare', 'defer', 'disabled', 'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap', 'readonly', 'selected']);

// Special Elements (can contain anything)
// (their content is handed to `chars` verbatim up to the matching end tag)
const special = new CaseInsensitiveSet(['script', 'style']);

// HTML5 tags https://html.spec.whatwg.org/multipage/indices.html#elements-3
// Phrasing Content https://html.spec.whatwg.org/multipage/dom.html#phrasing-content
// (in html5 mode, a start tag from this set implicitly closes an open `p`)
const nonPhrasing = new CaseInsensitiveSet(['address', 'article', 'aside', 'base', 'blockquote', 'body', 'caption', 'col', 'colgroup', 'dd', 'details', 'dialog', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'legend', 'li', 'menuitem', 'meta', 'ol', 'optgroup', 'option', 'param', 'rp', 'rt', 'source', 'style', 'summary', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul']);

// Cache of "content up to the closing tag" regexes, keyed by lower-cased tag name.
const reCache = {};
91
+
92
/**
 * Build the regex that matches one attribute (with leading whitespace) at the
 * start of the remaining start-tag text.
 *
 * The base pattern is: name, then optionally an assignment operator followed
 * by a double-quoted, single-quoted or unquoted value. When the handler
 * supplies `customAttrSurround` (a list of [open, close] regex pairs), one
 * alternative is emitted per pair with the base pattern wrapped in captured
 * open/close delimiters, followed by the plain base pattern as the final
 * alternative.
 *
 * @param {object} handler - parser handler/options object
 * @returns {RegExp} anchored attribute matcher
 */
function attrForHandler(handler) {
  const assignPart = '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')';
  const valuePart = '[ \\t\\n\\f\\r]*(?:' + singleAttrValues.join('|') + '))?';
  const basePattern = singleAttrIdentifier.source + assignPart + valuePart;

  const surround = handler.customAttrSurround;
  if (!surround) {
    return new RegExp('^\\s*' + basePattern);
  }

  // One alternative per surround pair, in the order the pairs were given.
  const alternatives = Array.from(surround, function (pair) {
    return '(?:(' + pair[0].source + ')\\s*' + basePattern + '\\s*(' + pair[1].source + '))';
  });
  // The unwrapped attribute always comes last.
  alternatives.push('(?:' + basePattern + ')');

  return new RegExp('^\\s*' + '(?:' + alternatives.join('|') + ')');
}
110
+
111
/**
 * Produce the alternation of all attribute assignment operators: the built-in
 * ones followed by the handler's `customAttrAssign` entries, each wrapped in
 * a non-capturing group.
 *
 * @param {object} handler - parser handler/options object
 * @returns {string} regex source such as `(?:=)|(?:custom)`
 */
function joinSingleAttrAssigns(handler) {
  const extraAssigns = handler.customAttrAssign || [];
  const sources = [];
  for (const assign of singleAttrAssigns.concat(extraAssigns)) {
    sources.push(`(?:${assign.source})`);
  }
  return sources.join('|');
}
118
+
119
/**
 * Callback-driven HTML tokenizer.
 *
 * The `handler` object doubles as the options bag. Callbacks (all optional,
 * all awaited, so they may be async):
 *   - start(tagName, attrs, unary, unarySlash[, synthesized])
 *   - end(tagName, attrs, implicit) - `implicit` is true when the element was
 *     closed without a matching end tag of its own in the input
 *   - chars(text[, prevTag, nextTag])
 *   - comment(text[, nonStandard])
 *   - doctype(text)
 * Options read from the handler: `html5`, `partialMarkup`,
 * `continueOnParseError`, `customAttrAssign`, `customAttrSurround`.
 */
export class HTMLParser {
  /**
   * @param {string} html - markup to tokenize
   * @param {object} handler - callbacks and options, see the class comment
   */
  constructor(html, handler) {
    this.html = html;
    this.handler = handler;
  }

  /**
   * Tokenize `this.html`, invoking the handler callbacks in document order.
   *
   * @returns {Promise<void>} settles once the whole input has been consumed
   * @throws {Error} "Parse Error: ..." when an iteration consumes no input
   */
  async parse() {
    let html = this.html;
    const handler = this.handler;

    // Open elements, innermost last; `lastTag` mirrors the top of the stack.
    const stack = []; let lastTag;
    const attribute = attrForHandler(handler);
    let last, prevTag, nextTag;
    while (html) {
      last = html;
      // Make sure we're not in a script or style element
      if (!lastTag || !special.has(lastTag)) {
        let textEnd = html.indexOf('<');
        if (textEnd === 0) {
          // Comment:
          if (/^<!--/.test(html)) {
            const commentEnd = html.indexOf('-->');

            if (commentEnd >= 0) {
              if (handler.comment) {
                await handler.comment(html.substring(4, commentEnd));
              }
              html = html.substring(commentEnd + 3);
              prevTag = '';
              continue;
            }
          }

          // https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
          if (/^<!\[/.test(html)) {
            const conditionalEnd = html.indexOf(']>');

            if (conditionalEnd >= 0) {
              if (handler.comment) {
                await handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */);
              }
              html = html.substring(conditionalEnd + 2);
              prevTag = '';
              continue;
            }
          }

          // Doctype:
          const doctypeMatch = html.match(doctype);
          if (doctypeMatch) {
            if (handler.doctype) {
              // Awaited like every other callback so an async handler keeps
              // document order and its rejection propagates to the caller.
              await handler.doctype(doctypeMatch[0]);
            }
            html = html.substring(doctypeMatch[0].length);
            prevTag = '';
            continue;
          }

          // End tag:
          const endTagMatch = html.match(endTag);
          if (endTagMatch) {
            html = html.substring(endTagMatch[0].length);
            // The match already holds the full tag and its name; pass them
            // straight through instead of re-running the regex.
            await parseEndTag(endTagMatch[0], endTagMatch[1]);
            prevTag = '/' + endTagMatch[1].toLowerCase();
            continue;
          }

          // Start tag:
          const startTagMatch = parseStartTag(html);
          if (startTagMatch) {
            html = startTagMatch.rest;
            await handleStartTag(startTagMatch);
            prevTag = startTagMatch.tagName.toLowerCase();
            continue;
          }

          // Treat `<` as text
          if (handler.continueOnParseError) {
            textEnd = html.indexOf('<', 1);
          }
        }

        let text;
        if (textEnd >= 0) {
          text = html.substring(0, textEnd);
          html = html.substring(textEnd);
        } else {
          text = html;
          html = '';
        }

        // next tag
        let nextTagMatch = parseStartTag(html);
        if (nextTagMatch) {
          nextTag = nextTagMatch.tagName;
        } else {
          nextTagMatch = html.match(endTag);
          if (nextTagMatch) {
            nextTag = '/' + nextTagMatch[1];
          } else {
            nextTag = '';
          }
        }

        if (handler.chars) {
          await handler.chars(text, prevTag, nextTag);
        }
        prevTag = '';
      } else {
        // Inside a special element: everything up to its end tag is raw text.
        const stackedTag = lastTag.toLowerCase();
        const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));

        html = await replaceAsync(html, reStackedTag, async (_, text) => {
          // NOTE(review): this branch is only entered for members of
          // `special`; if that set holds just script and style, the
          // condition below can never be true.
          if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
            text = text
              .replace(/<!--([\s\S]*?)-->/g, '$1')
              .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
          }

          if (handler.chars) {
            await handler.chars(text);
          }

          return '';
        });

        await parseEndTag('</' + stackedTag + '>', stackedTag);
      }

      // No progress was made in this iteration: bail out instead of spinning.
      if (html === last) {
        throw new Error('Parse Error: ' + html);
      }
    }

    if (!handler.partialMarkup) {
      // Clean up any remaining tags
      await parseEndTag();
    }

    /**
     * Try to read a complete start tag from the beginning of `input`.
     * Returns `{ tagName, attrs, unarySlash, rest }`, where `attrs` holds the
     * raw regex matches, or undefined when `input` does not begin with a
     * well-formed start tag.
     */
    function parseStartTag(input) {
      const start = input.match(startTagOpen);
      if (start) {
        const match = {
          tagName: start[1],
          attrs: []
        };
        input = input.slice(start[0].length);
        let end, attr;
        while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
          input = input.slice(attr[0].length);
          match.attrs.push(attr);
        }
        if (end) {
          match.unarySlash = end[1];
          match.rest = input.slice(end[0].length);
          return match;
        }
      }
    }

    /** Close `tagName` (and everything nested in it) if it is open; resolves to true if it was. */
    async function closeIfFound(tagName) {
      if (findTag(tagName) >= 0) {
        await parseEndTag('', tagName);
        return true;
      }
    }

    /**
     * Apply the implicit-close rules for a start tag, convert its raw
     * attribute matches into attribute records, push it on the stack unless
     * it is unary, and report it through `handler.start`.
     */
    async function handleStartTag(match) {
      const tagName = match.tagName;
      let unarySlash = match.unarySlash;

      if (handler.html5) {
        if (lastTag === 'p' && nonPhrasing.has(tagName)) {
          await parseEndTag('', lastTag);
        } else if (tagName === 'tbody') {
          await closeIfFound('thead');
        } else if (tagName === 'tfoot') {
          if (!await closeIfFound('tbody')) {
            await closeIfFound('thead');
          }
        }
        // A `col` outside any `colgroup` gets a synthesized `colgroup` parent.
        if (tagName === 'col' && findTag('colgroup') < 0) {
          lastTag = 'colgroup';
          stack.push({ tag: lastTag, attrs: [] });
          if (handler.start) {
            await handler.start(lastTag, [], false, '');
          }
        }
      }

      if (!handler.html5 && !inline.has(tagName)) {
        while (lastTag && inline.has(lastTag)) {
          await parseEndTag('', lastTag);
        }
      }

      if (closeSelf.has(tagName) && lastTag === tagName) {
        await parseEndTag('', tagName);
      }

      const unary = empty.has(tagName) || (tagName === 'html' && lastTag === 'head') || !!unarySlash;

      const attrs = match.attrs.map(function (args) {
        let name, value, customOpen, customClose, customAssign, quote;
        const ncp = 7; // number of captured parts, scalar

        // hackish work around FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
        if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
          if (args[3] === '') { delete args[3]; }
          if (args[4] === '') { delete args[4]; }
          if (args[5] === '') { delete args[5]; }
        }

        // Reads the assignment operator and value starting at capture
        // `index`; returns the quote character that delimited the value.
        function populate(index) {
          customAssign = args[index];
          value = args[index + 1];
          if (typeof value !== 'undefined') {
            return '"';
          }
          value = args[index + 2];
          if (typeof value !== 'undefined') {
            return '\'';
          }
          value = args[index + 3];
          if (typeof value === 'undefined' && fillAttrs.has(name)) {
            value = name;
          }
          return '';
        }

        // Each surround alternative contributes `ncp` capture groups; probe
        // them in order and stop at the first one that captured a name.
        let j = 1;
        if (handler.customAttrSurround) {
          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
            name = args[j + 1];
            if (name) {
              quote = populate(j + 2);
              customOpen = args[j];
              customClose = args[j + 6];
              break;
            }
          }
        }

        // No surround alternative matched: `j` now points at the plain attribute.
        if (!name && (name = args[j])) {
          quote = populate(j + 1);
        }

        return {
          name,
          value,
          customAssign: customAssign || '=',
          customOpen: customOpen || '',
          customClose: customClose || '',
          quote: quote || ''
        };
      });

      if (!unary) {
        stack.push({ tag: tagName, attrs });
        lastTag = tagName;
        unarySlash = '';
      }

      if (handler.start) {
        await handler.start(tagName, attrs, unary, unarySlash);
      }
    }

    /** Index of the innermost open element named `tagName` (case-insensitive), or -1. */
    function findTag(tagName) {
      let pos;
      const needle = tagName.toLowerCase();
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].tag.toLowerCase() === needle) {
          break;
        }
      }
      return pos;
    }

    /**
     * Close `tagName` together with every element opened inside it. With no
     * arguments, closes everything still open. A stray `</br>` is reported as
     * a unary `br` start tag, and a stray `</p>` as an empty `p` element.
     *
     * @param {string} [tag] - end tag text; falsy when the close is implicit
     * @param {string} [tagName] - name of the element to close
     */
    async function parseEndTag(tag, tagName) {
      let pos;

      // Find the closest opened tag of the same type
      if (tagName) {
        pos = findTag(tagName);
      } else { // If no tag name is provided, clean shop
        pos = 0;
      }

      if (pos >= 0) {
        // Close all the open elements, up the stack
        for (let i = stack.length - 1; i >= pos; i--) {
          if (handler.end) {
            // Awaited so an async `end` handler finishes before the next
            // callback fires and its rejection is not lost.
            await handler.end(stack[i].tag, stack[i].attrs, i > pos || !tag);
          }
        }

        // Remove the open elements from the stack
        stack.length = pos;
        lastTag = pos && stack[pos - 1].tag;
      } else if (tagName.toLowerCase() === 'br') {
        if (handler.start) {
          await handler.start(tagName, [], true, '');
        }
      } else if (tagName.toLowerCase() === 'p') {
        if (handler.start) {
          await handler.start(tagName, [], false, '', true);
        }
        if (handler.end) {
          await handler.end(tagName, []);
        }
      }
    }
  }
}
434
+
435
/**
 * Serialize HTML as XML-style markup: every attribute is double-quoted (with
 * `"` in values written as `&#34;`) and unary elements are emitted as `<tag/>`.
 *
 * NOTE(review): `HTMLParser#parse` is declared `async` and is not awaited
 * here, so the returned string only contains what the parser emitted before
 * its first suspension. Confirm whether callers rely on the synchronous
 * return before changing this to return a promise.
 *
 * @param {string} html - markup to convert
 * @returns {string} the markup accumulated by the time `parse()` first yields
 */
export const HTMLtoXML = (html) => {
  let xml = '';
  const append = (chunk) => {
    xml += chunk;
  };
  const quoteValue = (value) => (value || '').replace(/"/g, '&#34;');

  const handler = {
    start(tag, attrs, unary) {
      const attrText = attrs
        .map((attr) => ' ' + attr.name + '="' + quoteValue(attr.value) + '"')
        .join('');
      append('<' + tag + attrText + (unary ? '/' : '') + '>');
    },
    end(tag) {
      append('</' + tag + '>');
    },
    chars(text) {
      append(text);
    },
    comment(text) {
      append('<!--' + text + '-->');
    },
    ignore(text) {
      append(text);
    }
  };

  new HTMLParser(html, handler).parse();

  return xml;
};
466
+
467
/**
 * Parse HTML into a DOM document.
 *
 * When `doc` is a document or a node, its owner document is used and new
 * content is injected under the existing `body`; otherwise a fresh document
 * is created with whichever DOM implementation is available.
 *
 * NOTE(review): `HTMLParser#parse` is async and is not awaited here, so the
 * document is returned while parsing may still be in progress and a parse
 * failure surfaces as an unhandled rejection. Awaiting it would change the
 * return type to a promise; confirm with callers first.
 *
 * @param {string} html - markup to parse
 * @param {Document|Node} [doc] - existing document, or a node inside one
 * @returns {Document} the (possibly still filling) document
 * @throws {TypeError} when no document is supplied and none can be created
 */
export const HTMLtoDOM = (html, doc) => {
  // Own-property test, so tag names such as "constructor" or "toString" are
  // not mistaken for entries of the lookup tables below.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const canAppendTo = (node) => Boolean(node && node.appendChild);

  // There can be only one of these elements
  const one = {
    html: true,
    head: true,
    body: true,
    title: true
  };

  // Enforce a structure for the document
  const structure = {
    link: 'head',
    base: 'head'
  };

  if (doc) {
    doc = doc.ownerDocument || (doc.getOwnerDocument && doc.getOwnerDocument()) || doc;
  } else if (typeof DOMDocument !== 'undefined') {
    doc = new DOMDocument();
  } else if (typeof document !== 'undefined' && document.implementation && document.implementation.createDocument) {
    doc = document.implementation.createDocument('', '', null);
  } else if (typeof ActiveXObject !== 'undefined') {
    // The guard must test the same global that is constructed.
    doc = new ActiveXObject('Msxml.DOMDocument');
  }

  if (!doc) {
    throw new TypeError('HTMLtoDOM: no document was supplied and no DOM implementation is available');
  }

  // Elements currently open, innermost last. Kept in step with the parser:
  // every non-unary `start` pushes exactly one entry and every `end` pops one.
  const elems = [];
  const documentElement = doc.documentElement || (doc.getDocumentElement && doc.getDocumentElement());

  // If we're dealing with an empty document then we
  // need to pre-populate it with the HTML document structure
  if (!documentElement && doc.createElement) {
    const root = doc.createElement('html');
    const head = doc.createElement('head');
    head.appendChild(doc.createElement('title'));
    root.appendChild(head);
    root.appendChild(doc.createElement('body'));
    doc.appendChild(root);
  }

  // Find all the unique elements
  if (doc.getElementsByTagName) {
    for (const tagName of Object.keys(one)) {
      one[tagName] = doc.getElementsByTagName(tagName)[0];
    }
  }

  // If we're working with a document, inject contents into
  // the body element
  let curParentNode = one.body;

  const parser = new HTMLParser(html, {
    start(tagName, attrs, unary) {
      // If it's a pre-built element, then we can ignore
      // its construction
      const prebuilt = hasOwn(one, tagName) ? one[tagName] : undefined;
      if (prebuilt) {
        curParentNode = prebuilt;
        if (!unary) {
          // Still record it, so the matching `end` pops this entry rather
          // than an unrelated one (or an empty stack).
          elems.push(prebuilt);
        }
        return;
      }

      const elem = doc.createElement(tagName);

      for (const attr of attrs) {
        // An attribute written without a value has the empty string as its value.
        elem.setAttribute(attr.name, typeof attr.value === 'undefined' ? '' : attr.value);
      }

      const forcedParent = hasOwn(structure, tagName) ? one[structure[tagName]] : undefined;
      if (canAppendTo(forcedParent)) {
        forcedParent.appendChild(elem);
      } else if (canAppendTo(curParentNode)) {
        curParentNode.appendChild(elem);
      }

      if (!unary) {
        elems.push(elem);
        curParentNode = elem;
      }
    },
    end(/* tag */) {
      // pop() is a no-op on an empty array, whereas `length -= 1` throws a
      // RangeError there.
      elems.pop();

      // Init the new parentNode; with nothing open, go back to the body.
      curParentNode = elems.length > 0 ? elems[elems.length - 1] : one.body;
    },
    chars(text) {
      if (canAppendTo(curParentNode)) {
        curParentNode.appendChild(doc.createTextNode(text));
      }
    },
    comment(/* text */) {
      // create comment node
    },
    ignore(/* text */) {
      // What to do here?
    }
  });

  parser.parse();

  return doc;
};
@@ -0,0 +1,68 @@
1
/**
 * One level of a token-ordering tree. `keys` lists '$'-prefixed tokens in
 * priority order, and `this[key]` holds the child Sorter applied to whatever
 * follows that token.
 */
class Sorter {
  /**
   * Reorder `tokens` in place from `fromIndex` onwards. The first key whose
   * token occurs in that range has all of its occurrences moved to the front
   * of the range; the remainder is then handed to that key's child sorter.
   *
   * @param {string[]} tokens - array to reorder (mutated)
   * @param {number} [fromIndex=0] - start of the still-unsorted range
   * @returns {string[]} the same `tokens` array
   */
  sort(tokens, fromIndex = 0) {
    for (const key of this.keys) {
      const token = key.slice(1);
      let found = tokens.indexOf(token, fromIndex);
      if (found === -1) {
        continue;
      }

      while (found !== -1) {
        // Already in place when it sits right at the boundary.
        if (found !== fromIndex) {
          tokens.splice(found, 1);
          tokens.splice(fromIndex, 0, token);
        }
        fromIndex++;
        found = tokens.indexOf(token, fromIndex);
      }

      return this[key].sort(tokens, fromIndex);
    }
    return tokens;
  }
}
24
+
25
/**
 * Collects token lists and derives a Sorter from them.
 *
 * Each distinct token gets an own property named '$' + token whose value is
 * the array of token lists that contain it, plus a `processed` counter on
 * that array.
 */
class TokenChain {
  /**
   * Register one token list under every token it contains. The list itself
   * (not a copy) is stored.
   *
   * @param {string[]} tokens - token list to record
   */
  add(tokens) {
    tokens.forEach((token) => {
      const key = '$' + token;
      let bucket = this[key];
      if (!bucket) {
        bucket = [];
        bucket.processed = 0;
        this[key] = bucket;
      }
      bucket.push(tokens);
    });
  }

  /**
   * Build a Sorter tree from the recorded lists. Keys are visited from the
   * most to the least frequently recorded token (ties broken by key name); a
   * key is kept only while some of its lists have not yet been accounted for
   * by an earlier key. The recorded lists are mutated in the process.
   *
   * @returns {Sorter} sorter whose `keys` and per-key child sorters are set
   */
  createSorter() {
    const sorter = new Sorter();

    const byCountThenName = (j, k) => {
      const m = this[j].length;
      const n = this[k].length;
      if (m !== n) {
        return m < n ? 1 : -1;
      }
      if (j === k) {
        return 0;
      }
      return j < k ? -1 : 1;
    };

    const orderedKeys = Object.keys(this).sort(byCountThenName);

    sorter.keys = orderedKeys.filter((key) => {
      const bucket = this[key];
      if (bucket.processed >= bucket.length) {
        return false;
      }

      const token = key.slice(1);
      const chain = new TokenChain();

      for (const tokens of bucket) {
        // Strip this token in place; the same array is shared with the
        // buckets of the other tokens it contains.
        for (let at = tokens.indexOf(token); at !== -1; at = tokens.indexOf(token)) {
          tokens.splice(at, 1);
        }
        tokens.forEach((other) => {
          this['$' + other].processed++;
        });
        chain.add(tokens.slice(0));
      }

      sorter[key] = chain.createSorter();
      return true;
    });

    return sorter;
  }
}
67
+
68
+ export default TokenChain;
package/src/utils.js ADDED
@@ -0,0 +1,11 @@
1
/**
 * Asynchronous counterpart of `String.prototype.replace` with a replacer
 * function.
 *
 * Works in two passes over `str`: the first invokes `asyncFn` for every match
 * (all calls are started before any of them is awaited), the second
 * substitutes the resolved values in match order. `regex` must therefore
 * produce the same matches on both passes.
 *
 * @param {string} str - input text
 * @param {RegExp|string} regex - pattern, as accepted by `String.prototype.replace`
 * @param {Function} asyncFn - called with the usual replacer arguments
 *   (match, ...groups, offset, string); may return a promise
 * @returns {Promise<string>} `str` with each match replaced by its resolved value
 */
export async function replaceAsync(str, regex, asyncFn) {
  const pending = [];

  // First pass: only collect the promises. Returning the match makes it
  // explicit that the string produced by this pass is discarded.
  str.replace(regex, (match, ...args) => {
    pending.push(asyncFn(match, ...args));
    return match;
  });

  const replacements = await Promise.all(pending);

  // Second pass: walk the results with a cursor rather than `shift()`, which
  // would re-index the array on every match.
  let next = 0;
  return str.replace(regex, () => replacements[next++]);
}