html-minifier-next 4.9.0 → 4.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -17
- package/dist/htmlminifier.cjs +117 -21
- package/dist/htmlminifier.esm.bundle.js +117 -21
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/htmlminifier.js +80 -1
- package/src/htmlparser.js +16 -10
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
HTML Minifier Next (HMN) is a **super-configurable, well-tested, JavaScript-based HTML minifier**.
|
|
6
6
|
|
|
7
|
-
The project was based on [HTML Minifier Terser](https://github.com/terser/html-minifier-terser), which in turn had been based on [Juriy Zaytsev’s HTML Minifier](https://github.com/kangax/html-minifier). HMN offers additional features, but is backwards-compatible with both. The project was set up because as of 2025, both HTML Minifier Terser and HTML Minifier had been unmaintained for a few years. As the project seems maintainable [to me, [Jens](https://meiert.com/)]—even more so with community support—, it’s being [updated, extended, and documented](https://github.com/j9t/html-minifier-next/blob/main/CHANGELOG.md) further in this place.
|
|
7
|
+
The project was based on [HTML Minifier Terser](https://github.com/terser/html-minifier-terser), which in turn had been based on [Juriy “kangax” Zaytsev’s HTML Minifier](https://github.com/kangax/html-minifier). HMN offers additional features, but is backwards-compatible with both. The project was set up because as of 2025, both HTML Minifier Terser and HTML Minifier had been unmaintained for a few years. As the project seems maintainable [to me, [Jens](https://meiert.com/), an HTML optimizer]—even more so with community support—, it’s being [updated, extended, and documented](https://github.com/j9t/html-minifier-next/blob/main/CHANGELOG.md) further in this place.
|
|
8
8
|
|
|
9
9
|
## Installation
|
|
10
10
|
|
|
@@ -233,30 +233,32 @@ How does HTML Minifier Next compare to other minifiers? (All with the most aggre
|
|
|
233
233
|
| Site | Original Size (KB) | [HTML Minifier Next](https://github.com/j9t/html-minifier-next)<br>[](https://socket.dev/npm/package/html-minifier-next) | [HTML Minifier Terser](https://github.com/terser/html-minifier-terser)<br>[](https://socket.dev/npm/package/html-minifier-terser) | [htmlnano](https://github.com/posthtml/htmlnano)<br>[](https://socket.dev/npm/package/htmlnano) | [@swc/html](https://github.com/swc-project/swc)<br>[](https://socket.dev/npm/package/@swc/html) | [minify-html](https://github.com/wilsonzlin/minify-html)<br>[](https://socket.dev/npm/package/@minify-html/node) | [minimize](https://github.com/Swaagie/minimize)<br>[](https://socket.dev/npm/package/minimize) | [htmlcompressor.com](https://htmlcompressor.com/) |
|
|
234
234
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
|
235
235
|
| [A List Apart](https://alistapart.com/) | 59 | **49** | 50 | 51 | 52 | 51 | 54 | 52 |
|
|
236
|
-
| [Apple](https://www.apple.com/) | 260 | **
|
|
237
|
-
| [BBC](https://www.bbc.co.uk/) |
|
|
238
|
-
| [CSS-Tricks](https://css-tricks.com/) |
|
|
236
|
+
| [Apple](https://www.apple.com/) | 260 | **203** | **203** | 231 | 235 | 236 | 237 | 238 |
|
|
237
|
+
| [BBC](https://www.bbc.co.uk/) | 904 | **772** | 783 | 840 | 852 | 855 | 896 | n/a |
|
|
238
|
+
| [CSS-Tricks](https://css-tricks.com/) | 160 | 120 | **119** | 126 | 141 | 141 | 146 | 143 |
|
|
239
239
|
| [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6341** | **6341** | 6561 | 6444 | 6567 | 6614 | n/a |
|
|
240
|
-
| [
|
|
241
|
-
| [
|
|
242
|
-
| [
|
|
240
|
+
| [EDRi](https://edri.org/) | 80 | **59** | 60 | 70 | 70 | 71 | 75 | 73 |
|
|
241
|
+
| [EFF](https://www.eff.org/) | 56 | **47** | **47** | 50 | 49 | 49 | 51 | 50 |
|
|
242
|
+
| [European Alternatives](https://european-alternatives.eu/) | 48 | **30** | **30** | 32 | 32 | 32 | 32 | 32 |
|
|
243
|
+
| [FAZ](https://www.faz.net/aktuell/) | 1559 | 1452 | 1457 | **1397** | 1484 | 1495 | 1506 | n/a |
|
|
244
|
+
| [Frontend Dogma](https://frontenddogma.com/) | 221 | **211** | 213 | 234 | 219 | 221 | 239 | 220 |
|
|
243
245
|
| [Google](https://www.google.com/) | 18 | **17** | **17** | **17** | **17** | **17** | 18 | 18 |
|
|
244
|
-
| [Ground News](https://ground.news/) |
|
|
246
|
+
| [Ground News](https://ground.news/) | 1967 | **1707** | 1710 | 1810 | 1835 | 1840 | 1954 | n/a |
|
|
245
247
|
| [HTML Living Standard](https://html.spec.whatwg.org/multipage/) | 149 | **147** | **147** | 153 | **147** | 149 | 155 | 149 |
|
|
246
|
-
| [Igalia](https://www.igalia.com/) | 49 | **33** | **33** |
|
|
247
|
-
| [Leanpub](https://leanpub.com/) |
|
|
248
|
+
| [Igalia](https://www.igalia.com/) | 49 | **33** | **33** | 36 | 35 | 36 | 36 | 36 |
|
|
249
|
+
| [Leanpub](https://leanpub.com/) | 1268 | **1071** | **1071** | 1078 | 1076 | 1072 | 1263 | n/a |
|
|
248
250
|
| [Mastodon](https://mastodon.social/explore) | 36 | **27** | **27** | 31 | 34 | 34 | 35 | 35 |
|
|
249
|
-
| [MDN](https://developer.mozilla.org/en-US/) |
|
|
250
|
-
| [Middle East Eye](https://www.middleeasteye.net/) | 223 | **195** |
|
|
251
|
-
| [Nielsen Norman Group](https://www.nngroup.com/) | 84 |
|
|
251
|
+
| [MDN](https://developer.mozilla.org/en-US/) | 109 | **62** | **62** | 64 | 65 | 65 | 68 | 68 |
|
|
252
|
+
| [Middle East Eye](https://www.middleeasteye.net/) | 223 | **195** | **195** | 202 | 200 | 200 | 202 | 203 |
|
|
253
|
+
| [Nielsen Norman Group](https://www.nngroup.com/) | 84 | 71 | 72 | **53** | 71 | 73 | 74 | 73 |
|
|
252
254
|
| [SitePoint](https://www.sitepoint.com/) | 487 | **346** | **346** | 424 | 461 | 466 | 484 | n/a |
|
|
253
255
|
| [TetraLogical](https://tetralogical.com/) | 44 | 38 | 38 | **35** | 38 | 38 | 39 | 39 |
|
|
254
|
-
| [TPGi](https://www.tpgi.com/) |
|
|
256
|
+
| [TPGi](https://www.tpgi.com/) | 176 | **160** | 162 | **160** | 165 | 166 | 173 | 172 |
|
|
255
257
|
| [United Nations](https://www.un.org/en/) | 150 | **112** | 113 | 120 | 124 | 124 | 129 | 122 |
|
|
256
258
|
| [W3C](https://www.w3.org/) | 50 | **35** | 36 | 38 | 38 | 38 | 40 | 38 |
|
|
257
|
-
| **Average processing time** | |
|
|
259
|
+
| **Average processing time** | | 313 ms (24/24) | 367 ms (24/24) | 184 ms (24/24) | 62 ms (24/24) | **18 ms (24/24)** | 336 ms (24/24) | 1392 ms (18/24) |
|
|
258
260
|
|
|
259
|
-
(Last updated: Dec
|
|
261
|
+
(Last updated: Dec 14, 2025)
|
|
260
262
|
<!-- End auto-generated -->
|
|
261
263
|
|
|
262
264
|
## Examples
|
|
@@ -431,4 +433,4 @@ npm run benchmarks
|
|
|
431
433
|
|
|
432
434
|
## Acknowledgements
|
|
433
435
|
|
|
434
|
-
With many thanks to all the previous authors of HTML Minifier, especially [Juriy Zaytsev](https://github.com/kangax), and to everyone who helped make this new edition better, particularly [Daniel Ruf](https://github.com/DanielRuf) and [Jonas Geiler](https://github.com/jonasgeiler).
|
|
436
|
+
With many thanks to all the previous authors of HTML Minifier, especially [Juriy “kangax” Zaytsev](https://github.com/kangax), and to everyone who helped make this new edition better, particularly [Daniel Ruf](https://github.com/DanielRuf) and [Jonas Geiler](https://github.com/jonasgeiler).
|
package/dist/htmlminifier.cjs
CHANGED
|
@@ -5,18 +5,6 @@ Object.defineProperty(exports, '__esModule', { value: true });
|
|
|
5
5
|
var entities = require('entities');
|
|
6
6
|
var RelateURL = require('relateurl');
|
|
7
7
|
|
|
8
|
-
async function replaceAsync(str, regex, asyncFn) {
|
|
9
|
-
const promises = [];
|
|
10
|
-
|
|
11
|
-
str.replace(regex, (match, ...args) => {
|
|
12
|
-
const promise = asyncFn(match, ...args);
|
|
13
|
-
promises.push(promise);
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
const data = await Promise.all(promises);
|
|
17
|
-
return str.replace(regex, () => data.shift());
|
|
18
|
-
}
|
|
19
|
-
|
|
20
8
|
/*!
|
|
21
9
|
* HTML Parser By John Resig (ejohn.org)
|
|
22
10
|
* Modified by Juriy “kangax” Zaytsev
|
|
@@ -24,6 +12,15 @@ async function replaceAsync(str, regex, asyncFn) {
|
|
|
24
12
|
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
|
|
25
13
|
*/
|
|
26
14
|
|
|
15
|
+
/*
|
|
16
|
+
* // Use like so:
|
|
17
|
+
* HTMLParser(htmlString, {
|
|
18
|
+
* start: function(tag, attrs, unary) {},
|
|
19
|
+
* end: function(tag) {},
|
|
20
|
+
* chars: function(text) {},
|
|
21
|
+
* comment: function(text) {}
|
|
22
|
+
* });
|
|
23
|
+
*/
|
|
27
24
|
|
|
28
25
|
class CaseInsensitiveSet extends Set {
|
|
29
26
|
has(str) {
|
|
@@ -192,7 +189,7 @@ class HTMLParser {
|
|
|
192
189
|
const endTagMatch = html.match(endTag);
|
|
193
190
|
if (endTagMatch) {
|
|
194
191
|
html = html.substring(endTagMatch[0].length);
|
|
195
|
-
await
|
|
192
|
+
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
196
193
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
197
194
|
continue;
|
|
198
195
|
}
|
|
@@ -243,21 +240,29 @@ class HTMLParser {
|
|
|
243
240
|
// Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
|
|
244
241
|
const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
|
|
245
242
|
|
|
246
|
-
|
|
243
|
+
const m = reStackedTag.exec(html);
|
|
244
|
+
if (m) {
|
|
245
|
+
let text = m[1];
|
|
247
246
|
if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
|
|
248
247
|
text = text
|
|
249
248
|
.replace(/<!--([\s\S]*?)-->/g, '$1')
|
|
250
249
|
.replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
|
|
251
250
|
}
|
|
252
|
-
|
|
253
251
|
if (handler.chars) {
|
|
254
252
|
await handler.chars(text);
|
|
255
253
|
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
254
|
+
// Advance HTML past the matched special tag content and its closing tag
|
|
255
|
+
html = html.slice(m.index + m[0].length);
|
|
256
|
+
await parseEndTag('</' + stackedTag + '>', stackedTag);
|
|
257
|
+
} else {
|
|
258
|
+
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
259
|
+
if (handler.continueOnParseError && handler.chars && html) {
|
|
260
|
+
await handler.chars(html[0], prevTag, '');
|
|
261
|
+
html = html.substring(1);
|
|
262
|
+
} else {
|
|
263
|
+
break;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
261
266
|
}
|
|
262
267
|
|
|
263
268
|
if (html === last) {
|
|
@@ -644,6 +649,18 @@ class TokenChain {
|
|
|
644
649
|
}
|
|
645
650
|
}
|
|
646
651
|
|
|
652
|
+
async function replaceAsync(str, regex, asyncFn) {
|
|
653
|
+
const promises = [];
|
|
654
|
+
|
|
655
|
+
str.replace(regex, (match, ...args) => {
|
|
656
|
+
const promise = asyncFn(match, ...args);
|
|
657
|
+
promises.push(promise);
|
|
658
|
+
});
|
|
659
|
+
|
|
660
|
+
const data = await Promise.all(promises);
|
|
661
|
+
return str.replace(regex, () => data.shift());
|
|
662
|
+
}
|
|
663
|
+
|
|
647
664
|
/**
|
|
648
665
|
* Preset configurations for HTML Minifier Next
|
|
649
666
|
*
|
|
@@ -2294,7 +2311,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
2294
2311
|
currentType === 'text/html') {
|
|
2295
2312
|
await scan(text);
|
|
2296
2313
|
}
|
|
2297
|
-
}
|
|
2314
|
+
},
|
|
2315
|
+
// We never need `nextTag` information in this scan
|
|
2316
|
+
wantsNextTag: false
|
|
2298
2317
|
});
|
|
2299
2318
|
|
|
2300
2319
|
await parser.parse();
|
|
@@ -2363,6 +2382,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
2363
2382
|
const ignoredMarkupChunks = [];
|
|
2364
2383
|
const ignoredCustomMarkupChunks = [];
|
|
2365
2384
|
let uidIgnore;
|
|
2385
|
+
let uidIgnorePlaceholderPattern;
|
|
2366
2386
|
let uidAttr;
|
|
2367
2387
|
let uidPattern;
|
|
2368
2388
|
// Create inline tags/text sets with custom elements
|
|
@@ -2396,6 +2416,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
2396
2416
|
if (!uidIgnore) {
|
|
2397
2417
|
uidIgnore = uniqueId(value);
|
|
2398
2418
|
const pattern = new RegExp('^' + uidIgnore + '([0-9]+)$');
|
|
2419
|
+
uidIgnorePlaceholderPattern = new RegExp('^<!--' + uidIgnore + '(\\d+)-->$');
|
|
2399
2420
|
if (options.ignoreCustomComments) {
|
|
2400
2421
|
options.ignoreCustomComments = options.ignoreCustomComments.slice();
|
|
2401
2422
|
} else {
|
|
@@ -2528,6 +2549,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
2528
2549
|
customAttrAssign: options.customAttrAssign,
|
|
2529
2550
|
customAttrSurround: options.customAttrSurround,
|
|
2530
2551
|
html5: options.html5,
|
|
2552
|
+
// Compute `nextTag` only when whitespace collapse features require it
|
|
2553
|
+
wantsNextTag: !!(options.collapseWhitespace || options.collapseInlineTagWhitespace || options.conservativeCollapse),
|
|
2531
2554
|
|
|
2532
2555
|
start: async function (tag, attrs, unary, unarySlash, autoGenerated) {
|
|
2533
2556
|
if (tag.toLowerCase() === 'svg') {
|
|
@@ -2820,6 +2843,79 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
2820
2843
|
optionalStartTag = '';
|
|
2821
2844
|
optionalEndTag = '';
|
|
2822
2845
|
}
|
|
2846
|
+
|
|
2847
|
+
// Optimize whitespace collapsing between consecutive `htmlmin:ignore` placeholder comments
|
|
2848
|
+
if (options.collapseWhitespace && text && uidIgnorePlaceholderPattern) {
|
|
2849
|
+
if (uidIgnorePlaceholderPattern.test(text)) {
|
|
2850
|
+
// Check if previous buffer items are: [ignore-placeholder, whitespace-only text]
|
|
2851
|
+
if (buffer.length >= 2) {
|
|
2852
|
+
const prevText = buffer[buffer.length - 1];
|
|
2853
|
+
const prevComment = buffer[buffer.length - 2];
|
|
2854
|
+
|
|
2855
|
+
// Check if previous item is whitespace-only and item before that is ignore-placeholder
|
|
2856
|
+
if (prevText && /^\s+$/.test(prevText) &&
|
|
2857
|
+
prevComment && uidIgnorePlaceholderPattern.test(prevComment)) {
|
|
2858
|
+
// Extract the index from both placeholders to check their content
|
|
2859
|
+
const currentMatch = text.match(uidIgnorePlaceholderPattern);
|
|
2860
|
+
const prevMatch = prevComment.match(uidIgnorePlaceholderPattern);
|
|
2861
|
+
|
|
2862
|
+
if (currentMatch && prevMatch) {
|
|
2863
|
+
const currentIndex = +currentMatch[1];
|
|
2864
|
+
const prevIndex = +prevMatch[1];
|
|
2865
|
+
|
|
2866
|
+
// Defensive bounds check to ensure indices are valid
|
|
2867
|
+
if (currentIndex < ignoredMarkupChunks.length && prevIndex < ignoredMarkupChunks.length) {
|
|
2868
|
+
const currentContent = ignoredMarkupChunks[currentIndex];
|
|
2869
|
+
const prevContent = ignoredMarkupChunks[prevIndex];
|
|
2870
|
+
|
|
2871
|
+
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
2872
|
+
// Don’t collapse if either contains plain text, as that would change meaning
|
|
2873
|
+
// Note: This check will match HTML comments (`<!-- … -->`), but the tag-name
|
|
2874
|
+
// regex below requires starting with a letter, so comments are intentionally
|
|
2875
|
+
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
2876
|
+
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
2877
|
+
// Extract tag names from the HTML content (excludes comments, processing instructions, etc.)
|
|
2878
|
+
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
2879
|
+
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
2880
|
+
|
|
2881
|
+
// Only collapse if both matched valid element tags (not comments/text)
|
|
2882
|
+
// and both tags are block-level (inline elements need whitespace preserved)
|
|
2883
|
+
if (currentTagMatch && prevTagMatch) {
|
|
2884
|
+
const currentTag = options.name(currentTagMatch[1]);
|
|
2885
|
+
const prevTag = options.name(prevTagMatch[1]);
|
|
2886
|
+
|
|
2887
|
+
// Don’t collapse between inline elements
|
|
2888
|
+
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
2889
|
+
// Collapse whitespace respecting context rules
|
|
2890
|
+
let collapsedText = prevText;
|
|
2891
|
+
|
|
2892
|
+
// Apply `collapseWhitespace` with appropriate context
|
|
2893
|
+
if (!stackNoTrimWhitespace.length && !stackNoCollapseWhitespace.length) {
|
|
2894
|
+
// Not in pre or other no-collapse context
|
|
2895
|
+
if (options.preserveLineBreaks && /[\n\r]/.test(prevText)) {
|
|
2896
|
+
// Preserve line break as single newline
|
|
2897
|
+
collapsedText = '\n';
|
|
2898
|
+
} else if (options.conservativeCollapse) {
|
|
2899
|
+
// Conservative mode: keep single space
|
|
2900
|
+
collapsedText = ' ';
|
|
2901
|
+
} else {
|
|
2902
|
+
// Aggressive mode: remove all whitespace
|
|
2903
|
+
collapsedText = '';
|
|
2904
|
+
}
|
|
2905
|
+
}
|
|
2906
|
+
|
|
2907
|
+
// Replace the whitespace in buffer
|
|
2908
|
+
buffer[buffer.length - 1] = collapsedText;
|
|
2909
|
+
}
|
|
2910
|
+
}
|
|
2911
|
+
}
|
|
2912
|
+
}
|
|
2913
|
+
}
|
|
2914
|
+
}
|
|
2915
|
+
}
|
|
2916
|
+
}
|
|
2917
|
+
}
|
|
2918
|
+
|
|
2823
2919
|
buffer.push(text);
|
|
2824
2920
|
},
|
|
2825
2921
|
doctype: function (doctype) {
|
|
@@ -5147,18 +5147,6 @@ function requireLib () {
|
|
|
5147
5147
|
var libExports = requireLib();
|
|
5148
5148
|
var RelateURL = /*@__PURE__*/getDefaultExportFromCjs(libExports);
|
|
5149
5149
|
|
|
5150
|
-
async function replaceAsync(str, regex, asyncFn) {
|
|
5151
|
-
const promises = [];
|
|
5152
|
-
|
|
5153
|
-
str.replace(regex, (match, ...args) => {
|
|
5154
|
-
const promise = asyncFn(match, ...args);
|
|
5155
|
-
promises.push(promise);
|
|
5156
|
-
});
|
|
5157
|
-
|
|
5158
|
-
const data = await Promise.all(promises);
|
|
5159
|
-
return str.replace(regex, () => data.shift());
|
|
5160
|
-
}
|
|
5161
|
-
|
|
5162
5150
|
/*!
|
|
5163
5151
|
* HTML Parser By John Resig (ejohn.org)
|
|
5164
5152
|
* Modified by Juriy “kangax” Zaytsev
|
|
@@ -5166,6 +5154,15 @@ async function replaceAsync(str, regex, asyncFn) {
|
|
|
5166
5154
|
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
|
|
5167
5155
|
*/
|
|
5168
5156
|
|
|
5157
|
+
/*
|
|
5158
|
+
* // Use like so:
|
|
5159
|
+
* HTMLParser(htmlString, {
|
|
5160
|
+
* start: function(tag, attrs, unary) {},
|
|
5161
|
+
* end: function(tag) {},
|
|
5162
|
+
* chars: function(text) {},
|
|
5163
|
+
* comment: function(text) {}
|
|
5164
|
+
* });
|
|
5165
|
+
*/
|
|
5169
5166
|
|
|
5170
5167
|
class CaseInsensitiveSet extends Set {
|
|
5171
5168
|
has(str) {
|
|
@@ -5334,7 +5331,7 @@ class HTMLParser {
|
|
|
5334
5331
|
const endTagMatch = html.match(endTag);
|
|
5335
5332
|
if (endTagMatch) {
|
|
5336
5333
|
html = html.substring(endTagMatch[0].length);
|
|
5337
|
-
await
|
|
5334
|
+
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
5338
5335
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
5339
5336
|
continue;
|
|
5340
5337
|
}
|
|
@@ -5385,21 +5382,29 @@ class HTMLParser {
|
|
|
5385
5382
|
// Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
|
|
5386
5383
|
const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
|
|
5387
5384
|
|
|
5388
|
-
|
|
5385
|
+
const m = reStackedTag.exec(html);
|
|
5386
|
+
if (m) {
|
|
5387
|
+
let text = m[1];
|
|
5389
5388
|
if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
|
|
5390
5389
|
text = text
|
|
5391
5390
|
.replace(/<!--([\s\S]*?)-->/g, '$1')
|
|
5392
5391
|
.replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
|
|
5393
5392
|
}
|
|
5394
|
-
|
|
5395
5393
|
if (handler.chars) {
|
|
5396
5394
|
await handler.chars(text);
|
|
5397
5395
|
}
|
|
5398
|
-
|
|
5399
|
-
|
|
5400
|
-
|
|
5401
|
-
|
|
5402
|
-
|
|
5396
|
+
// Advance HTML past the matched special tag content and its closing tag
|
|
5397
|
+
html = html.slice(m.index + m[0].length);
|
|
5398
|
+
await parseEndTag('</' + stackedTag + '>', stackedTag);
|
|
5399
|
+
} else {
|
|
5400
|
+
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
5401
|
+
if (handler.continueOnParseError && handler.chars && html) {
|
|
5402
|
+
await handler.chars(html[0], prevTag, '');
|
|
5403
|
+
html = html.substring(1);
|
|
5404
|
+
} else {
|
|
5405
|
+
break;
|
|
5406
|
+
}
|
|
5407
|
+
}
|
|
5403
5408
|
}
|
|
5404
5409
|
|
|
5405
5410
|
if (html === last) {
|
|
@@ -5786,6 +5791,18 @@ class TokenChain {
|
|
|
5786
5791
|
}
|
|
5787
5792
|
}
|
|
5788
5793
|
|
|
5794
|
+
async function replaceAsync(str, regex, asyncFn) {
|
|
5795
|
+
const promises = [];
|
|
5796
|
+
|
|
5797
|
+
str.replace(regex, (match, ...args) => {
|
|
5798
|
+
const promise = asyncFn(match, ...args);
|
|
5799
|
+
promises.push(promise);
|
|
5800
|
+
});
|
|
5801
|
+
|
|
5802
|
+
const data = await Promise.all(promises);
|
|
5803
|
+
return str.replace(regex, () => data.shift());
|
|
5804
|
+
}
|
|
5805
|
+
|
|
5789
5806
|
/**
|
|
5790
5807
|
* Preset configurations for HTML Minifier Next
|
|
5791
5808
|
*
|
|
@@ -7436,7 +7453,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
7436
7453
|
currentType === 'text/html') {
|
|
7437
7454
|
await scan(text);
|
|
7438
7455
|
}
|
|
7439
|
-
}
|
|
7456
|
+
},
|
|
7457
|
+
// We never need `nextTag` information in this scan
|
|
7458
|
+
wantsNextTag: false
|
|
7440
7459
|
});
|
|
7441
7460
|
|
|
7442
7461
|
await parser.parse();
|
|
@@ -7505,6 +7524,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
7505
7524
|
const ignoredMarkupChunks = [];
|
|
7506
7525
|
const ignoredCustomMarkupChunks = [];
|
|
7507
7526
|
let uidIgnore;
|
|
7527
|
+
let uidIgnorePlaceholderPattern;
|
|
7508
7528
|
let uidAttr;
|
|
7509
7529
|
let uidPattern;
|
|
7510
7530
|
// Create inline tags/text sets with custom elements
|
|
@@ -7538,6 +7558,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
7538
7558
|
if (!uidIgnore) {
|
|
7539
7559
|
uidIgnore = uniqueId(value);
|
|
7540
7560
|
const pattern = new RegExp('^' + uidIgnore + '([0-9]+)$');
|
|
7561
|
+
uidIgnorePlaceholderPattern = new RegExp('^<!--' + uidIgnore + '(\\d+)-->$');
|
|
7541
7562
|
if (options.ignoreCustomComments) {
|
|
7542
7563
|
options.ignoreCustomComments = options.ignoreCustomComments.slice();
|
|
7543
7564
|
} else {
|
|
@@ -7670,6 +7691,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
7670
7691
|
customAttrAssign: options.customAttrAssign,
|
|
7671
7692
|
customAttrSurround: options.customAttrSurround,
|
|
7672
7693
|
html5: options.html5,
|
|
7694
|
+
// Compute `nextTag` only when whitespace collapse features require it
|
|
7695
|
+
wantsNextTag: !!(options.collapseWhitespace || options.collapseInlineTagWhitespace || options.conservativeCollapse),
|
|
7673
7696
|
|
|
7674
7697
|
start: async function (tag, attrs, unary, unarySlash, autoGenerated) {
|
|
7675
7698
|
if (tag.toLowerCase() === 'svg') {
|
|
@@ -7962,6 +7985,79 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
7962
7985
|
optionalStartTag = '';
|
|
7963
7986
|
optionalEndTag = '';
|
|
7964
7987
|
}
|
|
7988
|
+
|
|
7989
|
+
// Optimize whitespace collapsing between consecutive `htmlmin:ignore` placeholder comments
|
|
7990
|
+
if (options.collapseWhitespace && text && uidIgnorePlaceholderPattern) {
|
|
7991
|
+
if (uidIgnorePlaceholderPattern.test(text)) {
|
|
7992
|
+
// Check if previous buffer items are: [ignore-placeholder, whitespace-only text]
|
|
7993
|
+
if (buffer.length >= 2) {
|
|
7994
|
+
const prevText = buffer[buffer.length - 1];
|
|
7995
|
+
const prevComment = buffer[buffer.length - 2];
|
|
7996
|
+
|
|
7997
|
+
// Check if previous item is whitespace-only and item before that is ignore-placeholder
|
|
7998
|
+
if (prevText && /^\s+$/.test(prevText) &&
|
|
7999
|
+
prevComment && uidIgnorePlaceholderPattern.test(prevComment)) {
|
|
8000
|
+
// Extract the index from both placeholders to check their content
|
|
8001
|
+
const currentMatch = text.match(uidIgnorePlaceholderPattern);
|
|
8002
|
+
const prevMatch = prevComment.match(uidIgnorePlaceholderPattern);
|
|
8003
|
+
|
|
8004
|
+
if (currentMatch && prevMatch) {
|
|
8005
|
+
const currentIndex = +currentMatch[1];
|
|
8006
|
+
const prevIndex = +prevMatch[1];
|
|
8007
|
+
|
|
8008
|
+
// Defensive bounds check to ensure indices are valid
|
|
8009
|
+
if (currentIndex < ignoredMarkupChunks.length && prevIndex < ignoredMarkupChunks.length) {
|
|
8010
|
+
const currentContent = ignoredMarkupChunks[currentIndex];
|
|
8011
|
+
const prevContent = ignoredMarkupChunks[prevIndex];
|
|
8012
|
+
|
|
8013
|
+
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
8014
|
+
// Don’t collapse if either contains plain text, as that would change meaning
|
|
8015
|
+
// Note: This check will match HTML comments (`<!-- … -->`), but the tag-name
|
|
8016
|
+
// regex below requires starting with a letter, so comments are intentionally
|
|
8017
|
+
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
8018
|
+
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
8019
|
+
// Extract tag names from the HTML content (excludes comments, processing instructions, etc.)
|
|
8020
|
+
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
8021
|
+
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
8022
|
+
|
|
8023
|
+
// Only collapse if both matched valid element tags (not comments/text)
|
|
8024
|
+
// and both tags are block-level (inline elements need whitespace preserved)
|
|
8025
|
+
if (currentTagMatch && prevTagMatch) {
|
|
8026
|
+
const currentTag = options.name(currentTagMatch[1]);
|
|
8027
|
+
const prevTag = options.name(prevTagMatch[1]);
|
|
8028
|
+
|
|
8029
|
+
// Don’t collapse between inline elements
|
|
8030
|
+
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
8031
|
+
// Collapse whitespace respecting context rules
|
|
8032
|
+
let collapsedText = prevText;
|
|
8033
|
+
|
|
8034
|
+
// Apply `collapseWhitespace` with appropriate context
|
|
8035
|
+
if (!stackNoTrimWhitespace.length && !stackNoCollapseWhitespace.length) {
|
|
8036
|
+
// Not in pre or other no-collapse context
|
|
8037
|
+
if (options.preserveLineBreaks && /[\n\r]/.test(prevText)) {
|
|
8038
|
+
// Preserve line break as single newline
|
|
8039
|
+
collapsedText = '\n';
|
|
8040
|
+
} else if (options.conservativeCollapse) {
|
|
8041
|
+
// Conservative mode: keep single space
|
|
8042
|
+
collapsedText = ' ';
|
|
8043
|
+
} else {
|
|
8044
|
+
// Aggressive mode: remove all whitespace
|
|
8045
|
+
collapsedText = '';
|
|
8046
|
+
}
|
|
8047
|
+
}
|
|
8048
|
+
|
|
8049
|
+
// Replace the whitespace in buffer
|
|
8050
|
+
buffer[buffer.length - 1] = collapsedText;
|
|
8051
|
+
}
|
|
8052
|
+
}
|
|
8053
|
+
}
|
|
8054
|
+
}
|
|
8055
|
+
}
|
|
8056
|
+
}
|
|
8057
|
+
}
|
|
8058
|
+
}
|
|
8059
|
+
}
|
|
8060
|
+
|
|
7965
8061
|
buffer.push(text);
|
|
7966
8062
|
},
|
|
7967
8063
|
doctype: function (doctype) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAovEO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UA3tES,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;gCAOP,MAAM,EAAE;;;;;;;;yBAyBR,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAnYkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AA8CA,4BAAoE;AAoEpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBAscC;CACF"}
|
package/package.json
CHANGED
package/src/htmlminifier.js
CHANGED
|
@@ -1588,7 +1588,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
1588
1588
|
currentType === 'text/html') {
|
|
1589
1589
|
await scan(text);
|
|
1590
1590
|
}
|
|
1591
|
-
}
|
|
1591
|
+
},
|
|
1592
|
+
// We never need `nextTag` information in this scan
|
|
1593
|
+
wantsNextTag: false
|
|
1592
1594
|
});
|
|
1593
1595
|
|
|
1594
1596
|
await parser.parse();
|
|
@@ -1657,6 +1659,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1657
1659
|
const ignoredMarkupChunks = [];
|
|
1658
1660
|
const ignoredCustomMarkupChunks = [];
|
|
1659
1661
|
let uidIgnore;
|
|
1662
|
+
let uidIgnorePlaceholderPattern;
|
|
1660
1663
|
let uidAttr;
|
|
1661
1664
|
let uidPattern;
|
|
1662
1665
|
// Create inline tags/text sets with custom elements
|
|
@@ -1690,6 +1693,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1690
1693
|
if (!uidIgnore) {
|
|
1691
1694
|
uidIgnore = uniqueId(value);
|
|
1692
1695
|
const pattern = new RegExp('^' + uidIgnore + '([0-9]+)$');
|
|
1696
|
+
uidIgnorePlaceholderPattern = new RegExp('^<!--' + uidIgnore + '(\\d+)-->$');
|
|
1693
1697
|
if (options.ignoreCustomComments) {
|
|
1694
1698
|
options.ignoreCustomComments = options.ignoreCustomComments.slice();
|
|
1695
1699
|
} else {
|
|
@@ -1822,6 +1826,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1822
1826
|
customAttrAssign: options.customAttrAssign,
|
|
1823
1827
|
customAttrSurround: options.customAttrSurround,
|
|
1824
1828
|
html5: options.html5,
|
|
1829
|
+
// Compute `nextTag` only when whitespace collapse features require it
|
|
1830
|
+
wantsNextTag: !!(options.collapseWhitespace || options.collapseInlineTagWhitespace || options.conservativeCollapse),
|
|
1825
1831
|
|
|
1826
1832
|
start: async function (tag, attrs, unary, unarySlash, autoGenerated) {
|
|
1827
1833
|
if (tag.toLowerCase() === 'svg') {
|
|
@@ -2114,6 +2120,79 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
2114
2120
|
optionalStartTag = '';
|
|
2115
2121
|
optionalEndTag = '';
|
|
2116
2122
|
}
|
|
2123
|
+
|
|
2124
|
+
// Optimize whitespace collapsing between consecutive `htmlmin:ignore` placeholder comments
|
|
2125
|
+
if (options.collapseWhitespace && text && uidIgnorePlaceholderPattern) {
|
|
2126
|
+
if (uidIgnorePlaceholderPattern.test(text)) {
|
|
2127
|
+
// Check if previous buffer items are: [ignore-placeholder, whitespace-only text]
|
|
2128
|
+
if (buffer.length >= 2) {
|
|
2129
|
+
const prevText = buffer[buffer.length - 1];
|
|
2130
|
+
const prevComment = buffer[buffer.length - 2];
|
|
2131
|
+
|
|
2132
|
+
// Check if previous item is whitespace-only and item before that is ignore-placeholder
|
|
2133
|
+
if (prevText && /^\s+$/.test(prevText) &&
|
|
2134
|
+
prevComment && uidIgnorePlaceholderPattern.test(prevComment)) {
|
|
2135
|
+
// Extract the index from both placeholders to check their content
|
|
2136
|
+
const currentMatch = text.match(uidIgnorePlaceholderPattern);
|
|
2137
|
+
const prevMatch = prevComment.match(uidIgnorePlaceholderPattern);
|
|
2138
|
+
|
|
2139
|
+
if (currentMatch && prevMatch) {
|
|
2140
|
+
const currentIndex = +currentMatch[1];
|
|
2141
|
+
const prevIndex = +prevMatch[1];
|
|
2142
|
+
|
|
2143
|
+
// Defensive bounds check to ensure indices are valid
|
|
2144
|
+
if (currentIndex < ignoredMarkupChunks.length && prevIndex < ignoredMarkupChunks.length) {
|
|
2145
|
+
const currentContent = ignoredMarkupChunks[currentIndex];
|
|
2146
|
+
const prevContent = ignoredMarkupChunks[prevIndex];
|
|
2147
|
+
|
|
2148
|
+
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
2149
|
+
// Don’t collapse if either contains plain text, as that would change meaning
|
|
2150
|
+
// Note: This check will match HTML comments (`<!-- … -->`), but the tag-name
|
|
2151
|
+
// regex below requires starting with a letter, so comments are intentionally
|
|
2152
|
+
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
2153
|
+
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
2154
|
+
// Extract tag names from the HTML content (excludes comments, processing instructions, etc.)
|
|
2155
|
+
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
2156
|
+
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
2157
|
+
|
|
2158
|
+
// Only collapse if both matched valid element tags (not comments/text)
|
|
2159
|
+
// and both tags are block-level (inline elements need whitespace preserved)
|
|
2160
|
+
if (currentTagMatch && prevTagMatch) {
|
|
2161
|
+
const currentTag = options.name(currentTagMatch[1]);
|
|
2162
|
+
const prevTag = options.name(prevTagMatch[1]);
|
|
2163
|
+
|
|
2164
|
+
// Don’t collapse between inline elements
|
|
2165
|
+
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
2166
|
+
// Collapse whitespace respecting context rules
|
|
2167
|
+
let collapsedText = prevText;
|
|
2168
|
+
|
|
2169
|
+
// Apply `collapseWhitespace` with appropriate context
|
|
2170
|
+
if (!stackNoTrimWhitespace.length && !stackNoCollapseWhitespace.length) {
|
|
2171
|
+
// Not in pre or other no-collapse context
|
|
2172
|
+
if (options.preserveLineBreaks && /[\n\r]/.test(prevText)) {
|
|
2173
|
+
// Preserve line break as single newline
|
|
2174
|
+
collapsedText = '\n';
|
|
2175
|
+
} else if (options.conservativeCollapse) {
|
|
2176
|
+
// Conservative mode: keep single space
|
|
2177
|
+
collapsedText = ' ';
|
|
2178
|
+
} else {
|
|
2179
|
+
// Aggressive mode: remove all whitespace
|
|
2180
|
+
collapsedText = '';
|
|
2181
|
+
}
|
|
2182
|
+
}
|
|
2183
|
+
|
|
2184
|
+
// Replace the whitespace in buffer
|
|
2185
|
+
buffer[buffer.length - 1] = collapsedText;
|
|
2186
|
+
}
|
|
2187
|
+
}
|
|
2188
|
+
}
|
|
2189
|
+
}
|
|
2190
|
+
}
|
|
2191
|
+
}
|
|
2192
|
+
}
|
|
2193
|
+
}
|
|
2194
|
+
}
|
|
2195
|
+
|
|
2117
2196
|
buffer.push(text);
|
|
2118
2197
|
},
|
|
2119
2198
|
doctype: function (doctype) {
|
package/src/htmlparser.js
CHANGED
|
@@ -15,8 +15,6 @@
|
|
|
15
15
|
* });
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
|
-
import { replaceAsync } from './utils.js';
|
|
19
|
-
|
|
20
18
|
class CaseInsensitiveSet extends Set {
|
|
21
19
|
has(str) {
|
|
22
20
|
return super.has(str.toLowerCase());
|
|
@@ -184,7 +182,7 @@ export class HTMLParser {
|
|
|
184
182
|
const endTagMatch = html.match(endTag);
|
|
185
183
|
if (endTagMatch) {
|
|
186
184
|
html = html.substring(endTagMatch[0].length);
|
|
187
|
-
await
|
|
185
|
+
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
188
186
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
189
187
|
continue;
|
|
190
188
|
}
|
|
@@ -235,21 +233,29 @@ export class HTMLParser {
|
|
|
235
233
|
// Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
|
|
236
234
|
const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
|
|
237
235
|
|
|
238
|
-
|
|
236
|
+
const m = reStackedTag.exec(html);
|
|
237
|
+
if (m) {
|
|
238
|
+
let text = m[1];
|
|
239
239
|
if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
|
|
240
240
|
text = text
|
|
241
241
|
.replace(/<!--([\s\S]*?)-->/g, '$1')
|
|
242
242
|
.replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
|
|
243
243
|
}
|
|
244
|
-
|
|
245
244
|
if (handler.chars) {
|
|
246
245
|
await handler.chars(text);
|
|
247
246
|
}
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
247
|
+
// Advance HTML past the matched special tag content and its closing tag
|
|
248
|
+
html = html.slice(m.index + m[0].length);
|
|
249
|
+
await parseEndTag('</' + stackedTag + '>', stackedTag);
|
|
250
|
+
} else {
|
|
251
|
+
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
252
|
+
if (handler.continueOnParseError && handler.chars && html) {
|
|
253
|
+
await handler.chars(html[0], prevTag, '');
|
|
254
|
+
html = html.substring(1);
|
|
255
|
+
} else {
|
|
256
|
+
break;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
253
259
|
}
|
|
254
260
|
|
|
255
261
|
if (html === last) {
|