html-minifier-next 6.2.3 → 6.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +1 -1
- package/dist/htmlminifier.cjs +62 -30
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/lib/attributes.d.ts +1 -1
- package/package.json +2 -2
- package/src/htmlminifier.js +52 -20
- package/src/htmlparser.js +6 -6
- package/src/lib/attributes.js +4 -4
package/cli.js
CHANGED
|
@@ -300,7 +300,7 @@ program.helpOption('-h, --help', 'Display help for command');
|
|
|
300
300
|
await program.arguments('[files...]').action(function (files) {
|
|
301
301
|
capturedFiles = files;
|
|
302
302
|
filesProvided = files.length > 0;
|
|
303
|
-
// Defer reading files until after
|
|
303
|
+
// Defer reading files until after check for consumed filenames
|
|
304
304
|
}).parseAsync(process.argv);
|
|
305
305
|
|
|
306
306
|
const programOptions = program.opts();
|
package/dist/htmlminifier.cjs
CHANGED
|
@@ -332,12 +332,12 @@ class HTMLParser {
|
|
|
332
332
|
while (pos < fullLength) {
|
|
333
333
|
lastPos = pos;
|
|
334
334
|
|
|
335
|
-
// Make sure
|
|
335
|
+
// Make sure not to be in a `script` or `style` element
|
|
336
336
|
if (!lastTag || !special.has(lastTag)) {
|
|
337
337
|
const textEnd = fullHtml.indexOf('<', pos);
|
|
338
338
|
|
|
339
339
|
if (textEnd === pos) {
|
|
340
|
-
//
|
|
340
|
+
// Tag found at current position
|
|
341
341
|
|
|
342
342
|
// Check cache from previous lookahead (avoids re-parsing the same tag)
|
|
343
343
|
if (cachedNextStartTag && cachedNextStartTag.pos === pos) {
|
|
@@ -571,7 +571,7 @@ class HTMLParser {
|
|
|
571
571
|
break;
|
|
572
572
|
}
|
|
573
573
|
|
|
574
|
-
// Limit the input length
|
|
574
|
+
// Limit the input length to pass to the regex to prevent catastrophic backtracking
|
|
575
575
|
const remainingLen = fullLength - currentPos;
|
|
576
576
|
const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
|
|
577
577
|
|
|
@@ -586,7 +586,7 @@ class HTMLParser {
|
|
|
586
586
|
const searchStr = fullHtml.substring(currentPos, extractEndPos);
|
|
587
587
|
attr = searchStr.match(attribute);
|
|
588
588
|
|
|
589
|
-
// If
|
|
589
|
+
// If input was limited and there’s a match, check if the value might be truncated
|
|
590
590
|
if (attr) {
|
|
591
591
|
// Check if the attribute value extends beyond our search window
|
|
592
592
|
const attrEnd = attr[0].length;
|
|
@@ -644,7 +644,7 @@ class HTMLParser {
|
|
|
644
644
|
}
|
|
645
645
|
|
|
646
646
|
if (!attr) {
|
|
647
|
-
// If
|
|
647
|
+
// If input was limited and there’s no match, try manual extraction
|
|
648
648
|
// This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
|
|
649
649
|
const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
650
650
|
if (manualMatch) {
|
|
@@ -710,7 +710,7 @@ class HTMLParser {
|
|
|
710
710
|
if (currentTag === needle) {
|
|
711
711
|
return pos;
|
|
712
712
|
}
|
|
713
|
-
// Stop searching if
|
|
713
|
+
// Stop searching if hitting a table boundary
|
|
714
714
|
if (currentTag === 'table') {
|
|
715
715
|
break;
|
|
716
716
|
}
|
|
@@ -2154,7 +2154,7 @@ function attributesInclude(attributes, attribute) {
|
|
|
2154
2154
|
/**
|
|
2155
2155
|
* Remove duplicate attributes from an attribute list.
|
|
2156
2156
|
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
|
|
2157
|
-
* Duplicate attributes result in invalid HTML, so
|
|
2157
|
+
* Duplicate attributes result in invalid HTML, so only the first is kept.
|
|
2158
2158
|
* @param {Array} attrs - Array of attribute objects with `name` property
|
|
2159
2159
|
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
|
|
2160
2160
|
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
|
|
@@ -2199,7 +2199,7 @@ function isAttributeRedundant(tag, attrName, attrValue, attrs) {
|
|
|
2199
2199
|
return false;
|
|
2200
2200
|
}
|
|
2201
2201
|
|
|
2202
|
-
//
|
|
2202
|
+
// Value needs to be checked, so normalize it
|
|
2203
2203
|
attrValue = attrValue ? trimWhitespace(attrValue.toLowerCase()) : '';
|
|
2204
2204
|
|
|
2205
2205
|
// Legacy attribute checks
|
|
@@ -2657,7 +2657,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
2657
2657
|
let attrFragment;
|
|
2658
2658
|
let emittedAttrValue;
|
|
2659
2659
|
|
|
2660
|
-
// Determine if
|
|
2660
|
+
// Determine if need to add/keep quotes
|
|
2661
2661
|
const shouldAddQuotes = typeof attrValue !== 'undefined' && (
|
|
2662
2662
|
// If `removeAttributeQuotes` is enabled, add quotes only if they can’t be removed
|
|
2663
2663
|
(options.removeAttributeQuotes && (attrValue.indexOf(uidAttr) !== -1 || !canRemoveAttributeQuotes(attrValue))) ||
|
|
@@ -2672,7 +2672,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
2672
2672
|
// Determine the appropriate quote character
|
|
2673
2673
|
if (!options.preventAttributesEscaping) {
|
|
2674
2674
|
// Normal mode: Choose optimal quote type to minimize escaping
|
|
2675
|
-
// unless
|
|
2675
|
+
// unless preserving original quotes and they don’t need escaping
|
|
2676
2676
|
const needsEscaping = (attrQuote === '"' && attrValue.indexOf('"') !== -1) || (attrQuote === "'" && attrValue.indexOf("'") !== -1);
|
|
2677
2677
|
|
|
2678
2678
|
if (options.removeAttributeQuotes || typeof options.quoteCharacter !== 'undefined' || needsEscaping || attrQuote === '') {
|
|
@@ -3064,6 +3064,11 @@ const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript
|
|
|
3064
3064
|
const RE_START_TAG = /^<[^/!]/;
|
|
3065
3065
|
const RE_END_TAG = /^<\//;
|
|
3066
3066
|
|
|
3067
|
+
// Pre-compiled patterns for `htmlmin:ignore` block content analysis
|
|
3068
|
+
const RE_HTML_COMMENT_START = /^\s*<!--/;
|
|
3069
|
+
const RE_CLOSING_TAG_START = /^\s*<\/([a-zA-Z][\w:-]*)/;
|
|
3070
|
+
const RE_LAST_HTML_TAG = /[\s\S]*<(\/?[a-zA-Z][\w:-]*)/;
|
|
3071
|
+
|
|
3067
3072
|
// HTML encoding types for annotation-xml (MathML)
|
|
3068
3073
|
const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
3069
3074
|
|
|
@@ -3702,7 +3707,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
3702
3707
|
await scan(text);
|
|
3703
3708
|
}
|
|
3704
3709
|
},
|
|
3705
|
-
//
|
|
3710
|
+
// No need for `nextTag` information in this scan
|
|
3706
3711
|
wantsNextTag: false,
|
|
3707
3712
|
// Continue on parse errors during analysis pass
|
|
3708
3713
|
continueOnParseError: options.continueOnParseError
|
|
@@ -3711,7 +3716,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
3711
3716
|
try {
|
|
3712
3717
|
await parser.parse();
|
|
3713
3718
|
} catch (err) {
|
|
3714
|
-
// If parsing fails during analysis pass,
|
|
3719
|
+
// If parsing fails during analysis pass, skip it—there’s partial frequency data from what can be parsed
|
|
3715
3720
|
if (!options.continueOnParseError) {
|
|
3716
3721
|
throw err;
|
|
3717
3722
|
}
|
|
@@ -3766,9 +3771,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
3766
3771
|
// First pass minification applies attribute transformations like `removeStyleLinkTypeAttributes` for accurate frequency analysis
|
|
3767
3772
|
const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);
|
|
3768
3773
|
|
|
3769
|
-
// For frequency analysis,
|
|
3770
|
-
// because HTML comments in opening tags prevent proper attribute parsing
|
|
3771
|
-
//
|
|
3774
|
+
// For frequency analysis, remove custom fragments temporarily
|
|
3775
|
+
// because HTML comments in opening tags prevent proper attribute parsing;
|
|
3776
|
+
// removed with a space to preserve attribute boundaries
|
|
3772
3777
|
let scanValue = firstPassOutput;
|
|
3773
3778
|
if (customFragmentPattern) {
|
|
3774
3779
|
scanValue = firstPassOutput.replace(customFragmentPattern, ' ');
|
|
@@ -3910,8 +3915,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
3910
3915
|
removeEmptyElementsExcept = parseRemoveEmptyElementsExcept(options.removeEmptyElementsExcept, options) || [];
|
|
3911
3916
|
}
|
|
3912
3917
|
|
|
3913
|
-
// Temporarily replace ignored chunks with comments, so that
|
|
3914
|
-
//
|
|
3918
|
+
// Temporarily replace ignored chunks with comments, so that there’s no need to worry what’s there;
|
|
3919
|
+
// there might be completely-horribly-broken-alien-non-html-emoji-cthulhu-filled content
|
|
3915
3920
|
if (value.indexOf('<!-- htmlmin:ignore -->') !== -1) {
|
|
3916
3921
|
// Use `indexOf`-based O(n) loop instead of a global regex with [\s\S]*? to avoid O(n²)
|
|
3917
3922
|
// backtracking on adversarial HTML with many `<!--` prefixes but no closing marker
|
|
@@ -4344,11 +4349,16 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4344
4349
|
}
|
|
4345
4350
|
if (options.collapseWhitespace) {
|
|
4346
4351
|
if (!stackNoTrimWhitespace.length) {
|
|
4352
|
+
// When the prev item is a UID placeholder, compute its effective tag name for whitespace decisions;
|
|
4353
|
+
// this is only used in `collapseWhitespaceSmart`—`prevTag` itself is not modified,
|
|
4354
|
+
// to avoid side effects on the `inlineTextSet` branch below
|
|
4355
|
+
let effectivePrevTag = prevTag;
|
|
4347
4356
|
if (prevTag === 'comment') {
|
|
4348
4357
|
const prevComment = buffer[buffer.length - 1];
|
|
4349
4358
|
if (!uidIgnore || prevComment.indexOf(uidIgnore) === -1) {
|
|
4350
4359
|
if (!prevComment) {
|
|
4351
4360
|
prevTag = charsPrevTag;
|
|
4361
|
+
effectivePrevTag = prevTag;
|
|
4352
4362
|
}
|
|
4353
4363
|
if (buffer.length > 1 && (!prevComment || (!options.conservativeCollapse && / $/.test(currentChars)))) {
|
|
4354
4364
|
const charsIndex = buffer.length - 2;
|
|
@@ -4357,6 +4367,23 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4357
4367
|
return '';
|
|
4358
4368
|
});
|
|
4359
4369
|
}
|
|
4370
|
+
} else if (uidIgnorePlaceholderPattern && nextTag !== 'comment') {
|
|
4371
|
+
// UID placeholder followed by a real element—derive the effective `prevTag` from the
|
|
4372
|
+
// placeholder’s last HTML tag so `collapseWhitespaceSmart` can make the right call;
|
|
4373
|
+
// when `nextTag` is `comment` (another UID placeholder), `commentFinalize` handles it
|
|
4374
|
+
const match = prevComment.match(uidIgnorePlaceholderPattern);
|
|
4375
|
+
if (match) {
|
|
4376
|
+
const idx = +match[1];
|
|
4377
|
+
if (idx < ignoredMarkupChunks.length) {
|
|
4378
|
+
const content = ignoredMarkupChunks[idx];
|
|
4379
|
+
const lastTagMatch = content && RE_LAST_HTML_TAG.exec(content);
|
|
4380
|
+
if (lastTagMatch) {
|
|
4381
|
+
const isClose = lastTagMatch[1].charAt(0) === '/';
|
|
4382
|
+
const tagName = options.name(isClose ? lastTagMatch[1].slice(1) : lastTagMatch[1]);
|
|
4383
|
+
effectivePrevTag = isClose ? '/' + tagName : tagName;
|
|
4384
|
+
}
|
|
4385
|
+
}
|
|
4386
|
+
}
|
|
4360
4387
|
}
|
|
4361
4388
|
}
|
|
4362
4389
|
if (prevTag) {
|
|
@@ -4373,7 +4400,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4373
4400
|
}
|
|
4374
4401
|
}
|
|
4375
4402
|
if (prevTag || nextTag) {
|
|
4376
|
-
text = collapseWhitespaceSmart(text,
|
|
4403
|
+
text = collapseWhitespaceSmart(text, effectivePrevTag, nextTag, prevAttrs, nextAttrs, options, inlineElements, inlineTextSet);
|
|
4377
4404
|
} else {
|
|
4378
4405
|
text = collapseWhitespace(text, options, true, true);
|
|
4379
4406
|
}
|
|
@@ -4496,21 +4523,26 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4496
4523
|
|
|
4497
4524
|
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
4498
4525
|
// Don’t collapse if either contains plain text, as that would change meaning
|
|
4499
|
-
// Note: This check will match HTML comments (`<!-- … -->`), but the tag name
|
|
4500
|
-
// regex below requires starting with a letter, so comments are intentionally
|
|
4501
|
-
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
4502
4526
|
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
4503
|
-
// Extract tag names from the HTML content
|
|
4527
|
+
// Extract tag names from the HTML content
|
|
4504
4528
|
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
4505
4529
|
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
|
|
4510
|
-
|
|
4511
|
-
|
|
4512
|
-
|
|
4513
|
-
|
|
4530
|
+
// HTML comments are invisible (no block/inline nature), treat as non-inline
|
|
4531
|
+
const prevIsHtmlComment = !prevTagMatch && RE_HTML_COMMENT_START.test(prevContent);
|
|
4532
|
+
const currentIsHtmlComment = !currentTagMatch && RE_HTML_COMMENT_START.test(currentContent);
|
|
4533
|
+
// Closing tags (e.g., `</div>`)—inline-ness determines whether to collapse
|
|
4534
|
+
const prevClosingTagMatch = !prevTagMatch && RE_CLOSING_TAG_START.exec(prevContent);
|
|
4535
|
+
const currentClosingTagMatch = !currentTagMatch && RE_CLOSING_TAG_START.exec(currentContent);
|
|
4536
|
+
|
|
4537
|
+
// Collapse if both sides are element/closing tags or HTML comments, and neither is inline
|
|
4538
|
+
if ((currentTagMatch || currentIsHtmlComment || currentClosingTagMatch) &&
|
|
4539
|
+
(prevTagMatch || prevIsHtmlComment || prevClosingTagMatch)) {
|
|
4540
|
+
const currentTag = currentTagMatch ? options.name(currentTagMatch[1])
|
|
4541
|
+
: currentClosingTagMatch ? options.name(currentClosingTagMatch[1]) : null;
|
|
4542
|
+
const prevTag = prevTagMatch ? options.name(prevTagMatch[1])
|
|
4543
|
+
: prevClosingTagMatch ? options.name(prevClosingTagMatch[1]) : null;
|
|
4544
|
+
|
|
4545
|
+
// Don’t collapse between inline elements (HTML comments count as non-inline)
|
|
4514
4546
|
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
4515
4547
|
// Collapse whitespace respecting context rules
|
|
4516
4548
|
let collapsedText = prevText;
|
|
package/dist/types/htmlminifier.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AA2wDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UA3/CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAtqBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
|
|
package/dist/types/lib/attributes.d.ts
CHANGED
|
@@ -24,7 +24,7 @@ export function buildAttr(normalized: any, hasUnarySlash: any, options: any, isL
|
|
|
24
24
|
/**
|
|
25
25
|
* Remove duplicate attributes from an attribute list.
|
|
26
26
|
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
|
|
27
|
-
* Duplicate attributes result in invalid HTML, so
|
|
27
|
+
* Duplicate attributes result in invalid HTML, so only the first is kept.
|
|
28
28
|
* @param {Array} attrs - Array of attribute objects with `name` property
|
|
29
29
|
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
|
|
30
30
|
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
|
package/package.json
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"@rollup/plugin-commonjs": "^29.0.2",
|
|
19
19
|
"@rollup/plugin-json": "^6.1.0",
|
|
20
20
|
"@rollup/plugin-node-resolve": "^16.0.3",
|
|
21
|
-
"@swc/core": "^1.15.
|
|
21
|
+
"@swc/core": "^1.15.33",
|
|
22
22
|
"eslint": "^10.3.0",
|
|
23
23
|
"rollup": "^4.60.2",
|
|
24
24
|
"rollup-plugin-polyfill-node": "^0.13.0",
|
|
@@ -96,5 +96,5 @@
|
|
|
96
96
|
},
|
|
97
97
|
"type": "module",
|
|
98
98
|
"types": "./dist/types/htmlminifier.d.ts",
|
|
99
|
-
"version": "6.2.3"
|
|
99
|
+
"version": "6.2.4"
|
|
100
100
|
}
|
package/src/htmlminifier.js
CHANGED
|
@@ -121,6 +121,11 @@ const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript
|
|
|
121
121
|
const RE_START_TAG = /^<[^/!]/;
|
|
122
122
|
const RE_END_TAG = /^<\//;
|
|
123
123
|
|
|
124
|
+
// Pre-compiled patterns for `htmlmin:ignore` block content analysis
|
|
125
|
+
const RE_HTML_COMMENT_START = /^\s*<!--/;
|
|
126
|
+
const RE_CLOSING_TAG_START = /^\s*<\/([a-zA-Z][\w:-]*)/;
|
|
127
|
+
const RE_LAST_HTML_TAG = /[\s\S]*<(\/?[a-zA-Z][\w:-]*)/;
|
|
128
|
+
|
|
124
129
|
// HTML encoding types for annotation-xml (MathML)
|
|
125
130
|
const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
126
131
|
|
|
@@ -759,7 +764,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
759
764
|
await scan(text);
|
|
760
765
|
}
|
|
761
766
|
},
|
|
762
|
-
//
|
|
767
|
+
// No need for `nextTag` information in this scan
|
|
763
768
|
wantsNextTag: false,
|
|
764
769
|
// Continue on parse errors during analysis pass
|
|
765
770
|
continueOnParseError: options.continueOnParseError
|
|
@@ -768,7 +773,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
768
773
|
try {
|
|
769
774
|
await parser.parse();
|
|
770
775
|
} catch (err) {
|
|
771
|
-
// If parsing fails during analysis pass,
|
|
776
|
+
// If parsing fails during analysis pass, skip it—there’s partial frequency data from what can be parsed
|
|
772
777
|
if (!options.continueOnParseError) {
|
|
773
778
|
throw err;
|
|
774
779
|
}
|
|
@@ -823,9 +828,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
823
828
|
// First pass minification applies attribute transformations like `removeStyleLinkTypeAttributes` for accurate frequency analysis
|
|
824
829
|
const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);
|
|
825
830
|
|
|
826
|
-
// For frequency analysis,
|
|
827
|
-
// because HTML comments in opening tags prevent proper attribute parsing
|
|
828
|
-
//
|
|
831
|
+
// For frequency analysis, remove custom fragments temporarily
|
|
832
|
+
// because HTML comments in opening tags prevent proper attribute parsing;
|
|
833
|
+
// removed with a space to preserve attribute boundaries
|
|
829
834
|
let scanValue = firstPassOutput;
|
|
830
835
|
if (customFragmentPattern) {
|
|
831
836
|
scanValue = firstPassOutput.replace(customFragmentPattern, ' ');
|
|
@@ -967,8 +972,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
967
972
|
removeEmptyElementsExcept = parseRemoveEmptyElementsExcept(options.removeEmptyElementsExcept, options) || [];
|
|
968
973
|
}
|
|
969
974
|
|
|
970
|
-
// Temporarily replace ignored chunks with comments, so that
|
|
971
|
-
//
|
|
975
|
+
// Temporarily replace ignored chunks with comments, so that there’s no need to worry what’s there;
|
|
976
|
+
// there might be completely-horribly-broken-alien-non-html-emoji-cthulhu-filled content
|
|
972
977
|
if (value.indexOf('<!-- htmlmin:ignore -->') !== -1) {
|
|
973
978
|
// Use `indexOf`-based O(n) loop instead of a global regex with [\s\S]*? to avoid O(n²)
|
|
974
979
|
// backtracking on adversarial HTML with many `<!--` prefixes but no closing marker
|
|
@@ -1401,11 +1406,16 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1401
1406
|
}
|
|
1402
1407
|
if (options.collapseWhitespace) {
|
|
1403
1408
|
if (!stackNoTrimWhitespace.length) {
|
|
1409
|
+
// When the prev item is a UID placeholder, compute its effective tag name for whitespace decisions;
|
|
1410
|
+
// this is only used in `collapseWhitespaceSmart`—`prevTag` itself is not modified,
|
|
1411
|
+
// to avoid side effects on the `inlineTextSet` branch below
|
|
1412
|
+
let effectivePrevTag = prevTag;
|
|
1404
1413
|
if (prevTag === 'comment') {
|
|
1405
1414
|
const prevComment = buffer[buffer.length - 1];
|
|
1406
1415
|
if (!uidIgnore || prevComment.indexOf(uidIgnore) === -1) {
|
|
1407
1416
|
if (!prevComment) {
|
|
1408
1417
|
prevTag = charsPrevTag;
|
|
1418
|
+
effectivePrevTag = prevTag;
|
|
1409
1419
|
}
|
|
1410
1420
|
if (buffer.length > 1 && (!prevComment || (!options.conservativeCollapse && / $/.test(currentChars)))) {
|
|
1411
1421
|
const charsIndex = buffer.length - 2;
|
|
@@ -1414,6 +1424,23 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1414
1424
|
return '';
|
|
1415
1425
|
});
|
|
1416
1426
|
}
|
|
1427
|
+
} else if (uidIgnorePlaceholderPattern && nextTag !== 'comment') {
|
|
1428
|
+
// UID placeholder followed by a real element—derive the effective `prevTag` from the
|
|
1429
|
+
// placeholder’s last HTML tag so `collapseWhitespaceSmart` can make the right call;
|
|
1430
|
+
// when `nextTag` is `comment` (another UID placeholder), `commentFinalize` handles it
|
|
1431
|
+
const match = prevComment.match(uidIgnorePlaceholderPattern);
|
|
1432
|
+
if (match) {
|
|
1433
|
+
const idx = +match[1];
|
|
1434
|
+
if (idx < ignoredMarkupChunks.length) {
|
|
1435
|
+
const content = ignoredMarkupChunks[idx];
|
|
1436
|
+
const lastTagMatch = content && RE_LAST_HTML_TAG.exec(content);
|
|
1437
|
+
if (lastTagMatch) {
|
|
1438
|
+
const isClose = lastTagMatch[1].charAt(0) === '/';
|
|
1439
|
+
const tagName = options.name(isClose ? lastTagMatch[1].slice(1) : lastTagMatch[1]);
|
|
1440
|
+
effectivePrevTag = isClose ? '/' + tagName : tagName;
|
|
1441
|
+
}
|
|
1442
|
+
}
|
|
1443
|
+
}
|
|
1417
1444
|
}
|
|
1418
1445
|
}
|
|
1419
1446
|
if (prevTag) {
|
|
@@ -1430,7 +1457,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1430
1457
|
}
|
|
1431
1458
|
}
|
|
1432
1459
|
if (prevTag || nextTag) {
|
|
1433
|
-
text = collapseWhitespaceSmart(text,
|
|
1460
|
+
text = collapseWhitespaceSmart(text, effectivePrevTag, nextTag, prevAttrs, nextAttrs, options, inlineElements, inlineTextSet);
|
|
1434
1461
|
} else {
|
|
1435
1462
|
text = collapseWhitespace(text, options, true, true);
|
|
1436
1463
|
}
|
|
@@ -1553,21 +1580,26 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1553
1580
|
|
|
1554
1581
|
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
1555
1582
|
// Don’t collapse if either contains plain text, as that would change meaning
|
|
1556
|
-
// Note: This check will match HTML comments (`<!-- … -->`), but the tag name
|
|
1557
|
-
// regex below requires starting with a letter, so comments are intentionally
|
|
1558
|
-
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
1559
1583
|
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
1560
|
-
// Extract tag names from the HTML content
|
|
1584
|
+
// Extract tag names from the HTML content
|
|
1561
1585
|
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
1562
1586
|
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1587
|
+
// HTML comments are invisible (no block/inline nature), treat as non-inline
|
|
1588
|
+
const prevIsHtmlComment = !prevTagMatch && RE_HTML_COMMENT_START.test(prevContent);
|
|
1589
|
+
const currentIsHtmlComment = !currentTagMatch && RE_HTML_COMMENT_START.test(currentContent);
|
|
1590
|
+
// Closing tags (e.g., `</div>`)—inline-ness determines whether to collapse
|
|
1591
|
+
const prevClosingTagMatch = !prevTagMatch && RE_CLOSING_TAG_START.exec(prevContent);
|
|
1592
|
+
const currentClosingTagMatch = !currentTagMatch && RE_CLOSING_TAG_START.exec(currentContent);
|
|
1593
|
+
|
|
1594
|
+
// Collapse if both sides are element/closing tags or HTML comments, and neither is inline
|
|
1595
|
+
if ((currentTagMatch || currentIsHtmlComment || currentClosingTagMatch) &&
|
|
1596
|
+
(prevTagMatch || prevIsHtmlComment || prevClosingTagMatch)) {
|
|
1597
|
+
const currentTag = currentTagMatch ? options.name(currentTagMatch[1])
|
|
1598
|
+
: currentClosingTagMatch ? options.name(currentClosingTagMatch[1]) : null;
|
|
1599
|
+
const prevTag = prevTagMatch ? options.name(prevTagMatch[1])
|
|
1600
|
+
: prevClosingTagMatch ? options.name(prevClosingTagMatch[1]) : null;
|
|
1601
|
+
|
|
1602
|
+
// Don’t collapse between inline elements (HTML comments count as non-inline)
|
|
1571
1603
|
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
1572
1604
|
// Collapse whitespace respecting context rules
|
|
1573
1605
|
let collapsedText = prevText;
|
package/src/htmlparser.js
CHANGED
|
@@ -218,12 +218,12 @@ export class HTMLParser {
|
|
|
218
218
|
while (pos < fullLength) {
|
|
219
219
|
lastPos = pos;
|
|
220
220
|
|
|
221
|
-
// Make sure
|
|
221
|
+
// Make sure not to be in a `script` or `style` element
|
|
222
222
|
if (!lastTag || !special.has(lastTag)) {
|
|
223
223
|
const textEnd = fullHtml.indexOf('<', pos);
|
|
224
224
|
|
|
225
225
|
if (textEnd === pos) {
|
|
226
|
-
//
|
|
226
|
+
// Tag found at current position
|
|
227
227
|
|
|
228
228
|
// Check cache from previous lookahead (avoids re-parsing the same tag)
|
|
229
229
|
if (cachedNextStartTag && cachedNextStartTag.pos === pos) {
|
|
@@ -459,7 +459,7 @@ export class HTMLParser {
|
|
|
459
459
|
break;
|
|
460
460
|
}
|
|
461
461
|
|
|
462
|
-
// Limit the input length
|
|
462
|
+
// Limit the input length to pass to the regex to prevent catastrophic backtracking
|
|
463
463
|
const remainingLen = fullLength - currentPos;
|
|
464
464
|
const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
|
|
465
465
|
|
|
@@ -474,7 +474,7 @@ export class HTMLParser {
|
|
|
474
474
|
const searchStr = fullHtml.substring(currentPos, extractEndPos);
|
|
475
475
|
attr = searchStr.match(attribute);
|
|
476
476
|
|
|
477
|
-
// If
|
|
477
|
+
// If input was limited and there’s a match, check if the value might be truncated
|
|
478
478
|
if (attr) {
|
|
479
479
|
// Check if the attribute value extends beyond our search window
|
|
480
480
|
const attrEnd = attr[0].length;
|
|
@@ -532,7 +532,7 @@ export class HTMLParser {
|
|
|
532
532
|
}
|
|
533
533
|
|
|
534
534
|
if (!attr) {
|
|
535
|
-
// If
|
|
535
|
+
// If input was limited and there’s no match, try manual extraction
|
|
536
536
|
// This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
|
|
537
537
|
const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
538
538
|
if (manualMatch) {
|
|
@@ -598,7 +598,7 @@ export class HTMLParser {
|
|
|
598
598
|
if (currentTag === needle) {
|
|
599
599
|
return pos;
|
|
600
600
|
}
|
|
601
|
-
// Stop searching if
|
|
601
|
+
// Stop searching if hitting a table boundary
|
|
602
602
|
if (currentTag === 'table') {
|
|
603
603
|
break;
|
|
604
604
|
}
|
package/src/lib/attributes.js
CHANGED
|
@@ -72,7 +72,7 @@ function attributesInclude(attributes, attribute) {
|
|
|
72
72
|
/**
|
|
73
73
|
* Remove duplicate attributes from an attribute list.
|
|
74
74
|
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
|
|
75
|
-
* Duplicate attributes result in invalid HTML, so
|
|
75
|
+
* Duplicate attributes result in invalid HTML, so only the first is kept.
|
|
76
76
|
* @param {Array} attrs - Array of attribute objects with `name` property
|
|
77
77
|
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
|
|
78
78
|
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
|
|
@@ -117,7 +117,7 @@ function isAttributeRedundant(tag, attrName, attrValue, attrs) {
|
|
|
117
117
|
return false;
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
//
|
|
120
|
+
// Value needs to be checked, so normalize it
|
|
121
121
|
attrValue = attrValue ? trimWhitespace(attrValue.toLowerCase()) : '';
|
|
122
122
|
|
|
123
123
|
// Legacy attribute checks
|
|
@@ -575,7 +575,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
575
575
|
let attrFragment;
|
|
576
576
|
let emittedAttrValue;
|
|
577
577
|
|
|
578
|
-
// Determine if
|
|
578
|
+
// Determine if need to add/keep quotes
|
|
579
579
|
const shouldAddQuotes = typeof attrValue !== 'undefined' && (
|
|
580
580
|
// If `removeAttributeQuotes` is enabled, add quotes only if they can’t be removed
|
|
581
581
|
(options.removeAttributeQuotes && (attrValue.indexOf(uidAttr) !== -1 || !canRemoveAttributeQuotes(attrValue))) ||
|
|
@@ -590,7 +590,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
590
590
|
// Determine the appropriate quote character
|
|
591
591
|
if (!options.preventAttributesEscaping) {
|
|
592
592
|
// Normal mode: Choose optimal quote type to minimize escaping
|
|
593
|
-
// unless
|
|
593
|
+
// unless preserving original quotes and they don’t need escaping
|
|
594
594
|
const needsEscaping = (attrQuote === '"' && attrValue.indexOf('"') !== -1) || (attrQuote === "'" && attrValue.indexOf("'") !== -1);
|
|
595
595
|
|
|
596
596
|
if (options.removeAttributeQuotes || typeof options.quoteCharacter !== 'undefined' || needsEscaping || attrQuote === '') {
|