html-minifier-next 6.2.3 → 6.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +3 -3
- package/dist/htmlminifier.cjs +62 -31
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/lib/attributes.d.ts +1 -1
- package/dist/types/presets.d.ts +0 -1
- package/dist/types/presets.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/htmlminifier.js +52 -20
- package/src/htmlparser.js +6 -6
- package/src/lib/attributes.js +4 -4
- package/src/presets.js +0 -1
package/cli.js
CHANGED
|
@@ -60,10 +60,10 @@ const pkg = require('./package.json');
|
|
|
60
60
|
|
|
61
61
|
const DEFAULT_FILE_EXTENSIONS = ['html', 'htm', 'shtml', 'shtm'];
|
|
62
62
|
|
|
63
|
-
const MARK_ERROR
|
|
63
|
+
const MARK_ERROR = process.stderr.isTTY ? '\x1b[31m' : '';
|
|
64
64
|
const MARK_SUCCESS = process.stderr.isTTY ? '\x1b[32m' : '';
|
|
65
65
|
const MARK_WARNING = process.stderr.isTTY ? '\x1b[33m' : '';
|
|
66
|
-
const MARK_RESET
|
|
66
|
+
const MARK_RESET = process.stderr.isTTY ? '\x1b[0m' : '';
|
|
67
67
|
|
|
68
68
|
const program = new Command();
|
|
69
69
|
program.name(pkg.name);
|
|
@@ -300,7 +300,7 @@ program.helpOption('-h, --help', 'Display help for command');
|
|
|
300
300
|
await program.arguments('[files...]').action(function (files) {
|
|
301
301
|
capturedFiles = files;
|
|
302
302
|
filesProvided = files.length > 0;
|
|
303
|
-
// Defer reading files until after
|
|
303
|
+
// Defer reading files until after check for consumed filenames
|
|
304
304
|
}).parseAsync(process.argv);
|
|
305
305
|
|
|
306
306
|
const programOptions = program.opts();
|
package/dist/htmlminifier.cjs
CHANGED
|
@@ -332,12 +332,12 @@ class HTMLParser {
|
|
|
332
332
|
while (pos < fullLength) {
|
|
333
333
|
lastPos = pos;
|
|
334
334
|
|
|
335
|
-
// Make sure
|
|
335
|
+
// Make sure not to be in a `script` or `style` element
|
|
336
336
|
if (!lastTag || !special.has(lastTag)) {
|
|
337
337
|
const textEnd = fullHtml.indexOf('<', pos);
|
|
338
338
|
|
|
339
339
|
if (textEnd === pos) {
|
|
340
|
-
//
|
|
340
|
+
// Tag found at current position
|
|
341
341
|
|
|
342
342
|
// Check cache from previous lookahead (avoids re-parsing the same tag)
|
|
343
343
|
if (cachedNextStartTag && cachedNextStartTag.pos === pos) {
|
|
@@ -571,7 +571,7 @@ class HTMLParser {
|
|
|
571
571
|
break;
|
|
572
572
|
}
|
|
573
573
|
|
|
574
|
-
// Limit the input length
|
|
574
|
+
// Limit the input length to pass to the regex to prevent catastrophic backtracking
|
|
575
575
|
const remainingLen = fullLength - currentPos;
|
|
576
576
|
const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
|
|
577
577
|
|
|
@@ -586,7 +586,7 @@ class HTMLParser {
|
|
|
586
586
|
const searchStr = fullHtml.substring(currentPos, extractEndPos);
|
|
587
587
|
attr = searchStr.match(attribute);
|
|
588
588
|
|
|
589
|
-
// If
|
|
589
|
+
// If input was limited and there’s a match, check if the value might be truncated
|
|
590
590
|
if (attr) {
|
|
591
591
|
// Check if the attribute value extends beyond our search window
|
|
592
592
|
const attrEnd = attr[0].length;
|
|
@@ -644,7 +644,7 @@ class HTMLParser {
|
|
|
644
644
|
}
|
|
645
645
|
|
|
646
646
|
if (!attr) {
|
|
647
|
-
// If
|
|
647
|
+
// If input was limited and there’s no match, try manual extraction
|
|
648
648
|
// This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
|
|
649
649
|
const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
650
650
|
if (manualMatch) {
|
|
@@ -710,7 +710,7 @@ class HTMLParser {
|
|
|
710
710
|
if (currentTag === needle) {
|
|
711
711
|
return pos;
|
|
712
712
|
}
|
|
713
|
-
// Stop searching if
|
|
713
|
+
// Stop searching if hitting a table boundary
|
|
714
714
|
if (currentTag === 'table') {
|
|
715
715
|
break;
|
|
716
716
|
}
|
|
@@ -1028,7 +1028,6 @@ const presets = {
|
|
|
1028
1028
|
collapseAttributeWhitespace: true,
|
|
1029
1029
|
collapseBooleanAttributes: true,
|
|
1030
1030
|
collapseWhitespace: true,
|
|
1031
|
-
continueOnParseError: true,
|
|
1032
1031
|
decodeEntities: true,
|
|
1033
1032
|
mergeScripts: true,
|
|
1034
1033
|
minifyCSS: true,
|
|
@@ -2154,7 +2153,7 @@ function attributesInclude(attributes, attribute) {
|
|
|
2154
2153
|
/**
|
|
2155
2154
|
* Remove duplicate attributes from an attribute list.
|
|
2156
2155
|
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
|
|
2157
|
-
* Duplicate attributes result in invalid HTML, so
|
|
2156
|
+
* Duplicate attributes result in invalid HTML, so only the first is kept.
|
|
2158
2157
|
* @param {Array} attrs - Array of attribute objects with `name` property
|
|
2159
2158
|
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
|
|
2160
2159
|
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
|
|
@@ -2199,7 +2198,7 @@ function isAttributeRedundant(tag, attrName, attrValue, attrs) {
|
|
|
2199
2198
|
return false;
|
|
2200
2199
|
}
|
|
2201
2200
|
|
|
2202
|
-
//
|
|
2201
|
+
// Value needs to be checked, so normalize it
|
|
2203
2202
|
attrValue = attrValue ? trimWhitespace(attrValue.toLowerCase()) : '';
|
|
2204
2203
|
|
|
2205
2204
|
// Legacy attribute checks
|
|
@@ -2657,7 +2656,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
2657
2656
|
let attrFragment;
|
|
2658
2657
|
let emittedAttrValue;
|
|
2659
2658
|
|
|
2660
|
-
// Determine if
|
|
2659
|
+
// Determine if need to add/keep quotes
|
|
2661
2660
|
const shouldAddQuotes = typeof attrValue !== 'undefined' && (
|
|
2662
2661
|
// If `removeAttributeQuotes` is enabled, add quotes only if they can’t be removed
|
|
2663
2662
|
(options.removeAttributeQuotes && (attrValue.indexOf(uidAttr) !== -1 || !canRemoveAttributeQuotes(attrValue))) ||
|
|
@@ -2672,7 +2671,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
2672
2671
|
// Determine the appropriate quote character
|
|
2673
2672
|
if (!options.preventAttributesEscaping) {
|
|
2674
2673
|
// Normal mode: Choose optimal quote type to minimize escaping
|
|
2675
|
-
// unless
|
|
2674
|
+
// unless preserving original quotes and they don’t need escaping
|
|
2676
2675
|
const needsEscaping = (attrQuote === '"' && attrValue.indexOf('"') !== -1) || (attrQuote === "'" && attrValue.indexOf("'") !== -1);
|
|
2677
2676
|
|
|
2678
2677
|
if (options.removeAttributeQuotes || typeof options.quoteCharacter !== 'undefined' || needsEscaping || attrQuote === '') {
|
|
@@ -3064,6 +3063,11 @@ const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript
|
|
|
3064
3063
|
const RE_START_TAG = /^<[^/!]/;
|
|
3065
3064
|
const RE_END_TAG = /^<\//;
|
|
3066
3065
|
|
|
3066
|
+
// Pre-compiled patterns for `htmlmin:ignore` block content analysis
|
|
3067
|
+
const RE_HTML_COMMENT_START = /^\s*<!--/;
|
|
3068
|
+
const RE_CLOSING_TAG_START = /^\s*<\/([a-zA-Z][\w:-]*)/;
|
|
3069
|
+
const RE_LAST_HTML_TAG = /[\s\S]*<(\/?[a-zA-Z][\w:-]*)/;
|
|
3070
|
+
|
|
3067
3071
|
// HTML encoding types for annotation-xml (MathML)
|
|
3068
3072
|
const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
3069
3073
|
|
|
@@ -3702,7 +3706,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
3702
3706
|
await scan(text);
|
|
3703
3707
|
}
|
|
3704
3708
|
},
|
|
3705
|
-
//
|
|
3709
|
+
// No need for `nextTag` information in this scan
|
|
3706
3710
|
wantsNextTag: false,
|
|
3707
3711
|
// Continue on parse errors during analysis pass
|
|
3708
3712
|
continueOnParseError: options.continueOnParseError
|
|
@@ -3711,7 +3715,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
3711
3715
|
try {
|
|
3712
3716
|
await parser.parse();
|
|
3713
3717
|
} catch (err) {
|
|
3714
|
-
// If parsing fails during analysis pass,
|
|
3718
|
+
// If parsing fails during analysis pass, skip it—there’s partial frequency data from what can be parsed
|
|
3715
3719
|
if (!options.continueOnParseError) {
|
|
3716
3720
|
throw err;
|
|
3717
3721
|
}
|
|
@@ -3766,9 +3770,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
3766
3770
|
// First pass minification applies attribute transformations like `removeStyleLinkTypeAttributes` for accurate frequency analysis
|
|
3767
3771
|
const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);
|
|
3768
3772
|
|
|
3769
|
-
// For frequency analysis,
|
|
3770
|
-
// because HTML comments in opening tags prevent proper attribute parsing
|
|
3771
|
-
//
|
|
3773
|
+
// For frequency analysis, remove custom fragments temporarily
|
|
3774
|
+
// because HTML comments in opening tags prevent proper attribute parsing;
|
|
3775
|
+
// removed with a space to preserve attribute boundaries
|
|
3772
3776
|
let scanValue = firstPassOutput;
|
|
3773
3777
|
if (customFragmentPattern) {
|
|
3774
3778
|
scanValue = firstPassOutput.replace(customFragmentPattern, ' ');
|
|
@@ -3910,8 +3914,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
3910
3914
|
removeEmptyElementsExcept = parseRemoveEmptyElementsExcept(options.removeEmptyElementsExcept, options) || [];
|
|
3911
3915
|
}
|
|
3912
3916
|
|
|
3913
|
-
// Temporarily replace ignored chunks with comments, so that
|
|
3914
|
-
//
|
|
3917
|
+
// Temporarily replace ignored chunks with comments, so that there’s no need to worry what’s there;
|
|
3918
|
+
// there might be completely-horribly-broken-alien-non-html-emoji-cthulhu-filled content
|
|
3915
3919
|
if (value.indexOf('<!-- htmlmin:ignore -->') !== -1) {
|
|
3916
3920
|
// Use `indexOf`-based O(n) loop instead of a global regex with [\s\S]*? to avoid O(n²)
|
|
3917
3921
|
// backtracking on adversarial HTML with many `<!--` prefixes but no closing marker
|
|
@@ -4344,11 +4348,16 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4344
4348
|
}
|
|
4345
4349
|
if (options.collapseWhitespace) {
|
|
4346
4350
|
if (!stackNoTrimWhitespace.length) {
|
|
4351
|
+
// When the prev item is a UID placeholder, compute its effective tag name for whitespace decisions;
|
|
4352
|
+
// this is only used in `collapseWhitespaceSmart`—`prevTag` itself is not modified,
|
|
4353
|
+
// to avoid side effects on the `inlineTextSet` branch below
|
|
4354
|
+
let effectivePrevTag = prevTag;
|
|
4347
4355
|
if (prevTag === 'comment') {
|
|
4348
4356
|
const prevComment = buffer[buffer.length - 1];
|
|
4349
4357
|
if (!uidIgnore || prevComment.indexOf(uidIgnore) === -1) {
|
|
4350
4358
|
if (!prevComment) {
|
|
4351
4359
|
prevTag = charsPrevTag;
|
|
4360
|
+
effectivePrevTag = prevTag;
|
|
4352
4361
|
}
|
|
4353
4362
|
if (buffer.length > 1 && (!prevComment || (!options.conservativeCollapse && / $/.test(currentChars)))) {
|
|
4354
4363
|
const charsIndex = buffer.length - 2;
|
|
@@ -4357,6 +4366,23 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4357
4366
|
return '';
|
|
4358
4367
|
});
|
|
4359
4368
|
}
|
|
4369
|
+
} else if (uidIgnorePlaceholderPattern && nextTag !== 'comment') {
|
|
4370
|
+
// UID placeholder followed by a real element—derive the effective `prevTag` from the
|
|
4371
|
+
// placeholder’s last HTML tag so `collapseWhitespaceSmart` can make the right call;
|
|
4372
|
+
// when `nextTag` is `comment` (another UID placeholder), `commentFinalize` handles it
|
|
4373
|
+
const match = prevComment.match(uidIgnorePlaceholderPattern);
|
|
4374
|
+
if (match) {
|
|
4375
|
+
const idx = +match[1];
|
|
4376
|
+
if (idx < ignoredMarkupChunks.length) {
|
|
4377
|
+
const content = ignoredMarkupChunks[idx];
|
|
4378
|
+
const lastTagMatch = content && RE_LAST_HTML_TAG.exec(content);
|
|
4379
|
+
if (lastTagMatch) {
|
|
4380
|
+
const isClose = lastTagMatch[1].charAt(0) === '/';
|
|
4381
|
+
const tagName = options.name(isClose ? lastTagMatch[1].slice(1) : lastTagMatch[1]);
|
|
4382
|
+
effectivePrevTag = isClose ? '/' + tagName : tagName;
|
|
4383
|
+
}
|
|
4384
|
+
}
|
|
4385
|
+
}
|
|
4360
4386
|
}
|
|
4361
4387
|
}
|
|
4362
4388
|
if (prevTag) {
|
|
@@ -4373,7 +4399,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4373
4399
|
}
|
|
4374
4400
|
}
|
|
4375
4401
|
if (prevTag || nextTag) {
|
|
4376
|
-
text = collapseWhitespaceSmart(text,
|
|
4402
|
+
text = collapseWhitespaceSmart(text, effectivePrevTag, nextTag, prevAttrs, nextAttrs, options, inlineElements, inlineTextSet);
|
|
4377
4403
|
} else {
|
|
4378
4404
|
text = collapseWhitespace(text, options, true, true);
|
|
4379
4405
|
}
|
|
@@ -4496,21 +4522,26 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
4496
4522
|
|
|
4497
4523
|
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
4498
4524
|
// Don’t collapse if either contains plain text, as that would change meaning
|
|
4499
|
-
// Note: This check will match HTML comments (`<!-- … -->`), but the tag name
|
|
4500
|
-
// regex below requires starting with a letter, so comments are intentionally
|
|
4501
|
-
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
4502
4525
|
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
4503
|
-
// Extract tag names from the HTML content
|
|
4526
|
+
// Extract tag names from the HTML content
|
|
4504
4527
|
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
4505
4528
|
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
|
|
4510
|
-
|
|
4511
|
-
|
|
4512
|
-
|
|
4513
|
-
|
|
4529
|
+
// HTML comments are invisible (no block/inline nature), treat as non-inline
|
|
4530
|
+
const prevIsHtmlComment = !prevTagMatch && RE_HTML_COMMENT_START.test(prevContent);
|
|
4531
|
+
const currentIsHtmlComment = !currentTagMatch && RE_HTML_COMMENT_START.test(currentContent);
|
|
4532
|
+
// Closing tags (e.g., `</div>`)—inline-ness determines whether to collapse
|
|
4533
|
+
const prevClosingTagMatch = !prevTagMatch && RE_CLOSING_TAG_START.exec(prevContent);
|
|
4534
|
+
const currentClosingTagMatch = !currentTagMatch && RE_CLOSING_TAG_START.exec(currentContent);
|
|
4535
|
+
|
|
4536
|
+
// Collapse if both sides are element/closing tags or HTML comments, and neither is inline
|
|
4537
|
+
if ((currentTagMatch || currentIsHtmlComment || currentClosingTagMatch) &&
|
|
4538
|
+
(prevTagMatch || prevIsHtmlComment || prevClosingTagMatch)) {
|
|
4539
|
+
const currentTag = currentTagMatch ? options.name(currentTagMatch[1])
|
|
4540
|
+
: currentClosingTagMatch ? options.name(currentClosingTagMatch[1]) : null;
|
|
4541
|
+
const prevTag = prevTagMatch ? options.name(prevTagMatch[1])
|
|
4542
|
+
: prevClosingTagMatch ? options.name(prevClosingTagMatch[1]) : null;
|
|
4543
|
+
|
|
4544
|
+
// Don’t collapse between inline elements (HTML comments count as non-inline)
|
|
4514
4545
|
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
4515
4546
|
// Collapse whitespace respecting context rules
|
|
4516
4547
|
let collapsedText = prevText;
|
|
package/dist/types/htmlminifier.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AA2wDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UA3/CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAtqBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
|
|
package/dist/types/lib/attributes.d.ts
CHANGED
|
@@ -24,7 +24,7 @@ export function buildAttr(normalized: any, hasUnarySlash: any, options: any, isL
|
|
|
24
24
|
/**
|
|
25
25
|
* Remove duplicate attributes from an attribute list.
|
|
26
26
|
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
|
|
27
|
-
* Duplicate attributes result in invalid HTML, so
|
|
27
|
+
* Duplicate attributes result in invalid HTML, so only the first is kept.
|
|
28
28
|
* @param {Array} attrs - Array of attribute objects with `name` property
|
|
29
29
|
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
|
|
30
30
|
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
|
package/dist/types/presets.d.ts
CHANGED
|
@@ -27,7 +27,6 @@ export namespace presets {
|
|
|
27
27
|
export { collapseBooleanAttributes_1 as collapseBooleanAttributes };
|
|
28
28
|
let collapseWhitespace_1: boolean;
|
|
29
29
|
export { collapseWhitespace_1 as collapseWhitespace };
|
|
30
|
-
export let continueOnParseError: boolean;
|
|
31
30
|
export let decodeEntities: boolean;
|
|
32
31
|
export let mergeScripts: boolean;
|
|
33
32
|
export let minifyCSS: boolean;
|
|
package/dist/types/presets.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"presets.d.ts","sourceRoot":"","sources":["../../src/presets.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"presets.d.ts","sourceRoot":"","sources":["../../src/presets.js"],"names":[],"mappings":"AAyCA;;;;GAIG;AACH,gCAHW,MAAM,GACJ,MAAM,GAAC,IAAI,CAMvB;AAED;;;GAGG;AACH,kCAFa,MAAM,EAAE,CAIpB"}
|
package/package.json
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"@rollup/plugin-commonjs": "^29.0.2",
|
|
19
19
|
"@rollup/plugin-json": "^6.1.0",
|
|
20
20
|
"@rollup/plugin-node-resolve": "^16.0.3",
|
|
21
|
-
"@swc/core": "^1.15.
|
|
21
|
+
"@swc/core": "^1.15.33",
|
|
22
22
|
"eslint": "^10.3.0",
|
|
23
23
|
"rollup": "^4.60.2",
|
|
24
24
|
"rollup-plugin-polyfill-node": "^0.13.0",
|
|
@@ -96,5 +96,5 @@
|
|
|
96
96
|
},
|
|
97
97
|
"type": "module",
|
|
98
98
|
"types": "./dist/types/htmlminifier.d.ts",
|
|
99
|
-
"version": "6.2.3"
|
|
99
|
+
"version": "6.2.5"
|
|
100
100
|
}
|
package/src/htmlminifier.js
CHANGED
|
@@ -121,6 +121,11 @@ const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript
|
|
|
121
121
|
const RE_START_TAG = /^<[^/!]/;
|
|
122
122
|
const RE_END_TAG = /^<\//;
|
|
123
123
|
|
|
124
|
+
// Pre-compiled patterns for `htmlmin:ignore` block content analysis
|
|
125
|
+
const RE_HTML_COMMENT_START = /^\s*<!--/;
|
|
126
|
+
const RE_CLOSING_TAG_START = /^\s*<\/([a-zA-Z][\w:-]*)/;
|
|
127
|
+
const RE_LAST_HTML_TAG = /[\s\S]*<(\/?[a-zA-Z][\w:-]*)/;
|
|
128
|
+
|
|
124
129
|
// HTML encoding types for annotation-xml (MathML)
|
|
125
130
|
const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
126
131
|
|
|
@@ -759,7 +764,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
759
764
|
await scan(text);
|
|
760
765
|
}
|
|
761
766
|
},
|
|
762
|
-
//
|
|
767
|
+
// No need for `nextTag` information in this scan
|
|
763
768
|
wantsNextTag: false,
|
|
764
769
|
// Continue on parse errors during analysis pass
|
|
765
770
|
continueOnParseError: options.continueOnParseError
|
|
@@ -768,7 +773,7 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
768
773
|
try {
|
|
769
774
|
await parser.parse();
|
|
770
775
|
} catch (err) {
|
|
771
|
-
// If parsing fails during analysis pass,
|
|
776
|
+
// If parsing fails during analysis pass, skip it—there’s partial frequency data from what can be parsed
|
|
772
777
|
if (!options.continueOnParseError) {
|
|
773
778
|
throw err;
|
|
774
779
|
}
|
|
@@ -823,9 +828,9 @@ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupCh
|
|
|
823
828
|
// First pass minification applies attribute transformations like `removeStyleLinkTypeAttributes` for accurate frequency analysis
|
|
824
829
|
const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);
|
|
825
830
|
|
|
826
|
-
// For frequency analysis,
|
|
827
|
-
// because HTML comments in opening tags prevent proper attribute parsing
|
|
828
|
-
//
|
|
831
|
+
// For frequency analysis, remove custom fragments temporarily
|
|
832
|
+
// because HTML comments in opening tags prevent proper attribute parsing;
|
|
833
|
+
// removed with a space to preserve attribute boundaries
|
|
829
834
|
let scanValue = firstPassOutput;
|
|
830
835
|
if (customFragmentPattern) {
|
|
831
836
|
scanValue = firstPassOutput.replace(customFragmentPattern, ' ');
|
|
@@ -967,8 +972,8 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
967
972
|
removeEmptyElementsExcept = parseRemoveEmptyElementsExcept(options.removeEmptyElementsExcept, options) || [];
|
|
968
973
|
}
|
|
969
974
|
|
|
970
|
-
// Temporarily replace ignored chunks with comments, so that
|
|
971
|
-
//
|
|
975
|
+
// Temporarily replace ignored chunks with comments, so that there’s no need to worry what’s there;
|
|
976
|
+
// there might be completely-horribly-broken-alien-non-html-emoji-cthulhu-filled content
|
|
972
977
|
if (value.indexOf('<!-- htmlmin:ignore -->') !== -1) {
|
|
973
978
|
// Use `indexOf`-based O(n) loop instead of a global regex with [\s\S]*? to avoid O(n²)
|
|
974
979
|
// backtracking on adversarial HTML with many `<!--` prefixes but no closing marker
|
|
@@ -1401,11 +1406,16 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1401
1406
|
}
|
|
1402
1407
|
if (options.collapseWhitespace) {
|
|
1403
1408
|
if (!stackNoTrimWhitespace.length) {
|
|
1409
|
+
// When the prev item is a UID placeholder, compute its effective tag name for whitespace decisions;
|
|
1410
|
+
// this is only used in `collapseWhitespaceSmart`—`prevTag` itself is not modified,
|
|
1411
|
+
// to avoid side effects on the `inlineTextSet` branch below
|
|
1412
|
+
let effectivePrevTag = prevTag;
|
|
1404
1413
|
if (prevTag === 'comment') {
|
|
1405
1414
|
const prevComment = buffer[buffer.length - 1];
|
|
1406
1415
|
if (!uidIgnore || prevComment.indexOf(uidIgnore) === -1) {
|
|
1407
1416
|
if (!prevComment) {
|
|
1408
1417
|
prevTag = charsPrevTag;
|
|
1418
|
+
effectivePrevTag = prevTag;
|
|
1409
1419
|
}
|
|
1410
1420
|
if (buffer.length > 1 && (!prevComment || (!options.conservativeCollapse && / $/.test(currentChars)))) {
|
|
1411
1421
|
const charsIndex = buffer.length - 2;
|
|
@@ -1414,6 +1424,23 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1414
1424
|
return '';
|
|
1415
1425
|
});
|
|
1416
1426
|
}
|
|
1427
|
+
} else if (uidIgnorePlaceholderPattern && nextTag !== 'comment') {
|
|
1428
|
+
// UID placeholder followed by a real element—derive the effective `prevTag` from the
|
|
1429
|
+
// placeholder’s last HTML tag so `collapseWhitespaceSmart` can make the right call;
|
|
1430
|
+
// when `nextTag` is `comment` (another UID placeholder), `commentFinalize` handles it
|
|
1431
|
+
const match = prevComment.match(uidIgnorePlaceholderPattern);
|
|
1432
|
+
if (match) {
|
|
1433
|
+
const idx = +match[1];
|
|
1434
|
+
if (idx < ignoredMarkupChunks.length) {
|
|
1435
|
+
const content = ignoredMarkupChunks[idx];
|
|
1436
|
+
const lastTagMatch = content && RE_LAST_HTML_TAG.exec(content);
|
|
1437
|
+
if (lastTagMatch) {
|
|
1438
|
+
const isClose = lastTagMatch[1].charAt(0) === '/';
|
|
1439
|
+
const tagName = options.name(isClose ? lastTagMatch[1].slice(1) : lastTagMatch[1]);
|
|
1440
|
+
effectivePrevTag = isClose ? '/' + tagName : tagName;
|
|
1441
|
+
}
|
|
1442
|
+
}
|
|
1443
|
+
}
|
|
1417
1444
|
}
|
|
1418
1445
|
}
|
|
1419
1446
|
if (prevTag) {
|
|
@@ -1430,7 +1457,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1430
1457
|
}
|
|
1431
1458
|
}
|
|
1432
1459
|
if (prevTag || nextTag) {
|
|
1433
|
-
text = collapseWhitespaceSmart(text,
|
|
1460
|
+
text = collapseWhitespaceSmart(text, effectivePrevTag, nextTag, prevAttrs, nextAttrs, options, inlineElements, inlineTextSet);
|
|
1434
1461
|
} else {
|
|
1435
1462
|
text = collapseWhitespace(text, options, true, true);
|
|
1436
1463
|
}
|
|
@@ -1553,21 +1580,26 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1553
1580
|
|
|
1554
1581
|
// Only collapse whitespace if both blocks contain HTML (start with `<`)
|
|
1555
1582
|
// Don’t collapse if either contains plain text, as that would change meaning
|
|
1556
|
-
// Note: This check will match HTML comments (`<!-- … -->`), but the tag name
|
|
1557
|
-
// regex below requires starting with a letter, so comments are intentionally
|
|
1558
|
-
// excluded by the `currentTagMatch && prevTagMatch` guard
|
|
1559
1583
|
if (currentContent && prevContent && /^\s*</.test(currentContent) && /^\s*</.test(prevContent)) {
|
|
1560
|
-
// Extract tag names from the HTML content
|
|
1584
|
+
// Extract tag names from the HTML content
|
|
1561
1585
|
const currentTagMatch = currentContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
1562
1586
|
const prevTagMatch = prevContent.match(/^\s*<([a-zA-Z][\w:-]*)/);
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1587
|
+
// HTML comments are invisible (no block/inline nature), treat as non-inline
|
|
1588
|
+
const prevIsHtmlComment = !prevTagMatch && RE_HTML_COMMENT_START.test(prevContent);
|
|
1589
|
+
const currentIsHtmlComment = !currentTagMatch && RE_HTML_COMMENT_START.test(currentContent);
|
|
1590
|
+
// Closing tags (e.g., `</div>`)—inline-ness determines whether to collapse
|
|
1591
|
+
const prevClosingTagMatch = !prevTagMatch && RE_CLOSING_TAG_START.exec(prevContent);
|
|
1592
|
+
const currentClosingTagMatch = !currentTagMatch && RE_CLOSING_TAG_START.exec(currentContent);
|
|
1593
|
+
|
|
1594
|
+
// Collapse if both sides are element/closing tags or HTML comments, and neither is inline
|
|
1595
|
+
if ((currentTagMatch || currentIsHtmlComment || currentClosingTagMatch) &&
|
|
1596
|
+
(prevTagMatch || prevIsHtmlComment || prevClosingTagMatch)) {
|
|
1597
|
+
const currentTag = currentTagMatch ? options.name(currentTagMatch[1])
|
|
1598
|
+
: currentClosingTagMatch ? options.name(currentClosingTagMatch[1]) : null;
|
|
1599
|
+
const prevTag = prevTagMatch ? options.name(prevTagMatch[1])
|
|
1600
|
+
: prevClosingTagMatch ? options.name(prevClosingTagMatch[1]) : null;
|
|
1601
|
+
|
|
1602
|
+
// Don’t collapse between inline elements (HTML comments count as non-inline)
|
|
1571
1603
|
if (!inlineElements.has(currentTag) && !inlineElements.has(prevTag)) {
|
|
1572
1604
|
// Collapse whitespace respecting context rules
|
|
1573
1605
|
let collapsedText = prevText;
|
package/src/htmlparser.js
CHANGED
|
@@ -218,12 +218,12 @@ export class HTMLParser {
|
|
|
218
218
|
while (pos < fullLength) {
|
|
219
219
|
lastPos = pos;
|
|
220
220
|
|
|
221
|
-
// Make sure
|
|
221
|
+
// Make sure not to be in a `script` or `style` element
|
|
222
222
|
if (!lastTag || !special.has(lastTag)) {
|
|
223
223
|
const textEnd = fullHtml.indexOf('<', pos);
|
|
224
224
|
|
|
225
225
|
if (textEnd === pos) {
|
|
226
|
-
//
|
|
226
|
+
// Tag found at current position
|
|
227
227
|
|
|
228
228
|
// Check cache from previous lookahead (avoids re-parsing the same tag)
|
|
229
229
|
if (cachedNextStartTag && cachedNextStartTag.pos === pos) {
|
|
@@ -459,7 +459,7 @@ export class HTMLParser {
|
|
|
459
459
|
break;
|
|
460
460
|
}
|
|
461
461
|
|
|
462
|
-
// Limit the input length
|
|
462
|
+
// Limit the input length to pass to the regex to prevent catastrophic backtracking
|
|
463
463
|
const remainingLen = fullLength - currentPos;
|
|
464
464
|
const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
|
|
465
465
|
|
|
@@ -474,7 +474,7 @@ export class HTMLParser {
|
|
|
474
474
|
const searchStr = fullHtml.substring(currentPos, extractEndPos);
|
|
475
475
|
attr = searchStr.match(attribute);
|
|
476
476
|
|
|
477
|
-
// If
|
|
477
|
+
// If input was limited and there’s a match, check if the value might be truncated
|
|
478
478
|
if (attr) {
|
|
479
479
|
// Check if the attribute value extends beyond our search window
|
|
480
480
|
const attrEnd = attr[0].length;
|
|
@@ -532,7 +532,7 @@ export class HTMLParser {
|
|
|
532
532
|
}
|
|
533
533
|
|
|
534
534
|
if (!attr) {
|
|
535
|
-
// If
|
|
535
|
+
// If input was limited and there’s no match, try manual extraction
|
|
536
536
|
// This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
|
|
537
537
|
const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
538
538
|
if (manualMatch) {
|
|
@@ -598,7 +598,7 @@ export class HTMLParser {
|
|
|
598
598
|
if (currentTag === needle) {
|
|
599
599
|
return pos;
|
|
600
600
|
}
|
|
601
|
-
// Stop searching if
|
|
601
|
+
// Stop searching if hitting a table boundary
|
|
602
602
|
if (currentTag === 'table') {
|
|
603
603
|
break;
|
|
604
604
|
}
|
package/src/lib/attributes.js
CHANGED
|
@@ -72,7 +72,7 @@ function attributesInclude(attributes, attribute) {
|
|
|
72
72
|
/**
|
|
73
73
|
* Remove duplicate attributes from an attribute list.
|
|
74
74
|
* Per HTML spec, when an attribute appears multiple times, the first occurrence wins.
|
|
75
|
-
* Duplicate attributes result in invalid HTML, so
|
|
75
|
+
* Duplicate attributes result in invalid HTML, so only the first is kept.
|
|
76
76
|
* @param {Array} attrs - Array of attribute objects with `name` property
|
|
77
77
|
* @param {boolean} caseSensitive - Whether to compare names case-sensitively (for XML/SVG)
|
|
78
78
|
* @returns {Array} Deduplicated attribute array (modifies in place and returns)
|
|
@@ -117,7 +117,7 @@ function isAttributeRedundant(tag, attrName, attrValue, attrs) {
|
|
|
117
117
|
return false;
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
//
|
|
120
|
+
// Value needs to be checked, so normalize it
|
|
121
121
|
attrValue = attrValue ? trimWhitespace(attrValue.toLowerCase()) : '';
|
|
122
122
|
|
|
123
123
|
// Legacy attribute checks
|
|
@@ -575,7 +575,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
575
575
|
let attrFragment;
|
|
576
576
|
let emittedAttrValue;
|
|
577
577
|
|
|
578
|
-
// Determine if
|
|
578
|
+
// Determine if need to add/keep quotes
|
|
579
579
|
const shouldAddQuotes = typeof attrValue !== 'undefined' && (
|
|
580
580
|
// If `removeAttributeQuotes` is enabled, add quotes only if they can’t be removed
|
|
581
581
|
(options.removeAttributeQuotes && (attrValue.indexOf(uidAttr) !== -1 || !canRemoveAttributeQuotes(attrValue))) ||
|
|
@@ -590,7 +590,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
590
590
|
// Determine the appropriate quote character
|
|
591
591
|
if (!options.preventAttributesEscaping) {
|
|
592
592
|
// Normal mode: Choose optimal quote type to minimize escaping
|
|
593
|
-
// unless
|
|
593
|
+
// unless preserving original quotes and they don’t need escaping
|
|
594
594
|
const needsEscaping = (attrQuote === '"' && attrValue.indexOf('"') !== -1) || (attrQuote === "'" && attrValue.indexOf("'") !== -1);
|
|
595
595
|
|
|
596
596
|
if (options.removeAttributeQuotes || typeof options.quoteCharacter !== 'undefined' || needsEscaping || attrQuote === '') {
|