html-minifier-next 4.16.4 → 4.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -70
- package/cli.js +29 -26
- package/dist/htmlminifier.cjs +280 -139
- package/dist/htmlminifier.esm.bundle.js +280 -139
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/dist/types/lib/attributes.d.ts +1 -1
- package/dist/types/lib/attributes.d.ts.map +1 -1
- package/dist/types/lib/constants.d.ts +16 -15
- package/dist/types/lib/constants.d.ts.map +1 -1
- package/dist/types/lib/content.d.ts.map +1 -1
- package/dist/types/lib/options.d.ts +2 -2
- package/dist/types/lib/options.d.ts.map +1 -1
- package/dist/types/lib/whitespace.d.ts +1 -1
- package/dist/types/lib/whitespace.d.ts.map +1 -1
- package/dist/types/presets.d.ts +1 -2
- package/dist/types/presets.d.ts.map +1 -1
- package/package.json +9 -8
- package/src/htmlminifier.js +49 -47
- package/src/htmlparser.js +44 -13
- package/src/lib/attributes.js +72 -30
- package/src/lib/constants.js +46 -39
- package/src/lib/content.js +0 -1
- package/src/lib/elements.js +15 -15
- package/src/lib/options.js +26 -9
- package/src/lib/svg.js +14 -14
- package/src/lib/whitespace.js +53 -4
- package/src/presets.js +4 -5
- package/src/tokenchain.js +2 -2
- package/src/lib/index.js +0 -20
package/src/htmlparser.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
/*
|
|
2
2
|
* HTML Parser By John Resig (ejohn.org)
|
|
3
3
|
* Modified by Juriy “kangax” Zaytsev
|
|
4
4
|
* Original code by Erik Arvidsson, Mozilla Public License
|
|
@@ -9,10 +9,10 @@
|
|
|
9
9
|
* Use like so:
|
|
10
10
|
*
|
|
11
11
|
* HTMLParser(htmlString, {
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
12
|
+
* start: function(tag, attrs, unary) {},
|
|
13
|
+
* end: function(tag) {},
|
|
14
|
+
* chars: function(text) {},
|
|
15
|
+
* comment: function(text) {}
|
|
16
16
|
* });
|
|
17
17
|
*/
|
|
18
18
|
|
|
@@ -35,7 +35,7 @@ const singleAttrValues = [
|
|
|
35
35
|
];
|
|
36
36
|
// https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
|
|
37
37
|
const qnameCapture = (function () {
|
|
38
|
-
//
|
|
38
|
+
// https://www.npmjs.com/package/ncname
|
|
39
39
|
const combiningChar = '\\u0300-\\u0345\\u0360\\u0361\\u0483-\\u0486\\u0591-\\u05A1\\u05A3-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u064B-\\u0652\\u0670\\u06D6-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0901-\\u0903\\u093C\\u093E-\\u094D\\u0951-\\u0954\\u0962\\u0963\\u0981-\\u0983\\u09BC\\u09BE-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CD\\u09D7\\u09E2\\u09E3\\u0A02\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A70\\u0A71\\u0A81-\\u0A83\\u0ABC\\u0ABE-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0B01-\\u0B03\\u0B3C\\u0B3E-\\u0B43\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B82\\u0B83\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C82\\u0C83\\u0CBE-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4D\\u0D57\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E\\u0F3F\\u0F71-\\u0F84\\u0F86-\\u0F8B\\u0F90-\\u0F95\\u0F97\\u0F99-\\u0FAD\\u0FB1-\\u0FB7\\u0FB9\\u20D0-\\u20DC\\u20E1\\u302A-\\u302F\\u3099\\u309A';
|
|
40
40
|
const digit = '0-9\\u0660-\\u0669\\u06F0-\\u06F9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE7-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29';
|
|
41
41
|
const extender = '\\xB7\\u02D0\\u02D1\\u0387\\u0640\\u0E46\\u0EC6\\u3005\\u3031-\\u3035\\u309D\\u309E\\u30FC-\\u30FE';
|
|
@@ -75,7 +75,7 @@ const nonPhrasing = new CaseInsensitiveSet(['address', 'article', 'aside', 'base
|
|
|
75
75
|
const reCache = {};
|
|
76
76
|
|
|
77
77
|
// Pre-compiled regexes for common special elements (`script`, `style`, `noscript`)
|
|
78
|
-
// These are used frequently and pre-compiling them avoids regex creation overhead
|
|
78
|
+
// These are used frequently, and pre-compiling them avoids regex creation overhead
|
|
79
79
|
const preCompiledStackedTags = {
|
|
80
80
|
'script': /([\s\S]*?)<\/script[^>]*>/i,
|
|
81
81
|
'style': /([\s\S]*?)<\/style[^>]*>/i,
|
|
@@ -138,6 +138,7 @@ export class HTMLParser {
|
|
|
138
138
|
// Use cached attribute regex for this handler configuration
|
|
139
139
|
const attribute = getAttrRegexForHandler(handler);
|
|
140
140
|
let prevTag = undefined, nextTag = undefined;
|
|
141
|
+
let prevAttrs = [], nextAttrs = [];
|
|
141
142
|
|
|
142
143
|
// Index-based parsing
|
|
143
144
|
let pos = 0;
|
|
@@ -181,6 +182,7 @@ export class HTMLParser {
|
|
|
181
182
|
}
|
|
182
183
|
advance(commentEnd + 3);
|
|
183
184
|
prevTag = '';
|
|
185
|
+
prevAttrs = [];
|
|
184
186
|
continue;
|
|
185
187
|
}
|
|
186
188
|
}
|
|
@@ -195,6 +197,7 @@ export class HTMLParser {
|
|
|
195
197
|
}
|
|
196
198
|
advance(conditionalEnd + 2);
|
|
197
199
|
prevTag = '';
|
|
200
|
+
prevAttrs = [];
|
|
198
201
|
continue;
|
|
199
202
|
}
|
|
200
203
|
}
|
|
@@ -207,6 +210,7 @@ export class HTMLParser {
|
|
|
207
210
|
}
|
|
208
211
|
advance(doctypeMatch[0].length);
|
|
209
212
|
prevTag = '';
|
|
213
|
+
prevAttrs = [];
|
|
210
214
|
continue;
|
|
211
215
|
}
|
|
212
216
|
|
|
@@ -216,6 +220,7 @@ export class HTMLParser {
|
|
|
216
220
|
advance(endTagMatch[0].length);
|
|
217
221
|
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
218
222
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
223
|
+
prevAttrs = [];
|
|
219
224
|
continue;
|
|
220
225
|
}
|
|
221
226
|
|
|
@@ -248,19 +253,24 @@ export class HTMLParser {
|
|
|
248
253
|
let nextTagMatch = parseStartTag(nextHtml);
|
|
249
254
|
if (nextTagMatch) {
|
|
250
255
|
nextTag = nextTagMatch.tagName;
|
|
256
|
+
// Extract minimal attribute info for whitespace logic (just name/value pairs)
|
|
257
|
+
nextAttrs = extractAttrInfo(nextTagMatch.attrs);
|
|
251
258
|
} else {
|
|
252
259
|
nextTagMatch = nextHtml.match(endTag);
|
|
253
260
|
if (nextTagMatch) {
|
|
254
261
|
nextTag = '/' + nextTagMatch[1];
|
|
262
|
+
nextAttrs = [];
|
|
255
263
|
} else {
|
|
256
264
|
nextTag = '';
|
|
265
|
+
nextAttrs = [];
|
|
257
266
|
}
|
|
258
267
|
}
|
|
259
268
|
|
|
260
269
|
if (handler.chars) {
|
|
261
|
-
await handler.chars(text, prevTag, nextTag);
|
|
270
|
+
await handler.chars(text, prevTag, nextTag, prevAttrs, nextAttrs);
|
|
262
271
|
}
|
|
263
272
|
prevTag = '';
|
|
273
|
+
prevAttrs = [];
|
|
264
274
|
} else {
|
|
265
275
|
const stackedTag = lastTag.toLowerCase();
|
|
266
276
|
// Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
|
|
@@ -283,7 +293,7 @@ export class HTMLParser {
|
|
|
283
293
|
} else {
|
|
284
294
|
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
285
295
|
if (handler.continueOnParseError && handler.chars && html) {
|
|
286
|
-
await handler.chars(html[0], prevTag, '');
|
|
296
|
+
await handler.chars(html[0], prevTag, '', prevAttrs, []);
|
|
287
297
|
advance(1);
|
|
288
298
|
} else {
|
|
289
299
|
break;
|
|
@@ -295,10 +305,11 @@ export class HTMLParser {
|
|
|
295
305
|
if (handler.continueOnParseError) {
|
|
296
306
|
// Skip the problematic character and continue
|
|
297
307
|
if (handler.chars) {
|
|
298
|
-
await handler.chars(fullHtml[pos], prevTag, '');
|
|
308
|
+
await handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
|
|
299
309
|
}
|
|
300
310
|
advance(1);
|
|
301
311
|
prevTag = '';
|
|
312
|
+
prevAttrs = [];
|
|
302
313
|
continue;
|
|
303
314
|
}
|
|
304
315
|
const loc = getLineColumn(pos);
|
|
@@ -317,6 +328,23 @@ export class HTMLParser {
|
|
|
317
328
|
await parseEndTag();
|
|
318
329
|
}
|
|
319
330
|
|
|
331
|
+
// Helper to extract minimal attribute info (name/value pairs) from raw attribute matches
|
|
332
|
+
// Used for whitespace collapsing logic—doesn’t need full processing
|
|
333
|
+
function extractAttrInfo(rawAttrs) {
|
|
334
|
+
if (!rawAttrs || !rawAttrs.length) return [];
|
|
335
|
+
|
|
336
|
+
const numCustomParts = handler.customAttrSurround ? handler.customAttrSurround.length * NCP : 0;
|
|
337
|
+
const baseIndex = 1 + numCustomParts;
|
|
338
|
+
|
|
339
|
+
return rawAttrs.map(args => {
|
|
340
|
+
// Extract attribute name (always at `baseIndex`)
|
|
341
|
+
const name = args[baseIndex];
|
|
342
|
+
// Extract value from double-quoted (`baseIndex + 2`), single-quoted (`baseIndex + 3`), or unquoted (`baseIndex + 4`)
|
|
343
|
+
const value = args[baseIndex + 2] ?? args[baseIndex + 3] ?? args[baseIndex + 4];
|
|
344
|
+
return { name: name?.toLowerCase(), value };
|
|
345
|
+
}).filter(attr => attr.name); // Filter out invalid entries
|
|
346
|
+
}
|
|
347
|
+
|
|
320
348
|
function parseStartTag(input) {
|
|
321
349
|
const start = input.match(startTagOpen);
|
|
322
350
|
if (start) {
|
|
@@ -329,7 +357,7 @@ export class HTMLParser {
|
|
|
329
357
|
input = input.slice(consumed);
|
|
330
358
|
let end, attr;
|
|
331
359
|
|
|
332
|
-
// Safety limit:
|
|
360
|
+
// Safety limit: Max length of input to check for attributes
|
|
333
361
|
// Protects against catastrophic backtracking on massive attribute values
|
|
334
362
|
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
335
363
|
|
|
@@ -429,7 +457,7 @@ export class HTMLParser {
|
|
|
429
457
|
}
|
|
430
458
|
|
|
431
459
|
async function parseEndTagAt(pos) {
|
|
432
|
-
// Close all open elements up to pos (mirrors parseEndTag
|
|
460
|
+
// Close all open elements up to `pos` (mirrors `parseEndTag`’s core branch)
|
|
433
461
|
for (let i = stack.length - 1; i >= pos; i--) {
|
|
434
462
|
if (handler.end) {
|
|
435
463
|
await handler.end(stack[i].tag, stack[i].attrs, true);
|
|
@@ -497,7 +525,7 @@ export class HTMLParser {
|
|
|
497
525
|
const attrs = match.attrs.map(function (args) {
|
|
498
526
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
499
527
|
|
|
500
|
-
// Hackish workaround for
|
|
528
|
+
// Hackish workaround for Firefox bug, https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
501
529
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
502
530
|
if (args[3] === '') { delete args[3]; }
|
|
503
531
|
if (args[4] === '') { delete args[4]; }
|
|
@@ -554,6 +582,9 @@ export class HTMLParser {
|
|
|
554
582
|
unarySlash = '';
|
|
555
583
|
}
|
|
556
584
|
|
|
585
|
+
// Store attributes for `prevAttrs` tracking (used in whitespace collapsing)
|
|
586
|
+
prevAttrs = attrs;
|
|
587
|
+
|
|
557
588
|
if (handler.start) {
|
|
558
589
|
await handler.start(tagName, attrs, unary, unarySlash);
|
|
559
590
|
}
|
package/src/lib/attributes.js
CHANGED
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
keepScriptsMimetypes,
|
|
16
16
|
isSimpleBoolean,
|
|
17
17
|
isBooleanValue,
|
|
18
|
-
|
|
18
|
+
srcsetElements,
|
|
19
19
|
reEmptyAttribute
|
|
20
20
|
} from './constants.js';
|
|
21
21
|
import { trimWhitespace, collapseWhitespaceAll } from './whitespace.js';
|
|
@@ -75,8 +75,7 @@ function isAttributeRedundant(tag, attrName, attrValue, attrs) {
|
|
|
75
75
|
const tagHasDefaults = tag in tagDefaults;
|
|
76
76
|
|
|
77
77
|
// Check for legacy attribute rules (element- and attribute-specific)
|
|
78
|
-
const isLegacyAttr = (tag === 'script' && (attrName === 'language' || attrName === 'charset')) ||
|
|
79
|
-
(tag === 'a' && attrName === 'name');
|
|
78
|
+
const isLegacyAttr = (tag === 'script' && (attrName === 'language' || attrName === 'charset')) || (tag === 'a' && attrName === 'name');
|
|
80
79
|
|
|
81
80
|
// If none of these conditions apply, attribute cannot be redundant
|
|
82
81
|
if (!hasGeneralDefault && !tagHasDefaults && !isLegacyAttr) {
|
|
@@ -134,7 +133,7 @@ function isStyleLinkTypeAttribute(attrValue = '') {
|
|
|
134
133
|
return attrValue === '' || attrValue === 'text/css';
|
|
135
134
|
}
|
|
136
135
|
|
|
137
|
-
function
|
|
136
|
+
function isStyleElement(tag, attrs) {
|
|
138
137
|
if (tag !== 'style') {
|
|
139
138
|
return false;
|
|
140
139
|
}
|
|
@@ -191,11 +190,11 @@ function isLinkType(tag, attrs, value) {
|
|
|
191
190
|
}
|
|
192
191
|
|
|
193
192
|
function isMediaQuery(tag, attrs, attrName) {
|
|
194
|
-
return attrName === 'media' && (isLinkType(tag, attrs, 'stylesheet') ||
|
|
193
|
+
return attrName === 'media' && (isLinkType(tag, attrs, 'stylesheet') || isStyleElement(tag, attrs));
|
|
195
194
|
}
|
|
196
195
|
|
|
197
196
|
function isSrcset(attrName, tag) {
|
|
198
|
-
return attrName === 'srcset' &&
|
|
197
|
+
return attrName === 'srcset' && srcsetElements.has(tag);
|
|
199
198
|
}
|
|
200
199
|
|
|
201
200
|
function isMetaViewport(tag, attrs) {
|
|
@@ -203,7 +202,7 @@ function isMetaViewport(tag, attrs) {
|
|
|
203
202
|
return false;
|
|
204
203
|
}
|
|
205
204
|
for (let i = 0, len = attrs.length; i < len; i++) {
|
|
206
|
-
if (attrs[i].name === 'name' && attrs[i].value === 'viewport') {
|
|
205
|
+
if (attrs[i].name.toLowerCase() === 'name' && attrs[i].value.toLowerCase() === 'viewport') {
|
|
207
206
|
return true;
|
|
208
207
|
}
|
|
209
208
|
}
|
|
@@ -223,7 +222,7 @@ function isContentSecurityPolicy(tag, attrs) {
|
|
|
223
222
|
}
|
|
224
223
|
|
|
225
224
|
function canDeleteEmptyAttribute(tag, attrName, attrValue, options) {
|
|
226
|
-
const isValueEmpty = !attrValue ||
|
|
225
|
+
const isValueEmpty = !attrValue || attrValue.trim() === '';
|
|
227
226
|
if (!isValueEmpty) {
|
|
228
227
|
return false;
|
|
229
228
|
}
|
|
@@ -246,7 +245,7 @@ function hasAttrName(name, attrs) {
|
|
|
246
245
|
|
|
247
246
|
async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTMLSelf) {
|
|
248
247
|
// Apply early whitespace normalization if enabled
|
|
249
|
-
// Preserves special spaces (
|
|
248
|
+
// Preserves special spaces (no-break space, hair space, etc.) for consistency with `collapseWhitespace`
|
|
250
249
|
if (options.collapseAttributeWhitespace) {
|
|
251
250
|
// Fast path: Only process if whitespace exists (avoids regex overhead on clean values)
|
|
252
251
|
if (RE_ATTR_WS_CHECK.test(attrValue)) {
|
|
@@ -302,7 +301,7 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
|
|
|
302
301
|
try {
|
|
303
302
|
attrValue = await options.minifyCSS(attrValue, 'inline');
|
|
304
303
|
// After minification, check if CSS consists entirely of invalid properties (no values)
|
|
305
|
-
//
|
|
304
|
+
// I.e., `color:` or `margin:;padding:` should be treated as empty
|
|
306
305
|
if (attrValue && /^(?:[a-z-]+:\s*;?\s*)+$/i.test(attrValue)) {
|
|
307
306
|
attrValue = '';
|
|
308
307
|
}
|
|
@@ -422,13 +421,13 @@ async function normalizeAttr(attr, attrs, tag, options, minifyHTML) {
|
|
|
422
421
|
}
|
|
423
422
|
|
|
424
423
|
if ((options.removeRedundantAttributes &&
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
424
|
+
isAttributeRedundant(tag, attrName, attrValue, attrs)) ||
|
|
425
|
+
(options.removeScriptTypeAttributes && tag === 'script' &&
|
|
426
|
+
attrName === 'type' && isScriptTypeAttribute(attrValue) && !keepScriptTypeAttribute(attrValue)) ||
|
|
427
|
+
(options.removeStyleLinkTypeAttributes && (tag === 'style' || tag === 'link') &&
|
|
428
|
+
attrName === 'type' && isStyleLinkTypeAttribute(attrValue)) ||
|
|
429
|
+
(options.insideSVG && options.minifySVG &&
|
|
430
|
+
shouldRemoveSVGAttribute(tag, attrName, attrValue, options.minifySVG))) {
|
|
432
431
|
return;
|
|
433
432
|
}
|
|
434
433
|
|
|
@@ -437,7 +436,7 @@ async function normalizeAttr(attr, attrs, tag, options, minifyHTML) {
|
|
|
437
436
|
}
|
|
438
437
|
|
|
439
438
|
if (options.removeEmptyAttributes &&
|
|
440
|
-
|
|
439
|
+
canDeleteEmptyAttribute(tag, attrName, attrValue, options)) {
|
|
441
440
|
return;
|
|
442
441
|
}
|
|
443
442
|
|
|
@@ -460,19 +459,35 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
460
459
|
let attrFragment;
|
|
461
460
|
let emittedAttrValue;
|
|
462
461
|
|
|
463
|
-
|
|
464
|
-
|
|
462
|
+
// Determine if we need to add/keep quotes
|
|
463
|
+
const shouldAddQuotes = typeof attrValue !== 'undefined' && (
|
|
464
|
+
// If `removeAttributeQuotes` is enabled, add quotes only if they can’t be removed
|
|
465
|
+
(options.removeAttributeQuotes && (attrValue.indexOf(uidAttr) !== -1 || !canRemoveAttributeQuotes(attrValue))) ||
|
|
466
|
+
// If `removeAttributeQuotes` is not enabled, preserve original quote style or add quotes if value requires them
|
|
467
|
+
(!options.removeAttributeQuotes && (attrQuote !== '' || !canRemoveAttributeQuotes(attrValue) ||
|
|
468
|
+
// Special case: With `removeTagWhitespace`, unquoted values that aren’t last will have space added,
|
|
469
|
+
// which can create ambiguous/invalid HTML—add quotes to be safe
|
|
470
|
+
(options.removeTagWhitespace && attrQuote === '' && !isLast)))
|
|
471
|
+
);
|
|
472
|
+
|
|
473
|
+
if (shouldAddQuotes) {
|
|
465
474
|
// Determine the appropriate quote character
|
|
466
475
|
if (!options.preventAttributesEscaping) {
|
|
467
|
-
// Normal mode:
|
|
468
|
-
|
|
476
|
+
// Normal mode: Choose optimal quote type to minimize escaping
|
|
477
|
+
// unless we’re preserving original quotes and they don’t need escaping
|
|
478
|
+
const needsEscaping = (attrQuote === '"' && attrValue.indexOf('"') !== -1) || (attrQuote === "'" && attrValue.indexOf("'") !== -1);
|
|
479
|
+
|
|
480
|
+
if (options.removeAttributeQuotes || typeof options.quoteCharacter !== 'undefined' || needsEscaping || attrQuote === '') {
|
|
481
|
+
attrQuote = chooseAttributeQuote(attrValue, options);
|
|
482
|
+
}
|
|
483
|
+
|
|
469
484
|
if (attrQuote === '"') {
|
|
470
485
|
attrValue = attrValue.replace(/"/g, '&#34;');
|
|
471
486
|
} else {
|
|
472
487
|
attrValue = attrValue.replace(/'/g, '&#39;');
|
|
473
488
|
}
|
|
474
489
|
} else {
|
|
475
|
-
// `preventAttributesEscaping` mode:
|
|
490
|
+
// `preventAttributesEscaping` mode: Choose safe quotes but don't escape
|
|
476
491
|
// except when both quote types are present—then escape to prevent invalid HTML
|
|
477
492
|
const hasDoubleQuote = attrValue.indexOf('"') !== -1;
|
|
478
493
|
const hasSingleQuote = attrValue.indexOf("'") !== -1;
|
|
@@ -491,8 +506,18 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
491
506
|
attrQuote = "'";
|
|
492
507
|
} else if (attrQuote === "'" && hasSingleQuote && !hasDoubleQuote) {
|
|
493
508
|
attrQuote = '"';
|
|
494
|
-
//
|
|
495
|
-
} else if (attrQuote
|
|
509
|
+
// If no quote character yet (empty string), choose based on content
|
|
510
|
+
} else if (attrQuote === '') {
|
|
511
|
+
if (hasSingleQuote && !hasDoubleQuote) {
|
|
512
|
+
attrQuote = '"';
|
|
513
|
+
} else if (hasDoubleQuote && !hasSingleQuote) {
|
|
514
|
+
attrQuote = "'";
|
|
515
|
+
} else {
|
|
516
|
+
attrQuote = '"';
|
|
517
|
+
}
|
|
518
|
+
// Fallback for invalid/unsupported attrQuote values (not `"`, `'`, or empty string):
|
|
519
|
+
// Choose safe default based on value content
|
|
520
|
+
} else if (attrQuote !== '"' && attrQuote !== "'") {
|
|
496
521
|
if (hasSingleQuote && !hasDoubleQuote) {
|
|
497
522
|
attrQuote = '"';
|
|
498
523
|
} else if (hasDoubleQuote && !hasSingleQuote) {
|
|
@@ -502,7 +527,22 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
502
527
|
}
|
|
503
528
|
}
|
|
504
529
|
} else {
|
|
505
|
-
|
|
530
|
+
// `quoteCharacter` is explicitly set
|
|
531
|
+
const preferredQuote = options.quoteCharacter === '\'' ? '\'' : '"';
|
|
532
|
+
// Safety check: If the preferred quote conflicts with value content, switch to the opposite quote
|
|
533
|
+
if ((preferredQuote === '"' && hasDoubleQuote && !hasSingleQuote) || (preferredQuote === "'" && hasSingleQuote && !hasDoubleQuote)) {
|
|
534
|
+
attrQuote = preferredQuote === '"' ? "'" : '"';
|
|
535
|
+
} else if ((preferredQuote === '"' && hasDoubleQuote && hasSingleQuote) || (preferredQuote === "'" && hasSingleQuote && hasDoubleQuote)) {
|
|
536
|
+
// Both quote types present: Fall back to escaping despite `preventAttributesEscaping`
|
|
537
|
+
attrQuote = preferredQuote;
|
|
538
|
+
if (attrQuote === '"') {
|
|
539
|
+
attrValue = attrValue.replace(/"/g, '&#34;');
|
|
540
|
+
} else {
|
|
541
|
+
attrValue = attrValue.replace(/'/g, '&#39;');
|
|
542
|
+
}
|
|
543
|
+
} else {
|
|
544
|
+
attrQuote = preferredQuote;
|
|
545
|
+
}
|
|
506
546
|
}
|
|
507
547
|
}
|
|
508
548
|
emittedAttrValue = attrQuote + attrValue + attrQuote;
|
|
@@ -510,15 +550,17 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
510
550
|
emittedAttrValue += ' ';
|
|
511
551
|
}
|
|
512
552
|
} else if (isLast && !hasUnarySlash) {
|
|
513
|
-
// Last attribute in a non-self-closing tag:
|
|
553
|
+
// Last attribute in a non-self-closing tag:
|
|
554
|
+
// No space needed
|
|
514
555
|
emittedAttrValue = attrValue;
|
|
515
556
|
} else {
|
|
516
|
-
// Not last attribute, or is a self-closing tag:
|
|
557
|
+
// Not last attribute, or is a self-closing tag:
|
|
558
|
+
// Unquoted values must have space after them to delimit from next attribute
|
|
517
559
|
emittedAttrValue = attrValue + ' ';
|
|
518
560
|
}
|
|
519
561
|
|
|
520
562
|
if (typeof attrValue === 'undefined' || (options.collapseBooleanAttributes &&
|
|
521
|
-
|
|
563
|
+
isBooleanAttribute(attrName.toLowerCase(), (attrValue || '').toLowerCase()))) {
|
|
522
564
|
attrFragment = attrName;
|
|
523
565
|
if (!isLast) {
|
|
524
566
|
attrFragment += ' ';
|
|
@@ -544,7 +586,7 @@ export {
|
|
|
544
586
|
keepScriptTypeAttribute,
|
|
545
587
|
isExecutableScript,
|
|
546
588
|
isStyleLinkTypeAttribute,
|
|
547
|
-
|
|
589
|
+
isStyleElement,
|
|
548
590
|
isBooleanAttribute,
|
|
549
591
|
isUriTypeAttribute,
|
|
550
592
|
isNumberTypeAttribute,
|
package/src/lib/constants.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
//
|
|
1
|
+
// Regex patterns (to avoid repeated allocations in hot paths)
|
|
2
2
|
|
|
3
3
|
const RE_WS_START = /^[ \n\r\t\f]+/;
|
|
4
4
|
const RE_WS_END = /[ \n\r\t\f]+$/;
|
|
@@ -19,7 +19,7 @@ const RE_ATTR_WS_COLLAPSE = /[ \n\r\t\f]+/g;
|
|
|
19
19
|
const RE_ATTR_WS_TRIM = /^[ \n\r\t\f]+|[ \n\r\t\f]+$/g;
|
|
20
20
|
const RE_NUMERIC_VALUE = /-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/g;
|
|
21
21
|
|
|
22
|
-
// Inline element
|
|
22
|
+
// Inline element sets for whitespace handling
|
|
23
23
|
|
|
24
24
|
// Non-empty elements that will maintain whitespace around them
|
|
25
25
|
const inlineElementsToKeepWhitespaceAround = new Set(['a', 'abbr', 'acronym', 'b', 'bdi', 'bdo', 'big', 'button', 'cite', 'code', 'del', 'dfn', 'em', 'font', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'mark', 'math', 'meter', 'nobr', 'object', 'output', 'progress', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'tt', 'u', 'var', 'wbr']);
|
|
@@ -30,6 +30,9 @@ const inlineElementsToKeepWhitespaceWithin = new Set(['a', 'abbr', 'acronym', 'b
|
|
|
30
30
|
// Elements that will always maintain whitespace around them
|
|
31
31
|
const inlineElementsToKeepWhitespace = new Set(['comment', 'img', 'input', 'wbr']);
|
|
32
32
|
|
|
33
|
+
// Form control elements (for conditional whitespace collapsing)
|
|
34
|
+
const formControlElements = new Set(['input', 'button', 'select', 'textarea', 'output', 'meter', 'progress']);
|
|
35
|
+
|
|
33
36
|
// Default attribute values
|
|
34
37
|
|
|
35
38
|
// Default attribute values (could apply to any element)
|
|
@@ -69,14 +72,17 @@ const tagDefaults = {
|
|
|
69
72
|
// Script MIME types
|
|
70
73
|
|
|
71
74
|
// https://mathiasbynens.be/demo/javascript-mime-type
|
|
72
|
-
// https://developer.mozilla.org/en/docs/Web/HTML/
|
|
75
|
+
// https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/script
|
|
73
76
|
const executableScriptsMimetypes = new Set([
|
|
74
77
|
'text/javascript',
|
|
78
|
+
'text/x-javascript',
|
|
75
79
|
'text/ecmascript',
|
|
80
|
+
'text/x-ecmascript',
|
|
76
81
|
'text/jscript',
|
|
77
82
|
'application/javascript',
|
|
78
83
|
'application/x-javascript',
|
|
79
84
|
'application/ecmascript',
|
|
85
|
+
'application/x-ecmascript',
|
|
80
86
|
'module'
|
|
81
87
|
]);
|
|
82
88
|
|
|
@@ -84,15 +90,15 @@ const keepScriptsMimetypes = new Set([
|
|
|
84
90
|
'module'
|
|
85
91
|
]);
|
|
86
92
|
|
|
87
|
-
// Boolean attribute
|
|
93
|
+
// Boolean attribute sets
|
|
88
94
|
|
|
89
95
|
const isSimpleBoolean = new Set(['allowfullscreen', 'async', 'autofocus', 'autoplay', 'checked', 'compact', 'controls', 'declare', 'default', 'defaultchecked', 'defaultmuted', 'defaultselected', 'defer', 'disabled', 'enabled', 'formnovalidate', 'hidden', 'indeterminate', 'inert', 'ismap', 'itemscope', 'loop', 'multiple', 'muted', 'nohref', 'noresize', 'noshade', 'novalidate', 'nowrap', 'open', 'pauseonexit', 'readonly', 'required', 'reversed', 'scoped', 'seamless', 'selected', 'sortable', 'truespeed', 'typemustmatch', 'visible']);
|
|
90
96
|
|
|
91
97
|
const isBooleanValue = new Set(['true', 'false']);
|
|
92
98
|
|
|
93
|
-
// `srcset`
|
|
99
|
+
// `srcset` elements
|
|
94
100
|
|
|
95
|
-
const
|
|
101
|
+
const srcsetElements = new Set(['img', 'source']);
|
|
96
102
|
|
|
97
103
|
// JSON script types
|
|
98
104
|
|
|
@@ -108,7 +114,7 @@ const jsonScriptTypes = new Set([
|
|
|
108
114
|
'speculationrules',
|
|
109
115
|
]);
|
|
110
116
|
|
|
111
|
-
// Tag omission rules and element
|
|
117
|
+
// Tag omission rules and element sets
|
|
112
118
|
|
|
113
119
|
// Tag omission rules from https://html.spec.whatwg.org/multipage/syntax.html#optional-tags with the following extensions:
|
|
114
120
|
// - retain `<body>` if followed by `<noscript>`
|
|
@@ -119,35 +125,35 @@ const optionalStartTags = new Set(['html', 'head', 'body', 'colgroup', 'tbody'])
|
|
|
119
125
|
|
|
120
126
|
const optionalEndTags = new Set(['html', 'head', 'body', 'li', 'dt', 'dd', 'p', 'rb', 'rt', 'rtc', 'rp', 'optgroup', 'option', 'colgroup', 'caption', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th']);
|
|
121
127
|
|
|
122
|
-
const
|
|
128
|
+
const headerElements = new Set(['meta', 'link', 'script', 'style', 'template', 'noscript']);
|
|
123
129
|
|
|
124
|
-
const
|
|
130
|
+
const descriptionElements = new Set(['dt', 'dd']);
|
|
125
131
|
|
|
126
|
-
const
|
|
132
|
+
const pBlockElements = new Set(['address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol', 'p', 'pre', 'search', 'section', 'table', 'ul']);
|
|
127
133
|
|
|
128
|
-
const
|
|
134
|
+
const pInlineElements = new Set(['a', 'audio', 'del', 'ins', 'map', 'noscript', 'video']);
|
|
129
135
|
|
|
130
136
|
const rubyEndTagOmission = new Set(['rb', 'rt', 'rtc', 'rp']); // `</rb>`, `</rt>`, `</rp>` can be omitted if followed by `<rb>`, `<rt>`, `<rtc>`, or `<rp>`
|
|
131
137
|
|
|
132
138
|
const rubyRtcEndTagOmission = new Set(['rb', 'rtc']); // `</rtc>` can be omitted if followed by `<rb>` or `<rtc>` (not `<rt>` or `<rp>`)
|
|
133
139
|
|
|
134
|
-
const
|
|
140
|
+
const optionElements = new Set(['option', 'optgroup']);
|
|
135
141
|
|
|
136
|
-
const
|
|
142
|
+
const tableContentElements = new Set(['tbody', 'tfoot']);
|
|
137
143
|
|
|
138
|
-
const
|
|
144
|
+
const tableSectionElements = new Set(['thead', 'tbody', 'tfoot']);
|
|
139
145
|
|
|
140
|
-
const
|
|
146
|
+
const cellElements = new Set(['td', 'th']);
|
|
141
147
|
|
|
142
|
-
const
|
|
148
|
+
const topLevelElements = new Set(['html', 'head', 'body']);
|
|
143
149
|
|
|
144
|
-
const
|
|
150
|
+
const compactElements = new Set(['html', 'body']);
|
|
145
151
|
|
|
146
|
-
const
|
|
152
|
+
const looseElements = new Set(['head', 'colgroup', 'caption']);
|
|
147
153
|
|
|
148
|
-
const
|
|
154
|
+
const trailingElements = new Set(['dt', 'thead']);
|
|
149
155
|
|
|
150
|
-
const
|
|
156
|
+
const htmlElements = new Set(['a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside', 'audio', 'b', 'base', 'basefont', 'bdi', 'bdo', 'bgsound', 'big', 'blink', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command', 'content', 'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'element', 'em', 'embed', 'fieldset', 'figcaption', 'figure', 'font', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'i', 'iframe', 'image', 'img', 'input', 'ins', 'isindex', 'kbd', 'keygen', 'label', 'legend', 'li', 'link', 'listing', 'main', 'map', 'mark', 'marquee', 'menu', 'menuitem', 'meta', 'meter', 'multicol', 'nav', 'nobr', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', 'picture', 'plaintext', 'pre', 'progress', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'script', 'search', 'section', 'select', 'selectedcontent', 'shadow', 'small', 'source', 'spacer', 'span', 'strike', 'strong', 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'tt', 'u', 'ul', 'var', 'video', 'wbr', 'xmp']);
|
|
151
157
|
|
|
152
158
|
// Empty attribute regex
|
|
153
159
|
|
|
@@ -157,12 +163,12 @@ const reEmptyAttribute = new RegExp(
|
|
|
157
163
|
|
|
158
164
|
// Special content elements
|
|
159
165
|
|
|
160
|
-
const
|
|
166
|
+
const specialContentElements = new Set(['script', 'style']);
|
|
161
167
|
|
|
162
168
|
// Exports
|
|
163
169
|
|
|
164
170
|
export {
|
|
165
|
-
//
|
|
171
|
+
// Regex patterns
|
|
166
172
|
RE_WS_START,
|
|
167
173
|
RE_WS_END,
|
|
168
174
|
RE_ALL_WS_NBSP,
|
|
@@ -182,10 +188,11 @@ export {
|
|
|
182
188
|
RE_ATTR_WS_TRIM,
|
|
183
189
|
RE_NUMERIC_VALUE,
|
|
184
190
|
|
|
185
|
-
// Inline element
|
|
191
|
+
// Inline element sets
|
|
186
192
|
inlineElementsToKeepWhitespaceAround,
|
|
187
193
|
inlineElementsToKeepWhitespaceWithin,
|
|
188
194
|
inlineElementsToKeepWhitespace,
|
|
195
|
+
formControlElements,
|
|
189
196
|
|
|
190
197
|
// Default values
|
|
191
198
|
generalDefaults,
|
|
@@ -196,33 +203,33 @@ export {
|
|
|
196
203
|
keepScriptsMimetypes,
|
|
197
204
|
jsonScriptTypes,
|
|
198
205
|
|
|
199
|
-
// Boolean
|
|
206
|
+
// Boolean sets
|
|
200
207
|
isSimpleBoolean,
|
|
201
208
|
isBooleanValue,
|
|
202
209
|
|
|
203
210
|
// Misc
|
|
204
|
-
|
|
211
|
+
srcsetElements,
|
|
205
212
|
|
|
206
213
|
// Tag omission rules
|
|
207
214
|
optionalStartTags,
|
|
208
215
|
optionalEndTags,
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
216
|
+
headerElements,
|
|
217
|
+
descriptionElements,
|
|
218
|
+
pBlockElements,
|
|
219
|
+
pInlineElements,
|
|
213
220
|
rubyEndTagOmission,
|
|
214
221
|
rubyRtcEndTagOmission,
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
222
|
+
optionElements,
|
|
223
|
+
tableContentElements,
|
|
224
|
+
tableSectionElements,
|
|
225
|
+
cellElements,
|
|
226
|
+
topLevelElements,
|
|
227
|
+
compactElements,
|
|
228
|
+
looseElements,
|
|
229
|
+
trailingElements,
|
|
230
|
+
htmlElements,
|
|
224
231
|
|
|
225
232
|
// Regex
|
|
226
233
|
reEmptyAttribute,
|
|
227
|
-
|
|
234
|
+
specialContentElements
|
|
228
235
|
};
|
package/src/lib/content.js
CHANGED