html-minifier-next 4.17.2 → 4.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -21
- package/cli.js +3 -0
- package/dist/htmlminifier.cjs +344 -99
- package/dist/htmlminifier.esm.bundle.js +344 -99
- package/dist/types/htmlminifier.d.ts +28 -0
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/dist/types/lib/attributes.d.ts.map +1 -1
- package/dist/types/lib/constants.d.ts +1 -0
- package/dist/types/lib/constants.d.ts.map +1 -1
- package/dist/types/lib/options.d.ts +1 -2
- package/dist/types/lib/options.d.ts.map +1 -1
- package/dist/types/lib/svg.d.ts.map +1 -1
- package/dist/types/presets.d.ts +1 -0
- package/dist/types/presets.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/htmlminifier.js +206 -12
- package/src/htmlparser.js +111 -63
- package/src/lib/attributes.js +4 -1
- package/src/lib/constants.js +6 -0
- package/src/lib/options.js +8 -14
- package/src/lib/svg.js +15 -8
- package/src/presets.js +1 -0
package/src/htmlparser.js
CHANGED
|
@@ -36,16 +36,16 @@ const singleAttrValues = [
|
|
|
36
36
|
// https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
|
|
37
37
|
// Regex source (as a string) for a single capturing group that matches an XML
// qualified name: an optional `prefix:` followed by a local name. Both parts
// follow the XML 1.0 NCName production: the first character is a letter or `_`,
// later characters may also be digits, `.`, `-`, combining characters and extenders.
const qnameCapture = (() => {
  // https://www.npmjs.com/package/ncname
  const combiningChar = '\u0300-\u0345\u0360\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094D\u0951-\u0954\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A02\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A70\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B82\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C82\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0D02\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A';
  const digit = '0-9\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29';
  const extender = '\xB7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D\u309E\u30FC-\u30FE';
  // Kept as one literal: a line break inside it would leave the string unterminated
  const letter = 'A-Za-z\xC0-\xD6\xD8-\xF6\xF8-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7\u04C8\u04CB\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B36-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60\u0D61\u0E01-\u0E2E\u0E30\u0E32\u0E33\u0E40-\u0E45\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD\u0EAE\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102\u1103\u1105-\u1107\u1109\u110B\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D\u116E\u1172\u1173\u1175\u119E\u11A8\u11AB\u11AE\u11AF\u11B7\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A\u212B\u212E\u2180-\u2182\u3007\u3021-\u3029\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\u4E00-\u9FA5\uAC00-\uD7A3';
  // First character: letter or `_`; following characters: any name character
  const ncname = '[' + letter + '_][' + letter + digit + '\\.\\-_' + combiningChar + extender + ']*';
  // Optional `prefix:` (non-capturing) plus local name, captured together as one group
  return '((?:' + ncname + '\\:)?' + ncname + ')';
})();
|
|
46
46
|
// Start of a start tag: `<` followed by a qualified name (captured as group 1)
const startTagOpen = new RegExp('^<' + qnameCapture);
// End of a start tag: optional whitespace, optional `/` (captured as group 1), then `>`
const startTagClose = /^\s*(\/?)>/;
// Complete end tag: `</` + qualified name (group 1), then anything up to and including `>`
export const endTag = new RegExp('^</' + qnameCapture + '[^>]*>');
// Doctype declaration, matched case-insensitively up to the first `>`
const doctype = /^<!DOCTYPE\s?[^>]+>/i;

let IS_REGEX_CAPTURING_BROKEN = false;
|
|
@@ -144,9 +144,6 @@ export class HTMLParser {
|
|
|
144
144
|
let pos = 0;
|
|
145
145
|
let lastPos;
|
|
146
146
|
|
|
147
|
-
// Helper to get remaining HTML from current position
|
|
148
|
-
const remaining = () => fullHtml.slice(pos);
|
|
149
|
-
|
|
150
147
|
// Helper to advance position
|
|
151
148
|
const advance = (n) => { pos += n; };
|
|
152
149
|
|
|
@@ -165,22 +162,32 @@ export class HTMLParser {
|
|
|
165
162
|
return { line, column };
|
|
166
163
|
};
|
|
167
164
|
|
|
165
|
+
// Helper to safely extract substring when needed for regex operations
// - `startPos`: index in `fullHtml` where the substring begins
// - `len` (optional): number of characters to take; when omitted, the
//   substring runs up to `fullLength`
// Returns the extracted string (empty when `startPos` is at or past the end)
const sliceFromPos = (startPos, len) => {
  const endPos = len !== undefined ? startPos + len : fullLength;
  return fullHtml.slice(startPos, endPos);
};
|
|
170
|
+
|
|
168
171
|
while (pos < fullLength) {
|
|
169
172
|
lastPos = pos;
|
|
170
|
-
|
|
173
|
+
|
|
171
174
|
// Make sure we’re not in a `script` or `style` element
|
|
172
175
|
if (!lastTag || !special.has(lastTag)) {
|
|
173
|
-
|
|
174
|
-
|
|
176
|
+
const textEnd = fullHtml.indexOf('<', pos);
|
|
177
|
+
|
|
178
|
+
if (textEnd === pos) {
|
|
179
|
+
// We found a tag at current position
|
|
180
|
+
const remaining = sliceFromPos(pos);
|
|
181
|
+
|
|
175
182
|
// Comment
|
|
176
|
-
if (/^<!--/.test(
|
|
177
|
-
const commentEnd =
|
|
183
|
+
if (/^<!--/.test(remaining)) {
|
|
184
|
+
const commentEnd = fullHtml.indexOf('-->', pos + 4);
|
|
178
185
|
|
|
179
186
|
if (commentEnd >= 0) {
|
|
180
187
|
if (handler.comment) {
|
|
181
|
-
await handler.comment(
|
|
188
|
+
await handler.comment(fullHtml.substring(pos + 4, commentEnd));
|
|
182
189
|
}
|
|
183
|
-
advance(commentEnd + 3);
|
|
190
|
+
advance(commentEnd + 3 - pos);
|
|
184
191
|
prevTag = '';
|
|
185
192
|
prevAttrs = [];
|
|
186
193
|
continue;
|
|
@@ -188,14 +195,14 @@ export class HTMLParser {
|
|
|
188
195
|
}
|
|
189
196
|
|
|
190
197
|
// https://web.archive.org/web/20241201212701/https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
|
|
191
|
-
if (/^<!\[/.test(
|
|
192
|
-
const conditionalEnd =
|
|
198
|
+
if (/^<!\[/.test(remaining)) {
|
|
199
|
+
const conditionalEnd = fullHtml.indexOf(']>', pos + 3);
|
|
193
200
|
|
|
194
201
|
if (conditionalEnd >= 0) {
|
|
195
202
|
if (handler.comment) {
|
|
196
|
-
await handler.comment(
|
|
203
|
+
await handler.comment(fullHtml.substring(pos + 2, conditionalEnd + 1), true /* Non-standard */);
|
|
197
204
|
}
|
|
198
|
-
advance(conditionalEnd + 2);
|
|
205
|
+
advance(conditionalEnd + 2 - pos);
|
|
199
206
|
prevTag = '';
|
|
200
207
|
prevAttrs = [];
|
|
201
208
|
continue;
|
|
@@ -203,8 +210,8 @@ export class HTMLParser {
|
|
|
203
210
|
}
|
|
204
211
|
|
|
205
212
|
// Doctype
|
|
206
|
-
|
|
207
|
-
|
|
213
|
+
if (doctype.test(remaining)) {
|
|
214
|
+
const doctypeMatch = remaining.match(doctype);
|
|
208
215
|
if (handler.doctype) {
|
|
209
216
|
handler.doctype(doctypeMatch[0]);
|
|
210
217
|
}
|
|
@@ -215,8 +222,8 @@ export class HTMLParser {
|
|
|
215
222
|
}
|
|
216
223
|
|
|
217
224
|
// End tag
|
|
218
|
-
|
|
219
|
-
|
|
225
|
+
if (endTag.test(remaining)) {
|
|
226
|
+
const endTagMatch = remaining.match(endTag);
|
|
220
227
|
advance(endTagMatch[0].length);
|
|
221
228
|
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
222
229
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
@@ -225,7 +232,7 @@ export class HTMLParser {
|
|
|
225
232
|
}
|
|
226
233
|
|
|
227
234
|
// Start tag
|
|
228
|
-
const startTagMatch = parseStartTag(
|
|
235
|
+
const startTagMatch = parseStartTag(remaining, pos);
|
|
229
236
|
if (startTagMatch) {
|
|
230
237
|
advance(startTagMatch.advance);
|
|
231
238
|
await handleStartTag(startTagMatch);
|
|
@@ -235,30 +242,30 @@ export class HTMLParser {
|
|
|
235
242
|
|
|
236
243
|
// Treat `<` as text
|
|
237
244
|
if (handler.continueOnParseError) {
|
|
238
|
-
|
|
245
|
+
// Continue looking for next tag
|
|
239
246
|
}
|
|
240
247
|
}
|
|
241
248
|
|
|
242
249
|
let text;
|
|
243
250
|
if (textEnd >= 0) {
|
|
244
|
-
text =
|
|
245
|
-
advance(textEnd);
|
|
251
|
+
text = fullHtml.substring(pos, textEnd);
|
|
252
|
+
advance(textEnd - pos);
|
|
246
253
|
} else {
|
|
247
|
-
text =
|
|
248
|
-
advance(
|
|
254
|
+
text = fullHtml.substring(pos);
|
|
255
|
+
advance(fullLength - pos);
|
|
249
256
|
}
|
|
250
257
|
|
|
251
|
-
// Next tag
|
|
252
|
-
const
|
|
253
|
-
let nextTagMatch = parseStartTag(
|
|
258
|
+
// Next tag for whitespace processing context
|
|
259
|
+
const remainingAfterText = sliceFromPos(pos);
|
|
260
|
+
let nextTagMatch = parseStartTag(remainingAfterText, pos);
|
|
254
261
|
if (nextTagMatch) {
|
|
255
262
|
nextTag = nextTagMatch.tagName;
|
|
256
263
|
// Extract minimal attribute info for whitespace logic (just name/value pairs)
|
|
257
264
|
nextAttrs = extractAttrInfo(nextTagMatch.attrs);
|
|
258
265
|
} else {
|
|
259
|
-
|
|
260
|
-
if (
|
|
261
|
-
nextTag = '/' +
|
|
266
|
+
const endTagMatch = remainingAfterText.match(endTag);
|
|
267
|
+
if (endTagMatch) {
|
|
268
|
+
nextTag = '/' + endTagMatch[1];
|
|
262
269
|
nextAttrs = [];
|
|
263
270
|
} else {
|
|
264
271
|
nextTag = '';
|
|
@@ -274,10 +281,11 @@ export class HTMLParser {
|
|
|
274
281
|
} else {
|
|
275
282
|
const stackedTag = lastTag.toLowerCase();
|
|
276
283
|
// Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
|
|
277
|
-
const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)
|
|
284
|
+
const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)\\x3c/' + stackedTag + '[^>]*>', 'i'));
|
|
278
285
|
|
|
279
|
-
const
|
|
280
|
-
|
|
286
|
+
const remaining = sliceFromPos(pos);
|
|
287
|
+
const m = reStackedTag.exec(remaining);
|
|
288
|
+
if (m && m.index === 0) {
|
|
281
289
|
let text = m[1];
|
|
282
290
|
if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
|
|
283
291
|
text = text
|
|
@@ -288,12 +296,12 @@ export class HTMLParser {
|
|
|
288
296
|
await handler.chars(text);
|
|
289
297
|
}
|
|
290
298
|
// Advance HTML past the matched special tag content and its closing tag
|
|
291
|
-
advance(m
|
|
299
|
+
advance(m[0].length);
|
|
292
300
|
await parseEndTag('</' + stackedTag + '>', stackedTag);
|
|
293
301
|
} else {
|
|
294
302
|
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
295
|
-
if (handler.continueOnParseError && handler.chars &&
|
|
296
|
-
await handler.chars(
|
|
303
|
+
if (handler.continueOnParseError && handler.chars && pos < fullLength) {
|
|
304
|
+
await handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
|
|
297
305
|
advance(1);
|
|
298
306
|
} else {
|
|
299
307
|
break;
|
|
@@ -313,7 +321,7 @@ export class HTMLParser {
|
|
|
313
321
|
continue;
|
|
314
322
|
}
|
|
315
323
|
const loc = getLineColumn(pos);
|
|
316
|
-
// Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g.,
|
|
324
|
+
// Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., `invalid<tag`)
|
|
317
325
|
const CONTEXT_BEFORE = 50;
|
|
318
326
|
const startPos = Math.max(0, pos - CONTEXT_BEFORE);
|
|
319
327
|
const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
@@ -345,8 +353,8 @@ export class HTMLParser {
|
|
|
345
353
|
}).filter(attr => attr.name); // Filter out invalid entries
|
|
346
354
|
}
|
|
347
355
|
|
|
348
|
-
function parseStartTag(
|
|
349
|
-
const start =
|
|
356
|
+
function parseStartTag(remaining, startPos) {
|
|
357
|
+
const start = remaining.match(startTagOpen);
|
|
350
358
|
if (start) {
|
|
351
359
|
const match = {
|
|
352
360
|
tagName: start[1],
|
|
@@ -354,7 +362,7 @@ export class HTMLParser {
|
|
|
354
362
|
advance: 0
|
|
355
363
|
};
|
|
356
364
|
let consumed = start[0].length;
|
|
357
|
-
|
|
365
|
+
let currentPos = startPos + consumed;
|
|
358
366
|
let end, attr;
|
|
359
367
|
|
|
360
368
|
// Safety limit: Max length of input to check for attributes
|
|
@@ -363,16 +371,20 @@ export class HTMLParser {
|
|
|
363
371
|
|
|
364
372
|
while (true) {
|
|
365
373
|
// Check for closing tag first
|
|
366
|
-
|
|
374
|
+
const remainingForEnd = sliceFromPos(currentPos);
|
|
375
|
+
end = remainingForEnd.match(startTagClose);
|
|
367
376
|
if (end) {
|
|
368
377
|
break;
|
|
369
378
|
}
|
|
370
379
|
|
|
371
380
|
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
372
|
-
const
|
|
373
|
-
const
|
|
381
|
+
const remainingLen = fullLength - currentPos;
|
|
382
|
+
const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
|
|
383
|
+
const extractEndPos = isLimited ? currentPos + MAX_ATTR_PARSE_LENGTH : fullLength;
|
|
374
384
|
|
|
375
|
-
|
|
385
|
+
// Create a temporary substring only for attribute parsing (this is limited and necessary for regex)
|
|
386
|
+
const searchStr = fullHtml.substring(currentPos, extractEndPos);
|
|
387
|
+
attr = searchStr.match(attribute);
|
|
376
388
|
|
|
377
389
|
// If we limited the input and got a match, check if the value might be truncated
|
|
378
390
|
if (attr && isLimited) {
|
|
@@ -381,32 +393,31 @@ export class HTMLParser {
|
|
|
381
393
|
// If the match ends near the limit, the value might be truncated
|
|
382
394
|
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
383
395
|
// Manually extract this attribute to handle potentially huge value
|
|
384
|
-
const manualMatch =
|
|
396
|
+
const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
385
397
|
if (manualMatch) {
|
|
386
|
-
const quoteChar =
|
|
398
|
+
const quoteChar = searchStr[manualMatch[0].length];
|
|
387
399
|
if (quoteChar === '"' || quoteChar === "'") {
|
|
388
|
-
const closeQuote =
|
|
400
|
+
const closeQuote = searchStr.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
389
401
|
if (closeQuote !== -1) {
|
|
390
|
-
const
|
|
402
|
+
const fullAttrLen = closeQuote + 1;
|
|
391
403
|
const numCustomParts = handler.customAttrSurround
|
|
392
404
|
? handler.customAttrSurround.length * NCP
|
|
393
405
|
: 0;
|
|
394
406
|
const baseIndex = 1 + numCustomParts;
|
|
395
407
|
|
|
396
408
|
attr = [];
|
|
397
|
-
attr[0] =
|
|
409
|
+
attr[0] = searchStr.substring(0, fullAttrLen);
|
|
398
410
|
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
399
|
-
attr[baseIndex + 1] = '='; // `customAssign` (falls back to
|
|
400
|
-
const value =
|
|
411
|
+
attr[baseIndex + 1] = '='; // `customAssign` (falls back to "=" for huge attributes)
|
|
412
|
+
const value = searchStr.substring(manualMatch[0].length + 1, closeQuote);
|
|
401
413
|
// Place value at correct index based on quote type
|
|
402
414
|
if (quoteChar === '"') {
|
|
403
415
|
attr[baseIndex + 2] = value; // Double-quoted value
|
|
404
416
|
} else {
|
|
405
417
|
attr[baseIndex + 3] = value; // Single-quoted value
|
|
406
418
|
}
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
consumed += attrLen;
|
|
419
|
+
currentPos += fullAttrLen;
|
|
420
|
+
consumed += fullAttrLen;
|
|
410
421
|
match.attrs.push(attr);
|
|
411
422
|
continue;
|
|
412
423
|
}
|
|
@@ -419,18 +430,55 @@ export class HTMLParser {
|
|
|
419
430
|
}
|
|
420
431
|
}
|
|
421
432
|
|
|
433
|
+
if (!attr && isLimited) {
|
|
434
|
+
// If we limited the input and got no match, try manual extraction
|
|
435
|
+
// This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
|
|
436
|
+
const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
437
|
+
if (manualMatch) {
|
|
438
|
+
const quoteChar = searchStr[manualMatch[0].length];
|
|
439
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
440
|
+
// Search in the full HTML (not limited substring) for closing quote
|
|
441
|
+
const closeQuote = fullHtml.indexOf(quoteChar, currentPos + manualMatch[0].length + 1);
|
|
442
|
+
if (closeQuote !== -1) {
|
|
443
|
+
const fullAttrLen = closeQuote - currentPos + 1;
|
|
444
|
+
const numCustomParts = handler.customAttrSurround
|
|
445
|
+
? handler.customAttrSurround.length * NCP
|
|
446
|
+
: 0;
|
|
447
|
+
const baseIndex = 1 + numCustomParts;
|
|
448
|
+
|
|
449
|
+
attr = [];
|
|
450
|
+
attr[0] = fullHtml.substring(currentPos, closeQuote + 1);
|
|
451
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
452
|
+
attr[baseIndex + 1] = '='; // customAssign
|
|
453
|
+
const value = fullHtml.substring(currentPos + manualMatch[0].length + 1, closeQuote);
|
|
454
|
+
// Place value at correct index based on quote type
|
|
455
|
+
if (quoteChar === '"') {
|
|
456
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
457
|
+
} else {
|
|
458
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
459
|
+
}
|
|
460
|
+
currentPos += fullAttrLen;
|
|
461
|
+
consumed += fullAttrLen;
|
|
462
|
+
match.attrs.push(attr);
|
|
463
|
+
continue;
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
|
|
422
469
|
if (!attr) {
|
|
423
470
|
break;
|
|
424
471
|
}
|
|
425
472
|
|
|
426
473
|
const attrLen = attr[0].length;
|
|
427
|
-
|
|
474
|
+
currentPos += attrLen;
|
|
428
475
|
consumed += attrLen;
|
|
429
476
|
match.attrs.push(attr);
|
|
430
477
|
}
|
|
431
478
|
|
|
432
479
|
// Check for closing tag
|
|
433
|
-
|
|
480
|
+
const remainingForClose = sliceFromPos(currentPos);
|
|
481
|
+
end = remainingForClose.match(startTagClose);
|
|
434
482
|
if (end) {
|
|
435
483
|
match.unarySlash = end[1];
|
|
436
484
|
consumed += end[0].length;
|
|
@@ -627,11 +675,11 @@ export class HTMLParser {
|
|
|
627
675
|
if (handler.end) {
|
|
628
676
|
handler.end(tagName, [], false);
|
|
629
677
|
}
|
|
630
|
-
} else if (tagName.toLowerCase() === 'br') {
|
|
678
|
+
} else if (tagName && tagName.toLowerCase() === 'br') {
|
|
631
679
|
if (handler.start) {
|
|
632
680
|
await handler.start(tagName, [], true, '');
|
|
633
681
|
}
|
|
634
|
-
} else if (tagName.toLowerCase() === 'p') {
|
|
682
|
+
} else if (tagName && tagName.toLowerCase() === 'p') {
|
|
635
683
|
if (handler.start) {
|
|
636
684
|
await handler.start(tagName, [], false, '', true);
|
|
637
685
|
}
|
package/src/lib/attributes.js
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
keepScriptsMimetypes,
|
|
16
16
|
isSimpleBoolean,
|
|
17
17
|
isBooleanValue,
|
|
18
|
+
emptyCollapsible,
|
|
18
19
|
srcsetElements,
|
|
19
20
|
reEmptyAttribute
|
|
20
21
|
} from './constants.js';
|
|
@@ -147,7 +148,9 @@ function isStyleElement(tag, attrs) {
|
|
|
147
148
|
}
|
|
148
149
|
|
|
149
150
|
/**
 * Decide whether an attribute should be treated as a boolean attribute.
 *
 * An attribute qualifies when any of the following holds:
 * - its name is in `isSimpleBoolean`;
 * - it is `draggable` and its value is not one of the values in `isBooleanValue`;
 * - its value is the empty string and its name is in `emptyCollapsible`.
 *
 * Names and values are compared as given; no case normalization happens here.
 *
 * @param {string} attrName - Attribute name
 * @param {string} attrValue - Attribute value
 * @returns {boolean} `true` when the attribute is treated as boolean
 */
function isBooleanAttribute(attrName, attrValue) {
  return isSimpleBoolean.has(attrName) ||
    (attrName === 'draggable' && !isBooleanValue.has(attrValue)) ||
    (attrValue === '' && emptyCollapsible.has(attrName));
}
|
|
152
155
|
|
|
153
156
|
function isUriTypeAttribute(attrName, tag) {
|
package/src/lib/constants.js
CHANGED
|
@@ -96,6 +96,11 @@ const isSimpleBoolean = new Set(['allowfullscreen', 'async', 'autofocus', 'autop
|
|
|
96
96
|
|
|
97
97
|
const isBooleanValue = new Set(['true', 'false']);
|
|
98
98
|
|
|
99
|
+
// Attributes where an empty value can be collapsed to just the attribute name
// `crossorigin=""` → `crossorigin` (empty string equals anonymous mode)
// `contenteditable=""` → `contenteditable` (empty string equals `true`)
const emptyCollapsible = new Set(['crossorigin', 'contenteditable']);
|
|
103
|
+
|
|
99
104
|
// `srcset` elements
|
|
100
105
|
|
|
101
106
|
const srcsetElements = new Set(['img', 'source']);
|
|
@@ -206,6 +211,7 @@ export {
|
|
|
206
211
|
// Boolean sets
|
|
207
212
|
isSimpleBoolean,
|
|
208
213
|
isBooleanValue,
|
|
214
|
+
emptyCollapsible,
|
|
209
215
|
|
|
210
216
|
// Misc
|
|
211
217
|
srcsetElements,
|
package/src/lib/options.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Imports
|
|
2
2
|
|
|
3
3
|
import RelateURL from 'relateurl';
|
|
4
|
-
import { stableStringify, identity, identityAsync, replaceAsync } from './utils.js';
|
|
4
|
+
import { LRU, stableStringify, identity, identityAsync, replaceAsync } from './utils.js';
|
|
5
5
|
import { RE_TRAILING_SEMICOLON } from './constants.js';
|
|
6
6
|
import { canCollapseWhitespace, canTrimWhitespace } from './whitespace.js';
|
|
7
7
|
import { wrapCSS, unwrapCSS } from './content.js';
|
|
@@ -32,10 +32,9 @@ function shouldMinifyInnerHTML(options) {
|
|
|
32
32
|
* @param {Function} deps.getSwc - Function to lazily load @swc/core
|
|
33
33
|
* @param {LRU} deps.cssMinifyCache - CSS minification cache
|
|
34
34
|
* @param {LRU} deps.jsMinifyCache - JS minification cache
|
|
35
|
-
* @param {LRU} deps.urlMinifyCache - URL minification cache
|
|
36
35
|
* @returns {MinifierOptions} Normalized options with defaults applied
|
|
37
36
|
*/
|
|
38
|
-
const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssMinifyCache, jsMinifyCache
|
|
37
|
+
const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssMinifyCache, jsMinifyCache } = {}) => {
|
|
39
38
|
const options = {
|
|
40
39
|
name: function (name) {
|
|
41
40
|
return name.toLowerCase();
|
|
@@ -329,7 +328,7 @@ const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssM
|
|
|
329
328
|
const relateUrlInstance = new RelateURL(relateUrlOptions.site || '', relateUrlOptions);
|
|
330
329
|
|
|
331
330
|
// Create instance-specific cache (results depend on site configuration)
|
|
332
|
-
const instanceCache =
|
|
331
|
+
const instanceCache = new LRU(500);
|
|
333
332
|
|
|
334
333
|
options.minifyURLs = function (text) {
|
|
335
334
|
// Fast-path: Skip if text doesn’t look like a URL that needs processing
|
|
@@ -338,20 +337,15 @@ const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssM
|
|
|
338
337
|
return text;
|
|
339
338
|
}
|
|
340
339
|
|
|
341
|
-
// Check
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
return cached;
|
|
346
|
-
}
|
|
340
|
+
// Check cache
|
|
341
|
+
const cached = instanceCache.get(text);
|
|
342
|
+
if (cached !== undefined) {
|
|
343
|
+
return cached;
|
|
347
344
|
}
|
|
348
345
|
|
|
349
346
|
try {
|
|
350
347
|
const result = relateUrlInstance.relate(text);
|
|
351
|
-
|
|
352
|
-
if (instanceCache) {
|
|
353
|
-
instanceCache.set(text, result);
|
|
354
|
-
}
|
|
348
|
+
instanceCache.set(text, result);
|
|
355
349
|
return result;
|
|
356
350
|
} catch (err) {
|
|
357
351
|
// Don’t cache errors
|
package/src/lib/svg.js
CHANGED
|
@@ -115,7 +115,8 @@ function minifyNumber(num, precision = 3) {
|
|
|
115
115
|
const fixed = parsed.toFixed(precision);
|
|
116
116
|
const trimmed = fixed.replace(/\.?0+$/, '');
|
|
117
117
|
|
|
118
|
-
|
|
118
|
+
// Remove leading zero before decimal point (e.g., `0.5` → `.5`, `-0.3` → `-.3`)
|
|
119
|
+
const result = (trimmed || '0').replace(/^(-?)0\./, '$1.');
|
|
119
120
|
numberCache.set(cacheKey, result);
|
|
120
121
|
return result;
|
|
121
122
|
}
|
|
@@ -135,17 +136,23 @@ function minifyPathData(pathData, precision = 3) {
|
|
|
135
136
|
});
|
|
136
137
|
|
|
137
138
|
// Remove unnecessary spaces around path commands
|
|
138
|
-
// Safe to remove space after a command letter when it’s followed by a number
|
|
139
|
-
//
|
|
140
|
-
|
|
139
|
+
// Safe to remove space after a command letter when it’s followed by a number
|
|
140
|
+
// (which may be negative or start with a decimal point)
|
|
141
|
+
// `M 10 20` → `M10 20`, `L -5 -3` → `L-5-3`, `M .5 .3` → `M.5.3`
|
|
142
|
+
result = result.replace(/([MLHVCSQTAZmlhvcsqtaz])\s+(?=-?\.?\d)/g, '$1');
|
|
141
143
|
|
|
142
144
|
// Safe to remove space before command letter when preceded by a number
|
|
143
|
-
// `0 L` → `0L`, `20 M` → `20M`
|
|
144
|
-
result = result.replace(/(\d)\s+([MLHVCSQTAZmlhvcsqtaz])/g, '$1$2');
|
|
145
|
+
// `0 L` → `0L`, `20 M` → `20M`, `.5 L` → `.5L`
|
|
146
|
+
result = result.replace(/([\d.])\s+([MLHVCSQTAZmlhvcsqtaz])/g, '$1$2');
|
|
145
147
|
|
|
146
148
|
// Safe to remove space before negative number when preceded by a number
|
|
147
|
-
// `10 -20` → `10-20` (
|
|
148
|
-
result = result.replace(/(\d)\s+(
|
|
149
|
+
// `10 -20` → `10-20`, `.5 -.3` → `.5-.3` (minus sign is always a separator)
|
|
150
|
+
result = result.replace(/([\d.])\s+(-)/g, '$1$2');
|
|
151
|
+
|
|
152
|
+
// Safe to remove space between two decimal numbers (decimal point acts as separator)
|
|
153
|
+
// `.5 .3` → `.5.3` (only when previous char is `.`, indicating a complete decimal)
|
|
154
|
+
// Note: `0 .3` must not become `0.3` (that would change two numbers into one)
|
|
155
|
+
result = result.replace(/(\.\d*)\s+(\.)/g, '$1$2');
|
|
149
156
|
|
|
150
157
|
return result;
|
|
151
158
|
}
|