@small-tools/html 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
/** Signature of the minifier: receives an HTML string and returns a string. */
export type MinifyHtml = (html: string) => string;

/** Ambient declaration of the `minifyHtml` function; no implementation here. */
export declare const minifyHtml: MinifyHtml;
@@ -0,0 +1,25 @@
1
/**
 * Minifies an HTML string with a sequence of regular-expression passes.
 *
 * Content of `<pre>`, `<code>`, `<textarea>`, `<script>` and `<style>` elements,
 * and double-quoted values of whitespace-sensitive attributes (`title`, `alt`,
 * `placeholder`, `value`, `style`, `srcdoc`, `data-*`, `aria-*`, `on*`), is
 * swapped for placeholders before minification and restored verbatim afterwards.
 *
 * @param html - Markup to minify.
 * @returns The minified markup.
 */
const minifyHtml = (html) => {
    const preservedTagContentRegExp = /<(pre|code|textarea|script|style)(?:[\s\S]*?)<\/\1>/gi;
    const preservedAttributeContentRegExp = /\s+(?:title|alt|placeholder|value|style|srcdoc|data-.+?|aria-.+?|on.+?)\s*=\s*(?:"(?:[\s\S]*?)")/gi;
    const preservedTagContents = [];
    const preservedAttributesContents = [];
    return html
        .replaceAll(preservedTagContentRegExp, (match) => {
        preservedTagContents.push(match);
        return `___PRESERVED_TAG_${preservedTagContents.length - 1}___`;
    }) // Preserve tag contents
        .replaceAll(preservedAttributeContentRegExp, (match) => {
        preservedAttributesContents.push(match.trim());
        return `___PRESERVED_ATTRIBUTE_${preservedAttributesContents.length - 1}___`;
    }) // Preserve attribute contents
        .replaceAll(/<!--.*?-->/gs, "") // Remove comments
        // NOTE(review): line breaks are deleted, not replaced by a space, so two
        // tokens separated only by a newline end up joined.
        .replaceAll(/\n|\r|\t/g, "") // Remove tabs and new lines
        .replaceAll(/\s{2,}/g, " ") // Remove multiple spaces
        .replaceAll(/>\s+</g, "><") // Remove spaces between tags
        .replaceAll(/\s+>/g, ">") // Remove spaces before closing tags
        .replaceAll(/<\s+/g, "<") // Remove spaces before opening tags
        .replaceAll(/\s+=\s+/g, "=") // Remove spaces around =
        // Restore in the reverse order of preservation. An attribute value was
        // captured after tag placeholders were inserted, so it may itself contain
        // a tag placeholder that must still be visible when tags are restored.
        .replaceAll(/___PRESERVED_ATTRIBUTE_(\d+)___/g, (match, index) => {
        const content = preservedAttributesContents[Number(index)];
        // A placeholder-looking string that came from the input has no entry:
        // leave it untouched instead of emitting " undefined".
        return content === undefined ? match : ` ${content}`;
    })
        .replaceAll(/___PRESERVED_TAG_(\d+)___/g, (match, index) => preservedTagContents[Number(index)] ?? match);
};
25
+ export { minifyHtml };
@@ -1,3 +1,4 @@
1
1
/** Signature of the sanitizer: receives untrusted HTML and returns a string. */
export type SanitizeHtml = (unsafeHtml: string) => string;

/** Ambient declaration of the `sanitizeHtml` function; no implementation here. */
export declare const sanitizeHtml: SanitizeHtml;
@@ -1,13 +1,290 @@
1
- const safeEntities = {
2
- "&": "&amp;",
3
- "<": "&lt;",
4
- ">": "&gt;",
5
- '"': "&quot;",
6
- "'": "&#039;",
1
/**
 * Allowlist of lowercase element names, keyed by element type
 * (`html` or `svg`).
 */
const safeTags = {
    html: new Set([
        // Generic containers and breaks
        "div", "span", "p", "br", "hr",
        // Inline text formatting
        "b", "strong", "i", "em", "u", "s", "small", "mark", "sub", "sup",
        // Quotations, code and semantics
        "abbr", "cite", "q", "blockquote", "code", "pre", "kbd", "samp", "var", "time",
        // Headings
        "h1", "h2", "h3", "h4", "h5", "h6",
        // Lists
        "ul", "ol", "li", "dl", "dt", "dd",
        // Tables
        "table", "thead", "tbody", "tfoot", "tr", "td", "th", "caption", "colgroup", "col",
        // Media
        "img", "picture", "source", "track",
        // Links
        "a", "area",
        // Forms
        "form", "input", "textarea", "select", "option", "optgroup", "label", "button",
        "fieldset", "legend", "datalist", "output", "progress", "meter",
        // Templating
        "template", "slot",
    ]),
    svg: new Set(["svg", "g", "path", "rect", "circle", "ellipse", "line", "polyline", "polygon"]),
};
85
/**
 * Allowlist of lowercase attribute names, keyed by element type
 * (`html` or `svg`).
 */
const safeAttributes = {
    html: new Set([
        // Global attributes
        "id", "class", "title", "lang", "dir", "hidden", "tabindex", "draggable", "spellcheck", "translate",
        // Form control state
        "value", "name", "placeholder", "readonly", "disabled", "required", "checked", "selected", "multiple",
        // Form control sizing
        "maxlength", "minlength", "size", "cols", "rows", "wrap",
        // Labelling and typing
        "for", "type",
        // Media
        "src", "srcset", "sizes", "alt", "poster",
        // Links and form submission
        "href", "action", "formaction", "target", "download", "rel",
        // Tables
        "colspan", "rowspan", "scope", "headers",
    ]),
    svg: new Set([
        "viewbox", "d", "x", "y", "cx", "cy", "r", "width", "height", "points", "fill", "stroke", "stroke-width",
    ]),
};
146
/** Attribute names whose value is treated as a URL. */
const urlAttributes = new Set(["href", "src", "action", "formaction", "poster"]);

/** Attribute names screened with `dangerousUnicodeRegExp`: every URL attribute plus `target`, `id` and `name`. */
const invisibleCharsAttributes = new Set([...urlAttributes, "target", "id", "name"]);

/** Matches any code point in U+200B-U+200F, U+202A-U+202E or U+2066-U+2069. */
const dangerousUnicodeRegExp = /[\u200B-\u200F\u202A-\u202E\u2066-\u2069]/;

/** Attribute names that carry a form submission target. */
const formActionsAttributes = new Set(["action", "formaction"]);
161
/**
 * Tells whether a URL attribute value resolves to an allowed scheme.
 *
 * C0 control characters and DEL are stripped first, then the value is resolved
 * against a placeholder `http://base` so relative references are accepted.
 *
 * @param value - Raw attribute value.
 * @returns `true` when the resolved protocol is `http:`, `https:` or `mailto:`;
 *   `false` otherwise, including when the value cannot be processed.
 */
const isSafeUrl = (value) => {
    // biome-ignore lint/suspicious/noControlCharactersInRegex: We want to remove control chars
    const controlCharsRegExp = /[\u0000-\u001F\u007F]/g;
    let protocol;
    try {
        protocol = new URL(value.replace(controlCharsRegExp, ""), "http://base").protocol;
    }
    catch {
        return false;
    }
    return protocol === "http:" || protocol === "https:" || protocol === "mailto:";
};
173
/**
 * Tells whether every candidate of a `srcset` value points at a safe URL.
 *
 * The value is split on commas; empty candidates are ignored and, for each
 * remaining one, the first whitespace-separated token is checked with `isSafeUrl`.
 *
 * @param value - Raw `srcset` attribute value.
 * @returns `false` as soon as one candidate URL is rejected, `true` otherwise.
 */
const isSafeSrcset = (value) => value
    .split(",")
    .map((candidate) => candidate.trim())
    .filter((candidate) => candidate !== "")
    .every((candidate) => {
    const [url] = candidate.split(/\s+/);
    return url === undefined || isSafeUrl(url);
});
191
/**
 * Tells whether a tag name has the shape accepted for custom elements here:
 * a lowercase letter, optional name characters, a hyphen, then at least one
 * more name character (lowercase letters, digits, `.`, `_`, `-`).
 *
 * @param tag - Lowercased tag name.
 * @returns `true` when the name matches that shape.
 */
const isCustomElement = (tag) => {
    const customElementNameRegExp = /^[a-z][a-z0-9._-]*-[a-z0-9._-]+$/;
    return customElementNameRegExp.test(tag);
};
192
/**
 * Tells whether an attribute may be kept on an element of the given type.
 *
 * @param type - Key into `safeAttributes` (`"html"` or `"svg"`).
 * @param attributeName - Lowercased attribute name.
 * @returns `true` when the name is allowlisted for that type or starts with
 *   `data-` or `aria-`.
 */
const isSafeAttribute = (type, attributeName) => {
    if (safeAttributes[type].has(attributeName)) {
        return true;
    }
    return attributeName.startsWith("data-") || attributeName.startsWith("aria-");
};
195
/**
 * Tells whether a form `action`/`formaction` value targets the current origin.
 *
 * The value is resolved against `window.location.origin` and accepted only when
 * the resulting URL has that same origin.
 *
 * @param value - Raw attribute value.
 * @returns `true` for same-origin targets; `false` for cross-origin targets and
 *   for values that cannot be resolved to a URL.
 */
const isSafeFormAction = (value) => {
    try {
        const { origin } = window.location;
        return new URL(value, origin).origin === origin;
    }
    catch {
        // `new URL` throws on unparsable input (or an unusable base). Treat that
        // as unsafe, as `isSafeUrl` does, rather than letting the exception
        // propagate to the caller.
        return false;
    }
};
199
/**
 * Tells whether an element with the given tag name may be kept.
 *
 * @param type - Key into `safeTags` (`"html"` or `"svg"`).
 * @param tag - Lowercased tag name.
 * @returns `true` when the tag is allowlisted for that type; for `"html"`,
 *   names accepted by `isCustomElement` are allowed as well.
 */
const isSafeTag = (type, tag) => {
    if (safeTags[type].has(tag)) {
        return true;
    }
    return type === "html" && isCustomElement(tag);
};
204
+ const isSafeElement = (node) => {
205
+ return node instanceof HTMLElement || node instanceof SVGElement;
206
+ };
207
/**
 * Strips every attribute of `node` that fails the allowlist or value checks,
 * then forces a `rel` value on anchors.
 *
 * An attribute is removed when, in this order: its name is not allowed for the
 * element type; it is screened for invisible characters and contains one; it is
 * a form action that `isSafeFormAction` rejects; it is a URL attribute that
 * `isSafeUrl` rejects; it is a `srcset` that `isSafeSrcset` rejects; or it is a
 * `target` whose value is not `_blank`.
 *
 * @param type - Element type (`"html"` or `"svg"`) used for the allowlist lookup.
 * @param node - Element whose attributes are sanitized in place.
 * @param tag - Lowercased tag name of `node`.
 */
const sanitizeAttributes = (type, node, tag) => {
    // Walk from the end so removals never shift an index that is still pending.
    let index = node.attributes.length;
    while (index-- > 0) {
        const attribute = node.attributes[index];
        if (attribute === undefined) {
            continue;
        }
        const name = attribute.name.toLowerCase();
        const { value } = attribute;
        if (!isSafeAttribute(type, name)) {
            node.removeAttribute(attribute.name);
            continue;
        }
        if (invisibleCharsAttributes.has(name) && dangerousUnicodeRegExp.test(value)) {
            node.removeAttribute(attribute.name);
            continue;
        }
        if (formActionsAttributes.has(name) && !isSafeFormAction(value)) {
            node.removeAttribute(name);
            continue;
        }
        if (urlAttributes.has(name) && !isSafeUrl(value)) {
            node.removeAttribute(attribute.name);
            continue;
        }
        if (name === "srcset" && !isSafeSrcset(value)) {
            node.removeAttribute(attribute.name);
            continue;
        }
        if (name === "target" && value !== "_blank") {
            node.removeAttribute(attribute.name);
        }
    }
    // Anchors always get a fixed `rel`, overwriting any value that survived.
    if (tag === "a") {
        node.setAttribute("rel", "nofollow noopener noreferrer");
    }
};
250
/**
 * Runs `sanitizeNode` on each direct child of `node`, in document order.
 *
 * @param node - Parent whose children are sanitized in place.
 */
const sanitizeNodeChildren = (node) => {
    for (let current = node.firstChild, upcoming; current !== null; current = upcoming) {
        // Read the sibling before sanitizing: `sanitizeNode` may detach `current`.
        upcoming = current.nextSibling;
        sanitizeNode(current);
    }
};
258
/**
 * Sanitizes a node and its descendants in place.
 *
 * Document fragments only have their children sanitized. Non-element nodes are
 * left untouched. An element is removed from its parent when it is neither an
 * HTML nor an SVG element, or when its tag is not allowed; otherwise its
 * attributes are sanitized and its children (and, for `<template>`, its
 * content fragment) are processed recursively.
 *
 * @param node - Node to sanitize.
 */
const sanitizeNode = (node) => {
    if (node instanceof DocumentFragment) {
        sanitizeNodeChildren(node);
        return;
    }
    if (!(node instanceof Element)) {
        return;
    }
    const elementType = node instanceof SVGElement ? "svg" : "html";
    const tag = node.tagName.toLowerCase();
    if (!isSafeElement(node) || !isSafeTag(elementType, tag)) {
        node.remove();
        return;
    }
    sanitizeAttributes(elementType, node, tag);
    // Template content lives in a separate fragment, not among the child nodes.
    if (node instanceof HTMLTemplateElement) {
        sanitizeNode(node.content);
    }
    sanitizeNodeChildren(node);
};
8
282
/**
 * Sanitizes untrusted HTML by parsing it, pruning disallowed elements and
 * attributes, and serializing the result.
 *
 * The markup is processed twice: the output of the first pass is re-parsed and
 * sanitized again before being returned.
 *
 * @param unsafeHtml - Untrusted markup.
 * @returns The sanitized markup, trimmed.
 */
const sanitizeHtml = (unsafeHtml) => {
    const parser = new DOMParser();
    const sanitizePass = (html) => {
        const { body } = parser.parseFromString(html, "text/html");
        // Sanitize the children rather than `body` itself: `body` is not an
        // allowlisted tag, so `sanitizeNode(body)` would detach it from the
        // document and leave nothing to serialize.
        sanitizeNodeChildren(body);
        return body.innerHTML;
    };
    return sanitizePass(sanitizePass(unsafeHtml)).trim();
};
13
290
  export { sanitizeHtml };
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./api/classnames.ts";
2
2
  export * from "./api/css.ts";
3
3
  export * from "./api/html.ts";
4
+ export * from "./api/minify-html.ts";
4
5
  export * from "./api/sanitize-html.ts";
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./api/classnames.js";
2
2
  export * from "./api/css.js";
3
3
  export * from "./api/html.js";
4
+ export * from "./api/minify-html.js";
4
5
  export * from "./api/sanitize-html.js";
package/package.json CHANGED
@@ -18,5 +18,5 @@
18
18
  "lint": "biome check",
19
19
  "build": "tsc --build"
20
20
  },
21
- "version": "1.2.0"
21
+ "version": "1.4.0"
22
22
  }