libretto 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/cli.js +20 -19
- package/dist/cli/commands/ai.js +1 -1
- package/dist/cli/commands/browser.js +3 -3
- package/dist/cli/commands/execution.js +3 -3
- package/dist/cli/commands/logs.js +1 -1
- package/dist/cli/core/browser.js +11 -6
- package/dist/cli/core/context.js +4 -18
- package/dist/cli/core/session.js +2 -2
- package/dist/cli/core/snapshot-analyzer.js +2 -2
- package/dist/cli/router.js +1 -1
- package/dist/cli/workers/run-integration-runtime.js +2 -2
- package/dist/shared/paths/paths.js +2 -1
- package/dist/shared/paths/repo-root.d.ts +3 -0
- package/dist/shared/paths/repo-root.js +24 -0
- package/package.json +6 -7
- package/scripts/postinstall.mjs +12 -3
- package/skills/libretto/SKILL.md +93 -404
- package/skills/libretto/references/auth-profiles.md +30 -0
- package/skills/libretto/references/pages-and-page-targeting.md +29 -0
- package/skills/libretto/references/reverse-engineering-network-requests.md +39 -0
- package/skills/libretto/references/user-action-log.md +31 -0
- package/src/cli/cli.ts +173 -0
- package/src/cli/commands/ai.ts +35 -0
- package/src/cli/commands/browser.ts +165 -0
- package/src/cli/commands/execution.ts +691 -0
- package/src/cli/commands/init.ts +327 -0
- package/src/cli/commands/logs.ts +128 -0
- package/src/cli/commands/shared.ts +70 -0
- package/src/cli/commands/snapshot.ts +327 -0
- package/src/cli/core/ai-config.ts +255 -0
- package/src/cli/core/api-snapshot-analyzer.ts +97 -0
- package/src/cli/core/browser.ts +839 -0
- package/src/cli/core/context.ts +122 -0
- package/src/cli/core/pause-signals.ts +35 -0
- package/src/cli/core/session-telemetry.ts +553 -0
- package/src/cli/core/session.ts +209 -0
- package/src/cli/core/snapshot-analyzer.ts +875 -0
- package/src/cli/core/snapshot-api-config.ts +236 -0
- package/src/cli/core/telemetry.ts +446 -0
- package/src/cli/framework/simple-cli.ts +1273 -0
- package/src/cli/index.ts +13 -0
- package/src/cli/router.ts +28 -0
- package/src/cli/workers/run-integration-runtime.ts +311 -0
- package/src/cli/workers/run-integration-worker-protocol.ts +14 -0
- package/src/cli/workers/run-integration-worker.ts +75 -0
- package/src/index.ts +120 -0
- package/src/runtime/download/download.ts +100 -0
- package/src/runtime/download/index.ts +7 -0
- package/src/runtime/extract/extract.ts +92 -0
- package/src/runtime/extract/index.ts +1 -0
- package/src/runtime/network/index.ts +5 -0
- package/src/runtime/network/network.ts +113 -0
- package/src/runtime/recovery/agent.ts +256 -0
- package/src/runtime/recovery/errors.ts +152 -0
- package/src/runtime/recovery/index.ts +7 -0
- package/src/runtime/recovery/recovery.ts +50 -0
- package/{dist/shared/condense-dom/condense-dom.cjs → src/shared/condense-dom/condense-dom.ts} +243 -115
- package/src/shared/config/config.ts +22 -0
- package/src/shared/config/index.ts +5 -0
- package/src/shared/debug/index.ts +1 -0
- package/src/shared/debug/pause.ts +85 -0
- package/src/shared/instrumentation/errors.ts +82 -0
- package/src/shared/instrumentation/index.ts +9 -0
- package/src/shared/instrumentation/instrument.ts +276 -0
- package/src/shared/llm/ai-sdk-adapter.ts +78 -0
- package/src/shared/llm/client.ts +217 -0
- package/src/shared/llm/index.ts +3 -0
- package/src/shared/llm/types.ts +63 -0
- package/src/shared/logger/index.ts +6 -0
- package/src/shared/logger/logger.ts +352 -0
- package/src/shared/logger/sinks.ts +144 -0
- package/src/shared/paths/paths.ts +109 -0
- package/src/shared/paths/repo-root.ts +27 -0
- package/src/shared/run/api.ts +2 -0
- package/src/shared/run/browser.ts +98 -0
- package/src/shared/state/index.ts +11 -0
- package/src/shared/state/session-state.ts +74 -0
- package/src/shared/visualization/ghost-cursor.ts +200 -0
- package/src/shared/visualization/highlight.ts +146 -0
- package/src/shared/visualization/index.ts +18 -0
- package/src/shared/workflow/workflow.ts +42 -0
- package/dist/index.cjs +0 -144
- package/dist/index.d.cts +0 -21
- package/dist/runtime/download/download.cjs +0 -70
- package/dist/runtime/download/download.d.cts +0 -35
- package/dist/runtime/download/index.cjs +0 -30
- package/dist/runtime/download/index.d.cts +0 -3
- package/dist/runtime/extract/extract.cjs +0 -88
- package/dist/runtime/extract/extract.d.cts +0 -23
- package/dist/runtime/extract/index.cjs +0 -28
- package/dist/runtime/extract/index.d.cts +0 -5
- package/dist/runtime/network/index.cjs +0 -28
- package/dist/runtime/network/index.d.cts +0 -4
- package/dist/runtime/network/network.cjs +0 -91
- package/dist/runtime/network/network.d.cts +0 -28
- package/dist/runtime/recovery/agent.cjs +0 -223
- package/dist/runtime/recovery/agent.d.cts +0 -13
- package/dist/runtime/recovery/errors.cjs +0 -124
- package/dist/runtime/recovery/errors.d.cts +0 -31
- package/dist/runtime/recovery/index.cjs +0 -34
- package/dist/runtime/recovery/index.d.cts +0 -7
- package/dist/runtime/recovery/recovery.cjs +0 -55
- package/dist/runtime/recovery/recovery.d.cts +0 -12
- package/dist/shared/condense-dom/condense-dom.d.cts +0 -34
- package/dist/shared/config/config.cjs +0 -44
- package/dist/shared/config/config.d.cts +0 -10
- package/dist/shared/config/index.cjs +0 -32
- package/dist/shared/config/index.d.cts +0 -1
- package/dist/shared/debug/index.cjs +0 -28
- package/dist/shared/debug/index.d.cts +0 -1
- package/dist/shared/debug/pause.cjs +0 -86
- package/dist/shared/debug/pause.d.cts +0 -12
- package/dist/shared/instrumentation/errors.cjs +0 -81
- package/dist/shared/instrumentation/errors.d.cts +0 -12
- package/dist/shared/instrumentation/index.cjs +0 -35
- package/dist/shared/instrumentation/index.d.cts +0 -6
- package/dist/shared/instrumentation/instrument.cjs +0 -206
- package/dist/shared/instrumentation/instrument.d.cts +0 -32
- package/dist/shared/llm/ai-sdk-adapter.cjs +0 -71
- package/dist/shared/llm/ai-sdk-adapter.d.cts +0 -22
- package/dist/shared/llm/client.cjs +0 -218
- package/dist/shared/llm/client.d.cts +0 -13
- package/dist/shared/llm/index.cjs +0 -31
- package/dist/shared/llm/index.d.cts +0 -5
- package/dist/shared/llm/types.cjs +0 -16
- package/dist/shared/llm/types.d.cts +0 -67
- package/dist/shared/logger/index.cjs +0 -37
- package/dist/shared/logger/index.d.cts +0 -2
- package/dist/shared/logger/logger.cjs +0 -232
- package/dist/shared/logger/logger.d.cts +0 -86
- package/dist/shared/logger/sinks.cjs +0 -160
- package/dist/shared/logger/sinks.d.cts +0 -9
- package/dist/shared/paths/paths.cjs +0 -104
- package/dist/shared/paths/paths.d.cts +0 -10
- package/dist/shared/run/api.cjs +0 -28
- package/dist/shared/run/api.d.cts +0 -2
- package/dist/shared/run/browser.cjs +0 -98
- package/dist/shared/run/browser.d.cts +0 -22
- package/dist/shared/state/index.cjs +0 -38
- package/dist/shared/state/index.d.cts +0 -2
- package/dist/shared/state/session-state.cjs +0 -92
- package/dist/shared/state/session-state.d.cts +0 -40
- package/dist/shared/visualization/ghost-cursor.cjs +0 -174
- package/dist/shared/visualization/ghost-cursor.d.cts +0 -37
- package/dist/shared/visualization/highlight.cjs +0 -134
- package/dist/shared/visualization/highlight.d.cts +0 -22
- package/dist/shared/visualization/index.cjs +0 -45
- package/dist/shared/visualization/index.d.cts +0 -3
- package/dist/shared/workflow/workflow.cjs +0 -47
- package/dist/shared/workflow/workflow.d.cts +0 -21
- package/skills/libretto/code-generation-rules.md +0 -223
- package/skills/libretto/integration-approach-selection.md +0 -174
package/{dist/shared/condense-dom/condense-dom.cjs → src/shared/condense-dom/condense-dom.ts}
RENAMED
|
@@ -1,28 +1,44 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
/**
|
|
2
|
+
* DOM condensation — reduces serialized HTML for LLM consumption.
|
|
3
|
+
*
|
|
4
|
+
* All rules run unconditionally (no tiers). The function operates on
|
|
5
|
+
* already-serialized HTML strings (the output of `page.content()`),
|
|
6
|
+
* not a browser-side DOM walk or parsed DOM tree.
|
|
7
|
+
*
|
|
8
|
+
* Rules applied in order:
|
|
9
|
+
* 1. Noscript blocks — remove entirely
|
|
10
|
+
* 2. HTML comments — remove entirely
|
|
11
|
+
* 3. Script contents — hollow out, keep tags + useful attributes
|
|
12
|
+
* 4. Style contents — hollow out, keep tags + useful attributes
|
|
13
|
+
* 5. Embedded binary data — replace base64 data URIs
|
|
14
|
+
* 6. Attribute allowlist — keep trusted attrs, special-case class/style/URLs
|
|
15
|
+
* 7. SVG elements — collapse to single tag, extract title/desc
|
|
16
|
+
* 8. Inline style properties — keep only layout-relevant props
|
|
17
|
+
* 9. Non-semantic class names — filter or delete class values
|
|
18
|
+
* 10. (Cross-reference IDs — preserved, no action needed)
|
|
19
|
+
* 11. Framework-internal and SVG visual attributes — remove
|
|
20
|
+
* 12. Whitespace — collapse (preserve <pre> content)
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
export type CondenseDomResult = {
|
|
24
|
+
/** The condensed HTML string. Valid, parseable HTML. */
|
|
25
|
+
html: string;
|
|
26
|
+
/** Character count of the input. */
|
|
27
|
+
originalLength: number;
|
|
28
|
+
/** Character count of the output. */
|
|
29
|
+
condensedLength: number;
|
|
30
|
+
/** Characters removed, keyed by rule name. */
|
|
31
|
+
reductions: Record<string, number>;
|
|
9
32
|
};
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
return to;
|
|
33
|
+
|
|
34
|
+
type ParsedAttribute = {
|
|
35
|
+
name: string;
|
|
36
|
+
rawToken: string;
|
|
37
|
+
value: string | null;
|
|
17
38
|
};
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
condenseDom: () => condenseDom
|
|
22
|
-
});
|
|
23
|
-
module.exports = __toCommonJS(condense_dom_exports);
|
|
24
|
-
const TEST_ATTRS = /* @__PURE__ */ new Set(["data-testid", "data-test", "data-qa", "data-cy"]);
|
|
25
|
-
const TRUSTED_ATTRS = /* @__PURE__ */ new Set([
|
|
39
|
+
|
|
40
|
+
const TEST_ATTRS = new Set(["data-testid", "data-test", "data-qa", "data-cy"]);
|
|
41
|
+
const TRUSTED_ATTRS = new Set([
|
|
26
42
|
"id",
|
|
27
43
|
"name",
|
|
28
44
|
"for",
|
|
@@ -38,9 +54,9 @@ const TRUSTED_ATTRS = /* @__PURE__ */ new Set([
|
|
|
38
54
|
"href",
|
|
39
55
|
"action",
|
|
40
56
|
"method",
|
|
41
|
-
"src"
|
|
57
|
+
"src",
|
|
42
58
|
]);
|
|
43
|
-
const STATE_ATTRS = /* @__PURE__ */ new Set([
|
|
59
|
+
const STATE_ATTRS = new Set([
|
|
44
60
|
"disabled",
|
|
45
61
|
"hidden",
|
|
46
62
|
"inert",
|
|
@@ -49,15 +65,15 @@ const STATE_ATTRS = /* @__PURE__ */ new Set([
|
|
|
49
65
|
"checked",
|
|
50
66
|
"selected",
|
|
51
67
|
"open",
|
|
52
|
-
"multiple"
|
|
68
|
+
"multiple",
|
|
53
69
|
]);
|
|
54
|
-
const BOOLEAN_ATTRS =
|
|
70
|
+
const BOOLEAN_ATTRS = new Set([
|
|
55
71
|
...STATE_ATTRS,
|
|
56
72
|
"async",
|
|
57
73
|
"defer",
|
|
58
|
-
"nomodule"
|
|
74
|
+
"nomodule",
|
|
59
75
|
]);
|
|
60
|
-
const EMPTY_VALUE_DROP_ATTRS = /* @__PURE__ */ new Set([
|
|
76
|
+
const EMPTY_VALUE_DROP_ATTRS = new Set([
|
|
61
77
|
"alt",
|
|
62
78
|
"autocomplete",
|
|
63
79
|
"href",
|
|
@@ -68,10 +84,10 @@ const EMPTY_VALUE_DROP_ATTRS = /* @__PURE__ */ new Set([
|
|
|
68
84
|
"src",
|
|
69
85
|
"tabindex",
|
|
70
86
|
"title",
|
|
71
|
-
"type"
|
|
87
|
+
"type",
|
|
72
88
|
]);
|
|
73
|
-
const URL_ATTRS =
|
|
74
|
-
const SCRIPT_ATTRS = /* @__PURE__ */ new Set([
|
|
89
|
+
const URL_ATTRS = new Set(["href", "src", "action"]);
|
|
90
|
+
const SCRIPT_ATTRS = new Set([
|
|
75
91
|
"src",
|
|
76
92
|
"type",
|
|
77
93
|
"id",
|
|
@@ -80,10 +96,10 @@ const SCRIPT_ATTRS = /* @__PURE__ */ new Set([
|
|
|
80
96
|
"crossorigin",
|
|
81
97
|
"integrity",
|
|
82
98
|
"nomodule",
|
|
83
|
-
"referrerpolicy"
|
|
99
|
+
"referrerpolicy",
|
|
84
100
|
]);
|
|
85
|
-
const STYLE_TAG_ATTRS =
|
|
86
|
-
const INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
|
|
101
|
+
const STYLE_TAG_ATTRS = new Set(["media", "type", "nonce", "title"]);
|
|
102
|
+
const INTERACTIVE_TAGS = new Set([
|
|
87
103
|
"a",
|
|
88
104
|
"button",
|
|
89
105
|
"input",
|
|
@@ -92,9 +108,9 @@ const INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
|
|
|
92
108
|
"form",
|
|
93
109
|
"details",
|
|
94
110
|
"dialog",
|
|
95
|
-
"label"
|
|
111
|
+
"label",
|
|
96
112
|
]);
|
|
97
|
-
const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
|
|
113
|
+
const INTERACTIVE_ROLES = new Set([
|
|
98
114
|
"button",
|
|
99
115
|
"link",
|
|
100
116
|
"tab",
|
|
@@ -103,81 +119,103 @@ const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
|
|
|
103
119
|
"radio",
|
|
104
120
|
"switch",
|
|
105
121
|
"slider",
|
|
106
|
-
"combobox"
|
|
122
|
+
"combobox",
|
|
107
123
|
]);
|
|
108
|
-
const OPEN_TAG_PATTERN =
|
|
109
|
-
|
|
124
|
+
const OPEN_TAG_PATTERN =
|
|
125
|
+
/<([a-zA-Z][\w:-]*)(\s(?:[^"'<>/]|"[^"]*"|'[^']*')*)?\s*(\/?)>/g;
|
|
126
|
+
|
|
127
|
+
export function condenseDom(html: string): CondenseDomResult {
|
|
110
128
|
const originalLength = html.length;
|
|
111
|
-
const reductions = {};
|
|
112
|
-
|
|
129
|
+
const reductions: Record<string, number> = {};
|
|
130
|
+
|
|
131
|
+
function track(label: string, before: string, after: string): string {
|
|
113
132
|
const diff = before.length - after.length;
|
|
114
133
|
if (diff > 0) {
|
|
115
134
|
reductions[label] = (reductions[label] ?? 0) + diff;
|
|
116
135
|
}
|
|
117
136
|
return after;
|
|
118
137
|
}
|
|
138
|
+
|
|
119
139
|
let result = html;
|
|
140
|
+
|
|
141
|
+
// ── Rule 1: Noscript blocks ──────────────────────────────────────────
|
|
120
142
|
result = track(
|
|
121
143
|
"noscript",
|
|
122
144
|
result,
|
|
123
|
-
result.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, "")
|
|
145
|
+
result.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, ""),
|
|
124
146
|
);
|
|
147
|
+
|
|
148
|
+
// ── Rule 2: HTML comments ────────────────────────────────────────────
|
|
125
149
|
result = track(
|
|
126
150
|
"comments",
|
|
127
151
|
result,
|
|
128
|
-
result.replace(/<!--[\s\S]*?(?:-->|$)/g, "")
|
|
152
|
+
result.replace(/<!--[\s\S]*?(?:-->|$)/g, ""),
|
|
129
153
|
);
|
|
154
|
+
|
|
155
|
+
// ── Rule 3: Script contents ──────────────────────────────────────────
|
|
130
156
|
result = track(
|
|
131
157
|
"scripts",
|
|
132
158
|
result,
|
|
133
159
|
result.replace(
|
|
134
160
|
/(<script\b[^>]*>)([\s\S]*?)(<\/script(?:\s[^>]*)?>)/gi,
|
|
135
|
-
(_match, open, content, close) => {
|
|
161
|
+
(_match, open: string, content: string, close: string) => {
|
|
136
162
|
if (!content.trim()) return `${open}${close}`;
|
|
137
|
-
const isDataScript =
|
|
163
|
+
const isDataScript =
|
|
164
|
+
/type\s*=\s*["']application\/(json|ld\+json)["']/i.test(open);
|
|
138
165
|
if (isDataScript) {
|
|
139
166
|
return `${open}[JSON data, ${content.length} chars]${close}`;
|
|
140
167
|
}
|
|
141
168
|
return `${open}[script, ${content.length} chars]${close}`;
|
|
142
|
-
}
|
|
143
|
-
)
|
|
169
|
+
},
|
|
170
|
+
),
|
|
144
171
|
);
|
|
172
|
+
|
|
173
|
+
// ── Rule 4: Style contents ───────────────────────────────────────────
|
|
145
174
|
result = track(
|
|
146
175
|
"styles",
|
|
147
176
|
result,
|
|
148
177
|
result.replace(
|
|
149
178
|
/(<style\b[^>]*>)([\s\S]*?)(<\/style(?:\s[^>]*)?>)/gi,
|
|
150
|
-
(_match, open, content, close) => {
|
|
179
|
+
(_match, open: string, content: string, close: string) => {
|
|
151
180
|
if (!content.trim()) return `${open}${close}`;
|
|
152
181
|
return `${open}[CSS, ${content.length} chars]${close}`;
|
|
153
|
-
}
|
|
154
|
-
)
|
|
182
|
+
},
|
|
183
|
+
),
|
|
155
184
|
);
|
|
185
|
+
|
|
186
|
+
// ── Rule 5: Embedded binary data ─────────────────────────────────────
|
|
156
187
|
result = track(
|
|
157
188
|
"base64",
|
|
158
189
|
result,
|
|
159
190
|
result.replace(
|
|
160
191
|
/(src|href)\s*=\s*["'](data:[^;]+;base64,)[A-Za-z0-9+/=]{100,}["']/gi,
|
|
161
|
-
(_match, attr, prefix) => {
|
|
192
|
+
(_match, attr: string, prefix: string) => {
|
|
162
193
|
const mime = prefix.replace("data:", "").replace(";base64,", "");
|
|
163
194
|
return `${attr}="[base64 ${mime}]"`;
|
|
164
|
-
}
|
|
165
|
-
)
|
|
195
|
+
},
|
|
196
|
+
),
|
|
166
197
|
);
|
|
198
|
+
|
|
199
|
+
// ── Rule 6: Attribute allowlist ──────────────────────────────────────
|
|
167
200
|
result = track("attribute-allowlist", result, rewriteTagAttributes(result));
|
|
201
|
+
|
|
202
|
+
// ── Rule 7: SVG elements ─────────────────────────────────────────────
|
|
203
|
+
// Collapse each <svg> to a single tag, preserving key attributes.
|
|
204
|
+
// Extract <title>/<desc> text as aria-label if none exists.
|
|
205
|
+
// Iterate from innermost to outermost to handle nested SVGs correctly.
|
|
168
206
|
const svgPattern = /<svg\b([^>]*)>((?:(?!<svg\b)[\s\S])*?)<\/svg>/gi;
|
|
169
207
|
result = track(
|
|
170
208
|
"svg-collapse",
|
|
171
209
|
result,
|
|
172
210
|
(() => {
|
|
173
|
-
let prev;
|
|
211
|
+
let prev: string;
|
|
174
212
|
let current = result;
|
|
175
213
|
do {
|
|
176
214
|
prev = current;
|
|
177
215
|
current = current.replace(
|
|
178
216
|
svgPattern,
|
|
179
|
-
(_match, attrs, inner) => {
|
|
180
|
-
const keepAttrs = [];
|
|
217
|
+
(_match, attrs: string, inner: string) => {
|
|
218
|
+
const keepAttrs: string[] = [];
|
|
181
219
|
const attrPatterns = [
|
|
182
220
|
"id",
|
|
183
221
|
"class",
|
|
@@ -185,246 +223,331 @@ function condenseDom(html) {
|
|
|
185
223
|
"aria-label",
|
|
186
224
|
"aria-hidden",
|
|
187
225
|
"title",
|
|
188
|
-
"data-testid"
|
|
226
|
+
"data-testid",
|
|
189
227
|
];
|
|
190
228
|
for (const name of attrPatterns) {
|
|
191
229
|
const attrToken = findAttributeToken(attrs, name);
|
|
192
230
|
if (attrToken) keepAttrs.push(attrToken);
|
|
193
231
|
}
|
|
232
|
+
|
|
194
233
|
const hasAriaLabel = /aria-label\s*=/i.test(attrs);
|
|
195
234
|
if (!hasAriaLabel) {
|
|
196
235
|
const titleMatch = inner.match(
|
|
197
|
-
/<title[^>]*>([^<]+)<\/title>/i
|
|
236
|
+
/<title[^>]*>([^<]+)<\/title>/i,
|
|
198
237
|
);
|
|
199
238
|
const descMatch = inner.match(
|
|
200
|
-
/<desc[^>]*>([^<]+)<\/desc>/i
|
|
239
|
+
/<desc[^>]*>([^<]+)<\/desc>/i,
|
|
201
240
|
);
|
|
202
|
-
const labelText =
|
|
241
|
+
const labelText =
|
|
242
|
+
titleMatch?.[1]?.trim() || descMatch?.[1]?.trim();
|
|
203
243
|
if (labelText) {
|
|
204
244
|
keepAttrs.push(
|
|
205
|
-
`aria-label="${escapeHtmlAttribute(labelText)}"`
|
|
245
|
+
`aria-label="${escapeHtmlAttribute(labelText)}"`,
|
|
206
246
|
);
|
|
207
247
|
}
|
|
208
248
|
}
|
|
209
|
-
|
|
249
|
+
|
|
250
|
+
const attrStr =
|
|
251
|
+
keepAttrs.length > 0 ? ` ${keepAttrs.join(" ")}` : "";
|
|
210
252
|
return `<svg${attrStr}><!-- [icon] --></svg>`;
|
|
211
|
-
}
|
|
253
|
+
},
|
|
212
254
|
);
|
|
213
255
|
svgPattern.lastIndex = 0;
|
|
214
256
|
} while (current !== prev);
|
|
215
257
|
return current;
|
|
216
|
-
})()
|
|
258
|
+
})(),
|
|
217
259
|
);
|
|
218
|
-
|
|
260
|
+
|
|
261
|
+
// ── Rule 8: Inline style properties ──────────────────────────────────
|
|
262
|
+
// Keep only layout-relevant properties.
|
|
263
|
+
const layoutProps =
|
|
264
|
+
/(?:^|;)\s*(?:display|visibility|opacity|pointer-events|position|z-index|overflow)(?:-[a-z]+)?\s*:[^;"]*/gi;
|
|
265
|
+
|
|
219
266
|
result = track(
|
|
220
267
|
"inline-styles",
|
|
221
268
|
result,
|
|
222
269
|
result.replace(
|
|
223
270
|
/\sstyle\s*=\s*["']([^"']*)["']/gi,
|
|
224
|
-
(_match, value) => {
|
|
225
|
-
const kept = [];
|
|
226
|
-
let propMatch;
|
|
271
|
+
(_match, value: string) => {
|
|
272
|
+
const kept: string[] = [];
|
|
273
|
+
let propMatch: RegExpExecArray | null;
|
|
227
274
|
layoutProps.lastIndex = 0;
|
|
228
275
|
while ((propMatch = layoutProps.exec(value)) !== null) {
|
|
229
276
|
kept.push(propMatch[0].replace(/^[;\s]+/, "").trim());
|
|
230
277
|
}
|
|
231
278
|
if (kept.length === 0) return "";
|
|
232
279
|
return ` style="${kept.join("; ")}"`;
|
|
233
|
-
}
|
|
234
|
-
)
|
|
280
|
+
},
|
|
281
|
+
),
|
|
235
282
|
);
|
|
283
|
+
|
|
284
|
+
// ── Rule 9: Non-semantic class names ─────────────────────────────────
|
|
236
285
|
result = track(
|
|
237
286
|
"obfuscated-classes",
|
|
238
287
|
result,
|
|
239
288
|
result.replace(
|
|
240
289
|
/\sclass\s*=\s*["']([^"']*)["']/gi,
|
|
241
|
-
(_match, value) => {
|
|
290
|
+
(_match, value: string) => {
|
|
242
291
|
const filtered = filterSemanticClasses(value);
|
|
243
292
|
if (!filtered) return "";
|
|
244
293
|
return ` class="${filtered}"`;
|
|
245
|
-
}
|
|
246
|
-
)
|
|
294
|
+
},
|
|
295
|
+
),
|
|
247
296
|
);
|
|
248
|
-
|
|
297
|
+
|
|
298
|
+
// ── Rule 10: Cross-reference IDs — no action, preserved by default ──
|
|
299
|
+
|
|
300
|
+
// ── Rule 11: Framework-internal and SVG visual attributes ────────────
|
|
301
|
+
const removableAttrs =
|
|
302
|
+
/\s(?:xmlns(?::[a-z]+)?|xml:space|xml:lang|fill|stroke|stroke-width|stroke-linecap|stroke-linejoin|stroke-miterlimit|stroke-dasharray|stroke-dashoffset|stroke-opacity|fill-opacity|clip-rule|fill-rule|focusable)\s*=\s*["'][^"']*["']/gi;
|
|
249
303
|
result = track(
|
|
250
304
|
"framework-svg-attrs",
|
|
251
305
|
result,
|
|
252
|
-
result.replace(removableAttrs, "")
|
|
306
|
+
result.replace(removableAttrs, ""),
|
|
253
307
|
);
|
|
254
|
-
|
|
308
|
+
|
|
309
|
+
// ── Rule 12: Whitespace ──────────────────────────────────────────────
|
|
310
|
+
// Collapse runs of spaces/tabs to a single space, multiple blank lines
|
|
311
|
+
// to a single newline. Preserve <pre> content.
|
|
312
|
+
const preBlocks: string[] = [];
|
|
255
313
|
result = result.replace(
|
|
256
314
|
/(<pre\b[^>]*>)([\s\S]*?)(<\/pre>)/gi,
|
|
257
|
-
(_match, open, content, close) => {
|
|
315
|
+
(_match, open: string, content: string, close: string) => {
|
|
258
316
|
const idx = preBlocks.length;
|
|
259
317
|
preBlocks.push(`${open}${content}${close}`);
|
|
260
318
|
return `__PRE_PLACEHOLDER_${idx}__`;
|
|
261
|
-
}
|
|
319
|
+
},
|
|
262
320
|
);
|
|
321
|
+
|
|
263
322
|
result = track(
|
|
264
323
|
"whitespace",
|
|
265
324
|
result,
|
|
266
|
-
result.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n")
|
|
325
|
+
result.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n"),
|
|
267
326
|
);
|
|
327
|
+
|
|
268
328
|
for (let i = 0; i < preBlocks.length; i++) {
|
|
269
329
|
const placeholder = `__PRE_PLACEHOLDER_${i}__`;
|
|
270
|
-
const preBlock = preBlocks[i];
|
|
330
|
+
const preBlock = preBlocks[i]!;
|
|
271
331
|
result = result.replace(placeholder, () => preBlock);
|
|
272
332
|
}
|
|
333
|
+
|
|
273
334
|
return {
|
|
274
335
|
html: result,
|
|
275
336
|
originalLength,
|
|
276
337
|
condensedLength: result.length,
|
|
277
|
-
reductions
|
|
338
|
+
reductions,
|
|
278
339
|
};
|
|
279
340
|
}
|
|
280
|
-
|
|
341
|
+
|
|
342
|
+
function rewriteTagAttributes(html: string): string {
|
|
281
343
|
return html.replace(
|
|
282
344
|
OPEN_TAG_PATTERN,
|
|
283
|
-
(match, rawTagName, rawAttrs, selfClosing) => {
|
|
345
|
+
(match, rawTagName: string, rawAttrs: string | undefined, selfClosing: string) => {
|
|
284
346
|
const tagName = rawTagName.toLowerCase();
|
|
285
347
|
if (!rawAttrs?.trim()) return match;
|
|
348
|
+
|
|
286
349
|
const attrs = parseAttributes(rawAttrs);
|
|
287
350
|
if (attrs.length === 0) return match;
|
|
351
|
+
|
|
288
352
|
const interactive = isInteractiveElement(tagName, attrs);
|
|
289
|
-
const kept = attrs
|
|
353
|
+
const kept = attrs
|
|
354
|
+
.map((attr) => keepAttribute(tagName, attr, interactive))
|
|
355
|
+
.filter((value): value is string => value !== null);
|
|
356
|
+
|
|
290
357
|
const attrStr = kept.length > 0 ? ` ${kept.join(" ")}` : "";
|
|
291
358
|
const closing = selfClosing ? " /" : "";
|
|
292
359
|
return `<${rawTagName}${attrStr}${closing}>`;
|
|
293
|
-
}
|
|
360
|
+
},
|
|
294
361
|
);
|
|
295
362
|
}
|
|
296
|
-
|
|
363
|
+
|
|
364
|
+
function keepAttribute(
|
|
365
|
+
tagName: string,
|
|
366
|
+
attr: ParsedAttribute,
|
|
367
|
+
interactive: boolean,
|
|
368
|
+
): string | null {
|
|
297
369
|
const name = attr.name.toLowerCase();
|
|
298
370
|
const value = attr.value;
|
|
371
|
+
|
|
299
372
|
if (name === "class") {
|
|
300
373
|
if (!value?.trim()) return null;
|
|
301
374
|
const filtered = filterSemanticClasses(value);
|
|
302
375
|
if (!filtered) return null;
|
|
303
376
|
return serializeAttribute(attr.name, filtered);
|
|
304
377
|
}
|
|
378
|
+
|
|
305
379
|
if (name === "style") {
|
|
306
380
|
if (!value?.trim()) return null;
|
|
307
381
|
return serializeAttribute(attr.name, value);
|
|
308
382
|
}
|
|
383
|
+
|
|
309
384
|
if (name.startsWith("aria-")) {
|
|
310
385
|
if (!value?.trim()) return null;
|
|
311
386
|
return attr.rawToken;
|
|
312
387
|
}
|
|
388
|
+
|
|
313
389
|
if (TEST_ATTRS.has(name)) {
|
|
314
390
|
if (!value?.trim()) return null;
|
|
315
391
|
return attr.rawToken;
|
|
316
392
|
}
|
|
393
|
+
|
|
317
394
|
if (tagName === "script" && SCRIPT_ATTRS.has(name)) {
|
|
318
395
|
return serializePreservedAttribute(attr);
|
|
319
396
|
}
|
|
397
|
+
|
|
320
398
|
if (tagName === "style" && STYLE_TAG_ATTRS.has(name)) {
|
|
321
399
|
if (!value?.trim()) return null;
|
|
322
400
|
return attr.rawToken;
|
|
323
401
|
}
|
|
402
|
+
|
|
324
403
|
if (STATE_ATTRS.has(name)) {
|
|
325
404
|
return serializePreservedAttribute(attr);
|
|
326
405
|
}
|
|
406
|
+
|
|
327
407
|
if (URL_ATTRS.has(name)) {
|
|
328
408
|
if (!value?.trim()) return null;
|
|
329
409
|
const normalized = normalizeUrlValue(value);
|
|
330
410
|
if (normalized === value) return attr.rawToken;
|
|
331
411
|
return serializeAttribute(attr.name, normalized);
|
|
332
412
|
}
|
|
413
|
+
|
|
333
414
|
if (TRUSTED_ATTRS.has(name)) {
|
|
334
415
|
if (shouldDropEmptyValue(name, value)) return null;
|
|
335
416
|
return serializePreservedAttribute(attr);
|
|
336
417
|
}
|
|
418
|
+
|
|
337
419
|
if (shouldKeepCustomDataAttribute(tagName, name, value, interactive)) {
|
|
338
420
|
return attr.rawToken;
|
|
339
421
|
}
|
|
422
|
+
|
|
340
423
|
return null;
|
|
341
424
|
}
|
|
342
|
-
|
|
425
|
+
|
|
426
|
+
function serializePreservedAttribute(attr: ParsedAttribute): string | null {
|
|
343
427
|
if (BOOLEAN_ATTRS.has(attr.name.toLowerCase())) {
|
|
344
428
|
return attr.rawToken;
|
|
345
429
|
}
|
|
346
430
|
if (attr.value === null) return attr.rawToken;
|
|
347
431
|
return attr.rawToken;
|
|
348
432
|
}
|
|
349
|
-
|
|
433
|
+
|
|
434
|
+
function shouldDropEmptyValue(
|
|
435
|
+
name: string,
|
|
436
|
+
value: string | null,
|
|
437
|
+
): boolean {
|
|
350
438
|
if (value === null) return false;
|
|
351
439
|
if (value.trim()) return false;
|
|
352
440
|
if (name.startsWith("aria-")) return true;
|
|
353
441
|
return EMPTY_VALUE_DROP_ATTRS.has(name);
|
|
354
442
|
}
|
|
355
|
-
|
|
443
|
+
|
|
444
|
+
function normalizeUrlValue(value: string): string {
|
|
356
445
|
const loweredValue = value.trim().toLowerCase();
|
|
357
446
|
if (loweredValue.startsWith("blob:")) return "blob:[omitted]";
|
|
358
447
|
if (loweredValue.startsWith("javascript:")) return "javascript:[omitted]";
|
|
359
448
|
if (loweredValue.startsWith("vbscript:")) return "vbscript:[omitted]";
|
|
360
449
|
if (loweredValue.startsWith("data:")) return "data:[omitted]";
|
|
361
450
|
if (value.length <= 160) return value;
|
|
451
|
+
|
|
362
452
|
try {
|
|
363
453
|
const isAbsolute = /^[a-z][a-z0-9+.-]*:/i.test(value);
|
|
364
|
-
const parsed = isAbsolute
|
|
365
|
-
|
|
454
|
+
const parsed = isAbsolute
|
|
455
|
+
? new URL(value)
|
|
456
|
+
: new URL(value, "https://condensed.local");
|
|
457
|
+
|
|
458
|
+
const prefix = isAbsolute
|
|
459
|
+
? `${parsed.protocol}//${parsed.host}${parsed.pathname}`
|
|
460
|
+
: `${parsed.pathname}${parsed.hash}`;
|
|
366
461
|
const query = parsed.search ? "?[query omitted]" : "";
|
|
367
462
|
return `${prefix}${query}`;
|
|
368
463
|
} catch {
|
|
369
464
|
return `${value.slice(0, 96)}[omitted]`;
|
|
370
465
|
}
|
|
371
466
|
}
|
|
372
|
-
|
|
467
|
+
|
|
468
|
+
function filterSemanticClasses(value: string): string {
|
|
373
469
|
const classes = value.split(/\s+/).filter(Boolean);
|
|
374
470
|
const kept = classes.filter((cls) => !isObfuscatedClass(cls));
|
|
375
471
|
return kept.join(" ");
|
|
376
472
|
}
|
|
377
|
-
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* Heuristic: a class name is "obfuscated" if it looks like a hash or random ID
|
|
476
|
+
* rather than a human-readable semantic name.
|
|
477
|
+
*/
|
|
478
|
+
function isObfuscatedClass(cls: string): boolean {
|
|
378
479
|
if (cls.length > 80) return true;
|
|
379
480
|
if (/^_?[0-9a-f]{6,}$/i.test(cls)) return true;
|
|
380
481
|
if (/^[a-z]+_[0-9a-f]{4,}$/i.test(cls)) return true;
|
|
381
482
|
if (/^[a-z]{1,2}[0-9]{2,}$/i.test(cls)) return true;
|
|
483
|
+
|
|
382
484
|
const digits = (cls.match(/[0-9]/g) || []).length;
|
|
383
485
|
const letters = (cls.match(/[a-zA-Z]/g) || []).length;
|
|
384
486
|
if (cls.length >= 6 && digits >= letters * 0.5 && digits >= 2) return true;
|
|
487
|
+
|
|
385
488
|
return false;
|
|
386
489
|
}
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
const
|
|
390
|
-
|
|
490
|
+
|
|
491
|
+
function parseAttributes(rawAttrs: string): ParsedAttribute[] {
|
|
492
|
+
const attrs: ParsedAttribute[] = [];
|
|
493
|
+
const attrPattern =
|
|
494
|
+
/([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
|
|
495
|
+
|
|
496
|
+
let match: RegExpExecArray | null;
|
|
391
497
|
while ((match = attrPattern.exec(rawAttrs)) !== null) {
|
|
392
498
|
const name = match[1];
|
|
393
499
|
if (!name) continue;
|
|
394
500
|
attrs.push({
|
|
395
501
|
name,
|
|
396
|
-
rawToken: match[0].trim(),
|
|
397
|
-
value: match[2] ?? match[3] ?? match[4] ?? null
|
|
502
|
+
rawToken: match[0]!.trim(),
|
|
503
|
+
value: match[2] ?? match[3] ?? match[4] ?? null,
|
|
398
504
|
});
|
|
399
505
|
}
|
|
506
|
+
|
|
400
507
|
return attrs;
|
|
401
508
|
}
|
|
402
|
-
|
|
509
|
+
|
|
510
|
+
function isInteractiveElement(
|
|
511
|
+
tagName: string,
|
|
512
|
+
attrs: ParsedAttribute[],
|
|
513
|
+
): boolean {
|
|
403
514
|
if (INTERACTIVE_TAGS.has(tagName)) return true;
|
|
515
|
+
|
|
404
516
|
for (const attr of attrs) {
|
|
405
517
|
const name = attr.name.toLowerCase();
|
|
406
518
|
if (name === "tabindex" || name === "contenteditable") return true;
|
|
407
519
|
if (name !== "role") continue;
|
|
520
|
+
|
|
408
521
|
const role = attr.value?.trim().toLowerCase();
|
|
409
522
|
if (role && INTERACTIVE_ROLES.has(role)) {
|
|
410
523
|
return true;
|
|
411
524
|
}
|
|
412
525
|
}
|
|
526
|
+
|
|
413
527
|
return false;
|
|
414
528
|
}
|
|
415
|
-
|
|
529
|
+
|
|
530
|
+
function shouldKeepCustomDataAttribute(
|
|
531
|
+
tagName: string,
|
|
532
|
+
attrName: string,
|
|
533
|
+
value: string | null,
|
|
534
|
+
interactive: boolean,
|
|
535
|
+
): boolean {
|
|
416
536
|
if (!interactive) return false;
|
|
417
537
|
if (!attrName.startsWith("data-")) return false;
|
|
418
538
|
if (TEST_ATTRS.has(attrName)) return false;
|
|
419
539
|
if (!value?.trim()) return false;
|
|
420
540
|
if (value.length > 80) return false;
|
|
421
541
|
if (tagName === "script" || tagName === "style") return false;
|
|
542
|
+
|
|
422
543
|
const key = attrName.slice("data-".length);
|
|
423
544
|
if (!looksMeaningfulToken(key)) return false;
|
|
424
545
|
if (!looksMeaningfulDataValue(value)) return false;
|
|
546
|
+
|
|
425
547
|
return true;
|
|
426
548
|
}
|
|
427
|
-
|
|
549
|
+
|
|
550
|
+
function looksMeaningfulToken(value: string): boolean {
|
|
428
551
|
if (!/^[a-z][a-z0-9-]{1,40}$/i.test(value)) return false;
|
|
429
552
|
if (!/[a-z]{3}/i.test(value)) return false;
|
|
430
553
|
if (/(track|metric|telemetry|analytics|component|display|loaded|token|dps|color|screen|strict|rehydr|fetch)/i.test(value)) {
|
|
@@ -432,31 +555,36 @@ function looksMeaningfulToken(value) {
|
|
|
432
555
|
}
|
|
433
556
|
return true;
|
|
434
557
|
}
|
|
435
|
-
|
|
558
|
+
|
|
559
|
+
function looksMeaningfulDataValue(value: string): boolean {
|
|
436
560
|
if (value.length > 80) return false;
|
|
437
561
|
if (/[<>]/.test(value)) return false;
|
|
438
562
|
if (/https?:\/\//i.test(value)) return false;
|
|
439
563
|
return /^[a-z0-9:_./ -]+$/i.test(value);
|
|
440
564
|
}
|
|
441
|
-
|
|
565
|
+
|
|
566
|
+
function findAttributeToken(attrs: string, name: string): string | null {
|
|
442
567
|
const match = attrs.match(
|
|
443
568
|
new RegExp(
|
|
444
569
|
`(?:^|\\s)(${escapeRegExp(name)}(?:\\s*=\\s*(?:"[^"]*"|'[^']*'|[^\\s"'=<>\\x60]+))?)`,
|
|
445
|
-
"i"
|
|
446
|
-
)
|
|
570
|
+
"i",
|
|
571
|
+
),
|
|
447
572
|
);
|
|
448
573
|
return match?.[1] ?? null;
|
|
449
574
|
}
|
|
450
|
-
|
|
575
|
+
|
|
576
|
+
function escapeRegExp(value: string): string {
|
|
451
577
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
452
578
|
}
|
|
453
|
-
|
|
579
|
+
|
|
580
|
+
function serializeAttribute(name: string, value: string): string {
|
|
454
581
|
return `${name}="${escapeHtmlAttribute(value)}"`;
|
|
455
582
|
}
|
|
456
|
-
|
|
457
|
-
|
|
583
|
+
|
|
584
|
+
function escapeHtmlAttribute(value: string): string {
|
|
585
|
+
return value
|
|
586
|
+
.replace(/&/g, "&amp;")
|
|
587
|
+
.replace(/"/g, "&quot;")
|
|
588
|
+
.replace(/</g, "&lt;")
|
|
589
|
+
.replace(/>/g, "&gt;");
|
|
458
590
|
}
|
|
459
|
-
// Annotate the CommonJS export names for ESM import in node:
|
|
460
|
-
0 && (module.exports = {
|
|
461
|
-
condenseDom
|
|
462
|
-
});
|