libretto 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -36
- package/dist/cli/cli.js +39 -113
- package/dist/cli/commands/ai.js +1 -1
- package/dist/cli/commands/browser.js +87 -60
- package/dist/cli/commands/execution.js +201 -88
- package/dist/cli/commands/init.js +30 -8
- package/dist/cli/commands/logs.js +5 -6
- package/dist/cli/commands/shared.js +30 -29
- package/dist/cli/commands/snapshot.js +26 -39
- package/dist/cli/core/ai-config.js +9 -2
- package/dist/cli/core/api-snapshot-analyzer.js +15 -5
- package/dist/cli/core/browser.js +141 -33
- package/dist/cli/core/context.js +7 -18
- package/dist/cli/core/session-telemetry.js +5 -2
- package/dist/cli/core/session.js +23 -10
- package/dist/cli/core/snapshot-analyzer.js +16 -33
- package/dist/cli/core/snapshot-api-config.js +2 -6
- package/dist/cli/core/telemetry.js +10 -2
- package/dist/cli/framework/simple-cli.js +45 -25
- package/dist/cli/router.js +14 -21
- package/dist/cli/workers/run-integration-runtime.js +26 -7
- package/dist/cli/workers/run-integration-worker-protocol.js +3 -1
- package/dist/cli/workers/run-integration-worker.js +1 -4
- package/dist/index.d.ts +1 -2
- package/dist/index.js +7 -10
- package/dist/runtime/download/download.js +5 -1
- package/dist/runtime/extract/extract.js +11 -2
- package/dist/runtime/network/network.js +8 -1
- package/dist/runtime/recovery/agent.js +6 -2
- package/dist/runtime/recovery/errors.js +3 -1
- package/dist/runtime/recovery/recovery.js +3 -1
- package/dist/shared/condense-dom/condense-dom.js +6 -13
- package/dist/shared/config/config.d.ts +1 -9
- package/dist/shared/config/config.js +0 -18
- package/dist/shared/config/index.d.ts +2 -1
- package/dist/shared/config/index.js +0 -10
- package/dist/shared/debug/pause.js +9 -3
- package/dist/shared/instrumentation/instrument.js +101 -5
- package/dist/shared/llm/ai-sdk-adapter.js +3 -1
- package/dist/shared/llm/client.js +3 -1
- package/dist/shared/logger/index.js +4 -1
- package/dist/shared/paths/paths.js +2 -1
- package/dist/shared/paths/repo-root.d.ts +3 -0
- package/dist/shared/paths/repo-root.js +24 -0
- package/dist/shared/run/api.js +3 -1
- package/dist/shared/run/browser.js +7 -2
- package/dist/shared/state/session-state.d.ts +2 -1
- package/dist/shared/state/session-state.js +5 -2
- package/dist/shared/visualization/ghost-cursor.js +19 -10
- package/dist/shared/visualization/highlight.js +9 -6
- package/dist/shared/workflow/workflow.d.ts +4 -5
- package/dist/shared/workflow/workflow.js +3 -5
- package/package.json +11 -8
- package/scripts/check-skills-sync.mjs +25 -0
- package/scripts/compare-eval-summary.mjs +47 -0
- package/scripts/postinstall.mjs +26 -17
- package/scripts/prepare-release.sh +97 -0
- package/scripts/skills-libretto.mjs +103 -0
- package/scripts/summarize-evals.mjs +135 -0
- package/scripts/sync-skills.mjs +12 -0
- package/skills/libretto/SKILL.md +130 -377
- package/skills/libretto/references/auth-profiles.md +30 -0
- package/skills/libretto/{code-generation-rules.md → references/code-generation-rules.md} +27 -42
- package/skills/libretto/references/configuration-file-reference.md +53 -0
- package/skills/libretto/references/pages-and-page-targeting.md +29 -0
- package/skills/libretto/references/site-security-review.md +143 -0
- package/src/cli/cli.ts +86 -0
- package/src/cli/commands/ai.ts +35 -0
- package/src/cli/commands/browser.ts +189 -0
- package/src/cli/commands/execution.ts +822 -0
- package/src/cli/commands/init.ts +350 -0
- package/src/cli/commands/logs.ts +128 -0
- package/src/cli/commands/shared.ts +69 -0
- package/src/cli/commands/snapshot.ts +312 -0
- package/src/cli/core/ai-config.ts +264 -0
- package/src/cli/core/api-snapshot-analyzer.ts +108 -0
- package/src/cli/core/browser.ts +976 -0
- package/src/cli/core/context.ts +127 -0
- package/src/cli/core/pause-signals.ts +35 -0
- package/src/cli/core/session-telemetry.ts +564 -0
- package/src/cli/core/session.ts +223 -0
- package/src/cli/core/snapshot-analyzer.ts +855 -0
- package/src/cli/core/snapshot-api-config.ts +231 -0
- package/src/cli/core/telemetry.ts +459 -0
- package/src/cli/framework/simple-cli.ts +1340 -0
- package/src/cli/index.ts +13 -0
- package/src/cli/router.ts +20 -0
- package/src/cli/workers/run-integration-runtime.ts +338 -0
- package/src/cli/workers/run-integration-worker-protocol.ts +16 -0
- package/src/cli/workers/run-integration-worker.ts +72 -0
- package/src/index.ts +127 -0
- package/src/runtime/download/download.ts +104 -0
- package/src/runtime/download/index.ts +7 -0
- package/src/runtime/extract/extract.ts +102 -0
- package/src/runtime/extract/index.ts +1 -0
- package/src/runtime/network/index.ts +5 -0
- package/src/runtime/network/network.ts +119 -0
- package/{dist/runtime/recovery/agent.cjs → src/runtime/recovery/agent.ts} +114 -76
- package/src/runtime/recovery/errors.ts +155 -0
- package/src/runtime/recovery/index.ts +7 -0
- package/src/runtime/recovery/recovery.ts +53 -0
- package/{dist/shared/condense-dom/condense-dom.cjs → src/shared/condense-dom/condense-dom.ts} +249 -124
- package/src/shared/config/config.ts +3 -0
- package/src/shared/config/index.ts +0 -0
- package/src/shared/debug/index.ts +1 -0
- package/src/shared/debug/pause.ts +91 -0
- package/src/shared/instrumentation/errors.ts +84 -0
- package/src/shared/instrumentation/index.ts +9 -0
- package/src/shared/instrumentation/instrument.ts +406 -0
- package/src/shared/llm/ai-sdk-adapter.ts +81 -0
- package/{dist/shared/llm/client.cjs → src/shared/llm/client.ts} +86 -80
- package/src/shared/llm/index.ts +3 -0
- package/src/shared/llm/types.ts +63 -0
- package/src/shared/logger/index.ts +13 -0
- package/src/shared/logger/logger.ts +358 -0
- package/src/shared/logger/sinks.ts +148 -0
- package/src/shared/paths/paths.ts +110 -0
- package/src/shared/paths/repo-root.ts +27 -0
- package/src/shared/run/api.ts +6 -0
- package/src/shared/run/browser.ts +107 -0
- package/src/shared/state/index.ts +11 -0
- package/src/shared/state/session-state.ts +77 -0
- package/src/shared/visualization/ghost-cursor.ts +213 -0
- package/src/shared/visualization/highlight.ts +149 -0
- package/src/shared/visualization/index.ts +18 -0
- package/src/shared/workflow/workflow.ts +36 -0
- package/dist/index.cjs +0 -144
- package/dist/index.d.cts +0 -21
- package/dist/runtime/download/download.cjs +0 -70
- package/dist/runtime/download/download.d.cts +0 -35
- package/dist/runtime/download/index.cjs +0 -30
- package/dist/runtime/download/index.d.cts +0 -3
- package/dist/runtime/extract/extract.cjs +0 -88
- package/dist/runtime/extract/extract.d.cts +0 -23
- package/dist/runtime/extract/index.cjs +0 -28
- package/dist/runtime/extract/index.d.cts +0 -5
- package/dist/runtime/network/index.cjs +0 -28
- package/dist/runtime/network/index.d.cts +0 -4
- package/dist/runtime/network/network.cjs +0 -91
- package/dist/runtime/network/network.d.cts +0 -28
- package/dist/runtime/recovery/agent.d.cts +0 -13
- package/dist/runtime/recovery/errors.cjs +0 -124
- package/dist/runtime/recovery/errors.d.cts +0 -31
- package/dist/runtime/recovery/index.cjs +0 -34
- package/dist/runtime/recovery/index.d.cts +0 -7
- package/dist/runtime/recovery/recovery.cjs +0 -55
- package/dist/runtime/recovery/recovery.d.cts +0 -12
- package/dist/shared/condense-dom/condense-dom.d.cts +0 -34
- package/dist/shared/config/config.cjs +0 -44
- package/dist/shared/config/config.d.cts +0 -10
- package/dist/shared/config/index.cjs +0 -32
- package/dist/shared/config/index.d.cts +0 -1
- package/dist/shared/debug/index.cjs +0 -28
- package/dist/shared/debug/index.d.cts +0 -1
- package/dist/shared/debug/pause.cjs +0 -86
- package/dist/shared/debug/pause.d.cts +0 -12
- package/dist/shared/instrumentation/errors.cjs +0 -81
- package/dist/shared/instrumentation/errors.d.cts +0 -12
- package/dist/shared/instrumentation/index.cjs +0 -35
- package/dist/shared/instrumentation/index.d.cts +0 -6
- package/dist/shared/instrumentation/instrument.cjs +0 -206
- package/dist/shared/instrumentation/instrument.d.cts +0 -32
- package/dist/shared/llm/ai-sdk-adapter.cjs +0 -71
- package/dist/shared/llm/ai-sdk-adapter.d.cts +0 -22
- package/dist/shared/llm/client.d.cts +0 -13
- package/dist/shared/llm/index.cjs +0 -31
- package/dist/shared/llm/index.d.cts +0 -5
- package/dist/shared/llm/types.cjs +0 -16
- package/dist/shared/llm/types.d.cts +0 -67
- package/dist/shared/logger/index.cjs +0 -37
- package/dist/shared/logger/index.d.cts +0 -2
- package/dist/shared/logger/logger.cjs +0 -232
- package/dist/shared/logger/logger.d.cts +0 -86
- package/dist/shared/logger/sinks.cjs +0 -160
- package/dist/shared/logger/sinks.d.cts +0 -9
- package/dist/shared/paths/paths.cjs +0 -104
- package/dist/shared/paths/paths.d.cts +0 -10
- package/dist/shared/run/api.cjs +0 -28
- package/dist/shared/run/api.d.cts +0 -2
- package/dist/shared/run/browser.cjs +0 -98
- package/dist/shared/run/browser.d.cts +0 -22
- package/dist/shared/state/index.cjs +0 -38
- package/dist/shared/state/index.d.cts +0 -2
- package/dist/shared/state/session-state.cjs +0 -92
- package/dist/shared/state/session-state.d.cts +0 -40
- package/dist/shared/visualization/ghost-cursor.cjs +0 -174
- package/dist/shared/visualization/ghost-cursor.d.cts +0 -37
- package/dist/shared/visualization/highlight.cjs +0 -134
- package/dist/shared/visualization/highlight.d.cts +0 -22
- package/dist/shared/visualization/index.cjs +0 -45
- package/dist/shared/visualization/index.d.cts +0 -3
- package/dist/shared/workflow/workflow.cjs +0 -47
- package/dist/shared/workflow/workflow.d.cts +0 -21
- package/skills/libretto/integration-approach-selection.md +0 -174
package/{dist/shared/condense-dom/condense-dom.cjs → src/shared/condense-dom/condense-dom.ts}
RENAMED
|
@@ -1,28 +1,44 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
/**
|
|
2
|
+
* DOM condensation — reduces serialized HTML for LLM consumption.
|
|
3
|
+
*
|
|
4
|
+
* All rules run unconditionally (no tiers). The function operates on
|
|
5
|
+
* already-serialized HTML strings (the output of `page.content()`),
|
|
6
|
+
* not a browser-side DOM walk or parsed DOM tree.
|
|
7
|
+
*
|
|
8
|
+
* Rules applied in order:
|
|
9
|
+
* 1. Noscript blocks — remove entirely
|
|
10
|
+
* 2. HTML comments — remove entirely
|
|
11
|
+
* 3. Script contents — hollow out, keep tags + useful attributes
|
|
12
|
+
* 4. Style contents — hollow out, keep tags + useful attributes
|
|
13
|
+
* 5. Embedded binary data — replace base64 data URIs
|
|
14
|
+
* 6. Attribute allowlist — keep trusted attrs, special-case class/style/URLs
|
|
15
|
+
* 7. SVG elements — collapse to single tag, extract title/desc
|
|
16
|
+
* 8. Inline style properties — keep only layout-relevant props
|
|
17
|
+
* 9. Non-semantic class names — filter or delete class values
|
|
18
|
+
* 10. (Cross-reference IDs — preserved, no action needed)
|
|
19
|
+
* 11. Framework-internal and SVG visual attributes — remove
|
|
20
|
+
* 12. Whitespace — collapse (preserve <pre> content)
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
export type CondenseDomResult = {
|
|
24
|
+
/** The condensed HTML string. Valid, parseable HTML. */
|
|
25
|
+
html: string;
|
|
26
|
+
/** Character count of the input. */
|
|
27
|
+
originalLength: number;
|
|
28
|
+
/** Character count of the output. */
|
|
29
|
+
condensedLength: number;
|
|
30
|
+
/** Characters removed, keyed by rule name. */
|
|
31
|
+
reductions: Record<string, number>;
|
|
9
32
|
};
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
return to;
|
|
33
|
+
|
|
34
|
+
type ParsedAttribute = {
|
|
35
|
+
name: string;
|
|
36
|
+
rawToken: string;
|
|
37
|
+
value: string | null;
|
|
17
38
|
};
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
condenseDom: () => condenseDom
|
|
22
|
-
});
|
|
23
|
-
module.exports = __toCommonJS(condense_dom_exports);
|
|
24
|
-
const TEST_ATTRS = /* @__PURE__ */ new Set(["data-testid", "data-test", "data-qa", "data-cy"]);
|
|
25
|
-
const TRUSTED_ATTRS = /* @__PURE__ */ new Set([
|
|
39
|
+
|
|
40
|
+
const TEST_ATTRS = new Set(["data-testid", "data-test", "data-qa", "data-cy"]);
|
|
41
|
+
const TRUSTED_ATTRS = new Set([
|
|
26
42
|
"id",
|
|
27
43
|
"name",
|
|
28
44
|
"for",
|
|
@@ -38,9 +54,9 @@ const TRUSTED_ATTRS = /* @__PURE__ */ new Set([
|
|
|
38
54
|
"href",
|
|
39
55
|
"action",
|
|
40
56
|
"method",
|
|
41
|
-
"src"
|
|
57
|
+
"src",
|
|
42
58
|
]);
|
|
43
|
-
const STATE_ATTRS =
|
|
59
|
+
const STATE_ATTRS = new Set([
|
|
44
60
|
"disabled",
|
|
45
61
|
"hidden",
|
|
46
62
|
"inert",
|
|
@@ -49,15 +65,10 @@ const STATE_ATTRS = /* @__PURE__ */ new Set([
|
|
|
49
65
|
"checked",
|
|
50
66
|
"selected",
|
|
51
67
|
"open",
|
|
52
|
-
"multiple"
|
|
53
|
-
]);
|
|
54
|
-
const BOOLEAN_ATTRS = /* @__PURE__ */ new Set([
|
|
55
|
-
...STATE_ATTRS,
|
|
56
|
-
"async",
|
|
57
|
-
"defer",
|
|
58
|
-
"nomodule"
|
|
68
|
+
"multiple",
|
|
59
69
|
]);
|
|
60
|
-
const
|
|
70
|
+
const BOOLEAN_ATTRS = new Set([...STATE_ATTRS, "async", "defer", "nomodule"]);
|
|
71
|
+
const EMPTY_VALUE_DROP_ATTRS = new Set([
|
|
61
72
|
"alt",
|
|
62
73
|
"autocomplete",
|
|
63
74
|
"href",
|
|
@@ -68,10 +79,10 @@ const EMPTY_VALUE_DROP_ATTRS = /* @__PURE__ */ new Set([
|
|
|
68
79
|
"src",
|
|
69
80
|
"tabindex",
|
|
70
81
|
"title",
|
|
71
|
-
"type"
|
|
82
|
+
"type",
|
|
72
83
|
]);
|
|
73
|
-
const URL_ATTRS =
|
|
74
|
-
const SCRIPT_ATTRS =
|
|
84
|
+
const URL_ATTRS = new Set(["href", "src", "action"]);
|
|
85
|
+
const SCRIPT_ATTRS = new Set([
|
|
75
86
|
"src",
|
|
76
87
|
"type",
|
|
77
88
|
"id",
|
|
@@ -80,10 +91,10 @@ const SCRIPT_ATTRS = /* @__PURE__ */ new Set([
|
|
|
80
91
|
"crossorigin",
|
|
81
92
|
"integrity",
|
|
82
93
|
"nomodule",
|
|
83
|
-
"referrerpolicy"
|
|
94
|
+
"referrerpolicy",
|
|
84
95
|
]);
|
|
85
|
-
const STYLE_TAG_ATTRS =
|
|
86
|
-
const INTERACTIVE_TAGS =
|
|
96
|
+
const STYLE_TAG_ATTRS = new Set(["media", "type", "nonce", "title"]);
|
|
97
|
+
const INTERACTIVE_TAGS = new Set([
|
|
87
98
|
"a",
|
|
88
99
|
"button",
|
|
89
100
|
"input",
|
|
@@ -92,9 +103,9 @@ const INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
|
|
|
92
103
|
"form",
|
|
93
104
|
"details",
|
|
94
105
|
"dialog",
|
|
95
|
-
"label"
|
|
106
|
+
"label",
|
|
96
107
|
]);
|
|
97
|
-
const INTERACTIVE_ROLES =
|
|
108
|
+
const INTERACTIVE_ROLES = new Set([
|
|
98
109
|
"button",
|
|
99
110
|
"link",
|
|
100
111
|
"tab",
|
|
@@ -103,81 +114,103 @@ const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
|
|
|
103
114
|
"radio",
|
|
104
115
|
"switch",
|
|
105
116
|
"slider",
|
|
106
|
-
"combobox"
|
|
117
|
+
"combobox",
|
|
107
118
|
]);
|
|
108
|
-
const OPEN_TAG_PATTERN =
|
|
109
|
-
|
|
119
|
+
const OPEN_TAG_PATTERN =
|
|
120
|
+
/<([a-zA-Z][\w:-]*)(\s(?:[^"'<>/]|"[^"]*"|'[^']*')*)?\s*(\/?)>/g;
|
|
121
|
+
|
|
122
|
+
export function condenseDom(html: string): CondenseDomResult {
|
|
110
123
|
const originalLength = html.length;
|
|
111
|
-
const reductions = {};
|
|
112
|
-
|
|
124
|
+
const reductions: Record<string, number> = {};
|
|
125
|
+
|
|
126
|
+
function track(label: string, before: string, after: string): string {
|
|
113
127
|
const diff = before.length - after.length;
|
|
114
128
|
if (diff > 0) {
|
|
115
129
|
reductions[label] = (reductions[label] ?? 0) + diff;
|
|
116
130
|
}
|
|
117
131
|
return after;
|
|
118
132
|
}
|
|
133
|
+
|
|
119
134
|
let result = html;
|
|
135
|
+
|
|
136
|
+
// ── Rule 1: Noscript blocks ──────────────────────────────────────────
|
|
120
137
|
result = track(
|
|
121
138
|
"noscript",
|
|
122
139
|
result,
|
|
123
|
-
result.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, "")
|
|
140
|
+
result.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, ""),
|
|
124
141
|
);
|
|
142
|
+
|
|
143
|
+
// ── Rule 2: HTML comments ────────────────────────────────────────────
|
|
125
144
|
result = track(
|
|
126
145
|
"comments",
|
|
127
146
|
result,
|
|
128
|
-
result.replace(/<!--[\s\S]*?(?:-->|$)/g, "")
|
|
147
|
+
result.replace(/<!--[\s\S]*?(?:-->|$)/g, ""),
|
|
129
148
|
);
|
|
149
|
+
|
|
150
|
+
// ── Rule 3: Script contents ──────────────────────────────────────────
|
|
130
151
|
result = track(
|
|
131
152
|
"scripts",
|
|
132
153
|
result,
|
|
133
154
|
result.replace(
|
|
134
155
|
/(<script\b[^>]*>)([\s\S]*?)(<\/script(?:\s[^>]*)?>)/gi,
|
|
135
|
-
(_match, open, content, close) => {
|
|
156
|
+
(_match, open: string, content: string, close: string) => {
|
|
136
157
|
if (!content.trim()) return `${open}${close}`;
|
|
137
|
-
const isDataScript =
|
|
158
|
+
const isDataScript =
|
|
159
|
+
/type\s*=\s*["']application\/(json|ld\+json)["']/i.test(open);
|
|
138
160
|
if (isDataScript) {
|
|
139
161
|
return `${open}[JSON data, ${content.length} chars]${close}`;
|
|
140
162
|
}
|
|
141
163
|
return `${open}[script, ${content.length} chars]${close}`;
|
|
142
|
-
}
|
|
143
|
-
)
|
|
164
|
+
},
|
|
165
|
+
),
|
|
144
166
|
);
|
|
167
|
+
|
|
168
|
+
// ── Rule 4: Style contents ───────────────────────────────────────────
|
|
145
169
|
result = track(
|
|
146
170
|
"styles",
|
|
147
171
|
result,
|
|
148
172
|
result.replace(
|
|
149
173
|
/(<style\b[^>]*>)([\s\S]*?)(<\/style(?:\s[^>]*)?>)/gi,
|
|
150
|
-
(_match, open, content, close) => {
|
|
174
|
+
(_match, open: string, content: string, close: string) => {
|
|
151
175
|
if (!content.trim()) return `${open}${close}`;
|
|
152
176
|
return `${open}[CSS, ${content.length} chars]${close}`;
|
|
153
|
-
}
|
|
154
|
-
)
|
|
177
|
+
},
|
|
178
|
+
),
|
|
155
179
|
);
|
|
180
|
+
|
|
181
|
+
// ── Rule 5: Embedded binary data ─────────────────────────────────────
|
|
156
182
|
result = track(
|
|
157
183
|
"base64",
|
|
158
184
|
result,
|
|
159
185
|
result.replace(
|
|
160
186
|
/(src|href)\s*=\s*["'](data:[^;]+;base64,)[A-Za-z0-9+/=]{100,}["']/gi,
|
|
161
|
-
(_match, attr, prefix) => {
|
|
187
|
+
(_match, attr: string, prefix: string) => {
|
|
162
188
|
const mime = prefix.replace("data:", "").replace(";base64,", "");
|
|
163
189
|
return `${attr}="[base64 ${mime}]"`;
|
|
164
|
-
}
|
|
165
|
-
)
|
|
190
|
+
},
|
|
191
|
+
),
|
|
166
192
|
);
|
|
193
|
+
|
|
194
|
+
// ── Rule 6: Attribute allowlist ──────────────────────────────────────
|
|
167
195
|
result = track("attribute-allowlist", result, rewriteTagAttributes(result));
|
|
196
|
+
|
|
197
|
+
// ── Rule 7: SVG elements ─────────────────────────────────────────────
|
|
198
|
+
// Collapse each <svg> to a single tag, preserving key attributes.
|
|
199
|
+
// Extract <title>/<desc> text as aria-label if none exists.
|
|
200
|
+
// Iterate from innermost to outermost to handle nested SVGs correctly.
|
|
168
201
|
const svgPattern = /<svg\b([^>]*)>((?:(?!<svg\b)[\s\S])*?)<\/svg>/gi;
|
|
169
202
|
result = track(
|
|
170
203
|
"svg-collapse",
|
|
171
204
|
result,
|
|
172
205
|
(() => {
|
|
173
|
-
let prev;
|
|
206
|
+
let prev: string;
|
|
174
207
|
let current = result;
|
|
175
208
|
do {
|
|
176
209
|
prev = current;
|
|
177
210
|
current = current.replace(
|
|
178
211
|
svgPattern,
|
|
179
|
-
(_match, attrs, inner) => {
|
|
180
|
-
const keepAttrs = [];
|
|
212
|
+
(_match, attrs: string, inner: string) => {
|
|
213
|
+
const keepAttrs: string[] = [];
|
|
181
214
|
const attrPatterns = [
|
|
182
215
|
"id",
|
|
183
216
|
"class",
|
|
@@ -185,278 +218,370 @@ function condenseDom(html) {
|
|
|
185
218
|
"aria-label",
|
|
186
219
|
"aria-hidden",
|
|
187
220
|
"title",
|
|
188
|
-
"data-testid"
|
|
221
|
+
"data-testid",
|
|
189
222
|
];
|
|
190
223
|
for (const name of attrPatterns) {
|
|
191
224
|
const attrToken = findAttributeToken(attrs, name);
|
|
192
225
|
if (attrToken) keepAttrs.push(attrToken);
|
|
193
226
|
}
|
|
227
|
+
|
|
194
228
|
const hasAriaLabel = /aria-label\s*=/i.test(attrs);
|
|
195
229
|
if (!hasAriaLabel) {
|
|
196
|
-
const titleMatch = inner.match(
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
/<desc[^>]*>([^<]+)<\/desc>/i
|
|
201
|
-
);
|
|
202
|
-
const labelText = titleMatch?.[1]?.trim() || descMatch?.[1]?.trim();
|
|
230
|
+
const titleMatch = inner.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
231
|
+
const descMatch = inner.match(/<desc[^>]*>([^<]+)<\/desc>/i);
|
|
232
|
+
const labelText =
|
|
233
|
+
titleMatch?.[1]?.trim() || descMatch?.[1]?.trim();
|
|
203
234
|
if (labelText) {
|
|
204
235
|
keepAttrs.push(
|
|
205
|
-
`aria-label="${escapeHtmlAttribute(labelText)}"
|
|
236
|
+
`aria-label="${escapeHtmlAttribute(labelText)}"`,
|
|
206
237
|
);
|
|
207
238
|
}
|
|
208
239
|
}
|
|
209
|
-
|
|
240
|
+
|
|
241
|
+
const attrStr =
|
|
242
|
+
keepAttrs.length > 0 ? ` ${keepAttrs.join(" ")}` : "";
|
|
210
243
|
return `<svg${attrStr}><!-- [icon] --></svg>`;
|
|
211
|
-
}
|
|
244
|
+
},
|
|
212
245
|
);
|
|
213
246
|
svgPattern.lastIndex = 0;
|
|
214
247
|
} while (current !== prev);
|
|
215
248
|
return current;
|
|
216
|
-
})()
|
|
249
|
+
})(),
|
|
217
250
|
);
|
|
218
|
-
|
|
251
|
+
|
|
252
|
+
// ── Rule 8: Inline style properties ──────────────────────────────────
|
|
253
|
+
// Keep only layout-relevant properties.
|
|
254
|
+
const layoutProps =
|
|
255
|
+
/(?:^|;)\s*(?:display|visibility|opacity|pointer-events|position|z-index|overflow)(?:-[a-z]+)?\s*:[^;"]*/gi;
|
|
256
|
+
|
|
219
257
|
result = track(
|
|
220
258
|
"inline-styles",
|
|
221
259
|
result,
|
|
222
260
|
result.replace(
|
|
223
261
|
/\sstyle\s*=\s*["']([^"']*)["']/gi,
|
|
224
|
-
(_match, value) => {
|
|
225
|
-
const kept = [];
|
|
226
|
-
let propMatch;
|
|
262
|
+
(_match, value: string) => {
|
|
263
|
+
const kept: string[] = [];
|
|
264
|
+
let propMatch: RegExpExecArray | null;
|
|
227
265
|
layoutProps.lastIndex = 0;
|
|
228
266
|
while ((propMatch = layoutProps.exec(value)) !== null) {
|
|
229
267
|
kept.push(propMatch[0].replace(/^[;\s]+/, "").trim());
|
|
230
268
|
}
|
|
231
269
|
if (kept.length === 0) return "";
|
|
232
270
|
return ` style="${kept.join("; ")}"`;
|
|
233
|
-
}
|
|
234
|
-
)
|
|
271
|
+
},
|
|
272
|
+
),
|
|
235
273
|
);
|
|
274
|
+
|
|
275
|
+
// ── Rule 9: Non-semantic class names ─────────────────────────────────
|
|
236
276
|
result = track(
|
|
237
277
|
"obfuscated-classes",
|
|
238
278
|
result,
|
|
239
279
|
result.replace(
|
|
240
280
|
/\sclass\s*=\s*["']([^"']*)["']/gi,
|
|
241
|
-
(_match, value) => {
|
|
281
|
+
(_match, value: string) => {
|
|
242
282
|
const filtered = filterSemanticClasses(value);
|
|
243
283
|
if (!filtered) return "";
|
|
244
284
|
return ` class="${filtered}"`;
|
|
245
|
-
}
|
|
246
|
-
)
|
|
285
|
+
},
|
|
286
|
+
),
|
|
247
287
|
);
|
|
248
|
-
|
|
288
|
+
|
|
289
|
+
// ── Rule 10: Cross-reference IDs — no action, preserved by default ──
|
|
290
|
+
|
|
291
|
+
// ── Rule 11: Framework-internal and SVG visual attributes ────────────
|
|
292
|
+
const removableAttrs =
|
|
293
|
+
/\s(?:xmlns(?::[a-z]+)?|xml:space|xml:lang|fill|stroke|stroke-width|stroke-linecap|stroke-linejoin|stroke-miterlimit|stroke-dasharray|stroke-dashoffset|stroke-opacity|fill-opacity|clip-rule|fill-rule|focusable)\s*=\s*["'][^"']*["']/gi;
|
|
249
294
|
result = track(
|
|
250
295
|
"framework-svg-attrs",
|
|
251
296
|
result,
|
|
252
|
-
result.replace(removableAttrs, "")
|
|
297
|
+
result.replace(removableAttrs, ""),
|
|
253
298
|
);
|
|
254
|
-
|
|
299
|
+
|
|
300
|
+
// ── Rule 12: Whitespace ──────────────────────────────────────────────
|
|
301
|
+
// Collapse runs of spaces/tabs to a single space, multiple blank lines
|
|
302
|
+
// to a single newline. Preserve <pre> content.
|
|
303
|
+
const preBlocks: string[] = [];
|
|
255
304
|
result = result.replace(
|
|
256
305
|
/(<pre\b[^>]*>)([\s\S]*?)(<\/pre>)/gi,
|
|
257
|
-
(_match, open, content, close) => {
|
|
306
|
+
(_match, open: string, content: string, close: string) => {
|
|
258
307
|
const idx = preBlocks.length;
|
|
259
308
|
preBlocks.push(`${open}${content}${close}`);
|
|
260
309
|
return `__PRE_PLACEHOLDER_${idx}__`;
|
|
261
|
-
}
|
|
310
|
+
},
|
|
262
311
|
);
|
|
312
|
+
|
|
263
313
|
result = track(
|
|
264
314
|
"whitespace",
|
|
265
315
|
result,
|
|
266
|
-
result.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n")
|
|
316
|
+
result.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n"),
|
|
267
317
|
);
|
|
318
|
+
|
|
268
319
|
for (let i = 0; i < preBlocks.length; i++) {
|
|
269
320
|
const placeholder = `__PRE_PLACEHOLDER_${i}__`;
|
|
270
|
-
const preBlock = preBlocks[i]
|
|
321
|
+
const preBlock = preBlocks[i]!;
|
|
271
322
|
result = result.replace(placeholder, () => preBlock);
|
|
272
323
|
}
|
|
324
|
+
|
|
273
325
|
return {
|
|
274
326
|
html: result,
|
|
275
327
|
originalLength,
|
|
276
328
|
condensedLength: result.length,
|
|
277
|
-
reductions
|
|
329
|
+
reductions,
|
|
278
330
|
};
|
|
279
331
|
}
|
|
280
|
-
|
|
332
|
+
|
|
333
|
+
/**
 * Applies the attribute allowlist to every opening tag found in `html`.
 *
 * Tags with no attributes (or whose attribute text yields no parsed
 * attributes) are returned untouched. Otherwise each parsed attribute is
 * passed through `keepAttribute` and the tag is rebuilt from the tokens that
 * survive, keeping the tag name's original casing and any self-closing slash.
 */
function rewriteTagAttributes(html: string): string {
  const rewriteTag = (
    match: string,
    rawTagName: string,
    rawAttrs: string | undefined,
    selfClosing: string,
  ): string => {
    // Nothing to filter when the tag carries no attribute text at all.
    if (rawAttrs === undefined || rawAttrs.trim() === "") return match;

    const parsed = parseAttributes(rawAttrs);
    if (parsed.length === 0) return match;

    const lowerTag = rawTagName.toLowerCase();
    const interactive = isInteractiveElement(lowerTag, parsed);

    const survivors: string[] = [];
    for (const attr of parsed) {
      const token = keepAttribute(lowerTag, attr, interactive);
      if (token !== null) survivors.push(token);
    }

    const attrSuffix = survivors.length === 0 ? "" : ` ${survivors.join(" ")}`;
    const slash = selfClosing ? " /" : "";
    return `<${rawTagName}${attrSuffix}${slash}>`;
  };

  return html.replace(OPEN_TAG_PATTERN, rewriteTag);
}
|
|
296
|
-
|
|
359
|
+
|
|
360
|
+
/**
 * Decides whether a single attribute survives condensation.
 *
 * @param tagName - Lower-cased name of the element that owns the attribute.
 * @param attr - The parsed attribute (name, raw source token, value).
 * @param interactive - Whether the owning element counts as interactive.
 * @returns The attribute text to emit, or `null` to drop the attribute.
 */
function keepAttribute(
  tagName: string,
  attr: ParsedAttribute,
  interactive: boolean,
): string | null {
  const name = attr.name.toLowerCase();
  // `text` equals the attribute value whenever the value is non-blank, which
  // is the only situation in which it is used below.
  const text = attr.value ?? "";
  const blank = text.trim() === "";

  // class: keep only the class names that look semantic.
  if (name === "class") {
    const semantic = blank ? "" : filterSemanticClasses(text);
    return semantic ? serializeAttribute(attr.name, semantic) : null;
  }

  // style: re-serialized as-is here.
  if (name === "style") {
    return blank ? null : serializeAttribute(attr.name, text);
  }

  // ARIA attributes, test hooks and <style>-tag attributes are emitted
  // verbatim, but only when they carry a non-blank value.
  const verbatimWhenNonBlank =
    name.startsWith("aria-") ||
    TEST_ATTRS.has(name) ||
    (tagName === "style" && STYLE_TAG_ATTRS.has(name));
  if (verbatimWhenNonBlank) {
    return blank ? null : attr.rawToken;
  }

  // Allowed <script> attributes and state attributes are preserved even when
  // they have no value.
  const preservedAsIs =
    (tagName === "script" && SCRIPT_ATTRS.has(name)) || STATE_ATTRS.has(name);
  if (preservedAsIs) {
    return serializePreservedAttribute(attr);
  }

  // URL-bearing attributes: normalize the value, reusing the raw token when
  // normalization changed nothing.
  if (URL_ATTRS.has(name)) {
    if (blank) return null;
    const normalized = normalizeUrlValue(text);
    return normalized === text
      ? attr.rawToken
      : serializeAttribute(attr.name, normalized);
  }

  if (TRUSTED_ATTRS.has(name)) {
    return shouldDropEmptyValue(name, attr.value)
      ? null
      : serializePreservedAttribute(attr);
  }

  // Everything else is dropped unless it qualifies as a meaningful data-* hook.
  return shouldKeepCustomDataAttribute(tagName, name, attr.value, interactive)
    ? attr.rawToken
    : null;
}
|
|
342
|
-
|
|
421
|
+
|
|
422
|
+
/**
 * Serializes an attribute that has already been chosen for preservation.
 *
 * @returns The attribute's raw source token.
 */
function serializePreservedAttribute(attr: ParsedAttribute): string | null {
  const passthrough =
    BOOLEAN_ATTRS.has(attr.name.toLowerCase()) || attr.value === null;
  if (passthrough) return attr.rawToken;

  // NOTE(review): valued, non-boolean attributes are also emitted verbatim,
  // so every path currently yields the raw token and `null` is never returned.
  return attr.rawToken;
}
|
|
349
|
-
|
|
429
|
+
|
|
430
|
+
/**
 * Reports whether an attribute should be dropped because its value is blank.
 *
 * A valueless attribute (`value === null`) is never dropped here. A present
 * but blank value is dropped for `aria-*` names and for names listed in
 * `EMPTY_VALUE_DROP_ATTRS`.
 */
function shouldDropEmptyValue(name: string, value: string | null): boolean {
  const presentButBlank = value !== null && value.trim() === "";
  if (!presentButBlank) return false;
  return name.startsWith("aria-") || EMPTY_VALUE_DROP_ATTRS.has(name);
}
|
|
355
|
-
|
|
436
|
+
|
|
437
|
+
/**
 * Shortens a URL attribute value for LLM consumption.
 *
 * - `blob:`, `javascript:`, `vbscript:` and `data:` URLs are replaced with
 *   `<scheme>[omitted]` (scheme matched case-insensitively after trimming).
 * - Values of 160 characters or fewer are returned unchanged.
 * - Longer values are reduced to their location, with any query string
 *   replaced by `?[query omitted]`:
 *   - absolute URLs keep `protocol//host/path`;
 *   - protocol-relative URLs (`//host/path`) keep `//host/path`, so the
 *     origin they point at is not lost;
 *   - other relative URLs keep the resolved path plus fragment.
 * - Values the URL parser rejects are truncated to 96 characters plus
 *   `[omitted]`.
 *
 * @param value - Raw attribute value.
 * @returns The normalized value.
 */
function normalizeUrlValue(value: string): string {
  const loweredValue = value.trim().toLowerCase();
  for (const scheme of ["blob:", "javascript:", "vbscript:", "data:"]) {
    if (loweredValue.startsWith(scheme)) return `${scheme}[omitted]`;
  }
  if (value.length <= 160) return value;

  try {
    const isAbsolute = /^[a-z][a-z0-9+.-]*:/i.test(value);
    // "//host/path" has no scheme but does name a host; resolving it against
    // the placeholder base and keeping only the pathname would drop that host.
    const isProtocolRelative = !isAbsolute && value.startsWith("//");

    const parsed = isAbsolute
      ? new URL(value)
      : new URL(value, "https://condensed.local");

    let prefix: string;
    if (isAbsolute) {
      prefix = `${parsed.protocol}//${parsed.host}${parsed.pathname}`;
    } else if (isProtocolRelative) {
      prefix = `//${parsed.host}${parsed.pathname}`;
    } else {
      prefix = `${parsed.pathname}${parsed.hash}`;
    }

    const query = parsed.search ? "?[query omitted]" : "";
    return `${prefix}${query}`;
  } catch {
    return `${value.slice(0, 96)}[omitted]`;
  }
}
|
|
372
|
-
|
|
460
|
+
|
|
461
|
+
/**
 * Removes obfuscated-looking class names from a `class` attribute value.
 *
 * @param value - Whitespace-separated class list.
 * @returns The remaining class names joined by single spaces; empty when
 *   none remain.
 */
function filterSemanticClasses(value: string): string {
  const semantic: string[] = [];
  for (const token of value.split(/\s+/)) {
    if (token !== "" && !isObfuscatedClass(token)) {
      semantic.push(token);
    }
  }
  return semantic.join(" ");
}
|
|
377
|
-
|
|
466
|
+
|
|
467
|
+
/**
 * Heuristic check for class names that look machine-generated (hashes or
 * random IDs) rather than human-readable.
 *
 * A name is treated as obfuscated when it is longer than 80 characters,
 * matches one of the hash-like shapes below, or is at least 6 characters long
 * with two or more digits making up at least half as many characters as its
 * ASCII letters.
 */
function isObfuscatedClass(cls: string): boolean {
  if (cls.length > 80) return true;

  const hashLikeShapes = [
    /^_?[0-9a-f]{6,}$/i, // optional underscore + 6 or more hex characters
    /^[a-z]+_[0-9a-f]{4,}$/i, // letters, underscore, 4 or more hex characters
    /^[a-z]{1,2}[0-9]{2,}$/i, // one or two letters followed by digits
  ];
  if (hashLikeShapes.some((shape) => shape.test(cls))) return true;

  let digitCount = 0;
  let letterCount = 0;
  for (const ch of cls) {
    if (ch >= "0" && ch <= "9") {
      digitCount += 1;
    } else if ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z")) {
      letterCount += 1;
    }
  }

  return cls.length >= 6 && digitCount >= letterCount * 0.5 && digitCount >= 2;
}
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
const
|
|
390
|
-
|
|
483
|
+
|
|
484
|
+
/**
 * Splits the attribute portion of an opening tag into individual attributes.
 *
 * Each attribute records its name as written, the trimmed source token, and
 * its value with surrounding quotes removed (`null` when the attribute has no
 * `=value` part). Double-quoted, single-quoted and unquoted values are
 * recognized.
 */
function parseAttributes(rawAttrs: string): ParsedAttribute[] {
  const tokenPattern =
    /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;

  const parsed: ParsedAttribute[] = [];
  for (const hit of rawAttrs.matchAll(tokenPattern)) {
    const name = hit[1];
    if (!name) continue;

    // Capture groups 2-4 hold the double-quoted, single-quoted and unquoted
    // value forms respectively; at most one of them is set.
    parsed.push({
      name,
      rawToken: hit[0]!.trim(),
      value: hit[2] ?? hit[3] ?? hit[4] ?? null,
    });
  }
  return parsed;
}
|
|
402
|
-
|
|
502
|
+
|
|
503
|
+
/**
 * Reports whether an element should be treated as interactive.
 *
 * True when the tag is listed in `INTERACTIVE_TAGS`, when the element has a
 * `tabindex` or `contenteditable` attribute, or when its `role` value
 * (trimmed, case-insensitive) is listed in `INTERACTIVE_ROLES`.
 */
function isInteractiveElement(
  tagName: string,
  attrs: ParsedAttribute[],
): boolean {
  if (INTERACTIVE_TAGS.has(tagName)) return true;

  return attrs.some(({ name, value }) => {
    switch (name.toLowerCase()) {
      case "tabindex":
      case "contenteditable":
        return true;
      case "role": {
        const role = value?.trim().toLowerCase();
        return role !== undefined && role !== "" && INTERACTIVE_ROLES.has(role);
      }
      default:
        return false;
    }
  });
}
|
|
415
|
-
|
|
522
|
+
|
|
523
|
+
/**
 * Decides whether a custom `data-*` attribute is worth keeping.
 *
 * Only attributes on interactive elements qualify, excluding `<script>` and
 * `<style>` tags and the dedicated test attributes in `TEST_ATTRS`. The value
 * must be non-blank and at most 80 characters long, and both the key (the
 * part after `data-`) and the value must pass the "meaningful" heuristics.
 */
function shouldKeepCustomDataAttribute(
  tagName: string,
  attrName: string,
  value: string | null,
  interactive: boolean,
): boolean {
  const eligibleName =
    interactive && attrName.startsWith("data-") && !TEST_ATTRS.has(attrName);
  if (!eligibleName) return false;

  if (value === null || value.trim() === "" || value.length > 80) return false;
  if (tagName === "script" || tagName === "style") return false;

  const key = attrName.slice("data-".length);
  return looksMeaningfulToken(key) && looksMeaningfulDataValue(value);
}
|
|
427
|
-
|
|
542
|
+
|
|
543
|
+
/**
 * Heuristic: does a `data-*` key look like a meaningful, human-chosen name?
 *
 * The key must start with a letter, be 2-41 characters of letters, digits and
 * hyphens, contain a run of at least three letters, and must not contain any
 * of the denylisted fragments (tracking, telemetry, rendering internals, ...).
 */
function looksMeaningfulToken(value: string): boolean {
  const wellFormed = /^[a-z][a-z0-9-]{1,40}$/i.test(value);
  const hasWord = /[a-z]{3}/i.test(value);
  const isNoise =
    /(track|metric|telemetry|analytics|component|display|loaded|token|dps|color|screen|strict|rehydr|fetch)/i.test(
      value,
    );
  return wellFormed && hasWord && !isNoise;
}
|
|
435
|
-
|
|
555
|
+
|
|
556
|
+
/**
 * Heuristic: does a `data-*` value look like a short, plain identifier?
 *
 * Accepts non-empty values of at most 80 characters made only of ASCII
 * letters, digits, spaces and `: _ . / -`, and rejects anything containing
 * angle brackets or an `http://` / `https://` URL.
 */
function looksMeaningfulDataValue(value: string): boolean {
  const shortEnough = value.length <= 80;
  const hasMarkup = /[<>]/.test(value);
  const hasWebUrl = /https?:\/\//i.test(value);
  return (
    shortEnough &&
    !hasMarkup &&
    !hasWebUrl &&
    /^[a-z0-9:_./ -]+$/i.test(value)
  );
}
|
|
441
|
-
|
|
562
|
+
|
|
563
|
+
/**
 * Finds one attribute inside the raw attribute text of a tag and returns its
 * source token (`name`, `name="value"`, `name='value'` or `name=value`).
 *
 * The name is matched case-insensitively and must be a whole attribute name:
 * it has to start at the beginning of the text or after whitespace, and be
 * followed by whitespace, `=`, `/` or the end of the text. Without the
 * trailing boundary, looking up `aria-label` would match the prefix of
 * `aria-labelledby` and yield a bare `aria-label` token.
 *
 * @param attrs - Raw attribute text of an opening tag.
 * @param name - Attribute name to look for.
 * @returns The matching token, or `null` when the attribute is absent.
 */
function findAttributeToken(attrs: string, name: string): string | null {
  const match = attrs.match(
    new RegExp(
      `(?:^|\\s)(${escapeRegExp(name)}(?=[\\s=/]|$)(?:\\s*=\\s*(?:"[^"]*"|'[^']*'|[^\\s"'=<>\\x60]+))?)`,
      "i",
    ),
  );
  return match?.[1] ?? null;
}
|
|
450
|
-
|
|
572
|
+
|
|
573
|
+
/**
 * Escapes every regular-expression metacharacter in `value` with a backslash
 * so the result can be embedded in a `RegExp` source and matched literally.
 */
function escapeRegExp(value: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (ch) => `\\${ch}`);
}
|
|
453
|
-
|
|
576
|
+
|
|
577
|
+
/**
 * Renders an attribute as `name="value"`, passing the value through
 * `escapeHtmlAttribute` before it is placed inside the double quotes.
 */
function serializeAttribute(name: string, value: string): string {
  const escapedValue = escapeHtmlAttribute(value);
  return name + '="' + escapedValue + '"';
}
|
|
456
|
-
|
|
457
|
-
|
|
580
|
+
|
|
581
|
+
/**
 * Escapes a string for use inside a double-quoted HTML attribute value.
 *
 * Replaces `&`, `"`, `<` and `>` with their named character references.
 * The ampersand is handled first so the `&` introduced by the later
 * replacements is not escaped a second time.
 *
 * @param value - Raw attribute value.
 * @returns The value with the four characters escaped.
 */
function escapeHtmlAttribute(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}
|
|
459
|
-
// Annotate the CommonJS export names for ESM import in node:
|
|
460
|
-
0 && (module.exports = {
|
|
461
|
-
condenseDom
|
|
462
|
-
});
|