libretto 0.4.4 → 0.5.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (152) hide show
  1. package/dist/cli/cli.js +20 -19
  2. package/dist/cli/commands/ai.js +1 -1
  3. package/dist/cli/commands/browser.js +3 -3
  4. package/dist/cli/commands/execution.js +3 -3
  5. package/dist/cli/commands/logs.js +1 -1
  6. package/dist/cli/core/browser.js +11 -6
  7. package/dist/cli/core/context.js +4 -18
  8. package/dist/cli/core/session.js +2 -2
  9. package/dist/cli/core/snapshot-analyzer.js +2 -2
  10. package/dist/cli/router.js +1 -1
  11. package/dist/cli/workers/run-integration-runtime.js +2 -2
  12. package/dist/shared/paths/paths.js +2 -1
  13. package/dist/shared/paths/repo-root.d.ts +3 -0
  14. package/dist/shared/paths/repo-root.js +24 -0
  15. package/package.json +6 -7
  16. package/scripts/postinstall.mjs +12 -3
  17. package/skills/libretto/SKILL.md +93 -404
  18. package/skills/libretto/references/auth-profiles.md +30 -0
  19. package/skills/libretto/references/pages-and-page-targeting.md +29 -0
  20. package/skills/libretto/references/reverse-engineering-network-requests.md +39 -0
  21. package/skills/libretto/references/user-action-log.md +31 -0
  22. package/src/cli/cli.ts +173 -0
  23. package/src/cli/commands/ai.ts +35 -0
  24. package/src/cli/commands/browser.ts +165 -0
  25. package/src/cli/commands/execution.ts +691 -0
  26. package/src/cli/commands/init.ts +327 -0
  27. package/src/cli/commands/logs.ts +128 -0
  28. package/src/cli/commands/shared.ts +70 -0
  29. package/src/cli/commands/snapshot.ts +327 -0
  30. package/src/cli/core/ai-config.ts +255 -0
  31. package/src/cli/core/api-snapshot-analyzer.ts +97 -0
  32. package/src/cli/core/browser.ts +839 -0
  33. package/src/cli/core/context.ts +122 -0
  34. package/src/cli/core/pause-signals.ts +35 -0
  35. package/src/cli/core/session-telemetry.ts +553 -0
  36. package/src/cli/core/session.ts +209 -0
  37. package/src/cli/core/snapshot-analyzer.ts +875 -0
  38. package/src/cli/core/snapshot-api-config.ts +236 -0
  39. package/src/cli/core/telemetry.ts +446 -0
  40. package/src/cli/framework/simple-cli.ts +1273 -0
  41. package/src/cli/index.ts +13 -0
  42. package/src/cli/router.ts +28 -0
  43. package/src/cli/workers/run-integration-runtime.ts +311 -0
  44. package/src/cli/workers/run-integration-worker-protocol.ts +14 -0
  45. package/src/cli/workers/run-integration-worker.ts +75 -0
  46. package/src/index.ts +120 -0
  47. package/src/runtime/download/download.ts +100 -0
  48. package/src/runtime/download/index.ts +7 -0
  49. package/src/runtime/extract/extract.ts +92 -0
  50. package/src/runtime/extract/index.ts +1 -0
  51. package/src/runtime/network/index.ts +5 -0
  52. package/src/runtime/network/network.ts +113 -0
  53. package/src/runtime/recovery/agent.ts +256 -0
  54. package/src/runtime/recovery/errors.ts +152 -0
  55. package/src/runtime/recovery/index.ts +7 -0
  56. package/src/runtime/recovery/recovery.ts +50 -0
  57. package/{dist/shared/condense-dom/condense-dom.cjs → src/shared/condense-dom/condense-dom.ts} +243 -115
  58. package/src/shared/config/config.ts +22 -0
  59. package/src/shared/config/index.ts +5 -0
  60. package/src/shared/debug/index.ts +1 -0
  61. package/src/shared/debug/pause.ts +85 -0
  62. package/src/shared/instrumentation/errors.ts +82 -0
  63. package/src/shared/instrumentation/index.ts +9 -0
  64. package/src/shared/instrumentation/instrument.ts +276 -0
  65. package/src/shared/llm/ai-sdk-adapter.ts +78 -0
  66. package/src/shared/llm/client.ts +217 -0
  67. package/src/shared/llm/index.ts +3 -0
  68. package/src/shared/llm/types.ts +63 -0
  69. package/src/shared/logger/index.ts +6 -0
  70. package/src/shared/logger/logger.ts +352 -0
  71. package/src/shared/logger/sinks.ts +144 -0
  72. package/src/shared/paths/paths.ts +109 -0
  73. package/src/shared/paths/repo-root.ts +27 -0
  74. package/src/shared/run/api.ts +2 -0
  75. package/src/shared/run/browser.ts +98 -0
  76. package/src/shared/state/index.ts +11 -0
  77. package/src/shared/state/session-state.ts +74 -0
  78. package/src/shared/visualization/ghost-cursor.ts +200 -0
  79. package/src/shared/visualization/highlight.ts +146 -0
  80. package/src/shared/visualization/index.ts +18 -0
  81. package/src/shared/workflow/workflow.ts +42 -0
  82. package/dist/index.cjs +0 -144
  83. package/dist/index.d.cts +0 -21
  84. package/dist/runtime/download/download.cjs +0 -70
  85. package/dist/runtime/download/download.d.cts +0 -35
  86. package/dist/runtime/download/index.cjs +0 -30
  87. package/dist/runtime/download/index.d.cts +0 -3
  88. package/dist/runtime/extract/extract.cjs +0 -88
  89. package/dist/runtime/extract/extract.d.cts +0 -23
  90. package/dist/runtime/extract/index.cjs +0 -28
  91. package/dist/runtime/extract/index.d.cts +0 -5
  92. package/dist/runtime/network/index.cjs +0 -28
  93. package/dist/runtime/network/index.d.cts +0 -4
  94. package/dist/runtime/network/network.cjs +0 -91
  95. package/dist/runtime/network/network.d.cts +0 -28
  96. package/dist/runtime/recovery/agent.cjs +0 -223
  97. package/dist/runtime/recovery/agent.d.cts +0 -13
  98. package/dist/runtime/recovery/errors.cjs +0 -124
  99. package/dist/runtime/recovery/errors.d.cts +0 -31
  100. package/dist/runtime/recovery/index.cjs +0 -34
  101. package/dist/runtime/recovery/index.d.cts +0 -7
  102. package/dist/runtime/recovery/recovery.cjs +0 -55
  103. package/dist/runtime/recovery/recovery.d.cts +0 -12
  104. package/dist/shared/condense-dom/condense-dom.d.cts +0 -34
  105. package/dist/shared/config/config.cjs +0 -44
  106. package/dist/shared/config/config.d.cts +0 -10
  107. package/dist/shared/config/index.cjs +0 -32
  108. package/dist/shared/config/index.d.cts +0 -1
  109. package/dist/shared/debug/index.cjs +0 -28
  110. package/dist/shared/debug/index.d.cts +0 -1
  111. package/dist/shared/debug/pause.cjs +0 -86
  112. package/dist/shared/debug/pause.d.cts +0 -12
  113. package/dist/shared/instrumentation/errors.cjs +0 -81
  114. package/dist/shared/instrumentation/errors.d.cts +0 -12
  115. package/dist/shared/instrumentation/index.cjs +0 -35
  116. package/dist/shared/instrumentation/index.d.cts +0 -6
  117. package/dist/shared/instrumentation/instrument.cjs +0 -206
  118. package/dist/shared/instrumentation/instrument.d.cts +0 -32
  119. package/dist/shared/llm/ai-sdk-adapter.cjs +0 -71
  120. package/dist/shared/llm/ai-sdk-adapter.d.cts +0 -22
  121. package/dist/shared/llm/client.cjs +0 -218
  122. package/dist/shared/llm/client.d.cts +0 -13
  123. package/dist/shared/llm/index.cjs +0 -31
  124. package/dist/shared/llm/index.d.cts +0 -5
  125. package/dist/shared/llm/types.cjs +0 -16
  126. package/dist/shared/llm/types.d.cts +0 -67
  127. package/dist/shared/logger/index.cjs +0 -37
  128. package/dist/shared/logger/index.d.cts +0 -2
  129. package/dist/shared/logger/logger.cjs +0 -232
  130. package/dist/shared/logger/logger.d.cts +0 -86
  131. package/dist/shared/logger/sinks.cjs +0 -160
  132. package/dist/shared/logger/sinks.d.cts +0 -9
  133. package/dist/shared/paths/paths.cjs +0 -104
  134. package/dist/shared/paths/paths.d.cts +0 -10
  135. package/dist/shared/run/api.cjs +0 -28
  136. package/dist/shared/run/api.d.cts +0 -2
  137. package/dist/shared/run/browser.cjs +0 -98
  138. package/dist/shared/run/browser.d.cts +0 -22
  139. package/dist/shared/state/index.cjs +0 -38
  140. package/dist/shared/state/index.d.cts +0 -2
  141. package/dist/shared/state/session-state.cjs +0 -92
  142. package/dist/shared/state/session-state.d.cts +0 -40
  143. package/dist/shared/visualization/ghost-cursor.cjs +0 -174
  144. package/dist/shared/visualization/ghost-cursor.d.cts +0 -37
  145. package/dist/shared/visualization/highlight.cjs +0 -134
  146. package/dist/shared/visualization/highlight.d.cts +0 -22
  147. package/dist/shared/visualization/index.cjs +0 -45
  148. package/dist/shared/visualization/index.d.cts +0 -3
  149. package/dist/shared/workflow/workflow.cjs +0 -47
  150. package/dist/shared/workflow/workflow.d.cts +0 -21
  151. package/skills/libretto/code-generation-rules.md +0 -223
  152. package/skills/libretto/integration-approach-selection.md +0 -174
@@ -1,28 +1,44 @@
1
- "use strict";
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __hasOwnProp = Object.prototype.hasOwnProperty;
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
1
+ /**
2
+ * DOM condensation — reduces serialized HTML for LLM consumption.
3
+ *
4
+ * All rules run unconditionally (no tiers). The function operates on
5
+ * already-serialized HTML strings (the output of `page.content()`),
6
+ * not a browser-side DOM walk or parsed DOM tree.
7
+ *
8
+ * Rules applied in order:
9
+ * 1. Noscript blocks — remove entirely
10
+ * 2. HTML comments — remove entirely
11
+ * 3. Script contents — hollow out, keep tags + useful attributes
12
+ * 4. Style contents — hollow out, keep tags + useful attributes
13
+ * 5. Embedded binary data — replace base64 data URIs
14
+ * 6. Attribute allowlist — keep trusted attrs, special-case class/style/URLs
15
+ * 7. SVG elements — collapse to single tag, extract title/desc
16
+ * 8. Inline style properties — keep only layout-relevant props
17
+ * 9. Non-semantic class names — filter or delete class values
18
+ * 10. (Cross-reference IDs — preserved, no action needed)
19
+ * 11. Framework-internal and SVG visual attributes — remove
20
+ * 12. Whitespace — collapse (preserve <pre> content)
21
+ */
22
+
23
+ export type CondenseDomResult = {
24
+ /** The condensed HTML string. Valid, parseable HTML. */
25
+ html: string;
26
+ /** Character count of the input. */
27
+ originalLength: number;
28
+ /** Character count of the output. */
29
+ condensedLength: number;
30
+ /** Characters removed, keyed by rule name. */
31
+ reductions: Record<string, number>;
9
32
  };
10
- var __copyProps = (to, from, except, desc) => {
11
- if (from && typeof from === "object" || typeof from === "function") {
12
- for (let key of __getOwnPropNames(from))
13
- if (!__hasOwnProp.call(to, key) && key !== except)
14
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
- }
16
- return to;
33
+
34
+ type ParsedAttribute = {
35
+ name: string;
36
+ rawToken: string;
37
+ value: string | null;
17
38
  };
18
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
- var condense_dom_exports = {};
20
- __export(condense_dom_exports, {
21
- condenseDom: () => condenseDom
22
- });
23
- module.exports = __toCommonJS(condense_dom_exports);
24
- const TEST_ATTRS = /* @__PURE__ */ new Set(["data-testid", "data-test", "data-qa", "data-cy"]);
25
- const TRUSTED_ATTRS = /* @__PURE__ */ new Set([
39
+
40
+ const TEST_ATTRS = new Set(["data-testid", "data-test", "data-qa", "data-cy"]);
41
+ const TRUSTED_ATTRS = new Set([
26
42
  "id",
27
43
  "name",
28
44
  "for",
@@ -38,9 +54,9 @@ const TRUSTED_ATTRS = /* @__PURE__ */ new Set([
38
54
  "href",
39
55
  "action",
40
56
  "method",
41
- "src"
57
+ "src",
42
58
  ]);
43
- const STATE_ATTRS = /* @__PURE__ */ new Set([
59
+ const STATE_ATTRS = new Set([
44
60
  "disabled",
45
61
  "hidden",
46
62
  "inert",
@@ -49,15 +65,15 @@ const STATE_ATTRS = /* @__PURE__ */ new Set([
49
65
  "checked",
50
66
  "selected",
51
67
  "open",
52
- "multiple"
68
+ "multiple",
53
69
  ]);
54
- const BOOLEAN_ATTRS = /* @__PURE__ */ new Set([
70
+ const BOOLEAN_ATTRS = new Set([
55
71
  ...STATE_ATTRS,
56
72
  "async",
57
73
  "defer",
58
- "nomodule"
74
+ "nomodule",
59
75
  ]);
60
- const EMPTY_VALUE_DROP_ATTRS = /* @__PURE__ */ new Set([
76
+ const EMPTY_VALUE_DROP_ATTRS = new Set([
61
77
  "alt",
62
78
  "autocomplete",
63
79
  "href",
@@ -68,10 +84,10 @@ const EMPTY_VALUE_DROP_ATTRS = /* @__PURE__ */ new Set([
68
84
  "src",
69
85
  "tabindex",
70
86
  "title",
71
- "type"
87
+ "type",
72
88
  ]);
73
- const URL_ATTRS = /* @__PURE__ */ new Set(["href", "src", "action"]);
74
- const SCRIPT_ATTRS = /* @__PURE__ */ new Set([
89
+ const URL_ATTRS = new Set(["href", "src", "action"]);
90
+ const SCRIPT_ATTRS = new Set([
75
91
  "src",
76
92
  "type",
77
93
  "id",
@@ -80,10 +96,10 @@ const SCRIPT_ATTRS = /* @__PURE__ */ new Set([
80
96
  "crossorigin",
81
97
  "integrity",
82
98
  "nomodule",
83
- "referrerpolicy"
99
+ "referrerpolicy",
84
100
  ]);
85
- const STYLE_TAG_ATTRS = /* @__PURE__ */ new Set(["media", "type", "nonce", "title"]);
86
- const INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
101
+ const STYLE_TAG_ATTRS = new Set(["media", "type", "nonce", "title"]);
102
+ const INTERACTIVE_TAGS = new Set([
87
103
  "a",
88
104
  "button",
89
105
  "input",
@@ -92,9 +108,9 @@ const INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
92
108
  "form",
93
109
  "details",
94
110
  "dialog",
95
- "label"
111
+ "label",
96
112
  ]);
97
- const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
113
+ const INTERACTIVE_ROLES = new Set([
98
114
  "button",
99
115
  "link",
100
116
  "tab",
@@ -103,81 +119,103 @@ const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
103
119
  "radio",
104
120
  "switch",
105
121
  "slider",
106
- "combobox"
122
+ "combobox",
107
123
  ]);
108
- const OPEN_TAG_PATTERN = /<([a-zA-Z][\w:-]*)(\s(?:[^"'<>/]|"[^"]*"|'[^']*')*)?\s*(\/?)>/g;
109
- function condenseDom(html) {
124
+ const OPEN_TAG_PATTERN =
125
+ /<([a-zA-Z][\w:-]*)(\s(?:[^"'<>/]|"[^"]*"|'[^']*')*)?\s*(\/?)>/g;
126
+
127
+ export function condenseDom(html: string): CondenseDomResult {
110
128
  const originalLength = html.length;
111
- const reductions = {};
112
- function track(label, before, after) {
129
+ const reductions: Record<string, number> = {};
130
+
131
+ function track(label: string, before: string, after: string): string {
113
132
  const diff = before.length - after.length;
114
133
  if (diff > 0) {
115
134
  reductions[label] = (reductions[label] ?? 0) + diff;
116
135
  }
117
136
  return after;
118
137
  }
138
+
119
139
  let result = html;
140
+
141
+ // ── Rule 1: Noscript blocks ──────────────────────────────────────────
120
142
  result = track(
121
143
  "noscript",
122
144
  result,
123
- result.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, "")
145
+ result.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, ""),
124
146
  );
147
+
148
+ // ── Rule 2: HTML comments ────────────────────────────────────────────
125
149
  result = track(
126
150
  "comments",
127
151
  result,
128
- result.replace(/<!--[\s\S]*?(?:-->|$)/g, "")
152
+ result.replace(/<!--[\s\S]*?(?:-->|$)/g, ""),
129
153
  );
154
+
155
+ // ── Rule 3: Script contents ──────────────────────────────────────────
130
156
  result = track(
131
157
  "scripts",
132
158
  result,
133
159
  result.replace(
134
160
  /(<script\b[^>]*>)([\s\S]*?)(<\/script(?:\s[^>]*)?>)/gi,
135
- (_match, open, content, close) => {
161
+ (_match, open: string, content: string, close: string) => {
136
162
  if (!content.trim()) return `${open}${close}`;
137
- const isDataScript = /type\s*=\s*["']application\/(json|ld\+json)["']/i.test(open);
163
+ const isDataScript =
164
+ /type\s*=\s*["']application\/(json|ld\+json)["']/i.test(open);
138
165
  if (isDataScript) {
139
166
  return `${open}[JSON data, ${content.length} chars]${close}`;
140
167
  }
141
168
  return `${open}[script, ${content.length} chars]${close}`;
142
- }
143
- )
169
+ },
170
+ ),
144
171
  );
172
+
173
+ // ── Rule 4: Style contents ───────────────────────────────────────────
145
174
  result = track(
146
175
  "styles",
147
176
  result,
148
177
  result.replace(
149
178
  /(<style\b[^>]*>)([\s\S]*?)(<\/style(?:\s[^>]*)?>)/gi,
150
- (_match, open, content, close) => {
179
+ (_match, open: string, content: string, close: string) => {
151
180
  if (!content.trim()) return `${open}${close}`;
152
181
  return `${open}[CSS, ${content.length} chars]${close}`;
153
- }
154
- )
182
+ },
183
+ ),
155
184
  );
185
+
186
+ // ── Rule 5: Embedded binary data ─────────────────────────────────────
156
187
  result = track(
157
188
  "base64",
158
189
  result,
159
190
  result.replace(
160
191
  /(src|href)\s*=\s*["'](data:[^;]+;base64,)[A-Za-z0-9+/=]{100,}["']/gi,
161
- (_match, attr, prefix) => {
192
+ (_match, attr: string, prefix: string) => {
162
193
  const mime = prefix.replace("data:", "").replace(";base64,", "");
163
194
  return `${attr}="[base64 ${mime}]"`;
164
- }
165
- )
195
+ },
196
+ ),
166
197
  );
198
+
199
+ // ── Rule 6: Attribute allowlist ──────────────────────────────────────
167
200
  result = track("attribute-allowlist", result, rewriteTagAttributes(result));
201
+
202
+ // ── Rule 7: SVG elements ─────────────────────────────────────────────
203
+ // Collapse each <svg> to a single tag, preserving key attributes.
204
+ // Extract <title>/<desc> text as aria-label if none exists.
205
+ // Iterate from innermost to outermost to handle nested SVGs correctly.
168
206
  const svgPattern = /<svg\b([^>]*)>((?:(?!<svg\b)[\s\S])*?)<\/svg>/gi;
169
207
  result = track(
170
208
  "svg-collapse",
171
209
  result,
172
210
  (() => {
173
- let prev;
211
+ let prev: string;
174
212
  let current = result;
175
213
  do {
176
214
  prev = current;
177
215
  current = current.replace(
178
216
  svgPattern,
179
- (_match, attrs, inner) => {
180
- const keepAttrs = [];
217
+ (_match, attrs: string, inner: string) => {
218
+ const keepAttrs: string[] = [];
181
219
  const attrPatterns = [
182
220
  "id",
183
221
  "class",
@@ -185,246 +223,331 @@ function condenseDom(html) {
185
223
  "aria-label",
186
224
  "aria-hidden",
187
225
  "title",
188
- "data-testid"
226
+ "data-testid",
189
227
  ];
190
228
  for (const name of attrPatterns) {
191
229
  const attrToken = findAttributeToken(attrs, name);
192
230
  if (attrToken) keepAttrs.push(attrToken);
193
231
  }
232
+
194
233
  const hasAriaLabel = /aria-label\s*=/i.test(attrs);
195
234
  if (!hasAriaLabel) {
196
235
  const titleMatch = inner.match(
197
- /<title[^>]*>([^<]+)<\/title>/i
236
+ /<title[^>]*>([^<]+)<\/title>/i,
198
237
  );
199
238
  const descMatch = inner.match(
200
- /<desc[^>]*>([^<]+)<\/desc>/i
239
+ /<desc[^>]*>([^<]+)<\/desc>/i,
201
240
  );
202
- const labelText = titleMatch?.[1]?.trim() || descMatch?.[1]?.trim();
241
+ const labelText =
242
+ titleMatch?.[1]?.trim() || descMatch?.[1]?.trim();
203
243
  if (labelText) {
204
244
  keepAttrs.push(
205
- `aria-label="${escapeHtmlAttribute(labelText)}"`
245
+ `aria-label="${escapeHtmlAttribute(labelText)}"`,
206
246
  );
207
247
  }
208
248
  }
209
- const attrStr = keepAttrs.length > 0 ? ` ${keepAttrs.join(" ")}` : "";
249
+
250
+ const attrStr =
251
+ keepAttrs.length > 0 ? ` ${keepAttrs.join(" ")}` : "";
210
252
  return `<svg${attrStr}><!-- [icon] --></svg>`;
211
- }
253
+ },
212
254
  );
213
255
  svgPattern.lastIndex = 0;
214
256
  } while (current !== prev);
215
257
  return current;
216
- })()
258
+ })(),
217
259
  );
218
- const layoutProps = /(?:^|;)\s*(?:display|visibility|opacity|pointer-events|position|z-index|overflow)(?:-[a-z]+)?\s*:[^;"]*/gi;
260
+
261
+ // ── Rule 8: Inline style properties ──────────────────────────────────
262
+ // Keep only layout-relevant properties.
263
+ const layoutProps =
264
+ /(?:^|;)\s*(?:display|visibility|opacity|pointer-events|position|z-index|overflow)(?:-[a-z]+)?\s*:[^;"]*/gi;
265
+
219
266
  result = track(
220
267
  "inline-styles",
221
268
  result,
222
269
  result.replace(
223
270
  /\sstyle\s*=\s*["']([^"']*)["']/gi,
224
- (_match, value) => {
225
- const kept = [];
226
- let propMatch;
271
+ (_match, value: string) => {
272
+ const kept: string[] = [];
273
+ let propMatch: RegExpExecArray | null;
227
274
  layoutProps.lastIndex = 0;
228
275
  while ((propMatch = layoutProps.exec(value)) !== null) {
229
276
  kept.push(propMatch[0].replace(/^[;\s]+/, "").trim());
230
277
  }
231
278
  if (kept.length === 0) return "";
232
279
  return ` style="${kept.join("; ")}"`;
233
- }
234
- )
280
+ },
281
+ ),
235
282
  );
283
+
284
+ // ── Rule 9: Non-semantic class names ─────────────────────────────────
236
285
  result = track(
237
286
  "obfuscated-classes",
238
287
  result,
239
288
  result.replace(
240
289
  /\sclass\s*=\s*["']([^"']*)["']/gi,
241
- (_match, value) => {
290
+ (_match, value: string) => {
242
291
  const filtered = filterSemanticClasses(value);
243
292
  if (!filtered) return "";
244
293
  return ` class="${filtered}"`;
245
- }
246
- )
294
+ },
295
+ ),
247
296
  );
248
- const removableAttrs = /\s(?:xmlns(?::[a-z]+)?|xml:space|xml:lang|fill|stroke|stroke-width|stroke-linecap|stroke-linejoin|stroke-miterlimit|stroke-dasharray|stroke-dashoffset|stroke-opacity|fill-opacity|clip-rule|fill-rule|focusable)\s*=\s*["'][^"']*["']/gi;
297
+
298
+ // ── Rule 10: Cross-reference IDs — no action, preserved by default ──
299
+
300
+ // ── Rule 11: Framework-internal and SVG visual attributes ────────────
301
+ const removableAttrs =
302
+ /\s(?:xmlns(?::[a-z]+)?|xml:space|xml:lang|fill|stroke|stroke-width|stroke-linecap|stroke-linejoin|stroke-miterlimit|stroke-dasharray|stroke-dashoffset|stroke-opacity|fill-opacity|clip-rule|fill-rule|focusable)\s*=\s*["'][^"']*["']/gi;
249
303
  result = track(
250
304
  "framework-svg-attrs",
251
305
  result,
252
- result.replace(removableAttrs, "")
306
+ result.replace(removableAttrs, ""),
253
307
  );
254
- const preBlocks = [];
308
+
309
+ // ── Rule 12: Whitespace ──────────────────────────────────────────────
310
+ // Collapse runs of spaces/tabs to a single space, multiple blank lines
311
+ // to a single newline. Preserve <pre> content.
312
+ const preBlocks: string[] = [];
255
313
  result = result.replace(
256
314
  /(<pre\b[^>]*>)([\s\S]*?)(<\/pre>)/gi,
257
- (_match, open, content, close) => {
315
+ (_match, open: string, content: string, close: string) => {
258
316
  const idx = preBlocks.length;
259
317
  preBlocks.push(`${open}${content}${close}`);
260
318
  return `__PRE_PLACEHOLDER_${idx}__`;
261
- }
319
+ },
262
320
  );
321
+
263
322
  result = track(
264
323
  "whitespace",
265
324
  result,
266
- result.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n")
325
+ result.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n"),
267
326
  );
327
+
268
328
  for (let i = 0; i < preBlocks.length; i++) {
269
329
  const placeholder = `__PRE_PLACEHOLDER_${i}__`;
270
- const preBlock = preBlocks[i];
330
+ const preBlock = preBlocks[i]!;
271
331
  result = result.replace(placeholder, () => preBlock);
272
332
  }
333
+
273
334
  return {
274
335
  html: result,
275
336
  originalLength,
276
337
  condensedLength: result.length,
277
- reductions
338
+ reductions,
278
339
  };
279
340
  }
280
- function rewriteTagAttributes(html) {
341
+
342
+ function rewriteTagAttributes(html: string): string {
281
343
  return html.replace(
282
344
  OPEN_TAG_PATTERN,
283
- (match, rawTagName, rawAttrs, selfClosing) => {
345
+ (match, rawTagName: string, rawAttrs: string | undefined, selfClosing: string) => {
284
346
  const tagName = rawTagName.toLowerCase();
285
347
  if (!rawAttrs?.trim()) return match;
348
+
286
349
  const attrs = parseAttributes(rawAttrs);
287
350
  if (attrs.length === 0) return match;
351
+
288
352
  const interactive = isInteractiveElement(tagName, attrs);
289
- const kept = attrs.map((attr) => keepAttribute(tagName, attr, interactive)).filter((value) => value !== null);
353
+ const kept = attrs
354
+ .map((attr) => keepAttribute(tagName, attr, interactive))
355
+ .filter((value): value is string => value !== null);
356
+
290
357
  const attrStr = kept.length > 0 ? ` ${kept.join(" ")}` : "";
291
358
  const closing = selfClosing ? " /" : "";
292
359
  return `<${rawTagName}${attrStr}${closing}>`;
293
- }
360
+ },
294
361
  );
295
362
  }
296
- function keepAttribute(tagName, attr, interactive) {
363
+
364
+ function keepAttribute(
365
+ tagName: string,
366
+ attr: ParsedAttribute,
367
+ interactive: boolean,
368
+ ): string | null {
297
369
  const name = attr.name.toLowerCase();
298
370
  const value = attr.value;
371
+
299
372
  if (name === "class") {
300
373
  if (!value?.trim()) return null;
301
374
  const filtered = filterSemanticClasses(value);
302
375
  if (!filtered) return null;
303
376
  return serializeAttribute(attr.name, filtered);
304
377
  }
378
+
305
379
  if (name === "style") {
306
380
  if (!value?.trim()) return null;
307
381
  return serializeAttribute(attr.name, value);
308
382
  }
383
+
309
384
  if (name.startsWith("aria-")) {
310
385
  if (!value?.trim()) return null;
311
386
  return attr.rawToken;
312
387
  }
388
+
313
389
  if (TEST_ATTRS.has(name)) {
314
390
  if (!value?.trim()) return null;
315
391
  return attr.rawToken;
316
392
  }
393
+
317
394
  if (tagName === "script" && SCRIPT_ATTRS.has(name)) {
318
395
  return serializePreservedAttribute(attr);
319
396
  }
397
+
320
398
  if (tagName === "style" && STYLE_TAG_ATTRS.has(name)) {
321
399
  if (!value?.trim()) return null;
322
400
  return attr.rawToken;
323
401
  }
402
+
324
403
  if (STATE_ATTRS.has(name)) {
325
404
  return serializePreservedAttribute(attr);
326
405
  }
406
+
327
407
  if (URL_ATTRS.has(name)) {
328
408
  if (!value?.trim()) return null;
329
409
  const normalized = normalizeUrlValue(value);
330
410
  if (normalized === value) return attr.rawToken;
331
411
  return serializeAttribute(attr.name, normalized);
332
412
  }
413
+
333
414
  if (TRUSTED_ATTRS.has(name)) {
334
415
  if (shouldDropEmptyValue(name, value)) return null;
335
416
  return serializePreservedAttribute(attr);
336
417
  }
418
+
337
419
  if (shouldKeepCustomDataAttribute(tagName, name, value, interactive)) {
338
420
  return attr.rawToken;
339
421
  }
422
+
340
423
  return null;
341
424
  }
342
- function serializePreservedAttribute(attr) {
425
+
426
+ function serializePreservedAttribute(attr: ParsedAttribute): string | null {
343
427
  if (BOOLEAN_ATTRS.has(attr.name.toLowerCase())) {
344
428
  return attr.rawToken;
345
429
  }
346
430
  if (attr.value === null) return attr.rawToken;
347
431
  return attr.rawToken;
348
432
  }
349
- function shouldDropEmptyValue(name, value) {
433
+
434
+ function shouldDropEmptyValue(
435
+ name: string,
436
+ value: string | null,
437
+ ): boolean {
350
438
  if (value === null) return false;
351
439
  if (value.trim()) return false;
352
440
  if (name.startsWith("aria-")) return true;
353
441
  return EMPTY_VALUE_DROP_ATTRS.has(name);
354
442
  }
355
- function normalizeUrlValue(value) {
443
+
444
+ function normalizeUrlValue(value: string): string {
356
445
  const loweredValue = value.trim().toLowerCase();
357
446
  if (loweredValue.startsWith("blob:")) return "blob:[omitted]";
358
447
  if (loweredValue.startsWith("javascript:")) return "javascript:[omitted]";
359
448
  if (loweredValue.startsWith("vbscript:")) return "vbscript:[omitted]";
360
449
  if (loweredValue.startsWith("data:")) return "data:[omitted]";
361
450
  if (value.length <= 160) return value;
451
+
362
452
  try {
363
453
  const isAbsolute = /^[a-z][a-z0-9+.-]*:/i.test(value);
364
- const parsed = isAbsolute ? new URL(value) : new URL(value, "https://condensed.local");
365
- const prefix = isAbsolute ? `${parsed.protocol}//${parsed.host}${parsed.pathname}` : `${parsed.pathname}${parsed.hash}`;
454
+ const parsed = isAbsolute
455
+ ? new URL(value)
456
+ : new URL(value, "https://condensed.local");
457
+
458
+ const prefix = isAbsolute
459
+ ? `${parsed.protocol}//${parsed.host}${parsed.pathname}`
460
+ : `${parsed.pathname}${parsed.hash}`;
366
461
  const query = parsed.search ? "?[query omitted]" : "";
367
462
  return `${prefix}${query}`;
368
463
  } catch {
369
464
  return `${value.slice(0, 96)}[omitted]`;
370
465
  }
371
466
  }
372
- function filterSemanticClasses(value) {
467
+
468
+ function filterSemanticClasses(value: string): string {
373
469
  const classes = value.split(/\s+/).filter(Boolean);
374
470
  const kept = classes.filter((cls) => !isObfuscatedClass(cls));
375
471
  return kept.join(" ");
376
472
  }
377
- function isObfuscatedClass(cls) {
473
+
474
+ /**
475
+ * Heuristic: a class name is "obfuscated" if it looks like a hash or random ID
476
+ * rather than a human-readable semantic name.
477
+ */
478
+ function isObfuscatedClass(cls: string): boolean {
378
479
  if (cls.length > 80) return true;
379
480
  if (/^_?[0-9a-f]{6,}$/i.test(cls)) return true;
380
481
  if (/^[a-z]+_[0-9a-f]{4,}$/i.test(cls)) return true;
381
482
  if (/^[a-z]{1,2}[0-9]{2,}$/i.test(cls)) return true;
483
+
382
484
  const digits = (cls.match(/[0-9]/g) || []).length;
383
485
  const letters = (cls.match(/[a-zA-Z]/g) || []).length;
384
486
  if (cls.length >= 6 && digits >= letters * 0.5 && digits >= 2) return true;
487
+
385
488
  return false;
386
489
  }
387
- function parseAttributes(rawAttrs) {
388
- const attrs = [];
389
- const attrPattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
390
- let match;
490
+
491
+ function parseAttributes(rawAttrs: string): ParsedAttribute[] {
492
+ const attrs: ParsedAttribute[] = [];
493
+ const attrPattern =
494
+ /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
495
+
496
+ let match: RegExpExecArray | null;
391
497
  while ((match = attrPattern.exec(rawAttrs)) !== null) {
392
498
  const name = match[1];
393
499
  if (!name) continue;
394
500
  attrs.push({
395
501
  name,
396
- rawToken: match[0].trim(),
397
- value: match[2] ?? match[3] ?? match[4] ?? null
502
+ rawToken: match[0]!.trim(),
503
+ value: match[2] ?? match[3] ?? match[4] ?? null,
398
504
  });
399
505
  }
506
+
400
507
  return attrs;
401
508
  }
402
- function isInteractiveElement(tagName, attrs) {
509
+
510
+ function isInteractiveElement(
511
+ tagName: string,
512
+ attrs: ParsedAttribute[],
513
+ ): boolean {
403
514
  if (INTERACTIVE_TAGS.has(tagName)) return true;
515
+
404
516
  for (const attr of attrs) {
405
517
  const name = attr.name.toLowerCase();
406
518
  if (name === "tabindex" || name === "contenteditable") return true;
407
519
  if (name !== "role") continue;
520
+
408
521
  const role = attr.value?.trim().toLowerCase();
409
522
  if (role && INTERACTIVE_ROLES.has(role)) {
410
523
  return true;
411
524
  }
412
525
  }
526
+
413
527
  return false;
414
528
  }
415
- function shouldKeepCustomDataAttribute(tagName, attrName, value, interactive) {
529
+
530
+ function shouldKeepCustomDataAttribute(
531
+ tagName: string,
532
+ attrName: string,
533
+ value: string | null,
534
+ interactive: boolean,
535
+ ): boolean {
416
536
  if (!interactive) return false;
417
537
  if (!attrName.startsWith("data-")) return false;
418
538
  if (TEST_ATTRS.has(attrName)) return false;
419
539
  if (!value?.trim()) return false;
420
540
  if (value.length > 80) return false;
421
541
  if (tagName === "script" || tagName === "style") return false;
542
+
422
543
  const key = attrName.slice("data-".length);
423
544
  if (!looksMeaningfulToken(key)) return false;
424
545
  if (!looksMeaningfulDataValue(value)) return false;
546
+
425
547
  return true;
426
548
  }
427
- function looksMeaningfulToken(value) {
549
+
550
+ function looksMeaningfulToken(value: string): boolean {
428
551
  if (!/^[a-z][a-z0-9-]{1,40}$/i.test(value)) return false;
429
552
  if (!/[a-z]{3}/i.test(value)) return false;
430
553
  if (/(track|metric|telemetry|analytics|component|display|loaded|token|dps|color|screen|strict|rehydr|fetch)/i.test(value)) {
@@ -432,31 +555,36 @@ function looksMeaningfulToken(value) {
432
555
  }
433
556
  return true;
434
557
  }
435
- function looksMeaningfulDataValue(value) {
558
+
559
+ function looksMeaningfulDataValue(value: string): boolean {
436
560
  if (value.length > 80) return false;
437
561
  if (/[<>]/.test(value)) return false;
438
562
  if (/https?:\/\//i.test(value)) return false;
439
563
  return /^[a-z0-9:_./ -]+$/i.test(value);
440
564
  }
441
- function findAttributeToken(attrs, name) {
565
+
566
+ function findAttributeToken(attrs: string, name: string): string | null {
442
567
  const match = attrs.match(
443
568
  new RegExp(
444
569
  `(?:^|\\s)(${escapeRegExp(name)}(?:\\s*=\\s*(?:"[^"]*"|'[^']*'|[^\\s"'=<>\\x60]+))?)`,
445
- "i"
446
- )
570
+ "i",
571
+ ),
447
572
  );
448
573
  return match?.[1] ?? null;
449
574
  }
450
- function escapeRegExp(value) {
575
+
576
+ function escapeRegExp(value: string): string {
451
577
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
452
578
  }
453
- function serializeAttribute(name, value) {
579
+
580
+ function serializeAttribute(name: string, value: string): string {
454
581
  return `${name}="${escapeHtmlAttribute(value)}"`;
455
582
  }
456
- function escapeHtmlAttribute(value) {
457
- return value.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
583
+
584
+ function escapeHtmlAttribute(value: string): string {
585
+ return value
586
+ .replace(/&/g, "&amp;")
587
+ .replace(/"/g, "&quot;")
588
+ .replace(/</g, "&lt;")
589
+ .replace(/>/g, "&gt;");
458
590
  }
459
- // Annotate the CommonJS export names for ESM import in node:
460
- 0 && (module.exports = {
461
- condenseDom
462
- });