browserwire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,376 @@
1
+ /**
2
+ * enrich.js — Stage 7: LLM Semantic Enrichment
3
+ *
4
+ * Runs on the CLI server. Takes a compiled draft manifest from Stage 6
5
+ * and uses an LLM to enrich it with domain-level semantic names and
6
+ * composite actions.
7
+ *
8
+ * If the LLM is unavailable or returns invalid output, falls back to
9
+ * the deterministic draft.
10
+ */
11
+
12
+ import { getLLMConfig, callLLM } from "./llm-client.js";
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Prompt construction
16
+ // ---------------------------------------------------------------------------
17
+
18
// System prompt passed as the first argument to callLLM() by enrichManifest().
// It instructs the model to rename every entity/action with snake_case domain
// names, to propose composite actions, and to reply with JSON only. The schema
// spelled out below is the shape that validateEnrichment() parses and checks.
// This is runtime text: editing any character changes what the model is asked.
+ const SYSTEM_PROMPT = `You are a web application analyst. You are given a structural manifest of a web page — entities (UI regions) and actions (interactive elements) — discovered by automated DOM analysis.
19
+
20
+ Your job: understand what this page DOES and assign **domain-specific, developer-friendly names** to every entity and action, as if you were designing an API for this website.
21
+
22
+ ## Naming Rules
23
+
24
+ - Use snake_case for all names
25
+ - Entity names should be nouns describing WHAT the region is: "event_feed", "login_form", "message_list", "user_profile_card" — NOT "generic_button" or "div_container"
26
+ - Action names should be verbs describing WHAT the action does: "create_event", "filter_by_upcoming", "open_event_details", "submit_login" — NOT "click_button" or "navigate_to_a"
27
+ - NEVER use "generic", "orphan", "unknown", or scan IDs in semantic names
28
+ - EVERY entity and action MUST get a meaningful domain name — no exceptions
29
+ - Use the textContent, CSS classes, href values, and locator details to understand what each element actually does
30
+ - If two actions do the same thing (e.g. same href), note it in the description but still give each a unique semantic name indicating context (e.g., "navigate_to_home_navbar" vs "navigate_to_home_footer")
31
+
32
+ ## Composite Actions
33
+
34
+ Group related actions that form a logical user operation:
35
+ - A card click + link click → "open_event(event_id)"
36
+ - Type email + type password + click submit → "login(email, password)"
37
+ - Type in search + click search → "search(query)"
38
+ You MUST create at least one composite action if there are related sequential actions on the page.
39
+
40
+ ## Output Format
41
+
42
+ Respond with ONLY valid JSON (no markdown fences, no explanation) matching this schema:
43
+ {
44
+ "domain": "string (e.g. event_management, messaging, email_client)",
45
+ "domainDescription": "string (1-2 sentence description of what this site/page does)",
46
+ "entities": [
47
+ { "originalId": "string", "semanticName": "string", "description": "string" }
48
+ ],
49
+ "actions": [
50
+ { "originalId": "string", "semanticName": "string", "description": "string", "inputs": [{ "name": "string", "description": "string" }] }
51
+ ],
52
+ "compositeActions": [
53
+ { "name": "string", "description": "string", "stepActionIds": ["string (existing action IDs)"], "inputs": [{ "name": "string", "type": "string", "description": "string" }] }
54
+ ]
55
+ }
56
+
57
+ CRITICAL: You MUST only reference entity/action IDs that exist in the input. Do NOT invent new actions.`;
58
+
59
/**
 * Build the user message for the LLM from the draft manifest.
 *
 * The message contains, in order: the page URL, an excerpt of the visible
 * page text (only when a non-empty string is supplied), the entities with
 * their signals and the IDs of the actions attached to them, and the actions
 * with their interaction kind, text content, input names and locators.
 *
 * Optional collections that are absent (manifest.entities, manifest.actions,
 * entity.signals, action.inputs, action.locatorSet.strategies) are treated as
 * empty so that a sparse manifest yields a smaller prompt instead of a
 * TypeError.
 *
 * @param {object} manifest - Draft manifest; reads metadata.site, entities, actions
 * @param {string} [pageText] - Visible page text; only the first 2000 characters are sent
 * @returns {string} The prompt sections joined with newlines
 */
const buildUserMessage = (manifest, pageText) => {
  const listOf = (value) => (Array.isArray(value) ? value : []);

  const entities = listOf(manifest.entities);
  const actions = listOf(manifest.actions);
  const site = manifest.metadata && manifest.metadata.site
    ? manifest.metadata.site
    : "unknown";

  const parts = [`## Page`, `URL: ${site}`, ""];

  // Only strings can be sliced into an excerpt; anything else is ignored.
  if (typeof pageText === "string" && pageText.length > 0) {
    parts.push(`## Visible Page Text (first ~2000 chars)`, pageText.slice(0, 2000), "");
  }

  // Build entity → action mapping for context
  const entityActions = new Map();
  for (const action of actions) {
    const owned = entityActions.get(action.entityId);
    if (owned) {
      owned.push(action.id);
    } else {
      entityActions.set(action.entityId, [action.id]);
    }
  }

  // Entities with their signals and child actions
  const entitySummary = entities.map((e) => ({
    id: e.id,
    name: e.name,
    signals: listOf(e.signals).map((s) => `${s.kind}:${s.value}`),
    actions: entityActions.get(e.id) || []
  }));

  parts.push(
    `## Entities (${entitySummary.length})`,
    JSON.stringify(entitySummary, null, 2),
    ""
  );

  // Actions with full context: all locators, textContent, interactionKind
  const actionSummary = actions.map((a) => ({
    id: a.id,
    name: a.name,
    entityId: a.entityId,
    interactionKind: a.interactionKind || "unknown",
    textContent: a.textContent || null,
    inputs: listOf(a.inputs).map((i) => i.name),
    locators: listOf(a.locatorSet && a.locatorSet.strategies).map(
      (s) => `${s.kind}: ${s.value}`
    )
  }));

  parts.push(
    `## Actions (${actionSummary.length})`,
    JSON.stringify(actionSummary, null, 2)
  );

  return parts.join("\n");
};
123
+
124
+
125
+ // ---------------------------------------------------------------------------
126
+ // Validation of LLM output
127
+ // ---------------------------------------------------------------------------
128
+
129
/**
 * Extract and parse the JSON payload from a raw LLM reply.
 *
 * Accepts bare JSON, JSON inside a markdown code fence, and — as a last
 * resort — JSON surrounded by prose (the text between the first "{" and the
 * last "}").
 *
 * @param {string} rawResponse - Raw text returned by the LLM
 * @returns {*} The parsed JSON value
 * @throws {Error} When no candidate parses (the first parse error is rethrown),
 *   or a TypeError when rawResponse is not a string
 */
const extractJsonPayload = (rawResponse) => {
  let jsonStr = rawResponse.trim();
  const fenceMatch = jsonStr.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fenceMatch) {
    jsonStr = fenceMatch[1].trim();
  }

  try {
    return JSON.parse(jsonStr);
  } catch (firstError) {
    // Models sometimes add an intro or sign-off despite the instructions.
    const start = jsonStr.indexOf("{");
    const end = jsonStr.lastIndexOf("}");
    if (start === -1 || end <= start) throw firstError;
    try {
      return JSON.parse(jsonStr.slice(start, end + 1));
    } catch (secondError) {
      throw firstError;
    }
  }
};

/**
 * Parse and validate the LLM response against the draft manifest.
 *
 * Individual entries that are malformed or reference IDs not present in the
 * manifest are dropped (with a warning for unknown IDs); the response as a
 * whole is rejected only when it cannot be parsed or is not a JSON object.
 *
 * Rules applied:
 * - entities/actions need string `originalId` and non-blank string `semanticName`
 * - action inputs need a string `name`
 * - composite actions need a string `name`, at least 2 steps, and every step
 *   must be an existing action ID; their inputs need string `name` and `type`
 *
 * @param {string} rawResponse - Raw text returned by the LLM
 * @param {object} manifest - Draft manifest; its entity and action IDs are the allowed references
 * @returns {{ domain: (string|null), domainDescription: (string|null), entities: object[], actions: object[], compositeActions: object[] } | null}
 *   Validated enrichment data, or null if the response is unusable
 */
const validateEnrichment = (rawResponse, manifest) => {
  let parsed;
  try {
    parsed = extractJsonPayload(rawResponse);
  } catch (error) {
    console.warn("[browserwire-cli] LLM returned unparseable JSON:", error.message);
    return null;
  }

  // Arrays are typeof "object" too, but the schema requires a top-level object.
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    console.warn("[browserwire-cli] LLM response is not an object");
    return null;
  }

  // Build ID lookup sets
  const entityIds = new Set(manifest.entities.map((e) => e.id));
  const actionIds = new Set(manifest.actions.map((a) => a.id));

  const isNamedRef = (item) =>
    Boolean(item) &&
    typeof item.originalId === "string" &&
    typeof item.semanticName === "string" &&
    item.semanticName.trim().length > 0;

  // Validate domain
  const domain = typeof parsed.domain === "string" ? parsed.domain : null;
  const domainDescription = typeof parsed.domainDescription === "string" ? parsed.domainDescription : null;

  // Validate entity enrichments
  const entities = [];
  if (Array.isArray(parsed.entities)) {
    for (const e of parsed.entities) {
      if (!isNamedRef(e)) continue;
      if (!entityIds.has(e.originalId)) {
        console.warn(`[browserwire-cli] LLM referenced unknown entity: ${e.originalId}`);
        continue;
      }
      entities.push({
        originalId: e.originalId,
        semanticName: e.semanticName,
        description: typeof e.description === "string" ? e.description : ""
      });
    }
  }

  // Validate action enrichments
  const actions = [];
  if (Array.isArray(parsed.actions)) {
    for (const a of parsed.actions) {
      if (!isNamedRef(a)) continue;
      if (!actionIds.has(a.originalId)) {
        console.warn(`[browserwire-cli] LLM referenced unknown action: ${a.originalId}`);
        continue;
      }
      const inputs = Array.isArray(a.inputs)
        ? a.inputs.filter((i) => i && typeof i.name === "string")
        : [];
      actions.push({
        originalId: a.originalId,
        semanticName: a.semanticName,
        description: typeof a.description === "string" ? a.description : "",
        inputs
      });
    }
  }

  // Validate composite actions
  const compositeActions = [];
  if (Array.isArray(parsed.compositeActions)) {
    for (const ca of parsed.compositeActions) {
      if (!ca || typeof ca.name !== "string" || !Array.isArray(ca.stepActionIds)) continue;
      // Must have at least 2 steps
      if (ca.stepActionIds.length < 2) {
        console.warn(`[browserwire-cli] composite action "${ca.name}" has < 2 steps, skipping`);
        continue;
      }
      // All step IDs must reference existing actions
      const invalidSteps = ca.stepActionIds.filter((id) => !actionIds.has(id));
      if (invalidSteps.length > 0) {
        console.warn(`[browserwire-cli] composite action "${ca.name}" references unknown actions: ${invalidSteps.join(", ")}`);
        continue;
      }
      const inputs = Array.isArray(ca.inputs)
        ? ca.inputs.filter((i) => i && typeof i.name === "string" && typeof i.type === "string")
        : [];
      compositeActions.push({
        name: ca.name,
        description: typeof ca.description === "string" ? ca.description : "",
        stepActionIds: ca.stepActionIds,
        inputs
      });
    }
  }

  return { domain, domainDescription, entities, actions, compositeActions };
};
229
+
230
+ // ---------------------------------------------------------------------------
231
+ // Merge enrichment into manifest
232
+ // ---------------------------------------------------------------------------
233
+
234
/**
 * Merge validated enrichment data into a copy of the draft manifest.
 * Exported for direct use by the vision pipeline in session.js.
 *
 * The input manifest is never mutated; the result is a JSON deep copy with:
 * - `domain` / `domainDescription` set when provided
 * - `semanticName` (and `description`, when non-empty) set on every entity
 *   and action that has a matching `originalId`
 * - action input names/descriptions refined positionally
 * - `compositeActions` added when at least one composite is provided
 *
 * Because callers may pass hand-built enrichment objects, missing
 * `entities`, `actions`, `compositeActions` or `inputs` arrays are treated
 * as empty. Composite IDs are `composite_<name>`; a repeated name gets a
 * numeric suffix (`_2`, `_3`, …) so IDs stay unique within the manifest.
 *
 * @param {object} manifest - Draft manifest (must be JSON-serializable)
 * @param {object} enrichment - Enrichment data in the shape returned by validateEnrichment
 * @param {string} [capturedAt] - ISO timestamp for provenance; defaults to the current time
 * @returns {object} A new, enriched manifest
 */
export const mergeEnrichment = (manifest, enrichment, capturedAt) => {
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const enriched = JSON.parse(JSON.stringify(manifest));

  // Domain metadata
  if (enrichment.domain) {
    enriched.domain = enrichment.domain;
  }
  if (enrichment.domainDescription) {
    enriched.domainDescription = enrichment.domainDescription;
  }

  // Entity semantic names
  const entityEnrichMap = new Map(
    listOf(enrichment.entities).map((e) => [e.originalId, e])
  );
  for (const entity of listOf(enriched.entities)) {
    const enrich = entityEnrichMap.get(entity.id);
    if (!enrich) continue;
    entity.semanticName = enrich.semanticName;
    if (enrich.description) {
      entity.description = enrich.description;
    }
  }

  // Action semantic names
  const actionEnrichMap = new Map(
    listOf(enrichment.actions).map((a) => [a.originalId, a])
  );
  for (const action of listOf(enriched.actions)) {
    const enrich = actionEnrichMap.get(action.id);
    if (!enrich) continue;
    action.semanticName = enrich.semanticName;
    if (enrich.description) {
      action.description = enrich.description;
    }
    // Refine input names/descriptions if provided. Matching is positional:
    // the i-th refined input is applied to the i-th existing input.
    const refined = listOf(enrich.inputs);
    const existing = listOf(action.inputs);
    const count = Math.min(refined.length, existing.length);
    for (let i = 0; i < count; i++) {
      if (!refined[i] || !existing[i]) continue;
      if (refined[i].name) existing[i].name = refined[i].name;
      if (refined[i].description) existing[i].description = refined[i].description;
    }
  }

  // Composite actions
  const composites = listOf(enrichment.compositeActions);
  if (composites.length > 0) {
    const stamp = capturedAt || new Date().toISOString();
    const sessionId = enriched.metadata ? enriched.metadata.id : undefined;
    const usedIds = new Set();

    enriched.compositeActions = composites.map((ca) => {
      // Keep the plain ID for the first occurrence; disambiguate repeats.
      const baseId = `composite_${ca.name}`;
      let id = baseId;
      for (let n = 2; usedIds.has(id); n++) {
        id = `${baseId}_${n}`;
      }
      usedIds.add(id);

      return {
        id,
        name: ca.name,
        description: ca.description,
        // Copied so the manifest does not alias the enrichment's array.
        stepActionIds: listOf(ca.stepActionIds).slice(),
        inputs: listOf(ca.inputs).map((inp) => ({
          name: inp.name,
          type: inp.type || "string",
          required: true,
          description: inp.description || ""
        })),
        // A fresh object per composite so later edits to one do not leak into others.
        provenance: {
          source: "agent",
          sessionId,
          traceIds: [],
          annotationIds: [],
          capturedAt: stamp
        }
      };
    });
  }

  return enriched;
};
313
+
314
+ // ---------------------------------------------------------------------------
315
+ // Public API
316
+ // ---------------------------------------------------------------------------
317
+
318
/**
 * Report whether LLM enrichment can run.
 *
 * @returns {boolean} true when getLLMConfig() returns anything other than null
 */
export const isEnrichmentAvailable = () => getLLMConfig() !== null;
324
+
325
/**
 * Enrich a draft manifest with LLM semantic analysis.
 *
 * Best-effort: every failure mode (LLM not configured, prompt construction
 * error, LLM call error, empty or non-text reply, invalid reply, merge error)
 * is logged and reported as null so the caller can keep the deterministic
 * draft. This function does not throw for those cases.
 *
 * @param {object} manifest - Draft BrowserWireManifest from Stage 6
 * @param {string} [pageText] - Visible page text for context
 * @param {string} [capturedAt] - ISO timestamp
 * @returns {Promise<{ enriched: object, stats: object } | null>} enriched manifest or null on failure
 */
export const enrichManifest = async (manifest, pageText, capturedAt) => {
  const config = getLLMConfig();
  if (!config) {
    console.log("[browserwire-cli] LLM not configured, skipping enrichment");
    return null;
  }

  console.log(`[browserwire-cli] enriching manifest with ${config.provider}/${config.model}`);

  let userMessage;
  try {
    userMessage = buildUserMessage(manifest, pageText);
  } catch (error) {
    console.warn(`[browserwire-cli] could not build enrichment prompt: ${error.message}`);
    return null;
  }

  let rawResponse;
  try {
    rawResponse = await callLLM(SYSTEM_PROMPT, userMessage, config);
  } catch (error) {
    console.warn(`[browserwire-cli] LLM call failed: ${error.message}`);
    return null;
  }

  if (!rawResponse) {
    console.warn("[browserwire-cli] LLM returned empty response");
    return null;
  }
  // Guard before calling .trim(): a non-string reply would otherwise throw here.
  if (typeof rawResponse !== "string") {
    console.warn("[browserwire-cli] LLM returned a non-text response");
    return null;
  }
  if (rawResponse.trim().length === 0) {
    console.warn("[browserwire-cli] LLM returned empty response");
    return null;
  }

  let enrichment;
  let enriched;
  try {
    enrichment = validateEnrichment(rawResponse, manifest);
    if (!enrichment) {
      return null;
    }
    enriched = mergeEnrichment(manifest, enrichment, capturedAt);
  } catch (error) {
    // Validation and merging read manifest fields that may be absent or malformed.
    console.warn(`[browserwire-cli] enrichment failed: ${error.message}`);
    return null;
  }

  const stats = {
    domain: enrichment.domain || "unknown",
    entitiesEnriched: enrichment.entities.length,
    actionsEnriched: enrichment.actions.length,
    compositeActions: enrichment.compositeActions.length
  };

  console.log(
    `[browserwire-cli] enrichment complete: domain="${stats.domain}" entities=${stats.entitiesEnriched} actions=${stats.actionsEnriched} composites=${stats.compositeActions}`
  );

  return { enriched, stats };
};