browserwire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,930 @@
1
+ /**
2
+ * session.js — Discovery Session Manager (Vision-First Pipeline)
3
+ *
4
+ * Flow per snapshot:
5
+ * 1. perceiveSnapshot() — vision LLM sees screenshot + HTML skeleton (~2K tokens)
6
+ * 2. focusAndInspect() — build elements/locators only for focused ~20 elements
7
+ * 3. compileManifest() — unchanged
8
+ * 4. mergeEnrichment() — apply LLM semantic names from perception result
9
+ *
10
+ * finalize(): merge across snapshots → compile — no LLM Pass 2.
11
+ */
12
+
13
+ import { synthesizeAllLocators } from "./locators.js";
14
+ import { compileManifest } from "./compile.js";
15
+ import { mergeEnrichment } from "./enrich.js";
16
+ import { perceiveSnapshot } from "./perceive.js";
17
+ import { synthesizeWorkflows } from "./synthesize-workflows.js";
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Helpers — mirror compile.js logic to build scanId → actionId mapping
21
+ // ---------------------------------------------------------------------------
22
+
/**
 * Turn a display name into an identifier-safe slug.
 *
 * Lower-cases the name, collapses every run of characters outside [a-z0-9]
 * into a single underscore, trims underscores from both ends and keeps at
 * most 40 characters. Yields "unnamed" when nothing is left.
 *
 * Kept in lock-step with compile.js slugify (per the original note), so the
 * order of the steps must not change.
 */
const slugify = (name) => {
  const lowered = name.toLowerCase();
  const underscored = lowered.replace(/[^a-z0-9]+/g, "_");
  const trimmed = underscored.replace(/^_+|_+$/g, "");
  // The cap is applied after trimming, so a capped slug may end in "_".
  const capped = trimmed.slice(0, 40);
  return capped || "unnamed";
};
26
+
/**
 * Human-readable verb phrase for an interaction kind, used as the leading
 * words of an action name. Kinds without an entry in the table yield
 * "Interact with". Kept in lock-step with compile.js interactionVerb
 * (per the original note).
 */
const interactionVerb = (kind) => {
  const verbsByKind = {
    click: "Click",
    type: "Type into",
    select: "Select from",
    toggle: "Toggle",
    navigate: "Navigate to",
    submit: "Submit",
    scroll: "Scroll"
  };
  const verb = verbsByKind[kind];
  return verb ? verb : "Interact with";
};
30
+
/**
 * Guess an interactionKind for a skeleton entry from its tag name and,
 * for inputs, its `type` attribute. Used as the fallback when perception
 * supplies no actions.
 *
 *   input[type=checkbox|radio] → "toggle"
 *   input[type=submit]         → "click"
 *   any other input, textarea  → "type"
 *   select                     → "select"
 *   a                          → "navigate"
 *   everything else            → "click"
 */
const deriveInteractionKind = (entry) => {
  const inputType = entry.attributes?.type || "";
  switch (entry.tagName) {
    case "input":
      if (inputType === "checkbox" || inputType === "radio") return "toggle";
      return inputType === "submit" ? "click" : "type";
    case "select":
      return "select";
    case "textarea":
      return "type";
    case "a":
      return "navigate";
    default:
      return "click";
  }
};
48
+
/**
 * Predict, per interactable scanId, the action ID the compiler will assign.
 *
 * Replays compile.js's ID generation (per the original note), so it must be
 * given the same interactables/elements/a11y arrays that are later handed to
 * compileManifest. An interactable is skipped when its kind is "none", when
 * it has no element, or when it has no locator candidate with at least one
 * strategy.
 *
 * @param interactables entries with `scanId` and `interactionKind`
 * @param elements      entries with `scanId`, `textContent`, `tagName`
 * @param a11y          entries with `scanId` and `name`
 * @param locatorMap    Map of scanId → locator candidate (`strategies` array)
 * @returns Map of scanId → action ID
 */
const buildScanToActionIdMap = (interactables, elements, a11y, locatorMap) => {
  const elementsByScanId = new Map();
  for (const element of elements) {
    elementsByScanId.set(element.scanId, element);
  }
  const a11yByScanId = new Map();
  for (const entry of a11y) {
    a11yByScanId.set(entry.scanId, entry);
  }

  const takenIds = new Set();
  const actionIdByScanId = new Map();

  for (const { scanId, interactionKind } of interactables) {
    if (interactionKind === "none") continue;

    const element = elementsByScanId.get(scanId);
    if (!element) continue;
    const locator = locatorMap.get(scanId);
    if (!locator || locator.strategies.length === 0) continue;

    // Label preference: accessible name, then visible text, then tag name.
    const accessibleName = a11yByScanId.get(scanId)?.name?.trim().slice(0, 50);
    const label =
      accessibleName ||
      element.textContent?.trim().slice(0, 50) ||
      element.tagName;

    const baseId = `action_${slugify(`${interactionVerb(interactionKind)} ${label}`)}`;
    // On a collision the scanId is appended to keep the ID unique.
    const actionId = takenIds.has(baseId) ? `${baseId}_${scanId}` : baseId;

    takenIds.add(actionId);
    actionIdByScanId.set(scanId, actionId);
  }

  return actionIdByScanId;
};
83
+
/**
 * Predict the compiled entity ID for each entity candidate name, replaying
 * compile.js's ID generation (per the original note).
 *
 * The ID is `entity_<slug of name>`; when that ID is already taken the
 * candidate's rootScanId is appended. If two candidates share a name, the
 * later candidate's ID is the one stored for that name.
 *
 * @param entities candidates with `name` and `rootScanId`
 * @returns Map of entity name → entity ID
 */
const buildEntityNameToIdMap = (entities) => {
  const takenIds = new Set();
  const idByName = new Map();

  for (const { name, rootScanId } of entities) {
    const baseId = `entity_${slugify(name)}`;
    const entityId = takenIds.has(baseId) ? `${baseId}_${rootScanId}` : baseId;
    takenIds.add(entityId);
    idByName.set(name, entityId);
  }

  return idByName;
};
103
+
/**
 * Translate a perception result into the enrichment object passed to
 * mergeEnrichment().
 *
 * References to IDs that are not present in `manifest` are dropped:
 *  - entities whose name has no mapped ID, or whose ID is not in the manifest
 *  - actions whose scanId has no mapped ID, or whose ID is not in the manifest
 *  - composite-action steps that do not resolve; a composite action is kept
 *    only when at least two of its steps resolve
 *
 * Perception comes from an LLM, so every list on it (`entities`, `actions`,
 * `compositeActions`, `stepScanIds`, `inputs`) is treated as optional; a
 * missing list is handled as empty instead of throwing.
 *
 * @param perception      perception result (`domain`, `domainDescription`,
 *                        `entities`, `actions`, `compositeActions`)
 * @param scanToActionId  Map of scanId → action ID
 * @param entityNameToId  Map of entity name → entity ID
 * @param manifest        compiled manifest (`entities[].id`, `actions[].id`)
 * @returns {{ domain, domainDescription, entities, actions, compositeActions }}
 */
const buildEnrichmentFromPerception = (perception, scanToActionId, entityNameToId, manifest) => {
  const manifestEntityIds = new Set((manifest.entities || []).map((e) => e.id));
  const manifestActionIds = new Set((manifest.actions || []).map((a) => a.id));

  // Resolve a scanId to an action ID that actually exists in the manifest.
  const resolveActionId = (scanId) => {
    const actionId = scanToActionId.get(scanId);
    return actionId && manifestActionIds.has(actionId) ? actionId : null;
  };

  const entities = (perception.entities || [])
    .map((e) => ({
      originalId: entityNameToId.get(e.name),
      semanticName: e.name,
      description: e.description || ""
    }))
    .filter((e) => e.originalId && manifestEntityIds.has(e.originalId));

  const actions = [];
  for (const a of perception.actions || []) {
    const originalId = resolveActionId(a.scanId);
    if (!originalId) continue;
    actions.push({
      originalId,
      semanticName: a.semanticName,
      description: a.description || "",
      inputs: []
    });
  }

  const compositeActions = [];
  for (const ca of perception.compositeActions || []) {
    const stepActionIds = (ca.stepScanIds || []).map(resolveActionId).filter(Boolean);
    // A composite needs at least two resolvable steps to be meaningful.
    if (stepActionIds.length < 2) continue;
    compositeActions.push({
      name: ca.name,
      description: ca.description || "",
      stepActionIds,
      inputs: (ca.inputs || []).map((i) => ({
        name: i.name,
        type: i.type || "string",
        description: i.description || ""
      }))
    });
  }

  return {
    domain: perception.domain,
    domainDescription: perception.domainDescription,
    entities,
    actions,
    compositeActions
  };
};
154
+
155
+ // ---------------------------------------------------------------------------
156
+ // focusAndInspect — core per-snapshot pipeline
157
+ // ---------------------------------------------------------------------------
158
+
/**
 * Build ViewDef objects from the views reported by perception.
 *
 * Each view gets a container locator with up to three CSS strategies, in
 * descending confidence:
 *   1. the LLM-provided containerSelector (0.8)
 *   2. when that selector contains a [role=...] test, the matching HTML tag
 *      (0.6) — e.g. role "navigation" → "nav"; a role with no entry in the
 *      table is used as the tag name as-is (e.g. "main")
 *   3. "body" as the last resort (0.2)
 *
 * Perception comes from an LLM, so a view without a string `name` or
 * `containerSelector` is skipped with a warning instead of throwing, and a
 * missing `fields` list is treated as empty.
 *
 * @param perceptionViews  views from the perception result (may be empty/nullish)
 * @param entityNameToId   unused here; kept so existing callers keep working
 * @param capturedAt       ISO timestamp for provenance; defaults to now
 * @returns array of ViewDef objects (empty when there are no usable views)
 */
const buildViewDefs = (perceptionViews, entityNameToId, capturedAt) => {
  if (!perceptionViews || perceptionViews.length === 0) return [];

  // ARIA landmark roles whose native HTML element has a different name.
  const roleToTag = new Map([
    ["navigation", "nav"],
    ["banner", "header"],
    ["contentinfo", "footer"],
    ["complementary", "aside"],
    ["region", "section"]
  ]);
  // Accepts [role='x'], [role="x"] and unquoted [role=x].
  const rolePattern = /\[role=(['"]?)(\w+)\1\]/;
  const stamp = capturedAt || new Date().toISOString();
  const css = (value, confidence) => ({ kind: "css", value, confidence });

  const viewDefs = [];
  for (const v of perceptionViews) {
    if (typeof v?.name !== "string" || typeof v.containerSelector !== "string") {
      console.warn(
        `[browserwire-cli] skipping malformed view: ${JSON.stringify(v?.name ?? null)}`
      );
      continue;
    }

    const viewId = `view_${slugify(v.name)}`;
    const strategies = [css(v.containerSelector, 0.8)];

    const roleMatch = v.containerSelector.match(rolePattern);
    if (roleMatch) {
      const role = roleMatch[2];
      strategies.push(css(roleToTag.get(role.toLowerCase()) || role, 0.6));
    }

    // Absolute last resort: body
    strategies.push(css("body", 0.2));

    const fieldList = Array.isArray(v.fields) ? v.fields : [];

    viewDefs.push({
      id: viewId,
      name: v.name.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()),
      semanticName: v.name,
      description: v.description || "",
      isList: v.isList || false,
      isDynamic: v.isDynamic || false,
      containerLocator: { id: `loc_${viewId}`, strategies },
      itemLocator: v.itemSelector ? css(v.itemSelector, 0.8) : undefined,
      fields: fieldList.map((f) => ({
        name: f.name,
        type: f.type || "string",
        locator: css(f.selector, 0.8)
      })),
      // Fresh object per view so later edits to one view's provenance
      // cannot leak into the others.
      provenance: {
        source: "agent",
        sessionId: "vision-discovery",
        traceIds: [],
        annotationIds: [],
        capturedAt: stamp
      }
    });
  }

  return viewDefs;
};
213
+
/**
 * Build a PageDef from a perception pageState.
 *
 * Returns null when there is no pageState. `stateSignals` is copied onto the
 * result only when the pageState carries a non-empty list.
 *
 * @param pageState  `{ name, routePattern, description?, stateSignals? }` or nullish
 * @param viewIds    IDs of the views on this page (nullish → [])
 * @param actionIds  IDs of the actions on this page (nullish → [])
 */
const buildPageDef = (pageState, viewIds, actionIds) => {
  if (!pageState) return null;

  const { name, routePattern, description, stateSignals } = pageState;
  const hasSignals = Boolean(stateSignals) && stateSignals.length > 0;

  return {
    id: `page_${slugify(name)}`,
    routePattern,
    name,
    description: description || "",
    viewIds: viewIds || [],
    actionIds: actionIds || [],
    ...(hasSignals ? { stateSignals } : {})
  };
};
232
+
/**
 * Turn one skeleton snapshot plus its (optional) perception result into a
 * manifest.
 *
 * Pipeline:
 *   1. Pick the focused scanIds — perception's action and entity scanIds, or
 *      every skeleton entry flagged `interactable` when perception is absent
 *      or reports no actions.
 *   2. Project the focused skeleton entries into elements[] and a11y[].
 *   3. Build interactables[] and entity candidates.
 *   4. synthesizeAllLocators, then put any LLM-provided locator at the front
 *      of the matching candidate's strategies.
 *   5. compileManifest → draft manifest.
 *   6. mergeEnrichment → apply perception's semantic names (only when
 *      perception is present).
 *   7. Attach views and a page definition.
 *
 * @returns {{ manifest, elements, a11y, interactables, entities, locators, views, page, stats }}
 */
const focusAndInspect = ({ skeleton, perception, url, title, capturedAt, pageState }) => {
  const skeletonByScanId = new Map();
  for (const node of skeleton) {
    skeletonByScanId.set(node.scanId, node);
  }

  // --- Decide which skeleton entries to inspect ---
  const hasPerceivedActions = Boolean(perception) && perception.actions.length > 0;
  const interactableData = hasPerceivedActions ? perception.actions : [];
  const entityData = hasPerceivedActions ? perception.entities : [];
  const focusedScanIds = new Set(
    hasPerceivedActions
      ? [
          ...perception.actions.map((a) => a.scanId),
          ...perception.entities.flatMap((e) => e.scanIds)
        ]
      : // Fallback: every interactable skeleton entry
        skeleton.filter((e) => e.interactable).map((e) => e.scanId)
  );
  const focusedSkeleton = skeleton.filter((e) => focusedScanIds.has(e.scanId));

  // --- Elements and a11y records, in the shape handed to locators.js ---
  const elements = [];
  const a11y = [];
  for (const entry of focusedSkeleton) {
    const attrs = entry.attributes;
    elements.push({
      scanId: entry.scanId,
      tagName: entry.tagName,
      attributes: attrs || {},
      textContent: entry.text || "",
      parentScanId: entry.parentScanId,
      childScanIds: entry.childScanIds || [],
      boundingRect: entry.rect || { x: 0, y: 0, width: 0, height: 0 },
      isVisible: true
    });
    a11y.push({
      scanId: entry.scanId,
      role: entry.role || null,
      name: entry.name || null,
      description: null,
      isDisabled: attrs?.disabled != null || attrs?.["aria-disabled"] === "true",
      isRequired: attrs?.required != null || attrs?.["aria-required"] === "true",
      expandedState: attrs?.["aria-expanded"] || null,
      checkedState: attrs?.["aria-checked"] || null,
      selectedState: attrs?.["aria-selected"] || null
    });
  }

  // --- Interactables: perceived actions, else derived from the skeleton ---
  const fromPerceivedAction = (action) => ({
    scanId: action.scanId,
    interactionKind: action.interactionKind || "click",
    confidence: 0.9,
    inputType: skeletonByScanId.get(action.scanId)?.attributes?.type || null
  });
  const fromSkeletonEntry = (entry) => ({
    scanId: entry.scanId,
    interactionKind: deriveInteractionKind(entry),
    confidence: 0.7,
    inputType: entry.attributes?.type || null
  });

  let interactables;
  if (interactableData.length > 0) {
    interactables = interactableData.map((action) => fromPerceivedAction(action));
  } else {
    interactables = focusedSkeleton
      .filter((entry) => entry.interactable)
      .map((entry) => fromSkeletonEntry(entry));
  }

  // --- Entity candidates ---
  const interactableScanIds = new Set(interactables.map((item) => item.scanId));
  const entities = entityData.map((entity, index) => {
    const { name, scanIds } = entity;
    return {
      candidateId: `vision_entity_${index}`,
      name,
      source: "vision_llm",
      rootScanId: scanIds[0] || 0,
      memberScanIds: scanIds,
      interactableScanIds: scanIds.filter((sid) => interactableScanIds.has(sid)),
      signals: []
    };
  });

  // --- Heuristic locators ---
  const synthesized = synthesizeAllLocators(elements, a11y, interactables);
  const locators = synthesized.locators;
  const locatorStats = synthesized.stats;

  // --- LLM locators go first (confidence 0.97): they encode surrounding
  // context (labels, ARIA) that the heuristics cannot see. ---
  if (perception) {
    const candidateByScanId = new Map(locators.map((l) => [l.scanId, l]));
    let injected = 0;

    for (const { scanId, locator } of perception.actions) {
      if (!locator || !locator.kind || !locator.value) continue;

      let candidate = candidateByScanId.get(scanId);
      if (!candidate) {
        // No heuristic candidate for this scanId: start one.
        candidate = { scanId, strategies: [] };
        locators.push(candidate);
        candidateByScanId.set(scanId, candidate);
      }
      candidate.strategies.unshift({ kind: locator.kind, value: locator.value, confidence: 0.97 });
      injected += 1;
    }

    if (injected > 0) {
      console.log(`[browserwire-cli] LLM locators injected: ${injected}`);
    }
  }

  // --- ID predictions (must mirror compile.js logic exactly) ---
  // Built after injection so candidates added above are included.
  const locatorMap = new Map(locators.map((l) => [l.scanId, l]));
  const scanToActionId = buildScanToActionIdMap(interactables, elements, a11y, locatorMap);
  const entityNameToId = buildEntityNameToIdMap(entities);

  // --- Draft manifest ---
  const compiled = compileManifest({
    url,
    title,
    capturedAt,
    elements,
    a11y,
    interactables,
    entities,
    locators
  });
  const manifestStats = compiled.stats;

  // --- Semantic names from perception ---
  let finalManifest = compiled.manifest;
  if (perception) {
    const enrichment = buildEnrichmentFromPerception(
      perception,
      scanToActionId,
      entityNameToId,
      compiled.manifest
    );
    finalManifest = mergeEnrichment(compiled.manifest, enrichment, capturedAt);
    console.log(
      `[browserwire-cli] enrichment applied: domain="${enrichment.domain}" entities=${enrichment.entities.length} actions=${enrichment.actions.length} composites=${enrichment.compositeActions.length}`
    );
  }

  // --- Views and page ---
  const views = perception ? buildViewDefs(perception.views, entityNameToId, capturedAt) : [];
  const viewIds = views.map((v) => v.id);
  const pageActionIds = [...scanToActionId.values()];

  // Perception's own pageState wins over the one supplied by the caller.
  const effectivePageState = perception?.pageState || pageState || null;
  const page = buildPageDef(effectivePageState, viewIds, pageActionIds);

  if (views.length > 0) {
    finalManifest.views = views;
  }
  if (page) {
    finalManifest.pages = [page];
  }

  return {
    manifest: finalManifest,
    elements,
    a11y,
    interactables,
    entities,
    locators,
    views,
    page,
    stats: { locator: locatorStats, manifest: manifestStats }
  };
};
411
+
412
+ // ---------------------------------------------------------------------------
413
+ // Snapshot merging (unchanged from original)
414
+ // ---------------------------------------------------------------------------
415
+
/**
 * Merge entity candidates from several snapshots.
 *
 * Candidates are identified by `name|source`. The first candidate seen for a
 * key is shallow-copied; later ones contribute the union of their
 * memberScanIds and interactableScanIds, and their signals are merged by
 * `kind:value`, where an incoming signal replaces a stored one only when its
 * weight is strictly higher.
 *
 * @param allSnapshotEntities array of per-snapshot entity arrays
 * @returns merged entity candidates, in first-seen order
 */
const mergeEntities = (allSnapshotEntities) => {
  const signalKey = (sig) => `${sig.kind}:${sig.value}`;
  const union = (left, right) => [...new Set([...left, ...right])];

  const byKey = new Map();

  for (const snapshotEntities of allSnapshotEntities) {
    for (const entity of snapshotEntities) {
      const key = `${entity.name}|${entity.source}`;
      const current = byKey.get(key);

      if (!current) {
        byKey.set(key, { ...entity });
        continue;
      }

      current.memberScanIds = union(current.memberScanIds, entity.memberScanIds);
      current.interactableScanIds = union(current.interactableScanIds, entity.interactableScanIds);

      const strongest = new Map(current.signals.map((sig) => [signalKey(sig), sig]));
      for (const sig of entity.signals) {
        const k = signalKey(sig);
        const isNew = !strongest.has(k);
        if (isNew || strongest.get(k).weight < sig.weight) {
          strongest.set(k, sig);
        }
      }
      current.signals = Array.from(strongest.values());
    }
  }

  return Array.from(byKey.values());
};
445
+
/**
 * Flatten per-snapshot element lists into one list with a single entry per
 * scanId. When a scanId appears more than once the last occurrence is kept,
 * at the position where that scanId was first seen.
 *
 * @param allSnapshotElements array of per-snapshot element arrays
 */
const mergeElements = (allSnapshotElements) => {
  const latestByScanId = new Map();
  for (const snapshotElements of allSnapshotElements) {
    for (const element of snapshotElements) {
      latestByScanId.set(element.scanId, element);
    }
  }
  return Array.from(latestByScanId.values());
};
455
+
/**
 * Flatten per-snapshot a11y lists into one list with a single entry per
 * scanId. When a scanId appears more than once the last occurrence is kept,
 * at the position where that scanId was first seen.
 *
 * @param allSnapshotA11y array of per-snapshot a11y arrays
 */
const mergeA11y = (allSnapshotA11y) => {
  const latestByScanId = new Map();
  for (const snapshotEntries of allSnapshotA11y) {
    for (const a11yEntry of snapshotEntries) {
      latestByScanId.set(a11yEntry.scanId, a11yEntry);
    }
  }
  return Array.from(latestByScanId.values());
};
465
+
/**
 * Concatenate per-snapshot interactable lists into one array, preserving
 * order and keeping duplicates.
 *
 * Entries are appended one at a time rather than with `push(...list)`:
 * spreading passes every entry as a separate call argument, and a large
 * enough list makes the engine throw a RangeError.
 *
 * @param allSnapshotInteractables array of per-snapshot interactable arrays
 */
const mergeInteractables = (allSnapshotInteractables) => {
  const all = [];
  for (const interactables of allSnapshotInteractables) {
    for (const interactable of interactables) {
      all.push(interactable);
    }
  }
  return all;
};
473
+
/**
 * Concatenate per-snapshot locator lists into one array, preserving order
 * and keeping duplicates.
 *
 * Entries are appended one at a time rather than with `push(...list)`:
 * spreading passes every entry as a separate call argument, and a large
 * enough list makes the engine throw a RangeError.
 *
 * @param allSnapshotLocators array of per-snapshot locator arrays
 */
const mergeLocators = (allSnapshotLocators) => {
  const all = [];
  for (const locators of allSnapshotLocators) {
    for (const locator of locators) {
      all.push(locator);
    }
  }
  return all;
};
481
+
/**
 * Normalize a view name into the key used for de-duplication.
 *
 * Strips a trailing "_view", then a trailing "_list", then a trailing
 * "_detail" — each at most once and in that order — so "event_list" and
 * "event_list_view" both become "event". Note that "event_detail" also
 * becomes "event". Nothing is stripped from the start of the name.
 */
const normalizeViewName = (name) => {
  const suffixes = [/_view$/, /_list$/, /_detail$/];
  let normalized = name;
  for (const suffix of suffixes) {
    normalized = normalized.replace(suffix, "");
  }
  return normalized;
};
492
+
/**
 * Merge views across snapshots, one view per normalized name.
 *
 * The key is normalizeViewName(semanticName || name). When several views
 * share a key, the one with the most fields is kept; on a tie the earliest
 * one wins. Kept views are shallow copies.
 *
 * @param allSnapshotViews array of per-snapshot view arrays
 */
const mergeViews = (allSnapshotViews) => {
  const bestByKey = new Map();

  for (const snapshotViews of allSnapshotViews) {
    for (const candidate of snapshotViews) {
      const key = normalizeViewName(candidate.semanticName || candidate.name);
      const incumbent = bestByKey.get(key);

      // Only a strictly richer view displaces the one already stored.
      if (incumbent && !(candidate.fields.length > incumbent.fields.length)) {
        continue;
      }
      bestByKey.set(key, { ...candidate });
    }
  }

  return Array.from(bestByKey.values());
};
511
+
/**
 * Drop the query string from a route pattern: everything from the first "?"
 * onward is removed.
 *   "/home?period=past"    → "/home"
 *   "/home?period=:period" → "/home"
 * A pattern without "?" is returned unchanged.
 */
const normalizeRoutePattern = (pattern) => {
  const queryStart = pattern.indexOf("?");
  if (queryStart < 0) {
    return pattern;
  }
  return pattern.slice(0, queryStart);
};
520
+
/**
 * Union two stateSignals lists, keyed by "kind:value".
 *
 * Signals from `existing` take precedence: an `incoming` signal is added
 * only when its key is not present yet. Either list may be nullish.
 *
 * @returns a new array of signals
 */
const mergeStateSignals = (existing, incoming) => {
  const keyOf = (signal) => `${signal.kind}:${signal.value}`;
  const byKey = new Map();

  for (const signal of existing || []) {
    byKey.set(keyOf(signal), signal);
  }
  for (const signal of incoming || []) {
    const key = keyOf(signal);
    if (byKey.has(key)) continue;
    byKey.set(key, signal);
  }

  return Array.from(byKey.values());
};
537
+
/**
 * Merge page definitions across snapshots, one page per route.
 *
 * Pages are keyed by their routePattern with the query string removed. The
 * first page seen for a route is copied (with the normalized routePattern and
 * fresh viewIds/actionIds/stateSignals arrays); later pages for the same
 * route contribute the union of their viewIds and actionIds and any
 * stateSignals not already present. Nullish entries are ignored.
 *
 * @param allSnapshotPages array of page definitions (entries may be nullish)
 */
const mergePages = (allSnapshotPages) => {
  const union = (left, right) => [...new Set([...left, ...right])];
  const byRoute = new Map();

  for (const page of allSnapshotPages) {
    if (!page) continue;

    const route = normalizeRoutePattern(page.routePattern);
    const target = byRoute.get(route);

    if (!target) {
      byRoute.set(route, {
        ...page,
        routePattern: route,
        viewIds: [...page.viewIds],
        actionIds: [...page.actionIds],
        stateSignals: [...(page.stateSignals || [])]
      });
      continue;
    }

    target.viewIds = union(target.viewIds, page.viewIds);
    target.actionIds = union(target.actionIds, page.actionIds);
    target.stateSignals = mergeStateSignals(target.stateSignals, page.stateSignals);
  }

  return Array.from(byRoute.values());
};
566
+
567
+ // ---------------------------------------------------------------------------
568
+ // Trigger description helper (kept for logging)
569
+ // ---------------------------------------------------------------------------
570
+
/**
 * Render a snapshot trigger as a one-line description for the log.
 *
 * "initial" and "navigation" triggers are described by URL (and title);
 * any other kind is described by its target element — role, tag, then its
 * name or the first 60 characters of its text — followed by the nearest
 * landmark and heading from parentContext when present.
 */
const describeTrigger = (trigger) => {
  if (!trigger) return "Unknown interaction";

  const { kind, target, parentContext } = trigger;

  switch (kind) {
    case "initial":
      return `Initial page load at ${trigger.url || "unknown URL"}`;
    case "navigation":
      return `Navigated to ${trigger.url || "unknown URL"} (title: "${trigger.title || "unknown"}")`;
    default:
      break;
  }

  if (!target) return `${kind} interaction`;

  const words = [`${kind} on`];
  if (target.role) {
    words.push(`[role=${target.role}]`);
  }
  words.push(`<${target.tag}>`);
  if (target.name) {
    words.push(`"${target.name}"`);
  } else if (target.text) {
    words.push(`"${target.text.slice(0, 60)}"`);
  }

  if (parentContext) {
    const { nearestLandmark, nearestHeading } = parentContext;
    if (nearestLandmark) words.push(`within ${nearestLandmark}`);
    if (nearestHeading) words.push(`near heading "${nearestHeading}"`);
  }

  return words.join(" ");
};
593
+
594
+ // ---------------------------------------------------------------------------
595
+ // DiscoverySession
596
+ // ---------------------------------------------------------------------------
597
+
/**
 * One discovery session: collects skeleton snapshots, turns each into a
 * per-snapshot manifest, and merges those manifests on demand
 * (compileCheckpoint) or at the end (finalize).
 */
export class DiscoverySession {
  /**
   * @param {string} sessionId identifier used in log lines and stats
   * @param {string} site      stored on the session as `site`
   */
  constructor(sessionId, site) {
    this.sessionId = sessionId;
    this.site = site;
    this.startedAt = new Date().toISOString();
    this.snapshots = [];
    this.status = "active";
    this.lastEnrichedManifest = null;
    this.checkpointCount = 0;
    this.checkpointNotes = [];
    this.priorManifest = null;
    /** Tail of the serialized addSnapshot chain; never left rejected. */
    this._queue = Promise.resolve();
  }

  /**
   * Seed this session with a prior manifest so _buildMergedManifest()
   * merges new knowledge into existing.
   */
  seedWithManifest(manifest) {
    this.priorManifest = manifest;
  }

  /**
   * Process an incoming skeleton snapshot: perceive → focusAndInspect.
   * Calls are serialized so snapshots are numbered and stored in arrival
   * order.
   *
   * The returned promise settles with this snapshot's outcome (it rejects if
   * processing throws). The internal queue tail swallows that rejection so a
   * single bad payload cannot block later snapshots, checkpoints or
   * finalize().
   *
   * @param {object} payload snapshot payload (`skeleton`, `url`, `title`, ...)
   * @returns {Promise<object>} session stats after the snapshot is stored
   */
  addSnapshot(payload) {
    const run = this._queue.then(() => this._processSnapshot(payload));
    this._queue = run.catch(() => {});
    return run;
  }

  /**
   * Run the per-snapshot pipeline and append the result to `snapshots`.
   * Failures in perception or in focusAndInspect are logged and the snapshot
   * is still recorded, with empty data for whatever could not be built.
   */
  async _processSnapshot(payload) {
    const snapshotNum = this.snapshots.length + 1;
    const snapshotId = payload.snapshotId || `snap_${snapshotNum}`;
    const trigger = payload.trigger || null;
    const pageText = payload.pageText || "";
    const capturedAt = payload.capturedAt || new Date().toISOString();
    const url = payload.url || "unknown";
    const title = payload.title || "unknown";
    const skeleton = Array.isArray(payload.skeleton) ? payload.skeleton : [];
    const pageState = payload.pageState || null;

    console.log(
      `[browserwire-cli] session ${this.sessionId} snapshot #${snapshotNum}: ` +
        `trigger=${trigger?.kind || "unknown"} skeleton=${skeleton.length}`
    );
    if (trigger) {
      console.log(`[browserwire-cli] trigger: ${describeTrigger(trigger)}`);
    }

    // Step 1 — Vision LLM perception (best effort)
    let perception = null;
    try {
      perception = await perceiveSnapshot({
        skeleton,
        screenshot: payload.screenshot || null,
        pageText,
        url,
        title
      });
    } catch (error) {
      console.warn(`[browserwire-cli] perception failed: ${error.message}`);
    }

    // Step 2 — Build locators + manifest from focused skeleton (best effort)
    let result = null;
    try {
      result = focusAndInspect({ skeleton, perception, url, title, capturedAt, pageState });
      console.log(
        `[browserwire-cli] manifest: ${result.stats.manifest?.entityCount ?? 0} entities, ` +
          `${result.stats.manifest?.actionCount ?? 0} actions, ` +
          `${result.stats.locator?.total ?? 0} locators` +
          (result.views?.length ? `, ${result.views.length} views` : "")
      );
    } catch (error) {
      console.warn(`[browserwire-cli] focusAndInspect failed: ${error.message}`);
    }

    // Remember the most recent manifest that carries a domain.
    if (result?.manifest?.domain) {
      this.lastEnrichedManifest = result.manifest;
    }

    this.snapshots.push({
      snapshotId,
      trigger,
      url,
      title,
      capturedAt,
      elements: result?.elements || [],
      a11y: result?.a11y || [],
      interactables: result?.interactables || [],
      entities: result?.entities || [],
      locators: result?.locators || [],
      views: result?.views || [],
      page: result?.page || null,
      manifest: result?.manifest || null,
      stats: result?.stats || {}
    });

    return this.getStats();
  }

  /**
   * Internal merge logic shared by finalize() and compileCheckpoint().
   * Merges the prior manifest (if seeded) and all per-snapshot manifests into
   * one manifest, then attaches synthesized workflows.
   *
   * @returns {Promise<object|null>} merged manifest, or null when there is
   *   nothing to merge
   */
  async _buildMergedManifest() {
    let snapshotManifests = this.snapshots.map((s) => s.manifest).filter(Boolean);
    if (this.priorManifest) {
      snapshotManifests = [this.priorManifest, ...snapshotManifests];
    }
    if (snapshotManifests.length === 0) return null;

    // --- Merge entities: by name slug, prefer non-orphan ---
    const isOrphan = (entity) => entity.id.startsWith("entity_orphan_");
    const entitiesBySlug = new Map();
    for (const m of snapshotManifests) {
      for (const entity of m.entities || []) {
        const nameSlug = slugify(entity.name);
        const existing = entitiesBySlug.get(nameSlug);
        if (!existing || (!isOrphan(entity) && isOrphan(existing))) {
          entitiesBySlug.set(nameSlug, entity);
        }
      }
    }
    const mergedEntities = [...entitiesBySlug.values()];
    const entityIdSet = new Set(mergedEntities.map((e) => e.id));

    // --- Merge actions: by name, keep highest confidence ---
    const actionsByName = new Map();
    for (const m of snapshotManifests) {
      for (const action of m.actions || []) {
        const existing = actionsByName.get(action.name);
        if (!existing || (action.confidence?.score || 0) > (existing.confidence?.score || 0)) {
          actionsByName.set(action.name, { ...action });
        }
      }
    }

    // Fix entity references — remap to surviving entities. The stored
    // actions are copies, so updating them does not touch snapshot data.
    const mergedActions = [...actionsByName.values()].map((action) => {
      if (action.entityId && !entityIdSet.has(action.entityId)) {
        const stripped = action.entityId.replace(/^entity_/, "").replace(/_\d+$/, "");
        const match = mergedEntities.find((e) => slugify(e.name) === stripped);
        if (match) action.entityId = match.id;
      }
      return action;
    });

    // --- Merge views + pages (include prior manifest data) ---
    const priorViews = this.priorManifest?.views || [];
    const priorPages = this.priorManifest?.pages || [];
    const mergedViewDefs = mergeViews([priorViews, ...this.snapshots.map((s) => s.views || [])]);
    const mergedPageDefs = mergePages([
      ...priorPages,
      ...this.snapshots.map((s) => s.page).filter(Boolean)
    ]);

    // Rewrite page actionIds so each page references every surviving action
    // at most once, by the surviving action's ID.
    const actionIdToName = new Map();
    for (const m of snapshotManifests) {
      for (const a of m.actions || []) {
        actionIdToName.set(a.id, a.name);
      }
    }
    // mergedActions holds one action per name, so a Map lookup replaces the
    // repeated linear search.
    const survivingByName = new Map(mergedActions.map((a) => [a.name, a]));

    for (const page of mergedPageDefs) {
      const seenNames = new Set();
      const remapped = [];
      for (const aid of page.actionIds) {
        const name = actionIdToName.get(aid);
        if (!name || seenNames.has(name)) continue;
        seenNames.add(name);
        const surviving = survivingByName.get(name);
        if (surviving) remapped.push(surviving.id);
      }
      page.actionIds = remapped;
    }

    // Assemble final manifest (preserve original metadata from prior if available)
    const base = this.priorManifest || snapshotManifests[0];
    const manifest = {
      contractVersion: base.contractVersion || "1.0.0",
      manifestVersion: base.manifestVersion || "0.1.0",
      metadata: { ...base.metadata, updatedAt: new Date().toISOString() },
      entities: mergedEntities,
      actions: mergedActions,
      errors: base.errors || []
    };

    if (mergedViewDefs.length > 0) manifest.views = mergedViewDefs;
    if (mergedPageDefs.length > 0) manifest.pages = mergedPageDefs;

    if (this.lastEnrichedManifest) {
      if (this.lastEnrichedManifest.domain) manifest.domain = this.lastEnrichedManifest.domain;
      if (this.lastEnrichedManifest.domainDescription) {
        manifest.domainDescription = this.lastEnrichedManifest.domainDescription;
      }
    }
    // Carry forward domain info from prior manifest if not set by current session
    if (!manifest.domain && this.priorManifest?.domain) {
      manifest.domain = this.priorManifest.domain;
    }
    if (!manifest.domainDescription && this.priorManifest?.domainDescription) {
      manifest.domainDescription = this.priorManifest.domainDescription;
    }

    // Synthesize task-level workflow APIs from the merged manifest
    const workflowDefs = await synthesizeWorkflows(manifest);

    // Resolve workflow step references (actionId/viewId) to inline execution data
    const actionMap = new Map(mergedActions.map((a) => [a.id, a]));
    const viewMap = new Map((manifest.views || []).map((v) => [v.id, v]));

    for (const wf of workflowDefs) {
      for (const step of wf.steps || []) {
        if (step.actionId) {
          const action = actionMap.get(step.actionId);
          if (action?.locatorSet?.strategies) {
            step.strategies = action.locatorSet.strategies;
          }
        }
        if (step.viewId) {
          const view = viewMap.get(step.viewId);
          if (view) {
            step.viewConfig = {
              containerLocator: view.containerLocator?.strategies || [],
              itemLocator: view.itemLocator || null,
              fields: view.fields || [],
              isList: view.isList || false
            };
          }
        }
      }
    }

    if (workflowDefs.length > 0) manifest.workflowActions = workflowDefs;

    console.log(
      `[browserwire-cli] merged manifest: ${mergedEntities.length} entities, ` +
        `${mergedActions.length} actions` +
        (mergedViewDefs.length ? `, ${mergedViewDefs.length} views` : "") +
        (mergedPageDefs.length ? `, ${mergedPageDefs.length} pages` : "") +
        (workflowDefs.length ? `, ${workflowDefs.length} workflows` : "")
    );

    return manifest;
  }

  /**
   * Finalize the session: wait for queued snapshots, mark the session
   * "stopped", and merge the per-snapshot manifests with deduplication.
   * Merging happens at the manifest level (not on raw data) so the semantic
   * names applied per snapshot are preserved.
   *
   * @returns {Promise<object>} `{ manifest, draftManifest, enrichedManifest, stats }`,
   *   or `{ manifest: null, stats }` when there is nothing to merge
   */
  async finalize() {
    await this._queue;
    this.status = "stopped";

    if (this.snapshots.length === 0) {
      console.log(`[browserwire-cli] session ${this.sessionId} finalized with 0 snapshots`);
      return { manifest: null, stats: this.getStats() };
    }

    console.log(
      `[browserwire-cli] session ${this.sessionId} finalizing ${this.snapshots.length} snapshots`
    );

    const manifest = await this._buildMergedManifest();
    if (!manifest) {
      return { manifest: null, stats: this.getStats() };
    }

    return {
      manifest,
      draftManifest: manifest,
      enrichedManifest: manifest,
      stats: this.getStats()
    };
  }

  /**
   * Compile a checkpoint manifest from current snapshots without stopping the session.
   * Increments checkpointCount and records the note. Does NOT set status = "stopped".
   *
   * @param {string} [note] - Optional annotation about what was just explored
   * @returns {{ manifest, draftManifest, enrichedManifest, checkpointIndex, stats }}
   */
  async compileCheckpoint(note) {
    await this._queue;

    const checkpointIndex = this.checkpointCount;
    this.checkpointCount += 1;
    if (note) this.checkpointNotes.push(note);

    if (this.snapshots.length === 0) {
      console.log(
        `[browserwire-cli] session ${this.sessionId} checkpoint-${checkpointIndex}: no snapshots`
      );
      return {
        manifest: null,
        draftManifest: null,
        enrichedManifest: null,
        checkpointIndex,
        stats: this.getStats()
      };
    }

    console.log(
      `[browserwire-cli] session ${this.sessionId} checkpoint-${checkpointIndex}: ` +
        `${this.snapshots.length} snapshots${note ? ` ("${note}")` : ""}`
    );

    const manifest = await this._buildMergedManifest();

    return {
      manifest,
      draftManifest: manifest,
      enrichedManifest: manifest,
      checkpointIndex,
      stats: this.getStats()
    };
  }

  /**
   * Summarize the session. entityCount, actionCount and viewCount are the
   * largest values seen in any single snapshot, not sums across snapshots.
   */
  getStats() {
    let totalEntities = 0;
    let totalActions = 0;
    let totalViews = 0;
    for (const snap of this.snapshots) {
      if (snap.manifest) {
        totalEntities = Math.max(totalEntities, snap.manifest.entities?.length || 0);
        totalActions = Math.max(totalActions, snap.manifest.actions?.length || 0);
        totalViews = Math.max(totalViews, snap.manifest.views?.length || 0);
      } else {
        // No manifest for this snapshot: fall back to its raw entity candidates.
        totalEntities = Math.max(totalEntities, snap.entities?.length || 0);
      }
      totalViews = Math.max(totalViews, snap.views?.length || 0);
    }
    return {
      sessionId: this.sessionId,
      snapshotCount: this.snapshots.length,
      entityCount: totalEntities,
      actionCount: totalActions,
      viewCount: totalViews,
      checkpointCount: this.checkpointCount,
      status: this.status
    };
  }
}