screenhand 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,6 +32,7 @@ export class PlanExecutor {
32
32
  config;
33
33
  /** Accumulated execution trace for current goal — reset on each executeGoal() call */
34
34
  log = [];
35
+ appMap = null;
35
36
  constructor(worldModel, planner, executeTool, config, recovery, learningEngine) {
36
37
  this.worldModel = worldModel;
37
38
  this.planner = planner;
@@ -40,6 +41,13 @@ export class PlanExecutor {
40
41
  this.learningEngine = learningEngine;
41
42
  this.config = { ...DEFAULT_PLANNER_CONFIG, ...config };
42
43
  }
44
+ /**
45
+ * Store the AppMap consulted for contract precondition checks, postcondition inference, and verified-position hints.
46
+ * Wire #7: L7→L4. While appMap is null (the initial value) those AppMap-guarded branches are skipped.
47
+ */
48
+ setAppMap(map) {
49
+ this.appMap = map;
50
+ }
43
51
  dbg(msg) {
44
52
  const line = `[${new Date().toISOString().substring(11, 23)}] ${msg}`;
45
53
  this.log.push(line);
@@ -380,7 +388,51 @@ export class PlanExecutor {
380
388
  }
381
389
  }
382
390
  }
383
- // 3. Focus validation: for type_text, verify a text field is focused
391
+ // 3. Contract precondition check (Wire #7: L7→L4)
392
+ // If AppMap has a contract for this element+action, verify preconditions are met.
393
+ // Violations are collected in preconditionWarnings and later prepended to the step description.
394
+ const preconditionWarnings = [];
395
+ if (this.appMap && INTERACTION_TOOLS.has(step.tool)) {
396
+ const target = (step.params.title ?? step.params.text ?? step.params.name);
397
+ const bundleId = preState.focusedApp?.bundleId;
398
+ if (target && bundleId) {
399
+ try {
400
+ // Normalize action key for contract lookup
401
+ const actionKey = step.tool
402
+ .replace(/_with_fallback$/, "")
403
+ .replace(/^browser_/, "")
404
+ .replace(/^click_text$/, "click")
405
+ .replace(/^ui_press$/, "click")
406
+ .replace(/^ui_set_value$/, "type");
407
+ const contractInfo = this.appMap.getContract(bundleId, target, actionKey);
408
+ if (contractInfo) {
409
+ const contract = contractInfo.contract;
410
+ // Verify preconditions
411
+ for (const precondition of contract.preconditions) {
412
+ const precLower = precondition.toLowerCase();
413
+ // Check "no dialogs" precondition
414
+ if (precLower.includes("no dialog") && preState.activeDialogs.length > 0) {
415
+ this.dbg(` L7PC | Precondition failed: "${precondition}" — dialog present`);
416
+ preconditionWarnings.push(`dialog present — ${precondition}`);
417
+ }
418
+ // Check "input focused" precondition for type tools
419
+ if (precLower.includes("focused") && step.tool.includes("type")) {
420
+ const win = preState.focusedWindowId ? preState.windows.get(preState.focusedWindowId) : null;
421
+ if (win?.focusedElement) {
422
+ const role = win.focusedElement.role.toLowerCase();
423
+ if (!role.includes("text") && !role.includes("field") && !role.includes("area")) {
424
+ this.dbg(` L7PC | Precondition warning: "${precondition}" — focused: ${win.focusedElement.role}`);
425
+ preconditionWarnings.push(`wrong focus (${win.focusedElement.role}) — ${precondition}`);
426
+ }
427
+ }
428
+ }
429
+ }
430
+ }
431
+ }
432
+ catch { /* best-effort contract check */ }
433
+ }
434
+ }
435
+ // 4. Focus validation: for type_text, verify a text field is focused
384
436
  if (step.tool === "type_text") {
385
437
  const focusedWinId = preState.focusedWindowId;
386
438
  if (focusedWinId !== null) {
@@ -397,6 +449,11 @@ export class PlanExecutor {
397
449
  }
398
450
  }
399
451
  }
452
+ // Surface precondition warnings in step description (visible to calling agent)
453
+ if (preconditionWarnings.length > 0) {
454
+ const warningText = preconditionWarnings.map((w) => `[⚠ PRECONDITION: ${w}]`).join(" ");
455
+ step.description = `${warningText} ${step.description}`;
456
+ }
400
457
  // ── ACT: Execute the tool ──
401
458
  // Auto-upgrade click_text → ui_press when world model already has the target via AX.
402
459
  // click_text uses cg.captureWindow (crashes on GPU-heavy pages) + OCR (slow, sometimes wrong tab).
@@ -464,7 +521,31 @@ export class PlanExecutor {
464
521
  params._budget = this.learningEngine.getAdaptiveBudget(bundleId);
465
522
  }
466
523
  }
467
- const stepTimeout = Math.max(step.timeout || 0, this.config.defaultStepTimeout);
524
+ // Wire F4: Inject AppMap verified positions as fallback coordinates (L4→L1)
525
+ if (this.appMap && INTERACTION_TOOLS.has(step.tool)) {
526
+ const target = (params.title ?? params.text ?? params.name);
527
+ const bundleId = preState.focusedApp?.bundleId;
528
+ if (target && bundleId && !params.x && !params.y) {
529
+ if (this.appMap.isElementVerified(bundleId, target)) {
530
+ const winId = preState.focusedWindowId;
531
+ const win = winId != null ? preState.windows.get(winId) : null;
532
+ if (win) {
533
+ const pos = this.appMap.resolvePosition(bundleId, target, win.bounds.value);
534
+ if (pos) {
535
+ params._mapHintX = pos.x;
536
+ params._mapHintY = pos.y;
537
+ }
538
+ }
539
+ }
540
+ }
541
+ }
542
+ // Wire F1: Blend adaptive budget into step timeout (L5→L4): with a budget, its locate+act+verify sum (min 3000ms) replaces defaultStepTimeout as the floor
543
+ let adaptiveTimeout = this.config.defaultStepTimeout;
544
+ const budget = params._budget;
545
+ if (budget) {
546
+ adaptiveTimeout = Math.max(budget.locateMs + budget.actMs + budget.verifyMs, 3000);
547
+ }
548
+ const stepTimeout = Math.max(step.timeout || 0, adaptiveTimeout);
468
549
  this.dbg(` ACT | calling ${step.tool} (timeout=${stepTimeout}ms)`);
469
550
  let result = await this.tryToolWithTimeout(step.tool, params, stepTimeout);
470
551
  this.dbg(` ACT | ok=${result.ok}${result.ok ? "" : ` error="${result.error}"`}`);
@@ -665,6 +746,48 @@ export class PlanExecutor {
665
746
  return { type: "app_focused", target: bundleId };
666
747
  }
667
748
  }
749
+ // Wire #7: L7→L4 — Contract-based postcondition inference.
750
+ // If AppMap has a reliable outcome for this element+action, use it.
751
+ if (this.appMap && INTERACTION_TOOLS.has(step.tool)) {
752
+ const target = (step.params.title ?? step.params.text ?? step.params.name);
753
+ const bundleId = this.worldModel.getState().focusedApp?.bundleId;
754
+ if (target && bundleId) {
755
+ try {
756
+ // Normalize action key for contract lookup
757
+ const postActionKey = step.tool
758
+ .replace(/_with_fallback$/, "")
759
+ .replace(/^browser_/, "")
760
+ .replace(/^click_text$/, "click")
761
+ .replace(/^ui_press$/, "click")
762
+ .replace(/^ui_set_value$/, "type");
763
+ const contractInfo = this.appMap.getContract(bundleId, target, postActionKey);
764
+ if (contractInfo) {
765
+ const contract = contractInfo.contract;
766
+ // Use the first outcome flagged reliable that has been seen at least 3 times
767
+ const reliableOutcome = contract.outcomes.find((o) => o.reliable && o.seenCount >= 3);
768
+ if (reliableOutcome) {
769
+ const desc = reliableOutcome.description.toLowerCase();
770
+ // Map outcome descriptions to assertions:
771
+ // "dialog closed", "modal dismissed" → dialog_absent
772
+ if (desc.includes("dialog") && (desc.includes("closed") || desc.includes("dismissed"))) {
773
+ return { type: "dialog_absent", target: "" };
774
+ }
775
+ // "X visible", "X appears", "X shown", "X displayed" → text_visible
776
+ const visibleMatch = reliableOutcome.description.match(/["']?(\w[\w\s]*?)["']?\s+(?:visible|appears|shown|displayed)/i);
777
+ if (visibleMatch) {
778
+ return { type: "text_visible", target: visibleMatch[1].trim() };
779
+ }
780
+ // Generic: use a short outcome description (3–49 chars) verbatim as the text_visible target
781
+ if (reliableOutcome.description.length >= 3 && reliableOutcome.description.length < 50) {
782
+ this.dbg(` L7PC | Using contract outcome: "${reliableOutcome.description}"`);
783
+ return { type: "text_visible", target: reliableOutcome.description };
784
+ }
785
+ }
786
+ }
787
+ }
788
+ catch { /* best-effort */ }
789
+ }
790
+ }
668
791
  // Navigation or state-changing click → next step's target should be visible
669
792
  if (nextStep && STATE_CHANGING_TOOLS.has(step.tool)) {
670
793
  const nextTarget = (nextStep.params.text ?? nextStep.params.title ?? nextStep.params.name);