npm - @zibby/core - Versions diffs - 0.4.6 → 0.5.0 - Mend

@zibby/core 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

package/dist/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zibby/core",
-  "version": "0.4.6",
+  "version": "0.5.0",
   "description": "Core test automation engine with multi-agent and multi-MCP support",
   "type": "module",
   "main": "dist/index.js",
@@ -17,7 +17,6 @@
     "./utils/parallel-config.js": "./dist/utils/parallel-config.js",
     "./utils/run-registry.js": "./dist/utils/run-registry.js",
     "./utils/run-index-merge.js": "./dist/utils/run-index-merge.js",
-    "./utils/run-index-post-cli.js": "./dist/utils/run-index-post-cli.js",
     "./utils/run-state-session.js": "./dist/utils/run-state-session.js",
     "./utils/session-state-live-runs.js": "./dist/utils/session-state-live-runs.js",
     "./utils/mission-control-from-run-states.js": "./dist/utils/mission-control-from-run-states.js",
@@ -26,8 +25,6 @@
     "./utils/run-capacity-queue.js": "./dist/utils/run-capacity-queue.js",
     "./utils/cursor-mcp-isolated-home.js": "./dist/utils/cursor-mcp-isolated-home.js",
     "./constants/zibby-scratch.js": "./dist/constants/zibby-scratch.js",
-    "./templates": "./templates/index.js",
-    "./templates/*": "./templates/*",
     "./package.json": "./package.json"
   },
   "scripts": {
@@ -59,10 +56,6 @@
   },
   "files": [
     "dist/",
-    "templates/",
-    "!templates/**/__tests__/",
-    "!templates/**/*.test.js",
-    "!templates/**/*.spec.js",
     "README.md",
     "LICENSE"
   ],

package/dist/utils/run-index-post-cli.js CHANGED Viewed

@@ -1,4 +1 @@
-import{existsSync as P,readdirSync as F,statSync as Q}from"fs";import{join as a,relative as B,sep as x,resolve as m}from"path";import{appendFileSync as H,readFileSync as z,existsSync as _,mkdirSync as $}from"node:fs";import{join as h}from"node:path";import{DEFAULT_OUTPUT_BASE as E}from"@zibby/agent-workflow";var b="run-index.jsonl";function A(e,t=E){let n=h(e,t);return h(n,b)}function w(e){if(!e||!e.sessionId)return;let t=e.cwd||process.cwd(),n=e.outputBase||E,s=h(t,n);_(s)||$(s,{recursive:!0});let r=h(s,b),i=`${JSON.stringify(e)}
-`;H(r,i,"utf8")}function D(e){if(!e||!_(e))return[];let t;try{t=z(e,"utf8")}catch{return[]}let n=[];for(let s of t.split(`
-`)){let r=s.trim();if(r)try{n.push(JSON.parse(r))}catch{}}return n}import{existsSync as J,mkdirSync as K,readFileSync as W,readdirSync as de,statSync as fe,writeFileSync as G}from"node:fs";import{join as V}from"node:path";var X="zibby-run-state.json";function M(e){return V(e,X)}function R(e){if(!e||typeof e!="string")return null;let t=M(e);if(!J(t))return null;try{let n=W(t,"utf8"),s=JSON.parse(n);return s&&typeof s=="object"?s:null}catch{return null}}function v(e,t){if(!e||typeof e!="string")return;try{K(e,{recursive:!0})}catch{return}let s={...R(e)||{v:1},...t,v:1,updatedAt:Date.now()};try{G(M(e),`${JSON.stringify(s)}
-`,"utf8")}catch(r){console.warn(`[zibby run-state] ${r.message}`)}}function q(e){return e?.recordKind==="progress"}function O(e){let t=Number(e)||0;return t<=0?0:t<1e12?t*1e3:t}function C(e,t={}){let n=t.maxProgressAgeMs!=null&&Number.isFinite(t.maxProgressAgeMs)?Math.max(0,t.maxProgressAgeMs):21e5,s=typeof t.now=="number"?t.now:Date.now(),{summary:r,progress:i}=e||{};if(!i)return!1;let o=O(i.ts);if(n>0&&o>0&&s-o>n)return!1;if(!r)return o>0;let d=O(r.ts);return o>d}function k(e){let t=new Map;for(let n of e||[]){if(!n?.sessionId)continue;let s=t.get(n.sessionId);s||(s={summary:null,progress:null});let r=Number(n.ts)||0;q(n)?(!s.progress||r>=(Number(s.progress.ts)||0))&&(s.progress=n):(!s.summary||r>=(Number(s.summary.ts)||0))&&(s.summary=n),t.set(n.sessionId,s)}return t}import{DEFAULT_OUTPUT_BASE as I,SESSIONS_DIR as y}from"@zibby/agent-workflow";var N=Object.freeze(["preflight","execute_live","generate_script"]);function T(e){let t=process.env.ZIBBY_STUDIO_TEST_CASE_ID;return t!=null&&String(t).trim()!==""?String(t).trim():e!=null?String(e):""}var ee=[a("generate_script","generated-test.spec.js"),a("generate_script","generated-test.spec.ts"),a("generate_script","playwright.spec.ts"),a("generate_script","test.spec.ts")];function te(e){let t=[a(e,"execute_live","videos"),a(e,"execute_live"),e];for(let n of t){if(!P(n))continue;let s;try{s=F(n)}catch{continue}let r=s.find(i=>i.endsWith(".webm"));if(r)return a(n,r)}return null}function ne(e){let t=[a(e,"execute_live","events.json"),a(e,"events.json")];for(let n of t)if(P(n))return n;return null}function se(e){for(let t of ee){let n=a(e,t);if(P(n))return n}return null}function re(e){return!e||!P(e)?{videoPathAbs:null,eventsPathAbs:null,scriptPathAbs:null}:{videoPathAbs:te(e),eventsPathAbs:ne(e),scriptPathAbs:se(e)}}function j(e){let t=e.cwd||process.cwd(),n=e.outputBase||I,i=((e.result||{}).state||{}).sessionPath;if(!i||typeof i!="string")return null;let o=i.split(/[/\\]/).filter(Boolean).pop();if(!o)return null;let{videoPathAbs:d,eventsPathAbs:f,scriptPathAbs:l}=re(i),p=u=>{if(!u)return null;try{return B(t,u).split(x).join("/")}catch{return null}},c=null;if(e.specPath)try{let u=m(t,e.specPath);c=B(t,u).split(x).join("/")}catch{c=String(e.specPath).split(x).join("/")}return{v:1,recordKind:"summary",ts:Date.now(),sessionId:o,status:e.status??(e.success?"completed":"failed"),cwd:t,outputBase:n,sessionPathAbs:i,sessionDirRel:p(i),videoPathAbs:d||null,eventsPathAbs:f||null,scriptPathAbs:l||null,videoRel:p(d),eventsRel:p(f),scriptRel:p(l),specRel:c,source:process.env.ZIBBY_RUN_SOURCE||"cli",studioTestCaseId:T(o)||null,errorMessage:e.errorMessage||null}}function U({cwd:e,config:t,result:n,success:s,specPath:r,errorMessage:i}){try{let o=j({cwd:e||process.cwd(),result:n,success:s,outputBase:t?.paths?.output||I,specPath:r,errorMessage:i});o&&(w(o),o.sessionPathAbs&&v(o.sessionPathAbs,{sessionId:o.sessionId,studioTestCaseId:o.studioTestCaseId||o.sessionId,status:o.status,activeNode:null,activeStageIndex:null,errorMessage:o.errorMessage||null,runSource:o.source||"cli",cwd:o.cwd,outputBase:o.outputBase,sessionPathAbs:o.sessionPathAbs}))}catch(o){console.warn(`[zibby browser-test run-index] ${o.message}`)}}function oe({sessionPath:e,sessionId:t,cwd:n,outputBase:s=I}={}){let r=n||process.cwd(),i=s||I,o=t!=null&&String(t).trim()!==""?String(t).trim():null,d=process.env.ZIBBY_PIN_SESSION_PATH==="1"||process.env.ZIBBY_PIN_SESSION_PATH==="true",f=process.env.ZIBBY_SESSION_PATH&&String(process.env.ZIBBY_SESSION_PATH).trim();if(d&&f)return m(f);let l=e&&String(e).trim();if(l)return m(l);let p=process.env.ZIBBY_SESSIONS_ROOT&&String(process.env.ZIBBY_SESSIONS_ROOT).trim();return p&&o?m(a(p,o)):process.env.ZIBBY_SESSION_PATH&&String(process.env.ZIBBY_SESSION_PATH).trim()?m(String(process.env.ZIBBY_SESSION_PATH).trim()):m(a(r,i,y,o||"invalid"))}function ie(e){try{let t=e?.currentNode;if(!t||!N.includes(t))return;let n=e.sessionPath,s=e.sessionId||n&&String(n).split(/[/\\]/).filter(Boolean).pop()||null;if(!s)return;let r=e.cwd||process.cwd(),i=e.outputBase||I,o=N.indexOf(t),d=e?.specPath!=null?String(e.specPath).trim():"",f=e?.taskDescription!=null?String(e.taskDescription):"",l=null;if(d)try{let c=m(r,d);l=B(r,c).split(x).join("/")}catch{l=d.split(x).join("/")}let p=oe({sessionPath:n,sessionId:s,cwd:r,outputBase:i});w({v:1,recordKind:"progress",ts:Date.now(),sessionId:s,cwd:r,outputBase:i,sessionPathAbs:p,activeNode:t,activeStageIndex:o,specRel:l,taskDescription:f||null,studioTestCaseId:T(s)||null,source:process.env.ZIBBY_RUN_SOURCE||"cli"}),v(p,{sessionId:s,studioTestCaseId:T(s)||s,status:"running",activeNode:t,activeStageIndex:o,sessionPathAbs:p,cwd:r,outputBase:i,specPath:l||null,task:f||null,taskDescription:f||null,runSource:process.env.ZIBBY_RUN_SOURCE||"cli",pid:typeof process.pid=="number"?process.pid:null})}catch(t){console.warn(`[zibby browser-test run-index progress] ${t.message}`)}}function Y({cwd:e,config:t}={}){let n=e||process.cwd(),s=t?.paths?.output||I;return r=>{ie({cwd:r?.cwd||n,outputBase:r?.outputBase||s,sessionPath:r?.sessionPath,sessionId:r?.sessionId,currentNode:r?.currentNode,specPath:r?.specPath,taskDescription:r?.taskDescription})}}function Z(e={}){try{let t=e.cwd||process.cwd(),n=e.config?.paths?.output||e.outputBase||I,s=A(t,n),r=D(s),i=k(r),o=new Set,d=e.errorMessage||"Run stopped (SIGINT/SIGTERM) before a normal summary was written.",f=(c,u)=>{if(!c||!u||o.has(c))return;o.add(c);let S=j({cwd:t,outputBase:n,result:{state:{sessionPath:u}},success:!1,specPath:null,status:"interrupted",errorMessage:d});S&&(w(S),v(u,{sessionId:c,studioTestCaseId:S.studioTestCaseId||c,status:"interrupted",activeNode:null,activeStageIndex:null,errorMessage:S.errorMessage||null,runSource:S.source||"cli",cwd:t,outputBase:n,sessionPathAbs:u}))};for(let[c,u]of i){if(!C(u))continue;let S=u.progress;if(!S)continue;let g=String(c),L=S.sessionPathAbs&&String(S.sessionPathAbs)||a(t,n,y,g);f(g,L)}let l=a(t,n,y);if(!P(l))return;let p;try{p=F(l)}catch{return}for(let c of p){let u=a(l,c),S;try{S=Q(u)}catch{continue}if(!S.isDirectory())continue;let g=R(u);!g||g.status!=="running"||f(String(c),u)}}catch(t){console.warn(`[zibby browser-test run-index interrupt] ${t.message}`)}}function Te(e){U(e)}function _e(e){Z(e)}function Ee(e){return Y(e)}export{Ee as createCliRunIndexPipelineProgressAppender,_e as postCliInterruptedRunIndex,Te as postCliRunIndex};
+function e(n){}function t(n){}function o(n){return null}export{o as createCliRunIndexPipelineProgressAppender,t as postCliInterruptedRunIndex,e as postCliRunIndex};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zibby/core",
-  "version": "0.4.6",
+  "version": "0.5.0",
   "description": "Core test automation engine with multi-agent and multi-MCP support",
   "type": "module",
   "main": "dist/index.js",
@@ -17,7 +17,6 @@
     "./utils/parallel-config.js": "./dist/utils/parallel-config.js",
     "./utils/run-registry.js": "./dist/utils/run-registry.js",
     "./utils/run-index-merge.js": "./dist/utils/run-index-merge.js",
-    "./utils/run-index-post-cli.js": "./dist/utils/run-index-post-cli.js",
     "./utils/run-state-session.js": "./dist/utils/run-state-session.js",
     "./utils/session-state-live-runs.js": "./dist/utils/session-state-live-runs.js",
     "./utils/mission-control-from-run-states.js": "./dist/utils/mission-control-from-run-states.js",
@@ -26,8 +25,6 @@
     "./utils/run-capacity-queue.js": "./dist/utils/run-capacity-queue.js",
     "./utils/cursor-mcp-isolated-home.js": "./dist/utils/cursor-mcp-isolated-home.js",
     "./constants/zibby-scratch.js": "./dist/constants/zibby-scratch.js",
-    "./templates": "./templates/index.js",
-    "./templates/*": "./templates/*",
     "./package.json": "./package.json"
   },
   "scripts": {
@@ -59,10 +56,6 @@
   },
   "files": [
     "dist/",
-    "templates/",
-    "!templates/**/__tests__/",
-    "!templates/**/*.test.js",
-    "!templates/**/*.spec.js",
     "README.md",
     "LICENSE"
   ],

package/dist/templates/browser-test-automation/README.md DELETED Viewed

@@ -1,136 +0,0 @@
-# Browser Test Automation Workflow
-This is YOUR workflow graph. You can customize it however you want!
-Works with **Claude** or **Cursor** agents (configured in `.zibby.config.mjs`).
-## Default Flow
-```
-preflight → execute_live → generate_script
-```
-The workflow generates a test title, executes the test live in a **browser** with AI assistance, and generates a Playwright script with stable selectors.
-## Customization
-### Add Custom Nodes
-Create a new file in `nodes/`:
-```javascript
-// nodes/send-slack.js
-export const sendSlackNode = {
-  name: 'send_slack',
-  agent: { type: 'openai', model: 'gpt-4o-mini' },
-  prompt: (state) => `Send Slack notification...`,
-  outputSchema: { success: { type: 'boolean', required: true } }
-};
-```
-Then add it to your graph in `graph.js`:
-```javascript
-import { sendSlackNode } from './nodes/send-slack.js';
-buildGraph() {
-  const graph = new WorkflowGraph();
-  // ... existing nodes
-  graph.addNode('send_slack', sendSlackNode);
-  graph.addEdge('verify_script', 'send_slack');
-  return graph;
-}
-```
-### Multi-Agent Configuration
-Each node can use a different LLM:
-```javascript
-graph.addNode('generate_title', {
-  agent: { type: 'claude', model: 'claude-sonnet-4' },
-  prompt: (state) => `Generate title...`
-});
-graph.addNode('verify_script', {
-  agent: { type: 'deepseek', model: 'deepseek-coder' }, // Cheap & fast
-  prompt: (state) => `Run test...`
-});
-graph.addNode('update_jira', {
-  agent: { type: 'ollama', model: 'llama3' }, // Local for privacy
-  prompt: (state) => `Update Jira...`
-});
-```
-### Skip Nodes
-Comment out nodes you don't need:
-```javascript
-// graph.addNode('verify_script', verifyScriptNode);
-graph.addEdge('generate_script', 'update_jira'); // Skip verification
-```
-### Parallel Execution
-Run multiple nodes in parallel:
-```javascript
-graph.addParallelEdges('verify_script', [
-  'send_slack',
-  'update_jira',
-  'log_datadog'
-]);
-```
-## Configuration
-Edit `.zibby.config.mjs` to set your default agent and optional per-node model overrides:
-```javascript
-export default {
-  agent: {
-    cursor: { model: 'auto' }, // or claude: { model: 'auto' }
-    strictMode: false,
-  },
-  models: {
-    default: 'auto',
-    execute_live: 'auto',
-    generate_script: 'auto',
-  },
-};
-```
-## Studio / Scripts tab (code discovery)
-Runs write `generate_script/result.json` with a `scriptPath` (often under your repo `tests/`). After the graph finishes, **`BrowserTestResultHandler.ensureStudioCodegenMirror`** copies that file into the session folder under stable names so tools don’t need Studio running at generation time:
-| File (under `.zibby/output/sessions/<sessionId>/generate_script/`) | Role |
-|---------------------------------------------------------------------|------|
-| `generated-test.spec.js` | Playwright (`.js`) |
-| `playwright.spec.ts` | Playwright (`.ts` / `.tsx` source) |
-| `test.selenium.py` | Selenium |
-**Electron Studio** resolves these via `discoverCodegenArtifactsElectron` (after `session/codegen/`).
-**Web Studio** (`VITE_STUDIO_API_ORIGIN`, e.g. `:3847`) should implement `GET /api/sessions/:id/codegen/playwright` (and `/selenium`) by reading, in order:
-1. `sessions/<id>/codegen/` legacy JIT names (`test.spec.ts`, `generated-test.spec.js`, …)
-2. **`sessions/<id>/generate_script/`** canonical names above
-3. `scriptPath` from `generate_script/result.json` (resolve relative to session / `cwd` from session meta)
-## Documentation
-- [Full Graph Framework Design](../../docs/GRAPH_FRAMEWORK_DESIGN.md)
-- [Multi-Agent Patterns](../../docs/FRAMEWORK_CONVERSATION_SUMMARY.md)
-## Updates
-To get latest template updates:
-```bash
-zibby update-graph --merge
-```
-This will merge bug fixes while preserving your customizations.

package/dist/templates/browser-test-automation/chat.mjs DELETED Viewed

@@ -1,36 +0,0 @@
-/**
- * Zibby Chat Agent
- *
- * Interactive conversational node that acts as the default entry point
- * when users type `zibby` with no subcommand.
- *
- * This is a plain chat bot — no MCP servers, no middleware, no structured output.
- * Just streamed text conversation with the AI agent.
- *
- * The skill-installer skill injects its promptFragment so the LLM knows which
- * skills are available and can install/uninstall them via natural conversation.
- * Users can customize this file after `zibby init` copies it to .zibby/chat.mjs
- */
-import { SKILLS } from '@zibby/core';
-export const CHAT_CONFIG = {
-  name: 'zibby_chat',
-  skills: [SKILLS.CORE_TOOLS, SKILLS.SKILL_INSTALLER, SKILLS.CHAT_MEMORY, SKILLS.WORKFLOW_BUILDER],
-  timeout: 0,
-  systemPrompt: `You are Zibby, a helpful AI assistant. Capabilities come from installed skills.
-## How you work
-1. When you need data, call tools. You can chain up to 5 calls per turn.
-2. After each tool result, decide: "Would I be embarrassed to give this answer to a coworker?" If yes, call another tool.
-3. Only respond once you have something genuinely useful.
-4. Never claim you did something without actually calling the tool.
-5. After EVERY response, self-evaluate: is the user's goal fully achieved? Is anything still pending or running? If yes, DO NOT ASK — autonomously poll: call wait (you decide how long), then check status, then respond with an update. Repeat until done or the user interrupts.
-## How you talk
-- Talk like a teammate in Slack, not a report generator.
-- Summarize and paraphrase. Never copy-paste field values or list raw steps verbatim.
-- Short paragraphs, not numbered lists (unless the user specifically asks for steps).
-- Match the user's tone and energy. Be concise.`,
-};

package/dist/templates/browser-test-automation/graph.mjs DELETED Viewed

@@ -1,80 +0,0 @@
-/**
- * Test Automation Workflow Graph
- *
- * buildGraph() - define nodes, edges, routing
- * onComplete(result) - post-processing after graph finishes (save artifacts, etc.)
- */
-import { WorkflowAgent, WorkflowGraph } from '@zibby/core';
-import {
-  preflightNode,
-  cacheReplayNode,
-  executeLiveNode,
-  generateScriptNode,
-} from './nodes/index.mjs';
-import { BrowserTestResultHandler } from './result-handler.mjs';
-import { browserTestAutomationStateSchema } from './state.js';
-export class BrowserTestAutomationAgent extends WorkflowAgent {
-  buildGraph() {
-    const graph = new WorkflowGraph();
-    graph.setStateSchema(browserTestAutomationStateSchema);
-    graph.addNode('preflight', preflightNode);
-    graph.addNode('cache_replay', cacheReplayNode);
-    graph.addNode('execute_live', executeLiveNode);
-    graph.addNode('generate_script', generateScriptNode);
-    graph.setEntryPoint('preflight');
-    // Short-circuit when preflight produced nothing usable. Triggered when:
-    //   - the user invoked `zibby workflow run browser-tests` with no spec
-    //     (state.input is undefined / empty), so preflight had nothing to
-    //     analyze and the LLM came back with `assertions: []`
-    //   - the spec is so vague the LLM can't extract any assertions
-    // Without this gate the graph would barrel into execute_live, fire up
-    // a real browser session + a second expensive LLM call, then waste
-    // ~30s before failing — bad UX and bad bill.
-    graph.addConditionalEdges('preflight', (state) => {
-      const assertions = state.preflight?.assertions || [];
-      return assertions.length > 0 ? 'cache_replay' : 'END';
-    });
-    // Lever-#2 fork: cache_replay attempted a Playwright-only replay of
-    // a prior successful action sequence. On hit it side-wrote
-    // state.execute_live with synthesized output, so we can skip
-    // execute_live and jump straight to generate_script — zero LLM
-    // tokens. On miss / replay failure / cold cache, fall through to
-    // the normal LLM-driven execute_live path.
-    graph.addConditionalEdges('cache_replay', (state) => {
-      return state.cache_replay?.hit === true ? 'generate_script' : 'execute_live';
-    });
-    graph.addConditionalEdges('execute_live', (state) => {
-      const result = state.execute_live;
-      const hasExecution = (result?.steps?.length > 0) || (result?.actions?.length > 0);
-      return hasExecution ? 'generate_script' : 'END';
-    });
-    graph.addEdge('generate_script', 'END');
-    return graph;
-  }
-  async onComplete(result) {
-    const cwd = result.state.cwd || process.cwd();
-    BrowserTestResultHandler.saveTitle(result, cwd);
-    await BrowserTestResultHandler.saveExecutionData(result);
-    BrowserTestResultHandler.ensureStudioCodegenMirror(
-      result.state?.sessionPath,
-      result.state?.cwd || cwd,
-    );
-    // Memory end-run hook (if @zibby/ui-memory is installed)
-    try {
-      const { memoryEndRun, memorySyncPush } = await import('@zibby/ui-memory');
-      const sessionId = result.state.sessionPath?.split('/').pop();
-      memoryEndRun(cwd, { sessionId, passed: result.success !== false });
-      memorySyncPush(cwd);
-    } catch { /* @zibby/ui-memory not available */ }
-  }
-}

package/dist/templates/browser-test-automation/nodes/cache-replay.mjs DELETED Viewed

@@ -1,213 +0,0 @@
-/**
- * cache_replay node — lever-#2 read path inside the workflow.
- *
- * Sits between `preflight` and `execute_live` in the graph. Tries to
- * replay a prior successful run's action sequence via Playwright
- * directly, completely skipping the LLM. On a clean cache hit it
- * populates `state.execute_live` with the result so downstream
- * `generate_script` works exactly as if execute_live had run.
- *
- * Conditional edge after this node:
- *   - state.cache_replay.hit === true → skip execute_live → generate_script
- *   - state.cache_replay.hit === false → execute_live (LLM-driven path)
- *
- * Not user-configurable per-spec — the cache key derivation handles
- * staleness (page fingerprint drift invalidates) and replay failures
- * fall through cleanly to the LLM path.
- */
-import { z } from '@zibby/core';
-import { chromium } from 'playwright';
-import { spawn } from 'child_process';
-import { extractDomain, replayActions } from '@zibby/ui-memory';
-import { join } from 'path';
-const REPLAY_TIMEOUT_MS = 60_000;
-export const cacheReplayNode = {
-  name: 'cache_replay',
-  skills: [],
-  timeout: 90000,
-  outputSchema: z.object({
-    hit: z.boolean(),
-    elapsed_ms: z.number().nullish(),
-    executed: z.number().nullish(),
-    total: z.number().nullish(),
-    cache_key: z.string().nullish(),
-    error: z.string().nullish(),
-    // When hit, we also write a synthesized execute_live block so the
-    // downstream generate_script node sees what it expects.
-    execute_live_synthesized: z.boolean().nullish(),
-  }),
-  execute: async (context) => {
-    // graph.js builds nodeContext as `{ state, invokeAgent, _coreInvokeAgent,
-    // ...state.getAll() }`. So `context.testSpec` works (spread) AND
-    // `context.state.get('testSpec')` works (instance). Reading from the
-    // spread is the natural shape — `context.state` is reserved for the
-    // .set(key, value) side-write below.
-    const cwd = context.cwd || context.workspace || process.cwd();
-    const testSpec = context.testSpec || '';
-    const specPath = context.specPath || '';
-    // Derive domain from the spec text (no DOM access yet — pure parse).
-    const domain = extractDomainFromSpec(testSpec);
-    if (!domain) {
-      return { hit: false, error: 'cannot derive domain from spec' };
-    }
-    // Cache key requires page_fingerprint, which is page-state-dependent
-    // and only available AFTER navigation. We compute a key WITHOUT
-    // fingerprint first and look up by (domain, spec_path) prefix —
-    // the persister wrote spec_path too. If we find a candidate, we
-    // use its stored fingerprint to compute the full key and verify.
-    //
-    // Lookup order:
-    //   1. Exact (domain, spec_path) match in action_cache.
-    //   2. If found, use its actions for replay attempt.
-    //   3. On replay success: signal hit, populate state.execute_live.
-    //   4. On replay failure (or cache miss): hit=false, fall back to LLM.
-    const cached = await lookupCacheByDomainAndSpec({ cwd, domain, specPath });
-    if (!cached) {
-      return { hit: false, error: 'no cached actions for this spec' };
-    }
-    // Run the replay in a freshly-launched Playwright browser. Cleanly
-    // independent from the @zibby/mcp-browser path execute_live uses.
-    const t0 = Date.now();
-    const browser = await chromium.launch({ headless: true });
-    const page = await browser.newPage();
-    let replayResult;
-    try {
-      replayResult = await Promise.race([
-        replayActions({
-          actions: cached.actions,
-          page,
-          log: (m) => console.log(`[cache_replay] ${m}`),
-        }),
-        new Promise((_, reject) =>
-          setTimeout(() => reject(new Error('replay timeout')), REPLAY_TIMEOUT_MS),
-        ),
-      ]);
-    } catch (err) {
-      replayResult = { success: false, error: err.message, executed: 0, total: cached.actions.length };
-    }
-    const finalUrl = page.url();
-    await browser.close().catch(() => {});
-    const elapsedMs = Date.now() - t0;
-    if (!replayResult.success) {
-      // Increment failure_count so we can drop chronic misses later.
-      await incrementCacheFailure({ cwd, cacheKey: cached.cache_key });
-      return {
-        hit: false,
-        elapsed_ms: elapsedMs,
-        executed: replayResult.executed,
-        total: replayResult.total,
-        cache_key: cached.cache_key,
-        error: replayResult.error,
-      };
-    }
-    // HIT path. Side-write the synthesized execute_live output via
-    // context.state.set so downstream generate_script reads the same
-    // shape it expects (actions[], finalUrl, …). The customExecute
-    // return-value lands in state.cache_replay; the execute_live slot
-    // has to be populated separately.
-    if (typeof context.state?.set === 'function') {
-      context.state.set('execute_live', {
-        success: true,
-        steps: cached.actions.map((a) => a.description),
-        actions: cached.actions,
-        assertions: [],
-        finalUrl,
-        browserClosed: true,
-        notes: 'cache_replay hit — actions replayed via Playwright, no LLM',
-      });
-    }
-    return {
-      hit: true,
-      elapsed_ms: elapsedMs,
-      executed: replayResult.executed,
-      total: replayResult.total,
-      cache_key: cached.cache_key,
-      execute_live_synthesized: true,
-    };
-  },
-};
-// ─── helpers ────────────────────────────────────────────────────────────
-function extractDomainFromSpec(spec) {
-  if (!spec) return null;
-  // Find the first http(s) URL in the spec and run it through the
-  // SAME `extractDomain` the persister uses, so the cache-key lookup
-  // matches what was actually written (notably: `www.` is stripped).
-  const m = String(spec).match(/https?:\/\/[^\s"'<>]+/);
-  if (!m) return null;
-  return extractDomain(m[0]);
-}
-/**
- * Find a cached row by (domain, spec_path). Picks the row with
- * highest success_count if multiple match.
- * Uses dolt via subprocess (matching the rest of the codebase's
- * Dolt-access pattern).
- */
-async function lookupCacheByDomainAndSpec({ cwd, domain, specPath }) {
-  const dbDir = join(cwd, '.zibby', 'memory');
-  const safeDomain = escapeSql(domain);
-  const safeSpec = escapeSql(specPath);
-  const sql = `SELECT cache_key, actions_json, page_fingerprint
-    FROM action_cache
-    WHERE domain = ${safeDomain} AND spec_path = ${safeSpec}
-    ORDER BY success_count DESC, last_used_at DESC
-    LIMIT 1`;
-  const rows = await runDoltJson(dbDir, sql);
-  if (!rows || rows.length === 0) return null;
-  try {
-    const actions = JSON.parse(rows[0].actions_json);
-    return { cache_key: rows[0].cache_key, actions, fingerprint: rows[0].page_fingerprint };
-  } catch {
-    return null;
-  }
-}
-async function incrementCacheFailure({ cwd, cacheKey }) {
-  const dbDir = join(cwd, '.zibby', 'memory');
-  const sql = `UPDATE action_cache
-    SET failure_count = failure_count + 1, last_replay_status = 'replay-failed'
-    WHERE cache_key = ${escapeSql(cacheKey)}`;
-  await runDoltExec(dbDir, sql).catch(() => { /* non-fatal */ });
-}
-function escapeSql(v) {
-  if (v == null) return 'NULL';
-  return `'${String(v).replace(/'/g, "''")}'`;
-}
-function runDoltJson(dir, sql) {
-  return new Promise((resolve) => {
-    const child = spawn('dolt', ['sql', '-r', 'json', '-q', sql], { cwd: dir });
-    let out = '';
-    child.stdout.on('data', (d) => { out += d; });
-    child.on('close', () => {
-      try {
-        const parsed = JSON.parse(out);
-        resolve(parsed.rows || []);
-      } catch {
-        resolve([]);
-      }
-    });
-    child.on('error', () => resolve([]));
-  });
-}
-function runDoltExec(dir, sql) {
-  return new Promise((resolve, reject) => {
-    const child = spawn('dolt', ['sql', '-q', sql], { cwd: dir });
-    child.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`dolt exit ${code}`))));
-    child.on('error', reject);
-  });
-}