@zibby/core 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,213 @@
1
+ /**
2
+ * cache_replay node — lever-#2 read path inside the workflow.
3
+ *
4
+ * Sits between `preflight` and `execute_live` in the graph. Tries to
5
+ * replay a prior successful run's action sequence via Playwright
6
+ * directly, completely skipping the LLM. On a clean cache hit it
7
+ * populates `state.execute_live` with the result so downstream
8
+ * `generate_script` works exactly as if execute_live had run.
9
+ *
10
+ * Conditional edge after this node:
11
+ * - state.cache_replay.hit === true → skip execute_live → generate_script
12
+ * - state.cache_replay.hit === false → execute_live (LLM-driven path)
13
+ *
14
+ * Not user-configurable per-spec — the cache key derivation handles
15
+ * staleness (page fingerprint drift invalidates) and replay failures
16
+ * fall through cleanly to the LLM path.
17
+ */
18
+
19
+ import { z } from '@zibby/core';
20
+ import { chromium } from 'playwright';
21
+ import { spawn } from 'child_process';
22
+ import { extractDomain, replayActions } from '@zibby/ui-memory';
23
+ import { join } from 'path';
24
+
25
// Upper bound for one replay attempt; the node-level `timeout` below is larger
// so the replay timeout fires first and the node can still report a clean miss.
const REPLAY_TIMEOUT_MS = 60_000;

/**
 * `cache_replay` workflow node.
 *
 * Looks up a previously cached action sequence for (domain, spec_path) and
 * replays it in a freshly launched headless Chromium, without involving the
 * LLM. On success it side-writes a synthesized `execute_live` entry into the
 * workflow state and returns `{ hit: true, ... }`; on any failure it returns
 * `{ hit: false, error, ... }` so the graph can fall back to `execute_live`.
 *
 * `execute(context)` never throws for browser/replay problems: launch errors,
 * replay errors and the replay timeout are all reported as a miss.
 */
export const cacheReplayNode = {
  name: 'cache_replay',
  skills: [],
  timeout: 90000,
  outputSchema: z.object({
    hit: z.boolean(),
    elapsed_ms: z.number().nullish(),
    executed: z.number().nullish(),
    total: z.number().nullish(),
    cache_key: z.string().nullish(),
    error: z.string().nullish(),
    // When hit, we also write a synthesized execute_live block so the
    // downstream generate_script node sees what it expects.
    execute_live_synthesized: z.boolean().nullish(),
  }),

  execute: async (context) => {
    // graph.js builds nodeContext as `{ state, invokeAgent, _coreInvokeAgent,
    // ...state.getAll() }`. So `context.testSpec` works (spread) AND
    // `context.state.get('testSpec')` works (instance). Reading from the
    // spread is the natural shape — `context.state` is reserved for the
    // .set(key, value) side-write below.
    const cwd = context.cwd || context.workspace || process.cwd();
    const testSpec = context.testSpec || '';
    const specPath = context.specPath || '';

    // Derive domain from the spec text (no DOM access yet — pure parse).
    const domain = extractDomainFromSpec(testSpec);
    if (!domain) {
      return { hit: false, error: 'cannot derive domain from spec' };
    }

    // The full cache key needs a page fingerprint that only exists after
    // navigation, so the lookup is by (domain, spec_path) instead:
    //   1. Find the best (domain, spec_path) row in action_cache.
    //   2. If found, replay its actions.
    //   3. On replay success: signal hit, populate state.execute_live.
    //   4. On replay failure (or cache miss): hit=false, fall back to LLM.
    const cached = await lookupCacheByDomainAndSpec({ cwd, domain, specPath });
    if (!cached) {
      return { hit: false, error: 'no cached actions for this spec' };
    }

    // Run the replay in a freshly-launched Playwright browser. Cleanly
    // independent from the @zibby/mcp-browser path execute_live uses.
    const t0 = Date.now();
    let browser = null;
    let timer;
    let replayStarted = false;
    let finalUrl;
    let replayResult;
    try {
      // Launch and page creation live inside the try so that an environment
      // problem (e.g. browser cannot start) becomes a miss, not a node crash.
      browser = await chromium.launch({ headless: true });
      const page = await browser.newPage();
      replayStarted = true;
      replayResult = await Promise.race([
        replayActions({
          actions: cached.actions,
          page,
          log: (m) => console.log(`[cache_replay] ${m}`),
        }),
        new Promise((_, reject) => {
          timer = setTimeout(() => reject(new Error('replay timeout')), REPLAY_TIMEOUT_MS);
        }),
      ]);
      finalUrl = page.url();
    } catch (err) {
      replayResult = {
        success: false,
        error: err?.message ?? String(err),
        executed: 0,
        total: cached.actions?.length ?? 0,
      };
    } finally {
      // Without this the pending timer keeps the event loop alive for up to
      // REPLAY_TIMEOUT_MS after a fast replay.
      clearTimeout(timer);
      if (browser) {
        await browser.close().catch(() => {});
      }
    }
    const elapsedMs = Date.now() - t0;

    if (!replayResult.success) {
      // Increment failure_count so we can drop chronic misses later. A browser
      // that never started says nothing about the cached actions, so only
      // count failures where the replay actually ran.
      if (replayStarted) {
        await incrementCacheFailure({ cwd, cacheKey: cached.cache_key });
      }
      return {
        hit: false,
        elapsed_ms: elapsedMs,
        executed: replayResult.executed,
        total: replayResult.total,
        cache_key: cached.cache_key,
        error: replayResult.error,
      };
    }

    // HIT path. Side-write the synthesized execute_live output via
    // context.state.set so downstream generate_script reads the same
    // shape it expects (actions[], finalUrl, …). The return value of this
    // function lands in state.cache_replay; the execute_live slot has to be
    // populated separately. If the slot cannot be written, reporting a hit
    // would skip execute_live and leave generate_script without input, so
    // report a miss instead.
    if (typeof context.state?.set !== 'function') {
      return {
        hit: false,
        elapsed_ms: elapsedMs,
        executed: replayResult.executed,
        total: replayResult.total,
        cache_key: cached.cache_key,
        error: 'replay succeeded but state.set is unavailable; cannot synthesize execute_live',
        execute_live_synthesized: false,
      };
    }

    context.state.set('execute_live', {
      success: true,
      steps: cached.actions.map((a) => a.description),
      actions: cached.actions,
      assertions: [],
      finalUrl,
      browserClosed: true,
      notes: 'cache_replay hit — actions replayed via Playwright, no LLM',
    });

    return {
      hit: true,
      elapsed_ms: elapsedMs,
      executed: replayResult.executed,
      total: replayResult.total,
      cache_key: cached.cache_key,
      execute_live_synthesized: true,
    };
  },
};
139
+
140
+ // ─── helpers ────────────────────────────────────────────────────────────
141
+
142
/**
 * Derive the cache domain from free-form spec text.
 *
 * Finds the first http(s) URL in `spec` and passes it to the shared
 * `extractDomain` helper so the lookup uses the same normalisation as the
 * code that wrote the cache row (the original author notes that `www.` is
 * stripped there).
 *
 * @param {string} spec - Raw spec text; any other value is stringified.
 * @returns {*} Whatever `extractDomain` returns for the URL, or `null` when
 *   the spec is empty or contains no http(s) URL.
 */
function extractDomainFromSpec(spec) {
  if (!spec) return null;
  const match = String(spec).match(/https?:\/\/[^\s"'<>]+/);
  if (!match) return null;
  // Specs are prose/markdown, so the URL is often followed directly by
  // punctuation ("see https://example.com." or "(https://example.com)").
  // Those characters are valid in a URL and would otherwise end up inside
  // the hostname of a path-less URL, producing a domain that never matches.
  const url = match[0].replace(/[.,;:!?)\]}`*]+$/, '');
  return extractDomain(url);
}
151
+
152
/**
 * Find a cached row by (domain, spec_path). When several rows match, the
 * query picks the one with the highest success_count, breaking ties by the
 * most recent last_used_at.
 *
 * Uses dolt via subprocess (matching the rest of the codebase's
 * Dolt-access pattern); the database lives in `<cwd>/.zibby/memory`.
 *
 * @param {{ cwd: string, domain: string, specPath: string }} params
 * @returns {Promise<{ cache_key: *, actions: Array, fingerprint: * } | null>}
 *   The decoded row, or `null` when nothing matches or the stored actions
 *   cannot be decoded into an array.
 */
async function lookupCacheByDomainAndSpec({ cwd, domain, specPath }) {
  const dbDir = join(cwd, '.zibby', 'memory');
  const safeDomain = escapeSql(domain);
  const safeSpec = escapeSql(specPath);
  const sql = `SELECT cache_key, actions_json, page_fingerprint
    FROM action_cache
    WHERE domain = ${safeDomain} AND spec_path = ${safeSpec}
    ORDER BY success_count DESC, last_used_at DESC
    LIMIT 1`;
  const rows = await runDoltJson(dbDir, sql);
  if (!Array.isArray(rows) || rows.length === 0) return null;

  const [row] = rows;
  let actions;
  try {
    // The column normally arrives as JSON text; tolerate a value that the
    // JSON result format has already decoded.
    actions = typeof row.actions_json === 'string'
      ? JSON.parse(row.actions_json)
      : row.actions_json;
  } catch {
    return null;
  }
  // Callers read `.length` and call `.map` on the actions, so anything that
  // is not an array is treated as an unusable cache entry.
  if (!Array.isArray(actions)) return null;

  return { cache_key: row.cache_key, actions, fingerprint: row.page_fingerprint };
}
176
+
177
/**
 * Record a failed replay against a cache row: bumps `failure_count` and sets
 * `last_replay_status` to 'replay-failed' for the row with the given key.
 *
 * Best-effort — a failing dolt invocation is ignored so bookkeeping can
 * never break the workflow.
 *
 * @param {{ cwd: string, cacheKey: string }} params
 * @returns {Promise<void>}
 */
async function incrementCacheFailure({ cwd, cacheKey }) {
  const memoryDir = join(cwd, '.zibby', 'memory');
  const quotedKey = escapeSql(cacheKey);
  const statement = `UPDATE action_cache
    SET failure_count = failure_count + 1, last_replay_status = 'replay-failed'
    WHERE cache_key = ${quotedKey}`;
  const ignoreFailure = () => { /* non-fatal */ };
  await runDoltExec(memoryDir, statement).catch(ignoreFailure);
}
184
+
185
/**
 * Render a value as a SQL string literal for interpolation into a query.
 *
 * `null`/`undefined` become the bare keyword NULL; everything else is
 * stringified and wrapped in single quotes.
 *
 * @param {*} v - Value to embed.
 * @returns {string} A quoted literal, or `NULL`.
 */
function escapeSql(v) {
  if (v == null) return 'NULL';
  // Backslash is an escape character inside MySQL-dialect string literals
  // (the dialect Dolt speaks), so it must be doubled BEFORE quotes are
  // doubled; otherwise a trailing `\` would escape the closing quote and
  // paths containing `\` would be silently rewritten.
  const escaped = String(v).replace(/\\/g, '\\\\').replace(/'/g, "''");
  return `'${escaped}'`;
}
189
+
190
/**
 * Run a query through `dolt sql -r json -q <sql>` in `dir` and return the
 * `rows` array from its JSON output.
 *
 * Never rejects: a spawn failure, unparseable output, or output without a
 * `rows` array all resolve to `[]`.
 *
 * @param {string} dir - Working directory for the dolt process.
 * @param {string} sql - Query text passed via `-q`.
 * @returns {Promise<Array>} Result rows, or `[]` on any failure.
 */
function runDoltJson(dir, sql) {
  return new Promise((resolve) => {
    const child = spawn('dolt', ['sql', '-r', 'json', '-q', sql], {
      cwd: dir,
      // Only stdout is consumed. Leaving stdin/stderr as unread pipes risks
      // the child blocking once a pipe buffer fills up.
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    let out = '';
    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is not corrupted by per-chunk string conversion.
    child.stdout.setEncoding('utf8');
    child.stdout.on('data', (chunk) => { out += chunk; });
    child.on('error', () => resolve([]));
    child.on('close', () => {
      try {
        const parsed = JSON.parse(out);
        resolve(Array.isArray(parsed?.rows) ? parsed.rows : []);
      } catch {
        resolve([]);
      }
    });
  });
}
206
+
207
/**
 * Run a statement through `dolt sql -q <sql>` in `dir`, discarding output.
 *
 * @param {string} dir - Working directory for the dolt process.
 * @param {string} sql - Statement text passed via `-q`.
 * @returns {Promise<void>} Resolves when dolt exits with code 0; rejects
 *   with the spawn error, or with `Error('dolt exit <code>')` otherwise.
 */
function runDoltExec(dir, sql) {
  return new Promise((resolve, reject) => {
    // No output is read here, so do not create pipes at all: unread pipes
    // can stall the child once their buffers fill.
    const child = spawn('dolt', ['sql', '-q', sql], { cwd: dir, stdio: 'ignore' });
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`dolt exit ${code}`));
      }
    });
  });
}
@@ -1,3 +1,4 @@
1
1
  export { preflightNode } from './preflight.mjs';
2
+ export { cacheReplayNode } from './cache-replay.mjs';
2
3
  export { executeLiveNode } from './execute-live.mjs';
3
4
  export { generateScriptNode } from './generate-script.mjs';