npm - @zibby/core - Versions diffs - 0.3.11 → 0.3.13 - Mend

@zibby/core 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/index.js +16 -16
package/dist/package.json +1 -1
package/dist/register-built-in-strategies.js +1 -1
package/dist/strategies/claude-strategy.js +2 -2
package/dist/strategies/index.js +10 -10
package/dist/templates/browser-test-automation/graph.mjs +14 -2
package/dist/templates/browser-test-automation/nodes/cache-replay.mjs +213 -0
package/dist/templates/browser-test-automation/nodes/index.mjs +1 -0
package/package.json +1 -1
package/templates/browser-test-automation/graph.mjs +14 -2
package/templates/browser-test-automation/nodes/cache-replay.mjs +213 -0
package/templates/browser-test-automation/nodes/index.mjs +1 -0

package/templates/browser-test-automation/nodes/cache-replay.mjs ADDED Viewed

@@ -0,0 +1,213 @@
+/**
+ * cache_replay node — lever-#2 read path inside the workflow.
+ *
+ * Sits between `preflight` and `execute_live` in the graph. Tries to
+ * replay a prior successful run's action sequence via Playwright
+ * directly, completely skipping the LLM. On a clean cache hit it
+ * populates `state.execute_live` with the result so downstream
+ * `generate_script` works exactly as if execute_live had run.
+ *
+ * Conditional edge after this node:
+ *   - state.cache_replay.hit === true → skip execute_live → generate_script
+ *   - state.cache_replay.hit === false → execute_live (LLM-driven path)
+ *
+ * Not user-configurable per-spec — the cache key derivation handles
+ * staleness (page fingerprint drift invalidates) and replay failures
+ * fall through cleanly to the LLM path.
+ */
+import { z } from '@zibby/core';
+import { chromium } from 'playwright';
+import { spawn } from 'child_process';
+import { extractDomain, replayActions } from '@zibby/ui-memory';
+import { join } from 'path';
+// Upper bound for the replay itself; kept below the node-level `timeout`
+// so the node can still close the browser and report a miss.
+const REPLAY_TIMEOUT_MS = 60_000;
+/**
+ * Workflow node that tries to replay a cached action sequence in a
+ * freshly launched headless Playwright browser before `execute_live` runs.
+ *
+ * `execute(context)` resolves to an object matching `outputSchema`:
+ *   - `hit: true`  — replay succeeded AND the synthesized `execute_live`
+ *                    state was written via `context.state.set`.
+ *   - `hit: false` — no usable cache row, the browser could not be
+ *                    started, or the replay failed/timed out; `error`
+ *                    carries the reason.
+ * Browser and replay failures are reported as `hit: false` rather than
+ * thrown, so the graph can fall back to the LLM-driven path.
+ */
+export const cacheReplayNode = {
+  name: 'cache_replay',
+  skills: [],
+  timeout: 90000,
+  outputSchema: z.object({
+    hit: z.boolean(),
+    elapsed_ms: z.number().nullish(),
+    executed: z.number().nullish(),
+    total: z.number().nullish(),
+    cache_key: z.string().nullish(),
+    error: z.string().nullish(),
+    // When hit, we also write a synthesized execute_live block so the
+    // downstream generate_script node sees what it expects.
+    execute_live_synthesized: z.boolean().nullish(),
+  }),
+  execute: async (context) => {
+    // graph.js builds nodeContext as `{ state, invokeAgent, _coreInvokeAgent,
+    // ...state.getAll() }`. So `context.testSpec` works (spread) AND
+    // `context.state.get('testSpec')` works (instance). Reading from the
+    // spread is the natural shape — `context.state` is reserved for the
+    // .set(key, value) side-write below.
+    const cwd = context.cwd || context.workspace || process.cwd();
+    const testSpec = context.testSpec || '';
+    const specPath = context.specPath || '';
+    // Derive domain from the spec text (no DOM access yet — pure parse).
+    const domain = extractDomainFromSpec(testSpec);
+    if (!domain) {
+      return { hit: false, error: 'cannot derive domain from spec' };
+    }
+    // Lookup order:
+    //   1. Exact (domain, spec_path) match in action_cache.
+    //   2. If found, use its actions for replay attempt.
+    //   3. On replay success: signal hit, populate state.execute_live.
+    //   4. On replay failure (or cache miss): hit=false, fall back to LLM.
+    const cached = await lookupCacheByDomainAndSpec({ cwd, domain, specPath });
+    if (!cached) {
+      return { hit: false, error: 'no cached actions for this spec' };
+    }
+    // A row whose actions did not decode to a non-empty array cannot be
+    // replayed (and `.length` / `.map` below would throw on it).
+    if (!Array.isArray(cached.actions) || cached.actions.length === 0) {
+      return {
+        hit: false,
+        cache_key: cached.cache_key,
+        error: 'cached actions are empty or malformed',
+      };
+    }
+    // Run the replay in a freshly-launched Playwright browser. Cleanly
+    // independent from the @zibby/mcp-browser path execute_live uses.
+    const t0 = Date.now();
+    let browser;
+    let timer;
+    let replayStarted = false;
+    let replayResult;
+    let finalUrl = '';
+    try {
+      // Launch inside the try: a missing/broken browser install must
+      // degrade to a cache miss, not reject the whole node.
+      browser = await chromium.launch({ headless: true });
+      const page = await browser.newPage();
+      const timeout = new Promise((_, reject) => {
+        timer = setTimeout(() => reject(new Error('replay timeout')), REPLAY_TIMEOUT_MS);
+      });
+      replayStarted = true;
+      replayResult = await Promise.race([
+        replayActions({
+          actions: cached.actions,
+          page,
+          log: (m) => console.log(`[cache_replay] ${m}`),
+        }),
+        timeout,
+      ]);
+      finalUrl = page.url();
+    } catch (err) {
+      replayResult = {
+        success: false,
+        error: err?.message ?? String(err),
+        executed: 0,
+        total: cached.actions.length,
+      };
+    } finally {
+      // Always release the timer (otherwise it stays pending for up to
+      // REPLAY_TIMEOUT_MS after a fast replay) and the browser process.
+      clearTimeout(timer);
+      if (browser) {
+        await browser.close().catch(() => {});
+      }
+    }
+    const elapsedMs = Date.now() - t0;
+    if (!replayResult?.success) {
+      // Increment failure_count so we can drop chronic misses later.
+      // Only when the replay actually ran: a browser that failed to
+      // start says nothing about the quality of the cached actions.
+      if (replayStarted) {
+        await incrementCacheFailure({ cwd, cacheKey: cached.cache_key });
+      }
+      return {
+        hit: false,
+        elapsed_ms: elapsedMs,
+        executed: replayResult?.executed,
+        total: replayResult?.total,
+        cache_key: cached.cache_key,
+        error: replayResult?.error ?? 'replay failed',
+      };
+    }
+    // HIT path. Side-write the synthesized execute_live output via
+    // context.state.set so downstream generate_script reads the same
+    // shape it expects (actions[], finalUrl, …). The customExecute
+    // return-value lands in state.cache_replay; the execute_live slot
+    // has to be populated separately.
+    if (typeof context.state?.set !== 'function') {
+      // Without the side-write, skipping execute_live would leave
+      // generate_script with no input — report a miss instead.
+      return {
+        hit: false,
+        elapsed_ms: elapsedMs,
+        executed: replayResult.executed,
+        total: replayResult.total,
+        cache_key: cached.cache_key,
+        error: 'context.state.set unavailable; cannot synthesize execute_live',
+      };
+    }
+    context.state.set('execute_live', {
+      success: true,
+      steps: cached.actions.map((a) => a.description),
+      actions: cached.actions,
+      assertions: [],
+      finalUrl,
+      browserClosed: true,
+      notes: 'cache_replay hit — actions replayed via Playwright, no LLM',
+    });
+    return {
+      hit: true,
+      elapsed_ms: elapsedMs,
+      executed: replayResult.executed,
+      total: replayResult.total,
+      cache_key: cached.cache_key,
+      execute_live_synthesized: true,
+    };
+  },
+};
+// ─── helpers ────────────────────────────────────────────────────────────
+/**
+ * Derive the cache domain from free-form spec text.
+ *
+ * @param {string} spec - Test spec text; may be empty.
+ * @returns {string|null} Result of `extractDomain` for the first http(s)
+ *   URL found in the spec, or null when the spec is empty or has no URL.
+ */
+function extractDomainFromSpec(spec) {
+  if (!spec) return null;
+  // Find the first http(s) URL in the spec and run it through the
+  // SAME `extractDomain` the persister uses, so the cache-key lookup
+  // matches what was actually written (notably: `www.` is stripped).
+  // The final character class keeps the match from ending on sentence
+  // or markdown punctuation ("see https://example.com." or
+  // "(https://example.com)"), which would otherwise become part of the
+  // host when the URL has no path.
+  const m = String(spec).match(/https?:\/\/[^\s"'<>]*[^\s"'<>.,;:!?)\]}`*]/);
+  if (!m) return null;
+  return extractDomain(m[0]);
+}
+/**
+ * Find a cached row by (domain, spec_path). Picks the row with
+ * highest success_count (ties broken by most recent last_used_at)
+ * if multiple match.
+ * Uses dolt via subprocess (matching the rest of the codebase's
+ * Dolt-access pattern).
+ *
+ * @param {{cwd: string, domain: string, specPath: string}} args
+ * @returns {Promise<{cache_key: string, actions: Array, fingerprint: *}|null>}
+ *   The best matching row with its actions decoded, or null when no row
+ *   matches or the stored actions are not a non-empty array.
+ */
+async function lookupCacheByDomainAndSpec({ cwd, domain, specPath }) {
+  const dbDir = join(cwd, '.zibby', 'memory');
+  const safeDomain = escapeSql(domain);
+  const safeSpec = escapeSql(specPath);
+  const sql = `SELECT cache_key, actions_json, page_fingerprint
+    FROM action_cache
+    WHERE domain = ${safeDomain} AND spec_path = ${safeSpec}
+    ORDER BY success_count DESC, last_used_at DESC
+    LIMIT 1`;
+  const rows = await runDoltJson(dbDir, sql);
+  if (!rows || rows.length === 0) return null;
+  const [row] = rows;
+  let actions;
+  try {
+    // NOTE(review): if action_cache.actions_json is a JSON-typed column,
+    // the JSON result format may hand it back already decoded — accept
+    // both shapes rather than JSON.parse-ing a non-string. Confirm
+    // against the schema.
+    actions = typeof row.actions_json === 'string'
+      ? JSON.parse(row.actions_json)
+      : row.actions_json;
+  } catch {
+    return null;
+  }
+  // Callers read `.length` and `.map` on actions; anything that is not a
+  // non-empty array is treated the same as a cache miss.
+  if (!Array.isArray(actions) || actions.length === 0) return null;
+  return { cache_key: row.cache_key, actions, fingerprint: row.page_fingerprint };
+}
+/**
+ * Best-effort bookkeeping after a failed replay: bumps `failure_count`
+ * and marks `last_replay_status` on the row identified by `cacheKey`.
+ * A failing UPDATE is ignored on purpose — it must never fail the node.
+ */
+async function incrementCacheFailure({ cwd, cacheKey }) {
+  const keyLiteral = escapeSql(cacheKey);
+  const memoryDir = join(cwd, '.zibby', 'memory');
+  const statement = `UPDATE action_cache
+    SET failure_count = failure_count + 1, last_replay_status = 'replay-failed'
+    WHERE cache_key = ${keyLiteral}`;
+  try {
+    await runDoltExec(memoryDir, statement);
+  } catch {
+    /* non-fatal */
+  }
+}
+/**
+ * Render a value as a single-quoted SQL string literal.
+ *
+ * @param {*} v - Value to embed; null/undefined become the bare keyword NULL.
+ * @returns {string} `NULL`, or the stringified value wrapped in single
+ *   quotes with backslashes and single quotes escaped.
+ */
+function escapeSql(v) {
+  if (v == null) return 'NULL';
+  // Backslashes first: under MySQL-style string syntax `\'` is an escaped
+  // quote, so a lone backslash before a doubled quote would re-open the
+  // literal (and Windows paths like `a\b` would be silently altered).
+  const escaped = String(v).replace(/\\/g, '\\\\').replace(/'/g, "''");
+  return `'${escaped}'`;
+}
+/**
+ * Run a query through the `dolt sql -r json` CLI and return its rows.
+ *
+ * @param {string} dir - Working directory for the dolt process.
+ * @param {string} sql - Query text passed via `-q`.
+ * @returns {Promise<Array>} The `rows` array from dolt's JSON output; an
+ *   empty array when the process cannot be spawned, the output is not
+ *   valid JSON, or it carries no `rows` array. Never rejects.
+ */
+function runDoltJson(dir, sql) {
+  return new Promise((resolve) => {
+    const child = spawn('dolt', ['sql', '-r', 'json', '-q', sql], {
+      cwd: dir,
+      // stdin/stderr are never used here; leaving stderr as an undrained
+      // pipe lets a chatty child block once the pipe buffer fills.
+      stdio: ['ignore', 'pipe', 'ignore'],
+    });
+    // Decode as a stream so a multi-byte character split across two
+    // chunks is not corrupted by per-chunk Buffer→string coercion.
+    child.stdout.setEncoding('utf8');
+    let out = '';
+    child.stdout.on('data', (chunk) => { out += chunk; });
+    child.on('close', () => {
+      try {
+        const parsed = JSON.parse(out);
+        resolve(Array.isArray(parsed?.rows) ? parsed.rows : []);
+      } catch {
+        resolve([]);
+      }
+    });
+    // Spawn failures (dolt missing, bad cwd) are treated as "no rows".
+    child.on('error', () => resolve([]));
+  });
+}
+/**
+ * Run a statement through the `dolt sql` CLI, discarding its output.
+ *
+ * @param {string} dir - Working directory for the dolt process.
+ * @param {string} sql - Statement text passed via `-q`.
+ * @returns {Promise<void>} Resolves on exit code 0; rejects with an Error
+ *   on a non-zero exit or when the process cannot be spawned.
+ */
+function runDoltExec(dir, sql) {
+  return new Promise((resolve, reject) => {
+    // Output is not needed; 'ignore' avoids undrained stdout/stderr pipes
+    // that could stall the child once their buffers fill.
+    const child = spawn('dolt', ['sql', '-q', sql], { cwd: dir, stdio: 'ignore' });
+    child.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`dolt exit ${code}`))));
+    child.on('error', reject);
+  });
+}

package/templates/browser-test-automation/nodes/index.mjs CHANGED Viewed

@@ -1,3 +1,4 @@
 export { preflightNode } from './preflight.mjs';
+export { cacheReplayNode } from './cache-replay.mjs';
 export { executeLiveNode } from './execute-live.mjs';
 export { generateScriptNode } from './generate-script.mjs';