@zibby/core 0.4.6 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/dist/index.js +150 -153
  2. package/dist/package.json +2 -9
  3. package/dist/utils/run-index-post-cli.js +1 -4
  4. package/package.json +2 -9
  5. package/dist/templates/browser-test-automation/README.md +0 -136
  6. package/dist/templates/browser-test-automation/chat.mjs +0 -36
  7. package/dist/templates/browser-test-automation/graph.mjs +0 -80
  8. package/dist/templates/browser-test-automation/nodes/cache-replay.mjs +0 -213
  9. package/dist/templates/browser-test-automation/nodes/execute-live.mjs +0 -254
  10. package/dist/templates/browser-test-automation/nodes/generate-script.mjs +0 -108
  11. package/dist/templates/browser-test-automation/nodes/index.mjs +0 -4
  12. package/dist/templates/browser-test-automation/nodes/preflight.mjs +0 -94
  13. package/dist/templates/browser-test-automation/nodes/utils.mjs +0 -297
  14. package/dist/templates/browser-test-automation/pipeline-ids.js +0 -12
  15. package/dist/templates/browser-test-automation/result-handler.mjs +0 -327
  16. package/dist/templates/browser-test-automation/run-index.mjs +0 -420
  17. package/dist/templates/browser-test-automation/run_test.json +0 -358
  18. package/dist/templates/browser-test-automation/state.js +0 -61
  19. package/dist/templates/code-analysis/README.md +0 -60
  20. package/dist/templates/code-analysis/graph.js +0 -72
  21. package/dist/templates/code-analysis/graph.mjs +0 -33
  22. package/dist/templates/code-analysis/index.js +0 -18
  23. package/dist/templates/code-analysis/nodes/analyze-ticket-node.js +0 -204
  24. package/dist/templates/code-analysis/nodes/create-pr-node.js +0 -175
  25. package/dist/templates/code-analysis/nodes/finalize-node.js +0 -118
  26. package/dist/templates/code-analysis/nodes/generate-code-node.js +0 -425
  27. package/dist/templates/code-analysis/nodes/generate-test-cases-node.js +0 -376
  28. package/dist/templates/code-analysis/nodes/services/prMetaService.js +0 -86
  29. package/dist/templates/code-analysis/nodes/setup-node.js +0 -142
  30. package/dist/templates/code-analysis/prompts/analyze-ticket.md +0 -181
  31. package/dist/templates/code-analysis/prompts/generate-code.md +0 -33
  32. package/dist/templates/code-analysis/prompts/generate-test-cases.md +0 -110
  33. package/dist/templates/code-analysis/state.js +0 -48
  34. package/dist/templates/generate-test-cases/README.md +0 -72
  35. package/dist/templates/generate-test-cases/graph.mjs +0 -46
  36. package/dist/templates/generate-test-cases/nodes/generate-test-cases-node.js +0 -381
  37. package/dist/templates/generate-test-cases/nodes/setup-node.js +0 -142
  38. package/dist/templates/generate-test-cases/state.js +0 -54
  39. package/dist/templates/global-setup.js +0 -56
  40. package/dist/templates/index.js +0 -147
  41. package/dist/templates/register-nodes.js +0 -24
  42. package/templates/browser-test-automation/README.md +0 -136
  43. package/templates/browser-test-automation/chat.mjs +0 -36
  44. package/templates/browser-test-automation/graph.mjs +0 -80
  45. package/templates/browser-test-automation/nodes/cache-replay.mjs +0 -213
  46. package/templates/browser-test-automation/nodes/execute-live.mjs +0 -254
  47. package/templates/browser-test-automation/nodes/generate-script.mjs +0 -108
  48. package/templates/browser-test-automation/nodes/index.mjs +0 -4
  49. package/templates/browser-test-automation/nodes/preflight.mjs +0 -94
  50. package/templates/browser-test-automation/nodes/utils.mjs +0 -297
  51. package/templates/browser-test-automation/pipeline-ids.js +0 -12
  52. package/templates/browser-test-automation/result-handler.mjs +0 -327
  53. package/templates/browser-test-automation/run-index.mjs +0 -420
  54. package/templates/browser-test-automation/run_test.json +0 -358
  55. package/templates/browser-test-automation/state.js +0 -61
  56. package/templates/code-analysis/README.md +0 -60
  57. package/templates/code-analysis/graph.js +0 -72
  58. package/templates/code-analysis/graph.mjs +0 -33
  59. package/templates/code-analysis/index.js +0 -18
  60. package/templates/code-analysis/nodes/analyze-ticket-node.js +0 -204
  61. package/templates/code-analysis/nodes/create-pr-node.js +0 -175
  62. package/templates/code-analysis/nodes/finalize-node.js +0 -118
  63. package/templates/code-analysis/nodes/generate-code-node.js +0 -425
  64. package/templates/code-analysis/nodes/generate-test-cases-node.js +0 -376
  65. package/templates/code-analysis/nodes/services/prMetaService.js +0 -86
  66. package/templates/code-analysis/nodes/setup-node.js +0 -142
  67. package/templates/code-analysis/prompts/analyze-ticket.md +0 -181
  68. package/templates/code-analysis/prompts/generate-code.md +0 -33
  69. package/templates/code-analysis/prompts/generate-test-cases.md +0 -110
  70. package/templates/code-analysis/state.js +0 -48
  71. package/templates/generate-test-cases/README.md +0 -72
  72. package/templates/generate-test-cases/graph.mjs +0 -46
  73. package/templates/generate-test-cases/nodes/generate-test-cases-node.js +0 -381
  74. package/templates/generate-test-cases/nodes/setup-node.js +0 -142
  75. package/templates/generate-test-cases/state.js +0 -54
  76. package/templates/global-setup.js +0 -56
  77. package/templates/index.js +0 -147
  78. package/templates/register-nodes.js +0 -24
@@ -1,147 +0,0 @@
1
- import { join, dirname } from 'path';
2
- import { fileURLToPath } from 'url';
3
- import { existsSync } from 'fs';
4
-
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(__filename);
7
-
8
- export const TEMPLATES = {
9
- 'browser-test-automation': {
10
- name: 'browser-test-automation',
11
- displayName: 'Browser Test Automation (Full Workflow)',
12
- description: 'Complete browser test automation workflow with title generation, live execution, and script generation',
13
- path: join(__dirname, 'browser-test-automation'),
14
- default: true,
15
- // Suggested slug for `zibby workflow new <slug> -t <name>`. Used in
16
- // the `template list` scaffold hint so the printed command is
17
- // copy-paste-ready instead of `your-workflow-name`. Users can still
18
- // pick anything they want at scaffold time.
19
- defaultSlug: 'browser-tests',
20
- // Runtime deps the scaffolded copy needs in addition to @zibby/core.
21
- // graph.mjs now imports state.js which `import { z } from 'zod'`s
22
- // directly, so the user's package.json must declare zod or the
23
- // scaffolded workflow fails on first import.
24
- deps: {
25
- zod: '^3.23.0',
26
- },
27
- features: [
28
- 'Preflight analysis: extract title + assertion checklist from spec',
29
- 'Execute test live with AI + browser (Claude or Cursor)',
30
- 'Generate Playwright script with stable IDs',
31
- 'Real-time streaming output',
32
- 'Video recording of browser sessions'
33
- ]
34
- },
35
- 'code-analysis': {
36
- name: 'code-analysis',
37
- displayName: 'Code Analysis (Ticket → Code + Tests)',
38
- description: 'Multi-node workflow that analyzes a Jira ticket against a code repo, generates code changes, and emits test cases',
39
- path: join(__dirname, 'code-analysis'),
40
- defaultSlug: 'ticket-analyzer',
41
- // Runtime deps the scaffolded copy needs in addition to @zibby/core.
42
- // Merged into the generated package.json so `npm install` works
43
- // without manual edits. browser-test-automation declares zod for the
44
- // same reason: its state.js imports zod directly.
45
- deps: {
46
- axios: '^1.6.0',
47
- handlebars: '^4.7.8',
48
- zod: '^3.23.0',
49
- },
50
- features: [
51
- 'Clone repos + snapshot git baseline',
52
- 'LLM analysis of ticket against codebase (canProceed gate)',
53
- 'Conditional routing: skip code-gen if ticket is invalid',
54
- 'Generate scoped code changes',
55
- 'Generate test cases covering the changes',
56
- 'Customizable prompts in prompts/*.md'
57
- ]
58
- },
59
- 'generate-test-cases': {
60
- name: 'generate-test-cases',
61
- displayName: 'Generate Test Cases (Diff → Test Specs)',
62
- description: 'Standalone slice — takes an existing code diff and generates plain-English test specifications for it. Skips ticket-analysis and code-gen.',
63
- path: join(__dirname, 'generate-test-cases'),
64
- defaultSlug: 'tests-from-diff',
65
- deps: {
66
- zod: '^3.23.0',
67
- },
68
- features: [
69
- 'Two-node graph: setup → generate_test_cases',
70
- 'Takes a PR diff directly as state input (no upstream code-gen needed)',
71
- 'LLM explores codebase routing/components for accurate test steps',
72
- 'Emits 4-8 prioritized test specs (Critical/High/Medium/Low)',
73
- 'Plain-English test steps — runnable by AI agents'
74
- ]
75
- }
76
- };
77
-
78
- export class TemplateFactory {
79
- static listTemplates() {
80
- return Object.values(TEMPLATES);
81
- }
82
-
83
- static getDefault() {
84
- return Object.values(TEMPLATES).find(t => t.default) || TEMPLATES['browser-test-automation'];
85
- }
86
-
87
- static getTemplate(name) {
88
- const template = TEMPLATES[name];
89
- if (!template) {
90
- const available = Object.keys(TEMPLATES).join(', ');
91
- throw new Error(`Template "${name}" not found. Available: ${available}`);
92
- }
93
- return template;
94
- }
95
-
96
- static validateTemplate(templatePath) {
97
- const requiredFiles = ['graph.mjs', 'nodes', 'README.md'];
98
-
99
- for (const file of requiredFiles) {
100
- const filePath = join(templatePath, file);
101
- if (!existsSync(filePath)) {
102
- throw new Error(`Template missing required file: ${file}`);
103
- }
104
- }
105
-
106
- return true;
107
- }
108
-
109
- static getTemplateFiles(templateName) {
110
- const template = this.getTemplate(templateName);
111
- this.validateTemplate(template.path);
112
-
113
- const resultHandlerPath = join(template.path, 'result-handler.mjs');
114
- return {
115
- graphPath: join(template.path, 'graph.mjs'),
116
- nodesPath: join(template.path, 'nodes'),
117
- readmePath: join(template.path, 'README.md'),
118
- resultHandlerPath: existsSync(resultHandlerPath) ? resultHandlerPath : null,
119
- template
120
- };
121
- }
122
-
123
- static registerCustomTemplate(name, config) {
124
- if (TEMPLATES[name]) {
125
- throw new Error(`Template "${name}" already exists`);
126
- }
127
-
128
- if (!config.path || !config.displayName) {
129
- throw new Error('Custom template must have "path" and "displayName"');
130
- }
131
-
132
- this.validateTemplate(config.path);
133
-
134
- TEMPLATES[name] = {
135
- name,
136
- displayName: config.displayName,
137
- description: config.description || '',
138
- path: config.path,
139
- features: config.features || [],
140
- custom: true
141
- };
142
-
143
- return TEMPLATES[name];
144
- }
145
- }
146
-
147
- export default TemplateFactory;
@@ -1,24 +0,0 @@
1
- /**
2
- * Template node registrations
3
- *
4
- * Import this module as a side-effect to register all built-in
5
- * template nodes with the framework's node registry.
6
- *
7
- * Usage: import '@zibby/core/templates/register-nodes.js';
8
- */
9
-
10
- import { registerNode } from '@zibby/agent-workflow';
11
- import { setupNode } from './code-analysis/nodes/setup-node.js';
12
- import { analyzeTicketNode } from './code-analysis/nodes/analyze-ticket-node.js';
13
- import { generateCodeNode, implementCodeNode } from './code-analysis/nodes/generate-code-node.js';
14
- import { generateTestCasesNode } from './code-analysis/nodes/generate-test-cases-node.js';
15
- import { finalizeNode } from './code-analysis/nodes/finalize-node.js';
16
- import { createPRNode } from './code-analysis/nodes/create-pr-node.js';
17
-
18
- registerNode('setup', setupNode);
19
- registerNode('analyze_ticket', analyzeTicketNode);
20
- registerNode('generate_code', generateCodeNode);
21
- registerNode('generate_test_cases', generateTestCasesNode);
22
- registerNode('finalize', finalizeNode);
23
- registerNode('implement_code', implementCodeNode);
24
- registerNode('create_pr', createPRNode);
@@ -1,136 +0,0 @@
1
- # Browser Test Automation Workflow
2
-
3
- This is YOUR workflow graph. You can customize it however you want!
4
-
5
- Works with **Claude** or **Cursor** agents (configured in `.zibby.config.mjs`).
6
-
7
- ## Default Flow
8
-
9
- ```
10
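- preflight → cache_replay → execute_live → generate_script
11
- ```
12
-
13
- The workflow generates a test title, first tries to replay a cached action sequence for the spec (`cache_replay`, which skips the live step on a hit), otherwise executes the test live in a **browser** with AI assistance, and then generates a Playwright script with stable selectors.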
14
-
15
- ## Customization
16
-
17
- ### Add Custom Nodes
18
-
19
- Create a new file in `nodes/`:
20
-
21
- ```javascript
22
- // nodes/send-slack.js
23
- export const sendSlackNode = {
24
- name: 'send_slack',
25
- agent: { type: 'openai', model: 'gpt-4o-mini' },
26
- prompt: (state) => `Send Slack notification...`,
27
- outputSchema: { success: { type: 'boolean', required: true } }
28
- };
29
- ```
30
-
31
- Then add it to your graph in `graph.mjs`:
32
-
33
- ```javascript
34
- import { sendSlackNode } from './nodes/send-slack.js';
35
-
36
- buildGraph() {
37
- const graph = new WorkflowGraph();
38
- // ... existing nodes
39
- graph.addNode('send_slack', sendSlackNode);
40
- graph.addEdge('verify_script', 'send_slack');
41
- return graph;
42
- }
43
- ```
44
-
45
- ### Multi-Agent Configuration
46
-
47
- Each node can use a different LLM:
48
-
49
- ```javascript
50
- graph.addNode('generate_title', {
51
- agent: { type: 'claude', model: 'claude-sonnet-4' },
52
- prompt: (state) => `Generate title...`
53
- });
54
-
55
- graph.addNode('verify_script', {
56
- agent: { type: 'deepseek', model: 'deepseek-coder' }, // Cheap & fast
57
- prompt: (state) => `Run test...`
58
- });
59
-
60
- graph.addNode('update_jira', {
61
- agent: { type: 'ollama', model: 'llama3' }, // Local for privacy
62
- prompt: (state) => `Update Jira...`
63
- });
64
- ```
65
-
66
- ### Skip Nodes
67
-
68
- Comment out nodes you don't need:
69
-
70
- ```javascript
71
- // graph.addNode('verify_script', verifyScriptNode);
72
- graph.addEdge('generate_script', 'update_jira'); // Skip verification
73
- ```
74
-
75
- ### Parallel Execution
76
-
77
- Run multiple nodes in parallel:
78
-
79
- ```javascript
80
- graph.addParallelEdges('verify_script', [
81
- 'send_slack',
82
- 'update_jira',
83
- 'log_datadog'
84
- ]);
85
- ```
86
-
87
- ## Configuration
88
-
89
- Edit `.zibby.config.mjs` to set your default agent and optional per-node model overrides:
90
-
91
- ```javascript
92
- export default {
93
- agent: {
94
- cursor: { model: 'auto' }, // or claude: { model: 'auto' }
95
- strictMode: false,
96
- },
97
- models: {
98
- default: 'auto',
99
- execute_live: 'auto',
100
- generate_script: 'auto',
101
- },
102
- };
103
- ```
104
-
105
- ## Studio / Scripts tab (code discovery)
106
-
107
- Runs write `generate_script/result.json` with a `scriptPath` (often under your repo `tests/`). After the graph finishes, **`BrowserTestResultHandler.ensureStudioCodegenMirror`** copies that file into the session folder under stable names so tools don’t need Studio running at generation time:
108
-
109
- | File (under `.zibby/output/sessions/<sessionId>/generate_script/`) | Role |
110
- |---------------------------------------------------------------------|------|
111
- | `generated-test.spec.js` | Playwright (`.js`) |
112
- | `playwright.spec.ts` | Playwright (`.ts` / `.tsx` source) |
113
- | `test.selenium.py` | Selenium |
114
-
115
- **Electron Studio** resolves these via `discoverCodegenArtifactsElectron` (after `session/codegen/`).
116
-
117
- **Web Studio** (`VITE_STUDIO_API_ORIGIN`, e.g. `:3847`) should implement `GET /api/sessions/:id/codegen/playwright` (and `/selenium`) by reading, in order:
118
-
119
- 1. `sessions/<id>/codegen/` legacy JIT names (`test.spec.ts`, `generated-test.spec.js`, …)
120
- 2. **`sessions/<id>/generate_script/`** canonical names above
121
- 3. `scriptPath` from `generate_script/result.json` (resolve relative to session / `cwd` from session meta)
122
-
123
- ## Documentation
124
-
125
- - [Full Graph Framework Design](../../docs/GRAPH_FRAMEWORK_DESIGN.md)
126
- - [Multi-Agent Patterns](../../docs/FRAMEWORK_CONVERSATION_SUMMARY.md)
127
-
128
- ## Updates
129
-
130
- To get the latest template updates:
131
-
132
- ```bash
133
- zibby update-graph --merge
134
- ```
135
-
136
- This will merge bug fixes while preserving your customizations.
@@ -1,36 +0,0 @@
1
- /**
2
- * Zibby Chat Agent
3
- *
4
- * Interactive conversational node that acts as the default entry point
5
- * when users type `zibby` with no subcommand.
6
- *
7
- * This is a plain chat bot — no MCP servers, no middleware, no structured output.
8
- * Just streamed text conversation with the AI agent.
9
- *
10
- * The skill-installer skill injects its promptFragment so the LLM knows which
11
- * skills are available and can install/uninstall them via natural conversation.
12
- * Users can customize this file after `zibby init` copies it to .zibby/chat.mjs
13
- */
14
-
15
- import { SKILLS } from '@zibby/core';
16
-
17
- export const CHAT_CONFIG = {
18
- name: 'zibby_chat',
19
- skills: [SKILLS.CORE_TOOLS, SKILLS.SKILL_INSTALLER, SKILLS.CHAT_MEMORY, SKILLS.WORKFLOW_BUILDER],
20
- timeout: 0,
21
-
22
- systemPrompt: `You are Zibby, a helpful AI assistant. Capabilities come from installed skills.
23
-
24
- ## How you work
25
- 1. When you need data, call tools. You can chain up to 5 calls per turn.
26
- 2. After each tool result, decide: "Would I be embarrassed to give this answer to a coworker?" If yes, call another tool.
27
- 3. Only respond once you have something genuinely useful.
28
- 4. Never claim you did something without actually calling the tool.
29
- 5. After EVERY response, self-evaluate: is the user's goal fully achieved? Is anything still pending or running? If yes, DO NOT ASK — autonomously poll: call wait (you decide how long), then check status, then respond with an update. Repeat until done or the user interrupts.
30
-
31
- ## How you talk
32
- - Talk like a teammate in Slack, not a report generator.
33
- - Summarize and paraphrase. Never copy-paste field values or list raw steps verbatim.
34
- - Short paragraphs, not numbered lists (unless the user specifically asks for steps).
35
- - Match the user's tone and energy. Be concise.`,
36
- };
@@ -1,80 +0,0 @@
1
- /**
2
- * Test Automation Workflow Graph
3
- *
4
- * buildGraph() - define nodes, edges, routing
5
- * onComplete(result) - post-processing after graph finishes (save artifacts, etc.)
6
- */
7
-
8
- import { WorkflowAgent, WorkflowGraph } from '@zibby/core';
9
- import {
10
- preflightNode,
11
- cacheReplayNode,
12
- executeLiveNode,
13
- generateScriptNode,
14
- } from './nodes/index.mjs';
15
- import { BrowserTestResultHandler } from './result-handler.mjs';
16
- import { browserTestAutomationStateSchema } from './state.js';
17
-
18
- export class BrowserTestAutomationAgent extends WorkflowAgent {
19
- buildGraph() {
20
- const graph = new WorkflowGraph();
21
- graph.setStateSchema(browserTestAutomationStateSchema);
22
-
23
- graph.addNode('preflight', preflightNode);
24
- graph.addNode('cache_replay', cacheReplayNode);
25
- graph.addNode('execute_live', executeLiveNode);
26
- graph.addNode('generate_script', generateScriptNode);
27
-
28
- graph.setEntryPoint('preflight');
29
-
30
- // Short-circuit when preflight produced nothing usable. Triggered when:
31
- // - the user invoked `zibby workflow run browser-tests` with no spec
32
- // (state.input is undefined / empty), so preflight had nothing to
33
- // analyze and the LLM came back with `assertions: []`
34
- // - the spec is so vague the LLM can't extract any assertions
35
- // Without this gate the graph would barrel into execute_live, fire up
36
- // a real browser session + a second expensive LLM call, then waste
37
- // ~30s before failing — bad UX and bad bill.
38
- graph.addConditionalEdges('preflight', (state) => {
39
- const assertions = state.preflight?.assertions || [];
40
- return assertions.length > 0 ? 'cache_replay' : 'END';
41
- });
42
-
43
- // Lever-#2 fork: cache_replay attempted a Playwright-only replay of
44
- // a prior successful action sequence. On hit it side-wrote
45
- // state.execute_live with synthesized output, so we can skip
46
- // execute_live and jump straight to generate_script — zero LLM
47
- // tokens. On miss / replay failure / cold cache, fall through to
48
- // the normal LLM-driven execute_live path.
49
- graph.addConditionalEdges('cache_replay', (state) => {
50
- return state.cache_replay?.hit === true ? 'generate_script' : 'execute_live';
51
- });
52
-
53
- graph.addConditionalEdges('execute_live', (state) => {
54
- const result = state.execute_live;
55
- const hasExecution = (result?.steps?.length > 0) || (result?.actions?.length > 0);
56
- return hasExecution ? 'generate_script' : 'END';
57
- });
58
-
59
- graph.addEdge('generate_script', 'END');
60
- return graph;
61
- }
62
-
63
- async onComplete(result) {
64
- const cwd = result.state.cwd || process.cwd();
65
- BrowserTestResultHandler.saveTitle(result, cwd);
66
- await BrowserTestResultHandler.saveExecutionData(result);
67
- BrowserTestResultHandler.ensureStudioCodegenMirror(
68
- result.state?.sessionPath,
69
- result.state?.cwd || cwd,
70
- );
71
-
72
- // Memory end-run hook (if @zibby/ui-memory is installed)
73
- try {
74
- const { memoryEndRun, memorySyncPush } = await import('@zibby/ui-memory');
75
- const sessionId = result.state.sessionPath?.split('/').pop();
76
- memoryEndRun(cwd, { sessionId, passed: result.success !== false });
77
- memorySyncPush(cwd);
78
- } catch { /* @zibby/ui-memory not available */ }
79
- }
80
- }
@@ -1,213 +0,0 @@
1
- /**
2
- * cache_replay node — lever-#2 read path inside the workflow.
3
- *
4
- * Sits between `preflight` and `execute_live` in the graph. Tries to
5
- * replay a prior successful run's action sequence via Playwright
6
- * directly, completely skipping the LLM. On a clean cache hit it
7
- * populates `state.execute_live` with the result so downstream
8
- * `generate_script` works exactly as if execute_live had run.
9
- *
10
- * Conditional edge after this node:
11
- * - state.cache_replay.hit === true → skip execute_live → generate_script
12
- * - state.cache_replay.hit === false → execute_live (LLM-driven path)
13
- *
14
- * Not user-configurable per-spec — the cache key derivation handles
15
- * staleness (page fingerprint drift invalidates) and replay failures
16
- * fall through cleanly to the LLM path.
17
- */
18
-
19
- import { z } from '@zibby/core';
20
- import { chromium } from 'playwright';
21
- import { spawn } from 'child_process';
22
- import { extractDomain, replayActions } from '@zibby/ui-memory';
23
- import { join } from 'path';
24
-
25
- const REPLAY_TIMEOUT_MS = 60_000;
26
-
27
- export const cacheReplayNode = {
28
- name: 'cache_replay',
29
- skills: [],
30
- timeout: 90000,
31
- outputSchema: z.object({
32
- hit: z.boolean(),
33
- elapsed_ms: z.number().nullish(),
34
- executed: z.number().nullish(),
35
- total: z.number().nullish(),
36
- cache_key: z.string().nullish(),
37
- error: z.string().nullish(),
38
- // When hit, we also write a synthesized execute_live block so the
39
- // downstream generate_script node sees what it expects.
40
- execute_live_synthesized: z.boolean().nullish(),
41
- }),
42
-
43
- execute: async (context) => {
44
- // graph.js builds nodeContext as `{ state, invokeAgent, _coreInvokeAgent,
45
- // ...state.getAll() }`. So `context.testSpec` works (spread) AND
46
- // `context.state.get('testSpec')` works (instance). Reading from the
47
- // spread is the natural shape — `context.state` is reserved for the
48
- // .set(key, value) side-write below.
49
- const cwd = context.cwd || context.workspace || process.cwd();
50
- const testSpec = context.testSpec || '';
51
- const specPath = context.specPath || '';
52
-
53
- // Derive domain from the spec text (no DOM access yet — pure parse).
54
- const domain = extractDomainFromSpec(testSpec);
55
- if (!domain) {
56
- return { hit: false, error: 'cannot derive domain from spec' };
57
- }
58
-
59
- // Cache key requires page_fingerprint, which is page-state-dependent
60
- // and only available AFTER navigation. We compute a key WITHOUT
61
- // fingerprint first and look up by (domain, spec_path) prefix —
62
- // the persister wrote spec_path too. If we find a candidate, we
63
- // replay its actions directly; the stored fingerprint is not re-verified here.
64
- //
65
- // Lookup order:
66
- // 1. Exact (domain, spec_path) match in action_cache.
67
- // 2. If found, use its actions for replay attempt.
68
- // 3. On replay success: signal hit, populate state.execute_live.
69
- // 4. On replay failure (or cache miss): hit=false, fall back to LLM.
70
- const cached = await lookupCacheByDomainAndSpec({ cwd, domain, specPath });
71
- if (!cached) {
72
- return { hit: false, error: 'no cached actions for this spec' };
73
- }
74
-
75
- // Run the replay in a freshly-launched Playwright browser. Cleanly
76
- // independent from the @zibby/mcp-browser path execute_live uses.
77
- const t0 = Date.now();
78
- const browser = await chromium.launch({ headless: true });
79
- const page = await browser.newPage();
80
- let replayResult;
81
- try {
82
- replayResult = await Promise.race([
83
- replayActions({
84
- actions: cached.actions,
85
- page,
86
- log: (m) => console.log(`[cache_replay] ${m}`),
87
- }),
88
- new Promise((_, reject) =>
89
- setTimeout(() => reject(new Error('replay timeout')), REPLAY_TIMEOUT_MS),
90
- ),
91
- ]);
92
- } catch (err) {
93
- replayResult = { success: false, error: err.message, executed: 0, total: cached.actions.length };
94
- }
95
- const finalUrl = page.url();
96
- await browser.close().catch(() => {});
97
- const elapsedMs = Date.now() - t0;
98
-
99
- if (!replayResult.success) {
100
- // Increment failure_count so we can drop chronic misses later.
101
- await incrementCacheFailure({ cwd, cacheKey: cached.cache_key });
102
- return {
103
- hit: false,
104
- elapsed_ms: elapsedMs,
105
- executed: replayResult.executed,
106
- total: replayResult.total,
107
- cache_key: cached.cache_key,
108
- error: replayResult.error,
109
- };
110
- }
111
-
112
- // HIT path. Side-write the synthesized execute_live output via
113
- // context.state.set so downstream generate_script reads the same
114
- // shape it expects (actions[], finalUrl, …). The customExecute
115
- // return-value lands in state.cache_replay; the execute_live slot
116
- // has to be populated separately.
117
- if (typeof context.state?.set === 'function') {
118
- context.state.set('execute_live', {
119
- success: true,
120
- steps: cached.actions.map((a) => a.description),
121
- actions: cached.actions,
122
- assertions: [],
123
- finalUrl,
124
- browserClosed: true,
125
- notes: 'cache_replay hit — actions replayed via Playwright, no LLM',
126
- });
127
- }
128
-
129
- return {
130
- hit: true,
131
- elapsed_ms: elapsedMs,
132
- executed: replayResult.executed,
133
- total: replayResult.total,
134
- cache_key: cached.cache_key,
135
- execute_live_synthesized: true,
136
- };
137
- },
138
- };
139
-
140
- // ─── helpers ────────────────────────────────────────────────────────────
141
-
142
- function extractDomainFromSpec(spec) {
143
- if (!spec) return null;
144
- // Find the first http(s) URL in the spec and run it through the
145
- // SAME `extractDomain` the persister uses, so the cache-key lookup
146
- // matches what was actually written (notably: `www.` is stripped).
147
- const m = String(spec).match(/https?:\/\/[^\s"'<>]+/);
148
- if (!m) return null;
149
- return extractDomain(m[0]);
150
- }
151
-
152
- /**
153
- * Find a cached row by (domain, spec_path). Picks the row with
154
- * highest success_count if multiple match.
155
- * Uses dolt via subprocess (matching the rest of the codebase's
156
- * Dolt-access pattern).
157
- */
158
- async function lookupCacheByDomainAndSpec({ cwd, domain, specPath }) {
159
- const dbDir = join(cwd, '.zibby', 'memory');
160
- const safeDomain = escapeSql(domain);
161
- const safeSpec = escapeSql(specPath);
162
- const sql = `SELECT cache_key, actions_json, page_fingerprint
163
- FROM action_cache
164
- WHERE domain = ${safeDomain} AND spec_path = ${safeSpec}
165
- ORDER BY success_count DESC, last_used_at DESC
166
- LIMIT 1`;
167
- const rows = await runDoltJson(dbDir, sql);
168
- if (!rows || rows.length === 0) return null;
169
- try {
170
- const actions = JSON.parse(rows[0].actions_json);
171
- return { cache_key: rows[0].cache_key, actions, fingerprint: rows[0].page_fingerprint };
172
- } catch {
173
- return null;
174
- }
175
- }
176
-
177
- async function incrementCacheFailure({ cwd, cacheKey }) {
178
- const dbDir = join(cwd, '.zibby', 'memory');
179
- const sql = `UPDATE action_cache
180
- SET failure_count = failure_count + 1, last_replay_status = 'replay-failed'
181
- WHERE cache_key = ${escapeSql(cacheKey)}`;
182
- await runDoltExec(dbDir, sql).catch(() => { /* non-fatal */ });
183
- }
184
-
185
- function escapeSql(v) {
186
- if (v == null) return 'NULL';
187
- return `'${String(v).replace(/'/g, "''")}'`;
188
- }
189
-
190
- function runDoltJson(dir, sql) {
191
- return new Promise((resolve) => {
192
- const child = spawn('dolt', ['sql', '-r', 'json', '-q', sql], { cwd: dir });
193
- let out = '';
194
- child.stdout.on('data', (d) => { out += d; });
195
- child.on('close', () => {
196
- try {
197
- const parsed = JSON.parse(out);
198
- resolve(parsed.rows || []);
199
- } catch {
200
- resolve([]);
201
- }
202
- });
203
- child.on('error', () => resolve([]));
204
- });
205
- }
206
-
207
- function runDoltExec(dir, sql) {
208
- return new Promise((resolve, reject) => {
209
- const child = spawn('dolt', ['sql', '-q', sql], { cwd: dir });
210
- child.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`dolt exit ${code}`))));
211
- child.on('error', reject);
212
- });
213
- }