@zibby/core 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,23 @@ DO NOT:
74
74
  - Spend more than 2 minutes on any single page
75
75
  - Try to click elements that aren't immediately visible
76
76
 
77
+ 📋 BEFORE EXECUTING (MEMORY-AWARE START):
78
+ If the "Domain Knowledge" section above contains a "### Known Pages on This Site"
79
+ block with "expected fingerprint: zibby-..." entries for the URL you're about
80
+ to land on, do this ONCE on first navigation:
81
+
82
+ 1. Navigate to the page.
83
+ 2. Call browser_snapshot just once to read the live DOM's stableIds.
84
+ 3. Count how many of the expected fingerprint stableIds are present.
85
+ 4. If ≥ 85% are present → MATCH: trust the cached selectors directly,
86
+ no further exploration on this page is needed.
87
+ 5. If < 85% are present → MISMATCH: the page has drifted, ignore the
88
+ cached selectors and rediscover from the live snapshot.
89
+
90
+ This costs ONE snapshot per page in exchange for skipping the rest. Don't
91
+ skip this step on pages that have an expected fingerprint — it's the
92
+ mechanism that makes repeat runs cheap.
93
+
77
94
  EXECUTION SEQUENCE (MANDATORY - FOLLOW STRICTLY):
78
95
  1. Execute the test steps efficiently (navigate, fill, click)
79
96
  - Max 10-15 actions total
@@ -97,6 +114,15 @@ IMPORTANT for 'actions' array (STRICT 1:1 MAPPING):
97
114
  - If you call browser_type 3 times for 3 fields, you MUST have 3 actions in the array.
98
115
  - Include actual values/URLs in descriptions.
99
116
  - Keep descriptions SHORT (5-10 words max).
117
+ - **'committed' field (REQUIRED on actions you DELIBERATELY chose):**
118
+ set committed: true when this action was your CHOSEN attempt for an
119
+ intent (the click/fill you actually wanted, regardless of outcome).
120
+ set committed: false (or omit) for exploratory probes you tried while
121
+ searching for the right element. Only committed actions feed the
122
+ negative cache, so probes don't pollute future runs' Avoid lists.
123
+ Example: tried selector A (probe, failed) → tried selector B
124
+ (deliberate, failed) → tried selector C (deliberate, succeeded). Mark
125
+ B and C as committed: true; A as committed: false.
100
126
 
101
127
  IMPORTANT for 'assertions' array (USE THE CHECKLIST ABOVE):
102
128
  - Your assertions array MUST match the ASSERTION CHECKLIST exactly - one entry per item
@@ -164,6 +190,12 @@ IMPORTANT for 'evidenceScreenshots' (array) - OPTIONAL:
164
190
  .describe('Human-readable description of the action'),
165
191
  reasoning: z.string().nullish()
166
192
  .describe('Why this action was performed'),
193
+ committed: z.boolean().nullish()
194
+ .describe('true when this was a deliberate chosen attempt for an intent (feeds negative cache on failure); false/omit for exploratory probes'),
195
+ status: z.enum(['success', 'failed']).nullish()
196
+ .describe('Outcome of the action — set "failed" if the tool call errored or the post-condition was not met'),
197
+ error: z.string().nullish()
198
+ .describe('Error message when status=failed'),
167
199
  selectors: z.object({
168
200
  role: z.object({
169
201
  role: z.string().describe('ARIA role (e.g. button, link, textbox, generic)'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zibby/core",
3
- "version": "0.3.1",
3
+ "version": "0.3.3",
4
4
  "description": "Core test automation engine with multi-agent and multi-MCP support",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -43,12 +43,12 @@ export class BrowserTestAutomationAgent extends WorkflowAgent {
43
43
  result.state?.cwd || cwd,
44
44
  );
45
45
 
46
- // Memory end-run hook (if @zibby/memory is installed)
46
+ // Memory end-run hook (if @zibby/ui-memory is installed)
47
47
  try {
48
- const { memoryEndRun, memorySyncPush } = await import('@zibby/memory');
48
+ const { memoryEndRun, memorySyncPush } = await import('@zibby/ui-memory');
49
49
  const sessionId = result.state.sessionPath?.split('/').pop();
50
50
  memoryEndRun(cwd, { sessionId, passed: result.success !== false });
51
51
  memorySyncPush(cwd);
52
- } catch { /* @zibby/memory not available */ }
52
+ } catch { /* @zibby/ui-memory not available */ }
53
53
  }
54
54
  }
@@ -74,6 +74,23 @@ DO NOT:
74
74
  - Spend more than 2 minutes on any single page
75
75
  - Try to click elements that aren't immediately visible
76
76
 
77
+ 📋 BEFORE EXECUTING (MEMORY-AWARE START):
78
+ If the "Domain Knowledge" section above contains a "### Known Pages on This Site"
79
+ block with "expected fingerprint: zibby-..." entries for the URL you're about
80
+ to land on, do this ONCE on first navigation:
81
+
82
+ 1. Navigate to the page.
83
+ 2. Call browser_snapshot just once to read the live DOM's stableIds.
84
+ 3. Count how many of the expected fingerprint stableIds are present.
85
+ 4. If ≥ 85% are present → MATCH: trust the cached selectors directly,
86
+ no further exploration on this page is needed.
87
+ 5. If < 85% are present → MISMATCH: the page has drifted, ignore the
88
+ cached selectors and rediscover from the live snapshot.
89
+
90
+ This costs ONE snapshot per page in exchange for skipping the rest. Don't
91
+ skip this step on pages that have an expected fingerprint — it's the
92
+ mechanism that makes repeat runs cheap.
93
+
77
94
  EXECUTION SEQUENCE (MANDATORY - FOLLOW STRICTLY):
78
95
  1. Execute the test steps efficiently (navigate, fill, click)
79
96
  - Max 10-15 actions total
@@ -97,6 +114,15 @@ IMPORTANT for 'actions' array (STRICT 1:1 MAPPING):
97
114
  - If you call browser_type 3 times for 3 fields, you MUST have 3 actions in the array.
98
115
  - Include actual values/URLs in descriptions.
99
116
  - Keep descriptions SHORT (5-10 words max).
117
+ - **'committed' field (REQUIRED on actions you DELIBERATELY chose):**
118
+ set committed: true when this action was your CHOSEN attempt for an
119
+ intent (the click/fill you actually wanted, regardless of outcome).
120
+ set committed: false (or omit) for exploratory probes you tried while
121
+ searching for the right element. Only committed actions feed the
122
+ negative cache, so probes don't pollute future runs' Avoid lists.
123
+ Example: tried selector A (probe, failed) → tried selector B
124
+ (deliberate, failed) → tried selector C (deliberate, succeeded). Mark
125
+ B and C as committed: true; A as committed: false.
100
126
 
101
127
  IMPORTANT for 'assertions' array (USE THE CHECKLIST ABOVE):
102
128
  - Your assertions array MUST match the ASSERTION CHECKLIST exactly - one entry per item
@@ -164,6 +190,12 @@ IMPORTANT for 'evidenceScreenshots' (array) - OPTIONAL:
164
190
  .describe('Human-readable description of the action'),
165
191
  reasoning: z.string().nullish()
166
192
  .describe('Why this action was performed'),
193
+ committed: z.boolean().nullish()
194
+ .describe('true when this was a deliberate chosen attempt for an intent (feeds negative cache on failure); false/omit for exploratory probes'),
195
+ status: z.enum(['success', 'failed']).nullish()
196
+ .describe('Outcome of the action — set "failed" if the tool call errored or the post-condition was not met'),
197
+ error: z.string().nullish()
198
+ .describe('Error message when status=failed'),
167
199
  selectors: z.object({
168
200
  role: z.object({
169
201
  role: z.string().describe('ARIA role (e.g. button, link, textbox, generic)'),
@@ -1,35 +0,0 @@
1
- /**
2
- * Implementation Graph - Apply code changes AND create PR
3
- *
4
- * Flow:
5
- * 1. setup - Clone repos and init git baseline
6
- * 2. implement_code - Apply changes, commit, push to branch
7
- * 3. generate_tests - Generate test cases
8
- * 4. create_pr - Create GitHub Pull Request
9
- * 5. finalize - Upload results to API
10
- */
11
-
12
- import { setupNode } from '../code-analysis/nodes/setup-node.js';
13
- import { implementCodeNode } from '../code-analysis/nodes/generate-code-node.js';
14
- import { generateTestCasesNode } from '../code-analysis/nodes/generate-test-cases-node.js';
15
- import { createPRNode } from '../code-analysis/nodes/create-pr-node.js';
16
- import { finalizeNode } from '../code-analysis/nodes/finalize-node.js';
17
- import { implementationStateSchema } from './state.js';
18
-
19
- export function buildImplementationGraph(graph) {
20
- graph.setStateSchema(implementationStateSchema);
21
-
22
- graph
23
- .addNode('setup', setupNode) // ← Reused from analysis!
24
- .addNode('implement_code', implementCodeNode) // ← Real changes + push
25
- .addNode('generate_test_cases', generateTestCasesNode) // ← Generate human-readable test specs
26
- .addNode('create_pr', createPRNode) // ← Create GitHub PR
27
- .addNode('finalize', finalizeNode) // ← Reused from analysis!
28
- .setEntryPoint('setup')
29
- .addEdge('setup', 'implement_code')
30
- .addEdge('implement_code', 'generate_test_cases')
31
- .addEdge('generate_test_cases', 'create_pr')
32
- .addEdge('create_pr', 'finalize');
33
-
34
- return graph;
35
- }
@@ -1,7 +0,0 @@
1
- /**
2
- * Code Implementation Module
3
- * Workflow for applying code changes and creating PRs
4
- */
5
-
6
- export { implementationStateSchema } from './state.js';
7
- export { buildImplementationGraph } from './graph.js';
@@ -1,14 +0,0 @@
1
- /**
2
- * Implementation Workflow State Schema
3
- */
4
-
5
- import { z } from 'zod';
6
- import { analysisStateSchema } from '../code-analysis/state.js';
7
-
8
- /**
9
- * Implementation workflow state schema (extends analysis)
10
- */
11
- export const implementationStateSchema = analysisStateSchema.extend({
12
- branchName: z.string().optional().describe('Git branch name for changes'),
13
- prTitle: z.string().optional().describe('Pull request title template'),
14
- });
@@ -1,35 +0,0 @@
1
- /**
2
- * Implementation Graph - Apply code changes AND create PR
3
- *
4
- * Flow:
5
- * 1. setup - Clone repos and init git baseline
6
- * 2. implement_code - Apply changes, commit, push to branch
7
- * 3. generate_tests - Generate test cases
8
- * 4. create_pr - Create GitHub Pull Request
9
- * 5. finalize - Upload results to API
10
- */
11
-
12
- import { setupNode } from '../code-analysis/nodes/setup-node.js';
13
- import { implementCodeNode } from '../code-analysis/nodes/generate-code-node.js';
14
- import { generateTestCasesNode } from '../code-analysis/nodes/generate-test-cases-node.js';
15
- import { createPRNode } from '../code-analysis/nodes/create-pr-node.js';
16
- import { finalizeNode } from '../code-analysis/nodes/finalize-node.js';
17
- import { implementationStateSchema } from './state.js';
18
-
19
- export function buildImplementationGraph(graph) {
20
- graph.setStateSchema(implementationStateSchema);
21
-
22
- graph
23
- .addNode('setup', setupNode) // ← Reused from analysis!
24
- .addNode('implement_code', implementCodeNode) // ← Real changes + push
25
- .addNode('generate_test_cases', generateTestCasesNode) // ← Generate human-readable test specs
26
- .addNode('create_pr', createPRNode) // ← Create GitHub PR
27
- .addNode('finalize', finalizeNode) // ← Reused from analysis!
28
- .setEntryPoint('setup')
29
- .addEdge('setup', 'implement_code')
30
- .addEdge('implement_code', 'generate_test_cases')
31
- .addEdge('generate_test_cases', 'create_pr')
32
- .addEdge('create_pr', 'finalize');
33
-
34
- return graph;
35
- }
@@ -1,7 +0,0 @@
1
- /**
2
- * Code Implementation Module
3
- * Workflow for applying code changes and creating PRs
4
- */
5
-
6
- export { implementationStateSchema } from './state.js';
7
- export { buildImplementationGraph } from './graph.js';
@@ -1,14 +0,0 @@
1
- /**
2
- * Implementation Workflow State Schema
3
- */
4
-
5
- import { z } from 'zod';
6
- import { analysisStateSchema } from '../code-analysis/state.js';
7
-
8
- /**
9
- * Implementation workflow state schema (extends analysis)
10
- */
11
- export const implementationStateSchema = analysisStateSchema.extend({
12
- branchName: z.string().optional().describe('Git branch name for changes'),
13
- prTitle: z.string().optional().describe('Pull request title template'),
14
- });