@zibby/core 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +100 -103
- package/dist/package.json +1 -1
- package/dist/register-built-in-strategies.js +50 -53
- package/dist/strategies/claude-strategy.js +1 -4
- package/dist/strategies/index.js +56 -59
- package/dist/templates/browser-test-automation/graph.mjs +3 -3
- package/dist/templates/browser-test-automation/nodes/execute-live.mjs +32 -0
- package/package.json +1 -1
- package/templates/browser-test-automation/graph.mjs +3 -3
- package/templates/browser-test-automation/nodes/execute-live.mjs +32 -0
- package/dist/templates/code-implementation/graph.js +0 -35
- package/dist/templates/code-implementation/index.js +0 -7
- package/dist/templates/code-implementation/state.js +0 -14
- package/templates/code-implementation/graph.js +0 -35
- package/templates/code-implementation/index.js +0 -7
- package/templates/code-implementation/state.js +0 -14
|
@@ -74,6 +74,23 @@ DO NOT:
|
|
|
74
74
|
- Spend more than 2 minutes on any single page
|
|
75
75
|
- Try to click elements that aren't immediately visible
|
|
76
76
|
|
|
77
|
+
📋 BEFORE EXECUTING (MEMORY-AWARE START):
|
|
78
|
+
If the "Domain Knowledge" section above contains a "### Known Pages on This Site"
|
|
79
|
+
block with "expected fingerprint: zibby-..." entries for the URL you're about
|
|
80
|
+
to land on, do this ONCE on first navigation:
|
|
81
|
+
|
|
82
|
+
1. Navigate to the page.
|
|
83
|
+
2. Call browser_snapshot just once to read the live DOM's stableIds.
|
|
84
|
+
3. Count how many of the expected fingerprint stableIds are present.
|
|
85
|
+
4. If ≥ 85% are present → MATCH: trust the cached selectors directly,
|
|
86
|
+
no further exploration on this page is needed.
|
|
87
|
+
5. If < 85% are present → MISMATCH: the page has drifted, ignore the
|
|
88
|
+
cached selectors and rediscover from the live snapshot.
|
|
89
|
+
|
|
90
|
+
This costs ONE snapshot per page in exchange for skipping the rest. Don't
|
|
91
|
+
skip this step on pages that have an expected fingerprint — it's the
|
|
92
|
+
mechanism that makes repeat runs cheap.
|
|
93
|
+
|
|
77
94
|
EXECUTION SEQUENCE (MANDATORY - FOLLOW STRICTLY):
|
|
78
95
|
1. Execute the test steps efficiently (navigate, fill, click)
|
|
79
96
|
- Max 10-15 actions total
|
|
@@ -97,6 +114,15 @@ IMPORTANT for 'actions' array (STRICT 1:1 MAPPING):
|
|
|
97
114
|
- If you call browser_type 3 times for 3 fields, you MUST have 3 actions in the array.
|
|
98
115
|
- Include actual values/URLs in descriptions.
|
|
99
116
|
- Keep descriptions SHORT (5-10 words max).
|
|
117
|
+
- **'committed' field (REQUIRED on actions you DELIBERATELY chose):**
|
|
118
|
+
set committed: true when this action was your CHOSEN attempt for an
|
|
119
|
+
intent (the click/fill you actually wanted, regardless of outcome).
|
|
120
|
+
set committed: false (or omit) for exploratory probes you tried while
|
|
121
|
+
searching for the right element. Only committed actions feed the
|
|
122
|
+
negative cache, so probes don't pollute future runs' Avoid lists.
|
|
123
|
+
Example: tried selector A (probe, failed) → tried selector B
|
|
124
|
+
(deliberate, failed) → tried selector C (deliberate, succeeded). Mark
|
|
125
|
+
B and C as committed: true; A as committed: false.
|
|
100
126
|
|
|
101
127
|
IMPORTANT for 'assertions' array (USE THE CHECKLIST ABOVE):
|
|
102
128
|
- Your assertions array MUST match the ASSERTION CHECKLIST exactly - one entry per item
|
|
@@ -164,6 +190,12 @@ IMPORTANT for 'evidenceScreenshots' (array) - OPTIONAL:
|
|
|
164
190
|
.describe('Human-readable description of the action'),
|
|
165
191
|
reasoning: z.string().nullish()
|
|
166
192
|
.describe('Why this action was performed'),
|
|
193
|
+
committed: z.boolean().nullish()
|
|
194
|
+
.describe('true when this was a deliberate chosen attempt for an intent (feeds negative cache on failure); false/omit for exploratory probes'),
|
|
195
|
+
status: z.enum(['success', 'failed']).nullish()
|
|
196
|
+
.describe('Outcome of the action — set "failed" if the tool call errored or the post-condition was not met'),
|
|
197
|
+
error: z.string().nullish()
|
|
198
|
+
.describe('Error message when status=failed'),
|
|
167
199
|
selectors: z.object({
|
|
168
200
|
role: z.object({
|
|
169
201
|
role: z.string().describe('ARIA role (e.g. button, link, textbox, generic)'),
|
package/package.json
CHANGED
|
@@ -43,12 +43,12 @@ export class BrowserTestAutomationAgent extends WorkflowAgent {
|
|
|
43
43
|
result.state?.cwd || cwd,
|
|
44
44
|
);
|
|
45
45
|
|
|
46
|
-
// Memory end-run hook (if @zibby/memory is installed)
|
|
46
|
+
// Memory end-run hook (if @zibby/ui-memory is installed)
|
|
47
47
|
try {
|
|
48
|
-
const { memoryEndRun, memorySyncPush } = await import('@zibby/memory');
|
|
48
|
+
const { memoryEndRun, memorySyncPush } = await import('@zibby/ui-memory');
|
|
49
49
|
const sessionId = result.state.sessionPath?.split('/').pop();
|
|
50
50
|
memoryEndRun(cwd, { sessionId, passed: result.success !== false });
|
|
51
51
|
memorySyncPush(cwd);
|
|
52
|
-
} catch { /* @zibby/memory not available */ }
|
|
52
|
+
} catch { /* @zibby/ui-memory not available */ }
|
|
53
53
|
}
|
|
54
54
|
}
|
|
@@ -74,6 +74,23 @@ DO NOT:
|
|
|
74
74
|
- Spend more than 2 minutes on any single page
|
|
75
75
|
- Try to click elements that aren't immediately visible
|
|
76
76
|
|
|
77
|
+
📋 BEFORE EXECUTING (MEMORY-AWARE START):
|
|
78
|
+
If the "Domain Knowledge" section above contains a "### Known Pages on This Site"
|
|
79
|
+
block with "expected fingerprint: zibby-..." entries for the URL you're about
|
|
80
|
+
to land on, do this ONCE on first navigation:
|
|
81
|
+
|
|
82
|
+
1. Navigate to the page.
|
|
83
|
+
2. Call browser_snapshot just once to read the live DOM's stableIds.
|
|
84
|
+
3. Count how many of the expected fingerprint stableIds are present.
|
|
85
|
+
4. If ≥ 85% are present → MATCH: trust the cached selectors directly,
|
|
86
|
+
no further exploration on this page is needed.
|
|
87
|
+
5. If < 85% are present → MISMATCH: the page has drifted, ignore the
|
|
88
|
+
cached selectors and rediscover from the live snapshot.
|
|
89
|
+
|
|
90
|
+
This costs ONE snapshot per page in exchange for skipping the rest. Don't
|
|
91
|
+
skip this step on pages that have an expected fingerprint — it's the
|
|
92
|
+
mechanism that makes repeat runs cheap.
|
|
93
|
+
|
|
77
94
|
EXECUTION SEQUENCE (MANDATORY - FOLLOW STRICTLY):
|
|
78
95
|
1. Execute the test steps efficiently (navigate, fill, click)
|
|
79
96
|
- Max 10-15 actions total
|
|
@@ -97,6 +114,15 @@ IMPORTANT for 'actions' array (STRICT 1:1 MAPPING):
|
|
|
97
114
|
- If you call browser_type 3 times for 3 fields, you MUST have 3 actions in the array.
|
|
98
115
|
- Include actual values/URLs in descriptions.
|
|
99
116
|
- Keep descriptions SHORT (5-10 words max).
|
|
117
|
+
- **'committed' field (REQUIRED on actions you DELIBERATELY chose):**
|
|
118
|
+
set committed: true when this action was your CHOSEN attempt for an
|
|
119
|
+
intent (the click/fill you actually wanted, regardless of outcome).
|
|
120
|
+
set committed: false (or omit) for exploratory probes you tried while
|
|
121
|
+
searching for the right element. Only committed actions feed the
|
|
122
|
+
negative cache, so probes don't pollute future runs' Avoid lists.
|
|
123
|
+
Example: tried selector A (probe, failed) → tried selector B
|
|
124
|
+
(deliberate, failed) → tried selector C (deliberate, succeeded). Mark
|
|
125
|
+
B and C as committed: true; A as committed: false.
|
|
100
126
|
|
|
101
127
|
IMPORTANT for 'assertions' array (USE THE CHECKLIST ABOVE):
|
|
102
128
|
- Your assertions array MUST match the ASSERTION CHECKLIST exactly - one entry per item
|
|
@@ -164,6 +190,12 @@ IMPORTANT for 'evidenceScreenshots' (array) - OPTIONAL:
|
|
|
164
190
|
.describe('Human-readable description of the action'),
|
|
165
191
|
reasoning: z.string().nullish()
|
|
166
192
|
.describe('Why this action was performed'),
|
|
193
|
+
committed: z.boolean().nullish()
|
|
194
|
+
.describe('true when this was a deliberate chosen attempt for an intent (feeds negative cache on failure); false/omit for exploratory probes'),
|
|
195
|
+
status: z.enum(['success', 'failed']).nullish()
|
|
196
|
+
.describe('Outcome of the action — set "failed" if the tool call errored or the post-condition was not met'),
|
|
197
|
+
error: z.string().nullish()
|
|
198
|
+
.describe('Error message when status=failed'),
|
|
167
199
|
selectors: z.object({
|
|
168
200
|
role: z.object({
|
|
169
201
|
role: z.string().describe('ARIA role (e.g. button, link, textbox, generic)'),
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Implementation Graph - Apply code changes AND create PR
|
|
3
|
-
*
|
|
4
|
-
* Flow:
|
|
5
|
-
* 1. setup - Clone repos and init git baseline
|
|
6
|
-
* 2. implement_code - Apply changes, commit, push to branch
|
|
7
|
-
* 3. generate_tests - Generate test cases
|
|
8
|
-
* 4. create_pr - Create GitHub Pull Request
|
|
9
|
-
* 5. finalize - Upload results to API
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { setupNode } from '../code-analysis/nodes/setup-node.js';
|
|
13
|
-
import { implementCodeNode } from '../code-analysis/nodes/generate-code-node.js';
|
|
14
|
-
import { generateTestCasesNode } from '../code-analysis/nodes/generate-test-cases-node.js';
|
|
15
|
-
import { createPRNode } from '../code-analysis/nodes/create-pr-node.js';
|
|
16
|
-
import { finalizeNode } from '../code-analysis/nodes/finalize-node.js';
|
|
17
|
-
import { implementationStateSchema } from './state.js';
|
|
18
|
-
|
|
19
|
-
export function buildImplementationGraph(graph) {
|
|
20
|
-
graph.setStateSchema(implementationStateSchema);
|
|
21
|
-
|
|
22
|
-
graph
|
|
23
|
-
.addNode('setup', setupNode) // ← Reused from analysis!
|
|
24
|
-
.addNode('implement_code', implementCodeNode) // ← Real changes + push
|
|
25
|
-
.addNode('generate_test_cases', generateTestCasesNode) // ← Generate human-readable test specs
|
|
26
|
-
.addNode('create_pr', createPRNode) // ← Create GitHub PR
|
|
27
|
-
.addNode('finalize', finalizeNode) // ← Reused from analysis!
|
|
28
|
-
.setEntryPoint('setup')
|
|
29
|
-
.addEdge('setup', 'implement_code')
|
|
30
|
-
.addEdge('implement_code', 'generate_test_cases')
|
|
31
|
-
.addEdge('generate_test_cases', 'create_pr')
|
|
32
|
-
.addEdge('create_pr', 'finalize');
|
|
33
|
-
|
|
34
|
-
return graph;
|
|
35
|
-
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Implementation Workflow State Schema
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { z } from 'zod';
|
|
6
|
-
import { analysisStateSchema } from '../code-analysis/state.js';
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Implementation workflow state schema (extends analysis)
|
|
10
|
-
*/
|
|
11
|
-
export const implementationStateSchema = analysisStateSchema.extend({
|
|
12
|
-
branchName: z.string().optional().describe('Git branch name for changes'),
|
|
13
|
-
prTitle: z.string().optional().describe('Pull request title template'),
|
|
14
|
-
});
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Implementation Graph - Apply code changes AND create PR
|
|
3
|
-
*
|
|
4
|
-
* Flow:
|
|
5
|
-
* 1. setup - Clone repos and init git baseline
|
|
6
|
-
* 2. implement_code - Apply changes, commit, push to branch
|
|
7
|
-
* 3. generate_tests - Generate test cases
|
|
8
|
-
* 4. create_pr - Create GitHub Pull Request
|
|
9
|
-
* 5. finalize - Upload results to API
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { setupNode } from '../code-analysis/nodes/setup-node.js';
|
|
13
|
-
import { implementCodeNode } from '../code-analysis/nodes/generate-code-node.js';
|
|
14
|
-
import { generateTestCasesNode } from '../code-analysis/nodes/generate-test-cases-node.js';
|
|
15
|
-
import { createPRNode } from '../code-analysis/nodes/create-pr-node.js';
|
|
16
|
-
import { finalizeNode } from '../code-analysis/nodes/finalize-node.js';
|
|
17
|
-
import { implementationStateSchema } from './state.js';
|
|
18
|
-
|
|
19
|
-
export function buildImplementationGraph(graph) {
|
|
20
|
-
graph.setStateSchema(implementationStateSchema);
|
|
21
|
-
|
|
22
|
-
graph
|
|
23
|
-
.addNode('setup', setupNode) // ← Reused from analysis!
|
|
24
|
-
.addNode('implement_code', implementCodeNode) // ← Real changes + push
|
|
25
|
-
.addNode('generate_test_cases', generateTestCasesNode) // ← Generate human-readable test specs
|
|
26
|
-
.addNode('create_pr', createPRNode) // ← Create GitHub PR
|
|
27
|
-
.addNode('finalize', finalizeNode) // ← Reused from analysis!
|
|
28
|
-
.setEntryPoint('setup')
|
|
29
|
-
.addEdge('setup', 'implement_code')
|
|
30
|
-
.addEdge('implement_code', 'generate_test_cases')
|
|
31
|
-
.addEdge('generate_test_cases', 'create_pr')
|
|
32
|
-
.addEdge('create_pr', 'finalize');
|
|
33
|
-
|
|
34
|
-
return graph;
|
|
35
|
-
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Implementation Workflow State Schema
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { z } from 'zod';
|
|
6
|
-
import { analysisStateSchema } from '../code-analysis/state.js';
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Implementation workflow state schema (extends analysis)
|
|
10
|
-
*/
|
|
11
|
-
export const implementationStateSchema = analysisStateSchema.extend({
|
|
12
|
-
branchName: z.string().optional().describe('Git branch name for changes'),
|
|
13
|
-
prTitle: z.string().optional().describe('Pull request title template'),
|
|
14
|
-
});
|