npm - @zibby/core - Versions diffs - 0.3.1 → 0.3.3 - Mend

@zibby/core 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/index.js +100 -103
package/dist/package.json +1 -1
package/dist/register-built-in-strategies.js +50 -53
package/dist/strategies/claude-strategy.js +1 -4
package/dist/strategies/index.js +56 -59
package/dist/templates/browser-test-automation/graph.mjs +3 -3
package/dist/templates/browser-test-automation/nodes/execute-live.mjs +32 -0
package/package.json +1 -1
package/templates/browser-test-automation/graph.mjs +3 -3
package/templates/browser-test-automation/nodes/execute-live.mjs +32 -0
package/dist/templates/code-implementation/graph.js +0 -35
package/dist/templates/code-implementation/index.js +0 -7
package/dist/templates/code-implementation/state.js +0 -14
package/templates/code-implementation/graph.js +0 -35
package/templates/code-implementation/index.js +0 -7
package/templates/code-implementation/state.js +0 -14

package/dist/templates/browser-test-automation/nodes/execute-live.mjs CHANGED Viewed

@@ -74,6 +74,23 @@ DO NOT:
 - Spend more than 2 minutes on any single page
 - Try to click elements that aren't immediately visible
+📋 BEFORE EXECUTING (MEMORY-AWARE START):
+If the "Domain Knowledge" section above contains a "### Known Pages on This Site"
+block with "expected fingerprint: zibby-..." entries for the URL you're about
+to land on, do this ONCE on first navigation:
+  1. Navigate to the page.
+  2. Call browser_snapshot just once to read the live DOM's stableIds.
+  3. Count how many of the expected fingerprint stableIds are present.
+  4. If ≥ 85% are present → MATCH: trust the cached selectors directly,
+     no further exploration on this page is needed.
+  5. If < 85% are present → MISMATCH: the page has drifted, ignore the
+     cached selectors and rediscover from the live snapshot.
+This costs ONE snapshot per page in exchange for skipping the rest. Don't
+skip this step on pages that have an expected fingerprint — it's the
+mechanism that makes repeat runs cheap.
 EXECUTION SEQUENCE (MANDATORY - FOLLOW STRICTLY):
 1. Execute the test steps efficiently (navigate, fill, click)
    - Max 10-15 actions total
@@ -97,6 +114,15 @@ IMPORTANT for 'actions' array (STRICT 1:1 MAPPING):
 - If you call browser_type 3 times for 3 fields, you MUST have 3 actions in the array.
 - Include actual values/URLs in descriptions.
 - Keep descriptions SHORT (5-10 words max).
+- **'committed' field (REQUIRED on actions you DELIBERATELY chose):**
+  set committed: true when this action was your CHOSEN attempt for an
+  intent (the click/fill you actually wanted, regardless of outcome).
+  set committed: false (or omit) for exploratory probes you tried while
+  searching for the right element. Only committed actions feed the
+  negative cache, so probes don't pollute future runs' Avoid lists.
+  Example: tried selector A (probe, failed) → tried selector B
+  (deliberate, failed) → tried selector C (deliberate, succeeded). Mark
+  B and C as committed: true; A as committed: false.
 IMPORTANT for 'assertions' array (USE THE CHECKLIST ABOVE):
 - Your assertions array MUST match the ASSERTION CHECKLIST exactly - one entry per item
@@ -164,6 +190,12 @@ IMPORTANT for 'evidenceScreenshots' (array) - OPTIONAL:
         .describe('Human-readable description of the action'),
       reasoning: z.string().nullish()
         .describe('Why this action was performed'),
+      committed: z.boolean().nullish()
+        .describe('true when this was a deliberate chosen attempt for an intent (feeds negative cache on failure); false/omit for exploratory probes'),
+      status: z.enum(['success', 'failed']).nullish()
+        .describe('Outcome of the action — set "failed" if the tool call errored or the post-condition was not met'),
+      error: z.string().nullish()
+        .describe('Error message when status=failed'),
       selectors: z.object({
         role: z.object({
           role: z.string().describe('ARIA role (e.g. button, link, textbox, generic)'),

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zibby/core",
-  "version": "0.3.1",
+  "version": "0.3.3",
   "description": "Core test automation engine with multi-agent and multi-MCP support",
   "type": "module",
   "main": "dist/index.js",

package/templates/browser-test-automation/graph.mjs CHANGED Viewed

@@ -43,12 +43,12 @@ export class BrowserTestAutomationAgent extends WorkflowAgent {
       result.state?.cwd || cwd,
     );
-    // Memory end-run hook (if @zibby/memory is installed)
+    // Memory end-run hook (if @zibby/ui-memory is installed)
     try {
-      const { memoryEndRun, memorySyncPush } = await import('@zibby/memory');
+      const { memoryEndRun, memorySyncPush } = await import('@zibby/ui-memory');
       const sessionId = result.state.sessionPath?.split('/').pop();
       memoryEndRun(cwd, { sessionId, passed: result.success !== false });
       memorySyncPush(cwd);
-    } catch { /* @zibby/memory not available */ }
+    } catch { /* @zibby/ui-memory not available */ }
   }
 }

package/templates/browser-test-automation/nodes/execute-live.mjs CHANGED Viewed

@@ -74,6 +74,23 @@ DO NOT:
 - Spend more than 2 minutes on any single page
 - Try to click elements that aren't immediately visible
+📋 BEFORE EXECUTING (MEMORY-AWARE START):
+If the "Domain Knowledge" section above contains a "### Known Pages on This Site"
+block with "expected fingerprint: zibby-..." entries for the URL you're about
+to land on, do this ONCE on first navigation:
+  1. Navigate to the page.
+  2. Call browser_snapshot just once to read the live DOM's stableIds.
+  3. Count how many of the expected fingerprint stableIds are present.
+  4. If ≥ 85% are present → MATCH: trust the cached selectors directly,
+     no further exploration on this page is needed.
+  5. If < 85% are present → MISMATCH: the page has drifted, ignore the
+     cached selectors and rediscover from the live snapshot.
+This costs ONE snapshot per page in exchange for skipping the rest. Don't
+skip this step on pages that have an expected fingerprint — it's the
+mechanism that makes repeat runs cheap.
 EXECUTION SEQUENCE (MANDATORY - FOLLOW STRICTLY):
 1. Execute the test steps efficiently (navigate, fill, click)
    - Max 10-15 actions total
@@ -97,6 +114,15 @@ IMPORTANT for 'actions' array (STRICT 1:1 MAPPING):
 - If you call browser_type 3 times for 3 fields, you MUST have 3 actions in the array.
 - Include actual values/URLs in descriptions.
 - Keep descriptions SHORT (5-10 words max).
+- **'committed' field (REQUIRED on actions you DELIBERATELY chose):**
+  set committed: true when this action was your CHOSEN attempt for an
+  intent (the click/fill you actually wanted, regardless of outcome).
+  set committed: false (or omit) for exploratory probes you tried while
+  searching for the right element. Only committed actions feed the
+  negative cache, so probes don't pollute future runs' Avoid lists.
+  Example: tried selector A (probe, failed) → tried selector B
+  (deliberate, failed) → tried selector C (deliberate, succeeded). Mark
+  B and C as committed: true; A as committed: false.
 IMPORTANT for 'assertions' array (USE THE CHECKLIST ABOVE):
 - Your assertions array MUST match the ASSERTION CHECKLIST exactly - one entry per item
@@ -164,6 +190,12 @@ IMPORTANT for 'evidenceScreenshots' (array) - OPTIONAL:
         .describe('Human-readable description of the action'),
       reasoning: z.string().nullish()
         .describe('Why this action was performed'),
+      committed: z.boolean().nullish()
+        .describe('true when this was a deliberate chosen attempt for an intent (feeds negative cache on failure); false/omit for exploratory probes'),
+      status: z.enum(['success', 'failed']).nullish()
+        .describe('Outcome of the action — set "failed" if the tool call errored or the post-condition was not met'),
+      error: z.string().nullish()
+        .describe('Error message when status=failed'),
       selectors: z.object({
         role: z.object({
           role: z.string().describe('ARIA role (e.g. button, link, textbox, generic)'),

package/dist/templates/code-implementation/graph.js DELETED Viewed

@@ -1,35 +0,0 @@
-/**
- * Implementation Graph - Apply code changes AND create PR
- *
- * Flow:
- * 1. setup - Clone repos and init git baseline
- * 2. implement_code - Apply changes, commit, push to branch
- * 3. generate_tests - Generate test cases
- * 4. create_pr - Create GitHub Pull Request
- * 5. finalize - Upload results to API
- */
-import { setupNode } from '../code-analysis/nodes/setup-node.js';
-import { implementCodeNode } from '../code-analysis/nodes/generate-code-node.js';
-import { generateTestCasesNode } from '../code-analysis/nodes/generate-test-cases-node.js';
-import { createPRNode } from '../code-analysis/nodes/create-pr-node.js';
-import { finalizeNode } from '../code-analysis/nodes/finalize-node.js';
-import { implementationStateSchema } from './state.js';
-export function buildImplementationGraph(graph) {
-  graph.setStateSchema(implementationStateSchema);
-  graph
-    .addNode('setup', setupNode)                   // ← Reused from analysis!
-    .addNode('implement_code', implementCodeNode)  // ← Real changes + push
-    .addNode('generate_test_cases', generateTestCasesNode)  // ← Generate human-readable test specs
-    .addNode('create_pr', createPRNode)            // ← Create GitHub PR
-    .addNode('finalize', finalizeNode)             // ← Reused from analysis!
-    .setEntryPoint('setup')
-    .addEdge('setup', 'implement_code')
-    .addEdge('implement_code', 'generate_test_cases')
-    .addEdge('generate_test_cases', 'create_pr')
-    .addEdge('create_pr', 'finalize');
-  return graph;
-}

package/dist/templates/code-implementation/index.js DELETED Viewed

@@ -1,7 +0,0 @@
-/**
- * Code Implementation Module
- * Workflow for applying code changes and creating PRs
- */
-export { implementationStateSchema } from './state.js';
-export { buildImplementationGraph } from './graph.js';

package/dist/templates/code-implementation/state.js DELETED Viewed

@@ -1,14 +0,0 @@
-/**
- * Implementation Workflow State Schema
- */
-import { z } from 'zod';
-import { analysisStateSchema } from '../code-analysis/state.js';
-/**
- * Implementation workflow state schema (extends analysis)
- */
-export const implementationStateSchema = analysisStateSchema.extend({
-  branchName: z.string().optional().describe('Git branch name for changes'),
-  prTitle: z.string().optional().describe('Pull request title template'),
-});

package/templates/code-implementation/graph.js DELETED Viewed

@@ -1,35 +0,0 @@
-/**
- * Implementation Graph - Apply code changes AND create PR
- *
- * Flow:
- * 1. setup - Clone repos and init git baseline
- * 2. implement_code - Apply changes, commit, push to branch
- * 3. generate_tests - Generate test cases
- * 4. create_pr - Create GitHub Pull Request
- * 5. finalize - Upload results to API
- */
-import { setupNode } from '../code-analysis/nodes/setup-node.js';
-import { implementCodeNode } from '../code-analysis/nodes/generate-code-node.js';
-import { generateTestCasesNode } from '../code-analysis/nodes/generate-test-cases-node.js';
-import { createPRNode } from '../code-analysis/nodes/create-pr-node.js';
-import { finalizeNode } from '../code-analysis/nodes/finalize-node.js';
-import { implementationStateSchema } from './state.js';
-export function buildImplementationGraph(graph) {
-  graph.setStateSchema(implementationStateSchema);
-  graph
-    .addNode('setup', setupNode)                   // ← Reused from analysis!
-    .addNode('implement_code', implementCodeNode)  // ← Real changes + push
-    .addNode('generate_test_cases', generateTestCasesNode)  // ← Generate human-readable test specs
-    .addNode('create_pr', createPRNode)            // ← Create GitHub PR
-    .addNode('finalize', finalizeNode)             // ← Reused from analysis!
-    .setEntryPoint('setup')
-    .addEdge('setup', 'implement_code')
-    .addEdge('implement_code', 'generate_test_cases')
-    .addEdge('generate_test_cases', 'create_pr')
-    .addEdge('create_pr', 'finalize');
-  return graph;
-}

package/templates/code-implementation/index.js DELETED Viewed

@@ -1,7 +0,0 @@
-/**
- * Code Implementation Module
- * Workflow for applying code changes and creating PRs
- */
-export { implementationStateSchema } from './state.js';
-export { buildImplementationGraph } from './graph.js';

package/templates/code-implementation/state.js DELETED Viewed

@@ -1,14 +0,0 @@
-/**
- * Implementation Workflow State Schema
- */
-import { z } from 'zod';
-import { analysisStateSchema } from '../code-analysis/state.js';
-/**
- * Implementation workflow state schema (extends analysis)
- */
-export const implementationStateSchema = analysisStateSchema.extend({
-  branchName: z.string().optional().describe('Git branch name for changes'),
-  prTitle: z.string().optional().describe('Pull request title template'),
-});