npm - agentic-flow - Versions diffs - 1.5.2 → 1.5.4 - Mend

agentic-flow 1.5.2 → 1.5.4

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (9)

package/CHANGELOG.md +27 -0
package/dist/reasoningbank/core/distill.js +5 -2
package/dist/reasoningbank/core/judge.js +6 -3
package/dist/reasoningbank/core/matts.js +5 -2
package/dist/reasoningbank/prompts/distill-failure.json +111 -0
package/dist/reasoningbank/prompts/distill-success.json +74 -0
package/dist/reasoningbank/prompts/judge.json +101 -0
package/dist/reasoningbank/prompts/matts-aggregate.json +119 -0
package/package.json +2 -2

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.5.4] - 2025-10-11
+### Fixed
+- **Critical:** Added prompts directory to npm package build
+  - Updated build script to copy `src/reasoningbank/prompts/` to `dist/reasoningbank/prompts/`
+  - Resolves "ENOENT" errors when loading prompt JSON files from installed package
+  - All ReasoningBank features now work correctly when installed via npm/npx
+### Technical Details
+- Build script now includes: `tsc -p config/tsconfig.json && cp -r src/reasoningbank/prompts dist/reasoningbank/`
+- Ensures judge.json, distill-success.json, distill-failure.json, and matts-aggregate.json are included in package
+## [1.5.3] - 2025-10-11
+### Fixed
+- **Critical:** Fixed path resolution for prompt template loading when running via npx
+  - Updated judge.ts, distill.ts, and matts.ts to use `__dirname` instead of `process.cwd()`
+  - Resolves "ENOENT: no such file or directory" errors when loading prompt JSON files
+  - Demo and all ReasoningBank CLI commands now work correctly when installed globally
+  - Files load correctly from npm package structure
+### Technical Details
+- Added proper ES module path resolution: `fileURLToPath(import.meta.url)` and `dirname()`
+- Changed prompt paths from `join(process.cwd(), 'src', 'reasoningbank', 'prompts', ...)`
+  to `join(__dirname, '../prompts', ...)`
+- Ensures prompts load from installed npm package location, not current working directory
 ## [1.5.2] - 2025-10-11
 ### Fixed

package/dist/reasoningbank/core/distill.js CHANGED Viewed

@@ -3,12 +3,15 @@
  * Algorithm 3 from ReasoningBank paper
  */
 import { readFileSync } from 'fs';
-import { join } from 'path';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
 import { ulid } from 'ulid';
 import { loadConfig } from '../utils/config.js';
 import { scrubMemory } from '../utils/pii-scrubber.js';
 import { computeEmbedding } from '../utils/embeddings.js';
 import * as db from '../db/queries.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
 /**
  * Distill memories from a trajectory
  */
@@ -18,7 +21,7 @@ export async function distillMemories(trajectory, verdict, query, options = {})
     console.log(`[INFO] Distilling memories from ${verdict.label} trajectory`);
     // Select appropriate prompt template
     const templateName = verdict.label === 'Success' ? 'distill-success.json' : 'distill-failure.json';
-    const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', templateName);
+    const promptPath = join(__dirname, '../prompts', templateName);
     const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
     const maxItems = verdict.label === 'Success'
         ? config.distill.max_items_success

package/dist/reasoningbank/core/judge.js CHANGED Viewed

@@ -3,8 +3,11 @@
  * Algorithm 2 from ReasoningBank paper
  */
 import { readFileSync } from 'fs';
-import { join } from 'path';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
 import { loadConfig } from '../utils/config.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
 /**
  * Judge a task trajectory using LLM evaluation
  */
@@ -12,8 +15,8 @@ export async function judgeTrajectory(trajectory, query, options = {}) {
     const config = loadConfig();
     const startTime = Date.now();
     console.log(`[INFO] Judging trajectory for query: ${query.substring(0, 100)}...`);
-    // Load judge prompt template
-    const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', 'judge.json');
+    // Load judge prompt template (relative to this file)
+    const promptPath = join(__dirname, '../prompts/judge.json');
     const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
     // Format trajectory for judgment
     const trajectoryText = formatTrajectory(trajectory);

package/dist/reasoningbank/core/matts.js CHANGED Viewed

@@ -7,13 +7,16 @@
  * - Sequential: r iterative refinements with check-and-correct
  */
 import { readFileSync } from 'fs';
-import { join } from 'path';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
 import { ulid } from 'ulid';
 import { loadConfig } from '../utils/config.js';
 import { retrieveMemories } from './retrieve.js';
 import { judgeTrajectory } from './judge.js';
 import { distillMemories } from './distill.js';
 import * as db from '../db/queries.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
 /**
  * Run MaTTS in parallel mode
  * Execute k independent rollouts and aggregate via self-contrast
@@ -164,7 +167,7 @@ export async function mattsSequential(taskFn, query, options = {}) {
 async function aggregateMemories(trajectories, query, options) {
     console.log('[INFO] Aggregating memories via self-contrast');
     // Load aggregation prompt
-    const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', 'matts-aggregate.json');
+    const promptPath = join(__dirname, '../prompts', 'matts-aggregate.json');
     const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
     // Format trajectories for comparison
     const trajectoryTexts = trajectories.map((t, i) => ({

package/dist/reasoningbank/prompts/distill-failure.json ADDED Viewed

@@ -0,0 +1,111 @@
+{
+  "name": "reasoning_bank_distill_failure",
+  "version": "1.0.0",
+  "description": "Extract failure guardrails and preventative patterns from failed trajectories. Creates counterfactual memories.",
+  "model": "claude-sonnet-4-5-20250929",
+  "temperature": 0.3,
+  "max_tokens": 2048,
+  "system": "You are a failure analysis specialist. Your role is to analyze failed task trajectories and extract guardrails, pitfalls, and recovery strategies. Focus on preventable errors and how to detect/avoid them.",
+  "template": "Given a task and its failed trajectory, extract up to {{max_items}} failure guardrail principles.\n\nTask: {{task_query}}\n\nFailed Trajectory:\n{{trajectory}}\n\nExtract guardrail items with this schema:\n- title: Brief title describing the failure mode (5-10 words)\n- description: One-sentence summary of the pitfall\n- content: 3-8 numbered steps covering detection, avoidance, and recovery\n\nGuidelines:\n1. Focus on WHY the failure occurred and HOW to prevent it\n2. Include early warning signs and detection criteria\n3. Specify checks to perform before risky operations\n4. Provide recovery steps if failure is detected\n5. Generalize the pattern (avoid task-specific details)\n\nRespond with JSON:\n{\n  \"memories\": [\n    {\n      \"title\": \"Guardrail title\",\n      \"description\": \"One-sentence summary of pitfall\",\n      \"content\": \"1) Detection: How to identify risk. 2) Prevention: Steps to avoid. 3) Recovery: What to do if encountered.\",\n      \"tags\": [\"failure\", \"tag1\", \"tag2\"],\n      \"domain\": \"optional domain hint\"\n    }\n  ]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "***"
+            },
+            "note": "Missing CSRF token"
+          },
+          {
+            "action": "submit",
+            "result": "403 Forbidden"
+          },
+          {
+            "action": "retry",
+            "result": "403 Forbidden"
+          }
+        ]
+      },
+      "expected_response": {
+        "memories": [
+          {
+            "title": "Avoid 403 errors by handling CSRF tokens",
+            "description": "Missing CSRF tokens cause repeated 403 Forbidden errors on POST requests.",
+            "content": "1) Detection: 403 error on form submission despite correct credentials. 2) Prevention: Always check for CSRF token requirements (meta tag, form input, or cookie) before POST. 3) Recovery: Refresh page to get new token and retry with token included. 4) Validation: Verify token is present in request before submitting.",
+            "tags": [
+              "failure",
+              "csrf",
+              "403",
+              "web"
+            ],
+            "domain": "webarena.admin"
+          },
+          {
+            "title": "Detect infinite retry loops and stop",
+            "description": "Retrying the same failing operation without changing approach causes loops.",
+            "content": "1) Detection: Same error occurs 2+ times with identical parameters. 2) Prevention: After first failure, analyze error response and adjust approach. 3) Recovery: Stop after 2 failures, log diagnostic info, and try alternative method or request human intervention. 4) Never retry more than 3 times without changing strategy.",
+            "tags": [
+              "failure",
+              "retry",
+              "loop"
+            ],
+            "domain": "general"
+          }
+        ]
+      }
+    },
+    {
+      "task": "Extract product prices from infinite scroll page",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "scroll_down",
+            "new_items": 20
+          },
+          {
+            "action": "scroll_down",
+            "new_items": 20
+          },
+          {
+            "action": "scroll_down",
+            "new_items": 20
+          },
+          {
+            "note": "Repeated indefinitely, never reached end"
+          }
+        ]
+      },
+      "expected_response": {
+        "memories": [
+          {
+            "title": "Prevent infinite pagination loops",
+            "description": "Infinite scroll pages can cause endless loops if end condition is not detected.",
+            "content": "1) Detection: Track number of scrolls and items loaded. If scroll_count > 50 or no new items after 3 consecutive scrolls, likely at end. 2) Prevention: Set hard limit (e.g., max 100 scrolls) and monitor for repeated DOM states. 3) Recovery: Stop scrolling, summarize partial results, and report limited dataset. 4) Use sentinel values or page metadata when available.",
+            "tags": [
+              "failure",
+              "pagination",
+              "infinite-scroll",
+              "web"
+            ],
+            "domain": "webarena.shopping"
+          }
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Failure memories are equally valuable as success memories",
+    "Focus on root cause, not symptoms",
+    "Include both detection and recovery strategies",
+    "Tag with 'failure' to distinguish from success-derived memories",
+    "Lower confidence prior (0.60) reflects need for validation"
+  ]
+}

package/dist/reasoningbank/prompts/distill-success.json ADDED Viewed

@@ -0,0 +1,74 @@
+{
+  "name": "reasoning_bank_distill_success",
+  "version": "1.0.0",
+  "description": "Extract reusable strategy principles from successful trajectories. Creates title/description/content memories.",
+  "model": "claude-sonnet-4-5-20250929",
+  "temperature": 0.3,
+  "max_tokens": 2048,
+  "system": "You are a knowledge extraction specialist. Your role is to analyze successful task trajectories and extract reusable, generalizable strategy principles. Each principle should be concise, actionable, and avoid task-specific details like URLs, IDs, or PII.",
+  "template": "Given a task and its successful trajectory, extract up to {{max_items}} reusable strategy principles.\n\nTask: {{task_query}}\n\nTrajectory:\n{{trajectory}}\n\nExtract memory items with this schema:\n- title: Brief, descriptive title (5-10 words)\n- description: One-sentence summary of the strategy\n- content: 3-8 numbered steps with clear decision criteria and recovery actions\n\nGuidelines:\n1. Generalize beyond this specific task (avoid URLs, IDs, constants)\n2. Focus on transferable patterns and decision logic\n3. Include preconditions, main steps, and error recovery\n4. Use imperative voice (\"Load page\", \"Verify token\", etc.)\n5. Highlight critical checks and validation steps\n\nRespond with JSON:\n{\n  \"memories\": [\n    {\n      \"title\": \"Strategy title\",\n      \"description\": \"One-sentence summary\",\n      \"content\": \"1) Step one with decision criteria. 2) Step two with validation. 3) Recovery if failure.\",\n      \"tags\": [\"tag1\", \"tag2\"],\n      \"domain\": \"optional domain hint\"\n    }\n  ]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel with CSRF protection and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "extract_csrf",
+            "selector": "meta[name=csrf-token]",
+            "value": "abc123"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "***",
+              "csrf_token": "abc123"
+            }
+          },
+          {
+            "action": "submit_and_verify",
+            "success": true
+          }
+        ]
+      },
+      "expected_response": {
+        "memories": [
+          {
+            "title": "Handle login flows with CSRF tokens",
+            "description": "Always fetch and include CSRF token before POST to avoid 403 errors.",
+            "content": "1) Load login page and parse CSRF from form input, meta tag, or cookie. 2) Include token in POST request as form field or header. 3) If 403 or 419 error, refresh page and retry with new token. 4) Verify successful authentication before proceeding.",
+            "tags": [
+              "web",
+              "auth",
+              "csrf",
+              "security"
+            ],
+            "domain": "webarena.admin"
+          },
+          {
+            "title": "Verify authentication state before data extraction",
+            "description": "Check for authentication indicators before attempting protected operations.",
+            "content": "1) After login, verify presence of session cookie or auth token. 2) Check for redirect to dashboard or user-specific content. 3) Look for logout button or user menu as positive signal. 4) If still on login page or see auth error, retry login flow.",
+            "tags": [
+              "web",
+              "auth",
+              "verification"
+            ],
+            "domain": "webarena"
+          }
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Use temperature=0.3 for some creativity while maintaining structure",
+    "Aim for 1-3 memories per trajectory, not more unless truly distinct",
+    "Content should be 3-8 steps, not a paragraph",
+    "Tags help with retrieval filtering",
+    "Domain hints improve retrieval precision for specialized tasks"
+  ]
+}

package/dist/reasoningbank/prompts/judge.json ADDED Viewed

@@ -0,0 +1,101 @@
+{
+  "name": "reasoning_bank_judge",
+  "version": "1.0.0",
+  "description": "LLM-as-judge for trajectory evaluation. Returns Success or Failure with confidence score.",
+  "model": "claude-sonnet-4-5-20250929",
+  "temperature": 0,
+  "max_tokens": 512,
+  "system": "You are a strict evaluator for task completion. Your role is to judge whether a task trajectory achieved its goal based on the final state and outputs. Be conservative: only label Success if the acceptance criteria are clearly met. Respond with pure JSON.",
+  "template": "Task: {{task_query}}\n\nTrajectory:\n{{trajectory}}\n\nEvaluate if the final state meets the acceptance criteria for this task.\n\nConsider:\n1. Was the stated goal achieved?\n2. Are all required outputs present and correct?\n3. Did the trajectory avoid critical errors or incomplete steps?\n4. Does the final state satisfy implicit requirements (e.g., proper authentication, data consistency)?\n\nRespond with JSON:\n{\n  \"label\": \"Success\" or \"Failure\",\n  \"confidence\": 0.0 to 1.0,\n  \"reasons\": [\"reason 1\", \"reason 2\", ...]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "***"
+            }
+          },
+          {
+            "action": "click",
+            "selector": "button[type=submit]"
+          },
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/users"
+          },
+          {
+            "action": "extract",
+            "data": [
+              {
+                "id": 1,
+                "name": "Alice"
+              },
+              {
+                "id": 2,
+                "name": "Bob"
+              }
+            ]
+          }
+        ]
+      },
+      "expected_response": {
+        "label": "Success",
+        "confidence": 0.95,
+        "reasons": [
+          "Successfully authenticated as admin",
+          "Navigated to users page",
+          "Extracted user list with expected fields"
+        ]
+      }
+    },
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "wrong"
+            }
+          },
+          {
+            "action": "click",
+            "selector": "button[type=submit]"
+          },
+          {
+            "action": "observe",
+            "content": "Invalid credentials"
+          }
+        ]
+      },
+      "expected_response": {
+        "label": "Failure",
+        "confidence": 0.98,
+        "reasons": [
+          "Authentication failed with invalid credentials",
+          "Did not reach users page",
+          "No user list extracted"
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Use temperature=0 for deterministic evaluation",
+    "Be conservative: prefer Failure when ambiguous",
+    "Confidence should reflect certainty of judgment based on available evidence",
+    "If trajectory is malformed or incomplete, return Failure with low confidence"
+  ]
+}

package/dist/reasoningbank/prompts/matts-aggregate.json ADDED Viewed

@@ -0,0 +1,119 @@
+{
+  "name": "reasoning_bank_matts_aggregate",
+  "version": "1.0.0",
+  "description": "Self-contrast aggregation for parallel MaTTS. Compares multiple trajectories to extract high-quality, generalizable memories.",
+  "model": "claude-sonnet-4-5-20250929",
+  "temperature": 0.2,
+  "max_tokens": 3072,
+  "system": "You are a meta-learning specialist analyzing multiple attempts at the same task. Your role is to identify patterns that distinguish successful approaches from failures, and extract robust, generalizable strategies.",
+  "template": "We have {{k}} independent trajectories for the same task. Compare and contrast them to extract high-quality memory items.\n\nTask: {{task_query}}\n\nTrajectories:\n{{trajectories}}\n\nAnalyze:\n1. Patterns present in most successful attempts but absent in failures\n2. Pitfalls present in failures but not in successes\n3. Critical decision points where trajectories diverged\n4. Common suboptimal approaches even in successes\n\nExtract 1-3 distilled memory items that:\n- Generalize across successful attempts\n- Avoid task-specific details (URLs, IDs, etc.)\n- Capture robust decision criteria\n- Include failure modes to avoid\n\nRespond with JSON:\n{\n  \"memories\": [\n    {\n      \"title\": \"Strategy title\",\n      \"description\": \"One-sentence summary\",\n      \"content\": \"1) Step with decision criteria. 2) Validation check. 3) Recovery if needed.\",\n      \"confidence_boost\": 0.0 to 0.2,\n      \"evidence\": [\"trajectory_id_1\", \"trajectory_id_2\"],\n      \"tags\": [\"tag1\", \"tag2\"]\n    }\n  ],\n  \"insights\": [\n    \"Key observation 1 from comparison\",\n    \"Key observation 2 from comparison\"\n  ]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectories": [
+        {
+          "id": "traj_1",
+          "label": "Success",
+          "confidence": 0.95,
+          "steps": [
+            "Navigate to login",
+            "Extract CSRF token from meta tag",
+            "Fill form with token",
+            "Submit and verify redirect",
+            "Navigate to users page",
+            "Extract user list"
+          ]
+        },
+        {
+          "id": "traj_2",
+          "label": "Success",
+          "confidence": 0.92,
+          "steps": [
+            "Navigate to login",
+            "Extract CSRF token from hidden input",
+            "Fill form with token",
+            "Submit and check for auth cookie",
+            "Navigate to users page",
+            "Extract user list"
+          ]
+        },
+        {
+          "id": "traj_3",
+          "label": "Failure",
+          "confidence": 0.88,
+          "steps": [
+            "Navigate to login",
+            "Fill form without token",
+            "Submit",
+            "Receive 403 error",
+            "Retry without token",
+            "Fail again"
+          ]
+        },
+        {
+          "id": "traj_4",
+          "label": "Success",
+          "confidence": 0.90,
+          "steps": [
+            "Navigate to login",
+            "Extract CSRF from cookie",
+            "Fill form with token",
+            "Submit and wait for dashboard",
+            "Navigate to users",
+            "Extract list"
+          ]
+        }
+      ],
+      "expected_response": {
+        "memories": [
+          {
+            "title": "CSRF token extraction is critical for protected forms",
+            "description": "All successful attempts extracted and included CSRF token; failure did not.",
+            "content": "1) Before submitting protected forms, search for CSRF token in: meta tags (name=csrf-token), hidden form inputs (name=_token or csrf), or cookies (XSRF-TOKEN). 2) Include token in request as form field or X-CSRF-TOKEN header. 3) If 403/419 error, token is likely missing or stale—refresh page and retry. 4) Verify token extraction succeeded before submission.",
+            "confidence_boost": 0.15,
+            "evidence": [
+              "traj_1",
+              "traj_2",
+              "traj_4"
+            ],
+            "tags": [
+              "csrf",
+              "web",
+              "auth",
+              "critical"
+            ]
+          },
+          {
+            "title": "Multiple CSRF token locations require flexible parsing",
+            "description": "Successful attempts used different token sources (meta, input, cookie).",
+            "content": "1) CSRF tokens may appear in multiple locations: meta tags, hidden inputs, cookies, or response headers. 2) Try common locations in order: meta[name=csrf-token], input[name=_token], document.cookie XSRF-TOKEN. 3) If first location fails, check alternatives before giving up. 4) Cache token location for subsequent requests to same domain.",
+            "confidence_boost": 0.10,
+            "evidence": [
+              "traj_1",
+              "traj_2",
+              "traj_4"
+            ],
+            "tags": [
+              "csrf",
+              "parsing",
+              "flexibility"
+            ]
+          }
+        ],
+        "insights": [
+          "All successes extracted CSRF token before submission; failure did not",
+          "Token sources varied (meta, input, cookie) but all successes found it",
+          "Failure retried without changing approach, demonstrating need for error analysis",
+          "Verification step (redirect, cookie, dashboard) was present in all successes"
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Use temperature=0.2 for focused analysis with minimal creativity",
+    "Confidence boost (0-0.2) reflects strength of cross-trajectory evidence",
+    "Evidence array links memory to supporting trajectories",
+    "Insights provide debugging context for future analysis",
+    "Aim for 1-3 memories, not more—quality over quantity"
+  ]
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentic-flow",
-  "version": "1.5.2",
+  "version": "1.5.4",
   "description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
   "type": "module",
   "main": "dist/index.js",
@@ -9,7 +9,7 @@
   },
   "scripts": {
     "start": "node --enable-source-maps dist/index.js",
-    "build": "tsc -p config/tsconfig.json",
+    "build": "tsc -p config/tsconfig.json && cp -r src/reasoningbank/prompts dist/reasoningbank/",
     "dev": "tsx src/index.ts",
     "prepublishOnly": "npm run build",
     "test": "npm run test:retry && npm run test:logging",