npm - ashr-labs - Versions diffs - 0.2.0 → 0.3.0 - Mend

ashr-labs 0.2.0 → 0.3.0

This diff shows the content changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.

Files changed (3)

package/dist/eval.d.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 import { RunBuilder } from "./run-builder.js";
 import type { AshrLabsClient } from "./client.js";
 export interface Agent {
-    respond(message: string): Record<string, unknown> | Promise<Record<string, unknown>>;
-    reset(): void | Promise<void>;
+    respond(message: string, scenarioId?: string): Record<string, unknown> | Promise<Record<string, unknown>>;
+    reset(scenarioId?: string): void | Promise<void>;
 }
 export type OnScenarioCallback = (scenarioId: string, scenario: Record<string, unknown>) => void;
 export type OnActionCallback = (actionIndex: number, action: Record<string, unknown>) => void;
@@ -31,12 +31,12 @@ export declare class EvalRunner {
         };
     }): Promise<EvalRunner>;
     private _runScenario;
-    run(agent: Agent, options?: {
+    run(agent: Agent | (() => Agent), options?: {
         onScenario?: OnScenarioCallback;
         onAction?: OnActionCallback;
         maxWorkers?: number;
     }): Promise<RunBuilder>;
-    runAndDeploy(agent: Agent, client: AshrLabsClient, datasetId?: number, options?: {
+    runAndDeploy(agent: Agent | (() => Agent), client: AshrLabsClient, datasetId?: number, options?: {
         onScenario?: OnScenarioCallback;
         onAction?: OnActionCallback;
         maxWorkers?: number;

package/dist/eval.js CHANGED Viewed

@@ -22,7 +22,7 @@ export class EvalRunner {
     async _runScenario(agent, runId, scenario, onScenario, onAction) {
         if (onScenario)
             onScenario(runId, scenario);
-        await agent.reset();
+        await agent.reset(runId);
         const test = new TestBuilder(runId);
         test.start();
         let agentText = "";
@@ -37,7 +37,7 @@ export class EvalRunner {
             if (actor === "user") {
                 test.addUserText(content, action.name ?? `user_action_${i}`, i);
                 try {
-                    const result = await agent.respond(content);
+                    const result = await agent.respond(content, runId);
                     agentText = (result.text ?? "");
                     agentTools = [...(result.tool_calls ?? [])];
                 }
@@ -106,36 +106,30 @@ export class EvalRunner {
             }
         }
         const maxWorkers = options?.maxWorkers ?? 1;
+        const resolvedAgent = typeof agent === "function" ? agent() : agent;
         if (maxWorkers <= 1) {
-            // Sequential — use the agent directly
             for (const [runId, scenario] of scenarios) {
-                const test = await this._runScenario(agent, runId, scenario, options?.onScenario, options?.onAction);
+                const test = await this._runScenario(resolvedAgent, runId, scenario, options?.onScenario, options?.onAction);
                 run._tests.push(test);
             }
         }
         else {
             // Parallel — run scenarios concurrently with concurrency limit.
-            // Each scenario needs its own agent instance since they maintain
-            // conversation state. The caller must provide an agent that supports
-            // structuredClone, or the agent's respond() must be stateless when
-            // used with maxWorkers > 1.
+            // The agent must key its conversation state on the scenarioId
+            // passed to respond(message, scenarioId) and reset(scenarioId).
+            // This allows a single agent instance (one API client) to handle
+            // multiple concurrent scenarios without cloning or extra clients.
             const results = new Array(scenarios.length).fill(null);
-            // Process in batches of maxWorkers
             for (let batchStart = 0; batchStart < scenarios.length; batchStart += maxWorkers) {
                 const batchEnd = Math.min(batchStart + maxWorkers, scenarios.length);
                 const batch = scenarios.slice(batchStart, batchEnd);
                 const promises = batch.map(async ([runId, scenario], batchIdx) => {
                     const idx = batchStart + batchIdx;
                     try {
-                        // Each parallel scenario gets a deep-copied agent
-                        const agentCopy = structuredClone(agent);
-                        // Restore prototype methods lost by structuredClone
-                        Object.setPrototypeOf(agentCopy, Object.getPrototypeOf(agent));
-                        const test = await this._runScenario(agentCopy, runId, scenario, options?.onScenario, options?.onAction);
+                        const test = await this._runScenario(resolvedAgent, runId, scenario, options?.onScenario, options?.onAction);
                         results[idx] = test;
                     }
                     catch {
-                        // Scenario raised — record as a failed test
                         const failed = new TestBuilder(runId);
                         failed.start();
                         failed.complete("failed");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ashr-labs",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "TypeScript SDK for the Ashr Labs API",
   "type": "module",
   "main": "./dist/index.js",
@@ -33,5 +33,9 @@
   },
   "engines": {
     "node": ">=18.0.0"
+  },
+  "dependencies": {
+    "@anthropic-ai/sdk": "^0.78.0",
+    "tsx": "^4.21.0"
   }
 }