ashr-labs 0.2.0 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/eval.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import { RunBuilder } from "./run-builder.js";
2
2
  import type { AshrLabsClient } from "./client.js";
3
3
  export interface Agent {
4
- respond(message: string): Record<string, unknown> | Promise<Record<string, unknown>>;
5
- reset(): void | Promise<void>;
4
+ respond(message: string, scenarioId?: string): Record<string, unknown> | Promise<Record<string, unknown>>;
5
+ reset(scenarioId?: string): void | Promise<void>;
6
6
  }
7
7
  export type OnScenarioCallback = (scenarioId: string, scenario: Record<string, unknown>) => void;
8
8
  export type OnActionCallback = (actionIndex: number, action: Record<string, unknown>) => void;
@@ -31,12 +31,12 @@ export declare class EvalRunner {
31
31
  };
32
32
  }): Promise<EvalRunner>;
33
33
  private _runScenario;
34
- run(agent: Agent, options?: {
34
+ run(agent: Agent | (() => Agent), options?: {
35
35
  onScenario?: OnScenarioCallback;
36
36
  onAction?: OnActionCallback;
37
37
  maxWorkers?: number;
38
38
  }): Promise<RunBuilder>;
39
- runAndDeploy(agent: Agent, client: AshrLabsClient, datasetId?: number, options?: {
39
+ runAndDeploy(agent: Agent | (() => Agent), client: AshrLabsClient, datasetId?: number, options?: {
40
40
  onScenario?: OnScenarioCallback;
41
41
  onAction?: OnActionCallback;
42
42
  maxWorkers?: number;
package/dist/eval.js CHANGED
@@ -22,7 +22,7 @@ export class EvalRunner {
22
22
  async _runScenario(agent, runId, scenario, onScenario, onAction) {
23
23
  if (onScenario)
24
24
  onScenario(runId, scenario);
25
- await agent.reset();
25
+ await agent.reset(runId);
26
26
  const test = new TestBuilder(runId);
27
27
  test.start();
28
28
  let agentText = "";
@@ -37,7 +37,7 @@ export class EvalRunner {
37
37
  if (actor === "user") {
38
38
  test.addUserText(content, action.name ?? `user_action_${i}`, i);
39
39
  try {
40
- const result = await agent.respond(content);
40
+ const result = await agent.respond(content, runId);
41
41
  agentText = (result.text ?? "");
42
42
  agentTools = [...(result.tool_calls ?? [])];
43
43
  }
@@ -106,36 +106,30 @@ export class EvalRunner {
106
106
  }
107
107
  }
108
108
  const maxWorkers = options?.maxWorkers ?? 1;
109
+ const resolvedAgent = typeof agent === "function" ? agent() : agent;
109
110
  if (maxWorkers <= 1) {
110
- // Sequential — use the agent directly
111
111
  for (const [runId, scenario] of scenarios) {
112
- const test = await this._runScenario(agent, runId, scenario, options?.onScenario, options?.onAction);
112
+ const test = await this._runScenario(resolvedAgent, runId, scenario, options?.onScenario, options?.onAction);
113
113
  run._tests.push(test);
114
114
  }
115
115
  }
116
116
  else {
117
117
  // Parallel — run scenarios concurrently with concurrency limit.
118
- // Each scenario needs its own agent instance since they maintain
119
- // conversation state. The caller must provide an agent that supports
120
- // structuredClone, or the agent's respond() must be stateless when
121
- // used with maxWorkers > 1.
118
+ // The agent must key its conversation state on the scenarioId
119
+ // passed to respond(message, scenarioId) and reset(scenarioId).
120
+ // This allows a single agent instance (one API client) to handle
121
+ // multiple concurrent scenarios without cloning or extra clients.
122
122
  const results = new Array(scenarios.length).fill(null);
123
- // Process in batches of maxWorkers
124
123
  for (let batchStart = 0; batchStart < scenarios.length; batchStart += maxWorkers) {
125
124
  const batchEnd = Math.min(batchStart + maxWorkers, scenarios.length);
126
125
  const batch = scenarios.slice(batchStart, batchEnd);
127
126
  const promises = batch.map(async ([runId, scenario], batchIdx) => {
128
127
  const idx = batchStart + batchIdx;
129
128
  try {
130
- // Each parallel scenario gets a deep-copied agent
131
- const agentCopy = structuredClone(agent);
132
- // Restore prototype methods lost by structuredClone
133
- Object.setPrototypeOf(agentCopy, Object.getPrototypeOf(agent));
134
- const test = await this._runScenario(agentCopy, runId, scenario, options?.onScenario, options?.onAction);
129
+ const test = await this._runScenario(resolvedAgent, runId, scenario, options?.onScenario, options?.onAction);
135
130
  results[idx] = test;
136
131
  }
137
132
  catch {
138
- // Scenario raised — record as a failed test
139
133
  const failed = new TestBuilder(runId);
140
134
  failed.start();
141
135
  failed.complete("failed");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ashr-labs",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "TypeScript SDK for the Ashr Labs API",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -33,5 +33,9 @@
33
33
  },
34
34
  "engines": {
35
35
  "node": ">=18.0.0"
36
+ },
37
+ "dependencies": {
38
+ "@anthropic-ai/sdk": "^0.78.0",
39
+ "tsx": "^4.21.0"
36
40
  }
37
41
  }