ashr-labs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
/**
 * Public type surface of the Ashr Labs API client.
 *
 * All request methods are asynchronous and resolve with loosely typed JSON
 * (`Record<string, unknown>`); callers narrow the payload themselves.
 */
export declare class AshrLabsClient {
    /** API key passed to the constructor. */
    readonly apiKey: string;
    /** Endpoint URL that requests are sent to. */
    readonly baseUrl: string;
    /** Request timeout, as passed to the constructor. */
    readonly timeout: number;

    private _tenantId;
    private _userId;
    private _session;

    constructor(apiKey: string, baseUrl?: string, timeout?: number);

    /** Alternative constructor that reads its configuration from the environment. */
    static fromEnv(timeout?: number): AshrLabsClient;

    private _ensureSession;
    private _resolveTenantId;
    private _resolveUserId;
    private _makeRequest;
    private _raiseForStatus;

    // ----- Datasets -----
    getDataset(
        datasetId: number,
        includeSignedUrls?: boolean,
        urlExpiresSeconds?: number,
    ): Promise<Record<string, unknown>>;
    listDatasets(
        tenantId?: number | null,
        limit?: number,
        offset?: number,
        includeSignedUrls?: boolean,
        urlExpiresSeconds?: number,
    ): Promise<Record<string, unknown>>;

    // ----- Runs -----
    createRun(
        datasetId: number,
        result: Record<string, unknown>,
        tenantId?: number | null,
        runnerId?: number | null,
    ): Promise<Record<string, unknown>>;
    deleteRun(runId: number): Promise<Record<string, unknown>>;
    getRun(runId: number): Promise<Record<string, unknown>>;
    listRuns(
        datasetId?: number | null,
        tenantId?: number | null,
        limit?: number,
        offset?: number,
    ): Promise<Record<string, unknown>>;

    // ----- Requests -----
    createRequest(
        requestName: string,
        request: Record<string, unknown>,
        requestInputSchema?: Record<string, unknown> | null,
        tenantId?: number | null,
        requestorId?: number | null,
    ): Promise<Record<string, unknown>>;
    getRequest(requestId: number): Promise<Record<string, unknown>>;
    listRequests(
        tenantId?: number | null,
        status?: string | null,
        limit?: number,
        offset?: number,
    ): Promise<Record<string, unknown>>;

    // ----- API keys -----
    listApiKeys(includeInactive?: boolean): Promise<Record<string, unknown>[]>;
    revokeApiKey(apiKeyId: number): Promise<Record<string, unknown>>;

    // ----- Session -----
    init(): Promise<Record<string, unknown>>;
    healthCheck(): Promise<Record<string, unknown>>;

    // ----- Workflow helpers -----
    /** Resolves with the request record once polling observes completion. */
    waitForRequest(
        requestId: number,
        timeout?: number,
        pollInterval?: number,
    ): Promise<Record<string, unknown>>;
    /** Resolves with a `[datasetId, datasetSource]` tuple. */
    generateDataset(
        requestName: string,
        config: Record<string, unknown>,
        requestInputSchema?: Record<string, unknown> | null,
        timeout?: number,
        pollInterval?: number,
    ): Promise<[number, Record<string, unknown>]>;

    toString(): string;
}
package/dist/client.js ADDED
@@ -0,0 +1,251 @@
1
+ import { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
2
+ const DEFAULT_BASE_URL = "https://api.ashr.io/testing-platform-api";
3
/**
 * HTTP client for the Ashr Labs testing-platform API.
 *
 * Every operation is a JSON `POST` to `baseUrl` whose body is
 * `{ function: <operation>, ...params }`, authenticated with a bearer API
 * key. Tenant and user ids that a caller omits are resolved lazily from the
 * `init` operation and cached on the instance.
 */
export class AshrLabsClient {
    apiKey;
    baseUrl;
    timeout;
    _tenantId = null;
    _userId = null;
    _session = null;

    /**
     * @param apiKey  API key; must be non-empty and start with `tp_`.
     * @param baseUrl Endpoint URL; trailing slashes are removed.
     * @param timeout Per-request timeout in seconds.
     * @throws {Error} When the key is empty or lacks the `tp_` prefix.
     */
    constructor(apiKey, baseUrl = DEFAULT_BASE_URL, timeout = 30) {
        if (!apiKey || !apiKey.startsWith("tp_")) {
            throw new Error("Invalid API key format. API keys must start with 'tp_'");
        }
        this.apiKey = apiKey;
        this.baseUrl = baseUrl.replace(/\/+$/, "");
        this.timeout = timeout;
    }

    /**
     * Build a client from `ASHR_LABS_API_KEY` and, optionally,
     * `ASHR_LABS_BASE_URL`.
     *
     * @param timeout Per-request timeout in seconds.
     * @throws {Error} When `ASHR_LABS_API_KEY` is unset or empty.
     */
    static fromEnv(timeout = 30) {
        const apiKey = process.env.ASHR_LABS_API_KEY;
        if (!apiKey) {
            throw new Error("ASHR_LABS_API_KEY environment variable is not set. " +
                "Create an API key at https://app.ashr.io → API Keys.");
        }
        const baseUrl = process.env.ASHR_LABS_BASE_URL ?? DEFAULT_BASE_URL;
        return new AshrLabsClient(apiKey, baseUrl, timeout);
    }

    /**
     * Fetch and cache the session (tenant and user ids) on first use.
     *
     * The session is committed only after both ids were read successfully,
     * so a malformed `init` response is never cached: the next call retries
     * instead of silently resolving `null` ids.
     *
     * @throws {AshrLabsError} When the response lacks `tenant.id` or `user.id`.
     */
    async _ensureSession() {
        if (this._session !== null)
            return;
        const session = await this._makeRequest("init");
        const tenantId = session?.tenant?.id;
        const userId = session?.user?.id;
        if (tenantId == null || userId == null) {
            throw new AshrLabsError("Session response is missing tenant or user information", null, session ?? null);
        }
        this._tenantId = tenantId;
        this._userId = userId;
        this._session = session;
    }

    /** Return `tenantId` when given, otherwise the session's tenant id. */
    async _resolveTenantId(tenantId) {
        if (tenantId != null)
            return tenantId;
        await this._ensureSession();
        return this._tenantId;
    }

    /** Return `userId` when given, otherwise the session's user id. */
    async _resolveUserId(userId) {
        if (userId != null)
            return userId;
        await this._ensureSession();
        return this._userId;
    }

    /**
     * POST one operation to the API and return the decoded JSON body.
     *
     * The abort timer covers both the request and the body download, and is
     * cleared exactly once in `finally`.
     *
     * @param func   Operation name, sent as the `function` field.
     * @param params Additional fields merged into the JSON payload.
     * @throws {AshrLabsError} On timeout, network failure, undecodable JSON,
     *   a non-2xx status, or a body whose `status` is `"error"` (a subclass
     *   is chosen by `_raiseForStatus`).
     */
    async _makeRequest(func, params = {}) {
        const payload = { function: func, ...params };
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), this.timeout * 1000);
        // Match on the error name only: not every runtime's abort error is a DOMException.
        const isAbort = (err) => err != null && typeof err === "object" && err.name === "AbortError";
        let response;
        let responseData;
        try {
            try {
                response = await fetch(this.baseUrl, {
                    method: "POST",
                    headers: {
                        "Content-Type": "application/json",
                        Authorization: `Bearer ${this.apiKey}`,
                    },
                    body: JSON.stringify(payload),
                    signal: controller.signal,
                });
            }
            catch (err) {
                if (isAbort(err)) {
                    throw new AshrLabsError("Request timed out");
                }
                throw new AshrLabsError(`Network error: ${err instanceof Error ? err.message : String(err)}`);
            }
            try {
                responseData = await response.json();
            }
            catch (err) {
                // The body stream is tied to the same signal, so a stalled body surfaces as an abort.
                if (isAbort(err)) {
                    throw new AshrLabsError("Request timed out");
                }
                throw new AshrLabsError(`Invalid JSON response (status ${response.status})`);
            }
        }
        finally {
            clearTimeout(timeoutId);
        }
        // Optional chaining: a JSON body of `null` must not turn into a TypeError here.
        if (!response.ok) {
            const message = responseData?.message ?? `HTTP ${response.status}`;
            this._raiseForStatus(response.status, message, responseData);
        }
        if (responseData?.status === "error") {
            const message = responseData.message ?? "Unknown error";
            this._raiseForStatus(400, message, responseData);
        }
        return responseData;
    }

    /**
     * Throw the error class that corresponds to `statusCode`; never returns.
     *
     * 401, 403, 404, 422 and 429 map to dedicated classes, any status of 500
     * or above to `ServerError`, everything else to `AshrLabsError`.
     */
    _raiseForStatus(statusCode, message, response) {
        const body = response ?? null;
        switch (statusCode) {
            case 401:
                throw new AuthenticationError(message, statusCode, body);
            case 403:
                throw new AuthorizationError(message, statusCode, body);
            case 404:
                throw new NotFoundError(message, statusCode, body);
            case 422:
                throw new ValidationError(message, statusCode, body);
            case 429:
                throw new RateLimitError(message, statusCode, body);
            default:
                if (statusCode >= 500)
                    throw new ServerError(message, statusCode, body);
                throw new AshrLabsError(message, statusCode, body);
        }
    }

    // =========================================================================
    // Dataset Operations
    // =========================================================================

    /** Call `get_dataset` and return the `dataset` field of the response. */
    async getDataset(datasetId, includeSignedUrls = false, urlExpiresSeconds = 3600) {
        const response = await this._makeRequest("get_dataset", {
            dataset_id: datasetId,
            include_signed_urls: includeSignedUrls,
            url_expires_seconds: urlExpiresSeconds,
        });
        return response.dataset;
    }

    /** Call `list_datasets` and return the whole response body. */
    async listDatasets(tenantId, limit = 50, offset = 0, includeSignedUrls = false, urlExpiresSeconds = 3600) {
        return this._makeRequest("list_datasets", {
            tenant_id: await this._resolveTenantId(tenantId),
            limit,
            offset,
            include_signed_urls: includeSignedUrls,
            url_expires_seconds: urlExpiresSeconds,
        });
    }

    // =========================================================================
    // Run Operations
    // =========================================================================

    /** Call `create_run` and return the `run` field; `runner_id` is sent only when given. */
    async createRun(datasetId, result, tenantId, runnerId) {
        const params = {
            tenant_id: await this._resolveTenantId(tenantId),
            dataset_id: datasetId,
            result,
        };
        if (runnerId != null)
            params.runner_id = runnerId;
        const response = await this._makeRequest("create_run", params);
        return response.run;
    }

    /** Call `delete_run` and return the whole response body. */
    async deleteRun(runId) {
        return this._makeRequest("delete_run", { run_id: runId });
    }

    /** Call `get_run` and return the `run` field of the response. */
    async getRun(runId) {
        const response = await this._makeRequest("get_run", { run_id: runId });
        return response.run;
    }

    /** Call `list_runs`; `dataset_id` is sent only when given. */
    async listRuns(datasetId, tenantId, limit = 50, offset = 0) {
        const params = {
            tenant_id: await this._resolveTenantId(tenantId),
            limit,
            offset,
        };
        if (datasetId != null)
            params.dataset_id = datasetId;
        return this._makeRequest("list_runs", params);
    }

    // =========================================================================
    // Request Operations
    // =========================================================================

    /**
     * Call `create_request` and return the `request` field of the response.
     *
     * When no input schema is supplied, an empty object schema is sent; if
     * `request.agent.tools` is truthy it is copied onto that schema as `tools`.
     */
    async createRequest(requestName, request, requestInputSchema, tenantId, requestorId) {
        if (requestInputSchema == null) {
            requestInputSchema = { type: "object", properties: {} };
            const tools = request.agent?.tools;
            if (tools) {
                requestInputSchema.tools = tools;
            }
        }
        const params = {
            tenant_id: await this._resolveTenantId(tenantId),
            requestor_id: await this._resolveUserId(requestorId),
            request_name: requestName,
            request,
            request_input_schema: requestInputSchema,
        };
        const response = await this._makeRequest("create_request", params);
        return response.request;
    }

    /** Call `get_request` and return the `request` field of the response. */
    async getRequest(requestId) {
        const response = await this._makeRequest("get_request", {
            request_id: requestId,
        });
        return response.request;
    }

    /** Call `list_requests`; `status` is sent only when given. */
    async listRequests(tenantId, status, limit = 50, offset = 0) {
        const params = {
            tenant_id: await this._resolveTenantId(tenantId),
            limit,
            offset,
        };
        if (status != null)
            params.status = status;
        return this._makeRequest("list_requests", params);
    }

    // =========================================================================
    // API Key Operations
    // =========================================================================

    /** Call `list_api_keys` and return the `api_keys` field of the response. */
    async listApiKeys(includeInactive = false) {
        const response = await this._makeRequest("list_api_keys", {
            include_inactive: includeInactive,
        });
        return response.api_keys;
    }

    /** Call `revoke_api_key` and return the whole response body. */
    async revokeApiKey(apiKeyId) {
        return this._makeRequest("revoke_api_key", { api_key_id: apiKeyId });
    }

    // =========================================================================
    // Session Operations
    // =========================================================================

    /** Call `init` and return the response body (does not touch the cached session). */
    async init() {
        return this._makeRequest("init");
    }

    /** Call `keep_alive` and return the response body. */
    async healthCheck() {
        return this._makeRequest("keep_alive");
    }

    // =========================================================================
    // Convenience / Workflow Methods
    // =========================================================================

    /**
     * Poll `getRequest` until `request_status` is `"completed"`.
     *
     * The request is always polled at least once, the wait between polls
     * never extends past the deadline, and a final poll happens at the
     * deadline before giving up.
     *
     * @param requestId    Request to watch.
     * @param timeout      Overall budget in seconds.
     * @param pollInterval Pause between polls in seconds.
     * @returns The request record once completed.
     * @throws {AshrLabsError} When `request_status` is `"failed"`.
     * @throws {Error} When the deadline passes without completion.
     */
    async waitForRequest(requestId, timeout = 600, pollInterval = 5) {
        const deadline = Date.now() + timeout * 1000;
        for (;;) {
            const req = await this.getRequest(requestId);
            const reqStatus = req.request_status;
            if (reqStatus === "completed")
                return req;
            if (reqStatus === "failed") {
                const errorMsg = req.error ?? "Request failed";
                throw new AshrLabsError(`Request ${requestId} failed: ${errorMsg}`);
            }
            const remainingMs = deadline - Date.now();
            if (remainingMs <= 0)
                break;
            await new Promise((r) => setTimeout(r, Math.min(pollInterval * 1000, remainingMs)));
        }
        throw new Error(`Request ${requestId} did not complete within ${timeout}s`);
    }

    /**
     * Create a request, wait for it to complete, then return
     * `[datasetId, datasetSource]` for the first dataset listed.
     *
     * NOTE(review): the dataset is taken from `listDatasets(undefined, 1)`,
     * which presumes the API lists the newest dataset first and that no
     * other dataset was created for the tenant in the meantime — confirm.
     *
     * @throws {AshrLabsError} When no dataset is listed after completion.
     */
    async generateDataset(requestName, config, requestInputSchema, timeout = 600, pollInterval = 5) {
        const req = await this.createRequest(requestName, config, requestInputSchema);
        const requestId = req.id;
        await this.waitForRequest(requestId, timeout, pollInterval);
        const resp = await this.listDatasets(undefined, 1);
        const datasets = resp.datasets;
        if (!datasets || datasets.length === 0) {
            throw new AshrLabsError("No datasets found after generation completed");
        }
        const datasetId = datasets[0].id;
        const fullDs = await this.getDataset(datasetId, false);
        const source = (fullDs.dataset_source ?? {});
        return [datasetId, source];
    }

    /** Debug representation showing the base URL and the first 8 characters of the key. */
    toString() {
        return `AshrLabsClient(baseUrl='${this.baseUrl}', apiKey='${this.apiKey.slice(0, 8)}...')`;
    }
}
@@ -0,0 +1,6 @@
1
/** Returns `text` with Markdown formatting removed. */
export declare function stripMarkdown(text: string): string;

/** Splits `text` into a list of string tokens. */
export declare function tokenize(text: string): string[];

/** Reports whether `a` and `b` match approximately; `threshold` is optional. */
export declare function fuzzyStrMatch(
    a: string,
    b: string,
    threshold?: number,
): boolean;

/** Returns the argument object carried by a tool-call record. */
export declare function extractToolArgs(
    toolCall: Record<string, unknown>,
): Record<string, unknown>;

/** Compares two tool calls and returns a `[status, notes]` pair; `notes` may be `null`. */
export declare function compareToolArgs(
    expected: Record<string, unknown>,
    actual: Record<string, unknown>,
): [string, string | null];

/** Returns a numeric similarity score for two texts. */
export declare function textSimilarity(textA: string, textB: string): number;
@@ -0,0 +1,212 @@
1
/**
 * Reduce lightly formatted Markdown to plain text.
 *
 * Rewrites are applied in a fixed order: emphasis markers, heading
 * prefixes, bullet prefixes, inline links (keeping the label), and finally
 * whitespace runs, which collapse to a single space. The result is trimmed.
 *
 * @param text Markdown-flavoured input.
 * @returns The plain-text rendering.
 */
export function stripMarkdown(text) {
    const rewrites = [
        { pattern: /\*{1,3}([^*]+)\*{1,3}/g, replacement: "$1" }, // *x*, **x**, ***x***
        { pattern: /^#{1,6}\s+/gm, replacement: "" }, // "# " … "###### "
        { pattern: /^[\s]*[-*+]\s+/gm, replacement: "" }, // "- ", "* ", "+ "
        { pattern: /\[([^\]]+)\]\([^)]+\)/g, replacement: "$1" }, // [label](url)
        { pattern: /\s+/g, replacement: " " }, // whitespace runs
    ];
    let plain = text;
    for (const { pattern, replacement } of rewrites) {
        plain = plain.replace(pattern, replacement);
    }
    return plain.trim();
}
14
/**
 * Turn text into lowercase word tokens.
 *
 * Markdown is stripped first, the text is lowercased, and every character
 * that is neither `\w` nor whitespace is deleted before splitting on
 * whitespace. `\w` is ASCII-only here (no `u` flag), so non-ASCII letters
 * are deleted along with punctuation.
 *
 * @param text Input text.
 * @returns The non-empty tokens in order of appearance.
 */
export function tokenize(text) {
    const cleaned = stripMarkdown(text)
        .toLowerCase()
        .replace(/[^\w\s]/g, "");
    // Runs of non-whitespace are exactly the non-empty pieces of a whitespace split.
    return cleaned.match(/\S+/g) ?? [];
}
22
/**
 * Decide whether two strings are "close enough" to count as the same value.
 *
 * Both strings are lowercased, stripped of every character that is neither
 * `\w` nor whitespace, and trimmed. They match when the normalized forms
 * are equal, when one contains the other, or when the share of common
 * words (relative to the larger word set) reaches `threshold`.
 *
 * A side that normalizes to the empty string only matches another empty
 * side. Every string "includes" the empty string, so empty input must be
 * rejected before the containment test.
 *
 * NOTE(review): `\w` is ASCII-only without the `u` flag, so text written
 * entirely in non-ASCII letters normalizes to the empty string.
 *
 * @param a         First string.
 * @param b         Second string.
 * @param threshold Minimum word overlap in [0, 1]. When omitted it is
 *   chosen from the smaller word count: 0.35 for up to 5 words, 0.4 for up
 *   to 8, and 0.55 beyond that.
 * @returns `true` when the strings are considered a match.
 */
export function fuzzyStrMatch(a, b, threshold) {
    const normalize = (s) => s.toLowerCase().replace(/[^\w\s]/g, "").trim();
    const na = normalize(a);
    const nb = normalize(b);
    if (na === nb)
        return true;
    // Guard before `includes`: "anything".includes("") is always true.
    if (na.length === 0 || nb.length === 0)
        return false;
    if (na.includes(nb) || nb.includes(na))
        return true;
    const wordSet = (s) => new Set(s.split(/\s+/).filter((w) => w.length > 0));
    const wa = wordSet(na);
    const wb = wordSet(nb);
    let shared = 0;
    for (const w of wa) {
        if (wb.has(w))
            shared++;
    }
    const overlap = shared / Math.max(wa.size, wb.size);
    if (threshold === undefined) {
        const minLen = Math.min(wa.size, wb.size);
        threshold = minLen <= 5 ? 0.35 : minLen <= 8 ? 0.4 : 0.55;
    }
    return overlap >= threshold;
}
51
/**
 * Extract the argument object from a tool-call record.
 *
 * `toolCall.arguments` is returned as-is when it is a plain (non-array)
 * object. Otherwise `toolCall.arguments_json` is used: a string is parsed
 * as JSON, any other value is taken directly. The result is always a plain
 * object. Invalid JSON, JSON that decodes to a non-object (`null`, an
 * array, a number, …) and non-object raw values all yield `{}`, so callers
 * can safely enumerate the keys.
 *
 * @param toolCall Tool-call record carrying `arguments` and/or `arguments_json`.
 * @returns The argument object, or `{}` when none can be extracted.
 */
export function extractToolArgs(toolCall) {
    const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
    if (isPlainObject(toolCall.arguments)) {
        return toolCall.arguments;
    }
    let raw = toolCall.arguments_json ?? "{}";
    if (typeof raw === "string") {
        try {
            raw = JSON.parse(raw);
        }
        catch {
            return {};
        }
    }
    return isPlainObject(raw) ? raw : {};
}
68
/**
 * Compare the arguments of an expected tool call with an actual one.
 *
 * Only the keys of the expected arguments are inspected. A key is missing
 * when the actual value is `undefined`; an explicit `null` is compared like
 * any other JSON value, so expected `null` versus actual `null` is a match
 * (this is the same notion of "missing" that `argDiffNotes` uses). String
 * pairs are compared with `fuzzyStrMatch`, everything else by its JSON
 * serialization.
 *
 * Status rules:
 * - `"exact"`: every expected key matched, or there are no expected keys
 *   (extra arguments on the actual call do not affect the status);
 * - `"partial"`: at least one expected key matched, but not all;
 * - `"mismatch"`: there are expected keys and none matched.
 *
 * @param expected Expected tool-call record.
 * @param actual   Actual tool-call record.
 * @returns `[status, notes]`; `notes` is `null` for `"exact"`, otherwise
 *   the result of `argDiffNotes`.
 */
export function compareToolArgs(expected, actual) {
    const expArgs = extractToolArgs(expected);
    const actArgs = extractToolArgs(actual);
    const expectedKeys = Object.keys(expArgs);
    if (expectedKeys.length === 0 && Object.keys(actArgs).length === 0) {
        return ["exact", null];
    }
    let allMatch = true;
    let anyMatch = false;
    for (const key of expectedKeys) {
        const expVal = expArgs[key];
        const actVal = actArgs[key];
        let matches;
        if (actVal === undefined) {
            // Absent from the actual call.
            matches = false;
        }
        else if (typeof expVal === "string" && typeof actVal === "string") {
            matches = fuzzyStrMatch(expVal, actVal);
        }
        else {
            matches = JSON.stringify(expVal) === JSON.stringify(actVal);
        }
        if (matches) {
            anyMatch = true;
        }
        else {
            allMatch = false;
        }
    }
    let status;
    if (allMatch && expectedKeys.length > 0) {
        status = "exact";
    }
    else if (anyMatch) {
        status = "partial";
    }
    else if (expectedKeys.length > 0) {
        status = "mismatch";
    }
    else {
        status = "exact";
    }
    const notes = status !== "exact" ? argDiffNotes(expArgs, actArgs) : null;
    return [status, notes];
}
116
/**
 * Score how similar two texts are, as a number in [0, 1] rounded to two
 * decimals.
 *
 * The base score is the cosine similarity of the two token-count vectors
 * (tokens come from `tokenize`). Two additive boosts follow, each capped so
 * the score never exceeds 1:
 * - up to +0.2 for shared entities (order/reference ids, dollar amounts,
 *   ISO dates, tracking-style URLs), when both texts contain at least one;
 * - up to +0.1 for shared domain concepts, detected by substring search on
 *   the lowercased texts.
 *
 * @param textA First text.
 * @param textB Second text.
 * @returns The similarity score; `0` when either text has no tokens.
 */
export function textSimilarity(textA, textB) {
    const tokensA = tokenize(textA);
    const tokensB = tokenize(textB);
    if (tokensA.length === 0 || tokensB.length === 0)
        return 0.0;

    // --- Base score: cosine similarity over token counts ---
    const countTokens = (tokens) => {
        const counts = new Map();
        for (const token of tokens) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
        return counts;
    };
    const magnitude = (counts) => {
        let sumOfSquares = 0;
        for (const n of counts.values()) {
            sumOfSquares += n * n;
        }
        return Math.sqrt(sumOfSquares);
    };
    const freqA = countTokens(tokensA);
    const freqB = countTokens(tokensB);
    let dot = 0;
    // Words missing from either side contribute zero, so walking A's words is sufficient.
    for (const [word, countA] of freqA) {
        dot += countA * (freqB.get(word) ?? 0);
    }
    const magA = magnitude(freqA);
    const magB = magnitude(freqB);
    let score = magA > 0 && magB > 0 ? dot / (magA * magB) : 0.0;

    // --- Boost 1: entity overlap ---
    const entityPattern = /ORD-\d+|REF-\d+|\$[\d.]+|\d{4}-\d{2}-\d{2}|track\S+\.\w+\/\S+/gi;
    const entitiesOf = (text) => new Set((text.toUpperCase().match(entityPattern) ?? []).map((e) => e.toUpperCase()));
    const entitiesA = entitiesOf(textA);
    const entitiesB = entitiesOf(textB);
    if (entitiesA.size > 0 && entitiesB.size > 0) {
        const sharedEntities = [...entitiesA].filter((e) => entitiesB.has(e)).length;
        const entityOverlap = sharedEntities / Math.max(entitiesA.size, entitiesB.size);
        score = Math.min(1.0, score + entityOverlap * 0.2);
    }

    // --- Boost 2: domain concept overlap ---
    const conceptGroups = [
        ["refund", "refunded", "credited"],
        ["shipped", "shipping", "transit", "delivered", "delivery"],
        ["tracking", "track"],
        ["stock", "available", "availability", "inventory"],
        ["processing", "processed"],
        ["pickup", "store"],
        ["manual", "review", "escalat"],
        ["damaged", "defective", "cracked", "broken"],
    ];
    const lowerA = textA.toLowerCase();
    const lowerB = textB.toLowerCase();
    let groupsInBoth = 0;
    let groupsSeen = 0;
    for (const group of conceptGroups) {
        const inA = group.some((term) => lowerA.includes(term));
        const inB = group.some((term) => lowerB.includes(term));
        if (!inA && !inB)
            continue;
        groupsSeen++;
        if (inA && inB)
            groupsInBoth++;
    }
    if (groupsSeen > 0) {
        const conceptOverlap = groupsInBoth / groupsSeen;
        score = Math.min(1.0, score + conceptOverlap * 0.1);
    }
    return Math.round(score * 100) / 100;
}
190
/**
 * Describe how two argument objects differ, as a `"; "`-joined note.
 *
 * Keys are visited in order: expected keys first, then keys that appear
 * only in the actual arguments. Per key the note records an extra
 * argument (absent from expected), a missing argument (absent from
 * actual), or a value difference. String pairs count as different only
 * when `fuzzyStrMatch` rejects them; all other pairs are compared by JSON
 * serialization.
 *
 * Non-string values are rendered with `JSON.stringify` so objects and
 * arrays stay readable (instead of `[object Object]` / `1,2`) and a string
 * `"5"` can be told apart from the number `5`.
 *
 * @param expArgs Expected arguments.
 * @param actArgs Actual arguments.
 * @returns The note, or `null` when no difference was found.
 */
function argDiffNotes(expArgs, actArgs) {
    // Strings are shown verbatim; everything else as JSON.
    const show = (value) => (typeof value === "string" ? value : JSON.stringify(value));
    const diffs = [];
    const allKeys = new Set([...Object.keys(expArgs), ...Object.keys(actArgs)]);
    for (const key of allKeys) {
        const ev = expArgs[key];
        const av = actArgs[key];
        if (ev === undefined) {
            diffs.push(`extra arg '${key}'=${show(av)}`);
        }
        else if (av === undefined) {
            diffs.push(`missing arg '${key}'`);
        }
        else if (typeof ev === "string" && typeof av === "string") {
            if (!fuzzyStrMatch(ev, av)) {
                diffs.push(`'${key}': expected='${ev}' actual='${av}'`);
            }
        }
        else if (JSON.stringify(ev) !== JSON.stringify(av)) {
            // At most one side is a string here; JSON keeps its quotes so the types stay visible.
            diffs.push(`'${key}': expected=${JSON.stringify(ev)} actual=${JSON.stringify(av)}`);
        }
    }
    return diffs.length > 0 ? diffs.join("; ") : null;
}
package/dist/eval.d.ts ADDED
@@ -0,0 +1,42 @@
1
+ import { RunBuilder } from "./run-builder.js";
2
+ import type { AshrLabsClient } from "./client.js";
3
/**
 * Contract an agent under evaluation must satisfy. Both methods may be
 * synchronous or return a promise.
 */
export interface Agent {
    /** Produce the agent's reply to one user message. */
    respond(
        message: string,
    ): Record<string, unknown> | Promise<Record<string, unknown>>;
    /** Reset the agent's state. */
    reset(): void | Promise<void>;
}
7
/** Callback that receives a scenario id and the scenario record. */
export type OnScenarioCallback = (
    scenarioId: string,
    scenario: Record<string, unknown>,
) => void;

/** Callback that receives an action's index and the action record. */
export type OnActionCallback = (
    actionIndex: number,
    action: Record<string, unknown>,
) => void;

/** Compares an expected and an actual tool call; returns `[status, notes]`. */
export type ToolComparator = (
    expected: Record<string, unknown>,
    actual: Record<string, unknown>,
) => [string, string | null];

/** Scores the similarity of two texts as a number. */
export type TextComparator = (a: string, b: string) => number;
11
/**
 * Type surface of the evaluation runner: built from a dataset source (or
 * loaded through a client), it runs an `Agent` and yields a `RunBuilder`.
 */
export declare class EvalRunner {
    private _source;
    private _toolComparator;
    private _textComparator;
    private _exactThreshold;
    private _similarThreshold;

    /**
     * @param datasetSource Dataset source record to evaluate against.
     * @param options       Optional comparator overrides and similarity thresholds.
     */
    constructor(
        datasetSource: Record<string, unknown>,
        options?: {
            toolComparator?: ToolComparator;
            textComparator?: TextComparator;
            similarityThresholds?: {
                exact?: number;
                similar?: number;
            };
        },
    );

    /** Build a runner for the dataset with id `datasetId`, fetched via `client`. */
    static fromDataset(
        client: AshrLabsClient,
        datasetId: number,
        options?: {
            toolComparator?: ToolComparator;
            textComparator?: TextComparator;
            similarityThresholds?: {
                exact?: number;
                similar?: number;
            };
        },
    ): Promise<EvalRunner>;

    private _runScenario;

    /** Evaluate `agent` and resolve with the resulting `RunBuilder`. */
    run(
        agent: Agent,
        options?: {
            onScenario?: OnScenarioCallback;
            onAction?: OnActionCallback;
        },
    ): Promise<RunBuilder>;

    /** Evaluate `agent`, then deploy the run for `datasetId` through `client`. */
    runAndDeploy(
        agent: Agent,
        client: AshrLabsClient,
        datasetId: number,
        options?: {
            onScenario?: OnScenarioCallback;
            onAction?: OnActionCallback;
        },
    ): Promise<Record<string, unknown>>;
}
package/dist/eval.js ADDED
@@ -0,0 +1,119 @@
1
+ import { compareToolArgs, extractToolArgs, textSimilarity, } from "./comparators.js";
2
+ import { RunBuilder, TestBuilder } from "./run-builder.js";
3
/**
 * Replays the scenarios of a dataset source against an agent and records
 * how the agent's tool calls and text compare with the expected ones.
 */
export class EvalRunner {
    _source;
    _toolComparator;
    _textComparator;
    _exactThreshold;
    _similarThreshold;

    /**
     * @param datasetSource Dataset source; scenarios are read from its `runs` field.
     * @param options       Optional `toolComparator` (default `compareToolArgs`),
     *   `textComparator` (default `textSimilarity`) and `similarityThresholds`
     *   (`exact` defaults to 0.7, `similar` to 0.4).
     */
    constructor(datasetSource, options) {
        const { toolComparator, textComparator, similarityThresholds } = options ?? {};
        this._source = datasetSource;
        this._toolComparator = toolComparator ?? compareToolArgs;
        this._textComparator = textComparator ?? textSimilarity;
        this._exactThreshold = similarityThresholds?.exact ?? 0.7;
        this._similarThreshold = similarityThresholds?.similar ?? 0.4;
    }

    /**
     * Fetch a dataset through `client` and build a runner from its
     * `dataset_source` (an empty object when that field is absent).
     */
    static async fromDataset(client, datasetId, options) {
        const dataset = await client.getDataset(datasetId, false);
        return new EvalRunner(dataset.dataset_source ?? {}, options);
    }

    /**
     * Replay one scenario and return its completed `TestBuilder`.
     *
     * `user` actions are sent to the agent and the reply's `text` and
     * `tool_calls` are remembered; an exception from the agent is recorded
     * as the text `[error: …]` with no tool calls. `agent` actions compare
     * the remembered reply with the expectation: each expected tool call is
     * paired with the first remaining actual call of the same `name` (which
     * is then consumed), and the text is scored when both sides are
     * non-empty. The remembered text is cleared after every `agent` action.
     */
    async _runScenario(agent, runId, scenario, onScenario, onAction) {
        if (onScenario)
            onScenario(runId, scenario);
        await agent.reset();
        const test = new TestBuilder(runId);
        test.start();

        let lastText = "";
        let pendingTools = [];
        const actions = scenario.actions ?? [];
        for (let i = 0; i < actions.length; i++) {
            const action = actions[i];
            if (onAction)
                onAction(i, action);
            const actor = action.actor;
            const content = action.content ?? "";

            if (actor === "user") {
                test.addUserText(content, action.name ?? `user_action_${i}`, i);
                try {
                    const reply = await agent.respond(content);
                    lastText = reply.text ?? "";
                    pendingTools = [...(reply.tool_calls ?? [])];
                }
                catch (e) {
                    lastText = `[error: ${e}]`;
                    pendingTools = [];
                }
                continue;
            }
            if (actor !== "agent")
                continue;

            const expectedResponse = action.expected_response ?? {};
            for (const expectedCall of expectedResponse.tool_calls ?? []) {
                const expectedName = expectedCall.name ?? "";
                const hit = pendingTools.findIndex((call) => call.name === expectedName);
                if (hit === -1) {
                    test.addToolCall(expectedCall, { name: "NOT_CALLED", arguments_json: "{}" }, "mismatch", `Agent never called ${expectedName}`, i);
                    continue;
                }
                const actualCall = pendingTools[hit];
                const [status, notes] = this._toolComparator(expectedCall, actualCall);
                const actualArgs = extractToolArgs(actualCall);
                test.addToolCall(expectedCall, {
                    name: actualCall.name ?? "",
                    arguments_json: JSON.stringify(actualArgs),
                }, status, notes, i);
                // Consume the call so it cannot satisfy a second expectation.
                pendingTools.splice(hit, 1);
            }

            if (content && lastText) {
                const similarity = this._textComparator(content, lastText);
                const matchStatus = similarity > this._exactThreshold
                    ? "exact"
                    : similarity > this._similarThreshold
                        ? "similar"
                        : "divergent";
                test.addAgentResponse({ text: content }, { text: lastText }, matchStatus, similarity, null, i);
            }
            lastText = "";
        }
        test.complete();
        return test;
    }

    /**
     * Run every scenario in the source's `runs` map that has at least one
     * action, sequentially, and return the completed `RunBuilder`.
     *
     * @param agent   Agent under evaluation.
     * @param options Optional `onScenario` / `onAction` callbacks.
     */
    async run(agent, options) {
        const runsData = this._source.runs ?? {};
        const run = new RunBuilder();
        run.start();
        const runnable = Object.entries(runsData).filter(([, scenario]) => {
            const actions = scenario.actions;
            return actions && actions.length > 0;
        });
        for (const [runId, scenario] of runnable) {
            const test = await this._runScenario(agent, runId, scenario, options?.onScenario, options?.onAction);
            run._tests.push(test);
        }
        run.complete();
        return run;
    }

    /** Run the evaluation, then deploy the result via `RunBuilder.deploy`. */
    async runAndDeploy(agent, client, datasetId, options) {
        const finished = await this.run(agent, options);
        return finished.deploy(client, datasetId);
    }
}
@@ -0,0 +1,24 @@
1
/** Base class of every error raised by the SDK. */
export declare class AshrLabsError extends Error {
    /** HTTP status associated with the failure, or `null`. */
    statusCode: number | null;
    /** Response body associated with the failure, or `null`. */
    response: Record<string, unknown> | null;
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
    toString(): string;
}

/** Authentication failure. */
export declare class AuthenticationError extends AshrLabsError {
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
}

/** Authorization failure. */
export declare class AuthorizationError extends AshrLabsError {
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
}

/** The requested resource was not found. */
export declare class NotFoundError extends AshrLabsError {
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
}

/** The request failed validation. */
export declare class ValidationError extends AshrLabsError {
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
}

/** The rate limit was exceeded. */
export declare class RateLimitError extends AshrLabsError {
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
}

/** Server-side failure. */
export declare class ServerError extends AshrLabsError {
    constructor(
        message: string,
        statusCode?: number | null,
        response?: Record<string, unknown> | null,
    );
}
@@ -0,0 +1,52 @@
1
/**
 * Base class of every error raised by the SDK. Carries the HTTP status and
 * the response body when they are known.
 */
export class AshrLabsError extends Error {
    statusCode;
    response;

    /**
     * @param message    Human-readable description.
     * @param statusCode HTTP status, or `null` when not applicable.
     * @param response   Response body, or `null` when not available.
     */
    constructor(message, statusCode = null, response = null) {
        super(message);
        this.name = "AshrLabsError";
        Object.assign(this, { statusCode, response });
    }

    /** Render as `[status] message`, or just the message when the status is falsy. */
    toString() {
        return this.statusCode ? `[${this.statusCode}] ${this.message}` : this.message;
    }
}
17
/** Authentication failure; the client raises it for HTTP 401. */
export class AuthenticationError extends AshrLabsError {
    /** Same parameters as `AshrLabsError`; only `name` differs. */
    constructor(message, statusCode = null, response = null) {
        super(message, statusCode, response);
        Object.assign(this, { name: "AuthenticationError" });
    }
}
23
/** Authorization failure; the client raises it for HTTP 403. */
export class AuthorizationError extends AshrLabsError {
    /** Same parameters as `AshrLabsError`; only `name` differs. */
    constructor(message, statusCode = null, response = null) {
        super(message, statusCode, response);
        Object.assign(this, { name: "AuthorizationError" });
    }
}
29
/** Missing resource; the client raises it for HTTP 404. */
export class NotFoundError extends AshrLabsError {
    /** Same parameters as `AshrLabsError`; only `name` differs. */
    constructor(message, statusCode = null, response = null) {
        super(message, statusCode, response);
        Object.assign(this, { name: "NotFoundError" });
    }
}
35
/** Rejected input; the client raises it for HTTP 422. */
export class ValidationError extends AshrLabsError {
    /** Same parameters as `AshrLabsError`; only `name` differs. */
    constructor(message, statusCode = null, response = null) {
        super(message, statusCode, response);
        Object.assign(this, { name: "ValidationError" });
    }
}
41
/** Rate limit exceeded; the client raises it for HTTP 429. */
export class RateLimitError extends AshrLabsError {
    /** Same parameters as `AshrLabsError`; only `name` differs. */
    constructor(message, statusCode = null, response = null) {
        super(message, statusCode, response);
        Object.assign(this, { name: "RateLimitError" });
    }
}
47
/** Server-side failure; the client raises it for any status of 500 or above. */
export class ServerError extends AshrLabsError {
    /** Same parameters as `AshrLabsError`; only `name` differs. */
    constructor(message, statusCode = null, response = null) {
        super(message, statusCode, response);
        Object.assign(this, { name: "ServerError" });
    }
}
@@ -0,0 +1,7 @@
1
+ export { AshrLabsClient } from "./client.js";
2
+ export { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
3
+ export type { User, Tenant, Session, Dataset, Run, Request, APIKey, ToolCall, ExpectedResponse, Action, Scenario, } from "./models.js";
4
+ export { RunBuilder, TestBuilder } from "./run-builder.js";
5
+ export { stripMarkdown, tokenize, fuzzyStrMatch, extractToolArgs, compareToolArgs, textSimilarity, } from "./comparators.js";
6
+ export { EvalRunner } from "./eval.js";
7
+ export type { Agent } from "./eval.js";
package/dist/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export { AshrLabsClient } from "./client.js";
2
+ export { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
3
+ export { RunBuilder, TestBuilder } from "./run-builder.js";
4
+ export { stripMarkdown, tokenize, fuzzyStrMatch, extractToolArgs, compareToolArgs, textSimilarity, } from "./comparators.js";
5
+ export { EvalRunner } from "./eval.js";
@@ -0,0 +1,93 @@
1
/** User record. Every field is optional. */
export interface User {
    id?: number;
    created_at?: string;
    email?: string;
    name?: string | null;
    /** NOTE(review): presumably the id of the owning tenant — confirm. */
    tenant?: number;
    is_active?: boolean;
}

/** Tenant record. Every field is optional. */
export interface Tenant {
    id?: number;
    created_at?: string;
    tenant_name?: string;
    is_active?: boolean;
}

/** Session payload: a status string plus the user and tenant records. */
export interface Session {
    status: string;
    user: User;
    tenant: Tenant;
}

/** API key record. Every field is optional. */
export interface APIKey {
    id?: number;
    key?: string;
    key_prefix?: string;
    name?: string;
    scopes?: string[];
    user_id?: number;
    tenant_id?: number;
    created_at?: string;
    last_used_at?: string | null;
    expires_at?: string | null;
    is_active?: boolean;
}

/** Dataset record. Every field is optional. */
export interface Dataset {
    id?: number;
    created_at?: string;
    tenant?: number;
    creator?: number;
    name?: string;
    description?: string | null;
    dataset_source?: Record<string, unknown>;
}

/** Run record. Every field is optional. */
export interface Run {
    id?: number;
    created_at?: string;
    dataset?: number;
    tenant?: number;
    runner?: number;
    result?: Record<string, unknown>;
}

/** Request record. Every field is optional. */
export interface Request {
    id?: number;
    created_at?: string;
    requestor_id?: number;
    requestor_tenant?: number;
    request_name?: string;
    request_status?: string;
    request_input_schema?: Record<string, unknown> | null;
    request?: Record<string, unknown>;
}

/** Fields shared by the list responses below. */
export interface ListResponse {
    status: string;
    total: number;
}

/** List response carrying datasets. */
export interface DatasetsListResponse extends ListResponse {
    datasets: Dataset[];
}

/** List response carrying runs. */
export interface RunsListResponse extends ListResponse {
    runs: Run[];
}

/** List response carrying requests. */
export interface RequestsListResponse extends ListResponse {
    requests: Request[];
}

/** List response carrying API keys. */
export interface APIKeysListResponse extends ListResponse {
    api_keys: APIKey[];
}

/** Tool call: a name plus its arguments serialized as a JSON string. */
export interface ToolCall {
    name?: string;
    arguments_json?: string;
}

/** Expected agent output: tool calls and/or text. */
export interface ExpectedResponse {
    tool_calls?: ToolCall[];
    text?: string;
}

/** One step of a scenario. */
export interface Action {
    actor?: string;
    content?: string;
    name?: string;
    expected_response?: ExpectedResponse;
}

/** A titled sequence of actions. */
export interface Scenario {
    title?: string;
    actions?: Action[];
}
package/dist/models.js ADDED
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,30 @@
1
+ import type { AshrLabsClient } from "./client.js";
2
/**
 * Type declaration for the builder of a single test result.
 *
 * `start`, `complete` and every `add*` method return `this`, so calls can be
 * chained; `build()` returns the accumulated data as a plain record.
 * `actionIndex` is optional on every `add*` method, and `divergenceNotes` /
 * `semanticSimilarity` accept `null` as well as being omitted.
 */
+ export declare class TestBuilder {
3
+ private _testId;
4
+ private _status;
5
+ private _startedAt;
6
+ private _completedAt;
7
+ private _actionResults;
8
+ private _nextActionIndex;
9
+ constructor(testId: string);
10
+ start(): this;
11
+ addUserFile(filePath: string, description: string, actionIndex?: number): this;
12
+ addUserText(text: string, description: string, actionIndex?: number): this;
13
+ addToolCall(expected: Record<string, unknown>, actual: Record<string, unknown>, matchStatus: string, divergenceNotes?: string | null, actionIndex?: number): this;
14
+ addAgentResponse(expectedResponse: Record<string, unknown>, actualResponse: Record<string, unknown>, matchStatus: string, semanticSimilarity?: number | null, divergenceNotes?: string | null, actionIndex?: number): this;
15
+ complete(status?: string): this;
16
+ build(): Record<string, unknown>;
17
+ private _resolveIndex;
18
+ }
19
/**
 * Type declaration for the builder of a whole run.
 *
 * `start` and `complete` return `this` for chaining; `addTest` returns the
 * newly created `TestBuilder` rather than the run builder. `build()` returns
 * the run as a plain record, and `deploy()` takes an `AshrLabsClient` plus a
 * dataset id (tenant and runner ids optional) and resolves to a record.
 */
+ export declare class RunBuilder {
20
+ private _status;
21
+ private _startedAt;
22
+ private _completedAt;
23
+ /** @internal */
24
+ _tests: TestBuilder[];
25
+ start(): this;
26
+ addTest(testId: string): TestBuilder;
27
+ complete(status?: string): this;
28
+ build(): Record<string, unknown>;
29
+ deploy(client: AshrLabsClient, datasetId: number, tenantId?: number, runnerId?: number): Promise<Record<string, unknown>>;
30
+ }
@@ -0,0 +1,221 @@
1
/**
 * Returns the current time as an ISO-8601 UTC string with the millisecond
 * component removed, e.g. "2024-01-02T03:04:05Z".
 */
function now() {
  const isoWithMillis = new Date().toISOString();
  // Drop the trailing ".mmm" so only whole seconds remain before the "Z".
  return isoWithMillis.replace(/\.\d{3}Z$/, "Z");
}
4
/**
 * Returns a shallow copy of a tool-call record in which both spellings of
 * the name (`name` / `tool_name`) and of the arguments (`arguments` /
 * `arguments_json`) are present whenever one of the pair was supplied.
 *
 * - A missing name key is copied from its counterpart.
 * - A missing `arguments` is derived from `arguments_json`: strings are
 *   JSON-parsed (falling back to `{}` when parsing fails), any other value
 *   is copied as-is.
 * - A missing `arguments_json` is derived from `arguments`: non-null objects
 *   are JSON-stringified, everything else goes through `String()`.
 *
 * The input object itself is never modified.
 */
function normalizeToolCall(tc) {
  const normalized = { ...tc };

  const hasName = "name" in normalized;
  const hasToolName = "tool_name" in normalized;
  if (hasName && !hasToolName) {
    normalized.tool_name = normalized.name;
  } else if (hasToolName && !hasName) {
    normalized.name = normalized.tool_name;
  }

  const hasArguments = "arguments" in normalized;
  const hasArgumentsJson = "arguments_json" in normalized;
  if (hasArgumentsJson && !hasArguments) {
    const raw = normalized.arguments_json;
    if (typeof raw !== "string") {
      normalized.arguments = raw;
    } else {
      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch {
        // Unparseable JSON deliberately degrades to an empty argument map.
        parsed = {};
      }
      normalized.arguments = parsed;
    }
  } else if (hasArguments && !hasArgumentsJson) {
    const args = normalized.arguments;
    const isNonNullObject = args !== null && typeof args === "object";
    normalized.arguments_json = isNonNullObject
      ? JSON.stringify(args)
      : String(args);
  }

  return normalized;
}
35
/**
 * Summarises a list of built test records into run-level metrics.
 *
 * - `tests_passed` / `tests_failed` count tests whose `status` is
 *   `"completed"` / `"failed"`; any other status counts toward neither.
 * - `total_tool_call_divergence` counts tool-call entries (inside
 *   `tool_call` actions) whose `match_status` is set and is not `"exact"`.
 * - `total_response_divergence` counts agent `text` actions whose
 *   `match_status` is set and is not `"exact"`.
 * - `average_similarity_score` is the mean of the numeric
 *   `semantic_similarity` values on agent `text` actions, rounded to two
 *   decimals, or `null` when no such value exists.
 */
function computeAggregate(tests) {
  const totalTests = tests.length;
  let passedCount = 0;
  let failedCount = 0;
  let toolCallDivergences = 0;
  let responseDivergences = 0;
  let similaritySum = 0;
  let similarityCount = 0;

  // A status "diverges" when it is present (truthy) and differs from "exact".
  const diverges = (status) => Boolean(status) && status !== "exact";

  for (const test of tests) {
    switch (test.status) {
      case "completed":
        passedCount += 1;
        break;
      case "failed":
        failedCount += 1;
        break;
    }

    for (const action of test.action_results ?? []) {
      if (action.action_type === "tool_call") {
        for (const call of action.tool_calls ?? []) {
          if (diverges(call.match_status)) {
            toolCallDivergences += 1;
          }
        }
        continue;
      }
      if (action.action_type !== "text" || action.actor !== "agent") {
        continue;
      }
      if (diverges(action.match_status)) {
        responseDivergences += 1;
      }
      if (typeof action.semantic_similarity === "number") {
        similaritySum += action.semantic_similarity;
        similarityCount += 1;
      }
    }
  }

  let averageSimilarity = null;
  if (similarityCount > 0) {
    // Round to two decimal places.
    averageSimilarity =
      Math.round((similaritySum / similarityCount) * 100) / 100;
  }

  return {
    total_tests: totalTests,
    tests_passed: passedCount,
    tests_failed: failedCount,
    average_similarity_score: averageSimilarity,
    total_tool_call_divergence: toolCallDivergences,
    total_response_divergence: responseDivergences,
  };
}
81
/**
 * Fluent builder that accumulates the action results of one test and renders
 * them as a plain object via `build()`.
 *
 * `start`, `complete` and every `add*` method return `this` so calls can be
 * chained. Each recorded action carries an `action_index`; when the caller
 * does not supply one, an internal cursor hands out consecutive indices.
 */
export class TestBuilder {
  _testId;
  _status = "pending";
  _startedAt = null;
  _completedAt = null;
  _actionResults = [];
  _nextActionIndex = 0;

  /**
   * @param testId Identifier emitted as `test_id` by `build()`.
   */
  constructor(testId) {
    this._testId = testId;
  }

  /** Marks the test as `"running"` and records the start timestamp. */
  start() {
    this._status = "running";
    this._startedAt = now();
    return this;
  }

  /**
   * Records a user action of type `"file"`.
   *
   * @param filePath Stored as `file_path`.
   * @param description Stored as `description`.
   * @param actionIndex Optional explicit action index.
   */
  addUserFile(filePath, description, actionIndex) {
    const idx = this._resolveIndex(actionIndex);
    this._actionResults.push({
      actor: "user",
      action_type: "file",
      action_index: idx,
      description,
      file_path: filePath,
      input_provided: true,
    });
    return this;
  }

  /**
   * Records a user action of type `"text"`.
   *
   * @param text Stored as `text`.
   * @param description Stored as `description`.
   * @param actionIndex Optional explicit action index.
   */
  addUserText(text, description, actionIndex) {
    const idx = this._resolveIndex(actionIndex);
    this._actionResults.push({
      actor: "user",
      action_type: "text",
      action_index: idx,
      description,
      text,
      input_provided: true,
    });
    return this;
  }

  /**
   * Records an expected/actual tool-call pair for the agent.
   *
   * Both sides are passed through `normalizeToolCall`. If a `tool_call`
   * action already exists at the resolved index, the pair is appended to
   * that action's `tool_calls`; otherwise a new action is created.
   *
   * @param expected Expected tool call.
   * @param actual Actual tool call.
   * @param matchStatus Stored as `match_status`.
   * @param divergenceNotes Stored as `divergence_notes` (`null` when omitted).
   * @param actionIndex Optional explicit action index.
   */
  addToolCall(expected, actual, matchStatus, divergenceNotes, actionIndex) {
    const idx = this._resolveIndex(actionIndex);
    const tcEntry = {
      expected: normalizeToolCall(expected),
      actual: normalizeToolCall(actual),
      match_status: matchStatus,
      divergence_notes: divergenceNotes ?? null,
    };
    const existing = this._actionResults.find(
      (ar) => ar.action_type === "tool_call" && ar.action_index === idx
    );
    if (existing) {
      existing.tool_calls.push(tcEntry);
    } else {
      this._actionResults.push({
        actor: "agent",
        action_type: "tool_call",
        action_index: idx,
        tool_calls: [tcEntry],
      });
    }
    return this;
  }

  /**
   * Records an agent `"text"` action comparing expected and actual responses.
   *
   * @param expectedResponse Stored as `expected_response`.
   * @param actualResponse Stored as `actual_response`.
   * @param matchStatus Stored as `match_status`.
   * @param semanticSimilarity Stored as `semantic_similarity`; the key is
   *   omitted entirely when the value is `null` or `undefined`.
   * @param divergenceNotes Stored as `divergence_notes` (`null` when omitted).
   * @param actionIndex Optional explicit action index.
   */
  addAgentResponse(
    expectedResponse,
    actualResponse,
    matchStatus,
    semanticSimilarity,
    divergenceNotes,
    actionIndex
  ) {
    const idx = this._resolveIndex(actionIndex);
    this._actionResults.push({
      actor: "agent",
      action_type: "text",
      action_index: idx,
      expected_response: expectedResponse,
      actual_response: actualResponse,
      match_status: matchStatus,
      divergence_notes: divergenceNotes ?? null,
      ...(semanticSimilarity != null
        ? { semantic_similarity: semanticSimilarity }
        : {}),
    });
    return this;
  }

  /**
   * Sets the final status and records the completion timestamp.
   *
   * @param status Final status; defaults to `"completed"`.
   */
  complete(status = "completed") {
    this._status = status;
    this._completedAt = now();
    return this;
  }

  /**
   * Renders the test as a plain object with `test_id`, `status` and
   * `action_results`; `started_at` / `completed_at` are included only when
   * set. The `action_results` array is a shallow copy: the entries
   * themselves are shared with the builder.
   */
  build() {
    return {
      test_id: this._testId,
      status: this._status,
      action_results: [...this._actionResults],
      ...(this._startedAt ? { started_at: this._startedAt } : {}),
      ...(this._completedAt ? { completed_at: this._completedAt } : {}),
    };
  }

  /**
   * Resolves the index for the next action.
   *
   * An explicit index is returned unchanged and moves the cursor to
   * `explicit + 1` (which may move it backwards). When no index is given,
   * the cursor value is returned and then advanced by one.
   *
   * `null` is treated the same as `undefined`, consistent with how the other
   * nullable arguments of this class are handled. Without that, `null` would
   * be emitted as the action index and the cursor would reset to 1.
   */
  _resolveIndex(explicit) {
    if (explicit != null) {
      this._nextActionIndex = explicit + 1;
      return explicit;
    }
    const idx = this._nextActionIndex;
    this._nextActionIndex++;
    return idx;
  }
}
186
/**
 * Fluent builder for a whole run: it owns a list of `TestBuilder`s and
 * renders them, together with run-level status, timestamps and aggregate
 * metrics, into a plain object.
 */
export class RunBuilder {
  _status = "pending";
  _startedAt = null;
  _completedAt = null;
  /** @internal */
  _tests = [];

  /** Marks the run as `"running"` and records the start timestamp. */
  start() {
    this._startedAt = now();
    this._status = "running";
    return this;
  }

  /**
   * Creates a `TestBuilder` for `testId`, registers it with this run and
   * returns it (not the run builder) so the test can be populated directly.
   */
  addTest(testId) {
    const builder = new TestBuilder(testId);
    this._tests.push(builder);
    return builder;
  }

  /**
   * Sets the final run status and records the completion timestamp.
   *
   * @param status Final status; defaults to `"completed"`.
   */
  complete(status = "completed") {
    this._completedAt = now();
    this._status = status;
    return this;
  }

  /**
   * Builds every registered test and returns the run as a plain object.
   * `started_at` and `completed_at` are always present and are `null` when
   * the corresponding step has not happened.
   */
  build() {
    const builtTests = this._tests.map((testBuilder) => testBuilder.build());
    const aggregateMetrics = computeAggregate(builtTests);
    return {
      tests: builtTests,
      status: this._status,
      started_at: this._startedAt,
      completed_at: this._completedAt,
      aggregate_metrics: aggregateMetrics,
    };
  }

  /**
   * Builds the run and submits it through `client.createRun`, returning
   * whatever that call returns.
   */
  deploy(client, datasetId, tenantId, runnerId) {
    return client.createRun(datasetId, this.build(), tenantId, runnerId);
  }
}
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "ashr-labs",
3
+ "version": "0.1.0",
4
+ "description": "TypeScript SDK for the Ashr Labs API",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "types": "./dist/index.d.ts"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist"
16
+ ],
17
+ "scripts": {
18
+ "build": "tsc",
19
+ "test": "vitest run",
20
+ "test:watch": "vitest"
21
+ },
22
+ "keywords": [
23
+ "ashr",
24
+ "labs",
25
+ "api",
26
+ "sdk"
27
+ ],
28
+ "license": "MIT",
29
+ "devDependencies": {
30
+ "@types/node": "^25.3.5",
31
+ "typescript": "^5.4.0",
32
+ "vitest": "^3.0.0"
33
+ },
34
+ "engines": {
35
+ "node": ">=18.0.0"
36
+ }
37
+ }