ashr-labs 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +123 -0
- package/dist/client.js +140 -0
- package/dist/index.d.ts +4 -1
- package/dist/index.js +2 -0
- package/dist/models.d.ts +99 -0
- package/dist/models.js +20 -1
- package/dist/run-builder.d.ts +33 -0
- package/dist/run-builder.js +55 -0
- package/dist/tracing.d.ts +160 -0
- package/dist/tracing.js +229 -0
- package/package.json +1 -1
package/dist/client.d.ts
CHANGED
|
@@ -18,7 +18,116 @@ export declare class AshrLabsClient {
|
|
|
18
18
|
deleteRun(runId: number): Promise<Record<string, unknown>>;
|
|
19
19
|
getRun(runId: number): Promise<Record<string, unknown>>;
|
|
20
20
|
listRuns(datasetId?: number | null, tenantId?: number | null, limit?: number): Promise<Record<string, unknown>>;
|
|
21
|
+
/**
|
|
22
|
+
* Start a new observability trace for a production agent interaction.
|
|
23
|
+
*
|
|
24
|
+
* Returns a `Trace` object. Add spans, generations, and events to it,
|
|
25
|
+
* then call `await trace.end()` to flush to the Ashr Labs backend.
|
|
26
|
+
*
|
|
27
|
+
* Requires the `observability` feature flag to be enabled.
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* ```typescript
|
|
31
|
+
* const trace = client.trace("support-chat", { userId: "user_42" });
|
|
32
|
+
* const gen = trace.generation("respond", { model: "claude-sonnet-4-6", input: messages });
|
|
33
|
+
* gen.end({ output: reply, usage: { input_tokens: 100, output_tokens: 50 } });
|
|
34
|
+
* await trace.end({ output: { resolution: "resolved" } });
|
|
35
|
+
* ```
|
|
36
|
+
*/
|
|
37
|
+
trace(name: string, opts?: {
|
|
38
|
+
userId?: string;
|
|
39
|
+
sessionId?: string;
|
|
40
|
+
metadata?: Record<string, unknown>;
|
|
41
|
+
tags?: string[];
|
|
42
|
+
}): import("./tracing.js").Trace;
|
|
43
|
+
/**
|
|
44
|
+
* List observability traces for the current tenant.
|
|
45
|
+
*
|
|
46
|
+
* Requires the `observability` feature flag to be enabled.
|
|
47
|
+
*/
|
|
48
|
+
listObservabilityTraces(opts?: {
|
|
49
|
+
userId?: string;
|
|
50
|
+
sessionId?: string;
|
|
51
|
+
limit?: number;
|
|
52
|
+
page?: number;
|
|
53
|
+
}): Promise<Record<string, unknown>>;
|
|
54
|
+
/**
|
|
55
|
+
* Get a single observability trace with full span/generation detail.
|
|
56
|
+
*
|
|
57
|
+
* Requires the `observability` feature flag to be enabled.
|
|
58
|
+
*/
|
|
59
|
+
getObservabilityTrace(traceId: string): Promise<Record<string, unknown>>;
|
|
60
|
+
/** Get observability analytics: overview, tool performance, model usage. */
|
|
61
|
+
getObservabilityAnalytics(days?: number): Promise<Record<string, unknown>>;
|
|
62
|
+
/** Get traces with errors. */
|
|
63
|
+
getObservabilityErrors(opts?: {
|
|
64
|
+
days?: number;
|
|
65
|
+
limit?: number;
|
|
66
|
+
page?: number;
|
|
67
|
+
}): Promise<Record<string, unknown>>;
|
|
68
|
+
/** Get traces with tool call failures. */
|
|
69
|
+
getObservabilityToolErrors(opts?: {
|
|
70
|
+
days?: number;
|
|
71
|
+
limit?: number;
|
|
72
|
+
page?: number;
|
|
73
|
+
}): Promise<Record<string, unknown>>;
|
|
21
74
|
private static _validateConfigStructure;
|
|
75
|
+
/**
|
|
76
|
+
* Create a new dataset generation request.
|
|
77
|
+
*
|
|
78
|
+
* The `request` object describes your agent and what test scenarios to generate.
|
|
79
|
+
*
|
|
80
|
+
* **Required sections:**
|
|
81
|
+
* - `agent` — At least one of `name`, `description`, or `system_prompt`.
|
|
82
|
+
* Include `tools` here (with `name`, `description`, `parameters`) so the
|
|
83
|
+
* generator creates scenarios with tool call expectations.
|
|
84
|
+
* Include `accepted_inputs` specifying which input types your agent supports.
|
|
85
|
+
* Allowed input keys: `text`, `audio`, `file`, `image`, `video`, `conversation`.
|
|
86
|
+
* - `context` — At least one of `domain`, `use_case`, or `scenario_context`.
|
|
87
|
+
* - `generation_options` — Controls what assets to generate.
|
|
88
|
+
* Keys: `scenario_count`, `generate_audio`, `generate_files`, `generate_images`,
|
|
89
|
+
* `generate_videos`, `generate_simulations`.
|
|
90
|
+
*
|
|
91
|
+
* **Optional sections:** `test_config`, `metadata`
|
|
92
|
+
*
|
|
93
|
+
* @example
|
|
94
|
+
* ```typescript
|
|
95
|
+
* const req = await client.createRequest("Loan Agent Eval", {
|
|
96
|
+
* agent: {
|
|
97
|
+
* name: "QuickLend Loan Officer",
|
|
98
|
+
* description: "Helps applicants check credit and submit applications",
|
|
99
|
+
* system_prompt: "You are a professional loan officer.",
|
|
100
|
+
* tools: [
|
|
101
|
+
* {
|
|
102
|
+
* name: "check_credit_score",
|
|
103
|
+
* description: "Pull applicant credit score and history",
|
|
104
|
+
* parameters: {
|
|
105
|
+
* type: "object",
|
|
106
|
+
* required: ["applicant_id"],
|
|
107
|
+
* properties: { applicant_id: { type: "string" } },
|
|
108
|
+
* },
|
|
109
|
+
* },
|
|
110
|
+
* ],
|
|
111
|
+
* accepted_inputs: { text: true, audio: false, file: false, image: false, video: false },
|
|
112
|
+
* },
|
|
113
|
+
* context: {
|
|
114
|
+
* domain: "financial services",
|
|
115
|
+
* use_case: "Applicants inquiring about loan eligibility and rates",
|
|
116
|
+
* scenario_context: "A digital lending platform called QuickLend Financial",
|
|
117
|
+
* },
|
|
118
|
+
* generation_options: {
|
|
119
|
+
* scenario_count: 5,
|
|
120
|
+
* generate_audio: false,
|
|
121
|
+
* generate_files: false,
|
|
122
|
+
* generate_simulations: false,
|
|
123
|
+
* },
|
|
124
|
+
* test_config: {
|
|
125
|
+
* num_variations: 5,
|
|
126
|
+
* coverage: { happy_path: true, edge_cases: true, error_handling: true },
|
|
127
|
+
* },
|
|
128
|
+
* });
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
22
131
|
createRequest(requestName: string, request: Record<string, unknown>, requestInputSchema?: Record<string, unknown> | null, tenantId?: number | null, requestorId?: number | null): Promise<Record<string, unknown>>;
|
|
23
132
|
getRequest(requestId: number): Promise<Record<string, unknown>>;
|
|
24
133
|
listRequests(tenantId?: number | null, status?: string | null, limit?: number, cursor?: number | null): Promise<Record<string, unknown>>;
|
|
@@ -33,6 +142,20 @@ export declare class AshrLabsClient {
|
|
|
33
142
|
* from agent name/description.
|
|
34
143
|
*/
|
|
35
144
|
private static _enrichConfig;
|
|
145
|
+
/**
|
|
146
|
+
* Generate a dataset: creates a request, waits for completion, and returns the dataset.
|
|
147
|
+
*
|
|
148
|
+
* **Prefer reusing existing datasets** with `EvalRunner.fromDataset()` instead of
|
|
149
|
+
* generating new ones each time. Only generate a new dataset when the agent's tools,
|
|
150
|
+
* inputs, or domain have changed.
|
|
151
|
+
*
|
|
152
|
+
* @param requestName - A name/title for the request.
|
|
153
|
+
* @param config - Generation config (same structure as `createRequest`).
|
|
154
|
+
* @param requestInputSchema - Optional JSON Schema. If omitted, tools are auto-populated from `config.agent.tools`.
|
|
155
|
+
* @param timeout - Max seconds to wait for generation (default 600).
|
|
156
|
+
* @param pollInterval - Seconds between status checks (default 5).
|
|
157
|
+
* @returns `[datasetId, datasetSource]` tuple.
|
|
158
|
+
*/
|
|
36
159
|
generateDataset(requestName: string, config: Record<string, unknown>, requestInputSchema?: Record<string, unknown> | null, timeout?: number, pollInterval?: number): Promise<[number, Record<string, unknown>]>;
|
|
37
160
|
toString(): string;
|
|
38
161
|
}
|
package/dist/client.js
CHANGED
|
@@ -156,6 +156,76 @@ export class AshrLabsClient {
|
|
|
156
156
|
return this._makeRequest("list_runs", params);
|
|
157
157
|
}
|
|
158
158
|
// =========================================================================
|
|
159
|
+
// Observability — Production Agent Tracing
|
|
160
|
+
// =========================================================================
|
|
161
|
+
/**
|
|
162
|
+
* Start a new observability trace for a production agent interaction.
|
|
163
|
+
*
|
|
164
|
+
* Returns a `Trace` object. Add spans, generations, and events to it,
|
|
165
|
+
* then call `await trace.end()` to flush to the Ashr Labs backend.
|
|
166
|
+
*
|
|
167
|
+
* Requires the `observability` feature flag to be enabled.
|
|
168
|
+
*
|
|
169
|
+
* @example
|
|
170
|
+
* ```typescript
|
|
171
|
+
* const trace = client.trace("support-chat", { userId: "user_42" });
|
|
172
|
+
* const gen = trace.generation("respond", { model: "claude-sonnet-4-6", input: messages });
|
|
173
|
+
* gen.end({ output: reply, usage: { input_tokens: 100, output_tokens: 50 } });
|
|
174
|
+
* await trace.end({ output: { resolution: "resolved" } });
|
|
175
|
+
* ```
|
|
176
|
+
*/
|
|
177
|
+
trace(name, opts = {}) {
|
|
178
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
179
|
+
const { Trace } = require("./tracing.js");
|
|
180
|
+
return new Trace(this, name, opts);
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* List observability traces for the current tenant.
|
|
184
|
+
*
|
|
185
|
+
* Requires the `observability` feature flag to be enabled.
|
|
186
|
+
*/
|
|
187
|
+
async listObservabilityTraces(opts = {}) {
|
|
188
|
+
const params = {
|
|
189
|
+
limit: opts.limit ?? 50,
|
|
190
|
+
page: opts.page ?? 1,
|
|
191
|
+
};
|
|
192
|
+
if (opts.userId != null)
|
|
193
|
+
params.user_id = opts.userId;
|
|
194
|
+
if (opts.sessionId != null)
|
|
195
|
+
params.session_id = opts.sessionId;
|
|
196
|
+
return this._makeRequest("list_observability_traces", params);
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* Get a single observability trace with full span/generation detail.
|
|
200
|
+
*
|
|
201
|
+
* Requires the `observability` feature flag to be enabled.
|
|
202
|
+
*/
|
|
203
|
+
async getObservabilityTrace(traceId) {
|
|
204
|
+
return this._makeRequest("get_observability_trace", {
|
|
205
|
+
trace_id: traceId,
|
|
206
|
+
});
|
|
207
|
+
}
|
|
208
|
+
/** Get observability analytics: overview, tool performance, model usage. */
|
|
209
|
+
async getObservabilityAnalytics(days = 7) {
|
|
210
|
+
return this._makeRequest("get_observability_analytics", { days });
|
|
211
|
+
}
|
|
212
|
+
/** Get traces with errors. */
|
|
213
|
+
async getObservabilityErrors(opts = {}) {
|
|
214
|
+
return this._makeRequest("get_observability_errors", {
|
|
215
|
+
days: opts.days ?? 7,
|
|
216
|
+
limit: opts.limit ?? 50,
|
|
217
|
+
page: opts.page ?? 1,
|
|
218
|
+
});
|
|
219
|
+
}
|
|
220
|
+
/** Get traces with tool call failures. */
|
|
221
|
+
async getObservabilityToolErrors(opts = {}) {
|
|
222
|
+
return this._makeRequest("get_observability_tool_errors", {
|
|
223
|
+
days: opts.days ?? 7,
|
|
224
|
+
limit: opts.limit ?? 50,
|
|
225
|
+
page: opts.page ?? 1,
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
// =========================================================================
|
|
159
229
|
// Request Operations
|
|
160
230
|
// =========================================================================
|
|
161
231
|
static _validateConfigStructure(config) {
|
|
@@ -185,6 +255,62 @@ export class AshrLabsClient {
|
|
|
185
255
|
throw new ValidationError("config.context must include at least one of: domain, use_case, scenario_context");
|
|
186
256
|
}
|
|
187
257
|
}
|
|
258
|
+
/**
|
|
259
|
+
* Create a new dataset generation request.
|
|
260
|
+
*
|
|
261
|
+
* The `request` object describes your agent and what test scenarios to generate.
|
|
262
|
+
*
|
|
263
|
+
* **Required sections:**
|
|
264
|
+
* - `agent` — At least one of `name`, `description`, or `system_prompt`.
|
|
265
|
+
* Include `tools` here (with `name`, `description`, `parameters`) so the
|
|
266
|
+
* generator creates scenarios with tool call expectations.
|
|
267
|
+
* Include `accepted_inputs` specifying which input types your agent supports.
|
|
268
|
+
* Allowed input keys: `text`, `audio`, `file`, `image`, `video`, `conversation`.
|
|
269
|
+
* - `context` — At least one of `domain`, `use_case`, or `scenario_context`.
|
|
270
|
+
* - `generation_options` — Controls what assets to generate.
|
|
271
|
+
* Keys: `scenario_count`, `generate_audio`, `generate_files`, `generate_images`,
|
|
272
|
+
* `generate_videos`, `generate_simulations`.
|
|
273
|
+
*
|
|
274
|
+
* **Optional sections:** `test_config`, `metadata`
|
|
275
|
+
*
|
|
276
|
+
* @example
|
|
277
|
+
* ```typescript
|
|
278
|
+
* const req = await client.createRequest("Loan Agent Eval", {
|
|
279
|
+
* agent: {
|
|
280
|
+
* name: "QuickLend Loan Officer",
|
|
281
|
+
* description: "Helps applicants check credit and submit applications",
|
|
282
|
+
* system_prompt: "You are a professional loan officer.",
|
|
283
|
+
* tools: [
|
|
284
|
+
* {
|
|
285
|
+
* name: "check_credit_score",
|
|
286
|
+
* description: "Pull applicant credit score and history",
|
|
287
|
+
* parameters: {
|
|
288
|
+
* type: "object",
|
|
289
|
+
* required: ["applicant_id"],
|
|
290
|
+
* properties: { applicant_id: { type: "string" } },
|
|
291
|
+
* },
|
|
292
|
+
* },
|
|
293
|
+
* ],
|
|
294
|
+
* accepted_inputs: { text: true, audio: false, file: false, image: false, video: false },
|
|
295
|
+
* },
|
|
296
|
+
* context: {
|
|
297
|
+
* domain: "financial services",
|
|
298
|
+
* use_case: "Applicants inquiring about loan eligibility and rates",
|
|
299
|
+
* scenario_context: "A digital lending platform called QuickLend Financial",
|
|
300
|
+
* },
|
|
301
|
+
* generation_options: {
|
|
302
|
+
* scenario_count: 5,
|
|
303
|
+
* generate_audio: false,
|
|
304
|
+
* generate_files: false,
|
|
305
|
+
* generate_simulations: false,
|
|
306
|
+
* },
|
|
307
|
+
* test_config: {
|
|
308
|
+
* num_variations: 5,
|
|
309
|
+
* coverage: { happy_path: true, edge_cases: true, error_handling: true },
|
|
310
|
+
* },
|
|
311
|
+
* });
|
|
312
|
+
* ```
|
|
313
|
+
*/
|
|
188
314
|
async createRequest(requestName, request, requestInputSchema, tenantId, requestorId) {
|
|
189
315
|
AshrLabsClient._validateConfigStructure(request);
|
|
190
316
|
if (requestInputSchema == null) {
|
|
@@ -305,6 +431,20 @@ export class AshrLabsClient {
|
|
|
305
431
|
}
|
|
306
432
|
return out;
|
|
307
433
|
}
|
|
434
|
+
/**
|
|
435
|
+
* Generate a dataset: creates a request, waits for completion, and returns the dataset.
|
|
436
|
+
*
|
|
437
|
+
* **Prefer reusing existing datasets** with `EvalRunner.fromDataset()` instead of
|
|
438
|
+
* generating new ones each time. Only generate a new dataset when the agent's tools,
|
|
439
|
+
* inputs, or domain have changed.
|
|
440
|
+
*
|
|
441
|
+
* @param requestName - A name/title for the request.
|
|
442
|
+
* @param config - Generation config (same structure as `createRequest`).
|
|
443
|
+
* @param requestInputSchema - Optional JSON Schema. If omitted, tools are auto-populated from `config.agent.tools`.
|
|
444
|
+
* @param timeout - Max seconds to wait for generation (default 600).
|
|
445
|
+
* @param pollInterval - Seconds between status checks (default 5).
|
|
446
|
+
* @returns `[datasetId, datasetSource]` tuple.
|
|
447
|
+
*/
|
|
308
448
|
async generateDataset(requestName, config, requestInputSchema, timeout = 600, pollInterval = 5) {
|
|
309
449
|
const enriched = AshrLabsClient._enrichConfig(config);
|
|
310
450
|
const req = await this.createRequest(requestName, enriched, requestInputSchema);
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
export { AshrLabsClient } from "./client.js";
|
|
2
2
|
export { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
|
|
3
|
-
export type { User, Tenant, Session, Dataset, Run, Request, APIKey, ToolCall, ExpectedResponse, Action, Scenario, } from "./models.js";
|
|
3
|
+
export type { User, Tenant, Session, Dataset, Run, Request, APIKey, ToolCall, ExpectedResponse, Action, Scenario, ObservabilityObservation, ObservabilityTrace, VmLogEntry, VmStream, KernelViewport, KernelActionData, KernelEventData, KernelVmMetadata, KernelVmStream, } from "./models.js";
|
|
4
|
+
export { KERNEL_ACTION_TYPES, KERNEL_EVENT_TYPES, } from "./models.js";
|
|
5
|
+
export type { KernelActionType, KernelEventType, } from "./models.js";
|
|
6
|
+
export { Trace, Span, Generation } from "./tracing.js";
|
|
4
7
|
export { RunBuilder, TestBuilder } from "./run-builder.js";
|
|
5
8
|
export { stripMarkdown, tokenize, fuzzyStrMatch, extractToolArgs, compareToolArgs, textSimilarity, } from "./comparators.js";
|
|
6
9
|
export { EvalRunner } from "./eval.js";
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
export { AshrLabsClient } from "./client.js";
|
|
2
2
|
export { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
|
|
3
|
+
export { KERNEL_ACTION_TYPES, KERNEL_EVENT_TYPES, } from "./models.js";
|
|
4
|
+
export { Trace, Span, Generation } from "./tracing.js";
|
|
3
5
|
export { RunBuilder, TestBuilder } from "./run-builder.js";
|
|
4
6
|
export { stripMarkdown, tokenize, fuzzyStrMatch, extractToolArgs, compareToolArgs, textSimilarity, } from "./comparators.js";
|
|
5
7
|
export { EvalRunner } from "./eval.js";
|
package/dist/models.d.ts
CHANGED
|
@@ -47,6 +47,35 @@ export interface Run {
|
|
|
47
47
|
runner?: number;
|
|
48
48
|
result?: Record<string, unknown>;
|
|
49
49
|
}
|
|
50
|
+
/**
 * A single observation belonging to an `ObservabilityTrace`.
 * Every field is optional, and most additionally accept `null`.
 */
export interface ObservabilityObservation {
    /** Observation identifier. */
    id?: string;
    /** Observation name. */
    name?: string;
    /** Observation kind, typed here as a free-form string. */
    type?: string;
    /** Identifier of the parent observation, or `null`. */
    parent_observation_id?: string | null;
    /** Input payload; shape is not constrained by this type. */
    input?: unknown | null;
    /** Output payload; shape is not constrained by this type. */
    output?: unknown | null;
    /** Arbitrary key/value metadata. */
    metadata?: Record<string, unknown> | null;
    /** Model name. */
    model?: string | null;
    /** Token counts. */
    usage?: {
        input_tokens?: number;
        output_tokens?: number;
    } | null;
    /** Severity level; restricted to the four literals below. */
    level?: "DEBUG" | "DEFAULT" | "WARNING" | "ERROR" | null;
    /** Free-form status text. */
    status_message?: string | null;
    /** Start timestamp as a string (format not specified here). */
    start_time?: string | null;
    /** End timestamp as a string (format not specified here). */
    end_time?: string | null;
}
|
|
68
|
+
/**
 * An observability trace together with its observations.
 * Every field is optional.
 */
export interface ObservabilityTrace {
    /** Trace identifier. */
    id?: string;
    /** Trace name. */
    name?: string;
    /** User identifier associated with the trace, or `null`. */
    user_id?: string | null;
    /** Session identifier associated with the trace, or `null`. */
    session_id?: string | null;
    /** Arbitrary key/value metadata. */
    metadata?: Record<string, unknown> | null;
    /** Tags attached to the trace. */
    tags?: string[];
    /** Creation timestamp as a string (format not specified here). */
    created_at?: string | null;
    /** Output payload; shape is not constrained by this type. */
    output?: unknown | null;
    /** Observations recorded in this trace. */
    observations?: ObservabilityObservation[];
}
|
|
50
79
|
export interface Request {
|
|
51
80
|
id?: number;
|
|
52
81
|
created_at?: string;
|
|
@@ -73,6 +102,76 @@ export interface RequestsListResponse extends ListResponse {
|
|
|
73
102
|
export interface APIKeysListResponse extends ListResponse {
|
|
74
103
|
api_keys: APIKey[];
|
|
75
104
|
}
|
|
105
|
+
/** One log entry of a VM stream (see `VmStream.logs`). */
export interface VmLogEntry {
    /** Numeric timestamp — unit and epoch are not specified here; TODO confirm. */
    ts?: number;
    /** Entry type, as a free-form string. */
    type?: string;
    /** Entry payload. */
    data?: Record<string, unknown>;
}
|
|
110
|
+
/** Viewport dimensions used in `KernelVmMetadata.viewport`. */
export interface KernelViewport {
    /** Viewport width. */
    width?: number;
    /** Viewport height. */
    height?: number;
}
|
|
114
|
+
/** The Kernel computer-control action names, as a readonly tuple of string literals. */
export declare const KERNEL_ACTION_TYPES: readonly [
    "click_mouse",
    "move_mouse",
    "drag_mouse",
    "type_text",
    "press_key",
    "scroll",
    "screenshot"
];
/** Union of the literal action names listed in `KERNEL_ACTION_TYPES`. */
export type KernelActionType = (typeof KERNEL_ACTION_TYPES)[number];
|
|
116
|
+
/** The Kernel event names, as a readonly tuple of string literals. */
export declare const KERNEL_EVENT_TYPES: readonly [
    "navigation",
    "log",
    "error",
    "invocation_state",
    "console",
    "network"
];
/** Union of the literal event names listed in `KERNEL_EVENT_TYPES`. */
export type KernelEventType = (typeof KERNEL_EVENT_TYPES)[number];
|
|
118
|
+
/**
 * Data payload for a Kernel computer control action.
 *
 * One flat shape covers every action type, so all fields are optional.
 * The per-field notes name the action types whose payload comments in
 * `models.js` list that field.
 */
export interface KernelActionData {
    /** X coordinate (`click_mouse`, `move_mouse`, `scroll`). */
    x?: number;
    /** Y coordinate (`click_mouse`, `move_mouse`, `scroll`). */
    y?: number;
    /** Mouse button (`click_mouse`, `drag_mouse`). */
    button?: string;
    /** Click type (`click_mouse`). */
    click_type?: string;
    /** Number of clicks (`click_mouse`). */
    num_clicks?: number;
    /** Smooth-movement flag (`move_mouse`, `drag_mouse`). */
    smooth?: boolean;
    /** List of `[x, y]` points (`drag_mouse`). */
    path?: number[][];
    /** Text to type (`type_text`). */
    text?: string;
    /** Delay (`type_text`) — unit not specified here. */
    delay?: number;
    /** Keys to press (`press_key`). */
    keys?: string[];
    /** Duration (`press_key`) — unit not specified here. */
    duration?: number;
    /** Keys to hold (`press_key`). */
    hold_keys?: string[];
    /** Horizontal scroll delta (`scroll`). */
    delta_x?: number;
    /** Vertical scroll delta (`scroll`). */
    delta_y?: number;
    /** Output format (`screenshot`). */
    format?: string;
    /** S3 key that a `screenshot` result may include. */
    s3_key?: string;
    /** Duration in milliseconds (`move_mouse`, `drag_mouse`). */
    duration_ms?: number;
}
|
|
138
|
+
/**
 * Data payload for a Kernel event.
 *
 * One flat shape covers every event type, so all fields are optional.
 * The per-field notes name the event types whose payload comments in
 * `models.js` list that field.
 */
export interface KernelEventData {
    /** URL (`navigation`, `network`). */
    url?: string;
    /** Message text (`log`, `error`, `console`). */
    message?: string;
    /** Console level (`console`). */
    level?: string;
    /** Error code (`error`). */
    code?: string;
    /** Additional error details (`error`). */
    details?: Record<string, unknown>[];
    /** HTTP method (`network`). */
    method?: string;
    /**
     * Status (`network`, `invocation_state`).
     * NOTE(review): typed as a number for both event types — confirm that
     * invocation status is numeric.
     */
    status?: number;
    /** Invocation identifier (`invocation_state`). */
    invocation_id?: string;
    /** Action name (`invocation_state`). */
    action_name?: string;
    /** Status reason — not listed in any event-type payload comment; TODO confirm usage. */
    status_reason?: string;
    /** Invocation output (`invocation_state`). */
    output?: string;
}
|
|
152
|
+
/**
 * Metadata carried by a Kernel VM stream (`KernelVmStream.metadata`).
 * These are the snake_case keys that `TestBuilder.setKernelVm` fills in
 * from its camelCase options.
 */
export interface KernelVmMetadata {
    /** Live view URL. */
    live_view_url?: string;
    /** CDP WebSocket URL. */
    cdp_ws_url?: string;
    /** Replay identifier. */
    replay_id?: string;
    /** Replay view URL. */
    replay_view_url?: string;
    /** Headless flag. */
    headless?: boolean;
    /** Stealth flag. */
    stealth?: boolean;
    /** Viewport dimensions. */
    viewport?: KernelViewport;
}
|
|
161
|
+
/**
 * A VM stream whose provider is fixed to `"kernel"` and whose metadata is
 * typed as `KernelVmMetadata`. It has the same fields as `VmStream`, more
 * narrowly typed.
 */
export interface KernelVmStream {
    /** Always the literal `"kernel"`. */
    provider: "kernel";
    /** Session identifier. */
    session_id?: string;
    /** Duration in milliseconds. */
    duration_ms?: number;
    /** Log entries for the session. */
    logs?: VmLogEntry[];
    /** Kernel-specific metadata. */
    metadata?: KernelVmMetadata;
}
|
|
168
|
+
/**
 * Provider-agnostic VM stream. `provider` is the only required field.
 */
export interface VmStream {
    /** Provider name. */
    provider: string;
    /** Session identifier. */
    session_id?: string;
    /** Duration in milliseconds. */
    duration_ms?: number;
    /** Log entries for the session. */
    logs?: VmLogEntry[];
    /** Arbitrary provider-specific metadata. */
    metadata?: Record<string, unknown>;
}
|
|
76
175
|
export interface ToolCall {
|
|
77
176
|
name?: string;
|
|
78
177
|
arguments_json?: string;
|
package/dist/models.js
CHANGED
|
@@ -1 +1,20 @@
|
|
|
1
|
-
|
|
1
|
+
// ---- Kernel action types (computer control API) ----
|
|
2
|
+
// Map to POST /browsers/{id}/computer/* endpoints.
|
|
3
|
+
export const KERNEL_ACTION_TYPES = [
|
|
4
|
+
"click_mouse", // {x, y, button?, click_type?, num_clicks?}
|
|
5
|
+
"move_mouse", // {x, y, duration_ms?, smooth?}
|
|
6
|
+
"drag_mouse", // {path: [[x,y],...], button?, smooth?, duration_ms?}
|
|
7
|
+
"type_text", // {text, delay?}
|
|
8
|
+
"press_key", // {keys: string[], duration?, hold_keys?}
|
|
9
|
+
"scroll", // {x, y, delta_x?, delta_y?}
|
|
10
|
+
"screenshot", // {format?} — result may include s3_key or base64
|
|
11
|
+
];
|
|
12
|
+
// ---- Kernel event types (SSE streams + navigation) ----
|
|
13
|
+
export const KERNEL_EVENT_TYPES = [
|
|
14
|
+
"navigation", // {url} — page navigation
|
|
15
|
+
"log", // {message} — from GET /browsers/{id}/logs SSE
|
|
16
|
+
"error", // {code, message, details?} — ErrorEvent
|
|
17
|
+
"invocation_state", // {invocation_id, status, action_name, output?}
|
|
18
|
+
"console", // {level, message} — browser console output
|
|
19
|
+
"network", // {method, url, status} — HTTP request observed
|
|
20
|
+
];
|
package/dist/run-builder.d.ts
CHANGED
|
@@ -6,12 +6,43 @@ export declare class TestBuilder {
|
|
|
6
6
|
private _completedAt;
|
|
7
7
|
private _actionResults;
|
|
8
8
|
private _nextActionIndex;
|
|
9
|
+
private _vmStream;
|
|
9
10
|
constructor(testId: string);
|
|
11
|
+
/** The test ID (matches the scenario ID from the dataset). */
|
|
12
|
+
get test_id(): string;
|
|
10
13
|
start(): this;
|
|
11
14
|
addUserFile(filePath: string, description: string, actionIndex?: number): this;
|
|
12
15
|
addUserText(text: string, description: string, actionIndex?: number): this;
|
|
13
16
|
addToolCall(expected: Record<string, unknown>, actual: Record<string, unknown>, matchStatus: string, divergenceNotes?: string | null, actionIndex?: number): this;
|
|
14
17
|
addAgentResponse(expectedResponse: Record<string, unknown>, actualResponse: Record<string, unknown>, matchStatus: string, semanticSimilarity?: number | null, divergenceNotes?: string | null, actionIndex?: number): this;
|
|
18
|
+
/**
|
|
19
|
+
* Attach VM session logs to this test.
|
|
20
|
+
*/
|
|
21
|
+
setVmStream(provider: string, opts?: {
|
|
22
|
+
sessionId?: string;
|
|
23
|
+
durationMs?: number;
|
|
24
|
+
logs?: Record<string, unknown>[];
|
|
25
|
+
metadata?: Record<string, unknown>;
|
|
26
|
+
}): this;
|
|
27
|
+
/**
|
|
28
|
+
* Attach a Kernel browser session to this test.
|
|
29
|
+
* Metadata fields map to Kernel's browser API response
|
|
30
|
+
* (see https://www.kernel.sh/docs).
|
|
31
|
+
*/
|
|
32
|
+
setKernelVm(sessionId: string, opts?: {
|
|
33
|
+
durationMs?: number;
|
|
34
|
+
logs?: Record<string, unknown>[];
|
|
35
|
+
liveViewUrl?: string;
|
|
36
|
+
cdpWsUrl?: string;
|
|
37
|
+
replayId?: string;
|
|
38
|
+
replayViewUrl?: string;
|
|
39
|
+
headless?: boolean;
|
|
40
|
+
stealth?: boolean;
|
|
41
|
+
viewport?: {
|
|
42
|
+
width: number;
|
|
43
|
+
height: number;
|
|
44
|
+
};
|
|
45
|
+
}): this;
|
|
15
46
|
complete(status?: string): this;
|
|
16
47
|
build(): Record<string, unknown>;
|
|
17
48
|
private _resolveIndex;
|
|
@@ -22,6 +53,8 @@ export declare class RunBuilder {
|
|
|
22
53
|
private _completedAt;
|
|
23
54
|
/** @internal */
|
|
24
55
|
_tests: TestBuilder[];
|
|
56
|
+
/** The list of tests in this run. Use this to attach VM streams after eval. */
|
|
57
|
+
get tests(): TestBuilder[];
|
|
25
58
|
start(): this;
|
|
26
59
|
addTest(testId: string): TestBuilder;
|
|
27
60
|
complete(status?: string): this;
|
package/dist/run-builder.js
CHANGED
|
@@ -85,9 +85,14 @@ export class TestBuilder {
|
|
|
85
85
|
_completedAt = null;
|
|
86
86
|
_actionResults = [];
|
|
87
87
|
_nextActionIndex = 0;
|
|
88
|
+
_vmStream = null;
|
|
88
89
|
constructor(testId) {
|
|
89
90
|
this._testId = testId;
|
|
90
91
|
}
|
|
92
|
+
/** The test ID (matches the scenario ID from the dataset). */
|
|
93
|
+
get test_id() {
|
|
94
|
+
return this._testId;
|
|
95
|
+
}
|
|
91
96
|
start() {
|
|
92
97
|
this._status = "running";
|
|
93
98
|
this._startedAt = now();
|
|
@@ -156,6 +161,50 @@ export class TestBuilder {
|
|
|
156
161
|
this._actionResults.push(result);
|
|
157
162
|
return this;
|
|
158
163
|
}
|
|
164
|
+
/**
|
|
165
|
+
* Attach VM session logs to this test.
|
|
166
|
+
*/
|
|
167
|
+
setVmStream(provider, opts) {
|
|
168
|
+
const vm = { provider };
|
|
169
|
+
if (opts?.sessionId != null)
|
|
170
|
+
vm.session_id = opts.sessionId;
|
|
171
|
+
if (opts?.durationMs != null)
|
|
172
|
+
vm.duration_ms = opts.durationMs;
|
|
173
|
+
if (opts?.logs != null)
|
|
174
|
+
vm.logs = opts.logs;
|
|
175
|
+
if (opts?.metadata != null)
|
|
176
|
+
vm.metadata = opts.metadata;
|
|
177
|
+
this._vmStream = vm;
|
|
178
|
+
return this;
|
|
179
|
+
}
|
|
180
|
+
/**
|
|
181
|
+
* Attach a Kernel browser session to this test.
|
|
182
|
+
* Metadata fields map to Kernel's browser API response
|
|
183
|
+
* (see https://www.kernel.sh/docs).
|
|
184
|
+
*/
|
|
185
|
+
setKernelVm(sessionId, opts) {
|
|
186
|
+
const metadata = {};
|
|
187
|
+
if (opts?.liveViewUrl != null)
|
|
188
|
+
metadata.live_view_url = opts.liveViewUrl;
|
|
189
|
+
if (opts?.cdpWsUrl != null)
|
|
190
|
+
metadata.cdp_ws_url = opts.cdpWsUrl;
|
|
191
|
+
if (opts?.replayId != null)
|
|
192
|
+
metadata.replay_id = opts.replayId;
|
|
193
|
+
if (opts?.replayViewUrl != null)
|
|
194
|
+
metadata.replay_view_url = opts.replayViewUrl;
|
|
195
|
+
if (opts?.headless != null)
|
|
196
|
+
metadata.headless = opts.headless;
|
|
197
|
+
if (opts?.stealth != null)
|
|
198
|
+
metadata.stealth = opts.stealth;
|
|
199
|
+
if (opts?.viewport != null)
|
|
200
|
+
metadata.viewport = opts.viewport;
|
|
201
|
+
return this.setVmStream("kernel", {
|
|
202
|
+
sessionId,
|
|
203
|
+
durationMs: opts?.durationMs,
|
|
204
|
+
logs: opts?.logs,
|
|
205
|
+
metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
|
|
206
|
+
});
|
|
207
|
+
}
|
|
159
208
|
complete(status = "completed") {
|
|
160
209
|
this._status = status;
|
|
161
210
|
this._completedAt = now();
|
|
@@ -171,6 +220,8 @@ export class TestBuilder {
|
|
|
171
220
|
result.started_at = this._startedAt;
|
|
172
221
|
if (this._completedAt)
|
|
173
222
|
result.completed_at = this._completedAt;
|
|
223
|
+
if (this._vmStream)
|
|
224
|
+
result.vm_stream = this._vmStream;
|
|
174
225
|
return result;
|
|
175
226
|
}
|
|
176
227
|
_resolveIndex(explicit) {
|
|
@@ -189,6 +240,10 @@ export class RunBuilder {
|
|
|
189
240
|
_completedAt = null;
|
|
190
241
|
/** @internal */
|
|
191
242
|
_tests = [];
|
|
243
|
+
/** The list of tests in this run. Use this to attach VM streams after eval. */
|
|
244
|
+
get tests() {
|
|
245
|
+
return this._tests;
|
|
246
|
+
}
|
|
192
247
|
start() {
|
|
193
248
|
this._status = "running";
|
|
194
249
|
this._startedAt = now();
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Production agent tracing for Ashr Labs Observability.
|
|
3
|
+
*
|
|
4
|
+
* **Production-safe:** tracing never throws or interferes with your agent.
|
|
5
|
+
* If the backend is unreachable, `trace.end()` resolves with an error object
|
|
6
|
+
* instead of rejecting.
|
|
7
|
+
*
|
|
8
|
+
* @example Manual instrumentation
|
|
9
|
+
* ```typescript
|
|
10
|
+
* const trace = client.trace("handle-ticket", { userId: "user_42" });
|
|
11
|
+
*
|
|
12
|
+
* const gen = trace.generation("classify", { model: "claude-sonnet-4-6", input: [...] });
|
|
13
|
+
* gen.end({ output: { intent: "reset" }, usage: { input_tokens: 50, output_tokens: 12 } });
|
|
14
|
+
*
|
|
15
|
+
* const tool = trace.span("tool:reset_password", { input: { user_id: "42" } });
|
|
16
|
+
* tool.end({ output: { success: true } });
|
|
17
|
+
*
|
|
18
|
+
* await trace.end({ output: { resolution: "password_reset" } });
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* @example Using `wrap()` for automatic span lifecycle
|
|
22
|
+
* ```typescript
|
|
23
|
+
* const result = await trace.wrap("tool:search", { input: { q: "..." } }, async (span) => {
|
|
24
|
+
* const data = await search(...);
|
|
25
|
+
* span.end({ output: data });
|
|
26
|
+
* return data;
|
|
27
|
+
* });
|
|
28
|
+
* // If the callback throws, the span auto-ends with level="ERROR" and the error re-throws.
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
import type { AshrLabsClient } from "./client.js";
|
|
32
|
+
/**
 * Module-private record for one observation collected by a `Trace`
 * (see `Trace._observations`). The first five fields are required.
 */
interface ObservationData {
    /** Observation identifier. */
    id: string;
    /** Which kind of observation this is. */
    type: "span" | "generation" | "event";
    /** Observation name. */
    name: string;
    /** Identifier of the parent observation, or `null`. */
    parent_observation_id: string | null;
    /** Start timestamp as a string (format not specified here). */
    start_time: string;
    /** End timestamp as a string, or `null`/absent. */
    end_time?: string | null;
    /** Input payload; shape is not constrained by this type. */
    input?: unknown | null;
    /** Output payload; shape is not constrained by this type. */
    output?: unknown | null;
    /** Arbitrary key/value metadata. */
    metadata?: Record<string, unknown> | null;
    /** Model name. */
    model?: string | null;
    /** Token counts. */
    usage?: {
        input_tokens?: number;
        output_tokens?: number;
    } | null;
    /** Severity level, typed here as a free-form string. */
    level?: string | null;
    /** Free-form status text. */
    status_message?: string | null;
}
|
|
50
|
+
/**
 * A span inside a `Trace`. Spans can nest: child spans, generations, and
 * events can be created under a span.
 */
export declare class Span {
    /** Identifier of this span. */
    readonly id: string;
    /** The trace this span belongs to. */
    protected _trace: Trace;
    /** The observation record backing this span. */
    protected _data: ObservationData;
    /** Ended flag — presumably set once `end()` has been called; TODO confirm. */
    protected _ended: boolean;
    /**
     * @param trace - Owning trace.
     * @param name - Span name.
     * @param opts - Optional parent observation ID, input payload, metadata, and level.
     */
    constructor(trace: Trace, name: string, opts?: {
        parentId?: string | null;
        input?: unknown;
        metadata?: Record<string, unknown>;
        level?: string;
    });
    /** Open a child span nested under this span. */
    span(name: string, opts?: {
        input?: unknown;
        metadata?: Record<string, unknown>;
    }): Span;
    /** Open a child generation nested under this span. */
    generation(name: string, opts?: {
        model?: string;
        input?: unknown;
        metadata?: Record<string, unknown>;
    }): Generation;
    /** Record a point-in-time event under this span. */
    event(name: string, opts?: {
        input?: unknown;
        metadata?: Record<string, unknown>;
        level?: string;
    }): void;
    /** Mark this span as complete, optionally recording output, status message, and level. */
    end(opts?: {
        output?: unknown;
        statusMessage?: string;
        level?: string;
    }): void;
    /**
     * Run a callback within this span's lifecycle.
     * The span is ended automatically when the callback completes.
     * If the callback throws, the span is ended with `level="ERROR"` and the error re-throws.
     *
     * @param fn - Callback receiving this span; may be sync or async.
     * @returns A promise of the callback's result.
     */
    wrap<T>(fn: (span: this) => T | Promise<T>): Promise<T>;
}
|
|
91
|
+
/**
 * A span that represents a single model (LLM) call.
 *
 * Behaves like {@link Span} but is recorded with type `"generation"` and can
 * additionally carry the model name and token usage.
 */
export declare class Generation extends Span {
    /**
     * @param trace - Trace the generation is recorded on.
     * @param name - Display name of the observation.
     * @param opts - Optional parent id, model name, input payload, metadata
     *   and level. `level` is accepted because the options are passed to the
     *   `Span` constructor, which records it.
     */
    constructor(trace: Trace, name: string, opts?: {
        parentId?: string | null;
        model?: string;
        input?: unknown;
        metadata?: Record<string, unknown>;
        level?: string;
    });
    /**
     * Mark this generation as complete: stamps the end time and stores any of
     * `output`, `usage`, `statusMessage` and `level` that are provided.
     */
    end(opts?: {
        output?: unknown;
        usage?: {
            input_tokens?: number;
            output_tokens?: number;
        };
        statusMessage?: string;
        level?: string;
    }): void;
}
|
|
109
|
+
/**
 * Collects the observations (spans, generations, events) of one agent
 * interaction and sends them to the Ashr Labs backend when `end()` is called.
 */
export declare class Trace {
    /** @internal */
    _observations: ObservationData[];
    private _client;
    private _name;
    private _userId;
    private _sessionId;
    private _metadata;
    private _tags;
    private _traceId;
    private _flushed;
    /**
     * @param client - Client used to send the trace.
     * @param name - Display name of the trace.
     * @param opts - Optional user id, session id, metadata and tags.
     */
    constructor(client: AshrLabsClient, name: string, opts?: {
        userId?: string;
        sessionId?: string;
        metadata?: Record<string, unknown>;
        tags?: string[];
    });
    /** The server-assigned trace ID (available after `end()` resolves). */
    get traceId(): string | null;
    /**
     * Create a top-level span in this trace.
     *
     * `level` is accepted because the options are forwarded unchanged to the
     * `Span` constructor, which records it.
     */
    span(name: string, opts?: {
        input?: unknown;
        metadata?: Record<string, unknown>;
        level?: string;
    }): Span;
    /**
     * Create a top-level generation (LLM call) in this trace.
     *
     * `level` is accepted because the options are forwarded unchanged to the
     * `Generation` constructor, which records it.
     */
    generation(name: string, opts?: {
        model?: string;
        input?: unknown;
        metadata?: Record<string, unknown>;
        level?: string;
    }): Generation;
    /** Record a point-in-time event in this trace. */
    event(name: string, opts?: {
        input?: unknown;
        metadata?: Record<string, unknown>;
        level?: string;
    }): void;
    /**
     * Run a callback within this trace's lifecycle.
     * Auto-flushes the trace when the callback completes (unless it was already flushed).
     * If the callback throws, the trace is flushed with `output: { error: "..." }`
     * and the error re-throws.
     *
     * @param fn - Callback that receives this trace; may be sync or async.
     * @returns The callback's (awaited) result.
     */
    wrap<T>(fn: (trace: this) => T | Promise<T>): Promise<T>;
    /**
     * Flush the trace to the Ashr Labs backend.
     *
     * **Never rejects.** If the backend is unreachable, logs the error
     * and resolves with `{ status: "error", message: "..." }`.
     */
    end(opts?: {
        output?: unknown;
    }): Promise<Record<string, unknown>>;
}
|
|
160
|
+
export {};
|
package/dist/tracing.js
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Production agent tracing for Ashr Labs Observability.
|
|
3
|
+
*
|
|
4
|
+
* **Production-safe:** tracing never throws or interferes with your agent.
|
|
5
|
+
* If the backend is unreachable, `trace.end()` resolves with an error object
|
|
6
|
+
* instead of rejecting.
|
|
7
|
+
*
|
|
8
|
+
* @example Manual instrumentation
|
|
9
|
+
* ```typescript
|
|
10
|
+
* const trace = client.trace("handle-ticket", { userId: "user_42" });
|
|
11
|
+
*
|
|
12
|
+
* const gen = trace.generation("classify", { model: "claude-sonnet-4-6", input: [...] });
|
|
13
|
+
* gen.end({ output: { intent: "reset" }, usage: { input_tokens: 50, output_tokens: 12 } });
|
|
14
|
+
*
|
|
15
|
+
* const tool = trace.span("tool:reset_password", { input: { user_id: "42" } });
|
|
16
|
+
* tool.end({ output: { success: true } });
|
|
17
|
+
*
|
|
18
|
+
* await trace.end({ output: { resolution: "password_reset" } });
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* @example Using `wrap()` for automatic span lifecycle
|
|
22
|
+
* ```typescript
|
|
23
|
+
* const result = await trace.span("tool:search", { input: { q: "..." } }).wrap(async (span) => {
|
|
24
|
+
* const data = await search(...);
|
|
25
|
+
* span.end({ output: data });
|
|
26
|
+
* return data;
|
|
27
|
+
* });
|
|
28
|
+
* // If the callback throws, the span auto-ends with level="ERROR" and the error re-throws.
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
/** Return the current wall-clock time as an ISO-8601 string (UTC, millisecond precision). */
function now() {
    const stamp = new Date();
    return stamp.toISOString();
}
|
|
34
|
+
/** Module-wide sequence number; incremented once per generated id. */
let _counter = 0;
/**
 * Build an observation id by concatenating, in base 36: the current epoch
 * milliseconds, the module-wide sequence number, and up to six characters
 * taken from `Math.random()`.
 *
 * @returns The generated id string.
 */
function makeId() {
    _counter += 1;
    const millis = Date.now().toString(36);
    const sequence = _counter.toString(36);
    const noise = Math.random().toString(36).slice(2, 8);
    return millis + sequence + noise;
}
|
|
39
|
+
export class Span {
|
|
40
|
+
id;
|
|
41
|
+
_trace;
|
|
42
|
+
_data;
|
|
43
|
+
_ended = false;
|
|
44
|
+
constructor(trace, name, opts = {}) {
|
|
45
|
+
this.id = makeId();
|
|
46
|
+
this._trace = trace;
|
|
47
|
+
this._data = {
|
|
48
|
+
id: this.id,
|
|
49
|
+
type: "span",
|
|
50
|
+
name,
|
|
51
|
+
parent_observation_id: opts.parentId ?? null,
|
|
52
|
+
start_time: now(),
|
|
53
|
+
input: opts.input ?? null,
|
|
54
|
+
metadata: opts.metadata ?? null,
|
|
55
|
+
level: opts.level ?? null,
|
|
56
|
+
};
|
|
57
|
+
trace._observations.push(this._data);
|
|
58
|
+
}
|
|
59
|
+
/** Create a child span nested under this span. */
|
|
60
|
+
span(name, opts = {}) {
|
|
61
|
+
return new Span(this._trace, name, { ...opts, parentId: this.id });
|
|
62
|
+
}
|
|
63
|
+
/** Create a child generation nested under this span. */
|
|
64
|
+
generation(name, opts = {}) {
|
|
65
|
+
return new Generation(this._trace, name, { ...opts, parentId: this.id });
|
|
66
|
+
}
|
|
67
|
+
/** Record a point-in-time event under this span. */
|
|
68
|
+
event(name, opts = {}) {
|
|
69
|
+
this._trace._observations.push({
|
|
70
|
+
id: makeId(),
|
|
71
|
+
type: "event",
|
|
72
|
+
name,
|
|
73
|
+
parent_observation_id: this.id,
|
|
74
|
+
start_time: now(),
|
|
75
|
+
input: opts.input ?? null,
|
|
76
|
+
metadata: opts.metadata ?? null,
|
|
77
|
+
level: opts.level ?? null,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
/** Mark this span as complete. */
|
|
81
|
+
end(opts = {}) {
|
|
82
|
+
this._data.end_time = now();
|
|
83
|
+
if (opts.output !== undefined)
|
|
84
|
+
this._data.output = opts.output;
|
|
85
|
+
if (opts.statusMessage !== undefined)
|
|
86
|
+
this._data.status_message = opts.statusMessage;
|
|
87
|
+
if (opts.level !== undefined)
|
|
88
|
+
this._data.level = opts.level;
|
|
89
|
+
this._ended = true;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Run a callback within this span's lifecycle.
|
|
93
|
+
* Auto-ends the span when the callback completes.
|
|
94
|
+
* If the callback throws, the span is ended with `level="ERROR"` and the error re-throws.
|
|
95
|
+
*/
|
|
96
|
+
async wrap(fn) {
|
|
97
|
+
try {
|
|
98
|
+
const result = await fn(this);
|
|
99
|
+
if (!this._ended)
|
|
100
|
+
this.end();
|
|
101
|
+
return result;
|
|
102
|
+
}
|
|
103
|
+
catch (e) {
|
|
104
|
+
if (!this._ended) {
|
|
105
|
+
this.end({
|
|
106
|
+
statusMessage: e instanceof Error ? `${e.name}: ${e.message}` : String(e),
|
|
107
|
+
level: "ERROR",
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
throw e;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
/**
 * A span that represents a single model call.
 *
 * Recorded with type `"generation"`; may additionally carry the model name
 * and token usage.
 */
export class Generation extends Span {
    /**
     * @param trace - Owning trace.
     * @param name - Display name of the observation.
     * @param opts - Span options plus an optional `model`; the model is only
     *   stored when it is truthy.
     */
    constructor(trace, name, opts = {}) {
        super(trace, name, opts);
        const record = this._data;
        // The base constructor registered the record as a "span"; retag it in place.
        record.type = "generation";
        const { model } = opts;
        if (model) {
            record.model = model;
        }
    }
    /**
     * Mark this generation as complete.
     *
     * Always stamps `end_time`; `output`, `usage`, `statusMessage` and `level`
     * are only written when they are not `undefined`.
     */
    end(opts = {}) {
        const record = this._data;
        record.end_time = now();
        const { output, usage, statusMessage, level } = opts;
        if (output !== undefined) {
            record.output = output;
        }
        if (usage !== undefined) {
            record.usage = usage;
        }
        if (statusMessage !== undefined) {
            record.status_message = statusMessage;
        }
        if (level !== undefined) {
            record.level = level;
        }
        this._ended = true;
    }
}
|
|
135
|
+
export class Trace {
|
|
136
|
+
/** @internal */
|
|
137
|
+
_observations = [];
|
|
138
|
+
_client;
|
|
139
|
+
_name;
|
|
140
|
+
_userId;
|
|
141
|
+
_sessionId;
|
|
142
|
+
_metadata;
|
|
143
|
+
_tags;
|
|
144
|
+
_traceId = null;
|
|
145
|
+
_flushed = false;
|
|
146
|
+
constructor(client, name, opts = {}) {
|
|
147
|
+
this._client = client;
|
|
148
|
+
this._name = name;
|
|
149
|
+
this._userId = opts.userId ?? null;
|
|
150
|
+
this._sessionId = opts.sessionId ?? null;
|
|
151
|
+
this._metadata = opts.metadata ?? null;
|
|
152
|
+
this._tags = opts.tags ? [...opts.tags] : [];
|
|
153
|
+
}
|
|
154
|
+
/** The server-assigned trace ID (available after `end()` resolves). */
|
|
155
|
+
get traceId() {
|
|
156
|
+
return this._traceId;
|
|
157
|
+
}
|
|
158
|
+
/** Create a top-level span in this trace. */
|
|
159
|
+
span(name, opts = {}) {
|
|
160
|
+
return new Span(this, name, opts);
|
|
161
|
+
}
|
|
162
|
+
/** Create a top-level generation (LLM call) in this trace. */
|
|
163
|
+
generation(name, opts = {}) {
|
|
164
|
+
return new Generation(this, name, opts);
|
|
165
|
+
}
|
|
166
|
+
/** Record a point-in-time event in this trace. */
|
|
167
|
+
event(name, opts = {}) {
|
|
168
|
+
this._observations.push({
|
|
169
|
+
id: makeId(),
|
|
170
|
+
type: "event",
|
|
171
|
+
name,
|
|
172
|
+
parent_observation_id: null,
|
|
173
|
+
start_time: now(),
|
|
174
|
+
input: opts.input ?? null,
|
|
175
|
+
metadata: opts.metadata ?? null,
|
|
176
|
+
level: opts.level ?? null,
|
|
177
|
+
});
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Run a callback within this trace's lifecycle.
|
|
181
|
+
* Auto-flushes the trace when the callback completes.
|
|
182
|
+
*/
|
|
183
|
+
async wrap(fn) {
|
|
184
|
+
try {
|
|
185
|
+
const result = await fn(this);
|
|
186
|
+
if (!this._flushed)
|
|
187
|
+
await this.end();
|
|
188
|
+
return result;
|
|
189
|
+
}
|
|
190
|
+
catch (e) {
|
|
191
|
+
if (!this._flushed) {
|
|
192
|
+
await this.end({
|
|
193
|
+
output: { error: e instanceof Error ? `${e.name}: ${e.message}` : String(e) },
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
throw e;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* Flush the trace to the Ashr Labs backend.
|
|
201
|
+
*
|
|
202
|
+
* **Never rejects.** If the backend is unreachable, logs the error
|
|
203
|
+
* and resolves with `{ status: "error", message: "..." }`.
|
|
204
|
+
*/
|
|
205
|
+
async end(opts = {}) {
|
|
206
|
+
this._flushed = true;
|
|
207
|
+
const payload = {
|
|
208
|
+
trace: {
|
|
209
|
+
name: this._name,
|
|
210
|
+
user_id: this._userId,
|
|
211
|
+
session_id: this._sessionId,
|
|
212
|
+
metadata: this._metadata,
|
|
213
|
+
tags: this._tags,
|
|
214
|
+
observations: this._observations,
|
|
215
|
+
...(opts.output !== undefined ? { output: opts.output } : {}),
|
|
216
|
+
},
|
|
217
|
+
};
|
|
218
|
+
try {
|
|
219
|
+
const response = await this._client._makeRequest("ingest_observability_trace", payload);
|
|
220
|
+
this._traceId = response.trace_id ?? null;
|
|
221
|
+
return response;
|
|
222
|
+
}
|
|
223
|
+
catch (e) {
|
|
224
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
225
|
+
console.warn(`[ashr_labs] Failed to flush trace "${this._name}": ${message}`);
|
|
226
|
+
return { status: "error", message };
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|