npm - ashr-labs - Versions diffs - 0.4.2 → 0.4.3 - Mend

ashr-labs 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/client.d.ts CHANGED Viewed

@@ -18,7 +18,116 @@ export declare class AshrLabsClient {
     deleteRun(runId: number): Promise<Record<string, unknown>>;
     getRun(runId: number): Promise<Record<string, unknown>>;
     listRuns(datasetId?: number | null, tenantId?: number | null, limit?: number): Promise<Record<string, unknown>>;
+    /**
+     * Start a new observability trace for a production agent interaction.
+     *
+     * Returns a `Trace` object. Add spans, generations, and events to it,
+     * then call `await trace.end()` to flush to the Ashr Labs backend.
+     *
+     * Requires the `observability` feature flag to be enabled.
+     *
+     * @example
+     * ```typescript
+     * const trace = client.trace("support-chat", { userId: "user_42" });
+     * const gen = trace.generation("respond", { model: "claude-sonnet-4-6", input: messages });
+     * gen.end({ output: reply, usage: { input_tokens: 100, output_tokens: 50 } });
+     * await trace.end({ output: { resolution: "resolved" } });
+     * ```
+     */
+    trace(name: string, opts?: {
+        userId?: string;
+        sessionId?: string;
+        metadata?: Record<string, unknown>;
+        tags?: string[];
+    }): import("./tracing.js").Trace;
+    /**
+     * List observability traces for the current tenant.
+     *
+     * Requires the `observability` feature flag to be enabled.
+     */
+    listObservabilityTraces(opts?: {
+        userId?: string;
+        sessionId?: string;
+        limit?: number;
+        page?: number;
+    }): Promise<Record<string, unknown>>;
+    /**
+     * Get a single observability trace with full span/generation detail.
+     *
+     * Requires the `observability` feature flag to be enabled.
+     */
+    getObservabilityTrace(traceId: string): Promise<Record<string, unknown>>;
+    /** Get observability analytics: overview, tool performance, model usage. */
+    getObservabilityAnalytics(days?: number): Promise<Record<string, unknown>>;
+    /** Get traces with errors. */
+    getObservabilityErrors(opts?: {
+        days?: number;
+        limit?: number;
+        page?: number;
+    }): Promise<Record<string, unknown>>;
+    /** Get traces with tool call failures. */
+    getObservabilityToolErrors(opts?: {
+        days?: number;
+        limit?: number;
+        page?: number;
+    }): Promise<Record<string, unknown>>;
     private static _validateConfigStructure;
+    /**
+     * Create a new dataset generation request.
+     *
+     * The `request` object describes your agent and what test scenarios to generate.
+     *
+     * **Required sections:**
+     * - `agent` — At least one of `name`, `description`, or `system_prompt`.
+     *   Include `tools` here (with `name`, `description`, `parameters`) so the
+     *   generator creates scenarios with tool call expectations.
+     *   Include `accepted_inputs` specifying which input types your agent supports.
+     *   Allowed input keys: `text`, `audio`, `file`, `image`, `video`, `conversation`.
+     * - `context` — At least one of `domain`, `use_case`, or `scenario_context`.
+     * - `generation_options` — Controls what assets to generate.
+     *   Keys: `scenario_count`, `generate_audio`, `generate_files`, `generate_images`,
+     *   `generate_videos`, `generate_simulations`.
+     *
+     * **Optional sections:** `test_config`, `metadata`
+     *
+     * @example
+     * ```typescript
+     * const req = await client.createRequest("Loan Agent Eval", {
+     *   agent: {
+     *     name: "QuickLend Loan Officer",
+     *     description: "Helps applicants check credit and submit applications",
+     *     system_prompt: "You are a professional loan officer.",
+     *     tools: [
+     *       {
+     *         name: "check_credit_score",
+     *         description: "Pull applicant credit score and history",
+     *         parameters: {
+     *           type: "object",
+     *           required: ["applicant_id"],
+     *           properties: { applicant_id: { type: "string" } },
+     *         },
+     *       },
+     *     ],
+     *     accepted_inputs: { text: true, audio: false, file: false, image: false, video: false },
+     *   },
+     *   context: {
+     *     domain: "financial services",
+     *     use_case: "Applicants inquiring about loan eligibility and rates",
+     *     scenario_context: "A digital lending platform called QuickLend Financial",
+     *   },
+     *   generation_options: {
+     *     scenario_count: 5,
+     *     generate_audio: false,
+     *     generate_files: false,
+     *     generate_simulations: false,
+     *   },
+     *   test_config: {
+     *     num_variations: 5,
+     *     coverage: { happy_path: true, edge_cases: true, error_handling: true },
+     *   },
+     * });
+     * ```
+     */
     createRequest(requestName: string, request: Record<string, unknown>, requestInputSchema?: Record<string, unknown> | null, tenantId?: number | null, requestorId?: number | null): Promise<Record<string, unknown>>;
     getRequest(requestId: number): Promise<Record<string, unknown>>;
     listRequests(tenantId?: number | null, status?: string | null, limit?: number, cursor?: number | null): Promise<Record<string, unknown>>;
@@ -33,6 +142,20 @@ export declare class AshrLabsClient {
      * from agent name/description.
      */
     private static _enrichConfig;
+    /**
+     * Generate a dataset: creates a request, waits for completion, and returns the dataset.
+     *
+     * **Prefer reusing existing datasets** with `EvalRunner.fromDataset()` instead of
+     * generating new ones each time. Only generate a new dataset when the agent's tools,
+     * inputs, or domain have changed.
+     *
+     * @param requestName - A name/title for the request.
+     * @param config - Generation config (same structure as `createRequest`).
+     * @param requestInputSchema - Optional JSON Schema. If omitted, tools are auto-populated from `config.agent.tools`.
+     * @param timeout - Max seconds to wait for generation (default 600).
+     * @param pollInterval - Seconds between status checks (default 5).
+     * @returns `[datasetId, datasetSource]` tuple.
+     */
     generateDataset(requestName: string, config: Record<string, unknown>, requestInputSchema?: Record<string, unknown> | null, timeout?: number, pollInterval?: number): Promise<[number, Record<string, unknown>]>;
     toString(): string;
 }

package/dist/client.js CHANGED Viewed

@@ -156,6 +156,76 @@ export class AshrLabsClient {
         return this._makeRequest("list_runs", params);
     }
     // =========================================================================
+    // Observability — Production Agent Tracing
+    // =========================================================================
+    /**
+     * Start a new observability trace for a production agent interaction.
+     *
+     * Returns a `Trace` object. Add spans, generations, and events to it,
+     * then call `await trace.end()` to flush to the Ashr Labs backend.
+     *
+     * Requires the `observability` feature flag to be enabled.
+     *
+     * @example
+     * ```typescript
+     * const trace = client.trace("support-chat", { userId: "user_42" });
+     * const gen = trace.generation("respond", { model: "claude-sonnet-4-6", input: messages });
+     * gen.end({ output: reply, usage: { input_tokens: 100, output_tokens: 50 } });
+     * await trace.end({ output: { resolution: "resolved" } });
+     * ```
+     */
+    trace(name, opts = {}) {
+        // eslint-disable-next-line @typescript-eslint/no-require-imports
+        const { Trace } = require("./tracing.js");
+        return new Trace(this, name, opts);
+    }
+    /**
+     * List observability traces for the current tenant.
+     *
+     * Requires the `observability` feature flag to be enabled.
+     */
+    async listObservabilityTraces(opts = {}) {
+        const params = {
+            limit: opts.limit ?? 50,
+            page: opts.page ?? 1,
+        };
+        if (opts.userId != null)
+            params.user_id = opts.userId;
+        if (opts.sessionId != null)
+            params.session_id = opts.sessionId;
+        return this._makeRequest("list_observability_traces", params);
+    }
+    /**
+     * Get a single observability trace with full span/generation detail.
+     *
+     * Requires the `observability` feature flag to be enabled.
+     */
+    async getObservabilityTrace(traceId) {
+        return this._makeRequest("get_observability_trace", {
+            trace_id: traceId,
+        });
+    }
+    /** Get observability analytics: overview, tool performance, model usage. */
+    async getObservabilityAnalytics(days = 7) {
+        return this._makeRequest("get_observability_analytics", { days });
+    }
+    /** Get traces with errors. */
+    async getObservabilityErrors(opts = {}) {
+        return this._makeRequest("get_observability_errors", {
+            days: opts.days ?? 7,
+            limit: opts.limit ?? 50,
+            page: opts.page ?? 1,
+        });
+    }
+    /** Get traces with tool call failures. */
+    async getObservabilityToolErrors(opts = {}) {
+        return this._makeRequest("get_observability_tool_errors", {
+            days: opts.days ?? 7,
+            limit: opts.limit ?? 50,
+            page: opts.page ?? 1,
+        });
+    }
+    // =========================================================================
     // Request Operations
     // =========================================================================
     static _validateConfigStructure(config) {
@@ -185,6 +255,62 @@ export class AshrLabsClient {
             throw new ValidationError("config.context must include at least one of: domain, use_case, scenario_context");
         }
     }
+    /**
+     * Create a new dataset generation request.
+     *
+     * The `request` object describes your agent and what test scenarios to generate.
+     *
+     * **Required sections:**
+     * - `agent` — At least one of `name`, `description`, or `system_prompt`.
+     *   Include `tools` here (with `name`, `description`, `parameters`) so the
+     *   generator creates scenarios with tool call expectations.
+     *   Include `accepted_inputs` specifying which input types your agent supports.
+     *   Allowed input keys: `text`, `audio`, `file`, `image`, `video`, `conversation`.
+     * - `context` — At least one of `domain`, `use_case`, or `scenario_context`.
+     * - `generation_options` — Controls what assets to generate.
+     *   Keys: `scenario_count`, `generate_audio`, `generate_files`, `generate_images`,
+     *   `generate_videos`, `generate_simulations`.
+     *
+     * **Optional sections:** `test_config`, `metadata`
+     *
+     * @example
+     * ```typescript
+     * const req = await client.createRequest("Loan Agent Eval", {
+     *   agent: {
+     *     name: "QuickLend Loan Officer",
+     *     description: "Helps applicants check credit and submit applications",
+     *     system_prompt: "You are a professional loan officer.",
+     *     tools: [
+     *       {
+     *         name: "check_credit_score",
+     *         description: "Pull applicant credit score and history",
+     *         parameters: {
+     *           type: "object",
+     *           required: ["applicant_id"],
+     *           properties: { applicant_id: { type: "string" } },
+     *         },
+     *       },
+     *     ],
+     *     accepted_inputs: { text: true, audio: false, file: false, image: false, video: false },
+     *   },
+     *   context: {
+     *     domain: "financial services",
+     *     use_case: "Applicants inquiring about loan eligibility and rates",
+     *     scenario_context: "A digital lending platform called QuickLend Financial",
+     *   },
+     *   generation_options: {
+     *     scenario_count: 5,
+     *     generate_audio: false,
+     *     generate_files: false,
+     *     generate_simulations: false,
+     *   },
+     *   test_config: {
+     *     num_variations: 5,
+     *     coverage: { happy_path: true, edge_cases: true, error_handling: true },
+     *   },
+     * });
+     * ```
+     */
     async createRequest(requestName, request, requestInputSchema, tenantId, requestorId) {
         AshrLabsClient._validateConfigStructure(request);
         if (requestInputSchema == null) {
@@ -305,6 +431,20 @@ export class AshrLabsClient {
         }
         return out;
     }
+    /**
+     * Generate a dataset: creates a request, waits for completion, and returns the dataset.
+     *
+     * **Prefer reusing existing datasets** with `EvalRunner.fromDataset()` instead of
+     * generating new ones each time. Only generate a new dataset when the agent's tools,
+     * inputs, or domain have changed.
+     *
+     * @param requestName - A name/title for the request.
+     * @param config - Generation config (same structure as `createRequest`).
+     * @param requestInputSchema - Optional JSON Schema. If omitted, tools are auto-populated from `config.agent.tools`.
+     * @param timeout - Max seconds to wait for generation (default 600).
+     * @param pollInterval - Seconds between status checks (default 5).
+     * @returns `[datasetId, datasetSource]` tuple.
+     */
     async generateDataset(requestName, config, requestInputSchema, timeout = 600, pollInterval = 5) {
         const enriched = AshrLabsClient._enrichConfig(config);
         const req = await this.createRequest(requestName, enriched, requestInputSchema);

package/dist/index.d.ts CHANGED Viewed

@@ -1,6 +1,9 @@
 export { AshrLabsClient } from "./client.js";
 export { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
-export type { User, Tenant, Session, Dataset, Run, Request, APIKey, ToolCall, ExpectedResponse, Action, Scenario, } from "./models.js";
+export type { User, Tenant, Session, Dataset, Run, Request, APIKey, ToolCall, ExpectedResponse, Action, Scenario, ObservabilityObservation, ObservabilityTrace, VmLogEntry, VmStream, KernelViewport, KernelActionData, KernelEventData, KernelVmMetadata, KernelVmStream, } from "./models.js";
+export { KERNEL_ACTION_TYPES, KERNEL_EVENT_TYPES, } from "./models.js";
+export type { KernelActionType, KernelEventType, } from "./models.js";
+export { Trace, Span, Generation } from "./tracing.js";
 export { RunBuilder, TestBuilder } from "./run-builder.js";
 export { stripMarkdown, tokenize, fuzzyStrMatch, extractToolArgs, compareToolArgs, textSimilarity, } from "./comparators.js";
 export { EvalRunner } from "./eval.js";

package/dist/index.js CHANGED Viewed

@@ -1,5 +1,7 @@
 export { AshrLabsClient } from "./client.js";
 export { AshrLabsError, AuthenticationError, AuthorizationError, NotFoundError, ValidationError, RateLimitError, ServerError, } from "./exceptions.js";
+export { KERNEL_ACTION_TYPES, KERNEL_EVENT_TYPES, } from "./models.js";
+export { Trace, Span, Generation } from "./tracing.js";
 export { RunBuilder, TestBuilder } from "./run-builder.js";
 export { stripMarkdown, tokenize, fuzzyStrMatch, extractToolArgs, compareToolArgs, textSimilarity, } from "./comparators.js";
 export { EvalRunner } from "./eval.js";

package/dist/models.d.ts CHANGED Viewed

@@ -47,6 +47,35 @@ export interface Run {
     runner?: number;
     result?: Record<string, unknown>;
 }
+export interface ObservabilityObservation {
+    id?: string;
+    name?: string;
+    type?: string;
+    parent_observation_id?: string | null;
+    input?: unknown | null;
+    output?: unknown | null;
+    metadata?: Record<string, unknown> | null;
+    model?: string | null;
+    usage?: {
+        input_tokens?: number;
+        output_tokens?: number;
+    } | null;
+    level?: "DEBUG" | "DEFAULT" | "WARNING" | "ERROR" | null;
+    status_message?: string | null;
+    start_time?: string | null;
+    end_time?: string | null;
+}
+export interface ObservabilityTrace {
+    id?: string;
+    name?: string;
+    user_id?: string | null;
+    session_id?: string | null;
+    metadata?: Record<string, unknown> | null;
+    tags?: string[];
+    created_at?: string | null;
+    output?: unknown | null;
+    observations?: ObservabilityObservation[];
+}
 export interface Request {
     id?: number;
     created_at?: string;
@@ -73,6 +102,76 @@ export interface RequestsListResponse extends ListResponse {
 export interface APIKeysListResponse extends ListResponse {
     api_keys: APIKey[];
 }
+export interface VmLogEntry {
+    ts?: number;
+    type?: string;
+    data?: Record<string, unknown>;
+}
+export interface KernelViewport {
+    width?: number;
+    height?: number;
+}
+export declare const KERNEL_ACTION_TYPES: readonly ["click_mouse", "move_mouse", "drag_mouse", "type_text", "press_key", "scroll", "screenshot"];
+export type KernelActionType = (typeof KERNEL_ACTION_TYPES)[number];
+export declare const KERNEL_EVENT_TYPES: readonly ["navigation", "log", "error", "invocation_state", "console", "network"];
+export type KernelEventType = (typeof KERNEL_EVENT_TYPES)[number];
+/** Data payload for a Kernel computer control action. */
+export interface KernelActionData {
+    x?: number;
+    y?: number;
+    button?: string;
+    click_type?: string;
+    num_clicks?: number;
+    smooth?: boolean;
+    path?: number[][];
+    text?: string;
+    delay?: number;
+    keys?: string[];
+    duration?: number;
+    hold_keys?: string[];
+    delta_x?: number;
+    delta_y?: number;
+    format?: string;
+    s3_key?: string;
+    duration_ms?: number;
+}
+/** Data payload for a Kernel event. */
+export interface KernelEventData {
+    url?: string;
+    message?: string;
+    level?: string;
+    code?: string;
+    details?: Record<string, unknown>[];
+    method?: string;
+    status?: number;
+    invocation_id?: string;
+    action_name?: string;
+    status_reason?: string;
+    output?: string;
+}
+export interface KernelVmMetadata {
+    live_view_url?: string;
+    cdp_ws_url?: string;
+    replay_id?: string;
+    replay_view_url?: string;
+    headless?: boolean;
+    stealth?: boolean;
+    viewport?: KernelViewport;
+}
+export interface KernelVmStream {
+    provider: "kernel";
+    session_id?: string;
+    duration_ms?: number;
+    logs?: VmLogEntry[];
+    metadata?: KernelVmMetadata;
+}
+export interface VmStream {
+    provider: string;
+    session_id?: string;
+    duration_ms?: number;
+    logs?: VmLogEntry[];
+    metadata?: Record<string, unknown>;
+}
 export interface ToolCall {
     name?: string;
     arguments_json?: string;

package/dist/models.js CHANGED Viewed

@@ -1 +1,20 @@
-export {};
+// ---- Kernel action types (computer control API) ----
+// Map to POST /browsers/{id}/computer/* endpoints.
+export const KERNEL_ACTION_TYPES = [
+    "click_mouse", // {x, y, button?, click_type?, num_clicks?}
+    "move_mouse", // {x, y, duration_ms?, smooth?}
+    "drag_mouse", // {path: [[x,y],...], button?, smooth?, duration_ms?}
+    "type_text", // {text, delay?}
+    "press_key", // {keys: string[], duration?, hold_keys?}
+    "scroll", // {x, y, delta_x?, delta_y?}
+    "screenshot", // {format?} — result may include s3_key or base64
+];
+// ---- Kernel event types (SSE streams + navigation) ----
+export const KERNEL_EVENT_TYPES = [
+    "navigation", // {url} — page navigation
+    "log", // {message} — from GET /browsers/{id}/logs SSE
+    "error", // {code, message, details?} — ErrorEvent
+    "invocation_state", // {invocation_id, status, action_name, output?}
+    "console", // {level, message} — browser console output
+    "network", // {method, url, status} — HTTP request observed
+];

package/dist/run-builder.d.ts CHANGED Viewed

@@ -6,12 +6,43 @@ export declare class TestBuilder {
     private _completedAt;
     private _actionResults;
     private _nextActionIndex;
+    private _vmStream;
     constructor(testId: string);
+    /** The test ID (matches the scenario ID from the dataset). */
+    get test_id(): string;
     start(): this;
     addUserFile(filePath: string, description: string, actionIndex?: number): this;
     addUserText(text: string, description: string, actionIndex?: number): this;
     addToolCall(expected: Record<string, unknown>, actual: Record<string, unknown>, matchStatus: string, divergenceNotes?: string | null, actionIndex?: number): this;
     addAgentResponse(expectedResponse: Record<string, unknown>, actualResponse: Record<string, unknown>, matchStatus: string, semanticSimilarity?: number | null, divergenceNotes?: string | null, actionIndex?: number): this;
+    /**
+     * Attach VM session logs to this test.
+     */
+    setVmStream(provider: string, opts?: {
+        sessionId?: string;
+        durationMs?: number;
+        logs?: Record<string, unknown>[];
+        metadata?: Record<string, unknown>;
+    }): this;
+    /**
+     * Attach a Kernel browser session to this test.
+     * Metadata fields map to Kernel's browser API response
+     * (see https://www.kernel.sh/docs).
+     */
+    setKernelVm(sessionId: string, opts?: {
+        durationMs?: number;
+        logs?: Record<string, unknown>[];
+        liveViewUrl?: string;
+        cdpWsUrl?: string;
+        replayId?: string;
+        replayViewUrl?: string;
+        headless?: boolean;
+        stealth?: boolean;
+        viewport?: {
+            width: number;
+            height: number;
+        };
+    }): this;
     complete(status?: string): this;
     build(): Record<string, unknown>;
     private _resolveIndex;
@@ -22,6 +53,8 @@ export declare class RunBuilder {
     private _completedAt;
     /** @internal */
     _tests: TestBuilder[];
+    /** The list of tests in this run. Use this to attach VM streams after eval. */
+    get tests(): TestBuilder[];
     start(): this;
     addTest(testId: string): TestBuilder;
     complete(status?: string): this;

package/dist/run-builder.js CHANGED Viewed

@@ -85,9 +85,14 @@ export class TestBuilder {
     _completedAt = null;
     _actionResults = [];
     _nextActionIndex = 0;
+    _vmStream = null;
     constructor(testId) {
         this._testId = testId;
     }
+    /** The test ID (matches the scenario ID from the dataset). */
+    get test_id() {
+        return this._testId;
+    }
     start() {
         this._status = "running";
         this._startedAt = now();
@@ -156,6 +161,50 @@ export class TestBuilder {
         this._actionResults.push(result);
         return this;
     }
+    /**
+     * Attach VM session logs to this test.
+     */
+    setVmStream(provider, opts) {
+        const vm = { provider };
+        if (opts?.sessionId != null)
+            vm.session_id = opts.sessionId;
+        if (opts?.durationMs != null)
+            vm.duration_ms = opts.durationMs;
+        if (opts?.logs != null)
+            vm.logs = opts.logs;
+        if (opts?.metadata != null)
+            vm.metadata = opts.metadata;
+        this._vmStream = vm;
+        return this;
+    }
+    /**
+     * Attach a Kernel browser session to this test.
+     * Metadata fields map to Kernel's browser API response
+     * (see https://www.kernel.sh/docs).
+     */
+    setKernelVm(sessionId, opts) {
+        const metadata = {};
+        if (opts?.liveViewUrl != null)
+            metadata.live_view_url = opts.liveViewUrl;
+        if (opts?.cdpWsUrl != null)
+            metadata.cdp_ws_url = opts.cdpWsUrl;
+        if (opts?.replayId != null)
+            metadata.replay_id = opts.replayId;
+        if (opts?.replayViewUrl != null)
+            metadata.replay_view_url = opts.replayViewUrl;
+        if (opts?.headless != null)
+            metadata.headless = opts.headless;
+        if (opts?.stealth != null)
+            metadata.stealth = opts.stealth;
+        if (opts?.viewport != null)
+            metadata.viewport = opts.viewport;
+        return this.setVmStream("kernel", {
+            sessionId,
+            durationMs: opts?.durationMs,
+            logs: opts?.logs,
+            metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
+        });
+    }
     complete(status = "completed") {
         this._status = status;
         this._completedAt = now();
@@ -171,6 +220,8 @@ export class TestBuilder {
             result.started_at = this._startedAt;
         if (this._completedAt)
             result.completed_at = this._completedAt;
+        if (this._vmStream)
+            result.vm_stream = this._vmStream;
         return result;
     }
     _resolveIndex(explicit) {
@@ -189,6 +240,10 @@ export class RunBuilder {
     _completedAt = null;
     /** @internal */
     _tests = [];
+    /** The list of tests in this run. Use this to attach VM streams after eval. */
+    get tests() {
+        return this._tests;
+    }
     start() {
         this._status = "running";
         this._startedAt = now();

package/dist/tracing.d.ts ADDED Viewed

@@ -0,0 +1,160 @@
+/**
+ * Production agent tracing for Ashr Labs Observability.
+ *
+ * **Production-safe:** tracing never throws or interferes with your agent.
+ * If the backend is unreachable, `trace.end()` resolves with an error object
+ * instead of rejecting.
+ *
+ * @example Manual instrumentation
+ * ```typescript
+ * const trace = client.trace("handle-ticket", { userId: "user_42" });
+ *
+ * const gen = trace.generation("classify", { model: "claude-sonnet-4-6", input: [...] });
+ * gen.end({ output: { intent: "reset" }, usage: { input_tokens: 50, output_tokens: 12 } });
+ *
+ * const tool = trace.span("tool:reset_password", { input: { user_id: "42" } });
+ * tool.end({ output: { success: true } });
+ *
+ * await trace.end({ output: { resolution: "password_reset" } });
+ * ```
+ *
+ * @example Using `wrap()` for automatic span lifecycle
+ * ```typescript
+ * const result = await trace.span("tool:search", { input: { q: "..." } }).wrap(async (span) => {
+ *   const data = await search(...);
+ *   span.end({ output: data });
+ *   return data;
+ * });
+ * // If the callback throws, the span auto-ends with level="ERROR" and the error re-throws.
+ * ```
+ */
+import type { AshrLabsClient } from "./client.js";
+interface ObservationData {
+    id: string;
+    type: "span" | "generation" | "event";
+    name: string;
+    parent_observation_id: string | null;
+    start_time: string;
+    end_time?: string | null;
+    input?: unknown | null;
+    output?: unknown | null;
+    metadata?: Record<string, unknown> | null;
+    model?: string | null;
+    usage?: {
+        input_tokens?: number;
+        output_tokens?: number;
+    } | null;
+    level?: string | null;
+    status_message?: string | null;
+}
+export declare class Span {
+    readonly id: string;
+    protected _trace: Trace;
+    protected _data: ObservationData;
+    protected _ended: boolean;
+    constructor(trace: Trace, name: string, opts?: {
+        parentId?: string | null;
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+        level?: string;
+    });
+    /** Create a child span nested under this span. */
+    span(name: string, opts?: {
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+    }): Span;
+    /** Create a child generation nested under this span. */
+    generation(name: string, opts?: {
+        model?: string;
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+    }): Generation;
+    /** Record a point-in-time event under this span. */
+    event(name: string, opts?: {
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+        level?: string;
+    }): void;
+    /** Mark this span as complete. */
+    end(opts?: {
+        output?: unknown;
+        statusMessage?: string;
+        level?: string;
+    }): void;
+    /**
+     * Run a callback within this span's lifecycle.
+     * Auto-ends the span when the callback completes.
+     * If the callback throws, the span is ended with `level="ERROR"` and the error re-throws.
+     */
+    wrap<T>(fn: (span: this) => T | Promise<T>): Promise<T>;
+}
+export declare class Generation extends Span {
+    constructor(trace: Trace, name: string, opts?: {
+        parentId?: string | null;
+        model?: string;
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+    });
+    /** Mark this generation as complete. */
+    end(opts?: {
+        output?: unknown;
+        usage?: {
+            input_tokens?: number;
+            output_tokens?: number;
+        };
+        statusMessage?: string;
+        level?: string;
+    }): void;
+}
+export declare class Trace {
+    /** @internal */
+    _observations: ObservationData[];
+    private _client;
+    private _name;
+    private _userId;
+    private _sessionId;
+    private _metadata;
+    private _tags;
+    private _traceId;
+    private _flushed;
+    constructor(client: AshrLabsClient, name: string, opts?: {
+        userId?: string;
+        sessionId?: string;
+        metadata?: Record<string, unknown>;
+        tags?: string[];
+    });
+    /** The server-assigned trace ID (available after `end()` resolves). */
+    get traceId(): string | null;
+    /** Create a top-level span in this trace. */
+    span(name: string, opts?: {
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+    }): Span;
+    /** Create a top-level generation (LLM call) in this trace. */
+    generation(name: string, opts?: {
+        model?: string;
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+    }): Generation;
+    /** Record a point-in-time event in this trace. */
+    event(name: string, opts?: {
+        input?: unknown;
+        metadata?: Record<string, unknown>;
+        level?: string;
+    }): void;
+    /**
+     * Run a callback within this trace's lifecycle.
+     * Auto-flushes the trace when the callback completes.
+     */
+    wrap<T>(fn: (trace: this) => T | Promise<T>): Promise<T>;
+    /**
+     * Flush the trace to the Ashr Labs backend.
+     *
+     * **Never rejects.** If the backend is unreachable, logs the error
+     * and resolves with `{ status: "error", message: "..." }`.
+     */
+    end(opts?: {
+        output?: unknown;
+    }): Promise<Record<string, unknown>>;
+}
+export {};

package/dist/tracing.js ADDED Viewed

@@ -0,0 +1,229 @@
+/**
+ * Production agent tracing for Ashr Labs Observability.
+ *
+ * **Production-safe:** tracing never throws or interferes with your agent.
+ * If the backend is unreachable, `trace.end()` resolves with an error object
+ * instead of rejecting.
+ *
+ * @example Manual instrumentation
+ * ```typescript
+ * const trace = client.trace("handle-ticket", { userId: "user_42" });
+ *
+ * const gen = trace.generation("classify", { model: "claude-sonnet-4-6", input: [...] });
+ * gen.end({ output: { intent: "reset" }, usage: { input_tokens: 50, output_tokens: 12 } });
+ *
+ * const tool = trace.span("tool:reset_password", { input: { user_id: "42" } });
+ * tool.end({ output: { success: true } });
+ *
+ * await trace.end({ output: { resolution: "password_reset" } });
+ * ```
+ *
+ * @example Using `wrap()` for automatic span lifecycle
+ * ```typescript
+ * const result = await trace.span("tool:search", { input: { q: "..." } }).wrap(async (span) => {
+ *   const data = await search(...);
+ *   span.end({ output: data });
+ *   return data;
+ * });
+ * // If the callback throws, the span auto-ends with level="ERROR" and the error re-throws.
+ * ```
+ */
+function now() { // Current time as an ISO 8601 string (UTC), used for observation start/end timestamps.
+    return new Date().toISOString();
+}
+let _counter = 0; // Module-wide counter; incremented on every makeId() call.
+function makeId() { // Builds an observation id: base-36 millisecond timestamp + base-36 counter + up to 6 random base-36 characters.
+    _counter += 1;
+    return `${Date.now().toString(36)}${(_counter).toString(36)}${Math.random().toString(36).slice(2, 8)}`;
+}
+export class Span { // A timed observation inside a trace; may parent child spans, generations and events.
+    id; // Client-generated id (makeId()); children reference it as parent_observation_id.
+    _trace; // Owning trace whose _observations array receives this span's record.
+    _data; // The observation record itself; mutated in place by end().
+    _ended = false; // Set by end(); consulted by wrap() to avoid ending twice.
+    constructor(trace, name, opts = {}) { // opts: parentId, input, metadata, level (each defaults to null in the record).
+        this.id = makeId();
+        this._trace = trace;
+        this._data = {
+            id: this.id,
+            type: "span",
+            name,
+            parent_observation_id: opts.parentId ?? null,
+            start_time: now(),
+            input: opts.input ?? null,
+            metadata: opts.metadata ?? null,
+            level: opts.level ?? null,
+        };
+        trace._observations.push(this._data); // Registered at creation; end() later updates this same object.
+    }
+    /** Create a child span nested under this span. */
+    span(name, opts = {}) {
+        return new Span(this._trace, name, { ...opts, parentId: this.id });
+    }
+    /** Create a child generation nested under this span. */
+    generation(name, opts = {}) {
+        return new Generation(this._trace, name, { ...opts, parentId: this.id });
+    }
+    /** Record a point-in-time event under this span. */
+    event(name, opts = {}) {
+        this._trace._observations.push({
+            id: makeId(),
+            type: "event",
+            name,
+            parent_observation_id: this.id,
+            start_time: now(),
+            input: opts.input ?? null,
+            metadata: opts.metadata ?? null,
+            level: opts.level ?? null,
+        });
+    }
+    /**
+     * Mark this span as complete.
+     *
+     * Stamps `end_time` with the current time. `output`, `statusMessage` and
+     * `level` are written only when provided (not `undefined`). There is no
+     * guard against repeated calls: calling `end()` again overwrites `end_time`.
+     */
+    end(opts = {}) {
+        this._data.end_time = now();
+        if (opts.output !== undefined)
+            this._data.output = opts.output;
+        if (opts.statusMessage !== undefined)
+            this._data.status_message = opts.statusMessage;
+        if (opts.level !== undefined)
+            this._data.level = opts.level;
+        this._ended = true;
+    }
+    /**
+     * Run a callback within this span's lifecycle.
+     * Auto-ends the span when the callback completes.
+     * If the callback throws, the span is ended with `level="ERROR"` and the error re-throws.
+     *
+     * The callback receives this span as its only argument and may be sync or
+     * async. If it already called `end()`, the span is not ended a second time.
+     * Thrown values that are not `Error` instances are recorded via `String(e)`.
+     */
+    async wrap(fn) {
+        try {
+            const result = await fn(this);
+            if (!this._ended)
+                this.end();
+            return result;
+        }
+        catch (e) {
+            if (!this._ended) {
+                this.end({
+                    statusMessage: e instanceof Error ? `${e.name}: ${e.message}` : String(e),
+                    level: "ERROR",
+                });
+            }
+            throw e;
+        }
+    }
+}
+export class Generation extends Span { // A span specialised for an LLM call; additionally records `model` and `usage`.
+    constructor(trace, name, opts = {}) {
+        super(trace, name, opts);
+        this._data.type = "generation"; // Overrides the "span" type assigned by the Span constructor.
+        if (opts.model) // Truthiness check: an empty-string model is not recorded.
+            this._data.model = opts.model;
+    }
+    /**
+     * Mark this generation as complete.
+     *
+     * Same as `Span.end()`, plus an optional `usage` value that is stored
+     * on the record as-is. Fields are written only when provided.
+     */
+    end(opts = {}) {
+        this._data.end_time = now();
+        if (opts.output !== undefined)
+            this._data.output = opts.output;
+        if (opts.usage !== undefined)
+            this._data.usage = opts.usage;
+        if (opts.statusMessage !== undefined)
+            this._data.status_message = opts.statusMessage;
+        if (opts.level !== undefined)
+            this._data.level = opts.level;
+        this._ended = true;
+    }
+}
+export class Trace { // Collects observations in memory and sends them to the backend in one request on end().
+    /**
+     * Observation records in creation order; spans, generations and events push here.
+     * @internal
+     */
+    _observations = [];
+    _client; // Client used by end(); only its _makeRequest(action, payload) method is called here.
+    _name;
+    _userId;
+    _sessionId;
+    _metadata;
+    _tags;
+    _traceId = null; // Filled from the backend response in end().
+    _flushed = false; // Set at the start of end(); wrap() checks it to avoid a second flush.
+    constructor(client, name, opts = {}) { // opts: userId, sessionId, metadata (default null) and tags (default []).
+        this._client = client;
+        this._name = name;
+        this._userId = opts.userId ?? null;
+        this._sessionId = opts.sessionId ?? null;
+        this._metadata = opts.metadata ?? null;
+        this._tags = opts.tags ? [...opts.tags] : []; // Shallow copy: later changes to the caller's array do not affect the trace.
+    }
+    /**
+     * The server-assigned trace ID (available after `end()` resolves).
+     * `null` before that, and it stays `null` when the flush fails or the
+     * response has no `trace_id`.
+     */
+    get traceId() {
+        return this._traceId;
+    }
+    /** Create a top-level span in this trace. */
+    span(name, opts = {}) {
+        return new Span(this, name, opts);
+    }
+    /** Create a top-level generation (LLM call) in this trace. */
+    generation(name, opts = {}) {
+        return new Generation(this, name, opts);
+    }
+    /** Record a point-in-time event in this trace. */
+    event(name, opts = {}) {
+        this._observations.push({
+            id: makeId(),
+            type: "event",
+            name,
+            parent_observation_id: null,
+            start_time: now(),
+            input: opts.input ?? null,
+            metadata: opts.metadata ?? null,
+            level: opts.level ?? null,
+        });
+    }
+    /**
+     * Run a callback within this trace's lifecycle.
+     * Auto-flushes the trace when the callback completes.
+     *
+     * The callback receives this trace as its only argument. If it throws,
+     * the trace is flushed with `output: { error: "..." }` and the error is
+     * re-thrown. No extra flush happens if the callback already called `end()`.
+     */
+    async wrap(fn) {
+        try {
+            const result = await fn(this);
+            if (!this._flushed)
+                await this.end();
+            return result;
+        }
+        catch (e) {
+            if (!this._flushed) {
+                await this.end({
+                    output: { error: e instanceof Error ? `${e.name}: ${e.message}` : String(e) },
+                });
+            }
+            throw e;
+        }
+    }
+    /**
+     * Flush the trace to the Ashr Labs backend.
+     *
+     * **Never rejects.** If the backend is unreachable, logs the error
+     * and resolves with `{ status: "error", message: "..." }`.
+     *
+     * NOTE(review): this method does not check `_flushed` itself, so calling
+     * `end()` more than once issues another ingest request each time.
+     * Confirm whether that is intended.
+     */
+    async end(opts = {}) {
+        this._flushed = true; // Marked before the request, so a failed flush still counts as flushed for wrap().
+        const payload = {
+            trace: {
+                name: this._name,
+                user_id: this._userId,
+                session_id: this._sessionId,
+                metadata: this._metadata,
+                tags: this._tags,
+                observations: this._observations, // Sent as-is; spans never ended carry no end_time.
+                ...(opts.output !== undefined ? { output: opts.output } : {}),
+            },
+        };
+        try {
+            const response = await this._client._makeRequest("ingest_observability_trace", payload);
+            this._traceId = response.trace_id ?? null;
+            return response;
+        }
+        catch (e) {
+            const message = e instanceof Error ? e.message : String(e);
+            console.warn(`[ashr_labs] Failed to flush trace "${this._name}": ${message}`);
+            return { status: "error", message };
+        }
+    }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ashr-labs",
-  "version": "0.4.2",
+  "version": "0.4.3",
   "description": "TypeScript SDK for the Ashr Labs API — agent testing & evaluation",
   "type": "module",
   "main": "./dist/index.js",