npm - @yul-labs/agent-relay - Versions diffs - 0.1.1 → 0.1.3 - Mend

@yul-labs/agent-relay 0.1.1 → 0.1.3

This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (5)

package/dist/index.d.ts CHANGED Viewed

@@ -128,7 +128,9 @@ interface ApiDeciderOptions {
      * Default 2048. Reasoning models emit a long chain-of-thought before the JSON
      * answer, so a too-small cap (e.g. a few hundred) truncates them mid-thought,
      * leaving an empty `content` and an unparseable reply → safe-deny. Raise it
-     * (not lower) for verbose reasoning models.
+     * (not lower) for verbose reasoning models. Values below
+     * {@link MIN_API_MAX_TOKENS} are clamped UP to it — a cap that small can't fit
+     * even the JSON answer once a reasoning model has spent budget on its CoT.
      */
     maxTokens?: number;
     temperature?: number;
@@ -259,6 +261,58 @@ interface AgentRunInput {
     /** Arbitrary extra options interpreted by specific adapters (e.g. fake). */
     options?: Record<string, unknown>;
 }
+/**
+ * Resource usage for a run, exposed under `result.meta.usage`. Every field is
+ * optional. Token counts come from the agent's own session transcript/rollout
+ * JSONL (the AUTHORITATIVE, device-independent source — written on every run
+ * regardless of TUI/status-line settings); `contextPercent` /
+ * `subscriptionSessionCostUsd` are best-effort extras scraped from the TUI status
+ * line when it is enabled. `costUsd` is COMPUTED from the token counts × the
+ * model's list price (see core/pricing.ts). `source` records which path produced
+ * the token figures.
+ */
+interface AgentUsage {
+    /**
+     * Provenance of the token counts: "transcript" = parsed from the agent's
+     * session log (authoritative); "status-line" = scraped from the TUI (only
+     * present when the user has a status line that shows usage). Token fields are
+     * trustworthy only when `source` is "transcript".
+     */
+    source?: "transcript" | "status-line";
+    /** Model id when known, e.g. "claude-opus-4-8". */
+    model?: string;
+    /** Cumulative non-cached input/prompt tokens. */
+    inputTokens?: number;
+    /** Cumulative output/completion tokens. */
+    outputTokens?: number;
+    /** Cumulative tokens served from the prompt cache (cheap reads). */
+    cachedInputTokens?: number;
+    /** Cumulative tokens spent CREATING prompt-cache entries (Claude). */
+    cacheCreationTokens?: number;
+    /** Cumulative reasoning tokens, when the model reports them (Codex / o-series). */
+    reasoningTokens?: number;
+    /** Total tokens (the agent's own total when given, else the sum of the above). */
+    totalTokens?: number;
+    /**
+     * API-equivalent REFERENCE cost in USD, COMPUTED as Σ(tokens × the model's list
+     * price) — see core/pricing.ts. This is what the run WOULD bill via the API
+     * (or `claude -p` / the Agent SDK). agent-relay drives the agent's INTERACTIVE
+     * TUI, which on a Claude subscription is covered by the plan — so for those runs
+     * the real marginal cost is ~0 and this is a SHADOW cost, not the amount charged.
+     * `null` when the model's price is unknown (so it isn't confused with a real 0).
+     */
+    costUsd?: number | null;
+    /** Context-window usage as a percent, when surfaced by the status line. */
+    contextPercent?: number;
+    /**
+     * The NOMINAL session cost the agent prints in its status line ("Session $X").
+     * On Team/Max subscription seats this reads 0 / is not the API price — use
+     * {@link costUsd} for an actual cost estimate.
+     */
+    subscriptionSessionCostUsd?: number;
+    /** Raw status-line snippet the scraped extras came from (status-line only). */
+    raw?: string;
+}
 /** The terminal result an adapter returns from {@link AgentAdapter.run}. */
 interface AgentRunResult {
     /** Whether the adapter believes the run finished successfully. */
@@ -459,7 +513,9 @@ declare const defaultsSchema: z.ZodObject<{
 type RelayDefaults = z.infer<typeof defaultsSchema>;
 /** Resolve the effective approval mode (adapter override > defaults > legacy). */
 declare function resolveApprovalMode(defaults: RelayDefaults, adapter?: AdapterConfig): ApprovalMode;
-/** Resolve the effective sandbox level (adapter override > defaults > default). */
+/** Resolve the effective sandbox level (adapter override > defaults > default).
+ *  The project default is full bypass (`danger-full-access`) so unattended runs
+ *  "just work"; tighten it per `defaults.sandbox` / `adapters.<name>.sandbox`. */
 declare function resolveSandbox(defaults: RelayDefaults, adapter?: AdapterConfig): SandboxLevel;
 declare const hooksSchema: z.ZodObject<{
     /** Shell command run just before the agent starts. */
@@ -635,6 +691,27 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
         apiKey?: string | undefined;
         maxTokens?: number | undefined;
     }>>;
+    /** Override the built-in per-model token pricing (USD per 1M tokens). */
+    pricing: z.ZodOptional<z.ZodArray<z.ZodObject<{
+        /** Regex (case-insensitive) matched against the model id; first match wins. */
+        match: z.ZodString;
+        input: z.ZodNumber;
+        output: z.ZodNumber;
+        cacheWrite: z.ZodOptional<z.ZodNumber>;
+        cacheRead: z.ZodOptional<z.ZodNumber>;
+    }, "strict", z.ZodTypeAny, {
+        match: string;
+        input: number;
+        output: number;
+        cacheWrite?: number | undefined;
+        cacheRead?: number | undefined;
+    }, {
+        match: string;
+        input: number;
+        output: number;
+        cacheWrite?: number | undefined;
+        cacheRead?: number | undefined;
+    }>, "many">>;
     /** Optional shell-command lifecycle hooks. */
     hooks: z.ZodOptional<z.ZodObject<{
         /** Shell command run just before the agent starts. */
@@ -684,6 +761,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
         apiKey?: string | undefined;
         maxTokens?: number | undefined;
     } | undefined;
+    pricing?: {
+        match: string;
+        input: number;
+        output: number;
+        cacheWrite?: number | undefined;
+        cacheRead?: number | undefined;
+    }[] | undefined;
     hooks?: {
         onStart?: string | undefined;
         onComplete?: string | undefined;
@@ -724,6 +808,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
         apiKey?: string | undefined;
         maxTokens?: number | undefined;
     } | undefined;
+    pricing?: {
+        match: string;
+        input: number;
+        output: number;
+        cacheWrite?: number | undefined;
+        cacheRead?: number | undefined;
+    }[] | undefined;
     hooks?: {
         onStart?: string | undefined;
         onComplete?: string | undefined;
@@ -764,6 +855,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
         apiKey?: string | undefined;
         maxTokens?: number | undefined;
     } | undefined;
+    pricing?: {
+        match: string;
+        input: number;
+        output: number;
+        cacheWrite?: number | undefined;
+        cacheRead?: number | undefined;
+    }[] | undefined;
     hooks?: {
         onStart?: string | undefined;
         onComplete?: string | undefined;
@@ -804,6 +902,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
         apiKey?: string | undefined;
         maxTokens?: number | undefined;
     } | undefined;
+    pricing?: {
+        match: string;
+        input: number;
+        output: number;
+        cacheWrite?: number | undefined;
+        cacheRead?: number | undefined;
+    }[] | undefined;
     hooks?: {
         onStart?: string | undefined;
         onComplete?: string | undefined;
@@ -854,6 +959,64 @@ declare class SessionNotFoundError extends AgentRelayError {
     constructor(sessionId: string);
 }
+/**
+ * Per-model token pricing → an API-equivalent USD cost for a run's token usage.
+ *
+ * agent-relay reads exact token counts from the agent's transcript (see
+ * `claude-session.ts` / `codex-session.ts`), but the transcript carries no dollar
+ * figure, and the TUI "Session $" line is a NOMINAL/subscription number (often $0
+ * on Team/Max seats) — NOT the API-equivalent cost. So we compute
+ * `cost = Σ(tokens × list rate)` from a built-in price table (overridable via
+ * `config.pricing`). Rates are USD per 1M tokens and are LIST prices, so the
+ * result is an ESTIMATE — override the table when your actual billing differs.
+ *
+ * IMPORTANT: this is a REFERENCE (shadow) cost — what the run would bill via the
+ * API. agent-relay drives the agent's INTERACTIVE TUI, which on a Claude
+ * subscription is covered by the plan (the real marginal cost is ~0). Only the
+ * non-interactive paths (`claude -p` / the Agent SDK — and, from 2026-06-15, no
+ * longer counted against subscription limits) bill at these rates. So treat
+ * `costUsd` as "what this would cost via API", not necessarily what you're charged.
+ */
+interface ModelPricing {
+    /** USD per 1M input (prompt) tokens. */
+    input: number;
+    /** USD per 1M output (completion) tokens. */
+    output: number;
+    /** USD per 1M cache-CREATION tokens (defaults to `input` when unset). */
+    cacheWrite?: number;
+    /** USD per 1M cache-READ tokens (defaults to 0 when unset). */
+    cacheRead?: number;
+}
+interface PricingRule {
+    /** Matched (case-insensitive) against the model id — first match wins. */
+    match: RegExp;
+    pricing: ModelPricing;
+}
+/**
+ * Built-in Anthropic list prices (USD / 1M tokens). These are estimates that
+ * drift as pricing changes — pass `config.pricing` to override per model. Codex
+ * (OpenAI) models aren't listed here, so a Codex run reports `costUsd: null`
+ * unless you add a rule.
+ */
+declare const DEFAULT_PRICING: PricingRule[];
+/** Resolve the price for a model id (overrides first, then the built-in table). */
+declare function pricingForModel(model: string | undefined, overrides?: PricingRule[]): ModelPricing | undefined;
+/** Token usage shape needed to price a run. */
+interface CostUsageInput {
+    model?: string;
+    inputTokens?: number;
+    outputTokens?: number;
+    cacheCreationTokens?: number;
+    cachedInputTokens?: number;
+}
+/**
+ * API-equivalent USD cost for a run's token usage, or `null` when the model's
+ * price is unknown — so callers WARN instead of reporting a misleading `0`.
+ * Assumes Claude-style additive cache accounting (input excludes cached reads;
+ * cache reads are billed separately at the cheap `cacheRead` rate).
+ */
+declare function computeCostUsd(usage: CostUsageInput, overrides?: PricingRule[]): number | null;
 /** Session metadata persistence: create, save, load, list. */
 interface CreateSessionInput {
@@ -1289,6 +1452,13 @@ interface PtySessionOptions {
      * think/build is never mistaken for "done".
      */
     workingPattern?: RegExp;
+    /**
+     * Optional per-adapter scrape of the (ANSI-cleaned) screen into structured
+     * usage (tokens / context / cost). Called on each settle; the latest non-empty
+     * result is merged and attached to `result.meta.usage`. Vendor-specific, so the
+     * adapter supplies it — the session loop itself stays vendor-agnostic.
+     */
+    scrapeUsage?: (cleanedText: string) => AgentUsage | undefined;
     /** Keys to send to exit the TUI on completion (e.g. double Ctrl-C). */
     quitKeys?: string;
     /** Optional text to type once the UI is ready (for TUIs needing typed input). */
@@ -1355,6 +1525,13 @@ interface InteractiveAdapterConfig {
     setup?: (input: AgentRunInput) => SetupStep[] | undefined;
     detector?: PromptDetectorOptions;
     keymap?: PtyKeymap;
+    /**
+     * Optional per-adapter scrape of the agent's TUI status line into structured
+     * usage (tokens / context / cost), surfaced as `result.meta.usage`. Heuristic
+     * and vendor-specific, so it lives in the adapter, not the vendor-agnostic
+     * session loop. Omit it for adapters with no parseable status line.
+     */
+    scrapeUsage?: (cleanedText: string) => AgentUsage | undefined;
     completionPattern?: RegExp;
     completionIdleMs?: number;
     /** "Agent is working" indicator that suppresses completion (see PtySession). */
@@ -1387,28 +1564,46 @@ declare class InteractivePtyAdapter implements AgentAdapter {
  * AUTONOMY: by default Claude runs with `--dangerously-skip-permissions` so it
  * acts without per-action prompts. The {@link Decider} still handles the prompts
  * that appear anyway (the directory-trust menu, etc.). `approvalPolicy: "gated"`
- * uses `--permission-mode acceptEdits` so Claude asks more and the decider sees
- * those; `"readonly"` uses `--permission-mode plan`. The prompt is a positional
- * arg so the session starts immediately.
+ * uses `--permission-mode default` — Claude's normal interactive mode where it
+ * ASKS before each edit/command, so those approvals are routed to the decider.
+ * (NOT `acceptEdits`, which silently auto-approves edits and so never consults
+ * the decider on them.) `"readonly"` uses `--permission-mode plan`. The prompt is
+ * a positional arg so the session starts immediately.
  */
 interface ClaudeInteractiveOptions {
     command?: string;
     env?: Record<string, string>;
     now?: () => Date;
+    /** Override Claude's projects root (~/.claude/projects) — for tests. */
+    projectsDir?: string;
 }
 declare class ClaudeInteractiveAdapter extends InteractivePtyAdapter {
+    private readonly clock;
+    /** Override the projects root (~/.claude/projects) for tests. */
+    private readonly projectsDir?;
     constructor(opts?: ClaudeInteractiveOptions);
+    /**
+     * Run Claude, then read AUTHORITATIVE token usage from its session transcript
+     * (~/.claude/projects/<cwd>/<id>.jsonl) and surface it as `meta.usage`. This is
+     * device-independent — it works regardless of whether the user has a usage
+     * status line — and overwrites the best-effort status-line scrape's token
+     * figures while keeping its context%/cost extras. Best-effort: if no transcript
+     * is found, the status-line usage (if any) is left as-is.
+     */
+    run(input: AgentRunInput, ctx: AdapterRunContext): Promise<AgentRunResult>;
     static fromConfig(config: AdapterConfig): ClaudeInteractiveAdapter;
 }
 /**
  * Codex driven interactively in a PTY. The project's concept is PURE AUTONOMY:
- * by default Codex runs with `-a never` (never ask) within the chosen sandbox,
- * so it just works. The {@link Decider} still handles the prompts that appear
- * anyway (the directory-trust dialog, etc.). `approvalPolicy: "gated"` switches
- * Codex to `-a on-request` so the decider sees each action; `"readonly"` runs it
- * read-only. The prompt is a positional arg so the TUI starts immediately.
+ * by default Codex runs with `-a never` (never ask) and `-s danger-full-access`
+ * (full bypass — the project default), so it just works unattended. The
+ * {@link Decider} still handles the prompts that appear anyway (the directory-
+ * trust dialog, etc.). `approvalPolicy: "gated"` switches Codex to `-a on-request`
+ * so the decider sees each action; `"readonly"` runs it read-only. Tighten the
+ * sandbox with `defaults.sandbox` / `adapters.codex.sandbox` (e.g. workspace-write).
+ * The prompt is a positional arg so the TUI starts immediately.
  */
 interface CodexInteractiveOptions {
@@ -1424,11 +1619,12 @@ declare class CodexInteractiveAdapter extends InteractivePtyAdapter {
     private readonly sessionsDir?;
     constructor(opts?: CodexInteractiveOptions);
     /**
-     * Run Codex, then capture its NATIVE session id (the rollout UUID) for this
-     * cwd and attach it to the result's `sessionRef` so the runner persists it and
-     * a later resume can use `codex resume <id> "<prompt>"`. Capture is best-effort:
-     * if no rollout matches (or any I/O fails) the result is returned unchanged, so
-     * the run still resumes via the `--last` fallback.
+     * Run Codex, then read its rollout for this cwd to capture (a) the NATIVE
+     * session id (the rollout UUID) for `sessionRef` so a later resume can use
+     * `codex resume <id> "<prompt>"`, and (b) authoritative token usage for
+     * `meta.usage` (device-independent — from Codex's own log, not the TUI). Both
+     * are best-effort: if no rollout matches (or any I/O fails) the result is
+     * returned unchanged, so the run still resumes via the `--last` fallback.
      */
     run(input: AgentRunInput, ctx: AdapterRunContext): Promise<AgentRunResult>;
     static fromConfig(config: AdapterConfig): CodexInteractiveAdapter;
@@ -1695,4 +1891,4 @@ declare function cleanTerminalText(input: string): string;
 /** Return the last `n` non-empty lines of cleaned text. */
 declare function tailLines(text: string, n: number): string[];
-export { type AbortReason, type AdapterAvailability, type AdapterConfig, type AdapterFactory, type AdapterListItem, type AdapterMode, AdapterRegistry, type AdapterRunContext, type AgentAdapter, type AgentAdapterDefinition, type AgentErrorInfo, type AgentEvent, type AgentEventType, AgentRelayError, type AgentRunInput, type AgentRunResult, type AgentSessionRef, AlwaysApproveDecider, ApiDecider, type ApprovalMode, BUILTIN_ADAPTER_DEFINITIONS, CONFIG_FILENAME, ClaudeInteractiveAdapter, CodexInteractiveAdapter, CommandDecider, type CommandPreview, type CompletionContext, type CompletionDetector, CompositeCompletionDetector, ConfigError, type CreateSessionInput, DEFAULT_DENY_PATTERNS, type Decider, type DeciderConfig, type DeciderConfigSchema, type DeciderFlags, type DecisionAction, DefaultCompletionDetector, DefaultKeymap, type DetectedPrompt, type DoctorReport, type FakeAdapterOptions, FakeAgentAdapter, FunctionDecider, type HooksConfig, type InitResult, type InteractionDecision, type InteractionKind, type InteractionRequest, type InteractiveAdapterConfig, InteractivePtyAdapter, OutputPatternDetector, PromptDetector, type PromptDetectorOptions, type PruneOptions, type PruneResult, type PtyKeymap, type PtySessionOptions, type RelayConfig, type RelayDefaults, type ResumeCommandResult, RuleDecider, type RunHooks, RunLogger, type RunLoggerOptions, type RunOutcome, type RunnerOptions, type SandboxLevel, type SessionListItem, SessionManager, type SessionMetadata, SessionNotFoundError, type SessionStatus, type ShellHookContext, type ShellHooks, UnknownAdapterError, adapterConfigSchema, approvalPolicySchema, cleanTerminalText, configPath, configSchema, createAdapterFactory, createDecider, createDefaultConfig, deciderConfigFromFlags, deciderSchema, defaultRegistry, defaultsSchema, hooksSchema, listAdapters, listSessions, loadConfig, loadConfigOrDefault, parseCheckbox, parseConfig, parseDecisionReply, pruneSessions, renderDecisionPrompt, resolveApprovalMode, resolvePrompt, 
resolveSandbox, resumeCommand, runAgent, runCommand, runDoctor, runInit, runPtySession, runShellHook, sandboxSchema, saveConfig, stringifyConfig, stripAnsi, tailLines };
+export { type AbortReason, type AdapterAvailability, type AdapterConfig, type AdapterFactory, type AdapterListItem, type AdapterMode, AdapterRegistry, type AdapterRunContext, type AgentAdapter, type AgentAdapterDefinition, type AgentErrorInfo, type AgentEvent, type AgentEventType, AgentRelayError, type AgentRunInput, type AgentRunResult, type AgentSessionRef, type AgentUsage, AlwaysApproveDecider, ApiDecider, type ApprovalMode, BUILTIN_ADAPTER_DEFINITIONS, CONFIG_FILENAME, ClaudeInteractiveAdapter, CodexInteractiveAdapter, CommandDecider, type CommandPreview, type CompletionContext, type CompletionDetector, CompositeCompletionDetector, ConfigError, type CreateSessionInput, DEFAULT_DENY_PATTERNS, DEFAULT_PRICING, type Decider, type DeciderConfig, type DeciderConfigSchema, type DeciderFlags, type DecisionAction, DefaultCompletionDetector, DefaultKeymap, type DetectedPrompt, type DoctorReport, type FakeAdapterOptions, FakeAgentAdapter, FunctionDecider, type HooksConfig, type InitResult, type InteractionDecision, type InteractionKind, type InteractionRequest, type InteractiveAdapterConfig, InteractivePtyAdapter, type ModelPricing, OutputPatternDetector, type PricingRule, PromptDetector, type PromptDetectorOptions, type PruneOptions, type PruneResult, type PtyKeymap, type PtySessionOptions, type RelayConfig, type RelayDefaults, type ResumeCommandResult, RuleDecider, type RunHooks, RunLogger, type RunLoggerOptions, type RunOutcome, type RunnerOptions, type SandboxLevel, type SessionListItem, SessionManager, type SessionMetadata, SessionNotFoundError, type SessionStatus, type ShellHookContext, type ShellHooks, UnknownAdapterError, adapterConfigSchema, approvalPolicySchema, cleanTerminalText, computeCostUsd, configPath, configSchema, createAdapterFactory, createDecider, createDefaultConfig, deciderConfigFromFlags, deciderSchema, defaultRegistry, defaultsSchema, hooksSchema, listAdapters, listSessions, loadConfig, loadConfigOrDefault, parseCheckbox, parseConfig, 
parseDecisionReply, pricingForModel, pruneSessions, renderDecisionPrompt, resolveApprovalMode, resolvePrompt, resolveSandbox, resumeCommand, runAgent, runCommand, runDoctor, runInit, runPtySession, runShellHook, sandboxSchema, saveConfig, stringifyConfig, stripAnsi, tailLines };