@yul-labs/agent-relay 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -128,7 +128,9 @@ interface ApiDeciderOptions {
128
128
  * Default 2048. Reasoning models emit a long chain-of-thought before the JSON
129
129
  * answer, so a too-small cap (e.g. a few hundred) truncates them mid-thought,
130
130
  * leaving an empty `content` and an unparseable reply → safe-deny. Raise it
131
- * (not lower) for verbose reasoning models.
131
+ * (not lower) for verbose reasoning models. Values below
132
+ * {@link MIN_API_MAX_TOKENS} are clamped UP to it — a cap that small can't fit
133
+ * even the JSON answer once a reasoning model has spent budget on its CoT.
132
134
  */
133
135
  maxTokens?: number;
134
136
  temperature?: number;
@@ -259,6 +261,58 @@ interface AgentRunInput {
259
261
  /** Arbitrary extra options interpreted by specific adapters (e.g. fake). */
260
262
  options?: Record<string, unknown>;
261
263
  }
264
+ /**
265
+ * Resource usage for a run, exposed under `result.meta.usage`. Every field is
266
+ * optional. Token counts come from the agent's own session transcript/rollout
267
+ * JSONL (the AUTHORITATIVE, device-independent source — written on every run
268
+ * regardless of TUI/status-line settings); `contextPercent` /
269
+ * `subscriptionSessionCostUsd` are best-effort extras scraped from the TUI status
270
+ * line when it is enabled. `costUsd` is COMPUTED from the token counts × the
271
+ * model's list price (see core/pricing.ts). `source` records which path produced
272
+ * the token figures.
273
+ */
274
+ interface AgentUsage {
275
+ /**
276
+ * Provenance of the token counts: "transcript" = parsed from the agent's
277
+ * session log (authoritative); "status-line" = scraped from the TUI (only
278
+ * present when the user has a status line that shows usage). Token fields are
279
+ * trustworthy only when `source` is "transcript".
280
+ */
281
+ source?: "transcript" | "status-line";
282
+ /** Model id when known, e.g. "claude-opus-4-8". */
283
+ model?: string;
284
+ /** Cumulative non-cached input/prompt tokens. */
285
+ inputTokens?: number;
286
+ /** Cumulative output/completion tokens. */
287
+ outputTokens?: number;
288
+ /** Cumulative tokens served from the prompt cache (cheap reads). */
289
+ cachedInputTokens?: number;
290
+ /** Cumulative tokens spent CREATING prompt-cache entries (Claude). */
291
+ cacheCreationTokens?: number;
292
+ /** Cumulative reasoning tokens, when the model reports them (Codex / o-series). */
293
+ reasoningTokens?: number;
294
+ /** Total tokens (the agent's own total when given, else the sum of the above). */
295
+ totalTokens?: number;
296
+ /**
297
+ * API-equivalent REFERENCE cost in USD, COMPUTED as Σ(tokens × the model's list
298
+ * price) — see core/pricing.ts. This is what the run WOULD bill via the API
299
+ * (or `claude -p` / the Agent SDK). agent-relay drives the agent's INTERACTIVE
300
+ * TUI, which on a Claude subscription is covered by the plan — so for those runs
301
+ * the real marginal cost is ~0 and this is a SHADOW cost, not the amount charged.
302
+ * `null` when the model's price is unknown (so it isn't confused with a real 0).
303
+ */
304
+ costUsd?: number | null;
305
+ /** Context-window usage as a percent, when surfaced by the status line. */
306
+ contextPercent?: number;
307
+ /**
308
+ * The NOMINAL session cost the agent prints in its status line ("Session $X").
309
+ * On Team/Max subscription seats this reads 0 / is not the API price — use
310
+ * {@link costUsd} for an actual cost estimate.
311
+ */
312
+ subscriptionSessionCostUsd?: number;
313
+ /** Raw status-line snippet the scraped extras came from (status-line only). */
314
+ raw?: string;
315
+ }
262
316
  /** The terminal result an adapter returns from {@link AgentAdapter.run}. */
263
317
  interface AgentRunResult {
264
318
  /** Whether the adapter believes the run finished successfully. */
@@ -459,7 +513,9 @@ declare const defaultsSchema: z.ZodObject<{
459
513
  type RelayDefaults = z.infer<typeof defaultsSchema>;
460
514
  /** Resolve the effective approval mode (adapter override > defaults > legacy). */
461
515
  declare function resolveApprovalMode(defaults: RelayDefaults, adapter?: AdapterConfig): ApprovalMode;
462
- /** Resolve the effective sandbox level (adapter override > defaults > default). */
516
+ /** Resolve the effective sandbox level (adapter override > defaults > default).
517
+ * The project default is full bypass (`danger-full-access`) so unattended runs
518
+ * "just work"; tighten it per `defaults.sandbox` / `adapters.<name>.sandbox`. */
463
519
  declare function resolveSandbox(defaults: RelayDefaults, adapter?: AdapterConfig): SandboxLevel;
464
520
  declare const hooksSchema: z.ZodObject<{
465
521
  /** Shell command run just before the agent starts. */
@@ -635,6 +691,27 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
635
691
  apiKey?: string | undefined;
636
692
  maxTokens?: number | undefined;
637
693
  }>>;
694
+ /** Override the built-in per-model token pricing (USD per 1M tokens). */
695
+ pricing: z.ZodOptional<z.ZodArray<z.ZodObject<{
696
+ /** Regex (case-insensitive) matched against the model id; first match wins. */
697
+ match: z.ZodString;
698
+ input: z.ZodNumber;
699
+ output: z.ZodNumber;
700
+ cacheWrite: z.ZodOptional<z.ZodNumber>;
701
+ cacheRead: z.ZodOptional<z.ZodNumber>;
702
+ }, "strict", z.ZodTypeAny, {
703
+ match: string;
704
+ input: number;
705
+ output: number;
706
+ cacheWrite?: number | undefined;
707
+ cacheRead?: number | undefined;
708
+ }, {
709
+ match: string;
710
+ input: number;
711
+ output: number;
712
+ cacheWrite?: number | undefined;
713
+ cacheRead?: number | undefined;
714
+ }>, "many">>;
638
715
  /** Optional shell-command lifecycle hooks. */
639
716
  hooks: z.ZodOptional<z.ZodObject<{
640
717
  /** Shell command run just before the agent starts. */
@@ -684,6 +761,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
684
761
  apiKey?: string | undefined;
685
762
  maxTokens?: number | undefined;
686
763
  } | undefined;
764
+ pricing?: {
765
+ match: string;
766
+ input: number;
767
+ output: number;
768
+ cacheWrite?: number | undefined;
769
+ cacheRead?: number | undefined;
770
+ }[] | undefined;
687
771
  hooks?: {
688
772
  onStart?: string | undefined;
689
773
  onComplete?: string | undefined;
@@ -724,6 +808,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
724
808
  apiKey?: string | undefined;
725
809
  maxTokens?: number | undefined;
726
810
  } | undefined;
811
+ pricing?: {
812
+ match: string;
813
+ input: number;
814
+ output: number;
815
+ cacheWrite?: number | undefined;
816
+ cacheRead?: number | undefined;
817
+ }[] | undefined;
727
818
  hooks?: {
728
819
  onStart?: string | undefined;
729
820
  onComplete?: string | undefined;
@@ -764,6 +855,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
764
855
  apiKey?: string | undefined;
765
856
  maxTokens?: number | undefined;
766
857
  } | undefined;
858
+ pricing?: {
859
+ match: string;
860
+ input: number;
861
+ output: number;
862
+ cacheWrite?: number | undefined;
863
+ cacheRead?: number | undefined;
864
+ }[] | undefined;
767
865
  hooks?: {
768
866
  onStart?: string | undefined;
769
867
  onComplete?: string | undefined;
@@ -804,6 +902,13 @@ declare const configSchema: z.ZodEffects<z.ZodObject<{
804
902
  apiKey?: string | undefined;
805
903
  maxTokens?: number | undefined;
806
904
  } | undefined;
905
+ pricing?: {
906
+ match: string;
907
+ input: number;
908
+ output: number;
909
+ cacheWrite?: number | undefined;
910
+ cacheRead?: number | undefined;
911
+ }[] | undefined;
807
912
  hooks?: {
808
913
  onStart?: string | undefined;
809
914
  onComplete?: string | undefined;
@@ -854,6 +959,64 @@ declare class SessionNotFoundError extends AgentRelayError {
854
959
  constructor(sessionId: string);
855
960
  }
856
961
 
962
+ /**
963
+ * Per-model token pricing → an API-equivalent USD cost for a run's token usage.
964
+ *
965
+ * agent-relay reads exact token counts from the agent's transcript (see
966
+ * `claude-session.ts` / `codex-session.ts`), but the transcript carries no dollar
967
+ * figure, and the TUI "Session $" line is a NOMINAL/subscription number (often $0
968
+ * on Team/Max seats) — NOT the API-equivalent cost. So we compute
969
+ * `cost = Σ(tokens × list rate)` from a built-in price table (overridable via
970
+ * `config.pricing`). Rates are USD per 1M tokens and are LIST prices, so the
971
+ * result is an ESTIMATE — override the table when your actual billing differs.
972
+ *
973
+ * IMPORTANT: this is a REFERENCE (shadow) cost — what the run would bill via the
974
+ * API. agent-relay drives the agent's INTERACTIVE TUI, which on a Claude
975
+ * subscription is covered by the plan (the real marginal cost is ~0). Only the
976
+ * non-interactive paths (`claude -p` / the Agent SDK — which, from 2026-06-15, are no
977
+ * longer counted against subscription limits) bill at these rates. So treat
978
+ * `costUsd` as "what this would cost via API", not necessarily what you're charged.
979
+ */
980
+ interface ModelPricing {
981
+ /** USD per 1M input (prompt) tokens. */
982
+ input: number;
983
+ /** USD per 1M output (completion) tokens. */
984
+ output: number;
985
+ /** USD per 1M cache-CREATION tokens (defaults to `input` when unset). */
986
+ cacheWrite?: number;
987
+ /** USD per 1M cache-READ tokens (defaults to 0 when unset). */
988
+ cacheRead?: number;
989
+ }
990
+ interface PricingRule {
991
+ /** Matched (case-insensitive) against the model id — first match wins. */
992
+ match: RegExp;
993
+ pricing: ModelPricing;
994
+ }
995
+ /**
996
+ * Built-in Anthropic list prices (USD / 1M tokens). These are estimates that
997
+ * drift as pricing changes — pass `config.pricing` to override per model. Codex
998
+ * (OpenAI) models aren't listed here, so a Codex run reports `costUsd: null`
999
+ * unless you add a rule.
1000
+ */
1001
+ declare const DEFAULT_PRICING: PricingRule[];
1002
+ /** Resolve the price for a model id (overrides first, then the built-in table). */
1003
+ declare function pricingForModel(model: string | undefined, overrides?: PricingRule[]): ModelPricing | undefined;
1004
+ /** Token usage shape needed to price a run. */
1005
+ interface CostUsageInput {
1006
+ model?: string;
1007
+ inputTokens?: number;
1008
+ outputTokens?: number;
1009
+ cacheCreationTokens?: number;
1010
+ cachedInputTokens?: number;
1011
+ }
1012
+ /**
1013
+ * API-equivalent USD cost for a run's token usage, or `null` when the model's
1014
+ * price is unknown — so callers WARN instead of reporting a misleading `0`.
1015
+ * Assumes Claude-style additive cache accounting (input excludes cached reads;
1016
+ * cache reads are billed separately at the cheap `cacheRead` rate).
1017
+ */
1018
+ declare function computeCostUsd(usage: CostUsageInput, overrides?: PricingRule[]): number | null;
1019
+
857
1020
  /** Session metadata persistence: create, save, load, list. */
858
1021
 
859
1022
  interface CreateSessionInput {
@@ -1289,6 +1452,13 @@ interface PtySessionOptions {
1289
1452
  * think/build is never mistaken for "done".
1290
1453
  */
1291
1454
  workingPattern?: RegExp;
1455
+ /**
1456
+ * Optional per-adapter scrape of the (ANSI-cleaned) screen into structured
1457
+ * usage (tokens / context / cost). Called on each settle; the latest non-empty
1458
+ * result is merged and attached to `result.meta.usage`. Vendor-specific, so the
1459
+ * adapter supplies it — the session loop itself stays vendor-agnostic.
1460
+ */
1461
+ scrapeUsage?: (cleanedText: string) => AgentUsage | undefined;
1292
1462
  /** Keys to send to exit the TUI on completion (e.g. double Ctrl-C). */
1293
1463
  quitKeys?: string;
1294
1464
  /** Optional text to type once the UI is ready (for TUIs needing typed input). */
@@ -1355,6 +1525,13 @@ interface InteractiveAdapterConfig {
1355
1525
  setup?: (input: AgentRunInput) => SetupStep[] | undefined;
1356
1526
  detector?: PromptDetectorOptions;
1357
1527
  keymap?: PtyKeymap;
1528
+ /**
1529
+ * Optional per-adapter scrape of the agent's TUI status line into structured
1530
+ * usage (tokens / context / cost), surfaced as `result.meta.usage`. Heuristic
1531
+ * and vendor-specific, so it lives in the adapter, not the vendor-agnostic
1532
+ * session loop. Omit it for adapters with no parseable status line.
1533
+ */
1534
+ scrapeUsage?: (cleanedText: string) => AgentUsage | undefined;
1358
1535
  completionPattern?: RegExp;
1359
1536
  completionIdleMs?: number;
1360
1537
  /** "Agent is working" indicator that suppresses completion (see PtySession). */
@@ -1387,28 +1564,46 @@ declare class InteractivePtyAdapter implements AgentAdapter {
1387
1564
  * AUTONOMY: by default Claude runs with `--dangerously-skip-permissions` so it
1388
1565
  * acts without per-action prompts. The {@link Decider} still handles the prompts
1389
1566
  * that appear anyway (the directory-trust menu, etc.). `approvalPolicy: "gated"`
1390
- * uses `--permission-mode acceptEdits` so Claude asks more and the decider sees
1391
- * those; `"readonly"` uses `--permission-mode plan`. The prompt is a positional
1392
- * arg so the session starts immediately.
1567
+ * uses `--permission-mode default` — Claude's normal interactive mode where it
1568
+ * ASKS before each edit/command, so those approvals are routed to the decider.
1569
+ * (NOT `acceptEdits`, which silently auto-approves edits and so never consults
1570
+ * the decider on them.) `"readonly"` uses `--permission-mode plan`. The prompt is
1571
+ * a positional arg so the session starts immediately.
1393
1572
  */
1394
1573
 
1395
1574
  interface ClaudeInteractiveOptions {
1396
1575
  command?: string;
1397
1576
  env?: Record<string, string>;
1398
1577
  now?: () => Date;
1578
+ /** Override Claude's projects root (~/.claude/projects) — for tests. */
1579
+ projectsDir?: string;
1399
1580
  }
1400
1581
  declare class ClaudeInteractiveAdapter extends InteractivePtyAdapter {
1582
+ private readonly clock;
1583
+ /** Override the projects root (~/.claude/projects) for tests. */
1584
+ private readonly projectsDir?;
1401
1585
  constructor(opts?: ClaudeInteractiveOptions);
1586
+ /**
1587
+ * Run Claude, then read AUTHORITATIVE token usage from its session transcript
1588
+ * (~/.claude/projects/<cwd>/<id>.jsonl) and surface it as `meta.usage`. This is
1589
+ * device-independent — it works regardless of whether the user has a usage
1590
+ * status line — and overwrites the best-effort status-line scrape's token
1591
+ * figures while keeping its context%/cost extras. Best-effort: if no transcript
1592
+ * is found, the status-line usage (if any) is left as-is.
1593
+ */
1594
+ run(input: AgentRunInput, ctx: AdapterRunContext): Promise<AgentRunResult>;
1402
1595
  static fromConfig(config: AdapterConfig): ClaudeInteractiveAdapter;
1403
1596
  }
1404
1597
 
1405
1598
  /**
1406
1599
  * Codex driven interactively in a PTY. The project's concept is PURE AUTONOMY:
1407
- * by default Codex runs with `-a never` (never ask) within the chosen sandbox,
1408
- * so it just works. The {@link Decider} still handles the prompts that appear
1409
- * anyway (the directory-trust dialog, etc.). `approvalPolicy: "gated"` switches
1410
- * Codex to `-a on-request` so the decider sees each action; `"readonly"` runs it
1411
- * read-only. The prompt is a positional arg so the TUI starts immediately.
1600
+ * by default Codex runs with `-a never` (never ask) and `-s danger-full-access`
1601
+ * (full bypass — the project default), so it just works unattended. The
1602
+ * {@link Decider} still handles the prompts that appear anyway (the directory-
1603
+ * trust dialog, etc.). `approvalPolicy: "gated"` switches Codex to `-a on-request`
1604
+ * so the decider sees each action; `"readonly"` runs it read-only. Tighten the
1605
+ * sandbox with `defaults.sandbox` / `adapters.codex.sandbox` (e.g. workspace-write).
1606
+ * The prompt is a positional arg so the TUI starts immediately.
1412
1607
  */
1413
1608
 
1414
1609
  interface CodexInteractiveOptions {
@@ -1424,11 +1619,12 @@ declare class CodexInteractiveAdapter extends InteractivePtyAdapter {
1424
1619
  private readonly sessionsDir?;
1425
1620
  constructor(opts?: CodexInteractiveOptions);
1426
1621
  /**
1427
- * Run Codex, then capture its NATIVE session id (the rollout UUID) for this
1428
- * cwd and attach it to the result's `sessionRef` so the runner persists it and
1429
- * a later resume can use `codex resume <id> "<prompt>"`. Capture is best-effort:
1430
- * if no rollout matches (or any I/O fails) the result is returned unchanged, so
1431
- * the run still resumes via the `--last` fallback.
1622
+ * Run Codex, then read its rollout for this cwd to capture (a) the NATIVE
1623
+ * session id (the rollout UUID) for `sessionRef` so a later resume can use
1624
+ * `codex resume <id> "<prompt>"`, and (b) authoritative token usage for
1625
+ * `meta.usage` (device-independent — from Codex's own log, not the TUI). Both
1626
+ * are best-effort: if no rollout matches (or any I/O fails) the result is
1627
+ * returned unchanged, so the run still resumes via the `--last` fallback.
1432
1628
  */
1433
1629
  run(input: AgentRunInput, ctx: AdapterRunContext): Promise<AgentRunResult>;
1434
1630
  static fromConfig(config: AdapterConfig): CodexInteractiveAdapter;
@@ -1695,4 +1891,4 @@ declare function cleanTerminalText(input: string): string;
1695
1891
  /** Return the last `n` non-empty lines of cleaned text. */
1696
1892
  declare function tailLines(text: string, n: number): string[];
1697
1893
 
1698
- export { type AbortReason, type AdapterAvailability, type AdapterConfig, type AdapterFactory, type AdapterListItem, type AdapterMode, AdapterRegistry, type AdapterRunContext, type AgentAdapter, type AgentAdapterDefinition, type AgentErrorInfo, type AgentEvent, type AgentEventType, AgentRelayError, type AgentRunInput, type AgentRunResult, type AgentSessionRef, AlwaysApproveDecider, ApiDecider, type ApprovalMode, BUILTIN_ADAPTER_DEFINITIONS, CONFIG_FILENAME, ClaudeInteractiveAdapter, CodexInteractiveAdapter, CommandDecider, type CommandPreview, type CompletionContext, type CompletionDetector, CompositeCompletionDetector, ConfigError, type CreateSessionInput, DEFAULT_DENY_PATTERNS, type Decider, type DeciderConfig, type DeciderConfigSchema, type DeciderFlags, type DecisionAction, DefaultCompletionDetector, DefaultKeymap, type DetectedPrompt, type DoctorReport, type FakeAdapterOptions, FakeAgentAdapter, FunctionDecider, type HooksConfig, type InitResult, type InteractionDecision, type InteractionKind, type InteractionRequest, type InteractiveAdapterConfig, InteractivePtyAdapter, OutputPatternDetector, PromptDetector, type PromptDetectorOptions, type PruneOptions, type PruneResult, type PtyKeymap, type PtySessionOptions, type RelayConfig, type RelayDefaults, type ResumeCommandResult, RuleDecider, type RunHooks, RunLogger, type RunLoggerOptions, type RunOutcome, type RunnerOptions, type SandboxLevel, type SessionListItem, SessionManager, type SessionMetadata, SessionNotFoundError, type SessionStatus, type ShellHookContext, type ShellHooks, UnknownAdapterError, adapterConfigSchema, approvalPolicySchema, cleanTerminalText, configPath, configSchema, createAdapterFactory, createDecider, createDefaultConfig, deciderConfigFromFlags, deciderSchema, defaultRegistry, defaultsSchema, hooksSchema, listAdapters, listSessions, loadConfig, loadConfigOrDefault, parseCheckbox, parseConfig, parseDecisionReply, pruneSessions, renderDecisionPrompt, resolveApprovalMode, resolvePrompt, 
resolveSandbox, resumeCommand, runAgent, runCommand, runDoctor, runInit, runPtySession, runShellHook, sandboxSchema, saveConfig, stringifyConfig, stripAnsi, tailLines };
1894
+ export { type AbortReason, type AdapterAvailability, type AdapterConfig, type AdapterFactory, type AdapterListItem, type AdapterMode, AdapterRegistry, type AdapterRunContext, type AgentAdapter, type AgentAdapterDefinition, type AgentErrorInfo, type AgentEvent, type AgentEventType, AgentRelayError, type AgentRunInput, type AgentRunResult, type AgentSessionRef, type AgentUsage, AlwaysApproveDecider, ApiDecider, type ApprovalMode, BUILTIN_ADAPTER_DEFINITIONS, CONFIG_FILENAME, ClaudeInteractiveAdapter, CodexInteractiveAdapter, CommandDecider, type CommandPreview, type CompletionContext, type CompletionDetector, CompositeCompletionDetector, ConfigError, type CreateSessionInput, DEFAULT_DENY_PATTERNS, DEFAULT_PRICING, type Decider, type DeciderConfig, type DeciderConfigSchema, type DeciderFlags, type DecisionAction, DefaultCompletionDetector, DefaultKeymap, type DetectedPrompt, type DoctorReport, type FakeAdapterOptions, FakeAgentAdapter, FunctionDecider, type HooksConfig, type InitResult, type InteractionDecision, type InteractionKind, type InteractionRequest, type InteractiveAdapterConfig, InteractivePtyAdapter, type ModelPricing, OutputPatternDetector, type PricingRule, PromptDetector, type PromptDetectorOptions, type PruneOptions, type PruneResult, type PtyKeymap, type PtySessionOptions, type RelayConfig, type RelayDefaults, type ResumeCommandResult, RuleDecider, type RunHooks, RunLogger, type RunLoggerOptions, type RunOutcome, type RunnerOptions, type SandboxLevel, type SessionListItem, SessionManager, type SessionMetadata, SessionNotFoundError, type SessionStatus, type ShellHookContext, type ShellHooks, UnknownAdapterError, adapterConfigSchema, approvalPolicySchema, cleanTerminalText, computeCostUsd, configPath, configSchema, createAdapterFactory, createDecider, createDefaultConfig, deciderConfigFromFlags, deciderSchema, defaultRegistry, defaultsSchema, hooksSchema, listAdapters, listSessions, loadConfig, loadConfigOrDefault, parseCheckbox, parseConfig, 
parseDecisionReply, pricingForModel, pruneSessions, renderDecisionPrompt, resolveApprovalMode, resolvePrompt, resolveSandbox, resumeCommand, runAgent, runCommand, runDoctor, runInit, runPtySession, runShellHook, sandboxSchema, saveConfig, stringifyConfig, stripAnsi, tailLines };