@punktechnologies/sdk 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -14,6 +14,7 @@ import type {
14
14
  Pattern,
15
15
  PolicyVerdict,
16
16
  PromotionGateStatus,
17
+ RouteExplanation,
17
18
  Run,
18
19
  SavingsSummary,
19
20
  SideEffectLevel,
@@ -52,8 +53,11 @@ export const PUNK_CHORUS_MODEL = "punk/chorus" as const;
52
53
  /** @deprecated Use PUNK_CHORUS_MODEL. */
53
54
  export const PUNK_MODEL = PUNK_CHORUS_MODEL;
54
55
  const DEFAULT_BASE_URL = "http://localhost:4100";
56
+ const DEFAULT_APP = "default-app";
55
57
  const MAX_TOOL_CACHE_TTL_SECONDS = 24 * 60 * 60;
56
58
  const MAX_TOOL_CACHE_DIMENSION_CHARS = 120;
59
+ const DEFAULT_WAIT_POLL_INTERVAL_MS = 250;
60
+ const DEFAULT_WAIT_TIMEOUT_MS = 30_000;
57
61
 
58
62
  export type PunkLatencyMode =
59
63
  | "fast"
@@ -685,6 +689,21 @@ function cleanPathId(label: string, value: string): string {
685
689
  return trimmed;
686
690
  }
687
691
 
692
/**
 * Look up an environment variable without assuming a Node.js runtime.
 *
 * @param name Variable name to read from `globalThis.process.env`.
 * @returns The value when `process.env` exists and holds a string for `name`;
 *   otherwise `undefined`.
 */
function readEnv(name: string): string | undefined {
  const environment = (globalThis as any)?.process?.env;
  if (environment && typeof environment === "object") {
    const candidate = environment[name];
    if (typeof candidate === "string") return candidate;
  }
  return undefined;
}
698
+
699
/**
 * Resolve one client setting from the options object, falling back to an
 * environment variable.
 *
 * The fallback is used only when `key` is not an own property of `opts`; a key
 * that is present with the value `undefined` is returned as-is and the
 * environment is not consulted.
 *
 * @param opts Options object passed by the caller.
 * @param key Option name to look up.
 * @param envName Environment variable consulted when the option is absent.
 */
function optionOrEnv<K extends keyof PunkOptions>(
  opts: PunkOptions,
  key: K,
  envName: string,
): PunkOptions[K] | string | undefined {
  const explicitlyProvided = Object.prototype.hasOwnProperty.call(opts, key);
  if (explicitlyProvided) return opts[key];
  return readEnv(envName);
}
706
+
688
707
  function normalizeBaseUrl(value: string | undefined): string {
689
708
  const trimmed = typeof value === "string" ? value.trim() : "";
690
709
  return (trimmed || DEFAULT_BASE_URL).replace(/\/+$/, "");
@@ -733,6 +752,170 @@ function chatMessageText(message: any): string {
733
752
  return chatMessageContentToString(message?.content);
734
753
  }
735
754
 
755
/**
 * Narrow an unknown value to a finite number.
 *
 * @returns `value` when it is a number other than `NaN`/`±Infinity`, else `undefined`.
 */
function finiteNumber(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  return Number.isFinite(value) ? value : undefined;
}
758
+
759
/**
 * Normalize the `usage` object of an OpenAI-style response into `PunkUsage`.
 *
 * Accepts both `prompt_tokens`/`completion_tokens` and
 * `input_tokens`/`output_tokens`. Each candidate is validated on its own, so a
 * present but non-numeric primary field no longer hides a valid alternate field.
 * `totalTokens` comes from `total_tokens` when finite, otherwise from the sum of
 * input and output when both are known.
 *
 * @param raw Parsed response body (or stream chunk).
 * @returns Normalized usage, or `undefined` when `raw.usage` is not a record.
 */
function normalizeOpenAIUsage(raw: any): PunkUsage | undefined {
  if (!isRecord(raw?.usage)) return undefined;
  const usage = raw.usage as Record<string, unknown>;
  const normalized: PunkUsage = { raw: raw.usage };
  // Validate before falling back: `a ?? b` alone would stop at a non-nullish, non-numeric `a`.
  const inputTokens = finiteNumber(usage.prompt_tokens) ?? finiteNumber(usage.input_tokens);
  const outputTokens = finiteNumber(usage.completion_tokens) ?? finiteNumber(usage.output_tokens);
  const totalTokens = finiteNumber(usage.total_tokens);
  if (inputTokens !== undefined) normalized.inputTokens = inputTokens;
  if (outputTokens !== undefined) normalized.outputTokens = outputTokens;
  if (totalTokens !== undefined) {
    normalized.totalTokens = totalTokens;
  } else if (inputTokens !== undefined && outputTokens !== undefined) {
    normalized.totalTokens = inputTokens + outputTokens;
  }
  return normalized;
}
774
+
775
/**
 * Normalize the `usage` object of an Anthropic-style payload into `PunkUsage`.
 *
 * Reads `input_tokens` and `output_tokens`; `totalTokens` is set only when both
 * are finite numbers, as their sum.
 *
 * @param raw Object that may carry a `usage` record.
 * @returns Normalized usage, or `undefined` when `raw.usage` is not a record.
 */
function normalizeAnthropicUsage(raw: any): PunkUsage | undefined {
  const source = raw?.usage;
  if (!isRecord(source)) return undefined;
  const fields = source as Record<string, unknown>;
  const result: PunkUsage = { raw: source };
  const promptSide = finiteNumber(fields.input_tokens);
  const completionSide = finiteNumber(fields.output_tokens);
  if (promptSide !== undefined) result.inputTokens = promptSide;
  if (completionSide !== undefined) result.outputTokens = completionSide;
  if (promptSide !== undefined && completionSide !== undefined) {
    result.totalTokens = promptSide + completionSide;
  }
  return result;
}
786
+
787
/**
 * Extract the `model` field from a response payload.
 *
 * @returns `raw.model` when it is a non-empty string, else `undefined`.
 */
function normalizeModel(raw: any): string | undefined {
  const model = raw?.model;
  if (typeof model !== "string") return undefined;
  return model.length > 0 ? model : undefined;
}
790
+
791
/**
 * Extract a provider name from a response payload.
 *
 * Checks `provider`, `providerName`, then `provider_name`, and returns the
 * first one that holds a non-empty string.
 */
function normalizeProvider(raw: any): string | undefined {
  const candidates = ["provider", "providerName", "provider_name"] as const;
  const hit = candidates.find(
    (key) => typeof raw?.[key] === "string" && raw[key].length > 0,
  );
  return hit === undefined ? undefined : raw[hit];
}
797
+
798
/**
 * Flatten Anthropic message `content` into plain text.
 *
 * A string is returned unchanged. For an array, bare string parts and
 * `{ type: "text", text }` blocks are concatenated in order; every other part
 * contributes nothing.
 *
 * @param value The `content` value from the response.
 * @param method HTTP method, used only for the error.
 * @param path Request path, used only for the error.
 * @throws The error built by `malformedApiResponse` when `value` is neither a
 *   string nor an array.
 */
function anthropicTextFromContent(value: unknown, method: string, path: string): string {
  if (typeof value === "string") return value;
  if (!Array.isArray(value)) {
    throw malformedApiResponse(method, path, "content must be a string or content block array");
  }
  let text = "";
  for (const part of value as any[]) {
    if (typeof part === "string") {
      text += part;
    } else if (part?.type === "text" && typeof part.text === "string") {
      text += part.text;
    }
  }
  return text;
}
811
+
812
/**
 * Read the text of an Anthropic Messages response body.
 *
 * @param raw Parsed response body.
 * @param method HTTP method, used only for errors.
 * @param path Request path, used only for errors.
 * @throws The error built by `malformedApiResponse` when `raw` is not a record,
 *   has no own `content` property, or `content` has an unsupported shape.
 */
function anthropicMessageText(raw: unknown, method: string, path: string): string {
  if (!isRecord(raw)) {
    throw malformedApiResponse(method, path, "missing response object");
  }
  const hasContent = Object.prototype.hasOwnProperty.call(raw, "content");
  if (!hasContent) {
    throw malformedApiResponse(method, path, "missing content");
  }
  return anthropicTextFromContent(raw.content, method, path);
}
819
+
820
/**
 * Return the content blocks of an Anthropic Messages response body.
 *
 * String `content` becomes a single text block. For an array, record entries
 * are kept as-is and bare string entries are wrapped as text blocks, so the
 * blocks agree with what `anthropicTextFromContent` counts as text. Other
 * entries are dropped.
 *
 * @param raw Parsed response body.
 * @param method HTTP method, used only for errors.
 * @param path Request path, used only for errors.
 * @throws The error built by `malformedApiResponse` when `raw` is not a record
 *   or `content` is neither a string nor an array.
 */
function anthropicContentBlocks(raw: unknown, method: string, path: string): AnthropicContentBlock[] {
  if (!isRecord(raw)) throw malformedApiResponse(method, path, "missing response object");
  const content = raw.content;
  if (typeof content === "string") return [{ type: "text", text: content }];
  if (!Array.isArray(content)) {
    throw malformedApiResponse(method, path, "content must be a string or content block array");
  }
  const blocks: AnthropicContentBlock[] = [];
  for (const part of content as unknown[]) {
    if (typeof part === "string") {
      // Previously dropped, although the text extraction includes these parts.
      blocks.push({ type: "text", text: part });
    } else if (isRecord(part)) {
      blocks.push(part as AnthropicContentBlock);
    }
  }
  return blocks;
}
827
+
828
/** One parsed server-sent-events frame. */
interface SseFrame {
  /** Value of the frame's `event:` field, when one was present. */
  event?: string;
  /** The frame's `data:` lines joined with `\n`. */
  data: string;
}
832
+
833
/**
 * Parse the text of a single SSE frame (the lines between blank-line separators).
 *
 * Empty lines and comment lines (starting with `:`) are skipped. The last
 * `event:` line wins and is trimmed. Each `data:` line loses at most one
 * leading space, and the lines are joined with `\n`. Other fields are ignored.
 *
 * @returns The parsed frame, or `null` when the frame has no `data:` line.
 */
function parseSseFrame(frame: string): SseFrame | null {
  const eventPrefix = "event:";
  const dataPrefix = "data:";
  const payload: string[] = [];
  let eventName: string | undefined;
  for (const rawLine of frame.split(/\r?\n/)) {
    if (rawLine === "" || rawLine.startsWith(":")) {
      continue;
    } else if (rawLine.startsWith(eventPrefix)) {
      eventName = rawLine.slice(eventPrefix.length).trim();
    } else if (rawLine.startsWith(dataPrefix)) {
      const rest = rawLine.slice(dataPrefix.length);
      payload.push(rest.startsWith(" ") ? rest.slice(1) : rest);
    }
  }
  return payload.length > 0 ? { event: eventName, data: payload.join("\n") } : null;
}
850
+
851
/**
 * Decode a streaming response body into SSE frames.
 *
 * Bytes are decoded incrementally and split on blank lines (`\n\n` or
 * `\r\n\r\n`). Each complete frame is parsed with `parseSseFrame`, and frames
 * without data are skipped. Any trailing text left when the stream ends is
 * parsed as a final frame.
 *
 * If the consumer stops iterating early, or a read fails, the underlying stream
 * is cancelled before the lock is released. Releasing the lock alone would
 * leave the unread body, and its connection, open.
 *
 * @param res Response whose body is an SSE stream.
 * @param method HTTP method, used only for the error message.
 * @param path Request path, used only for the error message.
 * @throws Error when the response has no body.
 */
async function* sseFrames(
  res: Response,
  method: string,
  path: string,
): AsyncGenerator<SseFrame> {
  if (!res.body) {
    throw new Error(`Punk API ${method} ${path} returned an empty stream`);
  }
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  // Becomes true only once the source reports end-of-stream.
  let exhausted = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) exhausted = true;
      // `stream: !done` flushes any buffered partial code point on the final call.
      buffer += decoder.decode(value, { stream: !done });
      let boundary = buffer.search(/\r?\n\r?\n/);
      while (boundary >= 0) {
        const frameText = buffer.slice(0, boundary);
        const match = buffer.slice(boundary).match(/^\r?\n\r?\n/);
        buffer = buffer.slice(boundary + (match?.[0].length ?? 2));
        const frame = parseSseFrame(frameText);
        if (frame) yield frame;
        boundary = buffer.search(/\r?\n\r?\n/);
      }
      if (done) break;
    }
    const tail = parseSseFrame(buffer.trim());
    if (tail) yield tail;
  } finally {
    if (!exhausted) {
      try {
        await reader.cancel();
      } catch {
        // Best effort: the stream may already be errored or closed.
      }
    }
    reader.releaseLock();
  }
}
883
+
884
/**
 * Parse the `data` of an SSE frame as JSON.
 *
 * @param frame Frame whose `data` is parsed.
 * @param method HTTP method, used only for the error.
 * @param path Request path, used only for the error.
 * @throws The error built by `malformedApiResponse` when `frame.data` is not valid JSON.
 */
function parseSseJson(frame: SseFrame, method: string, path: string): any {
  const text = frame.data;
  try {
    return JSON.parse(text);
  } catch {
    throw malformedApiResponse(method, path, "stream data was not JSON");
  }
}
891
+
892
/**
 * Turn a streamed delta's `content` value into text.
 *
 * Strings pass through. Arrays contribute the `text` of each part that has a
 * string `text`. Anything else, including `null`/`undefined`, yields `""`.
 */
function streamDeltaText(value: unknown): string {
  if (typeof value === "string") return value;
  if (!Array.isArray(value)) return "";
  let text = "";
  for (const part of value as any[]) {
    if (typeof part?.text === "string") text += part.text;
  }
  return text;
}
900
+
901
/** Whether a run status is one of the terminal states: `completed`, `failed` or `blocked`. */
function isTerminalRunStatus(status: Run["status"]): boolean {
  switch (status) {
    case "completed":
    case "failed":
    case "blocked":
      return true;
    default:
      return false;
  }
}
904
+
905
/**
 * Sleep for `ms` milliseconds, rejecting early if `signal` aborts.
 *
 * The abort listener is removed when the timer fires. This matters because
 * callers poll with the same signal, and a listener left behind on every sleep
 * would accumulate on a long-lived signal.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal.
 * @returns A promise that resolves after the delay, or rejects with
 *   `Error("Punk wait aborted")` if the signal is or becomes aborted.
 */
function checkedDelay(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) return Promise.reject(new Error("Punk wait aborted"));
  return new Promise((resolve, reject) => {
    const onAbort = () => {
      clearTimeout(timeout);
      reject(new Error("Punk wait aborted"));
    };
    const timeout = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    // `once` detaches the listener when it fires; the timer path detaches it above.
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
918
+
736
919
  const TOOL_LEVEL_4 = new Set([
737
920
  "delete", "remove", "drop", "pay", "charge", "refund", "purchase", "subscribe",
738
921
  "unsubscribe", "transfer", "withdraw", "deposit", "revoke", "grant", "invite",
@@ -860,6 +1043,143 @@ export interface ChatResult {
860
1043
  runId: string;
861
1044
  route: string;
862
1045
  raw: any;
1046
+ usage?: PunkUsage;
1047
+ model?: string;
1048
+ provider?: string;
1049
+ }
1050
+
1051
/** Token usage normalized from a provider response. */
export interface PunkUsage {
  /** Input (prompt) token count, when the provider reported a finite number. */
  inputTokens?: number;
  /** Output (completion) token count, when the provider reported a finite number. */
  outputTokens?: number;
  /** Total token count, either reported or derived from input + output. */
  totalTokens?: number;
  /** The provider's `usage` object as received. */
  raw: unknown;
}
1057
+
1058
/** A content block in an Anthropic-style message. */
export interface AnthropicContentBlock {
  /** Block discriminator, e.g. `"text"`. */
  type: string;
  /** Text payload, when the block carries one. */
  text?: string;
  /** Any further fields are passed through untyped. */
  [key: string]: unknown;
}
1063
+
1064
/**
 * Request parameters for the Anthropic-compatible Messages endpoint.
 * The object is spread into the JSON request body by `messages` and `messagesStream`.
 */
export interface AnthropicMessageParams extends PunkChorusOptions {
  model: string;
  max_tokens: number;
  /** Conversation turns; `content` is plain text or a list of content blocks. */
  messages: {
    role: "user" | "assistant" | (string & {});
    content: string | AnthropicContentBlock[];
  }[];
  system?: string | AnthropicContentBlock[];
  temperature?: number;
  top_p?: number;
  top_k?: number;
  stop_sequences?: Array<string>;
  tools?: Array<unknown>;
  tool_choice?: unknown;
  metadata?: Record<string, unknown>;
}
1080
+
1081
/** Result of a non-streaming Anthropic-compatible Messages call. */
export interface AnthropicMessageResult {
  /** Concatenated text of the response content. */
  content: string;
  /** The response content as blocks. */
  contentBlocks: Array<AnthropicContentBlock>;
  /** Value of the `x-punk-run-id` response header, or `""` when absent. */
  runId: string;
  /** Value of the `x-punk-route` response header, or `"unknown"` when absent. */
  route: string;
  /** Parsed response body as received. */
  raw: any;
  usage?: PunkUsage;
  model?: string;
  provider?: string;
  /** The response's `stop_reason`, when it is a string. */
  stopReason?: string;
}
1092
+
1093
/** One item yielded while streaming an OpenAI-compatible chat completion. */
export interface ChatStreamChunk {
  /** `"delta"` for a parsed stream chunk, `"done"` for the final marker. */
  type: "delta" | "done";
  /** Text carried by this chunk; `""` on `"done"`. */
  content: string;
  /** Tool calls carried by this chunk; empty on `"done"`. */
  toolCalls: Array<ChatToolCall>;
  /** Value of the `x-punk-run-id` response header, or `""` when absent. */
  runId: string;
  /** Value of the `x-punk-route` response header, or `"unknown"` when absent. */
  route: string;
  /** Parsed JSON of the stream frame, when the chunk came from one. */
  raw?: any;
  usage?: PunkUsage;
  model?: string;
  provider?: string;
}
1104
+
1105
/** One item yielded while streaming an Anthropic-compatible Messages call. */
export interface AnthropicMessageStreamChunk {
  /** `"delta"` for a content delta, `"done"` for the end of the message. */
  type: "delta" | "done";
  /** Text carried by this chunk; `""` on `"done"`. */
  content: string;
  /** Value of the `x-punk-run-id` response header, or `""` when absent. */
  runId: string;
  /** Value of the `x-punk-route` response header, or `"unknown"` when absent. */
  route: string;
  /** SSE `event:` name of the frame, when it had one. */
  event?: string;
  /** Parsed JSON of the stream frame, when the chunk came from one. */
  raw?: any;
  /** Usage accumulated from the frames seen so far. */
  usage?: PunkUsage;
  model?: string;
  provider?: string;
}
1116
+
1117
/** Options controlling which headers `identityHeaders` produces. */
export interface PunkIdentityHeadersOptions {
  /** Whether to add `Authorization` when the client has an API key. Defaults to `true`. */
  includeAuthorization?: boolean;
  /** Whether to add `Content-Type: application/json`. Defaults to `false`. */
  includeContentType?: boolean;
  /** Replacement `X-Punk-App` value for this config object. */
  app?: string;
  /** Replacement `X-Punk-Agent` value for this config object. */
  agent?: string;
  /** Replacement `X-Punk-Subject` value for this config object. */
  subject?: string;
  /** Additional headers, merged after all others. Blank values are skipped. */
  headers?: Record<string, string | undefined>;
}
1131
+
1132
/** Overrides accepted by the `*Config` builders for third-party clients. */
export interface PunkClientConfigOptions extends PunkIdentityHeadersOptions {
  /** Gateway base URL to use instead of the client's own. */
  baseUrl?: string;
  /** API key to use instead of the client's own. */
  apiKey?: string;
  /** Provider name for the Vercel OpenAI-compatible config; that builder defaults it to `"punk"`. */
  name?: string;
  /** `includeUsage` flag for the Vercel OpenAI-compatible config; that builder defaults it to `true`. */
  includeUsage?: boolean;
}
1138
+
1139
/** Config object returned by `openAIConfig`. */
export interface PunkOpenAIConfig {
  /** Gateway base URL with the `/v1` suffix. */
  baseURL: string;
  apiKey: string;
  /** Punk identity headers to send with every request. */
  defaultHeaders: Record<string, string>;
}
1144
+
1145
/** Config object returned by `anthropicConfig`. */
export interface PunkAnthropicConfig {
  /** Gateway base URL, without a `/v1` suffix. */
  baseURL: string;
  authToken: string;
  /** Punk identity headers to send with every request. */
  defaultHeaders: Record<string, string>;
}
1150
+
1151
/** Config object returned by `vercelOpenAICompatibleConfig` and `vercelAIConfig`. */
export interface PunkVercelOpenAICompatibleConfig {
  /** Provider name. */
  name: string;
  /** Gateway base URL with the `/v1` suffix. */
  baseURL: string;
  apiKey: string;
  /** Punk identity headers to send with every request. */
  headers: Record<string, string>;
  includeUsage: boolean;
}
1158
+
1159
/** Config object returned by `langChainConfig`. */
export interface PunkLangChainConfig {
  apiKey: string;
  /** Client configuration: the `/v1` gateway URL plus the Punk identity headers. */
  configuration: {
    baseURL: string;
    defaultHeaders: Record<string, string>;
  };
}
1166
+
1167
/** Cost and savings figures for one run, as returned by `savingsForRun`. */
export interface RunSavings {
  runId: string;
  route?: string;
  status: Run["status"];
  /** Cost recorded on the run, in USD. */
  costUsd: number;
  /** Savings recorded on the run, in USD. */
  savedUsd: number;
  /** The run's `ghostSavedUsd`; `savingsForRun` reports `0` when the run has none. */
  ghostSavedUsd: number;
  latencyMs: number;
  inputTokens: number;
  outputTokens: number;
}
1178
+
1179
/** Options for `waitForRun` and `watchRun`. */
export interface RunWaitOptions {
  /** Delay between polls, in milliseconds. Defaults to 250. */
  pollIntervalMs?: number;
  /** Overall time limit, in milliseconds. Defaults to 30000. */
  timeoutMs?: number;
  /** Abort signal that stops the wait. */
  signal?: AbortSignal;
}
864
1184
 
865
1185
  export interface PunkReceipt {
@@ -980,13 +1300,120 @@ export class Punk {
980
1300
  readonly agent?: string;
981
1301
  readonly subject?: string;
982
1302
  private readonly apiKey?: string;
1303
+ private readonly runStack: string[] = [];
983
1304
 
984
1305
  constructor(opts: PunkOptions = {}) {
985
- this.baseUrl = normalizeBaseUrl(opts.baseUrl);
986
- this.apiKey = cleanIdentityValue(opts.apiKey);
987
- this.app = cleanIdentityValue(opts.app) ?? "default-app";
988
- this.agent = cleanIdentityValue(opts.agent);
989
- this.subject = cleanIdentityValue(opts.subject);
1306
+ this.baseUrl = normalizeBaseUrl(optionOrEnv(opts, "baseUrl", "PUNK_BASE_URL"));
1307
+ this.apiKey = cleanIdentityValue(optionOrEnv(opts, "apiKey", "PUNK_API_KEY"));
1308
+ this.app = cleanIdentityValue(optionOrEnv(opts, "app", "PUNK_APP")) ?? DEFAULT_APP;
1309
+ this.agent = cleanIdentityValue(optionOrEnv(opts, "agent", "PUNK_AGENT"));
1310
+ this.subject = cleanIdentityValue(optionOrEnv(opts, "subject", "PUNK_SUBJECT"));
1311
+ }
1312
+
1313
/** Gateway-named aliases for this client's chat methods. */
readonly gateway = {
  chat: (params: ChatParams): Promise<ChatResult> => {
    return this.chat(params);
  },
  streamChat: (params: ChatParams): AsyncIterable<ChatStreamChunk> => {
    return this.streamChat(params);
  },
  stream: (params: ChatParams): AsyncIterable<ChatStreamChunk> => {
    return this.chatStream(params);
  },
};
1318
+
1319
/** OpenAI-named aliases for this client's chat methods, plus the OpenAI client config builder. */
readonly openai = {
  chat: (params: ChatParams): Promise<ChatResult> => {
    return this.chat(params);
  },
  streamChat: (params: ChatParams): AsyncIterable<ChatStreamChunk> => {
    return this.streamChat(params);
  },
  stream: (params: ChatParams): AsyncIterable<ChatStreamChunk> => {
    return this.chatStream(params);
  },
  config: (opts?: PunkClientConfigOptions): PunkOpenAIConfig => {
    return this.openAIConfig(opts);
  },
};
1325
+
1326
/** Anthropic-named aliases for this client's Messages methods, plus the Anthropic client config builder. */
readonly anthropic = {
  messages: (params: AnthropicMessageParams): Promise<AnthropicMessageResult> => {
    return this.messages(params);
  },
  streamMessages: (params: AnthropicMessageParams): AsyncIterable<AnthropicMessageStreamChunk> => {
    return this.streamMessages(params);
  },
  stream: (params: AnthropicMessageParams): AsyncIterable<AnthropicMessageStreamChunk> => {
    return this.messagesStream(params);
  },
  config: (opts?: PunkClientConfigOptions): PunkAnthropicConfig => {
    return this.anthropicConfig(opts);
  },
};
1335
+
1336
/**
 * Build the Punk identity headers for use with another HTTP client.
 *
 * Always sets `X-Punk-App`. Sets `X-Punk-Agent` and `X-Punk-Subject` when a
 * value is available from `opts` or this client. `Authorization` is added
 * unless `includeAuthorization` is `false` or the client has no API key, and
 * `Content-Type` only when `includeContentType` is `true`. Entries of
 * `opts.headers` are merged last; blank or non-string values are skipped.
 */
identityHeaders(opts: PunkIdentityHeadersOptions = {}): Record<string, string> {
  const result: Record<string, string> = {};
  if (opts.includeContentType === true) {
    result["Content-Type"] = "application/json";
  }
  const wantsAuthorization = opts.includeAuthorization !== false;
  if (wantsAuthorization && this.apiKey) {
    result["Authorization"] = `Bearer ${this.apiKey}`;
  }

  const resolvedApp = cleanIdentityValue(opts.app) ?? this.app;
  const resolvedAgent = cleanIdentityValue(opts.agent) ?? this.agent;
  const resolvedSubject = cleanIdentityValue(opts.subject) ?? this.subject;
  result["X-Punk-App"] = resolvedApp;
  if (resolvedAgent) result["X-Punk-Agent"] = resolvedAgent;
  if (resolvedSubject) result["X-Punk-Subject"] = resolvedSubject;

  for (const [name, headerValue] of Object.entries(opts.headers ?? {})) {
    if (typeof headerValue !== "string") continue;
    if (headerValue.trim().length > 0) result[name] = headerValue;
  }
  return result;
}
1354
+
1355
/**
 * Build constructor options for an OpenAI-style client pointed at the gateway.
 *
 * `baseURL` is the gateway URL plus `/v1`. `apiKey` is `opts.apiKey`, then the
 * client's key, then `"punk-local"`. The default headers carry the Punk
 * identity headers without `Authorization`.
 */
openAIConfig(opts: PunkClientConfigOptions = {}): PunkOpenAIConfig {
  const root = normalizeBaseUrl(opts.baseUrl ?? this.baseUrl);
  const key = cleanIdentityValue(opts.apiKey) ?? this.apiKey ?? "punk-local";
  const defaultHeaders = this.identityHeaders({ ...opts, includeAuthorization: false });
  return { baseURL: `${root}/v1`, apiKey: key, defaultHeaders };
}
1364
+
1365
/**
 * Build constructor options for an Anthropic-style client pointed at the gateway.
 *
 * `baseURL` is the gateway URL with no `/v1` suffix. `authToken` is
 * `opts.apiKey`, then the client's key, then `"punk-local"`.
 *
 * The `Authorization` default header is built from the same resolved key as
 * `authToken`. Previously it always used the client's own key, so passing
 * `opts.apiKey` produced a config with two different credentials. The header is
 * omitted when no key is configured, and an `Authorization` entry in
 * `opts.headers` still takes precedence.
 */
anthropicConfig(opts: PunkClientConfigOptions = {}): PunkAnthropicConfig {
  const baseUrl = normalizeBaseUrl(opts.baseUrl ?? this.baseUrl);
  const configuredKey = cleanIdentityValue(opts.apiKey) ?? this.apiKey;
  const identity = this.identityHeaders({ ...opts, includeAuthorization: false });
  const defaultHeaders: Record<string, string> = {
    ...(configuredKey ? { Authorization: `Bearer ${configuredKey}` } : {}),
    // Spread last so caller-supplied extra headers keep overriding, as before.
    ...identity,
  };
  return {
    baseURL: baseUrl,
    authToken: configuredKey ?? "punk-local",
    defaultHeaders,
  };
}
1374
+
1375
/** Alias of `vercelOpenAICompatibleConfig`. */
vercelAIConfig(opts: PunkClientConfigOptions = {}): PunkVercelOpenAICompatibleConfig {
  const config = this.vercelOpenAICompatibleConfig(opts);
  return config;
}
1378
+
1379
/**
 * Build options for a Vercel AI SDK OpenAI-compatible provider pointed at the gateway.
 *
 * `name` defaults to `"punk"` and `includeUsage` to `true`. `apiKey` is
 * `opts.apiKey`, then the client's key, then `"punk-local"`. `headers` carry
 * the Punk identity headers without `Authorization`.
 */
vercelOpenAICompatibleConfig(opts: PunkClientConfigOptions = {}): PunkVercelOpenAICompatibleConfig {
  const root = normalizeBaseUrl(opts.baseUrl ?? this.baseUrl);
  const name = cleanIdentityValue(opts.name) ?? "punk";
  const apiKey = cleanIdentityValue(opts.apiKey) ?? this.apiKey ?? "punk-local";
  const headers = this.identityHeaders({ ...opts, includeAuthorization: false });
  const includeUsage = opts.includeUsage ?? true;
  return { name, baseURL: `${root}/v1`, apiKey, headers, includeUsage };
}
1389
+
1390
/**
 * Build options for a LangChain OpenAI-style model pointed at the gateway.
 *
 * `apiKey` is `opts.apiKey`, then the client's key, then `"punk-local"`.
 * `configuration` holds the gateway URL plus `/v1` and the Punk identity
 * headers without `Authorization`.
 */
langChainConfig(opts: PunkClientConfigOptions = {}): PunkLangChainConfig {
  const root = normalizeBaseUrl(opts.baseUrl ?? this.baseUrl);
  const apiKey = cleanIdentityValue(opts.apiKey) ?? this.apiKey ?? "punk-local";
  const defaultHeaders = this.identityHeaders({ ...opts, includeAuthorization: false });
  return {
    apiKey,
    configuration: { baseURL: `${root}/v1`, defaultHeaders },
  };
}
1400
+
1401
/** The run id of the innermost active `withRun` call, or `undefined` when none is active. */
currentRunId(): string | undefined {
  const depth = this.runStack.length;
  return depth > 0 ? this.runStack[depth - 1] : undefined;
}
1404
+
1405
/**
 * Run `fn` with a run id registered as the client's current run.
 *
 * While `fn` is executing, `currentRunId()` reports this id unless a later
 * `withRun` is also active. On exit this call removes its own entry from the
 * stack instead of popping the top, so overlapping (non-nested) `withRun` calls
 * no longer remove each other's ids.
 *
 * NOTE(review): the stack belongs to the client instance, so concurrent
 * `withRun` calls on one client still see each other's ids through
 * `currentRunId()` while they overlap.
 *
 * @param run A run id, or an object carrying `runId`.
 * @param fn Callback to execute; may be sync or async.
 * @returns Whatever `fn` returns (awaited).
 * @throws Whatever `cleanPathId` throws for an invalid id, or whatever `fn` throws.
 */
async withRun<T>(
  run: string | { runId?: string | null | undefined },
  fn: () => T | Promise<T>,
): Promise<Awaited<T>> {
  const rawRunId = typeof run === "string" ? run : run.runId;
  const id = cleanPathId("run id", rawRunId ?? "");
  this.runStack.push(id);
  try {
    return await fn();
  } finally {
    // Remove this call's entry specifically; with strict nesting this is the top element.
    const position = this.runStack.lastIndexOf(id);
    if (position >= 0) this.runStack.splice(position, 1);
  }
}
991
1418
 
992
1419
  // -- chat -----------------------------------------------------------------
@@ -1018,16 +1445,170 @@ export class Punk {
1018
1445
  runId: res.headers.get("x-punk-run-id") ?? "",
1019
1446
  route: res.headers.get("x-punk-route") ?? "unknown",
1020
1447
  raw,
1448
+ usage: normalizeOpenAIUsage(raw),
1449
+ model: normalizeModel(raw),
1450
+ provider: normalizeProvider(raw),
1021
1451
  };
1022
1452
  }
1023
1453
 
1454
/** Alias of `chatStream`: streams an OpenAI-compatible chat completion through the gateway. */
streamChat(params: ChatParams): AsyncIterable<ChatStreamChunk> {
  const chunks = this.chatStream(params);
  return chunks;
}
1458
+
1459
/**
 * Stream an OpenAI-compatible chat completion through the gateway.
 *
 * Posts `params` with `stream: true` to `/v1/chat/completions` and yields one
 * `"delta"` chunk per JSON frame, then a single `"done"` chunk when `[DONE]`
 * arrives or the stream ends.
 *
 * @throws The error from `toTransportError` when the request cannot be sent,
 *   from `toError` on a non-OK status, or from `parseSseJson` on a non-JSON frame.
 */
async *chatStream(params: ChatParams): AsyncIterable<ChatStreamChunk> {
  const method = "POST";
  const path = "/v1/chat/completions";
  let response: Response;
  try {
    const identity = {
      "X-Punk-App": this.app,
      "X-Punk-Agent": this.agent,
      "X-Punk-Subject": this.subject,
    };
    response = await fetch(`${this.baseUrl}${path}`, {
      method,
      headers: this.headers(identity),
      body: JSON.stringify({ ...params, stream: true }),
    });
  } catch (err) {
    throw this.toTransportError(method, path, err);
  }
  if (!response.ok) {
    throw this.toError(method, path, response, await response.text());
  }

  const runId = response.headers.get("x-punk-run-id") ?? "";
  const route = response.headers.get("x-punk-route") ?? "unknown";
  const finished = (): ChatStreamChunk => ({ type: "done", content: "", toolCalls: [], runId, route });

  for await (const frame of sseFrames(response, method, path)) {
    if (frame.data === "[DONE]") {
      yield finished();
      return;
    }
    const payload = parseSseJson(frame, method, path);
    const firstChoice = Array.isArray(payload?.choices) ? payload.choices[0] : undefined;
    const delta = isRecord(firstChoice?.delta) ? firstChoice.delta as Record<string, unknown> : {};
    const chunk: ChatStreamChunk = {
      type: "delta",
      content: streamDeltaText(delta.content),
      toolCalls: normalizeChatToolCalls(delta.tool_calls),
      runId,
      route,
      raw: payload,
      usage: normalizeOpenAIUsage(payload),
      model: normalizeModel(payload),
      provider: normalizeProvider(payload),
    };
    yield chunk;
  }
  // The stream ended without an explicit [DONE] marker.
  yield finished();
}
1502
+
1503
/**
 * Send an Anthropic-compatible Messages request through the gateway.
 *
 * Posts `params` with `stream: false` to `/v1/messages` and returns the text,
 * the content blocks and normalized metadata of the response.
 *
 * @throws The error from `toTransportError` when the request cannot be sent,
 *   whatever `parseJsonResponse` throws, or the malformed-response error when
 *   `content` is missing or has an unsupported shape.
 */
async messages(params: AnthropicMessageParams): Promise<AnthropicMessageResult> {
  const method = "POST";
  const path = "/v1/messages";
  let response: Response;
  try {
    const requestHeaders = this.headers({
      "anthropic-version": "2023-06-01",
      "X-Punk-App": this.app,
      "X-Punk-Agent": this.agent,
      "X-Punk-Subject": this.subject,
    });
    const body = JSON.stringify({ ...params, stream: false });
    response = await fetch(`${this.baseUrl}${path}`, { method, headers: requestHeaders, body });
  } catch (err) {
    throw this.toTransportError(method, path, err);
  }

  const payload = await this.parseJsonResponse<any>(method, path, response);
  const content = anthropicMessageText(payload, method, path);
  const contentBlocks = anthropicContentBlocks(payload, method, path);
  const stopReason = typeof payload?.stop_reason === "string" ? payload.stop_reason : undefined;
  return {
    content,
    contentBlocks,
    runId: response.headers.get("x-punk-run-id") ?? "",
    route: response.headers.get("x-punk-route") ?? "unknown",
    raw: payload,
    usage: normalizeAnthropicUsage(payload),
    model: normalizeModel(payload),
    provider: normalizeProvider(payload),
    stopReason,
  };
}
1535
+
1536
/** Alias of `messagesStream`: streams an Anthropic-compatible Messages request through the gateway. */
streamMessages(params: AnthropicMessageParams): AsyncIterable<AnthropicMessageStreamChunk> {
  const chunks = this.messagesStream(params);
  return chunks;
}
1540
+
1541
/**
 * Stream an Anthropic-compatible Messages request through the gateway.
 *
 * Posts `params` with `stream: true` to `/v1/messages`. Yields a `"delta"`
 * chunk for every `content_block_delta` frame and one `"done"` chunk on
 * `message_stop`, or when the stream ends without one. `model` and `usage` are
 * accumulated across frames: `message.model` and `message.usage` seed them, and
 * later top-level `usage` objects are merged over what was seen before.
 *
 * All frame fields are read with optional chaining, so a frame whose JSON is
 * `null` is skipped instead of raising a `TypeError`.
 *
 * @throws The error from `toTransportError` when the request cannot be sent,
 *   from `toError` on a non-OK status, or from `parseSseJson` on a non-JSON frame.
 */
async *messagesStream(params: AnthropicMessageParams): AsyncIterable<AnthropicMessageStreamChunk> {
  const method = "POST";
  const path = "/v1/messages";
  let res: Response;
  try {
    res = await fetch(`${this.baseUrl}${path}`, {
      method,
      headers: this.headers({
        "anthropic-version": "2023-06-01",
        "X-Punk-App": this.app,
        "X-Punk-Agent": this.agent,
        "X-Punk-Subject": this.subject,
      }),
      body: JSON.stringify({ ...params, stream: true }),
    });
  } catch (err) {
    throw this.toTransportError(method, path, err);
  }
  if (!res.ok) throw this.toError(method, path, res, await res.text());
  const runId = res.headers.get("x-punk-run-id") ?? "";
  const route = res.headers.get("x-punk-route") ?? "unknown";
  let model: string | undefined;
  let usage: PunkUsage | undefined;
  for await (const frame of sseFrames(res, method, path)) {
    const raw = parseSseJson(frame, method, path);
    if (typeof raw?.message?.model === "string") model = raw.message.model;
    // `raw` may be null (valid JSON), so guard this access like the others.
    const messageUsage = normalizeAnthropicUsage(raw?.message);
    if (messageUsage) usage = messageUsage;
    if (isRecord(raw?.usage)) {
      // Merge the new usage fields over those collected so far.
      usage = normalizeAnthropicUsage({ usage: { ...(usage?.raw as Record<string, unknown> | undefined), ...raw.usage } });
    }
    if (raw?.type === "content_block_delta") {
      yield {
        type: "delta",
        content: typeof raw?.delta?.text === "string" ? raw.delta.text : "",
        runId,
        route,
        event: frame.event,
        raw,
        usage,
        model,
        provider: normalizeProvider(raw),
      };
    }
    if (raw?.type === "message_stop") {
      yield {
        type: "done",
        content: "",
        runId,
        route,
        event: frame.event,
        raw,
        usage,
        model,
        provider: normalizeProvider(raw),
      };
      return;
    }
  }
  // The stream ended without a message_stop frame.
  yield { type: "done", content: "", runId, route, usage, model };
}
1603
+
1024
1604
  // -- tool tracing ---------------------------------------------------------
1025
1605
 
1026
1606
  /**
1027
1607
  * Wrap a tool so every invocation is traced into the run it belongs to, and
1028
1608
  * explicitly declared read-only results (level <= 1 with a TTL) flow
1029
- * through the tool-result cache. Tracing requires `ctx.runId`; without it
1030
- * the tool still executes, silently untraced and uncached.
1609
+ * through the tool-result cache. Tracing uses `ctx.runId` first, then an
1610
+ * active `withRun` id; without either the tool still executes, silently
1611
+ * untraced and uncached.
1031
1612
  */
1032
1613
  traceTool<TArgs, TResult>(
1033
1614
  def: ToolDefinition<TArgs, TResult>,
@@ -1038,7 +1619,7 @@ export class Punk {
1038
1619
  const schemaFp = cleanOptionalCacheDimension("schemaFp", def.schemaFp);
1039
1620
 
1040
1621
  return async (args: TArgs, ctx?: { runId?: string }): Promise<TResult> => {
1041
- const runId = cleanIdentityValue(ctx?.runId);
1622
+ const runId = cleanIdentityValue(ctx?.runId) ?? this.currentRunId();
1042
1623
  const cacheable = level <= 1 && ttlSeconds !== undefined && runId !== undefined && this.subject !== undefined;
1043
1624
 
1044
1625
  if (runId) {
@@ -1326,6 +1907,53 @@ export class Punk {
1326
1907
  return requireRunDetailResult(await this.request<unknown>("GET", path), "GET", path);
1327
1908
  }
1328
1909
 
1910
/**
 * Fetch the route explanation recorded for a run.
 *
 * @returns `run.routeExplanation` from the run detail, or `null` when it is absent.
 */
async explain(runId: string): Promise<RouteExplanation | null> {
  const detail = await this.runDetail(runId);
  return detail.run.routeExplanation ?? null;
}
1913
+
1914
/**
 * Fetch a run's detail and project its cost, savings, latency and token
 * figures. `ghostSavedUsd` is reported as `0` when the run does not carry one.
 */
async savingsForRun(runId: string): Promise<RunSavings> {
  const { run } = await this.runDetail(runId);
  const summary: RunSavings = {
    runId: run.id,
    route: run.route,
    status: run.status,
    costUsd: run.costUsd,
    savedUsd: run.savedUsd,
    ghostSavedUsd: run.ghostSavedUsd ?? 0,
    latencyMs: run.latencyMs,
    inputTokens: run.inputTokens,
    outputTokens: run.outputTokens,
  };
  return summary;
}
1928
+
1929
/** Fetch a run's detail and return its `sideEffects` list. */
async sideEffectsForRun(runId: string): Promise<SideEffectRecord[]> {
  const detail = await this.runDetail(runId);
  return detail.sideEffects;
}
1932
+
1933
/**
 * Poll a run until it reaches a terminal status and return that detail.
 *
 * Polling, timeout and abort behaviour come from `watchRun`.
 *
 * @throws Whatever `watchRun` throws, or an Error if the watch ends without a
 *   terminal status being observed.
 */
async waitForRun(runId: string, opts: RunWaitOptions = {}): Promise<RunDetail> {
  for await (const snapshot of this.watchRun(runId, opts)) {
    const settled = isTerminalRunStatus(snapshot.run.status);
    if (settled) return snapshot;
  }
  const label = cleanPathId("run id", runId);
  throw new Error(`Punk waitForRun(${label}) ended before the run reached a terminal status`);
}
1939
+
1940
/**
 * Poll a run and yield each fetched detail until it reaches a terminal status.
 *
 * The sleep between polls is clamped to the time left before `timeoutMs`, so a
 * poll interval longer than the remaining time no longer pushes the wait past
 * the deadline by up to a full interval.
 *
 * @param runId Run to watch.
 * @param opts `pollIntervalMs` (default `DEFAULT_WAIT_POLL_INTERVAL_MS`),
 *   `timeoutMs` (default `DEFAULT_WAIT_TIMEOUT_MS`) and an optional abort `signal`.
 * @throws Error when the signal aborts, when the timeout elapses before a
 *   terminal status is seen, or whatever `runDetail` throws.
 */
async *watchRun(runId: string, opts: RunWaitOptions = {}): AsyncIterable<RunDetail> {
  const id = cleanPathId("run id", runId);
  const pollIntervalMs = opts.pollIntervalMs ?? DEFAULT_WAIT_POLL_INTERVAL_MS;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
  const startedAt = Date.now();
  const timedOut = (): Error => new Error(`Punk watchRun(${id}) timed out after ${timeoutMs}ms`);
  while (true) {
    if (opts.signal?.aborted) throw new Error("Punk watchRun aborted");
    if (Date.now() - startedAt > timeoutMs) throw timedOut();
    const detail = await this.runDetail(id);
    yield detail;
    if (isTerminalRunStatus(detail.run.status)) return;
    const remainingMs = timeoutMs - (Date.now() - startedAt);
    if (remainingMs <= 0) throw timedOut();
    // Written as a comparison, not Math.min, so a NaN timeout keeps the plain poll interval.
    const waitMs = remainingMs < pollIntervalMs ? remainingMs : pollIntervalMs;
    await checkedDelay(waitMs, opts.signal);
  }
}
1956
+
1329
1957
  async receipt(id: string): Promise<PunkReceipt> {
1330
1958
  const path = `/api/v1/receipts/${encodeURIComponent(cleanPathId("receipt id", id))}`;
1331
1959
  return requireReceiptResult(await this.request<unknown>("GET", path), "GET", path);