@punktechnologies/sdk 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @punktechnologies/sdk
2
2
 
3
- OpenAI-compatible AI gateway SDK for agent tracing, tool caching, governance, observability, and cost optimization.
3
+ Gateway-agnostic AI runtime SDK for OpenAI, Anthropic, OpenRouter, and more, with agent tracing, tool caching, governance, observability, and cost optimization.
4
4
 
5
5
  Punk is the adaptive runtime for production AI agents. Put the gateway between your agents and model providers, then use this SDK where gateway traffic alone cannot see enough context: tool tracing, side-effect declarations, tool-result caching, semantic web fetches, feedback, receipts, evidence packets, MCP registry helpers, prompt ingest, and learning/artifact APIs.
6
6
 
@@ -16,7 +16,7 @@ npm i @punktechnologies/sdk
16
16
  bun add @punktechnologies/sdk
17
17
  ```
18
18
 
19
- Zero runtime dependencies. Requires Node 18+ or Bun and a running Punk gateway. For local evaluation from the Punk repo:
19
+ Zero runtime dependencies. Requires Node 18+ or Bun and a running Punk gateway. `new Punk()` reads `PUNK_BASE_URL`, `PUNK_API_KEY`, `PUNK_APP`, `PUNK_AGENT`, and `PUNK_SUBJECT`; explicit constructor options still win. For local evaluation from the Punk repo:
20
20
 
21
21
  ```bash
22
22
  bun install
@@ -36,18 +36,17 @@ For hosted trials, use `baseUrl: "https://app.punktechnologies.com"` with a tena
36
36
 
37
37
  ## 60-Second Start
38
38
 
39
- **1. Point existing OpenAI-style traffic at Punk.**
39
+ **1. Point existing model traffic at Punk.**
40
40
 
41
- You do not need this SDK for the core gateway value. OpenAI-style and Anthropic-style clients can talk to Punk by changing the gateway URL.
41
+ You do not need this SDK for the core gateway value. OpenAI-style and Anthropic-style clients can talk to Punk by changing the gateway URL, and the SDK can generate the right config objects when you do use it.
42
42
 
43
43
  ```ts
44
44
  import OpenAI from "openai";
45
+ import { Punk } from "@punktechnologies/sdk";
45
46
 
46
- const client = new OpenAI({
47
- baseURL: "http://localhost:4100/v1",
48
- apiKey: process.env.PUNK_API_KEY ?? "punk-local",
49
- defaultHeaders: { "X-Punk-App": "my-app" },
50
- });
47
+ const punk = new Punk({ app: "my-app", agent: "my-bot", subject: "user-123" });
48
+
49
+ const client = new OpenAI(punk.openAIConfig());
51
50
  ```
52
51
 
53
52
  **2. Use the SDK when you need the richer runtime surface.**
@@ -55,12 +54,11 @@ const client = new OpenAI({
55
54
  ```ts
56
55
  import { Punk } from "@punktechnologies/sdk";
57
56
 
58
- const punk = new Punk({ app: "my-app", agent: "my-bot" });
59
- const result = await punk.chat({
57
+ const result = await punk.gateway.chat({
60
58
  model: "gpt-4o",
61
59
  messages: [{ role: "user", content: "Classify this ticket: refund request" }],
62
60
  });
63
- console.log(result.content, result.route, result.runId);
61
+ console.log(result.content, result.route, result.runId, result.usage);
64
62
  // route is "live" on the first call; repeats become "exact_cache" and,
65
63
  // once learned and proven, "artifact".
66
64
  ```
@@ -71,7 +69,7 @@ Open `http://localhost:4100` locally, or `https://app.punktechnologies.com` for
71
69
 
72
70
  ## API tour
73
71
 
74
- Construct once per app/agent identity:
72
+ Construct once per app/agent identity. Omit options to read `PUNK_BASE_URL`, `PUNK_API_KEY`, `PUNK_APP`, `PUNK_AGENT`, and `PUNK_SUBJECT` from the environment.
75
73
 
76
74
  ```ts
77
75
  const punk = new Punk({
@@ -83,7 +81,27 @@ const punk = new Punk({
83
81
  });
84
82
  ```
85
83
 
86
- ### `chat(params)` OpenAI-style completions through the gateway
84
+ ### Adapter config helpers
85
+
86
+ Use these helpers when your app already has a provider client:
87
+
88
+ ```ts
89
+ new OpenAI(punk.openAIConfig());
90
+ new Anthropic(punk.anthropicConfig());
91
+
92
+ const aiSdkProvider = createOpenAICompatible(
93
+ punk.vercelOpenAICompatibleConfig({ name: "punk" })
94
+ );
95
+
96
+ const model = new ChatOpenAI({
97
+ model: "gpt-4o",
98
+ ...punk.langChainConfig()
99
+ });
100
+ ```
101
+
102
+ `identityHeaders()` returns the `X-Punk-*` headers, plus `Authorization` when the client has an API key (pass `includeAuthorization: false` to omit it). All config helpers accept per-call overrides such as `{ app, agent, subject, baseUrl, apiKey }`.
103
+
104
+ ### `chat(params)` / `openai.chat(params)` — OpenAI-style completions
87
105
 
88
106
  ```ts
89
107
  const r = await punk.chat({
@@ -94,11 +112,53 @@ const r = await punk.chat({
94
112
  r.content; // assistant text
95
113
  r.runId; // from the x-punk-run-id response header — use it for tracing/feedback
96
114
  r.route; // "live" | "exact_cache" | "artifact" | ... (x-punk-route header)
115
+ r.usage; // normalized input/output/total token counts when present
116
+ r.model; // response model when present
117
+ r.provider; // response provider when present
97
118
  r.raw; // the full OpenAI-shaped response body
98
119
  ```
99
120
 
100
121
  Every response carries a run id and the route Punk chose. `punk.runDetail(r.runId)` returns the full trace and the `RouteExplanation` — why this route, what was rejected, what it saved.
101
122
 
123
+ Streaming is built in:
124
+
125
+ ```ts
126
+ for await (const chunk of punk.streamChat({
127
+ model: "gpt-4o",
128
+ messages: [{ role: "user", content: "Stream a support reply." }]
129
+ })) {
130
+ if (chunk.type === "delta") process.stdout.write(chunk.content);
131
+ }
132
+ ```
133
+
134
+ ### `anthropic.messages(params)` — Anthropic Messages
135
+
136
+ ```ts
137
+ const msg = await punk.anthropic.messages({
138
+ model: "claude-sonnet-4-6",
139
+ max_tokens: 256,
140
+ messages: [{ role: "user", content: "What is a deterministic artifact?" }]
141
+ });
142
+
143
+ msg.content; // text blocks joined together
144
+ msg.contentBlocks; // original Anthropic content blocks
145
+ msg.runId;
146
+ msg.route;
147
+ msg.usage;
148
+ ```
149
+
150
+ Streaming Anthropic-shaped responses works the same way:
151
+
152
+ ```ts
153
+ for await (const chunk of punk.streamMessages({
154
+ model: "claude-sonnet-4-6",
155
+ max_tokens: 256,
156
+ messages: [{ role: "user", content: "Stream a haiku about caching." }]
157
+ })) {
158
+ if (chunk.type === "delta") process.stdout.write(chunk.content);
159
+ }
160
+ ```
161
+
102
162
  For Punk Chorus, use `model: "punk/chorus"` and add Chorus-specific routing fields to the same body. The SDK helper below uses the OpenAI-style chat wire; direct HTTP callers can use the same model id through supported gateway wires.
103
163
 
104
164
  ```ts
@@ -149,8 +209,9 @@ const lookupAccount = punk.traceTool({
149
209
  execute: async (args: { accountId: string }) => crm.get(args.accountId),
150
210
  });
151
211
 
152
- // Pass the runId from the chat that triggered the tool:
153
- const account = await lookupAccount({ accountId: "acct_42" }, { runId: r.runId });
212
+ await punk.withRun(r, async () => {
213
+ const account = await lookupAccount({ accountId: "acct_42" });
214
+ });
154
215
  ```
155
216
 
156
217
  Side-effect levels (PRD §17):
@@ -163,7 +224,7 @@ Side-effect levels (PRD §17):
163
224
  | 3 | User-visible write | email, Slack, ticket creation |
164
225
  | 4 | High-impact | payments, deletion, permissions |
165
226
 
166
- Undeclared tools default to **level 3** (conservative). Levels 0–1 with a TTL are cached per tenant/subject; levels ≥ 2 emit `side_effect.planned` before execution so replay and shadow runs can suppress them. Without a `runId`, the tool still executes — just untraced. Cache and trace failures never break the tool call.
227
+ Undeclared tools default to **level 3** (conservative). Levels 0–1 with a TTL are cached per tenant/subject; levels ≥ 2 emit `side_effect.planned` before execution so replay and shadow runs can suppress them. `traceTool` uses an explicit `{ runId }` when supplied, otherwise the active `withRun(...)` context. Without either, the tool still executes — just untraced and uncached. Cache and trace failures never break the tool call.
167
228
 
168
229
  ### `feedback(runId, rating, correction?)` — close the loop
169
230
 
@@ -231,6 +292,10 @@ await punk.patterns(); // discovered patterns and their lifecycle state
231
292
  await punk.artifacts(); // synthesized artifacts with confidence + evidence counts
232
293
  await punk.artifactDetail(id); // artifact + replay/shadow evaluations + source pattern
233
294
  await punk.runDetail(id); // run + full trace events + side-effect records
295
+ await punk.explain(id); // routeExplanation only
296
+ await punk.savingsForRun(id); // per-run cost/savings counters
297
+ await punk.sideEffectsForRun(id);
298
+ await punk.waitForRun(id); // poll until completed/failed/blocked
234
299
  await punk.receipt(id); // Chorus receipt for a run
235
300
  await punk.evidencePacket(id); // support/security evidence packet for a run
236
301
  await punk.cacheStats(); // per-tier entries and hits
package/dist/index.d.ts CHANGED
@@ -5,7 +5,7 @@
5
5
  * `traceTool`, and the runtime observes, caches, learns and (after replay +
6
6
  * shadow proof) routes repeated work through deterministic artifacts.
7
7
  */
8
- import type { Artifact, ArtifactEvaluation, McpServerRecord, McpTestResult, Pattern, PromotionGateStatus, Run, SavingsSummary, SideEffectLevel, SideEffectRecord, SomDiff, SomSnapshot, TraceEvent, TraceEventType, TrustLane, WebActionIntent, WebActionResult } from "./types";
8
+ import type { Artifact, ArtifactEvaluation, McpServerRecord, McpTestResult, Pattern, PromotionGateStatus, RouteExplanation, Run, SavingsSummary, SideEffectLevel, SideEffectRecord, SomDiff, SomSnapshot, TraceEvent, TraceEventType, TrustLane, WebActionIntent, WebActionResult } from "./types";
9
9
  export type * from "./types";
10
10
  export interface PunkOptions {
11
11
  /** Gateway base URL. Default: http://localhost:4100 */
@@ -108,6 +108,129 @@ export interface ChatResult {
108
108
  runId: string;
109
109
  route: string;
110
110
  raw: any;
111
+ usage?: PunkUsage;
112
+ model?: string;
113
+ provider?: string;
114
+ }
115
+ export interface PunkUsage {
116
+ inputTokens?: number;
117
+ outputTokens?: number;
118
+ totalTokens?: number;
119
+ raw: unknown;
120
+ }
121
+ export interface AnthropicContentBlock {
122
+ type: string;
123
+ text?: string;
124
+ [key: string]: unknown;
125
+ }
126
+ export interface AnthropicMessageParams extends PunkChorusOptions {
127
+ model: string;
128
+ max_tokens: number;
129
+ messages: Array<{
130
+ role: "user" | "assistant" | (string & {});
131
+ content: string | AnthropicContentBlock[];
132
+ }>;
133
+ system?: string | AnthropicContentBlock[];
134
+ temperature?: number;
135
+ top_p?: number;
136
+ top_k?: number;
137
+ stop_sequences?: string[];
138
+ tools?: unknown[];
139
+ tool_choice?: unknown;
140
+ metadata?: Record<string, unknown>;
141
+ }
142
+ export interface AnthropicMessageResult {
143
+ content: string;
144
+ contentBlocks: AnthropicContentBlock[];
145
+ runId: string;
146
+ route: string;
147
+ raw: any;
148
+ usage?: PunkUsage;
149
+ model?: string;
150
+ provider?: string;
151
+ stopReason?: string;
152
+ }
153
+ export interface ChatStreamChunk {
154
+ type: "delta" | "done";
155
+ content: string;
156
+ toolCalls: ChatToolCall[];
157
+ runId: string;
158
+ route: string;
159
+ raw?: any;
160
+ usage?: PunkUsage;
161
+ model?: string;
162
+ provider?: string;
163
+ }
164
+ export interface AnthropicMessageStreamChunk {
165
+ type: "delta" | "done";
166
+ content: string;
167
+ runId: string;
168
+ route: string;
169
+ event?: string;
170
+ raw?: any;
171
+ usage?: PunkUsage;
172
+ model?: string;
173
+ provider?: string;
174
+ }
175
+ export interface PunkIdentityHeadersOptions {
176
+ /** Include Authorization when this client has an API key. Default: true. */
177
+ includeAuthorization?: boolean;
178
+ /** Include Content-Type: application/json. Default: false. */
179
+ includeContentType?: boolean;
180
+ /** Override the X-Punk-App value for this config object. */
181
+ app?: string;
182
+ /** Override the X-Punk-Agent value for this config object. */
183
+ agent?: string;
184
+ /** Override the X-Punk-Subject value for this config object. */
185
+ subject?: string;
186
+ /** Extra headers to merge last. Blank values are ignored. */
187
+ headers?: Record<string, string | undefined>;
188
+ }
189
+ export interface PunkClientConfigOptions extends PunkIdentityHeadersOptions {
190
+ baseUrl?: string;
191
+ apiKey?: string;
192
+ name?: string;
193
+ includeUsage?: boolean;
194
+ }
195
+ export interface PunkOpenAIConfig {
196
+ baseURL: string;
197
+ apiKey: string;
198
+ defaultHeaders: Record<string, string>;
199
+ }
200
+ export interface PunkAnthropicConfig {
201
+ baseURL: string;
202
+ authToken: string;
203
+ defaultHeaders: Record<string, string>;
204
+ }
205
+ export interface PunkVercelOpenAICompatibleConfig {
206
+ name: string;
207
+ baseURL: string;
208
+ apiKey: string;
209
+ headers: Record<string, string>;
210
+ includeUsage: boolean;
211
+ }
212
+ export interface PunkLangChainConfig {
213
+ apiKey: string;
214
+ configuration: {
215
+ baseURL: string;
216
+ defaultHeaders: Record<string, string>;
217
+ };
218
+ }
219
+ export interface RunSavings {
220
+ runId: string;
221
+ route?: string;
222
+ status: Run["status"];
223
+ costUsd: number;
224
+ savedUsd: number;
225
+ ghostSavedUsd: number;
226
+ latencyMs: number;
227
+ inputTokens: number;
228
+ outputTokens: number;
229
+ }
230
+ export interface RunWaitOptions {
231
+ pollIntervalMs?: number;
232
+ timeoutMs?: number;
233
+ signal?: AbortSignal;
111
234
  }
112
235
  export interface PunkReceipt {
113
236
  id?: string;
@@ -210,14 +333,53 @@ export declare class Punk {
210
333
  readonly agent?: string;
211
334
  readonly subject?: string;
212
335
  private readonly apiKey?;
336
+ private readonly runStack;
213
337
  constructor(opts?: PunkOptions);
338
+ readonly gateway: {
339
+ chat: (params: ChatParams) => Promise<ChatResult>;
340
+ streamChat: (params: ChatParams) => AsyncIterable<ChatStreamChunk>;
341
+ stream: (params: ChatParams) => AsyncIterable<ChatStreamChunk>;
342
+ };
343
+ readonly openai: {
344
+ chat: (params: ChatParams) => Promise<ChatResult>;
345
+ streamChat: (params: ChatParams) => AsyncIterable<ChatStreamChunk>;
346
+ stream: (params: ChatParams) => AsyncIterable<ChatStreamChunk>;
347
+ config: (opts?: PunkClientConfigOptions) => PunkOpenAIConfig;
348
+ };
349
+ readonly anthropic: {
350
+ messages: (params: AnthropicMessageParams) => Promise<AnthropicMessageResult>;
351
+ streamMessages: (params: AnthropicMessageParams) => AsyncIterable<AnthropicMessageStreamChunk>;
352
+ stream: (params: AnthropicMessageParams) => AsyncIterable<AnthropicMessageStreamChunk>;
353
+ config: (opts?: PunkClientConfigOptions) => PunkAnthropicConfig;
354
+ };
355
+ identityHeaders(opts?: PunkIdentityHeadersOptions): Record<string, string>;
356
+ openAIConfig(opts?: PunkClientConfigOptions): PunkOpenAIConfig;
357
+ anthropicConfig(opts?: PunkClientConfigOptions): PunkAnthropicConfig;
358
+ vercelAIConfig(opts?: PunkClientConfigOptions): PunkVercelOpenAICompatibleConfig;
359
+ vercelOpenAICompatibleConfig(opts?: PunkClientConfigOptions): PunkVercelOpenAICompatibleConfig;
360
+ langChainConfig(opts?: PunkClientConfigOptions): PunkLangChainConfig;
361
+ currentRunId(): string | undefined;
362
+ withRun<T>(run: string | {
363
+ runId?: string | null | undefined;
364
+ }, fn: () => T | Promise<T>): Promise<Awaited<T>>;
214
365
  /** Send an OpenAI-compatible chat completion through the gateway. */
215
366
  chat(params: ChatParams): Promise<ChatResult>;
367
+ /** Stream an OpenAI-compatible chat completion through the gateway. */
368
+ streamChat(params: ChatParams): AsyncIterable<ChatStreamChunk>;
369
+ /** Stream an OpenAI-compatible chat completion through the gateway. */
370
+ chatStream(params: ChatParams): AsyncIterable<ChatStreamChunk>;
371
+ /** Send an Anthropic-compatible Messages request through the gateway. */
372
+ messages(params: AnthropicMessageParams): Promise<AnthropicMessageResult>;
373
+ /** Stream an Anthropic-compatible Messages request through the gateway. */
374
+ streamMessages(params: AnthropicMessageParams): AsyncIterable<AnthropicMessageStreamChunk>;
375
+ /** Stream an Anthropic-compatible Messages request through the gateway. */
376
+ messagesStream(params: AnthropicMessageParams): AsyncIterable<AnthropicMessageStreamChunk>;
216
377
  /**
217
378
  * Wrap a tool so every invocation is traced into the run it belongs to, and
218
379
  * explicitly declared read-only results (level <= 1 with a TTL) flow
219
- * through the tool-result cache. Tracing requires `ctx.runId`; without it
220
- * the tool still executes, silently untraced and uncached.
380
+ * through the tool-result cache. Tracing uses `ctx.runId` first, then an
381
+ * active `withRun` id; without either the tool still executes, silently
382
+ * untraced and uncached.
221
383
  */
222
384
  traceTool<TArgs, TResult>(def: ToolDefinition<TArgs, TResult>): TracedTool<TArgs, TResult>;
223
385
  /** Append a trace event to a run. */
@@ -304,6 +466,11 @@ export declare class Punk {
304
466
  artifacts(): Promise<Artifact[]>;
305
467
  artifactDetail(id: string): Promise<ArtifactDetail>;
306
468
  runDetail(id: string): Promise<RunDetail>;
469
+ explain(runId: string): Promise<RouteExplanation | null>;
470
+ savingsForRun(runId: string): Promise<RunSavings>;
471
+ sideEffectsForRun(runId: string): Promise<SideEffectRecord[]>;
472
+ waitForRun(runId: string, opts?: RunWaitOptions): Promise<RunDetail>;
473
+ watchRun(runId: string, opts?: RunWaitOptions): AsyncIterable<RunDetail>;
307
474
  receipt(id: string): Promise<PunkReceipt>;
308
475
  evidencePacket(runId: string): Promise<EvidencePacket>;
309
476
  cacheStats(): Promise<CacheStats>;