@loreai/gateway 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ /**
2
+ * Anthropic ↔ Gateway translation layer.
3
+ *
4
+ * Converts between Anthropic's `/v1/messages` API format and the gateway's
5
+ * internal `GatewayRequest`/`GatewayResponse` types. The parser is lenient —
6
+ * unknown fields pass through in `metadata` rather than causing errors.
7
+ */
8
+ import type {
9
+ GatewayContentBlock,
10
+ GatewayMessage,
11
+ GatewayRequest,
12
+ GatewayResponse,
13
+ GatewayTool,
14
+ } from "./types";
15
+ import { extractAuth, authHeaders } from "../auth";
16
+
17
// ---------------------------------------------------------------------------
// Anthropic API version — attached to every outgoing upstream request
// ---------------------------------------------------------------------------

/** Value sent in the `anthropic-version` header by `buildAnthropicRequest`. */
const ANTHROPIC_VERSION = "2023-06-01";
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Fields the gateway reads/writes — everything else goes into `metadata`
25
+ // ---------------------------------------------------------------------------
26
+
27
/**
 * Names of the top-level request body keys that `parseAnthropicRequest`
 * lifts into dedicated `GatewayRequest` properties. Every other top-level
 * key is carried through in `metadata` instead.
 */
const KNOWN_BODY_FIELDS: ReadonlySet<string> = new Set<string>(
  ["model", "system", "messages", "tools", "max_tokens", "stream"],
);
36
+
37
+ // ---------------------------------------------------------------------------
38
+ // Helpers — content block translation
39
+ // ---------------------------------------------------------------------------
40
+
41
/**
 * Normalize one element of an Anthropic message `content` array into a
 * `GatewayContentBlock`.
 *
 * - `text`, `thinking`, `tool_use`: fields are copied across; missing string
 *   fields default to `""`. A `thinking` block keeps its `signature` only
 *   when one is present.
 * - `tool_result`: `content` may be a string or an array of blocks. For the
 *   array form only the `text` sub-blocks are kept, joined with newlines;
 *   other sub-blocks and malformed entries are skipped.
 * - Any other block type is serialized with `JSON.stringify` into a text
 *   block so it is not silently dropped.
 *
 * Malformed elements do not throw: a bare string becomes a text block and any
 * other non-object value (e.g. `null`) becomes a text block holding its JSON
 * form.
 *
 * @param block One raw content element (callers cast untrusted JSON to this type).
 * @returns The equivalent gateway block.
 */
function toGatewayBlock(block: Record<string, unknown>): GatewayContentBlock {
  // The parameter type is only a cast at the call site, so re-check at runtime
  // before dereferencing — `null.type` would otherwise throw a TypeError.
  const candidate: unknown = block;
  if (typeof candidate === "string") {
    return { type: "text", text: candidate };
  }
  if (typeof candidate !== "object" || candidate === null) {
    // JSON.stringify(undefined) yields undefined, hence the fallback.
    return { type: "text", text: JSON.stringify(candidate) ?? "" };
  }

  switch (block.type) {
    case "text":
      return { type: "text", text: String(block.text ?? "") };

    case "thinking":
      return {
        type: "thinking",
        thinking: String(block.thinking ?? ""),
        ...(block.signature != null
          ? { signature: String(block.signature) }
          : {}),
      };

    case "tool_use":
      return {
        type: "tool_use",
        id: String(block.id ?? ""),
        name: String(block.name ?? ""),
        input: block.input,
      };

    case "tool_result": {
      // Anthropic `tool_result` content can be a string or an array of blocks.
      let content = "";
      if (typeof block.content === "string") {
        content = block.content;
      } else if (Array.isArray(block.content)) {
        content = block.content
          .filter(
            (part): part is Record<string, unknown> =>
              typeof part === "object" &&
              part !== null &&
              (part as Record<string, unknown>).type === "text",
          )
          .map((part) => String(part.text ?? ""))
          .join("\n");
      }
      return {
        type: "tool_result",
        toolUseId: String(block.tool_use_id ?? ""),
        content,
        ...(block.is_error ? { isError: true } : {}),
      };
    }

    default:
      // Unknown block type — preserve as text so nothing is silently dropped
      return { type: "text", text: JSON.stringify(block) };
  }
}
92
+
93
/**
 * Turn an Anthropic message `content` value into a `GatewayContentBlock[]`.
 *
 * - A string becomes a single text block.
 * - An array is converted element by element with `toGatewayBlock`.
 * - Anything else (null, undefined, numbers, plain objects, …) yields `[]`.
 */
function normalizeContent(content: unknown): GatewayContentBlock[] {
  if (Array.isArray(content)) {
    const convert = (entry: unknown): GatewayContentBlock =>
      toGatewayBlock(entry as Record<string, unknown>);
    return content.map(convert);
  }

  // Only a string carries content beyond this point; everything else is empty.
  return typeof content === "string" ? [{ type: "text", text: content }] : [];
}
111
+
112
/**
 * Normalize Anthropic's `system` field into a single string.
 *
 * Accepted shapes:
 * - `undefined` / `null` → `""`
 * - a plain string → returned as-is
 * - an array of content blocks (e.g. with `cache_control`) → the `text`
 *   blocks joined with newlines; non-text blocks and malformed entries
 *   (null, primitives) are skipped
 * - a single text block object not wrapped in an array → its `text`
 * - any other object → its JSON form, rather than `"[object Object]"`
 * - any other primitive → `String(value)`
 *
 * @param system Raw `system` value from the request body.
 * @returns The system prompt text.
 */
function normalizeSystem(system: unknown): string {
  if (system == null) return "";
  if (typeof system === "string") return system;

  if (Array.isArray(system)) {
    return system
      .filter(
        (entry): entry is Record<string, unknown> =>
          // Guard first: reading `.type` off a null entry would throw.
          typeof entry === "object" &&
          entry !== null &&
          (entry as Record<string, unknown>).type === "text",
      )
      .map((entry) => String(entry.text ?? ""))
      .join("\n");
  }

  if (typeof system === "object") {
    const single = system as Record<string, unknown>;
    if (single.type === "text") return String(single.text ?? "");
    // String(object) would yield "[object Object]"; keep the data readable.
    return JSON.stringify(system) ?? "";
  }

  return String(system);
}
131
+
132
+ // ---------------------------------------------------------------------------
133
+ // Reverse helpers — gateway blocks → Anthropic format
134
+ // ---------------------------------------------------------------------------
135
+
136
/**
 * Serialize a `GatewayContentBlock` into the Anthropic wire representation.
 *
 * Gateway camelCase fields (`toolUseId`, `isError`) are mapped to their
 * snake_case wire names; optional fields (`signature`, `is_error`) are only
 * emitted when set.
 */
function toAnthropicBlock(
  block: GatewayContentBlock,
): Record<string, unknown> {
  switch (block.type) {
    case "text": {
      const { text } = block;
      return { type: "text", text };
    }

    case "thinking": {
      const wire: Record<string, unknown> = {
        type: "thinking",
        thinking: block.thinking,
      };
      if (block.signature != null) {
        wire.signature = block.signature;
      }
      return wire;
    }

    case "tool_use": {
      const { id, name, input } = block;
      return { type: "tool_use", id, name, input };
    }

    case "tool_result":
      return {
        type: "tool_result",
        tool_use_id: block.toolUseId,
        content: block.content,
        // `is_error` is only present on failures, never as `false`.
        ...(block.isError ? { is_error: true } : {}),
      };
  }
}
172
+
173
+ // ---------------------------------------------------------------------------
174
+ // parseAnthropicRequest
175
+ // ---------------------------------------------------------------------------
176
+
177
/**
 * Parse a raw Anthropic `/v1/messages` request body into a `GatewayRequest`.
 *
 * Lenient by design:
 * - A body that is not a plain object (null, string, array, …) is treated as
 *   an empty request.
 * - `model` defaults to `""`, `max_tokens` to 4096 when it is not a number,
 *   and `stream` is true only for a literal `true`.
 * - Entries of `messages` / `tools` that are not objects are skipped rather
 *   than causing a TypeError. Any role other than `"assistant"` maps to
 *   `"user"`.
 * - Unknown top-level fields are preserved in `metadata` for upstream
 *   forwarding.
 *
 * @param body    Decoded JSON request body (untrusted).
 * @param headers Incoming request headers, stored unchanged as `rawHeaders`.
 * @returns The normalized gateway request with `protocol: "anthropic"`.
 */
export function parseAnthropicRequest(
  body: unknown,
  headers: Record<string, string>,
): GatewayRequest {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  // Object.entries on a string or array body would leak characters / indices
  // into `metadata`, so only a real object is accepted as the field source.
  const raw: Record<string, unknown> = isRecord(body) ? body : {};

  // --- Extract known fields ---
  const model = String(raw.model ?? "");
  const system = normalizeSystem(raw.system);
  const stream = raw.stream === true;
  const maxTokens =
    typeof raw.max_tokens === "number" ? raw.max_tokens : 4096;

  // --- Messages ---
  const rawMessages: unknown[] = Array.isArray(raw.messages)
    ? raw.messages
    : [];
  const messages: GatewayMessage[] = rawMessages
    .filter(isRecord)
    .map(
      (msg): GatewayMessage => ({
        role: msg.role === "assistant" ? "assistant" : "user",
        content: normalizeContent(msg.content),
      }),
    );

  // --- Tools ---
  const rawTools: unknown[] = Array.isArray(raw.tools) ? raw.tools : [];
  const tools: GatewayTool[] = rawTools.filter(isRecord).map(
    (t): GatewayTool => ({
      name: String(t.name ?? ""),
      description: String(t.description ?? ""),
      inputSchema: (t.input_schema as Record<string, unknown>) ?? {},
    }),
  );

  // --- Metadata: everything the gateway doesn't explicitly process ---
  const metadata: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(raw)) {
    // JSON.parse can produce an own "__proto__" key; assigning it below would
    // invoke the prototype setter and re-parent `metadata` instead of storing
    // the value, so it is dropped.
    if (key === "__proto__" || KNOWN_BODY_FIELDS.has(key)) continue;
    metadata[key] = value;
  }

  return {
    protocol: "anthropic",
    model,
    system,
    messages,
    tools,
    stream,
    maxTokens,
    metadata,
    rawHeaders: headers,
  };
}
236
+
237
+ // ---------------------------------------------------------------------------
238
+ // Caching options
239
+ // ---------------------------------------------------------------------------
240
+
241
/**
 * Knobs for Anthropic prompt caching, consumed by `buildAnthropicRequest`.
 *
 * The two settings are independent of each other:
 *
 * 1. **System prompt caching** (`systemTTL`) — `system` is sent as a block
 *    array carrying an explicit `cache_control` breakpoint. This is the most
 *    stable cache slot, since the system prompt rarely changes within a
 *    session.
 * 2. **Conversation caching** (`cacheConversation`) — an explicit
 *    `cache_control` breakpoint is put on the last message block so Anthropic
 *    can cache the conversation prefix up to that point. Between consecutive
 *    stable turns (same gradient layer, no distillation arrival, no window
 *    eviction) the prefix is byte-identical → cache reads at 0.1× base cost
 *    vs 1× uncached.
 *
 * Title/summary passthrough requests should NEVER enable caching — their
 * content varies on every call, producing 1.25× write cost with zero reads.
 */
export type AnthropicCacheOptions = {
  /**
   * TTL for the system-prompt cache breakpoint.
   * - `false` (or omitted) — the system prompt is sent as a plain string
   *   with no breakpoint
   * - `"5m"` — default 5-minute TTL; suits conversation turns, which arrive
   *   often enough to keep refreshing it
   * - `"1h"` — extended 1-hour TTL; suits worker calls that come in bursts
   *   separated by minutes of user thinking
   */
  systemTTL?: false | "5m" | "1h";

  /**
   * When `true`, the gateway adds `cache_control: { type: "ephemeral" }` to
   * the final content block of the last message so Anthropic can cache the
   * conversation prefix. On the next turn, Anthropic's lookback window finds
   * the prior breakpoint, reads the cached prefix (0.1× cost), and writes
   * only the new tail (1.25×).
   */
  cacheConversation?: boolean;
};
279
+
280
+ // ---------------------------------------------------------------------------
281
+ // buildAnthropicRequest
282
+ // ---------------------------------------------------------------------------
283
+
284
/**
 * Convert a `GatewayRequest` back to Anthropic API format for upstream
 * forwarding.
 *
 * Returns the relative path, headers, and JSON body. The caller prepends
 * the upstream base URL.
 *
 * Headers always carry `content-type` and `anthropic-version`. Headers
 * produced by `authHeaders` for the credential that `extractAuth` finds in
 * `req.rawHeaders` are merged in, and an incoming `anthropic-beta` header is
 * forwarded regardless of how its name is cased.
 *
 * Body assembly order: `model` / `max_tokens` / `stream`, then `system`
 * (omitted when empty), `messages`, `tools` (omitted when empty), and finally
 * every `metadata` entry, which is written last and therefore wins on a key
 * collision.
 *
 * @param req   The normalized gateway request
 * @param cache Optional caching configuration. When omitted, no
 *              `cache_control` annotations are added (passthrough behavior).
 * @returns `url` (always `/v1/messages`), outgoing `headers`, and the `body`
 *          object to serialize.
 */
export function buildAnthropicRequest(
  req: GatewayRequest,
  cache?: AnthropicCacheOptions,
): {
  url: string;
  headers: Record<string, string>;
  body: unknown;
} {
  // --- Headers ---
  const headers: Record<string, string> = {
    "content-type": "application/json",
    "anthropic-version": ANTHROPIC_VERSION,
  };

  // Forward auth from the original request (API key or OAuth Bearer)
  const cred = extractAuth(req.rawHeaders);
  if (cred) {
    Object.assign(headers, authHeaders(cred));
  }

  // Forward anthropic-beta if present (enables features like extended
  // thinking). HTTP header names are case-insensitive, so after the two
  // common spellings fall back to a case-insensitive scan.
  let beta =
    req.rawHeaders["anthropic-beta"] || req.rawHeaders["Anthropic-Beta"] || "";
  if (!beta) {
    for (const [name, value] of Object.entries(req.rawHeaders)) {
      if (value && name.toLowerCase() === "anthropic-beta") {
        beta = value;
        break;
      }
    }
  }
  if (beta) {
    headers["anthropic-beta"] = beta;
  }

  // --- Body ---
  const body: Record<string, unknown> = {
    model: req.model,
    max_tokens: req.maxTokens,
    stream: req.stream,
  };

  // System — only include if non-empty
  if (req.system) {
    const systemTTL = cache?.systemTTL;
    if (systemTTL) {
      // Send as block array with explicit cache_control breakpoint.
      // This creates a stable cache slot for the system prompt — it changes
      // only when LTM entries are added/removed or AGENTS.md is updated.
      const cacheControl: Record<string, string> =
        systemTTL === "1h"
          ? { type: "ephemeral", ttl: "1h" }
          : { type: "ephemeral" };
      body.system = [
        { type: "text", text: req.system, cache_control: cacheControl },
      ];
    } else {
      body.system = req.system;
    }
  }

  // Messages — fresh objects, so annotating them below never touches `req`.
  const messages = req.messages.map((msg) => ({
    role: msg.role,
    content: msg.content.map(toAnthropicBlock),
  }));

  // Conversation caching: place a breakpoint on the last cacheable content
  // block of the last message. Anthropic's 20-block lookback finds the prior
  // turn's breakpoint, reads the cached prefix, and writes only the new tail.
  // Thinking blocks and empty text blocks cannot carry `cache_control`, so
  // walk backwards past them; if nothing qualifies, no breakpoint is added.
  if (cache?.cacheConversation && messages.length > 0) {
    const tail = messages[messages.length - 1]!.content;
    for (let i = tail.length - 1; i >= 0; i--) {
      const candidate = tail[i]! as Record<string, unknown>;
      const uncacheable =
        candidate.type === "thinking" ||
        (candidate.type === "text" &&
          String(candidate.text ?? "").trim() === "");
      if (uncacheable) continue;
      candidate.cache_control = { type: "ephemeral" };
      break;
    }
  }

  body.messages = messages;

  // Tools — only include if present
  if (req.tools.length > 0) {
    body.tools = req.tools.map((t) => ({
      name: t.name,
      description: t.description,
      input_schema: t.inputSchema,
    }));
  }

  // Restore all metadata params (temperature, top_p, stop_sequences, etc.)
  for (const [key, value] of Object.entries(req.metadata)) {
    // Assigning "__proto__" would re-parent `body` instead of adding a field.
    if (key === "__proto__") continue;
    body[key] = value;
  }

  return {
    url: "/v1/messages",
    headers,
    body,
  };
}
389
+
390
+ // ---------------------------------------------------------------------------
391
+ // buildAnthropicNonStreamResponse
392
+ // ---------------------------------------------------------------------------
393
+
394
/**
 * Render a `GatewayResponse` as a non-streaming Anthropic `/v1/messages`
 * response object.
 *
 * The result has `type: "message"`, `role: "assistant"`, the content blocks
 * converted with `toAnthropicBlock`, `stop_sequence` fixed to `null`, and a
 * `usage` object. The two cache token counters appear in `usage` only when
 * the gateway response carries them (non-null).
 */
export function buildAnthropicNonStreamResponse(
  resp: GatewayResponse,
): unknown {
  const {
    inputTokens,
    outputTokens,
    cacheReadInputTokens,
    cacheCreationInputTokens,
  } = resp.usage;

  const usage: Record<string, number> = {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    ...(cacheReadInputTokens != null
      ? { cache_read_input_tokens: cacheReadInputTokens }
      : {}),
    ...(cacheCreationInputTokens != null
      ? { cache_creation_input_tokens: cacheCreationInputTokens }
      : {}),
  };

  return {
    id: resp.id,
    type: "message",
    role: "assistant",
    model: resp.model,
    content: resp.content.map(toAnthropicBlock),
    stop_reason: resp.stopReason,
    stop_sequence: null,
    usage,
  };
}