@loreai/gateway 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,469 +0,0 @@
1
- /**
2
- * Anthropic ↔ Gateway translation layer.
3
- *
4
- * Converts between Anthropic's `/v1/messages` API format and the gateway's
5
- * internal `GatewayRequest`/`GatewayResponse` types. The parser is lenient —
6
- * unknown fields pass through in `metadata` rather than causing errors.
7
- */
8
- import type {
9
- GatewayContentBlock,
10
- GatewayMessage,
11
- GatewayRequest,
12
- GatewayResponse,
13
- GatewayTool,
14
- } from "./types";
15
- import { extractAuth, authHeaders } from "../auth";
16
-
17
- // ---------------------------------------------------------------------------
18
- // Anthropic API version — used in all outgoing requests
19
- // ---------------------------------------------------------------------------
20
-
21
/** Value sent in the `anthropic-version` header of every outgoing request. */
const ANTHROPIC_VERSION = "2023-06-01";
22
-
23
- // ---------------------------------------------------------------------------
24
- // Fields the gateway reads/writes — everything else goes into `metadata`
25
- // ---------------------------------------------------------------------------
26
-
27
/**
 * Names of the top-level request-body keys that are lifted into dedicated
 * `GatewayRequest` fields. Any key not listed here is carried in `metadata`.
 */
const KNOWN_BODY_FIELDS = new Set<string>([
  "model", "system", "messages",
  "tools", "max_tokens", "stream",
]);
36
-
37
- // ---------------------------------------------------------------------------
38
- // Helpers — content block translation
39
- // ---------------------------------------------------------------------------
40
-
41
/**
 * Normalize one Anthropic content block (an element of a message's `content`
 * array) into a `GatewayContentBlock`.
 *
 * Recognized `type` values are `text`, `thinking`, `tool_use` and
 * `tool_result`. Any other object is preserved as a text block holding its
 * JSON serialization so nothing is silently dropped.
 *
 * The parameter is typed as a record, but callers cast untrusted JSON to that
 * type, so non-object values are tolerated instead of throwing: a bare string
 * becomes a text block, and any other non-object (e.g. `null`) becomes a text
 * block containing its JSON form.
 *
 * @param block Raw content block from the request body.
 * @returns The equivalent gateway block.
 */
function toGatewayBlock(block: Record<string, unknown>): GatewayContentBlock {
  // Guard the caller's cast: reading `.type` on null/undefined would throw.
  if (typeof block === "string") {
    return { type: "text", text: block };
  }
  if (block === null || typeof block !== "object") {
    // JSON.stringify(undefined) yields undefined at runtime, hence the fallback.
    return { type: "text", text: JSON.stringify(block) ?? "" };
  }

  switch (block.type) {
    case "text":
      return { type: "text", text: String(block.text ?? "") };

    case "thinking":
      return {
        type: "thinking",
        thinking: String(block.thinking ?? ""),
        // Only emit `signature` when the source block carried one.
        ...(block.signature != null
          ? { signature: String(block.signature) }
          : undefined),
      };

    case "tool_use":
      return {
        type: "tool_use",
        id: String(block.id ?? ""),
        name: String(block.name ?? ""),
        input: block.input,
      };

    case "tool_result": {
      // `content` may be a plain string or an array of blocks. For arrays,
      // only the text blocks are kept, joined with newlines; other nested
      // block types are not representable in the string-valued gateway field.
      let content = "";
      if (typeof block.content === "string") {
        content = block.content;
      } else if (Array.isArray(block.content)) {
        content = (block.content as Array<Record<string, unknown> | null>)
          // Skip null/undefined entries rather than throwing on `.type`.
          .filter((b) => b != null && b.type === "text")
          .map((b) => String(b?.text ?? ""))
          .join("\n");
      }
      return {
        type: "tool_result",
        toolUseId: String(block.tool_use_id ?? ""),
        content,
        ...(block.is_error ? { isError: true } : undefined),
      };
    }

    default:
      // Unknown block type — preserve as text so nothing is silently dropped
      return { type: "text", text: JSON.stringify(block) };
  }
}
92
-
93
/**
 * Turn a message's `content` value into a `GatewayContentBlock[]`.
 *
 * - array  → each element is converted with `toGatewayBlock`
 * - string → a single text block
 * - anything else (null, undefined, unexpected types) → empty array
 */
function normalizeContent(content: unknown): GatewayContentBlock[] {
  if (Array.isArray(content)) {
    const convert = (entry: unknown): GatewayContentBlock =>
      toGatewayBlock(entry as Record<string, unknown>);
    return content.map((entry) => convert(entry));
  }

  return typeof content === "string" ? [{ type: "text", text: content }] : [];
}
111
-
112
/**
 * Normalize Anthropic's `system` field into a single string.
 *
 * - `undefined` / `null` → `""`
 * - a plain string → returned unchanged
 * - an array of content blocks (e.g. carrying `cache_control`) → the `text`
 *   of every `type: "text"` block, joined with newlines; other entries,
 *   including `null`/`undefined` and non-object values, are skipped
 * - any other value → `String(value)`
 *
 * @param system Raw `system` value from the request body.
 * @returns The system prompt as plain text.
 */
function normalizeSystem(system: unknown): string {
  if (system == null) return "";
  if (typeof system === "string") return system;

  if (Array.isArray(system)) {
    const texts: string[] = [];
    for (const entry of system as unknown[]) {
      // Untrusted JSON: a null entry must not throw on property access.
      if (entry === null || typeof entry !== "object") continue;
      const block = entry as Record<string, unknown>;
      if (block.type === "text") {
        texts.push(String(block.text ?? ""));
      }
    }
    return texts.join("\n");
  }

  // NOTE(review): a non-array object stringifies to "[object Object]" here;
  // confirm whether such input should be handled differently.
  return String(system);
}
131
-
132
- // ---------------------------------------------------------------------------
133
- // Reverse helpers — gateway blocks → Anthropic format
134
- // ---------------------------------------------------------------------------
135
-
136
/**
 * Serialize a `GatewayContentBlock` into the shape Anthropic uses on the wire.
 *
 * Optional fields (`signature` on thinking blocks, `is_error` on tool
 * results) are emitted only when set on the gateway block.
 */
function toAnthropicBlock(
  block: GatewayContentBlock,
): Record<string, unknown> {
  switch (block.type) {
    case "text": {
      const { text } = block;
      return { type: "text", text };
    }

    case "thinking": {
      const wire: Record<string, unknown> = {
        type: "thinking",
        thinking: block.thinking,
      };
      if (block.signature != null) {
        wire.signature = block.signature;
      }
      return wire;
    }

    case "tool_use": {
      const { id, name, input } = block;
      return { type: "tool_use", id, name, input };
    }

    case "tool_result":
      return {
        type: "tool_result",
        tool_use_id: block.toolUseId,
        content: block.content,
        ...(block.isError ? { is_error: true } : {}),
      };
  }
}
172
-
173
- // ---------------------------------------------------------------------------
174
- // parseAnthropicRequest
175
- // ---------------------------------------------------------------------------
176
-
177
/**
 * Parse a raw Anthropic `/v1/messages` request body into a `GatewayRequest`.
 *
 * Lenient by design:
 * - a body that is not a plain object (null, string, array, …) is treated as
 *   an empty object;
 * - `max_tokens` defaults to 4096 when it is not a number;
 * - `stream` is true only when the body value is exactly `true`;
 * - any message whose `role` is not `"assistant"` is treated as `"user"`;
 * - `messages` / `tools` entries that are not objects are treated as empty
 *   objects instead of throwing;
 * - every top-level field not listed in `KNOWN_BODY_FIELDS` is copied into
 *   `metadata` so it can be forwarded upstream unchanged.
 *
 * @param body    Decoded JSON request body.
 * @param headers Incoming request headers, stored verbatim as `rawHeaders`.
 * @returns The normalized gateway request with `protocol: "anthropic"`.
 */
export function parseAnthropicRequest(
  body: unknown,
  headers: Record<string, string>,
): GatewayRequest {
  const raw = toPlainRecord(body);

  // --- Extract known fields ---
  const model = String(raw.model ?? "");
  const system = normalizeSystem(raw.system);
  const stream = raw.stream === true;
  const maxTokens =
    typeof raw.max_tokens === "number" ? raw.max_tokens : 4096;

  // --- Messages ---
  const rawMessages: unknown[] = Array.isArray(raw.messages)
    ? raw.messages
    : [];
  const messages: GatewayMessage[] = rawMessages.map(
    (entry: unknown): GatewayMessage => {
      const msg = toPlainRecord(entry);
      return {
        role: msg.role === "assistant" ? "assistant" : "user",
        content: normalizeContent(msg.content),
      };
    },
  );

  // --- Tools ---
  // Only name, description and input_schema are carried over per tool.
  const rawTools: unknown[] = Array.isArray(raw.tools) ? raw.tools : [];
  const tools: GatewayTool[] = rawTools.map((entry: unknown): GatewayTool => {
    const t = toPlainRecord(entry);
    return {
      name: String(t.name ?? ""),
      description: String(t.description ?? ""),
      inputSchema: (t.input_schema as Record<string, unknown>) ?? {},
    };
  });

  // --- Metadata: everything the gateway doesn't explicitly process ---
  const metadata: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(raw)) {
    if (!KNOWN_BODY_FIELDS.has(key)) {
      metadata[key] = value;
    }
  }

  return {
    protocol: "anthropic",
    model,
    system,
    messages,
    tools,
    stream,
    maxTokens,
    metadata,
    rawHeaders: headers,
  };
}

/**
 * Coerce untrusted JSON to a record: any value that is not a non-array
 * object (null, undefined, primitives, arrays) becomes an empty object.
 */
function toPlainRecord(value: unknown): Record<string, unknown> {
  return value !== null && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : {};
}
236
-
237
- // ---------------------------------------------------------------------------
238
- // Caching options
239
- // ---------------------------------------------------------------------------
240
-
241
/**
 * Switches that decide where `cache_control` breakpoints are placed on an
 * outgoing Anthropic request.
 *
 * The mechanisms are independent of one another:
 * - **System prompt** — `system` is sent as a block array whose host-prompt
 *   block carries an explicit breakpoint. The system prompt rarely changes
 *   within a session, which makes this the most stable cache slot.
 * - **Tools** — a breakpoint on the last tool definition.
 * - **Conversation** — a breakpoint on the last block of the last message so
 *   the conversation prefix up to that point can be cached. Between
 *   consecutive stable turns (same gradient layer, no distillation arrival,
 *   no window eviction) the prefix is byte-identical, giving cache reads at
 *   0.1× base cost instead of 1× uncached.
 *
 * Title/summary passthrough requests should NEVER enable caching: their
 * content differs on every call, so they would pay the 1.25× write cost and
 * never get a read.
 */
export type AnthropicCacheOptions = {
  /**
   * Breakpoint TTL for the system prompt.
   * - `"5m"` — the default 5-minute TTL; suits conversation turns, which
   *   arrive often enough to keep refreshing it
   * - `"1h"` — extended 1-hour TTL; suits worker calls that arrive in bursts
   *   separated by minutes of user thinking
   * - `false` — do not cache the system prompt
   */
  systemTTL?: "5m" | "1h" | false;

  /**
   * LTM knowledge text, injected as its own system block after the host
   * prompt. Because it is a separate block, the host prompt keeps its own
   * cache breakpoint and changes to the LTM text do not invalidate the host
   * prefix.
   *
   * With `systemTTL` set, the system is sent as a two-block array:
   *   system[0]: host prompt — `cache_control` using `systemTTL`
   *   system[1]: LTM content — no `cache_control` (benefits from the prefix)
   */
  ltmSystem?: string;

  /**
   * Put an explicit 1h breakpoint on the last tool definition. Tool
   * definitions (including the injected recall tool) are stable across
   * turns, so caching them avoids re-processing them on every request.
   */
  cacheTools?: boolean;

  /**
   * Put an explicit `cache_control` breakpoint on the last block of the last
   * message so Anthropic can cache the conversation prefix.
   *
   * When `true`, `cache_control: { type: "ephemeral" }` is added to the
   * final content block. On the following turn Anthropic's lookback window
   * finds that earlier breakpoint, reads the cached prefix (0.1× cost) and
   * writes only the new tail (1.25×).
   */
  cacheConversation?: boolean;
};
297
-
298
- // ---------------------------------------------------------------------------
299
- // buildAnthropicRequest
300
- // ---------------------------------------------------------------------------
301
-
302
/**
 * Convert a `GatewayRequest` back to Anthropic API format for upstream
 * forwarding.
 *
 * Returns the relative path, headers, and JSON body. The caller prepends
 * the upstream base URL.
 *
 * Headers: `content-type`, `anthropic-version`, the credentials derived from
 * the original request headers (when any are found), and `anthropic-beta`
 * when the original request carried it under any header-name casing.
 *
 * Body: `model`, `max_tokens`, `stream`, then `system` (when there is a host
 * prompt and/or LTM text), `messages`, `tools` (when non-empty), and finally
 * every `metadata` entry copied back to the top level.
 *
 * @param req    The normalized gateway request
 * @param cache  Optional caching configuration. When omitted, no
 *               `cache_control` annotations are added (passthrough behavior).
 * @returns `url` (always `"/v1/messages"`), `headers`, and the JSON `body`.
 */
export function buildAnthropicRequest(
  req: GatewayRequest,
  cache?: AnthropicCacheOptions,
): {
  url: string;
  headers: Record<string, string>;
  body: unknown;
} {
  // --- Headers ---
  const headers: Record<string, string> = {
    "content-type": "application/json",
    "anthropic-version": ANTHROPIC_VERSION,
  };

  // Forward auth from the original request (API key or OAuth Bearer)
  const cred = extractAuth(req.rawHeaders);
  if (cred) {
    Object.assign(headers, authHeaders(cred));
  }

  // Forward anthropic-beta if present (enables features like extended
  // thinking). Header names are case-insensitive, so match any casing.
  const beta = findHeaderValue(req.rawHeaders, "anthropic-beta");
  if (beta) {
    headers["anthropic-beta"] = beta;
  }

  // --- Body ---
  const body: Record<string, unknown> = {
    model: req.model,
    max_tokens: req.maxTokens,
    stream: req.stream,
  };

  // System — only include if there is something to send
  const systemTTL = cache?.systemTTL;
  const ltmText = cache?.ltmSystem;

  if (req.system) {
    if (systemTTL) {
      // Send as a block array with an explicit cache_control breakpoint on
      // the host prompt, using the configured TTL ("1h" is spelled out; the
      // "5m" case relies on the default by omitting `ttl`).
      const cacheControl: Record<string, string> =
        systemTTL === "1h"
          ? { type: "ephemeral", ttl: "1h" }
          : { type: "ephemeral" };

      const blocks: Record<string, unknown>[] = [
        { type: "text", text: req.system, cache_control: cacheControl },
      ];

      // LTM knowledge as a separate block — no cache_control of its own,
      // but benefits from the host prompt prefix cache. When LTM changes,
      // only this block and everything after it is re-processed; the host
      // prompt prefix is still a cache read.
      if (ltmText) {
        blocks.push({ type: "text", text: ltmText });
      }

      body.system = blocks;
    } else {
      // No caching — concatenate LTM into a single string.
      body.system = ltmText ? `${req.system}\n\n${ltmText}` : req.system;
    }
  } else if (ltmText) {
    // No host prompt: still deliver the LTM text instead of dropping it.
    // Sent as a plain string because the LTM block never carries its own
    // cache_control and there is no host block to put a breakpoint on.
    body.system = ltmText;
  }

  // Messages — fresh wire objects, so annotating them below does not mutate
  // the blocks held by `req`.
  const messages = req.messages.map((msg) => ({
    role: msg.role,
    content: msg.content.map(toAnthropicBlock),
  }));

  // Conversation caching: place a breakpoint on the final content block of
  // the last message. Anthropic's 20-block lookback finds the prior turn's
  // breakpoint, reads the cached prefix, and writes only the new tail.
  if (cache?.cacheConversation) {
    const lastMsg = messages[messages.length - 1];
    const lastBlock = lastMsg?.content[lastMsg.content.length - 1];
    if (lastBlock) {
      lastBlock.cache_control = { type: "ephemeral" };
    }
  }

  body.messages = messages;

  // Tools — only include if present
  if (req.tools.length > 0) {
    const tools: Record<string, unknown>[] = req.tools.map((t) => ({
      name: t.name,
      description: t.description,
      input_schema: t.inputSchema,
    }));

    // Tool caching: place a 1h breakpoint on the last tool definition.
    // Tool definitions (including our recall tool) are stable across turns.
    const lastTool = tools[tools.length - 1];
    if (cache?.cacheTools && lastTool) {
      lastTool.cache_control = { type: "ephemeral", ttl: "1h" };
    }

    body.tools = tools;
  }

  // Restore all metadata params (temperature, top_p, stop_sequences, etc.).
  // Applied last, so a metadata key with the same name as a field set above
  // replaces it.
  for (const [key, value] of Object.entries(req.metadata)) {
    body[key] = value;
  }

  return {
    url: "/v1/messages",
    headers,
    body,
  };
}

/**
 * Return the value of an HTTP header, matching the header name without
 * regard to case. The exact lower-case key is preferred; otherwise the first
 * entry whose lower-cased name matches and whose value is non-empty wins.
 * Returns `""` when no non-empty value is found.
 */
function findHeaderValue(
  headers: Record<string, string>,
  lowerName: string,
): string {
  const exact = headers[lowerName];
  if (exact) return exact;

  for (const [name, value] of Object.entries(headers)) {
    if (value && name.toLowerCase() === lowerName) {
      return value;
    }
  }
  return "";
}
433
-
434
- // ---------------------------------------------------------------------------
435
- // buildAnthropicNonStreamResponse
436
- // ---------------------------------------------------------------------------
437
-
438
/**
 * Render a `GatewayResponse` as a non-streaming Anthropic `/v1/messages`
 * response object.
 *
 * The result has `type: "message"`, `role: "assistant"`, the content blocks
 * converted to wire format, `stop_sequence: null`, and a `usage` object. The
 * two cache token counters appear in `usage` only when the gateway response
 * provides them.
 */
export function buildAnthropicNonStreamResponse(
  resp: GatewayResponse,
): unknown {
  const {
    inputTokens,
    outputTokens,
    cacheReadInputTokens,
    cacheCreationInputTokens,
  } = resp.usage;

  const usage: Record<string, number> = {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    ...(cacheReadInputTokens != null
      ? { cache_read_input_tokens: cacheReadInputTokens }
      : {}),
    ...(cacheCreationInputTokens != null
      ? { cache_creation_input_tokens: cacheCreationInputTokens }
      : {}),
  };

  const message = {
    id: resp.id,
    type: "message",
    role: "assistant",
    model: resp.model,
    content: resp.content.map(toAnthropicBlock),
    stop_reason: resp.stopReason,
    stop_sequence: null,
    usage,
  };
  return message;
}