claude-sdk-proxy 3.1.2 → 3.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/proxy/server.ts +25 -0
- package/src/proxy/server.ts.bak +1841 -0
|
@@ -0,0 +1,1841 @@
|
|
|
1
|
+
import { Hono } from "hono"
|
|
2
|
+
import { cors } from "hono/cors"
|
|
3
|
+
import { query } from "@anthropic-ai/claude-agent-sdk"
|
|
4
|
+
import type { Context } from "hono"
|
|
5
|
+
import type { ProxyConfig } from "./types"
|
|
6
|
+
import { DEFAULT_PROXY_CONFIG } from "./types"
|
|
7
|
+
import { logInfo, logWarn, logError, logDebug, LOG_DIR } from "../logger"
|
|
8
|
+
import { traceStore } from "../trace"
|
|
9
|
+
import { sessionStore } from "../session-store"
|
|
10
|
+
import { execSync } from "child_process"
|
|
11
|
+
import { existsSync, writeFileSync, readFileSync, readdirSync } from "fs"
|
|
12
|
+
import { randomBytes } from "crypto"
|
|
13
|
+
import { fileURLToPath } from "url"
|
|
14
|
+
import { join, dirname } from "path"
|
|
15
|
+
|
|
16
|
+
// Base62 ID generator — matches Anthropic's real ID format (e.g. msg_01XFDUDYJgAACzvnptvVoYEL)
|
|
17
|
+
const BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
|
18
|
+
function generateId(prefix: string, length = 24): string {
|
|
19
|
+
const bytes = randomBytes(length)
|
|
20
|
+
let id = prefix
|
|
21
|
+
for (let i = 0; i < length; i++) id += BASE62[bytes[i]! % 62]
|
|
22
|
+
return id
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const PROXY_VERSION: string = (() => {
|
|
26
|
+
try {
|
|
27
|
+
const pkg = JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "../../package.json"), "utf-8"))
|
|
28
|
+
return pkg.version ?? "unknown"
|
|
29
|
+
} catch { return "unknown" }
|
|
30
|
+
})()
|
|
31
|
+
|
|
32
|
+
function resolveClaudeExecutable(): string {
|
|
33
|
+
try {
|
|
34
|
+
const sdkPath = fileURLToPath(import.meta.resolve("@anthropic-ai/claude-agent-sdk"))
|
|
35
|
+
const sdkCliJs = join(dirname(sdkPath), "cli.js")
|
|
36
|
+
if (existsSync(sdkCliJs)) return sdkCliJs
|
|
37
|
+
} catch {}
|
|
38
|
+
try {
|
|
39
|
+
const claudePath = execSync("which claude", { encoding: "utf-8" }).trim()
|
|
40
|
+
if (claudePath && existsSync(claudePath)) return claudePath
|
|
41
|
+
} catch {}
|
|
42
|
+
throw new Error("Could not find Claude Code executable. Install: npm install -g @anthropic-ai/claude-code")
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Resolved once at module load — startup fails fast if no Claude Code CLI is found.
const claudeExecutable = resolveClaudeExecutable()
|
|
46
|
+
|
|
47
|
+
// ── Concurrency control ──────────────────────────────────────────────────────
|
|
48
|
+
// Limits simultaneous Claude SDK sessions to prevent resource exhaustion.
|
|
49
|
+
|
|
50
|
+
const MAX_CONCURRENT = parseInt(process.env.CLAUDE_PROXY_MAX_CONCURRENT ?? "5", 10)
|
|
51
|
+
|
|
52
|
+
const QUEUE_TIMEOUT_MS = parseInt(process.env.CLAUDE_PROXY_QUEUE_TIMEOUT_MS ?? "30000", 10)
|
|
53
|
+
|
|
54
|
+
class RequestQueue {
|
|
55
|
+
private active = 0
|
|
56
|
+
private waiting: Array<{ resolve: () => void; reject: (err: Error) => void }> = []
|
|
57
|
+
|
|
58
|
+
get activeCount() { return this.active }
|
|
59
|
+
get waitingCount() { return this.waiting.length }
|
|
60
|
+
|
|
61
|
+
async acquire(): Promise<void> {
|
|
62
|
+
if (this.active < MAX_CONCURRENT) {
|
|
63
|
+
this.active++
|
|
64
|
+
return
|
|
65
|
+
}
|
|
66
|
+
return new Promise<void>((resolve, reject) => {
|
|
67
|
+
const entry = { resolve: () => { this.active++; resolve() }, reject }
|
|
68
|
+
this.waiting.push(entry)
|
|
69
|
+
const timer = setTimeout(() => {
|
|
70
|
+
const idx = this.waiting.indexOf(entry)
|
|
71
|
+
if (idx !== -1) {
|
|
72
|
+
this.waiting.splice(idx, 1)
|
|
73
|
+
reject(new Error("Queue timeout — all slots busy"))
|
|
74
|
+
}
|
|
75
|
+
}, QUEUE_TIMEOUT_MS)
|
|
76
|
+
const origResolve = entry.resolve
|
|
77
|
+
entry.resolve = () => { clearTimeout(timer); origResolve() }
|
|
78
|
+
})
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
release(): void {
|
|
82
|
+
this.active--
|
|
83
|
+
const next = this.waiting.shift()
|
|
84
|
+
if (next) next.resolve()
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const requestQueue = new RequestQueue()
|
|
89
|
+
|
|
90
|
+
function mapModelToClaudeModel(model: string): "sonnet" | "opus" | "haiku" {
|
|
91
|
+
if (model.includes("opus")) return "opus"
|
|
92
|
+
if (model.includes("haiku")) return "haiku"
|
|
93
|
+
return "sonnet"
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// ── Content-block serialization ──────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
function serializeBlock(block: any): string {
|
|
99
|
+
switch (block.type) {
|
|
100
|
+
case "text":
|
|
101
|
+
return block.text || ""
|
|
102
|
+
case "image":
|
|
103
|
+
return "[Image attached]"
|
|
104
|
+
case "tool_use":
|
|
105
|
+
return `<tool_use>\n{"name": "${block.name}", "input": ${JSON.stringify(block.input ?? {})}}\n</tool_use>`
|
|
106
|
+
case "tool_result": {
|
|
107
|
+
const content = Array.isArray(block.content)
|
|
108
|
+
? block.content.filter((b: any) => b.type === "text").map((b: any) => b.text).join("")
|
|
109
|
+
: String(block.content ?? "")
|
|
110
|
+
const truncated = content.length > 4000
|
|
111
|
+
? content.slice(0, 4000) + `\n...[truncated ${content.length - 4000} chars]`
|
|
112
|
+
: content
|
|
113
|
+
return `[Tool Result (id: ${block.tool_use_id})]\n${truncated}\n[/Tool Result]`
|
|
114
|
+
}
|
|
115
|
+
case "thinking":
|
|
116
|
+
return ""
|
|
117
|
+
default:
|
|
118
|
+
return ""
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function serializeContent(content: string | Array<any>): string {
|
|
123
|
+
if (typeof content === "string") return content
|
|
124
|
+
if (!Array.isArray(content)) return String(content)
|
|
125
|
+
return content.map(b => serializeBlock(b)).filter(Boolean).join("\n")
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// ── Image handling via SDKUserMessage ────────────────────────────────────────
|
|
129
|
+
// The SDK query() accepts AsyncIterable<SDKUserMessage> which supports native
|
|
130
|
+
// Anthropic MessageParam content blocks including images. When images are
|
|
131
|
+
// detected, we pass them through natively instead of serializing to text.
|
|
132
|
+
|
|
133
|
+
function contentHasImages(content: string | Array<any>): boolean {
|
|
134
|
+
if (typeof content === "string") return false
|
|
135
|
+
if (!Array.isArray(content)) return false
|
|
136
|
+
return content.some((b: any) => b.type === "image")
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/** Convert an Anthropic image content block to SDK-compatible format */
|
|
140
|
+
function toAnthropicImageBlock(block: any): any {
|
|
141
|
+
if (block.source) return block // already in Anthropic format
|
|
142
|
+
// openclaw may use { type: "image", data: "...", mimeType: "..." }
|
|
143
|
+
if (block.data && block.mimeType) {
|
|
144
|
+
return {
|
|
145
|
+
type: "image",
|
|
146
|
+
source: {
|
|
147
|
+
type: "base64",
|
|
148
|
+
media_type: block.mimeType,
|
|
149
|
+
data: block.data,
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
if (block.data && block.media_type) {
|
|
154
|
+
return {
|
|
155
|
+
type: "image",
|
|
156
|
+
source: {
|
|
157
|
+
type: "base64",
|
|
158
|
+
media_type: block.media_type,
|
|
159
|
+
data: block.data,
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
return block
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/** Build Anthropic MessageParam content array, preserving images natively */
|
|
167
|
+
function buildNativeContent(content: string | Array<any>): Array<any> {
|
|
168
|
+
if (typeof content === "string") return [{ type: "text", text: content }]
|
|
169
|
+
if (!Array.isArray(content)) return [{ type: "text", text: String(content) }]
|
|
170
|
+
return content.map((block: any) => {
|
|
171
|
+
if (block.type === "image") return toAnthropicImageBlock(block)
|
|
172
|
+
if (block.type === "text") return { type: "text", text: block.text ?? "" }
|
|
173
|
+
// For other types, serialize to text
|
|
174
|
+
const serialized = serializeBlock(block)
|
|
175
|
+
return serialized ? { type: "text", text: serialized } : null
|
|
176
|
+
}).filter(Boolean)
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/** Create an async iterable yielding a single SDKUserMessage with native content */
|
|
180
|
+
function createSDKUserMessage(content: Array<any>, sessionId?: string): AsyncIterable<any> {
|
|
181
|
+
const msg = {
|
|
182
|
+
type: "user" as const,
|
|
183
|
+
message: {
|
|
184
|
+
role: "user" as const,
|
|
185
|
+
content,
|
|
186
|
+
},
|
|
187
|
+
parent_tool_use_id: null,
|
|
188
|
+
session_id: sessionId ?? "",
|
|
189
|
+
}
|
|
190
|
+
return {
|
|
191
|
+
async *[Symbol.asyncIterator]() {
|
|
192
|
+
yield msg
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
// ── Client tool-use support ──────────────────────────────────────────────────
|
|
199
|
+
|
|
200
|
+
function buildClientToolsPrompt(tools: any[]): string {
|
|
201
|
+
const defs = tools.map((t: any) => {
|
|
202
|
+
const schema = t.input_schema ? `\nInput schema:\n${JSON.stringify(t.input_schema, null, 2)}` : ""
|
|
203
|
+
return `### ${t.name}\n${t.description ?? ""}${schema}`
|
|
204
|
+
}).join("\n\n")
|
|
205
|
+
return `\n\n## Available Tools\n\nTo call a tool, output a <tool_use> block:\n\n` +
|
|
206
|
+
`<tool_use>\n{"name": "TOOL_NAME", "input": {ARGUMENTS}}\n</tool_use>\n\n` +
|
|
207
|
+
`- You may write reasoning text before the block\n` +
|
|
208
|
+
`- Call multiple tools by including multiple <tool_use> blocks\n` +
|
|
209
|
+
`- Each block must be valid JSON with "name" and "input" keys\n\n` +
|
|
210
|
+
defs
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/** A client-side tool invocation parsed out of model output text. */
interface ToolCall { id: string; name: string; input: unknown }
|
|
214
|
+
|
|
215
|
+
/**
 * Extract client-tool invocations from model output text.
 *
 * Tries four syntaxes in order, moving to the next only when the previous
 * produced zero calls:
 *   1. <tool_use>{JSON}</tool_use>
 *   2. <function_calls>{JSON object or array}</function_calls>
 *   3. <invoke name="..."><parameter name="...">value</parameter></invoke>
 *   4. [Tool call: name\nInput: {JSON}]
 *
 * Blocks with malformed JSON are skipped silently. `textBefore` is the
 * trimmed text preceding the first matched block — note firstIdx is recorded
 * before JSON parsing, so even a malformed first block anchors it. Returns ""
 * for textBefore when the first match starts at index 0 or nothing matched.
 */
function parseToolUse(text: string): { toolCalls: ToolCall[]; textBefore: string } {
  const calls: ToolCall[] = []
  let firstIdx = -1

  // Strategy 1: canonical <tool_use> blocks (the format we instruct the model to use).
  const xmlRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
  let m: RegExpExecArray | null
  while ((m = xmlRegex.exec(text)) !== null) {
    if (firstIdx < 0) firstIdx = m.index
    try {
      const p = JSON.parse(m[1]!.trim())
      calls.push({
        id: generateId("toolu_"),
        name: String(p.name ?? ""),
        input: p.input ?? {}
      })
    } catch { /* skip malformed block */ }
  }

  // Strategy 2: <function_calls> wrapper carrying one object or an array of them.
  if (calls.length === 0) {
    const fcRegex = /<function_calls>([\s\S]*?)<\/function_calls>/g
    while ((m = fcRegex.exec(text)) !== null) {
      if (firstIdx < 0) firstIdx = m.index
      try {
        const parsed = JSON.parse(m[1]!.trim())
        const items = Array.isArray(parsed) ? parsed : [parsed]
        for (const p of items) {
          if (p && typeof p.name === "string") {
            calls.push({
              id: generateId("toolu_"),
              name: p.name,
              // Some emitters use "parameters" instead of "input".
              input: p.input ?? p.parameters ?? {}
            })
          }
        }
      } catch { /* skip malformed block */ }
    }
  }

  // Strategy 3: XML-ish <invoke>/<parameter> syntax; each parameter value is
  // parsed as JSON when possible, otherwise kept as the raw trimmed string.
  if (calls.length === 0) {
    const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g
    while ((m = invokeRegex.exec(text)) !== null) {
      if (firstIdx < 0) firstIdx = m.index
      const toolName = m[1]!
      const body = m[2]!
      const input: Record<string, any> = {}
      const paramRegex = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
      let pm: RegExpExecArray | null
      while ((pm = paramRegex.exec(body)) !== null) {
        const val = pm[2]!.trim()
        try { input[pm[1]!] = JSON.parse(val) } catch { input[pm[1]!] = val }
      }
      calls.push({ id: generateId("toolu_"), name: toolName, input })
    }
  }

  // Strategy 4: plain-text "[Tool call: name\nInput: {...}]" notation.
  if (calls.length === 0) {
    const bracketRegex = /\[Tool call:\s*(\w+)\s*\nInput:\s*([\s\S]*?)\]/g
    while ((m = bracketRegex.exec(text)) !== null) {
      if (firstIdx < 0) firstIdx = m.index
      try {
        const input = JSON.parse(m[2]!.trim())
        calls.push({
          id: generateId("toolu_"),
          name: m[1]!.trim(),
          input
        })
      } catch { /* skip malformed block */ }
    }
  }

  return { toolCalls: calls, textBefore: firstIdx > 0 ? text.slice(0, firstIdx).trim() : "" }
}
|
|
287
|
+
|
|
288
|
+
function roughTokens(text: string): number {
|
|
289
|
+
return Math.ceil((text ?? "").length / 4)
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// ── Conversation label extraction ────────────────────────────────────────────
|
|
293
|
+
// Openclaw embeds "Conversation info (untrusted metadata)" in the last user
|
|
294
|
+
// message containing a JSON block with conversation_label. Extract it to use
|
|
295
|
+
// as a stable conversation ID for session persistence.
|
|
296
|
+
|
|
297
|
+
/**
 * Derive a stable conversation ID from openclaw's "Conversation info
 * (untrusted metadata)" block embedded in a user message.
 *
 * Scans messages newest-first; for each user message, extracts the ```json
 * fenced block following "Conversation info" and reads conversation_label
 * (or dm:<sender_id> as a fallback). Returns null when no message yields
 * either field.
 */
function extractConversationLabel(messages: Array<{ role: string; content: string | Array<any> }>): string | null {
  // Search from the last message backwards for a user message with metadata
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i]!
    if (msg.role !== "user") continue

    // Flatten array-form content to the text sub-blocks only.
    const text = typeof msg.content === "string"
      ? msg.content
      : Array.isArray(msg.content)
        ? msg.content.filter((b: any) => b.type === "text").map((b: any) => b.text ?? "").join("\n")
        : ""

    // Look for the JSON block after "Conversation info"
    const jsonMatch = text.match(/Conversation info[^`]*```json\s*(\{[\s\S]*?\})\s*```/)
    if (!jsonMatch?.[1]) continue

    try {
      const meta = JSON.parse(jsonMatch[1])
      // conversation_label is present for both PMs and groups
      if (meta.conversation_label) return meta.conversation_label
      // Fallback: use sender_id if no label (shouldn't happen but just in case)
      if (meta.sender_id) return `dm:${meta.sender_id}`
      // NOTE: if parse succeeds but neither field is set, we fall through and
      // keep scanning earlier messages.
    } catch {
      // Regex fallback if JSON parse fails
      const labelMatch = text.match(/"conversation_label"\s*:\s*"([^"]*)"/)
      if (labelMatch?.[1]) return labelMatch[1]
    }
  }
  return null
}
|
|
327
|
+
|
|
328
|
+
// ── Query options builder ────────────────────────────────────────────────────
|
|
329
|
+
|
|
330
|
+
/**
 * Assemble the options object passed to the Claude Agent SDK query() call.
 *
 * The defaults configure a non-interactive, single-turn session: permissions
 * are bypassed, the session is persisted, and maxTurns is pinned to 1.
 *
 * @param model SDK model alias ("sonnet" | "opus" | "haiku").
 * @param opts.partial          include partial (streaming) message events
 * @param opts.systemPrompt     system prompt forwarded to the SDK session
 * @param opts.abortController  lets the caller cancel the SDK query
 * @param opts.thinking         extended-thinking configuration passthrough
 * @param opts.resume           SDK session ID to resume a prior conversation
 */
function buildQueryOptions(
  model: "sonnet" | "opus" | "haiku",
  opts: {
    partial?: boolean
    systemPrompt?: string
    abortController?: AbortController
    thinking?: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" }
    resume?: string
  } = {}
) {
  return {
    model,
    pathToClaudeCodeExecutable: claudeExecutable,
    permissionMode: "bypassPermissions" as const,
    allowDangerouslySkipPermissions: true,
    persistSession: true,
    settingSources: [],
    // "_proxy_noop_" looks like a deliberate placeholder — presumably it
    // results in no real SDK tools being enabled; TODO confirm against SDK docs.
    tools: ["_proxy_noop_"] as string[],
    maxTurns: 1,
    // Optional fields are spread in only when set, so the SDK never sees an
    // explicit `undefined` value for them.
    ...(opts.partial ? { includePartialMessages: true } : {}),
    ...(opts.abortController ? { abortController: opts.abortController } : {}),
    ...(opts.thinking ? { thinking: opts.thinking } : {}),
    ...(opts.systemPrompt ? { systemPrompt: opts.systemPrompt } : {}),
    ...(opts.resume ? { resume: opts.resume } : {}),
  }
}
|
|
356
|
+
|
|
357
|
+
// ── Route handler ────────────────────────────────────────────────────────────
|
|
358
|
+
|
|
359
|
+
export function createProxyServer(config: Partial<ProxyConfig> = {}) {
|
|
360
|
+
const finalConfig = { ...DEFAULT_PROXY_CONFIG, ...config }
|
|
361
|
+
const app = new Hono()
|
|
362
|
+
|
|
363
|
+
app.use("*", cors())
|
|
364
|
+
|
|
365
|
+
// Optional API key validation
|
|
366
|
+
const requiredApiKey = process.env.CLAUDE_PROXY_API_KEY
|
|
367
|
+
if (requiredApiKey) {
|
|
368
|
+
app.use("*", async (c, next) => {
|
|
369
|
+
if (c.req.path === "/" || c.req.path.startsWith("/debug") || c.req.method === "OPTIONS") return next()
|
|
370
|
+
const key = c.req.header("x-api-key")
|
|
371
|
+
?? c.req.header("authorization")?.replace(/^Bearer\s+/i, "")
|
|
372
|
+
if (key !== requiredApiKey) {
|
|
373
|
+
return c.json({
|
|
374
|
+
type: "error",
|
|
375
|
+
error: { type: "authentication_error", message: "Invalid API key" },
|
|
376
|
+
request_id: c.res.headers.get("request-id") ?? generateId("req_")
|
|
377
|
+
}, 401)
|
|
378
|
+
}
|
|
379
|
+
return next()
|
|
380
|
+
})
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// Anthropic-compatible headers + HTTP request logging
|
|
384
|
+
app.use("*", async (c, next) => {
|
|
385
|
+
const start = Date.now()
|
|
386
|
+
const requestId = c.req.header("x-request-id") ?? generateId("req_")
|
|
387
|
+
c.header("x-request-id", requestId)
|
|
388
|
+
c.header("request-id", requestId)
|
|
389
|
+
c.header("anthropic-version", "2023-06-01")
|
|
390
|
+
const betaHeader = c.req.header("anthropic-beta")
|
|
391
|
+
if (betaHeader) c.header("anthropic-beta", betaHeader)
|
|
392
|
+
await next()
|
|
393
|
+
const ms = Date.now() - start
|
|
394
|
+
// Only log non-debug HTTP requests at info level; debug endpoints at debug level
|
|
395
|
+
if (c.req.path.startsWith("/debug")) {
|
|
396
|
+
logDebug("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
|
|
397
|
+
} else {
|
|
398
|
+
logInfo("http.request", { method: c.req.method, path: c.req.path, status: c.res.status, ms, reqId: requestId })
|
|
399
|
+
}
|
|
400
|
+
})
|
|
401
|
+
|
|
402
|
+
// ── Health / Info ────────────────────────────────────────────────────────
|
|
403
|
+
|
|
404
|
+
app.get("/", (c) => c.json({
|
|
405
|
+
status: "ok",
|
|
406
|
+
service: "claude-sdk-proxy",
|
|
407
|
+
version: PROXY_VERSION,
|
|
408
|
+
format: "anthropic",
|
|
409
|
+
endpoints: ["/v1/messages", "/v1/models", "/v1/chat/completions", "/debug/stats", "/debug/traces", "/debug/errors", "/debug/active", "/debug/health", "/sessions", "/sessions/cleanup"],
|
|
410
|
+
queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
|
|
411
|
+
logDir: LOG_DIR,
|
|
412
|
+
}))
|
|
413
|
+
|
|
414
|
+
// ── Debug / Observability endpoints ──────────────────────────────────────
|
|
415
|
+
|
|
416
|
+
app.get("/debug/stats", (c) => {
|
|
417
|
+
const stats = traceStore.getStats()
|
|
418
|
+
const sessionStats = sessionStore.getStats()
|
|
419
|
+
return c.json({
|
|
420
|
+
version: PROXY_VERSION,
|
|
421
|
+
config: {
|
|
422
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
423
|
+
maxDurationMs: finalConfig.maxDurationMs,
|
|
424
|
+
maxOutputChars: finalConfig.maxOutputChars,
|
|
425
|
+
maxConcurrent: MAX_CONCURRENT,
|
|
426
|
+
queueTimeoutMs: QUEUE_TIMEOUT_MS,
|
|
427
|
+
claudeExecutable,
|
|
428
|
+
logDir: LOG_DIR,
|
|
429
|
+
debug: finalConfig.debug,
|
|
430
|
+
},
|
|
431
|
+
queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
|
|
432
|
+
sessions: sessionStats,
|
|
433
|
+
...stats,
|
|
434
|
+
})
|
|
435
|
+
})
|
|
436
|
+
|
|
437
|
+
// ── Session management endpoints ──────────────────────────────────────
|
|
438
|
+
|
|
439
|
+
app.get("/sessions", (c) => {
|
|
440
|
+
return c.json({
|
|
441
|
+
sessions: sessionStore.list(),
|
|
442
|
+
stats: sessionStore.getStats(),
|
|
443
|
+
})
|
|
444
|
+
})
|
|
445
|
+
|
|
446
|
+
app.get("/sessions/cleanup", (c) => {
|
|
447
|
+
const result = sessionStore.cleanup()
|
|
448
|
+
return c.json(result)
|
|
449
|
+
})
|
|
450
|
+
|
|
451
|
+
app.get("/debug/traces", (c) => {
|
|
452
|
+
const limit = parseInt(c.req.query("limit") ?? "20", 10)
|
|
453
|
+
return c.json(traceStore.getRecentTraces(limit))
|
|
454
|
+
})
|
|
455
|
+
|
|
456
|
+
app.get("/debug/traces/:id", (c) => {
|
|
457
|
+
const id = c.req.param("id")
|
|
458
|
+
const trace = traceStore.getTrace(id)
|
|
459
|
+
if (!trace) return c.json({ error: "Trace not found", reqId: id }, 404)
|
|
460
|
+
return c.json(trace)
|
|
461
|
+
})
|
|
462
|
+
|
|
463
|
+
app.get("/debug/errors", (c) => {
|
|
464
|
+
const limit = parseInt(c.req.query("limit") ?? "10", 10)
|
|
465
|
+
return c.json(traceStore.getRecentErrors(limit))
|
|
466
|
+
})
|
|
467
|
+
|
|
468
|
+
app.get("/debug/logs", (c) => {
|
|
469
|
+
// List available log files
|
|
470
|
+
try {
|
|
471
|
+
const files = readdirSync(LOG_DIR)
|
|
472
|
+
.filter(f => f.startsWith("proxy-") && f.endsWith(".log"))
|
|
473
|
+
.sort()
|
|
474
|
+
.reverse()
|
|
475
|
+
return c.json({ logDir: LOG_DIR, files })
|
|
476
|
+
} catch {
|
|
477
|
+
return c.json({ logDir: LOG_DIR, files: [], error: "Cannot read log directory" })
|
|
478
|
+
}
|
|
479
|
+
})
|
|
480
|
+
|
|
481
|
+
app.get("/debug/logs/:filename", (c) => {
|
|
482
|
+
// Serve a specific log file (last N lines)
|
|
483
|
+
const filename = c.req.param("filename")
|
|
484
|
+
if (!filename.match(/^proxy-\d{4}-\d{2}-\d{2}\.log$/)) {
|
|
485
|
+
return c.json({ error: "Invalid log filename" }, 400)
|
|
486
|
+
}
|
|
487
|
+
const tail = parseInt(c.req.query("tail") ?? "100", 10)
|
|
488
|
+
try {
|
|
489
|
+
const content = readFileSync(join(LOG_DIR, filename), "utf-8")
|
|
490
|
+
const lines = content.trim().split("\n")
|
|
491
|
+
const sliced = lines.slice(-tail)
|
|
492
|
+
const parsed = sliced.map(line => {
|
|
493
|
+
try { return JSON.parse(line) } catch { return { raw: line } }
|
|
494
|
+
})
|
|
495
|
+
return c.json({ file: filename, total: lines.length, returned: sliced.length, lines: parsed })
|
|
496
|
+
} catch {
|
|
497
|
+
return c.json({ error: "Log file not found" }, 404)
|
|
498
|
+
}
|
|
499
|
+
})
|
|
500
|
+
|
|
501
|
+
app.get("/debug/errors/:id", (c) => {
|
|
502
|
+
// Serve a specific error dump file
|
|
503
|
+
const id = c.req.param("id")
|
|
504
|
+
if (!id.match(/^req_/)) return c.json({ error: "Invalid request ID format" }, 400)
|
|
505
|
+
try {
|
|
506
|
+
const content = readFileSync(join(LOG_DIR, "errors", `${id}.json`), "utf-8")
|
|
507
|
+
return c.json(JSON.parse(content))
|
|
508
|
+
} catch {
|
|
509
|
+
return c.json({ error: "Error dump not found", reqId: id }, 404)
|
|
510
|
+
}
|
|
511
|
+
})
|
|
512
|
+
|
|
513
|
+
app.get("/debug/active", (c) => {
|
|
514
|
+
// Detailed view of currently active requests
|
|
515
|
+
const stats = traceStore.getStats()
|
|
516
|
+
return c.json({
|
|
517
|
+
queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
|
|
518
|
+
activeRequests: stats.activeRequests,
|
|
519
|
+
})
|
|
520
|
+
})
|
|
521
|
+
|
|
522
|
+
app.get("/debug/health", (c) => {
|
|
523
|
+
// Process health: memory, uptime, resource usage
|
|
524
|
+
const mem = process.memoryUsage()
|
|
525
|
+
const stats = traceStore.getStats()
|
|
526
|
+
return c.json({
|
|
527
|
+
version: PROXY_VERSION,
|
|
528
|
+
pid: process.pid,
|
|
529
|
+
uptimeMs: stats.uptimeMs,
|
|
530
|
+
uptimeHuman: stats.uptimeHuman,
|
|
531
|
+
memory: {
|
|
532
|
+
rss: `${(mem.rss / 1024 / 1024).toFixed(1)}MB`,
|
|
533
|
+
heapUsed: `${(mem.heapUsed / 1024 / 1024).toFixed(1)}MB`,
|
|
534
|
+
heapTotal: `${(mem.heapTotal / 1024 / 1024).toFixed(1)}MB`,
|
|
535
|
+
external: `${(mem.external / 1024 / 1024).toFixed(1)}MB`,
|
|
536
|
+
rssBytes: mem.rss,
|
|
537
|
+
heapUsedBytes: mem.heapUsed,
|
|
538
|
+
},
|
|
539
|
+
queue: { active: requestQueue.activeCount, waiting: requestQueue.waitingCount, max: MAX_CONCURRENT },
|
|
540
|
+
requests: stats.requests,
|
|
541
|
+
config: {
|
|
542
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
543
|
+
maxConcurrent: MAX_CONCURRENT,
|
|
544
|
+
queueTimeoutMs: QUEUE_TIMEOUT_MS,
|
|
545
|
+
debug: finalConfig.debug,
|
|
546
|
+
},
|
|
547
|
+
})
|
|
548
|
+
})
|
|
549
|
+
|
|
550
|
+
// ── Model endpoints ──────────────────────────────────────────────────────
|
|
551
|
+
|
|
552
|
+
const MODELS = [
|
|
553
|
+
{ type: "model", id: "claude-opus-4-6", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
|
|
554
|
+
{ type: "model", id: "claude-opus-4-6-20250801", display_name: "Claude Opus 4.6", created_at: "2025-08-01T00:00:00Z" },
|
|
555
|
+
{ type: "model", id: "claude-sonnet-4-6", display_name: "Claude Sonnet 4.6", created_at: "2025-08-01T00:00:00Z" },
|
|
556
|
+
{ type: "model", id: "claude-sonnet-4-6-20250801", display_name: "Claude Sonnet 4.6", created_at: "2025-08-01T00:00:00Z" },
|
|
557
|
+
{ type: "model", id: "claude-sonnet-4-5-20250929", display_name: "Claude Sonnet 4.5", created_at: "2025-09-29T00:00:00Z" },
|
|
558
|
+
{ type: "model", id: "claude-haiku-4-5", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
|
|
559
|
+
{ type: "model", id: "claude-haiku-4-5-20251001", display_name: "Claude Haiku 4.5", created_at: "2025-10-01T00:00:00Z" },
|
|
560
|
+
]
|
|
561
|
+
|
|
562
|
+
const MODELS_DUAL = MODELS.map(m => ({
|
|
563
|
+
...m,
|
|
564
|
+
object: "model" as const,
|
|
565
|
+
created: Math.floor(new Date(m.created_at).getTime() / 1000),
|
|
566
|
+
owned_by: "anthropic" as const
|
|
567
|
+
}))
|
|
568
|
+
|
|
569
|
+
const handleModels = (c: Context) => c.json({ object: "list", data: MODELS_DUAL })
|
|
570
|
+
app.get("/v1/models", handleModels)
|
|
571
|
+
app.get("/models", handleModels)
|
|
572
|
+
|
|
573
|
+
const handleModel = (c: Context) => {
|
|
574
|
+
const id = c.req.param("id")
|
|
575
|
+
const model = MODELS_DUAL.find(m => m.id === id)
|
|
576
|
+
if (!model) return c.json({ type: "error", error: { type: "not_found_error", message: `Model \`${id}\` not found` } }, 404)
|
|
577
|
+
return c.json(model)
|
|
578
|
+
}
|
|
579
|
+
app.get("/v1/models/:id", handleModel)
|
|
580
|
+
app.get("/models/:id", handleModel)
|
|
581
|
+
|
|
582
|
+
const handleCountTokens = async (c: Context) => {
|
|
583
|
+
try {
|
|
584
|
+
const body = await c.req.json()
|
|
585
|
+
const sysText = Array.isArray(body.system)
|
|
586
|
+
? body.system.filter((b: any) => b.type === "text").map((b: any) => b.text).join("\n")
|
|
587
|
+
: String(body.system ?? "")
|
|
588
|
+
const msgText = (body.messages ?? [])
|
|
589
|
+
.map((m: any) => typeof m.content === "string" ? m.content : JSON.stringify(m.content))
|
|
590
|
+
.join("\n")
|
|
591
|
+
return c.json({ input_tokens: roughTokens(sysText + msgText) })
|
|
592
|
+
} catch {
|
|
593
|
+
return c.json({ input_tokens: 0 })
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
app.post("/v1/messages/count_tokens", handleCountTokens)
|
|
597
|
+
app.post("/messages/count_tokens", handleCountTokens)
|
|
598
|
+
|
|
599
|
+
// ── Messages handler ─────────────────────────────────────────────────────
|
|
600
|
+
|
|
601
|
+
const handleMessages = async (c: Context) => {
|
|
602
|
+
const reqId = generateId("req_")
|
|
603
|
+
// Will be set after body parse; needed for outer catch
|
|
604
|
+
let trace: ReturnType<typeof traceStore.create> | undefined
|
|
605
|
+
let requestStarted = Date.now()
|
|
606
|
+
let clientDisconnected = false
|
|
607
|
+
let abortReason: "stall" | "max_duration" | "max_output" | null = null
|
|
608
|
+
|
|
609
|
+
try {
|
|
610
|
+
let body: any
|
|
611
|
+
try {
|
|
612
|
+
body = await c.req.json()
|
|
613
|
+
} catch (parseErr) {
|
|
614
|
+
logWarn("request.invalid_json", { reqId })
|
|
615
|
+
return c.json({ type: "error", error: { type: "invalid_request_error", message: "Request body must be valid JSON" }, request_id: reqId }, 400)
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
|
|
619
|
+
logWarn("request.missing_messages", { reqId })
|
|
620
|
+
return c.json({ type: "error", error: { type: "invalid_request_error", message: "messages is required and must be a non-empty array" }, request_id: reqId }, 400)
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
const model = mapModelToClaudeModel(body.model || "sonnet")
|
|
624
|
+
const stream = body.stream ?? false
|
|
625
|
+
const hasTools = body.tools?.length > 0
|
|
626
|
+
const abortController = new AbortController()
|
|
627
|
+
|
|
628
|
+
// Stall-based timeout: only aborts if no SDK events received for stallTimeoutMs.
|
|
629
|
+
// Resets on every SDK event, so active requests never get killed.
|
|
630
|
+
// NOTE: not started until queue is acquired — queue wait doesn't count.
|
|
631
|
+
let stallTimer: ReturnType<typeof setTimeout> | null = null
|
|
632
|
+
const resetStallTimer = () => {
|
|
633
|
+
if (stallTimer) clearTimeout(stallTimer)
|
|
634
|
+
stallTimer = setTimeout(() => {
|
|
635
|
+
abortReason = "stall"
|
|
636
|
+
logWarn("request.stall_timeout", {
|
|
637
|
+
reqId,
|
|
638
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
639
|
+
phase: trace?.phase,
|
|
640
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
641
|
+
outputLen: trace?.outputLen,
|
|
642
|
+
lastEventType: trace?.lastEventType,
|
|
643
|
+
})
|
|
644
|
+
abortController.abort()
|
|
645
|
+
}, finalConfig.stallTimeoutMs)
|
|
646
|
+
}
|
|
647
|
+
const clearStallTimer = () => {
|
|
648
|
+
if (stallTimer) { clearTimeout(stallTimer); stallTimer = null }
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
// Hard max duration: kills request even if actively streaming. Safety valve.
|
|
652
|
+
let hardTimer: ReturnType<typeof setTimeout> | null = null
|
|
653
|
+
const startHardTimer = () => {
|
|
654
|
+
hardTimer = setTimeout(() => {
|
|
655
|
+
abortReason = "max_duration"
|
|
656
|
+
logError("request.max_duration", {
|
|
657
|
+
reqId,
|
|
658
|
+
maxDurationMs: finalConfig.maxDurationMs,
|
|
659
|
+
phase: trace?.phase,
|
|
660
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
661
|
+
outputLen: trace?.outputLen,
|
|
662
|
+
model: trace?.model,
|
|
663
|
+
lastEventType: trace?.lastEventType,
|
|
664
|
+
})
|
|
665
|
+
abortController.abort()
|
|
666
|
+
}, finalConfig.maxDurationMs)
|
|
667
|
+
}
|
|
668
|
+
const clearHardTimer = () => {
|
|
669
|
+
if (hardTimer) { clearTimeout(hardTimer); hardTimer = null }
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
// Output size check: kills request if output exceeds maxOutputChars.
|
|
673
|
+
const checkOutputSize = (outputLen: number) => {
|
|
674
|
+
if (outputLen > finalConfig.maxOutputChars && !abortReason) {
|
|
675
|
+
abortReason = "max_output"
|
|
676
|
+
logError("request.max_output", {
|
|
677
|
+
reqId,
|
|
678
|
+
outputLen,
|
|
679
|
+
maxOutputChars: finalConfig.maxOutputChars,
|
|
680
|
+
phase: trace?.phase,
|
|
681
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
682
|
+
model: trace?.model,
|
|
683
|
+
elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
|
|
684
|
+
})
|
|
685
|
+
abortController.abort()
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
const thinking: { type: "adaptive" } | { type: "enabled"; budgetTokens?: number } | { type: "disabled" } | undefined =
|
|
690
|
+
body.thinking?.type === "enabled" ? { type: "enabled", budgetTokens: body.thinking.budget_tokens }
|
|
691
|
+
: body.thinking?.type === "disabled" ? { type: "disabled" }
|
|
692
|
+
: body.thinking?.type === "adaptive" ? { type: "adaptive" }
|
|
693
|
+
: undefined
|
|
694
|
+
|
|
695
|
+
let systemContext = ""
|
|
696
|
+
if (body.system) {
|
|
697
|
+
if (typeof body.system === "string") {
|
|
698
|
+
systemContext = body.system
|
|
699
|
+
} else if (Array.isArray(body.system)) {
|
|
700
|
+
systemContext = body.system
|
|
701
|
+
.filter((b: any) => b.type === "text" && b.text)
|
|
702
|
+
.map((b: any) => b.text)
|
|
703
|
+
.join("\n")
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
const messages = body.messages as Array<{ role: string; content: string | Array<any> }>
|
|
708
|
+
|
|
709
|
+
let promptText: string // text version for token counting / logging
|
|
710
|
+
let systemPrompt: string | undefined
|
|
711
|
+
const toolsSection = hasTools ? buildClientToolsPrompt(body.tools) : ""
|
|
712
|
+
|
|
713
|
+
// ── Session resumption ─────────────────────────────────────────────
|
|
714
|
+
// Derive conversation ID from: headers (explicit) or conversation_label
|
|
715
|
+
// embedded in openclaw message metadata.
|
|
716
|
+
const conversationId = c.req.header("x-conversation-id")
|
|
717
|
+
?? c.req.header("x-session-id")
|
|
718
|
+
?? extractConversationLabel(messages)
|
|
719
|
+
?? null
|
|
720
|
+
|
|
721
|
+
let resumeSessionId: string | undefined
|
|
722
|
+
let isResuming = false
|
|
723
|
+
|
|
724
|
+
if (conversationId && messages.length > 1) {
|
|
725
|
+
const stored = sessionStore.get(conversationId)
|
|
726
|
+
if (stored && stored.model === model) {
|
|
727
|
+
resumeSessionId = stored.sdkSessionId
|
|
728
|
+
isResuming = true
|
|
729
|
+
logInfo("session.resuming", {
|
|
730
|
+
reqId,
|
|
731
|
+
conversationId,
|
|
732
|
+
sdkSessionId: resumeSessionId,
|
|
733
|
+
storedMsgCount: stored.messageCount,
|
|
734
|
+
currentMsgCount: messages.length,
|
|
735
|
+
resumeCount: stored.resumeCount,
|
|
736
|
+
})
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
// Check if last user message contains images — if so, use native SDK multimodal input
|
|
741
|
+
const lastMsg = messages[messages.length - 1]!
|
|
742
|
+
const lastMsgHasImages = contentHasImages(lastMsg.content)
|
|
743
|
+
|
|
744
|
+
// promptInput: either a string (text-only) or AsyncIterable<SDKUserMessage> (multimodal)
|
|
745
|
+
let promptInput: string | AsyncIterable<any>
|
|
746
|
+
// promptText: always the text-only version for token counting and logging
|
|
747
|
+
promptText = serializeContent(lastMsg.content)
|
|
748
|
+
|
|
749
|
+
if (isResuming && resumeSessionId) {
|
|
750
|
+
systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
|
|
751
|
+
if (lastMsgHasImages) {
|
|
752
|
+
promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content), resumeSessionId)
|
|
753
|
+
logInfo("session.resume_with_images", { reqId, conversationId })
|
|
754
|
+
} else {
|
|
755
|
+
promptInput = promptText
|
|
756
|
+
}
|
|
757
|
+
} else if (messages.length === 1) {
|
|
758
|
+
systemPrompt = ((systemContext || "") + toolsSection).trim() || undefined
|
|
759
|
+
promptInput = lastMsgHasImages
|
|
760
|
+
? createSDKUserMessage(buildNativeContent(lastMsg.content))
|
|
761
|
+
: promptText
|
|
762
|
+
if (lastMsgHasImages) logInfo("request.native_images", { reqId })
|
|
763
|
+
} else {
|
|
764
|
+
const priorMsgs = messages.slice(0, -1)
|
|
765
|
+
|
|
766
|
+
const contextParts = priorMsgs
|
|
767
|
+
.map((m) => {
|
|
768
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
769
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
770
|
+
})
|
|
771
|
+
.join("\n\n")
|
|
772
|
+
|
|
773
|
+
const baseSystem = systemContext || ""
|
|
774
|
+
const contextSection = contextParts
|
|
775
|
+
? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---`
|
|
776
|
+
: ""
|
|
777
|
+
systemPrompt = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
778
|
+
|
|
779
|
+
if (lastMsgHasImages) {
|
|
780
|
+
promptInput = createSDKUserMessage(buildNativeContent(lastMsg.content))
|
|
781
|
+
logInfo("request.native_images", { reqId })
|
|
782
|
+
} else {
|
|
783
|
+
promptInput = promptText
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
requestStarted = Date.now()
|
|
788
|
+
|
|
789
|
+
// Capture client info
|
|
790
|
+
const clientIp = c.req.header("x-forwarded-for")
|
|
791
|
+
?? c.req.header("x-real-ip")
|
|
792
|
+
?? c.req.header("cf-connecting-ip")
|
|
793
|
+
?? "unknown"
|
|
794
|
+
const userAgent = c.req.header("user-agent") ?? "unknown"
|
|
795
|
+
const bodyBytes = JSON.stringify(body).length
|
|
796
|
+
|
|
797
|
+
// ── Create trace ──────────────────────────────────────────────────────
|
|
798
|
+
trace = traceStore.create({
|
|
799
|
+
reqId,
|
|
800
|
+
model,
|
|
801
|
+
requestedModel: body.model || "sonnet",
|
|
802
|
+
stream,
|
|
803
|
+
hasTools,
|
|
804
|
+
thinking: thinking?.type,
|
|
805
|
+
promptLen: promptText.length,
|
|
806
|
+
systemLen: systemPrompt?.length ?? 0,
|
|
807
|
+
msgCount: messages.length,
|
|
808
|
+
bodyBytes,
|
|
809
|
+
clientIp,
|
|
810
|
+
userAgent,
|
|
811
|
+
})
|
|
812
|
+
|
|
813
|
+
// ── Queue ─────────────────────────────────────────────────────────────
|
|
814
|
+
const queueActive = requestQueue.activeCount
|
|
815
|
+
const queueWaiting = requestQueue.waitingCount
|
|
816
|
+
const needsQueue = queueActive >= MAX_CONCURRENT
|
|
817
|
+
|
|
818
|
+
traceStore.phase(reqId, "queued", { queueActive, queueWaiting })
|
|
819
|
+
|
|
820
|
+
if (needsQueue) {
|
|
821
|
+
logInfo("queue.waiting", {
|
|
822
|
+
reqId,
|
|
823
|
+
model,
|
|
824
|
+
queueActive,
|
|
825
|
+
queueWaiting,
|
|
826
|
+
queueTimeoutMs: QUEUE_TIMEOUT_MS,
|
|
827
|
+
})
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
await requestQueue.acquire()
|
|
831
|
+
|
|
832
|
+
const queueWaitMs = Date.now() - requestStarted
|
|
833
|
+
traceStore.phase(reqId, "acquired", { queueWaitMs })
|
|
834
|
+
|
|
835
|
+
logInfo("queue.acquired", {
|
|
836
|
+
reqId,
|
|
837
|
+
queueWaitMs,
|
|
838
|
+
queueActive: requestQueue.activeCount,
|
|
839
|
+
queueWaiting: requestQueue.waitingCount,
|
|
840
|
+
})
|
|
841
|
+
|
|
842
|
+
// Start timers AFTER queue acquire — queue wait doesn't count
|
|
843
|
+
resetStallTimer()
|
|
844
|
+
startHardTimer()
|
|
845
|
+
|
|
846
|
+
// ── Non-streaming ──────────────────────────────────────────────────────
|
|
847
|
+
if (!stream) {
|
|
848
|
+
let fullText = ""
|
|
849
|
+
let capturedSessionId: string | undefined
|
|
850
|
+
const queryOpts = buildQueryOptions(model, { partial: false, systemPrompt, abortController, thinking, resume: resumeSessionId })
|
|
851
|
+
try {
|
|
852
|
+
traceStore.phase(reqId, "sdk_starting")
|
|
853
|
+
let sdkEventCount = 0
|
|
854
|
+
for await (const message of query({ prompt: promptInput, options: queryOpts })) {
|
|
855
|
+
sdkEventCount++
|
|
856
|
+
resetStallTimer()
|
|
857
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
|
|
858
|
+
// Capture session_id from init message
|
|
859
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
860
|
+
capturedSessionId = (message as any).session_id
|
|
861
|
+
}
|
|
862
|
+
if (message.type === "assistant") {
|
|
863
|
+
let turnText = ""
|
|
864
|
+
for (const block of message.message.content) {
|
|
865
|
+
if (block.type === "text") turnText += block.text
|
|
866
|
+
}
|
|
867
|
+
fullText = turnText
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
traceStore.phase(reqId, "sdk_done")
|
|
871
|
+
|
|
872
|
+
// Store session mapping for future resumption
|
|
873
|
+
if (conversationId && capturedSessionId) {
|
|
874
|
+
if (isResuming) {
|
|
875
|
+
sessionStore.recordResume(conversationId)
|
|
876
|
+
logInfo("session.resumed_ok", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
877
|
+
} else {
|
|
878
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
879
|
+
logInfo("session.created", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
} catch (resumeErr) {
|
|
883
|
+
// If resume failed, retry with full context
|
|
884
|
+
if (isResuming && resumeSessionId) {
|
|
885
|
+
logWarn("session.resume_failed", {
|
|
886
|
+
reqId,
|
|
887
|
+
conversationId,
|
|
888
|
+
sdkSessionId: resumeSessionId,
|
|
889
|
+
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
890
|
+
})
|
|
891
|
+
if (conversationId) {
|
|
892
|
+
sessionStore.recordFailure(conversationId)
|
|
893
|
+
sessionStore.invalidate(conversationId)
|
|
894
|
+
}
|
|
895
|
+
// Rebuild with full context (non-resume path)
|
|
896
|
+
const fbLastMsg = messages[messages.length - 1]!
|
|
897
|
+
const priorMsgs = messages.slice(0, -1)
|
|
898
|
+
const contextParts = priorMsgs
|
|
899
|
+
.map((m) => {
|
|
900
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
901
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
902
|
+
})
|
|
903
|
+
.join("\n\n")
|
|
904
|
+
const baseSystem = systemContext || ""
|
|
905
|
+
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
906
|
+
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
907
|
+
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
908
|
+
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
909
|
+
: serializeContent(fbLastMsg.content)
|
|
910
|
+
const fallbackOpts = buildQueryOptions(model, { partial: false, systemPrompt: fallbackSystem, abortController, thinking })
|
|
911
|
+
|
|
912
|
+
logInfo("session.fallback_full_context", { reqId, conversationId })
|
|
913
|
+
let sdkEventCount = 0
|
|
914
|
+
for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
|
|
915
|
+
sdkEventCount++
|
|
916
|
+
resetStallTimer()
|
|
917
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, (message as any).event?.type ?? (message as any).message?.type)
|
|
918
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
919
|
+
capturedSessionId = (message as any).session_id
|
|
920
|
+
}
|
|
921
|
+
if (message.type === "assistant") {
|
|
922
|
+
let turnText = ""
|
|
923
|
+
for (const block of message.message.content) {
|
|
924
|
+
if (block.type === "text") turnText += block.text
|
|
925
|
+
}
|
|
926
|
+
fullText = turnText
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
traceStore.phase(reqId, "sdk_done")
|
|
930
|
+
// Store the new session
|
|
931
|
+
if (conversationId && capturedSessionId) {
|
|
932
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
933
|
+
logInfo("session.recreated_after_fallback", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
934
|
+
}
|
|
935
|
+
} else {
|
|
936
|
+
throw resumeErr
|
|
937
|
+
}
|
|
938
|
+
} finally {
|
|
939
|
+
clearStallTimer(); clearHardTimer()
|
|
940
|
+
// (temp files no longer used — images passed natively)
|
|
941
|
+
requestQueue.release()
|
|
942
|
+
logDebug("queue.released", {
|
|
943
|
+
reqId,
|
|
944
|
+
queueActive: requestQueue.activeCount,
|
|
945
|
+
queueWaiting: requestQueue.waitingCount,
|
|
946
|
+
})
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
traceStore.phase(reqId, "responding")
|
|
950
|
+
|
|
951
|
+
if (hasTools) {
|
|
952
|
+
const { toolCalls, textBefore } = parseToolUse(fullText)
|
|
953
|
+
const content: any[] = []
|
|
954
|
+
if (textBefore) content.push({ type: "text", text: textBefore })
|
|
955
|
+
for (const tc of toolCalls) content.push({ type: "tool_use", id: tc.id, name: tc.name, input: tc.input })
|
|
956
|
+
if (content.length === 0) content.push({ type: "text", text: fullText || "..." })
|
|
957
|
+
const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
|
|
958
|
+
|
|
959
|
+
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
|
|
960
|
+
|
|
961
|
+
return c.json({
|
|
962
|
+
id: generateId("msg_"),
|
|
963
|
+
type: "message", role: "assistant", content,
|
|
964
|
+
model: body.model, stop_reason: stopReason, stop_sequence: null,
|
|
965
|
+
usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
|
|
966
|
+
})
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
if (!fullText || !fullText.trim()) fullText = "..."
|
|
970
|
+
traceStore.complete(reqId, { outputLen: fullText.length })
|
|
971
|
+
|
|
972
|
+
return c.json({
|
|
973
|
+
id: generateId("msg_"),
|
|
974
|
+
type: "message", role: "assistant",
|
|
975
|
+
content: [{ type: "text", text: fullText }],
|
|
976
|
+
model: body.model, stop_reason: "end_turn", stop_sequence: null,
|
|
977
|
+
usage: { input_tokens: roughTokens(promptText), output_tokens: roughTokens(fullText) }
|
|
978
|
+
})
|
|
979
|
+
}
|
|
980
|
+
|
|
981
|
+
// ── Streaming ──────────────────────────────────────────────────────────
|
|
982
|
+
const encoder = new TextEncoder()
|
|
983
|
+
const readable = new ReadableStream({
|
|
984
|
+
cancel() {
|
|
985
|
+
clientDisconnected = true
|
|
986
|
+
logWarn("stream.client_disconnect", {
|
|
987
|
+
reqId,
|
|
988
|
+
phase: trace?.phase,
|
|
989
|
+
sdkEventCount: trace?.sdkEventCount,
|
|
990
|
+
outputLen: trace?.outputLen,
|
|
991
|
+
elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
|
|
992
|
+
model: trace?.model,
|
|
993
|
+
})
|
|
994
|
+
abortController.abort()
|
|
995
|
+
},
|
|
996
|
+
async start(controller) {
|
|
997
|
+
const messageId = generateId("msg_")
|
|
998
|
+
let queueReleased = false
|
|
999
|
+
const releaseQueue = () => {
|
|
1000
|
+
if (!queueReleased) {
|
|
1001
|
+
queueReleased = true
|
|
1002
|
+
requestQueue.release()
|
|
1003
|
+
logDebug("queue.released", {
|
|
1004
|
+
reqId,
|
|
1005
|
+
queueActive: requestQueue.activeCount,
|
|
1006
|
+
queueWaiting: requestQueue.waitingCount,
|
|
1007
|
+
})
|
|
1008
|
+
}
|
|
1009
|
+
}
|
|
1010
|
+
|
|
1011
|
+
let sseSendErrors = 0
|
|
1012
|
+
const sse = (event: string, data: object) => {
|
|
1013
|
+
try {
|
|
1014
|
+
controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`))
|
|
1015
|
+
} catch (e) {
|
|
1016
|
+
sseSendErrors++
|
|
1017
|
+
if (sseSendErrors <= 3) {
|
|
1018
|
+
logWarn("stream.sse_send_failed", {
|
|
1019
|
+
reqId,
|
|
1020
|
+
event,
|
|
1021
|
+
sseSendErrors,
|
|
1022
|
+
error: e instanceof Error ? e.message : String(e),
|
|
1023
|
+
})
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
1026
|
+
}
|
|
1027
|
+
|
|
1028
|
+
try {
|
|
1029
|
+
const heartbeat = setInterval(() => {
|
|
1030
|
+
try {
|
|
1031
|
+
controller.enqueue(encoder.encode(`event: ping\ndata: {"type": "ping"}\n\n`))
|
|
1032
|
+
} catch (e) {
|
|
1033
|
+
logWarn("stream.heartbeat_failed", {
|
|
1034
|
+
reqId,
|
|
1035
|
+
error: e instanceof Error ? e.message : String(e),
|
|
1036
|
+
phase: trace?.phase,
|
|
1037
|
+
elapsedMs: trace ? Date.now() - trace.startedAt : undefined,
|
|
1038
|
+
})
|
|
1039
|
+
clearInterval(heartbeat)
|
|
1040
|
+
}
|
|
1041
|
+
}, 15_000)
|
|
1042
|
+
|
|
1043
|
+
sse("message_start", {
|
|
1044
|
+
type: "message_start",
|
|
1045
|
+
message: {
|
|
1046
|
+
id: messageId, type: "message", role: "assistant", content: [],
|
|
1047
|
+
model: body.model, stop_reason: null, stop_sequence: null,
|
|
1048
|
+
usage: { input_tokens: roughTokens(promptText), output_tokens: 1 }
|
|
1049
|
+
}
|
|
1050
|
+
})
|
|
1051
|
+
|
|
1052
|
+
if (hasTools) {
|
|
1053
|
+
// ── With tools: buffer output, parse tool_use blocks at end ──
|
|
1054
|
+
let fullText = ""
|
|
1055
|
+
let sdkEventCount = 0
|
|
1056
|
+
let lastEventAt = Date.now()
|
|
1057
|
+
const stallLog = setInterval(() => {
|
|
1058
|
+
const stallMs = Date.now() - lastEventAt
|
|
1059
|
+
traceStore.stall(reqId, stallMs)
|
|
1060
|
+
}, 15_000)
|
|
1061
|
+
let capturedSessionId: string | undefined
|
|
1062
|
+
try {
|
|
1063
|
+
traceStore.phase(reqId, "sdk_starting")
|
|
1064
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
|
|
1065
|
+
sdkEventCount++
|
|
1066
|
+
lastEventAt = Date.now()
|
|
1067
|
+
resetStallTimer()
|
|
1068
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1069
|
+
// Capture session_id from init message
|
|
1070
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1071
|
+
capturedSessionId = (message as any).session_id
|
|
1072
|
+
}
|
|
1073
|
+
if (message.type === "stream_event") {
|
|
1074
|
+
const ev = message.event as any
|
|
1075
|
+
// Detect first content event BEFORE sdkEvent records it
|
|
1076
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1077
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1078
|
+
}
|
|
1079
|
+
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1080
|
+
fullText += ev.delta.text ?? ""
|
|
1081
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1082
|
+
checkOutputSize(fullText.length)
|
|
1083
|
+
}
|
|
1084
|
+
}
|
|
1085
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1086
|
+
}
|
|
1087
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1088
|
+
|
|
1089
|
+
// Store session mapping
|
|
1090
|
+
if (conversationId && capturedSessionId) {
|
|
1091
|
+
if (isResuming) {
|
|
1092
|
+
sessionStore.recordResume(conversationId)
|
|
1093
|
+
} else {
|
|
1094
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
} catch (resumeErr) {
|
|
1098
|
+
// Resume failed in streaming with-tools path — retry with full context
|
|
1099
|
+
if (isResuming && resumeSessionId) {
|
|
1100
|
+
logWarn("session.resume_failed_stream", {
|
|
1101
|
+
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
1102
|
+
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
1103
|
+
})
|
|
1104
|
+
if (conversationId) {
|
|
1105
|
+
sessionStore.recordFailure(conversationId)
|
|
1106
|
+
sessionStore.invalidate(conversationId)
|
|
1107
|
+
}
|
|
1108
|
+
const fbLastMsg = messages[messages.length - 1]!
|
|
1109
|
+
const priorMsgs = messages.slice(0, -1)
|
|
1110
|
+
const contextParts = priorMsgs
|
|
1111
|
+
.map((m) => {
|
|
1112
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
1113
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
1114
|
+
})
|
|
1115
|
+
.join("\n\n")
|
|
1116
|
+
const baseSystem = systemContext || ""
|
|
1117
|
+
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
1118
|
+
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
1119
|
+
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
1120
|
+
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
1121
|
+
: serializeContent(fbLastMsg.content)
|
|
1122
|
+
const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
|
|
1123
|
+
|
|
1124
|
+
logInfo("session.fallback_full_context_stream", { reqId, conversationId })
|
|
1125
|
+
sdkEventCount = 0
|
|
1126
|
+
for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
|
|
1127
|
+
sdkEventCount++
|
|
1128
|
+
lastEventAt = Date.now()
|
|
1129
|
+
resetStallTimer()
|
|
1130
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1131
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1132
|
+
capturedSessionId = (message as any).session_id
|
|
1133
|
+
}
|
|
1134
|
+
if (message.type === "stream_event") {
|
|
1135
|
+
const ev = message.event as any
|
|
1136
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1137
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1138
|
+
}
|
|
1139
|
+
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1140
|
+
fullText += ev.delta.text ?? ""
|
|
1141
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1142
|
+
checkOutputSize(fullText.length)
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1146
|
+
}
|
|
1147
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1148
|
+
if (conversationId && capturedSessionId) {
|
|
1149
|
+
sessionStore.set(conversationId, capturedSessionId, model, messages.length)
|
|
1150
|
+
logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId })
|
|
1151
|
+
}
|
|
1152
|
+
} else {
|
|
1153
|
+
throw resumeErr
|
|
1154
|
+
}
|
|
1155
|
+
} finally {
|
|
1156
|
+
clearInterval(stallLog)
|
|
1157
|
+
clearInterval(heartbeat)
|
|
1158
|
+
clearStallTimer(); clearHardTimer()
|
|
1159
|
+
// (temp files no longer used — images passed natively)
|
|
1160
|
+
releaseQueue()
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
traceStore.phase(reqId, "responding")
|
|
1164
|
+
const { toolCalls, textBefore } = parseToolUse(fullText)
|
|
1165
|
+
|
|
1166
|
+
let blockIdx = 0
|
|
1167
|
+
const textContent = toolCalls.length === 0 ? (fullText || "...") : textBefore
|
|
1168
|
+
if (textContent) {
|
|
1169
|
+
sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "text", text: "" } })
|
|
1170
|
+
sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "text_delta", text: textContent } })
|
|
1171
|
+
sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
|
|
1172
|
+
blockIdx++
|
|
1173
|
+
} else if (toolCalls.length === 0) {
|
|
1174
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1175
|
+
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
|
|
1176
|
+
sse("content_block_stop", { type: "content_block_stop", index: 0 })
|
|
1177
|
+
blockIdx = 1
|
|
1178
|
+
}
|
|
1179
|
+
for (const tc of toolCalls) {
|
|
1180
|
+
sse("content_block_start", { type: "content_block_start", index: blockIdx, content_block: { type: "tool_use", id: tc.id, name: tc.name, input: {} } })
|
|
1181
|
+
sse("content_block_delta", { type: "content_block_delta", index: blockIdx, delta: { type: "input_json_delta", partial_json: JSON.stringify(tc.input) } })
|
|
1182
|
+
sse("content_block_stop", { type: "content_block_stop", index: blockIdx })
|
|
1183
|
+
blockIdx++
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1186
|
+
const stopReason = toolCalls.length > 0 ? "tool_use" : "end_turn"
|
|
1187
|
+
sse("message_delta", { type: "message_delta", delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
|
|
1188
|
+
sse("message_stop", { type: "message_stop" })
|
|
1189
|
+
controller.close()
|
|
1190
|
+
|
|
1191
|
+
traceStore.complete(reqId, { outputLen: fullText.length, toolCallCount: toolCalls.length })
|
|
1192
|
+
return
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
// ── No tools: stream text deltas directly ─────────────────────
|
|
1196
|
+
sse("content_block_start", { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })
|
|
1197
|
+
|
|
1198
|
+
let fullText = ""
|
|
1199
|
+
let hasStreamed = false
|
|
1200
|
+
let sdkEventCount = 0
|
|
1201
|
+
let lastEventAt = Date.now()
|
|
1202
|
+
let capturedSessionId2: string | undefined
|
|
1203
|
+
const stallLog = setInterval(() => {
|
|
1204
|
+
const stallMs = Date.now() - lastEventAt
|
|
1205
|
+
traceStore.stall(reqId, stallMs)
|
|
1206
|
+
}, 15_000)
|
|
1207
|
+
try {
|
|
1208
|
+
traceStore.phase(reqId, "sdk_starting")
|
|
1209
|
+
for await (const message of query({ prompt: promptInput, options: buildQueryOptions(model, { partial: true, systemPrompt, abortController, thinking, resume: resumeSessionId }) })) {
|
|
1210
|
+
sdkEventCount++
|
|
1211
|
+
lastEventAt = Date.now()
|
|
1212
|
+
resetStallTimer()
|
|
1213
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1214
|
+
// Capture session_id from init message
|
|
1215
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1216
|
+
capturedSessionId2 = (message as any).session_id
|
|
1217
|
+
}
|
|
1218
|
+
if (message.type === "stream_event") {
|
|
1219
|
+
const ev = message.event as any
|
|
1220
|
+
// Detect first content event BEFORE sdkEvent records it
|
|
1221
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1222
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1223
|
+
}
|
|
1224
|
+
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1225
|
+
const text = ev.delta.text ?? ""
|
|
1226
|
+
if (text) {
|
|
1227
|
+
fullText += text
|
|
1228
|
+
hasStreamed = true
|
|
1229
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1230
|
+
checkOutputSize(fullText.length)
|
|
1231
|
+
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1236
|
+
}
|
|
1237
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1238
|
+
|
|
1239
|
+
// Store session mapping
|
|
1240
|
+
if (conversationId && capturedSessionId2) {
|
|
1241
|
+
if (isResuming) {
|
|
1242
|
+
sessionStore.recordResume(conversationId)
|
|
1243
|
+
} else {
|
|
1244
|
+
sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
|
|
1245
|
+
}
|
|
1246
|
+
}
|
|
1247
|
+
} catch (resumeErr) {
|
|
1248
|
+
// Resume failed in streaming no-tools path — retry with full context
|
|
1249
|
+
if (isResuming && resumeSessionId) {
|
|
1250
|
+
logWarn("session.resume_failed_stream", {
|
|
1251
|
+
reqId, conversationId, sdkSessionId: resumeSessionId,
|
|
1252
|
+
error: resumeErr instanceof Error ? resumeErr.message : String(resumeErr),
|
|
1253
|
+
})
|
|
1254
|
+
if (conversationId) {
|
|
1255
|
+
sessionStore.recordFailure(conversationId)
|
|
1256
|
+
sessionStore.invalidate(conversationId)
|
|
1257
|
+
}
|
|
1258
|
+
const fbLastMsg = messages[messages.length - 1]!
|
|
1259
|
+
const priorMsgs = messages.slice(0, -1)
|
|
1260
|
+
const contextParts = priorMsgs
|
|
1261
|
+
.map((m) => {
|
|
1262
|
+
const role = m.role === "assistant" ? "Assistant" : "User"
|
|
1263
|
+
return `[${role}]\n${serializeContent(m.content)}`
|
|
1264
|
+
})
|
|
1265
|
+
.join("\n\n")
|
|
1266
|
+
const baseSystem = systemContext || ""
|
|
1267
|
+
const contextSection = contextParts ? `\n\nPrior conversation turns:\n\n${contextParts}\n\n---` : ""
|
|
1268
|
+
const fallbackSystem = (baseSystem + contextSection + toolsSection).trim() || undefined
|
|
1269
|
+
const fallbackInput: string | AsyncIterable<any> = contentHasImages(fbLastMsg.content)
|
|
1270
|
+
? createSDKUserMessage(buildNativeContent(fbLastMsg.content))
|
|
1271
|
+
: serializeContent(fbLastMsg.content)
|
|
1272
|
+
const fallbackOpts = buildQueryOptions(model, { partial: true, systemPrompt: fallbackSystem, abortController, thinking })
|
|
1273
|
+
|
|
1274
|
+
logInfo("session.fallback_full_context_stream", { reqId, conversationId })
|
|
1275
|
+
sdkEventCount = 0
|
|
1276
|
+
for await (const message of query({ prompt: fallbackInput, options: fallbackOpts })) {
|
|
1277
|
+
sdkEventCount++
|
|
1278
|
+
lastEventAt = Date.now()
|
|
1279
|
+
resetStallTimer()
|
|
1280
|
+
const subtype = (message as any).event?.type ?? (message as any).message?.type
|
|
1281
|
+
if (message.type === "system" && (message as any).subtype === "init") {
|
|
1282
|
+
capturedSessionId2 = (message as any).session_id
|
|
1283
|
+
}
|
|
1284
|
+
if (message.type === "stream_event") {
|
|
1285
|
+
const ev = message.event as any
|
|
1286
|
+
if (!trace!.firstTokenAt && (ev.type === "content_block_delta" || ev.type === "content_block_start")) {
|
|
1287
|
+
traceStore.phase(reqId, "sdk_streaming")
|
|
1288
|
+
}
|
|
1289
|
+
if (ev.type === "content_block_delta" && ev.delta?.type === "text_delta") {
|
|
1290
|
+
const text = ev.delta.text ?? ""
|
|
1291
|
+
if (text) {
|
|
1292
|
+
fullText += text
|
|
1293
|
+
hasStreamed = true
|
|
1294
|
+
traceStore.updateOutput(reqId, fullText.length)
|
|
1295
|
+
checkOutputSize(fullText.length)
|
|
1296
|
+
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text } })
|
|
1297
|
+
}
|
|
1298
|
+
}
|
|
1299
|
+
}
|
|
1300
|
+
traceStore.sdkEvent(reqId, sdkEventCount, message.type, subtype)
|
|
1301
|
+
}
|
|
1302
|
+
traceStore.phase(reqId, "sdk_done")
|
|
1303
|
+
if (conversationId && capturedSessionId2) {
|
|
1304
|
+
sessionStore.set(conversationId, capturedSessionId2, model, messages.length)
|
|
1305
|
+
logInfo("session.recreated_after_fallback_stream", { reqId, conversationId, sdkSessionId: capturedSessionId2 })
|
|
1306
|
+
}
|
|
1307
|
+
} else {
|
|
1308
|
+
throw resumeErr
|
|
1309
|
+
}
|
|
1310
|
+
} finally {
|
|
1311
|
+
clearInterval(stallLog)
|
|
1312
|
+
clearInterval(heartbeat)
|
|
1313
|
+
clearStallTimer(); clearHardTimer()
|
|
1314
|
+
// (temp files no longer used — images passed natively)
|
|
1315
|
+
releaseQueue()
|
|
1316
|
+
}
|
|
1317
|
+
|
|
1318
|
+
if (!hasStreamed) {
|
|
1319
|
+
sse("content_block_delta", { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "..." } })
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
sse("content_block_stop", { type: "content_block_stop", index: 0 })
|
|
1323
|
+
sse("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: roughTokens(fullText) } })
|
|
1324
|
+
sse("message_stop", { type: "message_stop" })
|
|
1325
|
+
controller.close()
|
|
1326
|
+
|
|
1327
|
+
traceStore.complete(reqId, { outputLen: fullText.length })
|
|
1328
|
+
|
|
1329
|
+
} catch (error) {
|
|
1330
|
+
clearStallTimer(); clearHardTimer()
|
|
1331
|
+
releaseQueue()
|
|
1332
|
+
const err = error instanceof Error ? error : new Error(String(error))
|
|
1333
|
+
const isAbort = err.name === "AbortError" || err.message?.includes("abort")
|
|
1334
|
+
const isQueueTimeout = err.message.includes("Queue timeout")
|
|
1335
|
+
|
|
1336
|
+
let errMsg: string
|
|
1337
|
+
let errType: string
|
|
1338
|
+
if (clientDisconnected) {
|
|
1339
|
+
errMsg = "Client disconnected during streaming."
|
|
1340
|
+
errType = "api_error"
|
|
1341
|
+
} else if (abortReason === "max_duration") {
|
|
1342
|
+
errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s. Output: ${trace?.outputLen ?? 0} chars.`
|
|
1343
|
+
errType = "api_error"
|
|
1344
|
+
} else if (abortReason === "max_output") {
|
|
1345
|
+
errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
|
|
1346
|
+
errType = "api_error"
|
|
1347
|
+
} else if (isAbort) {
|
|
1348
|
+
errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
|
|
1349
|
+
errType = "api_error"
|
|
1350
|
+
} else if (isQueueTimeout) {
|
|
1351
|
+
errMsg = "Server busy — all request slots are occupied. Please retry shortly."
|
|
1352
|
+
errType = "overloaded_error"
|
|
1353
|
+
} else {
|
|
1354
|
+
errMsg = err.message
|
|
1355
|
+
errType = "api_error"
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1358
|
+
// Trace the failure with full context
|
|
1359
|
+
traceStore.fail(reqId, err, "error", {
|
|
1360
|
+
clientDisconnect: clientDisconnected,
|
|
1361
|
+
abortReason,
|
|
1362
|
+
aborted: isAbort,
|
|
1363
|
+
queueTimeout: isQueueTimeout,
|
|
1364
|
+
stallTimeoutMs: finalConfig.stallTimeoutMs,
|
|
1365
|
+
maxDurationMs: finalConfig.maxDurationMs,
|
|
1366
|
+
maxOutputChars: finalConfig.maxOutputChars,
|
|
1367
|
+
sseSendErrors,
|
|
1368
|
+
})
|
|
1369
|
+
|
|
1370
|
+
// (temp files no longer used — images passed natively)
|
|
1371
|
+
if (!clientDisconnected) {
|
|
1372
|
+
try {
|
|
1373
|
+
sse("error", { type: "error", error: { type: errType, message: errMsg }, request_id: reqId })
|
|
1374
|
+
controller.close()
|
|
1375
|
+
} catch {}
|
|
1376
|
+
} else {
|
|
1377
|
+
try { controller.close() } catch {}
|
|
1378
|
+
}
|
|
1379
|
+
}
|
|
1380
|
+
}
|
|
1381
|
+
})
|
|
1382
|
+
|
|
1383
|
+
return new Response(readable, {
|
|
1384
|
+
headers: {
|
|
1385
|
+
"Content-Type": "text/event-stream",
|
|
1386
|
+
"Cache-Control": "no-cache",
|
|
1387
|
+
"Connection": "keep-alive"
|
|
1388
|
+
}
|
|
1389
|
+
})
|
|
1390
|
+
|
|
1391
|
+
} catch (error) {
|
|
1392
|
+
const err = error instanceof Error ? error : new Error(String(error))
|
|
1393
|
+
const isAbort = err.name === "AbortError" || err.message?.includes("abort")
|
|
1394
|
+
const isQueueTimeout = err.message.includes("Queue timeout")
|
|
1395
|
+
|
|
1396
|
+
let errMsg: string
|
|
1397
|
+
let errType: string
|
|
1398
|
+
if (clientDisconnected) {
|
|
1399
|
+
errMsg = "Client disconnected."
|
|
1400
|
+
errType = "api_error"
|
|
1401
|
+
} else if (abortReason === "max_duration") {
|
|
1402
|
+
errMsg = `Request exceeded max duration of ${finalConfig.maxDurationMs / 1000}s.`
|
|
1403
|
+
errType = "api_error"
|
|
1404
|
+
} else if (abortReason === "max_output") {
|
|
1405
|
+
errMsg = `Request exceeded max output size of ${finalConfig.maxOutputChars} chars.`
|
|
1406
|
+
errType = "api_error"
|
|
1407
|
+
} else if (isAbort) {
|
|
1408
|
+
errMsg = `Request stalled — no SDK activity for ${finalConfig.stallTimeoutMs / 1000}s. Please retry.`
|
|
1409
|
+
errType = "api_error"
|
|
1410
|
+
} else if (isQueueTimeout) {
|
|
1411
|
+
errMsg = "Server busy — all request slots are occupied. Please retry shortly."
|
|
1412
|
+
errType = "overloaded_error"
|
|
1413
|
+
} else {
|
|
1414
|
+
errMsg = err.message
|
|
1415
|
+
errType = "api_error"
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1418
|
+
// Trace the failure
|
|
1419
|
+
if (trace) {
|
|
1420
|
+
traceStore.fail(reqId, err, "error", {
|
|
1421
|
+
clientDisconnect: clientDisconnected,
|
|
1422
|
+
aborted: isAbort,
|
|
1423
|
+
queueTimeout: isQueueTimeout,
|
|
1424
|
+
})
|
|
1425
|
+
} else {
|
|
1426
|
+
logError("request.error.no_trace", { reqId, error: errMsg, stack: err.stack })
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
if (isQueueTimeout) {
|
|
1430
|
+
return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
|
|
1431
|
+
status: 529, headers: { "Content-Type": "application/json" }
|
|
1432
|
+
})
|
|
1433
|
+
}
|
|
1434
|
+
if (isAbort) {
|
|
1435
|
+
return new Response(JSON.stringify({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }), {
|
|
1436
|
+
status: 504, headers: { "Content-Type": "application/json" }
|
|
1437
|
+
})
|
|
1438
|
+
}
|
|
1439
|
+
return c.json({ type: "error", error: { type: errType, message: errMsg }, request_id: reqId }, 500)
|
|
1440
|
+
}
|
|
1441
|
+
}
|
|
1442
|
+
|
|
1443
|
+
// Register the Anthropic Messages handler on both the versioned and bare paths.
app.post("/v1/messages", handleMessages)
app.post("/messages", handleMessages)
|
|
1445
|
+
|
|
1446
|
+
// Stub: batches API not supported
|
|
1447
|
+
const handleBatches = (c: Context) => c.json({
|
|
1448
|
+
type: "error",
|
|
1449
|
+
error: { type: "not_implemented_error", message: "Batches API is not supported by this proxy" }
|
|
1450
|
+
}, 501)
|
|
1451
|
+
app.post("/v1/messages/batches", handleBatches)
|
|
1452
|
+
app.get("/v1/messages/batches", handleBatches)
|
|
1453
|
+
app.get("/v1/messages/batches/:id", handleBatches)
|
|
1454
|
+
|
|
1455
|
+
// ── OpenAI-compatible /v1/chat/completions ─────────────────────────────
|
|
1456
|
+
|
|
1457
|
+
function convertOpenaiContent(content: any): any {
|
|
1458
|
+
if (typeof content === "string") return content
|
|
1459
|
+
if (!Array.isArray(content)) return String(content ?? "")
|
|
1460
|
+
|
|
1461
|
+
return content.map((part: any) => {
|
|
1462
|
+
if (part.type === "text") return { type: "text", text: part.text ?? "" }
|
|
1463
|
+
if (part.type === "image_url" && part.image_url?.url) {
|
|
1464
|
+
const url = part.image_url.url as string
|
|
1465
|
+
const dataMatch = url.match(/^data:(image\/\w+);base64,(.+)$/)
|
|
1466
|
+
if (dataMatch) {
|
|
1467
|
+
return {
|
|
1468
|
+
type: "image",
|
|
1469
|
+
source: {
|
|
1470
|
+
type: "base64",
|
|
1471
|
+
media_type: dataMatch[1]!,
|
|
1472
|
+
data: dataMatch[2]!
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1476
|
+
return {
|
|
1477
|
+
type: "image",
|
|
1478
|
+
source: { type: "url", url }
|
|
1479
|
+
}
|
|
1480
|
+
}
|
|
1481
|
+
return part
|
|
1482
|
+
})
|
|
1483
|
+
}
|
|
1484
|
+
|
|
1485
|
+
function openaiToAnthropicMessages(messages: any[]): { system?: string; messages: any[] } {
|
|
1486
|
+
let system: string | undefined
|
|
1487
|
+
const converted: any[] = []
|
|
1488
|
+
|
|
1489
|
+
for (const msg of messages) {
|
|
1490
|
+
if (msg.role === "system") {
|
|
1491
|
+
const sysText = typeof msg.content === "string" ? msg.content
|
|
1492
|
+
: Array.isArray(msg.content) ? msg.content.filter((p: any) => p.type === "text").map((p: any) => p.text ?? "").join("")
|
|
1493
|
+
: String(msg.content ?? "")
|
|
1494
|
+
system = (system ? system + "\n" : "") + sysText
|
|
1495
|
+
} else if (msg.role === "user") {
|
|
1496
|
+
converted.push({ role: "user", content: convertOpenaiContent(msg.content) })
|
|
1497
|
+
} else if (msg.role === "assistant") {
|
|
1498
|
+
if (msg.tool_calls?.length) {
|
|
1499
|
+
const content: any[] = []
|
|
1500
|
+
if (msg.content) content.push({ type: "text", text: msg.content })
|
|
1501
|
+
for (const tc of msg.tool_calls) {
|
|
1502
|
+
content.push({
|
|
1503
|
+
type: "tool_use",
|
|
1504
|
+
id: tc.id,
|
|
1505
|
+
name: tc.function?.name ?? "",
|
|
1506
|
+
input: tc.function?.arguments ? JSON.parse(tc.function.arguments) : {}
|
|
1507
|
+
})
|
|
1508
|
+
}
|
|
1509
|
+
converted.push({ role: "assistant", content })
|
|
1510
|
+
} else {
|
|
1511
|
+
converted.push({ role: "assistant", content: msg.content ?? "" })
|
|
1512
|
+
}
|
|
1513
|
+
} else if (msg.role === "tool") {
|
|
1514
|
+
converted.push({
|
|
1515
|
+
role: "user",
|
|
1516
|
+
content: [{
|
|
1517
|
+
type: "tool_result",
|
|
1518
|
+
tool_use_id: msg.tool_call_id,
|
|
1519
|
+
content: msg.content ?? ""
|
|
1520
|
+
}]
|
|
1521
|
+
})
|
|
1522
|
+
}
|
|
1523
|
+
}
|
|
1524
|
+
return { system, messages: converted }
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1527
|
+
function openaiToAnthropicTools(tools: any[]): any[] {
|
|
1528
|
+
return tools
|
|
1529
|
+
.filter((t: any) => t.type === "function" && t.function)
|
|
1530
|
+
.map((t: any) => ({
|
|
1531
|
+
name: t.function.name,
|
|
1532
|
+
description: t.function.description ?? "",
|
|
1533
|
+
input_schema: t.function.parameters ?? { type: "object", properties: {} }
|
|
1534
|
+
}))
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
function anthropicToOpenaiResponse(anthropicBody: any, model: string): any {
|
|
1538
|
+
const textBlocks = (anthropicBody.content ?? []).filter((b: any) => b.type === "text")
|
|
1539
|
+
const toolBlocks = (anthropicBody.content ?? []).filter((b: any) => b.type === "tool_use")
|
|
1540
|
+
|
|
1541
|
+
const text = textBlocks.map((b: any) => b.text).join("") || (toolBlocks.length > 0 ? null : "")
|
|
1542
|
+
|
|
1543
|
+
const message: any = { role: "assistant", content: text }
|
|
1544
|
+
|
|
1545
|
+
if (toolBlocks.length > 0) {
|
|
1546
|
+
message.tool_calls = toolBlocks.map((b: any, i: number) => ({
|
|
1547
|
+
id: b.id,
|
|
1548
|
+
type: "function",
|
|
1549
|
+
function: {
|
|
1550
|
+
name: b.name,
|
|
1551
|
+
arguments: JSON.stringify(b.input ?? {})
|
|
1552
|
+
}
|
|
1553
|
+
}))
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
const finishReason = anthropicBody.stop_reason === "tool_use" ? "tool_calls"
|
|
1557
|
+
: anthropicBody.stop_reason === "max_tokens" ? "length"
|
|
1558
|
+
: "stop"
|
|
1559
|
+
|
|
1560
|
+
return {
|
|
1561
|
+
id: generateId("chatcmpl-"),
|
|
1562
|
+
object: "chat.completion",
|
|
1563
|
+
created: Math.floor(Date.now() / 1000),
|
|
1564
|
+
model,
|
|
1565
|
+
choices: [{
|
|
1566
|
+
index: 0,
|
|
1567
|
+
message,
|
|
1568
|
+
finish_reason: finishReason
|
|
1569
|
+
}],
|
|
1570
|
+
usage: {
|
|
1571
|
+
prompt_tokens: anthropicBody.usage?.input_tokens ?? 0,
|
|
1572
|
+
completion_tokens: anthropicBody.usage?.output_tokens ?? 0,
|
|
1573
|
+
total_tokens: (anthropicBody.usage?.input_tokens ?? 0) + (anthropicBody.usage?.output_tokens ?? 0)
|
|
1574
|
+
}
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
|
|
1578
|
+
// OpenAI-compatible chat completions endpoint. Translates the incoming
// OpenAI request into an Anthropic /v1/messages request, dispatches it
// through the app's own fetch handler (so the proxy's auth forwarding,
// queueing and tracing all apply), then translates the response — plain
// JSON or SSE — back into OpenAI chat.completion / chat.completion.chunk
// format.
const handleChatCompletions = async (c: Context) => {
  try {
    // Parse and validate the body up front so we can fail fast with 400s.
    let body: any
    try {
      body = await c.req.json()
    } catch {
      return c.json({ error: { message: "Request body must be valid JSON", type: "invalid_request_error" } }, 400)
    }

    if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
      return c.json({ error: { message: "messages is required and must be a non-empty array", type: "invalid_request_error" } }, 400)
    }

    // Map OpenAI messages and sampling params onto the Anthropic shape.
    const { system, messages } = openaiToAnthropicMessages(body.messages)
    const stream = body.stream ?? false
    const requestedModel = body.model ?? "claude-sonnet-4-6"

    const anthropicBody: any = {
      model: requestedModel,
      messages,
      stream,
    }
    if (system) anthropicBody.system = system
    // OpenAI clients may send either max_tokens or max_completion_tokens.
    if (body.max_tokens || body.max_completion_tokens) {
      anthropicBody.max_tokens = body.max_tokens ?? body.max_completion_tokens
    }
    if (body.temperature !== undefined) anthropicBody.temperature = body.temperature
    if (body.top_p !== undefined) anthropicBody.top_p = body.top_p
    if (body.stop) anthropicBody.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop]
    if (body.tools?.length) {
      anthropicBody.tools = openaiToAnthropicTools(body.tools)
    }

    // Forward the caller's credentials under whichever header they used.
    const internalHeaders: Record<string, string> = { "Content-Type": "application/json" }
    const authHeader = c.req.header("authorization") ?? c.req.header("x-api-key")
    if (authHeader) {
      if (c.req.header("authorization")) internalHeaders["authorization"] = authHeader
      else internalHeaders["x-api-key"] = authHeader
    }
    // Loop back through the app's own /v1/messages handler instead of
    // re-implementing the SDK call here.
    const internalRes = await app.fetch(new Request(`http://localhost/v1/messages`, {
      method: "POST",
      headers: internalHeaders,
      body: JSON.stringify(anthropicBody)
    }))

    // Non-streaming: translate the single JSON response and return.
    if (!stream) {
      const anthropicJson = await internalRes.json() as any
      if (anthropicJson.type === "error") {
        return c.json({ error: anthropicJson.error }, internalRes.status as any)
      }
      return c.json(anthropicToOpenaiResponse(anthropicJson, requestedModel))
    }

    // Streaming: re-emit the Anthropic SSE stream as OpenAI chunk events.
    const includeUsage = body.stream_options?.include_usage === true
    const encoder = new TextEncoder()
    const readable = new ReadableStream({
      async start(controller) {
        try {
          const reader = internalRes.body?.getReader()
          if (!reader) { controller.close(); return }

          const decoder = new TextDecoder()
          let buffer = ""  // partial SSE line carried across reads
          const chatId = generateId("chatcmpl-")
          const created = Math.floor(Date.now() / 1000)
          let sentRole = false  // the first content chunk must announce delta.role
          let finishReason: string | null = null
          // Anthropic content-block index -> tool-call identity, in arrival order.
          const activeToolCalls: Map<number, { id: string; name: string }> = new Map()
          let toolCallIndex = 0  // OpenAI-side tool_calls array index
          let usageInfo: { input_tokens: number; output_tokens: number } | null = null

          while (true) {
            const { done, value } = await reader.read()
            if (done) break
            buffer += decoder.decode(value, { stream: true })

            // Split on newlines; keep any trailing partial line for the next read.
            const lines = buffer.split("\n")
            buffer = lines.pop() ?? ""

            for (const line of lines) {
              // NOTE(review): a non-SSE body (e.g. a JSON error returned by
              // the internal handler in streaming mode) has no "data: "
              // lines, so it is silently dropped and the client only sees
              // the stream end — confirm whether that is intended.
              if (!line.startsWith("data: ")) continue
              try {
                const event = JSON.parse(line.slice(6))

                // Emit the role-announcing chunk once, before any content.
                if (!sentRole && (event.type === "content_block_start" || event.type === "content_block_delta")) {
                  sentRole = true
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }]
                  })}\n\n`))
                }

                if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
                  // New tool call: announce its id/name with empty arguments.
                  const idx = toolCallIndex++
                  activeToolCalls.set(event.index, { id: event.content_block.id, name: event.content_block.name })
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: {
                      tool_calls: [{ index: idx, id: event.content_block.id, type: "function", function: { name: event.content_block.name, arguments: "" } }]
                    }, finish_reason: null }]
                  })}\n\n`))
                } else if (event.type === "content_block_delta" && event.delta?.type === "input_json_delta") {
                  // Stream tool arguments; map insertion order recovers the
                  // OpenAI tool_calls index assigned at content_block_start.
                  const tc = activeToolCalls.get(event.index)
                  if (tc) {
                    const idx = Array.from(activeToolCalls.keys()).indexOf(event.index)
                    controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                      id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                      choices: [{ index: 0, delta: {
                        tool_calls: [{ index: idx, function: { arguments: event.delta.partial_json } }]
                      }, finish_reason: null }]
                    })}\n\n`))
                  }
                } else if (event.type === "content_block_delta" && event.delta?.type === "text_delta") {
                  // Plain text delta.
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify({
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: { content: event.delta.text }, finish_reason: null }]
                  })}\n\n`))
                } else if (event.type === "message_delta") {
                  // Map Anthropic stop_reason to OpenAI finish_reason; any
                  // other value (including stop_sequence) maps to "stop".
                  const sr = event.delta?.stop_reason
                  finishReason = sr === "tool_use" ? "tool_calls" : sr === "max_tokens" ? "length" : "stop"
                  if (event.usage) {
                    // Keep the latest counts, falling back to prior values
                    // when a field is absent from this delta.
                    const prevInput: number = usageInfo?.input_tokens ?? 0
                    const prevOutput: number = usageInfo?.output_tokens ?? 0
                    usageInfo = {
                      input_tokens: event.usage.input_tokens ?? prevInput,
                      output_tokens: event.usage.output_tokens ?? prevOutput
                    }
                  }
                } else if (event.type === "message_start" && event.message?.usage) {
                  // Initial usage snapshot carries the prompt token count.
                  usageInfo = { input_tokens: event.message.usage.input_tokens ?? 0, output_tokens: 0 }
                } else if (event.type === "message_stop") {
                  // Final chunk: empty delta + finish_reason, optional usage,
                  // then the OpenAI stream terminator.
                  const finalChunk: any = {
                    id: chatId, object: "chat.completion.chunk", created, model: requestedModel,
                    choices: [{ index: 0, delta: {}, finish_reason: finishReason ?? "stop" }]
                  }
                  if (includeUsage && usageInfo) {
                    finalChunk.usage = {
                      prompt_tokens: usageInfo.input_tokens,
                      completion_tokens: usageInfo.output_tokens,
                      total_tokens: usageInfo.input_tokens + usageInfo.output_tokens
                    }
                  }
                  controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`))
                  controller.enqueue(encoder.encode("data: [DONE]\n\n"))
                }
              } catch {}  // malformed SSE event JSON: skip the line
            }
          }
          controller.close()
        } catch {
          // Upstream read failure: end the stream; the client sees it close.
          controller.close()
        }
      }
    })

    return new Response(readable, {
      headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive"
      }
    })
  } catch (error) {
    // Anything unexpected outside the stream becomes a 500 in OpenAI format.
    return c.json({
      error: { message: error instanceof Error ? error.message : "Unknown error", type: "server_error" }
    }, 500)
  }
}
|
|
1746
|
+
|
|
1747
|
+
// Expose the OpenAI-compatible handler on both the versioned and bare paths.
app.post("/v1/chat/completions", handleChatCompletions)
app.post("/chat/completions", handleChatCompletions)
|
|
1749
|
+
|
|
1750
|
+
// OpenAI-format model listing
|
|
1751
|
+
const handleOpenaiModels = (c: Context) => c.json({
|
|
1752
|
+
object: "list",
|
|
1753
|
+
data: MODELS.map(m => ({
|
|
1754
|
+
id: m.id,
|
|
1755
|
+
object: "model",
|
|
1756
|
+
created: Math.floor(new Date(m.created_at).getTime() / 1000),
|
|
1757
|
+
owned_by: "anthropic"
|
|
1758
|
+
}))
|
|
1759
|
+
})
|
|
1760
|
+
app.get("/v1/chat/models", handleOpenaiModels)
|
|
1761
|
+
|
|
1762
|
+
// 404 catch-all
|
|
1763
|
+
app.all("*", (c) => c.json({
|
|
1764
|
+
type: "error",
|
|
1765
|
+
error: { type: "not_found_error", message: `${c.req.method} ${c.req.path} not found` }
|
|
1766
|
+
}, 404))
|
|
1767
|
+
|
|
1768
|
+
return { app, config: finalConfig }
|
|
1769
|
+
}
|
|
1770
|
+
|
|
1771
|
+
/**
 * Build the proxy app via createProxyServer and serve it with Bun.
 *
 * Side effects: logs the full startup configuration, prints the listen
 * address / log dir / debug URL to stdout, starts a 5-minute health-logging
 * interval, and installs SIGINT/SIGTERM handlers that log final stats, stop
 * the server and exit the process.
 *
 * NOTE(review): declared async but contains no awaits — presumably kept
 * async for call-site compatibility; confirm before changing.
 *
 * @param config Partial overrides merged with defaults by createProxyServer.
 * @returns The running Bun server instance.
 */
export async function startProxyServer(config: Partial<ProxyConfig> = {}) {
  const { app, config: finalConfig } = createProxyServer(config)

  const server = Bun.serve({
    port: finalConfig.port,
    hostname: finalConfig.host,
    fetch: app.fetch,
    // 0 disables Bun's idle timeout — presumably so long-lived SSE
    // responses are never cut off; confirm against Bun.serve docs.
    idleTimeout: 0
  })

  // Startup log with full configuration
  logInfo("proxy.started", {
    version: PROXY_VERSION,
    host: finalConfig.host,
    port: finalConfig.port,
    stallTimeoutMs: finalConfig.stallTimeoutMs,
    maxDurationMs: finalConfig.maxDurationMs,
    maxOutputChars: finalConfig.maxOutputChars,
    maxConcurrent: MAX_CONCURRENT,
    queueTimeoutMs: QUEUE_TIMEOUT_MS,
    claudeExecutable,
    logDir: LOG_DIR,
    debug: finalConfig.debug,
    pid: process.pid,
  })

  console.log(`Claude SDK Proxy v${PROXY_VERSION} running at http://${finalConfig.host}:${finalConfig.port}`)
  console.log(`  Logs: ${LOG_DIR}`)
  console.log(`  Debug: http://${finalConfig.host}:${finalConfig.port}/debug/stats`)

  // Periodic health logging (every 5 minutes): process memory plus
  // request/queue counters from the trace store and request queue.
  const healthInterval = setInterval(() => {
    const mem = process.memoryUsage()
    const stats = traceStore.getStats()
    logInfo("proxy.health", {
      pid: process.pid,
      rssBytes: mem.rss,
      rssMB: +(mem.rss / 1024 / 1024).toFixed(1),
      heapUsedMB: +(mem.heapUsed / 1024 / 1024).toFixed(1),
      heapTotalMB: +(mem.heapTotal / 1024 / 1024).toFixed(1),
      externalMB: +(mem.external / 1024 / 1024).toFixed(1),
      uptimeMs: stats.uptimeMs,
      totalRequests: stats.requests.total,
      totalErrors: stats.requests.errors,
      activeRequests: stats.requests.active,
      queueActive: requestQueue.activeCount,
      queueWaiting: requestQueue.waitingCount,
    })
  }, 300_000) // 5 minutes

  // Graceful shutdown: log final stats, stop the interval, close the
  // server (force-closing open connections) and exit.
  const shutdown = (signal: string) => {
    const stats = traceStore.getStats()
    logInfo("proxy.shutdown", {
      signal,
      pid: process.pid,
      totalRequests: stats.requests.total,
      totalErrors: stats.requests.errors,
      activeRequests: stats.requests.active,
      uptimeMs: stats.uptimeMs,
    })
    clearInterval(healthInterval)
    console.log(`\nReceived ${signal}, shutting down...`)
    server.stop(true)
    process.exit(0)
  }
  process.on("SIGINT", () => shutdown("SIGINT"))
  process.on("SIGTERM", () => shutdown("SIGTERM"))

  return server
}
|