otterly 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +247 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +75 -0
- package/dist/engine.d.ts +38 -0
- package/dist/engine.js +169 -0
- package/dist/errors.d.ts +7 -0
- package/dist/errors.js +32 -0
- package/dist/events.d.ts +13 -0
- package/dist/events.js +168 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +10 -0
- package/dist/permissions.d.ts +16 -0
- package/dist/permissions.js +43 -0
- package/dist/server/circuit-breaker.d.ts +20 -0
- package/dist/server/circuit-breaker.js +54 -0
- package/dist/server/index.d.ts +19 -0
- package/dist/server/index.js +275 -0
- package/dist/server/logger.d.ts +18 -0
- package/dist/server/logger.js +22 -0
- package/dist/server/middleware.d.ts +20 -0
- package/dist/server/middleware.js +80 -0
- package/dist/server/openai-compat.d.ts +110 -0
- package/dist/server/openai-compat.js +158 -0
- package/dist/server/request-queue.d.ts +33 -0
- package/dist/server/request-queue.js +79 -0
- package/dist/server/routes-native.d.ts +28 -0
- package/dist/server/routes-native.js +215 -0
- package/dist/server/routes-openai.d.ts +7 -0
- package/dist/server/routes-openai.js +203 -0
- package/dist/server/session-store.d.ts +36 -0
- package/dist/server/session-store.js +87 -0
- package/dist/server/ws-handler.d.ts +7 -0
- package/dist/server/ws-handler.js +155 -0
- package/dist/session.d.ts +43 -0
- package/dist/session.js +255 -0
- package/dist/types.d.ts +100 -0
- package/dist/types.js +2 -0
- package/package.json +73 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
// Native API routes: /api/status, /api/run, /api/stream
|
|
2
|
+
// Richer response format than OpenAI compat — includes cost, tools, duration.
|
|
3
|
+
import { ClaudeEngine } from "../engine.js";
|
|
4
|
+
import { AgentError } from "../errors.js";
|
|
5
|
+
import { apiSessions } from "./session-store.js";
|
|
6
|
+
import { errorToHttpStatus } from "./openai-compat.js";
|
|
7
|
+
import { logError } from "./logger.js";
|
|
8
|
+
// Version string reported in the GET /api/status payload. It is a hard-coded
// literal; NOTE(review): presumably has to be kept in sync with package.json
// by hand on each release — confirm.
const PKG_VERSION = "0.1.0";
|
|
9
|
+
/**
 * GET /api/status — health check.
 *
 * Always answers 200 with a JSON document holding a fixed "ok" status, the
 * package version and the number of sessions currently in the session store.
 * Queue statistics and circuit breaker state are appended only when the
 * corresponding collaborator is supplied.
 *
 * @param {object} _req - Incoming request (not inspected).
 * @param {object} res - Response the JSON document is written to.
 * @param {object} [queue] - Optional; its `stats()` result is reported as `queue`.
 * @param {object} [circuitBreaker] - Optional; its `getState()` result is
 *   reported as `circuitBreaker`.
 */
export function handleStatus(_req, res, queue, circuitBreaker) {
    res.writeHead(200, { "Content-Type": "application/json" });
    const report = {
        status: "ok",
        version: PKG_VERSION,
        activeSessions: apiSessions.count(),
    };
    if (queue) {
        report.queue = queue.stats();
    }
    if (circuitBreaker) {
        report.circuitBreaker = circuitBreaker.getState();
    }
    res.end(JSON.stringify(report));
}
|
|
22
|
+
/**
 * Write a JSON response unless headers have already gone out.
 * If headers were already sent, the response is merely closed (when still
 * open) so the client is never left hanging on a half-written reply.
 */
function sendRunJson(res, status, payload, extraHeaders = {}) {
    if (res.headersSent) {
        if (!res.writableEnded) {
            res.end();
        }
        return;
    }
    res.writeHead(status, { "Content-Type": "application/json", ...extraHeaders });
    res.end(JSON.stringify(payload));
}

/**
 * Shared failure path for /api/run: notifies the circuit breaker (with the
 * AgentError code when available), logs the message and replies with the
 * status chosen by errorToHttpStatus.
 */
function failRun(req, res, err, circuitBreaker) {
    const e = err instanceof Error ? err : new Error(String(err));
    const code = err instanceof AgentError ? err.code : undefined;
    circuitBreaker?.onFailure(code);
    logError(req.requestId || "", e.message);
    sendRunJson(res, errorToHttpStatus(e), { error: e.message });
}

/**
 * POST /api/run — one-shot execution, returns the full result as JSON.
 *
 * 1. Replies 400 unless `req.body.prompt` is a non-empty string.
 * 2. Session reuse: when a session id is given (X-Session-Id header, else
 *    `session_id` in the body) and the session store holds a session for it,
 *    the prompt is sent to that session and the result is returned together
 *    with an X-Session-Id header.
 * 3. Otherwise (no id, or id not found) a fresh ClaudeEngine run is started.
 *    The run is aborted when the request emits "close" or when
 *    `req.timeoutSignal` aborts; an aborted run answers 499 if nothing has
 *    been sent yet and is not reported to the circuit breaker.
 *
 * @param {object} req - Parsed request; reads `body`, `headers`, `requestId`
 *   and the optional `timeoutSignal`.
 * @param {object} res - HTTP response.
 * @param {object} ctx - Server context; `workingDir` is the default cwd.
 * @param {object} [circuitBreaker] - Optional; receives onSuccess/onFailure.
 * @returns {Promise<void>}
 */
export async function handleRun(req, res, ctx, circuitBreaker) {
    const body = req.body;
    if (!body || !body.prompt || typeof body.prompt !== "string") {
        sendRunJson(res, 400, { error: "prompt is required" });
        return;
    }
    // Session reuse: check for session ID
    const sessionId = req.headers["x-session-id"] || body.session_id || null;
    const entry = sessionId ? apiSessions.get(sessionId) : null;
    if (entry?.session) {
        try {
            const result = await entry.session.send(body.prompt);
            apiSessions.recordRequest(sessionId, result.cost);
            circuitBreaker?.onSuccess();
            sendRunJson(res, 200, result, { "X-Session-Id": result.sessionId || sessionId });
        }
        catch (err) {
            failRun(req, res, err, circuitBreaker);
        }
        return;
    }
    // Session not found (or none requested) — fall through to one-shot
    const engine = new ClaudeEngine();
    const abortController = new AbortController();
    req.on("close", () => abortController.abort());
    if (req.timeoutSignal) {
        req.timeoutSignal.addEventListener("abort", () => abortController.abort());
    }
    const bodyOpts = body.options;
    const options = {
        cwd: bodyOpts?.cwd || ctx.workingDir,
        permissionMode: bodyOpts?.permissionMode || "bypassPermissions",
        signal: abortController.signal,
    };
    // Optional overrides are copied only when the caller supplied a truthy value.
    if (bodyOpts?.systemPrompt) {
        options.systemPrompt = bodyOpts.systemPrompt;
    }
    if (bodyOpts?.resume) {
        options.resume = bodyOpts.resume;
    }
    if (bodyOpts?.model) {
        options.model = bodyOpts.model;
    }
    if (bodyOpts?.maxTurns) {
        options.maxTurns = bodyOpts.maxTurns;
    }
    try {
        const result = await engine.run(body.prompt, options);
        circuitBreaker?.onSuccess();
        sendRunJson(res, 200, result);
    }
    catch (err) {
        const e = err instanceof Error ? err : new Error(String(err));
        if (e.name === "AbortError" || abortController.signal.aborted) {
            // Aborts are not engine failures: no circuit breaker update, no log.
            if (!res.headersSent) {
                res.writeHead(499, { "Content-Type": "application/json" });
                res.end(JSON.stringify({ error: "Request aborted" }));
            }
            return;
        }
        failRun(req, res, err, circuitBreaker);
    }
}
|
|
105
|
+
/**
 * POST /api/stream — streaming execution as NDJSON (one JSON object per line).
 *
 * Replies 400 unless `req.body.prompt` is a non-empty string; otherwise the
 * 200 NDJSON headers are sent immediately and events are written as they
 * arrive. Supports session reuse via the X-Session-Id header or `session_id`
 * in the body; when no stored session matches, a fresh ClaudeEngine stream is
 * started with options taken from `body.options`.
 *
 * Backpressure: when the write buffer is full the handler waits for "drain",
 * or for "close" so that a client that disconnects mid-stream cannot leave
 * the handler suspended forever.
 *
 * Failures are reported to the circuit breaker, logged, and sent to the client
 * as a `{ type: "error" }` line. An aborted one-shot stream is reported to
 * the circuit breaker neither as a success nor as a failure.
 *
 * @param {object} req - Parsed request; reads `body`, `headers`, `requestId`
 *   and the optional `timeoutSignal`.
 * @param {object} res - HTTP response.
 * @param {object} ctx - Server context; `workingDir` is the default cwd.
 * @param {object} [circuitBreaker] - Optional; receives onSuccess/onFailure.
 * @returns {Promise<void>}
 */
export async function handleStream(req, res, ctx, circuitBreaker) {
    const body = req.body;
    if (!body || !body.prompt || typeof body.prompt !== "string") {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ error: "prompt is required" }));
        return;
    }
    res.writeHead(200, {
        "Content-Type": "application/x-ndjson",
        "Transfer-Encoding": "chunked",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    });
    async function sendLine(obj) {
        if (res.writableEnded || res.destroyed) {
            return;
        }
        const ok = res.write(JSON.stringify(obj) + "\n");
        if (ok) {
            return;
        }
        // Buffer full: resume on "drain", but also on "close" — a closed
        // connection never drains and would otherwise block this await forever.
        await new Promise((resolve) => {
            const settle = () => {
                res.off("drain", settle);
                res.off("close", settle);
                resolve();
            };
            res.once("drain", settle);
            res.once("close", settle);
        });
    }
    // Session reuse
    const sessionId = req.headers["x-session-id"] || body.session_id || null;
    if (sessionId) {
        const entry = apiSessions.get(sessionId);
        if (entry?.session) {
            try {
                for await (const event of entry.session.sendStream(body.prompt)) {
                    await sendStreamEvent(event, sendLine);
                    if (event.type === "result") {
                        apiSessions.recordRequest(sessionId, event.cost);
                    }
                }
                circuitBreaker?.onSuccess();
            }
            catch (err) {
                const e = err instanceof Error ? err : new Error(String(err));
                const code = err instanceof AgentError ? err.code : undefined;
                circuitBreaker?.onFailure(code);
                logError(req.requestId || "", e.message);
                await sendLine({ type: "error", message: e.message });
            }
            if (!res.writableEnded) {
                res.end();
            }
            return;
        }
        // Session not found — fall through to a fresh engine stream
    }
    const engine = new ClaudeEngine();
    const abortController = new AbortController();
    req.on("close", () => abortController.abort());
    if (req.timeoutSignal) {
        req.timeoutSignal.addEventListener("abort", () => abortController.abort());
    }
    const bodyOpts = body.options;
    const options = {
        cwd: bodyOpts?.cwd || ctx.workingDir,
        permissionMode: bodyOpts?.permissionMode || "bypassPermissions",
        signal: abortController.signal,
    };
    if (bodyOpts?.systemPrompt) {
        options.systemPrompt = bodyOpts.systemPrompt;
    }
    if (bodyOpts?.resume) {
        options.resume = bodyOpts.resume;
    }
    if (bodyOpts?.model) {
        options.model = bodyOpts.model;
    }
    // Forwarded for parity with /api/run, which honours the same option.
    if (bodyOpts?.maxTurns) {
        options.maxTurns = bodyOpts.maxTurns;
    }
    try {
        for await (const event of engine.stream(body.prompt, options)) {
            if (abortController.signal.aborted) {
                break;
            }
            await sendStreamEvent(event, sendLine);
        }
        // A stream cut short by an abort is not evidence of a healthy backend.
        if (!abortController.signal.aborted) {
            circuitBreaker?.onSuccess();
        }
    }
    catch (err) {
        const e = err instanceof Error ? err : new Error(String(err));
        if (e.name !== "AbortError" && !abortController.signal.aborted) {
            const code = err instanceof AgentError ? err.code : undefined;
            circuitBreaker?.onFailure(code);
            logError(req.requestId || "", e.message);
            await sendLine({ type: "error", message: e.message });
        }
    }
    if (!res.writableEnded) {
        res.end();
    }
}
|
|
194
|
+
/**
 * Translate an engine event into the object written on the NDJSON stream.
 * Returns null for event types that are not forwarded to the client.
 */
function toStreamLine(event) {
    const { type } = event;
    if (type === "system") {
        return { type: "session_init", sessionId: event.sessionId, model: event.model };
    }
    if (type === "text_delta") {
        return { type: "text_delta", delta: event.delta };
    }
    if (type === "tool_use") {
        return { type: "tool_use", id: event.id, tool: event.tool, input: event.input, description: event.description };
    }
    if (type === "tool_result") {
        return { type: "tool_result", toolUseId: event.toolUseId, tool: event.tool, output: event.output, isError: event.isError };
    }
    if (type === "result") {
        return { type: "result", text: event.text, cost: event.cost, duration: event.duration, sessionId: event.sessionId, usage: event.usage };
    }
    if (type === "error") {
        return { type: "error", message: event.error.message };
    }
    return null;
}

/**
 * Forward one engine event to the client through `sendLine`.
 *
 * "system" events are renamed to "session_init"; "text_delta", "tool_use",
 * "tool_result", "result" and "error" keep their type and a fixed subset of
 * fields. Any other event type is dropped without calling `sendLine`.
 *
 * @param {object} event - Engine event with a `type` discriminator.
 * @param {(line: object) => Promise<void>} sendLine - Writer for one line.
 * @returns {Promise<void>}
 */
async function sendStreamEvent(event, sendLine) {
    const line = toStreamLine(event);
    if (line !== null) {
        await sendLine(line);
    }
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ServerResponse } from "http";
|
|
2
|
+
import type { ParsedRequest, ServerContext } from "./routes-native.js";
|
|
3
|
+
import type { CircuitBreaker } from "./circuit-breaker.js";
|
|
4
|
+
/**
|
|
5
|
+
* Handle POST /v1/chat/completions
|
|
6
|
+
*/
|
|
7
|
+
// `circuitBreaker` is optional. The returned promise carries no value; the
// outcome is delivered by writing to `res`.
export declare function handleChatCompletions(req: ParsedRequest, res: ServerResponse, ctx: ServerContext, circuitBreaker?: CircuitBreaker): Promise<void>;
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
// POST /v1/chat/completions — OpenAI-compatible endpoint.
|
|
2
|
+
// Supports streaming (SSE) and non-streaming responses.
|
|
3
|
+
// Auth is handled by middleware in index.ts — not checked here.
|
|
4
|
+
import crypto from "crypto";
|
|
5
|
+
import { ClaudeEngine } from "../engine.js";
|
|
6
|
+
import { AgentError } from "../errors.js";
|
|
7
|
+
import { apiSessions } from "./session-store.js";
|
|
8
|
+
import { logError } from "./logger.js";
|
|
9
|
+
import { openaiToClaudeInput, claudeResultToOpenai, makeStreamChunk, sseData, errorToHttpStatus, openaiErrorBody, openaiToolsToAllowedTools, } from "./openai-compat.js";
|
|
10
|
+
/**
 * Handle POST /v1/chat/completions (OpenAI-compatible).
 *
 * Replies 400 with an OpenAI-style error body unless `req.body.messages` is
 * an array. Otherwise the request is converted with `openaiToClaudeInput`
 * and dispatched to one of four paths, chosen by whether `body.stream` is
 * strictly `true` and whether a stored session matches the session id
 * (X-Session-Id header, else `body.session_id`):
 *   - stored session + stream / non-stream → session handlers
 *   - no (matching) session + stream / non-stream → one-shot engine handlers
 *
 * One-shot options: cwd from `ctx.workingDir`, permission mode
 * "bypassPermissions", the model (default "claude-sonnet-4-20250514"), the
 * system prompt (extended with a JSON-only instruction when
 * `response_format.type` is "json_object") and, when `body.tools` maps to a
 * non-empty list, an `allowedTools` filter. Session paths use only the
 * prompt and model; these options are not applied to an existing session.
 *
 * Auth is handled by middleware before this function is reached.
 *
 * @param {object} req - Parsed request; reads `body`, `headers` and the
 *   optional `timeoutSignal`.
 * @param {object} res - HTTP response.
 * @param {object} ctx - Server context; `workingDir` is used as cwd.
 * @param {object} [circuitBreaker] - Optional; passed through to the handlers.
 * @returns {Promise<void>}
 */
export async function handleChatCompletions(req, res, ctx, circuitBreaker) {
    const body = req.body;
    if (!body || !body.messages || !Array.isArray(body.messages)) {
        res.writeHead(400, { "Content-Type": "application/json" });
        res.end(JSON.stringify(openaiErrorBody(400, "messages array is required")));
        return;
    }
    const { prompt, systemPrompt } = openaiToClaudeInput(body);
    const model = body.model || "claude-sonnet-4-20250514";
    const stream = body.stream === true;
    const abortController = new AbortController();
    req.on("close", () => abortController.abort());
    if (req.timeoutSignal) {
        req.timeoutSignal.addEventListener("abort", () => abortController.abort());
    }
    const options = {
        cwd: ctx.workingDir,
        permissionMode: "bypassPermissions",
        signal: abortController.signal,
    };
    // Build system prompt: original + JSON mode injection
    let finalSystemPrompt = systemPrompt || "";
    if (body.response_format?.type === "json_object") {
        const jsonInstruction = "You must respond with valid JSON only. No markdown, no explanation, no code fences, just a JSON object.";
        finalSystemPrompt = finalSystemPrompt
            ? `${finalSystemPrompt}\n\n${jsonInstruction}`
            : jsonInstruction;
    }
    if (finalSystemPrompt) {
        options.systemPrompt = finalSystemPrompt;
    }
    // `model` always has a value here because of the default above.
    options.model = model;
    // Tools parameter → allowedTools filter
    if (body.tools && Array.isArray(body.tools)) {
        const allowed = openaiToolsToAllowedTools(body.tools);
        if (allowed.length > 0) {
            options.allowedTools = allowed;
        }
    }
    // Session reuse via X-Session-Id header or session_id in body
    const sessionId = req.headers["x-session-id"]
        || body.session_id
        || null;
    if (sessionId) {
        const entry = apiSessions.get(sessionId);
        if (entry?.session) {
            if (stream) {
                await handleSessionStreaming(req, res, entry.session, prompt, model, sessionId, circuitBreaker);
            }
            else {
                await handleSessionNonStreaming(req, res, entry.session, prompt, model, sessionId, circuitBreaker);
            }
            return;
        }
        // Session not found — fall through to one-shot
    }
    if (stream) {
        await handleStreaming(req, res, prompt, options, model, abortController, circuitBreaker);
    }
    else {
        await handleNonStreaming(req, res, prompt, options, model, abortController, circuitBreaker);
    }
}
|
|
82
|
+
/**
 * Non-streaming chat completion served by an existing session.
 *
 * Sends the prompt to `session`, records the request cost against the stored
 * session, reports success to the circuit breaker and replies with the
 * OpenAI-shaped result plus an X-Session-Id header.
 *
 * On failure the circuit breaker is notified (with the AgentError code when
 * available), the message is logged, and an OpenAI-style error body is sent.
 * The error reply is skipped when headers have already gone out (the same
 * guard handleNonStreaming uses); in that case the response is just closed
 * so the client is not left waiting.
 *
 * @returns {Promise<void>}
 */
async function handleSessionNonStreaming(req, res, session, prompt, model, sessionId, circuitBreaker) {
    try {
        const result = await session.send(prompt);
        apiSessions.recordRequest(sessionId, result.cost);
        circuitBreaker?.onSuccess();
        const response = claudeResultToOpenai(result.text, model, result.usage);
        res.writeHead(200, {
            "Content-Type": "application/json",
            "X-Session-Id": result.sessionId || sessionId,
        });
        res.end(JSON.stringify(response));
    }
    catch (err) {
        const e = err instanceof Error ? err : new Error(String(err));
        const code = err instanceof AgentError ? err.code : undefined;
        circuitBreaker?.onFailure(code);
        logError(req.requestId || "", e.message);
        const status = errorToHttpStatus(e);
        if (!res.headersSent) {
            res.writeHead(status, { "Content-Type": "application/json" });
            res.end(JSON.stringify(openaiErrorBody(status, e.message)));
        }
        else if (!res.writableEnded) {
            // Failure happened after the 200 header: a second writeHead would throw.
            res.end();
        }
    }
}
|
|
104
|
+
/**
 * Streaming (SSE) chat completion served by an existing session.
 *
 * Sends the SSE headers (including X-Session-Id) and an initial role chunk,
 * then forwards each "text_delta" event as a content chunk. On the "result"
 * event the request cost is recorded against the session and a "stop" chunk
 * is emitted. Other event types are ignored.
 *
 * On failure the circuit breaker is notified, the message is logged (as the
 * one-shot streaming handler does) and an error object is sent on the
 * stream. The stream always finishes with `data: [DONE]`.
 *
 * @returns {Promise<void>}
 */
async function handleSessionStreaming(req, res, session, prompt, model, sessionId, circuitBreaker) {
    res.writeHead(200, {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Session-Id": sessionId,
    });
    const completionId = `chatcmpl-otterly-${crypto.randomUUID().slice(0, 12)}`;
    // Send initial role chunk
    await sseWrite(res, sseData(makeStreamChunk(completionId, { role: "assistant" }, null, model)));
    try {
        for await (const event of session.sendStream(prompt)) {
            if (event.type === "text_delta") {
                await sseWrite(res, sseData(makeStreamChunk(completionId, { content: event.delta }, null, model)));
            }
            else if (event.type === "result") {
                apiSessions.recordRequest(sessionId, event.cost);
                await sseWrite(res, sseData(makeStreamChunk(completionId, {}, "stop", model)));
            }
        }
        circuitBreaker?.onSuccess();
    }
    catch (err) {
        const e = err instanceof Error ? err : new Error(String(err));
        const code = err instanceof AgentError ? err.code : undefined;
        circuitBreaker?.onFailure(code);
        logError(req.requestId || "", e.message);
        await sseWrite(res, sseData({ error: { message: e.message, type: "server_error" } }));
    }
    await sseWrite(res, "data: [DONE]\n\n");
    if (!res.writableEnded) {
        res.end();
    }
}
|
|
135
|
+
/**
 * Non-streaming chat completion backed by a fresh ClaudeEngine run.
 *
 * Success: the circuit breaker is notified and the OpenAI-shaped result is
 * sent as a 200 JSON response.
 * Abort (AbortError, or the controller's signal is aborted): returns without
 * writing anything and without touching the circuit breaker.
 * Any other failure: circuit breaker notified, message logged, and an
 * OpenAI-style error body sent unless headers were already sent.
 *
 * @returns {Promise<void>}
 */
async function handleNonStreaming(req, res, prompt, options, model, abortController, circuitBreaker) {
    const runner = new ClaudeEngine();
    try {
        const outcome = await runner.run(prompt, options);
        circuitBreaker?.onSuccess();
        const completion = claudeResultToOpenai(outcome.text, model, outcome.usage);
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify(completion));
    }
    catch (err) {
        const failure = err instanceof Error ? err : new Error(String(err));
        const wasAborted = failure.name === "AbortError" || abortController.signal.aborted;
        if (wasAborted) {
            return;
        }
        const agentCode = err instanceof AgentError ? err.code : undefined;
        circuitBreaker?.onFailure(agentCode);
        logError(req.requestId || "", failure.message);
        const httpStatus = errorToHttpStatus(failure);
        if (res.headersSent) {
            return;
        }
        res.writeHead(httpStatus, { "Content-Type": "application/json" });
        res.end(JSON.stringify(openaiErrorBody(httpStatus, failure.message)));
    }
}
|
|
158
|
+
/**
 * Streaming (SSE) chat completion backed by a fresh ClaudeEngine stream.
 *
 * Sends the SSE headers and an initial role chunk, then forwards each
 * "text_delta" event as a content chunk and emits a "stop" chunk on the
 * "result" event. Other event types are ignored. The loop stops as soon as
 * the abort signal is set.
 *
 * Circuit breaker accounting: a stream that ran to completion counts as a
 * success; a non-abort error counts as a failure (and is logged and sent on
 * the stream); an aborted stream counts as neither, matching the
 * non-streaming handler.
 *
 * The stream always finishes with `data: [DONE]` when still writable.
 *
 * @returns {Promise<void>}
 */
async function handleStreaming(req, res, prompt, options, model, abortController, circuitBreaker) {
    const engine = new ClaudeEngine();
    res.writeHead(200, {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    });
    const completionId = `chatcmpl-otterly-${crypto.randomUUID().slice(0, 12)}`;
    // Send initial role chunk
    await sseWrite(res, sseData(makeStreamChunk(completionId, { role: "assistant" }, null, model)));
    try {
        for await (const event of engine.stream(prompt, options)) {
            if (abortController.signal.aborted) {
                break;
            }
            if (event.type === "text_delta") {
                await sseWrite(res, sseData(makeStreamChunk(completionId, { content: event.delta }, null, model)));
            }
            else if (event.type === "result") {
                await sseWrite(res, sseData(makeStreamChunk(completionId, {}, "stop", model)));
            }
        }
        // Leaving the loop because of an abort says nothing about backend health.
        if (!abortController.signal.aborted) {
            circuitBreaker?.onSuccess();
        }
    }
    catch (err) {
        const e = err instanceof Error ? err : new Error(String(err));
        if (e.name !== "AbortError" && !abortController.signal.aborted) {
            const code = err instanceof AgentError ? err.code : undefined;
            circuitBreaker?.onFailure(code);
            logError(req.requestId || "", e.message);
            await sseWrite(res, sseData({ error: { message: e.message, type: "server_error" } }));
        }
    }
    await sseWrite(res, "data: [DONE]\n\n");
    if (!res.writableEnded) {
        res.end();
    }
}
|
|
195
|
+
/**
 * Write one chunk to an SSE response, honouring backpressure.
 *
 * Nothing is written when the response has already ended or been destroyed.
 * When `res.write` reports a full buffer, the returned promise settles on the
 * next "drain" — or on "close", because a connection that has gone away will
 * never drain and would otherwise suspend the caller forever. Both listeners
 * are removed as soon as either event fires.
 *
 * @param {object} res - Writable HTTP response.
 * @param {string} data - Already-formatted SSE payload.
 * @returns {Promise<void>}
 */
async function sseWrite(res, data) {
    if (res.writableEnded || res.destroyed) {
        return;
    }
    const ok = res.write(data);
    if (ok) {
        return;
    }
    await new Promise((resolve) => {
        const settle = () => {
            res.off("drain", settle);
            res.off("close", settle);
            resolve();
        };
        res.once("drain", settle);
        res.once("close", settle);
    });
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { Session } from "../session.js";
|
|
2
|
+
/** Limits accepted by the session store constructor. Both are optional. */
export interface SessionStoreOptions {
    /** Capacity at which creating a new id evicts the least-recently-used session. The implementation defaults to 20. */
    maxSessions?: number;
    /** Idle time in milliseconds after which a sweep removes a session. The implementation defaults to 30 minutes. */
    idleTimeoutMs?: number;
}
|
|
6
|
+
/** Bookkeeping kept by the store for every session entry. */
export interface SessionMetadata {
    /** Epoch milliseconds at which the entry was created. */
    createdAt: number;
    /** Epoch milliseconds of the last lookup or recorded request; compared against the idle timeout. */
    lastActivity: number;
    /** Number of requests recorded against the session. */
    requestCount: number;
    /** Running sum of the costs recorded against the session. */
    totalCost: number;
}
|
|
12
|
+
/** Shape of a stored entry: the data handed to `create` plus store-owned metadata. */
interface SessionEntry {
    /** Live session, if any; closed when the entry is deleted. */
    session?: Session;
    /** Optional controller; aborted when the entry is deleted. */
    abortController?: AbortController;
    /** Timestamps and counters maintained by the store. */
    metadata: SessionMetadata;
    /** Any additional fields supplied by the caller of `create`. */
    [key: string]: unknown;
}
|
|
18
|
+
/**
 * In-memory store for API/WebSocket sessions with a maximum size,
 * least-recently-used eviction and periodic removal of idle sessions.
 */
declare class ApiSessionStore {
    private sessions;
    private sweepInterval;
    private maxSessions;
    private idleTimeoutMs;
    constructor(opts?: SessionStoreOptions);
    /** Store `data` under `id` with fresh metadata and return `id`. */
    create(id: string, data: Record<string, unknown>): string;
    /** Look up an entry, marking it as most recently used; `null` when absent. */
    get(id: string): SessionEntry | null;
    /** Record a request against a session (bumps count and cost). */
    recordRequest(id: string, cost: number): void;
    /** Remove an entry, closing its session and aborting its controller when present. */
    delete(id: string): void;
    /** Number of entries currently stored. */
    count(): number;
    /** Remove every entry whose last activity is older than the idle timeout. */
    sweepIdle(): void;
    /** Stop the periodic sweep and remove all entries. */
    destroy(): void;
    /** Evict the least-recently-used session (first item in Map). */
    private evictLRU;
}
|
|
35
|
+
/** Session store instance exported by this module. */
export declare const apiSessions: ApiSessionStore;
|
|
36
|
+
export {};
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// In-memory store for active API/WebSocket sessions.
|
|
2
|
+
// LRU eviction, max session limit, idle timeout.
|
|
3
|
+
/**
 * In-memory store for active API/WebSocket sessions.
 *
 * Entries live in a Map whose insertion order doubles as the LRU order: the
 * first key is the least recently used, the last key the most recently used.
 * The store enforces a maximum number of sessions (evicting the LRU entry
 * when a new id is created at capacity) and sweeps idle sessions every five
 * minutes on an unref'd timer, so the timer never keeps the process alive.
 */
class ApiSessionStore {
    sessions = new Map();
    sweepInterval;
    maxSessions;
    idleTimeoutMs;
    /**
     * @param {{ maxSessions?: number, idleTimeoutMs?: number }} [opts]
     *   `maxSessions` defaults to 20, `idleTimeoutMs` to 30 minutes.
     */
    constructor(opts = {}) {
        this.maxSessions = opts.maxSessions ?? 20;
        this.idleTimeoutMs = opts.idleTimeoutMs ?? 30 * 60 * 1000; // 30 min
        this.sweepInterval = setInterval(() => this.sweepIdle(), 5 * 60 * 1000);
        this.sweepInterval.unref();
    }
    /**
     * Store `data` under `id` with fresh metadata and return `id`.
     * Creating a new id at capacity evicts the least-recently-used entry.
     * Re-creating an existing id replaces its entry and makes it the most
     * recently used one.
     */
    create(id, data) {
        if (this.sessions.has(id)) {
            // Map.set on an existing key keeps its old position; remove the key
            // first so the replacement is not the next eviction candidate.
            this.sessions.delete(id);
        }
        else if (this.sessions.size >= this.maxSessions) {
            // Evict LRU if at capacity
            this.evictLRU();
        }
        const now = Date.now();
        this.sessions.set(id, {
            ...data,
            metadata: {
                createdAt: now,
                lastActivity: now,
                requestCount: 0,
                totalCost: 0,
            },
        });
        return id;
    }
    /** Look up an entry, refreshing its activity time and LRU position; `null` when absent. */
    get(id) {
        const entry = this.sessions.get(id);
        if (entry) {
            entry.metadata.lastActivity = Date.now();
            // Move to end of map for LRU ordering (Map preserves insertion order)
            this.sessions.delete(id);
            this.sessions.set(id, entry);
        }
        return entry || null;
    }
    /**
     * Record a request against a session (bumps count and cost).
     * A missing or non-finite `cost` still counts as a request but leaves the
     * running total untouched, so one bad value cannot turn it into NaN.
     */
    recordRequest(id, cost) {
        const entry = this.sessions.get(id);
        if (entry) {
            entry.metadata.requestCount++;
            if (Number.isFinite(cost)) {
                entry.metadata.totalCost += cost;
            }
            entry.metadata.lastActivity = Date.now();
        }
    }
    /**
     * Remove an entry, closing its session and aborting its controller when
     * present. The entry is removed before cleanup runs and the controller is
     * aborted even if `close()` throws; the exception still propagates.
     */
    delete(id) {
        const entry = this.sessions.get(id);
        if (!entry) {
            return;
        }
        this.sessions.delete(id);
        try {
            if (entry.session && typeof entry.session.close === "function") {
                entry.session.close();
            }
        }
        finally {
            if (entry.abortController) {
                entry.abortController.abort();
            }
        }
    }
    /** Number of entries currently stored. */
    count() {
        return this.sessions.size;
    }
    /** Remove every entry whose last activity is older than the idle timeout. */
    sweepIdle() {
        const now = Date.now();
        for (const [id, entry] of this.sessions) {
            if (now - entry.metadata.lastActivity > this.idleTimeoutMs) {
                this.delete(id);
            }
        }
    }
    /** Stop the periodic sweep and remove all entries. */
    destroy() {
        clearInterval(this.sweepInterval);
        for (const id of [...this.sessions.keys()]) {
            this.delete(id);
        }
    }
    /** Evict the least-recently-used session (first item in Map). */
    evictLRU() {
        const firstKey = this.sessions.keys().next().value;
        if (firstKey !== undefined) {
            this.delete(firstKey);
        }
    }
}
|
|
87
|
+
// Store instance shared by every importer of this module, created with the
// constructor defaults (20 sessions, 30-minute idle timeout).
export const apiSessions = new ApiSessionStore();
|