otterly 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +247 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +75 -0
- package/dist/engine.d.ts +38 -0
- package/dist/engine.js +169 -0
- package/dist/errors.d.ts +7 -0
- package/dist/errors.js +32 -0
- package/dist/events.d.ts +13 -0
- package/dist/events.js +168 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +10 -0
- package/dist/permissions.d.ts +16 -0
- package/dist/permissions.js +43 -0
- package/dist/server/circuit-breaker.d.ts +20 -0
- package/dist/server/circuit-breaker.js +54 -0
- package/dist/server/index.d.ts +19 -0
- package/dist/server/index.js +275 -0
- package/dist/server/logger.d.ts +18 -0
- package/dist/server/logger.js +22 -0
- package/dist/server/middleware.d.ts +20 -0
- package/dist/server/middleware.js +80 -0
- package/dist/server/openai-compat.d.ts +110 -0
- package/dist/server/openai-compat.js +158 -0
- package/dist/server/request-queue.d.ts +33 -0
- package/dist/server/request-queue.js +79 -0
- package/dist/server/routes-native.d.ts +28 -0
- package/dist/server/routes-native.js +215 -0
- package/dist/server/routes-openai.d.ts +7 -0
- package/dist/server/routes-openai.js +203 -0
- package/dist/server/session-store.d.ts +36 -0
- package/dist/server/session-store.js +87 -0
- package/dist/server/ws-handler.d.ts +7 -0
- package/dist/server/ws-handler.js +155 -0
- package/dist/session.d.ts +43 -0
- package/dist/session.js +255 -0
- package/dist/types.d.ts +100 -0
- package/dist/types.js +2 -0
- package/package.json +73 -0
package/dist/events.js
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
 * Create the mutable state object that is threaded through successive
 * `normalizeEvents` calls for one stream of SDK messages.
 *
 * Fields:
 *   - sessionId:       null until a "system"/"init" message supplies one.
 *   - toolNames:       Map from tool_use id to tool name, used to label tool results.
 *   - accumulatedText: concatenation of every streamed text delta seen so far.
 *
 * @returns a fresh context; no state is shared between calls.
 */
export function createEventContext() {
    const toolNames = new Map();
    const context = { sessionId: null, toolNames, accumulatedText: "" };
    return context;
}
|
|
8
|
+
/**
 * Normalize a raw SDK message into clean AgentEvent(s).
 * Returns an array because one SDK message can contain multiple content blocks.
 *
 * Mapping, keyed on `raw.type`:
 *   - "system" (subtype "init") → one "system" event; also stores the session id on `ctx`.
 *   - "assistant"               → one "text" event per text block and one "tool_use"
 *                                 event per tool_use block (the id → name pair is
 *                                 remembered on `ctx.toolNames`).
 *   - "user" (with `tool_use_result` set) → one "tool_result" event per tool_result block.
 *   - "stream_event"            → a "text_delta" event for text deltas; the text is also
 *                                 appended to `ctx.accumulatedText`.
 *   - "result"                  → a "result" event on success, otherwise an "error" event.
 * Any other message type yields an empty array.
 *
 * @param raw  Raw SDK message.
 * @param ctx  Mutable context created by `createEventContext`; updated in place.
 * @returns    Zero or more normalized events, in block order.
 */
export function normalizeEvents(raw, ctx) {
    const events = [];
    switch (raw.type) {
        case "system": {
            if (raw.subtype === "init") {
                const sessionId = raw.session_id;
                ctx.sessionId = sessionId;
                events.push({
                    type: "system",
                    sessionId,
                    model: raw.model || "",
                    cwd: raw.cwd || "",
                    tools: raw.tools || [],
                });
            }
            break;
        }
        case "assistant": {
            for (const block of contentBlocks(raw.message)) {
                if (block.type === "text") {
                    events.push({
                        type: "text",
                        text: block.text,
                    });
                }
                else if (block.type === "tool_use") {
                    const id = block.id;
                    const name = block.name;
                    const input = (block.input || {});
                    // Remember the name so the matching tool_result can be labelled later.
                    ctx.toolNames.set(id, name);
                    events.push({
                        type: "tool_use",
                        id,
                        tool: name,
                        input,
                        description: describeToolUse(name, input),
                    });
                }
            }
            break;
        }
        case "user": {
            // Only user messages that carry a tool result are surfaced.
            if (raw.tool_use_result !== undefined) {
                for (const block of contentBlocks(raw.message)) {
                    if (block.type === "tool_result") {
                        const toolUseId = block.tool_use_id;
                        events.push({
                            type: "tool_result",
                            toolUseId,
                            tool: ctx.toolNames.get(toolUseId) || "unknown",
                            output: extractToolResultText(block.content),
                            isError: block.is_error || false,
                        });
                    }
                }
            }
            break;
        }
        case "stream_event": {
            const event = raw.event;
            if (!event)
                break;
            if (event.type === "content_block_delta") {
                const delta = event.delta;
                // Require a real string: appending a missing `text` would put the
                // literal "undefined" into the accumulated transcript.
                if (delta?.type === "text_delta" && typeof delta.text === "string") {
                    ctx.accumulatedText += delta.text;
                    events.push({
                        type: "text_delta",
                        delta: delta.text,
                    });
                }
            }
            break;
        }
        case "result": {
            if (raw.subtype === "success") {
                events.push({
                    type: "result",
                    text: raw.result || "",
                    cost: raw.total_cost_usd || 0,
                    duration: raw.duration_ms || 0,
                    sessionId: ctx.sessionId || "",
                    usage: raw.usage || {
                        input_tokens: 0,
                        output_tokens: 0,
                    },
                });
            }
            else {
                events.push({
                    type: "error",
                    error: new Error(describeFailure(raw)),
                });
            }
            break;
        }
    }
    return events;
}
/** Return the message's content blocks, ignoring non-array content and non-object entries. */
function contentBlocks(message) {
    const content = message?.content;
    if (!Array.isArray(content))
        return [];
    return content.filter((block) => block !== null && typeof block === "object");
}
/** Build the error text for a non-success result, falling back to the subtype when no errors are listed. */
function describeFailure(raw) {
    const errors = raw.errors;
    if (Array.isArray(errors) && errors.length > 0)
        return errors.join("\n");
    if (typeof errors === "string" && errors !== "")
        return errors;
    return `Stopped: ${raw.subtype}`;
}
|
|
119
|
+
/** Cut `str` to at most `max` characters, appending "..." only when something was removed. */
function truncate(str, max) {
    return str.length > max ? str.slice(0, max) + "..." : str;
}
/**
 * Produce a short human-readable description of a tool invocation.
 *
 * @param name   Tool name, e.g. "Read", "Bash", "Grep".
 * @param input  The tool's input object. May be omitted or null; missing fields
 *               fall back to "unknown" (paths) or an empty string.
 * @returns      A one-line description; unrecognised tools yield `Using tool: <name>`.
 */
export function describeToolUse(name, input) {
    // This function is part of the public API, so callers may pass no input at all;
    // treat that the same as an empty input object rather than throwing.
    const args = input ?? {};
    switch (name) {
        case "Read":
            return `Reading file: ${args.file_path || "unknown"}`;
        case "Write":
            return `Writing file: ${args.file_path || "unknown"}`;
        case "Edit":
            return `Editing file: ${args.file_path || "unknown"}`;
        case "MultiEdit":
            return `Editing file: ${args.file_path || "unknown"}`;
        case "Bash":
            // Commands can be arbitrarily long; keep the description to one short line.
            return `Running command: ${truncate(String(args.command || ""), 80)}`;
        case "Glob":
            return `Searching for files: ${args.pattern || ""}`;
        case "Grep":
            return `Searching content: ${truncate(String(args.pattern || ""), 60)}`;
        case "WebFetch":
            return `Fetching: ${args.url || ""}`;
        case "WebSearch":
            return `Searching: ${args.query || ""}`;
        case "Task":
            return `Running sub-task: ${args.description || ""}`;
        case "NotebookEdit":
            return `Editing notebook: ${args.notebook_path || "unknown"}`;
        default:
            return `Using tool: ${name}`;
    }
}
|
|
150
|
+
/**
 * Flatten a tool_result `content` value into a single string.
 *
 *   - null / undefined → "" (JSON.stringify(undefined) would return undefined, not a string)
 *   - string           → returned unchanged
 *   - array            → each entry rendered (strings as-is, `{ type: "text" }` entries via
 *                        their `text`, anything else as JSON) and joined with newlines
 *   - anything else    → JSON text of the value
 *
 * @param content  The `content` field of a tool_result block.
 * @returns        The textual form of the result.
 */
function extractToolResultText(content) {
    if (content === undefined || content === null)
        return "";
    if (typeof content === "string")
        return content;
    if (!Array.isArray(content))
        return JSON.stringify(content);
    const parts = content.map((part) => {
        if (typeof part === "string")
            return part;
        if (typeof part === "object" && part !== null && "type" in part && part.type === "text")
            return part.text;
        return JSON.stringify(part);
    });
    return parts.join("\n");
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export { ClaudeEngine } from "./engine.js";
|
|
2
|
+
export { Session } from "./session.js";
|
|
3
|
+
export { AUTOPILOT, READONLY } from "./permissions.js";
|
|
4
|
+
export { AgentError, classifyError } from "./errors.js";
|
|
5
|
+
export type { ErrorCode } from "./errors.js";
|
|
6
|
+
export { describeToolUse } from "./events.js";
|
|
7
|
+
export type { EngineOptions, PermissionMode, ToolRequest, PermissionDecision, PermissionHandler, TextEvent, TextDeltaEvent, ToolUseEvent, ToolResultEvent, SystemEvent, ErrorEvent, ResultEvent, AgentEvent, ToolExecution, AgentResult, } from "./types.js";
|
|
8
|
+
export { startApiServer } from "./server/index.js";
|
|
9
|
+
export type { ApiServerOptions, ApiServerHandle } from "./server/index.js";
|
|
10
|
+
import { ClaudeEngine } from "./engine.js";
|
|
11
|
+
/** Pre-instantiated default engine. Import and use directly. */
|
|
12
|
+
export declare const claude: ClaudeEngine;
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export { ClaudeEngine } from "./engine.js";
|
|
2
|
+
export { Session } from "./session.js";
|
|
3
|
+
export { AUTOPILOT, READONLY } from "./permissions.js";
|
|
4
|
+
export { AgentError, classifyError } from "./errors.js";
|
|
5
|
+
export { describeToolUse } from "./events.js";
|
|
6
|
+
// Server exports
|
|
7
|
+
export { startApiServer } from "./server/index.js";
|
|
8
|
+
import { ClaudeEngine } from "./engine.js";
|
|
9
|
+
/** Pre-instantiated default engine. Import and use directly. */
|
|
10
|
+
export const claude = new ClaudeEngine();
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { PermissionHandler } from "./types.js";
|
|
2
|
+
/** Always allow everything. Used internally when permissionMode is "bypassPermissions". */
export declare const AUTOPILOT: PermissionHandler;
/** Allow read-only tools, deny anything that modifies files or runs commands. */
export declare const READONLY: PermissionHandler;
/**
 * Wrap our simple PermissionHandler into the SDK's canUseTool callback shape.
 *
 * SDK expects: (toolName, input, options) => { behavior, updatedInput?, message? }
 * We accept:   ({ tool, input, reason }) => { allow, updatedInput?, message? }
 *
 * The returned callback resolves with `behavior: "allow"` (plus `updatedInput`)
 * when the handler allows the call, and `behavior: "deny"` (plus `message`)
 * otherwise. `behavior` is typed as that literal union because those are the
 * only two values the implementation produces.
 */
export declare function wrapPermissionHandler(handler: PermissionHandler): (toolName: string, input: Record<string, unknown>, options: Record<string, unknown>) => Promise<{
    behavior: "allow" | "deny";
    updatedInput?: Record<string, unknown>;
    message?: string;
}>;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/** Permission handler that approves every request. Used internally when permissionMode is "bypassPermissions". */
export const AUTOPILOT = () => {
    return { allow: true };
};
// Names of the tools the read-only handler lets through.
const READ_ONLY_TOOLS = new Set(["Read", "Glob", "Grep", "WebFetch", "WebSearch", "Task", "AskUserQuestion"]);
/**
 * Permission handler for read-only mode: approves tools on the read-only list
 * and denies every other tool with an explanatory message.
 */
export const READONLY = (request) => {
    const { tool } = request;
    return READ_ONLY_TOOLS.has(tool)
        ? { allow: true }
        : { allow: false, message: `Read-only mode: ${tool} is not allowed` };
};
|
|
19
|
+
/**
 * Wrap our simple PermissionHandler into the SDK's canUseTool callback shape.
 *
 * SDK expects: (toolName, input, options) => { behavior, updatedInput?, message? }
 * We accept:   ({ tool, input, reason }) => { allow, updatedInput?, message? }
 *
 * @param handler  Sync or async handler returning `{ allow, updatedInput?, message? }`.
 * @returns        Async callback resolving to `{ behavior: "allow", updatedInput }` when the
 *                 handler allows the call, or `{ behavior: "deny", message }` otherwise.
 *                 A handler that returns nothing is treated as a denial.
 */
export function wrapPermissionHandler(handler) {
    return async (toolName, input, options) => {
        const decision = await handler({
            tool: toolName,
            input,
            // `options` may be absent when the callback is invoked without SDK metadata.
            reason: options?.decisionReason || undefined,
        });
        // Fail closed: a missing decision must never be read as permission.
        if (decision?.allow) {
            return {
                behavior: "allow",
                updatedInput: decision.updatedInput || input,
            };
        }
        return {
            behavior: "deny",
            message: decision?.message || "Denied by permission handler",
        };
    };
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/** Breaker state: "closed" passes requests, "open" rejects them, "half-open" is probing for recovery. */
export type CircuitState = "closed" | "open" | "half-open";
/** Tuning knobs for {@link CircuitBreaker}. */
export interface CircuitBreakerOptions {
    /** Consecutive counted failures needed to open the circuit. Defaults to 5. */
    failureThreshold?: number;
    /** Milliseconds the circuit stays open before a probe is allowed. Defaults to 30000. */
    cooldownMs?: number;
}
/** Tracks consecutive upstream failures and decides whether new requests may proceed. */
export declare class CircuitBreaker {
    private state;
    private consecutiveFailures;
    private failureThreshold;
    private cooldownMs;
    private lastFailureTime;
    constructor(opts?: CircuitBreakerOptions);
    /** Check if request should proceed. Returns true if allowed. */
    canProceed(): boolean;
    /** Record a successful execution. */
    onSuccess(): void;
    /**
     * Record a failure. Pass the error code from AgentError if available.
     * Codes other than "NETWORK" and "RATE_LIMITED" do not count toward opening the circuit.
     */
    onFailure(errorCode?: string): void;
    /** Current breaker state. */
    getState(): CircuitState;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// Circuit breaker: protects against cascading failures when the upstream API is down.
// States: closed (normal) → open (failing, reject fast) → half-open (probe) → closed
const TRIPPABLE_CODES = new Set(["NETWORK", "RATE_LIMITED"]);
/**
 * Counts consecutive upstream failures and gates new requests.
 *
 * - closed:    every request proceeds; `failureThreshold` consecutive trippable
 *              failures open the circuit.
 * - open:      requests are rejected until `cooldownMs` has passed since the last
 *              counted failure, then one probe is admitted (half-open).
 * - half-open: a successful probe closes the circuit, a trippable failure re-opens it.
 *              A probe that ends inconclusively (non-trippable error) or never reports
 *              back is released so the breaker cannot stay half-open forever.
 */
export class CircuitBreaker {
    state = "closed";
    consecutiveFailures = 0;
    failureThreshold;
    cooldownMs;
    lastFailureTime = 0;
    // When the current half-open probe was admitted; used to detect a lost probe.
    probeStartTime = 0;
    /**
     * @param opts.failureThreshold  Consecutive counted failures that open the circuit (default 5).
     * @param opts.cooldownMs        Time the circuit stays open before probing (default 30000).
     */
    constructor(opts = {}) {
        this.failureThreshold = opts.failureThreshold ?? 5;
        this.cooldownMs = opts.cooldownMs ?? 30_000;
    }
    /** Check if request should proceed. Returns true if allowed. */
    canProceed() {
        if (this.state === "closed")
            return true;
        const now = Date.now();
        if (this.state === "open") {
            // Still cooling down: reject fast.
            if (now - this.lastFailureTime < this.cooldownMs)
                return false;
            this.state = "half-open";
            this.probeStartTime = now;
            return true; // allow one probe
        }
        // half-open: a probe is outstanding, block others. If the probe has not
        // reported within a full cooldown window, assume its outcome was never
        // recorded and admit a replacement probe instead of blocking forever.
        if (now - this.probeStartTime >= this.cooldownMs) {
            this.probeStartTime = now;
            return true;
        }
        return false;
    }
    /** Record a successful execution. */
    onSuccess() {
        this.consecutiveFailures = 0;
        if (this.state === "half-open") {
            this.state = "closed";
        }
    }
    /** Record a failure. Pass the error code from AgentError if available. */
    onFailure(errorCode) {
        // Only trip on network/rate-limit errors, not auth or user errors
        if (errorCode && !TRIPPABLE_CODES.has(errorCode)) {
            // The probe told us nothing about upstream health. Go back to "open"
            // without touching lastFailureTime so the next request becomes the probe.
            if (this.state === "half-open") {
                this.state = "open";
            }
            return;
        }
        this.consecutiveFailures++;
        this.lastFailureTime = Date.now();
        if (this.state === "half-open") {
            // The probe failed: restart the cooldown.
            this.state = "open";
            return;
        }
        if (this.consecutiveFailures >= this.failureThreshold) {
            this.state = "open";
        }
    }
    /** Current state: "closed", "open" or "half-open". */
    getState() {
        return this.state;
    }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { type Server } from "http";
|
|
2
|
+
import { WebSocketServer } from "ws";
|
|
3
|
+
/** Options accepted by {@link startApiServer}. Every field is optional. */
export interface ApiServerOptions {
    /** TCP port to listen on. Defaults to 11434. */
    port?: number;
    /** Working directory handed to route and WebSocket handlers. Defaults to `process.cwd()`. */
    workingDir?: string;
    /** Forwarded to the request queue as `maxConcurrent`; the default is defined by the queue. */
    maxConcurrent?: number;
    /** Forwarded to the request queue as `maxQueueSize`; the default is defined by the queue. */
    maxQueueSize?: number;
    /** Forwarded to the rate limiter as `requestsPerMinute`; the default is defined by the limiter. */
    requestsPerMinute?: number;
    /** Execution timeout for non-streaming requests, in milliseconds. Defaults to 5 minutes. */
    requestTimeoutMs?: number;
    /** Execution timeout for streaming requests, in milliseconds. Defaults to 10 minutes. */
    streamTimeoutMs?: number;
}
/** Handle returned once the server is listening. */
export interface ApiServerHandle {
    /** The underlying Node HTTP server. */
    server: Server;
    /** The WebSocket server attached on the `/ws` path. */
    wss: WebSocketServer;
    /** Port the server was started on. */
    port: number;
    /** Stop immediately: tears down the rate limiter and session store, then closes both servers. */
    close(): void;
    /**
     * Stop gracefully: stops accepting connections, then waits for in-flight
     * requests to finish or for `timeoutMs` (default 10000) to elapse before cleaning up.
     */
    shutdown(timeoutMs?: number): Promise<void>;
}
/** Start the API server and resolve with its handle once it is listening. */
export declare function startApiServer(opts?: ApiServerOptions): Promise<ApiServerHandle>;
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
// API server: HTTP + WebSocket, no Express.
|
|
2
|
+
// Mounts OpenAI-compatible and native routes on raw http.createServer.
|
|
3
|
+
// Includes: middleware chain, request queue, timeouts, logging, graceful shutdown.
|
|
4
|
+
import { createServer } from "http";
|
|
5
|
+
import { WebSocketServer } from "ws";
|
|
6
|
+
import { handleChatCompletions } from "./routes-openai.js";
|
|
7
|
+
import { handleStatus, handleRun, handleStream } from "./routes-native.js";
|
|
8
|
+
import { attachWsHandler } from "./ws-handler.js";
|
|
9
|
+
import { apiSessions } from "./session-store.js";
|
|
10
|
+
import { RequestQueue, QueueFullError, QueueTimeoutError } from "./request-queue.js";
|
|
11
|
+
import { checkAuth, RateLimiter, sendAuthError, sendRateLimitError } from "./middleware.js";
|
|
12
|
+
import { generateRequestId, logRequest, logResponse, logError } from "./logger.js";
|
|
13
|
+
import { CircuitBreaker } from "./circuit-breaker.js";
|
|
14
|
+
/**
 * Parse JSON body from an incoming request. Returns parsed object or null on failure.
 *
 * Resolves (never rejects) with:
 *   - `{}`            when the request has no body,
 *   - the parsed JSON when the body is a JSON object or array,
 *   - `null`          when the body exceeds 20MB, is not valid JSON, is a JSON
 *                     primitive (number, string, boolean, null), or the request
 *                     errors or closes before the body is complete.
 *
 * @param req  Readable request stream emitting Buffer chunks.
 */
function parseBody(req) {
    const MAX_BYTES = 20 * 1024 * 1024; // 20MB
    return new Promise((resolve) => {
        const chunks = [];
        let size = 0;
        let settled = false;
        // Several events can race (limit hit, error, close); only the first one decides.
        const finish = (value) => {
            if (settled)
                return;
            settled = true;
            resolve(value);
        };
        req.on("data", (chunk) => {
            if (settled)
                return;
            size += chunk.length;
            if (size > MAX_BYTES) {
                // Drop what was buffered and stop reading the oversized body.
                chunks.length = 0;
                finish(null);
                req.destroy();
                return;
            }
            chunks.push(chunk);
        });
        req.on("end", () => {
            if (settled)
                return;
            if (chunks.length === 0) {
                finish({});
                return;
            }
            try {
                const value = JSON.parse(Buffer.concat(chunks).toString());
                // Route handlers read fields off the body, so a bare primitive is not a usable body.
                finish(value !== null && typeof value === "object" ? value : null);
            }
            catch {
                finish(null);
            }
        });
        req.on("error", () => finish(null));
        // A connection dropped mid-body may never emit "end"; without this the
        // promise (and the request handler awaiting it) would hang forever.
        req.on("close", () => finish(null));
    });
}
|
|
46
|
+
/**
 * Send `body` as a JSON response and end it.
 *
 * The status line and Content-Type header are only written when headers have not
 * been sent yet. If the response has already been ended this is a no-op, so a
 * late error path cannot write to a finished response.
 *
 * @param res     Node HTTP server response.
 * @param status  HTTP status code to send (ignored once headers are out).
 * @param body    Value serialized with JSON.stringify.
 */
function jsonResponse(res, status, body) {
    if (res.writableEnded)
        return;
    if (!res.headersSent) {
        res.writeHead(status, { "Content-Type": "application/json" });
    }
    res.end(JSON.stringify(body));
}
|
|
52
|
+
/**
 * Attach the CORS headers sent on every response: any origin, the GET/POST/OPTIONS
 * methods, and the Content-Type, Authorization and X-Session-Id request headers.
 *
 * @param res  Response object exposing `setHeader(name, value)`.
 */
function setCors(res) {
    const corsHeaders = [
        ["Access-Control-Allow-Origin", "*"],
        ["Access-Control-Allow-Methods", "GET, POST, OPTIONS"],
        ["Access-Control-Allow-Headers", "Content-Type, Authorization, X-Session-Id"],
    ];
    for (const [name, value] of corsHeaders) {
        res.setHeader(name, value);
    }
}
|
|
57
|
+
/**
 * Start the HTTP + WebSocket API server.
 *
 * Routing:
 *   - OPTIONS (any path)         → 204 preflight response
 *   - GET  /api/status           → handleStatus (no auth, no rate limit, no queue)
 *   - POST /v1/chat/completions  → handleChatCompletions (OpenAI-style error bodies)
 *   - POST /api/run              → handleRun
 *   - POST /api/stream           → handleStream
 *   - WebSocket on /ws           → attachWsHandler
 *   - anything else              → 404
 *
 * POST requests go through, in order: shutdown check → auth → rate limit →
 * circuit breaker → JSON body parse → request queue → execution timeout → handler.
 *
 * @param opts  See ApiServerOptions; defaults: port 11434, workingDir process.cwd(),
 *              requestTimeoutMs 5 min, streamTimeoutMs 10 min. The API key is read
 *              from the OTTERLY_API_KEY environment variable.
 * @returns     Promise resolving with `{ server, wss, port, close, shutdown }` once the
 *              server is listening (`port` is the port actually bound). Rejects if the
 *              server emits "error", e.g. EADDRINUSE.
 */
export async function startApiServer(opts = {}) {
    const port = opts.port ?? 11434;
    const workingDir = opts.workingDir ?? process.cwd();
    const apiKey = process.env.OTTERLY_API_KEY || null;
    const requestTimeoutMs = opts.requestTimeoutMs ?? 5 * 60 * 1000; // 5 min one-shot
    const streamTimeoutMs = opts.streamTimeoutMs ?? 10 * 60 * 1000; // 10 min streaming
    const ctx = { workingDir, apiKey };
    const queue = new RequestQueue({
        maxConcurrent: opts.maxConcurrent,
        maxQueueSize: opts.maxQueueSize,
    });
    const rateLimiter = new RateLimiter({ requestsPerMinute: opts.requestsPerMinute });
    const circuitBreaker = new CircuitBreaker();
    // Track in-flight requests for graceful shutdown
    let inFlight = 0;
    let shuttingDown = false;
    let drainResolve = null;
    const server = createServer(async (req, res) => {
        const requestId = generateRequestId();
        const startTime = Date.now();
        res.setHeader("X-Request-Id", requestId);
        setCors(res);
        // Only the pathname is needed, so resolve against a fixed base instead of the
        // client-controlled Host header. A malformed request target can still make
        // URL parsing throw; answer 400 rather than letting this async handler reject.
        let path;
        try {
            path = new URL(req.url || "/", "http://localhost").pathname;
        }
        catch {
            jsonResponse(res, 400, { error: "Invalid request URL" });
            return;
        }
        // Log request start for POST requests
        if (req.method === "POST") {
            logRequest(requestId, req.method, path);
        }
        // Preflight
        if (req.method === "OPTIONS") {
            res.writeHead(204);
            res.end();
            return;
        }
        // Reject new requests during shutdown
        if (shuttingDown && req.method === "POST") {
            jsonResponse(res, 503, { error: "Server is shutting down" });
            return;
        }
        // GET /api/status — no auth, no rate limit, no queue
        if (req.method === "GET" && path === "/api/status") {
            handleStatus(req, res, queue, circuitBreaker);
            return;
        }
        // ── POST routes: auth → rate limit → circuit breaker → queue ──
        if (req.method !== "POST") {
            jsonResponse(res, 404, { error: "Not found" });
            return;
        }
        const isOpenai = path === "/v1/chat/completions";
        const format = isOpenai ? "openai" : "native";
        // Auth
        if (!checkAuth(req, ctx)) {
            sendAuthError(res, format);
            logResponse(requestId, req.method, path, 401, Date.now() - startTime);
            return;
        }
        // Rate limit
        if (!rateLimiter.allow(rateLimiter.keyFor(req))) {
            sendRateLimitError(res, format);
            logResponse(requestId, req.method, path, 429, Date.now() - startTime);
            return;
        }
        // Circuit breaker
        if (!circuitBreaker.canProceed()) {
            const status = 503;
            jsonResponse(res, status, isOpenai
                ? { error: { message: "Service temporarily unavailable", type: "server_error", code: status } }
                : { error: "Service temporarily unavailable" });
            logResponse(requestId, req.method, path, status, Date.now() - startTime);
            return;
        }
        // Parse body (parseBody yields null on failure; normalize that to undefined)
        const parsed = req;
        parsed.body = await parseBody(req) ?? undefined;
        if (parsed.body === undefined) {
            const status = 400;
            jsonResponse(res, status, isOpenai
                ? { error: { message: "Invalid JSON body", type: "invalid_request_error" } }
                : { error: "Invalid JSON body" });
            logResponse(requestId, req.method, path, status, Date.now() - startTime);
            return;
        }
        // Attach requestId and timing context for route handlers
        parsed.requestId = requestId;
        parsed.startTime = startTime;
        // Determine if streaming (affects timeout)
        const isStream = path === "/api/stream"
            || (isOpenai && parsed.body?.stream === true);
        const timeoutMs = isStream ? streamTimeoutMs : requestTimeoutMs;
        // Queue + execute with timeout
        try {
            await queue.run(async () => {
                inFlight++;
                try {
                    await withTimeout(timeoutMs, parsed, async () => {
                        if (isOpenai) {
                            await handleChatCompletions(parsed, res, ctx, circuitBreaker);
                        }
                        else if (path === "/api/run") {
                            await handleRun(parsed, res, ctx, circuitBreaker);
                        }
                        else if (path === "/api/stream") {
                            await handleStream(parsed, res, ctx, circuitBreaker);
                        }
                        else {
                            jsonResponse(res, 404, { error: "Not found" });
                        }
                    });
                }
                finally {
                    inFlight--;
                    const status = res.statusCode || 200;
                    logResponse(requestId, req.method, path, status, Date.now() - startTime);
                    // Wake a pending shutdown() once the last request has finished.
                    if (inFlight === 0 && drainResolve)
                        drainResolve();
                }
            });
        }
        catch (err) {
            if (err instanceof QueueFullError) {
                jsonResponse(res, 429, isOpenai
                    ? { error: { message: err.message, type: "rate_limit_error", code: 429 } }
                    : { error: err.message });
                logResponse(requestId, req.method, path, 429, Date.now() - startTime);
            }
            else if (err instanceof QueueTimeoutError) {
                jsonResponse(res, 408, isOpenai
                    ? { error: { message: err.message, type: "timeout_error", code: 408 } }
                    : { error: err.message });
                logResponse(requestId, req.method, path, 408, Date.now() - startTime);
            }
            else if (!res.headersSent) {
                const msg = err instanceof Error ? err.message : String(err);
                logError(requestId, msg);
                jsonResponse(res, 500, isOpenai
                    ? { error: { message: "Internal server error", type: "server_error", code: 500 } }
                    : { error: "Internal server error" });
            }
        }
    });
    // Node built-in timeouts (safety net)
    server.requestTimeout = 15 * 60 * 1000; // 15 min absolute max
    server.headersTimeout = 30 * 1000; // 30s to receive headers
    // WebSocket on /ws path
    const wss = new WebSocketServer({ server, path: "/ws" });
    attachWsHandler(wss, { workingDir });
    async function shutdown(timeoutMs = 10_000) {
        if (shuttingDown)
            return;
        shuttingDown = true;
        console.log("\nShutting down gracefully...");
        // Stop accepting new connections
        server.close();
        wss.close();
        // Wait for in-flight to drain or timeout
        if (inFlight > 0) {
            console.log(`Waiting for ${inFlight} in-flight request(s)...`);
            let timer;
            await Promise.race([
                new Promise((resolve) => { drainResolve = resolve; }),
                new Promise((resolve) => { timer = setTimeout(resolve, timeoutMs); }),
            ]);
            // If the drain won the race, a live timer would keep the process
            // running until timeoutMs elapsed.
            clearTimeout(timer);
        }
        // Clean up
        rateLimiter.destroy();
        apiSessions.destroy();
        console.log("Shutdown complete.");
    }
    return new Promise((resolve, reject) => {
        server.on("error", (err) => {
            if (err.code === "EADDRINUSE") {
                console.error(`Port ${port} is in use. Try a different port with -p <port>.`);
            }
            reject(err);
        });
        server.listen(port, () => {
            // With port 0 the OS picks the port, so report the one actually bound.
            const address = server.address();
            const boundPort = address !== null && typeof address === "object" ? address.port : port;
            console.log(`\n  otterly serve — local inference server`);
            console.log(`  ──────────────────────────────────────`);
            console.log(`  OpenAI compat : http://localhost:${boundPort}/v1/chat/completions`);
            console.log(`  Native API    : http://localhost:${boundPort}/api/run`);
            console.log(`  Streaming     : http://localhost:${boundPort}/api/stream`);
            console.log(`  WebSocket     : ws://localhost:${boundPort}/ws`);
            console.log(`  Health        : http://localhost:${boundPort}/api/status`);
            console.log(`  Working dir   : ${workingDir}`);
            if (apiKey) {
                console.log(`  Auth          : API key required (OTTERLY_API_KEY)`);
            }
            else {
                console.log(`  Auth          : none (set OTTERLY_API_KEY to enable)`);
            }
            console.log();
            resolve({
                server,
                wss,
                port: boundPort,
                // Immediate teardown; does not wait for in-flight requests.
                close() {
                    rateLimiter.destroy();
                    apiSessions.destroy();
                    wss.close();
                    server.close();
                },
                shutdown,
            });
        });
    });
}
|
|
263
|
+
/**
 * Run `fn` under an execution deadline.
 *
 * An AbortSignal is exposed as `req.timeoutSignal` and aborted after `timeoutMs`.
 * The handler is not interrupted by this function itself; it has to observe the
 * signal. The timer is always cleared when `fn` settles, and errors thrown by
 * `fn` propagate to the caller.
 *
 * @param timeoutMs  Milliseconds before the signal is aborted.
 * @param req        Request object that receives the `timeoutSignal` property.
 * @param fn         Async handler to execute.
 */
async function withTimeout(timeoutMs, req, fn) {
    const controller = new AbortController();
    const handle = setTimeout(() => {
        controller.abort();
    }, timeoutMs);
    // Publish the signal before the handler starts so it can pick it up immediately.
    req.timeoutSignal = controller.signal;
    try {
        await fn();
    }
    finally {
        clearTimeout(handle);
    }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Severity attached to each log line. */
export type LogLevel = "info" | "warn" | "error";
/** Shape of one JSON log line; arbitrary extra fields are allowed. */
interface LogEntry {
    /** ISO-8601 time at which the entry was written. */
    timestamp: string;
    level: LogLevel;
    requestId?: string;
    method?: string;
    path?: string;
    status?: number;
    durationMs?: number;
    message?: string;
    [key: string]: unknown;
}
/** Generate a short (8-character) request id. */
export declare function generateRequestId(): string;
/** Write one JSON log line to stdout; `timestamp` and `level` are filled in for the caller. */
export declare function log(level: LogLevel, fields: Omit<LogEntry, "timestamp" | "level">): void;
/** Log the start of a request (message "request_start"). */
export declare function logRequest(requestId: string, method: string, path: string): void;
/** Log the end of a request (message "request_end") with its status and duration. */
export declare function logResponse(requestId: string, method: string, path: string, status: number, durationMs: number): void;
/** Log an error-level entry; fields in `extra` are merged into the entry. */
export declare function logError(requestId: string, message: string, extra?: Record<string, unknown>): void;
|
|
18
|
+
export {};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// Structured JSON logger. Writes JSON lines to stdout.
|
|
2
|
+
import crypto from "crypto";
|
|
3
|
+
/**
 * Generate a short request id: the first 8 characters of a random UUID.
 *
 * @returns an 8-character lowercase hexadecimal string.
 */
export function generateRequestId() {
    const uuid = crypto.randomUUID();
    return uuid.substring(0, 8);
}
|
|
6
|
+
/**
 * Write one structured log entry to stdout as a single JSON line.
 *
 * @param level   Severity ("info", "warn" or "error").
 * @param fields  Extra properties merged into the entry after `timestamp` and `level`.
 */
export function log(level, fields) {
    const timestamp = new Date().toISOString();
    const line = JSON.stringify({ timestamp, level, ...fields });
    process.stdout.write(`${line}\n`);
}
/** Record that a request has started (info level, message "request_start"). */
export function logRequest(requestId, method, path) {
    const fields = { requestId, method, path, message: "request_start" };
    log("info", fields);
}
/** Record that a request has finished, with its status code and duration in milliseconds. */
export function logResponse(requestId, method, path, status, durationMs) {
    const fields = { requestId, method, path, status, durationMs, message: "request_end" };
    log("info", fields);
}
/** Record an error for a request; properties of `extra` (optional) are merged into the entry. */
export function logError(requestId, message, extra) {
    const fields = { requestId, message, ...extra };
    log("error", fields);
}
|