@wener/mcps 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.mjs +15 -0
- package/dist/mcps-cli.mjs +174727 -0
- package/lib/chat/agent.js +187 -0
- package/lib/chat/agent.js.map +1 -0
- package/lib/chat/audit.js +238 -0
- package/lib/chat/audit.js.map +1 -0
- package/lib/chat/converters.js +467 -0
- package/lib/chat/converters.js.map +1 -0
- package/lib/chat/handler.js +1068 -0
- package/lib/chat/handler.js.map +1 -0
- package/lib/chat/index.js +12 -0
- package/lib/chat/index.js.map +1 -0
- package/lib/chat/types.js +35 -0
- package/lib/chat/types.js.map +1 -0
- package/lib/contracts/AuditContract.js +85 -0
- package/lib/contracts/AuditContract.js.map +1 -0
- package/lib/contracts/McpsContract.js +113 -0
- package/lib/contracts/McpsContract.js.map +1 -0
- package/lib/contracts/index.js +3 -0
- package/lib/contracts/index.js.map +1 -0
- package/lib/dev.server.js +7 -0
- package/lib/dev.server.js.map +1 -0
- package/lib/entities/ChatRequestEntity.js +318 -0
- package/lib/entities/ChatRequestEntity.js.map +1 -0
- package/lib/entities/McpRequestEntity.js +271 -0
- package/lib/entities/McpRequestEntity.js.map +1 -0
- package/lib/entities/RequestLogEntity.js +177 -0
- package/lib/entities/RequestLogEntity.js.map +1 -0
- package/lib/entities/ResponseEntity.js +150 -0
- package/lib/entities/ResponseEntity.js.map +1 -0
- package/lib/entities/index.js +11 -0
- package/lib/entities/index.js.map +1 -0
- package/lib/entities/types.js +11 -0
- package/lib/entities/types.js.map +1 -0
- package/lib/index.js +3 -0
- package/lib/index.js.map +1 -0
- package/lib/mcps-cli.js +44 -0
- package/lib/mcps-cli.js.map +1 -0
- package/lib/providers/McpServerHandlerDef.js +40 -0
- package/lib/providers/McpServerHandlerDef.js.map +1 -0
- package/lib/providers/findMcpServerDef.js +26 -0
- package/lib/providers/findMcpServerDef.js.map +1 -0
- package/lib/providers/prometheus/def.js +24 -0
- package/lib/providers/prometheus/def.js.map +1 -0
- package/lib/providers/prometheus/index.js +2 -0
- package/lib/providers/prometheus/index.js.map +1 -0
- package/lib/providers/relay/def.js +32 -0
- package/lib/providers/relay/def.js.map +1 -0
- package/lib/providers/relay/index.js +2 -0
- package/lib/providers/relay/index.js.map +1 -0
- package/lib/providers/sql/def.js +31 -0
- package/lib/providers/sql/def.js.map +1 -0
- package/lib/providers/sql/index.js +2 -0
- package/lib/providers/sql/index.js.map +1 -0
- package/lib/providers/tencent-cls/def.js +44 -0
- package/lib/providers/tencent-cls/def.js.map +1 -0
- package/lib/providers/tencent-cls/index.js +2 -0
- package/lib/providers/tencent-cls/index.js.map +1 -0
- package/lib/scripts/bundle.js +90 -0
- package/lib/scripts/bundle.js.map +1 -0
- package/lib/server/api-routes.js +96 -0
- package/lib/server/api-routes.js.map +1 -0
- package/lib/server/audit.js +274 -0
- package/lib/server/audit.js.map +1 -0
- package/lib/server/chat-routes.js +82 -0
- package/lib/server/chat-routes.js.map +1 -0
- package/lib/server/config.js +223 -0
- package/lib/server/config.js.map +1 -0
- package/lib/server/db.js +97 -0
- package/lib/server/db.js.map +1 -0
- package/lib/server/index.js +2 -0
- package/lib/server/index.js.map +1 -0
- package/lib/server/mcp-handler.js +167 -0
- package/lib/server/mcp-handler.js.map +1 -0
- package/lib/server/mcp-routes.js +112 -0
- package/lib/server/mcp-routes.js.map +1 -0
- package/lib/server/mcps-router.js +119 -0
- package/lib/server/mcps-router.js.map +1 -0
- package/lib/server/schema.js +129 -0
- package/lib/server/schema.js.map +1 -0
- package/lib/server/server.js +166 -0
- package/lib/server/server.js.map +1 -0
- package/lib/web/ChatPage.js +827 -0
- package/lib/web/ChatPage.js.map +1 -0
- package/lib/web/McpInspectorPage.js +214 -0
- package/lib/web/McpInspectorPage.js.map +1 -0
- package/lib/web/ServersPage.js +93 -0
- package/lib/web/ServersPage.js.map +1 -0
- package/lib/web/main.js +541 -0
- package/lib/web/main.js.map +1 -0
- package/package.json +83 -0
- package/src/chat/agent.ts +240 -0
- package/src/chat/audit.ts +377 -0
- package/src/chat/converters.test.ts +325 -0
- package/src/chat/converters.ts +459 -0
- package/src/chat/handler.test.ts +137 -0
- package/src/chat/handler.ts +1233 -0
- package/src/chat/index.ts +16 -0
- package/src/chat/types.ts +72 -0
- package/src/contracts/AuditContract.ts +93 -0
- package/src/contracts/McpsContract.ts +141 -0
- package/src/contracts/index.ts +18 -0
- package/src/dev.server.ts +7 -0
- package/src/entities/ChatRequestEntity.ts +157 -0
- package/src/entities/McpRequestEntity.ts +149 -0
- package/src/entities/RequestLogEntity.ts +78 -0
- package/src/entities/ResponseEntity.ts +75 -0
- package/src/entities/index.ts +12 -0
- package/src/entities/types.ts +188 -0
- package/src/index.ts +1 -0
- package/src/mcps-cli.ts +59 -0
- package/src/providers/McpServerHandlerDef.ts +105 -0
- package/src/providers/findMcpServerDef.ts +31 -0
- package/src/providers/prometheus/def.ts +21 -0
- package/src/providers/prometheus/index.ts +1 -0
- package/src/providers/relay/def.ts +31 -0
- package/src/providers/relay/index.ts +1 -0
- package/src/providers/relay/relay.test.ts +47 -0
- package/src/providers/sql/def.ts +33 -0
- package/src/providers/sql/index.ts +1 -0
- package/src/providers/tencent-cls/def.ts +38 -0
- package/src/providers/tencent-cls/index.ts +1 -0
- package/src/scripts/bundle.ts +82 -0
- package/src/server/api-routes.ts +98 -0
- package/src/server/audit.ts +310 -0
- package/src/server/chat-routes.ts +95 -0
- package/src/server/config.test.ts +162 -0
- package/src/server/config.ts +198 -0
- package/src/server/db.ts +115 -0
- package/src/server/index.ts +1 -0
- package/src/server/mcp-handler.ts +209 -0
- package/src/server/mcp-routes.ts +133 -0
- package/src/server/mcps-router.ts +133 -0
- package/src/server/schema.ts +175 -0
- package/src/server/server.ts +163 -0
- package/src/web/ChatPage.tsx +1005 -0
- package/src/web/McpInspectorPage.tsx +254 -0
- package/src/web/ServersPage.tsx +139 -0
- package/src/web/main.tsx +600 -0
- package/src/web/styles.css +15 -0
|
@@ -0,0 +1,1068 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chat API Handler
|
|
3
|
+
* Provides unified AI model gateway with protocol conversion
|
|
4
|
+
*/ import consola from "consola";
|
|
5
|
+
import { Hono } from "hono";
|
|
6
|
+
import { streamSSE } from "hono/streaming";
|
|
7
|
+
import { ChatProtocol, createAuditContext, extractClientIp } from "./audit.js";
|
|
8
|
+
import { openaiToAnthropicRequest, anthropicToOpenaiResponse, openaiToGeminiRequest, geminiToOpenaiResponse } from "./converters.js";
|
|
9
|
+
import { CreateChatCompletionRequestSchema, CreateResponseRequestSchema } from "@wener/ai/openai";
|
|
10
|
+
import { CreateMessageRequestSchema } from "@wener/ai/anthropic";
|
|
11
|
+
import { CreateGenerateContentRequestSchema } from "@wener/ai/google";
|
|
12
|
+
const log = consola.withTag("chat");
|
|
13
|
+
/**
|
|
14
|
+
* Create chat handler Hono app
|
|
15
|
+
*/ export function createChatHandler(options = {}) {
|
|
16
|
+
const app = new Hono();
|
|
17
|
+
const { config = {} } = options;
|
|
18
|
+
/**
 * Resolve the configured model entry for a requested model name.
 *
 * Resolution order:
 *   1. Exact match on the configured `name`.
 *   2. Wildcard match — a configured name containing `*` is treated as a
 *      glob (e.g. "gpt-*" or "claude-*") matched against the whole name.
 *
 * @param {string} modelName - Model name taken from the incoming request.
 * @returns {object|null} The matching model config, or null when none matches.
 */ function resolveModelConfig(modelName) {
  const models = config.models;
  if (!models || models.length === 0)
    return null;
  // First pass: exact match
  for (const modelConfig of models) {
    if (modelConfig.name === modelName) {
      return modelConfig;
    }
  }
  // Second pass: wildcard matches (e.g., "gpt-*" or "claude-*")
  for (const modelConfig of models) {
    const pattern = modelConfig.name;
    if (pattern.includes("*")) {
      // Escape regex metacharacters first so a literal "." in a pattern like
      // "gpt-4.1-*" matches only a dot, then expand each "*" into ".*".
      // (Previously the raw pattern was fed to RegExp, so "." matched any char.)
      const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
      const regex = new RegExp(`^${escaped.replace(/\*/g, ".*")}$`);
      if (regex.test(modelName)) {
        return modelConfig;
      }
    }
  }
  return null;
}
|
|
42
|
+
/**
 * POST a JSON body to an upstream provider and return the raw Response.
 *
 * @param {string} url - Fully-built upstream endpoint URL.
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @param {Record<string, string>} headers - Extra headers merged over the JSON content type.
 * @param {boolean} [_stream=false] - Reserved flag, currently unused by the fetch itself.
 * @returns {Promise<Response>} The successful upstream response (body unread).
 * @throws {Error} When the upstream replies with a non-2xx status; the status
 *   and response text are logged and embedded in the error message.
 */ async function makeUpstreamRequest(url, body, headers, _stream = false) {
  const init = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      ...headers,
    },
    body: JSON.stringify(body),
  };
  const response = await fetch(url, init);
  if (response.ok) {
    return response;
  }
  const errorText = await response.text();
  log.error("Upstream error:", response.status, errorText);
  throw new Error(`Upstream error: ${response.status} ${errorText}`);
}
|
|
60
|
+
/**
 * Normalize base URL - strip trailing /v1 if present.
 * This allows baseUrl to be specified as either "http://example.com" or "http://example.com/v1".
 *
 * @param {string} url - Configured base URL.
 * @returns {string} URL without a trailing "/v1" or "/v1/" segment.
 */ function normalizeBaseUrl(url) {
  if (url.endsWith("/v1/")) {
    return url.slice(0, -4);
  }
  if (url.endsWith("/v1")) {
    return url.slice(0, -3);
  }
  return url;
}
|
|
66
|
+
/**
 * Build headers for an upstream request.
 *
 * Precedence (later wins): auth headers derived from the API key, then
 * model-level custom headers, then adapter-specific headers.
 *
 * @param {object} modelConfig - Resolved model config (apiKey, headers, adapters).
 * @param {string} adapter - Upstream protocol ("anthropic", "gemini", "openai", ...).
 * @returns {Record<string, string>} Headers to send upstream.
 */ function buildUpstreamHeaders(modelConfig, adapter) {
  const { apiKey, headers: customHeaders, adapters } = modelConfig;
  const authHeaders = {};
  if (apiKey) {
    if (adapter === "anthropic") {
      // Anthropic uses its own auth header plus a pinned API version.
      authHeaders["x-api-key"] = apiKey;
      authHeaders["anthropic-version"] = "2023-06-01";
    } else {
      authHeaders.Authorization = `Bearer ${apiKey}`;
    }
  }
  return {
    ...authHeaders,
    ...(customHeaders ?? {}),
    ...(adapters?.[adapter]?.headers ?? {}),
  };
}
|
|
90
|
+
/**
 * Normalize an OpenAI request for different providers.
 * Handles max_tokens/max_completion_tokens compatibility and thinking parameters.
 *
 * @param {object} request - Parsed chat-completions request.
 * @returns {object} Shallow copy with compatibility fields filled in; the
 *   input object is never mutated.
 */ function normalizeOpenAIRequest(request) {
  const { max_tokens, max_completion_tokens, enable_thinking, thinking } = request;
  const normalized = { ...request };
  // Some providers only support max_tokens, others prefer max_completion_tokens;
  // mirror the newer field into the older one when only the newer is set.
  if (max_completion_tokens && !max_tokens) {
    normalized.max_tokens = max_completion_tokens;
  }
  // enable_thinking (used by Qwen / DeepSeek thinking models) is mirrored into
  // the structured `thinking` object form when the latter is absent.
  if (enable_thinking !== undefined && !thinking) {
    normalized.thinking = {
      type: enable_thinking ? "enabled" : "disabled",
    };
  }
  return normalized;
}
|
|
111
|
+
// =========================================================================
// OpenAI Chat Completions API
// =========================================================================
// Accepts OpenAI-format chat requests, resolves the target model from config,
// converts the payload to the upstream provider's protocol (Anthropic/Gemini
// passthrough-free; OpenAI passthrough with parameter normalization), proxies
// the call, converts the response back to OpenAI format, and records audit data.
app.post("/v1/chat/completions", async (c) => {
  // Create audit context early
  // (declared outside try so the catch block can record the failure)
  let auditCtx = null;
  try {
    const body = await c.req.json();
    const parseResult = CreateChatCompletionRequestSchema.safeParse(body);
    if (!parseResult.success) {
      // OpenAI-style error envelope, with zod issues attached for debugging.
      return c.json({
        error: {
          message: "Invalid request",
          type: "invalid_request_error",
          param: null,
          code: "invalid_request",
          details: parseResult.error.issues
        }
      }, 400);
    }
    const request = parseResult.data;
    const modelConfig = resolveModelConfig(request.model);
    if (!modelConfig) {
      return c.json({
        error: {
          message: `Model ${request.model} not configured`,
          type: "invalid_request_error",
          param: "model",
          code: "model_not_found"
        }
      }, 404);
    }
    // Determine upstream adapter (protocol); defaults to OpenAI passthrough.
    const adapter = modelConfig.adapter || "openai";
    // Adapter-specific baseUrl wins over the model-level one; trailing /v1 is stripped.
    const baseUrl = normalizeBaseUrl(modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || "https://api.openai.com");
    // Create audit context
    const outputProtocol = adapter === "anthropic" ? ChatProtocol.ANTHROPIC : adapter === "gemini" ? ChatProtocol.GEMINI : ChatProtocol.OPENAI;
    auditCtx = createAuditContext({
      method: "POST",
      endpoint: "/v1/chat/completions",
      model: request.model,
      inputProtocol: ChatProtocol.OPENAI,
      outputProtocol,
      streaming: request.stream || false,
      clientIp: extractClientIp(c),
      userAgent: c.req.header("user-agent"),
      requestMeta: {
        temperature: request.temperature,
        max_tokens: request.max_tokens || request.max_completion_tokens,
        top_p: request.top_p
      }
    });
    // Log incoming request
    log.info(`→ POST /v1/chat/completions model=${request.model} stream=${request.stream || false} messages=${request.messages.length}`);
    // Build upstream request based on protocol
    let upstreamUrl;
    let upstreamBody;
    let upstreamHeaders;
    switch (adapter) {
      case "anthropic":
        {
          upstreamUrl = `${baseUrl}/v1/messages`;
          upstreamBody = openaiToAnthropicRequest(request);
          upstreamHeaders = buildUpstreamHeaders(modelConfig, "anthropic");
          break;
        }
      case "gemini":
        {
          // Gemini uses distinct RPC names for streaming vs. non-streaming.
          const method = request.stream ? "streamGenerateContent" : "generateContent";
          upstreamUrl = `${baseUrl}/v1/models/${request.model}:${method}`;
          upstreamBody = openaiToGeminiRequest(request);
          upstreamHeaders = buildUpstreamHeaders(modelConfig, "gemini");
          break;
        }
      default:
        {
          // OpenAI adapter - passthrough with parameter normalization
          upstreamUrl = `${baseUrl}/v1/chat/completions`;
          upstreamBody = normalizeOpenAIRequest(request);
          upstreamHeaders = buildUpstreamHeaders(modelConfig, "openai");
        }
    }
    // Set provider info in audit context
    auditCtx.setProvider({
      provider: adapter,
      upstreamUrl
    });
    // Handle streaming
    if (request.stream) {
      // Streaming path completes the audit itself inside handleStreamingRequest.
      return handleStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, adapter, request.model, auditCtx);
    }
    // Non-streaming request
    const response = await makeUpstreamRequest(upstreamUrl, upstreamBody, upstreamHeaders);
    const responseData = await response.json();
    // Convert response if needed (back into OpenAI chat.completion shape)
    let result;
    switch (adapter) {
      case "anthropic":
        result = anthropicToOpenaiResponse(responseData, request.model);
        break;
      case "gemini":
        result = geminiToOpenaiResponse(responseData, request.model);
        break;
      default:
        result = responseData;
    }
    // Record token usage and complete audit
    if (result.usage) {
      auditCtx.setTokenUsage(result.usage.prompt_tokens || 0, result.usage.completion_tokens || 0);
    }
    auditCtx.setResponseMeta({
      finish_reason: result.choices?.[0]?.finish_reason,
      model: result.model
    });
    await auditCtx.complete(200);
    // Log response
    const usage = result.usage;
    log.info(`← 200 /v1/chat/completions model=${result.model || request.model} tokens=${usage?.total_tokens || 0} (in=${usage?.prompt_tokens || 0} out=${usage?.completion_tokens || 0})`);
    return c.json(result);
  }
  catch (error) {
    // Record error in audit
    if (auditCtx) {
      await auditCtx.error(error instanceof Error ? error.message : "Unknown error", "internal_error", 500);
    }
    log.error("Chat completion error:", error);
    // NOTE(review): upstream 4xx failures also surface as 500 here, since
    // makeUpstreamRequest throws a generic Error — confirm whether upstream
    // status codes should be propagated to the client.
    return c.json({
      error: {
        message: error instanceof Error ? error.message : "Internal server error",
        type: "api_error",
        code: "internal_error"
      }
    }, 500);
  }
});
|
|
246
|
+
/**
 * Handle streaming request
 *
 * Proxies an upstream SSE stream back to the client, converting each event
 * into OpenAI chat.completion.chunk format via convertStreamEvent, and
 * records TTFT plus output-token usage on the audit context.
 *
 * @param c - Hono context for the current request.
 * @param upstreamUrl - Fully-built upstream endpoint URL.
 * @param upstreamBody - Protocol-converted request payload.
 * @param upstreamHeaders - Headers produced by buildUpstreamHeaders.
 * @param adapter - Upstream protocol ("anthropic" | "gemini" | other = passthrough).
 * @param model - Model name echoed into converted chunks.
 * @param auditCtx - Audit context; completed/errored here (may be null).
 * @returns The SSE streaming Response.
 */ async function handleStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, adapter, model, auditCtx) {
  const response = await makeUpstreamRequest(upstreamUrl, upstreamBody, upstreamHeaders, true);
  let firstTokenRecorded = false;
  let totalOutputTokens = 0;
  // For streaming, we need to convert events on the fly
  // NOTE(review): this assumes the upstream emits SSE lines prefixed "data: ".
  // Gemini's streamGenerateContent can return a JSON array unless alt=sse is
  // requested — confirm the gemini streaming path actually yields SSE here.
  return streamSSE(c, async (stream) => {
    const reader = response.body?.getReader();
    if (!reader) {
      // No body to proxy: terminate the client stream cleanly.
      await stream.writeSSE({
        data: "[DONE]"
      });
      if (auditCtx)
        await auditCtx.complete(200);
      return;
    }
    const decoder = new TextDecoder();
    let buffer = "";
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done)
          break;
        buffer += decoder.decode(value, {
          stream: true
        });
        // Split on newlines; keep the last (possibly partial) line in the buffer.
        const lines = buffer.split("\n");
        buffer = lines.pop() || "";
        for (const line of lines) {
          if (!line.trim())
            continue;
          if (line.startsWith("data: ")) {
            const data = line.slice(6);
            if (data === "[DONE]") {
              await stream.writeSSE({
                data: "[DONE]"
              });
              continue;
            }
            try {
              const parsed = JSON.parse(data);
              // May return null for events with no OpenAI equivalent; those are dropped.
              const converted = convertStreamEvent(parsed, adapter, model);
              if (converted) {
                // Record first token for TTFT
                if (!firstTokenRecorded && auditCtx) {
                  auditCtx.recordFirstToken();
                  firstTokenRecorded = true;
                }
                // Track usage from stream events
                if (converted.usage) {
                  totalOutputTokens = converted.usage.completion_tokens || totalOutputTokens;
                }
                await stream.writeSSE({
                  data: JSON.stringify(converted)
                });
              }
            }
            catch {
              // Skip invalid JSON
            }
          }
          else if (line.startsWith("event: ")) { } // SSE event names are intentionally ignored
        }
      }
      // Send done signal
      await stream.writeSSE({
        data: "[DONE]"
      });
      // Complete audit
      if (auditCtx) {
        auditCtx.setTokenUsage(0, totalOutputTokens); // Input tokens not available in streaming
        await auditCtx.complete(200);
      }
    }
    catch (err) {
      // Record streaming error
      if (auditCtx) {
        await auditCtx.error(err instanceof Error ? err.message : "Streaming error", "streaming_error", 500);
      }
      throw err;
    }
    finally {
      reader.releaseLock();
    }
  });
}
|
|
333
|
+
/**
 * Convert stream event to OpenAI format.
 * Dispatches on the adapter; events from an OpenAI-protocol upstream are
 * returned unchanged.
 *
 * @param {object} event - Parsed upstream stream event.
 * @param {string} adapter - Upstream protocol name.
 * @param {string} model - Model name echoed into converted chunks.
 * @returns {object|null} OpenAI-format chunk, or null when the event has no equivalent.
 */ function convertStreamEvent(event, adapter, model) {
  if (adapter === "anthropic") {
    return convertAnthropicStreamEvent(event, model);
  }
  if (adapter === "gemini") {
    return convertGeminiStreamEvent(event, model);
  }
  return event;
}
|
|
345
|
+
/**
 * Convert Anthropic stream event to OpenAI format.
 *
 * Handles `content_block_delta` (text only) and `message_delta` events;
 * all other event types (message_start, ping, content_block_start, ...)
 * return null and are dropped by the caller.
 *
 * Stop-reason mapping (Anthropic → OpenAI):
 *   end_turn / stop_sequence → "stop", max_tokens → "length",
 *   tool_use → "tool_calls", anything else → null.
 *
 * @param {object} event - Parsed Anthropic SSE event.
 * @param {string} model - Model name echoed into the chunk.
 * @returns {object|null} OpenAI chat.completion.chunk, or null.
 */ function convertAnthropicStreamEvent(event, model) {
  if (event.type === "content_block_delta") {
    if (event.delta?.type === "text_delta") {
      return {
        id: `chatcmpl-${Date.now()}`,
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1000),
        model,
        choices: [
          {
            index: 0,
            delta: {
              content: event.delta.text
            },
            finish_reason: null
          }
        ]
      };
    }
  }
  else if (event.type === "message_delta") {
    // Map all documented Anthropic stop reasons; previously "max_tokens" and
    // "stop_sequence" fell through to null, so truncated/stop-sequence streams
    // never reported a finish reason to OpenAI clients.
    const stopReason = event.delta?.stop_reason;
    const finishReason = stopReason === "end_turn" || stopReason === "stop_sequence" ? "stop" : stopReason === "max_tokens" ? "length" : stopReason === "tool_use" ? "tool_calls" : null;
    return {
      id: `chatcmpl-${Date.now()}`,
      object: "chat.completion.chunk",
      created: Math.floor(Date.now() / 1000),
      model,
      choices: [
        {
          index: 0,
          delta: {},
          finish_reason: finishReason
        }
      ],
      // message_delta carries cumulative output token usage; input tokens are
      // not present on this event, hence prompt_tokens: 0.
      usage: event.usage ? {
        prompt_tokens: 0,
        completion_tokens: event.usage.output_tokens,
        total_tokens: event.usage.output_tokens
      } : undefined
    };
  }
  return null;
}
|
|
390
|
+
/**
 * Convert Gemini stream event to OpenAI format.
 *
 * Extracts text parts from the first candidate; chunks with no text parts
 * (or no candidates) return null and are dropped by the caller.
 *
 * finishReason mapping (Gemini → OpenAI):
 *   STOP → "stop", MAX_TOKENS → "length", anything else → null.
 *
 * @param {object} event - Parsed Gemini streamGenerateContent chunk.
 * @param {string} model - Model name echoed into the chunk.
 * @returns {object|null} OpenAI chat.completion.chunk, or null.
 */ function convertGeminiStreamEvent(event, model) {
  if (event.candidates?.[0]?.content?.parts) {
    const parts = event.candidates[0].content.parts;
    const textParts = parts.filter((p) => p.text);
    if (textParts.length > 0) {
      // Previously only STOP was mapped; MAX_TOKENS fell through to null, so
      // truncated streams never reported finish_reason "length".
      const upstreamFinish = event.candidates[0].finishReason;
      const finishReason = upstreamFinish === "STOP" ? "stop" : upstreamFinish === "MAX_TOKENS" ? "length" : null;
      return {
        id: `chatcmpl-${Date.now()}`,
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1000),
        model,
        choices: [
          {
            index: 0,
            delta: {
              content: textParts.map((p) => p.text).join("")
            },
            finish_reason: finishReason
          }
        ]
      };
    }
  }
  return null;
}
|
|
416
|
+
// =========================================================================
// Anthropic Messages API
// =========================================================================
// Accepts Anthropic-format requests and proxies them to the configured
// upstream. The parsed request body is forwarded as-is (no protocol
// conversion happens in this handler).
app.post("/v1/messages", async (c) => {
  try {
    const body = await c.req.json();
    const parseResult = CreateMessageRequestSchema.safeParse(body);
    if (!parseResult.success) {
      // Anthropic-style error envelope ({ type: "error", error: {...} }).
      return c.json({
        type: "error",
        error: {
          type: "invalid_request_error",
          message: "Invalid request"
        }
      }, 400);
    }
    const request = parseResult.data;
    const modelConfig = resolveModelConfig(request.model);
    if (!modelConfig) {
      return c.json({
        type: "error",
        error: {
          type: "invalid_request_error",
          message: `Model ${request.model} not configured`
        }
      }, 404);
    }
    // For Anthropic endpoint, we pass through to Anthropic or convert from OpenAI
    // NOTE(review): despite the comment above, no OpenAI conversion is performed
    // here — a non-anthropic adapter only changes the baseUrl lookup, while the
    // body and headers remain Anthropic-shaped. Confirm intended behavior.
    const adapter = modelConfig.adapter || "anthropic";
    const baseUrl = normalizeBaseUrl(modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || "https://api.anthropic.com");
    const upstreamUrl = `${baseUrl}/v1/messages`;
    const upstreamHeaders = buildUpstreamHeaders(modelConfig, "anthropic");
    // Log incoming request
    log.info(`→ POST /v1/messages model=${request.model} messages=${request.messages.length}`);
    // NOTE(review): request.stream is forwarded upstream but the response is
    // always read with response.json(); a stream=true request would receive an
    // SSE body and fail here — confirm streaming is handled elsewhere.
    const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders);
    const responseData = await response.json();
    // Log response
    const usage = responseData.usage;
    log.info(`← 200 /v1/messages model=${responseData.model || request.model} tokens=${(usage?.input_tokens || 0) + (usage?.output_tokens || 0)} (in=${usage?.input_tokens || 0} out=${usage?.output_tokens || 0})`);
    return c.json(responseData);
  }
  catch (error) {
    log.error("Messages error:", error);
    return c.json({
      type: "error",
      error: {
        type: "api_error",
        message: error instanceof Error ? error.message : "Internal server error"
      }
    }, 500);
  }
});
|
|
468
|
+
// =========================================================================
// Gemini Generate Content API
// =========================================================================
// Accepts Gemini-format non-streaming requests at /v1/models/{model}:generateContent
// (the ":" in the RPC suffix is escaped so Hono treats it literally) and
// proxies them to the configured upstream without protocol conversion.
app.post("/v1/models/:model\\:generateContent", async (c) => {
  try {
    const model = c.req.param("model");
    if (!model) {
      return c.json({
        error: {
          message: "Model parameter is required"
        }
      }, 400);
    }
    const body = await c.req.json();
    const parseResult = CreateGenerateContentRequestSchema.safeParse(body);
    if (!parseResult.success) {
      return c.json({
        error: {
          message: "Invalid request"
        }
      }, 400);
    }
    const request = parseResult.data;
    const modelConfig = resolveModelConfig(model);
    if (!modelConfig) {
      return c.json({
        error: {
          message: `Model ${model} not configured`
        }
      }, 404);
    }
    // Gemini-specific baseUrl wins over the model-level one; trailing /v1 stripped.
    const baseUrl = normalizeBaseUrl(modelConfig.adapters?.gemini?.baseUrl || modelConfig.baseUrl || "https://generativelanguage.googleapis.com");
    const upstreamUrl = `${baseUrl}/v1/models/${model}:generateContent`;
    const upstreamHeaders = buildUpstreamHeaders(modelConfig, "gemini");
    // Log incoming request
    log.info(`→ POST /v1/models/${model}:generateContent contents=${request.contents?.length || 0}`);
    const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders);
    const responseData = await response.json();
    // Log response (token counts come from Gemini's usageMetadata)
    const usage = responseData.usageMetadata;
    log.info(`← 200 /v1/models/${model}:generateContent tokens=${usage?.totalTokenCount || 0} (in=${usage?.promptTokenCount || 0} out=${usage?.candidatesTokenCount || 0})`);
    return c.json(responseData);
  }
  catch (error) {
    log.error("Generate content error:", error);
    return c.json({
      error: {
        message: error instanceof Error ? error.message : "Internal server error"
      }
    }, 500);
  }
});
|
|
520
|
+
// =========================================================================
// Gemini Streaming Generate Content API
// =========================================================================
// Accepts Gemini-format streaming requests and relays the upstream stream to
// the client as SSE, passing "data:" payloads through without conversion.
app.post("/v1/models/:model\\:streamGenerateContent", async (c) => {
  try {
    const model = c.req.param("model");
    if (!model) {
      return c.json({
        error: {
          message: "Model parameter is required"
        }
      }, 400);
    }
    const body = await c.req.json();
    const parseResult = CreateGenerateContentRequestSchema.safeParse(body);
    if (!parseResult.success) {
      return c.json({
        error: {
          message: "Invalid request"
        }
      }, 400);
    }
    const request = parseResult.data;
    // Log incoming request
    log.info(`→ POST /v1/models/${model}:streamGenerateContent contents=${request.contents?.length || 0}`);
    const modelConfig = resolveModelConfig(model);
    if (!modelConfig) {
      return c.json({
        error: {
          message: `Model ${model} not configured`
        }
      }, 404);
    }
    const baseUrl = normalizeBaseUrl(modelConfig.adapters?.gemini?.baseUrl || modelConfig.baseUrl || "https://generativelanguage.googleapis.com");
    const upstreamUrl = `${baseUrl}/v1/models/${model}:streamGenerateContent`;
    const upstreamHeaders = buildUpstreamHeaders(modelConfig, "gemini");
    const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders, true);
    // Stream the response directly
    // NOTE(review): this parser assumes SSE lines prefixed "data: ". Gemini's
    // streamGenerateContent returns a JSON array unless the request carries
    // alt=sse, which is not added here — confirm the upstream framing.
    return streamSSE(c, async (stream) => {
      const reader = response.body?.getReader();
      if (!reader) {
        await stream.writeSSE({
          data: "[DONE]"
        });
        return;
      }
      const decoder = new TextDecoder();
      let buffer = "";
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done)
            break;
          buffer += decoder.decode(value, {
            stream: true
          });
          // Split on newlines; keep the last (possibly partial) line buffered.
          const lines = buffer.split("\n");
          buffer = lines.pop() || "";
          for (const line of lines) {
            if (!line.trim())
              continue;
            if (line.startsWith("data: ")) {
              const data = line.slice(6);
              if (data === "[DONE]") {
                await stream.writeSSE({
                  data: "[DONE]"
                });
                continue;
              }
              // Passthrough: payload is forwarded verbatim, no conversion.
              await stream.writeSSE({
                data
              });
            }
          }
        }
        await stream.writeSSE({
          data: "[DONE]"
        });
      }
      finally {
        reader.releaseLock();
      }
    });
  }
  catch (error) {
    log.error("Stream generate content error:", error);
    return c.json({
      error: {
        message: error instanceof Error ? error.message : "Internal server error"
      }
    }, 500);
  }
});
|
|
613
|
+
// =========================================================================
// OpenAI Responses API
// =========================================================================
// POST /v1/responses — accepts an OpenAI Responses API request, translates it
// to a Chat Completions request for the configured upstream, then translates
// the result back. Supports previous_response_id continuation (loaded from the
// local DB when initialized) and SSE streaming. Audit records are written via
// auditCtx for both success and failure paths.
app.post("/v1/responses", async (c) => {
    // Created after validation succeeds; checked in the catch so that
    // validation failures do not produce audit error records.
    let auditCtx = null;
    try {
        const body = await c.req.json();
        const parseResult = CreateResponseRequestSchema.safeParse(body);
        if (!parseResult.success) {
            // Invalid payload: OpenAI-style error envelope with zod issues attached.
            return c.json({
                error: {
                    message: "Invalid request",
                    type: "invalid_request_error",
                    param: null,
                    code: "invalid_request",
                    details: parseResult.error.issues
                }
            }, 400);
        }
        const request = parseResult.data;
        // Handle previous_response_id - load previous context
        let previousContext = null;
        if (request.previous_response_id) {
            try {
                // DB modules are imported lazily so the handler works even when
                // the server runs without database support.
                const { isDbInitialized, getEntityManager } = await import("../server/db.js");
                const { ResponseEntity } = await import("../entities/index.js");
                if (isDbInitialized()) {
                    // Fork the EntityManager to get a request-scoped identity map.
                    const em = getEntityManager().fork();
                    const prevResponse = await em.findOne(ResponseEntity, {
                        responseId: request.previous_response_id
                    });
                    if (prevResponse) {
                        previousContext = {
                            input: prevResponse.input,
                            output: prevResponse.output
                        };
                        log.info(`Loaded previous response: ${request.previous_response_id}`);
                    }
                    else {
                        // Missing prior response is non-fatal; continue without context.
                        log.warn(`Previous response not found: ${request.previous_response_id}`);
                    }
                }
            }
            catch (e) {
                // Best-effort lookup: context loading must never fail the request.
                log.warn("Failed to load previous response:", e);
            }
        }
        const modelConfig = resolveModelConfig(request.model);
        if (!modelConfig) {
            return c.json({
                error: {
                    message: `Model ${request.model} not configured`,
                    type: "invalid_request_error",
                    param: "model",
                    code: "model_not_found"
                }
            }, 404);
        }
        // Determine adapter - for Responses API, we convert to Chat Completions
        const adapter = modelConfig.adapter || "openai";
        // Base URL precedence: adapter-specific > model-level > OpenAI default.
        const baseUrl = normalizeBaseUrl(modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || "https://api.openai.com");
        // Create audit context
        auditCtx = createAuditContext({
            method: "POST",
            endpoint: "/v1/responses",
            model: request.model,
            inputProtocol: ChatProtocol.OPENAI,
            outputProtocol: ChatProtocol.OPENAI,
            streaming: request.stream || false,
            clientIp: extractClientIp(c),
            userAgent: c.req.header("user-agent"),
            requestMeta: {
                temperature: request.temperature,
                max_output_tokens: request.max_output_tokens
            }
        });
        // Log incoming request
        const inputType = typeof request.input === "string" ? "string" : Array.isArray(request.input) ? `array[${request.input.length}]` : "object";
        log.info(`→ POST /v1/responses model=${request.model} stream=${request.stream || false} input=${inputType}`);
        // Convert Responses API request to Chat Completions format
        const chatRequest = responsesToChatCompletions(request, previousContext);
        // Build upstream request
        const upstreamUrl = `${baseUrl}/v1/chat/completions`;
        const upstreamHeaders = buildUpstreamHeaders(modelConfig, "openai");
        auditCtx.setProvider({
            provider: adapter,
            upstreamUrl
        });
        // Handle streaming
        if (request.stream) {
            // Streaming path hands off audit completion to the stream handler.
            return handleResponsesStreamingRequest(c, upstreamUrl, chatRequest, upstreamHeaders, request.model, auditCtx);
        }
        // Non-streaming request
        const response = await makeUpstreamRequest(upstreamUrl, chatRequest, upstreamHeaders);
        const chatResponse = await response.json();
        // Convert Chat Completions response to Responses format
        const result = chatCompletionsToResponses(chatResponse, request.model);
        // Store response for future previous_response_id lookups
        try {
            const { isDbInitialized, getEntityManager } = await import("../server/db.js");
            const { ResponseEntity } = await import("../entities/index.js");
            if (isDbInitialized()) {
                const em = getEntityManager().fork();
                const responseEntity = new ResponseEntity();
                responseEntity.responseId = result.id;
                responseEntity.model = result.model;
                responseEntity.status = result.status;
                // Persist the ORIGINAL request input (not the converted chat
                // messages) so a later previous_response_id replay is faithful.
                responseEntity.input = request.input;
                responseEntity.output = result.output;
                responseEntity.usage = result.usage;
                // `?? undefined` normalizes explicit nulls to absent columns.
                responseEntity.instructions = request.instructions ?? undefined;
                responseEntity.previousResponseId = request.previous_response_id ?? undefined;
                responseEntity.tools = request.tools ?? undefined;
                responseEntity.toolChoice = request.tool_choice ?? undefined;
                responseEntity.metadata = request.metadata;
                responseEntity.durationMs = auditCtx?.getDuration();
                em.persist(responseEntity);
                await em.flush();
                log.debug(`Stored response: ${result.id}`);
            }
        }
        catch (e) {
            // Persistence is best-effort; the client still receives the result.
            log.warn("Failed to store response:", e);
        }
        // Record usage
        if (chatResponse.usage) {
            auditCtx.setTokenUsage(chatResponse.usage.prompt_tokens || 0, chatResponse.usage.completion_tokens || 0);
        }
        auditCtx.setResponseMeta({
            status: result.status,
            output_items: result.output.length
        });
        await auditCtx.complete(200);
        // Log response
        const usage = chatResponse.usage;
        log.info(`← 200 /v1/responses model=${result.model || request.model} status=${result.status} tokens=${usage?.total_tokens || 0}`);
        return c.json(result);
    }
    catch (error) {
        if (auditCtx) {
            await auditCtx.error(error instanceof Error ? error.message : "Unknown error", "internal_error", 500);
        }
        log.error("Responses API error:", error);
        return c.json({
            error: {
                message: error instanceof Error ? error.message : "Internal server error",
                type: "api_error",
                code: "internal_error"
            }
        }, 500);
    }
});
|
|
765
|
+
/**
 * Translate an OpenAI Responses API request into an equivalent
 * Chat Completions request body.
 *
 * Message order: system instructions first, then the prior turn
 * (previous input followed by the assistant's prior text output) when
 * `previousContext` is supplied, and finally the current `request.input`.
 *
 * @param {object} request - Parsed Responses API request.
 * @param {{input: any, output: any[]}|null} previousContext - Stored prior
 *   turn loaded via `previous_response_id`, or null when none.
 * @returns {object} Chat Completions request payload.
 */ function responsesToChatCompletions(request, previousContext) {
    const messages = [];
    // Flatten a Responses-style input (plain string or item list) into chat messages.
    const appendInput = (input) => {
        if (typeof input === "string") {
            messages.push({
                role: "user",
                content: input
            });
            return;
        }
        if (!Array.isArray(input)) {
            return;
        }
        for (const item of input) {
            // Only "message" items translate directly; item_reference would
            // need a lookup of the referenced item and is skipped here.
            if (item.type !== "message") {
                continue;
            }
            const content = typeof item.content === "string" ? item.content : JSON.stringify(item.content);
            messages.push({
                role: item.role,
                content
            });
        }
    };
    // System instructions lead the conversation when present.
    if (request.instructions) {
        messages.push({
            role: "system",
            content: request.instructions
        });
    }
    if (previousContext) {
        // Replay the prior turn: its input first...
        appendInput(previousContext.input);
        // ...then the assistant's prior text output.
        for (const item of previousContext.output) {
            if (item.type !== "message" || item.role !== "assistant") {
                continue;
            }
            const textPart = item.content?.find((part) => part.type === "text" || part.type === "output_text");
            if (textPart) {
                messages.push({
                    role: "assistant",
                    content: textPart.text
                });
            }
        }
    }
    // Current turn comes last.
    appendInput(request.input);
    return {
        model: request.model,
        messages,
        temperature: request.temperature,
        top_p: request.top_p,
        max_tokens: request.max_output_tokens,
        stream: request.stream,
        tools: request.tools,
        tool_choice: request.tool_choice,
        parallel_tool_calls: request.parallel_tool_calls,
        metadata: request.metadata,
        store: request.store,
        user: request.user
    };
}
|
|
842
|
+
/**
 * Translate a Chat Completions response into the OpenAI Responses API shape.
 *
 * Each choice becomes an assistant "message" output item (text wrapped as a
 * single content part), followed by one "function_call" item per tool call.
 *
 * @param {object} chatResponse - Upstream Chat Completions response body.
 * @param {string} model - Fallback model name when the upstream omits one.
 * @returns {object} Responses API response object (status "completed").
 */ function chatCompletionsToResponses(chatResponse, model) {
    const responseId = `resp_${chatResponse.id || Date.now()}`;
    const output = [];
    for (const choice of chatResponse.choices || []) {
        const msg = choice.message;
        if (!msg) {
            continue;
        }
        // Empty/absent text yields an empty content array rather than a null part.
        const contentParts = msg.content ? [
            {
                type: "text",
                text: msg.content
            }
        ] : [];
        output.push({
            id: `item_${responseId}_${choice.index}`,
            type: "message",
            role: "assistant",
            content: contentParts,
            status: "completed"
        });
        // Tool calls follow the message item they belong to.
        for (const call of msg.tool_calls || []) {
            output.push({
                id: call.id,
                type: "function_call",
                name: call.function?.name,
                arguments: call.function?.arguments,
                status: "completed"
            });
        }
    }
    return {
        id: responseId,
        object: "response",
        created_at: chatResponse.created || Math.floor(Date.now() / 1000),
        model: chatResponse.model || model,
        status: "completed",
        output,
        usage: chatResponse.usage,
        metadata: {},
        error: null
    };
}
|
|
888
|
+
/**
 * Handle Responses API streaming by proxying the upstream Chat Completions
 * SSE stream and re-emitting it as Responses API events:
 * `response.created`, `response.output_text.delta` per content delta,
 * then `response.output_item.done` and `response.done` on completion.
 *
 * The audit context (when provided) records time-to-first-token, is
 * completed with 200 at stream end, and is errored on stream failure.
 *
 * @param c - Hono context for the incoming request.
 * @param upstreamUrl - Resolved upstream chat-completions URL.
 * @param upstreamBody - Chat Completions request body (stream is forced on).
 * @param upstreamHeaders - Headers to send upstream.
 * @param model - Model name echoed in the response.created event.
 * @param auditCtx - Optional audit context; may be null.
 */ async function handleResponsesStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, model, auditCtx) {
    const response = await makeUpstreamRequest(upstreamUrl, {
        ...upstreamBody,
        stream: true
    }, upstreamHeaders, true);
    let firstTokenRecorded = false;
    // Synthesized id: upstream deltas do not carry a Responses-style id.
    const responseId = `resp_${Date.now()}`;
    return streamSSE(c, async (stream) => {
        const reader = response.body?.getReader();
        if (!reader) {
            // No upstream body: close out immediately with a done event.
            await stream.writeSSE({
                event: "response.done",
                data: JSON.stringify({
                    type: "response.done"
                })
            });
            if (auditCtx)
                await auditCtx.complete(200);
            return;
        }
        // Send initial event
        await stream.writeSSE({
            event: "response.created",
            data: JSON.stringify({
                type: "response.created",
                response: {
                    id: responseId,
                    object: "response",
                    created_at: Math.floor(Date.now() / 1000),
                    model,
                    status: "in_progress",
                    output: []
                }
            })
        });
        const decoder = new TextDecoder();
        // Accumulates partial SSE lines across reads.
        let buffer = "";
        // Fix: never reassigned, so declare as const (was `let`).
        const outputItemId = `item_${responseId}_0`;
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done)
                    break;
                buffer += decoder.decode(value, {
                    stream: true
                });
                const lines = buffer.split("\n");
                // Keep the trailing partial line for the next chunk.
                buffer = lines.pop() || "";
                for (const line of lines) {
                    if (!line.trim() || !line.startsWith("data: "))
                        continue;
                    const data = line.slice(6);
                    if (data === "[DONE]")
                        continue;
                    try {
                        const parsed = JSON.parse(data);
                        const delta = parsed.choices?.[0]?.delta;
                        if (delta?.content) {
                            if (!firstTokenRecorded && auditCtx) {
                                auditCtx.recordFirstToken();
                                firstTokenRecorded = true;
                            }
                            await stream.writeSSE({
                                event: "response.output_text.delta",
                                data: JSON.stringify({
                                    type: "response.output_text.delta",
                                    output_index: 0,
                                    content_index: 0,
                                    delta: delta.content
                                })
                            });
                        }
                    }
                    catch {
                        // Skip invalid JSON
                    }
                }
            }
            // Send completion events
            await stream.writeSSE({
                event: "response.output_item.done",
                data: JSON.stringify({
                    type: "response.output_item.done",
                    output_index: 0,
                    item: {
                        id: outputItemId,
                        type: "message",
                        role: "assistant",
                        status: "completed"
                    }
                })
            });
            await stream.writeSSE({
                event: "response.done",
                data: JSON.stringify({
                    type: "response.done",
                    response: {
                        id: responseId,
                        status: "completed"
                    }
                })
            });
            if (auditCtx)
                await auditCtx.complete(200);
        }
        catch (err) {
            if (auditCtx) {
                await auditCtx.error(err instanceof Error ? err.message : "Streaming error", "streaming_error", 500);
            }
            throw err;
        }
        finally {
            reader.releaseLock();
        }
    });
}
|
|
1006
|
+
// =========================================================================
// Models endpoint
// =========================================================================
// GET /v1/models — lists configured models; with ?fetch=true also queries
// each eligible upstream provider for its model catalog, deduplicated by id.
app.get("/v1/models", async (c) => {
    const includeUpstream = c.req.query("fetch") === "true";
    const models = [];
    // Track ids already listed so upstream results never duplicate an entry.
    const seenIds = new Set();
    const addModel = (entry) => {
        models.push(entry);
        seenIds.add(entry.id);
    };
    for (const m of config.models || []) {
        // Configured model entry, always included.
        addModel({
            id: m.name,
            object: "model",
            created: Math.floor(Date.now() / 1000),
            owned_by: "mcps",
            context_window: m.contextWindow,
            max_input_tokens: m.maxInputTokens,
            max_output_tokens: m.maxOutputTokens
        });
        // Upstream fetch only when requested AND enabled AND a base URL exists.
        if (!(includeUpstream && m.fetchUpstreamModels && m.baseUrl)) {
            continue;
        }
        try {
            const headers = {
                "Content-Type": "application/json"
            };
            if (m.apiKey) {
                headers.Authorization = `Bearer ${m.apiKey}`;
            }
            // Per-model custom headers override/extend the defaults.
            Object.assign(headers, m.headers || {});
            const res = await fetch(`${normalizeBaseUrl(m.baseUrl)}/v1/models`, {
                headers
            });
            if (!res.ok) {
                continue;
            }
            const payload = await res.json();
            if (!payload.data || !Array.isArray(payload.data)) {
                continue;
            }
            for (const upstreamModel of payload.data) {
                // Avoid duplicates
                if (seenIds.has(upstreamModel.id)) {
                    continue;
                }
                addModel({
                    id: upstreamModel.id,
                    object: upstreamModel.object || "model",
                    created: upstreamModel.created || Math.floor(Date.now() / 1000),
                    owned_by: upstreamModel.owned_by || m.name.split("/")[0] || "upstream"
                });
            }
        }
        catch (e) {
            // Upstream failures are non-fatal; the configured list still returns.
            log.warn(`Failed to fetch upstream models from ${m.baseUrl}: ${e}`);
        }
    }
    log.debug(`→ GET /v1/models count=${models.length} fetch=${includeUpstream}`);
    return c.json({
        object: "list",
        data: models
    });
});
|
|
1066
|
+
return app;
|
|
1067
|
+
}
|
|
1068
|
+
//# sourceMappingURL=handler.js.map
|