@wener/mcps 1.0.1

Files changed (141)
  1. package/LICENSE +21 -0
  2. package/dist/index.mjs +15 -0
  3. package/dist/mcps-cli.mjs +174727 -0
  4. package/lib/chat/agent.js +187 -0
  5. package/lib/chat/agent.js.map +1 -0
  6. package/lib/chat/audit.js +238 -0
  7. package/lib/chat/audit.js.map +1 -0
  8. package/lib/chat/converters.js +467 -0
  9. package/lib/chat/converters.js.map +1 -0
  10. package/lib/chat/handler.js +1068 -0
  11. package/lib/chat/handler.js.map +1 -0
  12. package/lib/chat/index.js +12 -0
  13. package/lib/chat/index.js.map +1 -0
  14. package/lib/chat/types.js +35 -0
  15. package/lib/chat/types.js.map +1 -0
  16. package/lib/contracts/AuditContract.js +85 -0
  17. package/lib/contracts/AuditContract.js.map +1 -0
  18. package/lib/contracts/McpsContract.js +113 -0
  19. package/lib/contracts/McpsContract.js.map +1 -0
  20. package/lib/contracts/index.js +3 -0
  21. package/lib/contracts/index.js.map +1 -0
  22. package/lib/dev.server.js +7 -0
  23. package/lib/dev.server.js.map +1 -0
  24. package/lib/entities/ChatRequestEntity.js +318 -0
  25. package/lib/entities/ChatRequestEntity.js.map +1 -0
  26. package/lib/entities/McpRequestEntity.js +271 -0
  27. package/lib/entities/McpRequestEntity.js.map +1 -0
  28. package/lib/entities/RequestLogEntity.js +177 -0
  29. package/lib/entities/RequestLogEntity.js.map +1 -0
  30. package/lib/entities/ResponseEntity.js +150 -0
  31. package/lib/entities/ResponseEntity.js.map +1 -0
  32. package/lib/entities/index.js +11 -0
  33. package/lib/entities/index.js.map +1 -0
  34. package/lib/entities/types.js +11 -0
  35. package/lib/entities/types.js.map +1 -0
  36. package/lib/index.js +3 -0
  37. package/lib/index.js.map +1 -0
  38. package/lib/mcps-cli.js +44 -0
  39. package/lib/mcps-cli.js.map +1 -0
  40. package/lib/providers/McpServerHandlerDef.js +40 -0
  41. package/lib/providers/McpServerHandlerDef.js.map +1 -0
  42. package/lib/providers/findMcpServerDef.js +26 -0
  43. package/lib/providers/findMcpServerDef.js.map +1 -0
  44. package/lib/providers/prometheus/def.js +24 -0
  45. package/lib/providers/prometheus/def.js.map +1 -0
  46. package/lib/providers/prometheus/index.js +2 -0
  47. package/lib/providers/prometheus/index.js.map +1 -0
  48. package/lib/providers/relay/def.js +32 -0
  49. package/lib/providers/relay/def.js.map +1 -0
  50. package/lib/providers/relay/index.js +2 -0
  51. package/lib/providers/relay/index.js.map +1 -0
  52. package/lib/providers/sql/def.js +31 -0
  53. package/lib/providers/sql/def.js.map +1 -0
  54. package/lib/providers/sql/index.js +2 -0
  55. package/lib/providers/sql/index.js.map +1 -0
  56. package/lib/providers/tencent-cls/def.js +44 -0
  57. package/lib/providers/tencent-cls/def.js.map +1 -0
  58. package/lib/providers/tencent-cls/index.js +2 -0
  59. package/lib/providers/tencent-cls/index.js.map +1 -0
  60. package/lib/scripts/bundle.js +90 -0
  61. package/lib/scripts/bundle.js.map +1 -0
  62. package/lib/server/api-routes.js +96 -0
  63. package/lib/server/api-routes.js.map +1 -0
  64. package/lib/server/audit.js +274 -0
  65. package/lib/server/audit.js.map +1 -0
  66. package/lib/server/chat-routes.js +82 -0
  67. package/lib/server/chat-routes.js.map +1 -0
  68. package/lib/server/config.js +223 -0
  69. package/lib/server/config.js.map +1 -0
  70. package/lib/server/db.js +97 -0
  71. package/lib/server/db.js.map +1 -0
  72. package/lib/server/index.js +2 -0
  73. package/lib/server/index.js.map +1 -0
  74. package/lib/server/mcp-handler.js +167 -0
  75. package/lib/server/mcp-handler.js.map +1 -0
  76. package/lib/server/mcp-routes.js +112 -0
  77. package/lib/server/mcp-routes.js.map +1 -0
  78. package/lib/server/mcps-router.js +119 -0
  79. package/lib/server/mcps-router.js.map +1 -0
  80. package/lib/server/schema.js +129 -0
  81. package/lib/server/schema.js.map +1 -0
  82. package/lib/server/server.js +166 -0
  83. package/lib/server/server.js.map +1 -0
  84. package/lib/web/ChatPage.js +827 -0
  85. package/lib/web/ChatPage.js.map +1 -0
  86. package/lib/web/McpInspectorPage.js +214 -0
  87. package/lib/web/McpInspectorPage.js.map +1 -0
  88. package/lib/web/ServersPage.js +93 -0
  89. package/lib/web/ServersPage.js.map +1 -0
  90. package/lib/web/main.js +541 -0
  91. package/lib/web/main.js.map +1 -0
  92. package/package.json +83 -0
  93. package/src/chat/agent.ts +240 -0
  94. package/src/chat/audit.ts +377 -0
  95. package/src/chat/converters.test.ts +325 -0
  96. package/src/chat/converters.ts +459 -0
  97. package/src/chat/handler.test.ts +137 -0
  98. package/src/chat/handler.ts +1233 -0
  99. package/src/chat/index.ts +16 -0
  100. package/src/chat/types.ts +72 -0
  101. package/src/contracts/AuditContract.ts +93 -0
  102. package/src/contracts/McpsContract.ts +141 -0
  103. package/src/contracts/index.ts +18 -0
  104. package/src/dev.server.ts +7 -0
  105. package/src/entities/ChatRequestEntity.ts +157 -0
  106. package/src/entities/McpRequestEntity.ts +149 -0
  107. package/src/entities/RequestLogEntity.ts +78 -0
  108. package/src/entities/ResponseEntity.ts +75 -0
  109. package/src/entities/index.ts +12 -0
  110. package/src/entities/types.ts +188 -0
  111. package/src/index.ts +1 -0
  112. package/src/mcps-cli.ts +59 -0
  113. package/src/providers/McpServerHandlerDef.ts +105 -0
  114. package/src/providers/findMcpServerDef.ts +31 -0
  115. package/src/providers/prometheus/def.ts +21 -0
  116. package/src/providers/prometheus/index.ts +1 -0
  117. package/src/providers/relay/def.ts +31 -0
  118. package/src/providers/relay/index.ts +1 -0
  119. package/src/providers/relay/relay.test.ts +47 -0
  120. package/src/providers/sql/def.ts +33 -0
  121. package/src/providers/sql/index.ts +1 -0
  122. package/src/providers/tencent-cls/def.ts +38 -0
  123. package/src/providers/tencent-cls/index.ts +1 -0
  124. package/src/scripts/bundle.ts +82 -0
  125. package/src/server/api-routes.ts +98 -0
  126. package/src/server/audit.ts +310 -0
  127. package/src/server/chat-routes.ts +95 -0
  128. package/src/server/config.test.ts +162 -0
  129. package/src/server/config.ts +198 -0
  130. package/src/server/db.ts +115 -0
  131. package/src/server/index.ts +1 -0
  132. package/src/server/mcp-handler.ts +209 -0
  133. package/src/server/mcp-routes.ts +133 -0
  134. package/src/server/mcps-router.ts +133 -0
  135. package/src/server/schema.ts +175 -0
  136. package/src/server/server.ts +163 -0
  137. package/src/web/ChatPage.tsx +1005 -0
  138. package/src/web/McpInspectorPage.tsx +254 -0
  139. package/src/web/ServersPage.tsx +139 -0
  140. package/src/web/main.tsx +600 -0
  141. package/src/web/styles.css +15 -0
package/lib/chat/handler.js
@@ -0,0 +1,1068 @@
/**
 * Chat API Handler
 * Provides unified AI model gateway with protocol conversion
 */
import consola from "consola";
import { Hono } from "hono";
import { streamSSE } from "hono/streaming";
import { ChatProtocol, createAuditContext, extractClientIp } from "./audit.js";
import { openaiToAnthropicRequest, anthropicToOpenaiResponse, openaiToGeminiRequest, geminiToOpenaiResponse } from "./converters.js";
import { CreateChatCompletionRequestSchema, CreateResponseRequestSchema } from "@wener/ai/openai";
import { CreateMessageRequestSchema } from "@wener/ai/anthropic";
import { CreateGenerateContentRequestSchema } from "@wener/ai/google";

const log = consola.withTag("chat");

/**
 * Create chat handler Hono app
 */
export function createChatHandler(options = {}) {
  const app = new Hono();
  const { config = {} } = options;

  /**
   * Resolve model configuration
   */
  function resolveModelConfig(modelName) {
    const models = config.models;
    if (!models || models.length === 0) return null;
    // First pass: exact match
    for (const modelConfig of models) {
      if (modelConfig.name === modelName) {
        return modelConfig;
      }
    }
    // Second pass: wildcard matches (e.g., "gpt-*" or "claude-*")
    for (const modelConfig of models) {
      const pattern = modelConfig.name;
      if (pattern.includes("*")) {
        const regex = new RegExp(`^${pattern.replace(/\*/g, ".*")}$`);
        if (regex.test(modelName)) {
          return modelConfig;
        }
      }
    }
    return null;
  }
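
  // Illustrative resolution sketch (not from the package source; model names and
  // config entries here are hypothetical): with
  //   { models: [
  //     { name: "gpt-4o", baseUrl: "https://api.openai.com", apiKey: "sk-..." },
  //     { name: "claude-*", adapter: "anthropic", baseUrl: "https://api.anthropic.com" },
  //   ] }
  // resolution behaves as:
  //   resolveModelConfig("gpt-4o")           // exact match -> first entry
  //   resolveModelConfig("claude-3-5-haiku") // wildcard "claude-*" -> second entry
  //   resolveModelConfig("gemini-2.0-flash") // no match -> null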

  /**
   * Make request to upstream provider
   */
  async function makeUpstreamRequest(url, body, headers, _stream = false) {
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...headers,
      },
      body: JSON.stringify(body),
    });
    if (!response.ok) {
      const errorText = await response.text();
      log.error("Upstream error:", response.status, errorText);
      throw new Error(`Upstream error: ${response.status} ${errorText}`);
    }
    return response;
  }

  /**
   * Normalize base URL - strip trailing /v1 if present
   * This allows baseUrl to be specified as either "http://example.com" or "http://example.com/v1"
   */
  function normalizeBaseUrl(url) {
    return url.replace(/\/v1\/?$/, "");
  }
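
  // For example (derivable from the regex above):
  //   normalizeBaseUrl("https://api.openai.com/v1")  -> "https://api.openai.com"
  //   normalizeBaseUrl("https://api.openai.com/v1/") -> "https://api.openai.com"
  //   normalizeBaseUrl("https://api.openai.com")     -> unchanged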

  /**
   * Build headers for upstream request
   */
  function buildUpstreamHeaders(modelConfig, adapter) {
    const headers = {};
    // Add API key if configured
    if (modelConfig.apiKey) {
      if (adapter === "anthropic") {
        headers["x-api-key"] = modelConfig.apiKey;
        headers["anthropic-version"] = "2023-06-01";
      } else {
        headers.Authorization = `Bearer ${modelConfig.apiKey}`;
      }
    }
    // Add custom headers
    if (modelConfig.headers) {
      Object.assign(headers, modelConfig.headers);
    }
    // Add adapter-specific headers (assigned last, so they override the custom headers)
    if (modelConfig.adapters?.[adapter]?.headers) {
      Object.assign(headers, modelConfig.adapters[adapter]?.headers);
    }
    return headers;
  }
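
  // Sketch of the resulting headers (illustrative, assuming an apiKey of "sk-test"):
  //   buildUpstreamHeaders({ apiKey: "sk-test" }, "openai")
  //     -> { Authorization: "Bearer sk-test" }
  //   buildUpstreamHeaders({ apiKey: "sk-test" }, "anthropic")
  //     -> { "x-api-key": "sk-test", "anthropic-version": "2023-06-01" }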

  /**
   * Normalize OpenAI request for different providers
   * Handles max_tokens/max_completion_tokens compatibility and thinking parameters
   */
  function normalizeOpenAIRequest(request) {
    const normalized = { ...request };
    // Handle max_tokens vs max_completion_tokens compatibility
    // Some providers only support max_tokens, others prefer max_completion_tokens
    if (request.max_completion_tokens && !request.max_tokens) {
      normalized.max_tokens = request.max_completion_tokens;
    }
    // Handle enable_thinking parameter (used by Qwen, DeepSeek thinking models)
    // Convert to thinking object format if needed
    if (request.enable_thinking !== undefined && !request.thinking) {
      normalized.thinking = {
        type: request.enable_thinking ? "enabled" : "disabled",
      };
    }
    return normalized;
  }
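
  // Illustrative input/output (not from the source):
  //   normalizeOpenAIRequest({ model: "m", messages: [], max_completion_tokens: 256 })
  //     -> also carries max_tokens: 256
  //   normalizeOpenAIRequest({ model: "m", messages: [], enable_thinking: true })
  //     -> also carries thinking: { type: "enabled" }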

  // =========================================================================
  // OpenAI Chat Completions API
  // =========================================================================
  app.post("/v1/chat/completions", async (c) => {
    // Create audit context early
    let auditCtx = null;
    try {
      const body = await c.req.json();
      const parseResult = CreateChatCompletionRequestSchema.safeParse(body);
      if (!parseResult.success) {
        return c.json(
          {
            error: {
              message: "Invalid request",
              type: "invalid_request_error",
              param: null,
              code: "invalid_request",
              details: parseResult.error.issues,
            },
          },
          400,
        );
      }
      const request = parseResult.data;
      const modelConfig = resolveModelConfig(request.model);
      if (!modelConfig) {
        return c.json(
          {
            error: {
              message: `Model ${request.model} not configured`,
              type: "invalid_request_error",
              param: "model",
              code: "model_not_found",
            },
          },
          404,
        );
      }
      // Determine upstream adapter
      const adapter = modelConfig.adapter || "openai";
      const baseUrl = normalizeBaseUrl(modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || "https://api.openai.com");
      // Create audit context
      const outputProtocol = adapter === "anthropic" ? ChatProtocol.ANTHROPIC : adapter === "gemini" ? ChatProtocol.GEMINI : ChatProtocol.OPENAI;
      auditCtx = createAuditContext({
        method: "POST",
        endpoint: "/v1/chat/completions",
        model: request.model,
        inputProtocol: ChatProtocol.OPENAI,
        outputProtocol,
        streaming: request.stream || false,
        clientIp: extractClientIp(c),
        userAgent: c.req.header("user-agent"),
        requestMeta: {
          temperature: request.temperature,
          max_tokens: request.max_tokens || request.max_completion_tokens,
          top_p: request.top_p,
        },
      });
      // Log incoming request
      log.info(`→ POST /v1/chat/completions model=${request.model} stream=${request.stream || false} messages=${request.messages.length}`);
      // Build upstream request based on protocol
      let upstreamUrl;
      let upstreamBody;
      let upstreamHeaders;
      switch (adapter) {
        case "anthropic": {
          upstreamUrl = `${baseUrl}/v1/messages`;
          upstreamBody = openaiToAnthropicRequest(request);
          upstreamHeaders = buildUpstreamHeaders(modelConfig, "anthropic");
          break;
        }
        case "gemini": {
          const method = request.stream ? "streamGenerateContent" : "generateContent";
          upstreamUrl = `${baseUrl}/v1/models/${request.model}:${method}`;
          upstreamBody = openaiToGeminiRequest(request);
          upstreamHeaders = buildUpstreamHeaders(modelConfig, "gemini");
          break;
        }
        default: {
          // OpenAI adapter - passthrough with parameter normalization
          upstreamUrl = `${baseUrl}/v1/chat/completions`;
          upstreamBody = normalizeOpenAIRequest(request);
          upstreamHeaders = buildUpstreamHeaders(modelConfig, "openai");
        }
      }
      // Set provider info in audit context
      auditCtx.setProvider({
        provider: adapter,
        upstreamUrl,
      });
      // Handle streaming
      if (request.stream) {
        return handleStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, adapter, request.model, auditCtx);
      }
      // Non-streaming request
      const response = await makeUpstreamRequest(upstreamUrl, upstreamBody, upstreamHeaders);
      const responseData = await response.json();
      // Convert response if needed
      let result;
      switch (adapter) {
        case "anthropic":
          result = anthropicToOpenaiResponse(responseData, request.model);
          break;
        case "gemini":
          result = geminiToOpenaiResponse(responseData, request.model);
          break;
        default:
          result = responseData;
      }
      // Record token usage and complete audit
      if (result.usage) {
        auditCtx.setTokenUsage(result.usage.prompt_tokens || 0, result.usage.completion_tokens || 0);
      }
      auditCtx.setResponseMeta({
        finish_reason: result.choices?.[0]?.finish_reason,
        model: result.model,
      });
      await auditCtx.complete(200);
      // Log response
      const usage = result.usage;
      log.info(`← 200 /v1/chat/completions model=${result.model || request.model} tokens=${usage?.total_tokens || 0} (in=${usage?.prompt_tokens || 0} out=${usage?.completion_tokens || 0})`);
      return c.json(result);
    } catch (error) {
      // Record error in audit
      if (auditCtx) {
        await auditCtx.error(error instanceof Error ? error.message : "Unknown error", "internal_error", 500);
      }
      log.error("Chat completion error:", error);
      return c.json(
        {
          error: {
            message: error instanceof Error ? error.message : "Internal server error",
            type: "api_error",
            code: "internal_error",
          },
        },
        500,
      );
    }
  });
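
  // Illustrative client call (sketch; assumes the gateway listens on
  // localhost:3000 and a matching entry exists in config.models):
  //
  //   const res = await fetch("http://localhost:3000/v1/chat/completions", {
  //     method: "POST",
  //     headers: { "Content-Type": "application/json" },
  //     body: JSON.stringify({
  //       model: "claude-3-5-haiku",
  //       messages: [{ role: "user", content: "Hello" }],
  //     }),
  //   });
  //   // With an "anthropic" adapter configured for that model, the body is
  //   // converted via openaiToAnthropicRequest and the upstream reply is
  //   // converted back to the OpenAI chat.completion shape before returning.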

  /**
   * Handle streaming request
   */
  async function handleStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, adapter, model, auditCtx) {
    const response = await makeUpstreamRequest(upstreamUrl, upstreamBody, upstreamHeaders, true);
    let firstTokenRecorded = false;
    let totalOutputTokens = 0;
    // For streaming, we need to convert events on the fly
    return streamSSE(c, async (stream) => {
      const reader = response.body?.getReader();
      if (!reader) {
        await stream.writeSSE({ data: "[DONE]" });
        if (auditCtx) await auditCtx.complete(200);
        return;
      }
      const decoder = new TextDecoder();
      let buffer = "";
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          buffer = lines.pop() || "";
          for (const line of lines) {
            if (!line.trim()) continue;
            if (line.startsWith("data: ")) {
              const data = line.slice(6);
              if (data === "[DONE]") {
                await stream.writeSSE({ data: "[DONE]" });
                continue;
              }
              try {
                const parsed = JSON.parse(data);
                const converted = convertStreamEvent(parsed, adapter, model);
                if (converted) {
                  // Record first token for TTFT (time to first token)
                  if (!firstTokenRecorded && auditCtx) {
                    auditCtx.recordFirstToken();
                    firstTokenRecorded = true;
                  }
                  // Track usage from stream events
                  if (converted.usage) {
                    totalOutputTokens = converted.usage.completion_tokens || totalOutputTokens;
                  }
                  await stream.writeSSE({ data: JSON.stringify(converted) });
                }
              } catch {
                // Skip invalid JSON
              }
            } else if (line.startsWith("event: ")) {
              // SSE event-name lines are ignored; the JSON data payload carries the event type
            }
          }
        }
        // Send done signal
        await stream.writeSSE({ data: "[DONE]" });
        // Complete audit
        if (auditCtx) {
          auditCtx.setTokenUsage(0, totalOutputTokens); // Input tokens not available in streaming
          await auditCtx.complete(200);
        }
      } catch (err) {
        // Record streaming error
        if (auditCtx) {
          await auditCtx.error(err instanceof Error ? err.message : "Streaming error", "streaming_error", 500);
        }
        throw err;
      } finally {
        reader.releaseLock();
      }
    });
  }

  /**
   * Convert stream event to OpenAI format
   */
  function convertStreamEvent(event, adapter, model) {
    switch (adapter) {
      case "anthropic":
        return convertAnthropicStreamEvent(event, model);
      case "gemini":
        return convertGeminiStreamEvent(event, model);
      default:
        return event;
    }
  }

  /**
   * Convert Anthropic stream event to OpenAI format
   */
  function convertAnthropicStreamEvent(event, model) {
    if (event.type === "content_block_delta") {
      if (event.delta?.type === "text_delta") {
        return {
          id: `chatcmpl-${Date.now()}`,
          object: "chat.completion.chunk",
          created: Math.floor(Date.now() / 1000),
          model,
          choices: [
            {
              index: 0,
              delta: { content: event.delta.text },
              finish_reason: null,
            },
          ],
        };
      }
    } else if (event.type === "message_delta") {
      const finishReason = event.delta?.stop_reason === "end_turn" ? "stop" : event.delta?.stop_reason === "tool_use" ? "tool_calls" : null;
      return {
        id: `chatcmpl-${Date.now()}`,
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1000),
        model,
        choices: [
          {
            index: 0,
            delta: {},
            finish_reason: finishReason,
          },
        ],
        usage: event.usage
          ? {
              prompt_tokens: 0,
              completion_tokens: event.usage.output_tokens,
              total_tokens: event.usage.output_tokens,
            }
          : undefined,
      };
    }
    return null;
  }
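
  // Illustrative mapping (not from the source): an Anthropic event such as
  //   { type: "content_block_delta", delta: { type: "text_delta", text: "Hi" } }
  // becomes an OpenAI-style chunk whose choices[0].delta.content is "Hi";
  // other Anthropic event types (message_start, content_block_start, ...) fall
  // through, return null, and are dropped from the converted stream.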

  /**
   * Convert Gemini stream event to OpenAI format
   */
  function convertGeminiStreamEvent(event, model) {
    if (event.candidates?.[0]?.content?.parts) {
      const parts = event.candidates[0].content.parts;
      const textParts = parts.filter((p) => p.text);
      if (textParts.length > 0) {
        return {
          id: `chatcmpl-${Date.now()}`,
          object: "chat.completion.chunk",
          created: Math.floor(Date.now() / 1000),
          model,
          choices: [
            {
              index: 0,
              delta: { content: textParts.map((p) => p.text).join("") },
              finish_reason: event.candidates[0].finishReason === "STOP" ? "stop" : null,
            },
          ],
        };
      }
    }
    return null;
  }
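
  // Illustrative mapping (not from the source): a Gemini chunk like
  //   { candidates: [{ content: { parts: [{ text: "Hel" }, { text: "lo" }] }, finishReason: "STOP" }] }
  // is flattened to a single OpenAI-style delta { content: "Hello" } with finish_reason "stop".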

  // =========================================================================
  // Anthropic Messages API
  // =========================================================================
  app.post("/v1/messages", async (c) => {
    try {
      const body = await c.req.json();
      const parseResult = CreateMessageRequestSchema.safeParse(body);
      if (!parseResult.success) {
        return c.json(
          {
            type: "error",
            error: {
              type: "invalid_request_error",
              message: "Invalid request",
            },
          },
          400,
        );
      }
      const request = parseResult.data;
      const modelConfig = resolveModelConfig(request.model);
      if (!modelConfig) {
        return c.json(
          {
            type: "error",
            error: {
              type: "invalid_request_error",
              message: `Model ${request.model} not configured`,
            },
          },
          404,
        );
      }
      // For the Anthropic endpoint, the request body is passed through unconverted;
      // the adapter only selects which upstream baseUrl to use
      const adapter = modelConfig.adapter || "anthropic";
      const baseUrl = normalizeBaseUrl(modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || "https://api.anthropic.com");
      const upstreamUrl = `${baseUrl}/v1/messages`;
      const upstreamHeaders = buildUpstreamHeaders(modelConfig, "anthropic");
      // Log incoming request
      log.info(`→ POST /v1/messages model=${request.model} messages=${request.messages.length}`);
      const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders);
      const responseData = await response.json();
      // Log response
      const usage = responseData.usage;
      log.info(`← 200 /v1/messages model=${responseData.model || request.model} tokens=${(usage?.input_tokens || 0) + (usage?.output_tokens || 0)} (in=${usage?.input_tokens || 0} out=${usage?.output_tokens || 0})`);
      return c.json(responseData);
    } catch (error) {
      log.error("Messages error:", error);
      return c.json(
        {
          type: "error",
          error: {
            type: "api_error",
            message: error instanceof Error ? error.message : "Internal server error",
          },
        },
        500,
      );
    }
  });

  // =========================================================================
  // Gemini Generate Content API
  // =========================================================================
  app.post("/v1/models/:model\\:generateContent", async (c) => {
    try {
      const model = c.req.param("model");
      if (!model) {
        return c.json({ error: { message: "Model parameter is required" } }, 400);
      }
      const body = await c.req.json();
      const parseResult = CreateGenerateContentRequestSchema.safeParse(body);
      if (!parseResult.success) {
        return c.json({ error: { message: "Invalid request" } }, 400);
      }
      const request = parseResult.data;
      const modelConfig = resolveModelConfig(model);
      if (!modelConfig) {
        return c.json({ error: { message: `Model ${model} not configured` } }, 404);
      }
      const baseUrl = normalizeBaseUrl(modelConfig.adapters?.gemini?.baseUrl || modelConfig.baseUrl || "https://generativelanguage.googleapis.com");
      const upstreamUrl = `${baseUrl}/v1/models/${model}:generateContent`;
      const upstreamHeaders = buildUpstreamHeaders(modelConfig, "gemini");
      // Log incoming request
      log.info(`→ POST /v1/models/${model}:generateContent contents=${request.contents?.length || 0}`);
      const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders);
      const responseData = await response.json();
      // Log response
      const usage = responseData.usageMetadata;
      log.info(`← 200 /v1/models/${model}:generateContent tokens=${usage?.totalTokenCount || 0} (in=${usage?.promptTokenCount || 0} out=${usage?.candidatesTokenCount || 0})`);
      return c.json(responseData);
    } catch (error) {
      log.error("Generate content error:", error);
      return c.json({ error: { message: error instanceof Error ? error.message : "Internal server error" } }, 500);
    }
  });

  // =========================================================================
  // Gemini Streaming Generate Content API
  // =========================================================================
  app.post("/v1/models/:model\\:streamGenerateContent", async (c) => {
    try {
      const model = c.req.param("model");
      if (!model) {
        return c.json({ error: { message: "Model parameter is required" } }, 400);
      }
      const body = await c.req.json();
      const parseResult = CreateGenerateContentRequestSchema.safeParse(body);
      if (!parseResult.success) {
        return c.json({ error: { message: "Invalid request" } }, 400);
      }
      const request = parseResult.data;
      // Log incoming request
      log.info(`→ POST /v1/models/${model}:streamGenerateContent contents=${request.contents?.length || 0}`);
      const modelConfig = resolveModelConfig(model);
      if (!modelConfig) {
        return c.json({ error: { message: `Model ${model} not configured` } }, 404);
      }
      const baseUrl = normalizeBaseUrl(modelConfig.adapters?.gemini?.baseUrl || modelConfig.baseUrl || "https://generativelanguage.googleapis.com");
      const upstreamUrl = `${baseUrl}/v1/models/${model}:streamGenerateContent`;
      const upstreamHeaders = buildUpstreamHeaders(modelConfig, "gemini");
      const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders, true);
      // Stream the response directly
      return streamSSE(c, async (stream) => {
        const reader = response.body?.getReader();
        if (!reader) {
          await stream.writeSSE({ data: "[DONE]" });
          return;
        }
        const decoder = new TextDecoder();
        let buffer = "";
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split("\n");
            buffer = lines.pop() || "";
            for (const line of lines) {
              if (!line.trim()) continue;
              if (line.startsWith("data: ")) {
                const data = line.slice(6);
                if (data === "[DONE]") {
                  await stream.writeSSE({ data: "[DONE]" });
                  continue;
                }
                await stream.writeSSE({ data });
              }
            }
          }
          await stream.writeSSE({ data: "[DONE]" });
        } finally {
          reader.releaseLock();
        }
      });
    } catch (error) {
      log.error("Stream generate content error:", error);
      return c.json({ error: { message: error instanceof Error ? error.message : "Internal server error" } }, 500);
    }
  });

  // =========================================================================
  // OpenAI Responses API
  // =========================================================================
  app.post("/v1/responses", async (c) => {
    let auditCtx = null;
    try {
      const body = await c.req.json();
      const parseResult = CreateResponseRequestSchema.safeParse(body);
      if (!parseResult.success) {
        return c.json(
          {
            error: {
              message: "Invalid request",
              type: "invalid_request_error",
              param: null,
              code: "invalid_request",
              details: parseResult.error.issues,
            },
          },
          400,
        );
      }
      const request = parseResult.data;
      // Handle previous_response_id - load previous context
      let previousContext = null;
      if (request.previous_response_id) {
        try {
          const { isDbInitialized, getEntityManager } = await import("../server/db.js");
          const { ResponseEntity } = await import("../entities/index.js");
          if (isDbInitialized()) {
            const em = getEntityManager().fork();
            const prevResponse = await em.findOne(ResponseEntity, {
              responseId: request.previous_response_id,
            });
            if (prevResponse) {
              previousContext = {
                input: prevResponse.input,
                output: prevResponse.output,
              };
              log.info(`Loaded previous response: ${request.previous_response_id}`);
            } else {
              log.warn(`Previous response not found: ${request.previous_response_id}`);
            }
          }
        } catch (e) {
          log.warn("Failed to load previous response:", e);
        }
      }
      const modelConfig = resolveModelConfig(request.model);
      if (!modelConfig) {
        return c.json(
          {
            error: {
              message: `Model ${request.model} not configured`,
              type: "invalid_request_error",
              param: "model",
              code: "model_not_found",
            },
          },
          404,
        );
      }
      // Determine adapter - for Responses API, we convert to Chat Completions
      const adapter = modelConfig.adapter || "openai";
      const baseUrl = normalizeBaseUrl(modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || "https://api.openai.com");
      // Create audit context
      auditCtx = createAuditContext({
        method: "POST",
        endpoint: "/v1/responses",
        model: request.model,
        inputProtocol: ChatProtocol.OPENAI,
        outputProtocol: ChatProtocol.OPENAI,
        streaming: request.stream || false,
        clientIp: extractClientIp(c),
        userAgent: c.req.header("user-agent"),
        requestMeta: {
          temperature: request.temperature,
          max_output_tokens: request.max_output_tokens,
        },
      });
      // Log incoming request
      const inputType = typeof request.input === "string" ? "string" : Array.isArray(request.input) ? `array[${request.input.length}]` : "object";
      log.info(`→ POST /v1/responses model=${request.model} stream=${request.stream || false} input=${inputType}`);
      // Convert Responses API request to Chat Completions format
      const chatRequest = responsesToChatCompletions(request, previousContext);
      // Build upstream request
      const upstreamUrl = `${baseUrl}/v1/chat/completions`;
      const upstreamHeaders = buildUpstreamHeaders(modelConfig, "openai");
      auditCtx.setProvider({
        provider: adapter,
        upstreamUrl,
      });
      // Handle streaming
      if (request.stream) {
        return handleResponsesStreamingRequest(c, upstreamUrl, chatRequest, upstreamHeaders, request.model, auditCtx);
      }
      // Non-streaming request
      const response = await makeUpstreamRequest(upstreamUrl, chatRequest, upstreamHeaders);
      const chatResponse = await response.json();
      // Convert Chat Completions response to Responses format
      const result = chatCompletionsToResponses(chatResponse, request.model);
      // Store response for future previous_response_id lookups
      try {
        const { isDbInitialized, getEntityManager } = await import("../server/db.js");
        const { ResponseEntity } = await import("../entities/index.js");
        if (isDbInitialized()) {
          const em = getEntityManager().fork();
          const responseEntity = new ResponseEntity();
          responseEntity.responseId = result.id;
          responseEntity.model = result.model;
          responseEntity.status = result.status;
          responseEntity.input = request.input;
          responseEntity.output = result.output;
          responseEntity.usage = result.usage;
          responseEntity.instructions = request.instructions ?? undefined;
          responseEntity.previousResponseId = request.previous_response_id ?? undefined;
          responseEntity.tools = request.tools ?? undefined;
          responseEntity.toolChoice = request.tool_choice ?? undefined;
          responseEntity.metadata = request.metadata;
          responseEntity.durationMs = auditCtx?.getDuration();
          em.persist(responseEntity);
          await em.flush();
          log.debug(`Stored response: ${result.id}`);
        }
      } catch (e) {
        log.warn("Failed to store response:", e);
      }
      // Record usage
      if (chatResponse.usage) {
        auditCtx.setTokenUsage(chatResponse.usage.prompt_tokens || 0, chatResponse.usage.completion_tokens || 0);
      }
      auditCtx.setResponseMeta({
        status: result.status,
        output_items: result.output.length,
      });
      await auditCtx.complete(200);
      // Log response
      const usage = chatResponse.usage;
      log.info(`← 200 /v1/responses model=${result.model || request.model} status=${result.status} tokens=${usage?.total_tokens || 0}`);
      return c.json(result);
    } catch (error) {
      if (auditCtx) {
        await auditCtx.error(error instanceof Error ? error.message : "Unknown error", "internal_error", 500);
      }
      log.error("Responses API error:", error);
      return c.json(
        {
          error: {
            message: error instanceof Error ? error.message : "Internal server error",
            type: "api_error",
            code: "internal_error",
          },
        },
        500,
      );
    }
  });

  /**
   * Convert Responses API request to Chat Completions format
   */
  function responsesToChatCompletions(request, previousContext) {
    const messages = [];
    // Add system instruction if present
    if (request.instructions) {
      messages.push({
        role: "system",
        content: request.instructions,
      });
    }
    // Add previous context if available (previous_response_id)
    if (previousContext) {
      // Add previous input
      const prevInput = previousContext.input;
      if (typeof prevInput === "string") {
        messages.push({
          role: "user",
          content: prevInput,
        });
      } else if (Array.isArray(prevInput)) {
        for (const item of prevInput) {
          if (item.type === "message") {
            messages.push({
              role: item.role,
              content: typeof item.content === "string" ? item.content : JSON.stringify(item.content),
            });
          }
        }
      }
      // Add previous output as assistant messages
      for (const item of previousContext.output) {
        if (item.type === "message" && item.role === "assistant") {
          const textContent = item.content?.find((c) => c.type === "text" || c.type === "output_text");
          if (textContent) {
            messages.push({
              role: "assistant",
              content: textContent.text,
            });
          }
        }
      }
    }
    // Convert current input to messages
    if (typeof request.input === "string") {
      messages.push({
        role: "user",
        content: request.input,
      });
    } else if (Array.isArray(request.input)) {
      for (const item of request.input) {
        if (item.type === "message") {
          messages.push({
            role: item.role,
            content: typeof item.content === "string" ? item.content : JSON.stringify(item.content),
          });
        }
        // item_reference is handled differently - would need to fetch the referenced item
      }
    }
    return {
      model: request.model,
      messages,
      temperature: request.temperature,
      top_p: request.top_p,
      max_tokens: request.max_output_tokens,
      stream: request.stream,
      tools: request.tools,
      tool_choice: request.tool_choice,
      parallel_tool_calls: request.parallel_tool_calls,
      metadata: request.metadata,
      store: request.store,
      user: request.user,
    };
  }
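
  // Illustrative conversion (not from the source; model name is hypothetical):
  //   responsesToChatCompletions({ model: "gpt-4o", instructions: "Be terse.", input: "Hi" }, null)
  //     -> { model: "gpt-4o",
  //          messages: [
  //            { role: "system", content: "Be terse." },
  //            { role: "user", content: "Hi" },
  //          ], ... }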

  /**
   * Convert Chat Completions response to Responses format
   */
  function chatCompletionsToResponses(chatResponse, model) {
    const responseId = `resp_${chatResponse.id || Date.now()}`;
    const output = [];
    for (const choice of chatResponse.choices || []) {
      const message = choice.message;
      if (message) {
        output.push({
          id: `item_${responseId}_${choice.index}`,
          type: "message",
          role: "assistant",
          content: message.content
            ? [
                {
                  type: "text",
                  text: message.content,
                },
              ]
            : [],
          status: "completed",
        });
        // Handle tool calls
        if (message.tool_calls) {
          for (const toolCall of message.tool_calls) {
            output.push({
              id: toolCall.id,
              type: "function_call",
              name: toolCall.function?.name,
              arguments: toolCall.function?.arguments,
              status: "completed",
            });
          }
        }
      }
    }
    return {
      id: responseId,
      object: "response",
      created_at: chatResponse.created || Math.floor(Date.now() / 1000),
      model: chatResponse.model || model,
      status: "completed",
      output,
      usage: chatResponse.usage,
      metadata: {},
      error: null,
    };
  }
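
  // Sketch of the reverse mapping (illustrative): a chat completion whose message is
  //   { content: "Hello", tool_calls: [{ id: "call_1", function: { name: "f", arguments: "{}" } }] }
  // yields a "message" output item with one text part, plus a
  //   { id: "call_1", type: "function_call", name: "f", arguments: "{}", status: "completed" }
  // item appended after it.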

  /**
   * Handle Responses API streaming
   */
  async function handleResponsesStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, model, auditCtx) {
    const response = await makeUpstreamRequest(upstreamUrl, { ...upstreamBody, stream: true }, upstreamHeaders, true);
    let firstTokenRecorded = false;
    const responseId = `resp_${Date.now()}`;
    return streamSSE(c, async (stream) => {
      const reader = response.body?.getReader();
      if (!reader) {
        await stream.writeSSE({
          event: "response.done",
          data: JSON.stringify({ type: "response.done" }),
        });
        if (auditCtx) await auditCtx.complete(200);
        return;
      }
      // Send initial event
      await stream.writeSSE({
        event: "response.created",
        data: JSON.stringify({
          type: "response.created",
          response: {
            id: responseId,
            object: "response",
            created_at: Math.floor(Date.now() / 1000),
            model,
            status: "in_progress",
            output: [],
          },
        }),
      });
      const decoder = new TextDecoder();
      let buffer = "";
      const outputItemId = `item_${responseId}_0`;
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          buffer = lines.pop() || "";
          for (const line of lines) {
            if (!line.trim() || !line.startsWith("data: ")) continue;
            const data = line.slice(6);
            if (data === "[DONE]") continue;
            try {
              const parsed = JSON.parse(data);
              const delta = parsed.choices?.[0]?.delta;
              if (delta?.content) {
                if (!firstTokenRecorded && auditCtx) {
                  auditCtx.recordFirstToken();
                  firstTokenRecorded = true;
                }
                await stream.writeSSE({
                  event: "response.output_text.delta",
                  data: JSON.stringify({
                    type: "response.output_text.delta",
                    output_index: 0,
                    content_index: 0,
                    delta: delta.content,
                  }),
                });
              }
            } catch {
              // Skip invalid JSON
            }
          }
        }
        // Send completion events
        await stream.writeSSE({
          event: "response.output_item.done",
          data: JSON.stringify({
            type: "response.output_item.done",
            output_index: 0,
            item: {
              id: outputItemId,
              type: "message",
              role: "assistant",
              status: "completed",
            },
          }),
        });
        await stream.writeSSE({
          event: "response.done",
          data: JSON.stringify({
            type: "response.done",
            response: {
              id: responseId,
              status: "completed",
            },
          }),
        });
        if (auditCtx) await auditCtx.complete(200);
      } catch (err) {
        if (auditCtx) {
          await auditCtx.error(err instanceof Error ? err.message : "Streaming error", "streaming_error", 500);
        }
        throw err;
      } finally {
        reader.releaseLock();
      }
    });
  }

  // =========================================================================
  // Models endpoint
  // =========================================================================
  app.get("/v1/models", async (c) => {
    const fetchUpstream = c.req.query("fetch") === "true";
    const allModels = [];
    // Add configured models
    for (const m of config.models || []) {
      allModels.push({
        id: m.name,
        object: "model",
        created: Math.floor(Date.now() / 1000),
        owned_by: "mcps",
        context_window: m.contextWindow,
        max_input_tokens: m.maxInputTokens,
        max_output_tokens: m.maxOutputTokens,
      });
      // Fetch upstream models if enabled
      if (fetchUpstream && m.fetchUpstreamModels && m.baseUrl) {
        try {
          const headers = { "Content-Type": "application/json" };
          if (m.apiKey) {
            headers.Authorization = `Bearer ${m.apiKey}`;
          }
          Object.assign(headers, m.headers || {});
          const normalizedUrl = normalizeBaseUrl(m.baseUrl);
          const upstreamUrl = `${normalizedUrl}/v1/models`;
          const res = await fetch(upstreamUrl, { headers });
          if (res.ok) {
            const data = await res.json();
            if (data.data && Array.isArray(data.data)) {
              for (const model of data.data) {
                // Avoid duplicates
                if (!allModels.some((existing) => existing.id === model.id)) {
                  allModels.push({
                    id: model.id,
                    object: model.object || "model",
                    created: model.created || Math.floor(Date.now() / 1000),
                    owned_by: model.owned_by || m.name.split("/")[0] || "upstream",
                  });
                }
              }
            }
          }
        } catch (e) {
          log.warn(`Failed to fetch upstream models from ${m.baseUrl}: ${e}`);
        }
      }
    }
    log.debug(`→ GET /v1/models count=${allModels.length} fetch=${fetchUpstream}`);
    return c.json({
      object: "list",
      data: allModels,
    });
  });

  return app;
}
//# sourceMappingURL=handler.js.map
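
// Minimal usage sketch (not part of the file; assumes only what handler.js itself
// imports): createChatHandler returns a Hono app, so it can be mounted into a
// parent Hono app and pointed at a model config.
//
//   import { Hono } from "hono";
//   const chat = createChatHandler({
//     config: { models: [{ name: "gpt-*", baseUrl: "https://api.openai.com", apiKey: process.env.OPENAI_API_KEY }] },
//   });
//   const root = new Hono().route("/", chat);
//   // Clients may then call POST /v1/chat/completions, /v1/messages,
//   // /v1/responses, or GET /v1/models against this app.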