@wener/mcps 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/LICENSE +21 -0
  2. package/dist/index.mjs +15 -0
  3. package/dist/mcps-cli.mjs +174727 -0
  4. package/lib/chat/agent.js +187 -0
  5. package/lib/chat/agent.js.map +1 -0
  6. package/lib/chat/audit.js +238 -0
  7. package/lib/chat/audit.js.map +1 -0
  8. package/lib/chat/converters.js +467 -0
  9. package/lib/chat/converters.js.map +1 -0
  10. package/lib/chat/handler.js +1068 -0
  11. package/lib/chat/handler.js.map +1 -0
  12. package/lib/chat/index.js +12 -0
  13. package/lib/chat/index.js.map +1 -0
  14. package/lib/chat/types.js +35 -0
  15. package/lib/chat/types.js.map +1 -0
  16. package/lib/contracts/AuditContract.js +85 -0
  17. package/lib/contracts/AuditContract.js.map +1 -0
  18. package/lib/contracts/McpsContract.js +113 -0
  19. package/lib/contracts/McpsContract.js.map +1 -0
  20. package/lib/contracts/index.js +3 -0
  21. package/lib/contracts/index.js.map +1 -0
  22. package/lib/dev.server.js +7 -0
  23. package/lib/dev.server.js.map +1 -0
  24. package/lib/entities/ChatRequestEntity.js +318 -0
  25. package/lib/entities/ChatRequestEntity.js.map +1 -0
  26. package/lib/entities/McpRequestEntity.js +271 -0
  27. package/lib/entities/McpRequestEntity.js.map +1 -0
  28. package/lib/entities/RequestLogEntity.js +177 -0
  29. package/lib/entities/RequestLogEntity.js.map +1 -0
  30. package/lib/entities/ResponseEntity.js +150 -0
  31. package/lib/entities/ResponseEntity.js.map +1 -0
  32. package/lib/entities/index.js +11 -0
  33. package/lib/entities/index.js.map +1 -0
  34. package/lib/entities/types.js +11 -0
  35. package/lib/entities/types.js.map +1 -0
  36. package/lib/index.js +3 -0
  37. package/lib/index.js.map +1 -0
  38. package/lib/mcps-cli.js +44 -0
  39. package/lib/mcps-cli.js.map +1 -0
  40. package/lib/providers/McpServerHandlerDef.js +40 -0
  41. package/lib/providers/McpServerHandlerDef.js.map +1 -0
  42. package/lib/providers/findMcpServerDef.js +26 -0
  43. package/lib/providers/findMcpServerDef.js.map +1 -0
  44. package/lib/providers/prometheus/def.js +24 -0
  45. package/lib/providers/prometheus/def.js.map +1 -0
  46. package/lib/providers/prometheus/index.js +2 -0
  47. package/lib/providers/prometheus/index.js.map +1 -0
  48. package/lib/providers/relay/def.js +32 -0
  49. package/lib/providers/relay/def.js.map +1 -0
  50. package/lib/providers/relay/index.js +2 -0
  51. package/lib/providers/relay/index.js.map +1 -0
  52. package/lib/providers/sql/def.js +31 -0
  53. package/lib/providers/sql/def.js.map +1 -0
  54. package/lib/providers/sql/index.js +2 -0
  55. package/lib/providers/sql/index.js.map +1 -0
  56. package/lib/providers/tencent-cls/def.js +44 -0
  57. package/lib/providers/tencent-cls/def.js.map +1 -0
  58. package/lib/providers/tencent-cls/index.js +2 -0
  59. package/lib/providers/tencent-cls/index.js.map +1 -0
  60. package/lib/scripts/bundle.js +90 -0
  61. package/lib/scripts/bundle.js.map +1 -0
  62. package/lib/server/api-routes.js +96 -0
  63. package/lib/server/api-routes.js.map +1 -0
  64. package/lib/server/audit.js +274 -0
  65. package/lib/server/audit.js.map +1 -0
  66. package/lib/server/chat-routes.js +82 -0
  67. package/lib/server/chat-routes.js.map +1 -0
  68. package/lib/server/config.js +223 -0
  69. package/lib/server/config.js.map +1 -0
  70. package/lib/server/db.js +97 -0
  71. package/lib/server/db.js.map +1 -0
  72. package/lib/server/index.js +2 -0
  73. package/lib/server/index.js.map +1 -0
  74. package/lib/server/mcp-handler.js +167 -0
  75. package/lib/server/mcp-handler.js.map +1 -0
  76. package/lib/server/mcp-routes.js +112 -0
  77. package/lib/server/mcp-routes.js.map +1 -0
  78. package/lib/server/mcps-router.js +119 -0
  79. package/lib/server/mcps-router.js.map +1 -0
  80. package/lib/server/schema.js +129 -0
  81. package/lib/server/schema.js.map +1 -0
  82. package/lib/server/server.js +166 -0
  83. package/lib/server/server.js.map +1 -0
  84. package/lib/web/ChatPage.js +827 -0
  85. package/lib/web/ChatPage.js.map +1 -0
  86. package/lib/web/McpInspectorPage.js +214 -0
  87. package/lib/web/McpInspectorPage.js.map +1 -0
  88. package/lib/web/ServersPage.js +93 -0
  89. package/lib/web/ServersPage.js.map +1 -0
  90. package/lib/web/main.js +541 -0
  91. package/lib/web/main.js.map +1 -0
  92. package/package.json +83 -0
  93. package/src/chat/agent.ts +240 -0
  94. package/src/chat/audit.ts +377 -0
  95. package/src/chat/converters.test.ts +325 -0
  96. package/src/chat/converters.ts +459 -0
  97. package/src/chat/handler.test.ts +137 -0
  98. package/src/chat/handler.ts +1233 -0
  99. package/src/chat/index.ts +16 -0
  100. package/src/chat/types.ts +72 -0
  101. package/src/contracts/AuditContract.ts +93 -0
  102. package/src/contracts/McpsContract.ts +141 -0
  103. package/src/contracts/index.ts +18 -0
  104. package/src/dev.server.ts +7 -0
  105. package/src/entities/ChatRequestEntity.ts +157 -0
  106. package/src/entities/McpRequestEntity.ts +149 -0
  107. package/src/entities/RequestLogEntity.ts +78 -0
  108. package/src/entities/ResponseEntity.ts +75 -0
  109. package/src/entities/index.ts +12 -0
  110. package/src/entities/types.ts +188 -0
  111. package/src/index.ts +1 -0
  112. package/src/mcps-cli.ts +59 -0
  113. package/src/providers/McpServerHandlerDef.ts +105 -0
  114. package/src/providers/findMcpServerDef.ts +31 -0
  115. package/src/providers/prometheus/def.ts +21 -0
  116. package/src/providers/prometheus/index.ts +1 -0
  117. package/src/providers/relay/def.ts +31 -0
  118. package/src/providers/relay/index.ts +1 -0
  119. package/src/providers/relay/relay.test.ts +47 -0
  120. package/src/providers/sql/def.ts +33 -0
  121. package/src/providers/sql/index.ts +1 -0
  122. package/src/providers/tencent-cls/def.ts +38 -0
  123. package/src/providers/tencent-cls/index.ts +1 -0
  124. package/src/scripts/bundle.ts +82 -0
  125. package/src/server/api-routes.ts +98 -0
  126. package/src/server/audit.ts +310 -0
  127. package/src/server/chat-routes.ts +95 -0
  128. package/src/server/config.test.ts +162 -0
  129. package/src/server/config.ts +198 -0
  130. package/src/server/db.ts +115 -0
  131. package/src/server/index.ts +1 -0
  132. package/src/server/mcp-handler.ts +209 -0
  133. package/src/server/mcp-routes.ts +133 -0
  134. package/src/server/mcps-router.ts +133 -0
  135. package/src/server/schema.ts +175 -0
  136. package/src/server/server.ts +163 -0
  137. package/src/web/ChatPage.tsx +1005 -0
  138. package/src/web/McpInspectorPage.tsx +254 -0
  139. package/src/web/ServersPage.tsx +139 -0
  140. package/src/web/main.tsx +600 -0
  141. package/src/web/styles.css +15 -0
@@ -0,0 +1,1233 @@
1
+ /**
2
+ * Chat API Handler
3
+ * Provides unified AI model gateway with protocol conversion
4
+ */
5
+ import consola from 'consola';
6
+ import { Hono } from 'hono';
7
+ import { streamSSE } from 'hono/streaming';
8
+ import type { ChatConfig, ModelConfig } from '../server/schema';
9
+ import { ChatProtocol, createAuditContext, extractClientIp } from './audit';
10
+ import {
11
+ openaiToAnthropicRequest,
12
+ anthropicToOpenaiResponse,
13
+ openaiToGeminiRequest,
14
+ geminiToOpenaiResponse,
15
+ } from './converters';
16
+ import {
17
+ CreateChatCompletionRequestSchema,
18
+ type CreateChatCompletionRequest,
19
+ CreateResponseRequestSchema,
20
+ type CreateResponseRequest,
21
+ } from '@wener/ai/openai';
22
+ import { CreateMessageRequestSchema } from '@wener/ai/anthropic';
23
+ import { CreateGenerateContentRequestSchema } from '@wener/ai/google';
24
+
25
+ const log = consola.withTag('chat');
26
+
27
/** Options accepted by {@link createChatHandler}. */
export interface ChatHandlerOptions {
// Chat gateway configuration (model list, credentials, adapters);
// when omitted, the handler falls back to an empty config and no models resolve.
config?: ChatConfig;
}
30
+
31
+ /**
32
+ * Create chat handler Hono app
33
+ */
34
+ export function createChatHandler(options: ChatHandlerOptions = {}) {
35
+ const app = new Hono();
36
+ const { config = {} } = options;
37
+
38
+ /**
39
+ * Resolve model configuration
40
+ */
41
+ function resolveModelConfig(modelName: string): ModelConfig | null {
42
+ const models = config.models;
43
+ if (!models || models.length === 0) return null;
44
+
45
+ // First pass: exact match
46
+ for (const modelConfig of models) {
47
+ if (modelConfig.name === modelName) {
48
+ return modelConfig;
49
+ }
50
+ }
51
+
52
+ // Second pass: wildcard matches (e.g., "gpt-*" or "claude-*")
53
+ for (const modelConfig of models) {
54
+ const pattern = modelConfig.name;
55
+ if (pattern.includes('*')) {
56
+ const regex = new RegExp(`^${pattern.replace(/\*/g, '.*')}$`);
57
+ if (regex.test(modelName)) {
58
+ return modelConfig;
59
+ }
60
+ }
61
+ }
62
+
63
+ return null;
64
+ }
65
+
66
+ /**
67
+ * Make request to upstream provider
68
+ */
69
+ async function makeUpstreamRequest(
70
+ url: string,
71
+ body: unknown,
72
+ headers: Record<string, string>,
73
+ _stream: boolean = false,
74
+ ): Promise<Response> {
75
+ const response = await fetch(url, {
76
+ method: 'POST',
77
+ headers: {
78
+ 'Content-Type': 'application/json',
79
+ ...headers,
80
+ },
81
+ body: JSON.stringify(body),
82
+ });
83
+
84
+ if (!response.ok) {
85
+ const errorText = await response.text();
86
+ log.error('Upstream error:', response.status, errorText);
87
+ throw new Error(`Upstream error: ${response.status} ${errorText}`);
88
+ }
89
+
90
+ return response;
91
+ }
92
+
93
+ /**
94
+ * Normalize base URL - strip trailing /v1 if present
95
+ * This allows baseUrl to be specified as either "http://example.com" or "http://example.com/v1"
96
+ */
97
+ function normalizeBaseUrl(url: string): string {
98
+ return url.replace(/\/v1\/?$/, '');
99
+ }
100
+
101
+ /**
102
+ * Build headers for upstream request
103
+ */
104
+ function buildUpstreamHeaders(
105
+ modelConfig: ModelConfig,
106
+ adapter: 'openai' | 'anthropic' | 'gemini',
107
+ ): Record<string, string> {
108
+ const headers: Record<string, string> = {};
109
+
110
+ // Add API key if configured
111
+ if (modelConfig.apiKey) {
112
+ if (adapter === 'anthropic') {
113
+ headers['x-api-key'] = modelConfig.apiKey;
114
+ headers['anthropic-version'] = '2023-06-01';
115
+ } else {
116
+ headers.Authorization = `Bearer ${modelConfig.apiKey}`;
117
+ }
118
+ }
119
+
120
+ // Add custom headers
121
+ if (modelConfig.headers) {
122
+ Object.assign(headers, modelConfig.headers);
123
+ }
124
+
125
+ // Add adapter-specific headers
126
+ if (modelConfig.adapters?.[adapter]?.headers) {
127
+ Object.assign(headers, modelConfig.adapters[adapter]?.headers);
128
+ }
129
+
130
+ return headers;
131
+ }
132
+
133
+ /**
134
+ * Normalize OpenAI request for different providers
135
+ * Handles max_tokens/max_completion_tokens compatibility and thinking parameters
136
+ */
137
+ function normalizeOpenAIRequest(request: CreateChatCompletionRequest): Record<string, unknown> {
138
+ const normalized: Record<string, unknown> = { ...request };
139
+
140
+ // Handle max_tokens vs max_completion_tokens compatibility
141
+ // Some providers only support max_tokens, others prefer max_completion_tokens
142
+ if (request.max_completion_tokens && !request.max_tokens) {
143
+ normalized.max_tokens = request.max_completion_tokens;
144
+ }
145
+
146
+ // Handle enable_thinking parameter (used by Qwen, DeepSeek thinking models)
147
+ // Convert to thinking object format if needed
148
+ if (request.enable_thinking !== undefined && !request.thinking) {
149
+ normalized.thinking = {
150
+ type: request.enable_thinking ? 'enabled' : 'disabled',
151
+ };
152
+ }
153
+
154
+ return normalized;
155
+ }
156
+
157
// =========================================================================
// OpenAI Chat Completions API
// =========================================================================

// POST /v1/chat/completions — OpenAI-compatible gateway endpoint.
// Flow: validate → resolve model → convert payload to the upstream protocol
// (OpenAI passthrough / Anthropic / Gemini) → proxy → convert the response
// back to OpenAI format, recording an audit trail throughout.
app.post('/v1/chat/completions', async (c) => {
  // Create audit context early so the catch block below can record failures
  // even when the error happens before the context would otherwise exist.
  let auditCtx: ReturnType<typeof createAuditContext> | null = null;

  try {
    const body = await c.req.json();
    const parseResult = CreateChatCompletionRequestSchema.safeParse(body);

    if (!parseResult.success) {
      // OpenAI-style error envelope; schema issues attached for debugging.
      return c.json(
        {
          error: {
            message: 'Invalid request',
            type: 'invalid_request_error',
            param: null,
            code: 'invalid_request',
            details: parseResult.error.issues,
          },
        },
        400,
      );
    }

    const request = parseResult.data;
    const modelConfig = resolveModelConfig(request.model);

    if (!modelConfig) {
      return c.json(
        {
          error: {
            message: `Model ${request.model} not configured`,
            type: 'invalid_request_error',
            param: 'model',
            code: 'model_not_found',
          },
        },
        404,
      );
    }

    // Determine upstream adapter (defaults to OpenAI passthrough).
    const adapter = modelConfig.adapter || 'openai';
    const baseUrl = normalizeBaseUrl(
      modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || 'https://api.openai.com',
    );

    // Create audit context
    const outputProtocol =
      adapter === 'anthropic'
        ? ChatProtocol.ANTHROPIC
        : adapter === 'gemini'
          ? ChatProtocol.GEMINI
          : ChatProtocol.OPENAI;

    auditCtx = createAuditContext({
      method: 'POST',
      endpoint: '/v1/chat/completions',
      model: request.model,
      inputProtocol: ChatProtocol.OPENAI,
      outputProtocol,
      streaming: request.stream || false,
      clientIp: extractClientIp(c),
      userAgent: c.req.header('user-agent'),
      requestMeta: {
        temperature: request.temperature,
        max_tokens: request.max_tokens || request.max_completion_tokens,
        top_p: request.top_p,
      },
    });

    // Log incoming request
    log.info(
      `→ POST /v1/chat/completions model=${request.model} stream=${request.stream || false} messages=${request.messages.length}`,
    );

    // Build upstream request based on protocol
    let upstreamUrl: string;
    let upstreamBody: unknown;
    let upstreamHeaders: Record<string, string>;

    switch (adapter) {
      case 'anthropic': {
        upstreamUrl = `${baseUrl}/v1/messages`;
        upstreamBody = openaiToAnthropicRequest(request);
        upstreamHeaders = buildUpstreamHeaders(modelConfig, 'anthropic');
        break;
      }
      case 'gemini': {
        // Gemini uses distinct RPC names for streaming vs non-streaming.
        const method = request.stream ? 'streamGenerateContent' : 'generateContent';
        upstreamUrl = `${baseUrl}/v1/models/${request.model}:${method}`;
        upstreamBody = openaiToGeminiRequest(request);
        upstreamHeaders = buildUpstreamHeaders(modelConfig, 'gemini');
        break;
      }
      default: {
        // OpenAI adapter - passthrough with parameter normalization
        upstreamUrl = `${baseUrl}/v1/chat/completions`;
        upstreamBody = normalizeOpenAIRequest(request);
        upstreamHeaders = buildUpstreamHeaders(modelConfig, 'openai');
      }
    }

    // Set provider info in audit context
    auditCtx.setProvider({
      provider: adapter,
      upstreamUrl,
    });

    // Handle streaming — delegates SSE relay + conversion, returns directly.
    if (request.stream) {
      return handleStreamingRequest(c, upstreamUrl, upstreamBody, upstreamHeaders, adapter, request.model, auditCtx);
    }

    // Non-streaming request
    const response = await makeUpstreamRequest(upstreamUrl, upstreamBody, upstreamHeaders);
    const responseData = await response.json();

    // Convert response if needed (OpenAI passthrough needs no conversion).
    let result: any;
    switch (adapter) {
      case 'anthropic':
        result = anthropicToOpenaiResponse(responseData, request.model);
        break;
      case 'gemini':
        result = geminiToOpenaiResponse(responseData, request.model);
        break;
      default:
        result = responseData;
    }

    // Record token usage and complete audit
    if (result.usage) {
      auditCtx.setTokenUsage(result.usage.prompt_tokens || 0, result.usage.completion_tokens || 0);
    }
    auditCtx.setResponseMeta({
      finish_reason: result.choices?.[0]?.finish_reason,
      model: result.model,
    });
    await auditCtx.complete(200);

    // Log response
    const usage = result.usage;
    log.info(
      `← 200 /v1/chat/completions model=${result.model || request.model} tokens=${usage?.total_tokens || 0} (in=${usage?.prompt_tokens || 0} out=${usage?.completion_tokens || 0})`,
    );

    return c.json(result);
  } catch (error) {
    // Record error in audit (auditCtx may still be null for early failures).
    if (auditCtx) {
      await auditCtx.error(error instanceof Error ? error.message : 'Unknown error', 'internal_error', 500);
    }
    log.error('Chat completion error:', error);
    return c.json(
      {
        error: {
          message: error instanceof Error ? error.message : 'Internal server error',
          type: 'api_error',
          code: 'internal_error',
        },
      },
      500,
    );
  }
});
326
+
327
/**
 * Handle streaming request
 *
 * Proxies the upstream SSE stream to the client, converting each `data:`
 * event to the OpenAI chat.completion.chunk format on the fly, and records
 * time-to-first-token and output-token usage on the audit context.
 *
 * @param c - Hono context of the client request.
 * @param upstreamUrl - Fully-resolved upstream streaming endpoint.
 * @param upstreamBody - Protocol-converted request payload.
 * @param upstreamHeaders - Headers for the upstream call.
 * @param adapter - Upstream protocol name used to pick the event converter.
 * @param model - Model name echoed into converted chunks.
 * @param auditCtx - Optional audit context; completed/errored as the stream ends.
 */
async function handleStreamingRequest(
  c: any,
  upstreamUrl: string,
  upstreamBody: unknown,
  upstreamHeaders: Record<string, string>,
  adapter: string,
  model: string,
  auditCtx?: ReturnType<typeof createAuditContext>,
) {
  const response = await makeUpstreamRequest(upstreamUrl, upstreamBody, upstreamHeaders, true);

  let firstTokenRecorded = false;
  let totalOutputTokens = 0;

  // For streaming, we need to convert events on the fly
  return streamSSE(c, async (stream) => {
    const reader = response.body?.getReader();
    if (!reader) {
      // Upstream returned no body; emit the terminator and finish cleanly.
      await stream.writeSSE({ data: '[DONE]' });
      if (auditCtx) await auditCtx.complete(200);
      return;
    }

    const decoder = new TextDecoder();
    let buffer = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // Accumulate partial chunks; the trailing (possibly incomplete) line
        // stays in the buffer until the next read completes it.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() || '';

        for (const line of lines) {
          if (!line.trim()) continue;

          if (line.startsWith('data: ')) {
            const data = line.slice(6);
            if (data === '[DONE]') {
              await stream.writeSSE({ data: '[DONE]' });
              continue;
            }

            try {
              const parsed = JSON.parse(data);
              const converted = convertStreamEvent(parsed, adapter, model);
              if (converted) {
                // Record first token for TTFT
                if (!firstTokenRecorded && auditCtx) {
                  auditCtx.recordFirstToken();
                  firstTokenRecorded = true;
                }
                // Track usage from stream events
                if (converted.usage) {
                  totalOutputTokens = converted.usage.completion_tokens || totalOutputTokens;
                }
                await stream.writeSSE({ data: JSON.stringify(converted) });
              }
            } catch {
              // Skip invalid JSON
            }
          } else if (line.startsWith('event: ')) {
            // SSE `event:` name lines are intentionally ignored; converted
            // payloads are emitted as plain `data:` events only.
          }
        }
      }

      // Send done signal
      await stream.writeSSE({ data: '[DONE]' });

      // Complete audit
      if (auditCtx) {
        auditCtx.setTokenUsage(0, totalOutputTokens); // Input tokens not available in streaming
        await auditCtx.complete(200);
      }
    } catch (err) {
      // Record streaming error
      if (auditCtx) {
        await auditCtx.error(err instanceof Error ? err.message : 'Streaming error', 'streaming_error', 500);
      }
      throw err;
    } finally {
      reader.releaseLock();
    }
  });
}
417
+
418
+ /**
419
+ * Convert stream event to OpenAI format
420
+ */
421
+ function convertStreamEvent(event: any, adapter: string, model: string): any {
422
+ switch (adapter) {
423
+ case 'anthropic':
424
+ return convertAnthropicStreamEvent(event, model);
425
+ case 'gemini':
426
+ return convertGeminiStreamEvent(event, model);
427
+ default:
428
+ return event;
429
+ }
430
+ }
431
+
432
+ /**
433
+ * Convert Anthropic stream event to OpenAI format
434
+ */
435
+ function convertAnthropicStreamEvent(event: any, model: string): any {
436
+ if (event.type === 'content_block_delta') {
437
+ if (event.delta?.type === 'text_delta') {
438
+ return {
439
+ id: `chatcmpl-${Date.now()}`,
440
+ object: 'chat.completion.chunk',
441
+ created: Math.floor(Date.now() / 1000),
442
+ model,
443
+ choices: [
444
+ {
445
+ index: 0,
446
+ delta: { content: event.delta.text },
447
+ finish_reason: null,
448
+ },
449
+ ],
450
+ };
451
+ }
452
+ } else if (event.type === 'message_delta') {
453
+ const finishReason =
454
+ event.delta?.stop_reason === 'end_turn'
455
+ ? 'stop'
456
+ : event.delta?.stop_reason === 'tool_use'
457
+ ? 'tool_calls'
458
+ : null;
459
+ return {
460
+ id: `chatcmpl-${Date.now()}`,
461
+ object: 'chat.completion.chunk',
462
+ created: Math.floor(Date.now() / 1000),
463
+ model,
464
+ choices: [
465
+ {
466
+ index: 0,
467
+ delta: {},
468
+ finish_reason: finishReason,
469
+ },
470
+ ],
471
+ usage: event.usage
472
+ ? {
473
+ prompt_tokens: 0,
474
+ completion_tokens: event.usage.output_tokens,
475
+ total_tokens: event.usage.output_tokens,
476
+ }
477
+ : undefined,
478
+ };
479
+ }
480
+ return null;
481
+ }
482
+
483
+ /**
484
+ * Convert Gemini stream event to OpenAI format
485
+ */
486
+ function convertGeminiStreamEvent(event: any, model: string): any {
487
+ if (event.candidates?.[0]?.content?.parts) {
488
+ const parts = event.candidates[0].content.parts;
489
+ const textParts = parts.filter((p: any) => p.text);
490
+
491
+ if (textParts.length > 0) {
492
+ return {
493
+ id: `chatcmpl-${Date.now()}`,
494
+ object: 'chat.completion.chunk',
495
+ created: Math.floor(Date.now() / 1000),
496
+ model,
497
+ choices: [
498
+ {
499
+ index: 0,
500
+ delta: { content: textParts.map((p: any) => p.text).join('') },
501
+ finish_reason: event.candidates[0].finishReason === 'STOP' ? 'stop' : null,
502
+ },
503
+ ],
504
+ };
505
+ }
506
+ }
507
+ return null;
508
+ }
509
+
510
// =========================================================================
// Anthropic Messages API
// =========================================================================

// POST /v1/messages — Anthropic-compatible endpoint. The validated request
// is forwarded unmodified to the upstream Anthropic-protocol provider (no
// payload conversion happens on this route).
// NOTE(review): `request.stream` is not handled here — confirm whether
// streaming Messages requests are expected on this endpoint.
app.post('/v1/messages', async (c) => {
  try {
    const body = await c.req.json();
    const parseResult = CreateMessageRequestSchema.safeParse(body);

    if (!parseResult.success) {
      // Anthropic-style error envelope ({ type: 'error', error: {...} }).
      return c.json(
        {
          type: 'error',
          error: {
            type: 'invalid_request_error',
            message: 'Invalid request',
          },
        },
        400,
      );
    }

    const request = parseResult.data;
    const modelConfig = resolveModelConfig(request.model);

    if (!modelConfig) {
      return c.json(
        {
          type: 'error',
          error: {
            type: 'invalid_request_error',
            message: `Model ${request.model} not configured`,
          },
        },
        404,
      );
    }

    // For Anthropic endpoint, we pass through to Anthropic or convert from OpenAI
    const adapter = modelConfig.adapter || 'anthropic';
    const baseUrl = normalizeBaseUrl(
      modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || 'https://api.anthropic.com',
    );

    const upstreamUrl = `${baseUrl}/v1/messages`;
    const upstreamHeaders = buildUpstreamHeaders(modelConfig, 'anthropic');

    // Log incoming request
    log.info(`→ POST /v1/messages model=${request.model} messages=${request.messages.length}`);

    const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders);
    const responseData = await response.json();

    // Log response
    const usage = responseData.usage;
    log.info(
      `← 200 /v1/messages model=${responseData.model || request.model} tokens=${(usage?.input_tokens || 0) + (usage?.output_tokens || 0)} (in=${usage?.input_tokens || 0} out=${usage?.output_tokens || 0})`,
    );

    return c.json(responseData);
  } catch (error) {
    log.error('Messages error:', error);
    return c.json(
      {
        type: 'error',
        error: {
          type: 'api_error',
          message: error instanceof Error ? error.message : 'Internal server error',
        },
      },
      500,
    );
  }
});
584
+
585
// =========================================================================
// Gemini Generate Content API
// =========================================================================

// POST /v1/models/:model:generateContent — Gemini-compatible passthrough.
// The ':' in the Gemini RPC path is escaped ('\\:') for Hono's route parser.
app.post('/v1/models/:model\\:generateContent', async (c) => {
  try {
    const model = c.req.param('model');
    if (!model) {
      return c.json({ error: { message: 'Model parameter is required' } }, 400);
    }

    const body = await c.req.json();
    const parseResult = CreateGenerateContentRequestSchema.safeParse(body);

    if (!parseResult.success) {
      return c.json({ error: { message: 'Invalid request' } }, 400);
    }

    const request = parseResult.data;
    const modelConfig = resolveModelConfig(model);

    if (!modelConfig) {
      return c.json({ error: { message: `Model ${model} not configured` } }, 404);
    }

    const baseUrl = normalizeBaseUrl(
      modelConfig.adapters?.gemini?.baseUrl || modelConfig.baseUrl || 'https://generativelanguage.googleapis.com',
    );

    const upstreamUrl = `${baseUrl}/v1/models/${model}:generateContent`;
    const upstreamHeaders = buildUpstreamHeaders(modelConfig, 'gemini');

    // Log incoming request
    log.info(`→ POST /v1/models/${model}:generateContent contents=${request.contents?.length || 0}`);

    // Forward the validated request unmodified (no protocol conversion).
    const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders);
    const responseData = await response.json();

    // Log response
    const usage = responseData.usageMetadata;
    log.info(
      `← 200 /v1/models/${model}:generateContent tokens=${usage?.totalTokenCount || 0} (in=${usage?.promptTokenCount || 0} out=${usage?.candidatesTokenCount || 0})`,
    );

    return c.json(responseData);
  } catch (error) {
    log.error('Generate content error:', error);
    return c.json({ error: { message: error instanceof Error ? error.message : 'Internal server error' } }, 500);
  }
});
635
+
636
// =========================================================================
// Gemini Streaming Generate Content API
// =========================================================================

// POST /v1/models/:model:streamGenerateContent — Gemini streaming
// passthrough. Upstream SSE `data:` lines are relayed to the client
// unchanged; a trailing [DONE] marker is appended after the stream ends.
app.post('/v1/models/:model\\:streamGenerateContent', async (c) => {
  try {
    const model = c.req.param('model');
    if (!model) {
      return c.json({ error: { message: 'Model parameter is required' } }, 400);
    }

    const body = await c.req.json();
    const parseResult = CreateGenerateContentRequestSchema.safeParse(body);

    if (!parseResult.success) {
      return c.json({ error: { message: 'Invalid request' } }, 400);
    }

    const request = parseResult.data;

    // Log incoming request
    log.info(`→ POST /v1/models/${model}:streamGenerateContent contents=${request.contents?.length || 0}`);

    const modelConfig = resolveModelConfig(model);

    if (!modelConfig) {
      return c.json({ error: { message: `Model ${model} not configured` } }, 404);
    }

    const baseUrl = normalizeBaseUrl(
      modelConfig.adapters?.gemini?.baseUrl || modelConfig.baseUrl || 'https://generativelanguage.googleapis.com',
    );

    const upstreamUrl = `${baseUrl}/v1/models/${model}:streamGenerateContent`;
    const upstreamHeaders = buildUpstreamHeaders(modelConfig, 'gemini');

    const response = await makeUpstreamRequest(upstreamUrl, request, upstreamHeaders, true);

    // Stream the response directly
    return streamSSE(c, async (stream) => {
      const reader = response.body?.getReader();
      if (!reader) {
        // No upstream body; terminate the client stream immediately.
        await stream.writeSSE({ data: '[DONE]' });
        return;
      }

      const decoder = new TextDecoder();
      let buffer = '';

      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;

          // Buffer partial chunks so only complete lines are processed;
          // the trailing fragment waits for the next read.
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split('\n');
          buffer = lines.pop() || '';

          for (const line of lines) {
            if (!line.trim()) continue;

            if (line.startsWith('data: ')) {
              const data = line.slice(6);
              if (data === '[DONE]') {
                await stream.writeSSE({ data: '[DONE]' });
                continue;
              }
              // Relay the raw upstream event without conversion.
              await stream.writeSSE({ data });
            }
          }
        }
        await stream.writeSSE({ data: '[DONE]' });
      } finally {
        reader.releaseLock();
      }
    });
  } catch (error) {
    log.error('Stream generate content error:', error);
    return c.json({ error: { message: error instanceof Error ? error.message : 'Internal server error' } }, 500);
  }
});
717
+
718
+ // =========================================================================
719
+ // OpenAI Responses API
720
+ // =========================================================================
721
+
722
+ app.post('/v1/responses', async (c) => {
723
+ let auditCtx: ReturnType<typeof createAuditContext> | null = null;
724
+
725
+ try {
726
+ const body = await c.req.json();
727
+ const parseResult = CreateResponseRequestSchema.safeParse(body);
728
+
729
+ if (!parseResult.success) {
730
+ return c.json(
731
+ {
732
+ error: {
733
+ message: 'Invalid request',
734
+ type: 'invalid_request_error',
735
+ param: null,
736
+ code: 'invalid_request',
737
+ details: parseResult.error.issues,
738
+ },
739
+ },
740
+ 400,
741
+ );
742
+ }
743
+
744
+ const request = parseResult.data;
745
+
746
+ // Handle previous_response_id - load previous context
747
+ let previousContext: { input: unknown; output: unknown[] } | null = null;
748
+ if (request.previous_response_id) {
749
+ try {
750
+ const { isDbInitialized, getEntityManager } = await import('../server/db');
751
+ const { ResponseEntity } = await import('../entities');
752
+ if (isDbInitialized()) {
753
+ const em = getEntityManager().fork();
754
+ const prevResponse = await em.findOne(ResponseEntity, { responseId: request.previous_response_id });
755
+ if (prevResponse) {
756
+ previousContext = {
757
+ input: prevResponse.input,
758
+ output: prevResponse.output,
759
+ };
760
+ log.info(`Loaded previous response: ${request.previous_response_id}`);
761
+ } else {
762
+ log.warn(`Previous response not found: ${request.previous_response_id}`);
763
+ }
764
+ }
765
+ } catch (e) {
766
+ log.warn('Failed to load previous response:', e);
767
+ }
768
+ }
769
+
770
+ const modelConfig = resolveModelConfig(request.model);
771
+
772
+ if (!modelConfig) {
773
+ return c.json(
774
+ {
775
+ error: {
776
+ message: `Model ${request.model} not configured`,
777
+ type: 'invalid_request_error',
778
+ param: 'model',
779
+ code: 'model_not_found',
780
+ },
781
+ },
782
+ 404,
783
+ );
784
+ }
785
+
786
+ // Determine adapter - for Responses API, we convert to Chat Completions
787
+ const adapter = modelConfig.adapter || 'openai';
788
+ const baseUrl = normalizeBaseUrl(
789
+ modelConfig.adapters?.[adapter]?.baseUrl || modelConfig.baseUrl || 'https://api.openai.com',
790
+ );
791
+
792
+ // Create audit context
793
+ auditCtx = createAuditContext({
794
+ method: 'POST',
795
+ endpoint: '/v1/responses',
796
+ model: request.model,
797
+ inputProtocol: ChatProtocol.OPENAI,
798
+ outputProtocol: ChatProtocol.OPENAI,
799
+ streaming: request.stream || false,
800
+ clientIp: extractClientIp(c),
801
+ userAgent: c.req.header('user-agent'),
802
+ requestMeta: {
803
+ temperature: request.temperature,
804
+ max_output_tokens: request.max_output_tokens,
805
+ },
806
+ });
807
+
808
+ // Log incoming request
809
+ const inputType =
810
+ typeof request.input === 'string'
811
+ ? 'string'
812
+ : Array.isArray(request.input)
813
+ ? `array[${request.input.length}]`
814
+ : 'object';
815
+ log.info(`→ POST /v1/responses model=${request.model} stream=${request.stream || false} input=${inputType}`);
816
+
817
+ // Convert Responses API request to Chat Completions format
818
+ const chatRequest = responsesToChatCompletions(request, previousContext);
819
+
820
+ // Build upstream request
821
+ const upstreamUrl = `${baseUrl}/v1/chat/completions`;
822
+ const upstreamHeaders = buildUpstreamHeaders(modelConfig, 'openai');
823
+
824
+ auditCtx.setProvider({
825
+ provider: adapter,
826
+ upstreamUrl,
827
+ });
828
+
829
+ // Handle streaming
830
+ if (request.stream) {
831
+ return handleResponsesStreamingRequest(c, upstreamUrl, chatRequest, upstreamHeaders, request.model, auditCtx);
832
+ }
833
+
834
+ // Non-streaming request
835
+ const response = await makeUpstreamRequest(upstreamUrl, chatRequest, upstreamHeaders);
836
+ const chatResponse = await response.json();
837
+
838
+ // Convert Chat Completions response to Responses format
839
+ const result = chatCompletionsToResponses(chatResponse, request.model);
840
+
841
+ // Store response for future previous_response_id lookups
842
+ try {
843
+ const { isDbInitialized, getEntityManager } = await import('../server/db');
844
+ const { ResponseEntity } = await import('../entities');
845
+ if (isDbInitialized()) {
846
+ const em = getEntityManager().fork();
847
+ const responseEntity = new ResponseEntity();
848
+ responseEntity.responseId = result.id;
849
+ responseEntity.model = result.model;
850
+ responseEntity.status = result.status;
851
+ responseEntity.input = request.input;
852
+ responseEntity.output = result.output;
853
+ responseEntity.usage = result.usage;
854
+ responseEntity.instructions = request.instructions ?? undefined;
855
+ responseEntity.previousResponseId = request.previous_response_id ?? undefined;
856
+ responseEntity.tools = request.tools ?? undefined;
857
+ responseEntity.toolChoice = request.tool_choice ?? undefined;
858
+ responseEntity.metadata = request.metadata as Record<string, unknown>;
859
+ responseEntity.durationMs = auditCtx?.getDuration();
860
+ em.persist(responseEntity);
861
+ await em.flush();
862
+ log.debug(`Stored response: ${result.id}`);
863
+ }
864
+ } catch (e) {
865
+ log.warn('Failed to store response:', e);
866
+ }
867
+
868
+ // Record usage
869
+ if (chatResponse.usage) {
870
+ auditCtx.setTokenUsage(chatResponse.usage.prompt_tokens || 0, chatResponse.usage.completion_tokens || 0);
871
+ }
872
+ auditCtx.setResponseMeta({
873
+ status: result.status,
874
+ output_items: result.output.length,
875
+ });
876
+ await auditCtx.complete(200);
877
+
878
+ // Log response
879
+ const usage = chatResponse.usage;
880
+ log.info(
881
+ `← 200 /v1/responses model=${result.model || request.model} status=${result.status} tokens=${usage?.total_tokens || 0}`,
882
+ );
883
+
884
+ return c.json(result);
885
+ } catch (error) {
886
+ if (auditCtx) {
887
+ await auditCtx.error(error instanceof Error ? error.message : 'Unknown error', 'internal_error', 500);
888
+ }
889
+ log.error('Responses API error:', error);
890
+ return c.json(
891
+ {
892
+ error: {
893
+ message: error instanceof Error ? error.message : 'Internal server error',
894
+ type: 'api_error',
895
+ code: 'internal_error',
896
+ },
897
+ },
898
+ 500,
899
+ );
900
+ }
901
+ });
902
+
903
+ /**
904
+ * Convert Responses API request to Chat Completions format
905
+ */
906
+ function responsesToChatCompletions(
907
+ request: CreateResponseRequest,
908
+ previousContext?: { input: unknown; output: unknown[] } | null,
909
+ ): CreateChatCompletionRequest {
910
+ const messages: CreateChatCompletionRequest['messages'] = [];
911
+
912
+ // Add system instruction if present
913
+ if (request.instructions) {
914
+ messages.push({
915
+ role: 'system',
916
+ content: request.instructions,
917
+ });
918
+ }
919
+
920
+ // Add previous context if available (previous_response_id)
921
+ if (previousContext) {
922
+ // Add previous input
923
+ const prevInput = previousContext.input;
924
+ if (typeof prevInput === 'string') {
925
+ messages.push({ role: 'user', content: prevInput });
926
+ } else if (Array.isArray(prevInput)) {
927
+ for (const item of prevInput as any[]) {
928
+ if (item.type === 'message') {
929
+ messages.push({
930
+ role: item.role as 'user' | 'assistant' | 'system',
931
+ content: typeof item.content === 'string' ? item.content : JSON.stringify(item.content),
932
+ } as any);
933
+ }
934
+ }
935
+ }
936
+
937
+ // Add previous output as assistant messages
938
+ for (const item of previousContext.output as any[]) {
939
+ if (item.type === 'message' && item.role === 'assistant') {
940
+ const textContent = item.content?.find((c: any) => c.type === 'text' || c.type === 'output_text');
941
+ if (textContent) {
942
+ messages.push({
943
+ role: 'assistant',
944
+ content: textContent.text,
945
+ });
946
+ }
947
+ }
948
+ }
949
+ }
950
+
951
+ // Convert current input to messages
952
+ if (typeof request.input === 'string') {
953
+ messages.push({
954
+ role: 'user',
955
+ content: request.input,
956
+ });
957
+ } else if (Array.isArray(request.input)) {
958
+ for (const item of request.input) {
959
+ if (item.type === 'message') {
960
+ messages.push({
961
+ role: item.role as 'user' | 'assistant' | 'system',
962
+ content: typeof item.content === 'string' ? item.content : JSON.stringify(item.content),
963
+ } as any);
964
+ }
965
+ // item_reference is handled differently - would need to fetch the referenced item
966
+ }
967
+ }
968
+
969
+ return {
970
+ model: request.model,
971
+ messages,
972
+ temperature: request.temperature,
973
+ top_p: request.top_p,
974
+ max_tokens: request.max_output_tokens,
975
+ stream: request.stream,
976
+ tools: request.tools,
977
+ tool_choice: request.tool_choice,
978
+ parallel_tool_calls: request.parallel_tool_calls,
979
+ metadata: request.metadata,
980
+ store: request.store,
981
+ user: request.user,
982
+ } as CreateChatCompletionRequest;
983
+ }
984
+
985
+ /**
986
+ * Convert Chat Completions response to Responses format
987
+ */
988
+ function chatCompletionsToResponses(chatResponse: any, model: string): any {
989
+ const responseId = `resp_${chatResponse.id || Date.now()}`;
990
+ const output: any[] = [];
991
+
992
+ for (const choice of chatResponse.choices || []) {
993
+ const message = choice.message;
994
+ if (message) {
995
+ output.push({
996
+ id: `item_${responseId}_${choice.index}`,
997
+ type: 'message',
998
+ role: 'assistant',
999
+ content: message.content ? [{ type: 'text', text: message.content }] : [],
1000
+ status: 'completed',
1001
+ });
1002
+
1003
+ // Handle tool calls
1004
+ if (message.tool_calls) {
1005
+ for (const toolCall of message.tool_calls) {
1006
+ output.push({
1007
+ id: toolCall.id,
1008
+ type: 'function_call',
1009
+ name: toolCall.function?.name,
1010
+ arguments: toolCall.function?.arguments,
1011
+ status: 'completed',
1012
+ });
1013
+ }
1014
+ }
1015
+ }
1016
+ }
1017
+
1018
+ return {
1019
+ id: responseId,
1020
+ object: 'response',
1021
+ created_at: chatResponse.created || Math.floor(Date.now() / 1000),
1022
+ model: chatResponse.model || model,
1023
+ status: 'completed',
1024
+ output,
1025
+ usage: chatResponse.usage,
1026
+ metadata: {},
1027
+ error: null,
1028
+ };
1029
+ }
1030
+
1031
/**
 * Handle Responses API streaming.
 *
 * Proxies a Chat Completions SSE stream from the upstream provider and
 * re-emits it as Responses API events: one `response.created` up front,
 * a `response.output_text.delta` per upstream content delta, then
 * `response.output_item.done` and `response.done` once the upstream stream
 * ends.
 *
 * @param c - Hono request context (typed `any`; only passed to streamSSE).
 * @param upstreamUrl - Fully-resolved upstream chat-completions endpoint.
 * @param upstreamBody - Chat Completions request body; `stream: true` is forced here.
 * @param upstreamHeaders - Headers (auth etc.) for the upstream request.
 * @param model - Model name echoed in the `response.created` event.
 * @param auditCtx - Optional audit context; records first-token latency and completion/error.
 * @returns The streaming SSE response produced by streamSSE.
 */
async function handleResponsesStreamingRequest(
  c: any,
  upstreamUrl: string,
  upstreamBody: unknown,
  upstreamHeaders: Record<string, string>,
  model: string,
  auditCtx?: ReturnType<typeof createAuditContext>,
) {
  // Force streaming on the upstream request regardless of the caller's body.
  const response = await makeUpstreamRequest(
    upstreamUrl,
    { ...(upstreamBody as any), stream: true },
    upstreamHeaders,
    true,
  );

  let firstTokenRecorded = false;
  // NOTE(review): id is time-based here, so the streaming path's response id
  // differs from the upstream chat id (the non-streaming path derives it from
  // the upstream id) — presumably acceptable; confirm clients don't rely on it.
  const responseId = `resp_${Date.now()}`;

  return streamSSE(c, async (stream) => {
    const reader = response.body?.getReader();
    if (!reader) {
      // Upstream gave no body: emit a terminal event so the client isn't left hanging.
      await stream.writeSSE({ event: 'response.done', data: JSON.stringify({ type: 'response.done' }) });
      if (auditCtx) await auditCtx.complete(200);
      return;
    }

    // Send initial event
    await stream.writeSSE({
      event: 'response.created',
      data: JSON.stringify({
        type: 'response.created',
        response: {
          id: responseId,
          object: 'response',
          created_at: Math.floor(Date.now() / 1000),
          model,
          status: 'in_progress',
          output: [],
        },
      }),
    });

    const decoder = new TextDecoder();
    // Carries any partial SSE line across chunk boundaries.
    let buffer = '';
    // Single output item id; this relay only ever reports output_index 0.
    let outputItemId = `item_${responseId}_0`;

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // Decode with stream:true so multi-byte chars split across chunks survive.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // Last element may be an incomplete line — keep it for the next chunk.
        buffer = lines.pop() || '';

        for (const line of lines) {
          // Only `data: ` SSE fields carry payloads; skip comments/blank lines.
          if (!line.trim() || !line.startsWith('data: ')) continue;

          const data = line.slice(6);
          if (data === '[DONE]') continue;

          try {
            const parsed = JSON.parse(data);
            const delta = parsed.choices?.[0]?.delta;

            if (delta?.content) {
              // First content token → record time-to-first-token once.
              if (!firstTokenRecorded && auditCtx) {
                auditCtx.recordFirstToken();
                firstTokenRecorded = true;
              }

              // Re-emit the upstream content delta in Responses event format.
              await stream.writeSSE({
                event: 'response.output_text.delta',
                data: JSON.stringify({
                  type: 'response.output_text.delta',
                  output_index: 0,
                  content_index: 0,
                  delta: delta.content,
                }),
              });
            }
          } catch {
            // Skip invalid JSON
          }
        }
      }

      // Send completion events
      await stream.writeSSE({
        event: 'response.output_item.done',
        data: JSON.stringify({
          type: 'response.output_item.done',
          output_index: 0,
          item: {
            id: outputItemId,
            type: 'message',
            role: 'assistant',
            status: 'completed',
          },
        }),
      });

      await stream.writeSSE({
        event: 'response.done',
        data: JSON.stringify({
          type: 'response.done',
          response: {
            id: responseId,
            status: 'completed',
          },
        }),
      });

      if (auditCtx) await auditCtx.complete(200);
    } catch (err) {
      // Audit the failure, then rethrow so streamSSE can surface it.
      if (auditCtx) {
        await auditCtx.error(err instanceof Error ? err.message : 'Streaming error', 'streaming_error', 500);
      }
      throw err;
    } finally {
      reader.releaseLock();
    }
  });
}
1158
+
1159
+ // =========================================================================
1160
+ // Models endpoint
1161
+ // =========================================================================
1162
+
1163
+ app.get('/v1/models', async (c) => {
1164
+ const fetchUpstream = c.req.query('fetch') === 'true';
1165
+ const allModels: Array<{
1166
+ id: string;
1167
+ object: string;
1168
+ created: number;
1169
+ owned_by: string;
1170
+ context_window?: number;
1171
+ max_input_tokens?: number;
1172
+ max_output_tokens?: number;
1173
+ }> = [];
1174
+
1175
+ // Add configured models
1176
+ for (const m of config.models || []) {
1177
+ allModels.push({
1178
+ id: m.name,
1179
+ object: 'model',
1180
+ created: Math.floor(Date.now() / 1000),
1181
+ owned_by: 'mcps',
1182
+ context_window: m.contextWindow,
1183
+ max_input_tokens: m.maxInputTokens,
1184
+ max_output_tokens: m.maxOutputTokens,
1185
+ });
1186
+
1187
+ // Fetch upstream models if enabled
1188
+ if (fetchUpstream && m.fetchUpstreamModels && m.baseUrl) {
1189
+ try {
1190
+ const headers: Record<string, string> = {
1191
+ 'Content-Type': 'application/json',
1192
+ };
1193
+ if (m.apiKey) {
1194
+ headers.Authorization = `Bearer ${m.apiKey}`;
1195
+ }
1196
+ Object.assign(headers, m.headers || {});
1197
+
1198
+ const normalizedUrl = normalizeBaseUrl(m.baseUrl);
1199
+ const upstreamUrl = `${normalizedUrl}/v1/models`;
1200
+
1201
+ const res = await fetch(upstreamUrl, { headers });
1202
+ if (res.ok) {
1203
+ const data = await res.json();
1204
+ if (data.data && Array.isArray(data.data)) {
1205
+ for (const model of data.data) {
1206
+ // Avoid duplicates
1207
+ if (!allModels.some((existing) => existing.id === model.id)) {
1208
+ allModels.push({
1209
+ id: model.id,
1210
+ object: model.object || 'model',
1211
+ created: model.created || Math.floor(Date.now() / 1000),
1212
+ owned_by: model.owned_by || m.name.split('/')[0] || 'upstream',
1213
+ });
1214
+ }
1215
+ }
1216
+ }
1217
+ }
1218
+ } catch (e) {
1219
+ log.warn(`Failed to fetch upstream models from ${m.baseUrl}: ${e}`);
1220
+ }
1221
+ }
1222
+ }
1223
+
1224
+ log.debug(`→ GET /v1/models count=${allModels.length} fetch=${fetchUpstream}`);
1225
+
1226
+ return c.json({
1227
+ object: 'list',
1228
+ data: allModels,
1229
+ });
1230
+ });
1231
+
1232
+ return app;
1233
+ }