waypoi 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/instructions/ui.instructions.md +42 -0
- package/.github/workflows/ci.yml +35 -0
- package/.github/workflows/publish.yml +71 -0
- package/.github/workflows/release.yml +48 -0
- package/.playwright-mcp/console-2026-04-04T01-41-10-746Z.log +2 -0
- package/.playwright-mcp/console-2026-04-04T01-41-28-799Z.log +3 -0
- package/.playwright-mcp/console-2026-04-05T02-26-51-909Z.log +76 -0
- package/.playwright-mcp/page-2026-04-04T01-41-10-816Z.yml +1 -0
- package/.playwright-mcp/page-2026-04-04T01-41-29-141Z.yml +77 -0
- package/.playwright-mcp/page-2026-04-04T01-41-42-633Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T01-42-03-929Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-12-54-813Z.yml +6 -0
- package/.playwright-mcp/page-2026-04-04T02-14-58-600Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-03-923Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-07-426Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-25-729Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-16-22-984Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-17-00-599Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-17-50-874Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-05T02-26-55-570Z.yml +6 -0
- package/AGENTS.md +48 -0
- package/CHANGELOG.md +131 -0
- package/README.md +552 -0
- package/assets/agent-mode.png +0 -0
- package/assets/categorize.png +0 -0
- package/assets/dashboard.png +0 -0
- package/assets/endpoint-proxy.png +0 -0
- package/assets/icon.png +0 -0
- package/assets/mcp-generate-image.png +0 -0
- package/assets/mcp-understand-image.png +0 -0
- package/assets/peek-token-flow.png +0 -0
- package/assets/playground.png +0 -0
- package/assets/sankey.png +0 -0
- package/cli/index.ts +2805 -0
- package/cli/legacyRewrite.ts +108 -0
- package/cli/modelRef.ts +24 -0
- package/dist/cli/index.js +2536 -0
- package/dist/cli/legacyRewrite.js +92 -0
- package/dist/cli/modelRef.js +20 -0
- package/dist/src/benchmark/artifacts.js +131 -0
- package/dist/src/benchmark/capabilityClassifier.js +81 -0
- package/dist/src/benchmark/capabilityStore.js +144 -0
- package/dist/src/benchmark/config.js +238 -0
- package/dist/src/benchmark/gates.js +118 -0
- package/dist/src/benchmark/jobs.js +252 -0
- package/dist/src/benchmark/runner.js +1847 -0
- package/dist/src/benchmark/schema.js +353 -0
- package/dist/src/benchmark/suites.js +314 -0
- package/dist/src/benchmark/tinyQaDataset.js +422 -0
- package/dist/src/benchmark/types.js +25 -0
- package/dist/src/config.js +47 -0
- package/dist/src/index.js +178 -0
- package/dist/src/mcp/client.js +215 -0
- package/dist/src/mcp/discovery.js +226 -0
- package/dist/src/mcp/policy.js +65 -0
- package/dist/src/mcp/registry.js +129 -0
- package/dist/src/mcp/service.js +460 -0
- package/dist/src/middleware/auth.js +179 -0
- package/dist/src/middleware/requestCapture.js +192 -0
- package/dist/src/middleware/requestStats.js +118 -0
- package/dist/src/pools/builder.js +132 -0
- package/dist/src/pools/repository.js +69 -0
- package/dist/src/pools/scheduler.js +360 -0
- package/dist/src/pools/types.js +2 -0
- package/dist/src/protocols/adapters/dashscope.js +267 -0
- package/dist/src/protocols/adapters/inferenceV2.js +346 -0
- package/dist/src/protocols/adapters/openai.js +27 -0
- package/dist/src/protocols/registry.js +99 -0
- package/dist/src/protocols/types.js +2 -0
- package/dist/src/providers/health.js +153 -0
- package/dist/src/providers/importer.js +289 -0
- package/dist/src/providers/modelRegistry.js +313 -0
- package/dist/src/providers/repository.js +361 -0
- package/dist/src/providers/types.js +2 -0
- package/dist/src/routes/admin.js +531 -0
- package/dist/src/routes/audio.js +295 -0
- package/dist/src/routes/chat.js +240 -0
- package/dist/src/routes/embeddings.js +157 -0
- package/dist/src/routes/images.js +288 -0
- package/dist/src/routes/mcp.js +256 -0
- package/dist/src/routes/mcpService.js +100 -0
- package/dist/src/routes/models.js +48 -0
- package/dist/src/routes/responses.js +711 -0
- package/dist/src/routes/sessions.js +450 -0
- package/dist/src/routes/stats.js +270 -0
- package/dist/src/routes/ui.js +97 -0
- package/dist/src/routes/videos.js +107 -0
- package/dist/src/routing/router.js +338 -0
- package/dist/src/services/imageGeneration.js +280 -0
- package/dist/src/services/imageUnderstanding.js +352 -0
- package/dist/src/services/videoGeneration.js +79 -0
- package/dist/src/storage/captureRepository.js +1591 -0
- package/dist/src/storage/files.js +157 -0
- package/dist/src/storage/imageCache.js +346 -0
- package/dist/src/storage/repositories.js +388 -0
- package/dist/src/storage/sessionRepository.js +370 -0
- package/dist/src/storage/statsRepository.js +204 -0
- package/dist/src/transport/httpClient.js +126 -0
- package/dist/src/types.js +2 -0
- package/dist/src/utils/messageMedia.js +285 -0
- package/dist/src/utils/modelCapabilities.js +108 -0
- package/dist/src/utils/modelDiscovery.js +170 -0
- package/dist/src/version.js +5 -0
- package/dist/src/workers/captureRetention.js +25 -0
- package/dist/src/workers/configWatcher.js +91 -0
- package/dist/src/workers/healthChecker.js +21 -0
- package/dist/src/workers/statsRotation.js +41 -0
- package/docs/LLM/output_schema.md +312 -0
- package/docs/benchmark.md +208 -0
- package/docs/mcp-guidelines.md +125 -0
- package/docs/mcp-service.md +178 -0
- package/docs/opencode.md +86 -0
- package/docs/providers.md +79 -0
- package/examples/benchmark.config.yaml +28 -0
- package/examples/providers/alibaba-dashscope.yaml +88 -0
- package/examples/providers/alibaba-llm.yaml +64 -0
- package/examples/providers/alibaba-registry.yaml +7 -0
- package/examples/providers/inference-v2-ray.yaml +29 -0
- package/examples/scenarios/assets/omni-call-sample.wav +0 -0
- package/examples/scenarios/custom.jsonl +5 -0
- package/examples/scenarios/custom.yaml +40 -0
- package/model-form-v2.png +0 -0
- package/package.json +66 -0
- package/provider-form-v2.png +0 -0
- package/provider-form.png +0 -0
- package/scripts/manual-test.sh +11 -0
- package/scripts/version-from-git.js +23 -0
- package/src/benchmark/artifacts.ts +149 -0
- package/src/benchmark/capabilityClassifier.ts +99 -0
- package/src/benchmark/capabilityStore.ts +174 -0
- package/src/benchmark/config.ts +337 -0
- package/src/benchmark/gates.ts +164 -0
- package/src/benchmark/jobs.ts +312 -0
- package/src/benchmark/runner.ts +2519 -0
- package/src/benchmark/schema.ts +443 -0
- package/src/benchmark/suites.ts +323 -0
- package/src/benchmark/tinyQaDataset.ts +428 -0
- package/src/benchmark/types.ts +442 -0
- package/src/config.ts +44 -0
- package/src/index.ts +195 -0
- package/src/mcp/client.ts +305 -0
- package/src/mcp/discovery.ts +266 -0
- package/src/mcp/policy.ts +105 -0
- package/src/mcp/registry.ts +164 -0
- package/src/mcp/service.ts +611 -0
- package/src/middleware/auth.ts +251 -0
- package/src/middleware/requestCapture.ts +245 -0
- package/src/middleware/requestStats.ts +163 -0
- package/src/pools/builder.ts +159 -0
- package/src/pools/repository.ts +71 -0
- package/src/pools/scheduler.ts +425 -0
- package/src/pools/types.ts +117 -0
- package/src/protocols/adapters/dashscope.ts +335 -0
- package/src/protocols/adapters/inferenceV2.ts +428 -0
- package/src/protocols/adapters/openai.ts +32 -0
- package/src/protocols/registry.ts +117 -0
- package/src/protocols/types.ts +81 -0
- package/src/providers/health.ts +207 -0
- package/src/providers/importer.ts +402 -0
- package/src/providers/modelRegistry.ts +415 -0
- package/src/providers/repository.ts +439 -0
- package/src/providers/types.ts +113 -0
- package/src/routes/admin.ts +666 -0
- package/src/routes/audio.ts +372 -0
- package/src/routes/chat.ts +301 -0
- package/src/routes/embeddings.ts +197 -0
- package/src/routes/images.ts +356 -0
- package/src/routes/mcp.ts +320 -0
- package/src/routes/mcpService.ts +114 -0
- package/src/routes/models.ts +50 -0
- package/src/routes/responses.ts +872 -0
- package/src/routes/sessions.ts +558 -0
- package/src/routes/stats.ts +312 -0
- package/src/routes/ui.ts +96 -0
- package/src/routes/videos.ts +132 -0
- package/src/routing/router.ts +501 -0
- package/src/services/imageGeneration.ts +396 -0
- package/src/services/imageUnderstanding.ts +449 -0
- package/src/services/videoGeneration.ts +127 -0
- package/src/storage/captureRepository.ts +1835 -0
- package/src/storage/files.ts +178 -0
- package/src/storage/imageCache.ts +405 -0
- package/src/storage/repositories.ts +494 -0
- package/src/storage/sessionRepository.ts +419 -0
- package/src/storage/statsRepository.ts +238 -0
- package/src/transport/httpClient.ts +145 -0
- package/src/types.ts +322 -0
- package/src/utils/messageMedia.ts +293 -0
- package/src/utils/modelCapabilities.ts +161 -0
- package/src/utils/modelDiscovery.ts +203 -0
- package/src/workers/captureRetention.ts +25 -0
- package/src/workers/configWatcher.ts +115 -0
- package/src/workers/healthChecker.ts +22 -0
- package/src/workers/statsRotation.ts +49 -0
- package/tests/benchmarkAdminRoutes.test.ts +82 -0
- package/tests/benchmarkBasics.test.ts +116 -0
- package/tests/captureAdminRoutes.test.ts +420 -0
- package/tests/captureRepository.test.ts +797 -0
- package/tests/cliLegacyRewrite.test.ts +45 -0
- package/tests/imageGeneration.service.test.ts +107 -0
- package/tests/imageUnderstanding.service.test.ts +123 -0
- package/tests/mcpPolicy.test.ts +105 -0
- package/tests/mcpService.test.ts +1245 -0
- package/tests/modelRef.test.ts +23 -0
- package/tests/modelsRoutes.test.ts +154 -0
- package/tests/sessionMediaCache.test.ts +167 -0
- package/tests/statsRoutes.test.ts +323 -0
- package/tsconfig.json +15 -0
- package/ui/index.html +16 -0
- package/ui/package-lock.json +8521 -0
- package/ui/package.json +52 -0
- package/ui/postcss.config.js +6 -0
- package/ui/public/assets/apple-touch-icon.png +0 -0
- package/ui/public/assets/favicon-16.png +0 -0
- package/ui/public/assets/favicon-32.png +0 -0
- package/ui/public/assets/icon-192.png +0 -0
- package/ui/public/assets/icon-512.png +0 -0
- package/ui/src/App.tsx +27 -0
- package/ui/src/api/client.ts +1503 -0
- package/ui/src/components/EndpointUsageGuide.tsx +361 -0
- package/ui/src/components/Layout.tsx +124 -0
- package/ui/src/components/MessageContent.tsx +365 -0
- package/ui/src/components/ToolCallMessage.tsx +179 -0
- package/ui/src/components/ToolPicker.tsx +442 -0
- package/ui/src/components/messageContentParser.test.ts +41 -0
- package/ui/src/components/messageContentParser.ts +73 -0
- package/ui/src/components/thinkingPreview.test.ts +27 -0
- package/ui/src/components/thinkingPreview.ts +15 -0
- package/ui/src/components/toMermaidSankey.test.ts +78 -0
- package/ui/src/components/toMermaidSankey.ts +56 -0
- package/ui/src/components/ui/button.tsx +58 -0
- package/ui/src/components/ui/input.tsx +21 -0
- package/ui/src/components/ui/textarea.tsx +21 -0
- package/ui/src/lib/utils.ts +6 -0
- package/ui/src/main.tsx +9 -0
- package/ui/src/pages/AgentPlayground.tsx +2010 -0
- package/ui/src/pages/Benchmark.tsx +988 -0
- package/ui/src/pages/Dashboard.tsx +581 -0
- package/ui/src/pages/Peek.tsx +962 -0
- package/ui/src/pages/Settings.tsx +2013 -0
- package/ui/src/pages/agentPlaygroundPayload.test.ts +109 -0
- package/ui/src/pages/agentPlaygroundPayload.ts +97 -0
- package/ui/src/pages/agentThinkingContent.test.ts +50 -0
- package/ui/src/pages/agentThinkingContent.ts +57 -0
- package/ui/src/pages/dashboardTokenUsage.test.ts +66 -0
- package/ui/src/pages/dashboardTokenUsage.ts +36 -0
- package/ui/src/pages/imageUpload.test.ts +39 -0
- package/ui/src/pages/imageUpload.ts +71 -0
- package/ui/src/pages/peekFilters.test.ts +29 -0
- package/ui/src/pages/peekFilters.ts +13 -0
- package/ui/src/pages/peekMedia.test.ts +58 -0
- package/ui/src/pages/peekMedia.ts +148 -0
- package/ui/src/pages/sessionAutoTitle.test.ts +128 -0
- package/ui/src/pages/sessionAutoTitle.ts +106 -0
- package/ui/src/stores/settings.ts +58 -0
- package/ui/src/styles/globals.css +223 -0
- package/ui/src/vite-env.d.ts +8 -0
- package/ui/tailwind.config.js +106 -0
- package/ui/tsconfig.json +32 -0
- package/ui/vite.config.ts +37 -0
|
@@ -0,0 +1,872 @@
|
|
|
1
|
+
import { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
|
|
2
|
+
import { randomUUID } from "crypto";
|
|
3
|
+
import { pipeline } from "stream";
|
|
4
|
+
import { routeRequest } from "../routing/router";
|
|
5
|
+
import { logRequest } from "../storage/repositories";
|
|
6
|
+
import { RequestLog, ResponsesApiRequest } from "../types";
|
|
7
|
+
import { StoragePaths } from "../storage/files";
|
|
8
|
+
import { selectPoolCandidates } from "../pools/scheduler";
|
|
9
|
+
import { pickBestProviderModelByCapabilities } from "../providers/modelRegistry";
|
|
10
|
+
import { normalizeMessagesForUpstream, scanMessageModalities } from "../utils/messageMedia";
|
|
11
|
+
import { setCaptureDerivedRequest, setCaptureError, setCaptureResponseOverride, setCaptureRouting } from "../middleware/requestCapture";
|
|
12
|
+
import { setStatsPayload } from "../middleware/requestStats";
|
|
13
|
+
|
|
14
|
+
/**
 * Responses API compatibility shim.
 *
 * Some newer SDK flows prefer the "Responses API" pattern. This endpoint
 * translates those requests to /v1/chat/completions internally.
 *
 * Input formats supported:
 * - { input: "string" } → single user message
 * - { input: [{ role, content }] } → message array
 * - { instructions: "..." } → system message prepended
 *
 * Request flow: validate the body (falling back to a default model when none
 * is given), build a chat-completions payload, route it upstream, then answer
 * either as a Responses-style SSE stream (`stream: true`) or as a single
 * Responses-style JSON document.
 *
 * @param app Fastify instance on which POST /v1/responses is registered.
 * @param paths Storage locations passed through to routing, logging and media helpers.
 */
export async function registerResponsesRoutes(app: FastifyInstance, paths: StoragePaths): Promise<void> {
  app.post("/v1/responses", async (req: FastifyRequest, reply: FastifyReply) => {
    const body = req.body as ResponsesApiRequest | undefined;

    // No model supplied: use the default model, or reject when there is none.
    if (!body?.model) {
      const fallback = await pickDefaultModel(paths);
      if (!fallback) {
        reply.code(400).send({ error: { message: "model is required" } });
        return;
      }
      if (body) body.model = fallback;
    }

    if (!body?.input) {
      reply.code(400).send({ error: { message: "input is required" } });
      return;
    }

    // Transform to chat completions format
    const messages = transformToMessages(body);

    const transformedTools = body.tools ? transformTools(body.tools) : undefined;

    // Track if client wants streaming
    const clientWantsStreaming = body.stream ?? false;

    const normalizedMessages = await normalizeMessagesForUpstream(paths, messages);
    const media = scanMessageModalities(normalizedMessages);

    const chatPayload = {
      model: body.model,
      messages: normalizedMessages,
      stream: clientWantsStreaming, // Pass through streaming preference
      temperature: body.temperature,
      top_p: body.top_p,
      max_tokens: body.max_tokens,
      presence_penalty: body.presence_penalty,
      frequency_penalty: body.frequency_penalty,
      seed: body.seed,
      stop: body.stop,
      tools: transformedTools,
      tool_choice: body.tool_choice
    };
    setCaptureDerivedRequest(reply, {
      originalRequest: body,
      normalizedRequest: chatPayload,
    });

    const requestId = randomUUID();
    const start = Date.now();
    const controller = new AbortController();

    // Abort the upstream call when the client goes away. The listener is on the
    // response rather than the request: on Node 16+ the request's "close" event
    // is emitted once the request has been completed (body consumed), not when
    // the connection drops, so it cannot signal a disconnect here. The
    // response's "close" also fires after a normal finish, hence the guard.
    reply.raw.on("close", () => {
      if (!reply.raw.writableEnded) controller.abort();
    });

    try {
      const outcome = await routeRequest(
        paths,
        body.model,
        "/v1/chat/completions",
        chatPayload as Record<string, unknown>,
        req.headers as Record<string, string | string[] | undefined>,
        controller.signal,
        {
          // Only route to endpoints able to accept every modality found in the messages.
          requiredInput: media.hasAudio
            ? media.hasImage
              ? ["text", "image", "audio"]
              : ["text", "audio"]
            : media.hasImage
              ? ["text", "image"]
              : ["text"],
          requiredOutput: ["text"],
        }
      );

      // Handle streaming response
      if (clientWantsStreaming) {
        await streamResponsesAPI(reply, outcome.attempt.response, requestId, body.model);
        // The SSE body itself is not stored; only a marker describing it is captured.
        setCaptureResponseOverride(
          reply,
          {
            $type: "stream",
            contentType: "text/event-stream",
            note: "Responses API SSE stream captured as metadata",
          },
          outcome.attempt.response.headers
        );
        setCaptureRouting(reply, {
          publicModel: body.model,
          endpointId: outcome.attempt.endpoint.id,
          endpointName: outcome.attempt.endpoint.name,
          upstreamModel: outcome.attempt.upstreamModel,
        });
        setStatsPayload(reply, {
          endpointId: outcome.attempt.endpoint.id,
          endpointName: outcome.attempt.endpoint.name,
          upstreamModel: outcome.attempt.upstreamModel,
        });
        await logRequest(paths, buildLog(
          requestId,
          body.model,
          outcome,
          Date.now() - start,
          true,
          0 // Token count not available in streaming
        ));
        return;
      }

      // Non-streaming response
      const upstreamBody = await readBody(outcome.attempt.response);

      // Transform response to Responses API format
      const responsesFormat = transformToResponsesFormat(upstreamBody.payload, requestId);

      setHeaders(reply, outcome.attempt.response.headers);
      reply.code(outcome.attempt.response.statusCode).send(responsesFormat);
      setCaptureRouting(reply, {
        publicModel: body.model,
        endpointId: outcome.attempt.endpoint.id,
        endpointName: outcome.attempt.endpoint.name,
        upstreamModel: outcome.attempt.upstreamModel,
      });
      setStatsPayload(reply, {
        endpointId: outcome.attempt.endpoint.id,
        endpointName: outcome.attempt.endpoint.name,
        upstreamModel: outcome.attempt.upstreamModel,
        totalTokens: upstreamBody.totalTokens,
        promptTokens: upstreamBody.promptTokens,
        completionTokens: upstreamBody.completionTokens,
      });

      await logRequest(paths, buildLog(
        requestId,
        body.model,
        outcome,
        Date.now() - start,
        false,
        upstreamBody.totalTokens
      ));
    } catch (error) {
      // Errors may carry a `type` discriminator; otherwise fall back to the Error name.
      const errorType = (error as { type?: string }).type ?? (error as Error).name;
      setCaptureError(reply, { type: errorType, message: (error as Error).message });
      await logRequest(paths, {
        requestId,
        ts: new Date(),
        route: { publicModel: body?.model ?? "unknown" },
        request: { stream: Boolean(body?.stream) },
        result: { errorType, errorMessage: (error as Error).message }
      });
      // Don't try to send error if headers already sent (streaming started)
      if (reply.raw.headersSent) {
        req.log.warn({ err: error }, "Error after streaming started");
        reply.raw.end();
        return;
      }
      if (errorType === "invalid_request") {
        reply.code(400).send({ error: { message: (error as Error).message } });
        return;
      }
      if (errorType === "tls_verify_failed") {
        reply.code(502).send({ error: { message: (error as Error).message } });
        return;
      }
      // Configuration/routing problems → 400, rate limiting → 429, anything else → 502.
      const status =
        errorType === "no_endpoints" ||
        errorType === "protocol_stream_unsupported" ||
        errorType === "unsupported_protocol" ||
        errorType === "invalid_protocol_config"
          ? 400
          : errorType === "rate_limited"
            ? 429
            : 502;
      reply.code(status).send({ error: { message: "Upstream unavailable" } });
    }
  });
}
|
|
201
|
+
|
|
202
|
+
/**
 * Transform Responses-style input to OpenAI chat completions messages.
 *
 * Some clients send a variety of item types:
 * - { type: "message", role: "user/assistant/developer", content: [...] }
 * - { type: "function_call", name: "...", arguments: "...", call_id: "..." }
 * - { type: "function_call_output", call_id: "...", output: "..." }
 *
 * OpenAI chat completions expects:
 * - { role: "user/assistant/system", content: "..." }
 * - Assistant messages can have tool_calls: [{ id, type: "function", function: { name, arguments } }]
 * - { role: "tool", tool_call_id: "...", content: "..." }
 *
 * Conversion rules:
 * - `instructions` becomes a leading system message.
 * - A string `input` becomes one user message.
 * - Consecutive `function_call` items are collected into one assistant message
 *   with `tool_calls`; items that produce no message of their own (non-objects
 *   or unrecognized item types) do not break such a group.
 * - The "developer" role is mapped to "system".
 *
 * @param body Parsed Responses API request.
 * @returns Messages in chat-completions shape, in input order.
 */
function transformToMessages(body: ResponsesApiRequest): Array<Record<string, unknown>> {
  const messages: Array<Record<string, unknown>> = [];

  // Add system message from instructions if present
  if (body.instructions) {
    messages.push({ role: "system", content: body.instructions });
  }

  if (typeof body.input === "string") {
    messages.push({ role: "user", content: body.input });
    return messages;
  }
  if (!Array.isArray(body.input)) {
    return messages;
  }

  // function_call items waiting to be emitted together as one assistant message
  let pendingToolCalls: Array<{ id: string; type: string; function: { name: string; arguments: string } }> = [];

  const flushToolCalls = (): void => {
    if (pendingToolCalls.length === 0) return;
    messages.push({
      role: "assistant",
      content: null,
      tool_calls: pendingToolCalls
    });
    pendingToolCalls = [];
  };

  for (const item of body.input) {
    if (!item || typeof item !== "object") continue;

    const itemObj = item as Record<string, unknown>;
    const itemType = itemObj.type;

    // function_call items are grouped into a single assistant message
    if (itemType === "function_call") {
      const rawArguments = itemObj.arguments;
      pendingToolCalls.push({
        id: (itemObj.call_id as string) || (itemObj.id as string) || "",
        type: "function",
        function: {
          name: itemObj.name as string,
          // Chat completions requires a JSON string here; serialize anything
          // else and use an empty object when no arguments were supplied.
          arguments: typeof rawArguments === "string" ? rawArguments : JSON.stringify(rawArguments ?? {})
        }
      });
      continue;
    }

    // function_call_output items become tool role messages
    if (itemType === "function_call_output") {
      flushToolCalls();
      const output = itemObj.output;
      messages.push({
        role: "tool",
        tool_call_id: itemObj.call_id as string,
        // JSON.stringify(undefined) is undefined, which would drop `content`
        // from the serialized message, so a missing output becomes "".
        content: typeof output === "string" ? output : output === undefined ? "" : JSON.stringify(output)
      });
      continue;
    }

    // Regular messages: { type: "message", role, content } as well as the
    // legacy form without `type`; both only need role and content.
    if ("role" in itemObj && "content" in itemObj) {
      flushToolCalls();
      const role = itemObj.role as string;
      messages.push({
        role: role === "developer" ? "system" : role,
        content: transformMessageContent(itemObj.content)
      });
    }
  }

  // Flush any remaining pending tool calls
  flushToolCalls();

  return messages;
}
|
|
301
|
+
|
|
302
|
+
/**
 * Transform message content, normalizing response content part types to OpenAI format.
 * Some clients send: { type: "input_text", text: "..." } for user messages
 * Some clients send: { type: "output_text", text: "..." } for assistant messages
 * OpenAI expects: { type: "text", text: "..." }
 *
 * Additional rules:
 * - Strings are returned unchanged.
 * - null/undefined content becomes "" (rather than an array holding null).
 * - { type: "input_image", image_url } becomes an "image_url" part; a plain
 *   string URL is wrapped as { url } (carrying over a string `detail`), which
 *   is the object form chat completions uses.
 * - A single non-array value is treated as a one-element part list.
 * - Every other part (including audio/video shorthands) is passed through
 *   untouched.
 *
 * @param content Raw `content` value of a Responses-style message item.
 * @returns A string, or an array of content parts.
 */
function transformMessageContent(content: unknown): string | unknown[] {
  if (typeof content === "string") {
    return content;
  }
  if (content === null || content === undefined) {
    return "";
  }

  const normalizePart = (part: unknown): unknown => {
    if (!part || typeof part !== "object") {
      return part;
    }
    const p = part as Record<string, unknown>;

    // input_text is typically user content, output_text assistant content
    if (p.type === "input_text" || p.type === "output_text") {
      return { ...p, type: "text" };
    }

    if (p.type === "input_image" && p.image_url) {
      if (typeof p.image_url === "string") {
        const { detail, ...rest } = p;
        const imageUrl: Record<string, unknown> = { url: p.image_url };
        if (typeof detail === "string") {
          imageUrl.detail = detail;
        }
        return { ...rest, type: "image_url", image_url: imageUrl };
      }
      return { ...p, type: "image_url" };
    }

    // input_audio / audio / video parts (including the shorthand
    // {type:"audio", audio:"..."}) are left as-is here.
    return part;
  };

  const parts = Array.isArray(content) ? content : [content];
  return parts.map(normalizePart);
}
|
|
337
|
+
|
|
338
|
+
/**
 * Transform Responses-style tools to OpenAI function-calling format.
 *
 * Some clients send tools like:
 * { type: "function", name: "...", description: "...", parameters: {...} }
 *
 * OpenAI expects:
 * { type: "function", function: { name: "...", description: "...", parameters: {...} } }
 *
 * Special case: web search tools are filtered out as they're not supported by
 * the OpenAI function-calling format. This covers every tool whose type starts
 * with "web_search" (e.g. "web_search", "web_search_preview").
 *
 * Tools that already carry a `function` property, non-object entries and
 * tools of any other type are returned unchanged.
 *
 * @param tools Tool definitions from the Responses-style request.
 * @returns Tool definitions for the chat-completions payload, in input order.
 */
function transformTools(tools: unknown[]): unknown[] {
  const converted: unknown[] = [];

  for (const tool of tools) {
    if (!tool || typeof tool !== "object") {
      converted.push(tool);
      continue;
    }

    const t = tool as Record<string, unknown>;

    // Drop the whole web search tool family, not just the bare "web_search" type.
    if (typeof t.type === "string" && t.type.startsWith("web_search")) {
      continue;
    }

    // Already in OpenAI format (has 'function' property)
    if (t.function) {
      converted.push(tool);
      continue;
    }

    // type="function" without the 'function' wrapper: everything except
    // `type` moves under `function`.
    if (t.type === "function") {
      const { type, ...functionDef } = t;
      converted.push({ type, function: functionDef });
      continue;
    }

    converted.push(tool);
  }

  return converted;
}
|
|
382
|
+
|
|
383
|
+
/**
 * Response object structure for SSE serialization.
 *
 * This is also the shape transformToResponsesFormat returns for non-streaming
 * replies.
 */
interface ResponsesApiResponse {
  /** Response identifier. */
  id: string;
  /** Object discriminator; set to "response" where this file builds one. */
  object: string;
  /** Creation time, in seconds since the Unix epoch. */
  created_at: number;
  /** Model name, when known. */
  model?: string;
  /** Output items: "message" items carry role/content, "function_call" items carry id/call_id/name/arguments. */
  output: {
    type: string;
    role?: string;
    content?: { type: string; text?: string }[];
    id?: string;
    name?: string;
    arguments?: string;
    call_id?: string;
  }[];
  /** Token accounting, when available. */
  usage?: {
    input_tokens: number;
    output_tokens: number;
    total_tokens: number;
  };
}
|
|
406
|
+
|
|
407
|
+
/**
 * Convert a chat-completions response body into a Responses API response object.
 *
 * Only the first choice is used. Its tool calls become "function_call" output
 * items; its text becomes a "message" item with a single "output_text" part.
 * A message item is always emitted when there are no tool calls, even if the
 * text is empty, so `output` is never empty for an object payload.
 *
 * @param chatResponse Parsed upstream body; anything that is not an object
 *   yields an empty response shell.
 * @param requestId Used as the response id when the upstream body has no `id`.
 * @returns The Responses-style object. `usage` is present only when the
 *   upstream reported usage; a missing `total_tokens` is derived from the
 *   prompt and completion counts instead of being reported as 0.
 */
function transformToResponsesFormat(chatResponse: unknown, requestId: string): ResponsesApiResponse {
  if (!chatResponse || typeof chatResponse !== "object") {
    return {
      id: requestId,
      object: "response",
      created_at: Math.floor(Date.now() / 1000),
      output: []
    };
  }

  const chat = chatResponse as {
    id?: string;
    choices?: Array<{
      message?: {
        content?: string;
        role?: string;
        tool_calls?: Array<{
          id: string;
          type: string;
          function: { name: string; arguments: string };
        }>;
      }
    }>;
    usage?: { total_tokens?: number; prompt_tokens?: number; completion_tokens?: number };
    model?: string;
    created?: number;
  };

  const message = chat.choices?.[0]?.message;
  const output: ResponsesApiResponse["output"] = [];

  // Handle tool calls if present
  for (const toolCall of message?.tool_calls ?? []) {
    output.push({
      type: "function_call",
      id: toolCall.id,
      call_id: toolCall.id,
      name: toolCall.function.name,
      arguments: toolCall.function.arguments
    });
  }

  // Handle text content
  const textContent = message?.content ?? "";
  if (textContent || output.length === 0) {
    output.push({
      type: "message",
      role: message?.role ?? "assistant",
      // Responses-style clients may expect output_text instead of text
      content: [{ type: "output_text", text: textContent }]
    });
  }

  let usage: ResponsesApiResponse["usage"];
  if (chat.usage) {
    const inputTokens = chat.usage.prompt_tokens ?? 0;
    const outputTokens = chat.usage.completion_tokens ?? 0;
    usage = {
      input_tokens: inputTokens,
      output_tokens: outputTokens,
      // Some upstreams omit total_tokens; fall back to the sum of the parts.
      total_tokens: chat.usage.total_tokens ?? inputTokens + outputTokens
    };
  }

  return {
    id: chat.id ?? requestId,
    object: "response",
    created_at: chat.created ?? Math.floor(Date.now() / 1000),
    model: chat.model,
    output,
    usage
  };
}
|
|
476
|
+
|
|
477
|
+
/**
 * Stream an upstream chat-completions SSE response to the client, translating
 * it into Responses API SSE events while it is being read.
 *
 * Events written to `reply.raw`, in order:
 *  - `response.created` (sent before the upstream body is read),
 *  - `response.reasoning_text.delta` / `response.output_text.delta` for each
 *    upstream delta carrying reasoning or content text,
 *  - `response.output_item.done` for every accumulated tool call, then for the
 *    assistant message (emitted when there is text, or when there is no other
 *    output item),
 *  - `response.completed` carrying the full output and the captured usage
 *    (`null` when upstream never reported usage).
 *
 * Any failure while reading is logged and reported in-stream as an `error`
 * event; the raw reply is ended afterwards in both cases.
 *
 * @param reply Fastify reply whose raw response receives the SSE stream.
 * @param upstreamResponse Upstream response; `body` may be a web
 *   `ReadableStream`, a Node.js readable stream, or `null`.
 * @param requestId Identifier used as the response `id` in emitted events.
 * @param model Model name echoed in `response.created` / `response.completed`.
 */
async function streamResponsesAPI(
  reply: FastifyReply,
  upstreamResponse: { body: ReadableStream<Uint8Array> | NodeJS.ReadableStream | null },
  requestId: string,
  model: string
): Promise<void> {
  reply.raw.writeHead(200, {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive"
  });

  const sendEvent = (eventType: string, data: unknown) => {
    reply.raw.write(`event: ${eventType}\n`);
    reply.raw.write(`data: ${JSON.stringify(data)}\n\n`);
  };

  // Send response.created immediately
  sendEvent("response.created", {
    type: "response.created",
    response: {
      id: requestId,
      object: "response",
      created_at: Math.floor(Date.now() / 1000),
      model,
      output: [],
      usage: null
    }
  });

  // Accumulate content and tool calls for the final response
  let accumulatedContent = "";
  const accumulatedToolCalls: Array<{
    id: string;
    name: string;
    arguments: string;
  }> = [];
  let usage: { input_tokens: number; output_tokens: number; total_tokens: number } | null = null;

  try {
    const body = upstreamResponse.body;
    if (!body) {
      throw new Error("No response body");
    }

    const reader = 'getReader' in body
      ? body.getReader()
      : null;

    // Holds the trailing, not-yet-terminated line between chunks.
    let buffer = "";

    // Translate one complete upstream SSE line. Only `data:` lines matter;
    // the space after the colon is optional in the SSE format.
    const processLine = (line: string) => {
      if (!line.startsWith("data:")) {
        return;
      }
      const data = line.slice(5).trim();
      if (data === "" || data === "[DONE]") {
        return;
      }
      try {
        const chunk = JSON.parse(data);
        const delta = chunk.choices?.[0]?.delta;

        if (delta) {
          // Handle reasoning/thinking content delta
          const reasoningDelta = delta.reasoning_content || delta.reasoning;
          if (reasoningDelta) {
            sendEvent("response.reasoning_text.delta", {
              type: "response.reasoning_text.delta",
              output_index: 0,
              content_index: 0,
              delta: reasoningDelta
            });
          }

          // Handle content delta
          if (delta.content) {
            accumulatedContent += delta.content;
            sendEvent("response.output_text.delta", {
              type: "response.output_text.delta",
              output_index: 0,
              content_index: 0,
              delta: delta.content
            });
          }

          // Handle tool calls delta
          if (delta.tool_calls) {
            for (const toolCallDelta of delta.tool_calls) {
              const idx = toolCallDelta.index;
              // Create the slot only the first time an index is seen.
              // Deltas for several tool calls may interleave, so switching
              // back to an earlier index must not wipe what was collected.
              let call = accumulatedToolCalls[idx];
              if (!call) {
                call = { id: "", name: "", arguments: "" };
                accumulatedToolCalls[idx] = call;
              }
              if (toolCallDelta.id) {
                call.id = toolCallDelta.id;
              }
              if (toolCallDelta.function?.name) {
                call.name = toolCallDelta.function.name;
              }
              if (toolCallDelta.function?.arguments) {
                call.arguments += toolCallDelta.function.arguments;
              }
            }
          }
        }

        // Capture usage from final chunk
        if (chunk.usage) {
          usage = {
            input_tokens: chunk.usage.prompt_tokens ?? 0,
            output_tokens: chunk.usage.completion_tokens ?? 0,
            total_tokens: chunk.usage.total_tokens ?? 0
          };
        }
      } catch {
        // Best effort: a malformed upstream chunk is skipped so that one bad
        // line does not abort the whole stream.
      }
    };

    const processChunk = (text: string) => {
      buffer += text;
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // Keep incomplete line in buffer
      for (const line of lines) {
        processLine(line);
      }
    };

    // One streaming decoder for both paths, so a multi-byte UTF-8 character
    // split across two chunks is decoded correctly.
    const decoder = new TextDecoder();

    if (reader) {
      // Web Streams API (ReadableStream)
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        processChunk(decoder.decode(value, { stream: true }));
      }
    } else {
      // Node.js stream
      const nodeStream = body as NodeJS.ReadableStream;
      for await (const chunk of nodeStream) {
        processChunk(
          typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true })
        );
      }
    }

    // Flush bytes still held by the decoder, then handle a final line that
    // was not terminated by a newline.
    processChunk(decoder.decode());
    if (buffer) {
      processLine(buffer);
      buffer = "";
    }

    // Build final output
    const output: ResponsesApiResponse["output"] = [];

    // Add tool calls first. The array may be sparse, hence the guard.
    for (const tc of accumulatedToolCalls) {
      if (tc) {
        output.push({
          type: "function_call",
          id: tc.id,
          call_id: tc.id,
          name: tc.name,
          arguments: tc.arguments
        });
        // Send output_item.done for each tool call
        sendEvent("response.output_item.done", {
          type: "response.output_item.done",
          output_index: output.length - 1,
          item: output[output.length - 1]
        });
      }
    }

    // Add message content if any (or an empty message when nothing else exists)
    if (accumulatedContent || output.length === 0) {
      output.push({
        type: "message",
        role: "assistant",
        content: [{ type: "output_text", text: accumulatedContent }]
      });
      // Send output_item.done for the message
      sendEvent("response.output_item.done", {
        type: "response.output_item.done",
        output_index: output.length - 1,
        item: output[output.length - 1]
      });
    }

    // Send response.completed
    sendEvent("response.completed", {
      type: "response.completed",
      response: {
        id: requestId,
        object: "response",
        created_at: Math.floor(Date.now() / 1000),
        model,
        output,
        usage
      }
    });

  } catch (error) {
    console.error("[responses] Streaming error:", error);
    // Headers are already sent, so the error can only be reported in-stream.
    sendEvent("error", {
      type: "error",
      error: { message: error instanceof Error ? error.message : String(error) }
    });
  }

  reply.raw.end();
}
|
|
693
|
+
|
|
694
|
+
/**
 * Replay an already complete Responses API payload to the client as a
 * Server-Sent Events stream.
 *
 * The stream consists of, in order:
 *  1. `response.created` — response metadata with an empty `output` and
 *     `usage: null`,
 *  2. one `response.output_item.done` per entry of `response.output`,
 *  3. `response.completed` — metadata plus the full `output` and `usage`.
 *
 * Every event is written as an `event: <type>` line followed by a
 * `data: <json>` line whose JSON repeats the type in its `type` field.
 * The raw reply is ended once all events are written.
 */
async function sendAsSSE(reply: FastifyReply, response: ResponsesApiResponse): Promise<void> {
  const { raw } = reply;

  raw.writeHead(200, {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive"
  });

  // Writes a single SSE frame: event name line, then JSON data line.
  const emit = (eventType: string, data: unknown) => {
    raw.write(`event: ${eventType}\n`);
    raw.write(`data: ${JSON.stringify(data)}\n\n`);
  };

  // Metadata shared by the opening and closing events.
  const { id, object, created_at, model } = response;
  const envelope = { id, object, created_at, model };

  emit("response.created", {
    type: "response.created",
    response: { ...envelope, output: [], usage: null }
  });

  response.output.forEach((item, output_index) => {
    emit("response.output_item.done", {
      type: "response.output_item.done",
      output_index,
      item
    });
  });

  emit("response.completed", {
    type: "response.completed",
    response: { ...envelope, output: response.output, usage: response.usage }
  });

  raw.end();
}
|
|
757
|
+
|
|
758
|
+
/**
 * Choose a model name for a request that needs text-in/text-out capability.
 *
 * Resolution order:
 *  1. `"smart"` when `selectPoolCandidates` reports at least one candidate
 *     for that pool (non-streaming `chat_completions`),
 *  2. otherwise whatever `pickBestProviderModelByCapabilities` returns for
 *     the `"llm"` kind, if truthy,
 *  3. otherwise `null`.
 */
async function pickDefaultModel(paths: StoragePaths): Promise<string | null> {
  const smartPool = await selectPoolCandidates(
    paths,
    "smart",
    { requiredInput: ["text"], requiredOutput: ["text"] },
    { operation: "chat_completions", stream: false }
  );

  const hasSmartCandidates = smartPool ? smartPool.candidates.length > 0 : false;
  if (hasSmartCandidates) {
    return "smart";
  }

  // No usable "smart" pool: fall back to a capability-based provider pick.
  const fallbackModel = await pickBestProviderModelByCapabilities(
    paths,
    { requiredInput: ["text"], requiredOutput: ["text"] },
    "llm"
  );
  return fallbackModel ? fallbackModel : null;
}
|
|
780
|
+
|
|
781
|
+
/**
 * Pipe an upstream streaming response straight through to the client.
 *
 * Upstream headers are normalized and forwarded with the upstream status
 * code. `content-type` defaults to `text/event-stream` when missing or empty,
 * and `cache-control` defaults to `no-cache` when absent. The returned promise
 * settles when the pipeline finishes, rejecting with the pipeline error if
 * there is one.
 */
async function streamResponse(
  reply: FastifyReply,
  response: { statusCode: number; headers: Record<string, string | string[]>; body: NodeJS.ReadableStream }
): Promise<void> {
  const outgoing = normalizeHeaders(response.headers);

  if (!outgoing["content-type"]) {
    outgoing["content-type"] = "text/event-stream";
  }
  if (outgoing["cache-control"] == null) {
    outgoing["cache-control"] = "no-cache";
  }

  reply.raw.writeHead(response.statusCode, outgoing);

  // Wrap the callback-style pipeline so the caller can await completion.
  await new Promise<void>((done, fail) => {
    pipeline(response.body, reply.raw, (failure) => {
      if (failure) {
        fail(failure);
        return;
      }
      done();
    });
  });
}
|
|
799
|
+
|
|
800
|
+
/**
 * Copy a set of headers onto the Fastify reply.
 *
 * Headers are first passed through `normalizeHeaders`, then each resulting
 * name/value pair is applied with `reply.header`.
 */
function setHeaders(reply: FastifyReply, headers: Record<string, string | string[]>): void {
  const flat = normalizeHeaders(headers);
  Object.keys(flat).forEach((name) => {
    reply.header(name, flat[name]);
  });
}
|
|
806
|
+
|
|
807
|
+
/**
 * Produce a flat header map with lower-cased names.
 *
 * Array values are joined with `", "`; string values are kept as they are.
 * When two input names differ only by case, the one iterated later wins.
 *
 * @param headers Header map whose values are a string or a list of strings.
 * @returns A new object; the input is not modified.
 */
function normalizeHeaders(headers: Record<string, string | string[]>): Record<string, string> {
  return Object.keys(headers).reduce<Record<string, string>>((flat, name) => {
    const raw = headers[name];
    flat[name.toLowerCase()] = Array.isArray(raw) ? raw.join(", ") : raw;
    return flat;
  }, {});
}
|
|
814
|
+
|
|
815
|
+
/**
 * Read an upstream response body to completion and, when it is JSON, parse it
 * and pull out token usage.
 *
 * The body is always consumed fully before the content type is inspected.
 *
 * @returns
 *  - For a `content-type` containing `application/json` that parses
 *    successfully: the parsed payload plus `usage.total_tokens`,
 *    `usage.prompt_tokens` and `usage.completion_tokens` (each `null` when
 *    absent).
 *  - Otherwise (non-JSON content type, or JSON that fails to parse): the raw
 *    `Buffer` as payload and `null` for every token count.
 */
async function readBody(
  response: { body: NodeJS.ReadableStream; headers: Record<string, string | string[]> }
): Promise<{
  payload: unknown;
  totalTokens: number | null;
  promptTokens: number | null;
  completionTokens: number | null;
}> {
  const parts: Buffer[] = [];
  for await (const piece of response.body) {
    parts.push(Buffer.isBuffer(piece) ? piece : Buffer.from(piece));
  }
  const raw = Buffer.concat(parts);

  // Result used whenever the body cannot be interpreted as JSON.
  const opaque = { payload: raw, totalTokens: null, promptTokens: null, completionTokens: null };

  const mediaType = normalizeHeaders(response.headers)["content-type"] ?? "";
  if (!mediaType.includes("application/json")) {
    return opaque;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw.toString("utf8"));
  } catch {
    return opaque;
  }

  // Only object payloads can carry a `usage` section.
  const usage =
    parsed !== null && typeof parsed === "object"
      ? (parsed as { usage?: { total_tokens?: number; prompt_tokens?: number; completion_tokens?: number } }).usage
      : undefined;

  return {
    payload: parsed,
    totalTokens: usage?.total_tokens ?? null,
    promptTokens: usage?.prompt_tokens ?? null,
    completionTokens: usage?.completion_tokens ?? null,
  };
}
|
|
847
|
+
|
|
848
|
+
/**
 * Assemble the request-log record for a completed proxy attempt.
 *
 * @param requestId Identifier of the request being logged.
 * @param model Public model name the client asked for.
 * @param outcome Routing outcome; the endpoint, upstream model and upstream
 *   status code are read from `outcome.attempt`.
 * @param latencyMs Measured latency in milliseconds.
 * @param stream Whether the request was a streaming one.
 * @param totalTokens Total token count if known; `undefined` is stored as `null`.
 * @returns A log entry timestamped with the current time.
 */
function buildLog(
  requestId: string,
  model: string,
  outcome: { attempt: { endpoint: { id: string; name: string }; upstreamModel: string; response: { statusCode: number } } },
  latencyMs: number,
  stream: boolean,
  totalTokens?: number | null
): RequestLog {
  const { endpoint, upstreamModel, response } = outcome.attempt;

  const route = {
    publicModel: model,
    endpointId: endpoint.id,
    endpointName: endpoint.name,
    upstreamModel
  };
  const result = {
    statusCode: response.statusCode,
    latencyMs,
    totalTokens: totalTokens ?? null
  };

  return { requestId, ts: new Date(), route, request: { stream }, result };
}
|