@khanglvm/llm-router 2.0.3 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +14 -5
- package/package.json +1 -1
- package/src/node/web-console-ui/config-editor-utils.js +21 -0
- package/src/runtime/codex-request-transformer.js +22 -2
- package/src/runtime/config.js +5 -1
- package/src/runtime/handler/amp-web-search.js +130 -0
- package/src/runtime/handler/provider-call.js +192 -69
- package/src/runtime/handler/provider-translation.js +7 -2
- package/src/runtime/handler/request.js +25 -0
- package/src/runtime/handler.js +40 -5
- package/src/runtime/thread-affinity.js +41 -0
- package/src/translator/response/openai-to-claude.js +6 -1
package/CHANGELOG.md
CHANGED
|
@@ -7,8 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [2.0.5] - 2026-03-15
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- Relaxed the live coding-tool publish checks so known external Codex model-verbosity mismatches and Claude MCP schema-validation failures are treated as acceptable upstream tool failures instead of blocking npm publication.
|
|
14
|
+
|
|
15
|
+
## [2.0.4] - 2026-03-15
|
|
16
|
+
|
|
10
17
|
### Fixed
|
|
11
18
|
- Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
|
|
19
|
+
- Updated the web console provider editor so API-based providers can rotate between env-backed and direct API key credentials in place without leaving the modal.
|
|
20
|
+
- Improved the web console model-save flow for API-based providers:
|
|
21
|
+
- new-model tests now stream visible progress while save is in flight
|
|
22
|
+
- successful new models stay marked as confirmed
|
|
23
|
+
- only failed new models are marked as failed
|
|
24
|
+
- the edit modal blocks backdrop/close dismissal while tests are running
|
|
25
|
+
- closing after failed tests now offers to remove failed rows while keeping successful new rows
|
|
26
|
+
- Improved dual-format Claude provider routing so Claude tool calls can prefer OpenAI-compatible tool execution paths when available, while falling back cleanly to native Claude routing if the OpenAI-compatible path fails.
|
|
12
27
|
|
|
13
28
|
## [2.0.1] - 2026-03-15
|
|
14
29
|
|
package/README.md
CHANGED
|
@@ -2,20 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
LLM Router is a local and Cloudflare-deployable gateway for routing one client endpoint across multiple LLM providers, models, aliases, fallbacks, and rate limits.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
**Current version**: `2.0.5`
|
|
6
6
|
|
|
7
|
+
NPM package:
|
|
7
8
|
```bash
|
|
8
9
|
@khanglvm/llm-router
|
|
9
10
|
```
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
Primary CLI command:
|
|
13
13
|
```bash
|
|
14
14
|
llr
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
`2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
|
|
18
|
-
|
|
19
17
|
## Install
|
|
20
18
|
|
|
21
19
|
```bash
|
|
@@ -266,7 +264,18 @@ Run the JavaScript test suite:
|
|
|
266
264
|
node --test $(rg --files -g "*.test.js" src)
|
|
267
265
|
```
|
|
268
266
|
|
|
267
|
+
## Documentation
|
|
268
|
+
|
|
269
|
+
Comprehensive documentation is available in the `docs/` directory:
|
|
270
|
+
|
|
271
|
+
- **[Project Overview & PDR](./docs/project-overview-pdr.md)** — Feature matrix, target users, success metrics, constraints
|
|
272
|
+
- **[Codebase Summary](./docs/codebase-summary.md)** — Directory structure, module relationships, entry points, test infrastructure
|
|
273
|
+
- **[Code Standards](./docs/code-standards.md)** — Patterns, naming conventions, testing, error handling
|
|
274
|
+
- **[System Architecture](./docs/system-architecture.md)** — Request lifecycle, subsystem boundaries, data flow, deployment models
|
|
275
|
+
- **[Project Roadmap](./docs/project-roadmap.md)** — Current status, planned phases, timeline, success metrics
|
|
276
|
+
|
|
269
277
|
## Security and Releases
|
|
270
278
|
|
|
271
279
|
- Security: [`SECURITY.md`](https://github.com/khanglvm/llm-router/blob/master/SECURITY.md)
|
|
272
280
|
- Release notes: [`CHANGELOG.md`](https://github.com/khanglvm/llm-router/blob/master/CHANGELOG.md)
|
|
281
|
+
- AMP routing: [`docs/amp-routing.md`](./docs/amp-routing.md)
|
package/package.json
CHANGED
|
@@ -159,6 +159,26 @@ function normalizeEndpointCandidates(values = []) {
|
|
|
159
159
|
return dedupeStrings(Array.isArray(values) ? values : [values]);
|
|
160
160
|
}
|
|
161
161
|
|
|
162
|
+
/**
 * Returns a shallow copy of `provider` whose credential fields reflect the
 * draft's `credentialInput`.
 *
 * - If the draft has no own `credentialInput` property, the copy is returned
 *   with its credential fields untouched.
 * - Otherwise `apiKey`, `apiKeyEnv` and `credential` are removed from the copy.
 *   A blank input leaves all three absent; an input matching
 *   `/^[A-Z][A-Z0-9_]*$/` is stored as `apiKeyEnv`; any other input is stored
 *   as `apiKey`.
 *
 * @param {object} [provider] - Current provider entry; never mutated.
 * @param {object} [draftProvider] - Draft values from the editor.
 * @returns {object} New provider object with rewritten credential fields.
 */
function rewriteProviderCredentials(provider = {}, draftProvider = {}) {
  const draftTouchesCredential = Object.prototype.hasOwnProperty.call(draftProvider || {}, "credentialInput");
  if (!draftTouchesCredential) {
    return { ...provider };
  }

  // Peel the three credential fields off a copy; the rest is carried over as-is.
  const { apiKey, apiKeyEnv, credential, ...nextProvider } = { ...provider };

  const trimmedInput = String(draftProvider?.credentialInput || "").trim();
  if (trimmedInput) {
    // An all-caps identifier is stored as an env reference, anything else as a literal key.
    const targetField = /^[A-Z][A-Z0-9_]*$/.test(trimmedInput) ? "apiKeyEnv" : "apiKey";
    nextProvider[targetField] = trimmedInput;
  }
  return nextProvider;
}
|
|
181
|
+
|
|
162
182
|
function rewriteProviderEndpoints(provider = {}, endpoints = []) {
|
|
163
183
|
const nextProvider = { ...provider };
|
|
164
184
|
const nextEndpoints = normalizeEndpointCandidates(endpoints);
|
|
@@ -487,6 +507,7 @@ export function applyProviderInlineEdits(config = {}, currentProviderId = "", dr
|
|
|
487
507
|
};
|
|
488
508
|
|
|
489
509
|
if (!isSubscription) {
|
|
510
|
+
nextProvider = rewriteProviderCredentials(nextProvider, draftProvider);
|
|
490
511
|
nextProvider = rewriteProviderEndpoints(nextProvider, nextEndpoints);
|
|
491
512
|
nextProvider = rewriteRateLimits(nextProvider, draftProvider, renamedProviderId);
|
|
492
513
|
}
|
|
@@ -224,10 +224,15 @@ function normalizeInputMessageContent(content, role) {
|
|
|
224
224
|
? part.image_url
|
|
225
225
|
: part.image_url?.url;
|
|
226
226
|
if (typeof rawUrl === 'string' && rawUrl.trim()) {
|
|
227
|
-
|
|
227
|
+
const imageItem = {
|
|
228
228
|
type: 'input_image',
|
|
229
229
|
image_url: rawUrl
|
|
230
|
-
}
|
|
230
|
+
};
|
|
231
|
+
const detail = part.image_url?.detail || part.detail;
|
|
232
|
+
if (typeof detail === 'string' && detail.trim()) {
|
|
233
|
+
imageItem.detail = detail.trim();
|
|
234
|
+
}
|
|
235
|
+
parts.push(imageItem);
|
|
231
236
|
}
|
|
232
237
|
continue;
|
|
233
238
|
}
|
|
@@ -333,6 +338,21 @@ function normalizeToolChoiceForResponses(toolChoice) {
|
|
|
333
338
|
const normalizedType = String(toolChoice.type || '').trim().toLowerCase();
|
|
334
339
|
if (normalizedType === 'none') return 'none';
|
|
335
340
|
if (normalizedType === 'required' || normalizedType === 'any' || normalizedType === 'tool') {
|
|
341
|
+
const functionName = String(
|
|
342
|
+
toolChoice.function?.name || toolChoice.name || ''
|
|
343
|
+
).trim();
|
|
344
|
+
if (functionName) {
|
|
345
|
+
return { type: 'function', name: functionName };
|
|
346
|
+
}
|
|
347
|
+
return 'required';
|
|
348
|
+
}
|
|
349
|
+
if (normalizedType === 'function') {
|
|
350
|
+
const functionName = String(
|
|
351
|
+
toolChoice.function?.name || toolChoice.name || ''
|
|
352
|
+
).trim();
|
|
353
|
+
if (functionName) {
|
|
354
|
+
return { type: 'function', name: functionName };
|
|
355
|
+
}
|
|
336
356
|
return 'required';
|
|
337
357
|
}
|
|
338
358
|
}
|
package/src/runtime/config.js
CHANGED
|
@@ -735,7 +735,11 @@ function normalizeAmpWebSearchConfig(rawWebSearch) {
|
|
|
735
735
|
return {
|
|
736
736
|
strategy: normalizeAmpWebSearchStrategy(rawWebSearch.strategy),
|
|
737
737
|
count,
|
|
738
|
-
providers
|
|
738
|
+
providers,
|
|
739
|
+
interceptInternalSearch: normalizeBooleanValue(
|
|
740
|
+
rawWebSearch.interceptInternalSearch ?? rawWebSearch["intercept-internal-search"],
|
|
741
|
+
false
|
|
742
|
+
)
|
|
739
743
|
};
|
|
740
744
|
}
|
|
741
745
|
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
resolveRouteReference
|
|
11
11
|
} from "../config.js";
|
|
12
12
|
import { isSubscriptionProvider, makeSubscriptionProviderCall } from "../subscription-provider.js";
|
|
13
|
+
import { jsonResponse } from "./http.js";
|
|
13
14
|
|
|
14
15
|
const SEARCH_TOOL_NAME = "web_search";
|
|
15
16
|
const READ_WEB_PAGE_TOOL_NAME = "read_web_page";
|
|
@@ -2178,6 +2179,135 @@ export async function testHostedWebSearchProviderRoute({
|
|
|
2178
2179
|
}, query, runtimeConfig, env);
|
|
2179
2180
|
}
|
|
2180
2181
|
|
|
2182
|
+
/**
 * Runs one query against a single search backend and returns its hits
 * normalized to `{ title, url, snippet }` objects whose values are strings.
 *
 * Handled `provider.id` values are "brave", "tavily" and "exa" (each requires
 * `provider.apiKey`) and "searxng" (requires `provider.url`). A blank query, a
 * missing provider, a missing key/url, an unrecognized id, or a non-OK HTTP
 * status all produce an empty array. Anything thrown by `runFetchWithTimeout`
 * or `response.json()` is not caught here and propagates to the caller.
 *
 * @param {string} query - Search text; trimmed before use.
 * @param {number} count - Upper bound on returned hits; also sent to brave,
 *   tavily and exa as their result limit.
 * @param {object} provider - Entry carrying `id` plus `apiKey` or `url`.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 */
async function fetchStructuredSearchResults(query, count, provider) {
  const normalizedQuery = String(query || "").trim();
  if (!normalizedQuery || !provider) return [];

  // Shared tail of every backend: bail on HTTP failure, pick the result list
  // out of the payload, cap it at `count`, and coerce each field to a string.
  const toHits = async (response, selectItems, selectSnippet) => {
    if (!response.ok) return [];
    const payload = await response.json();
    const items = selectItems(payload);
    return (Array.isArray(items) ? items.slice(0, count) : []).map((item) => ({
      title: String(item?.title || ""),
      url: String(item?.url || ""),
      snippet: String(selectSnippet(item) || "")
    }));
  };

  switch (provider.id) {
    case "brave": {
      if (!provider.apiKey) return [];
      const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(normalizedQuery)}&count=${count}&text_decorations=false`;
      const response = await runFetchWithTimeout(url, {
        headers: { Accept: "application/json", "X-Subscription-Token": provider.apiKey }
      });
      return toHits(response, (payload) => payload?.web?.results, (item) => item?.description);
    }

    case "tavily": {
      if (!provider.apiKey) return [];
      const response = await runFetchWithTimeout("https://api.tavily.com/search", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ api_key: provider.apiKey, query: normalizedQuery, max_results: count, search_depth: "basic" })
      });
      return toHits(response, (payload) => payload?.results, (item) => item?.content);
    }

    case "exa": {
      if (!provider.apiKey) return [];
      const response = await runFetchWithTimeout("https://api.exa.ai/search", {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-api-key": provider.apiKey },
        body: JSON.stringify({ query: normalizedQuery, numResults: count, type: "auto", contents: { text: { maxCharacters: 500 } } })
      });
      return toHits(response, (payload) => payload?.results, (item) => item?.text || item?.snippet);
    }

    case "searxng": {
      if (!provider.url) return [];
      // Strip trailing slashes so a base URL such as "http://host/" does not
      // produce "http://host//search".
      const baseUrl = String(provider.url).replace(/\/+$/, "");
      const url = `${baseUrl}/search?q=${encodeURIComponent(normalizedQuery)}&format=json&categories=general&language=auto`;
      const response = await runFetchWithTimeout(url, {
        headers: { Accept: "application/json", "User-Agent": "llm-router" }
      });
      return toHits(response, (payload) => payload?.results, (item) => item?.content);
    }

    default:
      return [];
  }
}
|
|
2240
|
+
|
|
2241
|
+
/**
 * Runs a batch of search queries against the first ready, non-hosted provider
 * that yields at least one hit.
 *
 * Queries are stringified, trimmed, stripped of blanks and capped at 10.
 * `maxResults` is clamped to 1..20 (default 5 when not a usable number) and
 * truncated to an integer. Providers come from `buildAmpWebSearchSnapshot` and
 * are tried in snapshot order; for each one, all queries run in parallel and
 * their hits are concatenated in query order. A provider that throws, or that
 * returns no hits at all, is skipped in favour of the next one.
 *
 * @param {object} args
 * @param {Array<*>} args.queries - Raw query values.
 * @param {number|string} [args.maxResults] - Requested per-query hit limit.
 * @param {object} args.config - Forwarded to `buildAmpWebSearchSnapshot`.
 * @param {object} args.env - Forwarded to `buildAmpWebSearchSnapshot` as `{ env }`.
 * @returns {Promise<{results: Array<object>, provider: string}>} Hits plus the
 *   id of the provider that produced them; `{ results: [], provider: "" }`
 *   when there are no usable queries or no provider returned anything.
 */
export async function executeWebSearchQueries({ queries, maxResults, config, env }) {
  const normalizedQueries = (Array.isArray(queries) ? queries : [])
    .map((q) => String(q || "").trim())
    .filter(Boolean)
    .slice(0, 10);
  if (normalizedQueries.length === 0) return { results: [], provider: "" };

  // Clamp first, then truncate: a fractional limit such as 2.9 would otherwise
  // be forwarded verbatim into upstream query strings and JSON bodies.
  const count = Math.trunc(Math.max(1, Math.min(20, Number(maxResults) || 5)));
  const snapshot = await buildAmpWebSearchSnapshot(config, { env });
  const readyProviders = snapshot.providers.filter((p) => p.ready && !isHostedSearchProvider(p));

  for (const providerStatus of readyProviders) {
    try {
      const batchResults = await Promise.all(
        normalizedQueries.map((query) => fetchStructuredSearchResults(query, count, providerStatus))
      );
      const allResults = batchResults.flat();
      if (allResults.length > 0) {
        return { results: allResults, provider: providerStatus.id };
      }
    } catch (error) {
      // Best-effort fallback: record why this provider was skipped, then try
      // the next one instead of failing the whole search.
      console.warn(`[llm-router] web search provider "${providerStatus.id}" failed: ${error?.message || error}`);
    }
  }

  return { results: [], provider: "" };
}
|
|
2266
|
+
|
|
2267
|
+
/**
 * Answers a `webSearch2` request locally instead of letting it proceed.
 *
 * Returns `null` (no interception) when the URL lacks a `webSearch2` query
 * parameter, when `interceptInternalSearch` is falsy in `config.webSearch` /
 * `config.amp.webSearch`, when that config lists no providers, when the body
 * is not parseable JSON, when `body.params.searchQueries` is not a non-empty
 * array, or when running the search throws (logged via `console.warn`).
 * Otherwise returns `jsonResponse({ result: { results } })`, where each result
 * carries `title`, `url`, `snippet`, and `content` (a copy of the snippet).
 *
 * @param {Request} request - Incoming request; read through a clone so the
 *   original body stays unread.
 * @param {URL} url - Parsed request URL.
 * @param {object} config - Runtime config.
 * @param {object} env - Environment passed through to the search executor.
 * @returns {Promise<*|null>} The value produced by `jsonResponse`, or `null`.
 */
export async function maybeInterceptAmpInternalSearch(request, url, config, env) {
  if (!url.searchParams.has("webSearch2")) {
    return null;
  }

  const settings = config?.webSearch || config?.amp?.webSearch;
  if (!settings?.interceptInternalSearch) {
    return null;
  }

  const configuredProviders = Array.isArray(settings?.providers) ? settings.providers : [];
  if (configuredProviders.length === 0) {
    return null;
  }

  let parsedBody;
  try {
    parsedBody = await request.clone().json();
  } catch {
    // Unreadable or non-JSON body: leave the request for the normal path.
    return null;
  }

  const searchParamsPayload = parsedBody?.params;
  const requestedQueries = searchParamsPayload ? searchParamsPayload.searchQueries : undefined;
  if (!Array.isArray(requestedQueries) || requestedQueries.length === 0) {
    return null;
  }

  try {
    const outcome = await executeWebSearchQueries({
      queries: requestedQueries,
      maxResults: Number(searchParamsPayload.maxResults) || 5,
      config,
      env
    });

    const formattedResults = outcome.results.map((hit) => {
      const snippet = hit.snippet || "";
      return {
        title: hit.title || "",
        url: hit.url || "",
        snippet,
        content: snippet
      };
    });
    return jsonResponse({ result: { results: formattedResults } });
  } catch (error) {
    console.warn(`[llm-router] webSearch2 interception failed: ${error?.message || error}`);
    return null;
  }
}
|
|
2310
|
+
|
|
2181
2311
|
export async function maybeInterceptAmpWebSearch({
|
|
2182
2312
|
response,
|
|
2183
2313
|
providerBody,
|
|
@@ -212,6 +212,38 @@ function extractToolTypes(body) {
|
|
|
212
212
|
)];
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
/**
 * Reports whether a request body declares at least one tool.
 *
 * @param {object} [body] - Request body.
 * @returns {boolean} `true` only when `body.tools` is an array containing at
 *   least one non-null object entry.
 */
function hasToolDefinitions(body) {
  const declaredTools = body?.tools;
  if (!Array.isArray(declaredTools)) {
    return false;
  }
  for (const entry of declaredTools) {
    if (entry !== null && typeof entry === "object") {
      return true;
    }
  }
  return false;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Lists the wire formats a provider declares, without duplicates.
 *
 * Reads `provider.format` first and then each entry of `provider.formats`
 * (when that is an array). Every value is stringified and trimmed; only values
 * equal to `FORMATS.OPENAI` or `FORMATS.CLAUDE` are kept, in first-seen order.
 *
 * @param {object} [provider] - Provider entry.
 * @returns {string[]} Distinct recognized formats.
 */
function getProviderFormats(provider) {
  const extraFormats = Array.isArray(provider?.formats) ? provider.formats : [];
  const recognized = new Set();
  for (const rawFormat of [provider?.format, ...extraFormats]) {
    const formatName = String(rawFormat || "").trim();
    if (formatName === FORMATS.OPENAI || formatName === FORMATS.CLAUDE) {
      recognized.add(formatName);
    }
  }
  return [...recognized];
}
|
|
226
|
+
|
|
227
|
+
/**
 * Adjusts a request kind for the format that will actually be sent upstream.
 *
 * @param {string} targetFormat - Format of the outgoing provider request.
 * @param {string|undefined} requestKind - Request kind of the incoming call.
 * @returns {string|undefined} `undefined` when the target is `FORMATS.OPENAI`
 *   and the kind is "messages"; otherwise `requestKind` unchanged.
 */
function normalizeProviderRequestKind(targetFormat, requestKind) {
  // NOTE(review): presumably "messages" is dropped because it has no
  // OpenAI-side counterpart — confirm against resolveProviderUrl.
  const isMessagesKindForOpenAI = targetFormat === FORMATS.OPENAI && requestKind === "messages";
  return isMessagesKindForOpenAI ? undefined : requestKind;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Decides whether a Claude-to-Claude request that carries tools should be
 * attempted over the provider's OpenAI-format endpoint first.
 *
 * All of the following must hold: source and target formats are both
 * `FORMATS.CLAUDE`; the body declares at least one tool; the provider lists
 * `FORMATS.OPENAI` among its formats; and `resolveProviderUrl` yields a truthy
 * URL for the OpenAI format with the normalized request kind.
 *
 * @param {object} [args]
 * @param {object} args.provider - Provider entry.
 * @param {string} args.sourceFormat - Format of the client request.
 * @param {string} args.targetFormat - Format selected for the candidate.
 * @param {string} [args.requestKind] - Request kind of the incoming call.
 * @param {object} args.body - Client request body.
 * @returns {boolean}
 */
function shouldPreferOpenAIForClaudeToolCalls({
  provider,
  sourceFormat,
  targetFormat,
  requestKind,
  body
} = {}) {
  const isClaudeToClaude = sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.CLAUDE;
  if (!isClaudeToClaude) {
    return false;
  }
  if (!hasToolDefinitions(body)) {
    return false;
  }

  const providerSpeaksOpenAI = getProviderFormats(provider).includes(FORMATS.OPENAI);
  if (!providerSpeaksOpenAI) {
    return false;
  }

  const openAIRequestKind = normalizeProviderRequestKind(FORMATS.OPENAI, requestKind);
  const openAIUrl = resolveProviderUrl(provider, FORMATS.OPENAI, openAIRequestKind);
  return Boolean(openAIUrl);
}
|
|
246
|
+
|
|
215
247
|
function isOpenAIHostedWebSearchRequest(targetFormat, requestKind) {
|
|
216
248
|
return targetFormat === FORMATS.OPENAI && requestKind === "responses";
|
|
217
249
|
}
|
|
@@ -446,48 +478,24 @@ function logToolRouting({ env, clientType, candidate, originalBody, providerBody
|
|
|
446
478
|
);
|
|
447
479
|
}
|
|
448
480
|
|
|
449
|
-
|
|
481
|
+
function buildProviderRequestPlan({
|
|
450
482
|
body,
|
|
451
483
|
sourceFormat,
|
|
452
|
-
|
|
484
|
+
targetFormat,
|
|
453
485
|
candidate,
|
|
454
486
|
requestKind,
|
|
455
487
|
requestHeaders,
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
runtimeConfig,
|
|
459
|
-
stateStore
|
|
488
|
+
interceptAmpWebSearch,
|
|
489
|
+
stream
|
|
460
490
|
}) {
|
|
461
|
-
const
|
|
462
|
-
const targetFormat = candidate.targetFormat;
|
|
491
|
+
const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
|
|
463
492
|
const translate = needsTranslation(sourceFormat, targetFormat);
|
|
464
|
-
const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
|
|
465
|
-
clientType,
|
|
466
|
-
originalBody: body,
|
|
467
|
-
runtimeConfig,
|
|
468
|
-
env
|
|
469
|
-
});
|
|
470
493
|
|
|
471
494
|
let providerBody = { ...body };
|
|
472
495
|
if (translate) {
|
|
473
|
-
|
|
474
|
-
providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
|
|
475
|
-
} catch (error) {
|
|
476
|
-
return {
|
|
477
|
-
ok: false,
|
|
478
|
-
status: 400,
|
|
479
|
-
retryable: false,
|
|
480
|
-
errorKind: "translation_error",
|
|
481
|
-
response: jsonResponse({
|
|
482
|
-
type: "error",
|
|
483
|
-
error: {
|
|
484
|
-
type: "invalid_request_error",
|
|
485
|
-
message: `Request translation failed: ${error instanceof Error ? error.message : String(error)}`
|
|
486
|
-
}
|
|
487
|
-
}, 400)
|
|
488
|
-
};
|
|
489
|
-
}
|
|
496
|
+
providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
|
|
490
497
|
}
|
|
498
|
+
|
|
491
499
|
providerBody.model = candidate.backend;
|
|
492
500
|
providerBody = applyCachingMapping({
|
|
493
501
|
originalBody: body,
|
|
@@ -504,9 +512,10 @@ export async function makeProviderCall({
|
|
|
504
512
|
targetModel: candidate.backend,
|
|
505
513
|
requestHeaders
|
|
506
514
|
});
|
|
507
|
-
|
|
515
|
+
|
|
516
|
+
const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
|
|
508
517
|
targetFormat,
|
|
509
|
-
requestKind
|
|
518
|
+
requestKind: normalizedRequestKind
|
|
510
519
|
});
|
|
511
520
|
const declaredOpenAIHostedWebSearchRewrite = rewriteProviderBodyForOpenAIHostedWebSearch(
|
|
512
521
|
providerBody,
|
|
@@ -515,21 +524,113 @@ export async function makeProviderCall({
|
|
|
515
524
|
if (declaredOpenAIHostedWebSearchRewrite.rewritten) {
|
|
516
525
|
providerBody = declaredOpenAIHostedWebSearchRewrite.providerBody;
|
|
517
526
|
}
|
|
527
|
+
|
|
518
528
|
if (interceptAmpWebSearch) {
|
|
519
529
|
providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
|
|
520
530
|
}
|
|
531
|
+
|
|
532
|
+
return {
|
|
533
|
+
targetFormat,
|
|
534
|
+
requestKind: normalizedRequestKind,
|
|
535
|
+
translate,
|
|
536
|
+
providerBody
|
|
537
|
+
};
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
export async function makeProviderCall({
|
|
541
|
+
body,
|
|
542
|
+
sourceFormat,
|
|
543
|
+
stream,
|
|
544
|
+
candidate,
|
|
545
|
+
requestKind,
|
|
546
|
+
requestHeaders,
|
|
547
|
+
env,
|
|
548
|
+
clientType,
|
|
549
|
+
runtimeConfig,
|
|
550
|
+
stateStore,
|
|
551
|
+
ampContext
|
|
552
|
+
}) {
|
|
553
|
+
const provider = candidate.provider;
|
|
554
|
+
const targetFormat = candidate.targetFormat;
|
|
555
|
+
const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
|
|
556
|
+
clientType,
|
|
557
|
+
originalBody: body,
|
|
558
|
+
runtimeConfig,
|
|
559
|
+
env
|
|
560
|
+
});
|
|
561
|
+
|
|
562
|
+
const preferOpenAIToolRouting = !isSubscriptionProvider(provider) && shouldPreferOpenAIForClaudeToolCalls({
|
|
563
|
+
provider,
|
|
564
|
+
sourceFormat,
|
|
565
|
+
targetFormat,
|
|
566
|
+
requestKind,
|
|
567
|
+
body
|
|
568
|
+
});
|
|
569
|
+
|
|
570
|
+
let effectiveBody = body;
|
|
571
|
+
if (ampContext?.presets?.reasoningEffort && !body?.reasoning_effort && !body?.reasoning?.effort) {
|
|
572
|
+
effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
let activePlan;
|
|
576
|
+
let fallbackPlan = null;
|
|
577
|
+
try {
|
|
578
|
+
activePlan = buildProviderRequestPlan({
|
|
579
|
+
body: effectiveBody,
|
|
580
|
+
sourceFormat,
|
|
581
|
+
targetFormat: preferOpenAIToolRouting ? FORMATS.OPENAI : targetFormat,
|
|
582
|
+
candidate,
|
|
583
|
+
requestKind,
|
|
584
|
+
requestHeaders,
|
|
585
|
+
interceptAmpWebSearch,
|
|
586
|
+
stream
|
|
587
|
+
});
|
|
588
|
+
if (preferOpenAIToolRouting) {
|
|
589
|
+
fallbackPlan = buildProviderRequestPlan({
|
|
590
|
+
body: effectiveBody,
|
|
591
|
+
sourceFormat,
|
|
592
|
+
targetFormat,
|
|
593
|
+
candidate,
|
|
594
|
+
requestKind,
|
|
595
|
+
requestHeaders,
|
|
596
|
+
interceptAmpWebSearch,
|
|
597
|
+
stream
|
|
598
|
+
});
|
|
599
|
+
}
|
|
600
|
+
} catch (error) {
|
|
601
|
+
return {
|
|
602
|
+
ok: false,
|
|
603
|
+
status: 400,
|
|
604
|
+
retryable: false,
|
|
605
|
+
errorKind: "translation_error",
|
|
606
|
+
response: jsonResponse({
|
|
607
|
+
type: "error",
|
|
608
|
+
error: {
|
|
609
|
+
type: "invalid_request_error",
|
|
610
|
+
message: `Request translation failed: ${error instanceof Error ? error.message : String(error)}`
|
|
611
|
+
}
|
|
612
|
+
}, 400)
|
|
613
|
+
};
|
|
614
|
+
}
|
|
615
|
+
|
|
521
616
|
logToolRouting({
|
|
522
617
|
env,
|
|
523
618
|
clientType,
|
|
524
619
|
candidate,
|
|
525
620
|
originalBody: body,
|
|
526
|
-
providerBody,
|
|
621
|
+
providerBody: activePlan.providerBody,
|
|
527
622
|
sourceFormat,
|
|
528
|
-
targetFormat
|
|
623
|
+
targetFormat: activePlan.targetFormat
|
|
529
624
|
});
|
|
530
625
|
|
|
531
626
|
if (isSubscriptionProvider(provider)) {
|
|
532
627
|
const subscriptionType = String(provider?.subscriptionType || provider?.subscription_type || "").trim().toLowerCase();
|
|
628
|
+
if (subscriptionType === "chatgpt-codex" && ampContext?.threadId) {
|
|
629
|
+
activePlan.providerBody = {
|
|
630
|
+
...activePlan.providerBody,
|
|
631
|
+
prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
|
|
632
|
+
};
|
|
633
|
+
}
|
|
533
634
|
const executeSubscriptionRequest = async (requestBody) => makeSubscriptionProviderCall({
|
|
534
635
|
provider,
|
|
535
636
|
body: requestBody,
|
|
@@ -537,7 +638,7 @@ export async function makeProviderCall({
|
|
|
537
638
|
stream: subscriptionType === "chatgpt-codex" ? true : Boolean(stream),
|
|
538
639
|
env
|
|
539
640
|
});
|
|
540
|
-
const subscriptionResult = await executeSubscriptionRequest(providerBody);
|
|
641
|
+
const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
|
|
541
642
|
|
|
542
643
|
if (!subscriptionResult?.ok) {
|
|
543
644
|
return subscriptionResult;
|
|
@@ -558,14 +659,14 @@ export async function makeProviderCall({
|
|
|
558
659
|
};
|
|
559
660
|
}
|
|
560
661
|
|
|
561
|
-
const fallbackModel = candidate?.backend || providerBody?.model || "unknown";
|
|
662
|
+
const fallbackModel = candidate?.backend || activePlan.providerBody?.model || "unknown";
|
|
562
663
|
let upstreamResponse = subscriptionResult.response;
|
|
563
664
|
if (interceptAmpWebSearch) {
|
|
564
665
|
const intercepted = await maybeInterceptAmpWebSearch({
|
|
565
666
|
response: upstreamResponse,
|
|
566
|
-
providerBody,
|
|
567
|
-
targetFormat,
|
|
568
|
-
requestKind,
|
|
667
|
+
providerBody: activePlan.providerBody,
|
|
668
|
+
targetFormat: activePlan.targetFormat,
|
|
669
|
+
requestKind: activePlan.requestKind,
|
|
569
670
|
stream,
|
|
570
671
|
runtimeConfig,
|
|
571
672
|
env,
|
|
@@ -581,11 +682,11 @@ export async function makeProviderCall({
|
|
|
581
682
|
return adaptProviderResponse({
|
|
582
683
|
response: upstreamResponse,
|
|
583
684
|
stream,
|
|
584
|
-
translate,
|
|
685
|
+
translate: activePlan.translate,
|
|
585
686
|
sourceFormat,
|
|
586
|
-
targetFormat,
|
|
687
|
+
targetFormat: activePlan.targetFormat,
|
|
587
688
|
fallbackModel,
|
|
588
|
-
requestKind,
|
|
689
|
+
requestKind: activePlan.requestKind,
|
|
589
690
|
requestBody: body,
|
|
590
691
|
clientType,
|
|
591
692
|
env
|
|
@@ -722,20 +823,21 @@ export async function makeProviderCall({
|
|
|
722
823
|
};
|
|
723
824
|
}
|
|
724
825
|
|
|
725
|
-
const
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
826
|
+
const executeHttpProviderRequest = async (plan) => {
|
|
827
|
+
const providerUrl = resolveProviderUrl(provider, plan.targetFormat, plan.requestKind);
|
|
828
|
+
if (!providerUrl) return null;
|
|
829
|
+
const headers = mergeCachingHeaders(
|
|
830
|
+
buildProviderHeaders(provider, env, plan.targetFormat),
|
|
831
|
+
requestHeaders,
|
|
832
|
+
plan.targetFormat
|
|
833
|
+
);
|
|
732
834
|
const timeoutMs = resolveUpstreamTimeoutMs(env);
|
|
733
835
|
const timeoutControl = buildTimeoutSignal(timeoutMs);
|
|
734
836
|
try {
|
|
735
837
|
const init = {
|
|
736
838
|
method: "POST",
|
|
737
839
|
headers,
|
|
738
|
-
body: JSON.stringify(
|
|
840
|
+
body: JSON.stringify(plan.providerBody)
|
|
739
841
|
};
|
|
740
842
|
if (timeoutControl.signal) {
|
|
741
843
|
init.signal = timeoutControl.signal;
|
|
@@ -747,7 +849,7 @@ export async function makeProviderCall({
|
|
|
747
849
|
}
|
|
748
850
|
};
|
|
749
851
|
|
|
750
|
-
if (!
|
|
852
|
+
if (!resolveProviderUrl(provider, activePlan.targetFormat, activePlan.requestKind)) {
|
|
751
853
|
return {
|
|
752
854
|
ok: false,
|
|
753
855
|
status: 500,
|
|
@@ -765,7 +867,7 @@ export async function makeProviderCall({
|
|
|
765
867
|
|
|
766
868
|
let response;
|
|
767
869
|
try {
|
|
768
|
-
response = await executeHttpProviderRequest(
|
|
870
|
+
response = await executeHttpProviderRequest(activePlan);
|
|
769
871
|
} catch (error) {
|
|
770
872
|
return {
|
|
771
873
|
ok: false,
|
|
@@ -782,22 +884,40 @@ export async function makeProviderCall({
|
|
|
782
884
|
};
|
|
783
885
|
}
|
|
784
886
|
|
|
887
|
+
if ((!response || !response.ok) && fallbackPlan) {
|
|
888
|
+
try {
|
|
889
|
+
const fallbackResponse = await executeHttpProviderRequest(fallbackPlan);
|
|
890
|
+
if (fallbackResponse instanceof Response && fallbackResponse.ok) {
|
|
891
|
+
response = fallbackResponse;
|
|
892
|
+
activePlan = fallbackPlan;
|
|
893
|
+
}
|
|
894
|
+
} catch {
|
|
895
|
+
// Keep the original failure if the fallback request also fails.
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
|
|
785
899
|
if (!response.ok) {
|
|
786
900
|
const retriedOpenAIHostedWebSearch = await maybeRetryOpenAIHostedWebSearchProviderRequest({
|
|
787
901
|
response,
|
|
788
|
-
executeProviderRequest: executeHttpProviderRequest
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
902
|
+
executeProviderRequest: async (nextProviderBody) => executeHttpProviderRequest({
|
|
903
|
+
...activePlan,
|
|
904
|
+
providerBody: nextProviderBody
|
|
905
|
+
}),
|
|
906
|
+
providerBody: activePlan.providerBody,
|
|
907
|
+
targetFormat: activePlan.targetFormat,
|
|
908
|
+
requestKind: activePlan.requestKind
|
|
792
909
|
});
|
|
793
910
|
response = retriedOpenAIHostedWebSearch.response;
|
|
794
|
-
|
|
911
|
+
activePlan = {
|
|
912
|
+
...activePlan,
|
|
913
|
+
providerBody: retriedOpenAIHostedWebSearch.providerBody
|
|
914
|
+
};
|
|
795
915
|
}
|
|
796
916
|
|
|
797
917
|
if (!response.ok) {
|
|
798
|
-
const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, providerBody, {
|
|
799
|
-
targetFormat,
|
|
800
|
-
requestKind
|
|
918
|
+
const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
|
|
919
|
+
targetFormat: activePlan.targetFormat,
|
|
920
|
+
requestKind: activePlan.requestKind
|
|
801
921
|
});
|
|
802
922
|
return {
|
|
803
923
|
ok: false,
|
|
@@ -805,23 +925,26 @@ export async function makeProviderCall({
|
|
|
805
925
|
retryable: shouldRetryStatus(response.status),
|
|
806
926
|
...(hostedWebSearchErrorKind ? { errorKind: hostedWebSearchErrorKind } : {}),
|
|
807
927
|
upstreamResponse: response,
|
|
808
|
-
translateError: translate
|
|
928
|
+
translateError: activePlan.translate
|
|
809
929
|
};
|
|
810
930
|
}
|
|
811
931
|
|
|
812
932
|
if (interceptAmpWebSearch) {
|
|
813
933
|
const intercepted = await maybeInterceptAmpWebSearch({
|
|
814
934
|
response,
|
|
815
|
-
providerBody,
|
|
816
|
-
targetFormat,
|
|
817
|
-
requestKind,
|
|
935
|
+
providerBody: activePlan.providerBody,
|
|
936
|
+
targetFormat: activePlan.targetFormat,
|
|
937
|
+
requestKind: activePlan.requestKind,
|
|
818
938
|
stream,
|
|
819
939
|
runtimeConfig,
|
|
820
940
|
env,
|
|
821
941
|
stateStore,
|
|
822
942
|
executeProviderRequest: async (followUpBody) => {
|
|
823
943
|
try {
|
|
824
|
-
return await executeHttpProviderRequest(
|
|
944
|
+
return await executeHttpProviderRequest({
|
|
945
|
+
...activePlan,
|
|
946
|
+
providerBody: followUpBody
|
|
947
|
+
});
|
|
825
948
|
} catch {
|
|
826
949
|
return null;
|
|
827
950
|
}
|
|
@@ -833,11 +956,11 @@ export async function makeProviderCall({
|
|
|
833
956
|
return adaptProviderResponse({
|
|
834
957
|
response,
|
|
835
958
|
stream,
|
|
836
|
-
translate,
|
|
959
|
+
translate: activePlan.translate,
|
|
837
960
|
sourceFormat,
|
|
838
|
-
targetFormat,
|
|
961
|
+
targetFormat: activePlan.targetFormat,
|
|
839
962
|
fallbackModel: candidate.backend,
|
|
840
|
-
requestKind,
|
|
963
|
+
requestKind: activePlan.requestKind,
|
|
841
964
|
requestBody: body,
|
|
842
965
|
clientType,
|
|
843
966
|
env
|
|
@@ -694,9 +694,10 @@ export function handleClaudeStreamToOpenAIResponses(response, requestBody, fallb
|
|
|
694
694
|
const index = Number(payload.index);
|
|
695
695
|
const blockInfo = payload.content_block || {};
|
|
696
696
|
state.activeBlocks.set(index, String(blockInfo.type || "").trim());
|
|
697
|
+
// Defer text output item creation until first renderable text delta
|
|
698
|
+
// to avoid emitting empty assistant text scaffolding before tool calls.
|
|
697
699
|
if (blockInfo.type === "text") {
|
|
698
|
-
|
|
699
|
-
state.textOpened = true;
|
|
700
|
+
// Intentionally do NOT open text item yet; wait for renderable text in content_block_delta.
|
|
700
701
|
} else if (blockInfo.type === "thinking" || blockInfo.type === "redacted_thinking") {
|
|
701
702
|
ensureOpenAIResponsesReasoningItem(state, index, controller, encoder);
|
|
702
703
|
} else if (blockInfo.type === "tool_use") {
|
|
@@ -709,6 +710,10 @@ export function handleClaudeStreamToOpenAIResponses(response, requestBody, fallb
|
|
|
709
710
|
const index = Number(payload.index);
|
|
710
711
|
const delta = payload.delta || {};
|
|
711
712
|
if (delta.type === "text_delta" && typeof delta.text === "string") {
|
|
713
|
+
const hasRenderableText = /\S/.test(delta.text);
|
|
714
|
+
if (!state.textOpened && !hasRenderableText) {
|
|
715
|
+
return;
|
|
716
|
+
}
|
|
712
717
|
ensureOpenAIResponsesTextItem(state, controller, encoder);
|
|
713
718
|
state.textOpened = true;
|
|
714
719
|
state.textBuffer += delta.text;
|
|
@@ -461,3 +461,28 @@ export function isStreamingEnabled(sourceFormat, body) {
|
|
|
461
461
|
// Some clients omit `stream` on follow-up/tool turns and expect JSON responses.
|
|
462
462
|
return body?.stream === true;
|
|
463
463
|
}
|
|
464
|
+
|
|
465
|
+
/**
 * Preset values looked up by the lowercase `x-amp-mode` header value.
 * An empty string means the mode carries no value for that field.
 */
const AMP_MODE_PRESETS = new Map([
  ["smart", { reasoningEffort: "", toolChoice: "" }],
  ["free", { reasoningEffort: "", toolChoice: "" }],
  ["rush", { reasoningEffort: "low", toolChoice: "" }],
  ["deep", { reasoningEffort: "high", toolChoice: "" }],
  ["large", { reasoningEffort: "", toolChoice: "" }],
  ["bombadil", { reasoningEffort: "", toolChoice: "" }]
]);

/**
 * Read the `x-amp-*` request headers into a plain context object.
 *
 * @param {{ headers?: { get(name: string): unknown } }} request - Any object
 *   whose `headers` exposes a `get(name)` method.
 * @returns {{ threadId: string, mode: string, overrideProvider: string,
 *   feature: string, messageId: string, presets: object | null }}
 *   Trimmed header values (`mode` and `overrideProvider` lowercased) plus the
 *   preset entry matching `mode`, or `null` when the mode has no preset.
 *   Every string is empty when `request.headers` is missing or has no `get`.
 */
export function extractAmpContext(request) {
  const headerBag = request?.headers;
  const isReadable = Boolean(headerBag) && typeof headerBag.get === "function";
  if (!isReadable) {
    return { threadId: "", mode: "", overrideProvider: "", feature: "", messageId: "", presets: null };
  }

  // Missing or falsy header values collapse to an empty string before trimming.
  const readHeader = (name) => String(headerBag.get(name) || "").trim();

  const threadId = readHeader("x-amp-thread-id");
  const mode = readHeader("x-amp-mode").toLowerCase();
  const overrideProvider = readHeader("x-amp-override-provider").toLowerCase();
  const feature = readHeader("x-amp-feature");
  const messageId = readHeader("x-amp-message-id");

  const matchedPreset = AMP_MODE_PRESETS.get(mode);
  const presets = matchedPreset || null;

  return { threadId, mode, overrideProvider, feature, messageId, presets };
}
|
package/src/runtime/handler.js
CHANGED
|
@@ -24,6 +24,7 @@ import { corsResponse, jsonResponse } from "./handler/http.js";
|
|
|
24
24
|
import {
|
|
25
25
|
detectUserRequestFormat,
|
|
26
26
|
estimateRequestContextTokens,
|
|
27
|
+
extractAmpContext,
|
|
27
28
|
inferAmpContextRequirement,
|
|
28
29
|
isAmpManagementPath,
|
|
29
30
|
isJsonRequest,
|
|
@@ -45,7 +46,7 @@ import {
|
|
|
45
46
|
convertAmpGeminiRequestToOpenAI,
|
|
46
47
|
hasGeminiWebSearchTool
|
|
47
48
|
} from "./handler/amp-gemini.js";
|
|
48
|
-
import { shouldInterceptAmpWebSearch } from "./handler/amp-web-search.js";
|
|
49
|
+
import { shouldInterceptAmpWebSearch, maybeInterceptAmpInternalSearch } from "./handler/amp-web-search.js";
|
|
49
50
|
import {
|
|
50
51
|
isRequestFromAllowedIp,
|
|
51
52
|
resolveAllowedOrigin,
|
|
@@ -59,6 +60,7 @@ import {
|
|
|
59
60
|
resolveRetryPolicy
|
|
60
61
|
} from "./handler/fallback.js";
|
|
61
62
|
import { parseJsonSafely, sleep } from "./handler/utils.js";
|
|
63
|
+
import { createThreadAffinityStore } from "./thread-affinity.js";
|
|
62
64
|
import {
|
|
63
65
|
applyCandidateFailureState,
|
|
64
66
|
applyRuntimeRetryPolicyGuards,
|
|
@@ -457,6 +459,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
457
459
|
|
|
458
460
|
const requestedModel = body?.model || "smart";
|
|
459
461
|
const stream = isStreamingEnabled(sourceFormat, body);
|
|
462
|
+
const ampContext = options.clientType === "amp"
|
|
463
|
+
? extractAmpContext(request)
|
|
464
|
+
: null;
|
|
460
465
|
|
|
461
466
|
const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
|
|
462
467
|
clientType: options.clientType,
|
|
@@ -481,7 +486,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
481
486
|
|
|
482
487
|
const resolved = resolveRequestModel(config, requestedModel, sourceFormat, {
|
|
483
488
|
clientType: options.clientType,
|
|
484
|
-
providerHint: options.providerHint
|
|
489
|
+
providerHint: ampContext?.overrideProvider || options.providerHint
|
|
485
490
|
});
|
|
486
491
|
if (!resolved.primary) {
|
|
487
492
|
if (options.clientType === "amp" && resolved.allowAmpProxy !== false && isAmpProxyEnabled(config)) {
|
|
@@ -604,6 +609,24 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
604
609
|
recordRouteSkip(routeDebug, skippedEntry.candidate, skippedEntry.skipReasons);
|
|
605
610
|
}
|
|
606
611
|
|
|
612
|
+
// Thread affinity: reorder candidates to prefer affinity-bound candidate
|
|
613
|
+
if (ampContext?.threadId && options.threadAffinityStore) {
|
|
614
|
+
const affinityCandidateKey = options.threadAffinityStore.getAffinity(ampContext.threadId);
|
|
615
|
+
if (affinityCandidateKey) {
|
|
616
|
+
const affinityIndex = ranking.entries.findIndex(
|
|
617
|
+
(entry) => entry.eligible && entry.candidateKey === affinityCandidateKey
|
|
618
|
+
);
|
|
619
|
+
if (affinityIndex > 0) {
|
|
620
|
+
const [affinityEntry] = ranking.entries.splice(affinityIndex, 1);
|
|
621
|
+
ranking.entries.unshift(affinityEntry);
|
|
622
|
+
ranking.selectedEntry = affinityEntry;
|
|
623
|
+
}
|
|
624
|
+
if (affinityIndex < 0) {
|
|
625
|
+
options.threadAffinityStore.clearAffinity(ampContext.threadId);
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
|
|
607
630
|
if (!ranking.selectedEntry) {
|
|
608
631
|
return withRouteDebugHeaders(jsonResponse({
|
|
609
632
|
type: "error",
|
|
@@ -652,7 +675,8 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
652
675
|
env,
|
|
653
676
|
clientType: options.clientType,
|
|
654
677
|
runtimeConfig: config,
|
|
655
|
-
stateStore
|
|
678
|
+
stateStore,
|
|
679
|
+
ampContext
|
|
656
680
|
});
|
|
657
681
|
|
|
658
682
|
if (!quotaConsumed && shouldConsumeQuotaFromResult(result)) {
|
|
@@ -694,6 +718,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
694
718
|
});
|
|
695
719
|
pendingFallbackContext = null;
|
|
696
720
|
}
|
|
721
|
+
if (ampContext?.threadId && options.threadAffinityStore) {
|
|
722
|
+
options.threadAffinityStore.setAffinity(ampContext.threadId, entry.candidateKey);
|
|
723
|
+
}
|
|
697
724
|
return withRouteDebugHeaders(result.response, routeDebug);
|
|
698
725
|
}
|
|
699
726
|
|
|
@@ -784,6 +811,7 @@ export function createFetchHandler(options) {
|
|
|
784
811
|
}
|
|
785
812
|
|
|
786
813
|
let stateStoreRef = options.stateStore || null;
|
|
814
|
+
const threadAffinityStore = createThreadAffinityStore();
|
|
787
815
|
let stateStorePromise = null;
|
|
788
816
|
|
|
789
817
|
async function ensureStateStore(env = {}, runtimeFlags = {}) {
|
|
@@ -906,6 +934,11 @@ export function createFetchHandler(options) {
|
|
|
906
934
|
return respond(jsonResponse({ error: "Forbidden" }, 403));
|
|
907
935
|
}
|
|
908
936
|
|
|
937
|
+
const searchInterceptResult = await maybeInterceptAmpInternalSearch(request, url, config, env);
|
|
938
|
+
if (searchInterceptResult) {
|
|
939
|
+
return respond(searchInterceptResult);
|
|
940
|
+
}
|
|
941
|
+
|
|
909
942
|
return respond(await proxyAmpUpstreamRequest({ request, config }));
|
|
910
943
|
}
|
|
911
944
|
|
|
@@ -1051,7 +1084,8 @@ export function createFetchHandler(options) {
|
|
|
1051
1084
|
providerHint: "google",
|
|
1052
1085
|
requestKind: "chat-completions",
|
|
1053
1086
|
stateStore,
|
|
1054
|
-
runtimeFlags
|
|
1087
|
+
runtimeFlags,
|
|
1088
|
+
threadAffinityStore
|
|
1055
1089
|
});
|
|
1056
1090
|
|
|
1057
1091
|
if (routeResponse.status >= 400) {
|
|
@@ -1095,7 +1129,8 @@ export function createFetchHandler(options) {
|
|
|
1095
1129
|
providerHint: route.providerHint,
|
|
1096
1130
|
requestKind: route.requestKind,
|
|
1097
1131
|
stateStore,
|
|
1098
|
-
runtimeFlags
|
|
1132
|
+
runtimeFlags,
|
|
1133
|
+
threadAffinityStore
|
|
1099
1134
|
});
|
|
1100
1135
|
return respond(routeResponse);
|
|
1101
1136
|
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
const DEFAULT_AFFINITY_TTL_MS = 60 * 60 * 1000; // 1 hour
const MAX_BINDINGS = 10_000;

/**
 * Create an in-memory store that binds a thread id to a candidate key for a
 * limited time.
 *
 * @param {object} [options]
 * @param {number} [options.ttlMs] - Lifetime of a binding in milliseconds.
 *   Falsy values fall back to one hour.
 * @param {number} [options.maxBindings] - Hard cap on the number of stored
 *   bindings. Anything other than a positive integer falls back to 10,000.
 * @returns {{
 *   getAffinity(threadId: string): (string | null),
 *   setAffinity(threadId: string, candidateKey: string): void,
 *   clearAffinity(threadId: string): void,
 *   pruneExpired(now?: number): void,
 *   _bindings: Map<string, { candidateKey: string, lastSeen: number, expiresAt: number }>
 * }}
 */
export function createThreadAffinityStore(options = {}) {
  const ttlMs = options?.ttlMs || DEFAULT_AFFINITY_TTL_MS;
  const requestedCap = options?.maxBindings;
  const maxBindings = Number.isInteger(requestedCap) && requestedCap > 0
    ? requestedCap
    : MAX_BINDINGS;
  const bindings = new Map();

  /** Drop every binding whose expiry is at or before `now`. */
  function pruneExpired(now = Date.now()) {
    for (const [key, binding] of bindings) {
      if (binding.expiresAt <= now) bindings.delete(key);
    }
  }

  /**
   * Enforce the size cap by removing the least recently set bindings.
   * A Map iterates in insertion order and setAffinity re-inserts on refresh,
   * so the first keys are always the stalest ones.
   */
  function evictOldest() {
    for (const key of bindings.keys()) {
      if (bindings.size <= maxBindings) break;
      bindings.delete(key);
    }
  }

  /** Return the candidate key bound to `threadId`, or null if absent or expired. */
  function getAffinity(threadId) {
    if (!threadId) return null;
    const binding = bindings.get(threadId);
    if (!binding) return null;
    if (binding.expiresAt <= Date.now()) {
      bindings.delete(threadId);
      return null;
    }
    return binding.candidateKey;
  }

  /** Bind `threadId` to `candidateKey`, restarting the TTL. No-op on falsy input. */
  function setAffinity(threadId, candidateKey) {
    if (!threadId || !candidateKey) return;
    const now = Date.now();
    // Delete first so a refreshed binding moves to the end of the iteration
    // order and is not mistaken for the oldest entry during eviction.
    bindings.delete(threadId);
    bindings.set(threadId, {
      candidateKey,
      lastSeen: now,
      expiresAt: now + ttlMs
    });
    if (bindings.size > maxBindings) {
      pruneExpired(now);
      // Pruning alone cannot bound the map when every binding is still live.
      evictOldest();
    }
  }

  /** Remove the binding for `threadId`, if any. */
  function clearAffinity(threadId) {
    if (threadId) bindings.delete(threadId);
  }

  return { getAffinity, setAffinity, clearAffinity, pruneExpired, _bindings: bindings };
}
|
|
@@ -52,7 +52,7 @@ export function openaiToClaudeResponse(chunk, state) {
|
|
|
52
52
|
|
|
53
53
|
// Handle regular content
|
|
54
54
|
const textDelta = normalizeTextDelta(delta?.content);
|
|
55
|
-
if (textDelta) {
|
|
55
|
+
if (textDelta && (state.textBlockStarted || hasRenderableText(textDelta))) {
|
|
56
56
|
stopThinkingBlock(state, results);
|
|
57
57
|
|
|
58
58
|
if (!state.textBlockStarted) {
|
|
@@ -119,6 +119,10 @@ export function openaiToClaudeResponse(chunk, state) {
|
|
|
119
119
|
return results.length > 0 ? results : null;
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
+
/**
 * Report whether `text` is a string containing at least one non-whitespace
 * character. Non-string values are never renderable.
 *
 * @param {unknown} text
 * @returns {boolean}
 */
function hasRenderableText(text) {
  if (typeof text !== "string") {
    return false;
  }
  return text.search(/\S/) !== -1;
}
|
|
125
|
+
|
|
122
126
|
function normalizeTextDelta(content) {
|
|
123
127
|
if (typeof content === "string") return content;
|
|
124
128
|
if (!Array.isArray(content)) return "";
|
|
@@ -191,6 +195,7 @@ function normalizeMessageToolCalls(message) {
|
|
|
191
195
|
|
|
192
196
|
function emitTextDelta(text, state, results) {
|
|
193
197
|
if (!text) return;
|
|
198
|
+
if (!state.textBlockStarted && !hasRenderableText(text)) return;
|
|
194
199
|
stopThinkingBlock(state, results);
|
|
195
200
|
|
|
196
201
|
if (!state.textBlockStarted) {
|