@khanglvm/llm-router 2.0.5 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -5
- package/package.json +1 -1
- package/src/runtime/codex-request-transformer.js +22 -2
- package/src/runtime/config.js +5 -1
- package/src/runtime/handler/amp-web-search.js +130 -0
- package/src/runtime/handler/provider-call.js +15 -3
- package/src/runtime/handler/provider-translation.js +7 -2
- package/src/runtime/handler/request.js +25 -0
- package/src/runtime/handler.js +40 -5
- package/src/runtime/thread-affinity.js +41 -0
- package/src/translator/response/openai-to-claude.js +6 -1
package/README.md
CHANGED
|
@@ -2,20 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
LLM Router is a local and Cloudflare-deployable gateway for routing one client endpoint across multiple LLM providers, models, aliases, fallbacks, and rate limits.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
**Current version**: `2.0.5`
|
|
6
6
|
|
|
7
|
+
NPM package:
|
|
7
8
|
```bash
|
|
8
9
|
@khanglvm/llm-router
|
|
9
10
|
```
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
Primary CLI command:
|
|
13
13
|
```bash
|
|
14
14
|
llr
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
`2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
|
|
18
|
-
|
|
19
17
|
## Install
|
|
20
18
|
|
|
21
19
|
```bash
|
|
@@ -266,7 +264,18 @@ Run the JavaScript test suite:
|
|
|
266
264
|
node --test $(rg --files -g "*.test.js" src)
|
|
267
265
|
```
|
|
268
266
|
|
|
267
|
+
## Documentation
|
|
268
|
+
|
|
269
|
+
Comprehensive documentation is available in the `docs/` directory:
|
|
270
|
+
|
|
271
|
+
- **[Project Overview & PDR](./docs/project-overview-pdr.md)** — Feature matrix, target users, success metrics, constraints
|
|
272
|
+
- **[Codebase Summary](./docs/codebase-summary.md)** — Directory structure, module relationships, entry points, test infrastructure
|
|
273
|
+
- **[Code Standards](./docs/code-standards.md)** — Patterns, naming conventions, testing, error handling
|
|
274
|
+
- **[System Architecture](./docs/system-architecture.md)** — Request lifecycle, subsystem boundaries, data flow, deployment models
|
|
275
|
+
- **[Project Roadmap](./docs/project-roadmap.md)** — Current status, planned phases, timeline, success metrics
|
|
276
|
+
|
|
269
277
|
## Security and Releases
|
|
270
278
|
|
|
271
279
|
- Security: [`SECURITY.md`](https://github.com/khanglvm/llm-router/blob/master/SECURITY.md)
|
|
272
280
|
- Release notes: [`CHANGELOG.md`](https://github.com/khanglvm/llm-router/blob/master/CHANGELOG.md)
|
|
281
|
+
- AMP routing: [`docs/amp-routing.md`](./docs/amp-routing.md)
|
package/package.json
CHANGED
|
@@ -224,10 +224,15 @@ function normalizeInputMessageContent(content, role) {
|
|
|
224
224
|
? part.image_url
|
|
225
225
|
: part.image_url?.url;
|
|
226
226
|
if (typeof rawUrl === 'string' && rawUrl.trim()) {
|
|
227
|
-
|
|
227
|
+
const imageItem = {
|
|
228
228
|
type: 'input_image',
|
|
229
229
|
image_url: rawUrl
|
|
230
|
-
}
|
|
230
|
+
};
|
|
231
|
+
const detail = part.image_url?.detail || part.detail;
|
|
232
|
+
if (typeof detail === 'string' && detail.trim()) {
|
|
233
|
+
imageItem.detail = detail.trim();
|
|
234
|
+
}
|
|
235
|
+
parts.push(imageItem);
|
|
231
236
|
}
|
|
232
237
|
continue;
|
|
233
238
|
}
|
|
@@ -333,6 +338,21 @@ function normalizeToolChoiceForResponses(toolChoice) {
|
|
|
333
338
|
const normalizedType = String(toolChoice.type || '').trim().toLowerCase();
|
|
334
339
|
if (normalizedType === 'none') return 'none';
|
|
335
340
|
if (normalizedType === 'required' || normalizedType === 'any' || normalizedType === 'tool') {
|
|
341
|
+
const functionName = String(
|
|
342
|
+
toolChoice.function?.name || toolChoice.name || ''
|
|
343
|
+
).trim();
|
|
344
|
+
if (functionName) {
|
|
345
|
+
return { type: 'function', name: functionName };
|
|
346
|
+
}
|
|
347
|
+
return 'required';
|
|
348
|
+
}
|
|
349
|
+
if (normalizedType === 'function') {
|
|
350
|
+
const functionName = String(
|
|
351
|
+
toolChoice.function?.name || toolChoice.name || ''
|
|
352
|
+
).trim();
|
|
353
|
+
if (functionName) {
|
|
354
|
+
return { type: 'function', name: functionName };
|
|
355
|
+
}
|
|
336
356
|
return 'required';
|
|
337
357
|
}
|
|
338
358
|
}
|
package/src/runtime/config.js
CHANGED
|
@@ -735,7 +735,11 @@ function normalizeAmpWebSearchConfig(rawWebSearch) {
|
|
|
735
735
|
return {
|
|
736
736
|
strategy: normalizeAmpWebSearchStrategy(rawWebSearch.strategy),
|
|
737
737
|
count,
|
|
738
|
-
providers
|
|
738
|
+
providers,
|
|
739
|
+
interceptInternalSearch: normalizeBooleanValue(
|
|
740
|
+
rawWebSearch.interceptInternalSearch ?? rawWebSearch["intercept-internal-search"],
|
|
741
|
+
false
|
|
742
|
+
)
|
|
739
743
|
};
|
|
740
744
|
}
|
|
741
745
|
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
resolveRouteReference
|
|
11
11
|
} from "../config.js";
|
|
12
12
|
import { isSubscriptionProvider, makeSubscriptionProviderCall } from "../subscription-provider.js";
|
|
13
|
+
import { jsonResponse } from "./http.js";
|
|
13
14
|
|
|
14
15
|
const SEARCH_TOOL_NAME = "web_search";
|
|
15
16
|
const READ_WEB_PAGE_TOOL_NAME = "read_web_page";
|
|
@@ -2178,6 +2179,135 @@ export async function testHostedWebSearchProviderRoute({
|
|
|
2178
2179
|
}, query, runtimeConfig, env);
|
|
2179
2180
|
}
|
|
2180
2181
|
|
|
2182
|
+
/**
 * Query a single non-hosted search backend and return its hits in a uniform shape.
 *
 * Supported `provider.id` values are "brave", "tavily", "exa" and "searxng"; any other
 * id yields an empty list. A missing credential (`apiKey`, or `url` for SearXNG), an
 * empty query, or a non-OK HTTP status also yields an empty list rather than an error.
 * Network or JSON-parse failures are not caught here and propagate to the caller.
 *
 * @param {string} query - Raw search query; trimmed before use.
 * @param {number} count - Maximum number of results to request and return.
 * @param {{id?: string, apiKey?: string, url?: string}} provider - Backend descriptor.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 */
async function fetchStructuredSearchResults(query, count, provider) {
  const normalizedQuery = String(query || "").trim();
  if (!normalizedQuery || !provider) return [];

  // Shared projection: every backend returns an array of objects carrying title/url,
  // only the field holding the snippet text differs.
  const toResults = (items, pickSnippet) =>
    (Array.isArray(items) ? items.slice(0, count) : []).map((item) => ({
      title: String(item?.title || ""),
      url: String(item?.url || ""),
      snippet: String(pickSnippet(item) || "")
    }));

  const id = provider.id;

  if (id === "brave") {
    if (!provider.apiKey) return [];
    const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(normalizedQuery)}&count=${count}&text_decorations=false`;
    const response = await runFetchWithTimeout(url, {
      headers: { Accept: "application/json", "X-Subscription-Token": provider.apiKey }
    });
    if (!response.ok) return [];
    const payload = await response.json();
    return toResults(payload?.web?.results, (item) => item?.description);
  }

  if (id === "tavily") {
    if (!provider.apiKey) return [];
    const response = await runFetchWithTimeout("https://api.tavily.com/search", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ api_key: provider.apiKey, query: normalizedQuery, max_results: count, search_depth: "basic" })
    });
    if (!response.ok) return [];
    const payload = await response.json();
    return toResults(payload?.results, (item) => item?.content);
  }

  if (id === "exa") {
    if (!provider.apiKey) return [];
    const response = await runFetchWithTimeout("https://api.exa.ai/search", {
      method: "POST",
      headers: { "Content-Type": "application/json", "x-api-key": provider.apiKey },
      body: JSON.stringify({ query: normalizedQuery, numResults: count, type: "auto", contents: { text: { maxCharacters: 500 } } })
    });
    if (!response.ok) return [];
    const payload = await response.json();
    return toResults(payload?.results, (item) => item?.text || item?.snippet);
  }

  if (id === "searxng") {
    if (!provider.url) return [];
    // Strip trailing slashes so a configured base such as "http://host:8080/" does not
    // produce "http://host:8080//search", which some deployments reject.
    const baseUrl = String(provider.url).replace(/\/+$/, "");
    if (!baseUrl) return [];
    const url = `${baseUrl}/search?q=${encodeURIComponent(normalizedQuery)}&format=json&categories=general&language=auto`;
    const response = await runFetchWithTimeout(url, {
      headers: { Accept: "application/json", "User-Agent": "llm-router" }
    });
    if (!response.ok) return [];
    const payload = await response.json();
    return toResults(payload?.results, (item) => item?.content);
  }

  return [];
}
|
|
2240
|
+
|
|
2241
|
+
/**
 * Run a batch of search queries against the first ready, non-hosted provider that
 * returns at least one hit.
 *
 * Queries are stringified, trimmed, emptied entries dropped, and capped at 10. The
 * per-query result count is clamped to an integer in [1, 20] (default 5). Providers
 * are tried in snapshot order; a provider whose batch throws or yields nothing is
 * skipped and the next one is tried.
 *
 * @param {object} args
 * @param {unknown[]} args.queries - Candidate query strings.
 * @param {number} [args.maxResults] - Requested results per query.
 * @param {object} args.config - Router config forwarded to the snapshot builder.
 * @param {object} [args.env] - Environment forwarded to the snapshot builder.
 * @returns {Promise<{results: Array<object>, provider: string}>} Concatenated hits of
 *   all queries plus the id of the provider that answered; `provider` is "" when no
 *   provider produced results.
 */
export async function executeWebSearchQueries({ queries, maxResults, config, env }) {
  const normalizedQueries = (Array.isArray(queries) ? queries : [])
    .map((q) => String(q || "").trim())
    .filter(Boolean)
    .slice(0, 10);
  if (normalizedQueries.length === 0) return { results: [], provider: "" };

  // Truncate before clamping so a fractional value never reaches a provider request.
  const count = Math.max(1, Math.min(20, Math.trunc(Number(maxResults) || 5)));
  const snapshot = await buildAmpWebSearchSnapshot(config, { env });
  const readyProviders = (Array.isArray(snapshot?.providers) ? snapshot.providers : [])
    .filter((p) => p?.ready && !isHostedSearchProvider(p));

  for (const providerStatus of readyProviders) {
    try {
      const batchResults = await Promise.all(
        normalizedQueries.map((query) => fetchStructuredSearchResults(query, count, providerStatus))
      );
      const allResults = batchResults.flat();
      if (allResults.length > 0) {
        return { results: allResults, provider: providerStatus.id };
      }
    } catch (error) {
      // Best-effort fallback: keep going, but leave a trace of why this provider was skipped.
      console.warn(`[llm-router] web search provider "${providerStatus.id}" failed: ${error?.message || error}`);
    }
  }

  return { results: [], provider: "" };
}
|
|
2266
|
+
|
|
2267
|
+
/**
 * Answer an AMP internal `webSearch2` request from locally configured search providers.
 *
 * Returns `null` whenever the request should be handled by the caller's normal path
 * instead: the URL has no `webSearch2` query parameter, interception is not enabled in
 * `config.webSearch` / `config.amp.webSearch`, no providers are configured, the body is
 * not JSON or carries no `params.searchQueries`, the search throws, or no provider
 * produced any result.
 *
 * @param {Request} request - Incoming request; its body is read from a clone so the
 *   original stays consumable by the caller.
 * @param {URL} url - Parsed request URL.
 * @param {object} config - Router runtime config.
 * @param {object} env - Environment forwarded to the search executor.
 * @returns {Promise<Response|null>} JSON response `{ result: { results: [...] } }` or
 *   `null` to signal "not intercepted".
 */
export async function maybeInterceptAmpInternalSearch(request, url, config, env) {
  const searchParams = url.searchParams;
  if (!searchParams.has("webSearch2")) return null;

  const webSearchConfig = config?.webSearch || config?.amp?.webSearch;
  if (!webSearchConfig?.interceptInternalSearch) return null;

  const providers = Array.isArray(webSearchConfig?.providers) ? webSearchConfig.providers : [];
  if (providers.length === 0) return null;

  let body;
  try {
    body = await request.clone().json();
  } catch {
    // Not a JSON body, so this is not a request we know how to answer.
    return null;
  }

  const params = body?.params;
  if (!params || !Array.isArray(params.searchQueries) || params.searchQueries.length === 0) return null;

  try {
    const outcome = await executeWebSearchQueries({
      queries: params.searchQueries,
      maxResults: Number(params.maxResults) || 5,
      config,
      env
    });

    // The executor reports provider failures as an empty result list rather than by
    // throwing, so an empty list is treated like a failure: decline to intercept
    // instead of telling the client that the web has no matches.
    const hits = Array.isArray(outcome?.results) ? outcome.results : [];
    if (hits.length === 0) return null;

    return jsonResponse({
      result: {
        results: hits.map((r) => ({
          title: r.title || "",
          url: r.url || "",
          snippet: r.snippet || "",
          content: r.snippet || ""
        }))
      }
    });
  } catch (error) {
    console.warn(`[llm-router] webSearch2 interception failed: ${error?.message || error}`);
    return null;
  }
}
|
|
2310
|
+
|
|
2181
2311
|
export async function maybeInterceptAmpWebSearch({
|
|
2182
2312
|
response,
|
|
2183
2313
|
providerBody,
|
|
@@ -547,7 +547,8 @@ export async function makeProviderCall({
|
|
|
547
547
|
env,
|
|
548
548
|
clientType,
|
|
549
549
|
runtimeConfig,
|
|
550
|
-
stateStore
|
|
550
|
+
stateStore,
|
|
551
|
+
ampContext
|
|
551
552
|
}) {
|
|
552
553
|
const provider = candidate.provider;
|
|
553
554
|
const targetFormat = candidate.targetFormat;
|
|
@@ -566,11 +567,16 @@ export async function makeProviderCall({
|
|
|
566
567
|
body
|
|
567
568
|
});
|
|
568
569
|
|
|
570
|
+
let effectiveBody = body;
|
|
571
|
+
if (ampContext?.presets?.reasoningEffort && !body?.reasoning_effort && !body?.reasoning?.effort) {
|
|
572
|
+
effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
|
|
573
|
+
}
|
|
574
|
+
|
|
569
575
|
let activePlan;
|
|
570
576
|
let fallbackPlan = null;
|
|
571
577
|
try {
|
|
572
578
|
activePlan = buildProviderRequestPlan({
|
|
573
|
-
body,
|
|
579
|
+
body: effectiveBody,
|
|
574
580
|
sourceFormat,
|
|
575
581
|
targetFormat: preferOpenAIToolRouting ? FORMATS.OPENAI : targetFormat,
|
|
576
582
|
candidate,
|
|
@@ -581,7 +587,7 @@ export async function makeProviderCall({
|
|
|
581
587
|
});
|
|
582
588
|
if (preferOpenAIToolRouting) {
|
|
583
589
|
fallbackPlan = buildProviderRequestPlan({
|
|
584
|
-
body,
|
|
590
|
+
body: effectiveBody,
|
|
585
591
|
sourceFormat,
|
|
586
592
|
targetFormat,
|
|
587
593
|
candidate,
|
|
@@ -619,6 +625,12 @@ export async function makeProviderCall({
|
|
|
619
625
|
|
|
620
626
|
if (isSubscriptionProvider(provider)) {
|
|
621
627
|
const subscriptionType = String(provider?.subscriptionType || provider?.subscription_type || "").trim().toLowerCase();
|
|
628
|
+
if (subscriptionType === "chatgpt-codex" && ampContext?.threadId) {
|
|
629
|
+
activePlan.providerBody = {
|
|
630
|
+
...activePlan.providerBody,
|
|
631
|
+
prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
|
|
632
|
+
};
|
|
633
|
+
}
|
|
622
634
|
const executeSubscriptionRequest = async (requestBody) => makeSubscriptionProviderCall({
|
|
623
635
|
provider,
|
|
624
636
|
body: requestBody,
|
|
@@ -694,9 +694,10 @@ export function handleClaudeStreamToOpenAIResponses(response, requestBody, fallb
|
|
|
694
694
|
const index = Number(payload.index);
|
|
695
695
|
const blockInfo = payload.content_block || {};
|
|
696
696
|
state.activeBlocks.set(index, String(blockInfo.type || "").trim());
|
|
697
|
+
// Defer text output item creation until first renderable text delta
|
|
698
|
+
// to avoid emitting empty assistant text scaffolding before tool calls.
|
|
697
699
|
if (blockInfo.type === "text") {
|
|
698
|
-
|
|
699
|
-
state.textOpened = true;
|
|
700
|
+
// Intentionally do NOT open text item yet; wait for renderable text in content_block_delta.
|
|
700
701
|
} else if (blockInfo.type === "thinking" || blockInfo.type === "redacted_thinking") {
|
|
701
702
|
ensureOpenAIResponsesReasoningItem(state, index, controller, encoder);
|
|
702
703
|
} else if (blockInfo.type === "tool_use") {
|
|
@@ -709,6 +710,10 @@ export function handleClaudeStreamToOpenAIResponses(response, requestBody, fallb
|
|
|
709
710
|
const index = Number(payload.index);
|
|
710
711
|
const delta = payload.delta || {};
|
|
711
712
|
if (delta.type === "text_delta" && typeof delta.text === "string") {
|
|
713
|
+
const hasRenderableText = /\S/.test(delta.text);
|
|
714
|
+
if (!state.textOpened && !hasRenderableText) {
|
|
715
|
+
return;
|
|
716
|
+
}
|
|
712
717
|
ensureOpenAIResponsesTextItem(state, controller, encoder);
|
|
713
718
|
state.textOpened = true;
|
|
714
719
|
state.textBuffer += delta.text;
|
|
@@ -461,3 +461,28 @@ export function isStreamingEnabled(sourceFormat, body) {
|
|
|
461
461
|
// Some clients omit `stream` on follow-up/tool turns and expect JSON responses.
|
|
462
462
|
return body?.stream === true;
|
|
463
463
|
}
|
|
464
|
+
|
|
465
|
+
// Request presets keyed by the lower-cased value of the `x-amp-mode` header.
// An empty string means "no preset for this field".
const AMP_MODE_PRESETS = new Map(
  [
    ["smart", ""],
    ["free", ""],
    ["rush", "low"],
    ["deep", "high"],
    ["large", ""],
    ["bombadil", ""]
  ].map(([modeName, reasoningEffort]) => [modeName, { reasoningEffort, toolChoice: "" }])
);

/**
 * Collect the AMP-specific request headers into a plain context object.
 *
 * Every field is a trimmed string ("" when the header is absent); `mode` and
 * `overrideProvider` are additionally lower-cased. `presets` is the entry of
 * AMP_MODE_PRESETS matching `mode`, or `null` when the mode is unknown.
 *
 * @param {{headers?: {get: (name: string) => (string|null)}}} request
 * @returns {{threadId: string, mode: string, overrideProvider: string,
 *   feature: string, messageId: string, presets: (object|null)}}
 */
export function extractAmpContext(request) {
  const headerBag = request?.headers;
  const isReadable = Boolean(headerBag) && typeof headerBag.get === "function";
  if (!isReadable) {
    return { threadId: "", mode: "", overrideProvider: "", feature: "", messageId: "", presets: null };
  }

  const readHeader = (name) => String(headerBag.get(name) || "").trim();

  const threadId = readHeader("x-amp-thread-id");
  const mode = readHeader("x-amp-mode").toLowerCase();
  const overrideProvider = readHeader("x-amp-override-provider").toLowerCase();
  const feature = readHeader("x-amp-feature");
  const messageId = readHeader("x-amp-message-id");

  let presets = null;
  if (AMP_MODE_PRESETS.has(mode)) {
    presets = AMP_MODE_PRESETS.get(mode);
  }

  return { threadId, mode, overrideProvider, feature, messageId, presets };
}
|
package/src/runtime/handler.js
CHANGED
|
@@ -24,6 +24,7 @@ import { corsResponse, jsonResponse } from "./handler/http.js";
|
|
|
24
24
|
import {
|
|
25
25
|
detectUserRequestFormat,
|
|
26
26
|
estimateRequestContextTokens,
|
|
27
|
+
extractAmpContext,
|
|
27
28
|
inferAmpContextRequirement,
|
|
28
29
|
isAmpManagementPath,
|
|
29
30
|
isJsonRequest,
|
|
@@ -45,7 +46,7 @@ import {
|
|
|
45
46
|
convertAmpGeminiRequestToOpenAI,
|
|
46
47
|
hasGeminiWebSearchTool
|
|
47
48
|
} from "./handler/amp-gemini.js";
|
|
48
|
-
import { shouldInterceptAmpWebSearch } from "./handler/amp-web-search.js";
|
|
49
|
+
import { shouldInterceptAmpWebSearch, maybeInterceptAmpInternalSearch } from "./handler/amp-web-search.js";
|
|
49
50
|
import {
|
|
50
51
|
isRequestFromAllowedIp,
|
|
51
52
|
resolveAllowedOrigin,
|
|
@@ -59,6 +60,7 @@ import {
|
|
|
59
60
|
resolveRetryPolicy
|
|
60
61
|
} from "./handler/fallback.js";
|
|
61
62
|
import { parseJsonSafely, sleep } from "./handler/utils.js";
|
|
63
|
+
import { createThreadAffinityStore } from "./thread-affinity.js";
|
|
62
64
|
import {
|
|
63
65
|
applyCandidateFailureState,
|
|
64
66
|
applyRuntimeRetryPolicyGuards,
|
|
@@ -457,6 +459,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
457
459
|
|
|
458
460
|
const requestedModel = body?.model || "smart";
|
|
459
461
|
const stream = isStreamingEnabled(sourceFormat, body);
|
|
462
|
+
const ampContext = options.clientType === "amp"
|
|
463
|
+
? extractAmpContext(request)
|
|
464
|
+
: null;
|
|
460
465
|
|
|
461
466
|
const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
|
|
462
467
|
clientType: options.clientType,
|
|
@@ -481,7 +486,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
481
486
|
|
|
482
487
|
const resolved = resolveRequestModel(config, requestedModel, sourceFormat, {
|
|
483
488
|
clientType: options.clientType,
|
|
484
|
-
providerHint: options.providerHint
|
|
489
|
+
providerHint: ampContext?.overrideProvider || options.providerHint
|
|
485
490
|
});
|
|
486
491
|
if (!resolved.primary) {
|
|
487
492
|
if (options.clientType === "amp" && resolved.allowAmpProxy !== false && isAmpProxyEnabled(config)) {
|
|
@@ -604,6 +609,24 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
604
609
|
recordRouteSkip(routeDebug, skippedEntry.candidate, skippedEntry.skipReasons);
|
|
605
610
|
}
|
|
606
611
|
|
|
612
|
+
// Thread affinity: reorder candidates to prefer affinity-bound candidate
|
|
613
|
+
if (ampContext?.threadId && options.threadAffinityStore) {
|
|
614
|
+
const affinityCandidateKey = options.threadAffinityStore.getAffinity(ampContext.threadId);
|
|
615
|
+
if (affinityCandidateKey) {
|
|
616
|
+
const affinityIndex = ranking.entries.findIndex(
|
|
617
|
+
(entry) => entry.eligible && entry.candidateKey === affinityCandidateKey
|
|
618
|
+
);
|
|
619
|
+
if (affinityIndex > 0) {
|
|
620
|
+
const [affinityEntry] = ranking.entries.splice(affinityIndex, 1);
|
|
621
|
+
ranking.entries.unshift(affinityEntry);
|
|
622
|
+
ranking.selectedEntry = affinityEntry;
|
|
623
|
+
}
|
|
624
|
+
if (affinityIndex < 0) {
|
|
625
|
+
options.threadAffinityStore.clearAffinity(ampContext.threadId);
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
|
|
607
630
|
if (!ranking.selectedEntry) {
|
|
608
631
|
return withRouteDebugHeaders(jsonResponse({
|
|
609
632
|
type: "error",
|
|
@@ -652,7 +675,8 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
652
675
|
env,
|
|
653
676
|
clientType: options.clientType,
|
|
654
677
|
runtimeConfig: config,
|
|
655
|
-
stateStore
|
|
678
|
+
stateStore,
|
|
679
|
+
ampContext
|
|
656
680
|
});
|
|
657
681
|
|
|
658
682
|
if (!quotaConsumed && shouldConsumeQuotaFromResult(result)) {
|
|
@@ -694,6 +718,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
694
718
|
});
|
|
695
719
|
pendingFallbackContext = null;
|
|
696
720
|
}
|
|
721
|
+
if (ampContext?.threadId && options.threadAffinityStore) {
|
|
722
|
+
options.threadAffinityStore.setAffinity(ampContext.threadId, entry.candidateKey);
|
|
723
|
+
}
|
|
697
724
|
return withRouteDebugHeaders(result.response, routeDebug);
|
|
698
725
|
}
|
|
699
726
|
|
|
@@ -784,6 +811,7 @@ export function createFetchHandler(options) {
|
|
|
784
811
|
}
|
|
785
812
|
|
|
786
813
|
let stateStoreRef = options.stateStore || null;
|
|
814
|
+
const threadAffinityStore = createThreadAffinityStore();
|
|
787
815
|
let stateStorePromise = null;
|
|
788
816
|
|
|
789
817
|
async function ensureStateStore(env = {}, runtimeFlags = {}) {
|
|
@@ -906,6 +934,11 @@ export function createFetchHandler(options) {
|
|
|
906
934
|
return respond(jsonResponse({ error: "Forbidden" }, 403));
|
|
907
935
|
}
|
|
908
936
|
|
|
937
|
+
const searchInterceptResult = await maybeInterceptAmpInternalSearch(request, url, config, env);
|
|
938
|
+
if (searchInterceptResult) {
|
|
939
|
+
return respond(searchInterceptResult);
|
|
940
|
+
}
|
|
941
|
+
|
|
909
942
|
return respond(await proxyAmpUpstreamRequest({ request, config }));
|
|
910
943
|
}
|
|
911
944
|
|
|
@@ -1051,7 +1084,8 @@ export function createFetchHandler(options) {
|
|
|
1051
1084
|
providerHint: "google",
|
|
1052
1085
|
requestKind: "chat-completions",
|
|
1053
1086
|
stateStore,
|
|
1054
|
-
runtimeFlags
|
|
1087
|
+
runtimeFlags,
|
|
1088
|
+
threadAffinityStore
|
|
1055
1089
|
});
|
|
1056
1090
|
|
|
1057
1091
|
if (routeResponse.status >= 400) {
|
|
@@ -1095,7 +1129,8 @@ export function createFetchHandler(options) {
|
|
|
1095
1129
|
providerHint: route.providerHint,
|
|
1096
1130
|
requestKind: route.requestKind,
|
|
1097
1131
|
stateStore,
|
|
1098
|
-
runtimeFlags
|
|
1132
|
+
runtimeFlags,
|
|
1133
|
+
threadAffinityStore
|
|
1099
1134
|
});
|
|
1100
1135
|
return respond(routeResponse);
|
|
1101
1136
|
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
const DEFAULT_AFFINITY_TTL_MS = 60 * 60 * 1000; // 1 hour
const MAX_BINDINGS = 10_000;

/**
 * Create an in-memory store that remembers which route candidate last served a thread.
 *
 * Bindings expire `ttlMs` after they were last written. The store never holds more
 * than `maxBindings` entries: when a write pushes it over the limit, expired bindings
 * are dropped first and then the least recently written ones are evicted.
 *
 * @param {object} [options]
 * @param {number} [options.ttlMs] - Binding lifetime in milliseconds (default 1 hour).
 * @param {number} [options.maxBindings] - Positive integer cap on stored bindings
 *   (default 10 000); any other value falls back to the default.
 * @returns {{
 *   getAffinity: (threadId: string) => (string|null),
 *   setAffinity: (threadId: string, candidateKey: string) => void,
 *   clearAffinity: (threadId: string) => void,
 *   pruneExpired: (now?: number) => void,
 *   _bindings: Map<string, {candidateKey: string, lastSeen: number, expiresAt: number}>
 * }}
 */
export function createThreadAffinityStore(options = {}) {
  const ttlMs = options.ttlMs || DEFAULT_AFFINITY_TTL_MS;
  const maxBindings = Number.isInteger(options.maxBindings) && options.maxBindings > 0
    ? options.maxBindings
    : MAX_BINDINGS;
  const bindings = new Map();

  // Drop every binding whose expiry time has passed.
  function pruneExpired(now = Date.now()) {
    for (const [key, binding] of bindings) {
      if (binding.expiresAt <= now) bindings.delete(key);
    }
  }

  // Enforce the size cap. Map iteration follows insertion order and setAffinity
  // re-inserts on every write, so the first keys are the least recently written.
  function evictOldest() {
    for (const key of bindings.keys()) {
      if (bindings.size <= maxBindings) break;
      bindings.delete(key);
    }
  }

  function getAffinity(threadId) {
    if (!threadId) return null;
    const binding = bindings.get(threadId);
    if (!binding) return null;
    if (binding.expiresAt <= Date.now()) {
      bindings.delete(threadId);
      return null;
    }
    return binding.candidateKey;
  }

  function setAffinity(threadId, candidateKey) {
    if (!threadId || !candidateKey) return;
    const now = Date.now();
    // Delete first so a refreshed binding moves to the end of the iteration order.
    bindings.delete(threadId);
    bindings.set(threadId, {
      candidateKey,
      lastSeen: now,
      expiresAt: now + ttlMs
    });
    if (bindings.size > maxBindings) {
      pruneExpired(now);
      // Pruning alone cannot bound the map while most bindings are still live.
      evictOldest();
    }
  }

  function clearAffinity(threadId) {
    if (threadId) bindings.delete(threadId);
  }

  return { getAffinity, setAffinity, clearAffinity, pruneExpired, _bindings: bindings };
}
|
|
@@ -52,7 +52,7 @@ export function openaiToClaudeResponse(chunk, state) {
|
|
|
52
52
|
|
|
53
53
|
// Handle regular content
|
|
54
54
|
const textDelta = normalizeTextDelta(delta?.content);
|
|
55
|
-
if (textDelta) {
|
|
55
|
+
if (textDelta && (state.textBlockStarted || hasRenderableText(textDelta))) {
|
|
56
56
|
stopThinkingBlock(state, results);
|
|
57
57
|
|
|
58
58
|
if (!state.textBlockStarted) {
|
|
@@ -119,6 +119,10 @@ export function openaiToClaudeResponse(chunk, state) {
|
|
|
119
119
|
return results.length > 0 ? results : null;
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
+
/**
 * Tell whether `text` is a string containing at least one non-whitespace character.
 *
 * @param {unknown} text
 * @returns {boolean} `false` for non-strings, empty strings and whitespace-only strings.
 */
function hasRenderableText(text) {
  if (typeof text !== "string") {
    return false;
  }
  return text.search(/\S/) !== -1;
}
|
|
125
|
+
|
|
122
126
|
function normalizeTextDelta(content) {
|
|
123
127
|
if (typeof content === "string") return content;
|
|
124
128
|
if (!Array.isArray(content)) return "";
|
|
@@ -191,6 +195,7 @@ function normalizeMessageToolCalls(message) {
|
|
|
191
195
|
|
|
192
196
|
function emitTextDelta(text, state, results) {
|
|
193
197
|
if (!text) return;
|
|
198
|
+
if (!state.textBlockStarted && !hasRenderableText(text)) return;
|
|
194
199
|
stopThinkingBlock(state, results);
|
|
195
200
|
|
|
196
201
|
if (!state.textBlockStarted) {
|