@khanglvm/llm-router 2.3.4 → 2.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1488,7 +1488,8 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1488
1488
  const endpointUrl = buildAmpClientEndpointUrl(settings);
1489
1489
  try {
1490
1490
  const state = await readFactoryDroidRoutingState({
1491
- endpointUrl
1491
+ endpointUrl,
1492
+ config
1492
1493
  });
1493
1494
  return {
1494
1495
  ...state,
@@ -1512,6 +1513,13 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1512
1513
  missionValidatorModel: "",
1513
1514
  reasoningEffort: ""
1514
1515
  },
1516
+ bindingIds: {
1517
+ defaultModel: "",
1518
+ missionOrchestratorModel: "",
1519
+ missionWorkerModel: "",
1520
+ missionValidatorModel: "",
1521
+ reasoningEffort: ""
1522
+ },
1515
1523
  endpointUrl,
1516
1524
  error: error instanceof Error ? error.message : String(error)
1517
1525
  };
@@ -1551,6 +1559,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1551
1559
  endpointUrl: nextEndpointUrl,
1552
1560
  apiKey: nextMasterKey,
1553
1561
  bindings,
1562
+ config: nextConfig,
1554
1563
  captureBackup: false
1555
1564
  });
1556
1565
  if (endpointOrKeyChanged) {
@@ -3487,6 +3496,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3487
3496
  endpointUrl,
3488
3497
  apiKey,
3489
3498
  bindings,
3499
+ config: nextConfig,
3490
3500
  captureBackup: true
3491
3501
  });
3492
3502
  addLog("success", "Factory Droid routing enabled.", patchResult.baseUrl);
@@ -3537,6 +3547,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3537
3547
  endpointUrl,
3538
3548
  apiKey,
3539
3549
  bindings,
3550
+ config: configState.normalizedConfig,
3540
3551
  captureBackup: false
3541
3552
  });
3542
3553
  addLog("success", "Factory Droid model bindings updated.", patchResult.bindings.defaultModel || "Default");
@@ -16,6 +16,16 @@ const EFFORT_HEADER_PATTERNS = [
16
16
  /thinking[-_]?effort/i
17
17
  ];
18
18
 
19
// Reasoning-effort levels from weakest to strongest. An entry's index in this
// frozen list is the rank that getEffortRank() reports for it.
const ORDERED_EFFORT_LEVELS = Object.freeze(
  ["none", "minimal", "low", "medium", "high", "xhigh", "max"]
);
28
+
19
29
  function readHeaderValue(headers, name) {
20
30
  if (!headers || !name) return "";
21
31
  if (typeof headers.get === "function") {
@@ -63,7 +73,8 @@ function normalizeEffort(rawValue) {
63
73
  if (compact === "low") return "low";
64
74
  if (["medium", "normal", "standard", "default"].includes(compact)) return "medium";
65
75
  if (compact === "high") return "high";
66
- if (["xhigh", "extra high", "max", "maximum"].includes(compact)) return "xhigh";
76
+ if (["xhigh", "extra high"].includes(compact)) return "xhigh";
77
+ if (["max", "maximum"].includes(compact)) return "max";
67
78
 
68
79
  if (compact.includes("ultra")) return "xhigh";
69
80
  if (compact.includes("think hard") || compact.includes("harder")) return "high";
@@ -71,6 +82,60 @@ function normalizeEffort(rawValue) {
71
82
  return "";
72
83
  }
73
84
 
85
/**
 * Position of an effort level inside ORDERED_EFFORT_LEVELS.
 *
 * @param {*} effort - Raw effort value; it is run through normalizeEffort first.
 * @returns {number} Zero-based index, or -1 when the normalized value is not listed.
 */
function getEffortRank(effort) {
  const level = normalizeEffort(effort);
  return ORDERED_EFFORT_LEVELS.indexOf(level);
}
88
+
89
/**
 * Reduce a model identifier to a lowercase, hyphen-separated token string.
 *
 * Only the text after the last "/" or ":" is kept, then every run of
 * characters outside [a-z0-9] becomes a single "-" with none at either end.
 *
 * @param {*} value - Model identifier; falsy values yield "".
 * @returns {string} Normalized identifier, possibly empty.
 */
function normalizeModelMatcherValue(value) {
  const lowered = String(value || "").trim().toLowerCase();
  if (lowered === "") return "";

  // Whichever of "/" or ":" occurs last marks the end of the prefix to drop.
  const cutAt = Math.max(lowered.lastIndexOf("/"), lowered.lastIndexOf(":"));
  const tail = cutAt >= 0 ? lowered.slice(cutAt + 1) : lowered;

  // Splitting on non-alphanumeric runs and dropping empty pieces is the same
  // as replacing those runs with "-" and trimming hyphens from both ends.
  return tail
    .split(/[^a-z0-9]+/)
    .filter(Boolean)
    .join("-");
}
103
+
104
/**
 * Test whether a model identifier contains `pattern` as a run of whole,
 * hyphen-delimited tokens.
 *
 * The pattern is compared literally. It is no longer spliced into a RegExp,
 * so characters such as "." or "(" cannot act as wildcards or throw a
 * SyntaxError.
 *
 * @param {*} targetModel - Model identifier; normalized with normalizeModelMatcherValue.
 * @param {string} pattern - Literal token sequence, e.g. "gpt-5-4-pro".
 * @returns {boolean} True when the pattern sits on token boundaries of the normalized model.
 */
function matchesModelPattern(targetModel, pattern) {
  const normalizedModel = normalizeModelMatcherValue(targetModel);
  if (!normalizedModel) return false;

  const literalPattern = String(pattern ?? "");
  if (!literalPattern) return false;

  // The normalized model has no leading, trailing or doubled hyphens, so
  // wrapping both sides in "-" turns "start or hyphen before, hyphen or end
  // after" into a plain substring check.
  return `-${normalizedModel}-`.includes(`-${literalPattern}-`);
}
109
+
110
/**
 * Map a requested effort onto the set of efforts a target supports.
 *
 * Resolution order:
 *   1. "" when the request does not normalize to a known effort.
 *   2. The request itself when nothing usable is listed as supported, or when
 *      the request is itself supported.
 *   3. The highest-ranked supported effort at or below the request.
 *   4. Otherwise the lowest-ranked supported effort.
 *
 * @param {*} requestedEffort - Raw requested effort.
 * @param {*} [supportedEfforts=[]] - Supported efforts; a non-array is treated as a single entry.
 * @returns {string} Resolved effort, or "".
 */
function resolveSupportedEffort(requestedEffort, supportedEfforts = []) {
  const wanted = normalizeEffort(requestedEffort);
  if (!wanted) return "";

  // Normalize and de-duplicate while keeping first-seen order.
  const candidates = Array.isArray(supportedEfforts) ? supportedEfforts : [supportedEfforts];
  const allowed = [];
  for (const candidate of candidates) {
    const level = normalizeEffort(candidate);
    if (level && !allowed.includes(level)) {
      allowed.push(level);
    }
  }
  if (allowed.length === 0 || allowed.includes(wanted)) return wanted;

  const wantedRank = getEffortRank(wanted);
  let floorLevel = "";
  let floorRank = -1;
  let lowestLevel = allowed[0];
  for (const level of allowed) {
    const rank = getEffortRank(level);
    // Closest supported level that does not exceed the request.
    if (rank <= wantedRank && rank > floorRank) {
      floorLevel = level;
      floorRank = rank;
    }
    // Track the weakest supported level for the fallback below.
    if (rank < getEffortRank(lowestLevel)) {
      lowestLevel = level;
    }
  }

  return floorLevel || lowestLevel;
}
138
+
74
139
  function parseNumber(value) {
75
140
  const parsed = Number(value);
76
141
  if (!Number.isFinite(parsed)) return undefined;
@@ -81,6 +146,7 @@ function extractEffortFromBody(body) {
81
146
  if (!body || typeof body !== "object") return "";
82
147
 
83
148
  const directCandidates = [
149
+ body.output_config?.effort,
84
150
  body.reasoning_effort,
85
151
  body.reasoningEffort,
86
152
  body["reasoning-effort"],
@@ -121,12 +187,15 @@ function inferEffortFromClaudeThinking(body) {
121
187
 
122
188
  if (Number.isFinite(maxTokens) && maxTokens > 0) {
123
189
  const ratio = budgetTokens / maxTokens;
124
- if (ratio >= 0.9) return "max";
190
+ if (ratio >= 0.97) return "max";
191
+ if (ratio >= 0.82) return "xhigh";
125
192
  if (ratio >= 0.65) return "high";
126
193
  if (ratio >= 0.3) return "medium";
127
194
  return "low";
128
195
  }
129
196
 
197
+ if (budgetTokens >= 31999) return "max";
198
+ if (budgetTokens >= 28000) return "xhigh";
130
199
  if (budgetTokens >= 24000) return "high";
131
200
  if (budgetTokens >= 6000) return "medium";
132
201
  return "low";
@@ -153,39 +222,55 @@ function prefersNestedOpenAIReasoning(targetModel) {
153
222
  return model.startsWith("gpt-5");
154
223
  }
155
224
 
156
- function supportsOpenAIXHighEffort(targetModel) {
157
- const model = String(targetModel || "").trim().toLowerCase();
158
- if (!model) return false;
159
- if (model.startsWith("gpt-5.2")) return true;
160
- if (model.startsWith("gpt-5.3-codex")) return true;
161
- return false;
225
/**
 * Effort levels accepted for an OpenAI-style target model.
 *
 * @param {*} targetModel - Model identifier, matched with matchesModelPattern.
 * @returns {string[]} A fresh array of supported effort names.
 */
function resolveOpenAISupportedEfforts(targetModel) {
  // Checked top to bottom; the first match wins. More specific patterns must
  // come first because a shorter one such as "gpt-5" also matches the longer
  // identifiers.
  const rules = [
    ["gpt-5-4-pro", ["medium", "high", "xhigh"]],
    ["gpt-5-pro", ["high"]],
    ["gpt-5-4", ["none", "low", "medium", "high", "xhigh"]],
    ["gpt-5-3-codex", ["low", "medium", "high", "xhigh"]],
    ["gpt-5-2-codex", ["low", "medium", "high", "xhigh"]],
    ["gpt-5-2-pro", ["medium", "high", "xhigh"]],
    ["gpt-5-2", ["none", "low", "medium", "high", "xhigh"]],
    ["gpt-5-1-codex", ["low", "medium", "high"]],
    ["gpt-5-1", ["none", "low", "medium", "high"]],
    ["gpt-5", ["minimal", "low", "medium", "high"]]
  ];

  for (const [pattern, efforts] of rules) {
    if (matchesModelPattern(targetModel, pattern)) return efforts;
  }
  return ["low", "medium", "high"];
}
163
238
 
164
- function supportsOpenAINoneEffort(targetModel) {
165
- const model = String(targetModel || "").trim().toLowerCase();
166
- if (!model) return false;
167
- if (model.startsWith("gpt-5.1") && !model.includes("codex")) return true;
168
- if (model.startsWith("gpt-5.2") && !model.includes("codex") && !model.includes("pro")) return true;
169
- return false;
239
/**
 * Describe how reasoning effort is applied for a Claude target model.
 *
 * @param {*} targetModel - Model identifier, matched with matchesModelPattern.
 * @returns {{supportsEffortApi: boolean, requiresAdaptiveThinking: boolean,
 *   preserveManualBudgetThinking: boolean, supportedEfforts: string[]}}
 *   A fresh profile object for the first matching model family, or the
 *   generic profile when none matches.
 */
function resolveClaudeEffortProfile(targetModel) {
  const isFamily = (pattern) => matchesModelPattern(targetModel, pattern);
  const makeProfile = (
    supportsEffortApi,
    requiresAdaptiveThinking,
    preserveManualBudgetThinking,
    supportedEfforts
  ) => ({
    supportsEffortApi,
    requiresAdaptiveThinking,
    preserveManualBudgetThinking,
    supportedEfforts
  });

  if (isFamily("opus-4-7")) {
    return makeProfile(true, true, false, ["low", "medium", "high", "xhigh", "max"]);
  }
  if (isFamily("opus-4-6") || isFamily("sonnet-4-6")) {
    return makeProfile(true, true, true, ["low", "medium", "high", "max"]);
  }
  if (isFamily("opus-4-5")) {
    return makeProfile(false, false, true, ["low", "medium", "high", "max"]);
  }
  return makeProfile(false, false, true, ["low", "medium", "high"]);
}
171
271
 
172
272
  function mapEffortToOpenAI(effort, targetModel) {
173
- switch (effort) {
174
- case "none":
175
- return supportsOpenAINoneEffort(targetModel) ? "none" : "low";
176
- case "minimal":
177
- return "low";
178
- case "low":
179
- return "low";
180
- case "medium":
181
- return "medium";
182
- case "high":
183
- return "high";
184
- case "xhigh":
185
- return supportsOpenAIXHighEffort(targetModel) ? "xhigh" : "high";
186
- default:
187
- return "";
188
- }
273
+ return resolveSupportedEffort(effort, resolveOpenAISupportedEfforts(targetModel));
189
274
  }
190
275
 
191
276
  function applyOpenAIEffort(providerBody, effort, targetModel) {
@@ -236,6 +321,7 @@ function toClaudeThinkingBudget(effort, maxTokens) {
236
321
  case "high":
237
322
  return clampBudget(Math.round(safeMaxTokens * 0.75), 1024, maxBudget);
238
323
  case "xhigh":
324
+ return clampBudget(Math.round(safeMaxTokens * 0.9), 1024, maxBudget);
239
325
  case "max":
240
326
  return maxBudget;
241
327
  default:
@@ -243,10 +329,37 @@ function toClaudeThinkingBudget(effort, maxTokens) {
243
329
  }
244
330
  }
245
331
 
246
- function applyClaudeEffort(providerBody, effort, { sourceFormat, originalBody } = {}) {
332
+ function applyClaudeEffort(providerBody, effort, { sourceFormat, originalBody, targetModel } = {}) {
247
333
  const nextBody = { ...(providerBody || {}) };
334
+ const requestedEffort = normalizeEffort(effort);
335
+ const profile = resolveClaudeEffortProfile(targetModel);
336
+ const mappedEffort = resolveSupportedEffort(requestedEffort, profile.supportedEfforts);
337
+
338
+ if (profile.supportsEffortApi && mappedEffort) {
339
+ nextBody.output_config = {
340
+ ...(nextBody.output_config && typeof nextBody.output_config === "object" && !Array.isArray(nextBody.output_config)
341
+ ? nextBody.output_config
342
+ : {}),
343
+ effort: mappedEffort
344
+ };
345
+
346
+ const explicitBudgetTokens = parseNumber(nextBody?.thinking?.budget_tokens);
347
+ const explicitThinkingType = String(nextBody?.thinking?.type || "").trim().toLowerCase();
348
+ if (profile.preserveManualBudgetThinking && Number.isFinite(explicitBudgetTokens)) {
349
+ return nextBody;
350
+ }
351
+
352
+ if (profile.requiresAdaptiveThinking) {
353
+ if (explicitThinkingType === "disabled") {
354
+ nextBody.thinking = { type: "disabled" };
355
+ } else {
356
+ nextBody.thinking = { type: "adaptive" };
357
+ }
358
+ }
359
+ return nextBody;
360
+ }
248
361
 
249
- if (effort === "none" || effort === "minimal") {
362
+ if (requestedEffort === "none" || requestedEffort === "minimal") {
250
363
  delete nextBody.thinking;
251
364
  return nextBody;
252
365
  }
@@ -267,7 +380,7 @@ function applyClaudeEffort(providerBody, effort, { sourceFormat, originalBody }
267
380
  nextBody.max_tokens = maxTokens;
268
381
  }
269
382
 
270
- const budgetTokens = toClaudeThinkingBudget(effort, maxTokens);
383
+ const budgetTokens = toClaudeThinkingBudget(mappedEffort || requestedEffort, maxTokens);
271
384
  if (!Number.isFinite(budgetTokens)) {
272
385
  return nextBody;
273
386
  }
@@ -316,7 +429,8 @@ export function applyReasoningEffortMapping({
316
429
  if (targetFormat === FORMATS.CLAUDE) {
317
430
  return applyClaudeEffort(providerBody, effort, {
318
431
  sourceFormat,
319
- originalBody
432
+ originalBody,
433
+ targetModel
320
434
  });
321
435
  }
322
436
  return providerBody;
@@ -64,6 +64,139 @@ export const FACTORY_DROID_REASONING_EFFORT_VALUES = Object.freeze([
64
64
  "high"
65
65
  ]);
66
66
 
67
/**
 * Trim a model id and drop one leading "custom:" prefix when present.
 *
 * @param {*} value - Raw model id; falsy values yield "".
 * @returns {string} The trimmed id without the "custom:" prefix.
 */
function stripFactoryDroidRouterModelIdPrefix(value) {
  const prefix = "custom:";
  const text = String(value || "").trim();
  return text.startsWith(prefix) ? text.slice(prefix.length).trim() : text;
}
72
+
73
/**
 * Turn arbitrary text into a slug made of letters, digits, "." and "_",
 * with single hyphens between the pieces.
 *
 * Every run of other characters (including "/", ":", whitespace and "-"
 * itself) collapses to one "-", and no hyphen is left at either end.
 *
 * @param {*} value - Raw text; falsy values yield "".
 * @returns {string} The slug, possibly empty.
 */
function sanitizeFactoryDroidRouterModelIdPart(value) {
  const pieces = String(value || "")
    .trim()
    .split(/[^A-Za-z0-9._]+/);
  // Empty pieces come from separators at the start or end of the input.
  return pieces.filter((piece) => piece !== "").join("-");
}
82
+
83
/**
 * Apply canonical casing to a leading "gpt", "glm" or "claude" family name.
 *
 * The family name is only recased when it is followed by "-", whitespace,
 * "." or the end of the string; the rest of the text is left untouched.
 *
 * @param {*} value - Raw display name; falsy values yield "".
 * @returns {string} The trimmed name with its family prefix recased, if any.
 */
function formatFactoryDroidDisplayNameBase(value) {
  const text = String(value || "").trim();
  if (text === "") return "";

  const families = [
    { matcher: /^gpt(?=[-\s.]|$)/i, label: "GPT", width: 3 },
    { matcher: /^glm(?=[-\s.]|$)/i, label: "GLM", width: 3 },
    { matcher: /^claude(?=[-\s.]|$)/i, label: "Claude", width: 6 }
  ];
  const family = families.find(({ matcher }) => matcher.test(text));
  return family ? `${family.label}${text.slice(family.width)}` : text;
}
91
+
92
/**
 * Report whether a model id, with any "custom:" prefix removed, starts with
 * "llm-".
 *
 * @param {*} value - Raw model id.
 * @returns {boolean} True when the stripped id starts with "llm-".
 */
export function isFactoryDroidRouterModelId(value) {
  return stripFactoryDroidRouterModelIdPrefix(value).startsWith("llm-");
}
96
+
97
/**
 * Parse an "llm-" model id (optionally prefixed with "custom:").
 *
 * Recognized shapes:
 *   - "llm-alias:<alias>"        -> alias whose routeRef is the alias id
 *   - "llm-alias-<alias>"        -> alias with an empty routeRef
 *   - "llm-<provider>:<model>"   -> model with routeRef "<provider>/<model>"
 *
 * @param {*} value - Raw model id.
 * @returns {{kind: "alias", aliasId: string, routeRef: string}
 *   | {kind: "model", providerId: string, modelId: string, routeRef: string}
 *   | null} The parsed parts, or null when the id has none of these shapes.
 */
export function parseFactoryDroidRouterModelId(value) {
  const id = stripFactoryDroidRouterModelIdPrefix(value);
  if (!id.startsWith("llm-")) return null;

  // The colon form carries its alias through as routeRef; the hyphen form
  // does not. The colon form is checked first, as in the original order.
  const aliasForms = [
    { prefix: "llm-alias:", exposesRouteRef: true },
    { prefix: "llm-alias-", exposesRouteRef: false }
  ];
  for (const { prefix, exposesRouteRef } of aliasForms) {
    if (!id.startsWith(prefix)) continue;
    const aliasId = id.slice(prefix.length).trim();
    if (!aliasId) return null;
    return {
      kind: "alias",
      aliasId,
      routeRef: exposesRouteRef ? aliasId : ""
    };
  }

  const rest = id.slice("llm-".length);
  const colonAt = rest.indexOf(":");
  // A missing colon, or one in first position, leaves no provider part.
  if (colonAt < 1) return null;

  const providerId = rest.slice(0, colonAt).trim();
  const modelId = rest.slice(colonAt + 1).trim();
  if (!providerId || !modelId) return null;

  return {
    kind: "model",
    providerId,
    modelId,
    routeRef: `${providerId}/${modelId}`
  };
}
138
+
139
/**
 * Resolve a value to a route reference.
 *
 * @param {*} value - A router model id or an already-plain reference.
 * @returns {string} The routeRef produced by parseFactoryDroidRouterModelId
 *   when it is non-empty, otherwise the trimmed input ("" for falsy input).
 */
export function resolveFactoryDroidRouterModelRef(value) {
  const ref = String(value || "").trim();
  if (ref === "") return "";

  const parsed = parseFactoryDroidRouterModelId(ref);
  return parsed?.routeRef || ref;
}
144
+
145
/**
 * Build the "custom:llm-..." model id for a route reference.
 *
 * - Input already shaped like "custom:llm-..." or "llm-..." is parsed; when
 *   that yields a routeRef the id is rebuilt from it, otherwise the input is
 *   returned with a "custom:" prefix (added only if it was missing).
 * - kind "alias" gives "custom:llm-alias-<slug>".
 * - kind "model" expects "<provider>/<model>" and gives
 *   "custom:llm-<providerSlug>-<modelSlug>".
 * - Any other kind is inferred: a reference containing "/" is a model,
 *   anything else is an alias.
 *
 * @param {*} modelRef - Route reference or existing router model id.
 * @param {{kind?: string}} [options] - Optional explicit kind ("alias" or "model").
 * @returns {string} The model id, or "" when one cannot be built.
 */
export function buildFactoryDroidRouterModelId(modelRef, { kind = "" } = {}) {
  const ref = String(modelRef || "").trim();
  if (ref === "") return "";

  const hasCustomPrefix = ref.startsWith("custom:llm-");
  if (hasCustomPrefix || ref.startsWith("llm-")) {
    const parsed = parseFactoryDroidRouterModelId(ref);
    if (parsed?.routeRef) {
      return buildFactoryDroidRouterModelId(parsed.routeRef, { kind: parsed.kind });
    }
    return hasCustomPrefix ? ref : `custom:${ref}`;
  }

  const requestedKind = String(kind || "").trim().toLowerCase();
  const isExplicit = requestedKind === "alias" || requestedKind === "model";
  const inferredKind = ref.includes("/") ? "model" : "alias";
  const effectiveKind = isExplicit ? requestedKind : inferredKind;

  if (effectiveKind === "alias") {
    const aliasSlug = sanitizeFactoryDroidRouterModelIdPart(ref);
    return aliasSlug ? `custom:llm-alias-${aliasSlug}` : "";
  }

  // Model references need text on both sides of the first "/".
  const slashAt = ref.indexOf("/");
  if (slashAt <= 0 || slashAt >= ref.length - 1) return "";

  const providerSlug = sanitizeFactoryDroidRouterModelIdPart(ref.slice(0, slashAt).trim());
  const modelSlug = sanitizeFactoryDroidRouterModelIdPart(ref.slice(slashAt + 1).trim());
  if (!providerSlug || !modelSlug) return "";
  return `custom:llm-${providerSlug}-${modelSlug}`;
}
183
+
184
/**
 * Build the display label for a route reference.
 *
 * Aliases render as "[LLM Alias] <name>"; everything else renders as
 * "[LLM] <model>", where <model> is the text after the first "/" when the
 * reference has one. Without an explicit kind, a reference containing "/"
 * is treated as a model and anything else as an alias.
 *
 * @param {*} modelRef - Route reference.
 * @param {{kind?: string}} [options] - Optional explicit kind.
 * @returns {string} The label, or "" for an empty reference.
 */
export function buildFactoryDroidRouterDisplayName(modelRef, { kind = "" } = {}) {
  const ref = String(modelRef || "").trim();
  if (ref === "") return "";

  const slashAt = ref.indexOf("/");
  const hasSlash = slashAt !== -1;
  const requestedKind = String(kind || "").trim().toLowerCase();
  const resolvedKind = requestedKind || (hasSlash ? "model" : "alias");

  if (resolvedKind === "alias") {
    return `[LLM Alias] ${formatFactoryDroidDisplayNameBase(ref)}`;
  }

  const modelName = hasSlash ? ref.slice(slashAt + 1).trim() : ref;
  return `[LLM] ${formatFactoryDroidDisplayNameBase(modelName)}`;
}
199
+
67
200
  export function normalizeFactoryDroidReasoningEffort(value) {
68
201
  const normalized = String(value || "").trim().toLowerCase();
69
202
  return FACTORY_DROID_REASONING_EFFORT_VALUES.includes(normalized) ? normalized : "";