openfox 2.0.24 → 2.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auto-config-FZFXOEEG.js +169 -0
- package/dist/backend-AGXWAU7A.js +9 -0
- package/dist/{chat-handler-4ATHDLH4.js → chat-handler-XH3QUXIC.js} +28 -47
- package/dist/{chunk-XJ4SUDL7.js → chunk-2ELCWCO3.js} +32 -241
- package/dist/{chunk-QK6TYNUN.js → chunk-64NCACBV.js} +6 -6
- package/dist/{chunk-GI24G4OW.js → chunk-AYJTMZVU.js} +79 -59
- package/dist/{chunk-YVF3BLQS.js → chunk-GZOYGODG.js} +36 -22
- package/dist/chunk-HNCM3D7Y.js +28 -0
- package/dist/chunk-IEDE6VK4.js +124 -0
- package/dist/chunk-J2GP3J3X.js +97 -0
- package/dist/{chunk-INRKWEOH.js → chunk-L5FBH2YX.js} +116 -65
- package/dist/{chunk-4EDH3ZXL.js → chunk-LP5RXQW5.js} +3 -3
- package/dist/chunk-M3RB4IF6.js +114 -0
- package/dist/chunk-V4IE7HJY.js +175 -0
- package/dist/{chunk-MDRNKI7D.js → chunk-WCPFR6ZP.js} +55 -48
- package/dist/{chunk-CDIYCGCO.js → chunk-WEXW7ZXJ.js} +2 -2
- package/dist/{chunk-RYHCYZQ7.js → chunk-Y6STCE5Z.js} +26 -24
- package/dist/{chunk-YUHODMKY.js → chunk-YGSBVKFU.js} +11 -5
- package/dist/chunk-Z4FMBCJO.js +52 -0
- package/dist/chunk-ZJ4FP6RS.js +200 -0
- package/dist/cli/dev.js +1 -1
- package/dist/cli/index.js +1 -1
- package/dist/client-725U6BTX.js +13 -0
- package/dist/client-pure-5NOTSIRK.js +19 -0
- package/dist/{compactor-SEZEZSML.js → compactor-JMGSZ4DQ.js} +7 -4
- package/dist/http-client-SIPAW7IM.js +8 -0
- package/dist/{orchestrator-MFN7COWT.js → orchestrator-FRFKYO77.js} +16 -13
- package/dist/package.json +1 -1
- package/dist/{processor-W2ZSJVOJ.js → processor-YAMVUA7K.js} +30 -55
- package/dist/profiles-Q36ELWQF.js +9 -0
- package/dist/{provider-IMW3ITB7.js → provider-KB7GB2O2.js} +15 -9
- package/dist/provider-manager-5VAVOKHC.js +22 -0
- package/dist/{serve-ABSUHKT3.js → serve-XBIN2DEU.js} +23 -17
- package/dist/server/index.d.ts +9 -1
- package/dist/server/index.js +19 -13
- package/dist/{server-7EAYI7T4.js → server-VXOP7JUX.js} +18 -12
- package/dist/{tools-7CKTYL2G.js → tools-JDYXXX2N.js} +11 -8
- package/dist/url-utils-QWAHP54Q.js +15 -0
- package/dist/web/assets/{index-CkUCxzzC.css → index-BLOGpuPE.css} +1 -1
- package/dist/web/assets/{index-Bi5R_oF2.js → index-CtG8oo36.js} +66 -66
- package/dist/web/index.html +2 -2
- package/dist/web/sw.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-UUFEE7VR.js +0 -505
- package/dist/provider-manager-DNBMBP4D.js +0 -16
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ensureVersionPrefix
|
|
3
|
+
} from "./chunk-HNCM3D7Y.js";
|
|
4
|
+
import {
|
|
5
|
+
logger
|
|
6
|
+
} from "./chunk-K44MW7JJ.js";
|
|
7
|
+
|
|
8
|
+
// src/server/providers/auto-config.ts
|
|
9
|
+
// Candidate request-body fragments that are probed to find a configuration
// answering without thinking output. Order is significant: probe results are
// sorted by duration with a stable sort, so earlier entries win ties.
var NON_THINKING_COMBOS = (() => {
  // Factories (not shared objects) so every array entry owns its nested data.
  const effortNone = () => ({ reasoning_effort: "none" });
  const templateThinkingOff = () => ({ chat_template_kwargs: { enable_thinking: false } });
  return [
    {},
    effortNone(),
    templateThinkingOff(),
    { thinking: { type: "disabled" } },
    { ...effortNone(), ...templateThinkingOff() }
  ];
})();
|
|
16
|
+
// Candidate request-body fragments that are probed to find a configuration
// answering with thinking enabled. Order is significant: probe results are
// sorted by duration with a stable sort, so earlier entries win ties.
var THINKING_COMBOS = (() => {
  // Factories (not shared objects) so every array entry owns its nested data.
  const effortHigh = () => ({ reasoning_effort: "high" });
  const thinkingEnabled = () => ({ thinking: { type: "enabled" } });
  return [
    effortHigh(),
    { chat_template_kwargs: { enable_thinking: true } },
    thinkingEnabled(),
    { ...effortHigh(), ...thinkingEnabled() }
  ];
})();
|
|
22
|
+
// Static limits keyed by model id. Consulted only when the backend type is
// "unknown"; hoisted so the table is not rebuilt on every call.
var KNOWN_MODEL_INFO = {
  "deepseek-v4-flash": { ctx: 1e6, vision: false },
  "deepseek-v4-pro": { ctx: 1e6, vision: false },
  "glm-5.2": { ctx: 1e6, vision: false },
  "glm-5.1": { ctx: 1e6, vision: false },
  "glm-5": { ctx: 1e6, vision: false },
  "glm-5-turbo": { ctx: 1e6, vision: false },
  "glm-4.7": { ctx: 128e3, vision: false },
  "glm-4.6": { ctx: 128e3, vision: false },
  "glm-4.5": { ctx: 128e3, vision: false },
  "glm-4-32b-0414-128k": { ctx: 128e3, vision: false }
};

/**
 * Resolve the context window and vision support for a single model.
 *
 * Resolution order:
 *   1. backend === "unknown" and the id is in the static table -> "hardcoded".
 *   2. Backend-specific detection ("ollama", "llamacpp", anything else goes
 *      through the OpenAI-style `/models` listing) -> whatever that returns.
 *   3. Any failure during step 2 -> a fixed default of 200000 tokens.
 *
 * @param {string} baseUrl - Provider base URL.
 * @param {string|undefined} apiKey - Optional bearer token (only forwarded to the `/models` path).
 * @param {string} backend - Backend identifier, e.g. "ollama", "llamacpp", "unknown".
 * @param {string} modelId - Model identifier to look up.
 * @returns {Promise<{contextWindow: number, source: string, supportsVision: boolean}>}
 *   Never rejects: detection errors are converted into the default result.
 */
async function detectModelInfo(baseUrl, apiKey, backend, modelId) {
  // Own-property check: a bare `table[modelId]` lookup would also match
  // inherited members such as "constructor" or "toString" and report an
  // undefined context window as "hardcoded".
  if (backend === "unknown" && Object.hasOwn(KNOWN_MODEL_INFO, modelId)) {
    const { ctx, vision } = KNOWN_MODEL_INFO[modelId];
    return { contextWindow: ctx, source: "hardcoded", supportsVision: vision };
  }
  try {
    if (backend === "ollama") {
      return await detectOllamaInfo(baseUrl, modelId);
    }
    if (backend === "llamacpp") {
      return await detectLlamacppInfo(baseUrl);
    }
    return await detectVllmInfo(baseUrl, apiKey, modelId);
  } catch {
    // Deliberate best-effort: detection is optional, so any failure
    // (network, HTTP status, missing field) falls back to the default.
    return { contextWindow: 2e5, source: "default", supportsVision: false };
  }
}
|
|
51
|
+
/**
 * Read a model's context window from an OpenAI-style `/models` listing.
 *
 * @param {string} baseUrl - Provider base URL; passed through ensureVersionPrefix.
 * @param {string|undefined} apiKey - Sent as a bearer token when truthy.
 * @param {string} modelId - Id to match against the `id` of each listed entry.
 * @returns {Promise<{contextWindow: number, source: "backend", supportsVision: false}>}
 * @throws {Error} On a non-OK HTTP status, or when the matching entry has no
 *   truthy `max_model_len`.
 */
async function detectVllmInfo(baseUrl, apiKey, modelId) {
  const requestHeaders = apiKey
    ? { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }
    : { "Content-Type": "application/json" };
  const listingUrl = `${ensureVersionPrefix(baseUrl)}/models`;
  const res = await fetch(listingUrl, {
    headers: requestHeaders,
    signal: AbortSignal.timeout(5e3)
  });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status}`);
  }
  const payload = await res.json();
  const entry = payload.data?.find((candidate) => candidate.id === modelId);
  if (!entry?.max_model_len) {
    throw new Error("No context window in response");
  }
  // This listing path reports no vision information, so it is always false.
  return { contextWindow: entry.max_model_len, source: "backend", supportsVision: false };
}
|
|
63
|
+
/**
 * Read the context size and vision flag from a llama.cpp server's `/props`.
 *
 * @param {string} baseUrl - Provider base URL, with or without a trailing
 *   `/vN` version segment.
 * @returns {Promise<{contextWindow: number, source: "backend", supportsVision: boolean}>}
 * @throws {Error} On a non-OK HTTP status, or when
 *   `default_generation_settings.n_ctx` is missing or falsy.
 */
async function detectLlamacppInfo(baseUrl) {
  // The OpenAI-style calls in this file accept base URLs that already end in
  // "/v1". `/props` is a native endpoint, so drop a trailing version segment
  // (same pattern as the project's stripVersionPrefix) instead of requesting
  // "/v1/props".
  // NOTE(review): assumes /props is only served at the server root — confirm
  // against the llama.cpp server docs.
  const serverRoot = baseUrl.replace(/\/v\d+\/?$/, "");
  const response = await fetch(`${serverRoot}/props`, { signal: AbortSignal.timeout(5e3) });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const data = await response.json();
  const nCtx = data.default_generation_settings?.n_ctx;
  const supportsVision = data.modalities?.vision ?? false;
  if (nCtx) {
    return { contextWindow: nCtx, source: "backend", supportsVision };
  }
  throw new Error("No n_ctx in props");
}
|
|
74
|
+
/**
 * Read a model's context length and vision support from Ollama's `/api/show`.
 *
 * @param {string} baseUrl - Provider base URL, with or without a trailing
 *   `/vN` version segment.
 * @param {string} modelId - Model name sent in the request body.
 * @returns {Promise<{contextWindow: number, source: "backend", supportsVision: boolean}>}
 * @throws {Error} On a non-OK HTTP status, or when `model_info` carries no
 *   usable (finite, positive) context length.
 */
async function detectOllamaInfo(baseUrl, modelId) {
  // The OpenAI-style calls in this file accept base URLs that already end in
  // "/v1". `/api/show` is a native endpoint, so drop a trailing version
  // segment (same pattern as the project's stripVersionPrefix) instead of
  // requesting "/v1/api/show".
  // NOTE(review): assumes /api/show is only served at the server root —
  // confirm against the Ollama API docs.
  const serverRoot = baseUrl.replace(/\/v\d+\/?$/, "");
  const response = await fetch(`${serverRoot}/api/show`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ name: modelId }),
    signal: AbortSignal.timeout(5e3)
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  const data = await response.json();
  const mi = data.model_info ?? {};
  const infoKeys = Object.keys(mi);
  // The context length key is matched either bare or with a dotted prefix
  // ("<something>.context_length").
  const ctxKey = infoKeys.find((k) => k.endsWith(".context_length") || k === "context_length");
  const ctxLen = ctxKey ? Number(mi[ctxKey]) : void 0;
  const supportsVision = !!mi["vision_start_token_id"] || infoKeys.some((k) => k.includes(".vision."));
  // Number.isFinite rejects undefined, NaN and Infinity without coercion.
  if (Number.isFinite(ctxLen) && ctxLen > 0) {
    return { contextWindow: ctxLen, source: "backend", supportsVision };
  }
  throw new Error("No context_length in model_info");
}
|
|
92
|
+
/**
 * Send one short chat completion with the extra request fields in `combo`
 * and report whether the server answered with any message text.
 *
 * @param {string} baseUrl - Provider base URL; passed through ensureVersionPrefix.
 * @param {string|undefined} apiKey - Sent as a bearer token when truthy.
 * @param {string} model - Model id placed in the request body.
 * @param {object} combo - Extra body fields, spread last so they can override the defaults.
 * @param {AbortSignal|undefined} signal - Forwarded to fetch.
 * @returns {Promise<{combo: object, httpCode: number, hasContent: boolean, durationMs: number}>}
 *   Never rejects: any error inside the request/parse step yields httpCode 0.
 */
async function probeCombo(baseUrl, apiKey, model, combo, signal) {
  const requestHeaders = { "Content-Type": "application/json" };
  if (apiKey) {
    requestHeaders["Authorization"] = `Bearer ${apiKey}`;
  }
  const payload = {
    model,
    messages: [{ role: "user", content: "say hi in one word" }],
    max_tokens: 50,
    ...combo
  };
  const startedAt = Date.now();
  try {
    const endpoint = `${ensureVersionPrefix(baseUrl)}/chat/completions`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: requestHeaders,
      body: JSON.stringify(payload),
      signal
    });
    // Measured when the response headers arrive, before the body is parsed.
    const durationMs = Date.now() - startedAt;
    if (res.ok) {
      const parsed = await res.json();
      const reply = parsed.choices?.[0]?.message ?? {};
      // Any one of these message fields being truthy counts as content.
      const textFields = ["content", "reasoning", "reasoning_content", "thinking"];
      const hasContent = textFields.some((field) => Boolean(reply[field]));
      return { combo, httpCode: res.status, hasContent, durationMs };
    }
    return { combo, httpCode: res.status, hasContent: false, durationMs };
  } catch {
    return { combo, httpCode: 0, hasContent: false, durationMs: Date.now() - startedAt };
  }
}
|
|
122
|
+
/**
 * Probe every combo concurrently and pick the fastest one that worked.
 *
 * A probe counts as working when it fulfilled with HTTP 200 and message
 * content. All probes share a single 15-second abort signal.
 *
 * @param {string} baseUrl - Provider base URL.
 * @param {string|undefined} apiKey - Optional bearer token.
 * @param {string} model - Model id to probe.
 * @param {object[]} combos - Candidate request-body fragments.
 * @returns {Promise<object|null>} The winning combo object, or null when none worked.
 */
async function probeCombos(baseUrl, apiKey, model, combos) {
  const sharedDeadline = AbortSignal.timeout(15e3);
  const pending = combos.map((candidate) => probeCombo(baseUrl, apiKey, model, candidate, sharedDeadline));
  const settled = await Promise.allSettled(pending);
  const working = [];
  for (const outcome of settled) {
    if (outcome.status !== "fulfilled") continue;
    const probe = outcome.value;
    if (probe.httpCode === 200 && probe.hasContent) {
      working.push(probe);
    }
  }
  if (working.length === 0) {
    logger.debug("Auto-config: no working combo found", { model });
    return null;
  }
  // Stable sort: equally fast probes keep their order from `combos`.
  working.sort((left, right) => left.durationMs - right.durationMs);
  const fastest = working[0];
  logger.debug("Auto-config: found working combo", {
    model,
    combo: fastest.combo,
    durationMs: fastest.durationMs
  });
  return fastest.combo;
}
|
|
140
|
+
/**
 * Detect limits and working thinking / non-thinking request settings for each
 * model of a provider.
 *
 * Models are handled one after another; for each model the two combo probes
 * run concurrently.
 *
 * @param {{url: string, apiKey?: string, backend: string, models: Array<{id: string}>}} input
 *   Provider description; trailing slashes are trimmed from `url`.
 * @returns {Promise<{models: Array<{id: string, contextWindow: number, contextSource: string,
 *   supportsVision: boolean, thinkingConfig: object|null, nonThinkingConfig: object|null}>}>}
 */
async function autoConfig(input) {
  const { url, apiKey, backend, models } = input;
  const baseUrl = url.replace(/\/+$/, "");
  const detected = [];
  for (const entry of models) {
    const modelId = entry.id;
    logger.info("Auto-config probing model", { model: modelId, backend });
    const info = await detectModelInfo(baseUrl, apiKey, backend, modelId);
    const thinkingProbe = probeCombos(baseUrl, apiKey, modelId, THINKING_COMBOS);
    const nonThinkingProbe = probeCombos(baseUrl, apiKey, modelId, NON_THINKING_COMBOS);
    const [thinkingConfig, nonThinkingConfig] = await Promise.all([thinkingProbe, nonThinkingProbe]);
    detected.push({
      id: modelId,
      contextWindow: info.contextWindow,
      contextSource: info.source,
      supportsVision: info.supportsVision,
      thinkingConfig,
      nonThinkingConfig
    });
  }
  return { models: detected };
}
|
|
166
|
+
export {
|
|
167
|
+
autoConfig
|
|
168
|
+
};
|
|
169
|
+
//# sourceMappingURL=auto-config-FZFXOEEG.js.map
|
|
@@ -1,25 +1,16 @@
|
|
|
1
1
|
import {
|
|
2
|
-
applyGeneratedSessionName,
|
|
3
2
|
buildRunChatTurnParams,
|
|
4
3
|
finalizeTurnCompletion,
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
needsNameGenerationCheck
|
|
8
|
-
} from "./chunk-GI24G4OW.js";
|
|
4
|
+
generateSessionNameForSession
|
|
5
|
+
} from "./chunk-AYJTMZVU.js";
|
|
9
6
|
import {
|
|
10
7
|
runChatTurn
|
|
11
|
-
} from "./chunk-
|
|
12
|
-
import "./chunk-
|
|
13
|
-
import "./chunk-O4TED6AJ.js";
|
|
14
|
-
import "./chunk-YUHODMKY.js";
|
|
8
|
+
} from "./chunk-64NCACBV.js";
|
|
9
|
+
import "./chunk-WCPFR6ZP.js";
|
|
15
10
|
import "./chunk-DL6ZILAF.js";
|
|
16
11
|
import "./chunk-PBGOZMVY.js";
|
|
17
12
|
import "./chunk-VRGRAQDG.js";
|
|
18
13
|
import "./chunk-NWO6GRYE.js";
|
|
19
|
-
import {
|
|
20
|
-
getEventStore
|
|
21
|
-
} from "./chunk-YBWY4DKY.js";
|
|
22
|
-
import "./chunk-6PLAWCHQ.js";
|
|
23
14
|
import {
|
|
24
15
|
createChatMessageMessage,
|
|
25
16
|
createPhaseChangedMessage,
|
|
@@ -27,19 +18,26 @@ import {
|
|
|
27
18
|
} from "./chunk-F4PMNP7S.js";
|
|
28
19
|
import "./chunk-EU3WWTFH.js";
|
|
29
20
|
import "./chunk-RFNEDBVO.js";
|
|
21
|
+
import "./chunk-O4TED6AJ.js";
|
|
22
|
+
import "./chunk-YGSBVKFU.js";
|
|
23
|
+
import {
|
|
24
|
+
getEventStore
|
|
25
|
+
} from "./chunk-YBWY4DKY.js";
|
|
26
|
+
import "./chunk-6PLAWCHQ.js";
|
|
30
27
|
import "./chunk-FBGWG4N6.js";
|
|
28
|
+
import "./chunk-J2GP3J3X.js";
|
|
29
|
+
import "./chunk-Z4FMBCJO.js";
|
|
30
|
+
import "./chunk-ZJ4FP6RS.js";
|
|
31
|
+
import "./chunk-K44MW7JJ.js";
|
|
31
32
|
import "./chunk-YD6NDTKF.js";
|
|
32
33
|
import "./chunk-SNQT7LNU.js";
|
|
33
34
|
import "./chunk-CQGTEGKL.js";
|
|
34
|
-
import "./chunk-
|
|
35
|
-
import {
|
|
36
|
-
logger
|
|
37
|
-
} from "./chunk-K44MW7JJ.js";
|
|
35
|
+
import "./chunk-V4IE7HJY.js";
|
|
38
36
|
|
|
39
37
|
// src/server/session/chat-handler.ts
|
|
40
38
|
var activeAgents = /* @__PURE__ */ new Map();
|
|
41
39
|
async function startChatSession(sessionId, content, deps, options) {
|
|
42
|
-
const { sessionManager,
|
|
40
|
+
const { sessionManager, broadcastForSession } = deps;
|
|
43
41
|
const session = sessionManager.getSession(sessionId);
|
|
44
42
|
if (!session) {
|
|
45
43
|
throw new Error("Session not found");
|
|
@@ -69,34 +67,17 @@ async function startChatSession(sessionId, content, deps, options) {
|
|
|
69
67
|
...options?.isSystemGenerated && { isSystemGenerated: options.isSystemGenerated }
|
|
70
68
|
});
|
|
71
69
|
broadcastForSession(sessionId, createChatMessageMessage(userMessage));
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
name: result.name,
|
|
84
|
-
error: result.error
|
|
85
|
-
});
|
|
86
|
-
if (result.success && result.name) {
|
|
87
|
-
applyGeneratedSessionName(sessionId, result.name, {
|
|
88
|
-
sessionManager,
|
|
89
|
-
eventStore,
|
|
90
|
-
broadcastForSession
|
|
91
|
-
});
|
|
92
|
-
}
|
|
93
|
-
}).catch((error) => {
|
|
94
|
-
logger.error("Session name generation failed", {
|
|
95
|
-
sessionId,
|
|
96
|
-
error: error instanceof Error ? error.message : String(error)
|
|
97
|
-
});
|
|
98
|
-
});
|
|
99
|
-
}
|
|
70
|
+
generateSessionNameForSession(
|
|
71
|
+
sessionId,
|
|
72
|
+
content,
|
|
73
|
+
{
|
|
74
|
+
sessionManager,
|
|
75
|
+
providerManager: deps.providerManager,
|
|
76
|
+
broadcastForSession,
|
|
77
|
+
eventStore
|
|
78
|
+
},
|
|
79
|
+
controller.signal
|
|
80
|
+
);
|
|
100
81
|
startTurnWithCompletionChain(sessionId, controller, deps);
|
|
101
82
|
} catch (error) {
|
|
102
83
|
if (activeAgents.get(sessionId) === controller) {
|
|
@@ -164,4 +145,4 @@ export {
|
|
|
164
145
|
startChatSession,
|
|
165
146
|
stopSessionExecution
|
|
166
147
|
};
|
|
167
|
-
//# sourceMappingURL=chat-handler-
|
|
148
|
+
//# sourceMappingURL=chat-handler-XH3QUXIC.js.map
|
|
@@ -1,132 +1,25 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getBackendCapabilities
|
|
3
|
+
} from "./chunk-Z4FMBCJO.js";
|
|
4
|
+
import {
|
|
5
|
+
ensureVersionPrefix
|
|
6
|
+
} from "./chunk-HNCM3D7Y.js";
|
|
1
7
|
import {
|
|
2
8
|
buildNonStreamingCreateParams,
|
|
3
9
|
buildStreamingCreateParams,
|
|
4
|
-
getBackendCapabilities,
|
|
5
|
-
getModelProfile,
|
|
6
10
|
getThinking,
|
|
7
11
|
mapFinishReason
|
|
8
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-ZJ4FP6RS.js";
|
|
13
|
+
import {
|
|
14
|
+
LLMError,
|
|
15
|
+
OpenAIHttpClient
|
|
16
|
+
} from "./chunk-IEDE6VK4.js";
|
|
9
17
|
import {
|
|
10
18
|
logger
|
|
11
19
|
} from "./chunk-K44MW7JJ.js";
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
function hasVersionPrefix(url) {
|
|
16
|
-
return VERSION_PREFIX_REGEX.test(url);
|
|
17
|
-
}
|
|
18
|
-
function ensureVersionPrefix(url, defaultVersion = "/v1") {
|
|
19
|
-
if (hasVersionPrefix(url)) return url;
|
|
20
|
-
return `${url.replace(/\/+$/, "")}${defaultVersion}`;
|
|
21
|
-
}
|
|
22
|
-
function stripVersionPrefix(url) {
|
|
23
|
-
return url.replace(/\/v\d+\/?$/, "");
|
|
24
|
-
}
|
|
25
|
-
function buildModelsUrl(baseUrl) {
|
|
26
|
-
return `${ensureVersionPrefix(baseUrl)}/models`;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
// src/server/utils/errors.ts
|
|
30
|
-
var OpenFoxError = class extends Error {
|
|
31
|
-
constructor(message, code, details) {
|
|
32
|
-
super(message);
|
|
33
|
-
this.code = code;
|
|
34
|
-
this.details = details;
|
|
35
|
-
this.name = "OpenFoxError";
|
|
36
|
-
}
|
|
37
|
-
};
|
|
38
|
-
var SessionNotFoundError = class extends OpenFoxError {
|
|
39
|
-
constructor(sessionId) {
|
|
40
|
-
super(`Session not found: ${sessionId}`, "SESSION_NOT_FOUND", { sessionId });
|
|
41
|
-
this.name = "SessionNotFoundError";
|
|
42
|
-
}
|
|
43
|
-
};
|
|
44
|
-
var LLMError = class extends OpenFoxError {
|
|
45
|
-
constructor(message, details) {
|
|
46
|
-
super(message, "LLM_ERROR", details);
|
|
47
|
-
this.name = "LLMError";
|
|
48
|
-
}
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
// src/server/llm/http-client.ts
|
|
52
|
-
import { Agent, setGlobalDispatcher } from "undici";
|
|
53
|
-
var agent = new Agent({ allowH2: true });
|
|
54
|
-
setGlobalDispatcher(agent);
|
|
55
|
-
var OpenAIHttpClient = class {
|
|
56
|
-
baseURL;
|
|
57
|
-
apiKey;
|
|
58
|
-
constructor(options) {
|
|
59
|
-
this.baseURL = options.baseURL;
|
|
60
|
-
this.apiKey = options.apiKey;
|
|
61
|
-
}
|
|
62
|
-
async fetchChatCompletion(params, options) {
|
|
63
|
-
const url = `${this.baseURL}/chat/completions`;
|
|
64
|
-
const headers = {
|
|
65
|
-
"Content-Type": "application/json",
|
|
66
|
-
Authorization: `Bearer ${this.apiKey}`
|
|
67
|
-
};
|
|
68
|
-
const response = await fetch(url, {
|
|
69
|
-
method: "POST",
|
|
70
|
-
headers,
|
|
71
|
-
body: JSON.stringify(params),
|
|
72
|
-
signal: options?.signal ?? null
|
|
73
|
-
});
|
|
74
|
-
if (!response.ok) {
|
|
75
|
-
const errorText = await response.text();
|
|
76
|
-
throw new LLMError(`HTTP ${response.status}: ${errorText}`);
|
|
77
|
-
}
|
|
78
|
-
return response;
|
|
79
|
-
}
|
|
80
|
-
async createChatCompletion(params, options) {
|
|
81
|
-
const response = await this.fetchChatCompletion(params, options);
|
|
82
|
-
try {
|
|
83
|
-
const data = await response.json();
|
|
84
|
-
return data;
|
|
85
|
-
} catch (error) {
|
|
86
|
-
throw new LLMError(`Failed to parse response: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
createChatCompletionStream(params, options) {
|
|
90
|
-
const responsePromise = this.fetchChatCompletion(params, options);
|
|
91
|
-
async function* generate() {
|
|
92
|
-
const response = await responsePromise;
|
|
93
|
-
if (!response.body) {
|
|
94
|
-
throw new LLMError("No response body for streaming");
|
|
95
|
-
}
|
|
96
|
-
const reader = response.body.getReader();
|
|
97
|
-
const decoder = new TextDecoder();
|
|
98
|
-
let buffer = "";
|
|
99
|
-
try {
|
|
100
|
-
while (true) {
|
|
101
|
-
const { done, value } = await reader.read();
|
|
102
|
-
if (done) break;
|
|
103
|
-
buffer += decoder.decode(value, { stream: true });
|
|
104
|
-
const lines = buffer.split("\n");
|
|
105
|
-
buffer = lines.pop() || "";
|
|
106
|
-
for (const line of lines) {
|
|
107
|
-
const trimmed = line.trim();
|
|
108
|
-
if (!trimmed) continue;
|
|
109
|
-
if (trimmed.startsWith("data: ")) {
|
|
110
|
-
const data = trimmed.slice(6);
|
|
111
|
-
if (data === "[DONE]") {
|
|
112
|
-
return;
|
|
113
|
-
}
|
|
114
|
-
try {
|
|
115
|
-
const chunk = JSON.parse(data);
|
|
116
|
-
yield chunk;
|
|
117
|
-
} catch (error) {
|
|
118
|
-
logger.warn("Failed to parse SSE chunk", { data, error });
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
} finally {
|
|
124
|
-
reader.releaseLock();
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
return generate();
|
|
128
|
-
}
|
|
129
|
-
};
|
|
20
|
+
import {
|
|
21
|
+
getModelProfile
|
|
22
|
+
} from "./chunk-V4IE7HJY.js";
|
|
130
23
|
|
|
131
24
|
// src/server/llm/client.ts
|
|
132
25
|
function createLLMClient(config, initialBackend = "unknown") {
|
|
@@ -176,7 +69,7 @@ function createLLMClient(config, initialBackend = "unknown") {
|
|
|
176
69
|
reasoningEffort: request.reasoningEffort ?? reasoningEffort
|
|
177
70
|
});
|
|
178
71
|
try {
|
|
179
|
-
const resolvedEffort = request.reasoningEffort ?? reasoningEffort;
|
|
72
|
+
const resolvedEffort = request.skipClientReasoningEffort ? void 0 : request.reasoningEffort ?? reasoningEffort;
|
|
180
73
|
const { params: createParams } = await buildNonStreamingCreateParams({
|
|
181
74
|
model,
|
|
182
75
|
request,
|
|
@@ -185,10 +78,14 @@ function createLLMClient(config, initialBackend = "unknown") {
|
|
|
185
78
|
...resolvedEffort ? { reasoningEffort: resolvedEffort } : {},
|
|
186
79
|
...thinkingField ? { thinkingField } : {}
|
|
187
80
|
});
|
|
188
|
-
const
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
81
|
+
const httpResponse = await httpClient.createChatCompletion(
|
|
82
|
+
createParams,
|
|
83
|
+
{
|
|
84
|
+
signal: request.signal
|
|
85
|
+
},
|
|
86
|
+
request.returnRaw
|
|
87
|
+
);
|
|
88
|
+
const choice = httpResponse.choices[0];
|
|
192
89
|
if (!choice) {
|
|
193
90
|
throw new LLMError("No completion choice returned");
|
|
194
91
|
}
|
|
@@ -201,16 +98,17 @@ function createLLMClient(config, initialBackend = "unknown") {
|
|
|
201
98
|
arguments: JSON.parse(tc.function.arguments)
|
|
202
99
|
}));
|
|
203
100
|
return {
|
|
204
|
-
id:
|
|
101
|
+
id: httpResponse.id,
|
|
205
102
|
content,
|
|
206
103
|
...thinkingContent ? { thinkingContent } : {},
|
|
207
104
|
...toolCalls && toolCalls.length > 0 ? { toolCalls } : {},
|
|
208
105
|
finishReason: mapFinishReason(choice.finish_reason),
|
|
209
106
|
usage: {
|
|
210
|
-
promptTokens:
|
|
211
|
-
completionTokens:
|
|
212
|
-
totalTokens:
|
|
213
|
-
}
|
|
107
|
+
promptTokens: httpResponse.usage?.prompt_tokens ?? 0,
|
|
108
|
+
completionTokens: httpResponse.usage?.completion_tokens ?? 0,
|
|
109
|
+
totalTokens: httpResponse.usage?.total_tokens ?? 0
|
|
110
|
+
},
|
|
111
|
+
...httpResponse.raw ? { raw: httpResponse.raw } : {}
|
|
214
112
|
};
|
|
215
113
|
} catch (error) {
|
|
216
114
|
logger.error("LLM complete error", { error: String(error) });
|
|
@@ -220,7 +118,7 @@ function createLLMClient(config, initialBackend = "unknown") {
|
|
|
220
118
|
}
|
|
221
119
|
},
|
|
222
120
|
async *stream(request) {
|
|
223
|
-
const resolvedEffort = request.reasoningEffort ?? reasoningEffort;
|
|
121
|
+
const resolvedEffort = request.skipClientReasoningEffort ? void 0 : request.reasoningEffort ?? reasoningEffort;
|
|
224
122
|
logger.debug("LLM stream request", {
|
|
225
123
|
messageCount: request.messages.length,
|
|
226
124
|
hasTools: !!request.tools?.length,
|
|
@@ -358,114 +256,7 @@ function createLLMClient(config, initialBackend = "unknown") {
|
|
|
358
256
|
};
|
|
359
257
|
}
|
|
360
258
|
|
|
361
|
-
// src/server/llm/models.ts
|
|
362
|
-
var modelCache = /* @__PURE__ */ new Map();
|
|
363
|
-
var llmStatus = "unknown";
|
|
364
|
-
var lastActiveUrl = null;
|
|
365
|
-
var CACHE_TTL_MS = 3e4;
|
|
366
|
-
function getCacheKey(url) {
|
|
367
|
-
return stripVersionPrefix(url);
|
|
368
|
-
}
|
|
369
|
-
async function detectModel(llmBaseUrl, retries = 3, silent = false) {
|
|
370
|
-
const cacheKey = getCacheKey(llmBaseUrl);
|
|
371
|
-
const now = Date.now();
|
|
372
|
-
const cached = modelCache.get(cacheKey);
|
|
373
|
-
if (cached && now - cached.timestamp < CACHE_TTL_MS) {
|
|
374
|
-
lastActiveUrl = cacheKey;
|
|
375
|
-
llmStatus = "connected";
|
|
376
|
-
return cached.model;
|
|
377
|
-
}
|
|
378
|
-
const url = buildModelsUrl(llmBaseUrl);
|
|
379
|
-
for (let attempt = 1; attempt <= retries; attempt++) {
|
|
380
|
-
try {
|
|
381
|
-
if (silent) {
|
|
382
|
-
logger.debug("Fetching models from LLM server", { url, attempt });
|
|
383
|
-
}
|
|
384
|
-
const response = await fetch(url, {
|
|
385
|
-
signal: AbortSignal.timeout(1e4)
|
|
386
|
-
});
|
|
387
|
-
if (!response.ok) {
|
|
388
|
-
if (silent) {
|
|
389
|
-
logger.debug("Failed to fetch models from LLM server", { status: response.status, attempt });
|
|
390
|
-
} else {
|
|
391
|
-
logger.warn("Failed to fetch models from LLM server", { status: response.status, attempt });
|
|
392
|
-
}
|
|
393
|
-
if (attempt < retries) {
|
|
394
|
-
await new Promise((r) => setTimeout(r, 1e3 * attempt));
|
|
395
|
-
continue;
|
|
396
|
-
}
|
|
397
|
-
llmStatus = "disconnected";
|
|
398
|
-
return cached?.model ?? null;
|
|
399
|
-
}
|
|
400
|
-
const data = await response.json();
|
|
401
|
-
if (data.data && data.data.length > 0) {
|
|
402
|
-
const modelData = data.data[0];
|
|
403
|
-
const modelId = modelData.id;
|
|
404
|
-
modelCache.set(cacheKey, {
|
|
405
|
-
model: modelId,
|
|
406
|
-
modelInfo: modelData,
|
|
407
|
-
timestamp: now
|
|
408
|
-
});
|
|
409
|
-
lastActiveUrl = cacheKey;
|
|
410
|
-
llmStatus = "connected";
|
|
411
|
-
if (silent) {
|
|
412
|
-
logger.debug("Detected LLM model", {
|
|
413
|
-
model: modelId,
|
|
414
|
-
maxLen: modelData.max_model_len,
|
|
415
|
-
root: modelData.root
|
|
416
|
-
});
|
|
417
|
-
} else {
|
|
418
|
-
logger.info("Detected LLM model", {
|
|
419
|
-
model: modelId,
|
|
420
|
-
maxLen: modelData.max_model_len,
|
|
421
|
-
root: modelData.root
|
|
422
|
-
});
|
|
423
|
-
}
|
|
424
|
-
return modelId;
|
|
425
|
-
}
|
|
426
|
-
if (silent) {
|
|
427
|
-
logger.debug("LLM server returned empty models list");
|
|
428
|
-
} else {
|
|
429
|
-
logger.warn("LLM server returned empty models list");
|
|
430
|
-
}
|
|
431
|
-
llmStatus = "disconnected";
|
|
432
|
-
return null;
|
|
433
|
-
} catch (error) {
|
|
434
|
-
const errMsg = error instanceof Error ? error.message : String(error);
|
|
435
|
-
if (silent) {
|
|
436
|
-
logger.debug("Could not detect model from LLM server", { error: errMsg, attempt });
|
|
437
|
-
} else {
|
|
438
|
-
logger.warn("Could not detect model from LLM server", { error: errMsg, attempt });
|
|
439
|
-
}
|
|
440
|
-
if (attempt < retries) {
|
|
441
|
-
await new Promise((r) => setTimeout(r, 1e3 * attempt));
|
|
442
|
-
continue;
|
|
443
|
-
}
|
|
444
|
-
}
|
|
445
|
-
}
|
|
446
|
-
llmStatus = "disconnected";
|
|
447
|
-
return cached?.model ?? null;
|
|
448
|
-
}
|
|
449
|
-
function getLlmStatus() {
|
|
450
|
-
return llmStatus;
|
|
451
|
-
}
|
|
452
|
-
function clearModelCache(url) {
|
|
453
|
-
if (url) {
|
|
454
|
-
modelCache.delete(getCacheKey(url));
|
|
455
|
-
} else {
|
|
456
|
-
modelCache.clear();
|
|
457
|
-
}
|
|
458
|
-
llmStatus = "unknown";
|
|
459
|
-
}
|
|
460
|
-
|
|
461
259
|
export {
|
|
462
|
-
|
|
463
|
-
ensureVersionPrefix,
|
|
464
|
-
stripVersionPrefix,
|
|
465
|
-
buildModelsUrl,
|
|
466
|
-
createLLMClient,
|
|
467
|
-
detectModel,
|
|
468
|
-
getLlmStatus,
|
|
469
|
-
clearModelCache
|
|
260
|
+
createLLMClient
|
|
470
261
|
};
|
|
471
|
-
//# sourceMappingURL=chunk-
|
|
262
|
+
//# sourceMappingURL=chunk-2ELCWCO3.js.map
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
loadAllAgentsDefault,
|
|
15
15
|
processEventsForConversation,
|
|
16
16
|
runTopLevelAgentLoop
|
|
17
|
-
} from "./chunk-
|
|
17
|
+
} from "./chunk-WCPFR6ZP.js";
|
|
18
18
|
import {
|
|
19
19
|
TurnMetrics,
|
|
20
20
|
WORKFLOW_KICKOFF_PROMPT,
|
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
buildAgentSmallReminder,
|
|
23
23
|
createChatDoneEvent,
|
|
24
24
|
createMessageStartEvent
|
|
25
|
-
} from "./chunk-
|
|
25
|
+
} from "./chunk-YGSBVKFU.js";
|
|
26
26
|
import {
|
|
27
27
|
getCurrentContextWindowId,
|
|
28
28
|
getCurrentWindowMessageOptions,
|
|
@@ -32,12 +32,12 @@ import {
|
|
|
32
32
|
import {
|
|
33
33
|
buildSnapshotFromSessionState
|
|
34
34
|
} from "./chunk-6PLAWCHQ.js";
|
|
35
|
-
import {
|
|
36
|
-
getGlobalConfigDir
|
|
37
|
-
} from "./chunk-CQGTEGKL.js";
|
|
38
35
|
import {
|
|
39
36
|
logger
|
|
40
37
|
} from "./chunk-K44MW7JJ.js";
|
|
38
|
+
import {
|
|
39
|
+
getGlobalConfigDir
|
|
40
|
+
} from "./chunk-CQGTEGKL.js";
|
|
41
41
|
|
|
42
42
|
// src/server/chat/orchestrator.ts
|
|
43
43
|
async function buildRetryPatterns() {
|
|
@@ -320,4 +320,4 @@ export {
|
|
|
320
320
|
runAgentTurn,
|
|
321
321
|
injectWorkflowKickoffIfNeeded
|
|
322
322
|
};
|
|
323
|
-
//# sourceMappingURL=chunk-
|
|
323
|
+
//# sourceMappingURL=chunk-64NCACBV.js.map
|