openfox 2.0.23 → 2.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auto-config-FZFXOEEG.js +169 -0
- package/dist/backend-AGXWAU7A.js +9 -0
- package/dist/{chat-handler-4ATHDLH4.js → chat-handler-XH3QUXIC.js} +28 -47
- package/dist/{chunk-XJ4SUDL7.js → chunk-2ELCWCO3.js} +32 -241
- package/dist/{chunk-QK6TYNUN.js → chunk-64NCACBV.js} +6 -6
- package/dist/{chunk-GI24G4OW.js → chunk-AYJTMZVU.js} +79 -59
- package/dist/{chunk-YVF3BLQS.js → chunk-GZOYGODG.js} +36 -22
- package/dist/chunk-HNCM3D7Y.js +28 -0
- package/dist/chunk-IEDE6VK4.js +124 -0
- package/dist/chunk-J2GP3J3X.js +97 -0
- package/dist/{chunk-FUNQXQ7X.js → chunk-L5FBH2YX.js} +116 -65
- package/dist/{chunk-ZMR2ULLL.js → chunk-LP5RXQW5.js} +3 -3
- package/dist/chunk-M3RB4IF6.js +114 -0
- package/dist/chunk-V4IE7HJY.js +175 -0
- package/dist/{chunk-MDRNKI7D.js → chunk-WCPFR6ZP.js} +55 -48
- package/dist/{chunk-CDIYCGCO.js → chunk-WEXW7ZXJ.js} +2 -2
- package/dist/{chunk-RYHCYZQ7.js → chunk-Y6STCE5Z.js} +26 -24
- package/dist/{chunk-YUHODMKY.js → chunk-YGSBVKFU.js} +11 -5
- package/dist/chunk-Z4FMBCJO.js +52 -0
- package/dist/chunk-ZJ4FP6RS.js +200 -0
- package/dist/cli/dev.js +1 -1
- package/dist/cli/index.js +1 -1
- package/dist/client-725U6BTX.js +13 -0
- package/dist/client-pure-5NOTSIRK.js +19 -0
- package/dist/{compactor-SEZEZSML.js → compactor-JMGSZ4DQ.js} +7 -4
- package/dist/http-client-SIPAW7IM.js +8 -0
- package/dist/{orchestrator-MFN7COWT.js → orchestrator-FRFKYO77.js} +16 -13
- package/dist/package.json +1 -1
- package/dist/{processor-W2ZSJVOJ.js → processor-YAMVUA7K.js} +30 -55
- package/dist/profiles-Q36ELWQF.js +9 -0
- package/dist/{provider-IMW3ITB7.js → provider-KB7GB2O2.js} +15 -9
- package/dist/provider-manager-5VAVOKHC.js +22 -0
- package/dist/{serve-MDNCOVQU.js → serve-XBIN2DEU.js} +23 -17
- package/dist/server/index.d.ts +9 -1
- package/dist/server/index.js +19 -13
- package/dist/{server-7EAYI7T4.js → server-VXOP7JUX.js} +18 -12
- package/dist/{tools-7CKTYL2G.js → tools-JDYXXX2N.js} +11 -8
- package/dist/url-utils-QWAHP54Q.js +15 -0
- package/dist/web/assets/{index-CkUCxzzC.css → index-BLOGpuPE.css} +1 -1
- package/dist/web/assets/{index-CXXGAW1N.js → index-CtG8oo36.js} +66 -66
- package/dist/web/index.html +2 -2
- package/dist/web/sw.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-UUFEE7VR.js +0 -505
- package/dist/provider-manager-DNBMBP4D.js +0 -16
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
import {
|
|
2
|
+
createContextStateMessage,
|
|
3
|
+
createSessionStateMessage
|
|
4
|
+
} from "./chunk-F4PMNP7S.js";
|
|
5
|
+
import {
|
|
6
|
+
getPendingQuestionsForSession
|
|
7
|
+
} from "./chunk-EU3WWTFH.js";
|
|
1
8
|
import {
|
|
2
9
|
getEventStore,
|
|
3
10
|
updateSessionMetadata
|
|
@@ -6,13 +13,6 @@ import {
|
|
|
6
13
|
buildMessagesFromStoredEvents,
|
|
7
14
|
foldPendingConfirmations
|
|
8
15
|
} from "./chunk-6PLAWCHQ.js";
|
|
9
|
-
import {
|
|
10
|
-
createContextStateMessage,
|
|
11
|
-
createSessionStateMessage
|
|
12
|
-
} from "./chunk-F4PMNP7S.js";
|
|
13
|
-
import {
|
|
14
|
-
getPendingQuestionsForSession
|
|
15
|
-
} from "./chunk-EU3WWTFH.js";
|
|
16
16
|
import {
|
|
17
17
|
logger
|
|
18
18
|
} from "./chunk-K44MW7JJ.js";
|
|
@@ -58,53 +58,6 @@ Example inputs and outputs:
|
|
|
58
58
|
- "Add unit tests for the API" \u2192 "Add API unit tests"
|
|
59
59
|
|
|
60
60
|
User message: {message}`;
|
|
61
|
-
async function generateSessionName(options) {
|
|
62
|
-
const { userMessage, llmClient, signal, modelSettings, nonThinkingEnabled } = options;
|
|
63
|
-
try {
|
|
64
|
-
logger.debug("Generating session name", { messagePreview: userMessage.slice(0, 50) });
|
|
65
|
-
const prompt = SESSION_NAME_PROMPT.replace("{message}", userMessage);
|
|
66
|
-
const messages = [
|
|
67
|
-
{
|
|
68
|
-
role: "user",
|
|
69
|
-
content: prompt
|
|
70
|
-
}
|
|
71
|
-
];
|
|
72
|
-
const timeoutSignal = AbortSignal.timeout(6e4);
|
|
73
|
-
const composedSignal = signal ? AbortSignal.any([timeoutSignal, signal]) : timeoutSignal;
|
|
74
|
-
const response = await llmClient.complete({
|
|
75
|
-
messages,
|
|
76
|
-
tools: [],
|
|
77
|
-
signal: composedSignal,
|
|
78
|
-
// Default to non-thinking (reasoningEffort: 'none') when not explicitly configured
|
|
79
|
-
// to prevent thinking output in session names. Only skip when user explicitly
|
|
80
|
-
// set nonThinkingEnabled to false.
|
|
81
|
-
...nonThinkingEnabled !== false ? { reasoningEffort: "none" } : {},
|
|
82
|
-
...modelSettings ? { modelSettings } : {}
|
|
83
|
-
});
|
|
84
|
-
let name = (response.content || response.thinkingContent || "").trim();
|
|
85
|
-
if (name.length > 50) {
|
|
86
|
-
name = name.substring(0, 47) + "...";
|
|
87
|
-
}
|
|
88
|
-
if (!name || name.length < 3) {
|
|
89
|
-
return {
|
|
90
|
-
success: false,
|
|
91
|
-
error: "Generated name is too short or empty"
|
|
92
|
-
};
|
|
93
|
-
}
|
|
94
|
-
logger.debug("Session name generated successfully", { name });
|
|
95
|
-
return {
|
|
96
|
-
success: true,
|
|
97
|
-
name
|
|
98
|
-
};
|
|
99
|
-
} catch (error) {
|
|
100
|
-
const errorMessage = error instanceof Error ? error.message : "Unknown error generating session name";
|
|
101
|
-
logger.debug("Session name generation error", { error: errorMessage });
|
|
102
|
-
return {
|
|
103
|
-
success: false,
|
|
104
|
-
error: errorMessage
|
|
105
|
-
};
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
61
|
function needsNameGeneration(sessionTitle, messageCount) {
|
|
109
62
|
if (messageCount > 1) {
|
|
110
63
|
return false;
|
|
@@ -145,13 +98,80 @@ function applyGeneratedSessionName(sessionId, name, deps) {
|
|
|
145
98
|
);
|
|
146
99
|
}
|
|
147
100
|
}
|
|
101
|
+
/**
 * Works out which provider endpoint and model a session should talk to.
 *
 * The model is the session's own `providerModel` when set, otherwise the
 * provider manager's current model. The provider is the one whose id equals
 * the session's `providerId` when that is set, otherwise the first provider
 * flagged `isActive`.
 *
 * @param {object} session - Session record; `providerModel` and `providerId` are read.
 * @param {object} providerManager - Must expose `getProviders()` and `getCurrentModel()`.
 * @returns {{baseUrl: *, apiKey?: *, model: *} | undefined} Connection details,
 *   or `undefined` when no model or no matching provider is available.
 */
function resolveSessionProvider(session, providerManager) {
  const knownProviders = providerManager.getProviders();
  const model = session.providerModel ?? providerManager.getCurrentModel();
  if (!model) {
    return undefined;
  }

  // A pinned provider id wins; without one, fall back to the active provider.
  const isWanted = session.providerId
    ? (candidate) => candidate.id === session.providerId
    : (candidate) => candidate.isActive;
  const selected = knownProviders.find(isWanted);
  if (!selected) {
    return undefined;
  }

  const resolved = { baseUrl: selected.url };
  if (selected.apiKey) {
    // Only carry the key when the provider actually has one.
    resolved.apiKey = selected.apiKey;
  }
  resolved.model = model;
  return resolved;
}
|
|
113
|
+
/**
 * Asks an LLM for a short session title based on a user message and, when the
 * answer is usable, applies it through `applyGeneratedSessionName`.
 *
 * Nothing happens when the session does not exist or when
 * `needsNameGenerationCheck` says no name is needed. Failures while resolving
 * model settings, creating the client, or calling the model are logged via
 * `logger.error` and swallowed rather than propagated to the caller.
 *
 * @param {string} sessionId - Id of the session to name.
 * @param {string} userMessage - Text substituted into `SESSION_NAME_PROMPT`.
 * @param {object} deps - `sessionManager`, `providerManager`,
 *   `broadcastForSession`, `eventStore`, and optionally `getLLMClient`.
 * @param {AbortSignal} [signal] - Optional caller signal, combined with a 30 s timeout.
 * @returns {Promise<void>}
 */
async function generateSessionNameForSession(sessionId, userMessage, deps, signal) {
  const { sessionManager, providerManager, broadcastForSession, eventStore } = deps;
  const session = sessionManager.getSession(sessionId);
  if (!session) return;
  const messageCount = getSessionMessageCount(sessionId);
  if (!needsNameGenerationCheck(sessionId, session.metadata.title, messageCount)) return;
  const providerConfig = resolveSessionProvider(session, providerManager);
  // A replacer function is used so that `$&`, `$1`, `$$` and similar sequences
  // inside the user's text are inserted literally instead of being expanded as
  // String.prototype.replace substitution patterns.
  const prompt = SESSION_NAME_PROMPT.replace("{message}", () => userMessage);
  try {
    // Resolved inside the try block: reading model settings can throw (e.g. on
    // malformed stored query params), and that must not escape this function.
    const modelSettings = providerConfig
      ? providerManager.getModelSettings(providerConfig.model, "non-thinking")
      : undefined;
    let client;
    if (deps.getLLMClient && providerConfig) {
      client = deps.getLLMClient();
      client.setModel(providerConfig.model);
    } else if (providerConfig) {
      // No injected client: build one for the resolved provider on demand.
      const { createLLMClient } = await import("./client-725U6BTX.js");
      client = createLLMClient({
        llm: {
          baseUrl: providerConfig.baseUrl,
          model: providerConfig.model,
          ...(providerConfig.apiKey ? { apiKey: providerConfig.apiKey } : {}),
        },
      });
    } else if (deps.getLLMClient) {
      client = deps.getLLMClient();
    } else {
      logger.debug("Session name generation skipped: no LLM client available", { sessionId });
      return;
    }
    const timeoutSignal = AbortSignal.timeout(3e4);
    const composedSignal = signal ? AbortSignal.any([timeoutSignal, signal]) : timeoutSignal;
    const response = await client.complete({
      messages: [{ role: "user", content: prompt }],
      tools: [],
      ...(modelSettings ? { modelSettings } : {}),
      signal: composedSignal,
      skipClientReasoningEffort: true,
    });
    // Fall back to thinking output when the model produced no regular content.
    let name = (response.content || response.thinkingContent || "").trim();
    if (name.length > 50) {
      name = name.substring(0, 47) + "...";
    }
    if (name.length >= 3) {
      applyGeneratedSessionName(sessionId, name, {
        sessionManager,
        eventStore,
        broadcastForSession,
      });
    } else {
      logger.debug("Session name too short", { sessionId, name });
    }
  } catch (error) {
    logger.error("Session name generation error", {
      sessionId,
      error: error instanceof Error ? error.message : String(error),
    });
  }
}
|
|
148
171
|
|
|
149
172
|
export {
|
|
150
|
-
getSessionMessageCount,
|
|
151
173
|
finalizeTurnCompletion,
|
|
152
174
|
buildRunChatTurnParams,
|
|
153
|
-
|
|
154
|
-
needsNameGenerationCheck,
|
|
155
|
-
applyGeneratedSessionName
|
|
175
|
+
generateSessionNameForSession
|
|
156
176
|
};
|
|
157
|
-
//# sourceMappingURL=chunk-
|
|
177
|
+
//# sourceMappingURL=chunk-AYJTMZVU.js.map
|
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
import {
|
|
2
|
-
buildModelsUrl,
|
|
3
2
|
clearModelCache,
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
detectModel
|
|
4
|
+
} from "./chunk-M3RB4IF6.js";
|
|
5
|
+
import {
|
|
6
|
+
createLLMClient
|
|
7
|
+
} from "./chunk-2ELCWCO3.js";
|
|
8
|
+
import {
|
|
9
|
+
buildModelsUrl,
|
|
6
10
|
ensureVersionPrefix,
|
|
7
11
|
stripVersionPrefix
|
|
8
|
-
} from "./chunk-
|
|
9
|
-
import {
|
|
10
|
-
getModelProfile
|
|
11
|
-
} from "./chunk-UUFEE7VR.js";
|
|
12
|
+
} from "./chunk-HNCM3D7Y.js";
|
|
12
13
|
import {
|
|
13
14
|
logger
|
|
14
15
|
} from "./chunk-K44MW7JJ.js";
|
|
16
|
+
import {
|
|
17
|
+
getModelProfile
|
|
18
|
+
} from "./chunk-V4IE7HJY.js";
|
|
15
19
|
|
|
16
20
|
// src/server/provider-manager.ts
|
|
17
21
|
function normalizeModelId(s) {
|
|
@@ -201,7 +205,7 @@ function createProviderManager(config) {
|
|
|
201
205
|
const { providerId: activeProviderId, model: activeModel } = parseDefaultModelSelection(defaultModelSelection);
|
|
202
206
|
if (activeProviderId && activeModel) {
|
|
203
207
|
const activeProvider = providers.find((p) => p.id === activeProviderId);
|
|
204
|
-
if (activeProvider
|
|
208
|
+
if (activeProvider) {
|
|
205
209
|
llmClient = createLLMClient(createConfigForProvider(activeProvider, activeModel));
|
|
206
210
|
}
|
|
207
211
|
}
|
|
@@ -352,8 +356,7 @@ function createProviderManager(config) {
|
|
|
352
356
|
llmClient = createLLMClient(providerConfig);
|
|
353
357
|
logger.info("setProviders: recreated LLM client for new active provider", {
|
|
354
358
|
providerId: newActiveProviderId,
|
|
355
|
-
url: activeProvider.url
|
|
356
|
-
hasApiKey: !!activeProvider.apiKey
|
|
359
|
+
url: activeProvider.url
|
|
357
360
|
});
|
|
358
361
|
}
|
|
359
362
|
}
|
|
@@ -473,17 +476,28 @@ function createProviderManager(config) {
|
|
|
473
476
|
const provider = providers.find((p) => p.models.some((m) => m.id === modelId));
|
|
474
477
|
const model = provider?.models.find((m) => m.id === modelId);
|
|
475
478
|
if (!model) return void 0;
|
|
476
|
-
const
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
479
|
+
const baseSettings = {};
|
|
480
|
+
if (model["temperature"] !== void 0) baseSettings["temperature"] = model["temperature"];
|
|
481
|
+
if (model["topP"] !== void 0) baseSettings["topP"] = model["topP"];
|
|
482
|
+
if (model["topK"] !== void 0) baseSettings["topK"] = model["topK"];
|
|
483
|
+
if (model["maxTokens"] !== void 0) baseSettings["maxTokens"] = model["maxTokens"];
|
|
484
|
+
if (model["supportsVision"] !== void 0) baseSettings["supportsVision"] = model["supportsVision"];
|
|
485
|
+
const rawQueryParams = mode === "thinking" ? model.thinkingQueryParams : model.nonThinkingQueryParams;
|
|
486
|
+
if (rawQueryParams) {
|
|
487
|
+
return { ...baseSettings, queryParams: JSON.parse(rawQueryParams) };
|
|
488
|
+
}
|
|
489
|
+
const modeEnabled = mode === "thinking" ? model.thinkingEnabled : model.nonThinkingEnabled;
|
|
490
|
+
if (modeEnabled) {
|
|
491
|
+
return {
|
|
492
|
+
...baseSettings,
|
|
493
|
+
chatTemplateKwargs: mode === "thinking" ? { enable_thinking: true } : { enable_thinking: false }
|
|
494
|
+
};
|
|
495
|
+
}
|
|
496
|
+
const fallbackRawQP = mode === "thinking" ? model.nonThinkingQueryParams : model.thinkingQueryParams;
|
|
497
|
+
if (fallbackRawQP) {
|
|
498
|
+
return { ...baseSettings, queryParams: JSON.parse(fallbackRawQP) };
|
|
499
|
+
}
|
|
500
|
+
return void 0;
|
|
487
501
|
},
|
|
488
502
|
async refreshProviderModels(providerId) {
|
|
489
503
|
const provider = providers.find((p) => p.id === providerId);
|
|
@@ -540,4 +554,4 @@ export {
|
|
|
540
554
|
parseDefaultModelSelection,
|
|
541
555
|
createProviderManager
|
|
542
556
|
};
|
|
543
|
-
//# sourceMappingURL=chunk-
|
|
557
|
+
//# sourceMappingURL=chunk-GZOYGODG.js.map
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// src/server/llm/url-utils.ts
|
|
2
|
+
// Matches a `/v<digits>` path segment: it must be followed by `/` or end the string.
var VERSION_PREFIX_REGEX = /\/v\d+(\/|$)/;

/**
 * Tells whether a URL contains a `/v<digits>` path segment.
 *
 * @param {string} url
 * @returns {boolean}
 */
function hasVersionPrefix(url) {
  return VERSION_PREFIX_REGEX.test(url);
}

/**
 * Returns the first `/v<digits>` path segment of a URL, e.g. `"/v1"`.
 *
 * Uses the same segment boundary as `hasVersionPrefix`, so look-alikes such
 * as `/v2ray` or a host named `v2.example.com` are not reported as versions.
 *
 * @param {string} url
 * @returns {string | null} The segment without a trailing slash, or `null` when absent.
 */
function getVersionPrefix(url) {
  const match = url.match(/\/v\d+(?=\/|$)/);
  return match ? match[0] : null;
}

/**
 * Appends `defaultVersion` to a URL that has no version segment yet.
 * URLs that already contain one are returned unchanged.
 *
 * @param {string} url
 * @param {string} [defaultVersion="/v1"] - Segment to append, including the leading slash.
 * @returns {string}
 */
function ensureVersionPrefix(url, defaultVersion = "/v1") {
  if (hasVersionPrefix(url)) return url;
  // Drop trailing slashes first so the result never contains `//v1`.
  return `${url.replace(/\/+$/, "")}${defaultVersion}`;
}

/**
 * Removes a version segment that sits at the very end of a URL
 * (with or without one trailing slash).
 *
 * @param {string} url
 * @returns {string}
 */
function stripVersionPrefix(url) {
  return url.replace(/\/v\d+\/?$/, "");
}

/**
 * Builds the `/models` listing URL for a base URL, adding `/v1` when the base
 * carries no version segment.
 *
 * @param {string} baseUrl
 * @returns {string}
 */
function buildModelsUrl(baseUrl) {
  // A versioned base may still end in `/` (e.g. `http://host/v1/`); trim it so
  // the result is `.../v1/models` rather than `.../v1//models`.
  const versioned = ensureVersionPrefix(baseUrl).replace(/\/+$/, "");
  return `${versioned}/models`;
}
|
|
20
|
+
|
|
21
|
+
export {
|
|
22
|
+
hasVersionPrefix,
|
|
23
|
+
getVersionPrefix,
|
|
24
|
+
ensureVersionPrefix,
|
|
25
|
+
stripVersionPrefix,
|
|
26
|
+
buildModelsUrl
|
|
27
|
+
};
|
|
28
|
+
//# sourceMappingURL=chunk-HNCM3D7Y.js.map
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import {
|
|
2
|
+
logger
|
|
3
|
+
} from "./chunk-K44MW7JJ.js";
|
|
4
|
+
|
|
5
|
+
// src/server/llm/http-client.ts
|
|
6
|
+
import { Agent, setGlobalDispatcher } from "undici";
|
|
7
|
+
|
|
8
|
+
// src/server/utils/errors.ts
|
|
9
|
+
/**
 * Base class for application errors: an `Error` that additionally carries a
 * machine-readable `code` and an arbitrary `details` payload.
 */
class OpenFoxError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Error code stored on `this.code`.
   * @param {*} [details] - Extra context stored on `this.details`.
   */
  constructor(message, code, details) {
    super(message);
    // Assigned in this order so own-property order stays code, details, name.
    Object.assign(this, { code, details, name: "OpenFoxError" });
  }
}

/** Raised for a session id that cannot be found; code `SESSION_NOT_FOUND`. */
class SessionNotFoundError extends OpenFoxError {
  /** @param {string} sessionId - The id that was looked up. */
  constructor(sessionId) {
    const description = `Session not found: ${sessionId}`;
    super(description, "SESSION_NOT_FOUND", { sessionId });
    this.name = "SessionNotFoundError";
  }
}

/** Raised for failures talking to the LLM backend; code `LLM_ERROR`. */
class LLMError extends OpenFoxError {
  /**
   * @param {string} message - Human-readable description.
   * @param {*} [details] - Extra context stored on `this.details`.
   */
  constructor(message, details) {
    super(message, "LLM_ERROR", details);
    this.name = "LLMError";
  }
}
|
|
29
|
+
|
|
30
|
+
// src/server/llm/http-client.ts
|
|
31
|
+
// Module-level side effect: create an undici Agent with `allowH2` enabled and
// install it as the global dispatcher as soon as this chunk is loaded.
// NOTE(review): presumably this is what the `fetch` calls in this chunk go
// through — confirm against the undici version in use.
var agent = new Agent({ allowH2: true });
|
|
32
|
+
setGlobalDispatcher(agent);
|
|
33
|
+
/**
 * Small HTTP client for an OpenAI-style `/chat/completions` endpoint, with a
 * buffered JSON mode and a streaming (server-sent events) mode.
 */
var OpenAIHttpClient = class {
  baseURL;
  apiKey;

  /**
   * @param {{baseURL: string, apiKey?: string}} options - Endpoint base URL
   *   (without `/chat/completions`) and an optional bearer token.
   */
  constructor(options) {
    this.baseURL = options.baseURL;
    this.apiKey = options.apiKey;
  }

  /**
   * POSTs `params` as JSON to `<baseURL>/chat/completions`.
   *
   * The first 2000 characters of the request body are written to the debug log.
   *
   * @param {object} params - Request payload; serialized with `JSON.stringify`.
   * @param {{signal?: AbortSignal}} [options] - Optional abort signal.
   * @returns {Promise<Response>} The raw response when its status is OK.
   * @throws {LLMError} When the response status is not OK (message includes status and body text).
   */
  async fetchChatCompletion(params, options) {
    const url = `${this.baseURL}/chat/completions`;
    const headers = { "Content-Type": "application/json" };
    if (this.apiKey) {
      // Without a key, omit the header instead of sending "Bearer undefined".
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const bodyStr = JSON.stringify(params);
    logger.debug("HTTP request to LLM", {
      url,
      body: bodyStr.slice(0, 2e3),
      bodyKeys: Object.keys(params),
    });
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: bodyStr,
      signal: options?.signal ?? null,
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new LLMError(`HTTP ${response.status}: ${errorText}`);
    }
    return response;
  }

  /**
   * Performs a non-streaming completion and parses the JSON body.
   *
   * @param {object} params - Request payload.
   * @param {{signal?: AbortSignal}} [options] - Optional abort signal.
   * @param {boolean} [returnRaw] - When truthy, the unparsed body text is added as `raw`.
   * @returns {Promise<object>} Parsed response, optionally with a `raw` string property.
   * @throws {LLMError} On a non-OK status or an unparsable body.
   */
  async createChatCompletion(params, options, returnRaw) {
    const response = await this.fetchChatCompletion(params, options);
    const rawText = await response.text();
    let data;
    try {
      data = JSON.parse(rawText);
    } catch (error) {
      throw new LLMError(`Failed to parse response: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
    return returnRaw ? { ...data, raw: rawText } : data;
  }

  /**
   * Starts a streaming completion and returns an async generator that yields
   * each JSON payload carried on a `data:` line of the response. Iteration
   * ends at the `[DONE]` marker or when the body ends. Payloads that are not
   * valid JSON are logged with `logger.warn` and skipped.
   *
   * The request is sent immediately, not on first iteration.
   *
   * @param {object} params - Request payload.
   * @param {{signal?: AbortSignal}} [options] - Optional abort signal.
   * @returns {AsyncGenerator<object, void, unknown>}
   */
  createChatCompletionStream(params, options) {
    const responsePromise = this.fetchChatCompletion(params, options);
    // Mark the eager request as observed so that a failure does not surface as
    // an unhandled rejection if the generator is never iterated; the error is
    // still thrown from the `await` below on first iteration.
    responsePromise.catch(() => {});

    async function* generate() {
      const response = await responsePromise;
      if (!response.body) {
        throw new LLMError("No response body for streaming");
      }
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = "";
      try {
        while (true) {
          const { done, value } = await reader.read();
          // On end of stream, flush the decoder and treat whatever is left in
          // the buffer as a final line, so a last event without a trailing
          // newline is not lost.
          buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          buffer = done ? "" : (lines.pop() ?? "");
          for (const line of lines) {
            const trimmed = line.trim();
            // Accept both "data: {...}" and "data:{...}".
            if (!trimmed.startsWith("data:")) continue;
            const data = trimmed.slice(5).trim();
            if (!data) continue;
            if (data === "[DONE]") {
              return;
            }
            let chunk;
            try {
              chunk = JSON.parse(data);
            } catch (error) {
              logger.warn("Failed to parse SSE chunk", { data, error });
              continue;
            }
            // Yield outside the try block so an error thrown into the
            // generator by the consumer is not mistaken for a parse failure.
            yield chunk;
          }
          if (done) break;
        }
      } finally {
        // Cancel before releasing so that stopping early (break, [DONE], or an
        // error) does not leave the response body open.
        await reader.cancel().catch(() => {});
        reader.releaseLock();
      }
    }
    return generate();
  }
};
|
|
118
|
+
|
|
119
|
+
export {
|
|
120
|
+
SessionNotFoundError,
|
|
121
|
+
LLMError,
|
|
122
|
+
OpenAIHttpClient
|
|
123
|
+
};
|
|
124
|
+
//# sourceMappingURL=chunk-IEDE6VK4.js.map
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// src/server/llm/streaming.ts
|
|
2
|
+
/**
 * Re-emits the events of `client.stream(request)` while recording the full
 * text, the full thinking text, an ordered list of segments, and timing data.
 *
 * Consecutive deltas of the same kind are merged into one segment; a segment
 * is closed when the other kind of delta arrives or at `done`. Segments that
 * contain only whitespace are dropped. One `tool_call` segment per entry in
 * `response.toolCalls` is appended after the stream ends.
 *
 * An `error` event, or an exception thrown by the stream, is yielded as
 * `{ type: "error", error }` and ends the generator with `null`.
 *
 * @param {{stream: Function}} client - Object whose `stream(request)` returns an async iterable of events.
 * @param {object} request - Passed through to `client.stream`.
 * @yields {object} `text_delta`, `thinking_delta`, `tool_call_delta`, `done` or `error` events.
 * @returns {AsyncGenerator<object, object | null, unknown>} The generator's return
 *   value is `{ content, thinkingContent, toolCalls, response, segments, timing }`,
 *   or `null` when no `done` event was received. `timing` holds seconds
 *   (`ttft`, `completionTime`) and tokens per second (`tps`, `prefillTps`).
 */
async function* streamWithSegments(client, request) {
  let content = "";
  let thinkingContent = "";
  let response = null;
  const segments = [];
  let currentTextSegment = "";
  let currentThinkingSegment = "";
  const startTime = performance.now();
  let firstTokenTime = null;

  const flushText = () => {
    if (currentTextSegment.trim()) {
      segments.push({ type: "text", content: currentTextSegment });
    }
    currentTextSegment = "";
  };
  const flushThinking = () => {
    if (currentThinkingSegment.trim()) {
      segments.push({ type: "thinking", content: currentThinkingSegment });
    }
    currentThinkingSegment = "";
  };
  // Time-to-first-token is measured to the first text OR thinking delta.
  const markFirstToken = () => {
    if (firstTokenTime === null) {
      firstTokenTime = performance.now();
    }
  };

  try {
    for await (const event of client.stream(request)) {
      switch (event.type) {
        case "text_delta":
          markFirstToken();
          flushThinking();
          content += event.content;
          currentTextSegment += event.content;
          yield { type: "text_delta", content: event.content };
          break;
        case "thinking_delta":
          markFirstToken();
          flushText();
          thinkingContent += event.content;
          currentThinkingSegment += event.content;
          yield { type: "thinking_delta", content: event.content };
          break;
        case "tool_call_delta":
          // Forward only the fields that are present on this delta.
          yield {
            type: "tool_call_delta",
            index: event.index,
            ...(event.id !== undefined ? { id: event.id } : {}),
            ...(event.name !== undefined ? { name: event.name } : {}),
            ...(event.arguments !== undefined ? { arguments: event.arguments } : {}),
          };
          break;
        case "done":
          flushThinking();
          flushText();
          response = event.response;
          yield { type: "done", response: event.response };
          break;
        case "error":
          yield { type: "error", error: event.error };
          return null;
      }
    }
  } catch (error) {
    yield { type: "error", error: error instanceof Error ? error.message : "Unknown error" };
    return null;
  }

  if (!response) {
    return null;
  }

  const toolCalls = response.toolCalls ?? [];
  for (const tc of toolCalls) {
    segments.push({ type: "tool_call", toolCallId: tc.id });
  }

  const endTime = performance.now();
  const ttft = ((firstTokenTime ?? endTime) - startTime) / 1e3;
  const completionTime = (endTime - (firstTokenTime ?? startTime)) / 1e3;
  // Usage may be missing from the final response; treat missing counts as 0 so
  // the summary is still returned instead of throwing after `done` was emitted.
  const { promptTokens = 0, completionTokens = 0 } = response.usage ?? {};
  return {
    content,
    thinkingContent,
    toolCalls,
    response,
    segments,
    timing: {
      ttft,
      completionTime,
      tps: completionTime > 0 ? completionTokens / completionTime : 0,
      prefillTps: ttft > 0 ? promptTokens / ttft : 0,
    },
  };
}
|
|
93
|
+
|
|
94
|
+
export {
|
|
95
|
+
streamWithSegments
|
|
96
|
+
};
|
|
97
|
+
//# sourceMappingURL=chunk-J2GP3J3X.js.map
|