@actalk/inkos-core 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/agent-session.d.ts +48 -0
- package/dist/agent/agent-session.d.ts.map +1 -0
- package/dist/agent/agent-session.js +229 -0
- package/dist/agent/agent-session.js.map +1 -0
- package/dist/agent/agent-system-prompt.d.ts +2 -0
- package/dist/agent/agent-system-prompt.d.ts.map +1 -0
- package/dist/agent/agent-system-prompt.js +97 -0
- package/dist/agent/agent-system-prompt.js.map +1 -0
- package/dist/agent/agent-tools.d.ts +30 -0
- package/dist/agent/agent-tools.d.ts.map +1 -0
- package/dist/agent/agent-tools.js +280 -0
- package/dist/agent/agent-tools.js.map +1 -0
- package/dist/agent/index.d.ts +4 -0
- package/dist/agent/index.d.ts.map +1 -0
- package/dist/agent/index.js +4 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/agents/architect.js +5 -5
- package/dist/agents/architect.js.map +1 -1
- package/dist/agents/chapter-analyzer.d.ts.map +1 -1
- package/dist/agents/chapter-analyzer.js +32 -4
- package/dist/agents/chapter-analyzer.js.map +1 -1
- package/dist/agents/consolidator.js +1 -1
- package/dist/agents/consolidator.js.map +1 -1
- package/dist/agents/continuity.js +1 -1
- package/dist/agents/continuity.js.map +1 -1
- package/dist/agents/fanfic-canon-importer.js +1 -1
- package/dist/agents/fanfic-canon-importer.js.map +1 -1
- package/dist/agents/foundation-reviewer.js +1 -1
- package/dist/agents/foundation-reviewer.js.map +1 -1
- package/dist/agents/radar.js +1 -1
- package/dist/agents/radar.js.map +1 -1
- package/dist/agents/settler-delta-parser.d.ts.map +1 -1
- package/dist/agents/settler-delta-parser.js +6 -1
- package/dist/agents/settler-delta-parser.js.map +1 -1
- package/dist/agents/state-validator.js +21 -3
- package/dist/agents/state-validator.js.map +1 -1
- package/dist/agents/writer-prompts.js +13 -13
- package/dist/agents/writer-prompts.js.map +1 -1
- package/dist/agents/writer.js +1 -1
- package/dist/agents/writer.js.map +1 -1
- package/dist/index.d.ts +9 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -2
- package/dist/index.js.map +1 -1
- package/dist/interaction/book-session-store.d.ts +6 -0
- package/dist/interaction/book-session-store.d.ts.map +1 -0
- package/dist/interaction/book-session-store.js +59 -0
- package/dist/interaction/book-session-store.js.map +1 -0
- package/dist/interaction/draft-directive-parser.d.ts +38 -0
- package/dist/interaction/draft-directive-parser.d.ts.map +1 -0
- package/dist/interaction/draft-directive-parser.js +208 -0
- package/dist/interaction/draft-directive-parser.js.map +1 -0
- package/dist/interaction/nl-router.d.ts +1 -0
- package/dist/interaction/nl-router.d.ts.map +1 -1
- package/dist/interaction/nl-router.js +1 -1
- package/dist/interaction/nl-router.js.map +1 -1
- package/dist/interaction/project-session-store.d.ts +3 -1
- package/dist/interaction/project-session-store.d.ts.map +1 -1
- package/dist/interaction/project-session-store.js +19 -1
- package/dist/interaction/project-session-store.js.map +1 -1
- package/dist/interaction/project-tools.d.ts +2 -0
- package/dist/interaction/project-tools.d.ts.map +1 -1
- package/dist/interaction/project-tools.js +186 -126
- package/dist/interaction/project-tools.js.map +1 -1
- package/dist/interaction/runtime.d.ts.map +1 -1
- package/dist/interaction/runtime.js +14 -1
- package/dist/interaction/runtime.js.map +1 -1
- package/dist/interaction/session.d.ts +725 -6
- package/dist/interaction/session.d.ts.map +1 -1
- package/dist/interaction/session.js +65 -0
- package/dist/interaction/session.js.map +1 -1
- package/dist/llm/config-migration.d.ts +5 -0
- package/dist/llm/config-migration.d.ts.map +1 -0
- package/dist/llm/config-migration.js +51 -0
- package/dist/llm/config-migration.js.map +1 -0
- package/dist/llm/provider.d.ts +4 -4
- package/dist/llm/provider.d.ts.map +1 -1
- package/dist/llm/provider.js +472 -540
- package/dist/llm/provider.js.map +1 -1
- package/dist/llm/secrets.d.ts +9 -0
- package/dist/llm/secrets.d.ts.map +1 -0
- package/dist/llm/secrets.js +31 -0
- package/dist/llm/secrets.js.map +1 -0
- package/dist/llm/service-presets.d.ts +37 -0
- package/dist/llm/service-presets.d.ts.map +1 -0
- package/dist/llm/service-presets.js +123 -0
- package/dist/llm/service-presets.js.map +1 -0
- package/dist/llm/service-resolver.d.ts +10 -0
- package/dist/llm/service-resolver.d.ts.map +1 -0
- package/dist/llm/service-resolver.js +46 -0
- package/dist/llm/service-resolver.js.map +1 -0
- package/dist/models/project.d.ts +158 -30
- package/dist/models/project.d.ts.map +1 -1
- package/dist/models/project.js +13 -0
- package/dist/models/project.js.map +1 -1
- package/dist/pipeline/chapter-persistence.d.ts.map +1 -1
- package/dist/pipeline/chapter-persistence.js +5 -1
- package/dist/pipeline/chapter-persistence.js.map +1 -1
- package/dist/pipeline/chapter-truth-validation.d.ts.map +1 -1
- package/dist/pipeline/chapter-truth-validation.js +27 -1
- package/dist/pipeline/chapter-truth-validation.js.map +1 -1
- package/dist/pipeline/runner.d.ts.map +1 -1
- package/dist/pipeline/runner.js +9 -3
- package/dist/pipeline/runner.js.map +1 -1
- package/dist/state/manager.d.ts +2 -0
- package/dist/state/manager.d.ts.map +1 -1
- package/dist/state/manager.js +10 -3
- package/dist/state/manager.js.map +1 -1
- package/dist/state/state-validator.d.ts.map +1 -1
- package/dist/state/state-validator.js +44 -33
- package/dist/state/state-validator.js.map +1 -1
- package/dist/utils/config-loader.d.ts.map +1 -1
- package/dist/utils/config-loader.js +149 -32
- package/dist/utils/config-loader.js.map +1 -1
- package/package.json +4 -3
package/dist/llm/provider.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
1
|
+
import { streamSimple as piStreamSimple, stream as piStream, completeSimple as piCompleteSimple, complete as piComplete, } from "@mariozechner/pi-ai";
|
|
2
|
+
import { resolveServicePreset } from "./service-presets.js";
|
|
3
3
|
export function createStreamMonitor(onProgress, intervalMs = 30000) {
|
|
4
4
|
let totalChars = 0;
|
|
5
5
|
let chineseChars = 0;
|
|
@@ -45,31 +45,42 @@ export function createLLMClient(config) {
|
|
|
45
45
|
};
|
|
46
46
|
const apiFormat = config.apiFormat ?? "chat";
|
|
47
47
|
const stream = config.stream ?? true;
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
apiFormat,
|
|
54
|
-
stream,
|
|
55
|
-
_anthropic: new Anthropic({ apiKey: config.apiKey, baseURL }),
|
|
56
|
-
defaults,
|
|
57
|
-
};
|
|
58
|
-
}
|
|
59
|
-
// openai or custom — both use OpenAI SDK
|
|
48
|
+
// --- Build pi-ai Model object ---
|
|
49
|
+
const serviceName = config.service ?? "custom";
|
|
50
|
+
const preset = resolveServicePreset(serviceName);
|
|
51
|
+
const piApi = resolvePiApi(serviceName, config.apiFormat, preset?.api);
|
|
52
|
+
const baseUrl = config.baseUrl || preset?.baseUrl || "";
|
|
60
53
|
const extraHeaders = config.headers ?? parseEnvHeaders();
|
|
54
|
+
const provider = config.provider === "anthropic" ? "anthropic" : "openai";
|
|
55
|
+
const piModel = {
|
|
56
|
+
id: config.model,
|
|
57
|
+
name: config.model,
|
|
58
|
+
api: piApi,
|
|
59
|
+
provider,
|
|
60
|
+
baseUrl,
|
|
61
|
+
reasoning: (config.thinkingBudget ?? 0) > 0,
|
|
62
|
+
input: ["text"],
|
|
63
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
64
|
+
contextWindow: 128_000,
|
|
65
|
+
maxTokens: config.maxTokens ?? 8192,
|
|
66
|
+
...(extraHeaders ? { headers: extraHeaders } : {}),
|
|
67
|
+
};
|
|
61
68
|
return {
|
|
62
|
-
provider
|
|
69
|
+
provider,
|
|
70
|
+
service: serviceName,
|
|
63
71
|
apiFormat,
|
|
64
72
|
stream,
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
baseURL: config.baseUrl,
|
|
68
|
-
...(extraHeaders ? { defaultHeaders: extraHeaders } : {}),
|
|
69
|
-
}),
|
|
73
|
+
_piModel: piModel,
|
|
74
|
+
_apiKey: config.apiKey,
|
|
70
75
|
defaults,
|
|
71
76
|
};
|
|
72
77
|
}
|
|
78
|
+
/**
 * Choose the pi-ai API identifier for a client.
 *
 * For the "custom" service the identifier is derived from the configured
 * apiFormat; for every other service the preset's API is used, falling back
 * to "openai-completions" when the preset does not declare one.
 *
 * @param {string} serviceName - Configured service name ("custom" or a preset name).
 * @param {string | undefined} apiFormat - Configured API format; only consulted for "custom".
 * @param {string | null | undefined} presetApi - API identifier declared by the preset, if any.
 * @returns {string} The API identifier to store on the model object.
 */
function resolvePiApi(serviceName, apiFormat, presetApi) {
    const isCustomService = serviceName === "custom";
    if (!isCustomService) {
        return presetApi ?? "openai-completions";
    }
    if (apiFormat === "responses") {
        return "openai-responses";
    }
    return "openai-completions";
}
|
|
73
84
|
function parseEnvHeaders() {
|
|
74
85
|
const raw = process.env.INKOS_LLM_HEADERS;
|
|
75
86
|
if (!raw)
|
|
@@ -173,15 +184,274 @@ function wrapLLMError(error, context) {
|
|
|
173
184
|
}
|
|
174
185
|
return error instanceof Error ? error : new Error(msg);
|
|
175
186
|
}
|
|
176
|
-
function
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
return
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
187
|
+
/**
 * Report whether a client should use the built-in fetch transport.
 *
 * @param {{ provider?: string, service?: string }} client - LLM client descriptor.
 * @returns {boolean} True only when provider is "openai" and service is "custom".
 */
function shouldUseNativeCustomTransport(client) {
    const { provider, service } = client;
    if (provider !== "openai") {
        return false;
    }
    return service === "custom";
}
|
|
190
|
+
/**
 * Build the HTTP headers for a request sent through the native custom transport.
 *
 * Defaults are a Bearer Authorization header (built from client._apiKey) and a
 * JSON Content-Type. Headers from client._piModel.headers are layered on top.
 *
 * HTTP header names are case-insensitive, but object keys are not: a user
 * header such as "authorization" would otherwise sit next to the default
 * "Authorization", and fetch would merge the two into one comma-joined value.
 * A default is therefore dropped whenever the user supplies the same header
 * in any casing.
 *
 * @param {{ _apiKey?: string, _piModel?: { headers?: Record<string, string> } }} client
 * @returns {Record<string, string>} Header map suitable for fetch().
 */
function buildCustomHeaders(client) {
    const customHeaders = client._piModel?.headers ?? {};
    const overriddenNames = new Set(Object.keys(customHeaders).map((name) => name.toLowerCase()));
    const defaultHeaders = {
        Authorization: `Bearer ${client._apiKey ?? ""}`,
        "Content-Type": "application/json",
    };
    const headers = {};
    for (const [name, value] of Object.entries(defaultHeaders)) {
        if (!overriddenNames.has(name.toLowerCase())) {
            headers[name] = value;
        }
    }
    return { ...headers, ...customHeaders };
}
|
|
197
|
+
/**
 * Merge all non-blank system messages into a single prompt string.
 *
 * Each system message is trimmed; blank ones are skipped and the rest are
 * joined with a blank line between them.
 *
 * @param {Array<{ role: string, content: string }>} messages - Conversation messages.
 * @returns {string | undefined} The merged prompt, or undefined when there is none.
 */
function joinSystemPrompt(messages) {
    const parts = [];
    for (const { role, content } of messages) {
        if (role !== "system") {
            continue;
        }
        const trimmed = content.trim();
        if (trimmed.length > 0) {
            parts.push(trimmed);
        }
    }
    return parts.length === 0 ? undefined : parts.join("\n\n");
}
|
|
203
|
+
/**
 * Project the non-system messages onto plain { role, content } objects for a
 * chat-completions payload. Any other message properties are left out.
 *
 * @param {Array<{ role: string, content: string }>} messages - Conversation messages.
 * @returns {Array<{ role: string, content: string }>} New array without system messages.
 */
function buildChatMessages(messages) {
    const chatMessages = [];
    for (const message of messages) {
        if (message.role === "system") {
            continue;
        }
        chatMessages.push({ role: message.role, content: message.content });
    }
    return chatMessages;
}
|
|
211
|
+
/**
 * Convert the non-system messages into Responses-API input items.
 *
 * System messages are excluded here; this function only filters them out.
 * Each remaining message becomes an item with a single text content part.
 * The part type depends on the role: the Responses API expects "output_text"
 * for assistant turns and "input_text" for everything else. Tagging prior
 * assistant turns as "input_text" is rejected by the API as an invalid
 * content type.
 *
 * @param {Array<{ role: string, content: string }>} messages - Conversation messages.
 * @returns {Array<{ role: string, content: Array<{ type: string, text: string }> }>}
 */
function buildResponsesInput(messages) {
    return messages
        .filter((message) => message.role !== "system")
        .map((message) => ({
        role: message.role,
        content: [
            {
                type: message.role === "assistant" ? "output_text" : "input_text",
                text: message.content,
            },
        ],
    }));
}
|
|
219
|
+
/**
 * Turn a failed HTTP response into a short "<status> <detail>" message.
 *
 * The body is read as text (a read failure counts as an empty body) and, when
 * it parses as JSON, the detail is taken from the first of:
 *   1. a non-empty string `error`
 *   2. an object `error` with a string `message`
 *   3. a non-empty string `detail`
 * Otherwise the raw body, or the status text when the body is empty, is used.
 *
 * @param {{ status: number, statusText: string, text: () => Promise<string> }} res
 * @returns {Promise<string>} Human-readable error summary.
 */
async function readErrorResponse(res) {
    const rawBody = await res.text().catch(() => "");
    let detail;
    try {
        const parsed = JSON.parse(rawBody);
        const errorField = parsed.error;
        if (typeof errorField === "string" && errorField) {
            detail = errorField;
        }
        else if (errorField && typeof errorField === "object" && typeof errorField.message === "string") {
            detail = errorField.message;
        }
        else if (typeof parsed.detail === "string" && parsed.detail) {
            detail = parsed.detail;
        }
    }
    catch {
        // Body is not usable JSON; the raw-text fallback below applies.
    }
    if (detail !== undefined) {
        return `${res.status} ${detail}`;
    }
    return `${res.status} ${rawBody || res.statusText}`.trim();
}
|
|
236
|
+
/**
 * Split a buffered server-sent-events stream into complete events.
 *
 * Events are separated by a blank line. Both LF and CRLF line endings are
 * accepted for the separator, matching the per-line split below. Splitting on
 * "\n\n" alone would never find an event boundary in a CRLF stream, and the
 * whole response would pile up in `rest`.
 *
 * Within an event, "event:" sets the event name (trimmed) and each "data:"
 * line contributes one data line (leading whitespace removed). Multiple data
 * lines are joined with "\n". Other lines, including ":" comments, are ignored.
 *
 * @param {string} buffer - Text accumulated so far and not yet consumed.
 * @returns {{ events: Array<{ event?: string, data?: string }>, rest: string }}
 *   The complete events plus the trailing incomplete text to keep buffering.
 */
function parseSseEvents(buffer) {
    const chunks = buffer.split(/\r?\n\r?\n/);
    // Whatever follows the last separator is an unfinished event.
    const rest = chunks.pop() ?? "";
    const events = [];
    for (const chunk of chunks) {
        const lines = chunk.split(/\r?\n/);
        let eventName;
        const dataLines = [];
        for (const line of lines) {
            if (line.startsWith("event:")) {
                eventName = line.slice("event:".length).trim();
            }
            else if (line.startsWith("data:")) {
                dataLines.push(line.slice("data:".length).trimStart());
            }
        }
        if (eventName || dataLines.length > 0) {
            events.push({
                ...(eventName ? { event: eventName } : {}),
                ...(dataLines.length > 0 ? { data: dataLines.join("\n") } : {}),
            });
        }
    }
    return { events, rest };
}
|
|
261
|
+
/**
 * Pull the assistant text out of a chat-completions response body.
 *
 * Reads choices[0].message.content. A string is returned as is. An array of
 * parts is flattened by concatenating each part's string `text`, or its string
 * `content` when `text` is not a string. Anything else yields "".
 *
 * @param {unknown} json - Parsed response body.
 * @returns {string} Extracted text, possibly empty.
 */
function extractChatContent(json) {
    const content = json?.choices?.[0]?.message?.content;
    if (typeof content === "string") {
        return content;
    }
    if (!Array.isArray(content)) {
        return "";
    }
    let text = "";
    for (const item of content) {
        if (typeof item?.text === "string") {
            text += item.text;
        }
        else if (typeof item?.content === "string") {
            text += item.content;
        }
    }
    return text;
}
|
|
272
|
+
/**
 * Pull the generated text out of a Responses-API response body.
 *
 * Walks every item of `output` that has an array `content` and concatenates,
 * per part, the first string found among `text`, `content` and `output_text`.
 * Items or parts that do not match contribute nothing.
 *
 * @param {unknown} json - Parsed response body (or the `response` object of a stream event).
 * @returns {string} Extracted text, possibly empty.
 */
function extractResponsesContent(json) {
    if (!Array.isArray(json?.output)) {
        return "";
    }
    let text = "";
    for (const item of json.output) {
        if (!Array.isArray(item?.content)) {
            continue;
        }
        for (const part of item.content) {
            if (typeof part?.text === "string") {
                text += part.text;
            }
            else if (typeof part?.content === "string") {
                text += part.content;
            }
            else if (typeof part?.output_text === "string") {
                text += part.output_text;
            }
        }
    }
    return text;
}
|
|
287
|
+
/**
 * Fold one decoded Responses-API stream event into the running state.
 *
 * Text deltas are forwarded through `emit`. The "response.completed" event
 * supplies token usage and, if no delta was ever seen, the full text.
 */
function applyResponsesStreamEvent(json, state, emit) {
    if (json?.type === "response.output_text.delta" && typeof json.delta === "string") {
        emit(json.delta);
    }
    if (json?.type === "response.completed") {
        const reported = json.response?.usage;
        state.usage = {
            promptTokens: reported?.input_tokens ?? 0,
            completionTokens: reported?.output_tokens ?? 0,
            totalTokens: reported?.total_tokens ?? 0,
        };
        if (!state.content) {
            state.content = extractResponsesContent(json.response);
        }
    }
}
/**
 * Fold one decoded chat-completions stream chunk into the running state.
 *
 * Usage fields that a chunk omits keep their previously recorded value.
 */
function applyChatStreamEvent(json, state, emit) {
    const delta = json?.choices?.[0]?.delta?.content;
    if (typeof delta === "string") {
        emit(delta);
    }
    if (json?.usage) {
        state.usage = {
            promptTokens: json.usage.prompt_tokens ?? state.usage.promptTokens,
            completionTokens: json.usage.completion_tokens ?? state.usage.completionTokens,
            totalTokens: json.usage.total_tokens ?? state.usage.totalTokens,
        };
    }
}
/**
 * Read an SSE response body to the end and accumulate text and usage.
 *
 * `applyEvent(json, state, emit)` interprets each decoded JSON event for the
 * API in use. The stream monitor is created only once a readable body exists
 * and is always stopped in `finally`, so it cannot outlive the call.
 */
async function collectCustomSseStream(response, errorCtx, onStreamProgress, onTextDelta, applyEvent) {
    const reader = response.body?.getReader();
    if (!reader)
        throw wrapLLMError(new Error("Streaming body unavailable"), errorCtx);
    const decoder = new TextDecoder();
    const monitor = createStreamMonitor(onStreamProgress);
    const state = {
        content: "",
        usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
    };
    const emit = (text) => {
        state.content += text;
        monitor.onChunk(text);
        onTextDelta?.(text);
    };
    const handleEvents = (events, tolerateBadJson) => {
        for (const event of events) {
            // "[DONE]" is the end-of-stream sentinel, not JSON.
            if (!event.data || event.data === "[DONE]")
                continue;
            let json;
            try {
                json = JSON.parse(event.data);
            }
            catch (error) {
                if (tolerateBadJson)
                    continue;
                throw error;
            }
            applyEvent(json, state, emit);
        }
    };
    let buffer = "";
    try {
        while (true) {
            const { value, done } = await reader.read();
            if (done)
                break;
            buffer += decoder.decode(value, { stream: true });
            const parsed = parseSseEvents(buffer);
            buffer = parsed.rest;
            handleEvents(parsed.events, false);
        }
        // Flush bytes still held by the decoder, then process a final event
        // that was not followed by a blank line. Trailing text that is not
        // valid JSON is treated as a truncated event and ignored, so a cut-off
        // tail does not turn an otherwise usable response into an error.
        buffer += decoder.decode();
        if (buffer.trim().length > 0) {
            handleEvents(parseSseEvents(`${buffer}\n\n`).events, true);
        }
    }
    finally {
        monitor.stop();
    }
    if (!state.content) {
        throw wrapLLMError(new Error("LLM returned empty response from stream"), errorCtx);
    }
    return { content: state.content, usage: state.usage };
}
/**
 * Run a chat completion against a custom OpenAI-compatible endpoint using fetch.
 *
 * Depending on client.apiFormat the request goes to `<baseUrl>/responses`
 * (Responses API) or `<baseUrl>/chat/completions`. With client.stream set the
 * body is consumed as server-sent events and text deltas are reported as they
 * arrive; otherwise the JSON body is read once and the full text is reported
 * through onTextDelta in a single call.
 *
 * @param {object} client - LLM client; reads _piModel.baseUrl, apiFormat, stream, plus header fields.
 * @param {string} model - Model name sent in the payload.
 * @param {Array<{ role: string, content: string }>} messages - Conversation messages.
 * @param {{ temperature?: number, maxTokens?: number, extra?: object }} resolved - Resolved call options;
 *   `extra` is passed through stripReservedKeys and spread into the payload.
 * @param {Function} [onStreamProgress] - Passed to createStreamMonitor while streaming.
 * @param {(text: string) => void} [onTextDelta] - Receives generated text as it becomes available.
 * @returns {Promise<{ content: string, usage: { promptTokens: number, completionTokens: number, totalTokens: number } }>}
 * @throws {Error} Wrapped by wrapLLMError on HTTP failure, missing stream body, or empty output.
 */
async function chatCompletionViaCustomOpenAICompatible(client, model, messages, resolved, onStreamProgress, onTextDelta) {
    const baseUrl = client._piModel?.baseUrl ?? "";
    const headers = buildCustomHeaders(client);
    const errorCtx = { baseUrl, model };
    const extra = stripReservedKeys(resolved.extra);
    const useResponsesApi = client.apiFormat === "responses";
    let payload;
    if (useResponsesApi) {
        payload = {
            model,
            input: buildResponsesInput(messages),
            stream: client.stream,
            store: false,
            max_output_tokens: resolved.maxTokens,
            temperature: resolved.temperature,
            ...extra,
        };
        const instructions = joinSystemPrompt(messages);
        if (instructions)
            payload.instructions = instructions;
    }
    else {
        payload = {
            model,
            messages: [
                // System messages are hoisted to the front, then the rest in order.
                ...messages
                    .filter((message) => message.role === "system")
                    .map((message) => ({ role: "system", content: message.content })),
                ...buildChatMessages(messages),
            ],
            stream: client.stream,
            temperature: resolved.temperature,
            max_tokens: resolved.maxTokens,
            ...extra,
        };
        if (client.stream) {
            // Ask for token usage in the stream.
            payload.stream_options = { include_usage: true };
        }
    }
    const endpoint = `${baseUrl.replace(/\/$/, "")}/${useResponsesApi ? "responses" : "chat/completions"}`;
    const response = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
    });
    if (!response.ok) {
        throw wrapLLMError(new Error(await readErrorResponse(response)), errorCtx);
    }
    if (!client.stream) {
        const json = await response.json();
        const content = useResponsesApi ? extractResponsesContent(json) : extractChatContent(json);
        if (!content) {
            throw wrapLLMError(new Error("LLM returned empty response"), errorCtx);
        }
        // Deliver the whole text in one call so delta listeners also work
        // when streaming is disabled.
        onTextDelta?.(content);
        const reported = json?.usage;
        return {
            content,
            usage: {
                promptTokens: (useResponsesApi ? reported?.input_tokens : reported?.prompt_tokens) ?? 0,
                completionTokens: (useResponsesApi ? reported?.output_tokens : reported?.completion_tokens) ?? 0,
                totalTokens: reported?.total_tokens ?? 0,
            },
        };
    }
    return await collectCustomSseStream(response, errorCtx, onStreamProgress, onTextDelta, useResponsesApi ? applyResponsesStreamEvent : applyChatStreamEvent);
}
|
|
186
456
|
// === Simple Chat (used by all agents via BaseAgent.chat()) ===
|
|
187
457
|
export async function chatCompletion(client, model, messages, options) {
|
|
@@ -194,21 +464,12 @@ export async function chatCompletion(client, model, messages, options) {
|
|
|
194
464
|
};
|
|
195
465
|
const onStreamProgress = options?.onStreamProgress;
|
|
196
466
|
const onTextDelta = options?.onTextDelta;
|
|
197
|
-
const errorCtx = { baseUrl: client.
|
|
467
|
+
const errorCtx = { baseUrl: client._piModel?.baseUrl ?? "(unknown)", model };
|
|
198
468
|
try {
|
|
199
|
-
if (client
|
|
200
|
-
return client
|
|
201
|
-
? await chatCompletionAnthropic(client._anthropic, model, messages, resolved, client.defaults.thinkingBudget, onStreamProgress, onTextDelta)
|
|
202
|
-
: await chatCompletionAnthropicSync(client._anthropic, model, messages, resolved, client.defaults.thinkingBudget, onTextDelta);
|
|
203
|
-
}
|
|
204
|
-
if (client.apiFormat === "responses") {
|
|
205
|
-
return client.stream
|
|
206
|
-
? await chatCompletionOpenAIResponses(client._openai, model, messages, resolved, options?.webSearch, onStreamProgress, onTextDelta)
|
|
207
|
-
: await chatCompletionOpenAIResponsesSync(client._openai, model, messages, resolved, options?.webSearch, onTextDelta);
|
|
469
|
+
if (shouldUseNativeCustomTransport(client)) {
|
|
470
|
+
return await chatCompletionViaCustomOpenAICompatible(client, model, messages, resolved, onStreamProgress, onTextDelta);
|
|
208
471
|
}
|
|
209
|
-
return client
|
|
210
|
-
? await chatCompletionOpenAIChat(client._openai, model, messages, resolved, options?.webSearch, onStreamProgress, onTextDelta)
|
|
211
|
-
: await chatCompletionOpenAIChatSync(client._openai, model, messages, resolved, options?.webSearch, onTextDelta);
|
|
472
|
+
return await chatCompletionViaPiAi(client, model, messages, resolved, onStreamProgress, onTextDelta);
|
|
212
473
|
}
|
|
213
474
|
catch (error) {
|
|
214
475
|
// Stream interrupted but partial content is usable — return truncated response
|
|
@@ -218,51 +479,9 @@ export async function chatCompletion(client, model, messages, options) {
|
|
|
218
479
|
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
219
480
|
};
|
|
220
481
|
}
|
|
221
|
-
// Auto-fallback: if streaming failed, retry with sync (many proxies don't support SSE)
|
|
222
|
-
if (client.stream) {
|
|
223
|
-
const isStreamRelated = isLikelyStreamError(error);
|
|
224
|
-
if (isStreamRelated) {
|
|
225
|
-
try {
|
|
226
|
-
if (client.provider === "anthropic") {
|
|
227
|
-
return await chatCompletionAnthropicSync(client._anthropic, model, messages, resolved, client.defaults.thinkingBudget);
|
|
228
|
-
}
|
|
229
|
-
if (client.apiFormat === "responses") {
|
|
230
|
-
return await chatCompletionOpenAIResponsesSync(client._openai, model, messages, resolved, options?.webSearch);
|
|
231
|
-
}
|
|
232
|
-
return await chatCompletionOpenAIChatSync(client._openai, model, messages, resolved, options?.webSearch);
|
|
233
|
-
}
|
|
234
|
-
catch (syncError) {
|
|
235
|
-
if (isStreamRequiredError(syncError)) {
|
|
236
|
-
throw wrapStreamRequiredError(error, syncError, errorCtx);
|
|
237
|
-
}
|
|
238
|
-
throw wrapLLMError(syncError, errorCtx);
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
}
|
|
242
482
|
throw wrapLLMError(error, errorCtx);
|
|
243
483
|
}
|
|
244
484
|
}
|
|
245
|
-
function isLikelyStreamError(error) {
|
|
246
|
-
const msg = String(error).toLowerCase();
|
|
247
|
-
// Common indicators that streaming specifically is the problem:
|
|
248
|
-
// - SSE parse errors, chunked transfer issues, content-type mismatches
|
|
249
|
-
// - Some proxies return 400/415 when stream=true
|
|
250
|
-
// - "stream" mentioned in error, or generic network errors during streaming
|
|
251
|
-
return (msg.includes("stream") ||
|
|
252
|
-
msg.includes("text/event-stream") ||
|
|
253
|
-
msg.includes("chunked") ||
|
|
254
|
-
msg.includes("unexpected end") ||
|
|
255
|
-
msg.includes("premature close") ||
|
|
256
|
-
msg.includes("terminated") ||
|
|
257
|
-
msg.includes("econnreset") ||
|
|
258
|
-
(msg.includes("400") && !msg.includes("content")));
|
|
259
|
-
}
|
|
260
|
-
function isStreamRequiredError(error) {
|
|
261
|
-
const msg = String(error).toLowerCase();
|
|
262
|
-
return (msg.includes("stream must be set to true") ||
|
|
263
|
-
(msg.includes("stream") && msg.includes("must be set to true")) ||
|
|
264
|
-
(msg.includes("stream") && msg.includes("required")));
|
|
265
|
-
}
|
|
266
485
|
// === Tool-calling Chat (used by agent loop) ===
|
|
267
486
|
export async function chatWithTools(client, model, messages, tools, options) {
|
|
268
487
|
try {
|
|
@@ -270,375 +489,172 @@ export async function chatWithTools(client, model, messages, tools, options) {
|
|
|
270
489
|
temperature: clampTemperatureForModel(model, options?.temperature ?? client.defaults.temperature),
|
|
271
490
|
maxTokens: options?.maxTokens ?? client.defaults.maxTokens,
|
|
272
491
|
};
|
|
273
|
-
|
|
274
|
-
if (client.provider === "anthropic") {
|
|
275
|
-
return await chatWithToolsAnthropic(client._anthropic, model, messages, tools, resolved, client.defaults.thinkingBudget);
|
|
276
|
-
}
|
|
277
|
-
if (client.apiFormat === "responses") {
|
|
278
|
-
return await chatWithToolsOpenAIResponses(client._openai, model, messages, tools, resolved);
|
|
279
|
-
}
|
|
280
|
-
return await chatWithToolsOpenAIChat(client._openai, model, messages, tools, resolved);
|
|
492
|
+
return await chatWithToolsViaPiAi(client, model, messages, tools, resolved);
|
|
281
493
|
}
|
|
282
494
|
catch (error) {
|
|
283
495
|
throw wrapLLMError(error);
|
|
284
496
|
}
|
|
285
497
|
}
|
|
286
|
-
// ===
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
};
|
|
298
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
299
|
-
const stream = await client.chat.completions.create(createParams);
|
|
300
|
-
const chunks = [];
|
|
301
|
-
let inputTokens = 0;
|
|
302
|
-
let outputTokens = 0;
|
|
303
|
-
const monitor = createStreamMonitor(onStreamProgress);
|
|
304
|
-
try {
|
|
305
|
-
for await (const chunk of stream) {
|
|
306
|
-
const delta = chunk.choices[0]?.delta?.content;
|
|
307
|
-
if (delta) {
|
|
308
|
-
chunks.push(delta);
|
|
309
|
-
monitor.onChunk(delta);
|
|
310
|
-
onTextDelta?.(delta);
|
|
311
|
-
}
|
|
312
|
-
if (chunk.usage) {
|
|
313
|
-
inputTokens = chunk.usage.prompt_tokens ?? 0;
|
|
314
|
-
outputTokens = chunk.usage.completion_tokens ?? 0;
|
|
315
|
-
}
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
catch (streamError) {
|
|
319
|
-
monitor.stop();
|
|
320
|
-
const partial = chunks.join("");
|
|
321
|
-
if (partial.length >= MIN_SALVAGEABLE_CHARS) {
|
|
322
|
-
throw new PartialResponseError(partial, streamError);
|
|
323
|
-
}
|
|
324
|
-
throw streamError;
|
|
325
|
-
}
|
|
326
|
-
finally {
|
|
327
|
-
monitor.stop();
|
|
328
|
-
}
|
|
329
|
-
const content = chunks.join("");
|
|
330
|
-
if (!content)
|
|
331
|
-
throw new Error("LLM returned empty response from stream");
|
|
332
|
-
return {
|
|
333
|
-
content,
|
|
334
|
-
usage: {
|
|
335
|
-
promptTokens: inputTokens,
|
|
336
|
-
completionTokens: outputTokens,
|
|
337
|
-
totalTokens: inputTokens + outputTokens,
|
|
338
|
-
},
|
|
339
|
-
};
|
|
340
|
-
}
|
|
341
|
-
async function chatCompletionOpenAIChatSync(client, model, messages, options, _webSearch, onTextDelta) {
|
|
342
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
343
|
-
const syncParams = {
|
|
344
|
-
model,
|
|
345
|
-
messages: messages.map((m) => ({ role: m.role, content: m.content })),
|
|
346
|
-
temperature: options.temperature,
|
|
347
|
-
max_tokens: options.maxTokens,
|
|
348
|
-
stream: false,
|
|
349
|
-
...stripReservedKeys(options.extra),
|
|
350
|
-
};
|
|
351
|
-
const response = await client.chat.completions.create(syncParams);
|
|
352
|
-
const content = response.choices[0]?.message?.content ?? "";
|
|
353
|
-
if (!content)
|
|
354
|
-
throw new Error("LLM returned empty response");
|
|
355
|
-
onTextDelta?.(content);
|
|
356
|
-
return {
|
|
357
|
-
content,
|
|
358
|
-
usage: {
|
|
359
|
-
promptTokens: response.usage?.prompt_tokens ?? 0,
|
|
360
|
-
completionTokens: response.usage?.completion_tokens ?? 0,
|
|
361
|
-
totalTokens: response.usage?.total_tokens ?? 0,
|
|
362
|
-
},
|
|
363
|
-
};
|
|
498
|
+
// === pi-ai Unified Implementation ===
|
|
499
|
+
/**
 * Return the pi-ai model descriptor to use for one call.
 *
 * client._piModel serves as the template. When the requested model matches
 * its id the template object itself is returned; otherwise a shallow copy
 * with `id` and `name` replaced by the requested model is returned and the
 * template is left untouched.
 *
 * @param {{ _piModel: { id: string, name: string } }} client - LLM client holding the template.
 * @param {string} model - Model name requested for this call.
 * @returns {object} The template or a per-call copy of it.
 */
function resolvePiModel(client, model) {
    const template = client._piModel;
    return template.id === model
        ? template
        : { ...template, id: model, name: model };
}
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
const
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
}));
|
|
375
|
-
const stream = await client.chat.completions.create({
|
|
376
|
-
model,
|
|
377
|
-
messages: openaiMessages,
|
|
378
|
-
tools: openaiTools,
|
|
379
|
-
temperature: options.temperature,
|
|
380
|
-
max_tokens: options.maxTokens,
|
|
381
|
-
stream: true,
|
|
382
|
-
});
|
|
383
|
-
let content = "";
|
|
384
|
-
const toolCallMap = new Map();
|
|
385
|
-
for await (const chunk of stream) {
|
|
386
|
-
const delta = chunk.choices[0]?.delta;
|
|
387
|
-
if (delta?.content)
|
|
388
|
-
content += delta.content;
|
|
389
|
-
if (delta?.tool_calls) {
|
|
390
|
-
for (const tc of delta.tool_calls) {
|
|
391
|
-
const existing = toolCallMap.get(tc.index);
|
|
392
|
-
if (existing) {
|
|
393
|
-
existing.arguments += tc.function?.arguments ?? "";
|
|
394
|
-
}
|
|
395
|
-
else {
|
|
396
|
-
toolCallMap.set(tc.index, {
|
|
397
|
-
id: tc.id ?? "",
|
|
398
|
-
name: tc.function?.name ?? "",
|
|
399
|
-
arguments: tc.function?.arguments ?? "",
|
|
400
|
-
});
|
|
401
|
-
}
|
|
402
|
-
}
|
|
511
|
+
/**
 * Convert inkos LLMMessage[] into a pi-ai context object.
 *
 * System messages are joined with a blank line into `systemPrompt` (undefined
 * when there are none). User messages are passed through with a timestamp.
 * Every other message is emitted as an assistant message carrying a single
 * text part and placeholder metadata (fixed api/provider, empty model,
 * zeroed usage, stopReason "stop").
 *
 * @param {Array<{ role: string, content: string }>} messages - Conversation messages.
 * @returns {{ systemPrompt: string | undefined, messages: object[] }}
 */
function toPiContext(messages) {
    const systemTexts = [];
    const piMessages = [];
    for (const message of messages) {
        if (message.role === "system") {
            systemTexts.push(message.content);
            continue;
        }
        if (message.role === "user") {
            piMessages.push({ role: "user", content: message.content, timestamp: Date.now() });
            continue;
        }
        // Anything that is neither system nor user is treated as an assistant turn.
        piMessages.push({
            role: "assistant",
            content: [{ type: "text", text: message.content }],
            api: "openai-completions",
            provider: "openai",
            model: "",
            usage: {
                input: 0,
                output: 0,
                cacheRead: 0,
                cacheWrite: 0,
                totalTokens: 0,
                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
            },
            stopReason: "stop",
            timestamp: Date.now(),
        });
    }
    const systemPrompt = systemTexts.length === 0 ? undefined : systemTexts.join("\n\n");
    return { systemPrompt, messages: piMessages };
}
|
|
408
|
-
|
|
409
|
-
|
|
535
|
+
/**
 * Convert inkos AgentMessage[] to a pi-ai Context (with tool calls/results).
 *
 * - `system` messages are removed from the message list and joined with a
 *   blank line into `systemPrompt` (`undefined` when there are none).
 * - `user` messages are passed through with a timestamp.
 * - `assistant` messages become a list of content blocks: an optional text
 *   block followed by one `toolCall` block per entry in `msg.toolCalls`.
 *   An assistant message with neither gets a single empty text block.
 * - `tool` messages become `toolResult` messages.
 * - Messages with any other role are dropped.
 *
 * @param {Array<object>} messages inkos agent messages; the fields read here
 *   are `role`, `content`, `toolCalls` (`id`, `name`, `arguments`) and
 *   `toolCallId`.
 * @returns {{ systemPrompt: (string|undefined), messages: Array<object> }}
 * @throws {SyntaxError} when a tool call carries non-empty arguments that are
 *   not valid JSON.
 */
function agentMessagesToPiContext(messages) {
    // inkos stores tool-call arguments as JSON text while pi-ai expects the
    // parsed value. A call without parameters may be recorded with an empty
    // string, which JSON.parse rejects, so treat "no arguments" as {}.
    const parseToolArguments = (raw) => {
        if (raw == null) {
            return {};
        }
        if (typeof raw !== "string") {
            return raw;
        }
        return raw.trim() === "" ? {} : JSON.parse(raw);
    };
    const systemParts = [];
    const piMessages = [];
    // Tool-call id -> tool name, filled while walking assistant messages so
    // that a later tool result can be labelled with the tool that produced it.
    // NOTE(review): presumably some pi-ai provider adapters need the name on
    // the result message — confirm against the pi-ai provider code.
    const toolNamesById = new Map();
    for (const msg of messages) {
        if (msg.role === "system") {
            systemParts.push(msg.content);
            continue;
        }
        if (msg.role === "user") {
            piMessages.push({ role: "user", content: msg.content, timestamp: Date.now() });
            continue;
        }
        if (msg.role === "assistant") {
            const content = [];
            if (msg.content) {
                content.push({ type: "text", text: msg.content });
            }
            for (const tc of msg.toolCalls ?? []) {
                toolNamesById.set(tc.id, tc.name);
                content.push({
                    type: "toolCall",
                    id: tc.id,
                    name: tc.name,
                    arguments: parseToolArguments(tc.arguments),
                });
            }
            if (content.length === 0) {
                content.push({ type: "text", text: "" });
            }
            // api/provider/model/usage are placeholders: the inkos history
            // does not record them for past assistant turns.
            piMessages.push({
                role: "assistant",
                content,
                api: "openai-completions",
                provider: "openai",
                model: "",
                usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
                stopReason: "stop",
                timestamp: Date.now(),
            });
            continue;
        }
        if (msg.role === "tool") {
            piMessages.push({
                role: "toolResult",
                toolCallId: msg.toolCallId,
                // Falls back to "" when no earlier assistant message in this
                // history issued a call with this id.
                toolName: toolNamesById.get(msg.toolCallId) ?? "",
                content: [{ type: "text", text: msg.content }],
                isError: false,
                timestamp: Date.now(),
            });
        }
    }
    const systemPrompt = systemParts.length > 0 ? systemParts.join("\n\n") : undefined;
    return { systemPrompt, messages: piMessages };
}
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
type: "function",
|
|
588
|
+
/**
 * Convert inkos ToolDefinition[] to pi-ai Tool[].
 *
 * Each output entry carries only `name`, `description` and `parameters`,
 * copied by reference from the corresponding input definition; any other
 * fields on the definition are left out.
 *
 * @param {Array<object>} tools inkos tool definitions.
 * @returns {Array<{ name: *, description: *, parameters: * }>} a new array,
 *   one entry per input definition, in the same order.
 */
function toPiTools(tools) {
    const pickToolFields = ({ name, description, parameters }) => ({ name, description, parameters });
    return tools.map(pickToolFields);
}
|
|
564
|
-
function
|
|
565
|
-
const
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
if (
|
|
576
|
-
|
|
577
|
-
result.push({ role: "assistant", content: msg.content });
|
|
578
|
-
}
|
|
579
|
-
if (msg.toolCalls) {
|
|
580
|
-
for (const tc of msg.toolCalls) {
|
|
581
|
-
result.push({
|
|
582
|
-
type: "function_call",
|
|
583
|
-
call_id: tc.id,
|
|
584
|
-
name: tc.name,
|
|
585
|
-
arguments: tc.arguments,
|
|
586
|
-
});
|
|
587
|
-
}
|
|
588
|
-
}
|
|
589
|
-
continue;
|
|
596
|
+
async function chatCompletionViaPiAi(client, model, messages, resolved, onStreamProgress, onTextDelta) {
|
|
597
|
+
const piModel = resolvePiModel(client, model);
|
|
598
|
+
const context = toPiContext(messages);
|
|
599
|
+
const streamOpts = {
|
|
600
|
+
temperature: resolved.temperature,
|
|
601
|
+
maxTokens: resolved.maxTokens,
|
|
602
|
+
apiKey: client._apiKey,
|
|
603
|
+
headers: piModel.headers,
|
|
604
|
+
};
|
|
605
|
+
if (!client.stream) {
|
|
606
|
+
const response = await piCompleteSimple(piModel, context, streamOpts);
|
|
607
|
+
if (response.stopReason === "error" && response.errorMessage) {
|
|
608
|
+
throw new Error(response.errorMessage);
|
|
590
609
|
}
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
}
|
|
610
|
+
const content = response.content
|
|
611
|
+
.filter((block) => block.type === "text")
|
|
612
|
+
.map((block) => block.text)
|
|
613
|
+
.join("");
|
|
614
|
+
if (!content) {
|
|
615
|
+
const diag = `usage=${response.usage.input}+${response.usage.output}`;
|
|
616
|
+
console.warn(`[inkos] LLM 非流式响应无文本内容 (${diag})`);
|
|
617
|
+
throw new Error(`LLM returned empty response (${diag})`);
|
|
597
618
|
}
|
|
619
|
+
return {
|
|
620
|
+
content,
|
|
621
|
+
usage: {
|
|
622
|
+
promptTokens: response.usage.input,
|
|
623
|
+
completionTokens: response.usage.output,
|
|
624
|
+
totalTokens: response.usage.totalTokens,
|
|
625
|
+
},
|
|
626
|
+
};
|
|
598
627
|
}
|
|
599
|
-
|
|
600
|
-
}
|
|
601
|
-
// === Anthropic Implementation ===
|
|
602
|
-
async function chatCompletionAnthropic(client, model, messages, options, thinkingBudget = 0, onStreamProgress, onTextDelta) {
|
|
603
|
-
const systemText = messages
|
|
604
|
-
.filter((m) => m.role === "system")
|
|
605
|
-
.map((m) => m.content)
|
|
606
|
-
.join("\n\n");
|
|
607
|
-
const nonSystem = messages.filter((m) => m.role !== "system");
|
|
608
|
-
const stream = await client.messages.create({
|
|
609
|
-
model,
|
|
610
|
-
...(systemText ? { system: systemText } : {}),
|
|
611
|
-
messages: nonSystem.map((m) => ({
|
|
612
|
-
role: m.role,
|
|
613
|
-
content: m.content,
|
|
614
|
-
})),
|
|
615
|
-
...(thinkingBudget > 0
|
|
616
|
-
? { thinking: { type: "enabled", budget_tokens: thinkingBudget } }
|
|
617
|
-
: { temperature: options.temperature }),
|
|
618
|
-
max_tokens: options.maxTokens,
|
|
619
|
-
stream: true,
|
|
620
|
-
});
|
|
628
|
+
const eventStream = piStreamSimple(piModel, context, streamOpts);
|
|
621
629
|
const chunks = [];
|
|
630
|
+
const monitor = createStreamMonitor(onStreamProgress);
|
|
622
631
|
let inputTokens = 0;
|
|
623
632
|
let outputTokens = 0;
|
|
624
|
-
const monitor = createStreamMonitor(onStreamProgress);
|
|
625
633
|
try {
|
|
626
|
-
for await (const event of
|
|
627
|
-
if (event.type === "
|
|
628
|
-
chunks.push(event.delta
|
|
629
|
-
monitor.onChunk(event.delta
|
|
630
|
-
onTextDelta?.(event.delta
|
|
631
|
-
}
|
|
632
|
-
if (event.type === "message_start") {
|
|
633
|
-
inputTokens = event.message.usage?.input_tokens ?? 0;
|
|
634
|
+
for await (const event of eventStream) {
|
|
635
|
+
if (event.type === "text_delta") {
|
|
636
|
+
chunks.push(event.delta);
|
|
637
|
+
monitor.onChunk(event.delta);
|
|
638
|
+
onTextDelta?.(event.delta);
|
|
634
639
|
}
|
|
635
|
-
if (event.type === "
|
|
636
|
-
|
|
640
|
+
if (event.type === "done" || event.type === "error") {
|
|
641
|
+
const msg = event.type === "done" ? event.message : event.error;
|
|
642
|
+
inputTokens = msg.usage.input;
|
|
643
|
+
outputTokens = msg.usage.output;
|
|
644
|
+
if (event.type === "error" && msg.errorMessage) {
|
|
645
|
+
const partial = chunks.join("");
|
|
646
|
+
if (partial.length >= MIN_SALVAGEABLE_CHARS) {
|
|
647
|
+
throw new PartialResponseError(partial, new Error(msg.errorMessage));
|
|
648
|
+
}
|
|
649
|
+
throw new Error(msg.errorMessage);
|
|
650
|
+
}
|
|
637
651
|
}
|
|
638
652
|
}
|
|
639
653
|
}
|
|
640
654
|
catch (streamError) {
|
|
641
655
|
monitor.stop();
|
|
656
|
+
if (streamError instanceof PartialResponseError)
|
|
657
|
+
throw streamError;
|
|
642
658
|
const partial = chunks.join("");
|
|
643
659
|
if (partial.length >= MIN_SALVAGEABLE_CHARS) {
|
|
644
660
|
throw new PartialResponseError(partial, streamError);
|
|
@@ -649,8 +665,11 @@ async function chatCompletionAnthropic(client, model, messages, options, thinkin
|
|
|
649
665
|
monitor.stop();
|
|
650
666
|
}
|
|
651
667
|
const content = chunks.join("");
|
|
652
|
-
if (!content)
|
|
653
|
-
|
|
668
|
+
if (!content) {
|
|
669
|
+
const diag = `usage=${inputTokens}+${outputTokens}`;
|
|
670
|
+
console.warn(`[inkos] LLM 流式响应无文本内容 (${diag})`);
|
|
671
|
+
throw new Error(`LLM returned empty response from stream (${diag})`);
|
|
672
|
+
}
|
|
654
673
|
return {
|
|
655
674
|
content,
|
|
656
675
|
usage: {
|
|
@@ -660,139 +679,52 @@ async function chatCompletionAnthropic(client, model, messages, options, thinkin
|
|
|
660
679
|
},
|
|
661
680
|
};
|
|
662
681
|
}
|
|
663
|
-
async function
|
|
664
|
-
const
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
messages: nonSystem.map((m) => ({
|
|
673
|
-
role: m.role,
|
|
674
|
-
content: m.content,
|
|
675
|
-
})),
|
|
676
|
-
...(thinkingBudget > 0
|
|
677
|
-
? { thinking: { type: "enabled", budget_tokens: thinkingBudget } }
|
|
678
|
-
: { temperature: options.temperature }),
|
|
679
|
-
max_tokens: options.maxTokens,
|
|
680
|
-
});
|
|
681
|
-
const content = response.content
|
|
682
|
-
.filter((block) => block.type === "text")
|
|
683
|
-
.map((block) => block.text)
|
|
684
|
-
.join("");
|
|
685
|
-
if (!content)
|
|
686
|
-
throw new Error("LLM returned empty response");
|
|
687
|
-
onTextDelta?.(content);
|
|
688
|
-
return {
|
|
689
|
-
content,
|
|
690
|
-
usage: {
|
|
691
|
-
promptTokens: response.usage?.input_tokens ?? 0,
|
|
692
|
-
completionTokens: response.usage?.output_tokens ?? 0,
|
|
693
|
-
totalTokens: (response.usage?.input_tokens ?? 0) + (response.usage?.output_tokens ?? 0),
|
|
694
|
-
},
|
|
682
|
+
/**
 * Run one tool-enabled chat turn through pi-ai and return the assistant's
 * text together with the tool calls it requested.
 *
 * When `client.stream` is falsy the request is made with `piComplete` and the
 * result is read from the returned content blocks; otherwise `piStream` is
 * consumed event by event, accumulating `text_delta` text and collecting one
 * tool call per `toolcall_end` event.
 *
 * @param {object} client inkos client; `client.stream` and `client._apiKey`
 *   are read here, and the client is passed to `resolvePiModel`.
 * @param {string} model model identifier passed to `resolvePiModel`.
 * @param {Array<object>} messages inkos agent messages (see
 *   `agentMessagesToPiContext`).
 * @param {Array<object>} tools inkos tool definitions (see `toPiTools`).
 * @param {object} resolved resolved options; `temperature` and `maxTokens`
 *   are forwarded.
 * @returns {Promise<{ content: string, toolCalls: Array<{ id: *, name: *, arguments: string }> }>}
 *   tool-call `arguments` are JSON text.
 * @throws {Error} when pi-ai reports an error, with or without a message.
 */
async function chatWithToolsViaPiAi(client, model, messages, tools, resolved) {
    const piModel = resolvePiModel(client, model);
    const context = agentMessagesToPiContext(messages);
    context.tools = toPiTools(tools);
    const streamOpts = {
        temperature: resolved.temperature,
        maxTokens: resolved.maxTokens,
        apiKey: client._apiKey,
        headers: piModel.headers,
    };
    // pi-ai hands over parsed arguments; inkos keeps them as JSON text.
    // `?? {}` keeps the field a string even when a call has no arguments
    // (JSON.stringify(undefined) would otherwise yield undefined).
    const toInkosToolCall = (call) => ({
        id: call.id,
        name: call.name,
        arguments: JSON.stringify(call.arguments ?? {}),
    });
    if (!client.stream) {
        const response = await piComplete(piModel, context, streamOpts);
        if (response.stopReason === "error") {
            // An error without a message must still fail the call rather than
            // be returned to the caller as a successful (possibly empty) turn.
            throw new Error(response.errorMessage || "LLM request failed without an error message");
        }
        let text = "";
        const calls = [];
        for (const block of response.content) {
            if (block.type === "text") {
                text += block.text;
            }
            else if (block.type === "toolCall") {
                calls.push(toInkosToolCall(block));
            }
        }
        return { content: text, toolCalls: calls };
    }
    const eventStream = piStream(piModel, context, streamOpts);
    let content = "";
    const toolCalls = [];
    for await (const event of eventStream) {
        if (event.type === "text_delta") {
            content += event.delta;
        }
        else if (event.type === "toolcall_end") {
            toolCalls.push(toInkosToolCall(event.toolCall));
        }
        else if (event.type === "error") {
            // Same reasoning as above: never let a message-less error event
            // fall through and return partial output as if it were complete.
            throw new Error(event.error.errorMessage || "LLM stream failed without an error message");
        }
    }
    return { content, toolCalls };
}
|
|
798
730
|
//# sourceMappingURL=provider.js.map
|