@geminilight/mindos 0.5.20 → 0.5.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/app/api/ask/route.ts +308 -172
- package/app/components/SettingsModal.tsx +52 -58
- package/app/components/settings/AiTab.tsx +4 -25
- package/app/components/settings/AppearanceTab.tsx +31 -13
- package/app/components/settings/KnowledgeTab.tsx +13 -28
- package/app/components/settings/McpAgentInstall.tsx +227 -0
- package/app/components/settings/McpServerStatus.tsx +172 -0
- package/app/components/settings/McpSkillsSection.tsx +583 -0
- package/app/components/settings/McpTab.tsx +17 -959
- package/app/components/settings/PluginsTab.tsx +4 -27
- package/app/components/settings/Primitives.tsx +69 -0
- package/app/components/settings/ShortcutsTab.tsx +2 -4
- package/app/components/settings/SyncTab.tsx +8 -24
- package/app/components/settings/types.ts +116 -2
- package/app/lib/agent/context.ts +151 -87
- package/app/lib/agent/index.ts +4 -3
- package/app/lib/agent/model.ts +76 -10
- package/app/lib/agent/stream-consumer.ts +73 -77
- package/app/lib/agent/to-agent-messages.ts +106 -0
- package/app/lib/agent/tools.ts +260 -266
- package/app/lib/i18n-en.ts +480 -0
- package/app/lib/i18n-zh.ts +505 -0
- package/app/lib/i18n.ts +4 -963
- package/app/next-env.d.ts +1 -1
- package/app/package-lock.json +3258 -3093
- package/app/package.json +6 -3
- package/bin/cli.js +7 -4
- package/package.json +4 -1
package/app/lib/agent/context.ts
CHANGED
|
@@ -2,31 +2,35 @@
|
|
|
2
2
|
* Phase 3: Context management — token estimation, compaction, tool output truncation.
|
|
3
3
|
*
|
|
4
4
|
* All operations are request-scoped (no persistence to frontend session).
|
|
5
|
+
* Uses pi-ai types (AgentMessage from pi-agent-core, complete from pi-ai).
|
|
5
6
|
*/
|
|
6
|
-
import {
|
|
7
|
-
import type {
|
|
7
|
+
import { complete, type Model } from '@mariozechner/pi-ai';
|
|
8
|
+
import type { AgentMessage } from '@mariozechner/pi-agent-core';
|
|
9
|
+
import type { ToolResultMessage, AssistantMessage, UserMessage } from '@mariozechner/pi-ai';
|
|
8
10
|
|
|
9
11
|
// ---------------------------------------------------------------------------
|
|
10
12
|
// Token estimation (1 token ≈ 4 chars)
|
|
11
13
|
// ---------------------------------------------------------------------------
|
|
12
14
|
|
|
13
|
-
/** Rough token count for a single
|
|
14
|
-
function messageTokens(msg:
|
|
15
|
-
if (
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
/** Rough token count for a single AgentMessage */
|
|
16
|
+
function messageTokens(msg: AgentMessage): number {
|
|
17
|
+
if ('content' in msg) {
|
|
18
|
+
const content = (msg as any).content;
|
|
19
|
+
if (typeof content === 'string') return Math.ceil(content.length / 4);
|
|
20
|
+
if (Array.isArray(content)) {
|
|
21
|
+
let chars = 0;
|
|
22
|
+
for (const part of content) {
|
|
23
|
+
if ('text' in part && typeof part.text === 'string') chars += part.text.length;
|
|
24
|
+
if ('args' in part) chars += JSON.stringify(part.args).length;
|
|
25
|
+
}
|
|
26
|
+
return Math.ceil(chars / 4);
|
|
22
27
|
}
|
|
23
|
-
return Math.ceil(chars / 4);
|
|
24
28
|
}
|
|
25
29
|
return 0;
|
|
26
30
|
}
|
|
27
31
|
|
|
28
32
|
/** Estimate total tokens for a message array */
|
|
29
|
-
export function estimateTokens(messages:
|
|
33
|
+
export function estimateTokens(messages: AgentMessage[]): number {
|
|
30
34
|
let total = 0;
|
|
31
35
|
for (const m of messages) total += messageTokens(m);
|
|
32
36
|
return total;
|
|
@@ -64,7 +68,7 @@ export function getContextLimit(model: string): number {
|
|
|
64
68
|
|
|
65
69
|
/** Check if messages + system prompt exceed threshold of context limit */
|
|
66
70
|
export function needsCompact(
|
|
67
|
-
messages:
|
|
71
|
+
messages: AgentMessage[],
|
|
68
72
|
systemPrompt: string,
|
|
69
73
|
model: string,
|
|
70
74
|
threshold = 0.7,
|
|
@@ -102,38 +106,35 @@ const TOOL_OUTPUT_LIMITS: Record<string, number> = {
|
|
|
102
106
|
|
|
103
107
|
/**
|
|
104
108
|
* Truncate tool outputs in historical messages to save tokens.
|
|
105
|
-
* Only truncates non-last
|
|
109
|
+
* Only truncates non-last toolResult messages (the last one is kept intact
|
|
106
110
|
* because the model may need its full output for the current step).
|
|
107
111
|
*/
|
|
108
|
-
export function truncateToolOutputs(messages:
|
|
109
|
-
// Find the index of the last '
|
|
112
|
+
export function truncateToolOutputs(messages: AgentMessage[]): AgentMessage[] {
|
|
113
|
+
// Find the index of the last 'toolResult' role message
|
|
110
114
|
let lastToolIdx = -1;
|
|
111
115
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
112
|
-
if (messages[i].role === '
|
|
116
|
+
if ((messages[i] as any).role === 'toolResult') { lastToolIdx = i; break; }
|
|
113
117
|
}
|
|
114
118
|
|
|
115
119
|
return messages.map((msg, idx) => {
|
|
116
|
-
|
|
120
|
+
const m = msg as any;
|
|
121
|
+
if (m.role !== 'toolResult' || idx === lastToolIdx) return msg;
|
|
117
122
|
|
|
118
|
-
const toolMsg =
|
|
119
|
-
const
|
|
120
|
-
|
|
121
|
-
const trp = part as ToolResultPart;
|
|
122
|
-
const toolName = trp.toolName ?? '';
|
|
123
|
-
const limit = TOOL_OUTPUT_LIMITS[toolName] ?? 500;
|
|
124
|
-
if (!trp.output || typeof trp.output !== 'object' || trp.output.type !== 'text') return part;
|
|
125
|
-
if (trp.output.value.length <= limit) return part;
|
|
123
|
+
const toolMsg = m as ToolResultMessage;
|
|
124
|
+
const toolName = toolMsg.toolName ?? '';
|
|
125
|
+
const limit = TOOL_OUTPUT_LIMITS[toolName] ?? 500;
|
|
126
126
|
|
|
127
|
+
// Truncate text content in toolResult
|
|
128
|
+
const truncatedContent = toolMsg.content.map(part => {
|
|
129
|
+
if (part.type !== 'text') return part;
|
|
130
|
+
if (part.text.length <= limit) return part;
|
|
127
131
|
return {
|
|
128
|
-
...
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
value: trp.output.value.slice(0, limit) + `\n[...truncated from ${trp.output.value.length} chars]`,
|
|
132
|
-
},
|
|
133
|
-
} satisfies ToolResultPart;
|
|
132
|
+
...part,
|
|
133
|
+
text: part.text.slice(0, limit) + `\n[...truncated from ${part.text.length} chars]`,
|
|
134
|
+
};
|
|
134
135
|
});
|
|
135
136
|
|
|
136
|
-
return { ...toolMsg, content: truncatedContent }
|
|
137
|
+
return { ...toolMsg, content: truncatedContent } as AgentMessage;
|
|
137
138
|
});
|
|
138
139
|
}
|
|
139
140
|
|
|
@@ -149,23 +150,21 @@ const COMPACT_PROMPT = `Summarize the key points, decisions, and file operations
|
|
|
149
150
|
|
|
150
151
|
Be concise and factual. Output only the summary, no preamble.`;
|
|
151
152
|
|
|
152
|
-
/** Extract a short text representation from
|
|
153
|
-
function messageToText(m:
|
|
154
|
-
const
|
|
153
|
+
/** Extract a short text representation from an AgentMessage for summarization */
|
|
154
|
+
function messageToText(m: AgentMessage): string {
|
|
155
|
+
const msg = m as any;
|
|
156
|
+
const role = msg.role;
|
|
155
157
|
let content = '';
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
158
|
+
|
|
159
|
+
if (typeof msg.content === 'string') {
|
|
160
|
+
content = msg.content;
|
|
161
|
+
} else if (Array.isArray(msg.content)) {
|
|
159
162
|
const pieces: string[] = [];
|
|
160
|
-
for (const part of
|
|
161
|
-
if ('text'
|
|
162
|
-
pieces.push(
|
|
163
|
-
} else if (part.type === '
|
|
164
|
-
pieces.push(`[Tool: ${
|
|
165
|
-
} else if (part.type === 'tool-result' && 'output' in part) {
|
|
166
|
-
const trp = part as ToolResultPart;
|
|
167
|
-
const val = trp.output && typeof trp.output === 'object' && trp.output.type === 'text' ? trp.output.value : '';
|
|
168
|
-
pieces.push(`[Result: ${val.slice(0, 200)}]`);
|
|
163
|
+
for (const part of msg.content) {
|
|
164
|
+
if (part.type === 'text' && typeof part.text === 'string') {
|
|
165
|
+
pieces.push(part.text);
|
|
166
|
+
} else if (part.type === 'toolCall' && 'toolName' in part) {
|
|
167
|
+
pieces.push(`[Tool: ${part.toolName}]`);
|
|
169
168
|
}
|
|
170
169
|
}
|
|
171
170
|
content = pieces.filter(Boolean).join(' ');
|
|
@@ -178,27 +177,24 @@ function messageToText(m: ModelMessage): string {
|
|
|
178
177
|
* Returns a new message array with early messages replaced by a summary.
|
|
179
178
|
* Only called when needsCompact() returns true.
|
|
180
179
|
*
|
|
181
|
-
*
|
|
182
|
-
* (e.g. haiku) would suffice for summarization and avoid competing for rate
|
|
183
|
-
* limits. Deferred until users report rate-limit issues — compact triggers
|
|
184
|
-
* infrequently (>70% context fill).
|
|
180
|
+
* Uses pi-ai complete() for summarization.
|
|
185
181
|
*/
|
|
186
182
|
export async function compactMessages(
|
|
187
|
-
messages:
|
|
188
|
-
model:
|
|
189
|
-
|
|
183
|
+
messages: AgentMessage[],
|
|
184
|
+
model: Model<any>,
|
|
185
|
+
apiKey: string,
|
|
186
|
+
systemPrompt: string,
|
|
187
|
+
modelName: string,
|
|
188
|
+
): Promise<{ messages: AgentMessage[]; compacted: boolean }> {
|
|
190
189
|
if (messages.length < 6) {
|
|
191
190
|
return { messages, compacted: false };
|
|
192
191
|
}
|
|
193
192
|
|
|
194
193
|
// Keep the last 6 messages intact, summarize the rest.
|
|
195
194
|
// Adjust split point to avoid cutting between an assistant (with tool calls)
|
|
196
|
-
// and its tool result.
|
|
197
|
-
// an assistant at the split point is safe because its tool results follow it.
|
|
198
|
-
// (Orphaned assistants without results can't exist in history: only completed
|
|
199
|
-
// tool calls are persisted by the frontend.)
|
|
195
|
+
// and its tool result.
|
|
200
196
|
let splitIdx = messages.length - 6;
|
|
201
|
-
while (splitIdx > 0 && messages[splitIdx]
|
|
197
|
+
while (splitIdx > 0 && (messages[splitIdx] as any).role === 'toolResult') {
|
|
202
198
|
splitIdx--;
|
|
203
199
|
}
|
|
204
200
|
if (splitIdx < 2) {
|
|
@@ -216,46 +212,61 @@ export async function compactMessages(
|
|
|
216
212
|
}
|
|
217
213
|
|
|
218
214
|
try {
|
|
219
|
-
const
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
215
|
+
const summaryMessage = await complete(model, {
|
|
216
|
+
messages: [{
|
|
217
|
+
role: 'user',
|
|
218
|
+
content: `${COMPACT_PROMPT}\n\n---\n\nConversation to summarize:\n\n${earlyText}`,
|
|
219
|
+
timestamp: Date.now(),
|
|
220
|
+
}],
|
|
221
|
+
}, { apiKey });
|
|
223
222
|
|
|
224
|
-
|
|
223
|
+
const summaryText = summaryMessage.content
|
|
224
|
+
.filter(p => p.type === 'text')
|
|
225
|
+
.map(p => (p as any).text)
|
|
226
|
+
.join('');
|
|
225
227
|
|
|
226
|
-
|
|
228
|
+
console.log(`[ask] Compacted ${earlyMessages.length} early messages into summary (${summaryText.length} chars)`);
|
|
229
|
+
|
|
230
|
+
const summaryContent = `[Summary of earlier conversation]\n\n${summaryText}`;
|
|
227
231
|
|
|
228
232
|
// If first recent message is also 'user', merge summary into it to avoid
|
|
229
233
|
// consecutive user messages (Anthropic rejects user→user sequences).
|
|
230
|
-
if (recentMessages[0]?.role === 'user') {
|
|
231
|
-
const merged = { ...recentMessages[0] };
|
|
234
|
+
if ((recentMessages[0] as any)?.role === 'user') {
|
|
235
|
+
const merged = { ...(recentMessages[0] as any) };
|
|
232
236
|
if (typeof merged.content === 'string') {
|
|
233
|
-
merged.content = `${
|
|
237
|
+
merged.content = `${summaryContent}\n\n---\n\n${merged.content}`;
|
|
234
238
|
} else if (Array.isArray(merged.content)) {
|
|
235
|
-
|
|
236
|
-
merged.content = [{ type: 'text' as const, text: `${summaryText}\n\n---\n\n` }, ...merged.content];
|
|
239
|
+
merged.content = [{ type: 'text' as const, text: `${summaryContent}\n\n---\n\n` }, ...merged.content];
|
|
237
240
|
} else {
|
|
238
|
-
merged.content =
|
|
241
|
+
merged.content = summaryContent;
|
|
239
242
|
}
|
|
240
243
|
return {
|
|
241
|
-
messages: [merged, ...recentMessages.slice(1)],
|
|
244
|
+
messages: [merged as AgentMessage, ...recentMessages.slice(1)],
|
|
242
245
|
compacted: true,
|
|
243
246
|
};
|
|
244
247
|
}
|
|
245
248
|
|
|
246
249
|
// Otherwise prepend as separate user message
|
|
247
|
-
const
|
|
250
|
+
const summaryMsg: UserMessage = {
|
|
248
251
|
role: 'user',
|
|
249
|
-
content:
|
|
252
|
+
content: summaryContent,
|
|
253
|
+
timestamp: Date.now(),
|
|
250
254
|
};
|
|
251
255
|
|
|
252
256
|
return {
|
|
253
|
-
messages: [
|
|
257
|
+
messages: [summaryMsg as AgentMessage, ...recentMessages],
|
|
254
258
|
compacted: true,
|
|
255
259
|
};
|
|
256
260
|
} catch (err) {
|
|
257
|
-
|
|
258
|
-
|
|
261
|
+
// API failure: fall back to hard prune instead of risking context overflow
|
|
262
|
+
console.warn('[ask] Compact failed, applying hard prune as fallback:', err);
|
|
263
|
+
const pruned = hardPrune(messages, systemPrompt, modelName);
|
|
264
|
+
if (pruned.length < messages.length) {
|
|
265
|
+
console.log(`[ask] Hard prune fallback succeeded (${messages.length} → ${pruned.length} messages)`);
|
|
266
|
+
return { messages: pruned, compacted: false };
|
|
267
|
+
}
|
|
268
|
+
// If pruning also can't help, let it bubble up so request fails safely
|
|
269
|
+
throw new Error(`Context compaction failed and pruning insufficient: ${err instanceof Error ? err.message : String(err)}`);
|
|
259
270
|
}
|
|
260
271
|
}
|
|
261
272
|
|
|
@@ -269,10 +280,10 @@ export async function compactMessages(
|
|
|
269
280
|
* (containing tool calls) and its following tool result message.
|
|
270
281
|
*/
|
|
271
282
|
export function hardPrune(
|
|
272
|
-
messages:
|
|
283
|
+
messages: AgentMessage[],
|
|
273
284
|
systemPrompt: string,
|
|
274
285
|
model: string,
|
|
275
|
-
):
|
|
286
|
+
): AgentMessage[] {
|
|
276
287
|
const limit = getContextLimit(model);
|
|
277
288
|
const threshold = limit * 0.9;
|
|
278
289
|
const systemTokens = estimateStringTokens(systemPrompt);
|
|
@@ -288,24 +299,27 @@ export function hardPrune(
|
|
|
288
299
|
}
|
|
289
300
|
|
|
290
301
|
// Ensure we don't cut between an assistant (with tool calls) and its tool result.
|
|
291
|
-
|
|
292
|
-
// or is fully removed.
|
|
293
|
-
while (cutIdx < messages.length - 1 && messages[cutIdx].role === 'tool') {
|
|
302
|
+
while (cutIdx < messages.length - 1 && (messages[cutIdx] as any).role === 'toolResult') {
|
|
294
303
|
total -= messageTokens(messages[cutIdx]);
|
|
295
304
|
cutIdx++;
|
|
296
305
|
}
|
|
297
306
|
|
|
298
307
|
// Ensure first message is 'user' (Anthropic requirement)
|
|
299
|
-
while (cutIdx < messages.length - 1 && messages[cutIdx].role !== 'user') {
|
|
308
|
+
while (cutIdx < messages.length - 1 && (messages[cutIdx] as any).role !== 'user') {
|
|
300
309
|
total -= messageTokens(messages[cutIdx]);
|
|
301
310
|
cutIdx++;
|
|
302
311
|
}
|
|
303
312
|
|
|
304
313
|
// Fallback: if no user message found in remaining messages, inject a synthetic one
|
|
305
314
|
const pruned = cutIdx > 0 ? messages.slice(cutIdx) : messages;
|
|
306
|
-
if (pruned.length > 0 && pruned[0].role !== 'user') {
|
|
315
|
+
if (pruned.length > 0 && (pruned[0] as any).role !== 'user') {
|
|
307
316
|
console.log(`[ask] Hard pruned ${cutIdx} messages, injecting synthetic user message (${messages.length} → ${pruned.length + 1})`);
|
|
308
|
-
|
|
317
|
+
const syntheticUser: UserMessage = {
|
|
318
|
+
role: 'user',
|
|
319
|
+
content: '[Conversation context was pruned due to length. Continuing from here.]',
|
|
320
|
+
timestamp: Date.now(),
|
|
321
|
+
};
|
|
322
|
+
return [syntheticUser as AgentMessage, ...pruned];
|
|
309
323
|
}
|
|
310
324
|
|
|
311
325
|
if (cutIdx > 0) {
|
|
@@ -315,3 +329,53 @@ export function hardPrune(
|
|
|
315
329
|
|
|
316
330
|
return messages;
|
|
317
331
|
}
|
|
332
|
+
|
|
333
|
+
// ---------------------------------------------------------------------------
|
|
334
|
+
// transformContext factory — for Agent's transformContext hook
|
|
335
|
+
// ---------------------------------------------------------------------------
|
|
336
|
+
|
|
337
|
+
/**
|
|
338
|
+
* Create a transformContext function that captures the system prompt, model name, compact-model getter, apiKey, and context strategy via closure.
|
|
339
|
+
* Agent calls this before each LLM call to manage context window.
|
|
340
|
+
*/
|
|
341
|
+
export function createTransformContext(
|
|
342
|
+
systemPrompt: string,
|
|
343
|
+
modelName: string,
|
|
344
|
+
getCompactModel: () => Model<any>,
|
|
345
|
+
apiKey: string,
|
|
346
|
+
contextStrategy: string,
|
|
347
|
+
) {
|
|
348
|
+
return async (messages: AgentMessage[], signal?: AbortSignal): Promise<AgentMessage[]> => {
|
|
349
|
+
// 1. Truncate tool outputs in historical messages
|
|
350
|
+
let result = truncateToolOutputs(messages);
|
|
351
|
+
|
|
352
|
+
const preTokens = estimateTokens(result);
|
|
353
|
+
const sysTokens = estimateStringTokens(systemPrompt);
|
|
354
|
+
const ctxLimit = getContextLimit(modelName);
|
|
355
|
+
console.log(`[ask] Context: ~${preTokens + sysTokens} tokens (messages=${preTokens}, system=${sysTokens}), limit=${ctxLimit}`);
|
|
356
|
+
|
|
357
|
+
// 2. Compact if >70% context limit (skip if user disabled)
|
|
358
|
+
if (contextStrategy === 'auto' && needsCompact(result, systemPrompt, modelName)) {
|
|
359
|
+
console.log('[ask] Context >70% limit, compacting...');
|
|
360
|
+
const compactResult = await compactMessages(
|
|
361
|
+
result,
|
|
362
|
+
getCompactModel(),
|
|
363
|
+
apiKey,
|
|
364
|
+
systemPrompt,
|
|
365
|
+
modelName,
|
|
366
|
+
);
|
|
367
|
+
result = compactResult.messages;
|
|
368
|
+
if (compactResult.compacted) {
|
|
369
|
+
const postTokens = estimateTokens(result);
|
|
370
|
+
console.log(`[ask] After compact: ~${postTokens + sysTokens} tokens`);
|
|
371
|
+
} else {
|
|
372
|
+
console.log('[ask] Compact skipped (too few messages or fallback used), hard prune will handle overflow if needed');
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
// 3. Hard prune if still >90% context limit
|
|
377
|
+
result = hardPrune(result, systemPrompt, modelName);
|
|
378
|
+
|
|
379
|
+
return result;
|
|
380
|
+
};
|
|
381
|
+
}
|
package/app/lib/agent/index.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { knowledgeBaseTools,
|
|
1
|
+
export { getModelConfig } from './model';
|
|
2
|
+
export { knowledgeBaseTools, WRITE_TOOLS, truncate } from './tools';
|
|
3
3
|
export { AGENT_SYSTEM_PROMPT } from './prompt';
|
|
4
4
|
export {
|
|
5
5
|
estimateTokens, estimateStringTokens, getContextLimit, needsCompact,
|
|
6
|
-
truncateToolOutputs, compactMessages, hardPrune,
|
|
6
|
+
truncateToolOutputs, compactMessages, hardPrune, createTransformContext,
|
|
7
7
|
} from './context';
|
|
8
|
+
export { toAgentMessages } from './to-agent-messages';
|
package/app/lib/agent/model.ts
CHANGED
|
@@ -1,18 +1,84 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { createOpenAI } from '@ai-sdk/openai';
|
|
1
|
+
import { getModel as piGetModel, type Model } from '@mariozechner/pi-ai';
|
|
3
2
|
import { effectiveAiConfig } from '@/lib/settings';
|
|
4
3
|
|
|
5
|
-
|
|
4
|
+
/**
|
|
5
|
+
* Build a pi-ai Model for the configured provider.
|
|
6
|
+
*
|
|
7
|
+
* - Anthropic: uses getModel() from pi-ai registry; falls back to constructing a Model literal for unknown model IDs.
|
|
8
|
+
* - OpenAI: uses getModel() then overrides baseUrl if custom endpoint is configured.
|
|
9
|
+
* Falls back to constructing a Model literal for unknown model IDs.
|
|
10
|
+
* Custom API variant can be specified for non-standard endpoints.
|
|
11
|
+
*
|
|
12
|
+
* Returns { model, modelName, apiKey, provider } — Agent needs model + apiKey via getApiKey hook.
|
|
13
|
+
*/
|
|
14
|
+
export function getModelConfig(): {
|
|
15
|
+
model: Model<any>;
|
|
16
|
+
modelName: string;
|
|
17
|
+
apiKey: string;
|
|
18
|
+
provider: 'anthropic' | 'openai';
|
|
19
|
+
} {
|
|
6
20
|
const cfg = effectiveAiConfig();
|
|
7
21
|
|
|
8
22
|
if (cfg.provider === 'openai') {
|
|
9
|
-
const
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
23
|
+
const modelName = cfg.openaiModel;
|
|
24
|
+
let model: Model<any>;
|
|
25
|
+
let apiVariant: string = 'openai-responses'; // Default to responses API
|
|
26
|
+
|
|
27
|
+
// Allow customization of API variant if using custom endpoint
|
|
28
|
+
// Check if config specifies an alternative API type (for non-standard endpoints)
|
|
29
|
+
const customApiVariant = (cfg as any).openaiApiVariant; // May exist in extended config
|
|
30
|
+
|
|
31
|
+
try {
|
|
32
|
+
model = piGetModel('openai', modelName as any);
|
|
33
|
+
} catch {
|
|
34
|
+
// Model not in pi-ai registry — construct manually for custom/proxy endpoints
|
|
35
|
+
model = {
|
|
36
|
+
id: modelName,
|
|
37
|
+
name: modelName,
|
|
38
|
+
api: (customApiVariant ?? apiVariant) as any,
|
|
39
|
+
provider: 'openai',
|
|
40
|
+
baseUrl: 'https://api.openai.com/v1',
|
|
41
|
+
reasoning: false,
|
|
42
|
+
input: ['text'] as const,
|
|
43
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
44
|
+
contextWindow: 128_000,
|
|
45
|
+
maxTokens: 16_384,
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Override baseUrl if user configured a custom endpoint
|
|
50
|
+
if (cfg.openaiBaseUrl) {
|
|
51
|
+
model = { ...model, baseUrl: cfg.openaiBaseUrl };
|
|
52
|
+
// Also allow API variant override for custom endpoints
|
|
53
|
+
if (customApiVariant) {
|
|
54
|
+
model = { ...model, api: customApiVariant };
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return { model, modelName, apiKey: cfg.openaiApiKey, provider: 'openai' };
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Anthropic
|
|
62
|
+
const modelName = cfg.anthropicModel;
|
|
63
|
+
let model: Model<any>;
|
|
64
|
+
|
|
65
|
+
try {
|
|
66
|
+
model = piGetModel('anthropic', modelName as any);
|
|
67
|
+
} catch {
|
|
68
|
+
// Unknown Anthropic model — construct manually
|
|
69
|
+
model = {
|
|
70
|
+
id: modelName,
|
|
71
|
+
name: modelName,
|
|
72
|
+
api: 'anthropic-messages' as const,
|
|
73
|
+
provider: 'anthropic',
|
|
74
|
+
baseUrl: 'https://api.anthropic.com',
|
|
75
|
+
reasoning: false,
|
|
76
|
+
input: ['text'] as const,
|
|
77
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
78
|
+
contextWindow: 200_000,
|
|
79
|
+
maxTokens: 8_192,
|
|
80
|
+
};
|
|
14
81
|
}
|
|
15
82
|
|
|
16
|
-
|
|
17
|
-
return anthropic(cfg.anthropicModel);
|
|
83
|
+
return { model, modelName, apiKey: cfg.anthropicApiKey, provider: 'anthropic' };
|
|
18
84
|
}
|