@geminilight/mindos 0.5.20 → 0.5.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,31 +2,35 @@
2
2
  * Phase 3: Context management — token estimation, compaction, tool output truncation.
3
3
  *
4
4
  * All operations are request-scoped (no persistence to frontend session).
5
+ * Uses pi-ai types (AgentMessage from pi-agent-core, complete from pi-ai).
5
6
  */
6
- import { generateText, type ModelMessage, type ToolResultPart, type ToolModelMessage } from 'ai';
7
- import type { LanguageModel } from 'ai';
7
+ import { complete, type Model } from '@mariozechner/pi-ai';
8
+ import type { AgentMessage } from '@mariozechner/pi-agent-core';
9
+ import type { ToolResultMessage, AssistantMessage, UserMessage } from '@mariozechner/pi-ai';
8
10
 
9
11
  // ---------------------------------------------------------------------------
10
12
  // Token estimation (1 token ≈ 4 chars)
11
13
  // ---------------------------------------------------------------------------
12
14
 
13
- /** Rough token count for a single ModelMessage */
14
- function messageTokens(msg: ModelMessage): number {
15
- if (typeof msg.content === 'string') return Math.ceil(msg.content.length / 4);
16
- if (Array.isArray(msg.content)) {
17
- let chars = 0;
18
- for (const part of msg.content) {
19
- if ('text' in part && typeof part.text === 'string') chars += part.text.length;
20
- if ('value' in part && typeof part.value === 'string') chars += part.value.length;
21
- if ('input' in part) chars += JSON.stringify(part.input).length;
15
+ /** Rough token count for a single AgentMessage */
16
+ function messageTokens(msg: AgentMessage): number {
17
+ if ('content' in msg) {
18
+ const content = (msg as any).content;
19
+ if (typeof content === 'string') return Math.ceil(content.length / 4);
20
+ if (Array.isArray(content)) {
21
+ let chars = 0;
22
+ for (const part of content) {
23
+ if ('text' in part && typeof part.text === 'string') chars += part.text.length;
24
+ if ('args' in part) chars += JSON.stringify(part.args).length;
25
+ }
26
+ return Math.ceil(chars / 4);
22
27
  }
23
- return Math.ceil(chars / 4);
24
28
  }
25
29
  return 0;
26
30
  }
27
31
 
28
32
  /** Estimate total tokens for a message array */
29
- export function estimateTokens(messages: ModelMessage[]): number {
33
+ export function estimateTokens(messages: AgentMessage[]): number {
30
34
  let total = 0;
31
35
  for (const m of messages) total += messageTokens(m);
32
36
  return total;
@@ -64,7 +68,7 @@ export function getContextLimit(model: string): number {
64
68
 
65
69
  /** Check if messages + system prompt exceed threshold of context limit */
66
70
  export function needsCompact(
67
- messages: ModelMessage[],
71
+ messages: AgentMessage[],
68
72
  systemPrompt: string,
69
73
  model: string,
70
74
  threshold = 0.7,
@@ -102,38 +106,35 @@ const TOOL_OUTPUT_LIMITS: Record<string, number> = {
102
106
 
103
107
  /**
104
108
  * Truncate tool outputs in historical messages to save tokens.
105
- * Only truncates non-last tool messages (the last tool message is kept intact
109
+ * Only truncates non-last toolResult messages (the last one is kept intact
106
110
  * because the model may need its full output for the current step).
107
111
  */
108
- export function truncateToolOutputs(messages: ModelMessage[]): ModelMessage[] {
109
- // Find the index of the last 'tool' role message
112
+ export function truncateToolOutputs(messages: AgentMessage[]): AgentMessage[] {
113
+ // Find the index of the last 'toolResult' role message
110
114
  let lastToolIdx = -1;
111
115
  for (let i = messages.length - 1; i >= 0; i--) {
112
- if (messages[i].role === 'tool') { lastToolIdx = i; break; }
116
+ if ((messages[i] as any).role === 'toolResult') { lastToolIdx = i; break; }
113
117
  }
114
118
 
115
119
  return messages.map((msg, idx) => {
116
- if (msg.role !== 'tool' || idx === lastToolIdx) return msg;
120
+ const m = msg as any;
121
+ if (m.role !== 'toolResult' || idx === lastToolIdx) return msg;
117
122
 
118
- const toolMsg = msg as ToolModelMessage;
119
- const truncatedContent = toolMsg.content.map(part => {
120
- if (part.type !== 'tool-result') return part;
121
- const trp = part as ToolResultPart;
122
- const toolName = trp.toolName ?? '';
123
- const limit = TOOL_OUTPUT_LIMITS[toolName] ?? 500;
124
- if (!trp.output || typeof trp.output !== 'object' || trp.output.type !== 'text') return part;
125
- if (trp.output.value.length <= limit) return part;
123
+ const toolMsg = m as ToolResultMessage;
124
+ const toolName = toolMsg.toolName ?? '';
125
+ const limit = TOOL_OUTPUT_LIMITS[toolName] ?? 500;
126
126
 
127
+ // Truncate text content in toolResult
128
+ const truncatedContent = toolMsg.content.map(part => {
129
+ if (part.type !== 'text') return part;
130
+ if (part.text.length <= limit) return part;
127
131
  return {
128
- ...trp,
129
- output: {
130
- ...trp.output,
131
- value: trp.output.value.slice(0, limit) + `\n[...truncated from ${trp.output.value.length} chars]`,
132
- },
133
- } satisfies ToolResultPart;
132
+ ...part,
133
+ text: part.text.slice(0, limit) + `\n[...truncated from ${part.text.length} chars]`,
134
+ };
134
135
  });
135
136
 
136
- return { ...toolMsg, content: truncatedContent } satisfies ToolModelMessage;
137
+ return { ...toolMsg, content: truncatedContent } as AgentMessage;
137
138
  });
138
139
  }
139
140
 
@@ -149,23 +150,21 @@ const COMPACT_PROMPT = `Summarize the key points, decisions, and file operations
149
150
 
150
151
  Be concise and factual. Output only the summary, no preamble.`;
151
152
 
152
- /** Extract a short text representation from a ModelMessage for summarization */
153
- function messageToText(m: ModelMessage): string {
154
- const role = m.role;
153
+ /** Extract a short text representation from an AgentMessage for summarization */
154
+ function messageToText(m: AgentMessage): string {
155
+ const msg = m as any;
156
+ const role = msg.role;
155
157
  let content = '';
156
- if (typeof m.content === 'string') {
157
- content = m.content;
158
- } else if (Array.isArray(m.content)) {
158
+
159
+ if (typeof msg.content === 'string') {
160
+ content = msg.content;
161
+ } else if (Array.isArray(msg.content)) {
159
162
  const pieces: string[] = [];
160
- for (const part of m.content) {
161
- if ('text' in part && typeof (part as { text?: string }).text === 'string') {
162
- pieces.push((part as { text: string }).text);
163
- } else if (part.type === 'tool-call' && 'toolName' in part) {
164
- pieces.push(`[Tool: ${(part as { toolName: string }).toolName}]`);
165
- } else if (part.type === 'tool-result' && 'output' in part) {
166
- const trp = part as ToolResultPart;
167
- const val = trp.output && typeof trp.output === 'object' && trp.output.type === 'text' ? trp.output.value : '';
168
- pieces.push(`[Result: ${val.slice(0, 200)}]`);
163
+ for (const part of msg.content) {
164
+ if (part.type === 'text' && typeof part.text === 'string') {
165
+ pieces.push(part.text);
166
+ } else if (part.type === 'toolCall' && 'toolName' in part) {
167
+ pieces.push(`[Tool: ${part.toolName}]`);
169
168
  }
170
169
  }
171
170
  content = pieces.filter(Boolean).join(' ');
@@ -178,27 +177,24 @@ function messageToText(m: ModelMessage): string {
178
177
  * Returns a new message array with early messages replaced by a summary.
179
178
  * Only called when needsCompact() returns true.
180
179
  *
181
- * NOTE: Currently uses the same model as the main generation. A cheaper model
182
- * (e.g. haiku) would suffice for summarization and avoid competing for rate
183
- * limits. Deferred until users report rate-limit issues — compact triggers
184
- * infrequently (>70% context fill).
180
+ * Uses pi-ai complete() for summarization.
185
181
  */
186
182
  export async function compactMessages(
187
- messages: ModelMessage[],
188
- model: LanguageModel,
189
- ): Promise<{ messages: ModelMessage[]; compacted: boolean }> {
183
+ messages: AgentMessage[],
184
+ model: Model<any>,
185
+ apiKey: string,
186
+ systemPrompt: string,
187
+ modelName: string,
188
+ ): Promise<{ messages: AgentMessage[]; compacted: boolean }> {
190
189
  if (messages.length < 6) {
191
190
  return { messages, compacted: false };
192
191
  }
193
192
 
194
193
  // Keep the last 6 messages intact, summarize the rest.
195
194
  // Adjust split point to avoid cutting between an assistant (with tool calls)
196
- // and its tool result. Only need to check for orphaned 'tool' messages —
197
- // an assistant at the split point is safe because its tool results follow it.
198
- // (Orphaned assistants without results can't exist in history: only completed
199
- // tool calls are persisted by the frontend.)
195
+ // and its tool result.
200
196
  let splitIdx = messages.length - 6;
201
- while (splitIdx > 0 && messages[splitIdx]?.role === 'tool') {
197
+ while (splitIdx > 0 && (messages[splitIdx] as any).role === 'toolResult') {
202
198
  splitIdx--;
203
199
  }
204
200
  if (splitIdx < 2) {
@@ -216,46 +212,61 @@ export async function compactMessages(
216
212
  }
217
213
 
218
214
  try {
219
- const { text: summary } = await generateText({
220
- model,
221
- prompt: `${COMPACT_PROMPT}\n\n---\n\nConversation to summarize:\n\n${earlyText}`,
222
- });
215
+ const summaryMessage = await complete(model, {
216
+ messages: [{
217
+ role: 'user',
218
+ content: `${COMPACT_PROMPT}\n\n---\n\nConversation to summarize:\n\n${earlyText}`,
219
+ timestamp: Date.now(),
220
+ }],
221
+ }, { apiKey });
223
222
 
224
- console.log(`[ask] Compacted ${earlyMessages.length} early messages into summary (${summary.length} chars)`);
223
+ const summaryText = summaryMessage.content
224
+ .filter(p => p.type === 'text')
225
+ .map(p => (p as any).text)
226
+ .join('');
225
227
 
226
- const summaryText = `[Summary of earlier conversation]\n\n${summary}`;
228
+ console.log(`[ask] Compacted ${earlyMessages.length} early messages into summary (${summaryText.length} chars)`);
229
+
230
+ const summaryContent = `[Summary of earlier conversation]\n\n${summaryText}`;
227
231
 
228
232
  // If first recent message is also 'user', merge summary into it to avoid
229
233
  // consecutive user messages (Anthropic rejects user→user sequences).
230
- if (recentMessages[0]?.role === 'user') {
231
- const merged = { ...recentMessages[0] };
234
+ if ((recentMessages[0] as any)?.role === 'user') {
235
+ const merged = { ...(recentMessages[0] as any) };
232
236
  if (typeof merged.content === 'string') {
233
- merged.content = `${summaryText}\n\n---\n\n${merged.content}`;
237
+ merged.content = `${summaryContent}\n\n---\n\n${merged.content}`;
234
238
  } else if (Array.isArray(merged.content)) {
235
- // Multimodal content (e.g. images) prepend summary as text part
236
- merged.content = [{ type: 'text' as const, text: `${summaryText}\n\n---\n\n` }, ...merged.content];
239
+ merged.content = [{ type: 'text' as const, text: `${summaryContent}\n\n---\n\n` }, ...merged.content];
237
240
  } else {
238
- merged.content = summaryText;
241
+ merged.content = summaryContent;
239
242
  }
240
243
  return {
241
- messages: [merged, ...recentMessages.slice(1)],
244
+ messages: [merged as AgentMessage, ...recentMessages.slice(1)],
242
245
  compacted: true,
243
246
  };
244
247
  }
245
248
 
246
249
  // Otherwise prepend as separate user message
247
- const summaryMessage: ModelMessage = {
250
+ const summaryMsg: UserMessage = {
248
251
  role: 'user',
249
- content: summaryText,
252
+ content: summaryContent,
253
+ timestamp: Date.now(),
250
254
  };
251
255
 
252
256
  return {
253
- messages: [summaryMessage, ...recentMessages],
257
+ messages: [summaryMsg as AgentMessage, ...recentMessages],
254
258
  compacted: true,
255
259
  };
256
260
  } catch (err) {
257
- console.error('[ask] Compact failed, using uncompacted messages:', err);
258
- return { messages, compacted: false };
261
+ // API failure: fall back to hard prune instead of risking context overflow
262
+ console.warn('[ask] Compact failed, applying hard prune as fallback:', err);
263
+ const pruned = hardPrune(messages, systemPrompt, modelName);
264
+ if (pruned.length < messages.length) {
265
+ console.log(`[ask] Hard prune fallback succeeded (${messages.length} → ${pruned.length} messages)`);
266
+ return { messages: pruned, compacted: false };
267
+ }
268
+ // If pruning also can't help, let it bubble up so request fails safely
269
+ throw new Error(`Context compaction failed and pruning insufficient: ${err instanceof Error ? err.message : String(err)}`);
259
270
  }
260
271
  }
261
272
 
@@ -269,10 +280,10 @@ export async function compactMessages(
269
280
  * (containing tool calls) and its following tool result message.
270
281
  */
271
282
  export function hardPrune(
272
- messages: ModelMessage[],
283
+ messages: AgentMessage[],
273
284
  systemPrompt: string,
274
285
  model: string,
275
- ): ModelMessage[] {
286
+ ): AgentMessage[] {
276
287
  const limit = getContextLimit(model);
277
288
  const threshold = limit * 0.9;
278
289
  const systemTokens = estimateStringTokens(systemPrompt);
@@ -288,24 +299,27 @@ export function hardPrune(
288
299
  }
289
300
 
290
301
  // Ensure we don't cut between an assistant (with tool calls) and its tool result.
291
- // If cutIdx lands on a 'tool' message, advance past it so the pair stays together
292
- // or is fully removed.
293
- while (cutIdx < messages.length - 1 && messages[cutIdx].role === 'tool') {
302
+ while (cutIdx < messages.length - 1 && (messages[cutIdx] as any).role === 'toolResult') {
294
303
  total -= messageTokens(messages[cutIdx]);
295
304
  cutIdx++;
296
305
  }
297
306
 
298
307
  // Ensure first message is 'user' (Anthropic requirement)
299
- while (cutIdx < messages.length - 1 && messages[cutIdx].role !== 'user') {
308
+ while (cutIdx < messages.length - 1 && (messages[cutIdx] as any).role !== 'user') {
300
309
  total -= messageTokens(messages[cutIdx]);
301
310
  cutIdx++;
302
311
  }
303
312
 
304
313
  // Fallback: if no user message found in remaining messages, inject a synthetic one
305
314
  const pruned = cutIdx > 0 ? messages.slice(cutIdx) : messages;
306
- if (pruned.length > 0 && pruned[0].role !== 'user') {
315
+ if (pruned.length > 0 && (pruned[0] as any).role !== 'user') {
307
316
  console.log(`[ask] Hard pruned ${cutIdx} messages, injecting synthetic user message (${messages.length} → ${pruned.length + 1})`);
308
- return [{ role: 'user', content: '[Conversation context was pruned due to length. Continuing from here.]' } as ModelMessage, ...pruned];
317
+ const syntheticUser: UserMessage = {
318
+ role: 'user',
319
+ content: '[Conversation context was pruned due to length. Continuing from here.]',
320
+ timestamp: Date.now(),
321
+ };
322
+ return [syntheticUser as AgentMessage, ...pruned];
309
323
  }
310
324
 
311
325
  if (cutIdx > 0) {
@@ -315,3 +329,53 @@ export function hardPrune(
315
329
 
316
330
  return messages;
317
331
  }
332
+
333
+ // ---------------------------------------------------------------------------
334
+ // transformContext factory — for Agent's transformContext hook
335
+ // ---------------------------------------------------------------------------
336
+
337
+ /**
338
+ * Create a transformContext function that captures the model and apiKey via closure.
339
+ * Agent calls this before each LLM call to manage context window.
340
+ */
341
+ export function createTransformContext(
342
+ systemPrompt: string,
343
+ modelName: string,
344
+ getCompactModel: () => Model<any>,
345
+ apiKey: string,
346
+ contextStrategy: string,
347
+ ) {
348
+ return async (messages: AgentMessage[], signal?: AbortSignal): Promise<AgentMessage[]> => {
349
+ // 1. Truncate tool outputs in historical messages
350
+ let result = truncateToolOutputs(messages);
351
+
352
+ const preTokens = estimateTokens(result);
353
+ const sysTokens = estimateStringTokens(systemPrompt);
354
+ const ctxLimit = getContextLimit(modelName);
355
+ console.log(`[ask] Context: ~${preTokens + sysTokens} tokens (messages=${preTokens}, system=${sysTokens}), limit=${ctxLimit}`);
356
+
357
+ // 2. Compact if >70% context limit (skip if user disabled)
358
+ if (contextStrategy === 'auto' && needsCompact(result, systemPrompt, modelName)) {
359
+ console.log('[ask] Context >70% limit, compacting...');
360
+ const compactResult = await compactMessages(
361
+ result,
362
+ getCompactModel(),
363
+ apiKey,
364
+ systemPrompt,
365
+ modelName,
366
+ );
367
+ result = compactResult.messages;
368
+ if (compactResult.compacted) {
369
+ const postTokens = estimateTokens(result);
370
+ console.log(`[ask] After compact: ~${postTokens + sysTokens} tokens`);
371
+ } else {
372
+ console.log('[ask] Compact skipped (too few messages or fallback used), hard prune will handle overflow if needed');
373
+ }
374
+ }
375
+
376
+ // 3. Hard prune if still >90% context limit
377
+ result = hardPrune(result, systemPrompt, modelName);
378
+
379
+ return result;
380
+ };
381
+ }
@@ -1,7 +1,8 @@
1
- export { getModel } from './model';
2
- export { knowledgeBaseTools, truncate, assertWritable } from './tools';
1
+ export { getModelConfig } from './model';
2
+ export { knowledgeBaseTools, WRITE_TOOLS, truncate } from './tools';
3
3
  export { AGENT_SYSTEM_PROMPT } from './prompt';
4
4
  export {
5
5
  estimateTokens, estimateStringTokens, getContextLimit, needsCompact,
6
- truncateToolOutputs, compactMessages, hardPrune,
6
+ truncateToolOutputs, compactMessages, hardPrune, createTransformContext,
7
7
  } from './context';
8
+ export { toAgentMessages } from './to-agent-messages';
@@ -1,18 +1,84 @@
1
- import { createAnthropic } from '@ai-sdk/anthropic';
2
- import { createOpenAI } from '@ai-sdk/openai';
1
+ import { getModel as piGetModel, type Model } from '@mariozechner/pi-ai';
3
2
  import { effectiveAiConfig } from '@/lib/settings';
4
3
 
5
- export function getModel() {
4
+ /**
5
+ * Build a pi-ai Model for the configured provider.
6
+ *
7
+ * - Anthropic: uses getModel() from pi-ai registry directly.
8
+ * - OpenAI: uses getModel() then overrides baseUrl if custom endpoint is configured.
9
+ * Falls back to constructing a Model literal for unknown model IDs.
10
+ * Custom API variant can be specified for non-standard endpoints.
11
+ *
12
+ * Returns { model, modelName, apiKey } — Agent needs model + apiKey via getApiKey hook.
13
+ */
14
+ export function getModelConfig(): {
15
+ model: Model<any>;
16
+ modelName: string;
17
+ apiKey: string;
18
+ provider: 'anthropic' | 'openai';
19
+ } {
6
20
  const cfg = effectiveAiConfig();
7
21
 
8
22
  if (cfg.provider === 'openai') {
9
- const openai = createOpenAI({
10
- apiKey: cfg.openaiApiKey,
11
- baseURL: cfg.openaiBaseUrl || undefined,
12
- });
13
- return openai.chat(cfg.openaiModel);
23
+ const modelName = cfg.openaiModel;
24
+ let model: Model<any>;
25
+ let apiVariant: string = 'openai-responses'; // Default to responses API
26
+
27
+ // Allow customization of API variant if using custom endpoint
28
+ // Check if config specifies an alternative API type (for non-standard endpoints)
29
+ const customApiVariant = (cfg as any).openaiApiVariant; // May exist in extended config
30
+
31
+ try {
32
+ model = piGetModel('openai', modelName as any);
33
+ } catch {
34
+ // Model not in pi-ai registry — construct manually for custom/proxy endpoints
35
+ model = {
36
+ id: modelName,
37
+ name: modelName,
38
+ api: (customApiVariant ?? apiVariant) as any,
39
+ provider: 'openai',
40
+ baseUrl: 'https://api.openai.com/v1',
41
+ reasoning: false,
42
+ input: ['text'] as const,
43
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
44
+ contextWindow: 128_000,
45
+ maxTokens: 16_384,
46
+ };
47
+ }
48
+
49
+ // Override baseUrl if user configured a custom endpoint
50
+ if (cfg.openaiBaseUrl) {
51
+ model = { ...model, baseUrl: cfg.openaiBaseUrl };
52
+ // Also allow API variant override for custom endpoints
53
+ if (customApiVariant) {
54
+ model = { ...model, api: customApiVariant };
55
+ }
56
+ }
57
+
58
+ return { model, modelName, apiKey: cfg.openaiApiKey, provider: 'openai' };
59
+ }
60
+
61
+ // Anthropic
62
+ const modelName = cfg.anthropicModel;
63
+ let model: Model<any>;
64
+
65
+ try {
66
+ model = piGetModel('anthropic', modelName as any);
67
+ } catch {
68
+ // Unknown Anthropic model — construct manually
69
+ model = {
70
+ id: modelName,
71
+ name: modelName,
72
+ api: 'anthropic-messages' as const,
73
+ provider: 'anthropic',
74
+ baseUrl: 'https://api.anthropic.com',
75
+ reasoning: false,
76
+ input: ['text'] as const,
77
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
78
+ contextWindow: 200_000,
79
+ maxTokens: 8_192,
80
+ };
14
81
  }
15
82
 
16
- const anthropic = createAnthropic({ apiKey: cfg.anthropicApiKey });
17
- return anthropic(cfg.anthropicModel);
83
+ return { model, modelName, apiKey: cfg.anthropicApiKey, provider: 'anthropic' };
18
84
  }