activo 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core/agent.ts CHANGED
@@ -1,6 +1,7 @@
1
+ import * as fs from "fs";
1
2
  import { OllamaClient, ChatMessage } from "./llm/ollama.js";
2
3
  import { Config } from "./config.js";
3
- import { getAllTools, executeTool, ToolCall, ToolResult, Tool } from "./tools/index.js";
4
+ import { getAllTools, selectTools, executeTool, ToolCall, ToolResult, Tool } from "./tools/index.js";
4
5
 
5
6
  export interface AgentEvent {
6
7
  type: "thinking" | "content" | "tool_use" | "tool_result" | "done" | "error";
@@ -23,52 +24,11 @@ export interface AgentResult {
23
24
 
24
25
  const BASE_SYSTEM_PROMPT = `You are ACTIVO, a code quality analyzer. You MUST call tools to perform tasks.
25
26
 
26
- ## CRITICAL RULES
27
+ ## RULES
27
28
  1. Call tool IMMEDIATELY when user requests an action
28
- 2. NEVER output text before calling a tool
29
- 3. NEVER fabricate results - only report actual tool output
30
- 4. After tool returns, summarize in user's language (Korean if user speaks Korean)
31
-
32
- ## TOOLS BY CATEGORY
33
-
34
- ### Document Conversion & RAG
35
- - import_hwp_standards(hwpPath, outputDir): Convert HWP to markdown
36
- - import_pdf_standards(pdfPath, outputDir): Convert PDF to markdown
37
- - index_standards(directory?): Index standards for RAG search (run after import)
38
- - search_standards(query, topK?): Search standards by semantic query
39
- - check_quality_rag(filepath, topK?): Check code using RAG to find relevant standards
40
-
41
- ### Code Analysis (Recommended: use analyze_all)
42
- - analyze_all(path, include?): Analyze all code (Java/JS/TS/Python)
43
- - java_analyze(path): Java code analysis
44
- - java_complexity(path): Java complexity metrics
45
- - spring_check(path): Spring pattern check
46
- - ast_analyze(path): JS/TS AST analysis
47
- - react_check(path): React pattern check
48
- - vue_check(path): Vue pattern check
49
- - python_check(path): Python code analysis
50
-
51
- ### SQL/DB Analysis
52
- - sql_check(path): SQL query analysis
53
- - mybatis_check(path): MyBatis mapper analysis
54
-
55
- ### Web Analysis
56
- - css_check(path): CSS analysis
57
- - html_check(path): HTML analysis
58
- - dependency_check(path): package.json dependency analysis
59
- - openapi_check(path): OpenAPI spec analysis
60
-
61
- ### File Operations
62
- - read_file(path): Read file content
63
- - write_file(path, content): Write file
64
- - list_directory(path): List directory contents
65
- - grep_search(pattern, path): Search pattern in files
66
- - glob_search(pattern): Search files by pattern
67
-
68
- ## EXAMPLE
69
- User: "Analyze src folder"
70
- → Call analyze_all(path="src") immediately
71
- → After result: Summarize findings`;
29
+ 2. NEVER fabricate results - only report actual tool output
30
+ 3. After tool returns, summarize in user's language (Korean if user speaks Korean)
31
+ 4. Use analyze_all for broad code analysis`;
72
32
 
73
33
  // Build system prompt with optional context
74
34
  function buildSystemPrompt(contextSummary?: string): string {
@@ -86,6 +46,396 @@ ${contextSummary}
86
46
  위 내용은 이전 세션에서의 대화 요약입니다. 필요시 참고하세요.`;
87
47
  }
88
48
 
49
+ // ─── Intent Router ───
50
+
51
/**
 * Outcome of the intent router for one user message.
 *
 * `handled: false` means the router did not run a tool and the other fields
 * are left unset; `handled: true` means a tool was executed and the remaining
 * fields describe that call.
 */
interface IntentResult {
  /** True when the router executed a tool on its own. */
  handled: boolean;
  /** Name of the tool that was executed. */
  toolName?: string;
  /** Arguments the tool was invoked with. */
  toolArgs?: Record<string, unknown>;
  /** Result object returned by the tool executor. */
  toolResult?: ToolResult;
  /** Prompt handed to the LLM so it can summarize the tool outcome. */
  summaryPrompt?: string;
}
58
+
59
/** One routing rule: when any keyword occurs in the message, `tool` is run. */
interface IntentPattern {
  /** Substrings looked for in the lower-cased user message. */
  keywords: string[];
  /** Tool name to execute ("_single_file" is a marker, not a real tool). */
  tool: string;
  /** Builds the tool arguments from the target path and the raw message. */
  buildArgs: (path: string, message: string) => Record<string, unknown>;
}

/** Argument builder that forwards only the target path. */
const intentPathArgs = (path: string): Record<string, unknown> => ({ path });

/** Creates an argument builder that forwards the path plus an `include` filter. */
const intentIncludeArgs =
  (...extensions: string[]) =>
  (path: string): Record<string, unknown> => ({ path, include: [...extensions] });

// Intent patterns: keyword groups → tool + args builder.
// Order matters: the first rule with a matching keyword wins.
const INTENT_PATTERNS: IntentPattern[] = [
  // Single file analysis (special marker - resolved at match time)
  {
    keywords: ["분석", "analyze", "검사", "check"],
    tool: "_single_file",
    buildArgs: (path: string) => ({ filepath: path }),
  },
  // analyze_all with Java filter
  { keywords: ["자바", "java"], tool: "analyze_all", buildArgs: intentIncludeArgs("java") },
  // Spring patterns
  { keywords: ["spring", "스프링"], tool: "analyze_all", buildArgs: intentIncludeArgs("java") },
  // Dependency analysis
  {
    keywords: ["의존성", "dependency", "dependencies", "취약점"],
    tool: "dependency_check",
    buildArgs: intentPathArgs,
  },
  // Complexity
  { keywords: ["복잡도", "complexity"], tool: "analyze_all", buildArgs: intentPathArgs },
  // Python
  { keywords: ["python", "파이썬", ".py"], tool: "analyze_all", buildArgs: intentIncludeArgs("py") },
  // Frontend
  {
    keywords: ["react", "리액트", "vue", "뷰", "프론트엔드", "frontend"],
    tool: "analyze_all",
    buildArgs: intentIncludeArgs("js", "ts", "jsx", "tsx", "vue"),
  },
  // CSS
  { keywords: ["css", "scss", "less", "스타일"], tool: "analyze_all", buildArgs: intentIncludeArgs("css") },
  // HTML
  {
    keywords: ["html", "jsp", "접근성", "a11y", "seo"],
    tool: "analyze_all",
    buildArgs: intentIncludeArgs("html"),
  },
  // SQL / MyBatis
  {
    keywords: ["sql", "mybatis", "마이바티스", "쿼리"],
    tool: "analyze_all",
    buildArgs: intentIncludeArgs("java", "xml"),
  },
  // Broad analysis (catch-all, must be last)
  {
    keywords: ["전체분석", "전체 분석", "분석해", "코드품질", "코드 품질", "analyze", "분석", "검사", "check"],
    tool: "analyze_all",
    buildArgs: intentPathArgs,
  },
];
134
+
135
// File extension → single-file tool mapping.
// Declared per tool and flattened into an extension-keyed lookup table.
const FILE_TOOL_MAP: Record<string, string> = Object.fromEntries(
  (
    [
      ["java_analyze", [".java"]],
      ["ast_analyze", [".js", ".ts"]],
      ["react_check", [".jsx", ".tsx"]],
      ["vue_check", [".vue"]],
      ["python_check", [".py"]],
      ["css_check", [".css", ".scss", ".less"]],
      ["html_check", [".html", ".htm", ".jsp"]],
    ] as Array<[string, string[]]>
  ).flatMap(([tool, extensions]) =>
    extensions.map((ext): [string, string] => [ext, tool])
  )
);
151
+
152
+ /**
153
+ * Extract filesystem paths from user message.
154
+ * Handles quoted paths (with spaces), simple paths, and greedy path expansion.
155
+ */
156
+ function extractPaths(message: string): string[] {
157
+ const paths: string[] = [];
158
+
159
+ // 1. Quoted paths: '...' or "..."
160
+ const quotedMatches = message.match(/['"]([/\\][^'"]+)['"]/g);
161
+ if (quotedMatches) {
162
+ for (const m of quotedMatches) {
163
+ paths.push(m.slice(1, -1)); // strip quotes
164
+ }
165
+ }
166
+
167
+ // 2. Simple paths (no spaces) - Unix & Windows
168
+ const unixMatches = message.match(/(?:^|\s)(\/[^\s,;:'"]+)/g);
169
+ if (unixMatches) {
170
+ for (const m of unixMatches) {
171
+ paths.push(m.trim());
172
+ }
173
+ }
174
+ const winMatches = message.match(/(?:^|\s)([A-Z]:\\[^\s,;:'"]+)/gi);
175
+ if (winMatches) {
176
+ for (const m of winMatches) {
177
+ paths.push(m.trim());
178
+ }
179
+ }
180
+
181
+ // 3. Greedy path expansion: if simple match doesn't exist,
182
+ // try extending with subsequent words until path is valid
183
+ if (paths.length === 0 || !paths.some((p) => { try { return fs.existsSync(p); } catch { return false; } })) {
184
+ const words = message.split(/\s+/);
185
+ for (let i = 0; i < words.length; i++) {
186
+ if (words[i].startsWith("/") || /^[A-Z]:\\/i.test(words[i])) {
187
+ // Found a path start, try extending
188
+ let candidate = words[i];
189
+ let bestPath = "";
190
+ // Check initial segment
191
+ try { if (fs.existsSync(candidate)) bestPath = candidate; } catch { /* */ }
192
+ // Extend with subsequent words
193
+ for (let j = i + 1; j < words.length; j++) {
194
+ const extended = candidate + " " + words[j];
195
+ try {
196
+ if (fs.existsSync(extended)) {
197
+ bestPath = extended;
198
+ candidate = extended;
199
+ } else {
200
+ // No more valid extensions - stop
201
+ break;
202
+ }
203
+ } catch {
204
+ break;
205
+ }
206
+ }
207
+ if (bestPath) {
208
+ paths.push(bestPath);
209
+ }
210
+ }
211
+ }
212
+ }
213
+
214
+ // Filter to actually existing paths, deduplicate
215
+ const seen = new Set<string>();
216
+ return paths.filter((p) => {
217
+ if (seen.has(p)) return false;
218
+ seen.add(p);
219
+ try {
220
+ return fs.existsSync(p);
221
+ } catch {
222
+ return false;
223
+ }
224
+ });
225
+ }
226
+
227
+ /**
228
+ * Determine if a path is a single file (not a directory).
229
+ */
230
+ function isSingleFile(p: string): boolean {
231
+ try {
232
+ return fs.statSync(p).isFile();
233
+ } catch {
234
+ return false;
235
+ }
236
+ }
237
+
238
+ /**
239
+ * Resolve the correct tool for a single file based on extension.
240
+ */
241
+ function resolveFileAnalysisTool(filepath: string): { tool: string; args: Record<string, unknown> } | null {
242
+ const ext = filepath.substring(filepath.lastIndexOf(".")).toLowerCase();
243
+ const toolName = FILE_TOOL_MAP[ext];
244
+ if (!toolName) return null;
245
+
246
+ // Some tools use 'filepath', others use 'path'
247
+ const argKey = ["python_check", "css_check", "html_check"].includes(toolName) ? "path" : "filepath";
248
+ return { tool: toolName, args: { [argKey]: filepath } };
249
+ }
250
+
251
+ /**
252
+ * Detect user intent from the message and automatically execute the appropriate tool.
253
+ * Returns IntentResult with handled=true if a tool was executed, false otherwise.
254
+ */
255
+ async function detectAndExecuteIntent(
256
+ userMessage: string,
257
+ onEvent?: (event: AgentEvent) => void
258
+ ): Promise<IntentResult> {
259
+ const msg = userMessage.toLowerCase();
260
+ const paths = extractPaths(userMessage);
261
+
262
+ // No path found → can't auto-route
263
+ if (paths.length === 0) {
264
+ return { handled: false };
265
+ }
266
+
267
+ const targetPath = paths[0];
268
+
269
+ // Check if path is a single file
270
+ if (isSingleFile(targetPath)) {
271
+ const fileInfo = resolveFileAnalysisTool(targetPath);
272
+ if (fileInfo) {
273
+ return await executeIntentTool(fileInfo.tool, fileInfo.args, onEvent);
274
+ }
275
+ // Unknown file type → fall back to LLM
276
+ return { handled: false };
277
+ }
278
+
279
+ // Path is a directory → match intent patterns
280
+ for (const pattern of INTENT_PATTERNS) {
281
+ // Skip the single-file marker for directories
282
+ if (pattern.tool === "_single_file") continue;
283
+
284
+ if (pattern.keywords.some((kw) => msg.includes(kw))) {
285
+ const args = pattern.buildArgs(targetPath, userMessage);
286
+ return await executeIntentTool(pattern.tool, args, onEvent);
287
+ }
288
+ }
289
+
290
+ // Has a directory path but no matching keyword → default to analyze_all
291
+ // (user likely wants some kind of analysis if they provided a path)
292
+ const hasAnalysisHint = /분석|검사|확인|체크|check|analyze|review|scan|report/i.test(msg);
293
+ if (hasAnalysisHint) {
294
+ return await executeIntentTool("analyze_all", { path: targetPath }, onEvent);
295
+ }
296
+
297
+ return { handled: false };
298
+ }
299
+
300
+ /**
301
+ * Execute a tool by name and return an IntentResult with the summary prompt.
302
+ */
303
+ async function executeIntentTool(
304
+ toolName: string,
305
+ toolArgs: Record<string, unknown>,
306
+ onEvent?: (event: AgentEvent) => void
307
+ ): Promise<IntentResult> {
308
+ const toolCall: ToolCall = {
309
+ id: `intent_${Date.now()}_${Math.random().toString(36).slice(2)}`,
310
+ name: toolName,
311
+ arguments: toolArgs,
312
+ };
313
+
314
+ // Emit tool_use start event
315
+ onEvent?.({
316
+ type: "tool_use",
317
+ tool: toolName,
318
+ status: "start",
319
+ args: toolArgs,
320
+ });
321
+
322
+ const result = await executeTool(toolCall);
323
+
324
+ // Emit tool_result event
325
+ onEvent?.({
326
+ type: "tool_result",
327
+ tool: toolName,
328
+ status: result.success ? "complete" : "error",
329
+ result,
330
+ });
331
+
332
+ if (!result.success) {
333
+ return {
334
+ handled: true,
335
+ toolName,
336
+ toolArgs,
337
+ toolResult: result,
338
+ summaryPrompt: `도구 "${toolName}" 실행 중 오류가 발생했습니다: ${result.error}\n사용자에게 오류 내용을 설명해주세요.`,
339
+ };
340
+ }
341
+
342
+ // Compress result to fit in context window
343
+ const compressed = compressAnalysisResult(result.content);
344
+
345
+ return {
346
+ handled: true,
347
+ toolName,
348
+ toolArgs,
349
+ toolResult: result,
350
+ summaryPrompt: `아래는 "${toolName}" 도구의 실행 결과입니다. 사용자에게 한국어로 핵심 내용을 요약해주세요.\n\n${compressed}`,
351
+ };
352
+ }
353
+
354
+ /**
355
+ * Compress analysis result JSON to fit within LLM context window.
356
+ * Extracts only key metrics, removing verbose raw data.
357
+ */
358
+ function compressAnalysisResult(resultContent: string, maxChars: number = 2000): string {
359
+ try {
360
+ const parsed = JSON.parse(resultContent);
361
+
362
+ // analyze_all result
363
+ if (parsed.path && parsed.fileStats) {
364
+ const compact: Record<string, unknown> = {
365
+ path: parsed.path,
366
+ totalFiles: parsed.totalFiles,
367
+ fileStats: parsed.fileStats,
368
+ analysesRun: parsed.analysesRun,
369
+ successful: parsed.successful,
370
+ failed: parsed.failed,
371
+ };
372
+
373
+ // Extract issue summaries (compact)
374
+ if (parsed.issuesSummary?.length > 0) {
375
+ compact.issues = parsed.issuesSummary.map((is: { tool: string; issues: string[] }) => ({
376
+ tool: is.tool,
377
+ issues: is.issues.slice(0, 5),
378
+ }));
379
+ }
380
+
381
+ // Extract per-tool summaries (key metrics only)
382
+ if (parsed.details?.length > 0) {
383
+ compact.analyses = parsed.details.map((d: { tool: string; summary: Record<string, unknown> }) => {
384
+ const s = d.summary;
385
+ const brief: Record<string, unknown> = { tool: d.tool };
386
+
387
+ // Extract numeric/small fields only
388
+ for (const [k, v] of Object.entries(s)) {
389
+ if (typeof v === "number" || typeof v === "boolean") {
390
+ brief[k] = v;
391
+ } else if (typeof v === "string" && v.length < 100) {
392
+ brief[k] = v;
393
+ }
394
+ // Skip arrays/objects (raw data) to save space
395
+ }
396
+
397
+ // Include issues from samples (java_analyze etc.)
398
+ if (Array.isArray((s as any).samples)) {
399
+ const allIssues: unknown[] = [];
400
+ for (const sample of (s as any).samples) {
401
+ if (Array.isArray(sample.result?.issues)) {
402
+ allIssues.push(...sample.result.issues.slice(0, 3));
403
+ }
404
+ }
405
+ if (allIssues.length > 0) {
406
+ brief.issues = allIssues.slice(0, 10);
407
+ }
408
+ }
409
+
410
+ return brief;
411
+ });
412
+ }
413
+
414
+ if (parsed.errors?.length > 0) {
415
+ compact.errors = parsed.errors;
416
+ }
417
+
418
+ const result = JSON.stringify(compact, null, 1);
419
+ return result.length > maxChars ? result.slice(0, maxChars) + "..." : result;
420
+ }
421
+
422
+ // java_analyze or other single-file results
423
+ if (parsed.file || parsed.filepath || parsed.classes || parsed.functions) {
424
+ const result = JSON.stringify(parsed, null, 1);
425
+ return result.length > maxChars ? result.slice(0, maxChars) + "..." : result;
426
+ }
427
+
428
+ // Generic: just truncate
429
+ const result = JSON.stringify(parsed, null, 1);
430
+ return result.length > maxChars ? result.slice(0, maxChars) + "..." : result;
431
+ } catch {
432
+ // Not valid JSON, return truncated raw text
433
+ return resultContent.length > maxChars ? resultContent.slice(0, maxChars) + "..." : resultContent;
434
+ }
435
+ }
436
+
437
+ // ─── Main processing functions ───
438
+
89
439
  export async function processMessage(
90
440
  userMessage: string,
91
441
  history: ChatMessage[],
@@ -94,7 +444,33 @@ export async function processMessage(
94
444
  onEvent?: (event: AgentEvent) => void,
95
445
  contextSummary?: string
96
446
  ): Promise<AgentResult> {
97
- const tools = getAllTools();
447
+ // Try intent router first
448
+ const intent = await detectAndExecuteIntent(userMessage, onEvent);
449
+
450
+ if (intent.handled && intent.summaryPrompt) {
451
+ // Tool already executed → ask LLM to summarize only (no tools = VRAM savings)
452
+ onEvent?.({ type: "thinking" });
453
+
454
+ const summaryMessages: ChatMessage[] = [
455
+ { role: "system", content: BASE_SYSTEM_PROMPT },
456
+ { role: "user", content: intent.summaryPrompt },
457
+ ];
458
+
459
+ const response = await client.chat(summaryMessages); // No tools!
460
+
461
+ onEvent?.({ type: "content", content: response.content });
462
+ onEvent?.({ type: "done" });
463
+
464
+ return {
465
+ content: response.content,
466
+ toolCalls: intent.toolResult
467
+ ? [{ tool: intent.toolName!, args: intent.toolArgs!, result: intent.toolResult }]
468
+ : [],
469
+ };
470
+ }
471
+
472
+ // Fallback: existing LLM-driven tool selection
473
+ const tools = selectTools(userMessage);
98
474
  const systemPrompt = buildSystemPrompt(contextSummary);
99
475
 
100
476
  const messages: ChatMessage[] = [
@@ -178,7 +554,64 @@ export async function* streamProcessMessage(
178
554
  abortSignal?: AbortSignal,
179
555
  contextSummary?: string
180
556
  ): AsyncGenerator<AgentEvent> {
181
- const tools = getAllTools();
557
+ // Try intent router first
558
+ const intent = await detectAndExecuteIntent(userMessage, (event) => {
559
+ // Intentionally a no-op: a generator cannot yield from inside a callback,
560
+ // so the tool events are re-emitted from intent.* after this call returns.
561
+ });
562
+
563
+ if (intent.handled) {
564
+ // Emit the tool events that happened during intent detection
565
+ if (intent.toolName) {
566
+ yield {
567
+ type: "tool_use",
568
+ tool: intent.toolName,
569
+ status: "start",
570
+ args: intent.toolArgs,
571
+ };
572
+
573
+ yield {
574
+ type: "tool_result",
575
+ tool: intent.toolName,
576
+ status: intent.toolResult?.success ? "complete" : "error",
577
+ result: intent.toolResult,
578
+ };
579
+ }
580
+
581
+ if (intent.summaryPrompt) {
582
+ if (abortSignal?.aborted) {
583
+ yield { type: "error", error: "Operation cancelled" };
584
+ return;
585
+ }
586
+
587
+ yield { type: "thinking" };
588
+
589
+ // Stream the LLM summary (no tools = streaming mode in ollama client)
590
+ const summaryMessages: ChatMessage[] = [
591
+ { role: "system", content: BASE_SYSTEM_PROMPT },
592
+ { role: "user", content: intent.summaryPrompt },
593
+ ];
594
+
595
+ for await (const event of client.streamChat(summaryMessages, undefined, abortSignal)) {
596
+ if (abortSignal?.aborted) {
597
+ yield { type: "error", error: "Operation cancelled" };
598
+ return;
599
+ }
600
+ if (event.type === "content" && event.content) {
601
+ yield { type: "content", content: event.content };
602
+ } else if (event.type === "error") {
603
+ yield { type: "error", error: event.error };
604
+ return;
605
+ }
606
+ }
607
+ }
608
+
609
+ yield { type: "done" };
610
+ return;
611
+ }
612
+
613
+ // Fallback: existing LLM-driven tool selection
614
+ const tools = selectTools(userMessage);
182
615
  const systemPrompt = buildSystemPrompt(contextSummary);
183
616
 
184
617
  const messages: ChatMessage[] = [
@@ -29,7 +29,7 @@ const DEFAULT_CONFIG: Config = {
29
29
  ollama: {
30
30
  baseUrl: "http://localhost:11434",
31
31
  model: "mistral:latest",
32
- contextLength: 8192,
32
+ contextLength: 4096,
33
33
  keepAlive: 1800, // 30 minutes
34
34
  },
35
35
  standards: {
@@ -1,5 +1,5 @@
1
1
  import { OllamaConfig } from "../config.js";
2
- import { Tool, ToolCall } from "../tools/types.js";
2
+ import type { Tool, ToolCall } from "../tools/types.js";
3
3
 
4
4
  export interface ChatMessage {
5
5
  role: "system" | "user" | "assistant" | "tool";
@@ -30,6 +30,65 @@ interface OllamaChatResponse {
30
30
  done: boolean;
31
31
  }
32
32
 
33
/**
 * Rough token estimate for a string.
 * Uses ~3 characters per token as a heuristic for mixed Korean/English text;
 * empty or missing text counts as 0 tokens.
 */
function estimateTokens(text: string): number {
  const charCount = text ? text.length : 0;
  return Math.ceil(charCount / 3);
}
38
+
39
/**
 * Rough token estimate for the tool definitions sent to Ollama.
 *
 * Each tool costs its name and description plus a flat 20 tokens of schema
 * overhead; each declared parameter adds its key and description plus a flat
 * 10 tokens.
 */
function estimateToolTokens(tools: Tool[]): number {
  return tools.reduce((total, tool) => {
    // name + description + JSON schema overhead
    let cost = estimateTokens(tool.name) + estimateTokens(tool.description) + 20;

    const properties = tool.parameters?.properties;
    if (properties) {
      for (const [key, prop] of Object.entries(properties)) {
        const description = (prop as { description?: string }).description || "";
        cost += estimateTokens(key) + estimateTokens(description) + 10;
      }
    }

    return total + cost;
  }, 0);
}
53
+
54
/**
 * Prune a conversation so it fits within the model's context window.
 *
 * The first message (system) and the last message (user) are always kept.
 * History in between is added back from newest to oldest until the token
 * budget is exhausted. If the system and last message alone exceed the
 * budget, history is dropped and the system prompt is truncated (never below
 * 200 characters).
 *
 * The input array and its message objects are never modified; a truncated
 * system prompt is returned as a new object.
 *
 * @param messages Conversation, system message first and latest message last.
 * @param maxContextTokens Context window size of the model, in tokens.
 * @param toolTokens Estimated tokens consumed by tool definitions.
 * @returns The messages to send; conversations of 2 or fewer messages are
 *          returned unchanged.
 */
function pruneMessages(
  messages: Array<{ role: string; content: string }>,
  maxContextTokens: number,
  toolTokens: number
): Array<{ role: string; content: string }> {
  if (messages.length <= 2) return messages;

  const responseReserve = 1000; // reserve tokens for model response
  const safetyBuffer = 200;
  const budget = maxContextTokens - toolTokens - responseReserve - safetyBuffer;

  // Always preserve: first message (system) and last message (user)
  const systemMsg = messages[0];
  const lastMsg = messages[messages.length - 1];
  const history = messages.slice(1, -1);

  let usedTokens = estimateTokens(systemMsg.content) + estimateTokens(lastMsg.content);

  // If even system + last message exceeds budget, truncate system prompt.
  // A copy is returned so the caller's system message keeps its full text.
  if (usedTokens > budget) {
    const maxSystemChars = Math.max(200, (budget - estimateTokens(lastMsg.content)) * 3);
    const truncatedSystem = { ...systemMsg, content: systemMsg.content.slice(0, maxSystemChars) };
    return [truncatedSystem, lastMsg];
  }

  // Add messages from newest to oldest
  const kept: Array<{ role: string; content: string }> = [];
  for (let i = history.length - 1; i >= 0; i--) {
    const msgTokens = estimateTokens(history[i].content) + 4; // 4 tokens overhead per message
    if (usedTokens + msgTokens > budget) break;
    kept.unshift(history[i]);
    usedTokens += msgTokens;
  }

  return [systemMsg, ...kept, lastMsg];
}
91
+
33
92
  export class OllamaClient {
34
93
  private baseUrl: string;
35
94
  private model: string;
@@ -63,7 +122,11 @@ export class OllamaClient {
63
122
  messages: ChatMessage[],
64
123
  tools?: Tool[]
65
124
  ): Promise<ChatMessage> {
66
- const ollamaMessages = this.convertMessages(messages);
125
+ let ollamaMessages = this.convertMessages(messages);
126
+
127
+ // Prune messages to fit within context window
128
+ const toolTokens = tools?.length ? estimateToolTokens(tools) : 0;
129
+ ollamaMessages = pruneMessages(ollamaMessages, this.contextLength, toolTokens);
67
130
 
68
131
  const body: Record<string, unknown> = {
69
132
  model: this.model,
@@ -107,7 +170,11 @@ export class OllamaClient {
107
170
  tools?: Tool[],
108
171
  abortSignal?: AbortSignal
109
172
  ): AsyncGenerator<StreamEvent> {
110
- const ollamaMessages = this.convertMessages(messages);
173
+ let ollamaMessages = this.convertMessages(messages);
174
+
175
+ // Prune messages to fit within context window
176
+ const toolTokens = tools?.length ? estimateToolTokens(tools) : 0;
177
+ ollamaMessages = pruneMessages(ollamaMessages, this.contextLength, toolTokens);
111
178
 
112
179
  // Use non-streaming mode when tools are provided to avoid hallucination
113
180
  // (LLM often outputs fake results before tool calls in streaming mode)