vmcode-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/INSTALLATION_METHODS.md +181 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/bin/npm-wrapper.js +171 -0
  5. package/bin/rg +0 -0
  6. package/bin/rg.exe +0 -0
  7. package/config.yaml.example +159 -0
  8. package/package.json +42 -0
  9. package/requirements.txt +7 -0
  10. package/scripts/install.js +132 -0
  11. package/setup.bat +114 -0
  12. package/setup.sh +135 -0
  13. package/src/__init__.py +4 -0
  14. package/src/core/__init__.py +1 -0
  15. package/src/core/agentic.py +2342 -0
  16. package/src/core/chat_manager.py +1201 -0
  17. package/src/core/config_manager.py +269 -0
  18. package/src/core/init.py +161 -0
  19. package/src/core/sub_agent.py +174 -0
  20. package/src/exceptions.py +75 -0
  21. package/src/llm/__init__.py +1 -0
  22. package/src/llm/client.py +149 -0
  23. package/src/llm/config.py +445 -0
  24. package/src/llm/prompts.py +569 -0
  25. package/src/llm/providers.py +402 -0
  26. package/src/llm/token_tracker.py +220 -0
  27. package/src/ui/__init__.py +1 -0
  28. package/src/ui/banner.py +103 -0
  29. package/src/ui/commands.py +489 -0
  30. package/src/ui/displays.py +167 -0
  31. package/src/ui/main.py +351 -0
  32. package/src/ui/prompt_utils.py +162 -0
  33. package/src/utils/__init__.py +1 -0
  34. package/src/utils/editor.py +158 -0
  35. package/src/utils/gitignore_filter.py +149 -0
  36. package/src/utils/logger.py +254 -0
  37. package/src/utils/markdown.py +32 -0
  38. package/src/utils/settings.py +94 -0
  39. package/src/utils/tools/__init__.py +55 -0
  40. package/src/utils/tools/command_executor.py +217 -0
  41. package/src/utils/tools/create_file.py +143 -0
  42. package/src/utils/tools/definitions.py +193 -0
  43. package/src/utils/tools/directory.py +374 -0
  44. package/src/utils/tools/file_editor.py +345 -0
  45. package/src/utils/tools/file_helpers.py +109 -0
  46. package/src/utils/tools/file_reader.py +331 -0
  47. package/src/utils/tools/formatters.py +458 -0
  48. package/src/utils/tools/parallel_executor.py +195 -0
  49. package/src/utils/validation.py +117 -0
  50. package/src/utils/web_search.py +71 -0
  51. package/vmcode-proxy/.env.example +5 -0
  52. package/vmcode-proxy/README.md +235 -0
  53. package/vmcode-proxy/package-lock.json +947 -0
  54. package/vmcode-proxy/package.json +20 -0
  55. package/vmcode-proxy/server.js +248 -0
  56. package/vmcode-proxy/server.js.bak +157 -0
package/src/core/chat_manager.py
@@ -0,0 +1,1201 @@
+ """Chat state and server lifecycle management."""
+
+ import os
+ import json
+ import subprocess
+ import time
+ import requests
+ from typing import Optional, IO
+
+ from llm.client import LLMClient
+ from llm.config import get_providers, get_provider_config, reload_config
+ from llm.prompts import build_system_prompt
+ from pathlib import Path
+ from llm.token_tracker import TokenTracker
+ from utils.settings import server_settings, context_settings
+ from utils.logger import MarkdownConversationLogger
+ from core.config_manager import ConfigManager
+
+ # Token counting constants
+ MESSAGE_OVERHEAD_TOKENS = 4 # Approximate tokens for JSON structure: braces, quotes, colons, commas
+ CHAR_BASED_OVERHEAD = 20 # Character overhead for JSON structure in character-based estimation
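+ # How these constants are applied below: the tiktoken path in _count_tokens adds
+ # MESSAGE_OVERHEAD_TOKENS per message on top of the encoded text length, while
+ # _count_tokens_char_based estimates (len(text) + CHAR_BASED_OVERHEAD) // 4 tokens.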
+
+ class ChatManager:
+     """Manages chat state, messages, and provider switching."""
+
+     def __init__(self, compact_trigger_tokens: Optional[int] = None):
+         # Initialize client with provider from global config
+         self.client = LLMClient()
+         self.messages = []
+         self.server_process: Optional[subprocess.Popen] = None
+         self._log_file: Optional[IO] = None # Track llama_server log file handle
+         self.command_history = [] # Track executed commands to prevent repeats
+         self.approve_mode = "safe"
+         self.interaction_mode = "edit" # Default to edit mode
+         self.learning_mode = "balanced" # Default learning mode (for learn interaction mode)
+         self.plan_type = "feature" # Default plan type (for plan interaction mode)
+         self.token_tracker = TokenTracker()
+         self.context_token_estimate = 0
+         # In-session, memory-only task list (used in EDIT workflows)
+         self.task_list = []
+         self.task_list_title = None
+
+         # .gitignore filtering state
+         self._gitignore_spec = None
+         self._gitignore_mtime = None
+         self._repo_root = None
+
+         # Custom compaction threshold (overrides global context_settings if set)
+         self._compact_trigger_tokens = compact_trigger_tokens
+
+         # Conversation logging
+         self.markdown_logger: Optional[MarkdownConversationLogger] = None
+         if context_settings.log_conversations:
+             self.markdown_logger = MarkdownConversationLogger(
+                 conversations_dir=context_settings.conversations_dir
+             )
+
+         # Pre-tool planning toggle (loaded from config)
+         config_manager = ConfigManager()
+         self.pre_tool_planning_enabled = config_manager.get_pre_tool_planning()
+
+         self._init_messages(reset_totals=True)
+
+     def _init_messages(self, reset_totals: bool = True):
+         """Initialize message history with system prompt and agents.md as initial exchange."""
+         # Start new conversation logging session
+         if self.markdown_logger:
+             self.markdown_logger.start_session()
+
+         # Start with system prompt only (uses current self.interaction_mode)
+         self.messages = [{"role": "system", "content": self._build_system_prompt()}]
+
+         # Add agents.md as initial user/assistant exchange (only if it exists in cwd)
+         user_msg, assistant_msg = self._load_agents_md()
+         if user_msg and assistant_msg:
+             self.messages.append({"role": "user", "content": user_msg})
+             self.messages.append({"role": "assistant", "content": assistant_msg})
+
+         # Log initial messages
+         if self.markdown_logger:
+             for msg in self.messages:
+                 self.markdown_logger.log_message(msg)
+
+         # Reset session totals if requested (keep totals across /clear)
+         # For a fresh conversation, cumulative totals start at 0 (no API calls made yet)
+         if reset_totals:
+             self.token_tracker.reset(prompt_tokens=0, completion_tokens=0)
+
+         # Always reset conversation tokens (resets on /new and fresh starts)
+         self.token_tracker.reset_conversation()
+
+         # Initialize context tokens with actual message count (including tools if enabled)
+         self._update_context_tokens()
+         self.context_token_estimate = self.token_tracker.current_context_tokens
+
+         # NOTE: interaction_mode is NOT reset - it persists across /clear
+
+     def _build_system_prompt(self) -> str:
+         """Build system prompt with mode-specific rules."""
+         # Build prompt using modular composition with optional learn_submode, plan_type, or pre_tool_planning
+         if self.interaction_mode == "learn":
+             return build_system_prompt(self.interaction_mode, self.learning_mode, pre_tool_planning_enabled=self.pre_tool_planning_enabled)
+         elif self.interaction_mode == "plan":
+             return build_system_prompt(self.interaction_mode, plan_type=self.plan_type, pre_tool_planning_enabled=self.pre_tool_planning_enabled)
+         else:
+             return build_system_prompt(self.interaction_mode, pre_tool_planning_enabled=self.pre_tool_planning_enabled)
+
+     def update_system_prompt(self):
+         """Rebuild system prompt after mode change."""
+         if not self.messages:
+             raise RuntimeError("Cannot update system prompt: messages array is empty")
+
+         if self.messages[0]["role"] != "system":
+             raise RuntimeError(f"Cannot update system prompt: messages[0] has role '{self.messages[0]['role']}', expected 'system'")
+
+         # Update the system message with current mode
+         self.messages[0]["content"] = self._build_system_prompt()
+         self._update_context_tokens()
+
+     def _load_agents_md(self) -> tuple[str, str]:
+         """Load agents.md content and prepare user/assistant exchange.
+
+         Returns:
+             tuple: (user_message, assistant_message)
+         """
+         # Check for agents.md in current working directory (user's project)
+         agents_path = Path.cwd() / "agents.md"
+
+         if agents_path.exists():
+             map_content = agents_path.read_text(encoding="utf-8").strip()
+             user_msg = (
+                 "Here is the codebase map for this project. "
+                 "This provides an overview of the repository structure and file purposes. "
+                 "Use this as a reference when exploring the codebase.\n\n"
+                 f"## Codebase Map (auto-generated from agents.md)\n\n{map_content}"
+             )
+             assistant_msg = (
+                 "I've received the codebase map. I'll use this as a reference when "
+                 "exploring the repository, but I'll always verify current state by "
+                 "reading files and searching the codebase before making changes."
+             )
+         else:
+             # No codebase map available - skip entirely
+             user_msg = ""
+             assistant_msg = ""
+
+         return user_msg, assistant_msg
+
+     def _update_context_tokens(self, tools=None):
+         """Recount and update current_context_tokens after message changes.
+
+         Args:
+             tools: Optional list of tool definitions to include in token count.
+                 If None, uses current mode's tools (if enabled).
+         """
+         message_tokens = self._count_tokens(self.messages)
+
+         # Count tool tokens if tools are provided or enabled
+         if tools is None:
+             from llm.config import TOOLS_ENABLED
+             if not TOOLS_ENABLED:
+                 self.token_tracker.set_context_tokens(message_tokens)
+                 self.context_token_estimate = message_tokens
+                 return
+             else:
+                 from utils.tools import _tools_for_mode
+                 tools = _tools_for_mode(self.interaction_mode)
+
+         if tools:
+             # Use character-based approximation for Anthropic (tiktoken doesn't support Claude)
+             if self.client.provider == "anthropic":
+                 tools_json = json.dumps(tools)
+                 tool_tokens = len(tools_json) // 4
+             else:
+                 try:
+                     import tiktoken
+                     model = getattr(self.client, "model", "") or ""
+                     try:
+                         enc = tiktoken.encoding_for_model(model)
+                     except Exception:
+                         enc = tiktoken.get_encoding("cl100k_base")
+
+                     # Encode tools list as JSON (which is how it's sent to the API)
+                     tools_json = json.dumps(tools)
+                     tool_tokens = len(enc.encode(tools_json))
+                 except Exception:
+                     # Fallback: character-based approximation
+                     tools_json = json.dumps(tools)
+                     tool_tokens = len(tools_json) // 4
+
+             total_tokens = message_tokens + tool_tokens
+         else:
+             total_tokens = message_tokens
+
+         self.token_tracker.set_context_tokens(total_tokens)
+         self.context_token_estimate = total_tokens
+
+     def _collect_message_text(self, msg) -> str:
+         """Extract all text fields from a message as a single string.
+
+         Collects role, content, tool_calls (id, type, function name/args),
+         and tool_call_id fields. Used by token counting methods.
+
+         Args:
+             msg: Message dict
+
+         Returns:
+             Concatenated string of all message text fields
+         """
+         parts = []
+
+         # Role field
+         role = msg.get('role', '')
+         if role:
+             parts.append(role)
+
+         # Content
+         content = msg.get('content', '')
+         if content:
+             parts.append(str(content))
+
+         # Tool calls (assistant messages)
+         if msg.get('tool_calls'):
+             for tc in msg['tool_calls']:
+                 # id field (e.g., "call_abc123")
+                 tc_id = tc.get('id', '')
+                 if tc_id:
+                     parts.append(tc_id)
+
+                 # type field (usually "function")
+                 tc_type = tc.get('type', 'function')
+                 parts.append(tc_type)
+
+                 # function object
+                 fn = tc.get('function', {})
+                 if fn:
+                     fn_name = fn.get('name', '')
+                     if fn_name:
+                         parts.append(fn_name)
+
+                     fn_args = fn.get('arguments', '{}')
+                     parts.append(fn_args)
+
+         # Tool call ID (tool messages)
+         if msg.get('role') == 'tool' and msg.get('tool_call_id'):
+             parts.append(msg['tool_call_id'])
+
+         return ''.join(p or '' for p in parts)
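+     # For example, an assistant message carrying a single tool call flattens to the
+     # concatenation role + id + type + name + arguments, e.g. (values illustrative):
+     # "assistant" + "call_abc123" + "function" + "read_file" + '{"path": "src/main.py"}'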
+
+     def _count_tokens(self, messages) -> int:
+         """Count tokens accurately using tiktoken for OpenAI, character-based for Anthropic.
+
+         Counts everything the AI receives:
+         - All message types: user, assistant, system, tool
+         - All fields: role, content, tool_calls (id, type, function, name, arguments)
+         - Tool messages: tool_call_id + content
+
+         Args:
+             messages: List of messages to count tokens for
+
+         Returns:
+             int: Estimated token count
+         """
+         # Use character-based approximation for Anthropic (tiktoken doesn't support Claude)
+         if self.client.provider == "anthropic":
+             return self._count_tokens_char_based(messages)
+
+         try:
+             import tiktoken
+             model = getattr(self.client, "model", "") or ""
+             try:
+                 enc = tiktoken.encoding_for_model(model)
+             except Exception:
+                 enc = tiktoken.get_encoding("cl100k_base")
+
+             # Collect text from all messages and encode
+             total = 0
+             for msg in messages:
+                 text = self._collect_message_text(msg)
+                 total += len(enc.encode(text))
+                 total += MESSAGE_OVERHEAD_TOKENS
+
+             return total
+
+         except Exception:
+             # Fallback to character-based estimation
+             return self._count_tokens_char_based(messages)
+
+     def _count_tokens_char_based(self, messages) -> int:
+         """Count tokens using character-based approximation (for Anthropic).
+
+         Uses ~4 characters per token as a rough estimate.
+
+         Args:
+             messages: List of messages to count tokens for
+
+         Returns:
+             int: Estimated token count
+         """
+         total = 0
+         for msg in messages:
+             text = self._collect_message_text(msg)
+             total += (len(text) + CHAR_BASED_OVERHEAD) // 4
+
+         return total
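+     # Worked example: a 400-character message is estimated at
+     # (400 + CHAR_BASED_OVERHEAD) // 4 = (400 + 20) // 4 = 105 tokens.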
+
+     def _generate_compact_summary(self, messages) -> str:
+         """Generate a comprehensive summary of messages.
+
+         Captures:
+         - User questions asked
+         - Tool calls performed (files read, edits, searches)
+         - Key decisions and changes
+
+         Args:
+             messages: List of messages to summarize
+
+         Returns:
+             str: Structured summary preserving context
+         """
+         # Extract user questions
+         user_queries = []
+         for m in messages:
+             if m.get('role') == 'user':
+                 content = m.get('content', '')
+                 if content and not content.startswith("Here is the codebase map"):
+                     user_queries.append(content[:200]) # Truncate long queries
+
+         # Extract tool calls
+         tool_calls = []
+         for m in messages:
+             if m.get('tool_calls'):
+                 for tc in m['tool_calls']:
+                     fn = tc['function']
+                     name = fn.get('name', '')
+                     args = fn.get('arguments', '')
+                     tool_calls.append(f"- {name}: {args[:100]}")
+             elif m.get('role') == 'tool':
+                 # Extract tool result metadata
+                 content = m.get('content', '')
+                 if 'exit_code=' in content:
+                     lines = content.split('\n')[:5] # First 5 lines for context
+                     tool_calls.append(f"Result: {'; '.join(lines[:2])}")
+
+         # Build summary prompt
+         summary_prompt = f"""Summarize the following conversation context.
+
+ User questions:
+ {chr(10).join(f'- {q}' for q in user_queries) if user_queries else 'None'}
+
+ Tool operations performed:
+ {chr(10).join(tool_calls) if tool_calls else 'None'}
+
+ Focus on:
+ 1. What problem was being solved
+ 2. What files were read or modified
+ 3. What searches were performed
+ 4. Key code changes or decisions made
+ 5. Current state/progress
+
+ Provide a concise summary (2-4 paragraphs) that captures all essential context for continuing the work."""
+
+         return summary_prompt
+
+     # ===== Tool Result Compaction =====
+
+     def _find_tool_blocks(self):
+         """Find all tool-result blocks in message history.
+
+         Returns:
+             list: List of block dicts with keys: user_idx, start, end, tool_calls, tool_results
+         """
+         blocks = []
+         i = 0
+
+         while i < len(self.messages):
+             msg = self.messages[i]
+
+             # Look for assistant message with tool_calls
+             if msg.get('role') == 'assistant' and msg.get('tool_calls'):
+
+                 # Find user question before this
+                 user_idx = i - 1
+                 while user_idx >= 0 and self.messages[user_idx].get('role') != 'user':
+                     user_idx -= 1
+
+                 if user_idx < 0:
+                     i += 1
+                     continue
+
+                 # Collect all tool results
+                 tool_results = []
+                 j = i + 1
+                 while j < len(self.messages) and self.messages[j].get('role') == 'tool':
+                     tool_results.append(self.messages[j].get('content', ''))
+                     j += 1
+
+                 # Check if next message is assistant with NO tools (final answer)
+                 if j < len(self.messages):
+                     next_msg = self.messages[j]
+                     if (next_msg.get('role') == 'assistant' and
+                             not next_msg.get('tool_calls')):
+                         # This is a complete block!
+                         blocks.append({
+                             'user_idx': user_idx,
+                             'start': i,
+                             'end': j,
+                             'tool_calls': msg.get('tool_calls', []),
+                             'tool_results': tool_results
+                         })
+
+                 i = j + 1
+             else:
+                 i += 1
+
+         return blocks
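+     # Example: a history slice [user, assistant(tool_calls), tool, assistant] at
+     # indices 5..8 yields a single block:
+     # {'user_idx': 5, 'start': 6, 'end': 8, 'tool_calls': [...], 'tool_results': [...]}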
+
+     def _get_tool_result_messages(self, start_idx, end_idx):
+         """Extract only tool result messages between two indices.
+
+         Args:
+             start_idx: Starting index (exclusive)
+             end_idx: Ending index (exclusive)
+
+         Returns:
+             list: Tool result messages (role='tool') between start_idx and end_idx
+         """
+         tool_results = []
+         for i in range(start_idx + 1, end_idx):
+             if self.messages[i].get('role') == 'tool':
+                 tool_results.append(self.messages[i])
+         return tool_results
+
+     def _extract_metadata_from_result(self, tool_result, key):
+         """Parse metadata like matches_found, lines_read, etc. from tool result.
+
+         Args:
+             tool_result: Tool result content string
+             key: Metadata key to extract (e.g., "matches_found", "lines_read")
+
+         Returns:
+             int or None: Extracted value or None if not found
+         """
+         if not isinstance(tool_result, str):
+             return None
+         for line in tool_result.split('\n'):
+             if line.startswith(f'{key}='):
+                 try:
+                     return int(line.split('=')[1].split()[0])
+                 except (ValueError, IndexError):
+                     return None
+         return None
+
+     def _extract_exit_code(self, tool_result):
+         """Parse exit_code from tool result.
+
+         Args:
+             tool_result: Tool result content string
+
+         Returns:
+             int or None: Exit code or None if not found
+         """
+         if not isinstance(tool_result, str):
+             return None
+         first_line = tool_result.split('\n')[0] if tool_result else ""
+         if first_line.startswith('exit_code='):
+             try:
+                 return int(first_line.split('=')[1].split()[0])
+             except (ValueError, IndexError):
+                 return None
+         return None
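+     # Example: a result whose first line is "exit_code=0" yields 0; results without
+     # an "exit_code=" prefix on the first line (or with a non-integer value) yield None.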
+
+     def _summarize_tool_call(self, tool_call, tool_result):
+         """Extract key info from a single tool call.
+
+         Args:
+             tool_call: Tool call dict from message
+             tool_result: Tool result content string
+
+         Returns:
+             str: Summary string for this tool
+         """
+         try:
+             import json
+             fn_name = tool_call['function']['name']
+             args = json.loads(tool_call['function']['arguments'])
+         except (json.JSONDecodeError, KeyError):
+             return "Used a tool"
+
+         if fn_name == "execute_command":
+             cmd = args.get('command', '')
+             exit_code = self._extract_exit_code(tool_result)
+             matches = self._extract_metadata_from_result(tool_result, 'matches_found')
+
+             if exit_code == 0:
+                 if matches is not None:
+                     return f"Searched for '{cmd[:50]}...' (found {matches} matches)"
+                 else:
+                     return f"Searched: '{cmd[:50]}...'"
+             else:
+                 return f"Search failed: '{cmd[:30]}...'"
+
+         elif fn_name == "read_file":
+             path = args.get('path', '')
+             lines = self._extract_metadata_from_result(tool_result, 'lines_read')
+             start_line = self._extract_metadata_from_result(tool_result, 'start_line')
+
+             if lines is not None:
+                 if start_line is not None and start_line > 1:
+                     end_line = start_line + lines - 1
+                     return f"Read {path} (lines {start_line}-{end_line})"
+                 else:
+                     return f"Read {path} ({lines} lines)"
+             else:
+                 return f"Read {path}"
+
+         elif fn_name == "list_directory":
+             path = args.get('path', '.')
+             items = self._extract_metadata_from_result(tool_result, 'items_count')
+             recursive = args.get('recursive', False)
+
+             action = "Listed recursively" if recursive else "Listed"
+             if items is not None:
+                 return f"{action} {path} ({items} items)"
+             return f"{action} {path}"
+
+         elif fn_name == "edit_file":
+             path = args.get('path', '')
+             search = args.get('search', '')
+             search_preview = search[:30] + "..." if len(search) > 30 else search
+             return f"Edited {path} (replaced '{search_preview}')"
+
+         elif fn_name == "web_search":
+             query = args.get('query', '')
+             results = self._extract_metadata_from_result(tool_result, 'results_found')
+             if results is not None:
+                 return f"Searched web for '{query[:40]}...' ({results} results)"
+             return f"Searched web: '{query[:40]}...'"
+
+         return f"Used {fn_name}"
+
+     def _generate_tool_block_summary(self, tool_calls, tool_results):
+         """Generate a single summary line for all tools in a block.
+
+         Args:
+             tool_calls: List of tool call dicts
+             tool_results: List of tool result strings
+
+         Returns:
+             str: Human-readable summary
+         """
+         # Group tools by type for better readability
+         searches = []
+         reads = []
+         lists = []
+         edits = []
+         web = []
+         failed = []
+
+         for i, tool_call in enumerate(tool_calls):
+             result = tool_results[i] if i < len(tool_results) else ""
+             summary = self._summarize_tool_call(tool_call, result)
+
+             if "failed" in summary.lower():
+                 failed.append(summary)
+             elif "searched" in summary.lower() and "web" not in summary.lower():
+                 searches.append(summary)
+             elif "read" in summary.lower():
+                 reads.append(summary)
+             elif "listed" in summary.lower():
+                 lists.append(summary)
+             elif "edited" in summary.lower():
+                 edits.append(summary)
+             elif "web" in summary.lower():
+                 web.append(summary)
+
+         # Build human-readable summary
+         parts = []
+
+         if searches:
+             count = len(searches)
+             if count == 1:
+                 parts.append(searches[0])
+             else:
+                 parts.append(f"performed {count} searches")
+
+         if reads:
+             if len(reads) == 1:
+                 parts.append(reads[0])
+             else:
+                 parts.append(f"read {len(reads)} files")
+
+         if lists:
+             parts.append(lists[0] if len(lists) == 1 else "listed directories")
+
+         if edits:
+             parts.append(edits[0] if len(edits) == 1 else f"made {len(edits)} edits")
+
+         if web:
+             parts.append(web[0] if len(web) == 1 else "performed web searches")
+
+         if failed:
+             parts.append(f"{len(failed)} tool(s) failed")
+
+         if not parts:
+             return "Used tools for exploration"
+
+         # Join with natural language
+         if len(parts) <= 2:
+             return " and ".join(parts) + "."
+         else:
+             first = ", ".join(parts[:-1])
+             return f"{first}, and {parts[-1]}."
+
+     def compact_tool_results(self):
+         """Replace completed tool-result blocks with summaries.
+
+         Runs after each completed tool sequence to keep context lean
+         without using AI for summarization.
+
+         This is called after the LLM produces a final answer with no more tool calls.
+         """
+         if not context_settings.tool_compaction.enable_per_message_compaction:
+             return
+
+         # Safety: Don't compact if very few messages
+         if len(self.messages) < 6: # Minimum: user+assistant+tool+assistant+user+assistant
+             return
+
+         # Find tool-result blocks
+         blocks = self._find_tool_blocks()
+
+         if not blocks:
+             return
+
+         # Keep recent N blocks intact
+         keep_verbatim = blocks[-context_settings.tool_compaction.keep_recent_tool_blocks:]
+         blocks_to_compact = blocks[:-context_settings.tool_compaction.keep_recent_tool_blocks]
+
+         if not blocks_to_compact:
+             return
+
+         # Track token counts before
+         tokens_before = self._count_tokens(self.messages)
+
+         # Replace old blocks with summaries
+         new_messages = []
+         processed_indices = set()
+
+         for i, msg in enumerate(self.messages):
+             if i in processed_indices:
+                 continue # Skip messages that were compacted
+
+             # Check if this is start of a block to compact
+             block_start = next((b for b in blocks_to_compact if b['start'] == i), None)
+
+             if block_start:
+                 # Check if any tool in this block failed
+                 skip_compaction = False
+                 if not context_settings.tool_compaction.compact_failed_tools:
+                     for tool_result in block_start['tool_results']:
+                         exit_code = self._extract_exit_code(tool_result)
+                         if exit_code is not None and exit_code != 0:
+                             skip_compaction = True
+                             break
+
+                 if skip_compaction:
+                     # Keep this block as-is
+                     for idx in range(block_start['user_idx'], block_start['end'] + 1):
+                         new_messages.append(self.messages[idx])
+                         processed_indices.add(idx)
+                     continue
+
+                 # Generate summary
+                 summary = self._generate_tool_block_summary(
+                     block_start['tool_calls'],
+                     block_start['tool_results']
+                 )
+
+                 # Add user question with summary appended
+                 user_msg = self.messages[block_start['user_idx']].copy()
+                 user_msg['content'] = user_msg['content'] + f"\n\n[Context: {summary}]"
+                 new_messages.append(user_msg)
+
+                 # Add final assistant answer
+                 new_messages.append(self.messages[block_start['end']])
+
+                 # Mark all indices as processed
+                 for idx in range(block_start['start'], block_start['end'] + 1):
+                     processed_indices.add(idx)
+             else:
+                 # Keep this message as-is
+                 new_messages.append(msg)
+
+         self.messages = new_messages
+         self._update_context_tokens()
+
+         # Track token counts after
+         tokens_after = self._count_tokens(self.messages)
+         reduction = tokens_before - tokens_after
+
+     # ===== AI-Based History Compaction =====
+
+     def compact_history(self, console=None, trigger="manual", aggressive=False):
+         """Compact chat history while preserving recent context.
+
+         Strategy:
+         1. Keep last user message verbatim
+         2. Keep assistant tool_calls message (if present) for context
+         3. Keep last assistant response (without tool calls) verbatim
+         4. Summarize everything prior AND all tool result messages
+
+         Aggressive mode:
+         - Pre-compacts recent tool results first (for older blocks)
+         - Also summarizes tool interactions between last user and final answer
+
+         Args:
+             console: Console for notifications (None for silent auto-compact)
+             trigger: "manual" or "auto"
+             aggressive: If True, also compact recent tool results aggressively
+
+         Returns:
+             dict with compaction stats or None
+         """
+         if len(self.messages) < 10: # Need enough history
+             return None
+
+         # In aggressive mode, pre-compact tool results first (for older blocks)
+         if aggressive and trigger == "manual":
+             # Temporarily reduce keep_recent_tool_blocks to 1 for aggressive compaction
+             original_keep = context_settings.tool_compaction.keep_recent_tool_blocks
+             context_settings.tool_compaction.keep_recent_tool_blocks = 1
+             self.compact_tool_results()
+             context_settings.tool_compaction.keep_recent_tool_blocks = original_keep
+
+         # Find the last user message (start from end, skip system/tool messages)
+         last_user_idx = None
+         for i in range(len(self.messages) - 1, -1, -1):
+             role = self.messages[i].get('role')
+             # Look for user message that's not the codebase map
+             if role == 'user' and not self.messages[i].get('tool_calls'):
+                 content = self.messages[i].get('content', '')
+                 if content and not content.startswith("Here is the codebase map"):
+                     last_user_idx = i
+                     break
+
+         if last_user_idx is None or last_user_idx < 3:
+             return None # Not enough history to compact
+
+         # Find the last assistant message WITHOUT tool calls (final answer)
+         last_assistant_without_tools_idx = None
+         for i in range(len(self.messages) - 1, -1, -1):
+             msg = self.messages[i]
+             if msg.get('role') == 'assistant' and not msg.get('tool_calls'):
+                 # This is a final answer
+                 last_assistant_without_tools_idx = i
+                 break
+
+         if last_assistant_without_tools_idx is None:
+             return None # No final answer found
+
+         # Determine what to keep vs summarize
+         # We always keep: system prompt, last user message, assistant tool_calls (if present), last assistant answer
+         # We summarize: everything between system prompt and last user message,
+         # AND all tool result messages (but not the tool_calls message)
+
+         # Case 1: Last assistant answer is directly after last user message
+         # (no tools were called)
+         if last_assistant_without_tools_idx == last_user_idx + 1:
+             # Original behavior: keep from last_user_idx, summarize before
+             messages_to_keep = self.messages[last_user_idx:]
+             messages_to_summarize = self.messages[1:last_user_idx]
+         else:
+             # Case 2: There are tool interactions between last user and last assistant
+             # Keep: last user message + assistant tool_calls + last assistant answer
+             # Summarize: everything before last user + all tool results
+             messages_to_keep = [self.messages[last_user_idx]] # User message
+
+             # Find the assistant message with tool_calls (should be right after user)
+             # This preserves context about what tools were executed
+             for i in range(last_user_idx + 1, last_assistant_without_tools_idx):
+                 if self.messages[i].get('role') == 'assistant' and self.messages[i].get('tool_calls'):
+                     messages_to_keep.append(self.messages[i])
+                     break
+
+             messages_to_keep.append(self.messages[last_assistant_without_tools_idx]) # Final answer
+
+             # Summarize: everything before last user + all tool result messages
+             messages_to_summarize = (
+                 self.messages[1:last_user_idx] + # History before last user
+                 self._get_tool_result_messages(last_user_idx, last_assistant_without_tools_idx) # Tool results only
+             )
+
+         if not messages_to_summarize:
+             return None
+
+         # Generate comprehensive summary using extracted context
+         summary_prompt_content = self._generate_compact_summary(messages_to_summarize)
+
+         # Track token counts before (total tokens including system prompt + messages + tools)
+         self._update_context_tokens()
+         tokens_before = self.token_tracker.current_context_tokens
+
+         # Call LLM to generate summary
+         summary_prompt = [
+             {
+                 "role": "system",
+                 "content": (
+                     "You are a helpful assistant that summarizes conversation context. "
+                     "Provide clear, concise summaries that capture essential information for continuing work."
+                 ),
+             },
+             {
+                 "role": "user",
+                 "content": summary_prompt_content,
+             },
+         ]
+
+         response = self.client.chat_completion(summary_prompt, stream=False, tools=None)
+         if isinstance(response, str):
+             if console and trigger == "manual":
+                 console.print(f"Compaction failed: {response}", style="red")
+             return None
+
+         try:
+             summary_text = response["choices"][0]["message"].get("content", "").strip()
+         except (KeyError, IndexError, TypeError):
+             summary_text = ""
+
+         if not summary_text:
+             if console and trigger == "manual":
+                 console.print("Compaction failed: empty summary.", style="red")
+             return None
+
+         # Build new history: system prompt + summary + recent messages
+         summary_message = {
+             "role": "system",
+             "content": f"Previous conversation context (summarized):\n\n{summary_text}"
+         }
+
+         self.messages = [self.messages[0]] + [summary_message] + messages_to_keep
+
+         # Update token tracking accurately (include system prompt + messages + tools)
+         self._update_context_tokens()
+         tokens_after = self.token_tracker.current_context_tokens
+         summary_usage = response.get("usage", {})
+
+         # Add summary generation tokens to cumulative usage
+         self.token_tracker.add_usage(summary_usage)
+
+         # Update context estimate (keeps cumulative API usage intact)
+         self.context_token_estimate = tokens_after
+
+         # Notify only for manual trigger
+         if console and trigger == "manual":
+             reduction = tokens_before - tokens_after
+             console.print(
+                 f"[dim]Compacted history: {tokens_before:,} → {tokens_after:,} tokens "
+                 f"(-{reduction:,} / {-100 * reduction // (tokens_before or 1)}%)[/dim]"
+             )
+
+         return {
+             "trigger": trigger,
+             "before_tokens": tokens_before,
+             "after_tokens": tokens_after,
+             "summary": summary_text,
+         }
+
+     def maybe_auto_compact(self, console=None):
+         """Check token count and auto-compact if over threshold.
+
+         Args:
+             console: None for silent operation (no user notification)
+         """
+         # Check against total context tokens (system prompt + messages + tools)
+         self._update_context_tokens()
+         total_tokens = self.token_tracker.current_context_tokens
+
+         # Use custom threshold if set, otherwise use global setting
+         trigger_threshold = (
+             self._compact_trigger_tokens
+             if self._compact_trigger_tokens is not None
+             else context_settings.compact_trigger_tokens
+         )
+
+         if total_tokens >= trigger_threshold:
+             # Auto-compact silently (no notification)
+             self.compact_history(console=None, trigger="auto")
+
+     def get_gitignore_spec(self, repo_root: Path):
+         """Get cached or load PathSpec object for .gitignore filtering.
+
+         Caches the spec and reloads if .gitignore is modified.
+
+         Args:
+             repo_root: Repository root directory
+
+         Returns:
+             pathspec.PathSpec or None if .gitignore doesn't exist
+         """
+         gitignore_path = repo_root / ".gitignore"
+
+         # Check if we need to reload
+         current_mtime = None
+         if gitignore_path.exists():
+             current_mtime = gitignore_path.stat().st_mtime
+
+         # Reload if: (1) not initialized, (2) repo changed, (3) file modified
+         if (
+             self._gitignore_spec is None
+             or self._repo_root != repo_root
+             or current_mtime != self._gitignore_mtime
+         ):
+             from utils.gitignore_filter import load_gitignore_spec
+
+             self._repo_root = repo_root
+             self._gitignore_mtime = current_mtime
+             self._gitignore_spec = load_gitignore_spec(repo_root)
+
+         return self._gitignore_spec
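+     # The returned pathspec.PathSpec can be queried directly, e.g.
+     # spec.match_file("build/output.o") is True when .gitignore ignores that path.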
+
+     def switch_provider(self, provider_name):
+         """Switch LLM provider.
+
+         Args:
+             provider_name: Provider name ('local' or 'openrouter')
+
+         Returns:
+             str: Result message
+         """
+         providers = get_providers()
+         if provider_name not in providers:
+             return f"Invalid provider. Use /provider to list. Available: {', '.join(providers)}"
+
+         previous_provider = self.client.provider
+
+         # Terminate server if switching away from local
+         if previous_provider == "local" and provider_name != "local":
+             self.cleanup()
+
+         if self.client.switch_provider(provider_name):
+             self._init_messages()
+             if provider_name == "local":
+                 server = self.start_server_if_needed()
+                 if not server:
+                     # Failed to start server - revert
+                     self.client.switch_provider(previous_provider)
+                     self._init_messages()
+                     return f"Failed to start local server. Reverted to {previous_provider} provider."
+                 self.server_process = server
+                 return f"Switched to {provider_name} provider (server ready)."
+             return f"Switched to {provider_name} provider."
+         return "Provider switch failed."
+
+     def reload_config(self):
+         """Reload configuration from disk and update client.
+
+         This should be called after any config change (provider, model, api key).
+         """
+         reload_config()
+         self.client.sync_provider_from_config()
+
+     # ===== Config Methods (for agent use) =====
+
+     def set_provider(self, provider_name: str) -> str:
+         """Set provider for current session (agent-accessible).
+
+         Args:
+             provider_name: Provider name to switch to.
+
+         Returns:
+             str: Result message.
+         """
+         return self.switch_provider(provider_name)
+
+     def start_server_if_needed(self):
+         """Start local server if using local provider and not already running.
+
+         Returns:
+             subprocess.Popen: Server process or None
+         """
+         if self.client.provider == "local" and not self.server_process:
+             return self._start_local_server()
+         return None
+
+     def _start_local_server(self):
+         """Start llama-server process and wait for health check.
+
+         Returns:
+             subprocess.Popen: Server process or None if failed
+         """
+         from llm.config import get_provider_config, _CONFIG
+
+         local_config = get_provider_config("local")
+         server_path = _CONFIG.get("LOCAL_SERVER_PATH", local_config["config_keys"]["LOCAL_SERVER_PATH"])
+         model_path = local_config.get("model", "")
+         host = local_config["extra"]["host"]
+         port = local_config["extra"]["port"]
+
+         args = [
+             server_path,
+             "-m", model_path,
+             "-ngl", str(server_settings.ngl_layers),
+             "--split-mode", "none",
+             "--ctx-size", str(server_settings.ctx_size),
+             "--n-predict", str(server_settings.n_predict),
+             "--rope-scale", str(server_settings.rope_scale),
+             "--host", host,
+             "--port", str(port),
+             "--jinja",
+         ]
+
+         # Restrict to RTX 5070 Ti only (GPU 0)
+         env = os.environ.copy()
+         env["CUDA_VISIBLE_DEVICES"] = "0"
+
+         # Log stderr to file for debugging
+         log_path = Path(__file__).resolve().parents[2] / "llama_server.log"
+         self._log_file = open(log_path, "w")
+
+         process = subprocess.Popen(
+             args,
+             stdout=subprocess.DEVNULL,
+             stderr=self._log_file,
+             env=env,
+         )
+
+         health_url = f"http://{host}:{port}/health"
+         for i in range(server_settings.health_check_timeout_sec):
+             try:
+                 r = requests.get(health_url, timeout=2)
+                 if r.status_code == 200:
+                     data = r.json()
+                     if data.get("status") == "ok":
+                         return process
+             except Exception:
+                 pass
+             time.sleep(server_settings.health_check_interval_sec)
+
+         # Server failed health check - clean up resources
+         if process:
+             process.terminate()
+             process.wait()
+         if self._log_file:
+             self._log_file.close()
+             self._log_file = None
+         return None
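+     # While loading, the llama-server health endpoint may answer with non-200 responses;
+     # once the model is ready it returns {"status": "ok"}, which ends the loop above
+     # (host and port come from the "local" provider config, e.g. 127.0.0.1:8080).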
+
+     def cycle_approve_mode(self) -> str:
+         """Cycle to next approval mode (for Edit mode) or plan type (for Plan mode).
+
+         Returns:
+             str: The new approval mode or plan type.
+         """
+         # In Plan mode, cycle plan types instead of approval modes
+         if self.interaction_mode == "plan":
+             return self.cycle_plan_type()
+
+         # In Edit/Learn mode, cycle approval modes
+         from llm.config import APPROVE_MODES
+         modes = APPROVE_MODES
+         try:
+             next_index = (modes.index(self.approve_mode) + 1) % len(modes)
+         except ValueError:
+             next_index = 0
+         self.approve_mode = modes[next_index]
+         return self.approve_mode
+
+     def cycle_plan_type(self) -> str:
+         """Cycle to next plan type (for Plan interaction mode).
+
+         Returns:
+             str: The new plan type.
+         """
+         from llm.config import PLAN_TYPES
+         modes = PLAN_TYPES
+         try:
+             next_index = (modes.index(self.plan_type) + 1) % len(modes)
+         except ValueError:
+             next_index = 0
+         self.plan_type = modes[next_index]
+         # Update system prompt to reflect new plan type
+         if self.interaction_mode == "plan":
+             self.update_system_prompt()
+             # Sync conversation log to reflect plan type changes
+             self.sync_log()
+         return self.plan_type
+
+     def toggle_interaction_mode(self) -> str:
+         """Toggle between plan/edit/learn modes.
+
+         Returns:
+             str: The new interaction mode.
+         """
+         modes = ("edit", "plan", "learn")
+         current_index = modes.index(self.interaction_mode)
+         self.interaction_mode = modes[(current_index + 1) % len(modes)]
+         self.update_system_prompt()
+         # Sync conversation log to reflect mode changes
+         self.sync_log()
+         return self.interaction_mode
+
+     def cycle_learning_mode(self) -> str:
+         """Cycle to next learning mode (for Learn interaction mode).
+
+         Returns:
+             str: The new learning mode.
+         """
+         from llm.config import LEARNING_MODES
+         modes = LEARNING_MODES
+         try:
+             next_index = (modes.index(self.learning_mode) + 1) % len(modes)
+         except ValueError:
+             next_index = 0
+         self.learning_mode = modes[next_index]
+         # Update system prompt to reflect new learning mode
+         if self.interaction_mode == "learn":
+             self.update_system_prompt()
+             # Sync conversation log to reflect learning mode changes
+             self.sync_log()
+         return self.learning_mode
+
+     def reset_session(self):
+         """Reset chat session (clear messages and history).
+
+         This is a public wrapper for _init_messages that also clears
+         command history.
+         """
+         # End current conversation logging session before reset
+         if self.markdown_logger:
+             self.markdown_logger.end_session()
+
+         self._init_messages(reset_totals=False)
+         self.command_history.clear()
+         self.task_list.clear()
+         self.task_list_title = None
+
+     def log_message(self, message: dict):
+         """Log a message to the conversation logger.
+
+         Args:
+             message: Message dict to log
+         """
+         if self.markdown_logger:
+             self.markdown_logger.log_message(message)
+
+     def sync_log(self):
+         """Rewrite the entire conversation log to match current message state.
+
+         This should be called after any operation that modifies the messages array:
+         - After adding new messages
+         - After compaction
+         - After mode changes (which modify system prompts)
+         """
+         if self.markdown_logger:
+             self.markdown_logger.rewrite_log(self.messages)
+
+     def end_conversation(self):
+         """End the current conversation logging session."""
+         if self.markdown_logger:
+             self.markdown_logger.end_session()
+
+     def toggle_logging(self):
+         """Toggle conversation logging on/off.
+
+         Returns:
+             bool: New logging state (True if enabled, False if disabled)
+         """
+         from utils.logger import MarkdownConversationLogger
+
+         if self.markdown_logger:
+             # Disable logging
+             self.markdown_logger.end_session()
+             self.markdown_logger = None
+             return False
+         else:
+             # Enable logging
+             self.markdown_logger = MarkdownConversationLogger(
+                 conversations_dir=context_settings.conversations_dir
+             )
+             # Start a new session and log current messages
+             self.markdown_logger.start_session()
+             for msg in self.messages:
+                 self.markdown_logger.log_message(msg)
+             return True
+
+     def cleanup(self):
+         """Terminate server process if running."""
+         # End conversation session on cleanup
+         if self.markdown_logger:
+             self.markdown_logger.end_session()
+
+         if self.server_process:
+             self.server_process.terminate()
+             self.server_process.wait()
+
+         # Close log file handle if open
+         if self._log_file:
+             self._log_file.close()
+             self._log_file = None