vmcode-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/INSTALLATION_METHODS.md +181 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/bin/npm-wrapper.js +171 -0
  5. package/bin/rg +0 -0
  6. package/bin/rg.exe +0 -0
  7. package/config.yaml.example +159 -0
  8. package/package.json +42 -0
  9. package/requirements.txt +7 -0
  10. package/scripts/install.js +132 -0
  11. package/setup.bat +114 -0
  12. package/setup.sh +135 -0
  13. package/src/__init__.py +4 -0
  14. package/src/core/__init__.py +1 -0
  15. package/src/core/agentic.py +2342 -0
  16. package/src/core/chat_manager.py +1201 -0
  17. package/src/core/config_manager.py +269 -0
  18. package/src/core/init.py +161 -0
  19. package/src/core/sub_agent.py +174 -0
  20. package/src/exceptions.py +75 -0
  21. package/src/llm/__init__.py +1 -0
  22. package/src/llm/client.py +149 -0
  23. package/src/llm/config.py +445 -0
  24. package/src/llm/prompts.py +569 -0
  25. package/src/llm/providers.py +402 -0
  26. package/src/llm/token_tracker.py +220 -0
  27. package/src/ui/__init__.py +1 -0
  28. package/src/ui/banner.py +103 -0
  29. package/src/ui/commands.py +489 -0
  30. package/src/ui/displays.py +167 -0
  31. package/src/ui/main.py +351 -0
  32. package/src/ui/prompt_utils.py +162 -0
  33. package/src/utils/__init__.py +1 -0
  34. package/src/utils/editor.py +158 -0
  35. package/src/utils/gitignore_filter.py +149 -0
  36. package/src/utils/logger.py +254 -0
  37. package/src/utils/markdown.py +32 -0
  38. package/src/utils/settings.py +94 -0
  39. package/src/utils/tools/__init__.py +55 -0
  40. package/src/utils/tools/command_executor.py +217 -0
  41. package/src/utils/tools/create_file.py +143 -0
  42. package/src/utils/tools/definitions.py +193 -0
  43. package/src/utils/tools/directory.py +374 -0
  44. package/src/utils/tools/file_editor.py +345 -0
  45. package/src/utils/tools/file_helpers.py +109 -0
  46. package/src/utils/tools/file_reader.py +331 -0
  47. package/src/utils/tools/formatters.py +458 -0
  48. package/src/utils/tools/parallel_executor.py +195 -0
  49. package/src/utils/validation.py +117 -0
  50. package/src/utils/web_search.py +71 -0
  51. package/vmcode-proxy/.env.example +5 -0
  52. package/vmcode-proxy/README.md +235 -0
  53. package/vmcode-proxy/package-lock.json +947 -0
  54. package/vmcode-proxy/package.json +20 -0
  55. package/vmcode-proxy/server.js +248 -0
  56. package/vmcode-proxy/server.js.bak +157 -0
package/src/llm/providers.py
@@ -0,0 +1,402 @@
+ """Provider-specific request/response handlers.
+
+ This module isolates provider-specific API quirks into handler classes.
+ """
+
+ import json
+ from typing import Optional, Dict, Any, Iterator, Union
+ import requests
+
+ from exceptions import LLMResponseError
+
+
+ class OpenAIHandler:
+     """Handler for OpenAI-compatible providers.
+
+     Supports: OpenAI, OpenRouter, GLM, Gemini, Kimi, MiniMax
+     """
+
+     def build_headers(self, config: Dict[str, Any]) -> Dict[str, str]:
+         """Build request headers."""
+         headers = {"Content-Type": "application/json"}
+         if config.get("type") == "api" and config.get("api_key"):
+             headers["Authorization"] = f"Bearer {config['api_key']}"
+         if "headers_extra" in config:
+             headers.update(config["headers_extra"])
+         return headers
+
+     def build_payload(self, config: Dict[str, Any], messages: list,
+                       tools: Optional[list] = None, stream: bool = True) -> Dict[str, Any]:
+         """Build request payload."""
+         payload = {**config.get("payload", {}), "messages": messages, "stream": stream}
+
+         # Ensure model is set from config if not in payload
+         if "model" not in payload:
+             model_name = config.get("api_model") or config.get("model")
+             if model_name:
+                 payload["model"] = model_name
+
+         # Add tools if provided (OpenAI format)
+         if tools:
+             payload["tools"] = tools
+
+         # Set default parameters if not in config
+         if "temperature" not in payload and config.get("allow_temperature", True):
+             payload["temperature"] = config.get("default_temperature", 0.1)
+         if "top_p" not in payload and config.get("allow_top_p", True):
+             payload["top_p"] = config.get("default_top_p", 0.9)
+
+         return payload
+
+     def parse_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
+         """Parse non-streaming response (already in OpenAI format)."""
+         return response_json
+
+     def parse_stream(self, response: requests.Response) -> Iterator[Union[str, Dict[str, Any]]]:
+         """Parse streaming response.
+
+         Yields text chunks, and finally yields a dict with a __usage__ key.
+         """
+         usage_data = None
+
+         for line in response.iter_lines():
+             if line:
+                 line = line.decode('utf-8')
+
+                 # Skip OpenRouter comments (start with ':')
+                 if line.startswith(':'):
+                     continue
+
+                 if line.startswith('data: '):
+                     data_str = line[6:]
+                     if data_str.strip() == '[DONE]':
+                         break
+
+                     try:
+                         data = json.loads(data_str)
+
+                         # Check for mid-stream errors
+                         if 'error' in data:
+                             error_msg = data.get('error', {}).get('message', 'Unknown streaming error')
+                             raise LLMResponseError(
+                                 f"Streaming error: {error_msg}",
+                                 details={"error_data": data.get('error')}
+                             )
+
+                         # Capture usage data if present (usually in final chunk)
+                         if 'usage' in data:
+                             usage_data = data['usage']
+
+                         choices = data.get('choices', [])
+                         if choices:
+                             delta = choices[0].get('delta', {})
+                             content = delta.get('content')
+                             if content is not None:
+                                 yield content
+
+                     except json.JSONDecodeError as e:
+                         raise LLMResponseError(
+                             "Failed to decode streaming response",
+                             details={"original_error": str(e)}
+                         )
+
+         # Yield usage data as final item if captured
+         if usage_data:
+             yield {'__usage__': usage_data}
+
+
+ class AnthropicHandler:
+     """Handler for the Anthropic API.
+
+     Anthropic has significant differences from OpenAI:
+     - Different endpoint (/messages vs /chat/completions)
+     - Different message format (content arrays vs strings)
+     - Different tool format (flat vs nested)
+     - Different streaming (SSE with event types vs data: lines)
+     - Different headers (x-api-key vs Authorization: Bearer)
+     - Different parameters (requires max_tokens, forbids top_p with temperature)
+     """
+
+     def build_headers(self, config: Dict[str, Any]) -> Dict[str, str]:
+         """Build request headers (Anthropic uses x-api-key)."""
+         headers = {"Content-Type": "application/json"}
+         if config.get("type") == "api" and config.get("api_key"):
+             headers["x-api-key"] = config['api_key']
+         if "headers_extra" in config:
+             headers.update(config["headers_extra"])
+         return headers
+
+     def build_payload(self, config: Dict[str, Any], messages: list,
+                       tools: Optional[list] = None, stream: bool = True) -> Dict[str, Any]:
+         """Build request payload (Anthropic format)."""
+         # Extract system messages to top-level parameter
+         system_messages = [msg["content"] for msg in messages if msg.get("role") == "system"]
+         system_content = "\n".join(system_messages) if system_messages else None
+         non_system_messages = [msg for msg in messages if msg.get("role") != "system"]
+
+         # Convert messages and tools to Anthropic format
+         anthropic_messages = self._convert_messages_to_anthropic(non_system_messages)
+         anthropic_tools = self._convert_tools_to_anthropic(tools) if tools else None
+
+         payload = {**config.get("payload", {}), "messages": anthropic_messages, "stream": stream}
+
+         # Ensure model is set from config if not in payload
+         if "model" not in payload:
+             model_name = config.get("api_model") or config.get("model")
+             if model_name:
+                 payload["model"] = model_name
+
+         if system_content:
+             payload["system"] = system_content
+         if anthropic_tools:
+             payload["tools"] = anthropic_tools
+
+         # Set default parameters (Anthropic requires max_tokens)
+         if "temperature" not in payload and config.get("allow_temperature", True):
+             payload["temperature"] = config.get("default_temperature", 0.1)
+         if "max_tokens" not in payload:
+             payload["max_tokens"] = config.get("max_tokens", 4096)
+
+         # Anthropic doesn't allow both temperature and top_p;
+         # only set top_p if temperature is not set
+         if "temperature" not in payload and "top_p" not in payload:
+             payload["top_p"] = config.get("default_top_p", 0.9)
+
+         return payload
+
+     def parse_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
+         """Convert Anthropic response format to OpenAI-style format."""
+         # Anthropic format: {"content": [{"type": "text", "text": "..."}], "usage": {...}}
+         # OpenAI format: {"choices": [{"message": {"content": "..."}}], "usage": {...}}
+
+         # Convert Anthropic usage format (input_tokens/output_tokens) to OpenAI format (prompt_tokens/completion_tokens)
+         anthropic_usage = response_json.get("usage", {})
+         openai_format_usage = {
+             'prompt_tokens': anthropic_usage.get('input_tokens', 0),
+             'completion_tokens': anthropic_usage.get('output_tokens', 0),
+             'total_tokens': anthropic_usage.get('input_tokens', 0) + anthropic_usage.get('output_tokens', 0)
+         }
+
+         result = {
+             "choices": [],
+             "usage": openai_format_usage
+         }
+
+         # Extract content from Anthropic's content array
+         content_blocks = response_json.get("content", [])
+         text_parts = []
+         tool_calls = []
+
+         for block in content_blocks:
+             if block.get("type") == "text":
+                 text_parts.append(block.get("text", ""))
+             elif block.get("type") == "tool_use":
+                 # Convert Anthropic tool_use to OpenAI tool_calls format
+                 tool_calls.append({
+                     "id": block.get("id"),
+                     "type": "function",
+                     "function": {
+                         "name": block.get("name"),
+                         "arguments": json.dumps(block.get("input", {}))
+                     }
+                 })
+
+         # Build OpenAI-style message
+         message = {"role": "assistant"}
+
+         # Include either text content or tool calls
+         if tool_calls:
+             message["content"] = None
+             message["tool_calls"] = tool_calls
+         else:
+             message["content"] = "".join(text_parts)
+
+         result["choices"].append({"message": message})
+
+         return result
+
+     def parse_stream(self, response: requests.Response) -> Iterator[Union[str, Dict[str, Any]]]:
+         """Parse Anthropic's SSE-based streaming response.
+
+         Yields text chunks, and finally yields a dict with a __usage__ key.
+
+         Anthropic splits usage across two events:
+         - message_start: contains input_tokens
+         - message_delta: contains output_tokens
+         We merge both and convert to OpenAI format (prompt_tokens/completion_tokens).
+         """
+         usage_data = {}
+
+         for line in response.iter_lines():
+             if line:
+                 line = line.decode('utf-8')
+
+                 # Anthropic uses SSE format: "event: <type>" followed by "data: <json>"
+                 if line.startswith('data: '):
+                     data_str = line[6:]
+                     try:
+                         data = json.loads(data_str)
+
+                         # Check for errors
+                         if data.get('type') == 'error':
+                             error_msg = data.get('error', {}).get('message', 'Unknown error')
+                             raise LLMResponseError(
+                                 f"Anthropic streaming error: {error_msg}",
+                                 details={"error_data": data.get('error')}
+                             )
+
+                         # Capture input_tokens from message_start events
+                         if data.get('type') == 'message_start':
+                             message_usage = data.get('message', {}).get('usage', {})
+                             if message_usage:
+                                 usage_data.update(message_usage)
+
+                         # Capture output_tokens from message_delta events
+                         if data.get('type') == 'message_delta' and 'usage' in data:
+                             usage_data.update(data['usage'])
+
+                         # Extract text from content_block_delta events
+                         if data.get('type') == 'content_block_delta':
+                             delta = data.get('delta', {})
+                             if delta.get('type') == 'text_delta':
+                                 text = delta.get('text', '')
+                                 if text:
+                                     yield text
+
+                     except json.JSONDecodeError as e:
+                         raise LLMResponseError(
+                             "Failed to decode Anthropic streaming response",
+                             details={"original_error": str(e)}
+                         )
+
+         # Yield usage data as final item if captured
+         # Convert Anthropic format (input_tokens/output_tokens) to OpenAI format (prompt_tokens/completion_tokens)
+         if usage_data:
+             openai_format_usage = {
+                 'prompt_tokens': usage_data.get('input_tokens', 0),
+                 'completion_tokens': usage_data.get('output_tokens', 0),
+                 'total_tokens': usage_data.get('input_tokens', 0) + usage_data.get('output_tokens', 0)
+             }
+             yield {'__usage__': openai_format_usage}
+
+     @staticmethod
+     def _convert_tools_to_anthropic(openai_tools: list) -> list:
+         """Convert OpenAI-style tool definitions to Anthropic format.
+
+         OpenAI format: {"type": "function", "function": {"name": "...", "parameters": {...}}}
+         Anthropic format: {"name": "...", "description": "...", "input_schema": {...}}
+         """
+         anthropic_tools = []
+
+         for openai_tool in openai_tools:
+             if openai_tool.get("type") == "function":
+                 func = openai_tool.get("function", {})
+                 anthropic_tool = {
+                     "name": func.get("name"),
+                     "description": func.get("description", ""),
+                     "input_schema": func.get("parameters", {"type": "object", "properties": {}})
+                 }
+                 anthropic_tools.append(anthropic_tool)
+
+         return anthropic_tools
+
+     @staticmethod
+     def _convert_messages_to_anthropic(openai_messages: list) -> list:
+         """Convert OpenAI-style messages to Anthropic format.
+
+         Anthropic requires all content to be an array, not a string.
+
+         OpenAI format:
+             {"role": "user", "content": "text"}
+             {"role": "tool", "content": "...", "tool_call_id": "..."}
+
+         Anthropic format:
+             {"role": "user", "content": [{"type": "text", "text": "..."}]}
+             {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "...", "content": "..."}]}
+         """
+         anthropic_messages = []
+
+         for msg in openai_messages:
+             # Handle tool result messages
+             if msg.get("role") == "tool":
+                 anthropic_msg = {
+                     "role": "user",
+                     "content": [
+                         {
+                             "type": "tool_result",
+                             "tool_use_id": msg.get("tool_call_id"),
+                             "content": msg.get("content", "")
+                         }
+                     ]
+                 }
+                 anthropic_messages.append(anthropic_msg)
+             # Handle user and assistant messages - convert string content to array
+             elif msg.get("role") in ("user", "assistant"):
+                 content = msg.get("content", "")
+                 tool_calls = msg.get("tool_calls")
+
+                 # Build content blocks array
+                 content_blocks = []
+
+                 # Add text content if present
+                 if isinstance(content, str) and content.strip():
+                     content_blocks.append({
+                         "type": "text",
+                         "text": content
+                     })
+                 elif isinstance(content, list):
+                     # Already an array (Anthropic format), use as-is
+                     anthropic_messages.append(msg)
+                     continue
+
+                 # Add tool_use blocks if present (for assistant messages with tool calls)
+                 if tool_calls:
+                     for tool_call in tool_calls:
+                         content_blocks.append({
+                             "type": "tool_use",
+                             "id": tool_call.get("id"),
+                             "name": tool_call.get("function", {}).get("name"),
+                             "input": json.loads(tool_call.get("function", {}).get("arguments", "{}"))
+                         })
+
+                 # Only add message if we have content blocks (text or tool_use)
+                 if content_blocks:
+                     anthropic_msg = {
+                         "role": msg.get("role"),
+                         "content": content_blocks
+                     }
+                     anthropic_messages.append(anthropic_msg)
+             else:
+                 # Other message types, pass through
+                 anthropic_messages.append(msg)
+
+         return anthropic_messages
+
+
+ # Handler registry - maps provider names to handler classes
+ HANDLER_REGISTRY = {
+     "openai": OpenAIHandler,
+     "openrouter": OpenAIHandler,
+     "glm": OpenAIHandler,
+     "gemini": OpenAIHandler,
+     "minimax": OpenAIHandler,
+     "kimi": OpenAIHandler,
+     "anthropic": AnthropicHandler,
+     "local": OpenAIHandler,
+ }
+
+
+ def get_handler(provider_name: str):
+     """Get a handler instance for the given provider.
+
+     Args:
+         provider_name: Name of the provider
+
+     Returns:
+         Handler instance for the provider (OpenAIHandler for unknown names)
+     """
+     handler_class = HANDLER_REGISTRY.get(provider_name.lower(), OpenAIHandler)
+     return handler_class()
+
+
+ __all__ = ['OpenAIHandler', 'AnthropicHandler', 'get_handler']
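
For orientation, here is a minimal sketch of how the handlers above are driven. The import path, endpoint URL, and credential are placeholder assumptions rather than values taken from this package; the config keys mirror what build_headers and build_payload read.

# Sketch only: import path, endpoint URL, and credential are assumed.
import requests
from providers import get_handler

config = {"type": "api", "api_key": "sk-...", "api_model": "example-model"}

handler = get_handler("openai")  # unknown names fall back to OpenAIHandler
resp = requests.post(
    "https://api.openai.com/v1/chat/completions",  # placeholder endpoint
    headers=handler.build_headers(config),
    json=handler.build_payload(config, [{"role": "user", "content": "hi"}], stream=True),
    stream=True,
)

# parse_stream yields str text chunks, then (optionally) one final
# {'__usage__': {...}} dict with OpenAI-style token counts.
usage = None
for item in handler.parse_stream(resp):
    if isinstance(item, dict) and '__usage__' in item:
        usage = item['__usage__']
    else:
        print(item, end="")

The same loop works unchanged with AnthropicHandler, since both parse_stream implementations normalize usage to prompt_tokens/completion_tokens.
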
package/src/llm/token_tracker.py
@@ -0,0 +1,220 @@
+ """Token usage tracking for chat sessions."""
+
+ class TokenTracker:
+     """Tracks token usage across a chat session."""
+
+     def __init__(self):
+         self.total_prompt_tokens = 0      # Cumulative input tokens (never reset by compaction)
+         self.total_completion_tokens = 0  # Cumulative output tokens (never reset by compaction)
+         self.total_tokens = 0             # Cumulative total tokens (never reset by compaction)
+
+         # Conversation tokens: per-conversation billing (reset on /new)
+         self.conv_prompt_tokens = 0      # Current conversation input tokens
+         self.conv_completion_tokens = 0  # Current conversation output tokens
+         self.conv_total_tokens = 0       # Current conversation total tokens
+
+         # Context tokens: current conversation length (all messages in context)
+         self.current_context_tokens = 0  # Updated via set_context_tokens()
+     def add_usage(self, usage_data):
+         """Add token usage from an API response.
+
+         Args:
+             usage_data: dict with 'prompt_tokens', 'completion_tokens' (total derived)
+         """
+         if not usage_data or not isinstance(usage_data, dict):
+             return
+
+         # Update cumulative token counts (accumulated for billing, never reset by compaction)
+         prompt_tokens = usage_data.get('prompt_tokens', 0)
+         completion_tokens = usage_data.get('completion_tokens', 0)
+         self.total_prompt_tokens += prompt_tokens
+         self.total_completion_tokens += completion_tokens
+         self.total_tokens += prompt_tokens + completion_tokens
+
+         # Update conversation token counts (reset on /new)
+         self.conv_prompt_tokens += prompt_tokens
+         self.conv_completion_tokens += completion_tokens
+         self.conv_total_tokens += prompt_tokens + completion_tokens
+     def add_tool_call_usage(self, prompt_tokens=0, completion_tokens=0):
+         """Add token usage from tool calls.
+
+         DEPRECATED: Use add_usage({'prompt_tokens': X, 'completion_tokens': Y}) instead.
+
+         Args:
+             prompt_tokens: Additional prompt tokens from tool calls
+             completion_tokens: Additional completion tokens from tool calls
+         """
+         import warnings
+         warnings.warn(
+             "add_tool_call_usage() is deprecated, use add_usage() instead",
+             DeprecationWarning,
+             stacklevel=2
+         )
+         self.add_usage({
+             'prompt_tokens': prompt_tokens,
+             'completion_tokens': completion_tokens
+         })
+
+     def get_session_summary(self):
+         """Return a formatted session usage summary string."""
+         return (
+             f"Session Input: [cyan]{self.total_prompt_tokens:,}[/cyan] | "
+             f"Session Output: [cyan]{self.total_completion_tokens:,}[/cyan] | "
+             f"Session Total: [cyan]{self.total_tokens:,}[/cyan]"
+         )
+
+     def get_all_token_counts(self):
+         """Return all token counts as a dictionary for UI display.
+
+         Returns:
+             dict with keys: prompt_in, completion_out, total
+         """
+         return {
+             'prompt_in': self.total_prompt_tokens,
+             'completion_out': self.total_completion_tokens,
+             'total': self.total_tokens
+         }
+
+     def reset(self, prompt_tokens=None, completion_tokens=None, total_tokens=None):
+         """Reset session counters to zero or to specified values.
+
+         Args:
+             prompt_tokens: If provided, set total_prompt_tokens to this value
+             completion_tokens: If provided, set total_completion_tokens to this value
+             total_tokens: If provided, set total_tokens to this value
+         """
+         self.total_prompt_tokens = prompt_tokens if prompt_tokens is not None else 0
+         self.total_completion_tokens = completion_tokens if completion_tokens is not None else 0
+         if total_tokens is None:
+             self.total_tokens = self.total_prompt_tokens + self.total_completion_tokens
+         else:
+             self.total_tokens = total_tokens
+         self.current_context_tokens = 0  # Reset context tokens
+     @staticmethod
+     def estimate_tokens(text, model=""):
+         """Estimate token count using tiktoken.
+
+         Args:
+             text: String to estimate tokens for
+             model: Optional model name for encoding selection (uses cl100k_base if empty)
+
+         Returns:
+             Estimated token count (int)
+         """
+         if not text:
+             return 0
+
+         try:
+             import tiktoken
+             try:
+                 enc = tiktoken.encoding_for_model(model) if model else tiktoken.get_encoding("cl100k_base")
+             except Exception:
+                 enc = tiktoken.get_encoding("cl100k_base")
+             return len(enc.encode(text))
+         except ImportError:
+             # Fall back to a character-based approximation if tiktoken is unavailable
+             return len(text) // 4
+
+     def set_context_tokens(self, token_count):
+         """Set the current context token count.
+
+         Args:
+             token_count: Actual token count of the current message list
+         """
+         self.current_context_tokens = token_count
+
+     def calculate_session_cost(self, cost_in: float, cost_out: float) -> dict:
+         """Calculate session cost based on token usage.
+
+         Args:
+             cost_in: Cost per 1M input tokens
+             cost_out: Cost per 1M output tokens
+
+         Returns:
+             Dict with 'input_cost', 'output_cost', 'total_cost' values
+         """
+         input_cost = (self.total_prompt_tokens / 1_000_000) * cost_in
+         output_cost = (self.total_completion_tokens / 1_000_000) * cost_out
+         return {
+             'input_cost': input_cost,
+             'output_cost': output_cost,
+             'total_cost': input_cost + output_cost
+         }
+
+     def reset_conversation(self):
+         """Reset conversation token counters (called on /new).
+
+         Session totals (total_prompt_tokens, total_completion_tokens) are preserved.
+         """
+         self.conv_prompt_tokens = 0
+         self.conv_completion_tokens = 0
+         self.conv_total_tokens = 0
+
+     def calculate_conversation_cost(self, cost_in: float, cost_out: float) -> dict:
+         """Calculate conversation cost based on token usage.
+
+         Args:
+             cost_in: Cost per 1M input tokens
+             cost_out: Cost per 1M output tokens
+
+         Returns:
+             Dict with 'input_cost', 'output_cost', 'total_cost' values
+         """
+         input_cost = (self.conv_prompt_tokens / 1_000_000) * cost_in
+         output_cost = (self.conv_completion_tokens / 1_000_000) * cost_out
+         return {
+             'input_cost': input_cost,
+             'output_cost': output_cost,
+             'total_cost': input_cost + output_cost
+         }
+
+     def get_usage_for_prompt(self, context_limit: int = 200_000) -> str:
+         """Get formatted usage information for inclusion in agent prompts.
+
+         This gives agents awareness of their token consumption to help
+         them work within context limits. It shows total tokens burned (cumulative
+         across all LLM calls), not just the conversation context length.
+
+         Args:
+             context_limit: The context window limit to compare against (default: 200k)
+
+         Returns:
+             Formatted string with usage statistics and guidance
+         """
+         total_burned = self.total_tokens
+         remaining = context_limit - total_burned
+         percentage = (total_burned / context_limit) * 100
+
+         # Determine urgency level
+         if percentage >= 90:
+             urgency = "CRITICAL"
+             guidance = "You have nearly exhausted your token budget. Be extremely concise and limit exploration."
+         elif percentage >= 75:
+             urgency = "HIGH"
+             guidance = "You are approaching your token limit. Prioritize focused exploration over breadth."
+         elif percentage >= 50:
+             urgency = "MODERATE"
+             guidance = "You have used half your token budget. Be mindful of exploration scope."
+         else:
+             urgency = "LOW"
+             guidance = "Token usage is within normal bounds."
+
+         return (
+             f"## Token Usage Awareness\n\n"
+             f"**Status:** {urgency} | **Total Burned:** {total_burned:,} / {context_limit:,} ({percentage:.1f}%)\n"
+             f"**Remaining:** {remaining:,} tokens\n\n"
+             f"**Guidance:** {guidance}\n\n"
+             f"**Note:** This count includes ALL tokens burned across the session "
+             f"(all LLM calls, tool results, etc.), not just current conversation context."
+         )
+
+     def get_context_summary(self) -> str:
+         """Get a brief summary of current context usage.
+
+         Returns:
+             Concise string with context and session totals
+         """
+         return (
+             f"Context: {self.current_context_tokens:,} tokens | "
+             f"Session total burned: {self.total_tokens:,} tokens"
+         )
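
In the same spirit, a short sketch of the TokenTracker lifecycle, fed by the '__usage__' dict the handlers yield. The import path and the per-million-token rates are illustrative assumptions.

# Sketch only: import path and dollar rates are assumed for illustration.
from token_tracker import TokenTracker

tracker = TokenTracker()
tracker.add_usage({'prompt_tokens': 1200, 'completion_tokens': 300})   # e.g. one '__usage__' item
tracker.set_context_tokens(TokenTracker.estimate_tokens("current message list as text"))

print(tracker.get_context_summary())   # Context: ... | Session total burned: 1,500 tokens
print(tracker.calculate_session_cost(cost_in=3.0, cost_out=15.0))
# {'input_cost': 0.0036, 'output_cost': 0.0045, 'total_cost': 0.0081}

tracker.reset_conversation()   # /new: conversation counters reset, session totals preserved
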
package/src/ui/__init__.py
@@ -0,0 +1 @@
+ """User interface layer for vmCode."""