vmcode-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTALLATION_METHODS.md +181 -0
- package/LICENSE +21 -0
- package/README.md +199 -0
- package/bin/npm-wrapper.js +171 -0
- package/bin/rg +0 -0
- package/bin/rg.exe +0 -0
- package/config.yaml.example +159 -0
- package/package.json +42 -0
- package/requirements.txt +7 -0
- package/scripts/install.js +132 -0
- package/setup.bat +114 -0
- package/setup.sh +135 -0
- package/src/__init__.py +4 -0
- package/src/core/__init__.py +1 -0
- package/src/core/agentic.py +2342 -0
- package/src/core/chat_manager.py +1201 -0
- package/src/core/config_manager.py +269 -0
- package/src/core/init.py +161 -0
- package/src/core/sub_agent.py +174 -0
- package/src/exceptions.py +75 -0
- package/src/llm/__init__.py +1 -0
- package/src/llm/client.py +149 -0
- package/src/llm/config.py +445 -0
- package/src/llm/prompts.py +569 -0
- package/src/llm/providers.py +402 -0
- package/src/llm/token_tracker.py +220 -0
- package/src/ui/__init__.py +1 -0
- package/src/ui/banner.py +103 -0
- package/src/ui/commands.py +489 -0
- package/src/ui/displays.py +167 -0
- package/src/ui/main.py +351 -0
- package/src/ui/prompt_utils.py +162 -0
- package/src/utils/__init__.py +1 -0
- package/src/utils/editor.py +158 -0
- package/src/utils/gitignore_filter.py +149 -0
- package/src/utils/logger.py +254 -0
- package/src/utils/markdown.py +32 -0
- package/src/utils/settings.py +94 -0
- package/src/utils/tools/__init__.py +55 -0
- package/src/utils/tools/command_executor.py +217 -0
- package/src/utils/tools/create_file.py +143 -0
- package/src/utils/tools/definitions.py +193 -0
- package/src/utils/tools/directory.py +374 -0
- package/src/utils/tools/file_editor.py +345 -0
- package/src/utils/tools/file_helpers.py +109 -0
- package/src/utils/tools/file_reader.py +331 -0
- package/src/utils/tools/formatters.py +458 -0
- package/src/utils/tools/parallel_executor.py +195 -0
- package/src/utils/validation.py +117 -0
- package/src/utils/web_search.py +71 -0
- package/vmcode-proxy/.env.example +5 -0
- package/vmcode-proxy/README.md +235 -0
- package/vmcode-proxy/package-lock.json +947 -0
- package/vmcode-proxy/package.json +20 -0
- package/vmcode-proxy/server.js +248 -0
- package/vmcode-proxy/server.js.bak +157 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"""Provider-specific request/response handlers.
|
|
2
|
+
|
|
3
|
+
This module isolates provider-specific API quirks into handler classes.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from typing import Optional, Dict, Any, Iterator
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from exceptions import LLMResponseError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OpenAIHandler:
    """Handler for providers speaking the OpenAI chat-completions wire format.

    Covers: OpenAI, OpenRouter, GLM, Gemini, Kimi, MiniMax.
    """

    def build_headers(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Build HTTP request headers.

        Adds bearer-token auth when the config is API-typed and carries a
        key, then layers on any provider-specific extra headers.
        """
        headers = {"Content-Type": "application/json"}
        if config.get("api_key") and config.get("type") == "api":
            headers["Authorization"] = f"Bearer {config['api_key']}"
        if "headers_extra" in config:
            headers.update(config["headers_extra"])
        return headers

    def build_payload(self, config: Dict[str, Any], messages: list,
                      tools: Optional[list] = None, stream: bool = True) -> Dict[str, Any]:
        """Build the JSON request body.

        Starts from the config's baseline ``payload`` dict, then layers the
        messages, streaming flag, model fallback, tools, and sampling
        defaults on top (config-supplied values always win).
        """
        payload = dict(config.get("payload", {}))
        payload["messages"] = messages
        payload["stream"] = stream

        # Fall back to the configured model name when the baseline payload
        # doesn't pin one.
        if "model" not in payload:
            fallback_model = config.get("api_model") or config.get("model")
            if fallback_model:
                payload["model"] = fallback_model

        # Tools are passed through untouched (already OpenAI-shaped).
        if tools:
            payload["tools"] = tools

        # Sampling defaults, unless the provider config forbids the knob or
        # the baseline payload already sets it.
        if config.get("allow_temperature", True) and "temperature" not in payload:
            payload["temperature"] = config.get("default_temperature", 0.1)
        if config.get("allow_top_p", True) and "top_p" not in payload:
            payload["top_p"] = config.get("default_top_p", 0.9)

        return payload

    def parse_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
        """Pass through a non-streaming response (already OpenAI-shaped)."""
        return response_json

    def parse_stream(self, response: requests.Response) -> Iterator[Dict[str, Any]]:
        """Iterate an SSE stream, yielding text chunks.

        After the stream ends, a final ``{'__usage__': ...}`` dict is
        yielded if the provider reported usage in any chunk.

        Raises:
            LLMResponseError: on a mid-stream error event or undecodable JSON.
        """
        final_usage = None

        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            decoded = raw_line.decode('utf-8')

            # OpenRouter emits ':'-prefixed SSE keep-alive comments.
            if decoded.startswith(':'):
                continue
            if not decoded.startswith('data: '):
                continue

            body = decoded[6:]
            if body.strip() == '[DONE]':
                break

            try:
                event = json.loads(body)
            except json.JSONDecodeError as e:
                raise LLMResponseError(
                    f"Failed to decode streaming response",
                    details={"original_error": str(e)}
                )

            # Providers can surface errors mid-stream as JSON events.
            if 'error' in event:
                error_msg = event.get('error', {}).get('message', 'Unknown streaming error')
                raise LLMResponseError(
                    f"Streaming error: {error_msg}",
                    details={"error_data": event.get('error')}
                )

            # Usage typically arrives in the final chunk; keep the latest.
            if 'usage' in event:
                final_usage = event['usage']

            event_choices = event.get('choices', [])
            if event_choices:
                piece = event_choices[0].get('delta', {}).get('content')
                if piece is not None:
                    yield piece

        if final_usage:
            yield {'__usage__': final_usage}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class AnthropicHandler:
    """Handler for Anthropic API.

    Anthropic has significant differences from OpenAI:
    - Different endpoint (/messages vs /chat/completions)
    - Different message format (content arrays vs strings)
    - Different tool format (flat vs nested)
    - Different streaming (SSE with event types vs data: lines)
    - Different headers (x-api-key vs Authorization: Bearer)
    - Different parameters (requires max_tokens, forbids top_p with temperature)

    All public methods accept/return OpenAI-shaped data; the conversion to
    and from Anthropic's format is handled internally.
    """

    def build_headers(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Build request headers (Anthropic uses x-api-key)."""
        headers = {"Content-Type": "application/json"}
        # Anthropic authenticates via a dedicated header rather than a
        # Bearer token.
        if config.get("type") == "api" and config.get("api_key"):
            headers["x-api-key"] = config['api_key']
        # Provider-specific extras (e.g. anthropic-version) override nothing
        # above unless keys collide.
        if "headers_extra" in config:
            headers.update(config["headers_extra"])
        return headers

    def build_payload(self, config: Dict[str, Any], messages: list,
                      tools: Optional[list] = None, stream: bool = True) -> Dict[str, Any]:
        """Build request payload (Anthropic format).

        Args:
            config: Provider config; ``payload`` seeds the body, and
                ``api_model``/``model``, ``default_temperature``,
                ``max_tokens`` etc. supply fallbacks.
            messages: OpenAI-style message list (system messages are hoisted
                to Anthropic's top-level ``system`` parameter).
            tools: Optional OpenAI-style tool definitions.
            stream: Whether to request a streaming response.
        """
        # Extract system messages to top-level parameter — Anthropic does not
        # accept role="system" entries inside the messages array.
        system_messages = [msg["content"] for msg in messages if msg.get("role") == "system"]
        system_content = "\n".join(system_messages) if system_messages else None
        non_system_messages = [msg for msg in messages if msg.get("role") != "system"]

        # Convert messages and tools to Anthropic format
        anthropic_messages = self._convert_messages_to_anthropic(non_system_messages)
        anthropic_tools = self._convert_tools_to_anthropic(tools) if tools else None

        payload = {**config.get("payload", {}), "messages": anthropic_messages, "stream": stream}

        # Ensure model is set from config if not in payload
        if "model" not in payload:
            model_name = config.get("api_model") or config.get("model")
            if model_name:
                payload["model"] = model_name

        if system_content:
            payload["system"] = system_content
        if anthropic_tools:
            payload["tools"] = anthropic_tools

        # Set default parameters (Anthropic requires max_tokens)
        if "temperature" not in payload and config.get("allow_temperature", True):
            payload["temperature"] = config.get("default_temperature", 0.1)
        if "max_tokens" not in payload:
            payload["max_tokens"] = config.get("max_tokens", 4096)

        # Anthropic doesn't allow both temperature and top_p
        # Only set top_p if temperature is not set
        if "temperature" not in payload and "top_p" not in payload:
            payload["top_p"] = config.get("default_top_p", 0.9)

        return payload

    def parse_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
        """Convert Anthropic response format to OpenAI-style format."""
        # Anthropic format: {"content": [{"type": "text", "text": "..."}], "usage": {...}}
        # OpenAI format: {"choices": [{"message": {"content": "..."}}], "usage": {...}}

        # Convert Anthropic usage format (input_tokens/output_tokens) to OpenAI format (prompt_tokens/completion_tokens)
        anthropic_usage = response_json.get("usage", {})
        openai_format_usage = {
            'prompt_tokens': anthropic_usage.get('input_tokens', 0),
            'completion_tokens': anthropic_usage.get('output_tokens', 0),
            'total_tokens': anthropic_usage.get('input_tokens', 0) + anthropic_usage.get('output_tokens', 0)
        }

        result = {
            "choices": [],
            "usage": openai_format_usage
        }

        # Extract content from Anthropic's content array
        content_blocks = response_json.get("content", [])
        text_parts = []
        tool_calls = []

        for block in content_blocks:
            if block.get("type") == "text":
                text_parts.append(block.get("text", ""))
            elif block.get("type") == "tool_use":
                # Convert Anthropic tool_use to OpenAI tool_calls format
                tool_calls.append({
                    "id": block.get("id"),
                    "type": "function",
                    "function": {
                        "name": block.get("name"),
                        # Anthropic gives parsed input; OpenAI expects a JSON string.
                        "arguments": json.dumps(block.get("input", {}))
                    }
                })

        # Build OpenAI-style message
        message = {"role": "assistant"}

        # Include either text content or tool calls
        # NOTE(review): when a response mixes text and tool_use blocks, the
        # text is discarded in favor of the tool calls — confirm callers
        # don't need the interleaved text.
        if tool_calls:
            message["content"] = None
            message["tool_calls"] = tool_calls
        else:
            message["content"] = "".join(text_parts)

        result["choices"].append({"message": message})

        return result

    def parse_stream(self, response: requests.Response) -> Iterator[Dict[str, Any]]:
        """Parse Anthropic's SSE-based streaming response.

        Yields text chunks, and finally yields a dict with __usage__ key.

        Anthropic splits usage across two events:
        - message_start: contains input_tokens
        - message_delta: contains output_tokens
        We merge both and convert to OpenAI format (prompt_tokens/completion_tokens).

        Raises:
            LLMResponseError: on an error event or undecodable JSON line.
        """
        usage_data = {}

        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')

                # Anthropic uses SSE format: "event: <type>" followed by "data: <json>"
                # ("event:" lines are ignored; the JSON payload carries its own type.)
                if line.startswith('data: '):
                    data_str = line[6:]
                    try:
                        data = json.loads(data_str)

                        # Check for errors
                        if data.get('type') == 'error':
                            error_msg = data.get('error', {}).get('message', 'Unknown error')
                            raise LLMResponseError(
                                f"Anthropic streaming error: {error_msg}",
                                details={"error_data": data.get('error')}
                            )

                        # Capture input_tokens from message_start events
                        if data.get('type') == 'message_start':
                            message_usage = data.get('message', {}).get('usage', {})
                            if message_usage:
                                usage_data.update(message_usage)

                        # Capture output_tokens from message_delta events
                        if data.get('type') == 'message_delta' and 'usage' in data:
                            usage_data.update(data['usage'])

                        # Extract text from content_block_delta events
                        if data.get('type') == 'content_block_delta':
                            delta = data.get('delta', {})
                            if delta.get('type') == 'text_delta':
                                text = delta.get('text', '')
                                if text:
                                    yield text

                    except json.JSONDecodeError as e:
                        raise LLMResponseError(
                            f"Failed to decode Anthropic streaming response",
                            details={"original_error": str(e)}
                        )

        # Yield usage data as final item if captured
        # Convert Anthropic format (input_tokens/output_tokens) to OpenAI format (prompt_tokens/completion_tokens)
        if usage_data:
            openai_format_usage = {
                'prompt_tokens': usage_data.get('input_tokens', 0),
                'completion_tokens': usage_data.get('output_tokens', 0),
                'total_tokens': usage_data.get('input_tokens', 0) + usage_data.get('output_tokens', 0)
            }
            yield {'__usage__': openai_format_usage}

    @staticmethod
    def _convert_tools_to_anthropic(openai_tools: list) -> list:
        """Convert OpenAI-style tool definitions to Anthropic format.

        OpenAI format: {"type": "function", "function": {"name": "...", "parameters": {...}}}
        Anthropic format: {"name": "...", "description": "...", "input_schema": {...}}

        Non-function tool entries are silently dropped.
        """
        anthropic_tools = []

        for openai_tool in openai_tools:
            if openai_tool.get("type") == "function":
                func = openai_tool.get("function", {})
                anthropic_tool = {
                    "name": func.get("name"),
                    "description": func.get("description", ""),
                    "input_schema": func.get("parameters", {"type": "object", "properties": {}})
                }
                anthropic_tools.append(anthropic_tool)

        return anthropic_tools

    @staticmethod
    def _convert_messages_to_anthropic(openai_messages: list) -> list:
        """Convert OpenAI-style messages to Anthropic format.

        Anthropic requires all content to be an array, not a string.

        OpenAI format:
            {"role": "user", "content": "text"}
            {"role": "tool", "content": "...", "tool_call_id": "..."}

        Anthropic format:
            {"role": "user", "content": [{"type": "text", "text": "..."}]}
            {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "...", "content": "..."}]}
        """
        anthropic_messages = []

        for msg in openai_messages:
            # Handle tool result messages — Anthropic models tool results as
            # user-role tool_result blocks.
            if msg.get("role") == "tool":
                anthropic_msg = {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": msg.get("tool_call_id"),
                            "content": msg.get("content", "")
                        }
                    ]
                }
                anthropic_messages.append(anthropic_msg)
            # Handle user and assistant messages - convert string content to array
            elif msg.get("role") in ("user", "assistant"):
                content = msg.get("content", "")
                tool_calls = msg.get("tool_calls")

                # Build content blocks array
                content_blocks = []

                # Add text content if present (whitespace-only strings are
                # skipped to avoid empty text blocks).
                if isinstance(content, str) and content.strip():
                    content_blocks.append({
                        "type": "text",
                        "text": content
                    })
                elif isinstance(content, list):
                    # Already an array (Anthropic format), use as-is
                    # NOTE(review): this pass-through skips the tool_calls
                    # handling below, so an assistant message with list
                    # content AND tool_calls would lose its tool_use blocks
                    # — confirm that combination cannot occur upstream.
                    anthropic_messages.append(msg)
                    continue

                # Add tool_use blocks if present (for assistant messages with tool calls)
                if tool_calls:
                    for tool_call in tool_calls:
                        content_blocks.append({
                            "type": "tool_use",
                            "id": tool_call.get("id"),
                            "name": tool_call.get("function", {}).get("name"),
                            # OpenAI stores arguments as a JSON string;
                            # Anthropic wants the parsed object.
                            "input": json.loads(tool_call.get("function", {}).get("arguments", "{}"))
                        })

                # Only add message if we have content blocks (text or tool_use)
                if content_blocks:
                    anthropic_msg = {
                        "role": msg.get("role"),
                        "content": content_blocks
                    }
                    anthropic_messages.append(anthropic_msg)
            else:
                # Other message types, pass through
                anthropic_messages.append(msg)

        return anthropic_messages
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
# Handler registry: provider name -> handler class. Everything except
# Anthropic speaks the OpenAI wire format.
HANDLER_REGISTRY = {
    "openai": OpenAIHandler,
    "openrouter": OpenAIHandler,
    "glm": OpenAIHandler,
    "gemini": OpenAIHandler,
    "minimax": OpenAIHandler,
    "kimi": OpenAIHandler,
    "anthropic": AnthropicHandler,
    "local": OpenAIHandler,
}


def get_handler(provider_name: str):
    """Return a freshly constructed handler for the given provider.

    Args:
        provider_name: Name of the provider (matched case-insensitively).

    Returns:
        Handler instance; unknown providers fall back to the
        OpenAI-compatible handler.
    """
    normalized = provider_name.lower()
    handler_class = HANDLER_REGISTRY.get(normalized, OpenAIHandler)
    return handler_class()


__all__ = ['OpenAIHandler', 'AnthropicHandler', 'get_handler']
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Token usage tracking for chat sessions."""
|
|
2
|
+
|
|
3
|
+
class TokenTracker:
    """Tracks token usage across a chat session.

    Maintains three tiers of counters:
      - Session totals (``total_*``): cumulative across the whole session,
        never reset by context compaction (only by :meth:`reset`).
      - Conversation totals (``conv_*``): per-conversation billing,
        cleared by :meth:`reset_conversation` (the /new command).
      - Context size (``current_context_tokens``): length of the current
        message list, updated externally via :meth:`set_context_tokens`.
    """

    def __init__(self):
        self.total_prompt_tokens = 0  # Cumulative input tokens (never reset by compaction)
        self.total_completion_tokens = 0  # Cumulative output tokens (never reset by compaction)
        self.total_tokens = 0  # Cumulative total tokens (never reset by compaction)

        # Conversation tokens: per-conversation billing (reset on /new)
        self.conv_prompt_tokens = 0  # Current conversation input tokens
        self.conv_completion_tokens = 0  # Current conversation output tokens
        self.conv_total_tokens = 0  # Current conversation total tokens

        # Context tokens: current conversation length (all messages in context)
        self.current_context_tokens = 0  # Updated via set_context_tokens()

    def add_usage(self, usage_data):
        """Add token usage from API response.

        Args:
            usage_data: dict with 'prompt_tokens', 'completion_tokens'
                (total derived). Missing keys AND explicit None values both
                count as 0 — some providers report null token counts.
        """
        if not usage_data or not isinstance(usage_data, dict):
            return

        # Fix: ``get(key, 0)`` still returns None when the key exists with a
        # null value, which previously crashed the ``+=`` below; ``or 0``
        # normalizes both missing and None to zero.
        prompt_tokens = usage_data.get('prompt_tokens') or 0
        completion_tokens = usage_data.get('completion_tokens') or 0

        # Update cumulative token counts (accumulated for billing, never reset by compaction)
        self.total_prompt_tokens += prompt_tokens
        self.total_completion_tokens += completion_tokens
        self.total_tokens += prompt_tokens + completion_tokens

        # Update conversation token counts (reset on /new)
        self.conv_prompt_tokens += prompt_tokens
        self.conv_completion_tokens += completion_tokens
        self.conv_total_tokens += prompt_tokens + completion_tokens

    def add_tool_call_usage(self, prompt_tokens=0, completion_tokens=0):
        """Add token usage from tool calls.

        Args:
            prompt_tokens: Additional prompt tokens from tool calls
            completion_tokens: Additional completion tokens from tool calls

        DEPRECATED: Use add_usage({'prompt_tokens': X, 'completion_tokens': Y}) instead.
        """
        import warnings
        warnings.warn(
            "add_tool_call_usage() is deprecated, use add_usage() instead",
            DeprecationWarning,
            stacklevel=2
        )
        self.add_usage({
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        })

    def get_session_summary(self):
        """Return formatted session usage summary string (rich markup)."""
        return (
            f"Session Input: [cyan]{self.total_prompt_tokens:,}[/cyan] | "
            f"Session Output: [cyan]{self.total_completion_tokens:,}[/cyan] | "
            f"Session Total: [cyan]{self.total_tokens:,}[/cyan]"
        )

    def get_all_token_counts(self):
        """Return all token counts as a dictionary for UI display.

        Returns:
            dict with keys: prompt_in, completion_out, total
        """
        return {
            'prompt_in': self.total_prompt_tokens,
            'completion_out': self.total_completion_tokens,
            'total': self.total_tokens
        }

    def reset(self, prompt_tokens=None, completion_tokens=None, total_tokens=None):
        """Reset session counters to zero or to specified values.

        Conversation counters are NOT touched here; use
        reset_conversation() for those.

        Args:
            prompt_tokens: If provided, set total_prompt_tokens to this value
            completion_tokens: If provided, set total_completion_tokens to this value
            total_tokens: If provided, set total_tokens to this value
                (otherwise derived as prompt + completion)
        """
        self.total_prompt_tokens = prompt_tokens if prompt_tokens is not None else 0
        self.total_completion_tokens = completion_tokens if completion_tokens is not None else 0
        if total_tokens is None:
            self.total_tokens = self.total_prompt_tokens + self.total_completion_tokens
        else:
            self.total_tokens = total_tokens
        self.current_context_tokens = 0  # Reset context tokens

    @staticmethod
    def estimate_tokens(text, model=""):
        """Estimate token count using tiktoken.

        Args:
            text: String to estimate tokens for
            model: Optional model name for encoding selection (uses cl100k_base if empty)

        Returns:
            Estimated token count (int); falls back to a chars/4 heuristic
            when tiktoken is not installed.
        """
        if not text:
            return 0

        try:
            import tiktoken
            try:
                enc = tiktoken.encoding_for_model(model) if model else tiktoken.get_encoding("cl100k_base")
            except Exception:
                # Unknown model name — fall back to the default encoding.
                enc = tiktoken.get_encoding("cl100k_base")
            return len(enc.encode(text))
        except ImportError:
            # Fallback to character-based approximation if tiktoken not available
            return len(text) // 4

    def set_context_tokens(self, token_count):
        """Set the current context token count.

        Args:
            token_count: Actual token count of the current message list
        """
        self.current_context_tokens = token_count

    @staticmethod
    def _cost_breakdown(prompt_tokens: int, completion_tokens: int,
                        cost_in: float, cost_out: float) -> dict:
        """Shared cost math: rates are per 1M tokens."""
        input_cost = (prompt_tokens / 1_000_000) * cost_in
        output_cost = (completion_tokens / 1_000_000) * cost_out
        return {
            'input_cost': input_cost,
            'output_cost': output_cost,
            'total_cost': input_cost + output_cost
        }

    def calculate_session_cost(self, cost_in: float, cost_out: float) -> dict:
        """Calculate session cost based on token usage.

        Args:
            cost_in: Cost per 1M input tokens
            cost_out: Cost per 1M output tokens

        Returns:
            Dict with 'input_cost', 'output_cost', 'total_cost' values
        """
        return self._cost_breakdown(
            self.total_prompt_tokens, self.total_completion_tokens, cost_in, cost_out
        )

    def reset_conversation(self):
        """Reset conversation token counters (called on /new).

        Session totals (total_prompt_tokens, total_completion_tokens) are preserved.
        """
        self.conv_prompt_tokens = 0
        self.conv_completion_tokens = 0
        self.conv_total_tokens = 0

    def calculate_conversation_cost(self, cost_in: float, cost_out: float) -> dict:
        """Calculate conversation cost based on token usage.

        Args:
            cost_in: Cost per 1M input tokens
            cost_out: Cost per 1M output tokens

        Returns:
            Dict with 'input_cost', 'output_cost', 'total_cost' values
        """
        return self._cost_breakdown(
            self.conv_prompt_tokens, self.conv_completion_tokens, cost_in, cost_out
        )

    def get_usage_for_prompt(self, context_limit: int = 200_000) -> str:
        """Get formatted usage information for inclusion in agent prompts.

        This provides agents with awareness of their token consumption to help
        them work within context limits. Shows total tokens burned (cumulative
        across all LLM calls), not just conversation context length.

        Args:
            context_limit: The context window limit to compare against (default: 200k)

        Returns:
            Formatted string with usage statistics and guidance
        """
        total_burned = self.total_tokens
        remaining = context_limit - total_burned
        percentage = (total_burned / context_limit) * 100

        # Determine urgency level from percentage thresholds.
        if percentage >= 90:
            urgency = "CRITICAL"
            guidance = "You have nearly exhausted your token budget. Be extremely concise and limit exploration."
        elif percentage >= 75:
            urgency = "HIGH"
            guidance = "You are approaching your token limit. Prioritize focused exploration over breadth."
        elif percentage >= 50:
            urgency = "MODERATE"
            guidance = "You have used half your token budget. Be mindful of exploration scope."
        else:
            urgency = "LOW"
            guidance = "Token usage is within normal bounds."

        return (
            f"## Token Usage Awareness\n\n"
            f"**Status:** {urgency} | **Total Burned:** {total_burned:,} / {context_limit:,} ({percentage:.1f}%)\n"
            f"**Remaining:** {remaining:,} tokens\n\n"
            f"**Guidance:** {guidance}\n\n"
            f"**Note:** This count includes ALL tokens burned across the session "
            f"(all LLM calls, tool results, etc.), not just current conversation context."
        )

    def get_context_summary(self) -> str:
        """Get a brief summary of current context usage.

        Returns:
            Concise string with context and session totals
        """
        return (
            f"Context: {self.current_context_tokens:,} tokens | "
            f"Session total burned: {self.total_tokens:,} tokens"
        )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""User interface layer for vmCode."""
|