code-puppy 0.0.172__py3-none-any.whl → 0.0.174__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy/agent.py +14 -14
- code_puppy/agents/__init__.py +4 -6
- code_puppy/agents/agent_manager.py +15 -187
- code_puppy/agents/base_agent.py +798 -4
- code_puppy/command_line/command_handler.py +40 -41
- code_puppy/command_line/mcp/add_command.py +1 -1
- code_puppy/command_line/mcp/install_command.py +1 -1
- code_puppy/command_line/mcp/start_all_command.py +3 -6
- code_puppy/command_line/mcp/start_command.py +0 -5
- code_puppy/command_line/mcp/stop_all_command.py +3 -6
- code_puppy/command_line/mcp/stop_command.py +2 -6
- code_puppy/command_line/model_picker_completion.py +2 -2
- code_puppy/command_line/prompt_toolkit_completion.py +2 -2
- code_puppy/config.py +2 -3
- code_puppy/main.py +13 -49
- code_puppy/messaging/message_queue.py +4 -4
- code_puppy/summarization_agent.py +2 -2
- code_puppy/tools/agent_tools.py +5 -4
- code_puppy/tools/browser/vqa_agent.py +1 -3
- code_puppy/tools/command_runner.py +1 -1
- code_puppy/tui/app.py +49 -78
- code_puppy/tui/screens/settings.py +2 -2
- code_puppy/tui_state.py +55 -0
- {code_puppy-0.0.172.dist-info → code_puppy-0.0.174.dist-info}/METADATA +2 -2
- {code_puppy-0.0.172.dist-info → code_puppy-0.0.174.dist-info}/RECORD +29 -33
- code_puppy/agents/agent_orchestrator.json +0 -26
- code_puppy/agents/runtime_manager.py +0 -272
- code_puppy/command_line/meta_command_handler.py +0 -153
- code_puppy/message_history_processor.py +0 -486
- code_puppy/state_management.py +0 -159
- {code_puppy-0.0.172.data → code_puppy-0.0.174.data}/data/code_puppy/models.json +0 -0
- {code_puppy-0.0.172.dist-info → code_puppy-0.0.174.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.172.dist-info → code_puppy-0.0.174.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.172.dist-info → code_puppy-0.0.174.dist-info}/licenses/LICENSE +0 -0
code_puppy/agents/base_agent.py
CHANGED
@@ -1,17 +1,62 @@
 """Base agent configuration class for defining agent properties."""
+import math

+import mcp
+import signal
+
+import asyncio
+
+import json
 import uuid
 from abc import ABC, abstractmethod
-from
+from pydantic_ai import UsageLimitExceeded
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
+
+import pydantic
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    TextPart,
+    ToolCallPart,
+    ToolCallPartDelta,
+    ToolReturn,
+    ToolReturnPart,
+)
+
+from pydantic_ai.settings import ModelSettings
+from pydantic_ai.models.openai import OpenAIModelSettings
+from pydantic_ai import Agent as PydanticAgent
+
+# Consolidated relative imports
+from code_puppy.config import (
+    get_agent_pinned_model,
+    get_compaction_strategy,
+    get_compaction_threshold,
+    get_message_limit,
+    get_global_model_name,
+    get_protected_token_count,
+    get_value,
+    load_mcp_server_configs,
+)
+from code_puppy.messaging import emit_info, emit_error, emit_warning, emit_system_message
+from code_puppy.model_factory import ModelFactory
+from code_puppy.summarization_agent import run_summarization_sync
+from code_puppy.mcp_ import ServerConfig, get_mcp_manager
+from code_puppy.tools.common import console


 class BaseAgent(ABC):
     """Base class for all agent configurations."""
-
+
     def __init__(self):
         self.id = str(uuid.uuid4())
         self._message_history: List[Any] = []
         self._compacted_message_hashes: Set[str] = set()
+        # Agent construction cache
+        self._code_generation_agent = None
+        self._last_model_name: Optional[str] = None
+        # Puppy rules loaded lazily
+        self._puppy_rules: Optional[str] = None

     @property
     @abstractmethod
@@ -121,5 +166,754 @@ class BaseAgent(ABC):
         Returns:
             Model name to use for this agent, or None to use global default.
         """
-
-
+        pinned = get_agent_pinned_model(self.name)
+        if pinned == "" or pinned is None:
+            return get_global_model_name()
+        return pinned
+
+    # Message history processing methods (moved from state_management.py and message_history_processor.py)
+    def _stringify_part(self, part: Any) -> str:
+        """Create a stable string representation for a message part.
+
+        We deliberately ignore timestamps so identical content hashes the same even when
+        emitted at different times. This prevents status updates from blowing up the
+        history when they are repeated with new timestamps."""
+
+        attributes: List[str] = [part.__class__.__name__]
+
+        # Role/instructions help disambiguate parts that otherwise share content
+        if hasattr(part, "role") and part.role:
+            attributes.append(f"role={part.role}")
+        if hasattr(part, "instructions") and part.instructions:
+            attributes.append(f"instructions={part.instructions}")
+
+        if hasattr(part, "tool_call_id") and part.tool_call_id:
+            attributes.append(f"tool_call_id={part.tool_call_id}")
+
+        if hasattr(part, "tool_name") and part.tool_name:
+            attributes.append(f"tool_name={part.tool_name}")
+
+        content = getattr(part, "content", None)
+        if content is None:
+            attributes.append("content=None")
+        elif isinstance(content, str):
+            attributes.append(f"content={content}")
+        elif isinstance(content, pydantic.BaseModel):
+            attributes.append(f"content={json.dumps(content.model_dump(), sort_keys=True)}")
+        elif isinstance(content, dict):
+            attributes.append(f"content={json.dumps(content, sort_keys=True)}")
+        else:
+            attributes.append(f"content={repr(content)}")
+        result = "|".join(attributes)
+        return result
+
+    def hash_message(self, message: Any) -> int:
+        """Create a stable hash for a model message that ignores timestamps."""
+        role = getattr(message, "role", None)
+        instructions = getattr(message, "instructions", None)
+        header_bits: List[str] = []
+        if role:
+            header_bits.append(f"role={role}")
+        if instructions:
+            header_bits.append(f"instructions={instructions}")
+
+        part_strings = [self._stringify_part(part) for part in getattr(message, "parts", [])]
+        canonical = "||".join(header_bits + part_strings)
+        return hash(canonical)
+
+    def stringify_message_part(self, part) -> str:
+        """
+        Convert a message part to a string representation for token estimation or other uses.
+
+        Args:
+            part: A message part that may contain content or be a tool call
+
+        Returns:
+            String representation of the message part
+        """
+        result = ""
+        if hasattr(part, "part_kind"):
+            result += part.part_kind + ": "
+        else:
+            result += str(type(part)) + ": "
+
+        # Handle content
+        if hasattr(part, "content") and part.content:
+            # Handle different content types
+            if isinstance(part.content, str):
+                result = part.content
+            elif isinstance(part.content, pydantic.BaseModel):
+                result = json.dumps(part.content.model_dump())
+            elif isinstance(part.content, dict):
+                result = json.dumps(part.content)
+            else:
+                result = str(part.content)
+
+        # Handle tool calls which may have additional token costs
+        # If part also has content, we'll process tool calls separately
+        if hasattr(part, "tool_name") and part.tool_name:
+            # Estimate tokens for tool name and parameters
+            tool_text = part.tool_name
+            if hasattr(part, "args"):
+                tool_text += f" {str(part.args)}"
+            result += tool_text
+
+        return result
+
+    def estimate_token_count(self, text: str) -> int:
+        """
+        Simple token estimation using len(message) - 4.
+        This replaces tiktoken with a much simpler approach.
+        """
+        return max(1, math.floor((len(text) / 4)))
+
+
+    def estimate_tokens_for_message(self, message: ModelMessage) -> int:
+        """
+        Estimate the number of tokens in a message using len(message) - 4.
+        Simple and fast replacement for tiktoken.
+        """
+        total_tokens = 0
+
+        for part in message.parts:
+            part_str = self.stringify_message_part(part)
+            if part_str:
+                total_tokens += self.estimate_token_count(part_str)
+
+        return max(1, total_tokens)
+
+    def _is_tool_call_part(self, part: Any) -> bool:
+        if isinstance(part, (ToolCallPart, ToolCallPartDelta)):
+            return True
+
+        part_kind = (getattr(part, "part_kind", "") or "").replace("_", "-")
+        if part_kind == "tool-call":
+            return True
+
+        has_tool_name = getattr(part, "tool_name", None) is not None
+        has_args = getattr(part, "args", None) is not None
+        has_args_delta = getattr(part, "args_delta", None) is not None
+
+        return bool(has_tool_name and (has_args or has_args_delta))
+
+    def _is_tool_return_part(self, part: Any) -> bool:
+        if isinstance(part, (ToolReturnPart, ToolReturn)):
+            return True
+
+        part_kind = (getattr(part, "part_kind", "") or "").replace("_", "-")
+        if part_kind in {"tool-return", "tool-result"}:
+            return True
+
+        if getattr(part, "tool_call_id", None) is None:
+            return False
+
+        has_content = getattr(part, "content", None) is not None
+        has_content_delta = getattr(part, "content_delta", None) is not None
+        return bool(has_content or has_content_delta)
+
+    def filter_huge_messages(self, messages: List[ModelMessage]) -> List[ModelMessage]:
+        filtered = [m for m in messages if self.estimate_tokens_for_message(m) < 50000]
+        pruned = self.prune_interrupted_tool_calls(filtered)
+        return pruned
+
+    def split_messages_for_protected_summarization(
+        self,
+        messages: List[ModelMessage],
+    ) -> Tuple[List[ModelMessage], List[ModelMessage]]:
+        """
+        Split messages into two groups: messages to summarize and protected recent messages.
+
+        Returns:
+            Tuple of (messages_to_summarize, protected_messages)
+
+        The protected_messages are the most recent messages that total up to the configured protected token count.
+        The system message (first message) is always protected.
+        All other messages that don't fit in the protected zone will be summarized.
+        """
+        if len(messages) <= 1:  # Just system message or empty
+            return [], messages
+
+        # Always protect the system message (first message)
+        system_message = messages[0]
+        system_tokens = self.estimate_tokens_for_message(system_message)
+
+        if len(messages) == 1:
+            return [], messages
+
+        # Get the configured protected token count
+        protected_tokens_limit = get_protected_token_count()
+
+        # Calculate tokens for messages from most recent backwards (excluding system message)
+        protected_messages = []
+        protected_token_count = system_tokens  # Start with system message tokens
+
+        # Go backwards through non-system messages to find protected zone
+        for i in range(len(messages) - 1, 0, -1):  # Stop at 1, not 0 (skip system message)
+            message = messages[i]
+            message_tokens = self.estimate_tokens_for_message(message)
+
+            # If adding this message would exceed protected tokens, stop here
+            if protected_token_count + message_tokens > protected_tokens_limit:
+                break
+
+            protected_messages.append(message)
+            protected_token_count += message_tokens
+
+        # Messages that were added while scanning backwards are currently in reverse order.
+        # Reverse them to restore chronological ordering, then prepend the system prompt.
+        protected_messages.reverse()
+        protected_messages.insert(0, system_message)
+
+        # Messages to summarize are everything between the system message and the
+        # protected tail zone we just constructed.
+        protected_start_idx = max(1, len(messages) - (len(protected_messages) - 1))
+        messages_to_summarize = messages[1:protected_start_idx]
+
+        # Emit info messages
+        emit_info(
+            f"🔒 Protecting {len(protected_messages)} recent messages ({protected_token_count} tokens, limit: {protected_tokens_limit})"
+        )
+        emit_info(f"📝 Summarizing {len(messages_to_summarize)} older messages")
+
+        return messages_to_summarize, protected_messages
+
+    def summarize_messages(
+        self,
+        messages: List[ModelMessage],
+        with_protection: bool = True
+    ) -> Tuple[List[ModelMessage], List[ModelMessage]]:
+        """
+        Summarize messages while protecting recent messages up to PROTECTED_TOKENS.
+
+        Returns:
+            Tuple of (compacted_messages, summarized_source_messages)
+            where compacted_messages always preserves the original system message
+            as the first entry.
+        """
+        messages_to_summarize: List[ModelMessage]
+        protected_messages: List[ModelMessage]
+
+        if with_protection:
+            messages_to_summarize, protected_messages = (
+                self.split_messages_for_protected_summarization(messages)
+            )
+        else:
+            messages_to_summarize = messages[1:] if messages else []
+            protected_messages = messages[:1]
+
+        if not messages:
+            return [], []
+
+        system_message = messages[0]
+
+        if not messages_to_summarize:
+            # Nothing to summarize, so just return the original sequence
+            return self.prune_interrupted_tool_calls(messages), []
+
+        instructions = (
+            "The input will be a log of Agentic AI steps that have been taken"
+            " as well as user queries, etc. Summarize the contents of these steps."
+            " The high level details should remain but the bulk of the content from tool-call"
+            " responses should be compacted and summarized. For example if you see a tool-call"
+            " reading a file, and the file contents are large, then in your summary you might just"
+            " write: * used read_file on space_invaders.cpp - contents removed."
+            "\n Make sure your result is a bulleted list of all steps and interactions."
+            "\n\nNOTE: This summary represents older conversation history. Recent messages are preserved separately."
+        )
+
+        try:
+            new_messages = run_summarization_sync(
+                instructions, message_history=messages_to_summarize
+            )
+
+            if not isinstance(new_messages, list):
+                emit_warning(
+                    "Summarization agent returned non-list output; wrapping into message request"
+                )
+                new_messages = [ModelRequest([TextPart(str(new_messages))])]
+
+            compacted: List[ModelMessage] = [system_message] + list(new_messages)
+
+            # Drop the system message from protected_messages because we already included it
+            protected_tail = [msg for msg in protected_messages if msg is not system_message]
+
+            compacted.extend(protected_tail)
+
+            return self.prune_interrupted_tool_calls(compacted), messages_to_summarize
+        except Exception as e:
+            emit_error(f"Summarization failed during compaction: {e}")
+            return messages, []  # Return original messages on failure
+
+    def get_model_context_length(self) -> int:
+        """
+        Get the context length for the currently configured model from models.json
+        """
+        model_configs = ModelFactory.load_config()
+        model_name = get_global_model_name()
+
+        # Get context length from model config
+        model_config = model_configs.get(model_name, {})
+        context_length = model_config.get("context_length", 128000)  # Default value
+
+        return int(context_length)
+
+    def prune_interrupted_tool_calls(self, messages: List[ModelMessage]) -> List[ModelMessage]:
+        """
+        Remove any messages that participate in mismatched tool call sequences.
+
+        A mismatched tool call id is one that appears in a ToolCall (model/tool request)
+        without a corresponding tool return, or vice versa. We preserve original order
+        and only drop messages that contain parts referencing mismatched tool_call_ids.
+        """
+        if not messages:
+            return messages
+
+        tool_call_ids: Set[str] = set()
+        tool_return_ids: Set[str] = set()
+
+        # First pass: collect ids for calls vs returns
+        for msg in messages:
+            for part in getattr(msg, "parts", []) or []:
+                tool_call_id = getattr(part, "tool_call_id", None)
+                if not tool_call_id:
+                    continue
+                # Heuristic: if it's an explicit ToolCallPart or has a tool_name/args,
+                # consider it a call; otherwise it's a return/result.
+                if part.part_kind == "tool-call":
+                    tool_call_ids.add(tool_call_id)
+                else:
+                    tool_return_ids.add(tool_call_id)
+
+        mismatched: Set[str] = tool_call_ids.symmetric_difference(tool_return_ids)
+        if not mismatched:
+            return messages
+
+        pruned: List[ModelMessage] = []
+        dropped_count = 0
+        for msg in messages:
+            has_mismatched = False
+            for part in getattr(msg, "parts", []) or []:
+                tcid = getattr(part, "tool_call_id", None)
+                if tcid and tcid in mismatched:
+                    has_mismatched = True
+                    break
+            if has_mismatched:
+                dropped_count += 1
+                continue
+            pruned.append(msg)
+
+    def message_history_processor(self, messages: List[ModelMessage]) -> List[ModelMessage]:
+        # First, prune any interrupted/mismatched tool-call conversations
+        total_current_tokens = sum(self.estimate_tokens_for_message(msg) for msg in messages)
+
+        model_max = self.get_model_context_length()
+
+        proportion_used = total_current_tokens / model_max
+
+        # Check if we're in TUI mode and can update the status bar
+        from code_puppy.tui_state import get_tui_app_instance, is_tui_mode
+
+        if is_tui_mode():
+            tui_app = get_tui_app_instance()
+            if tui_app:
+                try:
+                    # Update the status bar instead of emitting a chat message
+                    status_bar = tui_app.query_one("StatusBar")
+                    status_bar.update_token_info(
+                        total_current_tokens, model_max, proportion_used
+                    )
+                except Exception as e:
+                    emit_error(e)
+                    # Fallback to chat message if status bar update fails
+                    emit_info(
+                        f"\n[bold white on blue] Tokens in context: {total_current_tokens}, total model capacity: {model_max}, proportion used: {proportion_used:.2f} [/bold white on blue] \n",
+                        message_group="token_context_status",
+                    )
+            else:
+                # Fallback if no TUI app instance
+                emit_info(
+                    f"\n[bold white on blue] Tokens in context: {total_current_tokens}, total model capacity: {model_max}, proportion used: {proportion_used:.2f} [/bold white on blue] \n",
+                    message_group="token_context_status",
+                )
+        else:
+            # Non-TUI mode - emit to console as before
+            emit_info(
+                f"\n[bold white on blue] Tokens in context: {total_current_tokens}, total model capacity: {model_max}, proportion used: {proportion_used:.2f} [/bold white on blue] \n"
+            )
+        # Get the configured compaction threshold
+        compaction_threshold = get_compaction_threshold()
+
+        # Get the configured compaction strategy
+        compaction_strategy = get_compaction_strategy()
+
+        if proportion_used > compaction_threshold:
+            if compaction_strategy == "truncation":
+                # Use truncation instead of summarization
+                protected_tokens = get_protected_token_count()
+                result_messages = self.truncation(
+                    self.filter_huge_messages(messages), protected_tokens
+                )
+                summarized_messages = []  # No summarization in truncation mode
+            else:
+                # Default to summarization
+                result_messages, summarized_messages = self.summarize_messages(
+                    self.filter_huge_messages(messages)
+                )
+
+            final_token_count = sum(
+                self.estimate_tokens_for_message(msg) for msg in result_messages
+            )
+            # Update status bar with final token count if in TUI mode
+            if is_tui_mode():
+                tui_app = get_tui_app_instance()
+                if tui_app:
+                    try:
+                        status_bar = tui_app.query_one("StatusBar")
+                        status_bar.update_token_info(
+                            final_token_count, model_max, final_token_count / model_max
+                        )
+                    except Exception:
+                        emit_info(
+                            f"Final token count after processing: {final_token_count}",
+                            message_group="token_context_status",
+                        )
+                else:
+                    emit_info(
+                        f"Final token count after processing: {final_token_count}",
+                        message_group="token_context_status",
+                    )
+            else:
+                emit_info(f"Final token count after processing: {final_token_count}")
+            self.set_message_history(result_messages)
+            for m in summarized_messages:
+                self.add_compacted_message_hash(self.hash_message(m))
+            return result_messages
+        return messages
+
+    def truncation(self, messages: List[ModelMessage], protected_tokens: int) -> List[ModelMessage]:
+        """
+        Truncate message history to manage token usage.
+
+        Args:
+            messages: List of messages to truncate
+            protected_tokens: Number of tokens to protect
+
+        Returns:
+            Truncated list of messages
+        """
+        import queue
+
+        emit_info("Truncating message history to manage token usage")
+        result = [messages[0]]  # Always keep the first message (system prompt)
+        num_tokens = 0
+        stack = queue.LifoQueue()
+
+        # Put messages in reverse order (most recent first) into the stack
+        # but break when we exceed protected_tokens
+        for idx, msg in enumerate(reversed(messages[1:])):  # Skip the first message
+            num_tokens += self.estimate_tokens_for_message(msg)
+            if num_tokens > protected_tokens:
+                break
+            stack.put(msg)
+
+        # Pop messages from stack to get them in chronological order
+        while not stack.empty():
+            result.append(stack.get())
+
+        result = self.prune_interrupted_tool_calls(result)
+        return result
+
+    def run_summarization_sync(
+        self,
+        instructions: str,
+        message_history: List[ModelMessage],
+    ) -> Union[List[ModelMessage], str]:
+        """
+        Run summarization synchronously using the configured summarization agent.
+        This is exposed as a method so it can be overridden by subclasses if needed.
+
+        Args:
+            instructions: Instructions for the summarization agent
+            message_history: List of messages to summarize
+
+        Returns:
+            Summarized messages or text
+        """
+        return run_summarization_sync(instructions, message_history)
+
+    # ===== Agent wiring formerly in code_puppy/agent.py =====
+    def load_puppy_rules(self) -> Optional[str]:
+        """Load AGENT(S).md if present and cache the contents."""
+        if self._puppy_rules is not None:
+            return self._puppy_rules
+        from pathlib import Path
+        possible_paths = ["AGENTS.md", "AGENT.md", "agents.md", "agent.md"]
+        for path_str in possible_paths:
+            puppy_rules_path = Path(path_str)
+            if puppy_rules_path.exists():
+                with open(puppy_rules_path, "r") as f:
+                    self._puppy_rules = f.read()
+                break
+        return self._puppy_rules
+
+    def load_mcp_servers(self, extra_headers: Optional[Dict[str, str]] = None):
+        """Load MCP servers through the manager and return pydantic-ai compatible servers."""
+
+
+        mcp_disabled = get_value("disable_mcp_servers")
+        if mcp_disabled and str(mcp_disabled).lower() in ("1", "true", "yes", "on"):
+            emit_system_message("[dim]MCP servers disabled via config[/dim]")
+            return []
+
+        manager = get_mcp_manager()
+        configs = load_mcp_server_configs()
+        if not configs:
+            existing_servers = manager.list_servers()
+            if not existing_servers:
+                emit_system_message("[dim]No MCP servers configured[/dim]")
+                return []
+        else:
+            for name, conf in configs.items():
+                try:
+                    server_config = ServerConfig(
+                        id=conf.get("id", f"{name}_{hash(name)}"),
+                        name=name,
+                        type=conf.get("type", "sse"),
+                        enabled=conf.get("enabled", True),
+                        config=conf,
+                    )
+                    existing = manager.get_server_by_name(name)
+                    if not existing:
+                        manager.register_server(server_config)
+                        emit_system_message(f"[dim]Registered MCP server: {name}[/dim]")
+                    else:
+                        if existing.config != server_config.config:
+                            manager.update_server(existing.id, server_config)
+                            emit_system_message(f"[dim]Updated MCP server: {name}[/dim]")
+                except Exception as e:
+                    emit_error(f"Failed to register MCP server '{name}': {str(e)}")
+                    continue
+
+        servers = manager.get_servers_for_agent()
+        if servers:
+            emit_system_message(
+                f"[green]Successfully loaded {len(servers)} MCP server(s)[/green]"
+            )
+        else:
+            emit_system_message(
+                "[yellow]No MCP servers available (check if servers are enabled)[/yellow]"
+            )
+        return servers
+
+    def reload_mcp_servers(self):
+        """Reload MCP servers and return updated servers."""
+        self.load_mcp_servers()
+        manager = get_mcp_manager()
+        return manager.get_servers_for_agent()
+
+    def reload_code_generation_agent(self, message_group: Optional[str] = None):
+        """Force-reload the pydantic-ai Agent based on current config and model."""
+        from code_puppy.tools import register_tools_for_agent
+        if message_group is None:
+            message_group = str(uuid.uuid4())
+
+        model_name = self.get_model_name()
+
+        emit_info(
+            f"[bold cyan]Loading Model: {model_name}[/bold cyan]",
+            message_group=message_group,
+        )
+        models_config = ModelFactory.load_config()
+        model = ModelFactory.get_model(model_name, models_config)
+
+        emit_info(
+            f"[bold magenta]Loading Agent: {self.name}[/bold magenta]",
+            message_group=message_group,
+        )
+
+        instructions = self.get_system_prompt()
+        puppy_rules = self.load_puppy_rules()
+        if puppy_rules:
+            instructions += f"\n{puppy_rules}"
+
+        mcp_servers = self.load_mcp_servers()
+
+        model_settings_dict: Dict[str, Any] = {"seed": 42}
+        output_tokens = max(
+            2048,
+            min(int(0.05 * self.get_model_context_length()) - 1024, 16384),
+        )
+        console.print(f"Max output tokens per message: {output_tokens}")
+        model_settings_dict["max_tokens"] = output_tokens
+
+        model_settings: ModelSettings = ModelSettings(**model_settings_dict)
+        if "gpt-5" in model_name:
+            model_settings_dict["openai_reasoning_effort"] = "off"
+            model_settings_dict["extra_body"] = {"verbosity": "low"}
+            model_settings = OpenAIModelSettings(**model_settings_dict)
+
+        p_agent = PydanticAgent(
+            model=model,
+            instructions=instructions,
+            output_type=str,
+            retries=3,
+            mcp_servers=mcp_servers,
+            history_processors=[self.message_history_accumulator],
+            model_settings=model_settings,
+        )
+
+        agent_tools = self.get_available_tools()
+        register_tools_for_agent(p_agent, agent_tools)
+
+        self._code_generation_agent = p_agent
+        self._last_model_name = model_name
+        # expose for run_with_mcp
+        self.pydantic_agent = p_agent
+        return self._code_generation_agent
+
+
+    def message_history_accumulator(self, messages: List[Any]):
+        _message_history = self.get_message_history()
+        message_history_hashes = set([self.hash_message(m) for m in _message_history])
+        for msg in messages:
+            if (
+                self.hash_message(msg) not in message_history_hashes
+                and self.hash_message(msg) not in self.get_compacted_message_hashes()
+            ):
+                _message_history.append(msg)
+
+        # Apply message history trimming using the main processor
+        # This ensures we maintain global state while still managing context limits
+        self.message_history_processor(_message_history)
+        return self.get_message_history()
+
+
+    async def run_with_mcp(
+        self, prompt: str, usage_limits = None, **kwargs
+    ) -> Any:
+        """
+        Run the agent with MCP servers and full cancellation support.
+
+        This method ensures we're always using the current agent instance
+        and handles Ctrl+C interruption properly by creating a cancellable task.
+
+        Args:
+            prompt: The user prompt to process
+            usage_limits: Optional usage limits for the agent
+            **kwargs: Additional arguments to pass to agent.run (e.g., message_history)
+
+        Returns:
+            The agent's response
+
+        Raises:
+            asyncio.CancelledError: When execution is cancelled by user
+        """
+        group_id = str(uuid.uuid4())
+        pydantic_agent = self.reload_code_generation_agent()
+
+        async def run_agent_task():
+            try:
+                result_ = await pydantic_agent.run(prompt, message_history=self.get_message_history(), usage_limits=usage_limits, **kwargs)
+                self.set_message_history(
+                    self.prune_interrupted_tool_calls(self.get_message_history())
+                )
+                return result_
+            except* UsageLimitExceeded as ule:
+                emit_info(f"Usage limit exceeded: {str(ule)}", group_id=group_id)
+                emit_info(
+                    "The agent has reached its usage limit. You can ask it to continue by saying 'please continue' or similar.",
+                    group_id=group_id,
+                )
+            except* mcp.shared.exceptions.McpError as mcp_error:
+                emit_info(f"MCP server error: {str(mcp_error)}", group_id=group_id)
+                emit_info(f"{str(mcp_error)}", group_id=group_id)
+                emit_info(
+                    "Try disabling any malfunctioning MCP servers", group_id=group_id
+                )
+            except* asyncio.exceptions.CancelledError:
+                emit_info("Cancelled")
+            except* InterruptedError as ie:
+                emit_info(f"Interrupted: {str(ie)}")
+            except* Exception as other_error:
+                # Filter out CancelledError and UsageLimitExceeded from the exception group - let it propagate
+                remaining_exceptions = []
+
+                def collect_non_cancelled_exceptions(exc):
+                    if isinstance(exc, ExceptionGroup):
+                        for sub_exc in exc.exceptions:
+                            collect_non_cancelled_exceptions(sub_exc)
+                    elif not isinstance(
+                        exc, (asyncio.CancelledError, UsageLimitExceeded)
+                    ):
+                        remaining_exceptions.append(exc)
+                        emit_info(f"Unexpected error: {str(exc)}", group_id=group_id)
+                        emit_info(f"{str(exc.args)}", group_id=group_id)
+
+                collect_non_cancelled_exceptions(other_error)
+
+                # If there are CancelledError exceptions in the group, re-raise them
+                cancelled_exceptions = []
+
+                def collect_cancelled_exceptions(exc):
+                    if isinstance(exc, ExceptionGroup):
+                        for sub_exc in exc.exceptions:
+                            collect_cancelled_exceptions(sub_exc)
+                    elif isinstance(exc, asyncio.CancelledError):
+                        cancelled_exceptions.append(exc)
+
+                collect_cancelled_exceptions(other_error)
+
+                if cancelled_exceptions:
+                    # Re-raise the first CancelledError to propagate cancellation
+                    raise cancelled_exceptions[0]
+
+        # Create the task FIRST
+        agent_task = asyncio.create_task(run_agent_task())
+
+        # Import shell process killer
+        from code_puppy.tools.command_runner import kill_all_running_shell_processes
+
+        # Ensure the interrupt handler only acts once per task
+        def keyboard_interrupt_handler(sig, frame):
+            """Signal handler for Ctrl+C - replicating exact original logic"""
+
+            # First, nuke any running shell processes triggered by tools
+            try:
+                killed = kill_all_running_shell_processes()
+                if killed:
+                    emit_info(f"Cancelled {killed} running shell process(es).")
+                else:
+                    # Only cancel the agent task if no shell processes were killed
+                    if not agent_task.done():
+                        agent_task.cancel()
+            except Exception as e:
+                emit_info(f"Shell kill error: {e}")
+                # If shell kill failed, still try to cancel the agent task
+                if not agent_task.done():
+                    agent_task.cancel()
+            # Don't call the original handler
+            # This prevents the application from exiting
+
+        try:
+            # Save original handler and set our custom one AFTER task is created
+            original_handler = signal.signal(signal.SIGINT, keyboard_interrupt_handler)
+
+            # Wait for the task to complete or be cancelled
+            result = await agent_task
+            return result
+        except asyncio.CancelledError:
+            # Task was cancelled by our handler
+            raise
+        except KeyboardInterrupt:
+            # Handle direct keyboard interrupt during await
+            if not agent_task.done():
+                agent_task.cancel()
+            try:
+                await agent_task
+            except asyncio.CancelledError:
+                pass
+            raise asyncio.CancelledError()
+        finally:
+            # Restore original signal handler
+            if original_handler:
+                signal.signal(signal.SIGINT, original_handler)