code-puppy 0.0.336__py3-none-any.whl → 0.0.348__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy/agents/base_agent.py +41 -224
- code_puppy/agents/event_stream_handler.py +257 -0
- code_puppy/claude_cache_client.py +208 -2
- code_puppy/cli_runner.py +53 -35
- code_puppy/command_line/add_model_menu.py +8 -9
- code_puppy/command_line/autosave_menu.py +18 -24
- code_puppy/command_line/clipboard.py +527 -0
- code_puppy/command_line/core_commands.py +34 -0
- code_puppy/command_line/mcp/catalog_server_installer.py +5 -6
- code_puppy/command_line/mcp/custom_server_form.py +54 -19
- code_puppy/command_line/mcp/custom_server_installer.py +8 -9
- code_puppy/command_line/mcp/handler.py +0 -2
- code_puppy/command_line/mcp/help_command.py +1 -5
- code_puppy/command_line/mcp/start_command.py +36 -18
- code_puppy/command_line/onboarding_slides.py +0 -1
- code_puppy/command_line/prompt_toolkit_completion.py +124 -0
- code_puppy/command_line/utils.py +54 -0
- code_puppy/http_utils.py +93 -130
- code_puppy/mcp_/async_lifecycle.py +35 -4
- code_puppy/mcp_/managed_server.py +49 -24
- code_puppy/mcp_/manager.py +81 -52
- code_puppy/messaging/message_queue.py +11 -23
- code_puppy/messaging/messages.py +3 -0
- code_puppy/messaging/rich_renderer.py +13 -3
- code_puppy/model_factory.py +16 -0
- code_puppy/models.json +2 -2
- code_puppy/plugins/antigravity_oauth/antigravity_model.py +17 -2
- code_puppy/plugins/claude_code_oauth/utils.py +126 -7
- code_puppy/terminal_utils.py +128 -1
- code_puppy/tools/agent_tools.py +66 -13
- code_puppy/tools/command_runner.py +1 -0
- code_puppy/tools/common.py +3 -9
- {code_puppy-0.0.336.data → code_puppy-0.0.348.data}/data/code_puppy/models.json +2 -2
- {code_puppy-0.0.336.dist-info → code_puppy-0.0.348.dist-info}/METADATA +19 -71
- {code_puppy-0.0.336.dist-info → code_puppy-0.0.348.dist-info}/RECORD +39 -38
- code_puppy/command_line/mcp/add_command.py +0 -170
- {code_puppy-0.0.336.data → code_puppy-0.0.348.data}/data/code_puppy/models_dev_api.json +0 -0
- {code_puppy-0.0.336.dist-info → code_puppy-0.0.348.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.336.dist-info → code_puppy-0.0.348.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.336.dist-info → code_puppy-0.0.348.dist-info}/licenses/LICENSE +0 -0
code_puppy/agents/base_agent.py
CHANGED
|
@@ -7,7 +7,6 @@ import signal
|
|
|
7
7
|
import threading
|
|
8
8
|
import uuid
|
|
9
9
|
from abc import ABC, abstractmethod
|
|
10
|
-
from collections.abc import AsyncIterable
|
|
11
10
|
from typing import (
|
|
12
11
|
Any,
|
|
13
12
|
Callable,
|
|
@@ -30,7 +29,6 @@ from pydantic_ai import (
|
|
|
30
29
|
BinaryContent,
|
|
31
30
|
DocumentUrl,
|
|
32
31
|
ImageUrl,
|
|
33
|
-
PartEndEvent,
|
|
34
32
|
RunContext,
|
|
35
33
|
UsageLimitExceeded,
|
|
36
34
|
UsageLimits,
|
|
@@ -48,6 +46,8 @@ from pydantic_ai.messages import (
|
|
|
48
46
|
)
|
|
49
47
|
from rich.text import Text
|
|
50
48
|
|
|
49
|
+
from code_puppy.agents.event_stream_handler import event_stream_handler
|
|
50
|
+
|
|
51
51
|
# Consolidated relative imports
|
|
52
52
|
from code_puppy.config import (
|
|
53
53
|
get_agent_pinned_model,
|
|
@@ -100,9 +100,6 @@ class BaseAgent(ABC):
|
|
|
100
100
|
# Cache for MCP tool definitions (for token estimation)
|
|
101
101
|
# This is populated after the first successful run when MCP tools are retrieved
|
|
102
102
|
self._mcp_tool_definitions_cache: List[Dict[str, Any]] = []
|
|
103
|
-
# Shared console for streaming output - should be set by cli_runner
|
|
104
|
-
# to avoid conflicts between spinner's Live display and response streaming
|
|
105
|
-
self._console: Optional[Any] = None
|
|
106
103
|
|
|
107
104
|
@property
|
|
108
105
|
@abstractmethod
|
|
@@ -913,6 +910,11 @@ class BaseAgent(ABC):
|
|
|
913
910
|
"""
|
|
914
911
|
Truncate message history to manage token usage.
|
|
915
912
|
|
|
913
|
+
Protects:
|
|
914
|
+
- The first message (system prompt) - always kept
|
|
915
|
+
- The second message if it contains a ThinkingPart (extended thinking context)
|
|
916
|
+
- The most recent messages up to protected_tokens
|
|
917
|
+
|
|
916
918
|
Args:
|
|
917
919
|
messages: List of messages to truncate
|
|
918
920
|
protected_tokens: Number of tokens to protect
|
|
@@ -924,12 +926,30 @@ class BaseAgent(ABC):
|
|
|
924
926
|
|
|
925
927
|
emit_info("Truncating message history to manage token usage")
|
|
926
928
|
result = [messages[0]] # Always keep the first message (system prompt)
|
|
929
|
+
|
|
930
|
+
# Check if second message exists and contains a ThinkingPart
|
|
931
|
+
# If so, protect it (extended thinking context shouldn't be lost)
|
|
932
|
+
skip_second = False
|
|
933
|
+
if len(messages) > 1:
|
|
934
|
+
second_msg = messages[1]
|
|
935
|
+
has_thinking = any(
|
|
936
|
+
isinstance(part, ThinkingPart) for part in second_msg.parts
|
|
937
|
+
)
|
|
938
|
+
if has_thinking:
|
|
939
|
+
result.append(second_msg)
|
|
940
|
+
skip_second = True
|
|
941
|
+
|
|
927
942
|
num_tokens = 0
|
|
928
943
|
stack = queue.LifoQueue()
|
|
929
944
|
|
|
945
|
+
# Determine which messages to consider for the recent-tokens window
|
|
946
|
+
# Skip first message (already added), and skip second if it has thinking
|
|
947
|
+
start_idx = 2 if skip_second else 1
|
|
948
|
+
messages_to_scan = messages[start_idx:]
|
|
949
|
+
|
|
930
950
|
# Put messages in reverse order (most recent first) into the stack
|
|
931
951
|
# but break when we exceed protected_tokens
|
|
932
|
-
for
|
|
952
|
+
for msg in reversed(messages_to_scan):
|
|
933
953
|
num_tokens += self.estimate_tokens_for_message(msg)
|
|
934
954
|
if num_tokens > protected_tokens:
|
|
935
955
|
break
|
|
@@ -1210,9 +1230,12 @@ class BaseAgent(ABC):
|
|
|
1210
1230
|
agent_tools = self.get_available_tools()
|
|
1211
1231
|
register_tools_for_agent(agent_without_mcp, agent_tools)
|
|
1212
1232
|
|
|
1213
|
-
# Wrap with DBOS
|
|
1233
|
+
# Wrap with DBOS - pass event_stream_handler at construction time
|
|
1234
|
+
# so DBOSModel gets the handler for streaming output
|
|
1214
1235
|
dbos_agent = DBOSAgent(
|
|
1215
|
-
agent_without_mcp,
|
|
1236
|
+
agent_without_mcp,
|
|
1237
|
+
name=f"{self.name}-{_reload_count}",
|
|
1238
|
+
event_stream_handler=event_stream_handler,
|
|
1216
1239
|
)
|
|
1217
1240
|
self.pydantic_agent = dbos_agent
|
|
1218
1241
|
self._code_generation_agent = dbos_agent
|
|
@@ -1291,8 +1314,11 @@ class BaseAgent(ABC):
|
|
|
1291
1314
|
)
|
|
1292
1315
|
agent_tools = self.get_available_tools()
|
|
1293
1316
|
register_tools_for_agent(temp_agent, agent_tools)
|
|
1317
|
+
# Pass event_stream_handler at construction time for streaming output
|
|
1294
1318
|
dbos_agent = DBOSAgent(
|
|
1295
|
-
temp_agent,
|
|
1319
|
+
temp_agent,
|
|
1320
|
+
name=f"{self.name}-structured-{_reload_count}",
|
|
1321
|
+
event_stream_handler=event_stream_handler,
|
|
1296
1322
|
)
|
|
1297
1323
|
return dbos_agent
|
|
1298
1324
|
else:
|
|
@@ -1334,216 +1360,6 @@ class BaseAgent(ABC):
|
|
|
1334
1360
|
self.set_message_history(result_messages_filtered_empty_thinking)
|
|
1335
1361
|
return self.get_message_history()
|
|
1336
1362
|
|
|
1337
|
-
async def _event_stream_handler(
|
|
1338
|
-
self, ctx: RunContext, events: AsyncIterable[Any]
|
|
1339
|
-
) -> None:
|
|
1340
|
-
"""Handle streaming events from the agent run.
|
|
1341
|
-
|
|
1342
|
-
This method processes streaming events and emits TextPart, ThinkingPart,
|
|
1343
|
-
and ToolCallPart content with styled banners/tokens as they stream in.
|
|
1344
|
-
|
|
1345
|
-
Args:
|
|
1346
|
-
ctx: The run context.
|
|
1347
|
-
events: Async iterable of streaming events (PartStartEvent, PartDeltaEvent, etc.).
|
|
1348
|
-
"""
|
|
1349
|
-
from pydantic_ai import PartDeltaEvent, PartStartEvent
|
|
1350
|
-
from pydantic_ai.messages import (
|
|
1351
|
-
TextPartDelta,
|
|
1352
|
-
ThinkingPartDelta,
|
|
1353
|
-
ToolCallPartDelta,
|
|
1354
|
-
)
|
|
1355
|
-
from rich.console import Console
|
|
1356
|
-
from rich.markdown import Markdown
|
|
1357
|
-
from rich.markup import escape
|
|
1358
|
-
|
|
1359
|
-
from code_puppy.messaging.spinner import pause_all_spinners
|
|
1360
|
-
|
|
1361
|
-
# IMPORTANT: Use the shared console (set by cli_runner) to avoid conflicts
|
|
1362
|
-
# with the spinner's Live display. Multiple Console instances with separate
|
|
1363
|
-
# Live displays cause cursor positioning chaos and line duplication.
|
|
1364
|
-
if self._console is not None:
|
|
1365
|
-
console = self._console
|
|
1366
|
-
else:
|
|
1367
|
-
# Fallback if console not set (shouldn't happen in normal use)
|
|
1368
|
-
console = Console()
|
|
1369
|
-
|
|
1370
|
-
# Track which part indices we're currently streaming (for Text/Thinking/Tool parts)
|
|
1371
|
-
streaming_parts: set[int] = set()
|
|
1372
|
-
thinking_parts: set[int] = (
|
|
1373
|
-
set()
|
|
1374
|
-
) # Track which parts are thinking (for dim style)
|
|
1375
|
-
text_parts: set[int] = set() # Track which parts are text
|
|
1376
|
-
tool_parts: set[int] = set() # Track which parts are tool calls
|
|
1377
|
-
banner_printed: set[int] = set() # Track if banner was already printed
|
|
1378
|
-
text_buffer: dict[int, list[str]] = {} # Buffer text for final markdown render
|
|
1379
|
-
token_count: dict[int, int] = {} # Track token count per text/tool part
|
|
1380
|
-
did_stream_anything = False # Track if we streamed any content
|
|
1381
|
-
|
|
1382
|
-
def _print_thinking_banner() -> None:
|
|
1383
|
-
"""Print the THINKING banner with spinner pause and line clear."""
|
|
1384
|
-
nonlocal did_stream_anything
|
|
1385
|
-
import time
|
|
1386
|
-
|
|
1387
|
-
from code_puppy.config import get_banner_color
|
|
1388
|
-
|
|
1389
|
-
pause_all_spinners()
|
|
1390
|
-
time.sleep(0.1) # Delay to let spinner fully clear
|
|
1391
|
-
# Clear line and print newline before banner
|
|
1392
|
-
console.print(" " * 50, end="\r")
|
|
1393
|
-
console.print() # Newline before banner
|
|
1394
|
-
# Bold banner with configurable color and lightning bolt
|
|
1395
|
-
thinking_color = get_banner_color("thinking")
|
|
1396
|
-
console.print(
|
|
1397
|
-
Text.from_markup(
|
|
1398
|
-
f"[bold white on {thinking_color}] THINKING [/bold white on {thinking_color}] [dim]⚡ "
|
|
1399
|
-
),
|
|
1400
|
-
end="",
|
|
1401
|
-
)
|
|
1402
|
-
did_stream_anything = True
|
|
1403
|
-
|
|
1404
|
-
def _print_response_banner() -> None:
|
|
1405
|
-
"""Print the AGENT RESPONSE banner with spinner pause and line clear."""
|
|
1406
|
-
nonlocal did_stream_anything
|
|
1407
|
-
import time
|
|
1408
|
-
|
|
1409
|
-
from code_puppy.config import get_banner_color
|
|
1410
|
-
|
|
1411
|
-
pause_all_spinners()
|
|
1412
|
-
time.sleep(0.1) # Delay to let spinner fully clear
|
|
1413
|
-
# Clear line and print newline before banner
|
|
1414
|
-
console.print(" " * 50, end="\r")
|
|
1415
|
-
console.print() # Newline before banner
|
|
1416
|
-
response_color = get_banner_color("agent_response")
|
|
1417
|
-
console.print(
|
|
1418
|
-
Text.from_markup(
|
|
1419
|
-
f"[bold white on {response_color}] AGENT RESPONSE [/bold white on {response_color}]"
|
|
1420
|
-
)
|
|
1421
|
-
)
|
|
1422
|
-
did_stream_anything = True
|
|
1423
|
-
|
|
1424
|
-
async for event in events:
|
|
1425
|
-
# PartStartEvent - register the part but defer banner until content arrives
|
|
1426
|
-
if isinstance(event, PartStartEvent):
|
|
1427
|
-
part = event.part
|
|
1428
|
-
if isinstance(part, ThinkingPart):
|
|
1429
|
-
streaming_parts.add(event.index)
|
|
1430
|
-
thinking_parts.add(event.index)
|
|
1431
|
-
# If there's initial content, print banner + content now
|
|
1432
|
-
if part.content and part.content.strip():
|
|
1433
|
-
_print_thinking_banner()
|
|
1434
|
-
escaped = escape(part.content)
|
|
1435
|
-
console.print(f"[dim]{escaped}[/dim]", end="")
|
|
1436
|
-
banner_printed.add(event.index)
|
|
1437
|
-
elif isinstance(part, TextPart):
|
|
1438
|
-
streaming_parts.add(event.index)
|
|
1439
|
-
text_parts.add(event.index)
|
|
1440
|
-
text_buffer[event.index] = [] # Initialize buffer
|
|
1441
|
-
token_count[event.index] = 0 # Initialize token counter
|
|
1442
|
-
# Buffer initial content if present
|
|
1443
|
-
if part.content and part.content.strip():
|
|
1444
|
-
text_buffer[event.index].append(part.content)
|
|
1445
|
-
# Count chunks (each part counts as 1)
|
|
1446
|
-
token_count[event.index] += 1
|
|
1447
|
-
elif isinstance(part, ToolCallPart):
|
|
1448
|
-
streaming_parts.add(event.index)
|
|
1449
|
-
tool_parts.add(event.index)
|
|
1450
|
-
token_count[event.index] = 0 # Initialize token counter
|
|
1451
|
-
# Track tool name for display
|
|
1452
|
-
banner_printed.add(
|
|
1453
|
-
event.index
|
|
1454
|
-
) # Use banner_printed to track if we've shown tool info
|
|
1455
|
-
|
|
1456
|
-
# PartDeltaEvent - stream the content as it arrives
|
|
1457
|
-
elif isinstance(event, PartDeltaEvent):
|
|
1458
|
-
if event.index in streaming_parts:
|
|
1459
|
-
delta = event.delta
|
|
1460
|
-
if isinstance(delta, (TextPartDelta, ThinkingPartDelta)):
|
|
1461
|
-
if delta.content_delta:
|
|
1462
|
-
# For text parts, show token counter then render at end
|
|
1463
|
-
if event.index in text_parts:
|
|
1464
|
-
# Print banner on first content
|
|
1465
|
-
if event.index not in banner_printed:
|
|
1466
|
-
_print_response_banner()
|
|
1467
|
-
banner_printed.add(event.index)
|
|
1468
|
-
# Accumulate text for final markdown render
|
|
1469
|
-
text_buffer[event.index].append(delta.content_delta)
|
|
1470
|
-
# Count chunks received
|
|
1471
|
-
token_count[event.index] += 1
|
|
1472
|
-
# Update chunk counter in place (single line)
|
|
1473
|
-
count = token_count[event.index]
|
|
1474
|
-
console.print(
|
|
1475
|
-
f" ⏳ Receiving... {count} chunks ",
|
|
1476
|
-
end="\r",
|
|
1477
|
-
)
|
|
1478
|
-
else:
|
|
1479
|
-
# For thinking parts, stream immediately (dim)
|
|
1480
|
-
if event.index not in banner_printed:
|
|
1481
|
-
_print_thinking_banner()
|
|
1482
|
-
banner_printed.add(event.index)
|
|
1483
|
-
escaped = escape(delta.content_delta)
|
|
1484
|
-
console.print(f"[dim]{escaped}[/dim]", end="")
|
|
1485
|
-
elif isinstance(delta, ToolCallPartDelta):
|
|
1486
|
-
# For tool calls, count chunks received
|
|
1487
|
-
token_count[event.index] += 1
|
|
1488
|
-
# Get tool name if available
|
|
1489
|
-
tool_name = getattr(delta, "tool_name_delta", "")
|
|
1490
|
-
count = token_count[event.index]
|
|
1491
|
-
# Display with tool wrench icon and tool name
|
|
1492
|
-
if tool_name:
|
|
1493
|
-
console.print(
|
|
1494
|
-
f" 🔧 Calling {tool_name}... {count} chunks ",
|
|
1495
|
-
end="\r",
|
|
1496
|
-
)
|
|
1497
|
-
else:
|
|
1498
|
-
console.print(
|
|
1499
|
-
f" 🔧 Calling tool... {count} chunks ",
|
|
1500
|
-
end="\r",
|
|
1501
|
-
)
|
|
1502
|
-
|
|
1503
|
-
# PartEndEvent - finish the streaming with a newline
|
|
1504
|
-
elif isinstance(event, PartEndEvent):
|
|
1505
|
-
if event.index in streaming_parts:
|
|
1506
|
-
# For text parts, clear counter line and render markdown
|
|
1507
|
-
if event.index in text_parts:
|
|
1508
|
-
# Clear the chunk counter line by printing spaces and returning
|
|
1509
|
-
console.print(" " * 50, end="\r")
|
|
1510
|
-
# Render the final markdown nicely
|
|
1511
|
-
if event.index in text_buffer:
|
|
1512
|
-
try:
|
|
1513
|
-
final_content = "".join(text_buffer[event.index])
|
|
1514
|
-
if final_content.strip():
|
|
1515
|
-
console.print(Markdown(final_content))
|
|
1516
|
-
except Exception:
|
|
1517
|
-
pass
|
|
1518
|
-
del text_buffer[event.index]
|
|
1519
|
-
# For tool parts, clear the chunk counter line
|
|
1520
|
-
elif event.index in tool_parts:
|
|
1521
|
-
# Clear the chunk counter line by printing spaces and returning
|
|
1522
|
-
console.print(" " * 50, end="\r")
|
|
1523
|
-
# For thinking parts, just print newline
|
|
1524
|
-
elif event.index in banner_printed:
|
|
1525
|
-
console.print() # Final newline after streaming
|
|
1526
|
-
|
|
1527
|
-
# Clean up token count
|
|
1528
|
-
token_count.pop(event.index, None)
|
|
1529
|
-
# Clean up all tracking sets
|
|
1530
|
-
streaming_parts.discard(event.index)
|
|
1531
|
-
thinking_parts.discard(event.index)
|
|
1532
|
-
text_parts.discard(event.index)
|
|
1533
|
-
tool_parts.discard(event.index)
|
|
1534
|
-
banner_printed.discard(event.index)
|
|
1535
|
-
|
|
1536
|
-
# Resume spinner if next part is NOT text/thinking/tool (avoid race condition)
|
|
1537
|
-
# If next part is None or handled differently, it's safe to resume
|
|
1538
|
-
# Note: spinner itself handles blank line before appearing
|
|
1539
|
-
from code_puppy.messaging.spinner import resume_all_spinners
|
|
1540
|
-
|
|
1541
|
-
next_kind = getattr(event, "next_part_kind", None)
|
|
1542
|
-
if next_kind not in ("text", "thinking", "tool-call"):
|
|
1543
|
-
resume_all_spinners()
|
|
1544
|
-
|
|
1545
|
-
# Spinner is resumed in PartEndEvent when appropriate (based on next_part_kind)
|
|
1546
|
-
|
|
1547
1363
|
def _spawn_ctrl_x_key_listener(
|
|
1548
1364
|
self,
|
|
1549
1365
|
stop_event: threading.Event,
|
|
@@ -1811,32 +1627,33 @@ class BaseAgent(ABC):
|
|
|
1811
1627
|
prompt_payload,
|
|
1812
1628
|
message_history=self.get_message_history(),
|
|
1813
1629
|
usage_limits=usage_limits,
|
|
1814
|
-
event_stream_handler=
|
|
1630
|
+
event_stream_handler=event_stream_handler,
|
|
1815
1631
|
**kwargs,
|
|
1816
1632
|
)
|
|
1633
|
+
return result_
|
|
1817
1634
|
finally:
|
|
1818
1635
|
# Always restore original toolsets
|
|
1819
1636
|
pydantic_agent._toolsets = original_toolsets
|
|
1820
1637
|
elif get_use_dbos():
|
|
1821
|
-
# DBOS without MCP servers
|
|
1822
1638
|
with SetWorkflowID(group_id):
|
|
1823
1639
|
result_ = await pydantic_agent.run(
|
|
1824
1640
|
prompt_payload,
|
|
1825
1641
|
message_history=self.get_message_history(),
|
|
1826
1642
|
usage_limits=usage_limits,
|
|
1827
|
-
event_stream_handler=
|
|
1643
|
+
event_stream_handler=event_stream_handler,
|
|
1828
1644
|
**kwargs,
|
|
1829
1645
|
)
|
|
1646
|
+
return result_
|
|
1830
1647
|
else:
|
|
1831
1648
|
# Non-DBOS path (MCP servers are already included)
|
|
1832
1649
|
result_ = await pydantic_agent.run(
|
|
1833
1650
|
prompt_payload,
|
|
1834
1651
|
message_history=self.get_message_history(),
|
|
1835
1652
|
usage_limits=usage_limits,
|
|
1836
|
-
event_stream_handler=
|
|
1653
|
+
event_stream_handler=event_stream_handler,
|
|
1837
1654
|
**kwargs,
|
|
1838
1655
|
)
|
|
1839
|
-
|
|
1656
|
+
return result_
|
|
1840
1657
|
except* UsageLimitExceeded as ule:
|
|
1841
1658
|
emit_info(f"Usage limit exceeded: {str(ule)}", group_id=group_id)
|
|
1842
1659
|
emit_info(
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""Event stream handler for processing streaming events from agent runs."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncIterable
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic_ai import PartDeltaEvent, PartEndEvent, PartStartEvent, RunContext
|
|
7
|
+
from pydantic_ai.messages import (
|
|
8
|
+
TextPart,
|
|
9
|
+
TextPartDelta,
|
|
10
|
+
ThinkingPart,
|
|
11
|
+
ThinkingPartDelta,
|
|
12
|
+
ToolCallPart,
|
|
13
|
+
ToolCallPartDelta,
|
|
14
|
+
)
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
from rich.markup import escape
|
|
17
|
+
from rich.text import Text
|
|
18
|
+
|
|
19
|
+
from code_puppy.config import get_banner_color
|
|
20
|
+
from code_puppy.messaging.spinner import pause_all_spinners, resume_all_spinners
|
|
21
|
+
|
|
22
|
+
# Module-level console for streaming output
|
|
23
|
+
# Set via set_streaming_console() to share console with spinner
|
|
24
|
+
_streaming_console: Optional[Console] = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def set_streaming_console(console: Optional[Console]) -> None:
|
|
28
|
+
"""Set the console used for streaming output.
|
|
29
|
+
|
|
30
|
+
This should be called with the same console used by the spinner
|
|
31
|
+
to avoid Live display conflicts that cause line duplication.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
console: The Rich console to use, or None to use a fallback.
|
|
35
|
+
"""
|
|
36
|
+
global _streaming_console
|
|
37
|
+
_streaming_console = console
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_streaming_console() -> Console:
|
|
41
|
+
"""Get the console for streaming output.
|
|
42
|
+
|
|
43
|
+
Returns the configured console or creates a fallback Console.
|
|
44
|
+
"""
|
|
45
|
+
if _streaming_console is not None:
|
|
46
|
+
return _streaming_console
|
|
47
|
+
return Console()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def event_stream_handler(
|
|
51
|
+
ctx: RunContext,
|
|
52
|
+
events: AsyncIterable[Any],
|
|
53
|
+
) -> None:
|
|
54
|
+
"""Handle streaming events from the agent run.
|
|
55
|
+
|
|
56
|
+
This function processes streaming events and emits TextPart, ThinkingPart,
|
|
57
|
+
and ToolCallPart content with styled banners/tokens as they stream in.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
ctx: The run context.
|
|
61
|
+
events: Async iterable of streaming events (PartStartEvent, PartDeltaEvent, etc.).
|
|
62
|
+
"""
|
|
63
|
+
import time
|
|
64
|
+
|
|
65
|
+
from termflow import Parser as TermflowParser
|
|
66
|
+
from termflow import Renderer as TermflowRenderer
|
|
67
|
+
|
|
68
|
+
# Use the module-level console (set via set_streaming_console)
|
|
69
|
+
console = get_streaming_console()
|
|
70
|
+
|
|
71
|
+
# Track which part indices we're currently streaming (for Text/Thinking/Tool parts)
|
|
72
|
+
streaming_parts: set[int] = set()
|
|
73
|
+
thinking_parts: set[int] = set() # Track which parts are thinking (for dim style)
|
|
74
|
+
text_parts: set[int] = set() # Track which parts are text
|
|
75
|
+
tool_parts: set[int] = set() # Track which parts are tool calls
|
|
76
|
+
banner_printed: set[int] = set() # Track if banner was already printed
|
|
77
|
+
token_count: dict[int, int] = {} # Track token count per text/tool part
|
|
78
|
+
did_stream_anything = False # Track if we streamed any content
|
|
79
|
+
|
|
80
|
+
# Termflow streaming state for text parts
|
|
81
|
+
termflow_parsers: dict[int, TermflowParser] = {}
|
|
82
|
+
termflow_renderers: dict[int, TermflowRenderer] = {}
|
|
83
|
+
termflow_line_buffers: dict[int, str] = {} # Buffer incomplete lines
|
|
84
|
+
|
|
85
|
+
def _print_thinking_banner() -> None:
|
|
86
|
+
"""Print the THINKING banner with spinner pause and line clear."""
|
|
87
|
+
nonlocal did_stream_anything
|
|
88
|
+
|
|
89
|
+
pause_all_spinners()
|
|
90
|
+
time.sleep(0.1) # Delay to let spinner fully clear
|
|
91
|
+
# Clear line and print newline before banner
|
|
92
|
+
console.print(" " * 50, end="\r")
|
|
93
|
+
console.print() # Newline before banner
|
|
94
|
+
# Bold banner with configurable color and lightning bolt
|
|
95
|
+
thinking_color = get_banner_color("thinking")
|
|
96
|
+
console.print(
|
|
97
|
+
Text.from_markup(
|
|
98
|
+
f"[bold white on {thinking_color}] THINKING [/bold white on {thinking_color}] [dim]\u26a1 "
|
|
99
|
+
),
|
|
100
|
+
end="",
|
|
101
|
+
)
|
|
102
|
+
did_stream_anything = True
|
|
103
|
+
|
|
104
|
+
def _print_response_banner() -> None:
|
|
105
|
+
"""Print the AGENT RESPONSE banner with spinner pause and line clear."""
|
|
106
|
+
nonlocal did_stream_anything
|
|
107
|
+
|
|
108
|
+
pause_all_spinners()
|
|
109
|
+
time.sleep(0.1) # Delay to let spinner fully clear
|
|
110
|
+
# Clear line and print newline before banner
|
|
111
|
+
console.print(" " * 50, end="\r")
|
|
112
|
+
console.print() # Newline before banner
|
|
113
|
+
response_color = get_banner_color("agent_response")
|
|
114
|
+
console.print(
|
|
115
|
+
Text.from_markup(
|
|
116
|
+
f"[bold white on {response_color}] AGENT RESPONSE [/bold white on {response_color}]"
|
|
117
|
+
)
|
|
118
|
+
)
|
|
119
|
+
did_stream_anything = True
|
|
120
|
+
|
|
121
|
+
async for event in events:
|
|
122
|
+
# PartStartEvent - register the part but defer banner until content arrives
|
|
123
|
+
if isinstance(event, PartStartEvent):
|
|
124
|
+
part = event.part
|
|
125
|
+
if isinstance(part, ThinkingPart):
|
|
126
|
+
streaming_parts.add(event.index)
|
|
127
|
+
thinking_parts.add(event.index)
|
|
128
|
+
# If there's initial content, print banner + content now
|
|
129
|
+
if part.content and part.content.strip():
|
|
130
|
+
_print_thinking_banner()
|
|
131
|
+
escaped = escape(part.content)
|
|
132
|
+
console.print(f"[dim]{escaped}[/dim]", end="")
|
|
133
|
+
banner_printed.add(event.index)
|
|
134
|
+
elif isinstance(part, TextPart):
|
|
135
|
+
streaming_parts.add(event.index)
|
|
136
|
+
text_parts.add(event.index)
|
|
137
|
+
# Initialize termflow streaming for this text part
|
|
138
|
+
termflow_parsers[event.index] = TermflowParser()
|
|
139
|
+
termflow_renderers[event.index] = TermflowRenderer(
|
|
140
|
+
output=console.file, width=console.width
|
|
141
|
+
)
|
|
142
|
+
termflow_line_buffers[event.index] = ""
|
|
143
|
+
# Handle initial content if present
|
|
144
|
+
if part.content and part.content.strip():
|
|
145
|
+
_print_response_banner()
|
|
146
|
+
banner_printed.add(event.index)
|
|
147
|
+
termflow_line_buffers[event.index] = part.content
|
|
148
|
+
elif isinstance(part, ToolCallPart):
|
|
149
|
+
streaming_parts.add(event.index)
|
|
150
|
+
tool_parts.add(event.index)
|
|
151
|
+
token_count[event.index] = 0 # Initialize token counter
|
|
152
|
+
# Track tool name for display
|
|
153
|
+
banner_printed.add(
|
|
154
|
+
event.index
|
|
155
|
+
) # Use banner_printed to track if we've shown tool info
|
|
156
|
+
|
|
157
|
+
# PartDeltaEvent - stream the content as it arrives
|
|
158
|
+
elif isinstance(event, PartDeltaEvent):
|
|
159
|
+
if event.index in streaming_parts:
|
|
160
|
+
delta = event.delta
|
|
161
|
+
if isinstance(delta, (TextPartDelta, ThinkingPartDelta)):
|
|
162
|
+
if delta.content_delta:
|
|
163
|
+
# For text parts, stream markdown with termflow
|
|
164
|
+
if event.index in text_parts:
|
|
165
|
+
# Print banner on first content
|
|
166
|
+
if event.index not in banner_printed:
|
|
167
|
+
_print_response_banner()
|
|
168
|
+
banner_printed.add(event.index)
|
|
169
|
+
|
|
170
|
+
# Add content to line buffer
|
|
171
|
+
termflow_line_buffers[event.index] += delta.content_delta
|
|
172
|
+
|
|
173
|
+
# Process complete lines
|
|
174
|
+
parser = termflow_parsers[event.index]
|
|
175
|
+
renderer = termflow_renderers[event.index]
|
|
176
|
+
buffer = termflow_line_buffers[event.index]
|
|
177
|
+
|
|
178
|
+
while "\n" in buffer:
|
|
179
|
+
line, buffer = buffer.split("\n", 1)
|
|
180
|
+
events_to_render = parser.parse_line(line)
|
|
181
|
+
renderer.render_all(events_to_render)
|
|
182
|
+
|
|
183
|
+
termflow_line_buffers[event.index] = buffer
|
|
184
|
+
else:
|
|
185
|
+
# For thinking parts, stream immediately (dim)
|
|
186
|
+
if event.index not in banner_printed:
|
|
187
|
+
_print_thinking_banner()
|
|
188
|
+
banner_printed.add(event.index)
|
|
189
|
+
escaped = escape(delta.content_delta)
|
|
190
|
+
console.print(f"[dim]{escaped}[/dim]", end="")
|
|
191
|
+
elif isinstance(delta, ToolCallPartDelta):
|
|
192
|
+
# For tool calls, count chunks received
|
|
193
|
+
token_count[event.index] += 1
|
|
194
|
+
# Get tool name if available
|
|
195
|
+
tool_name = getattr(delta, "tool_name_delta", "")
|
|
196
|
+
count = token_count[event.index]
|
|
197
|
+
# Display with tool wrench icon and tool name
|
|
198
|
+
if tool_name:
|
|
199
|
+
console.print(
|
|
200
|
+
f" \U0001f527 Calling {tool_name}... {count} chunks ",
|
|
201
|
+
end="\r",
|
|
202
|
+
)
|
|
203
|
+
else:
|
|
204
|
+
console.print(
|
|
205
|
+
f" \U0001f527 Calling tool... {count} chunks ",
|
|
206
|
+
end="\r",
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# PartEndEvent - finish the streaming with a newline
|
|
210
|
+
elif isinstance(event, PartEndEvent):
|
|
211
|
+
if event.index in streaming_parts:
|
|
212
|
+
# For text parts, finalize termflow rendering
|
|
213
|
+
if event.index in text_parts:
|
|
214
|
+
# Render any remaining buffered content
|
|
215
|
+
if event.index in termflow_parsers:
|
|
216
|
+
parser = termflow_parsers[event.index]
|
|
217
|
+
renderer = termflow_renderers[event.index]
|
|
218
|
+
remaining = termflow_line_buffers.get(event.index, "")
|
|
219
|
+
|
|
220
|
+
# Parse and render any remaining partial line
|
|
221
|
+
if remaining.strip():
|
|
222
|
+
events_to_render = parser.parse_line(remaining)
|
|
223
|
+
renderer.render_all(events_to_render)
|
|
224
|
+
|
|
225
|
+
# Finalize the parser to close any open blocks
|
|
226
|
+
final_events = parser.finalize()
|
|
227
|
+
renderer.render_all(final_events)
|
|
228
|
+
|
|
229
|
+
# Clean up termflow state
|
|
230
|
+
del termflow_parsers[event.index]
|
|
231
|
+
del termflow_renderers[event.index]
|
|
232
|
+
del termflow_line_buffers[event.index]
|
|
233
|
+
# For tool parts, clear the chunk counter line
|
|
234
|
+
elif event.index in tool_parts:
|
|
235
|
+
# Clear the chunk counter line by printing spaces and returning
|
|
236
|
+
console.print(" " * 50, end="\r")
|
|
237
|
+
# For thinking parts, just print newline
|
|
238
|
+
elif event.index in banner_printed:
|
|
239
|
+
console.print() # Final newline after streaming
|
|
240
|
+
|
|
241
|
+
# Clean up token count
|
|
242
|
+
token_count.pop(event.index, None)
|
|
243
|
+
# Clean up all tracking sets
|
|
244
|
+
streaming_parts.discard(event.index)
|
|
245
|
+
thinking_parts.discard(event.index)
|
|
246
|
+
text_parts.discard(event.index)
|
|
247
|
+
tool_parts.discard(event.index)
|
|
248
|
+
banner_printed.discard(event.index)
|
|
249
|
+
|
|
250
|
+
# Resume spinner if next part is NOT text/thinking/tool (avoid race condition)
|
|
251
|
+
# If next part is None or handled differently, it's safe to resume
|
|
252
|
+
# Note: spinner itself handles blank line before appearing
|
|
253
|
+
next_kind = getattr(event, "next_part_kind", None)
|
|
254
|
+
if next_kind not in ("text", "thinking", "tool-call"):
|
|
255
|
+
resume_all_spinners()
|
|
256
|
+
|
|
257
|
+
# Spinner is resumed in PartEndEvent when appropriate (based on next_part_kind)
|