kollabor-0.4.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. core/__init__.py +18 -0
  2. core/application.py +578 -0
  3. core/cli.py +193 -0
  4. core/commands/__init__.py +43 -0
  5. core/commands/executor.py +277 -0
  6. core/commands/menu_renderer.py +319 -0
  7. core/commands/parser.py +186 -0
  8. core/commands/registry.py +331 -0
  9. core/commands/system_commands.py +479 -0
  10. core/config/__init__.py +7 -0
  11. core/config/llm_task_config.py +110 -0
  12. core/config/loader.py +501 -0
  13. core/config/manager.py +112 -0
  14. core/config/plugin_config_manager.py +346 -0
  15. core/config/plugin_schema.py +424 -0
  16. core/config/service.py +399 -0
  17. core/effects/__init__.py +1 -0
  18. core/events/__init__.py +12 -0
  19. core/events/bus.py +129 -0
  20. core/events/executor.py +154 -0
  21. core/events/models.py +258 -0
  22. core/events/processor.py +176 -0
  23. core/events/registry.py +289 -0
  24. core/fullscreen/__init__.py +19 -0
  25. core/fullscreen/command_integration.py +290 -0
  26. core/fullscreen/components/__init__.py +12 -0
  27. core/fullscreen/components/animation.py +258 -0
  28. core/fullscreen/components/drawing.py +160 -0
  29. core/fullscreen/components/matrix_components.py +177 -0
  30. core/fullscreen/manager.py +302 -0
  31. core/fullscreen/plugin.py +204 -0
  32. core/fullscreen/renderer.py +282 -0
  33. core/fullscreen/session.py +324 -0
  34. core/io/__init__.py +52 -0
  35. core/io/buffer_manager.py +362 -0
  36. core/io/config_status_view.py +272 -0
  37. core/io/core_status_views.py +410 -0
  38. core/io/input_errors.py +313 -0
  39. core/io/input_handler.py +2655 -0
  40. core/io/input_mode_manager.py +402 -0
  41. core/io/key_parser.py +344 -0
  42. core/io/layout.py +587 -0
  43. core/io/message_coordinator.py +204 -0
  44. core/io/message_renderer.py +601 -0
  45. core/io/modal_interaction_handler.py +315 -0
  46. core/io/raw_input_processor.py +946 -0
  47. core/io/status_renderer.py +845 -0
  48. core/io/terminal_renderer.py +586 -0
  49. core/io/terminal_state.py +551 -0
  50. core/io/visual_effects.py +734 -0
  51. core/llm/__init__.py +26 -0
  52. core/llm/api_communication_service.py +863 -0
  53. core/llm/conversation_logger.py +473 -0
  54. core/llm/conversation_manager.py +414 -0
  55. core/llm/file_operations_executor.py +1401 -0
  56. core/llm/hook_system.py +402 -0
  57. core/llm/llm_service.py +1629 -0
  58. core/llm/mcp_integration.py +386 -0
  59. core/llm/message_display_service.py +450 -0
  60. core/llm/model_router.py +214 -0
  61. core/llm/plugin_sdk.py +396 -0
  62. core/llm/response_parser.py +848 -0
  63. core/llm/response_processor.py +364 -0
  64. core/llm/tool_executor.py +520 -0
  65. core/logging/__init__.py +19 -0
  66. core/logging/setup.py +208 -0
  67. core/models/__init__.py +5 -0
  68. core/models/base.py +23 -0
  69. core/plugins/__init__.py +13 -0
  70. core/plugins/collector.py +212 -0
  71. core/plugins/discovery.py +386 -0
  72. core/plugins/factory.py +263 -0
  73. core/plugins/registry.py +152 -0
  74. core/storage/__init__.py +5 -0
  75. core/storage/state_manager.py +84 -0
  76. core/ui/__init__.py +6 -0
  77. core/ui/config_merger.py +176 -0
  78. core/ui/config_widgets.py +369 -0
  79. core/ui/live_modal_renderer.py +276 -0
  80. core/ui/modal_actions.py +162 -0
  81. core/ui/modal_overlay_renderer.py +373 -0
  82. core/ui/modal_renderer.py +591 -0
  83. core/ui/modal_state_manager.py +443 -0
  84. core/ui/widget_integration.py +222 -0
  85. core/ui/widgets/__init__.py +27 -0
  86. core/ui/widgets/base_widget.py +136 -0
  87. core/ui/widgets/checkbox.py +85 -0
  88. core/ui/widgets/dropdown.py +140 -0
  89. core/ui/widgets/label.py +78 -0
  90. core/ui/widgets/slider.py +185 -0
  91. core/ui/widgets/text_input.py +224 -0
  92. core/utils/__init__.py +11 -0
  93. core/utils/config_utils.py +656 -0
  94. core/utils/dict_utils.py +212 -0
  95. core/utils/error_utils.py +275 -0
  96. core/utils/key_reader.py +171 -0
  97. core/utils/plugin_utils.py +267 -0
  98. core/utils/prompt_renderer.py +151 -0
  99. kollabor-0.4.9.dist-info/METADATA +298 -0
  100. kollabor-0.4.9.dist-info/RECORD +128 -0
  101. kollabor-0.4.9.dist-info/WHEEL +5 -0
  102. kollabor-0.4.9.dist-info/entry_points.txt +2 -0
  103. kollabor-0.4.9.dist-info/licenses/LICENSE +21 -0
  104. kollabor-0.4.9.dist-info/top_level.txt +4 -0
  105. kollabor_cli_main.py +20 -0
  106. plugins/__init__.py +1 -0
  107. plugins/enhanced_input/__init__.py +18 -0
  108. plugins/enhanced_input/box_renderer.py +103 -0
  109. plugins/enhanced_input/box_styles.py +142 -0
  110. plugins/enhanced_input/color_engine.py +165 -0
  111. plugins/enhanced_input/config.py +150 -0
  112. plugins/enhanced_input/cursor_manager.py +72 -0
  113. plugins/enhanced_input/geometry.py +81 -0
  114. plugins/enhanced_input/state.py +130 -0
  115. plugins/enhanced_input/text_processor.py +115 -0
  116. plugins/enhanced_input_plugin.py +385 -0
  117. plugins/fullscreen/__init__.py +9 -0
  118. plugins/fullscreen/example_plugin.py +327 -0
  119. plugins/fullscreen/matrix_plugin.py +132 -0
  120. plugins/hook_monitoring_plugin.py +1299 -0
  121. plugins/query_enhancer_plugin.py +350 -0
  122. plugins/save_conversation_plugin.py +502 -0
  123. plugins/system_commands_plugin.py +93 -0
  124. plugins/tmux_plugin.py +795 -0
  125. plugins/workflow_enforcement_plugin.py +629 -0
  126. system_prompt/default.md +1286 -0
  127. system_prompt/default_win.md +265 -0
  128. system_prompt/example_with_trender.md +47 -0
core/llm/llm_service.py
@@ -0,0 +1,1629 @@
+ """Core LLM Service for Kollabor CLI.
+
+ This is the essential LLM service that provides core language model
+ functionality as a critical part of the application infrastructure.
+ """
+
+ import asyncio
+ import logging
+ import subprocess
+ import time
+ from pathlib import Path
+ from typing import Any, Dict, List, Set, Optional
+ from datetime import datetime
+
+ from ..models import ConversationMessage
+ from ..events import EventType, Hook, HookPriority
+ from ..config.llm_task_config import LLMTaskConfig
+ from .api_communication_service import APICommunicationService
+ from .conversation_logger import KollaborConversationLogger
+ from .hook_system import LLMHookSystem
+ from .mcp_integration import MCPIntegration
+ from .message_display_service import MessageDisplayService
+ from .response_parser import ResponseParser
+ from .tool_executor import ToolExecutor
+
+ logger = logging.getLogger(__name__)
+
+
+ class LLMService:
+     """Core LLM service providing essential language model functionality.
+
+     This service is initialized as a core component and cannot be disabled.
+     It manages conversation history, model communication, and intelligent
+     conversation logging with memory features.
+     """
+
+     def _add_conversation_message(self, message_or_role, content=None, parent_uuid=None) -> str:
+         """Add a message to both conversation manager and legacy history.
+
+         This wrapper method ensures that messages are added to both the
+         ConversationManager and the legacy conversation_history for compatibility.
+
+         Args:
+             message_or_role: Either a ConversationMessage object or a role string
+             content: Message content (required if first arg is role string)
+             parent_uuid: Optional parent UUID for message threading
+
+         Returns:
+             UUID of the added message
+         """
+         from ..models import ConversationMessage
+
+         # Handle both signatures: ConversationMessage object or separate role/content
+         if isinstance(message_or_role, ConversationMessage):
+             message = message_or_role
+             role = message.role
+             content = message.content
+         else:
+             role = message_or_role
+             if content is None:
+                 raise TypeError("Content is required when role is provided as string")
+             message = ConversationMessage(role=role, content=content)
+
+         # Add to conversation manager if available
+         if hasattr(self, "conversation_manager") and self.conversation_manager:
+             message_uuid = self.conversation_manager.add_message(
+                 role=role,
+                 content=content,
+                 parent_uuid=parent_uuid
+             )
+         else:
+             # Fallback - create a UUID if conversation manager not available
+             import uuid
+             message_uuid = str(uuid.uuid4())
+
+         # Add to legacy history for compatibility
+         self.conversation_history.append(message)
+
+         return message_uuid
+
+
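
# Usage sketch for the dual-signature wrapper above; `service` stands in for
# an initialized LLMService and is hypothetical, as is this helper:

def _example_add_messages(service):
    from core.models import ConversationMessage  # package-qualified import assumed

    # Signature 1: a prebuilt ConversationMessage object
    first_uuid = service._add_conversation_message(
        ConversationMessage(role="user", content="Hello")
    )

    # Signature 2: role string plus content, threaded under the first message
    return service._add_conversation_message("assistant", "Hi!", parent_uuid=first_uuid)
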
+     def __init__(self, config, state_manager, event_bus, renderer, default_timeout: Optional[float] = None, enable_metrics: bool = False):
+         """Initialize the core LLM service.
+
+         Args:
+             config: Configuration manager instance
+             state_manager: State management system
+             event_bus: Event bus for hook registration
+             renderer: Terminal renderer for output
+             default_timeout: Default timeout for background tasks in seconds
+             enable_metrics: Whether to enable detailed task metrics tracking
+         """
+         self.config = config
+         self.state_manager = state_manager
+         self.event_bus = event_bus
+         self.renderer = renderer
+
+         # Timeout and metrics configuration
+         self.default_timeout = default_timeout
+         self.enable_metrics = enable_metrics
+
+         # Load LLM configuration from core.llm section (API details handled by API service)
+         self.max_history = config.get("core.llm.max_history", 90)
+
+         # Load task management configuration using structured dataclass
+         task_config_dict = config.get("core.llm.task_management", {})
+         self.task_config = LLMTaskConfig.from_dict(task_config_dict)
+
+         # Conversation state
+         self.conversation_history: List[ConversationMessage] = []
+         # Queue management with memory leak prevention
+         self.max_queue_size = self.task_config.queue.max_size
+         self.processing_queue = asyncio.Queue(maxsize=self.max_queue_size)
+         self.dropped_messages = 0
+         self.is_processing = False
+         self.turn_completed = False
+         self.cancel_processing = False
+         self.cancellation_message_shown = False
+
+         # Initialize conversation logger with intelligence features
+         from ..utils.config_utils import get_config_directory
+         config_dir = get_config_directory()
+         conversations_dir = config_dir / "conversations"
+         conversations_dir.mkdir(parents=True, exist_ok=True)
+
+         # Initialize raw conversation logging directory
+         self.raw_conversations_dir = config_dir / "conversations_raw"
+         self.raw_conversations_dir.mkdir(parents=True, exist_ok=True)
+         self.conversation_logger = KollaborConversationLogger(conversations_dir)
+
+         # Initialize hook system
+         self.hook_system = LLMHookSystem(event_bus)
+
+         # Initialize MCP integration and tool components
+         self.mcp_integration = MCPIntegration()
+         self.response_parser = ResponseParser()
+         self.tool_executor = ToolExecutor(
+             mcp_integration=self.mcp_integration,
+             event_bus=event_bus,
+             terminal_timeout=config.get("core.llm.terminal_timeout", 30),
+             mcp_timeout=config.get("core.llm.mcp_timeout", 60)
+         )
+
+         # Initialize message display service (KISS/DRY: eliminates duplicated display code)
+         self.message_display = MessageDisplayService(renderer)
+
+         # Initialize API communication service (KISS: pure API communication separation)
+         self.api_service = APICommunicationService(config, self.raw_conversations_dir)
+
+         # Track current message threading
+         self.current_parent_uuid = None
+
+         # Create hooks for LLM service
+         self.hooks = [
+             Hook(
+                 name="process_user_input",
+                 plugin_name="llm_core",
+                 event_type=EventType.USER_INPUT,
+                 priority=HookPriority.LLM.value,
+                 callback=self._handle_user_input
+             ),
+             Hook(
+                 name="cancel_request",
+                 plugin_name="llm_core",
+                 event_type=EventType.CANCEL_REQUEST,
+                 priority=HookPriority.SYSTEM.value,
+                 callback=self._handle_cancel_request
+             )
+         ]
+
+         # Session statistics
+         self.stats = {
+             "total_messages": 0,
+             "total_thinking_time": 0.0,
+             "sessions_count": 0,
+             "last_session": None,
+             "total_input_tokens": 0,
+             "total_output_tokens": 0
+         }
+
+         self.session_stats = {
+             "input_tokens": 0,
+             "output_tokens": 0,
+             "messages": 0
+         }
+
+         # Current processing state
+         self.current_processing_tokens = 0
+         self.processing_start_time = None
+
+         # Background task tracking system
+         self._background_tasks: Set[asyncio.Task] = set()
+         self._task_metadata: Dict[str, Any] = {}
+         self._max_concurrent_tasks = self.task_config.background_tasks.max_concurrent
+         self._task_error_count = 0
+         self._monitoring_task: Optional[asyncio.Task] = None
+
+         # Circuit breaker state variables
+         self._circuit_breaker_state = "CLOSED"  # CLOSED, OPEN, HALF_OPEN
+
+         # Queue overflow metrics counters
+         self._queue_metrics = {
+             'drop_oldest_count': 0,
+             'drop_newest_count': 0,
+             'block_count': 0,
+             'block_timeout_count': 0,
+             'total_enqueue_attempts': 0,
+             'total_enqueue_successes': 0
+         }
+         self._circuit_breaker_failures = 0
+         self._circuit_breaker_last_failure_time = None
+         self._circuit_breaker_test_task_running = False
+
+         # Metrics tracking system
+         if self.enable_metrics:
+             self._task_metrics: Dict[str, Dict[str, Any]] = {}
+
+         logger.info("Core LLM Service initialized")
+
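
# A hedged sketch of the `core.llm.task_management` block consumed above via
# LLMTaskConfig.from_dict(); the field names mirror the attributes this file
# reads (task_config.queue.*, task_config.background_tasks.*), but the exact
# schema lives in core/config/llm_task_config.py and the values here are
# illustrative assumptions:

example_task_management = {
    "queue": {
        "max_size": 100,
        "overflow_strategy": "drop_oldest",  # or "drop_newest" / "block"
        "block_timeout": 5.0,
        "log_queue_events": True,
    },
    "background_tasks": {
        "max_concurrent": 10,
        "enable_monitoring": True,
        "cleanup_interval": 30,
        "default_timeout": 0,  # 0 disables timeout wrapping
        "enable_task_circuit_breaker": True,
        "circuit_breaker_threshold": 5,
        "circuit_breaker_timeout": 60.0,
        "task_retry_attempts": 2,
        "task_retry_delay": 1.0,
        "log_task_events": False,
        "log_task_errors": True,
    },
}
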
+     async def initialize(self):
+         """Initialize the LLM service components."""
+         # Initialize API communication service (KISS refactoring)
+         await self.api_service.initialize()
+
+         # Register hooks
+         await self.hook_system.register_hooks()
+
+         # Discover and register MCP servers and tools
+         try:
+             discovered_servers = await self.mcp_integration.discover_mcp_servers()
+             logger.info(f"Discovered {len(discovered_servers)} MCP servers")
+         except Exception as e:
+             logger.warning(f"MCP discovery failed: {e}")
+
+         # Initialize conversation with context
+         await self._initialize_conversation()
+
+         # Log conversation start
+         await self.conversation_logger.log_conversation_start()
+
+         # Start task monitoring
+         if self.task_config.background_tasks.enable_monitoring:
+             await self.start_task_monitor()
+
+         logger.info("Core LLM Service initialized and ready")
+
+     async def _initialize_conversation(self):
+         """Initialize conversation with project context."""
+         try:
+             # Clear any existing history
+             self.conversation_history = []
+             self.state_manager.set("llm.conversation_history", [])
+
+             # Build system prompt from configuration
+             initial_message = self._build_system_prompt()
+
+             self._add_conversation_message(ConversationMessage(
+                 role="system",
+                 content=initial_message
+             ))
+
+             # Log initial context message
+             self.current_parent_uuid = await self.conversation_logger.log_user_message(
+                 initial_message,
+                 user_context={
+                     "type": "system_initialization",
+                     "project_context_loaded": True
+                 }
+             )
+
+             logger.info("Conversation initialized with project context")
+
+         except Exception as e:
+             logger.error(f"Failed to initialize conversation: {e}")
+
+     async def _enqueue_with_overflow_strategy(self, message: str) -> None:
+         """Enqueue message with configurable overflow strategy.
+
+         Args:
+             message: Message to enqueue
+
+         Raises:
+             RuntimeError: If overflow_strategy is 'drop_newest' and the queue is full
+         """
+         self._queue_metrics['total_enqueue_attempts'] += 1
+
+         # Log queue events if enabled
+         if self.task_config.queue.log_queue_events:
+             logger.debug(f"Attempting to enqueue message (queue size: {self.processing_queue.qsize()}/{self.max_queue_size})")
+
+         # Try to enqueue immediately
+         try:
+             self.processing_queue.put_nowait(message)
+             self._queue_metrics['total_enqueue_successes'] += 1
+             if self.task_config.queue.log_queue_events:
+                 logger.debug("Message enqueued successfully")
+             return
+         except asyncio.QueueFull:
+             pass  # Queue is full, apply overflow strategy
+
+         # Apply configured overflow strategy
+         strategy = self.task_config.queue.overflow_strategy
+
+         if strategy == "drop_oldest":
+             # Cancel oldest task by start_time and drop it
+             if self.task_config.queue.log_queue_events:
+                 logger.debug("Applying drop_oldest strategy")
+
+             # Find oldest task by start_time
+             oldest_task = None
+             oldest_start_time = None
+
+             for task in self._background_tasks:
+                 task_name = task.get_name()
+                 if task_name in self._task_metadata:
+                     start_time = self._task_metadata[task_name].get('created_at')
+                     if start_time and (oldest_start_time is None or start_time < oldest_start_time):
+                         oldest_task = task
+                         oldest_start_time = start_time
+
+             if oldest_task:
+                 oldest_task.cancel()
+                 self._queue_metrics['drop_oldest_count'] += 1
+                 if self.task_config.queue.log_queue_events:
+                     logger.info(f"Cancelled oldest task {oldest_task.get_name()} to make room")
+
+                 # Wait a moment for cancellation to take effect
+                 await asyncio.sleep(0.01)
+
+             # Try to enqueue again
+             try:
+                 self.processing_queue.put_nowait(message)
+                 self._queue_metrics['total_enqueue_successes'] += 1
+                 if self.task_config.queue.log_queue_events:
+                     logger.info("Message enqueued after dropping oldest task")
+             except asyncio.QueueFull:
+                 # Still full, drop the message
+                 self.dropped_messages += 1
+                 if self.task_config.queue.log_queue_events:
+                     logger.warning("Queue still full after dropping oldest task, dropping message")
+
+         elif strategy == "drop_newest":
+             # Raise RuntimeError when queue is full
+             self._queue_metrics['drop_newest_count'] += 1
+             if self.task_config.queue.log_queue_events:
+                 logger.debug("Applying drop_newest strategy - raising RuntimeError")
+             raise RuntimeError(f"Queue is full (max size: {self.max_queue_size}) and overflow strategy is 'drop_newest'")
+
+         elif strategy == "block":
+             # Wait with asyncio.sleep polling until space or block_timeout
+             self._queue_metrics['block_count'] += 1
+             if self.task_config.queue.log_queue_events:
+                 logger.debug(f"Applying block strategy (timeout: {self.task_config.queue.block_timeout}s)")
+
+             start_time = time.time()
+             poll_interval = 0.01  # 10ms polling
+
+             while True:
+                 # Check if queue has space
+                 if self.processing_queue.qsize() < self.max_queue_size:
+                     try:
+                         self.processing_queue.put_nowait(message)
+                         self._queue_metrics['total_enqueue_successes'] += 1
+                         if self.task_config.queue.log_queue_events:
+                             logger.info("Message enqueued after blocking")
+                         return
+                     except asyncio.QueueFull:
+                         pass  # Still full, continue blocking
+
+                 # Check timeout
+                 elapsed = time.time() - start_time
+                 if self.task_config.queue.block_timeout is not None and elapsed >= self.task_config.queue.block_timeout:
+                     self._queue_metrics['block_timeout_count'] += 1
+                     if self.task_config.queue.log_queue_events:
+                         logger.warning(f"Block timeout after {elapsed:.2f}s, dropping message")
+                     self.dropped_messages += 1
+                     return
+
+                 # Brief sleep before next poll
+                 await asyncio.sleep(poll_interval)
+
+         else:
+             # Unknown strategy, default to dropping oldest
+             logger.warning(f"Unknown overflow strategy '{strategy}', defaulting to drop_oldest")
+             try:
+                 self.processing_queue.get_nowait()  # Drop oldest
+                 self.processing_queue.put_nowait(message)
+                 self._queue_metrics['total_enqueue_successes'] += 1
+             except asyncio.QueueEmpty:
+                 self.dropped_messages += 1
+
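
# Caller-side sketch for the overflow strategies above: "drop_oldest" and
# "block" degrade silently (at worst incrementing dropped_messages), while
# "drop_newest" surfaces saturation as a RuntimeError. `service` is a
# hypothetical initialized LLMService:

async def _example_enqueue(service, text: str) -> bool:
    try:
        await service._enqueue_with_overflow_strategy(text)
        return True
    except RuntimeError:
        # Queue full under 'drop_newest'; caller decides how to surface it
        return False
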
+     def create_background_task(self, coro, name: str = None) -> asyncio.Task:
+         """Create and track a background task with proper error handling and circuit breaker."""
+         # Check circuit breaker state
+         if self.task_config.background_tasks.enable_task_circuit_breaker:
+             # Reject tasks if circuit is OPEN
+             if self._circuit_breaker_state == "OPEN":
+                 # Check if timeout has passed to transition to HALF_OPEN
+                 if self._circuit_breaker_last_failure_time:
+                     time_since_failure = time.time() - self._circuit_breaker_last_failure_time
+                     timeout = self.task_config.background_tasks.circuit_breaker_timeout
+                     if time_since_failure >= timeout:
+                         logger.info("Circuit breaker timeout elapsed, transitioning to HALF_OPEN")
+                         self._circuit_breaker_state = "HALF_OPEN"
+                         self._circuit_breaker_test_task_running = False
+                     else:
+                         logger.warning(f"Circuit breaker OPEN - rejecting task '{name or 'unnamed'}'")
+                         raise Exception(f"Circuit breaker OPEN - tasks rejected for {timeout - time_since_failure:.1f}s more")
+                 else:
+                     logger.warning(f"Circuit breaker OPEN - rejecting task '{name or 'unnamed'}'")
+                     raise Exception("Circuit breaker OPEN - tasks rejected")
+
+             # Allow only one test task in HALF_OPEN state
+             elif self._circuit_breaker_state == "HALF_OPEN" and self._circuit_breaker_test_task_running:
+                 logger.warning(f"Circuit breaker HALF_OPEN - test task already running, rejecting '{name or 'unnamed'}'")
+                 raise Exception("Circuit breaker HALF_OPEN - test task already running")
+
+         # Handle task overflow using configured queue strategy
+         if len(self._background_tasks) >= self._max_concurrent_tasks:
+             strategy = self.task_config.queue.overflow_strategy
+
+             if self.task_config.queue.log_queue_events:
+                 logger.debug(f"Background task queue full ({len(self._background_tasks)}/{self._max_concurrent_tasks}), applying strategy: {strategy}")
+
+             if strategy == "drop_newest":
+                 # Raise RuntimeError when task queue is full
+                 self._queue_metrics['drop_newest_count'] += 1
+                 if self.task_config.queue.log_queue_events:
+                     logger.debug("Background task queue full - raising RuntimeError")
+                 raise RuntimeError(f"Maximum concurrent tasks ({self._max_concurrent_tasks}) reached and overflow strategy is 'drop_newest'")
+
+             elif strategy == "drop_oldest":
+                 # Cancel oldest task by start_time to make room
+                 oldest_task = None
+                 oldest_start_time = None
+
+                 for task in self._background_tasks:
+                     task_name = task.get_name()
+                     if task_name in self._task_metadata:
+                         start_time = self._task_metadata[task_name].get('created_at')
+                         if start_time and (oldest_start_time is None or start_time < oldest_start_time):
+                             oldest_task = task
+                             oldest_start_time = start_time
+
+                 if oldest_task:
+                     oldest_task.cancel()
+                     self._queue_metrics['drop_oldest_count'] += 1
+                     if self.task_config.queue.log_queue_events:
+                         logger.info(f"Cancelled oldest background task {oldest_task.get_name()} to make room")
+                 else:
+                     # No suitable task found, raise error
+                     raise RuntimeError(f"Maximum concurrent tasks ({self._max_concurrent_tasks}) reached and no cancellable tasks found")
+
+             elif strategy == "block":
+                 # For block strategy, create a background task that handles the blocking
+                 self._queue_metrics['block_count'] += 1
+                 if self.task_config.queue.log_queue_events:
+                     logger.debug(f"Creating background task to handle blocking strategy (timeout: {self.task_config.queue.block_timeout}s)")
+
+                 # Create a task that will wait for space and then run the actual task
+                 blocking_task = asyncio.create_task(
+                     self._create_task_with_blocking(coro, name),
+                     name=f"blocking_wrapper_{name or 'unnamed'}"
+                 )
+                 return blocking_task
+
+             else:
+                 # Unknown strategy, default to drop_oldest
+                 logger.warning(f"Unknown overflow strategy '{strategy}', defaulting to drop_oldest")
+                 raise RuntimeError(f"Maximum concurrent tasks ({self._max_concurrent_tasks}) reached")
+
+         task_name = name or f"bg_task_{datetime.now().timestamp()}"
+         start_time = time.time()
+
+         # Store original coroutine before timeout wrapping for retry purposes
+         original_coro = coro
+
+         # Add timeout wrapping if default_timeout is set (0 = disabled for autonomous LLM work)
+         default_timeout = getattr(self.task_config.background_tasks, 'default_timeout', 0)
+         if default_timeout is not None and default_timeout > 0:
+             wrapped_coro = asyncio.wait_for(coro, timeout=default_timeout)
+         else:
+             wrapped_coro = coro
+
+         # Mark test task running in HALF_OPEN state
+         if self.task_config.background_tasks.enable_task_circuit_breaker and self._circuit_breaker_state == "HALF_OPEN":
+             self._circuit_breaker_test_task_running = True
+             logger.info(f"Circuit breaker HALF_OPEN - allowing test task '{task_name}'")
+
+         task = asyncio.create_task(
+             self._safe_task_wrapper(wrapped_coro, task_name),
+             name=task_name
+         )
+
+         # Track the task with retry information
+         self._background_tasks.add(task)
+         self._task_metadata[task_name] = {
+             'created_at': datetime.now(),
+             'coro_name': coro.__name__ if hasattr(coro, '__name__') else str(coro),
+             'start_time': start_time,
+             'retry_count': 0,
+             'original_coro': original_coro  # Store original coroutine for retries
+         }
+
+         # Add cleanup callback
+         task.add_done_callback(self._task_done_callback)
+
+         return task
+
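
# Scheduling sketch for create_background_task; `service` and `do_work` are
# hypothetical. The breaker cycle enforced above is CLOSED -> OPEN (after
# circuit_breaker_threshold failures) -> HALF_OPEN (after
# circuit_breaker_timeout seconds, one test task) -> CLOSED again on success:

async def _example_schedule(service):
    import asyncio

    async def do_work():
        await asyncio.sleep(0.1)  # placeholder workload
        return "done"

    task = service.create_background_task(do_work(), name="do_work")
    return await task  # _safe_task_wrapper re-raises failures
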
+     async def _create_task_with_blocking(self, coro, name: str = None) -> Any:
+         """Handle blocking strategy by waiting for available task slot."""
+         start_time = time.time()
+         poll_interval = 0.01  # 10ms polling
+
+         while len(self._background_tasks) >= self._max_concurrent_tasks:
+             # Check timeout
+             elapsed = time.time() - start_time
+             if self.task_config.queue.block_timeout is not None and elapsed >= self.task_config.queue.block_timeout:
+                 self._queue_metrics['block_timeout_count'] += 1
+                 if self.task_config.queue.log_queue_events:
+                     logger.warning(f"Background task block timeout after {elapsed:.2f}s")
+                 raise RuntimeError(f"Timeout waiting for available task slot (timeout: {self.task_config.queue.block_timeout}s)")
+
+             # Brief sleep before next poll
+             await asyncio.sleep(poll_interval)
+
+         # Space is available, create the actual task using the normal path
+         # We can call the synchronous version since we now have space
+         return self.create_background_task(coro, name)
+
+     async def _safe_task_wrapper(self, coro, task_name: str):
+         """Wrapper that safely executes task and handles exceptions."""
+         try:
+             if self.task_config.background_tasks.log_task_events:
+                 logger.debug(f"Starting background task: {task_name}")
+             result = await coro
+             if self.task_config.background_tasks.log_task_events:
+                 logger.debug(f"Background task completed successfully: {task_name}")
+             return result
+
+         except asyncio.CancelledError:
+             logger.info(f"Background task cancelled: {task_name}")
+             raise
+
+         except Exception as e:
+             if self.task_config.background_tasks.log_task_errors:
+                 logger.error(f"Background task failed: {task_name} - {type(e).__name__}: {e}")
+             self._task_error_count += 1
+             await self._handle_task_error(task_name, e)
+             raise
+
+     def _task_done_callback(self, task: asyncio.Task):
+         """Called when a task completes."""
+         self._background_tasks.discard(task)
+
+         task_name = task.get_name()
+
+         # Track duration and metrics if enabled - capture metadata before deletion
+         metadata = None
+         if task_name in self._task_metadata:
+             metadata = self._task_metadata[task_name]
+
+         # Store metrics if enabled and we have start_time
+         if self.enable_metrics and hasattr(self, '_task_metrics') and metadata:
+             start_time = metadata.get('start_time')
+
+             if start_time:
+                 duration = time.time() - start_time
+
+                 # Task.exception() raises CancelledError on a cancelled task, so
+                 # resolve it once behind a guard before building the record
+                 exception = None if task.cancelled() else task.exception()
+
+                 # Store metrics
+                 self._task_metrics[task_name] = {
+                     'duration': duration,
+                     'status': 'cancelled' if task.cancelled() else 'failed' if exception else 'completed',
+                     'cancelled': task.cancelled(),
+                     'exception': str(exception) if exception else None,
+                     'completed_at': datetime.now(),
+                     'coro_name': metadata.get('coro_name', 'unknown')
+                 }
+
+         # Clean up metadata
+         if metadata is not None:
+             del self._task_metadata[task_name]
+
+         if task.cancelled():
+             if self.task_config.background_tasks.log_task_events:
+                 logger.debug(f"Task cancelled: {task_name}")
+         elif task.exception():
+             if self.task_config.background_tasks.log_task_errors:
+                 logger.error(f"Task failed with exception: {task_name} - {task.exception()}")
+         else:
+             # Task completed successfully - check circuit breaker state
+             if (self.task_config.background_tasks.enable_task_circuit_breaker and
+                     self._circuit_breaker_state == "HALF_OPEN"):
+                 logger.info(f"Circuit breaker HALF_OPEN - test task '{task_name}' completed successfully, transitioning to CLOSED")
+                 self._circuit_breaker_state = "CLOSED"
+                 self._circuit_breaker_failures = 0
+                 self._circuit_breaker_last_failure_time = None
+                 self._circuit_breaker_test_task_running = False
+
+             if self.task_config.background_tasks.log_task_events:
+                 logger.debug(f"Task completed: {task_name}")
+
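
# Reading the per-task metrics recorded above (requires the service to have
# been constructed with enable_metrics=True); `service` is hypothetical and
# this pokes a private attribute purely for illustration:

def _example_dump_metrics(service):
    for name, m in service._task_metrics.items():
        print(f"{name}: {m['status']} in {m['duration']:.2f}s "
              f"(coro={m['coro_name']}, exception={m['exception']})")
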
+     async def _handle_task_error(self, task_name: str, error: Exception):
+         """Handle errors from background tasks."""
+         # Circuit breaker pattern implementation
+         if self.task_config.background_tasks.enable_task_circuit_breaker:
+             self._circuit_breaker_failures += 1
+             self._circuit_breaker_last_failure_time = time.time()
+
+             # Check if failure threshold reached
+             threshold = self.task_config.background_tasks.circuit_breaker_threshold
+             if self._circuit_breaker_failures >= threshold:
+                 if self._circuit_breaker_state != "OPEN":
+                     logger.warning(f"Circuit breaker threshold ({threshold}) reached, opening circuit due to task failure: {task_name}")
+                     self._circuit_breaker_state = "OPEN"
+                     self._circuit_breaker_test_task_running = False
+                 else:
+                     logger.debug(f"Circuit breaker already OPEN, failure count: {self._circuit_breaker_failures}")
+             else:
+                 logger.warning(f"Task failure ({self._circuit_breaker_failures}/{threshold}) - circuit breaker {self._circuit_breaker_state}")
+
+         # Retry logic implementation
+         task_metadata = self._task_metadata.get(task_name, {})
+         retry_count = task_metadata.get('retry_count', 0)
+         original_coro = task_metadata.get('original_coro')
+
+         # Check if we should retry this task
+         max_retries = self.task_config.background_tasks.task_retry_attempts
+         retry_delay = self.task_config.background_tasks.task_retry_delay
+
+         if retry_count < max_retries and original_coro is not None:
+             # Increment retry count
+             self._task_metadata[task_name]['retry_count'] = retry_count + 1
+
+             logger.warning(
+                 f"Retrying task {task_name} (attempt {retry_count + 1}/{max_retries}) "
+                 f"after {retry_delay}s delay due to {type(error).__name__}: {error}"
+             )
+
+             # Wait for retry delay
+             await asyncio.sleep(retry_delay)
+
+             # Create new task with original coroutine
+             new_task_name = f"{task_name}_retry_{retry_count + 1}"
+             self.create_background_task(original_coro, new_task_name)
+
+             logger.info(f"Created retry task: {new_task_name}")
+         else:
+             # No more retries or no original coroutine available
+             if retry_count >= max_retries:
+                 logger.error(
+                     f"Task {task_name} failed after {max_retries} retry attempts. "
+                     f"Final error: {type(error).__name__}: {error}"
+                 )
+             else:
+                 logger.error(f"Task {task_name} failed (no retry possible): {error}")
+
+         # Could implement additional error handling:
+         # - Error reporting to monitoring service
+         # - Error notifications
+
+     async def start_task_monitor(self):
+         """Start background task monitoring and cleanup."""
+         self._monitoring_task = asyncio.create_task(self._monitor_tasks())
+         logger.info("Task monitoring started")
+
+     async def _monitor_tasks(self):
+         """Monitor and cleanup completed tasks."""
+         cleanup_interval = self.task_config.background_tasks.cleanup_interval
+
+         while True:
+             try:
+                 # Remove completed tasks
+                 completed_tasks = [t for t in self._background_tasks if t.done()]
+                 for task in completed_tasks:
+                     self._background_tasks.discard(task)
+
+                 if completed_tasks:
+                     logger.debug(f"Cleaned up {len(completed_tasks)} completed tasks")
+
+                 # Log status
+                 if len(self._background_tasks) > 0:
+                     logger.debug(f"Active background tasks: {len(self._background_tasks)}")
+
+                 # Monitor queue health
+                 queue_size = self.processing_queue.qsize()
+                 queue_utilization = (queue_size / self.max_queue_size * 100) if self.max_queue_size > 0 else 0
+
+                 if queue_utilization > 80:
+                     logger.warning(f"Queue utilization high: {queue_utilization:.1f}% ({queue_size}/{self.max_queue_size})")
+
+                 if self.dropped_messages > 0:
+                     logger.warning(f"Messages dropped: {self.dropped_messages}")
+
+                 await asyncio.sleep(cleanup_interval)
+
+             except Exception as e:
+                 logger.error(f"Error in task monitoring: {e}")
+                 await asyncio.sleep(cleanup_interval)
+
+     async def get_task_status(self):
+         """Get status of all background tasks."""
+         status = {
+             'active_tasks': len(self._background_tasks),
+             'max_concurrent': self._max_concurrent_tasks,
+             'error_count': self._task_error_count,
+             'tasks': []
+         }
+
+         for task in self._background_tasks:
+             # Task.exception() raises InvalidStateError on a running task and
+             # CancelledError on a cancelled one, so only query finished tasks
+             exception = task.exception() if (task.done() and not task.cancelled()) else None
+             task_info = {
+                 'name': task.get_name(),
+                 'done': task.done(),
+                 'cancelled': task.cancelled(),
+                 'exception': str(exception) if exception else None
+             }
+             status['tasks'].append(task_info)
+
+         return status
+
+     async def cancel_all_tasks(self):
+         """Cancel all background tasks and wait for cleanup."""
+         logger.info(f"Cancelling {len(self._background_tasks)} background tasks")
+
+         for task in self._background_tasks:
+             if not task.done():
+                 task.cancel()
+
+         # Wait for all tasks to complete (with timeout)
+         if self._background_tasks:
+             try:
+                 await asyncio.wait_for(
+                     asyncio.gather(*self._background_tasks, return_exceptions=True),
+                     timeout=10.0
+                 )
+             except asyncio.TimeoutError:
+                 logger.warning("Some tasks didn't finish gracefully")
+
+         self._background_tasks.clear()
+         self._task_metadata.clear()
+
+     async def wait_for_tasks(self, timeout: float = 30.0):
+         """Wait for all background tasks to complete."""
+         if not self._background_tasks:
+             return
+
+         try:
+             await asyncio.wait_for(
+                 asyncio.gather(*self._background_tasks, return_exceptions=True),
+                 timeout=timeout
+             )
+         except asyncio.TimeoutError:
+             logger.warning("Timeout waiting for tasks to complete")
+             # Cancel remaining tasks
+             await self.cancel_all_tasks()
+
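
# Graceful-shutdown sketch built from the two helpers above; `service` is a
# hypothetical initialized LLMService:

async def _example_shutdown(service, drain: bool = True):
    if drain:
        # Let in-flight work finish; on timeout, wait_for_tasks() itself
        # falls back to cancel_all_tasks(), so one call suffices
        await service.wait_for_tasks(timeout=5.0)
    else:
        # Stop immediately (internally waits up to 10s for cleanup)
        await service.cancel_all_tasks()
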
+     def _get_tree_output(self) -> str:
+         """Get project directory tree output."""
+         try:
+             result = subprocess.run(
+                 ["tree", "-I", "__pycache__|*.pyc|.git|.venv|venv|node_modules", "-L", "3"],
+                 capture_output=True,
+                 text=True,
+                 timeout=5,
+                 cwd=Path.cwd()
+             )
+             if result.returncode == 0:
+                 return result.stdout
+             else:
+                 # Fallback to basic ls if tree is not available
+                 result = subprocess.run(
+                     ["ls", "-la"],
+                     capture_output=True,
+                     text=True,
+                     timeout=5,
+                     cwd=Path.cwd()
+                 )
+                 return result.stdout if result.returncode == 0 else "Could not get directory listing"
+         except Exception as e:
+             logger.warning(f"Failed to get tree output: {e}")
+             return "Could not get directory listing"
+
+     def _build_system_prompt(self) -> str:
+         """Build system prompt from file (not config.json).
+
+         Priority:
+         1. KOLLABOR_SYSTEM_PROMPT environment variable (direct string)
+         2. KOLLABOR_SYSTEM_PROMPT_FILE environment variable (custom file path)
+         3. Local .kollabor-cli/system_prompt/default.md (project override)
+         4. Global ~/.kollabor-cli/system_prompt/default.md
+         5. Fallback to minimal default
+
+         Returns:
+             Fully rendered system prompt with all <trender> tags executed.
+         """
+         from ..utils.config_utils import get_system_prompt_content, initialize_system_prompt
+         from ..utils.prompt_renderer import render_system_prompt
+
+         # Ensure system prompts are initialized (copies global to local if needed)
+         initialize_system_prompt()
+
+         # Load base prompt (checks env vars and files in priority order)
+         base_prompt = get_system_prompt_content()
+
+         # Render <trender> tags BEFORE building the full prompt
+         base_prompt = render_system_prompt(base_prompt, timeout=5)
+
+         prompt_parts = [base_prompt]
+
+         # Add project structure if enabled
+         include_structure = self.config.get("core.llm.system_prompt.include_project_structure", True)
+         if include_structure:
+             tree_output = self._get_tree_output()
+             prompt_parts.append(f"## Project Structure\n```\n{tree_output}\n```")
+
+         # Add attachment files
+         attachment_files = self.config.get("core.llm.system_prompt.attachment_files", [])
+         for filename in attachment_files:
+             file_path = Path.cwd() / filename
+             if file_path.exists():
+                 try:
+                     content = file_path.read_text(encoding='utf-8')
+                     prompt_parts.append(f"## {filename}\n```markdown\n{content}\n```")
+                     logger.debug(f"Attached file: {filename}")
+                 except Exception as e:
+                     logger.warning(f"Failed to read {filename}: {e}")
+
+         # Add custom prompt files
+         custom_files = self.config.get("core.llm.system_prompt.custom_prompt_files", [])
+         for filename in custom_files:
+             file_path = Path.cwd() / filename
+             if file_path.exists():
+                 try:
+                     content = file_path.read_text(encoding='utf-8')
+                     prompt_parts.append(f"## Custom Instructions ({filename})\n{content}")
+                     logger.debug(f"Added custom prompt: {filename}")
+                 except Exception as e:
+                     logger.warning(f"Failed to read custom prompt {filename}: {e}")
+
+         # Add closing statement
+         prompt_parts.append("This is the codebase and context for our session. You now have full project awareness.")
+
+         return "\n\n".join(prompt_parts)
+
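
# Override sketch for the priority chain documented above, using the two
# environment variables named in the docstring (values are illustrative):

import os
from pathlib import Path

# Highest priority: the prompt text itself
os.environ["KOLLABOR_SYSTEM_PROMPT"] = "You are a terse code reviewer."

# Next in line: a custom prompt file path
os.environ["KOLLABOR_SYSTEM_PROMPT_FILE"] = str(Path.home() / "prompts" / "reviewer.md")
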
+     async def process_user_input(self, message: str) -> Dict[str, Any]:
+         """Process user input through the LLM.
+
+         This is the main entry point for user messages.
+
+         Args:
+             message: User's input message
+
+         Returns:
+             Status information about processing
+         """
+         # Display user message using MessageDisplayService (DRY refactoring)
+         logger.debug(f"DISPLAY DEBUG: About to display user message: '{message[:100]}...' ({len(message)} chars)")
+         self.message_display.display_user_message(message)
+
+         # Reset turn_completed flag
+         self.turn_completed = False
+         self.cancel_processing = False
+         self.cancellation_message_shown = False
+
+         # Log user message
+         self.current_parent_uuid = await self.conversation_logger.log_user_message(
+             message,
+             parent_uuid=self.current_parent_uuid
+         )
+
+         # Add to processing queue with overflow handling
+         await self._enqueue_with_overflow_strategy(message)
+
+         # Start processing if not already running
+         if not self.is_processing:
+             self.create_background_task(self._process_queue(), name="process_queue")
+
+         return {"status": "queued"}
+
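
# End-to-end driver sketch for the entry point above: enqueue one message,
# then wait for the agentic loop to drain. `service` is a hypothetical
# initialized LLMService:

async def _example_chat_once(service):
    result = await service.process_user_input("Summarize the README")
    assert result["status"] == "queued"
    # The actual turn runs inside the "process_queue" background task
    await service.wait_for_tasks(timeout=120.0)
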
+     async def _handle_user_input(self, data: Dict[str, Any], event) -> Dict[str, Any]:
+         """Handle user input hook callback.
+
+         This is called by the event bus when user input occurs.
+
+         Args:
+             data: Event data containing user message
+             event: The event object
+
+         Returns:
+             Result of processing
+         """
+         message = data.get("message", "")
+         if message.strip():
+             result = await self.process_user_input(message)
+             return result
+         return {"status": "empty_message"}
+
+     async def _handle_cancel_request(self, data: Dict[str, Any], event) -> Dict[str, Any]:
+         """Handle cancel request hook callback.
+
+         This is called by the event bus when a cancellation request occurs.
+
+         Args:
+             data: Event data containing cancellation reason
+             event: The event object
+
+         Returns:
+             Result of cancellation
+         """
+         reason = data.get("reason", "unknown")
+         source = data.get("source", "unknown")
+
+         # Check if we're in pipe mode - ignore cancel requests from stdin
+         if hasattr(self.renderer, 'pipe_mode') and getattr(self.renderer, 'pipe_mode', False):
+             logger.info(f"LLM SERVICE: Ignoring cancel request in pipe mode (from {source}: {reason})")
+             return {"status": "ignored", "reason": "pipe_mode"}
+
+         logger.info(f"LLM SERVICE: Cancel request hook called! From {source}: {reason}")
+         logger.info(f"LLM SERVICE: Currently processing: {self.is_processing}")
+
+         # Cancel current request
+         self.cancel_current_request()
+
+         logger.info(f"LLM SERVICE: Cancellation flag set: {self.cancel_processing}")
+         return {"status": "cancelled", "reason": reason}
+
+     async def register_hooks(self) -> None:
+         """Register LLM service hooks with the event bus."""
+         for hook in self.hooks:
+             await self.event_bus.register_hook(hook)
+         logger.info(f"Registered {len(self.hooks)} hooks for LLM core service")
+
+     def cancel_current_request(self):
+         """Cancel the current processing request."""
+         if self.is_processing:
+             self.cancel_processing = True
+             # Cancel API request through API service (KISS refactoring)
+             self.api_service.cancel_current_request()
+             logger.info("Processing cancellation requested")
+
+     async def _process_queue(self):
+         """Process queued messages."""
+         self.is_processing = True
+         self.current_processing_tokens = 0  # Reset token counter
+         self.processing_start_time = time.time()  # Track elapsed time
+         logger.info("Started processing queue")
+
+         while not self.processing_queue.empty() and not self.cancel_processing:
+             try:
+                 # Collect all queued messages
+                 messages = []
+                 while not self.processing_queue.empty():
+                     message = await self.processing_queue.get()
+                     messages.append(message)
+
+                 if messages and not self.cancel_processing:
+                     await self._process_message_batch(messages)
+
+             except Exception as e:
+                 logger.error(f"Queue processing error: {e}")
+                 # Display error using MessageDisplayService (DRY refactoring)
+                 self.message_display.display_error_message(str(e))
+                 break
+
+         # Continue conversation until completed (unlimited agentic turns)
+         turn_count = 0
+         while not self.turn_completed and not self.cancel_processing:
+             try:
+                 turn_count += 1
+                 logger.info(f"Turn not completed - continuing conversation (turn {turn_count})")
+                 await self._continue_conversation()
+             except Exception as e:
+                 logger.error(f"Continued conversation error (turn {turn_count}): {e}")
+                 # On error, mark turn as completed to prevent infinite error loops
+                 self.turn_completed = True
+                 break
+
+         self.is_processing = False
+         self.current_processing_tokens = 0  # Reset token counter when done
+         self.processing_start_time = None  # Clear elapsed time tracking
+         if self.cancel_processing:
+             logger.info("Processing cancelled by user")
+             # Show cancellation message (only once)
+             if not self.cancellation_message_shown:
+                 self.cancellation_message_shown = True
+                 # Display cancellation using MessageDisplayService (DRY refactoring)
+                 self.message_display.display_cancellation_message()
+         else:
+             logger.info("Finished processing queue")
+
+     async def _process_message_batch(self, messages: List[str]):
+         """Process a batch of messages."""
+         # Combine messages
+         combined_message = "\n".join(messages)
+
+         # Add to conversation history
+         self._add_conversation_message(ConversationMessage(
+             role="user",
+             content=combined_message
+         ))
+
+         # Start thinking animation
+         self.renderer.update_thinking(True, "Processing...")
+         thinking_start = time.time()
+
+         # Estimate input tokens for status display
+         total_input_chars = sum(len(msg.content) for msg in self.conversation_history[-3:])  # Last 3 messages
+         estimated_input_tokens = total_input_chars // 4  # Rough approximation
+         self.current_processing_tokens = estimated_input_tokens
+
+         try:
+             # Call LLM API (streaming handled by API service)
+             response = await self._call_llm()
+
+             # Update session stats with actual token usage from API response
+             token_usage = self.api_service.get_last_token_usage()
+             if token_usage:
+                 prompt_tokens = token_usage.get("prompt_tokens", 0)
+                 completion_tokens = token_usage.get("completion_tokens", 0)
+                 self.session_stats["input_tokens"] += prompt_tokens
+                 self.session_stats["output_tokens"] += completion_tokens
+                 logger.debug(f"Token usage: {prompt_tokens} input, {completion_tokens} output")
+
+             # Stop thinking animation and show completion message
+             thinking_duration = time.time() - thinking_start
+             self.renderer.update_thinking(False)
+
+             # Brief pause to ensure clean transition from thinking to completion message
+             await asyncio.sleep(self.config.get("core.llm.processing_delay", 0.1))
+
+             # Parse response using new ResponseParser
+             parsed_response = self.response_parser.parse_response(response)
+             clean_response = parsed_response["content"]
+             all_tools = self.response_parser.get_all_tools(parsed_response)
+
+             # Update turn completion state
+             self.turn_completed = parsed_response["turn_completed"]
+
+             # Update statistics
+             self.stats["total_thinking_time"] += thinking_duration
+             self.session_stats["messages"] += 1
+
+             # Show "Generating..." briefly before displaying messages
+             if clean_response.strip() or all_tools:
+                 # Estimate token count (rough approximation: ~4 chars per token)
+                 estimated_tokens = len(clean_response) // 4 if clean_response else 0
+                 self.current_processing_tokens = estimated_tokens  # Update current processing tokens
+                 self.renderer.update_thinking(True, f"Generating... ({estimated_tokens} tokens)")
+
+                 # Brief pause to show generating state
+                 await asyncio.sleep(self.config.get("core.llm.thinking_delay", 0.3))
+
+                 # Stop generating animation before message display
+                 self.renderer.update_thinking(False)
+
+             # Execute all tools (terminal commands and MCP tools) if any
+             tool_results = None
+             if all_tools:
+                 tool_results = await self.tool_executor.execute_all_tools(all_tools)
+
+             # Display thinking duration, response, and tool results atomically using unified method
+             self.message_display.display_complete_response(
+                 thinking_duration=thinking_duration,
+                 response=clean_response,
+                 tool_results=tool_results,
+                 original_tools=all_tools
+             )
+
+             # Log assistant response
+             self.current_parent_uuid = await self.conversation_logger.log_assistant_message(
+                 clean_response or response,
+                 parent_uuid=self.current_parent_uuid,
+                 usage_stats={
+                     "input_tokens": self.session_stats.get("input_tokens", 0),
+                     "output_tokens": self.session_stats.get("output_tokens", 0),
+                     "thinking_duration": thinking_duration
+                 }
+             )
+
+             # Add to conversation history
+             self._add_conversation_message(ConversationMessage(
+                 role="assistant",
+                 content=response
+             ))
+
+             # Log tool execution results and batch them for conversation history (if tools were executed)
+             if tool_results:
+                 batched_tool_results = []
+                 for result in tool_results:
+                     await self.conversation_logger.log_system_message(
+                         f"Executed {result.tool_type} ({result.tool_id}): {result.output if result.success else result.error}",
+                         parent_uuid=self.current_parent_uuid,
+                         subtype="tool_call"
+                     )
+
+                     # Collect tool results for batching
+                     tool_context = self.tool_executor.format_result_for_conversation(result)
+                     batched_tool_results.append(f"Tool result: {tool_context}")
+
+                 # Add all tool results as single conversation message
+                 if batched_tool_results:
+                     self._add_conversation_message(ConversationMessage(
+                         role="user",
+                         content="\n".join(batched_tool_results)
+                     ))
+
+         except asyncio.CancelledError:
+             logger.info("Message processing cancelled by user")
+             thinking_duration = time.time() - thinking_start
+             self.renderer.update_thinking(False)
+
+             # Clear any display artifacts
+             self.renderer.clear_active_area()
+
+             # Remove the user message that was just added since processing was cancelled
+             if self.conversation_history and self.conversation_history[-1].role == "user":
+                 self.conversation_history.pop()
+                 logger.info("Removed cancelled user message from conversation history")
+
+             # Show cancellation message (only once)
+             if not self.cancellation_message_shown:
+                 self.cancellation_message_shown = True
+                 # Display cancellation using MessageDisplayService (DRY refactoring)
+                 self.message_display.display_cancellation_message()
+
+             # Complete turn to reset state
+             self.turn_completed = True
+
+             # Update stats
+             self.stats["total_thinking_time"] += thinking_duration
+
+         except Exception as e:
+             logger.error(f"Error processing message batch: {e}")
+             self.renderer.update_thinking(False)
+             # Display error using MessageDisplayService (DRY refactoring)
+             self.message_display.display_error_message(str(e))
+             # Complete turn on error to prevent infinite loops
+             self.turn_completed = True
+
+     async def _continue_conversation(self):
+         """Continue an ongoing conversation."""
+         # Similar to _process_message_batch but without adding user message
+         self.renderer.update_thinking(True, "Continuing...")
+         thinking_start = time.time()
+
+         # Estimate input tokens for status display
+         total_input_chars = sum(len(msg.content) for msg in self.conversation_history[-3:])  # Last 3 messages
+         estimated_input_tokens = total_input_chars // 4  # Rough approximation
+         self.current_processing_tokens = estimated_input_tokens
+
+         try:
+             response = await self._call_llm()
+
+             # Update session stats with actual token usage from API response
+             token_usage = self.api_service.get_last_token_usage()
+             if token_usage:
+                 prompt_tokens = token_usage.get("prompt_tokens", 0)
+                 completion_tokens = token_usage.get("completion_tokens", 0)
+                 self.session_stats["input_tokens"] += prompt_tokens
+                 self.session_stats["output_tokens"] += completion_tokens
+                 logger.debug(f"Token usage: {prompt_tokens} input, {completion_tokens} output")
+
+             # Parse response using new ResponseParser
+             parsed_response = self.response_parser.parse_response(response)
+             clean_response = parsed_response["content"]
+             all_tools = self.response_parser.get_all_tools(parsed_response)
+
+             # Update turn completion state
+             self.turn_completed = parsed_response["turn_completed"]
+
+             thinking_duration = time.time() - thinking_start
+             self.renderer.update_thinking(False)
+
+             # Brief pause to ensure clean transition
+             await asyncio.sleep(self.config.get("core.llm.processing_delay", 0.1))
+
+             # Show "Generating..." briefly before displaying messages
+             if clean_response.strip() or all_tools:
+                 # Estimate token count (rough approximation: ~4 chars per token)
+                 estimated_tokens = len(clean_response) // 4 if clean_response else 0
+                 self.current_processing_tokens = estimated_tokens  # Update current processing tokens
+                 self.renderer.update_thinking(True, f"Generating... ({estimated_tokens} tokens)")
+
+                 # Brief pause to show generating state
+                 await asyncio.sleep(self.config.get("core.llm.thinking_delay", 0.3))
+
+                 # Stop generating animation before message display
+                 self.renderer.update_thinking(False)
+
+             # Execute all tools (terminal commands and MCP tools) if any
+             tool_results = None
+             if all_tools:
+                 tool_results = await self.tool_executor.execute_all_tools(all_tools)
+
+             # Display thinking duration, response, and tool results atomically using unified method
+             self.message_display.display_complete_response(
+                 thinking_duration=thinking_duration,
+                 response=clean_response,
+                 tool_results=tool_results,
+                 original_tools=all_tools
+             )
+
+             # Log continuation
+             self.current_parent_uuid = await self.conversation_logger.log_assistant_message(
+                 clean_response or response,
+                 parent_uuid=self.current_parent_uuid,
+                 usage_stats={
+                     "thinking_duration": thinking_duration
+                 }
+             )
+
+             self._add_conversation_message(ConversationMessage(
+                 role="assistant",
+                 content=response
+             ))
+
+             # Log tool execution results and batch them for conversation history (if tools were executed)
+             if tool_results:
+                 batched_tool_results = []
+                 for result in tool_results:
+                     await self.conversation_logger.log_system_message(
+                         f"Executed {result.tool_type} ({result.tool_id}): {result.output if result.success else result.error}",
+                         parent_uuid=self.current_parent_uuid,
+                         subtype="tool_call"
+                     )
+
+                     # Collect tool results for batching
+                     tool_context = self.tool_executor.format_result_for_conversation(result)
+                     batched_tool_results.append(f"Tool result: {tool_context}")
+
+                 # Add all tool results as single conversation message
+                 if batched_tool_results:
+                     self._add_conversation_message(ConversationMessage(
+                         role="user",
+                         content="\n".join(batched_tool_results)
+                     ))
+
+         except asyncio.CancelledError:
+             logger.info("Conversation continuation cancelled by user")
+             thinking_duration = time.time() - thinking_start
+             self.renderer.update_thinking(False)
+
+             # Clear any display artifacts
+             self.renderer.clear_active_area()
+
+             # Show cancellation message (only once)
+             if not self.cancellation_message_shown:
+                 self.cancellation_message_shown = True
+                 # Display cancellation using MessageDisplayService (DRY refactoring)
+                 self.message_display.display_cancellation_message()
+
+             # Complete turn to reset state
+             self.turn_completed = True
+
+         except Exception as e:
+             logger.error(f"Error continuing conversation: {e}")
+             self.renderer.update_thinking(False)
+
+
+     def _stream_thinking_content(self, thinking_content: str) -> None:
+         """Process complete thinking content block.
+
+         Args:
+             thinking_content: Complete thinking content from <think> tags
+         """
+         logger.debug(f"Processing complete thinking block: {thinking_content[:50]}...")
+
+     def _stream_thinking_sentences(self, thinking_buffer: str, final: bool = False) -> str:
+         """Stream thinking content with terminal width-based truncation (legacy method).
+
+         Args:
+             thinking_buffer: Current thinking content buffer
+             final: Whether this is the final processing (show remaining content)
+
+         Returns:
+             Empty string (no remaining content processing needed)
+         """
+         return self._stream_thinking_width_based(thinking_buffer, final)
+
1279
+    def _stream_thinking_width_based(self, thinking_buffer: str, final: bool = False) -> str:
+        """Stream thinking content in chunks of 70% of the terminal width.
+
+        Args:
+            thinking_buffer: Current thinking content buffer
+            final: Whether this is the final pass (flush remaining content)
+
+        Returns:
+            The original buffer (position is tracked internally), or an empty
+            string after the final flush
+        """
+        # Initialize position tracking on first use
+        if not hasattr(self, '_last_chunk_position'):
+            self._last_chunk_position = 0
+
+        # Calculate the thinking display width (70% of terminal width)
+        try:
+            import os
+            terminal_width = os.get_terminal_size().columns
+            chunk_width = int(terminal_width * 0.7)
+        except OSError:
+            chunk_width = 80  # Fallback width when no terminal is attached
+
+        # Normalize whitespace in the thinking buffer (convert line breaks to spaces).
+        # REASON: the LLM emits thinking content with line breaks, which breaks the
+        # chunk logic. Example: "scanning directory.\n\nuser wants..." becomes
+        # "scanning directory. user wants...". This prevents line breaks from creating
+        # artificial chunk boundaries that cause repetition.
+        normalized_buffer = ' '.join(thinking_buffer.split())
+
+        # Filter out confusing thinking content that shouldn't be displayed.
+        # REASON: the LLM sometimes outputs "Generating..." or similar during thinking,
+        # which looks like our UI state rather than actual thinking content.
+        if normalized_buffer.strip().lower() in ('generating...', 'generating', 'processing...', 'processing'):
+            # Don't display confusing meta-content; show a generic message instead
+            normalized_buffer = "Analyzing your request..."
+
+        # Get content from where we left off
+        remaining_content = normalized_buffer[self._last_chunk_position:]
+
+        if final:
+            # Final pass: show whatever remains, truncated at a word boundary
+            if remaining_content.strip():
+                display_text = remaining_content.strip()
+                if len(display_text) > chunk_width:
+                    truncated = display_text[:chunk_width - 3]
+                    last_space = truncated.rfind(' ')
+                    if last_space > chunk_width * 0.8:
+                        truncated = truncated[:last_space]
+                    display_text = truncated + "..."
+                self.renderer.update_thinking(True, display_text)
+            return ""
+
+        # Display a chunk once enough content has accumulated
+        if len(remaining_content) >= chunk_width:
+            chunk = remaining_content[:chunk_width]
+
+            # Break at a word boundary to avoid cutting words, but only if the
+            # boundary is not too early in the chunk
+            last_space = chunk.rfind(' ')
+            if last_space > chunk_width * 0.8:
+                chunk = chunk[:last_space]
+
+            chunk = chunk.strip()
+            if chunk:
+                self.renderer.update_thinking(True, chunk + "...")
+                # Advance the position past this chunk
+                self._last_chunk_position += len(chunk)
+                # Skip the separating space as well if we broke at a word boundary
+                if chunk != remaining_content[:len(chunk)].strip():
+                    self._last_chunk_position += 1
+
+        # Return the original buffer (position is tracked internally)
+        return thinking_buffer
+
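The chunking logic above is easier to see in isolation. Below is a minimal standalone sketch of the same idea (hypothetical names, not part of the kollabor API): it consumes a growing buffer from a tracked position and emits display chunks capped at 70% of the terminal width, preferring word boundaries.

```python
from shutil import get_terminal_size
from typing import Optional, Tuple

def next_display_chunk(buffer: str, position: int) -> Tuple[Optional[str], int]:
    """Return (chunk, new_position); chunk is None until enough content accumulates."""
    width = int(get_terminal_size(fallback=(115, 24)).columns * 0.7)
    normalized = ' '.join(buffer.split())   # collapse line breaks, as above
    remaining = normalized[position:]
    if len(remaining) < width:
        return None, position               # wait for more content
    chunk = remaining[:width]
    cut = chunk.rfind(' ')
    if cut > width * 0.8:                   # break at a space only if it is not too early
        return chunk[:cut].strip(), position + cut + 1
    return chunk.strip(), position + len(chunk)
```

Tracking the position against the normalized buffer is what lets the caller pass the same growing buffer in repeatedly without re-emitting text.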
+    @staticmethod
+    def _partial_tag_holdback(buffer: str, tag: str) -> int:
+        """Return the length of a trailing partial occurrence of tag in buffer.
+
+        Used to avoid flushing a '<think>' or '</think>' tag that is split
+        across streaming chunk boundaries.
+        """
+        for size in range(min(len(tag) - 1, len(buffer)), 0, -1):
+            if buffer.endswith(tag[:size]):
+                return size
+        return 0
+
+    async def _handle_streaming_chunk(self, chunk: str) -> None:
+        """Handle a streaming content chunk from the API.
+
+        Args:
+            chunk: Content chunk from the streaming API response
+        """
+        # Initialize streaming state if not exists
+        if not hasattr(self, '_streaming_buffer'):
+            self._streaming_buffer = ""
+            self._in_thinking = False
+            self._thinking_buffer = ""
+            self._response_started = False
+
+        # Add the chunk to the buffer
+        self._streaming_buffer += chunk
+
+        # Process thinking content in real time
+        while True:
+            if not self._in_thinking:
+                if '<think>' in self._streaming_buffer:
+                    # Stream any content before the thinking tag, then enter thinking mode
+                    before, self._streaming_buffer = self._streaming_buffer.split('<think>', 1)
+                    if before.strip():
+                        self._stream_response_chunk(before)
+                    self._in_thinking = True
+                    self._thinking_buffer = ""
+                else:
+                    # No opening tag found: stream the content as response, but hold
+                    # back a trailing partial '<think>' so a tag split across chunks
+                    # is not emitted as response text
+                    hold = self._partial_tag_holdback(self._streaming_buffer, '<think>')
+                    flushable = self._streaming_buffer[:len(self._streaming_buffer) - hold]
+                    if flushable.strip():
+                        self._stream_response_chunk(flushable)
+                    self._streaming_buffer = self._streaming_buffer[len(flushable):]
+                    break
+            else:
+                # We're in thinking mode: look for the closing tag or accumulate content
+                if '</think>' in self._streaming_buffer:
+                    parts = self._streaming_buffer.split('</think>', 1)
+                    self._thinking_buffer += parts[0]
+                    self._streaming_buffer = parts[1]
+
+                    # Process the complete thinking content
+                    if self._thinking_buffer.strip():
+                        self._stream_thinking_sentences(self._thinking_buffer, final=True)
+
+                    # Switch to generating mode after thinking is complete
+                    self.renderer.update_thinking(True, "Generating...")
+
+                    # Reset thinking state
+                    self._in_thinking = False
+                    self._thinking_buffer = ""
+                else:
+                    # Still in thinking: accumulate and stream, holding back a possible
+                    # partial '</think>' so the closing tag can still be matched once
+                    # the rest of it arrives
+                    hold = self._partial_tag_holdback(self._streaming_buffer, '</think>')
+                    take = len(self._streaming_buffer) - hold
+                    if take:
+                        self._thinking_buffer += self._streaming_buffer[:take]
+                        self._stream_thinking_sentences(self._thinking_buffer)
+                    self._streaming_buffer = self._streaming_buffer[take:]
+                    break
+
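For reference, the two-state `<think>` parser above can be expressed as a self-contained splitter. This is an illustration only, assuming well-formed tags that arrive whole (the method above additionally holds back partially received tags):

```python
class ThinkSplitter:
    """Split a streamed LLM response into thinking and response segments."""

    def __init__(self) -> None:
        self.in_thinking = False
        self.buffer = ""

    def feed(self, chunk: str) -> list:
        """Return a list of (kind, text) pairs, kind in {'thinking', 'response'}."""
        self.buffer += chunk
        out = []
        while True:
            tag = '</think>' if self.in_thinking else '<think>'
            if tag not in self.buffer:
                if self.buffer:
                    out.append(('thinking' if self.in_thinking else 'response', self.buffer))
                    self.buffer = ""
                return out
            before, self.buffer = self.buffer.split(tag, 1)
            if before:
                out.append(('thinking' if self.in_thinking else 'response', before))
            self.in_thinking = not self.in_thinking
```

Feeding `'<think>plan</think>answer'` yields `[('thinking', 'plan'), ('response', 'answer')]`, mirroring how `_handle_streaming_chunk` routes thinking text to the renderer and response text to the message coordinator.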
+    def _stream_response_chunk(self, chunk: str) -> None:
+        """Stream a response chunk in real time to the message renderer.
+
+        Args:
+            chunk: Response content chunk to stream immediately
+        """
+        # Ignore empty chunks
+        if not chunk or not chunk.strip():
+            return
+
+        # Open a streaming response on the first chunk
+        if not self._response_started:
+            self.message_display.message_coordinator.start_streaming_response()
+            self._response_started = True
+
+        # Route the chunk through the message coordinator (proper architecture)
+        self.message_display.message_coordinator.write_streaming_chunk(chunk)
+
+    async def _call_llm(self) -> str:
+        """Make the API call to the LLM via APICommunicationService (KISS refactoring)."""
+        # Reset streaming state for the new request
+        self._streaming_buffer = ""
+        self._in_thinking = False
+        self._thinking_buffer = ""
+        self._last_chunk_position = 0
+        self._response_started = False
+
+        # Check for cancellation before starting
+        if self.cancel_processing:
+            logger.info("API call cancelled before starting")
+            raise asyncio.CancelledError("Request cancelled by user")
+
+        # Delegate to the API communication service (eliminates ~160 lines of
+        # duplicated API code)
+        try:
+            return await self.api_service.call_llm(
+                conversation_history=self.conversation_history,
+                max_history=self.max_history,
+                streaming_callback=self._handle_streaming_chunk
+            )
+        except asyncio.CancelledError:
+            logger.info("LLM API call was cancelled")
+            # Clean up streaming state on cancellation
+            self._cleanup_streaming_state()
+            raise
+        except Exception as e:
+            logger.error(f"LLM API call failed: {e}")
+            # Clean up streaming state on error
+            self._cleanup_streaming_state()
+            raise
+
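The cancellation contract assumed by `_call_llm()` is that the caller owns an `asyncio.Task` around it: cancelling the task raises `CancelledError` inside the call, the streaming state is cleaned up, and the error is re-raised for the caller to handle. A minimal sketch of that caller side (the `service` name is illustrative, not package API):

```python
import asyncio

async def run_llm_request(service):
    """Run one LLM request; returns None if the request was cancelled."""
    task = asyncio.create_task(service._call_llm())
    try:
        return await task
    except asyncio.CancelledError:
        # _call_llm already ran _cleanup_streaming_state() before re-raising
        return None
```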
+    def _cleanup_streaming_state(self) -> None:
+        """Clean up streaming state after request completion or failure.
+
+        Ensures streaming state is properly reset even if errors occur.
+        """
+        self._streaming_buffer = ""
+        self._in_thinking = False
+        self._thinking_buffer = ""
+        self._response_started = False
+        self._last_chunk_position = 0
+
+        # End the streaming session in the message display service if active
+        # (exposed as self.message_display elsewhere in this class)
+        if hasattr(self, 'message_display') and self.message_display.is_streaming_active():
+            self.message_display.end_streaming_response()
+
+        logger.debug("Cleaned up streaming state")
+
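`_call_llm()` invokes this cleanup from both of its `except` blocks, leaving the success path to the display flow. If cleanup were wanted on every path, the idiomatic shape would be a `finally` clause; a sketch under that assumption (not how the package behaves today):

```python
async def call_llm_cleaning_up(self) -> str:
    try:
        return await self.api_service.call_llm(
            conversation_history=self.conversation_history,
            max_history=self.max_history,
            streaming_callback=self._handle_streaming_chunk,
        )
    finally:
        # Runs on success, error, and cancellation alike
        self._cleanup_streaming_state()
```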
+    def get_status_line(self) -> Dict[str, List[str]]:
+        """Get status information for display, keyed by status area."""
+        status = {
+            "A": [],
+            "B": [],
+            "C": []
+        }
+
+        # Area A - LLM processing status
+        if self.is_processing:
+            # Show elapsed time and token count
+            elapsed = ""
+            if self.processing_start_time:
+                elapsed_secs = time.time() - self.processing_start_time
+                elapsed = f" ({elapsed_secs:.1f}s)"
+
+            if self.current_processing_tokens > 0:
+                status["A"].append(f"Processing: {self.current_processing_tokens} tokens{elapsed}")
+            else:
+                status["A"].append(f"Processing: Yes{elapsed}")
+        else:
+            status["A"].append("Processing: No")
+
+        # Queue metrics with memory-leak monitoring
+        queue_size = self.processing_queue.qsize()
+        queue_utilization = (queue_size / self.max_queue_size * 100) if self.max_queue_size > 0 else 0
+        dropped_indicator = f" ({self.dropped_messages} dropped)" if self.dropped_messages > 0 else ""
+
+        status["C"].append(f"Queue: {queue_size}/{self.max_queue_size} ({queue_utilization:.0f}%){dropped_indicator}")
+
+        # Warn when queue utilization is high
+        if queue_utilization > 80:
+            status["C"].append("⚠️ Queue usage high!")
+        status["C"].append(f"History: {len(self.conversation_history)}")
+        status["C"].append(f"Tasks: {len(self._background_tasks)}")
+        if self._task_error_count > 0:
+            status["C"].append(f"Task Errors: {self._task_error_count}")
+
+        # Circuit breaker status, if enabled
+        if self.task_config.background_tasks.enable_task_circuit_breaker:
+            cb_state = self._circuit_breaker_state
+            cb_failures = self._circuit_breaker_failures
+            cb_threshold = self.task_config.background_tasks.circuit_breaker_threshold
+
+            if cb_state == "OPEN":
+                status["C"].append(f"⚠️ Circuit: OPEN ({cb_failures}/{cb_threshold})")
+            elif cb_state == "HALF_OPEN":
+                status["C"].append(f"🔧 Circuit: HALF_OPEN ({cb_failures}/{cb_threshold})")
+            else:  # CLOSED
+                if cb_failures > 0:
+                    status["C"].append(f"✓ Circuit: CLOSED ({cb_failures}/{cb_threshold})")
+
+        # Area C - Session stats
+        if self.session_stats["messages"] > 0:
+            status["C"].append(f"Messages: {self.session_stats['messages']}")
+            status["C"].append(f"Tokens In: {self.session_stats.get('input_tokens', 0)}")
+            status["C"].append(f"Tokens Out: {self.session_stats.get('output_tokens', 0)}")
+
+        # Area A - Tool execution stats
+        tool_stats = self.tool_executor.get_execution_stats()
+        if tool_stats["total_executions"] > 0:
+            status["A"].append(f"Tools: {tool_stats['total_executions']}")
+            status["A"].append(f"Terminal: {tool_stats['terminal_executions']}")
+            status["A"].append(f"MCP: {tool_stats['mcp_executions']}")
+            status["A"].append(f"Success: {tool_stats['success_rate']:.1%}")
+
+        return status
+
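The returned mapping keys three status areas to lists of display strings. A hypothetical snapshot while a request is in flight (values depend entirely on runtime state; area "B" is currently left empty by this method):

```python
{
    "A": ["Processing: 412 tokens (3.2s)", "Tools: 5", "Terminal: 3", "MCP: 2", "Success: 80.0%"],
    "B": [],
    "C": ["Queue: 2/100 (2%)", "History: 14", "Tasks: 1",
          "Messages: 7", "Tokens In: 1024", "Tokens Out: 2048"],
}
```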
+    def get_queue_metrics(self) -> dict:
+        """Get comprehensive queue metrics for monitoring."""
+        queue_size = self.processing_queue.qsize()
+        queue_utilization = (queue_size / self.max_queue_size * 100) if self.max_queue_size > 0 else 0
+
+        base_metrics = {
+            'current_size': queue_size,
+            'max_size': self.max_queue_size,
+            'utilization_percent': round(queue_utilization, 1),
+            'dropped_messages': self.dropped_messages,
+            'status': 'healthy' if queue_utilization < 80 else 'warning' if queue_utilization < 95 else 'critical',
+            'memory_safe': queue_utilization < 90,
+            'overflow_strategy': self.task_config.queue.overflow_strategy
+        }
+
+        # Add overflow strategy metrics if enabled
+        if self.task_config.queue.enable_queue_metrics:
+            base_metrics.update({
+                'overflow_metrics': {
+                    'drop_oldest_count': self._queue_metrics['drop_oldest_count'],
+                    'drop_newest_count': self._queue_metrics['drop_newest_count'],
+                    'block_count': self._queue_metrics['block_count'],
+                    'block_timeout_count': self._queue_metrics['block_timeout_count'],
+                    'total_enqueue_attempts': self._queue_metrics['total_enqueue_attempts'],
+                    'total_enqueue_successes': self._queue_metrics['total_enqueue_successes'],
+                    'success_rate': (
+                        (self._queue_metrics['total_enqueue_successes'] /
+                         self._queue_metrics['total_enqueue_attempts'] * 100)
+                        if self._queue_metrics['total_enqueue_attempts'] > 0 else 100.0
+                    )
+                }
+            })
+
+        return base_metrics
+
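The health classification embedded in `get_queue_metrics()` can be restated on its own for clarity (a restatement of the thresholds above, not additional package API): utilization below 80% is `healthy`, 80–95% is `warning`, 95% and above is `critical`, and `memory_safe` flips off at 90%.

```python
def classify_queue(utilization: float):
    """Mirror the status/memory_safe logic from get_queue_metrics()."""
    status = ('healthy' if utilization < 80
              else 'warning' if utilization < 95
              else 'critical')
    return status, utilization < 90

assert classify_queue(50.0) == ('healthy', True)
assert classify_queue(85.0) == ('warning', True)
assert classify_queue(92.0) == ('warning', False)
assert classify_queue(97.0) == ('critical', False)
```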
+    def reset_queue_metrics(self):
+        """Reset queue metrics (for testing or maintenance)."""
+        self.dropped_messages = 0
+
+        # Reset overflow strategy metrics
+        for key in self._queue_metrics:
+            self._queue_metrics[key] = 0
+
+        logger.info("Queue metrics reset")
+
+    async def shutdown(self):
+        """Shutdown the LLM service."""
+        # Log conversation end
+        await self.conversation_logger.log_conversation_end()
+
+        # Cancel all background tasks
+        await self.cancel_all_tasks()
+
+        # Stop task monitoring
+        if self._monitoring_task and not self._monitoring_task.done():
+            self._monitoring_task.cancel()
+            try:
+                await self._monitoring_task
+            except asyncio.CancelledError:
+                pass
+
+        # Shutdown the API communication service (KISS refactoring)
+        await self.api_service.shutdown()
+
+        # Shutdown MCP integration
+        try:
+            await self.mcp_integration.shutdown()
+            logger.info("MCP integration shutdown complete")
+        except Exception as e:
+            logger.warning(f"MCP shutdown error: {e}")
+
+        # Save statistics
+        self.state_manager.set("llm.stats", self.stats)
+
+        logger.info("Core LLM Service shutdown complete")