dtspark-1.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. dtSpark/__init__.py +0 -0
  2. dtSpark/_description.txt +1 -0
  3. dtSpark/_full_name.txt +1 -0
  4. dtSpark/_licence.txt +21 -0
  5. dtSpark/_metadata.yaml +6 -0
  6. dtSpark/_name.txt +1 -0
  7. dtSpark/_version.txt +1 -0
  8. dtSpark/aws/__init__.py +7 -0
  9. dtSpark/aws/authentication.py +296 -0
  10. dtSpark/aws/bedrock.py +578 -0
  11. dtSpark/aws/costs.py +318 -0
  12. dtSpark/aws/pricing.py +580 -0
  13. dtSpark/cli_interface.py +2645 -0
  14. dtSpark/conversation_manager.py +3050 -0
  15. dtSpark/core/__init__.py +12 -0
  16. dtSpark/core/application.py +3355 -0
  17. dtSpark/core/context_compaction.py +735 -0
  18. dtSpark/daemon/__init__.py +104 -0
  19. dtSpark/daemon/__main__.py +10 -0
  20. dtSpark/daemon/action_monitor.py +213 -0
  21. dtSpark/daemon/daemon_app.py +730 -0
  22. dtSpark/daemon/daemon_manager.py +289 -0
  23. dtSpark/daemon/execution_coordinator.py +194 -0
  24. dtSpark/daemon/pid_file.py +169 -0
  25. dtSpark/database/__init__.py +482 -0
  26. dtSpark/database/autonomous_actions.py +1191 -0
  27. dtSpark/database/backends.py +329 -0
  28. dtSpark/database/connection.py +122 -0
  29. dtSpark/database/conversations.py +520 -0
  30. dtSpark/database/credential_prompt.py +218 -0
  31. dtSpark/database/files.py +205 -0
  32. dtSpark/database/mcp_ops.py +355 -0
  33. dtSpark/database/messages.py +161 -0
  34. dtSpark/database/schema.py +673 -0
  35. dtSpark/database/tool_permissions.py +186 -0
  36. dtSpark/database/usage.py +167 -0
  37. dtSpark/files/__init__.py +4 -0
  38. dtSpark/files/manager.py +322 -0
  39. dtSpark/launch.py +39 -0
  40. dtSpark/limits/__init__.py +10 -0
  41. dtSpark/limits/costs.py +296 -0
  42. dtSpark/limits/tokens.py +342 -0
  43. dtSpark/llm/__init__.py +17 -0
  44. dtSpark/llm/anthropic_direct.py +446 -0
  45. dtSpark/llm/base.py +146 -0
  46. dtSpark/llm/context_limits.py +438 -0
  47. dtSpark/llm/manager.py +177 -0
  48. dtSpark/llm/ollama.py +578 -0
  49. dtSpark/mcp_integration/__init__.py +5 -0
  50. dtSpark/mcp_integration/manager.py +653 -0
  51. dtSpark/mcp_integration/tool_selector.py +225 -0
  52. dtSpark/resources/config.yaml.template +631 -0
  53. dtSpark/safety/__init__.py +22 -0
  54. dtSpark/safety/llm_service.py +111 -0
  55. dtSpark/safety/patterns.py +229 -0
  56. dtSpark/safety/prompt_inspector.py +442 -0
  57. dtSpark/safety/violation_logger.py +346 -0
  58. dtSpark/scheduler/__init__.py +20 -0
  59. dtSpark/scheduler/creation_tools.py +599 -0
  60. dtSpark/scheduler/execution_queue.py +159 -0
  61. dtSpark/scheduler/executor.py +1152 -0
  62. dtSpark/scheduler/manager.py +395 -0
  63. dtSpark/tools/__init__.py +4 -0
  64. dtSpark/tools/builtin.py +833 -0
  65. dtSpark/web/__init__.py +20 -0
  66. dtSpark/web/auth.py +152 -0
  67. dtSpark/web/dependencies.py +37 -0
  68. dtSpark/web/endpoints/__init__.py +17 -0
  69. dtSpark/web/endpoints/autonomous_actions.py +1125 -0
  70. dtSpark/web/endpoints/chat.py +621 -0
  71. dtSpark/web/endpoints/conversations.py +353 -0
  72. dtSpark/web/endpoints/main_menu.py +547 -0
  73. dtSpark/web/endpoints/streaming.py +421 -0
  74. dtSpark/web/server.py +578 -0
  75. dtSpark/web/session.py +167 -0
  76. dtSpark/web/ssl_utils.py +195 -0
  77. dtSpark/web/static/css/dark-theme.css +427 -0
  78. dtSpark/web/static/js/actions.js +1101 -0
  79. dtSpark/web/static/js/chat.js +614 -0
  80. dtSpark/web/static/js/main.js +496 -0
  81. dtSpark/web/static/js/sse-client.js +242 -0
  82. dtSpark/web/templates/actions.html +408 -0
  83. dtSpark/web/templates/base.html +93 -0
  84. dtSpark/web/templates/chat.html +814 -0
  85. dtSpark/web/templates/conversations.html +350 -0
  86. dtSpark/web/templates/goodbye.html +81 -0
  87. dtSpark/web/templates/login.html +90 -0
  88. dtSpark/web/templates/main_menu.html +983 -0
  89. dtSpark/web/templates/new_conversation.html +191 -0
  90. dtSpark/web/web_interface.py +137 -0
  91. dtspark-1.0.4.dist-info/METADATA +187 -0
  92. dtspark-1.0.4.dist-info/RECORD +96 -0
  93. dtspark-1.0.4.dist-info/WHEEL +5 -0
  94. dtspark-1.0.4.dist-info/entry_points.txt +3 -0
  95. dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
  96. dtspark-1.0.4.dist-info/top_level.txt +1 -0
dtSpark/core/context_compaction.py
@@ -0,0 +1,735 @@
+ """
+ Context compaction module for intelligent conversation history management.
+
+ This module implements a single-pass LLM-driven compaction system that:
+ - Analyses conversation history for importance categorisation
+ - Selectively preserves critical information (architectural decisions, bugs, implementation details)
+ - Compresses less critical information (resolved tasks, exploration)
+ - Discards redundant information (duplicates, superseded decisions)
+
+ Design Goals:
+ Compaction is designed to "distill the contents of a context window in a high-fidelity manner,
+ enabling the agent to continue with minimal performance degradation."
+ """
+
+ import logging
+ import json
+ import re
+ from typing import Dict, List, Optional, Any, Tuple
+ from datetime import datetime
+
+
+ # Compaction prompt template for single-pass intelligent compaction
+ COMPACTION_PROMPT_TEMPLATE = '''You are performing context compaction for an ongoing conversation. Your task is to distill the conversation history into a high-fidelity compressed format that enables continuation with minimal performance degradation.
+
+ ## CATEGORISATION RULES
+
+ Analyse each segment of the conversation and categorise it:
+
+ ### MUST PRESERVE (Full Fidelity)
+ - **Architectural Decisions**: Any decisions about system design, patterns, or structure with their rationale
+ - **Unresolved Issues**: Bugs, errors, or problems that have not been fully resolved
+ - **Implementation Details**: Specific code paths, file locations, configurations that affect future work
+ - **User Preferences**: Explicit requests, constraints, or preferences stated by the user
+ - **Critical Data**: Important numbers, calculations, findings, and their sources
+ - **Active Tasks**: Work in progress, next steps, or pending actions
+ - **Error Context**: Error messages, stack traces, and debugging information for unresolved issues
+
+ ### COMPRESS (Reduced Fidelity)
+ - **Resolved Tasks**: Brief outcome note only (e.g., "Fixed authentication bug in auth.py")
+ - **Exploratory Discussion**: Conclusions only, not the exploration process
+ - **Tool Outputs**: Key findings only, not raw output
+ - **Explanations**: Final understanding only, not iterative clarification
+
+ ### DISCARD
+ - **Redundant Information**: Duplicate tool outputs, repeated explanations
+ - **Superseded Decisions**: Earlier decisions that were later changed
+ - **Verbose Completions**: Detailed explanations of work that is finished and won't be referenced
+ - **Pleasantries**: Greetings, acknowledgments, conversational filler
+
+ ## OUTPUT FORMAT
+
+ Produce a structured compacted context in the following format:
+
+ # COMPACTED CONTEXT
+
+ ## Critical Decisions & Architecture
+ [List architectural decisions with brief rationale - preserve exact details]
+
+ ## Unresolved Issues
+ [List any bugs, errors, or problems still being worked on - preserve full context]
+
+ ## Implementation State
+ [Current state of implementation: what's done, what's in progress, key file paths]
+
+ ## Key Data & Findings
+ [Important numbers, calculations, discoveries with sources - preserve exact values]
+
+ ## User Preferences & Constraints
+ [Explicit user requirements and constraints]
+
+ ## Recent Context Summary
+ [Brief summary of the most recent exchanges not covered above]
+
+ ## Discarded Topics
+ [List topics that were discussed but are no longer relevant - titles only for reference]
+
+ ## CONVERSATION TO COMPACT
+
+ The original conversation contained {message_count} messages with approximately {token_count:,} tokens.
+
+ {conversation_history}
+
+ ## INSTRUCTIONS
+
+ 1. Read through the entire conversation carefully
+ 2. Categorise each meaningful segment according to the rules above
+ 3. PRESERVE critical information with HIGH FIDELITY - do not lose important details
+ 4. COMPRESS resolved/completed items to brief summaries
+ 5. DISCARD redundant and superseded information
+ 6. Output the structured compacted context
+ 7. Ensure the compacted context contains ALL information needed to continue the conversation effectively
+ 8. For numerical data, preserve EXACT values - do not round or approximate
+
+ Begin your compacted context output now:'''
+
+
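The template above is filled by plain `str.format` substitution (see `_build_compaction_prompt` further down). A minimal sketch with hypothetical sample values shows how the three placeholders resolve, including the thousands separator produced by `{token_count:,}`:

```python
# Illustrative only, not part of the package: populate the compaction
# template the same way _build_compaction_prompt does.
from dtSpark.core.context_compaction import COMPACTION_PROMPT_TEMPLATE

prompt = COMPACTION_PROMPT_TEMPLATE.format(
    message_count=42,                   # hypothetical message count
    token_count=85000,                  # rendered as "85,000" by the ,-spec
    conversation_history="[USER] ...",  # output of _format_messages_for_compaction
)
assert "approximately 85,000 tokens" in prompt
```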
+ class ContextCompactor:
+     """
+     Manages intelligent context compaction for conversations.
+
+     This class implements a single-pass LLM-driven compaction that analyses
+     conversation history and produces a structured, compressed representation
+     while preserving critical information.
+     """
+
+     def __init__(self, bedrock_service, database, context_limit_resolver,
+                  cli_interface=None, web_interface=None,
+                  compaction_threshold: float = 0.7,
+                  emergency_threshold: float = 0.95,
+                  compaction_ratio: float = 0.3):
+         """
+         Initialise the context compactor.
+
+         Args:
+             bedrock_service: Service for LLM invocation
+             database: Database instance for message storage
+             context_limit_resolver: ContextLimitResolver instance for model limits
+             cli_interface: Optional CLI interface for progress display
+             web_interface: Optional web interface for progress display
+             compaction_threshold: Fraction of context window to trigger compaction (default 0.7)
+             emergency_threshold: Fraction of context window for emergency compaction (default 0.95)
+             compaction_ratio: Target ratio for compacted content (default 0.3)
+         """
+         self.bedrock_service = bedrock_service
+         self.database = database
+         self.context_limit_resolver = context_limit_resolver
+         self.cli_interface = cli_interface
+         self.web_interface = web_interface
+         self.compaction_threshold = compaction_threshold
+         self.emergency_threshold = emergency_threshold
+         self.compaction_ratio = compaction_ratio
+
+         logging.info(f"ContextCompactor initialised with threshold={compaction_threshold}, "
+                      f"emergency={emergency_threshold}, ratio={compaction_ratio}")
+
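For orientation, a hypothetical wiring of the compactor; `llm_service`, `db` and `limit_resolver` are placeholders standing in for the application's real service objects:

```python
from dtSpark.core.context_compaction import ContextCompactor

# Hypothetical setup sketch; llm_service, db and limit_resolver are
# placeholders for the application's real service objects.
compactor = ContextCompactor(
    bedrock_service=llm_service,            # any object exposing invoke_model/count_tokens
    database=db,                            # message storage backend
    context_limit_resolver=limit_resolver,
    cli_interface=None,                     # progress output is optional
)
ran = compactor.check_and_compact(
    conversation_id=7,                      # hypothetical IDs
    model_id="claude-sonnet",               # hypothetical model name
    provider="anthropic",
)
```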
+     def update_service(self, bedrock_service):
+         """
+         Update the LLM service used for compaction.
+
+         This should be called when the active provider changes.
+
+         Args:
+             bedrock_service: The new LLM service to use
+         """
+         old_provider = "unknown"
+         new_provider = "unknown"
+
+         if self.bedrock_service and hasattr(self.bedrock_service, 'get_provider_name'):
+             old_provider = self.bedrock_service.get_provider_name()
+         if bedrock_service and hasattr(bedrock_service, 'get_provider_name'):
+             new_provider = bedrock_service.get_provider_name()
+
+         self.bedrock_service = bedrock_service
+         logging.info(f"ContextCompactor service updated: {old_provider} -> {new_provider}")
+
+     def check_and_compact(self, conversation_id: int, model_id: str,
+                           provider: str, in_tool_use_loop: bool = False) -> bool:
+         """
+         Check if compaction is needed and perform it.
+
+         Args:
+             conversation_id: Current conversation ID
+             model_id: Current model ID
+             provider: Current provider name
+             in_tool_use_loop: Whether currently in tool use sequence
+
+         Returns:
+             True if compaction was performed, False otherwise
+         """
+         # Get context limits for current model
+         limits = self.context_limit_resolver.get_context_limits(model_id, provider)
+         context_window = limits['context_window']
+
+         # Calculate current token usage
+         current_tokens = self.database.get_conversation_token_count(conversation_id)
+
+         # Calculate thresholds
+         compaction_threshold_tokens = int(context_window * self.compaction_threshold)
+         emergency_threshold_tokens = int(context_window * self.emergency_threshold)
+
+         logging.debug(f"Compaction check: {current_tokens:,}/{context_window:,} tokens "
+                       f"(threshold: {compaction_threshold_tokens:,}, emergency: {emergency_threshold_tokens:,})")
+
+         # Check emergency threshold (force compaction even during tool use)
+         if current_tokens >= emergency_threshold_tokens:
+             logging.warning(f"EMERGENCY COMPACTION: {current_tokens:,}/{context_window:,} tokens "
+                             f"({current_tokens/context_window*100:.1f}% of context window)")
+             if self.cli_interface:
+                 self.cli_interface.print_warning(
+                     f"Emergency compaction triggered at {current_tokens/context_window*100:.1f}% of context window"
+                 )
+             return self._perform_compaction(conversation_id, model_id, provider, limits)
+
+         # Defer during tool use unless emergency
+         if in_tool_use_loop:
+             logging.debug(f"Deferring compaction during tool use loop "
+                           f"({current_tokens:,}/{emergency_threshold_tokens:,} tokens)")
+             return False
+
+         # Normal threshold check
+         if current_tokens > compaction_threshold_tokens:
+             logging.info(f"Compaction triggered: {current_tokens:,}/{compaction_threshold_tokens:,} tokens "
+                          f"({current_tokens/context_window*100:.1f}% of context window)")
+             return self._perform_compaction(conversation_id, model_id, provider, limits)
+
+         return False
+
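The decision ordering above matters: the emergency check fires even mid tool use, while the normal threshold is deferred until the tool loop finishes. A worked example with the default settings and a hypothetical 200,000-token context window:

```python
# Worked example of check_and_compact's thresholds, assuming a hypothetical
# 200,000-token context window and the default 0.7/0.95 settings.
context_window = 200_000
compaction_threshold_tokens = int(context_window * 0.7)    # 140,000
emergency_threshold_tokens = int(context_window * 0.95)    # 190,000

current_tokens = 150_000
in_tool_use_loop = True

# Mirrors the method's ordering: emergency first, then tool-use deferral,
# then the normal threshold.
if current_tokens >= emergency_threshold_tokens:
    decision = "compact (emergency, even mid tool use)"
elif in_tool_use_loop:
    decision = "defer until the tool loop finishes"
elif current_tokens > compaction_threshold_tokens:
    decision = "compact"
else:
    decision = "no action"

print(decision)  # -> "defer until the tool loop finishes"
```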
+     def _perform_compaction(self, conversation_id: int, model_id: str,
+                             provider: str, limits: Dict[str, int]) -> bool:
+         """
+         Perform the actual context compaction.
+
+         Args:
+             conversation_id: Conversation to compact
+             model_id: Current model ID
+             provider: Current provider name
+             limits: Context limits dict with 'context_window' and 'max_output'
+
+         Returns:
+             True if successful, False otherwise
+         """
+         start_time = datetime.now()
+
+         # Display progress
+         self._display_progress("🗜️ Starting intelligent context compaction...")
+         self._display_separator()
+
+         try:
+             # Get ALL messages (including previously compacted ones for full recompaction)
+             messages = self.database.get_conversation_messages(
+                 conversation_id, include_rolled_up=True
+             )
+
+             if len(messages) <= 4:
+                 logging.warning("Not enough messages to compact")
+                 self._display_warning("Not enough messages to compact")
+                 return False
+
+             # Calculate original metrics
+             original_token_count = sum(msg.get('token_count', 0) for msg in messages)
+             original_message_count = len(messages)
+
+             self._display_info(
+                 f"Analysing {original_message_count} messages ({original_token_count:,} tokens)..."
+             )
+
+             # Format conversation history for compaction
+             conversation_history = self._format_messages_for_compaction(messages)
+
+             # Build the compaction prompt
+             compaction_prompt = self._build_compaction_prompt(
+                 conversation_history,
+                 original_message_count,
+                 original_token_count
+             )
+
+             # Check provider rate limits before attempting compaction
+             rate_limit_check = self._check_rate_limits_for_compaction(
+                 compaction_prompt, original_token_count
+             )
+             if not rate_limit_check['can_proceed']:
+                 self._display_warning(rate_limit_check['message'])
+                 logging.warning(f"Compaction skipped: {rate_limit_check['message']}")
+                 return False
+
+             # Calculate max tokens for compacted output
+             # Use model's max_output but cap at reasonable size
+             max_compaction_tokens = min(
+                 limits.get('max_output', 8192),
+                 max(2000, int(original_token_count * self.compaction_ratio)),
+                 16000  # Absolute cap
+             )
+
+             # Estimate prompt size and validate against context window
+             context_window = limits.get('context_window', 8192)
+             prompt_tokens = 0
+             if hasattr(self.bedrock_service, 'count_tokens'):
+                 try:
+                     prompt_tokens = self.bedrock_service.count_tokens(compaction_prompt)
+                 except Exception:
+                     prompt_tokens = len(compaction_prompt) // 4  # Fallback estimate
+             else:
+                 prompt_tokens = len(compaction_prompt) // 4
+
+             # Check if prompt exceeds context window (need room for output too)
+             max_input_tokens = context_window - max_compaction_tokens - 1000  # Safety buffer
+             if prompt_tokens > max_input_tokens:
+                 logging.warning(
+                     f"Compaction prompt ({prompt_tokens:,} tokens) too large for context window "
+                     f"({context_window:,} tokens with {max_compaction_tokens:,} reserved for output)"
+                 )
+                 self._display_warning(
+                     f"Conversation too large ({prompt_tokens:,} tokens) for compaction in a single pass. "
+                     f"Context window: {context_window:,} tokens"
+                 )
+                 # Still proceed - let the API handle it and return a proper error
+                 # The model might still be able to handle it or provide partial results
+
+             logging.info(
+                 f"Compaction: input={prompt_tokens:,} tokens, target_output={max_compaction_tokens:,} tokens, "
+                 f"context_window={context_window:,} tokens"
+             )
+
+             self._display_info(f"Generating compacted context (target: {max_compaction_tokens:,} tokens)...")
+
+             # Invoke LLM for compaction
+             response = self.bedrock_service.invoke_model(
+                 [{'role': 'user', 'content': compaction_prompt}],
+                 max_tokens=max_compaction_tokens,
+                 temperature=0.2  # Low temperature for consistent compaction
+             )
+
+             # Check for error response
+             if not response:
+                 logging.error("Compaction failed - null response from model")
+                 self._display_error("Compaction failed - no response from model")
+                 return False
+
+             if response.get('error'):
+                 error_msg = response.get('error_message', 'Unknown error')
+                 error_type = response.get('error_type', 'Unknown')
+                 logging.error(f"Compaction failed - {error_type}: {error_msg}")
+                 self._display_error(f"Compaction failed: {error_msg}")
+                 return False
+
+             # Get content from response (may be in 'content' or 'content_blocks')
+             content = response.get('content', '')
+             if not content and response.get('content_blocks'):
+                 # Try to extract text from content_blocks
+                 for block in response.get('content_blocks', []):
+                     if block.get('type') == 'text':
+                         content += block.get('text', '')
+
+             if not content:
+                 logging.error(f"Compaction failed - empty response. Response keys: {list(response.keys())}")
+                 self._display_error("Compaction failed - no content in model response")
+                 return False
+
+             compacted_content = content.strip()
+             compacted_token_count = self.bedrock_service.count_tokens(compacted_content)
+
+             # Validate compaction quality
+             if len(compacted_content) < 200:
+                 logging.warning(f"Compacted content too brief ({len(compacted_content)} chars), aborting")
+                 self._display_warning("Compacted content too brief, keeping original messages")
+                 return False
+
+             # Create compaction marker
+             compaction_marker = self._create_compaction_marker(
+                 original_message_count=original_message_count,
+                 original_token_count=original_token_count,
+                 compacted_token_count=compacted_token_count,
+                 model_id=model_id,
+                 context_window=limits['context_window']
+             )
+
+             # Add compacted context as special message
+             self.database.add_message(
+                 conversation_id,
+                 'user',
+                 f"[COMPACTED CONTEXT - {compaction_marker}]\n\n{compacted_content}",
+                 compacted_token_count
+             )
+
+             # Mark all previous messages as compacted (rolled_up)
+             message_ids = [msg['id'] for msg in messages]
+             self.database.mark_messages_as_rolled_up(message_ids)
+
+             # Record compaction in rollup history
+             self.database.record_rollup(
+                 conversation_id,
+                 original_message_count,
+                 compacted_content,
+                 original_token_count,
+                 compacted_token_count
+             )
+
+             # Recalculate total_tokens to fix any accounting errors from the rollup
+             # This is necessary because record_rollup uses incremental arithmetic that
+             # can become corrupted when include_rolled_up=True includes already-subtracted tokens
+             actual_token_count = self.database.recalculate_total_tokens(conversation_id)
+             logging.debug(f"Recalculated total_tokens after compaction: {actual_token_count:,}")
+
+             # Calculate metrics
+             elapsed_time = (datetime.now() - start_time).total_seconds()
+             reduction_pct = ((original_token_count - compacted_token_count) /
+                              original_token_count * 100) if original_token_count > 0 else 0
+
+             # Log success
+             logging.info(f"Compaction completed in {elapsed_time:.1f}s: "
+                          f"{original_message_count} messages → structured context, "
+                          f"{original_token_count:,} → {compacted_token_count:,} tokens "
+                          f"({reduction_pct:.1f}% reduction)")
+
+             # Display completion
+             self._display_success(
+                 f"✓ Compaction complete: {original_message_count} messages → structured context"
+             )
+             self._display_info(
+                 f"Token reduction: {original_token_count:,} → {compacted_token_count:,} "
+                 f"({reduction_pct:.1f}% reduction)"
+             )
+             self._display_info(f"Completed in {elapsed_time:.1f} seconds")
+             self._display_separator()
+
+             return True
+
+         except Exception as e:
+             logging.error(f"Compaction failed with error: {e}", exc_info=True)
+             self._display_error(f"Compaction failed: {str(e)}")
+             return False
+
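The output budget in `_perform_compaction` is the minimum of three caps: the model's own `max_output`, the compaction-ratio target (floored at 2,000 tokens), and an absolute 16,000-token ceiling. Evaluated for a hypothetical 120,000-token conversation on a model with an 8,192-token maximum output:

```python
# The output-budget arithmetic from _perform_compaction, with hypothetical
# sample values (120,000-token conversation, 8,192-token max output).
original_token_count = 120_000
compaction_ratio = 0.3
limits = {"max_output": 8192, "context_window": 200_000}

max_compaction_tokens = min(
    limits.get("max_output", 8192),                           # 8,192
    max(2000, int(original_token_count * compaction_ratio)),  # 36,000
    16_000,                                                   # absolute cap
)
print(max_compaction_tokens)  # -> 8192: the model's output cap wins here
```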
+     def _check_rate_limits_for_compaction(
+         self, compaction_prompt: str, original_token_count: int
+     ) -> Dict[str, Any]:
+         """
+         Check if the compaction request would exceed provider rate limits.
+
+         Args:
+             compaction_prompt: The full compaction prompt to be sent
+             original_token_count: Original token count of messages being compacted
+
+         Returns:
+             Dictionary with:
+             - can_proceed: bool - Whether compaction can proceed
+             - message: str - Explanation message
+             - estimated_tokens: int - Estimated input tokens for the request
+         """
+         # Get rate limits from the service
+         rate_limits = None
+         if hasattr(self.bedrock_service, 'get_rate_limits'):
+             rate_limits = self.bedrock_service.get_rate_limits()
+
+         # If no rate limits or provider doesn't have limits, proceed
+         if not rate_limits or not rate_limits.get('has_limits', False):
+             return {
+                 'can_proceed': True,
+                 'message': 'No rate limits detected',
+                 'estimated_tokens': original_token_count
+             }
+
+         # Estimate input tokens for the compaction request
+         # Use the service's token counter if available
+         if hasattr(self.bedrock_service, 'count_tokens'):
+             try:
+                 estimated_tokens = self.bedrock_service.count_tokens(compaction_prompt)
+             except Exception:
+                 # Fallback: estimate at 4 chars per token
+                 estimated_tokens = len(compaction_prompt) // 4
+         else:
+             estimated_tokens = len(compaction_prompt) // 4
+
+         # Get input token limit
+         input_limit = rate_limits.get('input_tokens_per_minute')
+
+         if input_limit and estimated_tokens > input_limit:
+             # Request exceeds rate limit - cannot proceed
+             provider_name = "Anthropic Direct"
+             if hasattr(self.bedrock_service, 'get_provider_name'):
+                 provider_name = self.bedrock_service.get_provider_name()
+             elif hasattr(self.bedrock_service, 'get_active_provider'):
+                 provider_name = self.bedrock_service.get_active_provider() or provider_name
+
+             message = (
+                 f"Compaction request ({estimated_tokens:,} tokens) exceeds {provider_name} "
+                 f"rate limit ({input_limit:,} tokens/minute). "
+                 f"Consider using AWS Bedrock which has higher rate limits, "
+                 f"or wait for the conversation to naturally reduce in size."
+             )
+
+             logging.warning(
+                 f"Compaction blocked: {estimated_tokens:,} tokens exceeds "
+                 f"{input_limit:,} token rate limit for {provider_name}"
+             )
+
+             return {
+                 'can_proceed': False,
+                 'message': message,
+                 'estimated_tokens': estimated_tokens,
+                 'rate_limit': input_limit
+             }
+
+         # Within limits, can proceed
+         return {
+             'can_proceed': True,
+             'message': f'Request within rate limits ({estimated_tokens:,}/{input_limit:,} tokens)',
+             'estimated_tokens': estimated_tokens,
+             'rate_limit': input_limit
+         }
+
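Note that `ContextCompactor` never names a concrete service class: it duck-types against the methods probed with `hasattr` throughout (`invoke_model`, `count_tokens`, `get_rate_limits`, `get_provider_name`). A minimal stub satisfying that implicit interface, purely illustrative and not part of the package:

```python
# Illustrative stub of the duck-typed service interface the compactor
# relies on; not part of dtSpark.
class StubLLMService:
    def get_provider_name(self) -> str:
        return "stub"

    def get_rate_limits(self) -> dict:
        # No per-minute limits, so _check_rate_limits_for_compaction proceeds.
        return {"has_limits": False}

    def count_tokens(self, text: str) -> int:
        return len(text) // 4  # same heuristic the module falls back to

    def invoke_model(self, messages, max_tokens=1024, temperature=0.2) -> dict:
        # Real services return either 'content' or 'content_blocks'.
        return {"content": "# COMPACTED CONTEXT\n..."}
```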
+     def _format_messages_for_compaction(self, messages: List[Dict]) -> str:
+         """
+         Format messages into readable conversation history for compaction.
+
+         Handles different message types including tool_use, tool_result,
+         and regular messages.
+
+         Args:
+             messages: List of message dictionaries from database
+
+         Returns:
+             Formatted conversation history string
+         """
+         formatted_lines = []
+         message_number = 0
+
+         for msg in messages:
+             message_number += 1
+             role = msg.get('role', 'unknown').upper()
+             content = msg.get('content', '')
+             timestamp = msg.get('timestamp', '')
+
+             # Format timestamp if available
+             time_str = ""
+             if timestamp:
+                 try:
+                     if isinstance(timestamp, str):
+                         dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
+                     else:
+                         dt = timestamp
+                     time_str = f" [{dt.strftime('%Y-%m-%d %H:%M')}]"
+                 except (ValueError, AttributeError):
+                     pass
+
+             # Check for previously compacted content
+             if content.startswith('[COMPACTED CONTEXT'):
+                 formatted_lines.append(f"\n--- PREVIOUS COMPACTION{time_str} ---")
+                 # Extract just the summary sections, not the full compacted content
+                 formatted_lines.append("[Previous conversation was compacted - key points preserved below]")
+                 # Include a truncated version of the compacted content
+                 compacted_preview = content[:2000] + "..." if len(content) > 2000 else content
+                 formatted_lines.append(compacted_preview)
+                 formatted_lines.append("--- END PREVIOUS COMPACTION ---\n")
+                 continue
+
+             # Check for tool results
+             if content.startswith('[TOOL_RESULTS]'):
+                 formatted_lines.append(f"\n[{role}]{time_str} Tool Results:")
+                 try:
+                     tool_results_json = content.replace('[TOOL_RESULTS]', '', 1)
+                     tool_results = json.loads(tool_results_json)
+                     if isinstance(tool_results, list):
+                         for i, result in enumerate(tool_results, 1):
+                             if isinstance(result, dict) and result.get('type') == 'tool_result':
+                                 tool_id = result.get('tool_use_id', 'unknown')[:8]
+                                 result_content = result.get('content', '')
+                                 # Truncate long tool results
+                                 if len(str(result_content)) > 500:
+                                     result_content = str(result_content)[:500] + "... [truncated]"
+                                 formatted_lines.append(f" Result {i} (tool:{tool_id}): {result_content}")
+                 except json.JSONDecodeError:
+                     formatted_lines.append(f" [Raw tool results - {len(content)} chars]")
+                 continue
+
+             # Check for tool use blocks
+             if role == 'ASSISTANT' and content.startswith('['):
+                 try:
+                     content_blocks = json.loads(content)
+                     if isinstance(content_blocks, list):
+                         text_parts = []
+                         tool_calls = []
+                         for block in content_blocks:
+                             if isinstance(block, dict):
+                                 if block.get('type') == 'text':
+                                     text_parts.append(block.get('text', ''))
+                                 elif block.get('type') == 'tool_use':
+                                     tool_name = block.get('name', 'unknown')
+                                     tool_input = block.get('input', {})
+                                     # Summarise tool input
+                                     input_summary = self._summarise_tool_input(tool_input)
+                                     tool_calls.append(f"{tool_name}({input_summary})")
+
+                         if text_parts:
+                             formatted_lines.append(f"\n[{role}]{time_str}")
+                             formatted_lines.append(''.join(text_parts))
+                         if tool_calls:
+                             formatted_lines.append(f"[Tool calls: {', '.join(tool_calls)}]")
+                         continue
+                 except json.JSONDecodeError:
+                     pass  # Not JSON, treat as regular message
+
+             # Check for rollup summaries
+             if content.startswith('[Summary of previous conversation]'):
+                 formatted_lines.append(f"\n--- PREVIOUS SUMMARY{time_str} ---")
+                 formatted_lines.append(content)
+                 formatted_lines.append("--- END PREVIOUS SUMMARY ---\n")
+                 continue
+
+             # Regular message
+             formatted_lines.append(f"\n[{role}]{time_str}")
+             # Truncate very long messages
+             if len(content) > 3000:
+                 formatted_lines.append(content[:3000] + "\n... [message truncated, {0} more chars]".format(len(content) - 3000))
+             else:
+                 formatted_lines.append(content)
+
+         return '\n'.join(formatted_lines)
+
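To make the formatter's branching concrete, two hypothetical database rows and (in comments) roughly what they format to:

```python
# Hypothetical message rows in the shapes the formatter branches on:
# a plain exchange plus a [TOOL_RESULTS] payload as stored by the database.
messages = [
    {"role": "user", "content": "Deploy the fix",
     "timestamp": "2025-01-01T10:00:00"},
    {"role": "user", "content": '[TOOL_RESULTS][{"type": "tool_result", '
                                '"tool_use_id": "abc12345", "content": "OK"}]'},
]
# compactor._format_messages_for_compaction(messages) yields approximately:
#
#   [USER] [2025-01-01 10:00]
#   Deploy the fix
#
#   [USER] Tool Results:
#    Result 1 (tool:abc12345): OK
```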
599
+ def _summarise_tool_input(self, tool_input: Dict) -> str:
600
+ """
601
+ Create a brief summary of tool input for readability.
602
+
603
+ Args:
604
+ tool_input: Tool input dictionary
605
+
606
+ Returns:
607
+ Brief summary string
608
+ """
609
+ if not tool_input:
610
+ return ""
611
+
612
+ # Get key parameters, truncate long values
613
+ parts = []
614
+ for key, value in list(tool_input.items())[:3]: # Max 3 params
615
+ value_str = str(value)
616
+ if len(value_str) > 50:
617
+ value_str = value_str[:50] + "..."
618
+ parts.append(f"{key}={value_str}")
619
+
620
+ if len(tool_input) > 3:
621
+ parts.append(f"...+{len(tool_input) - 3} more")
622
+
623
+ return ', '.join(parts)
624
+
625
+ def _build_compaction_prompt(self, conversation_history: str,
626
+ message_count: int, token_count: int) -> str:
627
+ """
628
+ Build the single-pass compaction prompt.
629
+
630
+ Args:
631
+ conversation_history: Formatted conversation history
632
+ message_count: Number of messages being compacted
633
+ token_count: Approximate token count
634
+
635
+ Returns:
636
+ Complete compaction prompt
637
+ """
638
+ return COMPACTION_PROMPT_TEMPLATE.format(
639
+ message_count=message_count,
640
+ token_count=token_count,
641
+ conversation_history=conversation_history
642
+ )
643
+
644
+ def _create_compaction_marker(self, original_message_count: int,
645
+ original_token_count: int,
646
+ compacted_token_count: int,
647
+ model_id: str,
648
+ context_window: int) -> str:
649
+ """
650
+ Create a marker string for the compaction event.
651
+
652
+ Args:
653
+ original_message_count: Number of messages compacted
654
+ original_token_count: Original token count
655
+ compacted_token_count: Compacted token count
656
+ model_id: Model used for compaction
657
+ context_window: Model's context window size
658
+
659
+ Returns:
660
+ Formatted marker string
661
+ """
662
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
663
+ reduction_pct = ((original_token_count - compacted_token_count) /
664
+ original_token_count * 100) if original_token_count > 0 else 0
665
+
666
+ return (f"Compacted at {timestamp} | "
667
+ f"{original_message_count} messages | "
668
+ f"{original_token_count:,} → {compacted_token_count:,} tokens "
669
+ f"({reduction_pct:.1f}% reduction) | "
670
+ f"Context: {context_window:,} tokens")
671
+
672
+ # UI Helper Methods
673
+
674
+ def _display_progress(self, message: str):
675
+ """Display progress message via available interface."""
676
+ if self.cli_interface:
677
+ self.cli_interface.print_info(message)
678
+ # Web interface handling would go here if needed
679
+
680
+ def _display_info(self, message: str):
681
+ """Display info message via available interface."""
682
+ if self.cli_interface:
683
+ self.cli_interface.print_info(message)
684
+
685
+ def _display_success(self, message: str):
686
+ """Display success message via available interface."""
687
+ if self.cli_interface:
688
+ self.cli_interface.print_success(message)
689
+
690
+ def _display_warning(self, message: str):
691
+ """Display warning message via available interface."""
692
+ if self.cli_interface:
693
+ self.cli_interface.print_warning(message)
694
+
695
+ def _display_error(self, message: str):
696
+ """Display error message via available interface."""
697
+ if self.cli_interface:
698
+ self.cli_interface.print_error(message)
699
+
700
+ def _display_separator(self):
701
+ """Display separator via available interface."""
702
+ if self.cli_interface:
703
+ self.cli_interface.print_separator("─")
704
+
705
+
706
+ def get_provider_from_model_id(model_id: str) -> str:
707
+ """
708
+ Attempt to determine provider from model ID.
709
+
710
+ Args:
711
+ model_id: The model identifier
712
+
713
+ Returns:
714
+ Provider name string
715
+ """
716
+ model_lower = model_id.lower()
717
+
718
+ # Check for Anthropic/Claude models
719
+ if 'claude' in model_lower or 'anthropic' in model_lower:
720
+ return 'anthropic'
721
+
722
+ # Check for Bedrock-specific patterns
723
+ if 'amazon.' in model_lower or 'titan' in model_lower:
724
+ return 'aws_bedrock'
725
+ if 'meta.' in model_lower or 'llama' in model_lower:
726
+ return 'aws_bedrock'
727
+ if 'mistral.' in model_lower:
728
+ return 'aws_bedrock'
729
+ if 'cohere.' in model_lower:
730
+ return 'aws_bedrock'
731
+ if 'ai21.' in model_lower:
732
+ return 'aws_bedrock'
733
+
734
+ # Default to ollama for simple model names
735
+ return 'ollama'
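A few hypothetical model IDs run through the heuristic. Note that the claude/anthropic check fires first, so even a Bedrock-hosted Claude ID maps to 'anthropic', and any unmatched simple name falls through to 'ollama':

```python
from dtSpark.core.context_compaction import get_provider_from_model_id

# Illustrative lookups against the heuristic above (hypothetical IDs).
assert get_provider_from_model_id("anthropic.claude-3-5-sonnet-20241022-v2:0") == "anthropic"
assert get_provider_from_model_id("meta.llama3-70b-instruct-v1:0") == "aws_bedrock"
assert get_provider_from_model_id("qwen2.5:14b") == "ollama"  # simple name falls through
```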