dtspark-1.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/__init__.py +0 -0
- dtSpark/_description.txt +1 -0
- dtSpark/_full_name.txt +1 -0
- dtSpark/_licence.txt +21 -0
- dtSpark/_metadata.yaml +6 -0
- dtSpark/_name.txt +1 -0
- dtSpark/_version.txt +1 -0
- dtSpark/aws/__init__.py +7 -0
- dtSpark/aws/authentication.py +296 -0
- dtSpark/aws/bedrock.py +578 -0
- dtSpark/aws/costs.py +318 -0
- dtSpark/aws/pricing.py +580 -0
- dtSpark/cli_interface.py +2645 -0
- dtSpark/conversation_manager.py +3050 -0
- dtSpark/core/__init__.py +12 -0
- dtSpark/core/application.py +3355 -0
- dtSpark/core/context_compaction.py +735 -0
- dtSpark/daemon/__init__.py +104 -0
- dtSpark/daemon/__main__.py +10 -0
- dtSpark/daemon/action_monitor.py +213 -0
- dtSpark/daemon/daemon_app.py +730 -0
- dtSpark/daemon/daemon_manager.py +289 -0
- dtSpark/daemon/execution_coordinator.py +194 -0
- dtSpark/daemon/pid_file.py +169 -0
- dtSpark/database/__init__.py +482 -0
- dtSpark/database/autonomous_actions.py +1191 -0
- dtSpark/database/backends.py +329 -0
- dtSpark/database/connection.py +122 -0
- dtSpark/database/conversations.py +520 -0
- dtSpark/database/credential_prompt.py +218 -0
- dtSpark/database/files.py +205 -0
- dtSpark/database/mcp_ops.py +355 -0
- dtSpark/database/messages.py +161 -0
- dtSpark/database/schema.py +673 -0
- dtSpark/database/tool_permissions.py +186 -0
- dtSpark/database/usage.py +167 -0
- dtSpark/files/__init__.py +4 -0
- dtSpark/files/manager.py +322 -0
- dtSpark/launch.py +39 -0
- dtSpark/limits/__init__.py +10 -0
- dtSpark/limits/costs.py +296 -0
- dtSpark/limits/tokens.py +342 -0
- dtSpark/llm/__init__.py +17 -0
- dtSpark/llm/anthropic_direct.py +446 -0
- dtSpark/llm/base.py +146 -0
- dtSpark/llm/context_limits.py +438 -0
- dtSpark/llm/manager.py +177 -0
- dtSpark/llm/ollama.py +578 -0
- dtSpark/mcp_integration/__init__.py +5 -0
- dtSpark/mcp_integration/manager.py +653 -0
- dtSpark/mcp_integration/tool_selector.py +225 -0
- dtSpark/resources/config.yaml.template +631 -0
- dtSpark/safety/__init__.py +22 -0
- dtSpark/safety/llm_service.py +111 -0
- dtSpark/safety/patterns.py +229 -0
- dtSpark/safety/prompt_inspector.py +442 -0
- dtSpark/safety/violation_logger.py +346 -0
- dtSpark/scheduler/__init__.py +20 -0
- dtSpark/scheduler/creation_tools.py +599 -0
- dtSpark/scheduler/execution_queue.py +159 -0
- dtSpark/scheduler/executor.py +1152 -0
- dtSpark/scheduler/manager.py +395 -0
- dtSpark/tools/__init__.py +4 -0
- dtSpark/tools/builtin.py +833 -0
- dtSpark/web/__init__.py +20 -0
- dtSpark/web/auth.py +152 -0
- dtSpark/web/dependencies.py +37 -0
- dtSpark/web/endpoints/__init__.py +17 -0
- dtSpark/web/endpoints/autonomous_actions.py +1125 -0
- dtSpark/web/endpoints/chat.py +621 -0
- dtSpark/web/endpoints/conversations.py +353 -0
- dtSpark/web/endpoints/main_menu.py +547 -0
- dtSpark/web/endpoints/streaming.py +421 -0
- dtSpark/web/server.py +578 -0
- dtSpark/web/session.py +167 -0
- dtSpark/web/ssl_utils.py +195 -0
- dtSpark/web/static/css/dark-theme.css +427 -0
- dtSpark/web/static/js/actions.js +1101 -0
- dtSpark/web/static/js/chat.js +614 -0
- dtSpark/web/static/js/main.js +496 -0
- dtSpark/web/static/js/sse-client.js +242 -0
- dtSpark/web/templates/actions.html +408 -0
- dtSpark/web/templates/base.html +93 -0
- dtSpark/web/templates/chat.html +814 -0
- dtSpark/web/templates/conversations.html +350 -0
- dtSpark/web/templates/goodbye.html +81 -0
- dtSpark/web/templates/login.html +90 -0
- dtSpark/web/templates/main_menu.html +983 -0
- dtSpark/web/templates/new_conversation.html +191 -0
- dtSpark/web/web_interface.py +137 -0
- dtspark-1.0.4.dist-info/METADATA +187 -0
- dtspark-1.0.4.dist-info/RECORD +96 -0
- dtspark-1.0.4.dist-info/WHEEL +5 -0
- dtspark-1.0.4.dist-info/entry_points.txt +3 -0
- dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
- dtspark-1.0.4.dist-info/top_level.txt +1 -0
dtSpark/core/context_compaction.py (new file, 735 lines added):

"""
Context compaction module for intelligent conversation history management.

This module implements a single-pass LLM-driven compaction system that:
- Analyses conversation history for importance categorisation
- Selectively preserves critical information (architectural decisions, bugs, implementation details)
- Compresses less critical information (resolved tasks, exploration)
- Discards redundant information (duplicates, superseded decisions)

Design Goals:
Compaction is designed to "distill the contents of a context window in a high-fidelity manner,
enabling the agent to continue with minimal performance degradation."
"""

import logging
import json
import re
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime


# Compaction prompt template for single-pass intelligent compaction
COMPACTION_PROMPT_TEMPLATE = '''You are performing context compaction for an ongoing conversation. Your task is to distill the conversation history into a high-fidelity compressed format that enables continuation with minimal performance degradation.

## CATEGORISATION RULES

Analyse each segment of the conversation and categorise it:

### MUST PRESERVE (Full Fidelity)
- **Architectural Decisions**: Any decisions about system design, patterns, or structure with their rationale
- **Unresolved Issues**: Bugs, errors, or problems that have not been fully resolved
- **Implementation Details**: Specific code paths, file locations, configurations that affect future work
- **User Preferences**: Explicit requests, constraints, or preferences stated by the user
- **Critical Data**: Important numbers, calculations, findings, and their sources
- **Active Tasks**: Work in progress, next steps, or pending actions
- **Error Context**: Error messages, stack traces, and debugging information for unresolved issues

### COMPRESS (Reduced Fidelity)
- **Resolved Tasks**: Brief outcome note only (e.g., "Fixed authentication bug in auth.py")
- **Exploratory Discussion**: Conclusions only, not the exploration process
- **Tool Outputs**: Key findings only, not raw output
- **Explanations**: Final understanding only, not iterative clarification

### DISCARD
- **Redundant Information**: Duplicate tool outputs, repeated explanations
- **Superseded Decisions**: Earlier decisions that were later changed
- **Verbose Completions**: Detailed explanations of work that is finished and won't be referenced
- **Pleasantries**: Greetings, acknowledgments, conversational filler

## OUTPUT FORMAT

Produce a structured compacted context in the following format:

# COMPACTED CONTEXT

## Critical Decisions & Architecture
[List architectural decisions with brief rationale - preserve exact details]

## Unresolved Issues
[List any bugs, errors, or problems still being worked on - preserve full context]

## Implementation State
[Current state of implementation: what's done, what's in progress, key file paths]

## Key Data & Findings
[Important numbers, calculations, discoveries with sources - preserve exact values]

## User Preferences & Constraints
[Explicit user requirements and constraints]

## Recent Context Summary
[Brief summary of the most recent exchanges not covered above]

## Discarded Topics
[List topics that were discussed but are no longer relevant - titles only for reference]

## CONVERSATION TO COMPACT

The original conversation contained {message_count} messages with approximately {token_count:,} tokens.

{conversation_history}

## INSTRUCTIONS

1. Read through the entire conversation carefully
2. Categorise each meaningful segment according to the rules above
3. PRESERVE critical information with HIGH FIDELITY - do not lose important details
4. COMPRESS resolved/completed items to brief summaries
5. DISCARD redundant and superseded information
6. Output the structured compacted context
7. Ensure the compacted context contains ALL information needed to continue the conversation effectively
8. For numerical data, preserve EXACT values - do not round or approximate

Begin your compacted context output now:'''


class ContextCompactor:
    """
    Manages intelligent context compaction for conversations.

    This class implements a single-pass LLM-driven compaction that analyses
    conversation history and produces a structured, compressed representation
    while preserving critical information.
    """

    def __init__(self, bedrock_service, database, context_limit_resolver,
                 cli_interface=None, web_interface=None,
                 compaction_threshold: float = 0.7,
                 emergency_threshold: float = 0.95,
                 compaction_ratio: float = 0.3):
        """
        Initialise the context compactor.

        Args:
            bedrock_service: Service for LLM invocation
            database: Database instance for message storage
            context_limit_resolver: ContextLimitResolver instance for model limits
            cli_interface: Optional CLI interface for progress display
            web_interface: Optional web interface for progress display
            compaction_threshold: Fraction of context window to trigger compaction (default 0.7)
            emergency_threshold: Fraction of context window for emergency compaction (default 0.95)
            compaction_ratio: Target ratio for compacted content (default 0.3)
        """
        self.bedrock_service = bedrock_service
        self.database = database
        self.context_limit_resolver = context_limit_resolver
        self.cli_interface = cli_interface
        self.web_interface = web_interface
        self.compaction_threshold = compaction_threshold
        self.emergency_threshold = emergency_threshold
        self.compaction_ratio = compaction_ratio

        logging.info(f"ContextCompactor initialised with threshold={compaction_threshold}, "
                     f"emergency={emergency_threshold}, ratio={compaction_ratio}")

    def update_service(self, bedrock_service):
        """
        Update the LLM service used for compaction.

        This should be called when the active provider changes.

        Args:
            bedrock_service: The new LLM service to use
        """
        old_provider = "unknown"
        new_provider = "unknown"

        if self.bedrock_service and hasattr(self.bedrock_service, 'get_provider_name'):
            old_provider = self.bedrock_service.get_provider_name()
        if bedrock_service and hasattr(bedrock_service, 'get_provider_name'):
            new_provider = bedrock_service.get_provider_name()

        self.bedrock_service = bedrock_service
        logging.info(f"ContextCompactor service updated: {old_provider} -> {new_provider}")

    def check_and_compact(self, conversation_id: int, model_id: str,
                          provider: str, in_tool_use_loop: bool = False) -> bool:
        """
        Check if compaction is needed and perform it.

        Args:
            conversation_id: Current conversation ID
            model_id: Current model ID
            provider: Current provider name
            in_tool_use_loop: Whether currently in tool use sequence

        Returns:
            True if compaction was performed, False otherwise
        """
        # Get context limits for current model
        limits = self.context_limit_resolver.get_context_limits(model_id, provider)
        context_window = limits['context_window']

        # Calculate current token usage
        current_tokens = self.database.get_conversation_token_count(conversation_id)

        # Calculate thresholds
        compaction_threshold_tokens = int(context_window * self.compaction_threshold)
        emergency_threshold_tokens = int(context_window * self.emergency_threshold)

        logging.debug(f"Compaction check: {current_tokens:,}/{context_window:,} tokens "
                      f"(threshold: {compaction_threshold_tokens:,}, emergency: {emergency_threshold_tokens:,})")

        # Check emergency threshold (force compaction even during tool use)
        if current_tokens >= emergency_threshold_tokens:
            logging.warning(f"EMERGENCY COMPACTION: {current_tokens:,}/{context_window:,} tokens "
                            f"({current_tokens/context_window*100:.1f}% of context window)")
            if self.cli_interface:
                self.cli_interface.print_warning(
                    f"Emergency compaction triggered at {current_tokens/context_window*100:.1f}% of context window"
                )
            return self._perform_compaction(conversation_id, model_id, provider, limits)

        # Defer during tool use unless emergency
        if in_tool_use_loop:
            logging.debug(f"Deferring compaction during tool use loop "
                          f"({current_tokens:,}/{emergency_threshold_tokens:,} tokens)")
            return False

        # Normal threshold check
        if current_tokens > compaction_threshold_tokens:
            logging.info(f"Compaction triggered: {current_tokens:,}/{compaction_threshold_tokens:,} tokens "
                         f"({current_tokens/context_window*100:.1f}% of context window)")
            return self._perform_compaction(conversation_id, model_id, provider, limits)

        return False

    def _perform_compaction(self, conversation_id: int, model_id: str,
                            provider: str, limits: Dict[str, int]) -> bool:
        """
        Perform the actual context compaction.

        Args:
            conversation_id: Conversation to compact
            model_id: Current model ID
            provider: Current provider name
            limits: Context limits dict with 'context_window' and 'max_output'

        Returns:
            True if successful, False otherwise
        """
        start_time = datetime.now()

        # Display progress
        self._display_progress("🗜️ Starting intelligent context compaction...")
        self._display_separator()

        try:
            # Get ALL messages (including previously compacted ones for full recompaction)
            messages = self.database.get_conversation_messages(
                conversation_id, include_rolled_up=True
            )

            if len(messages) <= 4:
                logging.warning("Not enough messages to compact")
                self._display_warning("Not enough messages to compact")
                return False

            # Calculate original metrics
            original_token_count = sum(msg.get('token_count', 0) for msg in messages)
            original_message_count = len(messages)

            self._display_info(
                f"Analysing {original_message_count} messages ({original_token_count:,} tokens)..."
            )

            # Format conversation history for compaction
            conversation_history = self._format_messages_for_compaction(messages)

            # Build the compaction prompt
            compaction_prompt = self._build_compaction_prompt(
                conversation_history,
                original_message_count,
                original_token_count
            )

            # Check provider rate limits before attempting compaction
            rate_limit_check = self._check_rate_limits_for_compaction(
                compaction_prompt, original_token_count
            )
            if not rate_limit_check['can_proceed']:
                self._display_warning(rate_limit_check['message'])
                logging.warning(f"Compaction skipped: {rate_limit_check['message']}")
                return False

            # Calculate max tokens for compacted output
            # Use model's max_output but cap at reasonable size
            max_compaction_tokens = min(
                limits.get('max_output', 8192),
                max(2000, int(original_token_count * self.compaction_ratio)),
                16000  # Absolute cap
            )

            # Estimate prompt size and validate against context window
            context_window = limits.get('context_window', 8192)
            prompt_tokens = 0
            if hasattr(self.bedrock_service, 'count_tokens'):
                try:
                    prompt_tokens = self.bedrock_service.count_tokens(compaction_prompt)
                except Exception:
                    prompt_tokens = len(compaction_prompt) // 4  # Fallback estimate
            else:
                prompt_tokens = len(compaction_prompt) // 4

            # Check if prompt exceeds context window (need room for output too)
            max_input_tokens = context_window - max_compaction_tokens - 1000  # Safety buffer
            if prompt_tokens > max_input_tokens:
                logging.warning(
                    f"Compaction prompt ({prompt_tokens:,} tokens) too large for context window "
                    f"({context_window:,} tokens with {max_compaction_tokens:,} reserved for output)"
                )
                self._display_warning(
                    f"Conversation too large ({prompt_tokens:,} tokens) for compaction in a single pass. "
                    f"Context window: {context_window:,} tokens"
                )
                # Still proceed - let the API handle it and return a proper error
                # The model might still be able to handle it or provide partial results

            logging.info(
                f"Compaction: input={prompt_tokens:,} tokens, target_output={max_compaction_tokens:,} tokens, "
                f"context_window={context_window:,} tokens"
            )

            self._display_info(f"Generating compacted context (target: {max_compaction_tokens:,} tokens)...")

            # Invoke LLM for compaction
            response = self.bedrock_service.invoke_model(
                [{'role': 'user', 'content': compaction_prompt}],
                max_tokens=max_compaction_tokens,
                temperature=0.2  # Low temperature for consistent compaction
            )

            # Check for error response
            if not response:
                logging.error("Compaction failed - null response from model")
                self._display_error("Compaction failed - no response from model")
                return False

            if response.get('error'):
                error_msg = response.get('error_message', 'Unknown error')
                error_type = response.get('error_type', 'Unknown')
                logging.error(f"Compaction failed - {error_type}: {error_msg}")
                self._display_error(f"Compaction failed: {error_msg}")
                return False

            # Get content from response (may be in 'content' or 'content_blocks')
            content = response.get('content', '')
            if not content and response.get('content_blocks'):
                # Try to extract text from content_blocks
                for block in response.get('content_blocks', []):
                    if block.get('type') == 'text':
                        content += block.get('text', '')

            if not content:
                logging.error(f"Compaction failed - empty response. Response keys: {list(response.keys())}")
                self._display_error("Compaction failed - no content in model response")
                return False

            compacted_content = content.strip()
            compacted_token_count = self.bedrock_service.count_tokens(compacted_content)

            # Validate compaction quality
            if len(compacted_content) < 200:
                logging.warning(f"Compacted content too brief ({len(compacted_content)} chars), aborting")
                self._display_warning("Compacted content too brief, keeping original messages")
                return False

            # Create compaction marker
            compaction_marker = self._create_compaction_marker(
                original_message_count=original_message_count,
                original_token_count=original_token_count,
                compacted_token_count=compacted_token_count,
                model_id=model_id,
                context_window=limits['context_window']
            )

            # Add compacted context as special message
            self.database.add_message(
                conversation_id,
                'user',
                f"[COMPACTED CONTEXT - {compaction_marker}]\n\n{compacted_content}",
                compacted_token_count
            )

            # Mark all previous messages as compacted (rolled_up)
            message_ids = [msg['id'] for msg in messages]
            self.database.mark_messages_as_rolled_up(message_ids)

            # Record compaction in rollup history
            self.database.record_rollup(
                conversation_id,
                original_message_count,
                compacted_content,
                original_token_count,
                compacted_token_count
            )

            # Recalculate total_tokens to fix any accounting errors from the rollup
            # This is necessary because record_rollup uses incremental arithmetic that
            # can become corrupted when include_rolled_up=True includes already-subtracted tokens
            actual_token_count = self.database.recalculate_total_tokens(conversation_id)
            logging.debug(f"Recalculated total_tokens after compaction: {actual_token_count:,}")

            # Calculate metrics
            elapsed_time = (datetime.now() - start_time).total_seconds()
            reduction_pct = ((original_token_count - compacted_token_count) /
                             original_token_count * 100) if original_token_count > 0 else 0

            # Log success
            logging.info(f"Compaction completed in {elapsed_time:.1f}s: "
                         f"{original_message_count} messages → structured context, "
                         f"{original_token_count:,} → {compacted_token_count:,} tokens "
                         f"({reduction_pct:.1f}% reduction)")

            # Display completion
            self._display_success(
                f"✓ Compaction complete: {original_message_count} messages → structured context"
            )
            self._display_info(
                f"Token reduction: {original_token_count:,} → {compacted_token_count:,} "
                f"({reduction_pct:.1f}% reduction)"
            )
            self._display_info(f"Completed in {elapsed_time:.1f} seconds")
            self._display_separator()

            return True

        except Exception as e:
            logging.error(f"Compaction failed with error: {e}", exc_info=True)
            self._display_error(f"Compaction failed: {str(e)}")
            return False

    def _check_rate_limits_for_compaction(
            self, compaction_prompt: str, original_token_count: int
    ) -> Dict[str, Any]:
        """
        Check if the compaction request would exceed provider rate limits.

        Args:
            compaction_prompt: The full compaction prompt to be sent
            original_token_count: Original token count of messages being compacted

        Returns:
            Dictionary with:
            - can_proceed: bool - Whether compaction can proceed
            - message: str - Explanation message
            - estimated_tokens: int - Estimated input tokens for the request
        """
        # Get rate limits from the service
        rate_limits = None
        if hasattr(self.bedrock_service, 'get_rate_limits'):
            rate_limits = self.bedrock_service.get_rate_limits()

        # If no rate limits or provider doesn't have limits, proceed
        if not rate_limits or not rate_limits.get('has_limits', False):
            return {
                'can_proceed': True,
                'message': 'No rate limits detected',
                'estimated_tokens': original_token_count
            }

        # Estimate input tokens for the compaction request
        # Use the service's token counter if available
        if hasattr(self.bedrock_service, 'count_tokens'):
            try:
                estimated_tokens = self.bedrock_service.count_tokens(compaction_prompt)
            except Exception:
                # Fallback: estimate at 4 chars per token
                estimated_tokens = len(compaction_prompt) // 4
        else:
            estimated_tokens = len(compaction_prompt) // 4

        # Get input token limit
        input_limit = rate_limits.get('input_tokens_per_minute')

        if input_limit and estimated_tokens > input_limit:
            # Request exceeds rate limit - cannot proceed
            provider_name = "Anthropic Direct"
            if hasattr(self.bedrock_service, 'get_provider_name'):
                provider_name = self.bedrock_service.get_provider_name()
            elif hasattr(self.bedrock_service, 'get_active_provider'):
                provider_name = self.bedrock_service.get_active_provider() or provider_name

            message = (
                f"Compaction request ({estimated_tokens:,} tokens) exceeds {provider_name} "
                f"rate limit ({input_limit:,} tokens/minute). "
                f"Consider using AWS Bedrock which has higher rate limits, "
                f"or wait for the conversation to naturally reduce in size."
            )

            logging.warning(
                f"Compaction blocked: {estimated_tokens:,} tokens exceeds "
                f"{input_limit:,} token rate limit for {provider_name}"
            )

            return {
                'can_proceed': False,
                'message': message,
                'estimated_tokens': estimated_tokens,
                'rate_limit': input_limit
            }

        # Within limits, can proceed
        return {
            'can_proceed': True,
            'message': f'Request within rate limits ({estimated_tokens:,}/{input_limit:,} tokens)',
            'estimated_tokens': estimated_tokens,
            'rate_limit': input_limit
        }

    def _format_messages_for_compaction(self, messages: List[Dict]) -> str:
        """
        Format messages into readable conversation history for compaction.

        Handles different message types including tool_use, tool_result,
        and regular messages.

        Args:
            messages: List of message dictionaries from database

        Returns:
            Formatted conversation history string
        """
        formatted_lines = []
        message_number = 0

        for msg in messages:
            message_number += 1
            role = msg.get('role', 'unknown').upper()
            content = msg.get('content', '')
            timestamp = msg.get('timestamp', '')

            # Format timestamp if available
            time_str = ""
            if timestamp:
                try:
                    if isinstance(timestamp, str):
                        dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
                    else:
                        dt = timestamp
                    time_str = f" [{dt.strftime('%Y-%m-%d %H:%M')}]"
                except (ValueError, AttributeError):
                    pass

            # Check for previously compacted content
            if content.startswith('[COMPACTED CONTEXT'):
                formatted_lines.append(f"\n--- PREVIOUS COMPACTION{time_str} ---")
                # Extract just the summary sections, not the full compacted content
                formatted_lines.append("[Previous conversation was compacted - key points preserved below]")
                # Include a truncated version of the compacted content
                compacted_preview = content[:2000] + "..." if len(content) > 2000 else content
                formatted_lines.append(compacted_preview)
                formatted_lines.append("--- END PREVIOUS COMPACTION ---\n")
                continue

            # Check for tool results
            if content.startswith('[TOOL_RESULTS]'):
                formatted_lines.append(f"\n[{role}]{time_str} Tool Results:")
                try:
                    tool_results_json = content.replace('[TOOL_RESULTS]', '', 1)
                    tool_results = json.loads(tool_results_json)
                    if isinstance(tool_results, list):
                        for i, result in enumerate(tool_results, 1):
                            if isinstance(result, dict) and result.get('type') == 'tool_result':
                                tool_id = result.get('tool_use_id', 'unknown')[:8]
                                result_content = result.get('content', '')
                                # Truncate long tool results
                                if len(str(result_content)) > 500:
                                    result_content = str(result_content)[:500] + "... [truncated]"
                                formatted_lines.append(f" Result {i} (tool:{tool_id}): {result_content}")
                except json.JSONDecodeError:
                    formatted_lines.append(f" [Raw tool results - {len(content)} chars]")
                continue

            # Check for tool use blocks
            if role == 'ASSISTANT' and content.startswith('['):
                try:
                    content_blocks = json.loads(content)
                    if isinstance(content_blocks, list):
                        text_parts = []
                        tool_calls = []
                        for block in content_blocks:
                            if isinstance(block, dict):
                                if block.get('type') == 'text':
                                    text_parts.append(block.get('text', ''))
                                elif block.get('type') == 'tool_use':
                                    tool_name = block.get('name', 'unknown')
                                    tool_input = block.get('input', {})
                                    # Summarise tool input
                                    input_summary = self._summarise_tool_input(tool_input)
                                    tool_calls.append(f"{tool_name}({input_summary})")

                        if text_parts:
                            formatted_lines.append(f"\n[{role}]{time_str}")
                            formatted_lines.append(''.join(text_parts))
                        if tool_calls:
                            formatted_lines.append(f"[Tool calls: {', '.join(tool_calls)}]")
                        continue
                except json.JSONDecodeError:
                    pass  # Not JSON, treat as regular message

            # Check for rollup summaries
            if content.startswith('[Summary of previous conversation]'):
                formatted_lines.append(f"\n--- PREVIOUS SUMMARY{time_str} ---")
                formatted_lines.append(content)
                formatted_lines.append("--- END PREVIOUS SUMMARY ---\n")
                continue

            # Regular message
            formatted_lines.append(f"\n[{role}]{time_str}")
            # Truncate very long messages
            if len(content) > 3000:
                formatted_lines.append(content[:3000] + "\n... [message truncated, {0} more chars]".format(len(content) - 3000))
            else:
                formatted_lines.append(content)

        return '\n'.join(formatted_lines)

    def _summarise_tool_input(self, tool_input: Dict) -> str:
        """
        Create a brief summary of tool input for readability.

        Args:
            tool_input: Tool input dictionary

        Returns:
            Brief summary string
        """
        if not tool_input:
            return ""

        # Get key parameters, truncate long values
        parts = []
        for key, value in list(tool_input.items())[:3]:  # Max 3 params
            value_str = str(value)
            if len(value_str) > 50:
                value_str = value_str[:50] + "..."
            parts.append(f"{key}={value_str}")

        if len(tool_input) > 3:
            parts.append(f"...+{len(tool_input) - 3} more")

        return ', '.join(parts)

    def _build_compaction_prompt(self, conversation_history: str,
                                 message_count: int, token_count: int) -> str:
        """
        Build the single-pass compaction prompt.

        Args:
            conversation_history: Formatted conversation history
            message_count: Number of messages being compacted
            token_count: Approximate token count

        Returns:
            Complete compaction prompt
        """
        return COMPACTION_PROMPT_TEMPLATE.format(
            message_count=message_count,
            token_count=token_count,
            conversation_history=conversation_history
        )

    def _create_compaction_marker(self, original_message_count: int,
                                  original_token_count: int,
                                  compacted_token_count: int,
                                  model_id: str,
                                  context_window: int) -> str:
        """
        Create a marker string for the compaction event.

        Args:
            original_message_count: Number of messages compacted
            original_token_count: Original token count
            compacted_token_count: Compacted token count
            model_id: Model used for compaction
            context_window: Model's context window size

        Returns:
            Formatted marker string
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        reduction_pct = ((original_token_count - compacted_token_count) /
                         original_token_count * 100) if original_token_count > 0 else 0

        return (f"Compacted at {timestamp} | "
                f"{original_message_count} messages | "
                f"{original_token_count:,} → {compacted_token_count:,} tokens "
                f"({reduction_pct:.1f}% reduction) | "
                f"Context: {context_window:,} tokens")

    # UI Helper Methods

    def _display_progress(self, message: str):
        """Display progress message via available interface."""
        if self.cli_interface:
            self.cli_interface.print_info(message)
        # Web interface handling would go here if needed

    def _display_info(self, message: str):
        """Display info message via available interface."""
        if self.cli_interface:
            self.cli_interface.print_info(message)

    def _display_success(self, message: str):
        """Display success message via available interface."""
        if self.cli_interface:
            self.cli_interface.print_success(message)

    def _display_warning(self, message: str):
        """Display warning message via available interface."""
        if self.cli_interface:
            self.cli_interface.print_warning(message)

    def _display_error(self, message: str):
        """Display error message via available interface."""
        if self.cli_interface:
            self.cli_interface.print_error(message)

    def _display_separator(self):
        """Display separator via available interface."""
        if self.cli_interface:
            self.cli_interface.print_separator("─")


def get_provider_from_model_id(model_id: str) -> str:
    """
    Attempt to determine provider from model ID.

    Args:
        model_id: The model identifier

    Returns:
        Provider name string
    """
    model_lower = model_id.lower()

    # Check for Anthropic/Claude models
    if 'claude' in model_lower or 'anthropic' in model_lower:
        return 'anthropic'

    # Check for Bedrock-specific patterns
    if 'amazon.' in model_lower or 'titan' in model_lower:
        return 'aws_bedrock'
    if 'meta.' in model_lower or 'llama' in model_lower:
        return 'aws_bedrock'
    if 'mistral.' in model_lower:
        return 'aws_bedrock'
    if 'cohere.' in model_lower:
        return 'aws_bedrock'
    if 'ai21.' in model_lower:
        return 'aws_bedrock'

    # Default to ollama for simple model names
    return 'ollama'
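
For orientation, a minimal usage sketch of the module above (not part of the package diff): the stub resolver and database below are illustrative stand-ins for dtSpark's own context-limit resolver and database layer, exposing only the duck-typed methods that check_and_compact touches on the no-compaction path; only ContextCompactor, get_provider_from_model_id, and their signatures are taken from the file shown above.

# Illustrative sketch only - StubResolver/StubDatabase are placeholders, not dtSpark classes.
from dtSpark.core.context_compaction import ContextCompactor, get_provider_from_model_id

class StubResolver:
    def get_context_limits(self, model_id, provider):
        # Shape expected by check_and_compact: 'context_window' and 'max_output'
        return {'context_window': 200_000, 'max_output': 8192}

class StubDatabase:
    def get_conversation_token_count(self, conversation_id):
        return 12_000  # well below the 70% threshold, so no compaction is triggered

compactor = ContextCompactor(
    bedrock_service=None,              # the real application passes an LLM service with invoke_model()/count_tokens()
    database=StubDatabase(),
    context_limit_resolver=StubResolver(),
    compaction_threshold=0.7,          # compact once 70% of the context window is used
    emergency_threshold=0.95,          # force compaction at 95%, even mid tool-use loop
)

model_id = "claude-sonnet-example"     # placeholder model identifier
provider = get_provider_from_model_id(model_id)   # 'claude' substring -> 'anthropic'
did_compact = compactor.check_and_compact(
    conversation_id=1,
    model_id=model_id,
    provider=provider,
)
print(did_compact)  # False: 12,000 tokens is below the 140,000-token compaction threshold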