hanuscode 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hanus/__init__.py +5 -0
- hanus/__main__.py +10 -0
- hanus/action_handlers.py +76 -0
- hanus/action_parser.py +82 -0
- hanus/agent_runner.py +1445 -0
- hanus/analysis/__init__.py +5 -0
- hanus/analysis/debt.py +702 -0
- hanus/analysis/dependencies.py +475 -0
- hanus/cache/__init__.py +5 -0
- hanus/cache/response_cache.py +560 -0
- hanus/config.py +401 -0
- hanus/connectors/__init__.py +19 -0
- hanus/connectors/base.py +114 -0
- hanus/connectors/claude_connector.py +146 -0
- hanus/connectors/gemini_connector.py +141 -0
- hanus/connectors/glm_connector.py +160 -0
- hanus/connectors/ollama_connector.py +174 -0
- hanus/connectors/openai_connector.py +122 -0
- hanus/connectors/registry.py +26 -0
- hanus/context/__init__.py +7 -0
- hanus/context/manager.py +837 -0
- hanus/context/selective.py +626 -0
- hanus/error_recovery/__init__.py +5 -0
- hanus/error_recovery/auto_fix.py +605 -0
- hanus/hooks/__init__.py +5 -0
- hanus/hooks/manager.py +247 -0
- hanus/instincts/__init__.py +44 -0
- hanus/instincts/cli.py +372 -0
- hanus/instincts/detector.py +281 -0
- hanus/instincts/evolver.py +361 -0
- hanus/instincts/manager.py +343 -0
- hanus/instincts/types.py +253 -0
- hanus/logger.py +81 -0
- hanus/memory/__init__.py +8 -0
- hanus/memory/manager.py +265 -0
- hanus/memory/types.py +119 -0
- hanus/monitor.py +341 -0
- hanus/parallel/__init__.py +5 -0
- hanus/parallel/executor.py +300 -0
- hanus/permissions.py +182 -0
- hanus/plan/__init__.py +8 -0
- hanus/plan/mode.py +267 -0
- hanus/plan/models.py +152 -0
- hanus/plugin_manager.py +754 -0
- hanus/plugin_registry.py +391 -0
- hanus/plugins/__init__.py +1 -0
- hanus/plugins/arena.py +630 -0
- hanus/plugins/code_review.py +123 -0
- hanus/plugins/cortex.py +1750 -0
- hanus/plugins/deps_check.py +27 -0
- hanus/plugins/git_ops.py +33 -0
- hanus/plugins/metasploit.py +530 -0
- hanus/plugins/notes.py +583 -0
- hanus/plugins/search_code.py +59 -0
- hanus/plugins/searchsploit.py +495 -0
- hanus/plugins/strategist.py +175 -0
- hanus/plugins/webui.py +5200 -0
- hanus/profiles.py +479 -0
- hanus/profiles_builtin/__init__.py +0 -0
- hanus/profiles_builtin/architect/profile.yaml +12 -0
- hanus/profiles_builtin/architect/system_prompt.txt +71 -0
- hanus/profiles_builtin/deep/profile.yaml +12 -0
- hanus/profiles_builtin/deep/system_prompt.txt +66 -0
- hanus/profiles_builtin/developer/__init__.py +0 -0
- hanus/profiles_builtin/developer/profile.yaml +9 -0
- hanus/profiles_builtin/developer/system_prompt.txt +176 -0
- hanus/profiles_builtin/speed/profile.yaml +12 -0
- hanus/profiles_builtin/speed/system_prompt.txt +51 -0
- hanus/project_tools.py +177 -0
- hanus/query_engine.py +1594 -0
- hanus/rules/__init__.py +237 -0
- hanus/search/__init__.py +5 -0
- hanus/search/semantic.py +596 -0
- hanus/session_manager.py +547 -0
- hanus/skill_manager.py +702 -0
- hanus/skills/__init__.py +4 -0
- hanus/subagent/__init__.py +8 -0
- hanus/subagent/agents/__init__.py +253 -0
- hanus/subagent/manager.py +309 -0
- hanus/subagent/types.py +266 -0
- hanus/suggestions/__init__.py +5 -0
- hanus/suggestions/proactive.py +451 -0
- hanus/tasks/__init__.py +8 -0
- hanus/tasks/manager.py +330 -0
- hanus/tasks/models.py +106 -0
- hanus/terminal_prompt.py +166 -0
- hanus/tools.py +1849 -0
- hanus/ui.py +939 -0
- hanuscode-1.0.0.dist-info/METADATA +1151 -0
- hanuscode-1.0.0.dist-info/RECORD +93 -0
- hanuscode-1.0.0.dist-info/WHEEL +5 -0
- hanuscode-1.0.0.dist-info/entry_points.txt +2 -0
- hanuscode-1.0.0.dist-info/top_level.txt +1 -0
hanus/context/manager.py
ADDED
|
@@ -0,0 +1,837 @@
|
|
|
1
|
+
# hanus/context/manager.py
|
|
2
|
+
"""
|
|
3
|
+
Gestor de contexto con compresión inteligente.
|
|
4
|
+
|
|
5
|
+
Maneja conversaciones largas sin agotar el límite de tokens del modelo.
|
|
6
|
+
Incluye:
|
|
7
|
+
- Compresión con resúmenes jerárquicos
|
|
8
|
+
- Índice de decisiones arquitectónicas persistentes
|
|
9
|
+
- Compresión incremental
|
|
10
|
+
- Metadatos de debugging
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
import time
|
|
14
|
+
import json
|
|
15
|
+
import hashlib
|
|
16
|
+
from dataclasses import dataclass, field, asdict
|
|
17
|
+
from typing import List, Dict, Tuple, Optional, Any, Set
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
|
|
21
|
+
# Intentar usar tiktoken para conteo preciso, si no está disponible usar estimación
|
|
22
|
+
try:
|
|
23
|
+
import tiktoken
|
|
24
|
+
HAS_TIKTOKEN = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
HAS_TIKTOKEN = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
30
|
+
# DATA CLASSES
|
|
31
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
@dataclass
class CompressionStats:
    """Counters describing the outcome of a single compression pass."""

    # Token count of the history before compression.
    original_tokens: int = 0
    # Token count of the history after compression.
    compressed_tokens: int = 0
    # Number of messages that were not carried over verbatim.
    messages_removed: int = 0
    # Number of messages folded into section summaries.
    messages_summarized: int = 0
    # (original_tokens - compressed_tokens) / original_tokens, or 0 when empty.
    compression_ratio: float = 0.0
    # ISO-8601 timestamp (naive local time) taken when the instance is created.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class MessageImportance:
    """Importance score assigned to one message of the history."""

    # Position of the message in the history it was scored against.
    index: int
    # Role of the message ("system", "user", "assistant", ...).
    role: str
    # Importance score; the scorer in this file clamps it to [0, 1].
    score: float
    # Comma-separated list of the signals that contributed to the score.
    reason: str = ""
    # Estimated token cost of the message.
    tokens: int = 0
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class ArchitectureDecision:
    """An architecture decision record that must survive context compression."""

    # Short identifier (the manager derives it from an MD5 of title + time).
    id: str
    # Short title; also used to find messages that mention the decision.
    title: str
    # What was decided.
    description: str
    # Why it was decided.
    rationale: str
    # ISO-8601 creation timestamp.
    timestamp: str
    # Files related to the decision.
    files_involved: List[str] = field(default_factory=list)
    # Free-form tags used for categorisation.
    tags: List[str] = field(default_factory=list)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class SectionSummary:
    """Summary of a contiguous run of messages that was compressed away."""

    # Index of the first message of the section (inclusive).
    start_index: int
    # Index of the last message of the section (inclusive).
    end_index: int
    # Number of messages covered by the section.
    message_count: int
    # Estimated tokens of the original messages.
    original_tokens: int
    # Estimated tokens of the generated summary text.
    summary_tokens: int
    # The summary text itself.
    summary: str
    # Topics extracted from the section.
    topics: List[str] = field(default_factory=list)
    # File names mentioned in the section.
    files_mentioned: List[str] = field(default_factory=list)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class CompressionMetadata:
    """Debugging record describing one compression operation."""

    # Short identifier of the compression run.
    compression_id: str
    # ISO-8601 timestamp of the run.
    timestamp: str
    # Strategy label: "hierarchical", "incremental" or "aggressive".
    strategy: str
    # Token/message counters collected during the run.
    stats: CompressionStats
    # Number of sections that were created for summarisation.
    sections_created: int = 0
    # Number of architecture decisions known when the run finished.
    decisions_preserved: int = 0
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
91
|
+
# CONTEXT MANAGER
|
|
92
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
93
|
+
|
|
94
|
+
class ContextManager:
|
|
95
|
+
"""
|
|
96
|
+
Gestiona el contexto de conversación con compresión inteligente.
|
|
97
|
+
|
|
98
|
+
Estrategias:
|
|
99
|
+
1. Token counting antes de API calls
|
|
100
|
+
2. Sliding window para mensajes antiguos
|
|
101
|
+
3. Hierarchical summarization de secciones
|
|
102
|
+
4. Importance scoring para retención
|
|
103
|
+
5. Architecture Decision Record (ADR) persistence
|
|
104
|
+
|
|
105
|
+
Uso:
|
|
106
|
+
manager = ContextManager(max_tokens=100000)
|
|
107
|
+
if manager.should_compress(messages):
|
|
108
|
+
messages, summary = manager.compress_with_summaries(messages)
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
# Límites por defecto
|
|
112
|
+
DEFAULT_MAX_TOKENS = 200000 # 200k tokens para modelos modernos
|
|
113
|
+
SUMMARY_THRESHOLD = 0.70 # Comprimir al 70% del límite
|
|
114
|
+
SAFETY_MARGIN = 0.90 # Nunca superar el 90% del límite del modelo
|
|
115
|
+
MIN_MESSAGES_TO_KEEP = 10
|
|
116
|
+
SYSTEM_MESSAGES_TO_KEEP = 3 # Siempre mantener los primeros N mensajes de sistema
|
|
117
|
+
SECTION_SIZE = 10 # Mensajes por sección para resúmenes jerárquicos
|
|
118
|
+
|
|
119
|
+
def __init__(
    self,
    max_tokens: int = DEFAULT_MAX_TOKENS,
    preserve_recent: int = 20,
    preserve_system: bool = True,
    cache_dir: Optional[Path] = None,
):
    """
    Set up limits, on-disk cache location and in-memory bookkeeping.

    Args:
        max_tokens: Token budget of the target model.
        preserve_recent: Number of trailing messages that are never compressed.
        preserve_system: Whether the leading system messages are always kept.
        cache_dir: Directory for persisted data; defaults to
            ``~/.hanus/context_cache``. It is created if missing.
    """
    self.max_tokens = max_tokens
    self.preserve_recent = preserve_recent
    self.preserve_system = preserve_system
    self._tokenizer = None

    # Cache and persistence: the directory is created eagerly.
    if cache_dir:
        self.cache_dir = cache_dir
    else:
        self.cache_dir = Path.home() / ".hanus" / "context_cache"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Architecture decision index, loaded from the cache directory.
    self._architecture_decisions: List[ArchitectureDecision] = []
    self._decisions_file = self.cache_dir / "architecture_decisions.json"
    self._load_decisions()

    # Section summaries accumulated across compressions.
    self._section_summaries: List[SectionSummary] = []

    # History of compression runs, kept for debugging.
    self._compression_history: List[CompressionMetadata] = []

    # Exact token counting is optional: without tiktoken (or if the encoding
    # cannot be loaded) the manager falls back to a character-based estimate.
    if not HAS_TIKTOKEN:
        return
    try:
        self._tokenizer = tiktoken.get_encoding("cl100k_base")
    except Exception:
        # Deliberate best-effort: keep self._tokenizer as None.
        pass
|
|
152
|
+
|
|
153
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
154
|
+
# API PÚBLICA
|
|
155
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
156
|
+
|
|
157
|
+
def count_tokens(self, messages: List[Dict]) -> int:
    """
    Count the total number of tokens in a message history.

    With a tokenizer available each message costs its encoded length plus 4
    tokens of structural overhead. Without one, the cost is estimated as
    ``len(text) // 4 + 1`` plus 10 tokens of overhead.

    A missing or ``None`` content (e.g. an assistant message that only carries
    tool calls) is treated as empty text instead of the literal string "None".

    Args:
        messages: List of message dicts with 'role' and 'content'.

    Returns:
        Estimated number of tokens.
    """
    total = 0

    for msg in messages:
        raw = msg.get("content", "")
        # str(None) would be counted as the four characters "None".
        text = "" if raw is None else str(raw)

        if self._tokenizer:
            # Exact count plus per-message overhead (role, structure).
            total += len(self._tokenizer.encode(text)) + 4
        else:
            # Estimate: ~4 characters per token, plus per-message overhead.
            total += len(text) // 4 + 1 + 10

    return total
|
|
184
|
+
|
|
185
|
+
def should_compress(self, messages: List[Dict]) -> bool:
    """
    Tell whether the history has grown past the compression threshold.

    Args:
        messages: Current list of messages.

    Returns:
        True when the token count is strictly greater than
        ``max_tokens * SUMMARY_THRESHOLD`` (truncated to an int).
    """
    used = self.count_tokens(messages)
    limit = int(self.max_tokens * self.SUMMARY_THRESHOLD)
    if used > limit:
        return True
    return False
|
|
198
|
+
|
|
199
|
+
def compress(self, messages: List[Dict]) -> Tuple[List[Dict], str]:
    """
    Legacy entry point: delegates to ``compress_with_summaries``.

    Args:
        messages: Current list of messages.

    Returns:
        Whatever ``compress_with_summaries(messages)`` returns: the compressed
        message list and a text summary of the compression.
    """
    outcome = self.compress_with_summaries(messages)
    return outcome
|
|
211
|
+
|
|
212
|
+
def compress_with_summaries(
    self,
    messages: List[Dict],
    strategy: str = "hierarchical"
) -> Tuple[List[Dict], str]:
    """
    Compress the history using per-section summaries.

    Steps:
    1. Keep the leading system messages (when ``preserve_system`` is set).
    2. Keep the last ``preserve_recent`` messages.
    3. Keep every message that mentions a known architecture decision title.
    4. Replace the remaining runs of messages by section summaries.

    The result is ordered as: leading system messages, the summary of
    previously compressed context (if any), the new section summaries, then
    the remaining preserved messages in their original order. The real system
    prompt therefore always stays ahead of the generated summaries.

    Histories with at most ``MIN_MESSAGES_TO_KEEP`` messages are returned
    unchanged together with an empty summary.

    Side effects: updates the architecture decision index (and saves it),
    appends to the stored section summaries and to the compression history.

    Args:
        messages: Current list of messages.
        strategy: Label recorded in the metadata ("hierarchical",
            "incremental" or "aggressive"); it does not change the algorithm.

    Returns:
        - The compressed list of messages, including summary messages.
        - A human-readable summary of the compression.
    """
    if len(messages) <= self.MIN_MESSAGES_TO_KEEP:
        return messages, ""

    stats = CompressionStats()
    stats.original_tokens = self.count_tokens(messages)

    # Detect and register architecture decisions found in the history.
    self._detect_architecture_decisions(messages)

    preserved_indices: Set[int] = set()

    # 1. Leading system messages (only the uninterrupted run at the start).
    leading_system: List[int] = []
    if self.preserve_system:
        for i, msg in enumerate(messages):
            if msg.get("role") != "system":
                break
            leading_system.append(i)
        preserved_indices.update(leading_system)

    # 2. Most recent messages.
    recent_start = max(0, len(messages) - self.preserve_recent)
    preserved_indices.update(range(recent_start, len(messages)))

    # 3. Messages mentioning a known decision title. Contents are lowercased
    #    once instead of once per decision.
    lowered_contents = [str(m.get("content", "")).lower() for m in messages]
    for decision in self._architecture_decisions:
        needle = decision.title.lower()
        if not needle.strip():
            # An empty title is a substring of everything and would
            # preserve the whole history, defeating the compression.
            continue
        for i, text in enumerate(lowered_contents):
            if needle in text:
                preserved_indices.add(i)

    # 4. Group everything else into sections to be summarised.
    sections_to_summarize = self._create_sections(
        messages,
        preserved_indices,
        section_size=self.SECTION_SIZE
    )

    # 5. Build the compressed list, real system prompt first.
    compressed: List[Dict] = [messages[i] for i in leading_system]

    # Summary of context compressed by earlier runs, if any.
    if self._section_summaries:
        context_summary = self._build_context_summary()
        if context_summary:
            compressed.append({
                "role": "system",
                "content": f"[Contexto previo comprimido]\n{context_summary}"
            })

    # One summary message per newly created section.
    for section in sections_to_summarize:
        section_summary = self._summarize_section(messages, section)
        if section_summary is None:
            continue
        self._section_summaries.append(section_summary)
        stats.messages_summarized += section.message_count
        compressed.append({
            "role": "system",
            "content": f"[Resumen de {section.message_count} mensajes]\n{section_summary.summary}"
        })

    # Remaining preserved messages, in their original order.
    already_emitted = set(leading_system)
    for i, msg in enumerate(messages):
        if i in already_emitted:
            continue
        if i in preserved_indices:
            compressed.append(msg)
        else:
            stats.messages_removed += 1

    # 6. Final statistics.
    stats.compressed_tokens = self.count_tokens(compressed)
    stats.compression_ratio = (
        (stats.original_tokens - stats.compressed_tokens) / stats.original_tokens
        if stats.original_tokens > 0 else 0
    )

    # 7. Metadata for debugging. MD5 is only used to derive a short id.
    metadata = CompressionMetadata(
        compression_id=hashlib.md5(str(time.time()).encode()).hexdigest()[:8],
        timestamp=datetime.now().isoformat(),
        strategy=strategy,
        stats=stats,
        sections_created=len(sections_to_summarize),
        decisions_preserved=len(self._architecture_decisions)
    )
    self._compression_history.append(metadata)

    # 8. Persist the (possibly updated) decision index.
    self._save_decisions()

    # 9. Human-readable report.
    summary = self._build_compression_summary(stats, metadata)

    return compressed, summary
|
|
331
|
+
|
|
332
|
+
def incremental_compress(self, new_message: Dict, messages: List[Dict]) -> Tuple[bool, str]:
    """
    Decide whether adding ``new_message`` should trigger a compression.

    Nothing happens while the history (including the new message) stays at or
    below ``max_tokens * SUMMARY_THRESHOLD``. Above that, the new message is
    checked for an architecture decision, and a full compression is run only
    when the history exceeds 85% of ``max_tokens``.

    NOTE(review): the compressed message list produced by
    ``compress_with_summaries`` is discarded here and ``messages`` is not
    modified; only the status tuple is returned. Confirm against callers
    whether they are expected to re-run the compression themselves.

    Args:
        new_message: Message about to be added.
        messages: Current list of messages (not mutated).

    Returns:
        - True if a compression was performed.
        - A status message.
    """
    # Evaluate the history as it would look with the new message appended.
    candidate_history = messages + [new_message]
    token_total = self.count_tokens(candidate_history)
    soft_limit = int(self.max_tokens * self.SUMMARY_THRESHOLD)

    if token_total <= soft_limit:
        return False, "No se requiere compresión"

    # Register the new message as a decision if it looks like one.
    if self._is_architecture_decision(str(new_message.get("content", ""))):
        self._add_architecture_decision_from_message(new_message, len(messages))

    # Light-touch policy: only compress when close to the limit.
    hard_limit = int(self.max_tokens * 0.85)
    if token_total <= hard_limit:
        return False, "Dentro del umbral de seguridad"

    _compressed, summary = self.compress_with_summaries(candidate_history, strategy="incremental")
    return True, f"Compresión incremental realizada: {summary}"
|
|
365
|
+
|
|
366
|
+
def get_architecture_index(self) -> List[Dict]:
    """
    Return the architecture decision index as plain dictionaries.

    Each stored decision is converted with ``dataclasses.asdict``, so the
    returned structures are copies of the stored records.
    """
    return list(map(asdict, self._architecture_decisions))
|
|
373
|
+
|
|
374
|
+
def add_architecture_decision(
    self,
    title: str,
    description: str,
    rationale: str,
    files_involved: List[str] = None,
    tags: List[str] = None
) -> ArchitectureDecision:
    """
    Register an architecture decision manually and persist the index.

    Args:
        title: Short title of the decision.
        description: What was decided.
        rationale: Why the decision was taken.
        files_involved: Related files; a falsy value becomes an empty list.
        tags: Categorisation tags; a falsy value becomes an empty list.

    Returns:
        The newly created decision.
    """
    # Short id derived from the title and the current time (not a security hash).
    decision_id = hashlib.md5(f"{title}{time.time()}".encode()).hexdigest()[:8]
    created_at = datetime.now().isoformat()

    related_files = files_involved or []
    labels = tags or []

    decision = ArchitectureDecision(
        id=decision_id,
        title=title,
        description=description,
        rationale=rationale,
        timestamp=created_at,
        files_involved=related_files,
        tags=labels,
    )

    self._architecture_decisions.append(decision)
    self._save_decisions()
    return decision
|
|
407
|
+
|
|
408
|
+
def get_context_stats(self, messages: List[Dict]) -> Dict[str, Any]:
    """
    Report usage statistics for the current context.

    Args:
        messages: List of messages.

    Returns:
        Dictionary with token totals, both limits (compression threshold and
        safety limit), usage percentage, whether compression is needed, which
        token counter is in use, and the sizes of the internal indexes.
    """
    used = self.count_tokens(messages)
    budget = self.max_tokens
    soft_limit = int(budget * self.SUMMARY_THRESHOLD)
    hard_limit = int(budget * self.SAFETY_MARGIN)

    # Compression is needed as soon as either limit is exceeded.
    over_limit = used > soft_limit or used > hard_limit

    if budget > 0:
        usage = (used / budget) * 100
    else:
        usage = 0

    if self._tokenizer:
        counter_name = "tiktoken"
    else:
        counter_name = "estimate"

    return {
        "total_tokens": used,
        "max_tokens": budget,
        "threshold_tokens": soft_limit,
        "safety_limit": hard_limit,
        "usage_percent": usage,
        "message_count": len(messages),
        "needs_compression": over_limit,
        "tokenizer": counter_name,
        "architecture_decisions": len(self._architecture_decisions),
        "section_summaries": len(self._section_summaries),
        "compression_count": len(self._compression_history),
    }
|
|
437
|
+
|
|
438
|
+
def get_sliding_window(
    self,
    messages: List[Dict],
    max_tokens: Optional[int] = None
) -> List[Dict]:
    """
    Return the system messages plus the most recent messages that fit a budget.

    All system messages are always included, first. The remaining budget is
    filled with non-system messages starting from the newest one and stopping
    at the first message that does not fit. The selected messages are returned
    in their original chronological order.

    Args:
        messages: Full list of messages.
        max_tokens: Token budget; a falsy value (None or 0) means
            ``self.max_tokens``.

    Returns:
        List of messages that fit within the budget. If the system messages
        alone use up the budget, only they are returned.
    """
    max_tokens = max_tokens or self.max_tokens

    system_messages = [m for m in messages if m.get("role") == "system"]
    other_messages = [m for m in messages if m.get("role") != "system"]

    system_tokens = self.count_tokens(system_messages)
    if max_tokens - system_tokens <= 0:
        return system_messages

    # Walk backwards from the newest message while the budget allows it.
    newest_first: List[Dict] = []
    current_tokens = system_tokens
    for msg in reversed(other_messages):
        msg_tokens = self._estimate_message_tokens(msg)
        if current_tokens + msg_tokens > max_tokens:
            break
        newest_first.append(msg)
        current_tokens += msg_tokens

    # Restore chronological order: the walk above collected newest-first,
    # and a conversation handed to a model must not be reversed.
    newest_first.reverse()
    return system_messages + newest_first
|
|
485
|
+
|
|
486
|
+
def get_compression_history(self) -> List[Dict]:
    """Return the 20 most recent compression records as plain dictionaries."""
    latest = self._compression_history[-20:]
    return list(map(asdict, latest))
|
|
489
|
+
|
|
490
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
491
|
+
# MÉTODOS PRIVADOS
|
|
492
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
493
|
+
|
|
494
|
+
def _score_importance(self, msg: Dict, index: int, total: int) -> MessageImportance:
    """
    Score how important a message is to keep, on a 0-1 scale.

    System messages always score 1.0. Other messages start at 0.5 and gain
    points for: being later in the history, mentioning errors, containing
    code, containing file paths, containing tool results and looking like an
    architecture decision. Very long messages (over 5000 characters) lose a
    little, as they are probably raw outputs. The final score is clamped to
    the [0, 1] range.

    Args:
        msg: Message dict with 'role' and 'content'.
        index: Position of the message in the history.
        total: Number of messages in the history. A non-positive value is
            tolerated and yields a position contribution of 0.

    Returns:
        The score together with the reasons and the estimated token cost.
    """
    content = str(msg.get("content", ""))
    role = msg.get("role", "")

    # System messages are always kept at maximum importance.
    if role == "system":
        return MessageImportance(
            index=index,
            role=role,
            score=1.0,
            reason="system message",
            tokens=self._estimate_message_tokens(msg)
        )

    score = 0.5  # Base score
    reasons = []

    # Relative position: recent messages matter more. Guard against an
    # empty history, where index / total would raise ZeroDivisionError.
    position_score = index / total if total > 0 else 0.0
    score += position_score * 0.2
    reasons.append(f"position={position_score:.2f}")

    # Error-related content.
    error_keywords = ("error", "fail", "exception", "warning", "bug", "fix", "critical")
    content_lower = content.lower()
    if any(kw in content_lower for kw in error_keywords):
        score += 0.2
        reasons.append("has_errors")

    # Code snippets.
    if "```" in content or "def " in content or "class " in content:
        score += 0.15
        reasons.append("has_code")

    # File paths.
    if "/" in content or ".py" in content or ".js" in content:
        score += 0.1
        reasons.append("has_paths")

    # Tool results.
    if "[✓" in content or "[✗" in content or "Tool:" in content:
        score += 0.15
        reasons.append("tool_result")

    # Architecture decisions weigh the most.
    if self._is_architecture_decision(content):
        score += 0.3
        reasons.append("architecture_decision")

    # Very long messages are probably raw outputs.
    if len(content) > 5000:
        score -= 0.1
        reasons.append("very_long")

    return MessageImportance(
        index=index,
        role=role,
        score=min(1.0, max(0.0, score)),
        reason=", ".join(reasons),
        tokens=self._estimate_message_tokens(msg)
    )
|
|
562
|
+
|
|
563
|
+
def _estimate_message_tokens(self, msg: Dict) -> int:
|
|
564
|
+
"""Estima los tokens de un mensaje."""
|
|
565
|
+
content = str(msg.get("content", ""))
|
|
566
|
+
if self._tokenizer:
|
|
567
|
+
return len(self._tokenizer.encode(content)) + 4
|
|
568
|
+
return len(content) // 4 + 10
|
|
569
|
+
|
|
570
|
+
def _is_architecture_decision(self, content: str) -> bool:
|
|
571
|
+
"""Detecta si un contenido contiene una decisión arquitectónica."""
|
|
572
|
+
decision_patterns = [
|
|
573
|
+
"decidí", "decidimos", "vamos a usar", "opté por",
|
|
574
|
+
"la arquitectura será", "el patrón será", "implementaremos con",
|
|
575
|
+
"we decided", "we will use", "the architecture", "the pattern",
|
|
576
|
+
"ADR:", "decision:", "architecture decision"
|
|
577
|
+
]
|
|
578
|
+
content_lower = content.lower()
|
|
579
|
+
return any(pattern in content_lower for pattern in decision_patterns)
|
|
580
|
+
|
|
581
|
+
def _detect_architecture_decisions(self, messages: List[Dict]) -> None:
|
|
582
|
+
"""Detecta y guarda decisiones arquitectónicas de los mensajes."""
|
|
583
|
+
for i, msg in enumerate(messages):
|
|
584
|
+
content = str(msg.get("content", ""))
|
|
585
|
+
if self._is_architecture_decision(content):
|
|
586
|
+
self._add_architecture_decision_from_message(msg, i)
|
|
587
|
+
|
|
588
|
+
def _add_architecture_decision_from_message(self, msg: Dict, index: int) -> None:
    """
    Build an architecture decision from a message and add it to the index.

    The title is the first non-empty line longer than 10 characters that is
    not a code fence, truncated to 80 characters. Nothing is added when a
    decision with the same title already exists. The index is only updated
    in memory; saving is left to the caller.

    Args:
        msg: Message dict whose 'content' holds the decision text.
        index: Position of the message in the history (currently unused).
    """
    import re

    content = str(msg.get("content", ""))

    # Title: first meaningful line of the message.
    title = "Decisión detectada"
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if len(line) > 10 and not line.startswith("```"):
            title = line[:80]
            break

    # Skip decisions that are already registered.
    if any(existing.title == title for existing in self._architecture_decisions):
        return

    # File paths mentioned in the message. The extension group must be
    # non-capturing: with a capturing group re.findall returns only the
    # extensions ("py", "js"), not the paths. The trailing \b keeps "c" from
    # matching the start of "cpp". Duplicates are dropped, order is kept.
    matches = re.findall(r'[\w/.-]+\.(?:py|js|ts|java|go|rs|cpp|c|h)\b', content)
    files = list(dict.fromkeys(matches))

    decision = ArchitectureDecision(
        id=hashlib.md5(f"{title}{time.time()}".encode()).hexdigest()[:8],
        title=title,
        description=content[:500],
        rationale="Detectado automáticamente del contexto",
        timestamp=datetime.now().isoformat(),
        files_involved=files[:10],
        tags=["auto-detected"]
    )
    self._architecture_decisions.append(decision)
|
|
620
|
+
|
|
621
|
+
def _create_sections(
    self,
    messages: List[Dict],
    preserved_indices: Set[int],
    section_size: int = 10
) -> List[SectionSummary]:
    """
    Split the non-preserved messages into sections to be summarised.

    A section is a contiguous run of non-preserved messages. A run is closed
    when a preserved message interrupts it, when it reaches ``section_size``
    messages, or at the end of the history. The returned sections carry only
    their bounds, message count and estimated tokens; the summary text is
    empty at this stage.

    Args:
        messages: List of messages.
        preserved_indices: Indices that must not be part of any section.
        section_size: Maximum number of messages per section.

    Returns:
        List of (still unsummarised) sections in history order.
    """
    def make_section(first: int, last: int, tokens: int) -> SectionSummary:
        # Placeholder section: bounds and token estimate only.
        return SectionSummary(
            start_index=first,
            end_index=last,
            message_count=last - first + 1,
            original_tokens=tokens,
            summary_tokens=0,
            summary=""
        )

    sections = []
    run_start = None
    run_end = None
    run_tokens = 0

    for position, message in enumerate(messages):
        if position in preserved_indices:
            # A preserved message interrupts the open run, if any.
            if run_start is not None:
                sections.append(make_section(run_start, run_end, run_tokens))
                run_start, run_end, run_tokens = None, None, 0
            continue

        if run_start is None:
            run_start = position

        run_end = position
        run_tokens += self._estimate_message_tokens(message)

        # Close the run once it has reached the maximum size.
        if (position - run_start + 1) >= section_size:
            sections.append(make_section(run_start, run_end, run_tokens))
            run_start, run_end, run_tokens = None, None, 0

    # Flush the run that is still open at the end of the history.
    if run_start is not None:
        sections.append(make_section(run_start, run_end, run_tokens))

    return sections
|
|
692
|
+
|
|
693
|
+
def _summarize_section(
    self,
    messages: List[Dict],
    section: SectionSummary
) -> Optional[SectionSummary]:
    """
    Build a heuristic summary for one section of the conversation.

    The messages in ``messages[section.start_index:section.end_index + 1]``
    are scanned for capitalised words (kept as topics), file-like paths,
    the first sentence of every user message and tool names mentioned in
    assistant messages. No model call is made.

    Args:
        messages: Full list of conversation messages; each one is read
            through ``msg.get("role")`` and ``msg.get("content")``.
        section: Section whose index range should be summarised.

    Returns:
        A new SectionSummary copying the range and token data of
        ``section`` and adding the summary text, its estimated token
        count, up to 10 topics and up to 10 files; None when the index
        range selects no messages.
    """
    import re

    section_messages = messages[section.start_index:section.end_index + 1]
    if not section_messages:
        return None

    topic_pattern = re.compile(r'\b[A-Z][a-z]+\b')
    # The extension group must be non-capturing: with a capturing group
    # re.findall returns only the extension ("py") instead of the path.
    # The trailing \b stops "c"/"js" from matching inside "cpp"/"json".
    file_pattern = re.compile(
        r'[\w/.-]+\.(?:py|js|ts|java|go|rs|c|cpp|h|md|yaml|json)\b'
    )

    # Dicts are used as insertion-ordered sets so the resulting lists are
    # deterministic (plain sets of str change order between runs).
    topics: Dict[str, None] = {}
    files: Dict[str, None] = {}
    user_questions: List[str] = []
    assistant_actions: List[str] = []

    for msg in section_messages:
        # A missing or None content (e.g. a pure tool-call message) is
        # treated as empty text rather than the literal string "None".
        raw_content = msg.get("content", "")
        content = "" if raw_content is None else str(raw_content)
        role = msg.get("role", "")

        # At most five topics and five files are taken from each message.
        topics.update(dict.fromkeys(topic_pattern.findall(content)[:5]))
        files.update(dict.fromkeys(file_pattern.findall(content)[:5]))

        if role == "user":
            # Main question: text up to the first period, capped at 100 chars.
            first_sentence = content.split(".")[0][:100]
            if first_sentence:
                user_questions.append(first_sentence)
        elif role == "assistant":
            # Only the first matching action category is recorded per message.
            if "write_file" in content or "edit_file" in content:
                assistant_actions.append("modificó archivos")
            elif "exec_cmd" in content:
                assistant_actions.append("ejecutó comandos")
            elif "grep_search" in content or "glob_search" in content:
                assistant_actions.append("buscó en código")

    summary_parts = []

    if user_questions:
        summary_parts.append(f"Preguntas: {'; '.join(user_questions[:3])}")

    if assistant_actions:
        # De-duplicate while keeping first-seen order.
        actions = list(dict.fromkeys(assistant_actions))
        summary_parts.append(f"Acciones: {', '.join(actions)}")

    if files:
        summary_parts.append(f"Archivos: {', '.join(list(files)[:5])}")

    summary = "\n".join(summary_parts)

    # Rough estimate: ~4 characters per token plus a fixed overhead of 10.
    summary_tokens = len(summary) // 4 + 10

    return SectionSummary(
        start_index=section.start_index,
        end_index=section.end_index,
        message_count=section.message_count,
        original_tokens=section.original_tokens,
        summary_tokens=summary_tokens,
        summary=summary,
        topics=list(topics)[:10],
        files_mentioned=list(files)[:10]
    )
|
|
775
|
+
|
|
776
|
+
def _build_context_summary(self) -> str:
    """Render the stored section summaries as a markdown recap.

    Only the last five entries of ``self._section_summaries`` are
    considered; entries with an empty ``summary`` are skipped but still
    consume their position number. When ``self._architecture_decisions``
    is non-empty, its last five entries are appended as a bullet list
    with the description cut at 100 characters.

    Returns:
        The recap text, or an empty string when there are no section
        summaries at all.
    """
    if not self._section_summaries:
        return ""

    output = ["## Resumen del contexto previo\n"]

    # Last 5 sections, numbered from 1 by position within that window.
    for number, entry in enumerate(self._section_summaries[-5:], start=1):
        if not entry.summary:
            continue
        output.extend((f"### Sección {number}", entry.summary, ""))

    # Architecture decisions follow the section recap.
    if self._architecture_decisions:
        output.append("### Decisiones arquitectónicas")
        output.extend(
            f"- **{item.title}**: {item.description[:100]}..."
            for item in self._architecture_decisions[-5:]
        )
        output.append("")

    return "\n".join(output)
|
|
797
|
+
|
|
798
|
+
def _build_compression_summary(
    self,
    stats: CompressionStats,
    metadata: CompressionMetadata
) -> str:
    """Format a human-readable report of one compression run.

    Args:
        stats: Supplies ``original_tokens``, ``compressed_tokens``,
            ``compression_ratio`` (rendered as a percentage with one
            decimal), ``messages_removed`` and ``messages_summarized``.
        metadata: Supplies ``compression_id``, ``strategy``,
            ``sections_created`` and ``decisions_preserved``.

    Returns:
        Six report lines joined with newline characters.
    """
    header_line = f"[Contexto comprimido - ID: {metadata.compression_id}]"
    strategy_line = f"Strategy: {metadata.strategy}"
    tokens_line = f"Tokens: {stats.original_tokens} → {stats.compressed_tokens} ({stats.compression_ratio:.1%} reducción)"
    messages_line = f"Mensajes: {stats.messages_removed} eliminados, {stats.messages_summarized} resumidos"
    sections_line = f"Secciones creadas: {metadata.sections_created}"
    decisions_line = f"Decisiones preservadas: {metadata.decisions_preserved}"

    return "\n".join((
        header_line,
        strategy_line,
        tokens_line,
        messages_line,
        sections_line,
        decisions_line,
    ))
|
|
813
|
+
|
|
814
|
+
def _load_decisions(self) -> None:
    """Populate ``self._architecture_decisions`` from the decisions file.

    The file is read as UTF-8 JSON and every entry under its
    ``"decisions"`` key is passed as keyword arguments to
    ``ArchitectureDecision``. When the file does not exist the current
    in-memory list is left untouched. Any error while reading, parsing
    or building the objects resets the list to empty.
    """
    if not self._decisions_file.exists():
        return

    try:
        raw_text = self._decisions_file.read_text(encoding="utf-8")
        payload = json.loads(raw_text)
        records = payload.get("decisions", [])
        self._architecture_decisions = [
            ArchitectureDecision(**record) for record in records
        ]
    except Exception:
        # Best effort: an unreadable or malformed file means "no decisions".
        self._architecture_decisions = []
|
|
824
|
+
|
|
825
|
+
def _save_decisions(self) -> None:
    """Persist the architecture decisions to ``self._decisions_file``.

    The file receives a UTF-8 JSON object with two keys: ``"decisions"``
    (each decision converted with ``dataclasses.asdict``) and
    ``"updated"`` (local ISO-8601 timestamp of the write).

    Saving is best effort: the parent directory is created when missing,
    and any failure is logged as a warning instead of being raised, so
    callers are never interrupted by a persistence problem.
    """
    try:
        data = {
            "decisions": [asdict(d) for d in self._architecture_decisions],
            "updated": datetime.now().isoformat()
        }
        # Serialise first so a failure here does not touch the disk.
        serialized = json.dumps(data, indent=2, ensure_ascii=False)

        # The target directory may not exist yet; without this every save
        # would fail until something else created it.
        self._decisions_file.parent.mkdir(parents=True, exist_ok=True)
        self._decisions_file.write_text(serialized, encoding="utf-8")
    except Exception as exc:
        # Stay best effort, but leave a trace instead of failing silently.
        import logging

        logging.getLogger(__name__).warning(
            "Could not save architecture decisions to %s: %s",
            getattr(self, "_decisions_file", None),
            exc,
        )
|