evolutia-0.1.1-py3-none-any.whl → evolutia-0.1.2-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- evolutia/__init__.py +9 -0
- evolutia/async_llm_providers.py +157 -0
- evolutia/cache/__init__.py +9 -0
- evolutia/cache/exercise_cache.py +226 -0
- evolutia/cache/llm_cache.py +487 -0
- evolutia/complexity_validator.py +33 -31
- evolutia/config_manager.py +53 -40
- evolutia/evolutia_engine.py +341 -66
- evolutia/exam_generator.py +44 -43
- evolutia/exceptions.py +38 -0
- evolutia/exercise_analyzer.py +54 -91
- evolutia/imports.py +175 -0
- evolutia/llm_providers.py +223 -61
- evolutia/material_extractor.py +166 -88
- evolutia/rag/rag_indexer.py +107 -90
- evolutia/rag/rag_retriever.py +130 -103
- evolutia/retry_utils.py +280 -0
- evolutia/utils/json_parser.py +29 -19
- evolutia/utils/markdown_parser.py +185 -159
- evolutia/utils/math_extractor.py +153 -144
- evolutia/validation/__init__.py +1 -0
- evolutia/validation/args_validator.py +253 -0
- evolutia/validation/config_validator.py +502 -0
- evolutia/variation_generator.py +82 -70
- evolutia-0.1.2.dist-info/METADATA +536 -0
- evolutia-0.1.2.dist-info/RECORD +37 -0
- evolutia_cli.py +22 -9
- evolutia-0.1.1.dist-info/METADATA +0 -221
- evolutia-0.1.1.dist-info/RECORD +0 -27
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/WHEEL +0 -0
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/entry_points.txt +0 -0
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/top_level.txt +0 -0
evolutia/__init__.py
CHANGED
@@ -1,5 +1,14 @@
 from .evolutia_engine import EvolutiaEngine
 from .variation_generator import VariationGenerator
 from .llm_providers import LLMProvider, get_provider
+from .exceptions import (
+    EvolutiaError,
+    ConfigurationError,
+    ProviderError,
+    ValidationError,
+    MaterialExtractionError,
+    ExamGenerationError,
+    RAGError
+)
 
 __version__ = "0.1.1"
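With these re-exports, calling code can catch the package's error types from the top-level namespace. A minimal sketch, not part of the package: it assumes EvolutiaError is the base class of the hierarchy and that EvolutiaEngine takes a config path, neither of which this diff shows.

from evolutia import EvolutiaEngine, ConfigurationError, ProviderError, EvolutiaError

try:
    engine = EvolutiaEngine("config.yaml")  # hypothetical constructor argument
except (ConfigurationError, ProviderError) as exc:
    print(f"Setup failed: {exc}")
except EvolutiaError as exc:
    # Assumed base class, so this catches any other EvolutIA-specific error
    print(f"EvolutIA error: {exc}")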
evolutia/async_llm_providers.py
ADDED
@@ -0,0 +1,157 @@
+"""
+Proveedores asíncronos de LLM para EvolutIA.
+Usa asyncio para llamadas concurrentes a APIs de LLM.
+"""
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+from typing import Optional, Dict, Any
+from functools import wraps
+
+from .retry_utils import retry_async
+
+logger = logging.getLogger(__name__)
+
+
+def async_sync_wrapper(sync_func):
+    """
+    Wrapper para ejecutar funciones síncronas de forma asíncrona usando run_in_executor.
+
+    Args:
+        sync_func: Función síncrona a envolver
+
+    Returns:
+        Función asíncrona que ejecuta la función síncrona en un executor
+    """
+    @wraps(sync_func)
+    async def async_wrapper(*args, **kwargs):
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, sync_func, *args, **kwargs)
+    return async_wrapper
+
+
+class AsyncLLMProvider(ABC):
+    """Clase base abstracta para proveedores asíncronos de LLM."""
+
+    DEFAULT_SYSTEM_PROMPT = "Eres un experto en métodos matemáticos para física e ingeniería."
+    DEFAULT_MAX_TOKENS = 2000
+    DEFAULT_TEMPERATURE = 0.7
+
+    def __init__(self, model_name: Optional[str] = None):
+        """
+        Inicializa el proveedor asíncrono de LLM.
+
+        Args:
+            model_name: Nombre del modelo a usar
+        """
+        self.model_name = model_name
+
+    @abstractmethod
+    async def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        """Genera contenido de forma asíncrona."""
+        pass
+
+
+class AsyncOpenAIProvider(AsyncLLMProvider):
+    """Proveedor asíncrono para OpenAI."""
+
+    def __init__(self, model_name: Optional[str] = None):
+        super().__init__(model_name)
+        self.sync_provider = None
+
+        try:
+            from evolutia.llm_providers import OpenAIProvider
+            self.sync_provider = OpenAIProvider(model_name=model_name)
+            logger.debug("[AsyncOpenAIProvider] Inicializado con proveedor síncrono")
+        except Exception as e:
+            logger.error(f"[AsyncOpenAIProvider] Error inicializando proveedor síncrono: {e}")
+
+    @retry_async(max_retries=3, initial_delay=1.0, max_delay=10.0)
+    async def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        """Genera contenido usando el proveedor síncrono en un executor."""
+        if not self.sync_provider:
+            return None
+
+        loop = asyncio.get_event_loop()
+        result = await loop.run_in_executor(
+            None,
+            self.sync_provider.generate_content,
+            prompt,
+            system_prompt,
+            kwargs
+        )
+        return result
+
+
+class AsyncAnthropicProvider(AsyncLLMProvider):
+    """Proveedor asíncrono para Anthropic."""
+
+    def __init__(self, model_name: Optional[str] = None):
+        super().__init__(model_name)
+        self.sync_provider = None
+
+        try:
+            from evolutia.llm_providers import AnthropicProvider
+            self.sync_provider = AnthropicProvider(model_name=model_name)
+            logger.debug("[AsyncAnthropicProvider] Inicializado con proveedor síncrono")
+        except Exception as e:
+            logger.error(f"[AsyncAnthropicProvider] Error inicializando proveedor síncrono: {e}")
+
+    @retry_async(max_retries=3, initial_delay=1.0, max_delay=10.0)
+    async def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        """Genera contenido usando el proveedor síncrono en un executor."""
+        if not self.sync_provider:
+            return None
+
+        loop = asyncio.get_event_loop()
+        result = await loop.run_in_executor(
+            None,
+            self.sync_provider.generate_content,
+            prompt,
+            system_prompt,
+            kwargs
+        )
+        return result
+
+
+class AsyncGeminiProvider(AsyncLLMProvider):
+    """Proveedor asíncrono para Gemini."""
+
+    def __init__(self, model_name: Optional[str] = None):
+        super().__init__(model_name)
+        self.sync_provider = None
+
+        try:
+            from evolutia.llm_providers import GeminiProvider
+            self.sync_provider = GeminiProvider(model_name=model_name)
+            logger.debug("[AsyncGeminiProvider] Inicializado con proveedor síncrono")
+        except Exception as e:
+            logger.error(f"[AsyncGeminiProvider] Error inicializando proveedor síncrono: {e}")
+
+    @retry_async(max_retries=3, initial_delay=1.0, max_delay=10.0)
+    async def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        """Genera contenido usando el proveedor síncrono en un executor."""
+        if not self.sync_provider:
+            return None
+
+        loop = asyncio.get_event_loop()
+        result = await loop.run_in_executor(
+            None,
+            self.sync_provider.generate_content,
+            prompt,
+            system_prompt,
+            kwargs
+        )
+        return result
+
+
+def get_async_provider(provider_name: str, **kwargs) -> AsyncLLMProvider:
+    """Factory method para obtener un proveedor asíncrono."""
+    if provider_name == "openai":
+        return AsyncOpenAIProvider(**kwargs)
+    elif provider_name == "anthropic":
+        return AsyncAnthropicProvider(**kwargs)
+    elif provider_name == "gemini":
+        return AsyncGeminiProvider(**kwargs)
+    else:
+        raise ValueError(f"Proveedor asíncrono desconocido: {provider_name}")
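The async providers are thin wrappers that push the existing synchronous providers onto the default executor, so the practical benefit is fanning out several prompts concurrently. A minimal usage sketch, not from the package: the model name and prompts are placeholders, and it assumes the matching API key is already configured for the underlying sync provider.

import asyncio
from evolutia.async_llm_providers import get_async_provider

async def main():
    # "gpt-4o-mini" is illustrative; any model accepted by the sync OpenAIProvider works
    provider = get_async_provider("openai", model_name="gpt-4o-mini")
    prompts = ["Placeholder prompt 1", "Placeholder prompt 2"]
    # Each call runs the sync provider in a thread of the default executor;
    # gather() lets the requests overlap instead of running back to back.
    results = await asyncio.gather(*(provider.generate_content(p) for p in prompts))
    for prompt, result in zip(prompts, results):
        print(prompt, "->", (result or "<no result>")[:80])

asyncio.run(main())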
evolutia/cache/__init__.py
ADDED
@@ -0,0 +1,9 @@
+"""
+Paquete de caché para EvolutIA.
+Proporciona sistemas de caché para respuestas LLM, análisis de ejercicios y embeddings.
+"""
+
+from .llm_cache import LLMCache
+from .exercise_cache import ExerciseAnalysisCache
+
+__all__ = ['LLMCache', 'ExerciseAnalysisCache']
evolutia/cache/exercise_cache.py
ADDED
@@ -0,0 +1,226 @@
+"""
+Caché de análisis de ejercicios para EvolutIA.
+Reduce tiempo de ejecución almacenando análisis de ejercicios.
+"""
+import hashlib
+import json
+import logging
+from pathlib import Path
+from typing import Optional, Dict
+
+logger = logging.getLogger(__name__)
+
+
+class ExerciseAnalysisCache:
+    """
+    Sistema de caché para análisis de ejercicios.
+
+    Características:
+    - Persistencia en disco por defecto
+    - Basado en hash del contenido del ejercicio
+    - Valida integridad del caché
+    - Logging de cache hits y misses
+    """
+
+    def __init__(self, cache_dir: Optional[Path] = None, enabled: bool = True):
+        """
+        Inicializa el caché de análisis de ejercicios.
+
+        Args:
+            cache_dir: Directorio para caché (defecto: ./storage/cache/exercises)
+            enabled: Si False, el caché está deshabilitado (pasa a través)
+        """
+        self.enabled = enabled
+
+        if cache_dir is None:
+            cache_dir = Path('./storage/cache/exercises')
+        self.cache_dir = Path(cache_dir)
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        self.hits = 0
+        self.misses = 0
+
+        if self.enabled:
+            logger.info(f"[ExerciseAnalysisCache] Inicializado: {self.cache_dir}")
+        else:
+            logger.info("[ExerciseAnalysisCache] Deshabilitado")
+
+    def _get_cache_file(self, content_hash: str) -> Path:
+        """
+        Obtiene la ruta del archivo de caché para un hash.
+
+        Args:
+            content_hash: Hash del contenido del ejercicio
+
+        Returns:
+            Ruta del archivo de caché
+        """
+        return self.cache_dir / f"{content_hash}.json"
+
+    def _hash_content(self, content: str) -> str:
+        """
+        Genera un hash SHA256 del contenido.
+
+        Args:
+            content: Contenido del ejercicio
+
+        Returns:
+            Hash SHA256 hexadecimal
+        """
+        return hashlib.sha256(content.encode()).hexdigest()
+
+    def get(self, exercise: Dict) -> Optional[Dict]:
+        """
+        Obtiene el análisis cacheado de un ejercicio.
+
+        Args:
+            exercise: Diccionario del ejercicio con campo 'content'
+
+        Returns:
+            Análisis cacheado si existe, None en caso contrario
+        """
+        if not self.enabled:
+            return None
+
+        content = exercise.get('content', '')
+        if not content:
+            self.misses += 1
+            return None
+
+        content_hash = self._hash_content(content)
+        cache_file = self._get_cache_file(content_hash)
+
+        if not cache_file.exists():
+            self.misses += 1
+            logger.debug("[ExerciseAnalysisCache] Cache miss")
+            return None
+
+        try:
+            with open(cache_file, 'r', encoding='utf-8') as f:
+                analysis = json.load(f)
+
+            self.hits += 1
+            logger.info(
+                f"[ExerciseAnalysisCache] Cache HIT "
+                f"(hit_rate={self.hit_rate:.1%})"
+            )
+            return analysis
+        except Exception as e:
+            logger.warning(f"[ExerciseAnalysisCache] Error leyendo caché: {e}")
+            self.misses += 1
+            return None
+
+    def put(self, exercise: Dict, analysis: Dict) -> bool:
+        """
+        Almacena el análisis de un ejercicio en caché.
+
+        Args:
+            exercise: Diccionario del ejercicio con campo 'content'
+            analysis: Análisis del ejercicio
+
+        Returns:
+            True si se almacenó exitosamente, False si hubo error
+        """
+        if not self.enabled:
+            return False
+
+        content = exercise.get('content', '')
+        if not content:
+            logger.debug("[ExerciseAnalysisCache] Rechazando ejercicio sin contenido")
+            return False
+
+        # Validar que el análisis tiene los campos mínimos
+        required_fields = ['total_complexity']
+        if not all(field in analysis for field in required_fields):
+            logger.warning("[ExerciseAnalysisCache] Análisis incompleto, no cachéando")
+            return False
+
+        content_hash = self._hash_content(content)
+        cache_file = self._get_cache_file(content_hash)
+
+        try:
+            # Almacenar análisis con metadata
+            cache_data = {
+                'analysis': analysis,
+                'metadata': {
+                    'cached_at': None,  # Se llenará después
+                    'content_length': len(content),
+                    'exercise_label': exercise.get('label', 'unknown')
+                }
+            }
+
+            import time
+            cache_data['metadata']['cached_at'] = time.time()
+
+            with open(cache_file, 'w', encoding='utf-8') as f:
+                json.dump(cache_data, f, indent=2)
+
+            logger.debug(f"[ExerciseAnalysisCache] Análisis cachéado: {cache_file.name}")
+            return True
+        except Exception as e:
+            logger.warning(f"[ExerciseAnalysisCache] Error guardando caché: {e}")
+            return False
+
+    def clear(self):
+        """Limpia todo el caché de ejercicios."""
+        if not self.enabled:
+            return
+
+        initial_count = len(list(self.cache_dir.glob('*.json')))
+
+        for cache_file in self.cache_dir.glob('*.json'):
+            try:
+                cache_file.unlink()
+            except Exception as e:
+                logger.warning(f"[ExerciseAnalysisCache] Error eliminando {cache_file}: {e}")
+
+        self.hits = 0
+        self.misses = 0
+
+        logger.info(f"[ExerciseAnalysisCache] Caché limpiado (eliminados {initial_count} archivos)")
+
+    def get_stats(self) -> Dict[str, any]:
+        """
+        Obtiene estadísticas del caché.
+
+        Returns:
+            Diccionario con estadísticas
+        """
+        cache_files = list(self.cache_dir.glob('*.json'))
+        total_requests = self.hits + self.misses
+        hit_rate = self.hits / total_requests if total_requests > 0 else 0.0
+
+        return {
+            'entries': len(cache_files),
+            'hits': self.hits,
+            'misses': self.misses,
+            'hit_rate': hit_rate,
+            'enabled': self.enabled,
+            'cache_dir': str(self.cache_dir)
+        }
+
+    @property
+    def hit_rate(self) -> float:
+        """
+        Tasa de aciertos del caché.
+
+        Returns:
+            Proporción de aciertos (0.0 a 1.0)
+        """
+        total_requests = self.hits + self.misses
+        return self.hits / total_requests if total_requests > 0 else 0.0
+
+    def __len__(self) -> int:
+        """Retorna el número de entradas en caché."""
+        if not self.enabled:
+            return 0
+        return len(list(self.cache_dir.glob('*.json')))
+
+    def __repr__(self) -> str:
+        """Representación del caché."""
+        status = "enabled" if self.enabled else "disabled"
+        return (
+            f"ExerciseAnalysisCache(status={status}, "
+            f"entries={len(self)}, hits={self.hits}, "
+            f"misses={self.misses}, hit_rate={self.hit_rate:.1%})"
+        )
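ExerciseAnalysisCache keys entries by a SHA-256 hash of the exercise's 'content' field and refuses to store analyses missing 'total_complexity'. Note that get() returns the stored wrapper (with 'analysis' and 'metadata' keys) rather than the bare analysis passed to put(). A minimal round-trip sketch with made-up exercise data:

from pathlib import Path
from evolutia.cache.exercise_cache import ExerciseAnalysisCache

cache = ExerciseAnalysisCache(cache_dir=Path("/tmp/evolutia-exercises"))

exercise = {"label": "ej-1.3", "content": "Calcule la transformada de Laplace de t*e^t."}
analysis = {"total_complexity": 4.5}

if cache.get(exercise) is None:      # first lookup: miss, nothing stored yet
    cache.put(exercise, analysis)    # written as <sha256-of-content>.json in cache_dir

cached = cache.get(exercise)         # second lookup: hit
if cached:
    print(cached["analysis"]["total_complexity"])  # original analysis sits under 'analysis'
print(cache.get_stats())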