evolutia-0.1.0-py3-none-any.whl → evolutia-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutia/__init__.py +10 -1
- evolutia/async_llm_providers.py +157 -0
- evolutia/cache/__init__.py +9 -0
- evolutia/cache/exercise_cache.py +226 -0
- evolutia/cache/llm_cache.py +487 -0
- evolutia/complexity_validator.py +33 -31
- evolutia/config_manager.py +60 -41
- evolutia/evolutia_engine.py +341 -66
- evolutia/exam_generator.py +44 -43
- evolutia/exceptions.py +38 -0
- evolutia/exercise_analyzer.py +54 -91
- evolutia/imports.py +175 -0
- evolutia/llm_providers.py +224 -60
- evolutia/material_extractor.py +166 -88
- evolutia/rag/rag_indexer.py +107 -90
- evolutia/rag/rag_retriever.py +130 -103
- evolutia/retry_utils.py +280 -0
- evolutia/utils/json_parser.py +29 -19
- evolutia/utils/markdown_parser.py +185 -159
- evolutia/utils/math_extractor.py +153 -144
- evolutia/validation/__init__.py +1 -0
- evolutia/validation/args_validator.py +253 -0
- evolutia/validation/config_validator.py +502 -0
- evolutia/variation_generator.py +252 -50
- evolutia-0.1.2.dist-info/METADATA +536 -0
- evolutia-0.1.2.dist-info/RECORD +37 -0
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/WHEEL +1 -1
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/licenses/LICENSE +1 -1
- evolutia_cli.py +30 -7
- evolutia-0.1.0.dist-info/METADATA +0 -723
- evolutia-0.1.0.dist-info/RECORD +0 -27
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/entry_points.txt +0 -0
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/top_level.txt +0 -0
evolutia/llm_providers.py
CHANGED
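The functional core of this diff is twofold: every provider can now be constructed with an optional response cache, and the OpenAI-compatible providers (OpenAI, local Ollama/LM Studio, plus the new DeepSeek and generic endpoints) share a single `OpenAICompatibleProvider` base class. The providers only ever call `cache.get(prompt, provider, model)` and `cache.put(prompt, provider, model, content, metadata=...)`, so any object exposing those two methods can be plugged in. Below is a minimal in-memory sketch of that interface; the shipped implementation lives in `evolutia/cache/llm_cache.py`, and the class here is a hypothetical stand-in for illustration only.

```python
from typing import Any, Dict, Optional


class InMemoryLLMCache:
    """Hypothetical stand-in for evolutia.cache.llm_cache.LLMCache.

    It implements only the two calls the providers make in the diff below:
    get(prompt, provider, model) and put(prompt, provider, model, content, metadata=None).
    """

    def __init__(self) -> None:
        # Responses keyed by (prompt, provider, model).
        self._entries: Dict[tuple, str] = {}

    def get(self, prompt: str, provider: str, model: str) -> Optional[str]:
        # Return a previously stored response, or None on a cache miss.
        return self._entries.get((prompt, provider, model))

    def put(self, prompt: str, provider: str, model: str, content: str,
            metadata: Optional[Dict[str, Any]] = None) -> None:
        # Store the generated response; metadata is accepted but ignored in this sketch.
        self._entries[(prompt, provider, model)] = content
```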
```diff
@@ -4,21 +4,38 @@ Módulo que define los proveedores de LLM abstractos y concretos.
 import os
 import logging
 from abc import ABC, abstractmethod
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union, TYPE_CHECKING
 
 logger = logging.getLogger(__name__)
 
+if TYPE_CHECKING:
+    from evolutia.cache.llm_cache import LLMCache
+
 class LLMProvider(ABC):
     """Clase base abstracta para proveedores de LLM."""
 
-
+    DEFAULT_SYSTEM_PROMPT = "Eres un experto en métodos matemáticos para física e ingeniería."
+    DEFAULT_MAX_TOKENS = 2000
+    DEFAULT_TEMPERATURE = 0.7
+
+    def __init__(self, model_name: Optional[str] = None, cache: Optional['LLMCache'] = None):
+        """
+        Inicializa el proveedor de LLM.
+
+        Args:
+            model_name: Nombre del modelo a usar
+            cache: Instancia opcional de LLMCache para cachear respuestas
+        """
         self.model_name = model_name
+        self.client = None
+        self.genai = None
+        self.cache = cache
         self.api_key = self._get_api_key()
         if self.api_key:
             self._setup_client()
-
-
-
+
+        if self.cache:
+            logger.debug(f"[LLMProvider] Caché de LLM habilitado")
 
     @abstractmethod
     def _get_api_key(self) -> Optional[str]:
@@ -35,30 +52,86 @@ class LLMProvider(ABC):
         """Genera contenido a partir de un prompt."""
         pass
 
+    def _get_provider_name(self) -> str:
+        """
+        Obtiene el nombre del proveedor para usar en el caché.
 
-
-
+        Returns:
+            Nombre del proveedor (ej: 'OpenAI', 'Anthropic')
+        """
+        return self.__class__.__name__.replace('Provider', '')
 
-    def _get_api_key(self) -> Optional[str]:
-        key = os.getenv("OPENAI_API_KEY")
-        if not key:
-            logger.warning("OPENAI_API_KEY no encontrada")
-        return key
 
-
+class OpenAICompatibleProvider(LLMProvider):
+    """Base clase para proveedores compatibles con OpenAI API."""
+
+    def __init__(self, model_name: Optional[str] = None, base_url: Optional[str] = None, timeout: Optional[float] = None, cache: Optional['LLMCache'] = None):
+        """
+        Inicializa el proveedor OpenAI-compatible.
+
+        Args:
+            model_name: Nombre del modelo a usar
+            base_url: URL base de la API (para proveedores compatibles)
+            timeout: Timeout para las llamadas a la API
+            cache: Instancia opcional de LLMCache
+        """
+        self.base_url = base_url
+        self.timeout = timeout
+        super().__init__(model_name, cache=cache)
+
+    def _setup_openai_client(self, api_key: Optional[str], base_url: Optional[str] = None, timeout: Optional[float] = None) -> bool:
+        """
+        Configura cliente OpenAI compartido.
+
+        Returns:
+            True si la configuración fue exitosa
+            False si no se pudo configurar (cliente no inicializado)
+        """
+        if not api_key:
+            return False
         try:
             from openai import OpenAI
-
+            client_kwargs = {"api_key": api_key}
+            if base_url:
+                client_kwargs["base_url"] = base_url
+            if timeout is not None:
+                client_kwargs["timeout"] = timeout
+            self.client = OpenAI(**client_kwargs)
+            logger.info(f"[OpenAICompatibleProvider] Cliente OpenAI inicializado (base_url={base_url}, timeout={timeout})")
+            return True
         except ImportError:
-            logger.error("Biblioteca openai no instalada. Instala con: pip install openai")
+            logger.error("[OpenAICompatibleProvider] Biblioteca openai no instalada. Instala con: pip install openai")
+            self.client = None
+            return False
+        except Exception as e:
+            logger.error(f"[OpenAICompatibleProvider] Error inesperado inicializando cliente OpenAI: {e}")
             self.client = None
+            return False
+
+    def _openai_generate_content(self, provider_name: str, default_model: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        """
+        Genera contenido usando API OpenAI-compatible con caché.
+
+        Returns:
+            Contenido generado si la llamada fue exitosa
+            None si hubo un error de API o configuración
+        """
+        if not self.client:
+            logger.error(f"[{provider_name}] Cliente no inicializado, no se puede generar contenido")
+            return None
 
-
-
-
-
-
-
+        system_content = system_prompt or self.DEFAULT_SYSTEM_PROMPT
+        model = self.model_name or default_model
+        prompt = kwargs.get("prompt", "")
+
+        # Intentar caché primero
+        if self.cache:
+            cached = self.cache.get(prompt, self._get_provider_name(), model)
+            if cached:
+                logger.info(f"[{provider_name}] Contenido obtenido del caché (modelo={model})")
+                return cached
+
+        # Generar respuesta
         try:
             response = self.client.chat.completions.create(
                 model=model,
@@ -66,15 +139,44 @@ class OpenAIProvider(LLMProvider):
                     {"role": "system", "content": system_content},
                     {"role": "user", "content": prompt}
                 ],
-                temperature=kwargs.get("temperature",
-                max_tokens=kwargs.get("max_tokens",
+                temperature=kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
+                max_tokens=kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS)
             )
-
+            content = response.choices[0].message.content.strip()
+            logger.info(f"[{provider_name}] Contenido generado exitosamente (modelo={model}, longitud={len(content)})")
+
+            # Guardar en caché
+            if self.cache:
+                metadata = {
+                    'provider': provider_name,
+                    'model': model,
+                    'temperature': kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
+                    'max_tokens': kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS)
+                }
+                self.cache.put(prompt, provider_name, model, content, metadata=metadata)
+
+            return content
         except Exception as e:
-            logger.error(f"Error
+            logger.error(f"[{provider_name}] Error en llamada a API: {e}")
             return None
 
 
+class OpenAIProvider(OpenAICompatibleProvider):
+    """Proveedor para OpenAI."""
+
+    def _get_api_key(self) -> Optional[str]:
+        key = os.getenv("OPENAI_API_KEY")
+        if not key:
+            logger.warning("OPENAI_API_KEY no encontrada")
+        return key
+
+    def _setup_client(self):
+        self._setup_openai_client(self.api_key)
+
+    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        return self._openai_generate_content("OpenAI", "gpt-4", system_prompt, prompt=prompt, **kwargs)
+
+
 class AnthropicProvider(LLMProvider):
     """Proveedor para Anthropic (Claude)."""
 
@@ -97,7 +199,16 @@ class AnthropicProvider(LLMProvider):
 
         system_content = system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."
         model = self.model_name or "claude-3-opus-20240229"
+        provider_name = self._get_provider_name()
+
+        # Intentar caché primero
+        if self.cache:
+            cached = self.cache.get(prompt, provider_name, model)
+            if cached:
+                logger.info(f"[{provider_name}] Contenido obtenido del caché (modelo={model})")
+                return cached
 
+        # Generar respuesta
         try:
             message = self.client.messages.create(
                 model=model,
@@ -106,9 +217,22 @@ class AnthropicProvider(LLMProvider):
                 system=system_content,
                 messages=[{"role": "user", "content": prompt}]
             )
-
+            content = message.content[0].text.strip()
+            logger.info(f"[{provider_name}] Contenido generado exitosamente (modelo={model}, longitud={len(content)})")
+
+            # Guardar en caché
+            if self.cache:
+                metadata = {
+                    'provider': provider_name,
+                    'model': model,
+                    'temperature': kwargs.get("temperature", 0.7),
+                    'max_tokens': kwargs.get("max_tokens", 2000)
+                }
+                self.cache.put(prompt, provider_name, model, content, metadata=metadata)
+
+            return content
         except Exception as e:
-            logger.error(f"Error llamando a Anthropic API: {e}")
+            logger.error(f"[{provider_name}] Error llamando a Anthropic API: {e}")
             return None
 
 
@@ -135,6 +259,14 @@ class GeminiProvider(LLMProvider):
 
         model_name = self.model_name or "gemini-2.5-pro"
         if model_name == 'gemini': model_name = "gemini-2.5-pro"
+        provider_name = self._get_provider_name()
+
+        # Intentar caché primero
+        if self.cache:
+            cached = self.cache.get(prompt, provider_name, model_name)
+            if cached:
+                logger.info(f"[{provider_name}] Contenido obtenido del caché (modelo={model_name})")
+                return cached
 
         generation_config = {
             "temperature": kwargs.get("temperature", 0.7),
@@ -144,6 +276,7 @@ class GeminiProvider(LLMProvider):
             "response_mime_type": "text/plain",
         }
 
+        # Generar respuesta
         try:
             model_instance = self.genai.GenerativeModel(
                 model_name=model_name,
@@ -153,54 +286,81 @@ class GeminiProvider(LLMProvider):
                 system_instruction=system_prompt
             )
             response = model_instance.generate_content(prompt)
-
+            content = response.text
+            logger.info(f"[{provider_name}] Contenido generado exitosamente (modelo={model_name}, longitud={len(content)})")
+
+            # Guardar en caché
+            if self.cache:
+                metadata = {
+                    'provider': provider_name,
+                    'model': model_name,
+                    'temperature': kwargs.get("temperature", 0.7),
+                    'max_tokens': kwargs.get("max_tokens", 8192)
+                }
+                self.cache.put(prompt, provider_name, model_name, content, metadata=metadata)
+
+            return content
         except Exception as e:
-            logger.error(f"Error llamando a Gemini API: {e}")
+            logger.error(f"[{provider_name}] Error llamando a Gemini API: {e}")
             return None
 
 
-class LocalProvider(
+class LocalProvider(OpenAICompatibleProvider):
     """Proveedor para modelos locales (Ollama/LM Studio) vía OpenAI compatible API."""
 
-    def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1"):
-
-        super().__init__(model_name)
+    def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1", cache: Optional['LLMCache'] = None):
+        super().__init__(model_name, base_url=base_url, timeout=300.0, cache=cache)
 
     def _get_api_key(self) -> Optional[str]:
         return "not-needed"
 
     def _setup_client(self):
-
-            from openai import OpenAI
-            self.client = OpenAI(
-                base_url=self.base_url,
-                api_key=self.api_key,
-                timeout=300.0
-            )
-        except ImportError:
-            logger.error("Biblioteca openai no instalada")
-            self.client = None
+        self._setup_openai_client(self.api_key, base_url=self.base_url, timeout=self.timeout)
 
     def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
-
+        return self._openai_generate_content("Local", "llama3", system_prompt, prompt=prompt, **kwargs)
 
-        system_content = system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."
-        model = self.model_name or "llama3"
 
-
-
-
-
-
-
-
-
-
-            )
-
-
-
-
+class DeepSeekProvider(OpenAICompatibleProvider):
+    """Proveedor para DeepSeek (OpenAI-compatible)."""
+
+    def __init__(self, model_name: Optional[str] = None, cache: Optional['LLMCache'] = None):
+        super().__init__(model_name, base_url="https://api.deepseek.com", cache=cache)
+
+    def _get_api_key(self) -> Optional[str]:
+        key = os.getenv("DEEPSEEK_API_KEY")
+        if not key:
+            logger.warning("DEEPSEEK_API_KEY no encontrada")
+        return key
+
+    def _setup_client(self):
+        self._setup_openai_client(self.api_key, base_url=self.base_url)
+
+    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        return self._openai_generate_content("DeepSeek", "deepseek-chat", system_prompt, prompt=prompt, **kwargs)
+
+
+class GenericProvider(OpenAICompatibleProvider):
+    """Proveedor Genérico Compatible con OpenAI (Groq, Mistral, etc)."""
+
+    def __init__(self, model_name: Optional[str] = None, base_url: Optional[str] = None, cache: Optional['LLMCache'] = None):
+        super().__init__(model_name, base_url=base_url or os.getenv("GENERIC_BASE_URL"), cache=cache)
+
+    def _get_api_key(self) -> Optional[str]:
+        key = os.getenv("GENERIC_API_KEY")
+        if not key:
+            logger.warning("GENERIC_API_KEY no encontrada")
+        return key
+
+    def _setup_client(self):
+        if not self.base_url:
+            logger.warning("GENERIC_BASE_URL no definida")
+        self._setup_openai_client(self.api_key, base_url=self.base_url)
+
+    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        default_system_prompt = self.DEFAULT_SYSTEM_PROMPT
+        default_model = os.getenv("GENERIC_MODEL") or "gpt-3.5-turbo"
+        return self._openai_generate_content("Generic", default_model, system_prompt or default_system_prompt, prompt=prompt, **kwargs)
 
 
 def get_provider(provider_name: str, **kwargs) -> LLMProvider:
@@ -213,5 +373,9 @@ def get_provider(provider_name: str, **kwargs) -> LLMProvider:
         return GeminiProvider(**kwargs)
     elif provider_name == "local":
         return LocalProvider(**kwargs)
+    elif provider_name == "deepseek":
+        return DeepSeekProvider(**kwargs)
+    elif provider_name == "generic":
+        return GenericProvider(**kwargs)
     else:
         raise ValueError(f"Proveedor desconocido: {provider_name}")
```
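With the new subclasses registered in `get_provider`, the factory also accepts `"deepseek"` and `"generic"`. The sketch below is assembled only from names visible in the diff above (provider names, constructor keywords, environment variables); the key values and the example base URL are placeholders, not documented defaults.

```python
import os

from evolutia.llm_providers import get_provider

# DeepSeek reads DEEPSEEK_API_KEY and talks to https://api.deepseek.com.
os.environ.setdefault("DEEPSEEK_API_KEY", "sk-placeholder")  # placeholder value
deepseek = get_provider("deepseek", model_name="deepseek-chat")

# The generic provider covers any OpenAI-compatible endpoint (Groq, Mistral, etc.)
# and is configured through GENERIC_BASE_URL, GENERIC_API_KEY and, optionally, GENERIC_MODEL.
os.environ.setdefault("GENERIC_BASE_URL", "https://api.example.com/v1")  # placeholder
os.environ.setdefault("GENERIC_API_KEY", "sk-placeholder")               # placeholder
generic = get_provider("generic")

# Any provider can also take cache=<LLMCache-like object>, as shown in the constructors above.
# generate_content returns the model's text, or None if the call failed.
text = deepseek.generate_content("Deriva la serie de Fourier de f(x) = x en (-pi, pi).")
```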
evolutia/material_extractor.py
CHANGED
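The diff below gives `MaterialExtractor` an explicit constructor and an in-memory, per-file result cache invalidated by file modification time. As orientation before the hunks, here is a usage sketch built only from the methods added below; the base path and Markdown file are hypothetical examples.

```python
from pathlib import Path

from evolutia.material_extractor import MaterialExtractor

extractor = MaterialExtractor("cursos/metodos")  # hypothetical base path; str or Path is accepted

practica = Path("cursos/metodos/tema1/practica.md")  # hypothetical material file
first = extractor.extract_from_file(practica)        # parses the file and stores the result
again = extractor.extract_from_file(practica)        # served from the cache while the file is unmodified
fresh = extractor.extract_from_file(practica, use_cache=False)  # force a re-parse

print(extractor.get_cache_stats())  # e.g. {'cached_files': 1, 'last_scan_timestamp': ..., 'cache_ttl': 300}
extractor.clear_cache()             # drop all cached entries
```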
```diff
@@ -1,10 +1,11 @@
-"""
-Extractor de materiales didácticos.
-Lee y parsea archivos Markdown de lecturas, prácticas y tareas.
-"""
-from pathlib import Path
-from typing import Dict, List, Optional
-import logging
+"""
+Extractor de materiales didácticos.
+Lee y parsea archivos Markdown de lecturas, prácticas y tareas.
+"""
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+import logging
+import time
 
 try:
     from utils.markdown_parser import (
@@ -27,31 +28,43 @@ except ImportError:
 logger = logging.getLogger(__name__)
 
 
-class MaterialExtractor:
-    """Extrae ejercicios y soluciones de materiales didácticos."""
+class MaterialExtractor:
+    """Extrae ejercicios y soluciones de materiales didácticos."""
+
+    def __init__(self, base_path: Union[Path, str]):
+        """
+        Inicializa el extractor.
+
+        Args:
+            base_path: Ruta base del proyecto (donde están los directorios de temas)
+        """
+        self.base_path = Path(base_path)
+        self.exercises = []
+        self.solutions = []
+        # Caché de rutas de archivos válidos para evitar escaneos repetidos
+        self._file_cache: Dict[Path, Dict] = {}
+        # Timestamp del último escaneo
+        self._last_scan_timestamp: float = 0
+        # TTL del caché en segundos (5 minutos)
+        self._cache_ttl = 300
 
-    def
-        """
-
-
-        Args:
-
-
-
-
-
-
-
-
-
-
-
-
-
-        Returns:
-            Diccionario con ejercicios y soluciones extraídos
-        """
-        try:
+    def extract_from_file(self, file_path: Path, use_cache: bool = True) -> Dict:
+        """
+        Extrae ejercicios y soluciones de un archivo Markdown.
+
+        Args:
+            file_path: Ruta al archivo
+            use_cache: Si True, usa el caché si es válido (default: True)
+
+        Returns:
+            Diccionario con ejercicios y soluciones extraídos
+        """
+        # Verificar caché primero
+        if use_cache and self._is_cache_valid(file_path):
+            logger.debug(f"[MaterialExtractor] Usando caché para {file_path.name}")
+            return self._file_cache[file_path]['data']
+
+        try:
             content = read_markdown_file(file_path)
             frontmatter, content_body = extract_frontmatter(content)
 
@@ -65,62 +78,81 @@ class MaterialExtractor:
                         exercise['include_path'],
                         file_path.parent
                     )
-                    if include_path.exists():
-                        exercise['resolved_content'] = read_markdown_file(include_path)
-                    else:
-                        logger.warning(f"Include no encontrado: {include_path}")
-                        exercise['resolved_content'] = exercise['content']
-                else:
-                    exercise['resolved_content'] = exercise['content']
-
-            # Resolver includes de soluciones
-            for solution in solutions:
-                resolved_content_parts = []
-                for include_path_str in solution['include_paths']:
-                    include_path = resolve_include_path(
-                        include_path_str,
-                        file_path.parent
-                    )
-                    if include_path.exists():
-                        resolved_content_parts.append(read_markdown_file(include_path))
-                    else:
-                        logger.warning(f"Include no encontrado: {include_path}")
-
-                if resolved_content_parts:
-                    solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
-                else:
-                    solution['resolved_content'] = solution['content']
-
-            return {
-                'file_path': file_path,
-                'frontmatter': frontmatter,
-                'exercises': exercises,
-                'solutions': solutions,
-                'content_body': content_body # Exponer contenido para indexación de lecturas
-            }
-        except Exception as e:
-            logger.error(f"Error extrayendo de {file_path}: {e}")
-            return {
-                'file_path': file_path,
-                'frontmatter': {},
-                'exercises': [],
-                'solutions': []
-            }
-
-    def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
-        """
-        Extrae materiales de todos los archivos .md en un directorio.
-
-        Args:
-            directory: Directorio a procesar
-            pattern: Patrón de búsqueda de archivos
+                    if include_path.exists():
+                        exercise['resolved_content'] = read_markdown_file(include_path)
+                    else:
+                        logger.warning(f"[MaterialExtractor] Include no encontrado en ejercicio: {include_path} (archivo: {file_path})")
+                        exercise['resolved_content'] = exercise['content']
+                else:
+                    exercise['resolved_content'] = exercise['content']
+
+            # Resolver includes de soluciones
+            for solution in solutions:
+                resolved_content_parts = []
+                for include_path_str in solution['include_paths']:
+                    include_path = resolve_include_path(
+                        include_path_str,
+                        file_path.parent
+                    )
+                    if include_path.exists():
+                        resolved_content_parts.append(read_markdown_file(include_path))
+                    else:
+                        logger.warning(f"[MaterialExtractor] Include no encontrado en solución: {include_path} (archivo: {file_path})")
+
+                if resolved_content_parts:
+                    solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
+                else:
+                    solution['resolved_content'] = solution['content']
 
-
-
-
-
-
-
+            result = {
+                'file_path': file_path,
+                'frontmatter': frontmatter,
+                'exercises': exercises,
+                'solutions': solutions,
+                'content_body': content_body # Exponer contenido para indexación de lecturas
+            }
+
+            # Guardar en caché
+            if use_cache:
+                self._file_cache[file_path] = {
+                    'data': result,
+                    'timestamp': file_path.stat().st_mtime
+                }
+                self._last_scan_timestamp = max(self._last_scan_timestamp, file_path.stat().st_mtime)
+
+            return result
+        except Exception as e:
+            logger.error(f"[MaterialExtractor] Error extrayendo de {file_path}: {e}")
+            error_result = {
+                'file_path': file_path,
+                'frontmatter': {},
+                'exercises': [],
+                'solutions': []
+            }
+
+            # Guardar incluso errores en caché para evitar reintentos fallidos
+            if use_cache:
+                self._file_cache[file_path] = {
+                    'data': error_result,
+                    'timestamp': time.time() # Usar tiempo actual para archivos que no existen
+                }
+
+            return error_result
+
+    def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
+        """
+        Extrae materiales de todos los archivos .md en un directorio.
+
+        Args:
+            directory: Directorio a procesar
+            pattern: Patrón de búsqueda de archivos
+
+        Returns:
+            Lista de diccionarios con materiales extraídos
+        """
+        directory = Path(directory)
+        if not directory.exists():
+            logger.warning(f"[MaterialExtractor] Directorio no existe: {directory}")
             return []
 
         materials = []
@@ -233,5 +265,51 @@
             }
             all_exercises.append(exercise_data)
 
-        return all_exercises
-
+        return all_exercises
+
+    def clear_cache(self):
+        """Limpia el caché de archivos."""
+        self._file_cache.clear()
+        self._last_scan_timestamp = 0
+        logger.debug("[MaterialExtractor] Caché de archivos limpiado")
+
+    def _is_cache_valid(self, file_path: Path) -> bool:
+        """
+        Verifica si el caché para un archivo es válido.
+
+        Args:
+            file_path: Ruta del archivo a verificar
+
+        Returns:
+            True si el caché es válido, False si necesita recacheo
+        """
+        if file_path not in self._file_cache:
+            return False
+
+        # Verificar si el archivo fue modificado
+        try:
+            cache_entry = self._file_cache[file_path]
+            file_mtime = file_path.stat().st_mtime
+
+            # Usar el timestamp de escaneo más reciente para verificar
+            if file_mtime > self._last_scan_timestamp:
+                return False
+
+            return True
+        except (OSError, KeyError):
+            return False
+
+    def get_cache_stats(self) -> Dict:
+        """
+        Obtiene estadísticas del caché de archivos.
+
+        Returns:
+            Diccionario con estadísticas del caché
+        """
+        return {
+            'cached_files': len(self._file_cache),
+            'last_scan_timestamp': self._last_scan_timestamp,
+            'cache_ttl': self._cache_ttl
+        }
+
+
```