evolutia 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutia/__init__.py +9 -0
- evolutia/async_llm_providers.py +157 -0
- evolutia/cache/__init__.py +9 -0
- evolutia/cache/exercise_cache.py +226 -0
- evolutia/cache/llm_cache.py +487 -0
- evolutia/complexity_validator.py +33 -31
- evolutia/config_manager.py +53 -40
- evolutia/evolutia_engine.py +341 -66
- evolutia/exam_generator.py +44 -43
- evolutia/exceptions.py +38 -0
- evolutia/exercise_analyzer.py +54 -91
- evolutia/imports.py +175 -0
- evolutia/llm_providers.py +223 -61
- evolutia/material_extractor.py +166 -88
- evolutia/rag/rag_indexer.py +107 -90
- evolutia/rag/rag_retriever.py +130 -103
- evolutia/retry_utils.py +280 -0
- evolutia/utils/json_parser.py +29 -19
- evolutia/utils/markdown_parser.py +185 -159
- evolutia/utils/math_extractor.py +153 -144
- evolutia/validation/__init__.py +1 -0
- evolutia/validation/args_validator.py +253 -0
- evolutia/validation/config_validator.py +502 -0
- evolutia/variation_generator.py +82 -70
- evolutia-0.1.2.dist-info/METADATA +536 -0
- evolutia-0.1.2.dist-info/RECORD +37 -0
- evolutia_cli.py +22 -9
- evolutia-0.1.1.dist-info/METADATA +0 -221
- evolutia-0.1.1.dist-info/RECORD +0 -27
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/WHEEL +0 -0
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/entry_points.txt +0 -0
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {evolutia-0.1.1.dist-info → evolutia-0.1.2.dist-info}/top_level.txt +0 -0
evolutia/llm_providers.py
CHANGED
```diff
@@ -4,23 +4,38 @@ Módulo que define los proveedores de LLM abstractos y concretos.
 import os
 import logging
 from abc import ABC, abstractmethod
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union, TYPE_CHECKING
 
 logger = logging.getLogger(__name__)
 
+if TYPE_CHECKING:
+    from evolutia.cache.llm_cache import LLMCache
+
 class LLMProvider(ABC):
     """Clase base abstracta para proveedores de LLM."""
 
-
+    DEFAULT_SYSTEM_PROMPT = "Eres un experto en métodos matemáticos para física e ingeniería."
+    DEFAULT_MAX_TOKENS = 2000
+    DEFAULT_TEMPERATURE = 0.7
+
+    def __init__(self, model_name: Optional[str] = None, cache: Optional['LLMCache'] = None):
+        """
+        Inicializa el proveedor de LLM.
+
+        Args:
+            model_name: Nombre del modelo a usar
+            cache: Instancia opcional de LLMCache para cachear respuestas
+        """
         self.model_name = model_name
         self.client = None
-        self.genai = None
+        self.genai = None
+        self.cache = cache
         self.api_key = self._get_api_key()
         if self.api_key:
             self._setup_client()
-
-
-
+
+        if self.cache:
+            logger.debug(f"[LLMProvider] Caché de LLM habilitado")
 
     @abstractmethod
     def _get_api_key(self) -> Optional[str]:
@@ -37,30 +52,86 @@ class LLMProvider(ABC):
         """Genera contenido a partir de un prompt."""
         pass
 
+    def _get_provider_name(self) -> str:
+        """
+        Obtiene el nombre del proveedor para usar en el caché.
 
-
-
+        Returns:
+            Nombre del proveedor (ej: 'OpenAI', 'Anthropic')
+        """
+        return self.__class__.__name__.replace('Provider', '')
 
-    def _get_api_key(self) -> Optional[str]:
-        key = os.getenv("OPENAI_API_KEY")
-        if not key:
-            logger.warning("OPENAI_API_KEY no encontrada")
-        return key
 
-
+class OpenAICompatibleProvider(LLMProvider):
+    """Base clase para proveedores compatibles con OpenAI API."""
+
+    def __init__(self, model_name: Optional[str] = None, base_url: Optional[str] = None, timeout: Optional[float] = None, cache: Optional['LLMCache'] = None):
+        """
+        Inicializa el proveedor OpenAI-compatible.
+
+        Args:
+            model_name: Nombre del modelo a usar
+            base_url: URL base de la API (para proveedores compatibles)
+            timeout: Timeout para las llamadas a la API
+            cache: Instancia opcional de LLMCache
+        """
+        self.base_url = base_url
+        self.timeout = timeout
+        super().__init__(model_name, cache=cache)
+
+    def _setup_openai_client(self, api_key: Optional[str], base_url: Optional[str] = None, timeout: Optional[float] = None) -> bool:
+        """
+        Configura cliente OpenAI compartido.
+
+        Returns:
+            True si la configuración fue exitosa
+            False si no se pudo configurar (cliente no inicializado)
+        """
+        if not api_key:
+            return False
         try:
             from openai import OpenAI
-
+            client_kwargs = {"api_key": api_key}
+            if base_url:
+                client_kwargs["base_url"] = base_url
+            if timeout is not None:
+                client_kwargs["timeout"] = timeout
+            self.client = OpenAI(**client_kwargs)
+            logger.info(f"[OpenAICompatibleProvider] Cliente OpenAI inicializado (base_url={base_url}, timeout={timeout})")
+            return True
         except ImportError:
-            logger.error("Biblioteca openai no instalada. Instala con: pip install openai")
+            logger.error("[OpenAICompatibleProvider] Biblioteca openai no instalada. Instala con: pip install openai")
+            self.client = None
+            return False
+        except Exception as e:
+            logger.error(f"[OpenAICompatibleProvider] Error inesperado inicializando cliente OpenAI: {e}")
             self.client = None
+            return False
+
+    def _openai_generate_content(self, provider_name: str, default_model: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        """
+        Genera contenido usando API OpenAI-compatible con caché.
+
+        Returns:
+            Contenido generado si la llamada fue exitosa
+            None si hubo un error de API o configuración
+        """
+        if not self.client:
+            logger.error(f"[{provider_name}] Cliente no inicializado, no se puede generar contenido")
+            return None
 
-
-
-
-
-
-
+        system_content = system_prompt or self.DEFAULT_SYSTEM_PROMPT
+        model = self.model_name or default_model
+        prompt = kwargs.get("prompt", "")
+
+        # Intentar caché primero
+        if self.cache:
+            cached = self.cache.get(prompt, self._get_provider_name(), model)
+            if cached:
+                logger.info(f"[{provider_name}] Contenido obtenido del caché (modelo={model})")
+                return cached
+
+        # Generar respuesta
         try:
             response = self.client.chat.completions.create(
                 model=model,
@@ -68,15 +139,44 @@ class OpenAIProvider(LLMProvider):
                     {"role": "system", "content": system_content},
                     {"role": "user", "content": prompt}
                 ],
-                temperature=kwargs.get("temperature",
-                max_tokens=kwargs.get("max_tokens",
+                temperature=kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
+                max_tokens=kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS)
             )
-
+            content = response.choices[0].message.content.strip()
+            logger.info(f"[{provider_name}] Contenido generado exitosamente (modelo={model}, longitud={len(content)})")
+
+            # Guardar en caché
+            if self.cache:
+                metadata = {
+                    'provider': provider_name,
+                    'model': model,
+                    'temperature': kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
+                    'max_tokens': kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS)
+                }
+                self.cache.put(prompt, provider_name, model, content, metadata=metadata)
+
+            return content
         except Exception as e:
-            logger.error(f"Error
+            logger.error(f"[{provider_name}] Error en llamada a API: {e}")
             return None
 
 
+class OpenAIProvider(OpenAICompatibleProvider):
+    """Proveedor para OpenAI."""
+
+    def _get_api_key(self) -> Optional[str]:
+        key = os.getenv("OPENAI_API_KEY")
+        if not key:
+            logger.warning("OPENAI_API_KEY no encontrada")
+        return key
+
+    def _setup_client(self):
+        self._setup_openai_client(self.api_key)
+
+    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        return self._openai_generate_content("OpenAI", "gpt-4", system_prompt, prompt=prompt, **kwargs)
+
+
 class AnthropicProvider(LLMProvider):
     """Proveedor para Anthropic (Claude)."""
 
@@ -99,7 +199,16 @@ class AnthropicProvider(LLMProvider):
 
         system_content = system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."
         model = self.model_name or "claude-3-opus-20240229"
+        provider_name = self._get_provider_name()
+
+        # Intentar caché primero
+        if self.cache:
+            cached = self.cache.get(prompt, provider_name, model)
+            if cached:
+                logger.info(f"[{provider_name}] Contenido obtenido del caché (modelo={model})")
+                return cached
 
+        # Generar respuesta
         try:
             message = self.client.messages.create(
                 model=model,
@@ -108,9 +217,22 @@ class AnthropicProvider(LLMProvider):
                 system=system_content,
                 messages=[{"role": "user", "content": prompt}]
             )
-
+            content = message.content[0].text.strip()
+            logger.info(f"[{provider_name}] Contenido generado exitosamente (modelo={model}, longitud={len(content)})")
+
+            # Guardar en caché
+            if self.cache:
+                metadata = {
+                    'provider': provider_name,
+                    'model': model,
+                    'temperature': kwargs.get("temperature", 0.7),
+                    'max_tokens': kwargs.get("max_tokens", 2000)
+                }
+                self.cache.put(prompt, provider_name, model, content, metadata=metadata)
+
+            return content
         except Exception as e:
-            logger.error(f"Error llamando a Anthropic API: {e}")
+            logger.error(f"[{provider_name}] Error llamando a Anthropic API: {e}")
             return None
 
 
@@ -137,6 +259,14 @@ class GeminiProvider(LLMProvider):
 
         model_name = self.model_name or "gemini-2.5-pro"
         if model_name == 'gemini': model_name = "gemini-2.5-pro"
+        provider_name = self._get_provider_name()
+
+        # Intentar caché primero
+        if self.cache:
+            cached = self.cache.get(prompt, provider_name, model_name)
+            if cached:
+                logger.info(f"[{provider_name}] Contenido obtenido del caché (modelo={model_name})")
+                return cached
 
         generation_config = {
             "temperature": kwargs.get("temperature", 0.7),
@@ -146,6 +276,7 @@ class GeminiProvider(LLMProvider):
             "response_mime_type": "text/plain",
         }
 
+        # Generar respuesta
         try:
             model_instance = self.genai.GenerativeModel(
                 model_name=model_name,
@@ -155,54 +286,81 @@ class GeminiProvider(LLMProvider):
                 system_instruction=system_prompt
             )
             response = model_instance.generate_content(prompt)
-
+            content = response.text
+            logger.info(f"[{provider_name}] Contenido generado exitosamente (modelo={model_name}, longitud={len(content)})")
+
+            # Guardar en caché
+            if self.cache:
+                metadata = {
+                    'provider': provider_name,
+                    'model': model_name,
+                    'temperature': kwargs.get("temperature", 0.7),
+                    'max_tokens': kwargs.get("max_tokens", 8192)
+                }
+                self.cache.put(prompt, provider_name, model_name, content, metadata=metadata)
+
+            return content
         except Exception as e:
-            logger.error(f"Error llamando a Gemini API: {e}")
+            logger.error(f"[{provider_name}] Error llamando a Gemini API: {e}")
             return None
 
 
-class LocalProvider(
+class LocalProvider(OpenAICompatibleProvider):
     """Proveedor para modelos locales (Ollama/LM Studio) vía OpenAI compatible API."""
 
-    def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1"):
-
-        super().__init__(model_name)
+    def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1", cache: Optional['LLMCache'] = None):
+        super().__init__(model_name, base_url=base_url, timeout=300.0, cache=cache)
 
     def _get_api_key(self) -> Optional[str]:
         return "not-needed"
 
     def _setup_client(self):
-
-            from openai import OpenAI
-            self.client = OpenAI(
-                base_url=self.base_url,
-                api_key=self.api_key,
-                timeout=300.0
-            )
-        except ImportError:
-            logger.error("Biblioteca openai no instalada")
-            self.client = None
+        self._setup_openai_client(self.api_key, base_url=self.base_url, timeout=self.timeout)
 
     def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
-
+        return self._openai_generate_content("Local", "llama3", system_prompt, prompt=prompt, **kwargs)
 
-        system_content = system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."
-        model = self.model_name or "llama3"
 
-
-
-
-
-
-
-
-
-
-            )
-
-
-
-
+class DeepSeekProvider(OpenAICompatibleProvider):
+    """Proveedor para DeepSeek (OpenAI-compatible)."""
+
+    def __init__(self, model_name: Optional[str] = None, cache: Optional['LLMCache'] = None):
+        super().__init__(model_name, base_url="https://api.deepseek.com", cache=cache)
+
+    def _get_api_key(self) -> Optional[str]:
+        key = os.getenv("DEEPSEEK_API_KEY")
+        if not key:
+            logger.warning("DEEPSEEK_API_KEY no encontrada")
+        return key
+
+    def _setup_client(self):
+        self._setup_openai_client(self.api_key, base_url=self.base_url)
+
+    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        return self._openai_generate_content("DeepSeek", "deepseek-chat", system_prompt, prompt=prompt, **kwargs)
+
+
+class GenericProvider(OpenAICompatibleProvider):
+    """Proveedor Genérico Compatible con OpenAI (Groq, Mistral, etc)."""
+
+    def __init__(self, model_name: Optional[str] = None, base_url: Optional[str] = None, cache: Optional['LLMCache'] = None):
+        super().__init__(model_name, base_url=base_url or os.getenv("GENERIC_BASE_URL"), cache=cache)
+
+    def _get_api_key(self) -> Optional[str]:
+        key = os.getenv("GENERIC_API_KEY")
+        if not key:
+            logger.warning("GENERIC_API_KEY no encontrada")
+        return key
+
+    def _setup_client(self):
+        if not self.base_url:
+            logger.warning("GENERIC_BASE_URL no definida")
+        self._setup_openai_client(self.api_key, base_url=self.base_url)
+
+    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+        default_system_prompt = self.DEFAULT_SYSTEM_PROMPT
+        default_model = os.getenv("GENERIC_MODEL") or "gpt-3.5-turbo"
+        return self._openai_generate_content("Generic", default_model, system_prompt or default_system_prompt, prompt=prompt, **kwargs)
 
 
 def get_provider(provider_name: str, **kwargs) -> LLMProvider:
@@ -215,5 +373,9 @@ def get_provider(provider_name: str, **kwargs) -> LLMProvider:
         return GeminiProvider(**kwargs)
     elif provider_name == "local":
         return LocalProvider(**kwargs)
+    elif provider_name == "deepseek":
+        return DeepSeekProvider(**kwargs)
+    elif provider_name == "generic":
+        return GenericProvider(**kwargs)
     else:
         raise ValueError(f"Proveedor desconocido: {provider_name}")
```
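The diff above routes every OpenAI-compatible backend (OpenAI, Local, DeepSeek, Generic) through the shared `_setup_openai_client` / `_openai_generate_content` helpers and threads an optional `LLMCache` through each provider. A minimal usage sketch of the new surface follows; it assumes `LLMCache` can be constructed without arguments and that `DEEPSEEK_API_KEY` is set in the environment, since only the class name and its `get`/`put` calls appear in this diff.

```python
# Hedged sketch of the 0.1.2 provider surface shown in the diff above.
# Assumptions (not confirmed by this diff): LLMCache() takes no required
# arguments, and DEEPSEEK_API_KEY is exported in the environment.
from evolutia.cache.llm_cache import LLMCache
from evolutia.llm_providers import get_provider

cache = LLMCache()  # assumed default constructor
provider = get_provider("deepseek", model_name="deepseek-chat", cache=cache)

# generate_content returns the text on success or None on API/setup errors,
# serving repeated prompts from the cache when one is attached.
text = provider.generate_content(
    "Resume el método de separación de variables en una frase.",
    temperature=0.3,
    max_tokens=500,
)
if text is None:
    print("Llamada fallida: cliente no inicializado o error de API")
```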
evolutia/material_extractor.py
CHANGED
```diff
@@ -1,10 +1,11 @@
-"""
-Extractor de materiales didácticos.
-Lee y parsea archivos Markdown de lecturas, prácticas y tareas.
-"""
-from pathlib import Path
-from typing import Dict, List, Optional
-import logging
+"""
+Extractor de materiales didácticos.
+Lee y parsea archivos Markdown de lecturas, prácticas y tareas.
+"""
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+import logging
+import time
 
 try:
     from utils.markdown_parser import (
@@ -27,31 +28,43 @@ except ImportError:
 logger = logging.getLogger(__name__)
 
 
-class MaterialExtractor:
-    """Extrae ejercicios y soluciones de materiales didácticos."""
+class MaterialExtractor:
+    """Extrae ejercicios y soluciones de materiales didácticos."""
+
+    def __init__(self, base_path: Union[Path, str]):
+        """
+        Inicializa el extractor.
+
+        Args:
+            base_path: Ruta base del proyecto (donde están los directorios de temas)
+        """
+        self.base_path = Path(base_path)
+        self.exercises = []
+        self.solutions = []
+        # Caché de rutas de archivos válidos para evitar escaneos repetidos
+        self._file_cache: Dict[Path, Dict] = {}
+        # Timestamp del último escaneo
+        self._last_scan_timestamp: float = 0
+        # TTL del caché en segundos (5 minutos)
+        self._cache_ttl = 300
 
-    def
-        """
-
-
-        Args:
-
-
-
-
-
-
-
-
-
-
-
-
-
-        Returns:
-            Diccionario con ejercicios y soluciones extraídos
-        """
-        try:
+    def extract_from_file(self, file_path: Path, use_cache: bool = True) -> Dict:
+        """
+        Extrae ejercicios y soluciones de un archivo Markdown.
+
+        Args:
+            file_path: Ruta al archivo
+            use_cache: Si True, usa el caché si es válido (default: True)
+
+        Returns:
+            Diccionario con ejercicios y soluciones extraídos
+        """
+        # Verificar caché primero
+        if use_cache and self._is_cache_valid(file_path):
+            logger.debug(f"[MaterialExtractor] Usando caché para {file_path.name}")
+            return self._file_cache[file_path]['data']
+
+        try:
             content = read_markdown_file(file_path)
             frontmatter, content_body = extract_frontmatter(content)
 
@@ -65,62 +78,81 @@ class MaterialExtractor:
                         exercise['include_path'],
                         file_path.parent
                     )
-                    if include_path.exists():
-                        exercise['resolved_content'] = read_markdown_file(include_path)
-                    else:
-                        logger.warning(f"Include no encontrado: {include_path}")
-                        exercise['resolved_content'] = exercise['content']
-                else:
-                    exercise['resolved_content'] = exercise['content']
-
-            # Resolver includes de soluciones
-            for solution in solutions:
-                resolved_content_parts = []
-                for include_path_str in solution['include_paths']:
-                    include_path = resolve_include_path(
-                        include_path_str,
-                        file_path.parent
-                    )
-                    if include_path.exists():
-                        resolved_content_parts.append(read_markdown_file(include_path))
-                    else:
-                        logger.warning(f"Include no encontrado: {include_path}")
-
-                if resolved_content_parts:
-                    solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
-                else:
-                    solution['resolved_content'] = solution['content']
-
-            return {
-                'file_path': file_path,
-                'frontmatter': frontmatter,
-                'exercises': exercises,
-                'solutions': solutions,
-                'content_body': content_body # Exponer contenido para indexación de lecturas
-            }
-        except Exception as e:
-            logger.error(f"Error extrayendo de {file_path}: {e}")
-            return {
-                'file_path': file_path,
-                'frontmatter': {},
-                'exercises': [],
-                'solutions': []
-            }
-
-    def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
-        """
-        Extrae materiales de todos los archivos .md en un directorio.
-
-        Args:
-            directory: Directorio a procesar
-            pattern: Patrón de búsqueda de archivos
+                    if include_path.exists():
+                        exercise['resolved_content'] = read_markdown_file(include_path)
+                    else:
+                        logger.warning(f"[MaterialExtractor] Include no encontrado en ejercicio: {include_path} (archivo: {file_path})")
+                        exercise['resolved_content'] = exercise['content']
+                else:
+                    exercise['resolved_content'] = exercise['content']
+
+            # Resolver includes de soluciones
+            for solution in solutions:
+                resolved_content_parts = []
+                for include_path_str in solution['include_paths']:
+                    include_path = resolve_include_path(
+                        include_path_str,
+                        file_path.parent
+                    )
+                    if include_path.exists():
+                        resolved_content_parts.append(read_markdown_file(include_path))
+                    else:
+                        logger.warning(f"[MaterialExtractor] Include no encontrado en solución: {include_path} (archivo: {file_path})")
+
+                if resolved_content_parts:
+                    solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
+                else:
+                    solution['resolved_content'] = solution['content']
 
-
-
-
-
-
-
+            result = {
+                'file_path': file_path,
+                'frontmatter': frontmatter,
+                'exercises': exercises,
+                'solutions': solutions,
+                'content_body': content_body # Exponer contenido para indexación de lecturas
+            }
+
+            # Guardar en caché
+            if use_cache:
+                self._file_cache[file_path] = {
+                    'data': result,
+                    'timestamp': file_path.stat().st_mtime
+                }
+                self._last_scan_timestamp = max(self._last_scan_timestamp, file_path.stat().st_mtime)
+
+            return result
+        except Exception as e:
+            logger.error(f"[MaterialExtractor] Error extrayendo de {file_path}: {e}")
+            error_result = {
+                'file_path': file_path,
+                'frontmatter': {},
+                'exercises': [],
+                'solutions': []
+            }
+
+            # Guardar incluso errores en caché para evitar reintentos fallidos
+            if use_cache:
+                self._file_cache[file_path] = {
+                    'data': error_result,
+                    'timestamp': time.time() # Usar tiempo actual para archivos que no existen
+                }
+
+            return error_result
+
+    def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
+        """
+        Extrae materiales de todos los archivos .md en un directorio.
+
+        Args:
+            directory: Directorio a procesar
+            pattern: Patrón de búsqueda de archivos
+
+        Returns:
+            Lista de diccionarios con materiales extraídos
+        """
+        directory = Path(directory)
+        if not directory.exists():
+            logger.warning(f"[MaterialExtractor] Directorio no existe: {directory}")
             return []
 
         materials = []
@@ -233,5 +265,51 @@
         }
         all_exercises.append(exercise_data)
 
-        return all_exercises
-
+        return all_exercises
+
+    def clear_cache(self):
+        """Limpia el caché de archivos."""
+        self._file_cache.clear()
+        self._last_scan_timestamp = 0
+        logger.debug("[MaterialExtractor] Caché de archivos limpiado")
+
+    def _is_cache_valid(self, file_path: Path) -> bool:
+        """
+        Verifica si el caché para un archivo es válido.
+
+        Args:
+            file_path: Ruta del archivo a verificar
+
+        Returns:
+            True si el caché es válido, False si necesita recacheo
+        """
+        if file_path not in self._file_cache:
+            return False
+
+        # Verificar si el archivo fue modificado
+        try:
+            cache_entry = self._file_cache[file_path]
+            file_mtime = file_path.stat().st_mtime
+
+            # Usar el timestamp de escaneo más reciente para verificar
+            if file_mtime > self._last_scan_timestamp:
+                return False
+
+            return True
+        except (OSError, KeyError):
+            return False
+
+    def get_cache_stats(self) -> Dict:
+        """
+        Obtiene estadísticas del caché de archivos.
+
+        Returns:
+            Diccionario con estadísticas del caché
+        """
+        return {
+            'cached_files': len(self._file_cache),
+            'last_scan_timestamp': self._last_scan_timestamp,
+            'cache_ttl': self._cache_ttl
+        }
+
+
```
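As the diff shows, `MaterialExtractor` now memoizes parse results per file and invalidates them via `st_mtime`, so repeated extraction passes over the same Markdown tree stop re-reading unchanged files. A short, hedged sketch of that flow, using illustrative paths only:

```python
# Hedged usage sketch of the per-file cache added to MaterialExtractor in 0.1.2.
# The paths are illustrative; only the method names come from the diff above.
from pathlib import Path
from evolutia.material_extractor import MaterialExtractor

extractor = MaterialExtractor(Path("cursos/metodos-matematicos"))

practica = Path("cursos/metodos-matematicos/tema1/practica.md")
first = extractor.extract_from_file(practica)    # parses and caches the result
second = extractor.extract_from_file(practica)   # served from the mtime-validated cache

print(extractor.get_cache_stats())               # e.g. {'cached_files': 1, ...}
extractor.clear_cache()                          # drop all cached parse results
forced = extractor.extract_from_file(practica, use_cache=False)  # bypass the cache entirely
```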