evolutia 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
evolutia/llm_providers.py CHANGED
@@ -4,23 +4,38 @@ Module defining the abstract and concrete LLM providers.
  import os
  import logging
  from abc import ABC, abstractmethod
- from typing import Optional, Dict, Any
+ from typing import Optional, Dict, Any, Union, TYPE_CHECKING
  
  logger = logging.getLogger(__name__)
  
+ if TYPE_CHECKING:
+     from evolutia.cache.llm_cache import LLMCache
+ 
  class LLMProvider(ABC):
      """Abstract base class for LLM providers."""
  
-     def __init__(self, model_name: Optional[str] = None):
+     DEFAULT_SYSTEM_PROMPT = "You are an expert in mathematical methods for physics and engineering."
+     DEFAULT_MAX_TOKENS = 2000
+     DEFAULT_TEMPERATURE = 0.7
+ 
+     def __init__(self, model_name: Optional[str] = None, cache: Optional['LLMCache'] = None):
+         """
+         Initializes the LLM provider.
+ 
+         Args:
+             model_name: Name of the model to use
+             cache: Optional LLMCache instance for caching responses
+         """
          self.model_name = model_name
          self.client = None
-         self.genai = None  # For Gemini
+         self.genai = None
+         self.cache = cache
          self.api_key = self._get_api_key()
          if self.api_key:
              self._setup_client()
-         else:
-             # Some providers like local might not strictly need an API key from env
-             pass
+ 
+         if self.cache:
+             logger.debug("[LLMProvider] LLM cache enabled")
  
      @abstractmethod
      def _get_api_key(self) -> Optional[str]:
@@ -37,30 +52,86 @@ class LLMProvider(ABC):
          """Generates content from a prompt."""
          pass
  
+     def _get_provider_name(self) -> str:
+         """
+         Gets the provider name to use in the cache.
  
- class OpenAIProvider(LLMProvider):
-     """Provider for OpenAI."""
+         Returns:
+             Provider name (e.g. 'OpenAI', 'Anthropic')
+         """
+         return self.__class__.__name__.replace('Provider', '')
  
-     def _get_api_key(self) -> Optional[str]:
-         key = os.getenv("OPENAI_API_KEY")
-         if not key:
-             logger.warning("OPENAI_API_KEY not found")
-         return key
  
-     def _setup_client(self):
+ class OpenAICompatibleProvider(LLMProvider):
+     """Base class for providers compatible with the OpenAI API."""
+ 
+     def __init__(self, model_name: Optional[str] = None, base_url: Optional[str] = None, timeout: Optional[float] = None, cache: Optional['LLMCache'] = None):
+         """
+         Initializes the OpenAI-compatible provider.
+ 
+         Args:
+             model_name: Name of the model to use
+             base_url: Base URL of the API (for compatible providers)
+             timeout: Timeout for API calls
+             cache: Optional LLMCache instance
+         """
+         self.base_url = base_url
+         self.timeout = timeout
+         super().__init__(model_name, cache=cache)
+ 
+     def _setup_openai_client(self, api_key: Optional[str], base_url: Optional[str] = None, timeout: Optional[float] = None) -> bool:
+         """
+         Configures the shared OpenAI client.
+ 
+         Returns:
+             True if setup succeeded
+             False if it could not be configured (client not initialized)
+         """
+         if not api_key:
+             return False
          try:
              from openai import OpenAI
-             self.client = OpenAI(api_key=self.api_key)
+             client_kwargs = {"api_key": api_key}
+             if base_url:
+                 client_kwargs["base_url"] = base_url
+             if timeout is not None:
+                 client_kwargs["timeout"] = timeout
+             self.client = OpenAI(**client_kwargs)
+             logger.info(f"[OpenAICompatibleProvider] OpenAI client initialized (base_url={base_url}, timeout={timeout})")
+             return True
          except ImportError:
-             logger.error("openai library not installed. Install with: pip install openai")
+             logger.error("[OpenAICompatibleProvider] openai library not installed. Install with: pip install openai")
+             self.client = None
+             return False
+         except Exception as e:
+             logger.error(f"[OpenAICompatibleProvider] Unexpected error initializing the OpenAI client: {e}")
              self.client = None
+             return False
+ 
+     def _openai_generate_content(self, provider_name: str, default_model: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+         """
+         Generates content through an OpenAI-compatible API, with caching.
+ 
+         Returns:
+             Generated content if the call succeeded
+             None if there was an API or configuration error
+         """
+         if not self.client:
+             logger.error(f"[{provider_name}] Client not initialized, cannot generate content")
+             return None
  
-     def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
-         if not self.client: return None
- 
-         system_content = system_prompt or "You are an expert in mathematical methods for physics and engineering."
-         model = self.model_name or "gpt-4"
- 
+         system_content = system_prompt or self.DEFAULT_SYSTEM_PROMPT
+         model = self.model_name or default_model
+         prompt = kwargs.get("prompt", "")
+ 
+         # Try the cache first
+         if self.cache:
+             cached = self.cache.get(prompt, self._get_provider_name(), model)
+             if cached:
+                 logger.info(f"[{provider_name}] Content retrieved from cache (model={model})")
+                 return cached
+ 
+         # Generate a response
          try:
              response = self.client.chat.completions.create(
                  model=model,
@@ -68,15 +139,44 @@ class OpenAIProvider(LLMProvider):
                      {"role": "system", "content": system_content},
                      {"role": "user", "content": prompt}
                  ],
-                 temperature=kwargs.get("temperature", 0.7),
-                 max_tokens=kwargs.get("max_tokens", 2000)
+                 temperature=kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
+                 max_tokens=kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS)
              )
-             return response.choices[0].message.content.strip()
+             content = response.choices[0].message.content.strip()
+             logger.info(f"[{provider_name}] Content generated successfully (model={model}, length={len(content)})")
+ 
+             # Store in the cache
+             if self.cache:
+                 metadata = {
+                     'provider': provider_name,
+                     'model': model,
+                     'temperature': kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
+                     'max_tokens': kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS)
+                 }
+                 self.cache.put(prompt, provider_name, model, content, metadata=metadata)
+ 
+             return content
          except Exception as e:
-             logger.error(f"Error calling the OpenAI API: {e}")
+             logger.error(f"[{provider_name}] API call error: {e}")
              return None
  
  
+ class OpenAIProvider(OpenAICompatibleProvider):
+     """Provider for OpenAI."""
+ 
+     def _get_api_key(self) -> Optional[str]:
+         key = os.getenv("OPENAI_API_KEY")
+         if not key:
+             logger.warning("OPENAI_API_KEY not found")
+         return key
+ 
+     def _setup_client(self):
+         self._setup_openai_client(self.api_key)
+ 
+     def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+         return self._openai_generate_content("OpenAI", "gpt-4", system_prompt, prompt=prompt, **kwargs)
+ 
+ 
  class AnthropicProvider(LLMProvider):
      """Provider for Anthropic (Claude)."""
  
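The read-through pattern above only exercises two methods on the cache object, and `LLMCache` itself is imported under `TYPE_CHECKING` alone, so this diff never shows its implementation. As a rough sketch of the interface the providers call (the in-memory dict storage is an assumption, not the package's actual `evolutia.cache.llm_cache.LLMCache`):

from typing import Any, Dict, Optional, Tuple

class InMemoryLLMCache:
    """Illustrative stand-in for evolutia.cache.llm_cache.LLMCache.

    It mirrors only the two calls made by the providers:
    get(prompt, provider, model) and
    put(prompt, provider, model, content, metadata=None).
    """

    def __init__(self) -> None:
        self._store: Dict[Tuple[str, str, str], str] = {}

    def get(self, prompt: str, provider: str, model: str) -> Optional[str]:
        # Keyed on the full (prompt, provider, model) triple, matching
        # the arguments the providers pass on every lookup.
        return self._store.get((prompt, provider, model))

    def put(self, prompt: str, provider: str, model: str, content: str,
            metadata: Optional[Dict[str, Any]] = None) -> None:
        # metadata (temperature, max_tokens, ...) is accepted but not
        # persisted in this sketch.
        self._store[(prompt, provider, model)] = content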
@@ -99,7 +199,16 @@ class AnthropicProvider(LLMProvider):
  
          system_content = system_prompt or "You are an expert in mathematical methods for physics and engineering."
          model = self.model_name or "claude-3-opus-20240229"
+         provider_name = self._get_provider_name()
+ 
+         # Try the cache first
+         if self.cache:
+             cached = self.cache.get(prompt, provider_name, model)
+             if cached:
+                 logger.info(f"[{provider_name}] Content retrieved from cache (model={model})")
+                 return cached
  
+         # Generate a response
          try:
              message = self.client.messages.create(
                  model=model,
@@ -108,9 +217,22 @@ class AnthropicProvider(LLMProvider):
                  system=system_content,
                  messages=[{"role": "user", "content": prompt}]
              )
-             return message.content[0].text.strip()
+             content = message.content[0].text.strip()
+             logger.info(f"[{provider_name}] Content generated successfully (model={model}, length={len(content)})")
+ 
+             # Store in the cache
+             if self.cache:
+                 metadata = {
+                     'provider': provider_name,
+                     'model': model,
+                     'temperature': kwargs.get("temperature", 0.7),
+                     'max_tokens': kwargs.get("max_tokens", 2000)
+                 }
+                 self.cache.put(prompt, provider_name, model, content, metadata=metadata)
+ 
+             return content
          except Exception as e:
-             logger.error(f"Error calling the Anthropic API: {e}")
+             logger.error(f"[{provider_name}] Error calling the Anthropic API: {e}")
              return None
  
  
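Wiring a cache into any of these providers is just a constructor argument, after which a repeated prompt costs a single API call. A usage sketch, assuming ANTHROPIC_API_KEY is set and reusing the InMemoryLLMCache stand-in from the sketch above:

cache = InMemoryLLMCache()
claude = AnthropicProvider(model_name="claude-3-opus-20240229", cache=cache)

first = claude.generate_content("State Green's theorem.")   # API call, then cache.put(...)
second = claude.generate_content("State Green's theorem.")  # same (prompt, provider, model) -> cache.get(...)
assert first == second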
@@ -137,6 +259,14 @@ class GeminiProvider(LLMProvider):
  
          model_name = self.model_name or "gemini-2.5-pro"
          if model_name == 'gemini': model_name = "gemini-2.5-pro"
+         provider_name = self._get_provider_name()
+ 
+         # Try the cache first
+         if self.cache:
+             cached = self.cache.get(prompt, provider_name, model_name)
+             if cached:
+                 logger.info(f"[{provider_name}] Content retrieved from cache (model={model_name})")
+                 return cached
  
          generation_config = {
              "temperature": kwargs.get("temperature", 0.7),
@@ -146,6 +276,7 @@
              "response_mime_type": "text/plain",
          }
  
+         # Generate a response
          try:
              model_instance = self.genai.GenerativeModel(
                  model_name=model_name,
@@ -155,54 +286,81 @@ class GeminiProvider(LLMProvider):
                  system_instruction=system_prompt
              )
              response = model_instance.generate_content(prompt)
-             return response.text
+             content = response.text
+             logger.info(f"[{provider_name}] Content generated successfully (model={model_name}, length={len(content)})")
+ 
+             # Store in the cache
+             if self.cache:
+                 metadata = {
+                     'provider': provider_name,
+                     'model': model_name,
+                     'temperature': kwargs.get("temperature", 0.7),
+                     'max_tokens': kwargs.get("max_tokens", 8192)
+                 }
+                 self.cache.put(prompt, provider_name, model_name, content, metadata=metadata)
+ 
+             return content
          except Exception as e:
-             logger.error(f"Error calling the Gemini API: {e}")
+             logger.error(f"[{provider_name}] Error calling the Gemini API: {e}")
              return None
  
  
- class LocalProvider(LLMProvider):
+ class LocalProvider(OpenAICompatibleProvider):
      """Provider for local models (Ollama/LM Studio) via an OpenAI-compatible API."""
  
-     def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1"):
-         self.base_url = base_url
-         super().__init__(model_name)
+     def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1", cache: Optional['LLMCache'] = None):
+         super().__init__(model_name, base_url=base_url, timeout=300.0, cache=cache)
  
      def _get_api_key(self) -> Optional[str]:
          return "not-needed"
  
      def _setup_client(self):
-         try:
-             from openai import OpenAI
-             self.client = OpenAI(
-                 base_url=self.base_url,
-                 api_key=self.api_key,
-                 timeout=300.0
-             )
-         except ImportError:
-             logger.error("openai library not installed")
-             self.client = None
+         self._setup_openai_client(self.api_key, base_url=self.base_url, timeout=self.timeout)
  
      def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
-         if not self.client: return None
+         return self._openai_generate_content("Local", "llama3", system_prompt, prompt=prompt, **kwargs)
  
-         system_content = system_prompt or "You are an expert in mathematical methods for physics and engineering."
-         model = self.model_name or "llama3"
  
-         try:
-             response = self.client.chat.completions.create(
-                 model=model,
-                 messages=[
-                     {"role": "system", "content": system_content},
-                     {"role": "user", "content": prompt}
-                 ],
-                 temperature=kwargs.get("temperature", 0.7),
-                 max_tokens=kwargs.get("max_tokens", 2000)
-             )
-             return response.choices[0].message.content.strip()
-         except Exception as e:
-             logger.error(f"Error calling the local API: {e}")
-             return None
+ class DeepSeekProvider(OpenAICompatibleProvider):
+     """Provider for DeepSeek (OpenAI-compatible)."""
+ 
+     def __init__(self, model_name: Optional[str] = None, cache: Optional['LLMCache'] = None):
+         super().__init__(model_name, base_url="https://api.deepseek.com", cache=cache)
+ 
+     def _get_api_key(self) -> Optional[str]:
+         key = os.getenv("DEEPSEEK_API_KEY")
+         if not key:
+             logger.warning("DEEPSEEK_API_KEY not found")
+         return key
+ 
+     def _setup_client(self):
+         self._setup_openai_client(self.api_key, base_url=self.base_url)
+ 
+     def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+         return self._openai_generate_content("DeepSeek", "deepseek-chat", system_prompt, prompt=prompt, **kwargs)
+ 
+ 
+ class GenericProvider(OpenAICompatibleProvider):
+     """Generic OpenAI-compatible provider (Groq, Mistral, etc.)."""
+ 
+     def __init__(self, model_name: Optional[str] = None, base_url: Optional[str] = None, cache: Optional['LLMCache'] = None):
+         super().__init__(model_name, base_url=base_url or os.getenv("GENERIC_BASE_URL"), cache=cache)
+ 
+     def _get_api_key(self) -> Optional[str]:
+         key = os.getenv("GENERIC_API_KEY")
+         if not key:
+             logger.warning("GENERIC_API_KEY not found")
+         return key
+ 
+     def _setup_client(self):
+         if not self.base_url:
+             logger.warning("GENERIC_BASE_URL not set")
+         self._setup_openai_client(self.api_key, base_url=self.base_url)
+ 
+     def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
+         default_system_prompt = self.DEFAULT_SYSTEM_PROMPT
+         default_model = os.getenv("GENERIC_MODEL") or "gpt-3.5-turbo"
+         return self._openai_generate_content("Generic", default_model, system_prompt or default_system_prompt, prompt=prompt, **kwargs)
  
  
  def get_provider(provider_name: str, **kwargs) -> LLMProvider:
@@ -215,5 +373,9 @@ def get_provider(provider_name: str, **kwargs) -> LLMProvider:
          return GeminiProvider(**kwargs)
      elif provider_name == "local":
          return LocalProvider(**kwargs)
+     elif provider_name == "deepseek":
+         return DeepSeekProvider(**kwargs)
+     elif provider_name == "generic":
+         return GenericProvider(**kwargs)
      else:
          raise ValueError(f"Unknown provider: {provider_name}")
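With the two new branches, backend selection stays a one-string decision and call sites never change. A usage sketch (API keys come from the environment; the Groq URL below is an assumption about one compatible endpoint, not something this package ships):

# DeepSeek: requires DEEPSEEK_API_KEY in the environment.
deepseek = get_provider("deepseek", model_name="deepseek-chat")

# Generic: reads GENERIC_API_KEY, GENERIC_BASE_URL and GENERIC_MODEL
# from the environment unless overridden explicitly.
groq = get_provider("generic", base_url="https://api.groq.com/openai/v1")

text = deepseek.generate_content("Derive the Fourier series of f(x) = x on (-pi, pi).")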
@@ -1,10 +1,11 @@
- """
- Extractor for teaching materials.
- Reads and parses Markdown files of readings, practice sets, and assignments.
- """
- from pathlib import Path
- from typing import Dict, List, Optional
- import logging
+ """
+ Extractor for teaching materials.
+ Reads and parses Markdown files of readings, practice sets, and assignments.
+ """
+ from pathlib import Path
+ from typing import Dict, List, Optional, Union
+ import logging
+ import time
  
  try:
      from utils.markdown_parser import (
@@ -27,31 +28,43 @@ except ImportError:
  logger = logging.getLogger(__name__)
  
  
- class MaterialExtractor:
-     """Extracts exercises and solutions from teaching materials."""
+ class MaterialExtractor:
+     """Extracts exercises and solutions from teaching materials."""
+ 
+     def __init__(self, base_path: Union[Path, str]):
+         """
+         Initializes the extractor.
+ 
+         Args:
+             base_path: Base path of the project (where the topic directories live)
+         """
+         self.base_path = Path(base_path)
+         self.exercises = []
+         self.solutions = []
+         # Cache of valid file paths, to avoid repeated scans
+         self._file_cache: Dict[Path, Dict] = {}
+         # Timestamp of the last scan
+         self._last_scan_timestamp: float = 0
+         # Cache TTL in seconds (5 minutes)
+         self._cache_ttl = 300
  
-     def __init__(self, base_path: Path):
-         """
-         Initializes the extractor.
- 
-         Args:
-             base_path: Base path of the project (where the topic directories live)
-         """
-         self.base_path = Path(base_path)
-         self.exercises = []
-         self.solutions = []
- 
-     def extract_from_file(self, file_path: Path) -> Dict:
-         """
-         Extracts exercises and solutions from a Markdown file.
- 
-         Args:
-             file_path: Path to the file
- 
-         Returns:
-             Dictionary with the extracted exercises and solutions
-         """
-         try:
+     def extract_from_file(self, file_path: Path, use_cache: bool = True) -> Dict:
+         """
+         Extracts exercises and solutions from a Markdown file.
+ 
+         Args:
+             file_path: Path to the file
+             use_cache: If True, use the cache when it is valid (default: True)
+ 
+         Returns:
+             Dictionary with the extracted exercises and solutions
+         """
+         # Check the cache first
+         if use_cache and self._is_cache_valid(file_path):
+             logger.debug(f"[MaterialExtractor] Using cache for {file_path.name}")
+             return self._file_cache[file_path]['data']
+ 
+         try:
              content = read_markdown_file(file_path)
              frontmatter, content_body = extract_frontmatter(content)
  
@@ -65,62 +78,81 @@ class MaterialExtractor:
                          exercise['include_path'],
                          file_path.parent
                      )
-                     if include_path.exists():
-                         exercise['resolved_content'] = read_markdown_file(include_path)
-                     else:
-                         logger.warning(f"Include not found: {include_path}")
-                         exercise['resolved_content'] = exercise['content']
-                 else:
-                     exercise['resolved_content'] = exercise['content']
- 
-             # Resolve solution includes
-             for solution in solutions:
-                 resolved_content_parts = []
-                 for include_path_str in solution['include_paths']:
-                     include_path = resolve_include_path(
-                         include_path_str,
-                         file_path.parent
-                     )
-                     if include_path.exists():
-                         resolved_content_parts.append(read_markdown_file(include_path))
-                     else:
-                         logger.warning(f"Include not found: {include_path}")
- 
-                 if resolved_content_parts:
-                     solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
-                 else:
-                     solution['resolved_content'] = solution['content']
- 
-             return {
-                 'file_path': file_path,
-                 'frontmatter': frontmatter,
-                 'exercises': exercises,
-                 'solutions': solutions,
-                 'content_body': content_body  # Expose content for reading indexing
-             }
-         except Exception as e:
-             logger.error(f"Error extracting from {file_path}: {e}")
-             return {
-                 'file_path': file_path,
-                 'frontmatter': {},
-                 'exercises': [],
-                 'solutions': []
-             }
- 
-     def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
-         """
-         Extracts materials from all .md files in a directory.
- 
-         Args:
-             directory: Directory to process
-             pattern: File search pattern
+                     if include_path.exists():
+                         exercise['resolved_content'] = read_markdown_file(include_path)
+                     else:
+                         logger.warning(f"[MaterialExtractor] Include not found in exercise: {include_path} (file: {file_path})")
+                         exercise['resolved_content'] = exercise['content']
+                 else:
+                     exercise['resolved_content'] = exercise['content']
+ 
+             # Resolve solution includes
+             for solution in solutions:
+                 resolved_content_parts = []
+                 for include_path_str in solution['include_paths']:
+                     include_path = resolve_include_path(
+                         include_path_str,
+                         file_path.parent
+                     )
+                     if include_path.exists():
+                         resolved_content_parts.append(read_markdown_file(include_path))
+                     else:
+                         logger.warning(f"[MaterialExtractor] Include not found in solution: {include_path} (file: {file_path})")
+ 
+                 if resolved_content_parts:
+                     solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
+                 else:
+                     solution['resolved_content'] = solution['content']
  
-         Returns:
-             List of dictionaries with the extracted materials
-         """
-         directory = Path(directory)
-         if not directory.exists():
-             logger.warning(f"Directory does not exist: {directory}")
+             result = {
+                 'file_path': file_path,
+                 'frontmatter': frontmatter,
+                 'exercises': exercises,
+                 'solutions': solutions,
+                 'content_body': content_body  # Expose content for reading indexing
+             }
+ 
+             # Store in the cache
+             if use_cache:
+                 self._file_cache[file_path] = {
+                     'data': result,
+                     'timestamp': file_path.stat().st_mtime
+                 }
+                 self._last_scan_timestamp = max(self._last_scan_timestamp, file_path.stat().st_mtime)
+ 
+             return result
+         except Exception as e:
+             logger.error(f"[MaterialExtractor] Error extracting from {file_path}: {e}")
+             error_result = {
+                 'file_path': file_path,
+                 'frontmatter': {},
+                 'exercises': [],
+                 'solutions': []
+             }
+ 
+             # Cache errors too, to avoid repeated failing retries
+             if use_cache:
+                 self._file_cache[file_path] = {
+                     'data': error_result,
+                     'timestamp': time.time()  # Use the current time for files that do not exist
+                 }
+ 
+             return error_result
+ 
+     def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
+         """
+         Extracts materials from all .md files in a directory.
+ 
+         Args:
+             directory: Directory to process
+             pattern: File search pattern
+ 
+         Returns:
+             List of dictionaries with the extracted materials
+         """
+         directory = Path(directory)
+         if not directory.exists():
+             logger.warning(f"[MaterialExtractor] Directory does not exist: {directory}")
              return []
  
          materials = []
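Because cache validity is keyed to the file's mtime, repeated extraction of an unchanged file reduces to a dictionary lookup. A usage sketch (the paths are illustrative):

from pathlib import Path

extractor = MaterialExtractor(Path("materials"))     # illustrative base path
lesson = Path("materials/tema1/practica.md")         # illustrative file

first = extractor.extract_from_file(lesson)                   # parses and caches
second = extractor.extract_from_file(lesson)                  # served from cache while the mtime is unchanged
fresh = extractor.extract_from_file(lesson, use_cache=False)  # force a re-parse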
@@ -233,5 +265,51 @@ class MaterialExtractor:
              }
              all_exercises.append(exercise_data)
  
-         return all_exercises
- 
+         return all_exercises
+ 
+     def clear_cache(self):
+         """Clears the file cache."""
+         self._file_cache.clear()
+         self._last_scan_timestamp = 0
+         logger.debug("[MaterialExtractor] File cache cleared")
+ 
+     def _is_cache_valid(self, file_path: Path) -> bool:
+         """
+         Checks whether the cache entry for a file is valid.
+ 
+         Args:
+             file_path: Path of the file to check
+ 
+         Returns:
+             True if the cache entry is valid, False if it needs re-caching
+         """
+         if file_path not in self._file_cache:
+             return False
+ 
+         # Check whether the file has been modified
+         try:
+             cache_entry = self._file_cache[file_path]
+             file_mtime = file_path.stat().st_mtime
+ 
+             # Compare against the most recent scan timestamp
+             if file_mtime > self._last_scan_timestamp:
+                 return False
+ 
+             return True
+         except (OSError, KeyError):
+             return False
+ 
+     def get_cache_stats(self) -> Dict:
+         """
+         Gets file-cache statistics.
+ 
+         Returns:
+             Dictionary with cache statistics
+         """
+         return {
+             'cached_files': len(self._file_cache),
+             'last_scan_timestamp': self._last_scan_timestamp,
+             'cache_ttl': self._cache_ttl
+         }
+ 
+ 
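The maintenance hooks round out the cache: get_cache_stats reports size and configuration (note that _cache_ttl is reported but, in this version, invalidation is purely mtime-based), and clear_cache resets everything. A short sketch, continuing the extractor example above:

stats = extractor.get_cache_stats()
print(f"cached files: {stats['cached_files']}, ttl: {stats['cache_ttl']}s")

# Editing a file bumps its mtime past _last_scan_timestamp, so its entry
# is re-parsed on the next extract_from_file() call; clear_cache() drops
# every entry at once.
extractor.clear_cache()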