evolutia 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutia/__init__.py +10 -1
- evolutia/async_llm_providers.py +157 -0
- evolutia/cache/__init__.py +9 -0
- evolutia/cache/exercise_cache.py +226 -0
- evolutia/cache/llm_cache.py +487 -0
- evolutia/complexity_validator.py +33 -31
- evolutia/config_manager.py +60 -41
- evolutia/evolutia_engine.py +341 -66
- evolutia/exam_generator.py +44 -43
- evolutia/exceptions.py +38 -0
- evolutia/exercise_analyzer.py +54 -91
- evolutia/imports.py +175 -0
- evolutia/llm_providers.py +224 -60
- evolutia/material_extractor.py +166 -88
- evolutia/rag/rag_indexer.py +107 -90
- evolutia/rag/rag_retriever.py +130 -103
- evolutia/retry_utils.py +280 -0
- evolutia/utils/json_parser.py +29 -19
- evolutia/utils/markdown_parser.py +185 -159
- evolutia/utils/math_extractor.py +153 -144
- evolutia/validation/__init__.py +1 -0
- evolutia/validation/args_validator.py +253 -0
- evolutia/validation/config_validator.py +502 -0
- evolutia/variation_generator.py +252 -50
- evolutia-0.1.2.dist-info/METADATA +536 -0
- evolutia-0.1.2.dist-info/RECORD +37 -0
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/WHEEL +1 -1
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/licenses/LICENSE +1 -1
- evolutia_cli.py +30 -7
- evolutia-0.1.0.dist-info/METADATA +0 -723
- evolutia-0.1.0.dist-info/RECORD +0 -27
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/entry_points.txt +0 -0
- {evolutia-0.1.0.dist-info → evolutia-0.1.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validador de configuración para EvolutIA.
|
|
3
|
+
Valida exhaustivamente la configuración del sistema.
|
|
4
|
+
"""
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Any, List, Tuple, Optional, Union
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConfigValidationError(Exception):
    """Raised when the system configuration fails validation.

    Carries a human-readable summary message plus an optional list with
    the individual validation errors that were found.
    """

    def __init__(self, message: str, errors: Optional[List[str]] = None):
        super().__init__(message)
        self.message = message                     # summary of the failure
        self.errors = errors if errors else []     # detailed error messages
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ConfigValidator:
    """Validates the EvolutIA system configuration.

    Each ``_validate_*`` method checks one configuration section and
    records problems into ``self.errors`` (fatal) or ``self.warnings``
    (informational; logged but never fatal). All user-facing messages
    are kept in Spanish, matching the rest of the project.
    """

    # Accepted values for enumerated configuration fields.
    VALID_API_PROVIDERS = {
        'openai', 'anthropic', 'local', 'gemini', 'deepseek', 'generic'
    }
    VALID_EMBEDDING_PROVIDERS = {'openai', 'sentence-transformers'}
    VALID_VECTOR_STORE_TYPES = {'chromadb'}

    def __init__(self):
        self.errors: List[str] = []
        self.warnings: List[str] = []

    def validate_config(self, config: Dict[str, Any]) -> Tuple[bool, List[str]]:
        """Validate the whole configuration.

        Args:
            config: Configuration dictionary.

        Returns:
            Tuple ``(is_valid, error_messages)`` where ``is_valid`` is True
            when the configuration is valid, and ``error_messages`` lists
            the problems found (empty when ``is_valid`` is True).
        """
        self.errors = []
        self.warnings = []

        # Validate every section in a single pass so the caller sees the
        # complete list of problems instead of fixing them one at a time.
        self._validate_paths(config.get('paths', {}))
        self._validate_api(config.get('api', {}))
        self._validate_exam(config.get('exam', {}))
        self._validate_generation(config.get('generation', {}))
        self._validate_rag(config.get('rag', {}))

        # Warnings never invalidate the configuration; they are only logged.
        for warning in self.warnings:
            logger.warning(f"[ConfigValidator] {warning}")

        return len(self.errors) == 0, self.errors

    # ------------------------------------------------------------------
    # Generic field checks shared by the section validators below.
    # Factoring these out removes the heavy duplication that previously
    # existed across the per-provider validators.
    # ------------------------------------------------------------------

    def _check_string(self, path: str, value: Any) -> None:
        """Record an error when *value* is present but not a string."""
        if value and not isinstance(value, str):
            self.errors.append(
                f"{path} debe ser string, obtenido: {type(value)}"
            )

    def _check_positive_int(self, path: str, value: Any) -> None:
        """Record an error when *value* is present but not a positive int."""
        if value is not None:
            if not isinstance(value, int) or value <= 0:
                self.errors.append(
                    f"{path} debe ser entero positivo, obtenido: {value}"
                )

    def _check_non_negative_int(self, path: str, value: Any) -> None:
        """Record an error when *value* is present but not an int >= 0."""
        if value is not None:
            if not isinstance(value, int) or value < 0:
                self.errors.append(
                    f"{path} debe ser entero no negativo, obtenido: {value}"
                )

    def _check_number_in_range(self, path: str, value: Any,
                               low: Union[int, float],
                               high: Union[int, float]) -> None:
        """Record an error when *value* is present but not a number in [low, high]."""
        if value is not None:
            if not isinstance(value, (int, float)) or not (low <= value <= high):
                self.errors.append(
                    f"{path} debe estar entre {low} y {high}, "
                    f"obtenido: {value}"
                )

    def _check_int_in_range(self, path: str, value: Any,
                            low: int, high: int) -> None:
        """Record an error when *value* is present but not an int in [low, high]."""
        if value is not None:
            if not isinstance(value, int) or not (low <= value <= high):
                self.errors.append(
                    f"{path} debe estar entre {low} y {high}, "
                    f"obtenido: {value}"
                )

    def _check_base_url(self, path: str, base_url: Any) -> None:
        """Record an error when *base_url* is present but not an http(s) URL."""
        if base_url:
            if not isinstance(base_url, str):
                self.errors.append(
                    f"{path} debe ser string, obtenido: {type(base_url)}"
                )
            elif not base_url.startswith(('http://', 'https://')):
                self.errors.append(
                    f"{path} debe ser una URL válida, "
                    f"obtenido: {base_url}"
                )

    # ------------------------------------------------------------------
    # Section validators
    # ------------------------------------------------------------------

    def _validate_paths(self, paths: Dict[str, Any]) -> None:
        """Validate the paths section."""
        if not paths:
            self.warnings.append("No se encontró configuración de rutas")
            return

        # base_path must already exist and be a directory.
        base_path = paths.get('base_path')
        if base_path:
            path = Path(base_path)
            if not path.exists():
                self.errors.append(
                    f"paths.base_path no existe: {base_path}"
                )
            elif not path.is_dir():
                self.errors.append(
                    f"paths.base_path no es un directorio: {base_path}"
                )

        # materials_directories is either the literal 'auto' (automatic
        # discovery) or a list of topic sub-directories under base_path.
        materials_dirs = paths.get('materials_directories')
        if materials_dirs:
            if isinstance(materials_dirs, str) and materials_dirs == 'auto':
                pass  # special value: automatic discovery
            elif isinstance(materials_dirs, list):
                for topic in materials_dirs:
                    if base_path:
                        topic_path = Path(base_path) / topic
                        if not topic_path.exists():
                            # Missing topic directories are only warnings.
                            self.warnings.append(
                                f"paths.materials_directories contiene tema no existente: {topic}"
                            )

    def _validate_api(self, api: Dict[str, Any]) -> None:
        """Validate the API section."""
        if not api:
            self.warnings.append("No se encontró configuración de API")
            return

        default_provider = api.get('default_provider')
        if default_provider:
            if default_provider not in self.VALID_API_PROVIDERS:
                self.errors.append(
                    f"api.default_provider debe ser uno de {sorted(self.VALID_API_PROVIDERS)}, "
                    f"obtenido: {default_provider}"
                )

        providers = api.get('providers', {})
        if providers:
            for provider_name, provider_config in providers.items():
                if provider_name not in self.VALID_API_PROVIDERS:
                    # Unknown providers are skipped with a warning so that
                    # extra entries do not make the whole file invalid.
                    self.warnings.append(
                        f"api.providers contiene proveedor desconocido: {provider_name}"
                    )
                    continue

                self._validate_provider_config(provider_name, provider_config)

    def _validate_provider_config(self, provider_name: str,
                                  provider_config: Dict[str, Any]) -> None:
        """Dispatch to the validator for a specific (known) provider."""
        dispatch = {
            'openai': self._validate_openai_config,
            'anthropic': self._validate_anthropic_config,
            'local': self._validate_local_config,
            'gemini': self._validate_gemini_config,
            'deepseek': self._validate_deepseek_config,
            'generic': self._validate_generic_config,
        }
        validator = dispatch.get(provider_name)
        if validator is not None:
            validator(provider_config)

    def _validate_openai_config(self, config: Dict[str, Any]) -> None:
        """Validate the OpenAI provider configuration."""
        self._check_string('api.providers.openai.model', config.get('model'))
        self._check_positive_int('api.providers.openai.max_tokens',
                                 config.get('max_tokens'))
        # Temperature range [0, 2] for this provider.
        self._check_number_in_range('api.providers.openai.temperature',
                                    config.get('temperature'), 0, 2)

    def _validate_anthropic_config(self, config: Dict[str, Any]) -> None:
        """Validate the Anthropic provider configuration."""
        self._check_string('api.providers.anthropic.model', config.get('model'))
        self._check_positive_int('api.providers.anthropic.max_tokens',
                                 config.get('max_tokens'))
        # Temperature range [0, 1] for this provider (narrower than others).
        self._check_number_in_range('api.providers.anthropic.temperature',
                                    config.get('temperature'), 0, 1)

    def _validate_local_config(self, config: Dict[str, Any]) -> None:
        """Validate the local-model provider configuration."""
        self._check_base_url('api.providers.local.base_url',
                             config.get('base_url'))
        self._check_string('api.providers.local.model', config.get('model'))

        timeout = config.get('timeout')
        if timeout is not None:
            if not isinstance(timeout, (int, float)) or timeout <= 0:
                self.errors.append(
                    f"api.providers.local.timeout debe ser numérico positivo, "
                    f"obtenido: {timeout}"
                )

    def _validate_gemini_config(self, config: Dict[str, Any]) -> None:
        """Validate the Gemini provider configuration."""
        self._check_string('api.providers.gemini.model', config.get('model'))
        self._check_number_in_range('api.providers.gemini.temperature',
                                    config.get('temperature'), 0, 2)

    def _validate_deepseek_config(self, config: Dict[str, Any]) -> None:
        """Validate the DeepSeek provider configuration."""
        self._check_string('api.providers.deepseek.model', config.get('model'))
        self._check_number_in_range('api.providers.deepseek.temperature',
                                    config.get('temperature'), 0, 2)

    def _validate_generic_config(self, config: Dict[str, Any]) -> None:
        """Validate the generic provider configuration."""
        self._check_base_url('api.providers.generic.base_url',
                             config.get('base_url'))
        self._check_string('api.providers.generic.model', config.get('model'))

    def _validate_exam(self, exam: Dict[str, Any]) -> None:
        """Validate the exam section."""
        if not exam:
            self.warnings.append("No se encontró configuración de examen")
            return

        default = exam.get('default', {})
        if default:
            self._validate_exam_default(default)

        keywords = exam.get('keywords', {})
        if keywords:
            self._validate_exam_keywords(keywords)

    def _validate_exam_default(self, default: Dict[str, Any]) -> None:
        """Validate the default exam settings."""
        self._check_string('exam.default.subject', default.get('subject'))
        self._check_positive_int('exam.default.points_per_exercise',
                                 default.get('points_per_exercise'))

        duration_hours = default.get('duration_hours')
        if duration_hours is not None:
            # Strict lower bound: a zero-length exam is invalid.
            if not isinstance(duration_hours, (int, float)) or not (0 < duration_hours <= 24):
                self.errors.append(
                    f"exam.default.duration_hours debe estar entre 0 y 24, "
                    f"obtenido: {duration_hours}"
                )

    def _validate_exam_keywords(self, keywords: Dict[str, Any]) -> None:
        """Validate the per-topic exam keyword lists."""
        if not isinstance(keywords, dict):
            self.errors.append(
                f"exam.keywords debe ser un diccionario, obtenido: {type(keywords)}"
            )
            return

        for topic, kw_list in keywords.items():
            if not isinstance(kw_list, list):
                self.errors.append(
                    f"exam.keywords.{topic} debe ser una lista, obtenido: {type(kw_list)}"
                )
            else:
                for kw in kw_list:
                    if not isinstance(kw, str):
                        self.errors.append(
                            f"exam.keywords.{topic} debe contener solo strings, "
                            f"encontrado: {kw} ({type(kw)})"
                        )

    def _validate_generation(self, generation: Dict[str, Any]) -> None:
        """Validate the generation section."""
        if not generation:
            self.warnings.append("No se encontró configuración de generación")
            return

        self._check_int_in_range('generation.max_workers',
                                 generation.get('max_workers'), 1, 50)

        request_delay = generation.get('request_delay')
        if request_delay is not None:
            if not isinstance(request_delay, (int, float)) or request_delay < 0:
                self.errors.append(
                    f"generation.request_delay debe ser numérico no negativo, "
                    f"obtenido: {request_delay}"
                )

        self._check_non_negative_int('generation.retry_attempts',
                                     generation.get('retry_attempts'))

        llm_params = generation.get('llm_params', {})
        if llm_params:
            self._validate_llm_params(llm_params)

        complexity = generation.get('complexity', {})
        if complexity:
            self._validate_complexity_config(complexity)

    def _validate_llm_params(self, llm_params: Dict[str, Any]) -> None:
        """Validate the default LLM generation parameters."""
        self._check_number_in_range(
            'generation.llm_params.default_temperature',
            llm_params.get('default_temperature'), 0, 2)
        self._check_positive_int(
            'generation.llm_params.default_max_tokens',
            llm_params.get('default_max_tokens'))

    def _validate_complexity_config(self, complexity: Dict[str, Any]) -> None:
        """Validate the complexity-related generation settings."""
        self._check_number_in_range(
            'generation.complexity.min_improvement_percent',
            complexity.get('min_improvement_percent'), 0, 100)
        self._check_non_negative_int(
            'generation.complexity.required_improvements_count',
            complexity.get('required_improvements_count'))

    def _validate_rag(self, rag: Dict[str, Any]) -> None:
        """Validate the RAG section (the whole section is optional)."""
        if not rag:
            return  # RAG is optional: no warning, no error.

        vector_store = rag.get('vector_store', {})
        if vector_store:
            self._validate_rag_vector_store(vector_store)

        embeddings = rag.get('embeddings', {})
        if embeddings:
            self._validate_rag_embeddings(embeddings)

        retrieval = rag.get('retrieval', {})
        if retrieval:
            self._validate_rag_retrieval(retrieval)

        chunking = rag.get('chunking', {})
        if chunking:
            self._validate_rag_chunking(chunking)

    def _validate_rag_vector_store(self, vector_store: Dict[str, Any]) -> None:
        """Validate the RAG vector-store settings."""
        store_type = vector_store.get('type')
        if store_type and store_type not in self.VALID_VECTOR_STORE_TYPES:
            self.errors.append(
                f"rag.vector_store.type debe ser uno de {sorted(self.VALID_VECTOR_STORE_TYPES)}, "
                f"obtenido: {store_type}"
            )

        persist_directory = vector_store.get('persist_directory')
        if persist_directory:
            # The directory may not exist yet (it can be created later),
            # but if the path exists it must be a directory.
            path = Path(persist_directory)
            if path.exists() and not path.is_dir():
                self.errors.append(
                    f"rag.vector_store.persist_directory debe ser un directorio, "
                    f"obtenido: {persist_directory}"
                )

    def _validate_rag_embeddings(self, embeddings: Dict[str, Any]) -> None:
        """Validate the RAG embeddings settings."""
        provider = embeddings.get('provider')
        if provider and provider not in self.VALID_EMBEDDING_PROVIDERS:
            self.errors.append(
                f"rag.embeddings.provider debe ser uno de {sorted(self.VALID_EMBEDDING_PROVIDERS)}, "
                f"obtenido: {provider}"
            )

        self._check_string('rag.embeddings.model', embeddings.get('model'))
        self._check_positive_int('rag.embeddings.batch_size',
                                 embeddings.get('batch_size'))

    def _validate_rag_retrieval(self, retrieval: Dict[str, Any]) -> None:
        """Validate the RAG retrieval settings."""
        self._check_int_in_range('rag.retrieval.top_k',
                                 retrieval.get('top_k'), 1, 100)
        self._check_number_in_range('rag.retrieval.similarity_threshold',
                                    retrieval.get('similarity_threshold'), 0, 1)

    def _validate_rag_chunking(self, chunking: Dict[str, Any]) -> None:
        """Validate the RAG chunking settings."""
        chunk_size = chunking.get('chunk_size')
        chunk_overlap = chunking.get('chunk_overlap')

        self._check_positive_int('rag.chunking.chunk_size', chunk_size)
        self._check_non_negative_int('rag.chunking.chunk_overlap', chunk_overlap)

        # Fix: guard with isinstance so malformed (non-int) values cannot
        # raise a TypeError on the '>=' comparison below.
        if (isinstance(chunk_size, int) and isinstance(chunk_overlap, int)
                and chunk_size and chunk_overlap and chunk_overlap >= chunk_size):
            self.errors.append(
                f"rag.chunking.chunk_overlap debe ser menor que chunk_size, "
                f"obtenido: overlap={chunk_overlap}, size={chunk_size}"
            )
|