evolutia 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutia/__init__.py +5 -0
- evolutia/complexity_validator.py +179 -0
- evolutia/config_manager.py +208 -0
- evolutia/evolutia_engine.py +284 -0
- evolutia/exam_generator.py +328 -0
- evolutia/exercise_analyzer.py +256 -0
- evolutia/llm_providers.py +217 -0
- evolutia/material_extractor.py +237 -0
- evolutia/rag/__init__.py +6 -0
- evolutia/rag/consistency_validator.py +200 -0
- evolutia/rag/context_enricher.py +285 -0
- evolutia/rag/enhanced_variation_generator.py +349 -0
- evolutia/rag/rag_indexer.py +424 -0
- evolutia/rag/rag_manager.py +221 -0
- evolutia/rag/rag_retriever.py +366 -0
- evolutia/utils/__init__.py +4 -0
- evolutia/utils/json_parser.py +69 -0
- evolutia/utils/markdown_parser.py +160 -0
- evolutia/utils/math_extractor.py +144 -0
- evolutia/variation_generator.py +97 -0
- evolutia-0.1.0.dist-info/METADATA +723 -0
- evolutia-0.1.0.dist-info/RECORD +27 -0
- evolutia-0.1.0.dist-info/WHEEL +5 -0
- evolutia-0.1.0.dist-info/entry_points.txt +2 -0
- evolutia-0.1.0.dist-info/licenses/LICENSE +201 -0
- evolutia-0.1.0.dist-info/top_level.txt +2 -0
- evolutia_cli.py +160 -0
evolutia/__init__.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validador de complejidad.
|
|
3
|
+
Verifica que las variaciones generadas sean más complejas que los originales.
|
|
4
|
+
"""
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Dict
|
|
7
|
+
|
|
8
|
+
from .exercise_analyzer import ExerciseAnalyzer
|
|
9
|
+
from .utils.math_extractor import extract_math_expressions, estimate_complexity
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ComplexityValidator:
    """Checks that a generated variation is harder than its source exercise."""

    def __init__(self):
        """Create the validator and its internal exercise analyzer."""
        self.analyzer = ExerciseAnalyzer()

    def validate(self, original_exercise: Dict, original_analysis: Dict,
                 variation: Dict) -> Dict:
        """
        Compare a generated variation against the original exercise.

        Args:
            original_exercise: The source exercise.
            original_analysis: Pre-computed analysis of the source exercise.
            variation: Generated variation (expects 'variation_content' and,
                optionally, 'variation_solution').

        Returns:
            A dict with:
            - 'is_valid': bool
            - 'variation_analysis': analysis of the variation
            - 'improvements': list of detected improvements
            - 'warnings': list of detected regressions
            plus raw/derived complexity figures for reporting.
        """
        content = variation.get('variation_content', '')
        solution = variation.get('variation_solution', '')

        # An empty variation can never be valid; short-circuit before analysis.
        if not content:
            return {
                'is_valid': False,
                'reason': 'Variación sin contenido',
                'variation_analysis': None,
                'improvements': [],
                'warnings': ['Variación generada está vacía']
            }

        # Analyze the variation as if it were a regular exercise.
        variation_analysis = self.analyzer.analyze({
            'content': content,
            'solution': solution
        })

        improvements = []
        warnings = []

        # Total complexity: require at least a 10% increase to count it as a win.
        orig_total = original_analysis.get('total_complexity', 0)
        var_total = variation_analysis.get('total_complexity', 0)
        if var_total > orig_total * 1.1:
            improvements.append(
                f"Complejidad total aumentó de {orig_total:.2f} a {var_total:.2f}"
            )
        elif var_total < orig_total:
            warnings.append(
                f"Complejidad total disminuyó de {orig_total:.2f} a {var_total:.2f}"
            )

        # Number of solution steps.
        orig_steps = original_analysis.get('solution_steps', 0)
        var_steps = variation_analysis.get('solution_steps', 0)
        if var_steps > orig_steps:
            improvements.append(
                f"Número de pasos aumentó de {orig_steps} a {var_steps}"
            )
        elif var_steps < orig_steps and orig_steps > 0:
            warnings.append(
                f"Número de pasos disminuyó de {orig_steps} a {var_steps}"
            )

        # Variable count (only ever counted as an improvement, never a warning).
        orig_vars = original_analysis.get('num_variables', 0)
        var_vars = variation_analysis.get('num_variables', 0)
        if var_vars > orig_vars:
            improvements.append(
                f"Número de variables aumentó de {orig_vars} a {var_vars}"
            )

        # Concept count (improvement only, like variables).
        orig_concepts = original_analysis.get('num_concepts', 0)
        var_concepts = variation_analysis.get('num_concepts', 0)
        if var_concepts > orig_concepts:
            improvements.append(
                f"Número de conceptos aumentó de {orig_concepts} a {var_concepts}"
            )

        # Mathematical complexity, same 10% threshold as total complexity.
        orig_math = original_analysis.get('math_complexity', 0)
        var_math = variation_analysis.get('math_complexity', 0)
        if var_math > orig_math * 1.1:
            improvements.append(
                f"Complejidad matemática aumentó de {orig_math:.2f} a {var_math:.2f}"
            )
        elif var_math < orig_math:
            warnings.append(
                f"Complejidad matemática disminuyó de {orig_math:.2f} a {var_math:.2f}"
            )

        # Per-operation counts (integrals, derivatives, ...), improvement only.
        orig_ops = original_analysis.get('operations', {})
        var_ops = variation_analysis.get('operations', {})
        for op_type in ['integrals', 'derivatives', 'sums', 'vectors', 'matrices']:
            before = orig_ops.get(op_type, 0)
            after = var_ops.get(op_type, 0)
            if after > before:
                improvements.append(
                    f"Operaciones de {op_type} aumentaron de {before} a {after}"
                )

        # Validity rule: strictly higher total complexity AND at least two
        # improved metrics.
        is_valid = var_total > orig_total and len(improvements) >= 2

        if not is_valid and warnings:
            warnings.append("La variación no cumple con los requisitos mínimos de complejidad")

        pct_increase = (
            (var_total - orig_total) / orig_total * 100
            if orig_total > 0 else 0
        )
        return {
            'is_valid': is_valid,
            'variation_analysis': variation_analysis,
            'improvements': improvements,
            'warnings': warnings,
            'original_complexity': orig_total,
            'variation_complexity': var_total,
            'complexity_increase': var_total - orig_total,
            'complexity_increase_percent': pct_increase
        }

    def validate_batch(self, exercises_and_variations: list) -> list:
        """
        Validate a batch of (original_exercise, original_analysis, variation)
        triples, logging the outcome of each one.

        Returns:
            Validation result dicts, in input order.
        """
        results = []
        for original, analysis, candidate in exercises_and_variations:
            outcome = self.validate(original, analysis, candidate)
            results.append(outcome)
            if outcome['is_valid']:
                logger.info(f"Variación válida: {len(outcome['improvements'])} mejoras detectadas")
            else:
                logger.warning(f"Variación inválida: {outcome.get('reason', 'Complejidad insuficiente')}")
        return results
|
|
179
|
+
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Gestor de configuración automática.
|
|
4
|
+
Genera config.yaml basado en la estructura del proyecto y metadatos de archivos.
|
|
5
|
+
"""
|
|
6
|
+
import yaml
|
|
7
|
+
import logging
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, List, Set, Any
|
|
10
|
+
import sys
|
|
11
|
+
import json
|
|
12
|
+
# jsonschema is an optional dependency: when it is not installed, schema
# validation of the configuration is skipped (see ConfigManager.validate_config).
try:
    import jsonschema
    JSONSCHEMA_AVAILABLE = True
except ImportError:
    JSONSCHEMA_AVAILABLE = False

# Configure logging.
# NOTE(review): calling basicConfig() at import time in a library module
# mutates the global logging configuration of any importing application —
# confirm this is intended rather than configuring in the CLI entry point.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Directory names excluded when discovering topic folders under the project root.
EXCLUDED_DIRS = {
    'evolutia',
    #'examenes',
    #'tareas',
    'proyecto',
    '_build',
    '.git',
    '__pycache__',
    '.ipynb_checkpoints',
    'images',
    'static',
    'storage',
    'thumbnails',
    'config'
}
|
|
41
|
+
|
|
42
|
+
class ConfigManager:
    """Discovers topic directories and keywords on disk and keeps the YAML
    configuration file in sync with the current project layout."""

    def __init__(self, base_path: Path, config_path: Path = None):
        """Resolve which configuration file to use.

        Args:
            base_path: Project root to scan for topics and config files.
            config_path: Explicit config file path. When omitted, falls back to
                ``<base_path>/evolutia_config.yaml`` if present, else the
                internal default ``<base_path>/evolutia/config/config.yaml``.
        """
        self.base_path = Path(base_path)

        if config_path:
            self.config_path = Path(config_path)
        else:
            # Try to find evolutia_config.yaml at the project root first.
            root_config = self.base_path / 'evolutia_config.yaml'
            if root_config.exists():
                self.config_path = root_config
            else:
                # Internal default: evolutia/config/config.yaml
                # self.base_path is usually the project root
                # If base_path is the root, the config lives at root/evolutia/config/config.yaml
                self.config_path = self.base_path / 'evolutia' / 'config' / 'config.yaml'

        logger.info(f"Usando archivo de configuración: {self.config_path}")

    def validate_config(self, config_data: Dict[str, Any]) -> bool:
        """Validate the configuration against the bundled JSON schema.

        Returns True when validation passes or cannot be performed (missing
        jsonschema dependency or missing schema file); False on schema errors.
        """
        if not JSONSCHEMA_AVAILABLE:
            logger.warning("jsonschema no instalado, omitiendo validación de esquema.")
            return True

        # Locate the schema file next to this module.
        # Try the common relative path.
        schema_path = Path(__file__).parent / 'schemas' / 'config.schema.json'

        if not schema_path.exists():
            logger.warning(f"No se encontró esquema en {schema_path}, omitiendo validación.")
            return True

        try:
            with open(schema_path, 'r', encoding='utf-8') as f:
                schema = json.load(f)

            jsonschema.validate(instance=config_data, schema=schema)
            logger.info("Configuración válida según esquema.")
            return True
        except jsonschema.exceptions.ValidationError as e:
            logger.error(f"Error de validación de configuración: {e.message}")
            logger.error(f"Ruta del error: {' -> '.join([str(p) for p in e.path])}")
            return False
        except Exception as e:
            logger.error(f"Error inesperado validando esquema: {e}")
            return False

    def load_current_config(self) -> Dict[str, Any]:
        """Load the current configuration, returning {} if absent or unreadable.

        Validation failures are logged but do not prevent loading.
        """
        if self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    config = yaml.safe_load(f) or {}
                # Validate on load (result is only logged, not enforced).
                if config:
                    self.validate_config(config)
                return config
            except Exception as e:
                logger.error(f"Error leyendo config actual: {e}")
                return {}
        return {}

    def discover_topics(self) -> List[str]:
        """Discover topic directories: non-excluded subdirs containing .md files."""
        topics = []
        for p in self.base_path.iterdir():
            if p.is_dir() and p.name not in EXCLUDED_DIRS and not p.name.startswith('.'):
                # Check whether it contains relevant markdown files (readings or practice sets).
                md_files = list(p.glob("*.md"))
                if md_files:
                    topics.append(p.name)
        return sorted(topics)

    def extract_keywords_from_topic(self, topic: str) -> List[str]:
        """Collect 'keywords' from the YAML frontmatter of the topic's .md files.

        Returns a sorted, de-duplicated list; unreadable files are skipped with
        a warning.
        """
        topic_dir = self.base_path / topic
        keywords_set: Set[str] = set()

        # Scan every markdown file in the topic directory.
        for md_file in topic_dir.glob("*.md"):
            try:
                content = md_file.read_text(encoding='utf-8')
                # Simple YAML frontmatter extraction: text between leading '---' markers.
                if content.startswith('---'):
                    end_fm = content.find('---', 3)
                    if end_fm != -1:
                        fm_text = content[3:end_fm]
                        fm = yaml.safe_load(fm_text)
                        if fm and 'keywords' in fm:
                            kw = fm['keywords']
                            # Frontmatter may declare a list or a single string.
                            if isinstance(kw, list):
                                keywords_set.update(kw)
                            elif isinstance(kw, str):
                                keywords_set.add(kw)
            except Exception as e:
                logger.warning(f"No se pudo leer keywords de {md_file}: {e}")

        return sorted(list(keywords_set))

    def update_config(self):
        """Refresh the configuration file from the current project layout.

        Rewrites paths.materials_directories and exam.keywords to reflect the
        discovered topics, preserving any other existing settings.
        """
        current_config = self.load_current_config()

        # Discover topic directories.
        topics = self.discover_topics()
        logger.info(f"Temas encontrados: {topics}")

        # Extract keywords per topic.
        topic_keywords = {}
        for topic in topics:
            kws = self.extract_keywords_from_topic(topic)
            if kws:
                topic_keywords[topic] = kws
                logger.info(f"Keywords para {topic}: {len(kws)}")
            else:
                logger.warning(f"No se encontraron keywords para {topic}")

        # Seed the base structure if missing.
        if 'paths' not in current_config:
            current_config['paths'] = {'base_path': '..'}

        if 'exam' not in current_config:
            current_config['exam'] = {
                'default': {
                    'subject': "IF3602 - II semestre 2025",
                    'points_per_exercise': 25,
                    'duration_hours': 2.0
                }
            }

        # Update dynamic values.
        current_config['paths']['materials_directories'] = topics

        if 'keywords' not in current_config['exam']:
            current_config['exam']['keywords'] = {}

        # Existing keywords could be merged with the extracted ones; here they
        # are replaced outright so the config reflects the project's current state.
        current_config['exam']['keywords'] = topic_keywords

        # Persist the configuration.
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_path, 'w', encoding='utf-8') as f:
            yaml.dump(current_config, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

        logger.info(f"Configuración actualizada en {self.config_path}")
|
|
189
|
+
|
|
190
|
+
def main():
    """CLI entry point: discover topics/keywords and refresh the config file."""
    import argparse

    parser = argparse.ArgumentParser(description='Gestor de configuración automática de Evolutia')
    parser.add_argument('--config', type=str, help='Ruta al archivo de configuración a actualizar')
    parser.add_argument('--base_path', type=str, default='.', help='Ruta base del proyecto')
    args = parser.parse_args()

    root = Path(args.base_path).resolve()
    # When invoked from inside the 'evolutia' package directory with the
    # default path, treat the parent directory as the project root.
    if args.base_path == '.' and root.name == 'evolutia':
        root = root.parent

    explicit_config = Path(args.config) if args.config else None

    ConfigManager(root, explicit_config).update_config()
|
|
206
|
+
|
|
207
|
+
if __name__ == '__main__':
|
|
208
|
+
main()
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Motor principal de EvoluIA.
|
|
3
|
+
Encapsula la lógica de orquestación, extracción, análisis y generación paralela.
|
|
4
|
+
"""
|
|
5
|
+
import logging
|
|
6
|
+
import random
|
|
7
|
+
import concurrent.futures
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Dict, Optional, Tuple, Any
|
|
11
|
+
from tqdm import tqdm
|
|
12
|
+
|
|
13
|
+
# Imports from internal modules
|
|
14
|
+
from .material_extractor import MaterialExtractor
|
|
15
|
+
from .exercise_analyzer import ExerciseAnalyzer
|
|
16
|
+
from .variation_generator import VariationGenerator
|
|
17
|
+
from .complexity_validator import ComplexityValidator
|
|
18
|
+
from .exam_generator import ExamGenerator
|
|
19
|
+
from .config_manager import ConfigManager
|
|
20
|
+
|
|
21
|
+
# Conditional RAG imports
|
|
22
|
+
# Optional RAG support: the subsystem lives in the evolutia.rag subpackage.
# Fix: the original used absolute `from rag...` imports, which cannot resolve
# when this module is imported as part of the `evolutia` package (the sibling
# modules above are imported relatively) — so the ImportError was always
# swallowed and RAG_AVAILABLE was silently False. Use relative imports, like
# the rest of this module's internal imports.
try:
    from .rag.rag_manager import RAGManager
    from .rag.enhanced_variation_generator import EnhancedVariationGenerator
    from .rag.consistency_validator import ConsistencyValidator
    RAG_AVAILABLE = True
except ImportError:
    # RAG dependencies (vector store, embeddings, ...) not installed.
    RAG_AVAILABLE = False
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
class EvolutiaEngine:
    """
    Central engine coordinating the EvolutIA workflow: material extraction,
    exercise analysis, parallel variation generation and exam file output.
    """

    def __init__(self, base_path: Path, config_path: Optional[Path] = None):
        """Load configuration for the given project root.

        Args:
            base_path: Project root containing topic directories.
            config_path: Optional explicit path to the YAML configuration.
        """
        self.base_path = base_path
        self.config_path = config_path
        self.rag_manager = None  # set lazily by initialize_rag()

        # Load configuration manager and cache the full config dict.
        self.config_manager = ConfigManager(base_path, config_path)
        self.full_config = self.config_manager.load_current_config()

    def initialize_rag(self, force_reindex: bool = False) -> bool:
        """Initialize the RAG subsystem if its dependencies are available.

        Args:
            force_reindex: Rebuild the index even if one already exists.

        Returns:
            True on success, False when RAG is unavailable or initialization fails.
        """
        if not RAG_AVAILABLE:
            logger.error("RAG solicitado pero no disponible. Instala dependencias.")
            return False

        try:
            self.rag_manager = RAGManager(config_path=self.config_path, base_path=self.base_path)
            self.rag_manager.initialize(force_reindex=force_reindex)
            return True
        except Exception as e:
            logger.error(f"Error inicializando RAG: {e}")
            return False

    def get_api_config(self, provider: str) -> Dict[str, Any]:
        """Return the provider-specific API settings ({} when absent)."""
        return self.full_config.get('api', {}).get(provider, {})

    def extract_materials_and_exercises(self, topics: List[str], label_filter: Optional[List[str]] = None) -> Tuple[List[Dict], List[Dict]]:
        """
        Steps 1 & 2: extract didactic materials and list all available exercises.

        Args:
            topics: Topic directory names to search; empty means search everything.
            label_filter: Optional exercise labels to keep.

        Returns:
            (materials, exercises); both empty when nothing was found.
        """
        logger.info("Paso 1: Extrayendo materiales didácticos...")
        extractor = MaterialExtractor(self.base_path)
        materials = []

        # 1. Extract per requested topic.
        if topics:
            for topic in topics:
                topic_materials = extractor.extract_by_topic(topic)
                if topic_materials:
                    materials.extend(topic_materials)
                else:
                    logger.warning(f"No se encontraron materiales para el tema: {topic}")

        # 2. Fallback: search all directories when nothing was found yet or no
        # topics were given (e.g. list mode).
        if not materials:
            logger.info("Buscando en todos los directorios...")
            for topic_dir in self.base_path.iterdir():
                if topic_dir.is_dir() and topic_dir.name not in ['_build', 'evolutia', 'proyecto', '.git']:
                    materials.extend(extractor.extract_from_directory(topic_dir))

        if not materials:
            return [], []

        logger.info(f"Encontrados {len(materials)} archivos con materiales")

        # Collect exercises from the extracted materials.
        logger.info("Paso 2: Obteniendo ejercicios...")
        all_exercises = extractor.get_all_exercises(materials)

        # Filter by label if requested; warn (but keep the empty result) when
        # no exercise matches so callers can report available labels.
        if label_filter:
            logger.info(f"Filtrando por labels: {label_filter}")
            filtered = [ex for ex in all_exercises if ex.get('label') in label_filter]
            if not filtered:
                available = [ex.get('label') for ex in all_exercises if ex.get('label')]
                logger.warning(f"No se encontraron ejercicios con los labels solicitados. Disponibles: {available[:10]}...")
            all_exercises = filtered

        logger.info(f"Encontrados {len(all_exercises)} ejercicios")
        return materials, all_exercises

    def analyze_exercises(self, exercises: List[Dict]) -> List[Tuple[Dict, Dict]]:
        """Step 3: analyze exercise complexity.

        Returns (exercise, analysis) pairs sorted by total complexity, descending.
        """
        logger.info("Paso 3: Analizando complejidad de ejercicios...")
        analyzer = ExerciseAnalyzer()
        exercises_with_analysis = []

        for exercise in exercises:
            analysis = analyzer.analyze(exercise)
            exercises_with_analysis.append((exercise, analysis))

        # Hardest exercises first.
        exercises_with_analysis.sort(key=lambda x: x[1]['total_complexity'], reverse=True)
        return exercises_with_analysis

    def _generate_single_variation(self, generator, validator, exercise_base, analysis, args) -> Optional[Dict]:
        """Generate and validate one variation, retrying up to 3 times.

        Runs inside a worker thread; only uses thread-local state plus the
        shared generator/validator. Returns the variation dict or None.
        """
        attempt_count = 0
        while attempt_count < 3:
            try:
                # Generate according to the requested exercise type / solution mode.
                if args.type == 'multiple_choice':
                    variation = generator.generate_variation(
                        exercise_base,
                        analysis,
                        exercise_type=args.type
                    )
                elif not args.no_generar_soluciones:
                    variation = generator.generate_variation_with_solution(
                        exercise_base,
                        analysis
                    )
                else:
                    variation = generator.generate_variation(
                        exercise_base,
                        analysis,
                        exercise_type=args.type
                    )

                if not variation:
                    attempt_count += 1
                    continue

                # Validate. Fix: the original branched on args.use_rag but both
                # branches were byte-identical — every validator used here
                # (ComplexityValidator or the RAG ConsistencyValidator) exposes
                # the same .validate(...) contract, so one call suffices.
                validation = validator.validate(exercise_base, analysis, variation)
                if validation['is_valid']:
                    return variation

            except Exception as e:
                logger.error(f"Error en hilo de generación: {e}")

            attempt_count += 1
        return None

    def _generate_creation_mode(self, generator, topic, tags, complexity, ex_type) -> Optional[Dict]:
        """Create a brand-new exercise for a topic (creation mode helper).

        Returns the generated exercise dict, or None on failure.
        """
        try:
            return generator.generate_new_exercise_from_topic(
                topic,
                tags,
                difficulty=complexity,
                exercise_type=ex_type
            )
        except Exception as e:
            logger.error(f"Error en creación de ejercicio nuevo: {e}")
            return None

    def generate_variations_parallel(self,
                                     selected_exercises: List[Tuple[Dict, Dict]],
                                     args,
                                     max_workers: int = 5) -> List[Dict]:
        """
        Step 4: generate variations in parallel worker threads.

        Args:
            selected_exercises: (exercise, analysis) pairs, hardest first.
            args: CLI namespace (api, mode, type, use_rag, label, num_ejercicios,
                tema, tags, complejidad, no_generar_soluciones, ...).
            max_workers: Thread-pool size.

        Returns:
            Validated variations; may be fewer than requested if some fail.
        """
        logger.info(f"Paso 4: Generando variaciones en paralelo (Workers: {max_workers})...")

        # Pick generator/validator implementations based on RAG and mode.
        api_config = self.get_api_config(args.api)

        if (args.use_rag and self.rag_manager) or args.mode == 'creation':
            retriever = self.rag_manager.get_retriever() if (args.use_rag and self.rag_manager) else None
            generator = EnhancedVariationGenerator(api_provider=args.api, retriever=retriever)
            validator = ConsistencyValidator(retriever=retriever) if retriever else ComplexityValidator()
        else:
            generator = VariationGenerator(api_provider=args.api)
            validator = ComplexityValidator()

        # Configure the model/endpoint for the chosen provider.
        if args.api == 'local':
            generator.base_url = api_config.get('base_url', "http://localhost:11434/v1")
            generator.local_model = api_config.get('model', "llama3")
        elif args.api in ['openai', 'anthropic']:
            if 'model' in api_config:
                generator.model_name = api_config['model']

        # Build the task list according to the mode.
        tasks = []

        if args.mode == 'creation':
            # Creation mode: synthesize new exercises, cycling through
            # topics/tags round-robin to fill num_ejercicios slots.
            for i in range(args.num_ejercicios):
                current_topic = args.tema[i % len(args.tema)]
                current_tags = [args.tags[i % len(args.tags)]] if args.tags else [current_topic]

                tasks.append({
                    'func': self._generate_creation_mode,
                    'args': (generator, current_topic, current_tags, args.complejidad, args.type)
                })
        else:
            # Variation mode.

            # If explicit labels were given, use exactly those exercises.
            if args.label:
                target_exercises = list(selected_exercises)
            else:
                # Otherwise sample (with possible repeats) from the most complex
                # half — at least 5 candidates — to fill num_ejercicios slots.
                target_exercises = []
                candidates = selected_exercises[:max(5, len(selected_exercises)//2)]
                for _ in range(args.num_ejercicios):
                    if candidates:
                        target_exercises.append(random.choice(candidates))

            for ex_base, analysis in target_exercises:
                tasks.append({
                    'func': self._generate_single_variation,
                    'args': (generator, validator, ex_base, analysis, args)
                })

        # Fan out to the thread pool and collect successful results.
        valid_variations = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_task = {}
            for t in tasks:
                future = executor.submit(t['func'], *t['args'])
                future_to_task[future] = t
                # Stagger requests to avoid hitting rate limits instantly.
                time.sleep(1.0)

            for future in tqdm(concurrent.futures.as_completed(future_to_task), total=len(tasks), desc="Generando"):
                try:
                    result = future.result()
                    if result:
                        valid_variations.append(result)
                except Exception as e:
                    logger.error(f"Excepción no manejada en worker: {e}")

        logger.info(f"Generación completada. {len(valid_variations)} variaciones exitosas.")
        return valid_variations

    def generate_exam_files(self, variations: List[Dict], args, output_dir: Path, exam_number: int) -> bool:
        """Step 5: write the final exam files.

        Args:
            variations: Validated variations to include.
            args: CLI namespace (api, use_rag, mode, complejidad, subject, keywords).
            output_dir: Destination directory.
            exam_number: Sequence number for the exam files.

        Returns:
            Whatever ExamGenerator.generate_exam reports (truthy on success).
        """
        logger.info("Paso 5: Generando archivos de examen...")
        exam_gen = ExamGenerator(self.base_path)

        keywords = args.keywords or []
        metadata = {
            'model': args.api,  # Simplified, internal details hidden
            'provider': args.api,
            'rag_enabled': args.use_rag,
            'mode': args.mode,
            'target_difficulty': args.complejidad
        }

        return exam_gen.generate_exam(
            variations,
            exam_number,
            output_dir,
            args.subject,
            keywords,
            metadata=metadata
        )