evolutia 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutia/__init__.py +5 -0
- evolutia/complexity_validator.py +179 -0
- evolutia/config_manager.py +208 -0
- evolutia/evolutia_engine.py +284 -0
- evolutia/exam_generator.py +328 -0
- evolutia/exercise_analyzer.py +256 -0
- evolutia/llm_providers.py +217 -0
- evolutia/material_extractor.py +237 -0
- evolutia/rag/__init__.py +6 -0
- evolutia/rag/consistency_validator.py +200 -0
- evolutia/rag/context_enricher.py +285 -0
- evolutia/rag/enhanced_variation_generator.py +349 -0
- evolutia/rag/rag_indexer.py +424 -0
- evolutia/rag/rag_manager.py +221 -0
- evolutia/rag/rag_retriever.py +366 -0
- evolutia/utils/__init__.py +4 -0
- evolutia/utils/json_parser.py +69 -0
- evolutia/utils/markdown_parser.py +160 -0
- evolutia/utils/math_extractor.py +144 -0
- evolutia/variation_generator.py +97 -0
- evolutia-0.1.0.dist-info/METADATA +723 -0
- evolutia-0.1.0.dist-info/RECORD +27 -0
- evolutia-0.1.0.dist-info/WHEEL +5 -0
- evolutia-0.1.0.dist-info/entry_points.txt +2 -0
- evolutia-0.1.0.dist-info/licenses/LICENSE +201 -0
- evolutia-0.1.0.dist-info/top_level.txt +2 -0
- evolutia_cli.py +160 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Módulo que define los proveedores de LLM abstractos y concretos.
|
|
3
|
+
"""
|
|
4
|
+
import os
|
|
5
|
+
import logging
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Optional, Dict, Any
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
class LLMProvider(ABC):
    """Abstract base class for LLM providers.

    Subclasses supply the provider-specific API-key lookup, client setup
    and generation call. The constructor only wires them together.
    """

    def __init__(self, model_name: Optional[str] = None):
        """
        Initialize the provider.

        Args:
            model_name: Optional model identifier; subclasses fall back to
                a provider-specific default when this is None.
        """
        self.model_name = model_name
        # BUG FIX: default the client attribute up front. Previously, when
        # no API key was found, _setup_client() was never called and the
        # attribute simply did not exist — subclasses' generate_content()
        # then raised AttributeError on `self.client` instead of returning
        # None gracefully.
        self.client = None
        self.api_key = self._get_api_key()
        if self.api_key:
            self._setup_client()
        # else: some providers (e.g. local servers) do not strictly need an
        # API key from the environment, so a missing key is not fatal here.

    @abstractmethod
    def _get_api_key(self) -> Optional[str]:
        """Fetch the API key from environment variables (None if absent)."""

    @abstractmethod
    def _setup_client(self):
        """Configure the provider's API client (sets ``self.client``)."""

    @abstractmethod
    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
        """Generate content from a prompt; return None on failure."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class OpenAIProvider(LLMProvider):
    """Provider for OpenAI's chat-completions API."""

    def _get_api_key(self) -> Optional[str]:
        """Read OPENAI_API_KEY from the environment (warns when missing)."""
        key = os.getenv("OPENAI_API_KEY")
        if not key:
            logger.warning("OPENAI_API_KEY no encontrada")
        return key

    def _setup_client(self):
        """Instantiate the OpenAI client; leaves it as None when the
        library is not installed."""
        try:
            from openai import OpenAI
            self.client = OpenAI(api_key=self.api_key)
        except ImportError:
            logger.error("Biblioteca openai no instalada. Instala con: pip install openai")
            self.client = None

    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
        """Generate text via the chat-completions endpoint.

        Args:
            prompt: User prompt.
            system_prompt: Optional system message (defaults to a Spanish
                math-expert persona).
            **kwargs: Supports "temperature" (default 0.7) and
                "max_tokens" (default 2000).

        Returns:
            The generated text, or None when the client is unavailable or
            the API call fails.
        """
        # BUG FIX: use getattr — when no API key is found the base class
        # never calls _setup_client(), so the attribute may not exist and
        # a plain `self.client` access raised AttributeError.
        if not getattr(self, "client", None):
            return None

        system_content = system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."
        model = self.model_name or "gpt-4"

        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": prompt}
                ],
                temperature=kwargs.get("temperature", 0.7),
                max_tokens=kwargs.get("max_tokens", 2000)
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            logger.error(f"Error llamando a OpenAI API: {e}")
            return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class AnthropicProvider(LLMProvider):
    """Provider for Anthropic (Claude) via the Messages API."""

    def _get_api_key(self) -> Optional[str]:
        """Read ANTHROPIC_API_KEY from the environment (warns when missing)."""
        key = os.getenv("ANTHROPIC_API_KEY")
        if not key:
            logger.warning("ANTHROPIC_API_KEY no encontrada")
        return key

    def _setup_client(self):
        """Instantiate the Anthropic client; leaves it as None when the
        library is not installed."""
        try:
            import anthropic
            self.client = anthropic.Anthropic(api_key=self.api_key)
        except ImportError:
            logger.error("Biblioteca anthropic no instalada. Instala con: pip install anthropic")
            self.client = None

    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
        """Generate text via the Messages API.

        Args:
            prompt: User prompt.
            system_prompt: Optional system instruction (defaults to a
                Spanish math-expert persona).
            **kwargs: Supports "temperature" (default 0.7) and
                "max_tokens" (default 2000).

        Returns:
            The generated text, or None when the client is unavailable or
            the API call fails.
        """
        # BUG FIX: use getattr — when no API key is found the base class
        # never calls _setup_client(), so `self.client` may not exist and
        # a plain access raised AttributeError.
        if not getattr(self, "client", None):
            return None

        system_content = system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."
        model = self.model_name or "claude-3-opus-20240229"

        try:
            message = self.client.messages.create(
                model=model,
                max_tokens=kwargs.get("max_tokens", 2000),
                temperature=kwargs.get("temperature", 0.7),
                system=system_content,
                messages=[{"role": "user", "content": prompt}]
            )
            return message.content[0].text.strip()
        except Exception as e:
            logger.error(f"Error llamando a Anthropic API: {e}")
            return None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class GeminiProvider(LLMProvider):
    """Provider for Google Gemini."""

    def _get_api_key(self) -> Optional[str]:
        """Read GOOGLE_API_KEY from the environment (warns when missing)."""
        key = os.getenv("GOOGLE_API_KEY")
        if not key:
            logger.warning("GOOGLE_API_KEY no encontrada")
        return key

    def _setup_client(self):
        """Configure the google-generativeai module and keep a handle to it;
        leaves ``self.genai`` as None when the library is not installed."""
        try:
            import google.generativeai as genai
            genai.configure(api_key=self.api_key)
            self.genai = genai
        except ImportError:
            logger.error("Biblioteca google-generativeai no instalada")
            self.genai = None

    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
        """Generate text with a Gemini model.

        Args:
            prompt: User prompt.
            system_prompt: Optional system instruction, forwarded as the
                model's ``system_instruction``.
            **kwargs: Supports "temperature" (default 0.7) and
                "max_tokens" (default 8192).

        Returns:
            The generated text, or None when the library is unavailable or
            the API call fails.
        """
        # BUG FIX: use getattr — when no API key is found the base class
        # never calls _setup_client(), so `self.genai` may not exist and a
        # plain access raised AttributeError.
        if not getattr(self, "genai", None):
            return None

        model_name = self.model_name or "gemini-2.5-pro"
        # Accept the bare alias "gemini" and map it to the concrete model.
        if model_name == 'gemini':
            model_name = "gemini-2.5-pro"

        generation_config = {
            "temperature": kwargs.get("temperature", 0.7),
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": kwargs.get("max_tokens", 8192),
            "response_mime_type": "text/plain",
        }

        try:
            model_instance = self.genai.GenerativeModel(
                model_name=model_name,
                generation_config=generation_config,
                # System instructions can be passed to the model if
                # supported, or prepended to the prompt. Gemini 1.5+
                # supports the system_instruction argument.
                system_instruction=system_prompt
            )
            response = model_instance.generate_content(prompt)
            return response.text
        except Exception as e:
            logger.error(f"Error llamando a Gemini API: {e}")
            return None
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class LocalProvider(LLMProvider):
    """Provider for local models (Ollama / LM Studio) through an
    OpenAI-compatible HTTP API."""

    def __init__(self, model_name: Optional[str] = None, base_url: str = "http://localhost:11434/v1"):
        # The endpoint must be stored before the base initializer runs,
        # because the base __init__ invokes _setup_client(), which reads it.
        self.base_url = base_url
        super().__init__(model_name)

    def _get_api_key(self) -> Optional[str]:
        # Local servers do not authenticate; a placeholder token keeps the
        # base class on the "key available" path so the client gets built.
        return "not-needed"

    def _setup_client(self):
        try:
            from openai import OpenAI
        except ImportError:
            logger.error("Biblioteca openai no instalada")
            self.client = None
        else:
            self.client = OpenAI(
                base_url=self.base_url,
                api_key=self.api_key,
                timeout=300.0
            )

    def generate_content(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> Optional[str]:
        """Send a chat completion request to the local server.

        Returns the generated text, or None when the client could not be
        built or the request fails.
        """
        if not self.client:
            return None

        chat_messages = [
            {"role": "system", "content": system_prompt or "Eres un experto en métodos matemáticos para física e ingeniería."},
            {"role": "user", "content": prompt},
        ]

        try:
            completion = self.client.chat.completions.create(
                model=self.model_name or "llama3",
                messages=chat_messages,
                temperature=kwargs.get("temperature", 0.7),
                max_tokens=kwargs.get("max_tokens", 2000)
            )
            return completion.choices[0].message.content.strip()
        except Exception as e:
            logger.error(f"Error llamando a Local API: {e}")
            return None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def get_provider(provider_name: str, **kwargs) -> LLMProvider:
    """Factory: build the provider matching *provider_name*.

    Args:
        provider_name: One of "openai", "anthropic", "gemini" or "local".
        **kwargs: Forwarded verbatim to the provider constructor.

    Returns:
        A configured LLMProvider subclass instance.

    Raises:
        ValueError: If the name does not match any known provider.
    """
    if provider_name == "openai":
        return OpenAIProvider(**kwargs)
    if provider_name == "anthropic":
        return AnthropicProvider(**kwargs)
    if provider_name == "gemini":
        return GeminiProvider(**kwargs)
    if provider_name == "local":
        return LocalProvider(**kwargs)
    raise ValueError(f"Proveedor desconocido: {provider_name}")
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extractor de materiales didácticos.
|
|
3
|
+
Lee y parsea archivos Markdown de lecturas, prácticas y tareas.
|
|
4
|
+
"""
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Optional
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from utils.markdown_parser import (
|
|
11
|
+
read_markdown_file,
|
|
12
|
+
extract_frontmatter,
|
|
13
|
+
extract_exercise_blocks,
|
|
14
|
+
extract_solution_blocks,
|
|
15
|
+
resolve_include_path
|
|
16
|
+
)
|
|
17
|
+
except ImportError:
|
|
18
|
+
from .utils.markdown_parser import (
|
|
19
|
+
read_markdown_file,
|
|
20
|
+
extract_frontmatter,
|
|
21
|
+
extract_exercise_blocks,
|
|
22
|
+
extract_solution_blocks,
|
|
23
|
+
resolve_include_path
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MaterialExtractor:
    """Extracts exercises and solutions from didactic materials
    (Markdown lectures, practice sheets, homework and exams)."""

    def __init__(self, base_path: Path):
        """
        Initialize the extractor.

        Args:
            base_path: Project base path (where the topic directories live).
        """
        self.base_path = Path(base_path)
        # Kept for backward compatibility with external users of these
        # attributes; this class never populates them itself.
        self.exercises = []
        self.solutions = []

    def extract_from_file(self, file_path: Path) -> Dict:
        """
        Extract exercises and solutions from a Markdown file.

        Args:
            file_path: Path to the file.

        Returns:
            Dict with keys 'file_path', 'frontmatter', 'exercises',
            'solutions' and 'content_body'. On error, empty values are
            returned for every key so callers can index uniformly.
        """
        try:
            content = read_markdown_file(file_path)
            frontmatter, content_body = extract_frontmatter(content)

            exercises = extract_exercise_blocks(content_body)
            solutions = extract_solution_blocks(content_body)

            self._resolve_exercise_includes(exercises, file_path.parent)
            self._resolve_solution_includes(solutions, file_path.parent)

            return {
                'file_path': file_path,
                'frontmatter': frontmatter,
                'exercises': exercises,
                'solutions': solutions,
                'content_body': content_body  # exposed so readings can be indexed
            }
        except Exception as e:
            logger.error(f"Error extrayendo de {file_path}: {e}")
            # BUG FIX: the success path returns 'content_body' but the
            # error path previously omitted it, so callers indexing
            # material['content_body'] hit KeyError on a failed extraction.
            return {
                'file_path': file_path,
                'frontmatter': {},
                'exercises': [],
                'solutions': [],
                'content_body': ''
            }

    @staticmethod
    def _resolve_exercise_includes(exercises: List[Dict], base_dir: Path) -> None:
        """Fill each exercise's 'resolved_content' from its include file,
        falling back to the inline content when there is none or it is missing."""
        for exercise in exercises:
            exercise['resolved_content'] = exercise['content']
            if exercise['include_path']:
                include_path = resolve_include_path(exercise['include_path'], base_dir)
                if include_path.exists():
                    exercise['resolved_content'] = read_markdown_file(include_path)
                else:
                    logger.warning(f"Include no encontrado: {include_path}")

    @staticmethod
    def _resolve_solution_includes(solutions: List[Dict], base_dir: Path) -> None:
        """Fill each solution's 'resolved_content', joining every resolvable
        include with a horizontal-rule separator; falls back to inline content."""
        for solution in solutions:
            resolved_content_parts = []
            for include_path_str in solution['include_paths']:
                include_path = resolve_include_path(include_path_str, base_dir)
                if include_path.exists():
                    resolved_content_parts.append(read_markdown_file(include_path))
                else:
                    logger.warning(f"Include no encontrado: {include_path}")

            if resolved_content_parts:
                solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
            else:
                solution['resolved_content'] = solution['content']

    def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List[Dict]:
        """
        Extract materials from every matching file under a directory.

        Args:
            directory: Directory to process (recursively).
            pattern: Glob pattern for the files to read.

        Returns:
            List of extracted-material dicts.
        """
        directory = Path(directory)
        if not directory.exists():
            logger.warning(f"Directorio no existe: {directory}")
            return []

        materials = []
        for md_file in directory.rglob(pattern):
            # Skip build artifacts and vendored trees.
            if '_build' in md_file.parts or 'node_modules' in md_file.parts:
                continue

            material = self.extract_from_file(md_file)
            name = md_file.name.lower()
            # Keep files that carry exercises/solutions, or that look like
            # reading/theory material by filename.
            if material['exercises'] or material['solutions'] or 'lectura' in name or 'teoria' in name:
                materials.append(material)

        return materials

    @staticmethod
    def _matches_topic(material: Dict, topic: str, check_keywords: bool = False) -> bool:
        """True if the material's frontmatter mentions *topic* in its
        subject, tags, or (optionally) keywords — all case-insensitive."""
        needle = topic.lower()
        frontmatter = material['frontmatter']
        if needle in frontmatter.get('subject', '').lower():
            return True
        if any(needle in tag.lower() for tag in frontmatter.get('tags', [])):
            return True
        if check_keywords and any(needle in kw.lower() for kw in frontmatter.get('keywords', [])):
            return True
        return False

    def extract_by_topic(self, topic: str) -> List[Dict]:
        """
        Extract materials for a specific topic.

        Looks in:
        - {topic}/semana*_practica.md
        - {topic}/semana*_lectura.md
        - tareas/tarea*/tarea*.md
        - examenes/*/*.md

        Args:
            topic: Topic name (e.g. "analisis_vectorial").

        Returns:
            List of extracted materials.
        """
        materials = []

        # Topic directory: practice sheets plus readings (readings may
        # also contain exercises).
        topic_dir = self.base_path / topic
        if topic_dir.exists():
            for file in topic_dir.glob("*practica*.md"):
                materials.append(self.extract_from_file(file))
            for file in topic_dir.glob("*lectura*.md"):
                materials.append(self.extract_from_file(file))

        # Homework may span multiple topics; filter via frontmatter
        # subject/tags.
        tareas_dir = self.base_path / "tareas"
        if tareas_dir.exists():
            for tarea_dir in tareas_dir.iterdir():
                if not tarea_dir.is_dir():
                    continue
                tarea_file = tarea_dir / f"{tarea_dir.name}.md"
                if tarea_file.exists():
                    material = self.extract_from_file(tarea_file)
                    if self._matches_topic(material, topic):
                        materials.append(material)

        # Exams sometimes lack a specific subject ("Examen X"), so the
        # 'keywords' frontmatter field is also accepted as a match signal.
        examenes_dir = self.base_path / "examenes"
        if examenes_dir.exists():
            for examen_dir in examenes_dir.iterdir():
                if not examen_dir.is_dir():
                    continue
                examen_file = examen_dir / f"{examen_dir.name}.md"
                if examen_file.exists():
                    material = self.extract_from_file(examen_file)
                    if self._matches_topic(material, topic, check_keywords=True):
                        materials.append(material)

        return materials

    def get_all_exercises(self, materials: List[Dict]) -> List[Dict]:
        """
        Collect every exercise from a list of materials, pairing each one
        with its matching solution (matched by label) when it exists.

        Args:
            materials: List of extracted materials.

        Returns:
            List of exercise dicts carrying content, source metadata and
            the (optional) solution content/label.
        """
        all_exercises = []

        for material in materials:
            for exercise in material['exercises']:
                solution = next(
                    (sol for sol in material['solutions']
                     if sol['exercise_label'] == exercise['label']),
                    None
                )
                all_exercises.append({
                    'label': exercise['label'],
                    'content': exercise['resolved_content'],
                    'source_file': material['file_path'],
                    'frontmatter': material['frontmatter'],
                    'solution': solution['resolved_content'] if solution else None,
                    'solution_label': solution['label'] if solution else None
                })

        return all_exercises
|
|
237
|
+
|
evolutia/rag/consistency_validator.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Consistency Validator: Valida consistencia usando RAG.
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from complexity_validator import ComplexityValidator
|
|
9
|
+
except ImportError:
|
|
10
|
+
try:
|
|
11
|
+
from ..complexity_validator import ComplexityValidator
|
|
12
|
+
except ImportError:
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
16
|
+
from complexity_validator import ComplexityValidator
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from rag.rag_retriever import RAGRetriever
|
|
20
|
+
from rag.context_enricher import ContextEnricher
|
|
21
|
+
except ImportError:
|
|
22
|
+
try:
|
|
23
|
+
from .rag_retriever import RAGRetriever
|
|
24
|
+
from .context_enricher import ContextEnricher
|
|
25
|
+
except ImportError:
|
|
26
|
+
from rag_retriever import RAGRetriever
|
|
27
|
+
from context_enricher import ContextEnricher
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ConsistencyValidator(ComplexityValidator):
    """Validates consistency via RAG on top of the complexity validation
    inherited from ComplexityValidator."""

    def __init__(self, retriever: RAGRetriever = None, context_enricher: ContextEnricher = None):
        """
        Initialize the consistency validator.

        Args:
            retriever: RAGRetriever instance. When None, the consistency
                check is skipped and variations are deemed consistent.
            context_enricher: ContextEnricher instance (created on demand).
        """
        super().__init__()
        self.retriever = retriever
        self.context_enricher = context_enricher or ContextEnricher()

    def validate_consistency(self, variation_content: str, original_exercise: Dict,
                             original_analysis: Dict) -> Dict:
        """
        Check the generated variation against similar course exercises.

        Args:
            variation_content: Content of the generated variation.
            original_exercise: The original exercise.
            original_analysis: Analysis of the original exercise.

        Returns:
            Dict with consistency-validation results ('is_consistent',
            similarity info and warning lists). Fails open (consistent)
            when RAG is unavailable or an error occurs.
        """
        if not self.retriever:
            return {
                'is_consistent': True,
                'reason': 'RAG no disponible, saltando validación de consistencia',
                'similarity_scores': [],
                'warnings': []
            }

        try:
            # Retrieve course exercises most similar to the variation,
            # excluding the original itself.
            similar_exercises = self.retriever.retrieve_similar_exercises(
                variation_content,
                exclude_label=original_exercise.get('label'),
                top_k=5
            )

            if not similar_exercises:
                return {
                    'is_consistent': True,
                    'reason': 'No se encontraron ejercicios similares para comparar',
                    'similarity_scores': [],
                    'warnings': []
                }

            similarity_scores = [ex.get('similarity', 0) for ex in similar_exercises]
            avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0

            # Complexity consistency. Heuristic: assume the variation is
            # ~20% more complex than the original it was derived from.
            complexity_warnings = []
            variation_complexity = original_analysis.get('total_complexity', 0) * 1.2

            for exercise in similar_exercises[:3]:  # top 3
                ex_complexity = float(exercise.get('metadata', {}).get('complexity', 0))
                if ex_complexity > 0:
                    diff = abs(variation_complexity - ex_complexity) / ex_complexity
                    if diff > 0.5:  # more than 50% apart
                        complexity_warnings.append(
                            f"Complejidad muy diferente de ejercicio similar "
                            f"(variación: {variation_complexity:.2f}, similar: {ex_complexity:.2f})"
                        )

            # Concept consistency: compare Jaccard overlap between the
            # original's concepts and each similar exercise's concepts
            # (stored as a comma-separated string in metadata).
            concept_warnings = []
            original_concepts = set(original_analysis.get('concepts', []))

            for exercise in similar_exercises[:3]:
                raw_concepts = exercise.get('metadata', {}).get('concepts')
                ex_concepts = {c.strip() for c in raw_concepts.split(',') if c.strip()} if raw_concepts else set()

                if ex_concepts and original_concepts:
                    overlap = len(original_concepts & ex_concepts) / len(original_concepts | ex_concepts)
                    if overlap < 0.3:  # less than 30% overlap
                        concept_warnings.append(
                            f"Conceptos muy diferentes de ejercicios similares "
                            f"(overlap: {overlap:.2f})"
                        )

            is_consistent = (
                avg_similarity >= 0.5 and          # at least 50% mean similarity
                len(complexity_warnings) < 2 and   # not too many complexity warnings
                len(concept_warnings) < 2          # not too many concept warnings
            )

            return {
                'is_consistent': is_consistent,
                'avg_similarity': avg_similarity,
                'similarity_scores': similarity_scores,
                'similar_exercises_count': len(similar_exercises),
                'complexity_warnings': complexity_warnings,
                'concept_warnings': concept_warnings,
                'warnings': complexity_warnings + concept_warnings
            }

        except Exception as e:
            logger.error(f"Error en validación de consistencia: {e}")
            return {
                'is_consistent': True,  # fail open on errors
                'reason': f'Error en validación: {str(e)}',
                'similarity_scores': [],
                'warnings': []
            }

    def validate(self, original_exercise: Dict, original_analysis: Dict,
                 variation: Dict) -> Dict:
        """
        Validate a variation using both complexity (parent) and RAG
        consistency.

        Args:
            original_exercise: The original exercise.
            original_analysis: Analysis of the original exercise.
            variation: The generated variation.

        Returns:
            Combined validation dict; 'is_valid' requires the complexity
            check to pass, while consistency only contributes warnings.
        """
        # Complexity validation first (parent implementation).
        complexity_validation = super().validate(
            original_exercise,
            original_analysis,
            variation
        )

        # Then RAG-based consistency.
        variation_content = variation.get('variation_content', '')
        consistency_validation = self.validate_consistency(
            variation_content,
            original_exercise,
            original_analysis
        )

        combined_validation = {
            **complexity_validation,
            'consistency': consistency_validation,
            'is_valid': (
                complexity_validation.get('is_valid', False) and
                consistency_validation.get('is_consistent', True)  # consistency is optional
            )
        }

        # ROBUSTNESS FIX: the parent's result is not guaranteed to carry
        # 'warnings'/'improvements' lists; default them with setdefault
        # instead of raising KeyError on direct indexing.
        if consistency_validation.get('warnings'):
            combined_validation.setdefault('warnings', []).extend(
                [f"Consistencia: {w}" for w in consistency_validation['warnings']]
            )

        if consistency_validation.get('avg_similarity'):
            combined_validation['rag_avg_similarity'] = consistency_validation['avg_similarity']
            combined_validation.setdefault('improvements', []).append(
                f"Similitud promedio con ejercicios del curso: {consistency_validation['avg_similarity']:.2f}"
            )

        return combined_validation
|
|
200
|
+
|