@saulwade/swl-ses 1.1.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -2
- package/README.md +3 -3
- package/bin/swl-mcp-server.js +187 -0
- package/habilidades/benchmark-memoria/SKILL.md +186 -0
- package/habilidades/contenedores-docker/SKILL.md +8 -1
- package/habilidades/datos-etl/SKILL.md +18 -1
- package/habilidades/eval-framework/SKILL.md +212 -0
- package/habilidades/memoria-busqueda/SKILL.md +24 -1
- package/habilidades/planear-fase/SKILL.md +299 -269
- package/habilidades/postgresql-experto/SKILL.md +24 -1
- package/habilidades/verificar-trabajo/SKILL.md +7 -1
- package/hooks/lib/evolution-tracker.js +65 -11
- package/hooks/lib/memory-search.js +44 -13
- package/hooks/sugerir-contribuir.js +226 -0
- package/manifiestos/hooks-config.json +9 -0
- package/manifiestos/modulos.json +33 -1
- package/manifiestos/perfiles.json +2 -1
- package/package.json +4 -3
- package/plugin.json +343 -343
- package/scripts/benchmark-memoria.js +167 -0
- package/scripts/detectar-aprendizajes-duplicados.js +151 -0
- package/scripts/lib/benchmark-metrics.js +160 -0
- package/scripts/lib/eval-metrics-store.js +218 -0
- package/scripts/lib/eval-quality.js +171 -0
- package/scripts/lib/eval-schemas.js +144 -0
- package/scripts/lib/eval-self-correct.js +106 -0
- package/scripts/lib/eval-validator.js +185 -0
- package/scripts/lib/jaccard-similarity.js +98 -0
- package/scripts/lib/longmemeval-runner.js +125 -0
- package/scripts/lib/rrf-fusion.js +175 -0
- package/scripts/lib/scoring-instintos.js +40 -3
- package/scripts/mcp-server/README.md +128 -0
- package/scripts/mcp-server/handlers.js +206 -0
- package/scripts/run-eval.js +141 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* handlers.js — Handlers para los 3 endpoints MCP stub de swl-ses.
|
|
5
|
+
*
|
|
6
|
+
* **EXPERIMENTAL** — no producción. Sin auth, sin rate limiting, sin
|
|
7
|
+
* tests robustos. Ver `scripts/mcp-server/README.md` para limitaciones.
|
|
8
|
+
*
|
|
9
|
+
* Los handlers leen el estado file-based de swl-ses (APRENDIZAJES.md,
|
|
10
|
+
* .planning/sessions/, instintos/proyecto.yaml) y devuelven datos
|
|
11
|
+
* estructurados al cliente MCP. NO escriben — solo lectura.
|
|
12
|
+
*
|
|
13
|
+
* @module scripts/mcp-server/handlers
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const fs = require('fs');
|
|
17
|
+
const path = require('path');
|
|
18
|
+
|
|
19
|
+
const memorySearch = require('../../hooks/lib/memory-search');
|
|
20
|
+
const scoringInstintos = require('../lib/scoring-instintos');
|
|
21
|
+
|
|
22
|
+
// ── handler: swl_memory_search ────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
/**
 * MCP handler `swl_memory_search`: forwards a free-text query to
 * `memorySearch.search` and returns a trimmed projection of each hit.
 *
 * Failures are reported through an `error` field instead of being thrown,
 * matching the other handlers in this file.
 *
 * @param {string} baseDir - Passed unchanged to `memorySearch.search`.
 * @param {object} args - { query: string, limit?: number, tipo?: string }
 * @returns {object} { results: Array, count: number } on success,
 *   { error: string, results: [] } on invalid arguments or search failure.
 */
function swlMemorySearch(baseDir, args) {
  if (!args || typeof args.query !== 'string' || !args.query.trim()) {
    return { error: 'query (string) requerido', results: [] };
  }

  const filtros = {};
  if (typeof args.limit === 'number' && args.limit > 0) {
    // The schema types `limit` as a generic number, so clamp to [1, 50] and
    // drop any fractional part before handing it to the search backend.
    filtros.limit = Math.max(1, Math.floor(Math.min(args.limit, 50)));
  }
  if (typeof args.tipo === 'string') filtros.tipo = args.tipo;

  let encontrados;
  try {
    encontrados = memorySearch.search(baseDir, args.query, filtros);
  } catch (err) {
    const detalle = (err && err.message) ? err.message : String(err);
    return { error: 'Error de búsqueda: ' + detalle, results: [] };
  }

  // Guard the `.map` below: anything that is not an array is a backend failure.
  if (!Array.isArray(encontrados)) {
    return { error: 'Error de búsqueda: resultado inválido', results: [] };
  }

  return {
    // Expose only the summary fields; any other property of a hit is dropped.
    results: encontrados.map(r => ({
      id: r.id,
      tipo: r.tipo,
      titulo: r.titulo,
      fecha: r.fecha,
      relevancia: r.relevancia,
      combinedScore: r.combinedScore,
    })),
    count: encontrados.length,
  };
}
|
|
52
|
+
|
|
53
|
+
// ── handler: swl_aprendizajes_recientes ───────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
/**
 * MCP handler `swl_aprendizajes_recientes`: returns the newest entries of
 * `<baseDir>/.planning/APRENDIZAJES.md`, newest first.
 *
 * An entry is the text introduced by a line starting with `## `. Anything
 * before the first such heading (file title, introduction) is not an entry
 * and is ignored. Entry bodies are truncated to 500 characters.
 *
 * @param {string} baseDir - Project root containing `.planning/`.
 * @param {object} [args] - { limit?: number } (default 10, clamped to 1..50)
 * @returns {object} { results, count, total } on success, where each result is
 *   { index, titulo, contenido } and `index` is the 1-based position of the
 *   entry in the file; { error: string, results: [] } when the file is missing
 *   or unreadable.
 */
function swlAprendizajesRecientes(baseDir, args = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit `null` too.
  const opciones = args || {};
  // Force an integer >= 1: a fractional value below 1 would otherwise become
  // `slice(-0)` and return every entry, bypassing the cap of 50.
  const limit = (typeof opciones.limit === 'number' && opciones.limit > 0)
    ? Math.max(1, Math.floor(Math.min(opciones.limit, 50)))
    : 10;

  const ruta = path.join(baseDir, '.planning', 'APRENDIZAJES.md');
  if (!fs.existsSync(ruta)) {
    return { error: 'APRENDIZAJES.md no encontrado', results: [] };
  }

  let contenido;
  try {
    contenido = fs.readFileSync(ruta, 'utf8');
  } catch (err) {
    return { error: 'Error de lectura: ' + err.message, results: [] };
  }

  // The first piece of the split is whatever precedes the first "## " heading
  // (or '' when the file starts with one); it is never an entry.
  const bloques = contenido
    .split(/^## /m)
    .slice(1)
    .filter(b => b.trim().length > 0);

  // Newest entries live at the END of the file (append-only by convention).
  const recientes = bloques.slice(-limit).reverse();

  return {
    results: recientes.map((b, i) => {
      const lineas = b.split('\n');
      return {
        index: bloques.length - i,
        titulo: lineas[0].trim(),
        contenido: lineas.slice(1).join('\n').trim().slice(0, 500),
      };
    }),
    count: recientes.length,
    total: bloques.length,
  };
}
|
|
96
|
+
|
|
97
|
+
// ── handler: swl_instintos_activos ────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
/**
 * MCP handler `swl_instintos_activos`: lists the instincts declared in
 * `<baseDir>/instintos/proyecto.yaml` whose status is `active` and whose
 * effective confidence reaches the requested threshold, highest first.
 *
 * The file is read with a small regex-based parser (no YAML dependency).
 * Each list item starting with `- id:` is parsed on its own, so an item with
 * a missing or unquoted field is skipped instead of borrowing fields from the
 * item that follows it. Only double-quoted `pattern` values are recognised.
 *
 * @param {string} baseDir - Project root containing `instintos/`.
 * @param {object} [args] - { minConfidence?: number (default 0.5),
 *   limit?: number (default 20, clamped to 1..100) }
 * @returns {object} { results, count, total } on success, where `total` is the
 *   number of matches before `limit` is applied; { error: string, results: [] }
 *   when the file is missing or unreadable.
 */
function swlInstintosActivos(baseDir, args = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit `null` too.
  const opciones = args || {};
  // NaN would make every `effective < minConfidence` comparison false and
  // silently disable the filter, so only finite numbers override the default.
  const minConfidence = Number.isFinite(opciones.minConfidence)
    ? opciones.minConfidence : 0.5;
  // Integer limit keeps `count` an integer and `slice` well-defined.
  const limit = (typeof opciones.limit === 'number' && opciones.limit > 0)
    ? Math.max(1, Math.floor(Math.min(opciones.limit, 100))) : 20;

  const ruta = path.join(baseDir, 'instintos', 'proyecto.yaml');
  if (!fs.existsSync(ruta)) {
    return { error: 'instintos/proyecto.yaml no encontrado', results: [] };
  }

  let contenido;
  try {
    contenido = fs.readFileSync(ruta, 'utf8');
  } catch (err) {
    return { error: 'Error de lectura: ' + err.message, results: [] };
  }

  // Cut the document at every "- id:" list item; the piece before the first
  // item (top-level keys, comments) carries no instinct and is discarded.
  const entradas = contenido.split(/^[ \t]*- id:/m).slice(1);
  const ahora = new Date();
  const results = [];

  for (const entrada of entradas) {
    const idMatch = /^[ \t]*(\S+)/.exec(entrada);
    const patternMatch = /^[ \t]*pattern:\s*"([^"]+)"/m.exec(entrada);
    // Anchored at line start so keys that merely END in "confidence:"
    // (e.g. "base_confidence:") are not mistaken for the field itself.
    const confidenceMatch = /^[ \t]*confidence:[ \t]*([\d.]+)/m.exec(entrada);
    const statusMatch = /^[ \t]*status:[ \t]*(\w+)/m.exec(entrada);
    if (!idMatch || !patternMatch || !confidenceMatch || !statusMatch) continue;

    const id = idMatch[1];
    const pattern = patternMatch[1];
    const status = statusMatch[1];
    const confidence = parseFloat(confidenceMatch[1]);
    // "[\d.]+" also accepts strings such as "." that do not parse to a number.
    if (!Number.isFinite(confidence)) continue;
    if (status !== 'active') continue;

    // Minimal object for scoring: only the fields parsed above are supplied.
    const instinto = { id, pattern, confidence, status };
    const effective = scoringInstintos.effectiveConfidence(instinto, ahora);
    if (effective < minConfidence) continue;

    results.push({
      id,
      pattern,
      confidence,
      effective_confidence: Math.round(effective * 1000) / 1000,
      status,
    });
  }

  results.sort((a, b) => b.effective_confidence - a.effective_confidence);
  const pagina = results.slice(0, limit);
  return {
    results: pagina,
    count: pagina.length,
    total: results.length,
  };
}
|
|
161
|
+
|
|
162
|
+
// ── exports ───────────────────────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
const HANDLERS = {
|
|
165
|
+
swl_memory_search: {
|
|
166
|
+
description: 'Búsqueda hybrid sobre memoria SWL (aprendizajes + sesiones + instintos) con RRF fusion.',
|
|
167
|
+
inputSchema: {
|
|
168
|
+
type: 'object',
|
|
169
|
+
properties: {
|
|
170
|
+
query: { type: 'string', description: 'Texto libre de búsqueda' },
|
|
171
|
+
limit: { type: 'number', description: 'Máximo de resultados (default 20, max 50)' },
|
|
172
|
+
tipo: { type: 'string', enum: ['aprendizaje', 'sesion', 'instinto'], description: 'Filtrar por tipo' },
|
|
173
|
+
},
|
|
174
|
+
required: ['query'],
|
|
175
|
+
},
|
|
176
|
+
handler: swlMemorySearch,
|
|
177
|
+
},
|
|
178
|
+
swl_aprendizajes_recientes: {
|
|
179
|
+
description: 'Últimos N aprendizajes de .planning/APRENDIZAJES.md (más recientes primero).',
|
|
180
|
+
inputSchema: {
|
|
181
|
+
type: 'object',
|
|
182
|
+
properties: {
|
|
183
|
+
limit: { type: 'number', description: 'Cuántos retornar (default 10, max 50)' },
|
|
184
|
+
},
|
|
185
|
+
},
|
|
186
|
+
handler: swlAprendizajesRecientes,
|
|
187
|
+
},
|
|
188
|
+
swl_instintos_activos: {
|
|
189
|
+
description: 'Instintos con effective_confidence ≥ umbral. Default 0.5.',
|
|
190
|
+
inputSchema: {
|
|
191
|
+
type: 'object',
|
|
192
|
+
properties: {
|
|
193
|
+
minConfidence: { type: 'number', description: 'Umbral mínimo (default 0.5)' },
|
|
194
|
+
limit: { type: 'number', description: 'Máximo (default 20, max 100)' },
|
|
195
|
+
},
|
|
196
|
+
},
|
|
197
|
+
handler: swlInstintosActivos,
|
|
198
|
+
},
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
module.exports = {
|
|
202
|
+
HANDLERS,
|
|
203
|
+
swlMemorySearch,
|
|
204
|
+
swlAprendizajesRecientes,
|
|
205
|
+
swlInstintosActivos,
|
|
206
|
+
};
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* run-eval.js — CLI para ejecutar evaluaciones del eval framework.
|
|
6
|
+
*
|
|
7
|
+
* Uso:
|
|
8
|
+
* node scripts/run-eval.js <ruta-eval.json>
|
|
9
|
+
*
|
|
10
|
+
* Formato del archivo JSON de eval:
|
|
11
|
+
* {
|
|
12
|
+
* "functionId": "memoria-busqueda::search",
|
|
13
|
+
* "schemaName": "MEMORY_SEARCH_RESULT_SCHEMA", // opcional, valida output
|
|
14
|
+
* "qualityScorer": "scoreObservacion", // opcional, calcula calidad
|
|
15
|
+
* "input": { ... }, // datos de entrada
|
|
16
|
+
* "expectedKeys": ["id", "tipo", "titulo"], // opcional, valida presencia
|
|
17
|
+
* "output": { ... } // output a evaluar
|
|
18
|
+
* }
|
|
19
|
+
*
|
|
20
|
+
* Exit codes:
|
|
21
|
+
* 0 - eval pasa (valid: true)
|
|
22
|
+
* 1 - eval falla (valid: false) o error de I/O
|
|
23
|
+
* 2 - error de uso (argumentos inválidos)
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
const fs = require('fs');
|
|
27
|
+
const path = require('path');
|
|
28
|
+
|
|
29
|
+
const schemas = require('./lib/eval-schemas');
|
|
30
|
+
const validator = require('./lib/eval-validator');
|
|
31
|
+
const quality = require('./lib/eval-quality');
|
|
32
|
+
const metricsStore = require('./lib/eval-metrics-store');
|
|
33
|
+
|
|
34
|
+
/**
 * Prints the two supported command lines to stderr and terminates the
 * process with exit code 2 (usage error).
 */
function uso() {
  const lineas = [
    'Uso: node scripts/run-eval.js <ruta-eval.json>',
    ' node scripts/run-eval.js --rebuild-aggregate',
  ];
  for (const linea of lineas) {
    console.error(linea);
  }
  process.exit(2);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Runs one evaluation described by a JSON file, records the outcome through
 * `metricsStore.registrar` and prints a JSON report to stdout.
 *
 * Checks applied, each only when the definition requests it:
 *   - `schemaName`: `output` is validated with `validator.validar` against the
 *     schema of that name exported by the schemas module.
 *   - `expectedKeys`: every listed key must be an own property of `output`.
 *   - `qualityScorer`: the scorer of that name is called with `output` and its
 *     result is reported as `qualityScore` (it does not affect validity).
 *
 * Problems with the definition itself (missing file, invalid JSON, non-object
 * content, missing `functionId`, unknown schema or scorer) are reported on
 * stderr and nothing is recorded.
 *
 * @param {string} ruta - Path to the eval definition file.
 * @returns {number} Process exit code: 0 when the eval is valid, 1 otherwise.
 */
function ejecutarEval(ruta) {
  if (!fs.existsSync(ruta)) {
    console.error(`Archivo no existe: ${ruta}`);
    return 1;
  }

  let definicion;
  try {
    definicion = JSON.parse(fs.readFileSync(ruta, 'utf8'));
  } catch (err) {
    console.error(`Error parseando ${ruta}: ${err.message}`);
    return 1;
  }

  // Valid JSON is not necessarily an object: `null` would throw on the
  // destructuring below, so reject non-objects with a clear message.
  if (definicion === null || typeof definicion !== 'object') {
    console.error('El archivo de eval debe contener un objeto JSON');
    return 1;
  }

  const { functionId, schemaName, qualityScorer, output } = definicion;
  if (!functionId || typeof functionId !== 'string') {
    console.error('functionId requerido en el archivo de eval');
    return 1;
  }

  // Names come from the eval file, so look them up as OWN properties only;
  // otherwise "constructor" or "toString" would resolve to Object.prototype
  // members and be treated as a registered schema or scorer.
  const tienePropia = (obj, clave) => Object.prototype.hasOwnProperty.call(obj, clave);

  const inicio = Date.now();
  let valid = true;
  const errors = [];
  let qualityScore = null;

  // Validate against a schema when one is named.
  if (schemaName) {
    const schema = (typeof schemaName === 'string' && tienePropia(schemas, schemaName))
      ? schemas[schemaName]
      : undefined;
    if (!schema) {
      console.error(`Schema desconocido: ${schemaName}. Disponibles: ${Object.keys(schemas).filter(k => k.endsWith('_SCHEMA')).join(', ')}`);
      return 1;
    }
    const r = validator.validar(output, schema);
    valid = valid && r.valid;
    if (!r.valid) errors.push(...r.errors);
  }

  // Check the presence of expected keys when they are listed.
  if (Array.isArray(definicion.expectedKeys)) {
    // The `in` operator throws on primitives; a non-object output simply
    // has none of the expected keys.
    const esObjeto = output !== null && typeof output === 'object';
    for (const key of definicion.expectedKeys) {
      if (!esObjeto || !tienePropia(output, key)) {
        valid = false;
        errors.push(`expectedKey faltante: ${key}`);
      }
    }
  }

  // Compute the quality score when a scorer is named.
  if (qualityScorer) {
    const scorer = (typeof qualityScorer === 'string' && tienePropia(quality, qualityScorer))
      ? quality[qualityScorer]
      : undefined;
    if (typeof scorer !== 'function') {
      console.error(`Quality scorer desconocido: ${qualityScorer}. Disponibles: ${Object.keys(quality).join(', ')}`);
      return 1;
    }
    qualityScore = scorer(output);
  }

  // Covers only the checks above, not file reading or persistence.
  const latencyMs = Date.now() - inicio;

  // Persist the outcome relative to the current working directory.
  metricsStore.registrar(process.cwd(), {
    functionId,
    latencyMs,
    success: valid,
    qualityScore,
    metadata: { schemaName, qualityScorer, evalFile: path.basename(ruta) },
  });

  // Report; `errors` is omitted from the JSON when there are none.
  console.log(JSON.stringify({
    functionId,
    valid,
    errors: errors.length > 0 ? errors : undefined,
    qualityScore,
    latencyMs,
  }, null, 2));

  return valid ? 0 : 1;
}
|
|
121
|
+
|
|
122
|
+
/**
 * CLI entry point. Dispatches on the first command-line argument:
 *   - no arguments: prints usage via `uso()`;
 *   - `--rebuild-aggregate`: rebuilds the metrics aggregate for the current
 *     working directory, prints a summary and exits with code 0;
 *   - anything else: treats it as an eval file path and exits with the code
 *     returned by `ejecutarEval`.
 */
function main() {
  const cli = process.argv.slice(2);
  const objetivo = cli[0];

  if (cli.length === 0) {
    uso();
  }

  const pideReconstruir = objetivo === '--rebuild-aggregate';
  if (pideReconstruir) {
    const resumen = metricsStore.reconstruirAgregado(process.cwd());
    console.log(`Agregado reconstruido: ${resumen.rebuilt} eventos, ${resumen.functions} funciones únicas.`);
    process.exit(0);
  }

  const codigoSalida = ejecutarEval(objetivo);
  process.exit(codigoSalida);
}
|
|
134
|
+
|
|
135
|
+
// Run the CLI only when this file is the entry script; requiring it as a
// module just exposes the API below.
if (module === require.main) {
  main();
}

// Public API for programmatic use.
module.exports = { ejecutarEval };
|