@saulwade/swl-ses 1.3.8 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +15 -6
- package/README.md +15 -14
- package/agentes/nemesis-auditor-swl.md +161 -0
- package/bin/swl-mcp-server.js +187 -187
- package/bin/swl-webhook-server.js +198 -0
- package/comandos/swl/.evolved.json +22 -22
- package/comandos/swl/adoptar-proyecto.md +21 -1
- package/comandos/swl/claudemd.md +14 -1
- package/comandos/swl/contribuir.md +233 -233
- package/comandos/swl/exportar-vault.md +108 -0
- package/comandos/swl/nemesis.md +122 -0
- package/comandos/swl/nuevo-proyecto.md +24 -2
- package/comandos/swl/salud.md +34 -0
- package/comandos/swl/verificar.md +45 -0
- package/gateway/adapters/base.js +109 -0
- package/gateway/adapters/discord.js +167 -0
- package/gateway/adapters/email.js +221 -0
- package/gateway/adapters/slack.js +192 -0
- package/gateway/adapters/telegram.js +183 -0
- package/gateway/adapters/webhook.js +113 -0
- package/gateway/adapters/whatsapp.js +214 -0
- package/gateway/agent-executor.js +322 -0
- package/gateway/command-relay.js +271 -0
- package/gateway/cron/jobs.js +263 -0
- package/gateway/cron/scheduler.js +322 -0
- package/gateway/cron/store.js +335 -0
- package/gateway/index.js +320 -0
- package/gateway/lib/event-channel.js +191 -0
- package/gateway/session.js +131 -0
- package/gateway/webhook-server.js +324 -0
- package/habilidades/backend-production-resilience/SKILL.md +288 -288
- package/habilidades/benchmark-memoria/SKILL.md +186 -186
- package/habilidades/build-errors-nextjs/SKILL.md +55 -1
- package/habilidades/diagrama-arquitectura/assets/template.html +276 -276
- package/habilidades/doubt-driven-review/SKILL.md +171 -171
- package/habilidades/doubt-driven-review/recursos/EXAMPLES.md +130 -130
- package/habilidades/eval-framework/SKILL.md +212 -212
- package/habilidades/extractor-de-aprendizajes/SKILL.md +20 -10
- package/habilidades/feynman-auditor-swl/SKILL.md +123 -0
- package/habilidades/feynman-auditor-swl/recursos/preguntas-language-agnostic.md +108 -0
- package/habilidades/harness-claude-code/SKILL.md +299 -299
- package/habilidades/infra-github-actions/SKILL.md +166 -166
- package/habilidades/legacy-code-rescue/SKILL.md +267 -267
- package/habilidades/manejo-errores/.evolved.json +8 -8
- package/habilidades/meta-skills-estandar/recursos/convencion-examples.md +93 -93
- package/habilidades/meta-skills-estandar/recursos/skills-as-agents.md +163 -163
- package/habilidades/nextjs-testing/SKILL.md +89 -5
- package/habilidades/node-experto/SKILL.md +37 -1
- package/habilidades/patrones-python/SKILL.md +229 -229
- package/habilidades/patrones-python/recursos/patrones-avanzados.md +469 -469
- package/habilidades/planear-fase/SKILL.md +319 -319
- package/habilidades/react-experto/SKILL.md +45 -4
- package/habilidades/release-semver/.evolved.json +8 -8
- package/habilidades/state-inconsistency-auditor-swl/SKILL.md +166 -0
- package/habilidades/state-inconsistency-auditor-swl/recursos/coupled-state-patterns.md +147 -0
- package/habilidades/tdd-workflow/SKILL.md +36 -4
- package/habilidades/testing-python/SKILL.md +340 -340
- package/habilidades/web-fetcher-routing/SKILL.md +75 -0
- package/hooks/claudemd-bloat-detector.js +161 -161
- package/hooks/inyeccion-contexto.js +8 -3
- package/hooks/lib/agent-routing.js +107 -107
- package/hooks/lib/auto-consolidator.js +335 -335
- package/hooks/lib/error-classifier.js +308 -308
- package/hooks/lib/merkle-audit.js +96 -96
- package/hooks/lib/provenance-tracker.js +191 -191
- package/hooks/lib/rate-limit-ip.js +177 -0
- package/hooks/lib/rate-limit-tracker.js +253 -253
- package/hooks/lib/resource-quota.js +122 -122
- package/hooks/lib/retry-jitter.js +165 -165
- package/hooks/lib/security-net.js +201 -0
- package/hooks/lib/skill-auditor.js +588 -588
- package/hooks/lib/sync-status.js +228 -228
- package/hooks/lib/taint-tracker.js +107 -107
- package/hooks/lib/text-similarity.js +241 -241
- package/hooks/lib/toon-compressor.js +245 -245
- package/hooks/lib/webhook-dedup.js +184 -0
- package/hooks/lib/webhook-verify.js +123 -0
- package/hooks/proteccion-rutas.js +120 -15
- package/hooks/registro-turnos.js +209 -209
- package/hooks/sugerir-regenerar-inventario.js +170 -170
- package/hooks/validar-formato-post-subagente.js +140 -140
- package/hooks/validar-memoria-hook.js +218 -218
- package/instintos/prompt-appendices.yaml +57 -57
- package/manifiestos/agent-output-schemas.json +57 -57
- package/manifiestos/modulos.json +31 -0
- package/manifiestos/skills-lock.json +1114 -1093
- package/package.json +6 -4
- package/plantillas/auditor-veto-template.md +105 -105
- package/plantillas/github-workflows/README.md +47 -47
- package/plantillas/github-workflows/release-please.yml +44 -44
- package/plantillas/github-workflows/swl-ci.yml +107 -107
- package/plantillas/github-workflows/swl-security.yml +51 -51
- package/plugin.json +2 -2
- package/reglas/analisis-previo-tareas-grandes.md +172 -172
- package/reglas/arreglar-al-detectar.md +147 -147
- package/reglas/fragmentos-compartidos.md +152 -152
- package/reglas/harness-claude-code.md +213 -213
- package/reglas/usar-context7.md +226 -226
- package/reglas/usar-sistema-swl.md +251 -0
- package/schemas/diary-entry.schema.json +80 -80
- package/scripts/audit-tools/audit-history.js +330 -0
- package/scripts/audit-tools/bundle-tracker.js +290 -0
- package/scripts/audit-tools/canary-monitor.js +352 -0
- package/scripts/audit-tools/code-profiler.js +605 -0
- package/scripts/audit-tools/dep-doctor.js +320 -0
- package/scripts/audit-tools/env-validator.js +206 -0
- package/scripts/audit-tools/lib/fs-walk.js +48 -0
- package/scripts/audit-tools/lib/output.js +23 -0
- package/scripts/audit-tools/migration-checker.js +392 -0
- package/scripts/audit-tools/pentest-scanner.js +1436 -0
- package/scripts/benchmark-memoria.js +167 -167
- package/scripts/comandos/skills.js +251 -2
- package/scripts/configurar-branch-protection.js +418 -418
- package/scripts/detectar-aprendizajes-duplicados.js +151 -151
- package/scripts/field-report.js +199 -199
- package/scripts/generar-checklists-consolidados.js +273 -273
- package/scripts/generar-inventario.js +420 -420
- package/scripts/generar-matriz-lenguajes.js +271 -271
- package/scripts/lib/artefactos-python.js +43 -43
- package/scripts/lib/benchmark-metrics.js +160 -160
- package/scripts/lib/budget-enforcer.js +252 -252
- package/scripts/lib/configurar-ci.js +380 -380
- package/scripts/lib/contadores-inventario.js +217 -217
- package/scripts/lib/detectar-stack-detallado.js +307 -307
- package/scripts/lib/diary-entry.js +234 -234
- package/scripts/lib/eval-metrics-store.js +218 -218
- package/scripts/lib/eval-quality.js +171 -171
- package/scripts/lib/eval-schemas.js +144 -144
- package/scripts/lib/eval-self-correct.js +106 -106
- package/scripts/lib/eval-validator.js +185 -185
- package/scripts/lib/jaccard-similarity.js +98 -98
- package/scripts/lib/longmemeval-runner.js +125 -125
- package/scripts/lib/npm-version.js +261 -261
- package/scripts/lib/paquetes-conocidos.js +50 -50
- package/scripts/lib/prompt-builder.js +264 -264
- package/scripts/lib/rrf-fusion.js +175 -175
- package/scripts/lib/scoring-instintos.js +277 -277
- package/scripts/lib/semantic-search.js +252 -252
- package/scripts/limpiar-artefactos-python.js +131 -131
- package/scripts/mcp-server/README.md +128 -128
- package/scripts/mcp-server/handlers.js +206 -206
- package/scripts/migrar-csv-a-array.js +168 -168
- package/scripts/migrar-fase-dominio.js +201 -201
- package/scripts/publicar.js +511 -511
- package/scripts/run-eval.js +141 -141
- package/scripts/validar-manifest.js +195 -195
- package/scripts/validar-userland-vacio.js +110 -110
- package/scripts/verificar-release.js +110 -0
|
@@ -1,98 +1,98 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* jaccard-similarity.js — Métrica de Jaccard sobre conjuntos de tokens.
|
|
5
|
-
*
|
|
6
|
-
* Patrón adoptado de `temp/agentmemory-main/src/functions/auto-forget.ts`
|
|
7
|
-
* para detectar memorias contradictorias/duplicadas con vocabulario compartido.
|
|
8
|
-
*
|
|
9
|
-
* Jaccard(A, B) = |A ∩ B| / |A ∪ B|
|
|
10
|
-
*
|
|
11
|
-
* Propiedades:
|
|
12
|
-
* - Rango [0, 1]: 0 = sin overlap, 1 = idénticos.
|
|
13
|
-
* - Simétrico: J(A, B) = J(B, A).
|
|
14
|
-
* - Independiente de longitudes absolutas (ambos cortos pueden ser 1.0).
|
|
15
|
-
*
|
|
16
|
-
* Sin dependencias — Node stdlib only. Funciones puras.
|
|
17
|
-
*
|
|
18
|
-
* @module scripts/lib/jaccard-similarity
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
// ── constantes ────────────────────────────────────────────────────────────────
|
|
22
|
-
|
|
23
|
-
/** Longitud mínima de un token para ser considerado significativo. */
|
|
24
|
-
const MIN_TOKEN_LENGTH = 3;
|
|
25
|
-
|
|
26
|
-
/** Stop words en español que se excluyen del análisis. */
|
|
27
|
-
const STOP_WORDS = new Set([
|
|
28
|
-
'que', 'los', 'las', 'del', 'una', 'por', 'con', 'para', 'como',
|
|
29
|
-
'sin', 'mas', 'sus', 'lo', 'le', 'la', 'el', 'al', 'no', 'es',
|
|
30
|
-
'se', 'de', 'en', 'un', 'a', 'y', 'o', 'pero', 'cuando',
|
|
31
|
-
'donde', 'porque', 'desde', 'hasta', 'sobre', 'bajo', 'entre',
|
|
32
|
-
'esta', 'este', 'esto', 'esa', 'ese', 'eso', 'tras', 'durante',
|
|
33
|
-
'mediante', 'segun', 'asi', 'tan', 'ya', 'aun', 'aunque',
|
|
34
|
-
// English equivalents (frequently mixed in technical text)
|
|
35
|
-
'the', 'and', 'for', 'with', 'this', 'that', 'have', 'from',
|
|
36
|
-
'are', 'was', 'will', 'not', 'has', 'had', 'but', 'can',
|
|
37
|
-
]);
|
|
38
|
-
|
|
39
|
-
// ── funciones puras ───────────────────────────────────────────────────────────
|
|
40
|
-
|
|
41
|
-
/**
|
|
42
|
-
* Convierte un texto en un Set de tokens significativos (lowercase, sin stop
|
|
43
|
-
* words, longitud mínima). Preserva acentos.
|
|
44
|
-
*
|
|
45
|
-
* @param {string} text
|
|
46
|
-
* @returns {Set<string>}
|
|
47
|
-
*/
|
|
48
|
-
function tokenize(text) {
|
|
49
|
-
if (!text || typeof text !== 'string') return new Set();
|
|
50
|
-
return new Set(
|
|
51
|
-
String(text)
|
|
52
|
-
.toLowerCase()
|
|
53
|
-
.replace(/[`*_~\[\](){}<>#"'\-.,;:!?\/\\]/g, ' ')
|
|
54
|
-
.split(/\s+/)
|
|
55
|
-
.filter(t => t.length >= MIN_TOKEN_LENGTH && !STOP_WORDS.has(t)),
|
|
56
|
-
);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* Jaccard similarity entre dos Sets.
|
|
61
|
-
*
|
|
62
|
-
* @param {Set} setA
|
|
63
|
-
* @param {Set} setB
|
|
64
|
-
* @returns {number} en [0, 1]
|
|
65
|
-
*/
|
|
66
|
-
function jaccard(setA, setB) {
|
|
67
|
-
if (!(setA instanceof Set) || !(setB instanceof Set)) return 0;
|
|
68
|
-
if (setA.size === 0 && setB.size === 0) return 0;
|
|
69
|
-
if (setA.size === 0 || setB.size === 0) return 0;
|
|
70
|
-
|
|
71
|
-
let intersection = 0;
|
|
72
|
-
for (const token of setA) {
|
|
73
|
-
if (setB.has(token)) intersection++;
|
|
74
|
-
}
|
|
75
|
-
const union = setA.size + setB.size - intersection;
|
|
76
|
-
return union === 0 ? 0 : intersection / union;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* Conveniencia: jaccard sobre dos textos.
|
|
81
|
-
*
|
|
82
|
-
* @param {string} a
|
|
83
|
-
* @param {string} b
|
|
84
|
-
* @returns {number} en [0, 1]
|
|
85
|
-
*/
|
|
86
|
-
function similarity(a, b) {
|
|
87
|
-
return jaccard(tokenize(a), tokenize(b));
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
// ── exports ───────────────────────────────────────────────────────────────────
|
|
91
|
-
|
|
92
|
-
module.exports = {
|
|
93
|
-
tokenize,
|
|
94
|
-
jaccard,
|
|
95
|
-
similarity,
|
|
96
|
-
MIN_TOKEN_LENGTH,
|
|
97
|
-
STOP_WORDS,
|
|
98
|
-
};
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* jaccard-similarity.js — Métrica de Jaccard sobre conjuntos de tokens.
|
|
5
|
+
*
|
|
6
|
+
* Patrón adoptado de `temp/agentmemory-main/src/functions/auto-forget.ts`
|
|
7
|
+
* para detectar memorias contradictorias/duplicadas con vocabulario compartido.
|
|
8
|
+
*
|
|
9
|
+
* Jaccard(A, B) = |A ∩ B| / |A ∪ B|
|
|
10
|
+
*
|
|
11
|
+
* Propiedades:
|
|
12
|
+
* - Rango [0, 1]: 0 = sin overlap, 1 = idénticos.
|
|
13
|
+
* - Simétrico: J(A, B) = J(B, A).
|
|
14
|
+
* - Independiente de longitudes absolutas (ambos cortos pueden ser 1.0).
|
|
15
|
+
*
|
|
16
|
+
* Sin dependencias — Node stdlib only. Funciones puras.
|
|
17
|
+
*
|
|
18
|
+
* @module scripts/lib/jaccard-similarity
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
// ── constantes ────────────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
/** Minimum number of characters a token needs to count as meaningful. */
const MIN_TOKEN_LENGTH = 3;

/**
 * Words excluded from the analysis: Spanish function words (spelled without
 * accents) followed by common English ones.
 */
const STOP_WORDS = new Set([].concat(
  // Spanish
  [
    'que', 'los', 'las', 'del', 'una', 'por', 'con', 'para', 'como',
    'sin', 'mas', 'sus', 'lo', 'le', 'la', 'el', 'al', 'no', 'es',
    'se', 'de', 'en', 'un', 'a', 'y', 'o', 'pero', 'cuando',
    'donde', 'porque', 'desde', 'hasta', 'sobre', 'bajo', 'entre',
    'esta', 'este', 'esto', 'esa', 'ese', 'eso', 'tras', 'durante',
    'mediante', 'segun', 'asi', 'tan', 'ya', 'aun', 'aunque',
  ],
  // English equivalents (frequently mixed in technical text)
  [
    'the', 'and', 'for', 'with', 'this', 'that', 'have', 'from',
    'are', 'was', 'will', 'not', 'has', 'had', 'but', 'can',
  ],
));
|
|
38
|
+
|
|
39
|
+
// ── funciones puras ───────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
/**
 * Turns free text into the set of its meaningful tokens.
 *
 * The text is lowercased, markdown/punctuation characters (including the
 * Spanish opening marks "¿" and "¡") are replaced by spaces, and the result
 * is split on whitespace. Tokens shorter than MIN_TOKEN_LENGTH and stop words
 * are dropped. Returned tokens keep their accents.
 *
 * @param {string} text
 * @returns {Set<string>} Empty set when `text` is falsy or not a string.
 */
function tokenize(text) {
  if (!text || typeof text !== 'string') return new Set();

  // STOP_WORDS is spelled without accents ('segun', 'asi', 'mas'), so the
  // lookup must ignore the acute accent or 'según' / 'así' / 'cómo' would
  // never be filtered. Only U+0301 is removed: 'ñ' stays distinct from 'n'
  // ('uña' must not collapse into the stop word 'una').
  const isStopWord = (token) =>
    STOP_WORDS.has(token.normalize('NFD').replace(/\u0301/g, ''));

  return new Set(
    text
      .toLowerCase()
      .replace(/[`*_~\[\](){}<>#"'\-.,;:!?\u00a1\u00bf\/\\]/g, ' ')
      .split(/\s+/)
      .filter((token) => token.length >= MIN_TOKEN_LENGTH && !isStopWord(token)),
  );
}
|
|
58
|
+
|
|
59
|
+
/**
 * Jaccard similarity between two Sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set} setA
 * @param {Set} setB
 * @returns {number} Value in [0, 1]. Returns 0 when either argument is not a
 *   Set or when either set is empty (two empty sets also score 0, not 1).
 */
function jaccard(setA, setB) {
  if (!(setA instanceof Set) || !(setB instanceof Set)) return 0;
  // An empty side shares nothing with the other one. This guard also
  // guarantees the union below is never zero.
  if (setA.size === 0 || setB.size === 0) return 0;

  // Walk the smaller set and probe the larger one: same count, fewer lookups.
  const [smaller, larger] = setA.size <= setB.size ? [setA, setB] : [setB, setA];
  let intersection = 0;
  for (const token of smaller) {
    if (larger.has(token)) intersection++;
  }

  return intersection / (setA.size + setB.size - intersection);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Convenience wrapper: tokenizes both texts and returns the Jaccard
 * similarity of the resulting token sets.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Value in [0, 1].
 */
function similarity(a, b) {
  const tokensA = tokenize(a);
  const tokensB = tokenize(b);
  return jaccard(tokensA, tokensB);
}
|
|
89
|
+
|
|
90
|
+
// ── exports ───────────────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
// Public API: the three functions plus the constants that drive tokenization.
module.exports = { tokenize, jaccard, similarity, MIN_TOKEN_LENGTH, STOP_WORDS };
|
|
@@ -1,125 +1,125 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* longmemeval-runner.js — Adapter que ejecuta queries del benchmark contra
|
|
5
|
-
* `hooks/lib/memory-search` y devuelve métricas.
|
|
6
|
-
*
|
|
7
|
-
* Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
|
|
8
|
-
* Adaptado: en lugar de cargar haystack desde el dataset, usa el estado
|
|
9
|
-
* actual del proyecto SWL (APRENDIZAJES.md, sesiones, instintos).
|
|
10
|
-
*
|
|
11
|
-
* El dataset es un JSONL donde cada línea es:
|
|
12
|
-
* {
|
|
13
|
-
* "question_id": "q-001",
|
|
14
|
-
* "question": "texto libre de la query",
|
|
15
|
-
* "gold_ids": ["apr-N", "ses-YYYY-MM-DD-HHMM"],
|
|
16
|
-
* "category": "decision" | "patron" | "anti-patron" | "gotcha" | ...,
|
|
17
|
-
* "status": "real" | "placeholder"
|
|
18
|
-
* }
|
|
19
|
-
*
|
|
20
|
-
* @module scripts/lib/longmemeval-runner
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
const fs = require('fs');
|
|
24
|
-
const path = require('path');
|
|
25
|
-
|
|
26
|
-
const memorySearch = require('../../hooks/lib/memory-search');
|
|
27
|
-
const benchmarkMetrics = require('./benchmark-metrics');
|
|
28
|
-
|
|
29
|
-
// ── parser de dataset ─────────────────────────────────────────────────────────
|
|
30
|
-
|
|
31
|
-
/**
|
|
32
|
-
* Parsea un archivo JSONL del dataset.
|
|
33
|
-
* @param {string} ruta
|
|
34
|
-
* @returns {object[]}
|
|
35
|
-
*/
|
|
36
|
-
function leerDataset(ruta) {
|
|
37
|
-
if (!fs.existsSync(ruta)) {
|
|
38
|
-
throw new Error(`Dataset no encontrado: ${ruta}`);
|
|
39
|
-
}
|
|
40
|
-
const contenido = fs.readFileSync(ruta, 'utf8');
|
|
41
|
-
const entries = [];
|
|
42
|
-
let lineNum = 0;
|
|
43
|
-
for (const linea of contenido.split('\n')) {
|
|
44
|
-
lineNum++;
|
|
45
|
-
if (!linea.trim()) continue;
|
|
46
|
-
if (linea.trim().startsWith('//')) continue; // comentarios
|
|
47
|
-
try {
|
|
48
|
-
entries.push(JSON.parse(linea));
|
|
49
|
-
} catch (err) {
|
|
50
|
-
throw new Error(`JSONL malformado en línea ${lineNum}: ${err.message}`);
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
return entries;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
// ── ejecución de query individual ─────────────────────────────────────────────
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* Ejecuta una query del benchmark contra memoria SWL y compara con gold.
|
|
60
|
-
*
|
|
61
|
-
* @param {string} baseDir - Raíz del proyecto.
|
|
62
|
-
* @param {object} entry - Una línea del dataset.
|
|
63
|
-
* @param {object} [opts]
|
|
64
|
-
* @param {number} [opts.limit=20] - Top-k a recuperar.
|
|
65
|
-
* @returns {object} Métricas + ids retrieved + entry original.
|
|
66
|
-
*/
|
|
67
|
-
function ejecutarEntry(baseDir, entry, opts = {}) {
|
|
68
|
-
const limit = opts.limit || 20;
|
|
69
|
-
const inicio = Date.now();
|
|
70
|
-
const resultados = memorySearch.search(baseDir, entry.question, { limit });
|
|
71
|
-
const latencyMs = Date.now() - inicio;
|
|
72
|
-
|
|
73
|
-
const retrievedIds = resultados.map(r => r.id);
|
|
74
|
-
const goldIds = Array.isArray(entry.gold_ids) ? entry.gold_ids : [];
|
|
75
|
-
const metricas = benchmarkMetrics.calcularMetricas(retrievedIds, goldIds);
|
|
76
|
-
|
|
77
|
-
return {
|
|
78
|
-
question_id: entry.question_id || 'unknown',
|
|
79
|
-
question: entry.question,
|
|
80
|
-
category: entry.category || null,
|
|
81
|
-
status: entry.status || 'unknown',
|
|
82
|
-
retrievedIds,
|
|
83
|
-
goldIds,
|
|
84
|
-
metricas,
|
|
85
|
-
latencyMs,
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
/**
|
|
90
|
-
* Ejecuta el dataset completo y devuelve resultados + métricas agregadas.
|
|
91
|
-
*
|
|
92
|
-
* @param {string} baseDir
|
|
93
|
-
* @param {string} datasetPath
|
|
94
|
-
* @param {object} [opts]
|
|
95
|
-
* @returns {{ entries: object[], promedio: object, dataset: object }}
|
|
96
|
-
*/
|
|
97
|
-
function ejecutarDataset(baseDir, datasetPath, opts = {}) {
|
|
98
|
-
const entries = leerDataset(datasetPath);
|
|
99
|
-
const resultados = entries.map(e => ejecutarEntry(baseDir, e, opts));
|
|
100
|
-
const promedio = benchmarkMetrics.promediar(resultados.map(r => r.metricas));
|
|
101
|
-
|
|
102
|
-
// Estadísticas del dataset
|
|
103
|
-
const placeholderCount = entries.filter(e => e.status === 'placeholder').length;
|
|
104
|
-
const realCount = entries.filter(e => e.status === 'real').length;
|
|
105
|
-
const datasetMeta = {
|
|
106
|
-
total: entries.length,
|
|
107
|
-
real: realCount,
|
|
108
|
-
placeholder: placeholderCount,
|
|
109
|
-
significativo: realCount >= 30,
|
|
110
|
-
};
|
|
111
|
-
|
|
112
|
-
return {
|
|
113
|
-
entries: resultados,
|
|
114
|
-
promedio,
|
|
115
|
-
dataset: datasetMeta,
|
|
116
|
-
};
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
// ── exports ───────────────────────────────────────────────────────────────────
|
|
120
|
-
|
|
121
|
-
module.exports = {
|
|
122
|
-
leerDataset,
|
|
123
|
-
ejecutarEntry,
|
|
124
|
-
ejecutarDataset,
|
|
125
|
-
};
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* longmemeval-runner.js — Adapter que ejecuta queries del benchmark contra
|
|
5
|
+
* `hooks/lib/memory-search` y devuelve métricas.
|
|
6
|
+
*
|
|
7
|
+
* Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
|
|
8
|
+
* Adaptado: en lugar de cargar haystack desde el dataset, usa el estado
|
|
9
|
+
* actual del proyecto SWL (APRENDIZAJES.md, sesiones, instintos).
|
|
10
|
+
*
|
|
11
|
+
* El dataset es un JSONL donde cada línea es:
|
|
12
|
+
* {
|
|
13
|
+
* "question_id": "q-001",
|
|
14
|
+
* "question": "texto libre de la query",
|
|
15
|
+
* "gold_ids": ["apr-N", "ses-YYYY-MM-DD-HHMM"],
|
|
16
|
+
* "category": "decision" | "patron" | "anti-patron" | "gotcha" | ...,
|
|
17
|
+
* "status": "real" | "placeholder"
|
|
18
|
+
* }
|
|
19
|
+
*
|
|
20
|
+
* @module scripts/lib/longmemeval-runner
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const fs = require('fs');
|
|
24
|
+
const path = require('path');
|
|
25
|
+
|
|
26
|
+
const memorySearch = require('../../hooks/lib/memory-search');
|
|
27
|
+
const benchmarkMetrics = require('./benchmark-metrics');
|
|
28
|
+
|
|
29
|
+
// ── parser de dataset ─────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
/**
 * Reads a JSONL dataset file and returns one parsed value per data line.
 *
 * Blank lines and lines starting with `//` are skipped. Each remaining line
 * is trimmed before parsing, so a leading UTF-8 BOM or surrounding whitespace
 * does not break `JSON.parse`.
 *
 * @param {string} ruta - Path to the JSONL file.
 * @returns {object[]} Parsed entries, in file order.
 * @throws {Error} When the file does not exist, or when a line is not valid
 *   JSON (the message carries the 1-based line number; the original parse
 *   error is attached as `cause` where the runtime supports it).
 */
function leerDataset(ruta) {
  if (!fs.existsSync(ruta)) {
    throw new Error(`Dataset no encontrado: ${ruta}`);
  }

  const entries = [];
  const lineas = fs.readFileSync(ruta, 'utf8').split('\n');
  for (const [indice, cruda] of lineas.entries()) {
    const linea = cruda.trim();
    // Skip blank lines and `//` comment lines.
    if (!linea || linea.startsWith('//')) continue;
    try {
      entries.push(JSON.parse(linea));
    } catch (err) {
      throw new Error(`JSONL malformado en línea ${indice + 1}: ${err.message}`, { cause: err });
    }
  }
  return entries;
}
|
|
55
|
+
|
|
56
|
+
// ── ejecución de query individual ─────────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
/**
 * Runs one benchmark query through `memorySearch.search` and scores the
 * retrieved ids against the entry's gold ids.
 *
 * @param {string} baseDir - Project root passed through to the search.
 * @param {object} entry - One dataset line.
 * @param {object} [opts]
 * @param {number} [opts.limit=20] - Top-k to retrieve (a falsy value falls back to 20).
 * @returns {object} question_id, question, category, status, retrievedIds,
 *   goldIds, metricas and latencyMs (wall-clock time of the search call).
 */
function ejecutarEntry(baseDir, entry, opts = {}) {
  const topK = opts.limit || 20;
  const { question } = entry;

  // Time only the search call itself.
  const t0 = Date.now();
  const hits = memorySearch.search(baseDir, question, { limit: topK });
  const latencyMs = Date.now() - t0;

  const retrievedIds = hits.map((hit) => hit.id);
  // A missing or non-array gold_ids is treated as "no gold ids".
  const goldIds = Array.isArray(entry.gold_ids) ? entry.gold_ids : [];

  return {
    question_id: entry.question_id || 'unknown',
    question,
    category: entry.category || null,
    status: entry.status || 'unknown',
    retrievedIds,
    goldIds,
    metricas: benchmarkMetrics.calcularMetricas(retrievedIds, goldIds),
    latencyMs,
  };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Runs every entry of a dataset file and returns the per-entry results
 * together with averaged metrics and dataset statistics.
 *
 * @param {string} baseDir
 * @param {string} datasetPath
 * @param {object} [opts] - Forwarded unchanged to `ejecutarEntry`.
 * @returns {{ entries: object[], promedio: object, dataset: object }}
 *   `dataset` holds total/real/placeholder counts and `significativo`,
 *   which is true once there are at least 30 entries with status "real".
 */
function ejecutarDataset(baseDir, datasetPath, opts = {}) {
  const dataset = leerDataset(datasetPath);
  const resultados = dataset.map((item) => ejecutarEntry(baseDir, item, opts));
  const promedio = benchmarkMetrics.promediar(resultados.map(({ metricas }) => metricas));

  // Dataset statistics: count entries by status in a single pass.
  const conteo = { real: 0, placeholder: 0 };
  for (const { status } of dataset) {
    if (status === 'real') {
      conteo.real += 1;
    } else if (status === 'placeholder') {
      conteo.placeholder += 1;
    }
  }

  return {
    entries: resultados,
    promedio,
    dataset: {
      total: dataset.length,
      real: conteo.real,
      placeholder: conteo.placeholder,
      significativo: conteo.real >= 30,
    },
  };
}
|
|
118
|
+
|
|
119
|
+
// ── exports ───────────────────────────────────────────────────────────────────
|
|
120
|
+
|
|
121
|
+
// Public API: dataset parser, single-entry runner and full-dataset runner.
module.exports = { leerDataset, ejecutarEntry, ejecutarDataset };
|