@saulwade/swl-ses 1.3.7 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/CLAUDE.md +12 -4
  2. package/README.md +1 -1
  3. package/bin/swl-mcp-server.js +187 -187
  4. package/bin/swl-webhook-server.js +198 -0
  5. package/comandos/swl/.evolved.json +22 -22
  6. package/comandos/swl/adoptar-proyecto.md +21 -1
  7. package/comandos/swl/claudemd.md +14 -1
  8. package/comandos/swl/contribuir.md +233 -233
  9. package/comandos/swl/exportar-vault.md +207 -7
  10. package/comandos/swl/nuevo-proyecto.md +24 -2
  11. package/gateway/adapters/base.js +109 -0
  12. package/gateway/adapters/discord.js +167 -0
  13. package/gateway/adapters/email.js +221 -0
  14. package/gateway/adapters/slack.js +192 -0
  15. package/gateway/adapters/telegram.js +183 -0
  16. package/gateway/adapters/webhook.js +113 -0
  17. package/gateway/adapters/whatsapp.js +214 -0
  18. package/gateway/agent-executor.js +322 -0
  19. package/gateway/command-relay.js +271 -0
  20. package/gateway/cron/jobs.js +263 -0
  21. package/gateway/cron/scheduler.js +322 -0
  22. package/gateway/cron/store.js +335 -0
  23. package/gateway/index.js +320 -0
  24. package/gateway/lib/event-channel.js +191 -0
  25. package/gateway/session.js +131 -0
  26. package/gateway/webhook-server.js +324 -0
  27. package/habilidades/backend-production-resilience/SKILL.md +288 -288
  28. package/habilidades/benchmark-memoria/SKILL.md +186 -186
  29. package/habilidades/build-errors-nextjs/SKILL.md +55 -1
  30. package/habilidades/diagrama-arquitectura/assets/template.html +276 -276
  31. package/habilidades/doubt-driven-review/SKILL.md +171 -171
  32. package/habilidades/doubt-driven-review/recursos/EXAMPLES.md +130 -130
  33. package/habilidades/eval-framework/SKILL.md +212 -212
  34. package/habilidades/extractor-de-aprendizajes/SKILL.md +24 -10
  35. package/habilidades/harness-claude-code/SKILL.md +299 -299
  36. package/habilidades/infra-github-actions/SKILL.md +166 -166
  37. package/habilidades/legacy-code-rescue/SKILL.md +267 -267
  38. package/habilidades/manejo-errores/.evolved.json +8 -8
  39. package/habilidades/meta-skills-estandar/recursos/convencion-examples.md +93 -93
  40. package/habilidades/meta-skills-estandar/recursos/skills-as-agents.md +163 -163
  41. package/habilidades/nextjs-testing/SKILL.md +89 -5
  42. package/habilidades/node-experto/SKILL.md +37 -1
  43. package/habilidades/patrones-python/SKILL.md +229 -229
  44. package/habilidades/patrones-python/recursos/patrones-avanzados.md +469 -469
  45. package/habilidades/planear-fase/SKILL.md +319 -319
  46. package/habilidades/react-experto/SKILL.md +45 -4
  47. package/habilidades/release-semver/.evolved.json +8 -8
  48. package/habilidades/swl-claudemd/SKILL.md +15 -1
  49. package/habilidades/tdd-workflow/SKILL.md +36 -4
  50. package/habilidades/testing-python/SKILL.md +340 -340
  51. package/hooks/claudemd-bloat-detector.js +161 -161
  52. package/hooks/inyeccion-contexto.js +8 -3
  53. package/hooks/lib/agent-routing.js +107 -107
  54. package/hooks/lib/auto-consolidator.js +335 -335
  55. package/hooks/lib/error-classifier.js +308 -308
  56. package/hooks/lib/merkle-audit.js +96 -96
  57. package/hooks/lib/provenance-tracker.js +191 -191
  58. package/hooks/lib/rate-limit-ip.js +177 -0
  59. package/hooks/lib/rate-limit-tracker.js +253 -253
  60. package/hooks/lib/resource-quota.js +122 -122
  61. package/hooks/lib/retry-jitter.js +165 -165
  62. package/hooks/lib/skill-auditor.js +588 -588
  63. package/hooks/lib/sync-status.js +228 -228
  64. package/hooks/lib/taint-tracker.js +107 -107
  65. package/hooks/lib/text-similarity.js +241 -241
  66. package/hooks/lib/toon-compressor.js +245 -245
  67. package/hooks/lib/webhook-dedup.js +184 -0
  68. package/hooks/lib/webhook-verify.js +123 -0
  69. package/hooks/proteccion-rutas.js +120 -15
  70. package/hooks/registro-turnos.js +209 -209
  71. package/hooks/sugerir-regenerar-inventario.js +170 -170
  72. package/hooks/validar-formato-post-subagente.js +140 -140
  73. package/hooks/validar-memoria-hook.js +218 -218
  74. package/instintos/prompt-appendices.yaml +57 -57
  75. package/manifiestos/agent-output-schemas.json +57 -57
  76. package/manifiestos/modulos.json +1 -0
  77. package/manifiestos/skills-lock.json +37 -37
  78. package/package.json +5 -3
  79. package/plantillas/auditor-veto-template.md +105 -105
  80. package/plantillas/github-workflows/README.md +47 -47
  81. package/plantillas/github-workflows/release-please.yml +44 -44
  82. package/plantillas/github-workflows/swl-ci.yml +107 -107
  83. package/plantillas/github-workflows/swl-security.yml +51 -51
  84. package/plugin.json +1 -1
  85. package/reglas/analisis-previo-tareas-grandes.md +172 -172
  86. package/reglas/arreglar-al-detectar.md +147 -147
  87. package/reglas/fragmentos-compartidos.md +152 -152
  88. package/reglas/harness-claude-code.md +213 -213
  89. package/reglas/usar-context7.md +226 -226
  90. package/reglas/usar-sistema-swl.md +251 -0
  91. package/schemas/diary-entry.schema.json +80 -80
  92. package/scripts/benchmark-memoria.js +167 -167
  93. package/scripts/comandos/skills.js +251 -2
  94. package/scripts/configurar-branch-protection.js +418 -418
  95. package/scripts/detectar-aprendizajes-duplicados.js +151 -151
  96. package/scripts/field-report.js +199 -199
  97. package/scripts/generar-checklists-consolidados.js +273 -273
  98. package/scripts/generar-inventario.js +420 -420
  99. package/scripts/generar-matriz-lenguajes.js +271 -271
  100. package/scripts/lib/artefactos-python.js +43 -43
  101. package/scripts/lib/benchmark-metrics.js +160 -160
  102. package/scripts/lib/budget-enforcer.js +252 -252
  103. package/scripts/lib/configurar-ci.js +380 -380
  104. package/scripts/lib/contadores-inventario.js +217 -217
  105. package/scripts/lib/detectar-stack-detallado.js +307 -307
  106. package/scripts/lib/diary-entry.js +234 -234
  107. package/scripts/lib/eval-metrics-store.js +218 -218
  108. package/scripts/lib/eval-quality.js +171 -171
  109. package/scripts/lib/eval-schemas.js +144 -144
  110. package/scripts/lib/eval-self-correct.js +106 -106
  111. package/scripts/lib/eval-validator.js +185 -185
  112. package/scripts/lib/jaccard-similarity.js +98 -98
  113. package/scripts/lib/longmemeval-runner.js +125 -125
  114. package/scripts/lib/npm-version.js +261 -261
  115. package/scripts/lib/paquetes-conocidos.js +50 -50
  116. package/scripts/lib/prompt-builder.js +264 -264
  117. package/scripts/lib/rrf-fusion.js +175 -175
  118. package/scripts/lib/scoring-instintos.js +277 -277
  119. package/scripts/lib/semantic-search.js +252 -252
  120. package/scripts/limpiar-artefactos-python.js +131 -131
  121. package/scripts/mcp-server/README.md +128 -128
  122. package/scripts/mcp-server/handlers.js +206 -206
  123. package/scripts/migrar-csv-a-array.js +168 -168
  124. package/scripts/migrar-fase-dominio.js +201 -201
  125. package/scripts/publicar.js +511 -511
  126. package/scripts/run-eval.js +141 -141
  127. package/scripts/validar-manifest.js +195 -195
  128. package/scripts/validar-userland-vacio.js +110 -110
  129. package/scripts/verificar-release.js +110 -0
@@ -1,98 +1,98 @@
1
- 'use strict';
2
-
3
- /**
4
- * jaccard-similarity.js — Métrica de Jaccard sobre conjuntos de tokens.
5
- *
6
- * Patrón adoptado de `temp/agentmemory-main/src/functions/auto-forget.ts`
7
- * para detectar memorias contradictorias/duplicadas con vocabulario compartido.
8
- *
9
- * Jaccard(A, B) = |A ∩ B| / |A ∪ B|
10
- *
11
- * Propiedades:
12
- * - Rango [0, 1]: 0 = sin overlap, 1 = idénticos.
13
- * - Simétrico: J(A, B) = J(B, A).
14
- * - Independiente de longitudes absolutas (ambos cortos pueden ser 1.0).
15
- *
16
- * Sin dependencias — Node stdlib only. Funciones puras.
17
- *
18
- * @module scripts/lib/jaccard-similarity
19
- */
20
-
21
- // ── constantes ────────────────────────────────────────────────────────────────
22
-
23
- /** Longitud mínima de un token para ser considerado significativo. */
24
- const MIN_TOKEN_LENGTH = 3;
25
-
26
- /** Stop words en español que se excluyen del análisis. */
27
- const STOP_WORDS = new Set([
28
- 'que', 'los', 'las', 'del', 'una', 'por', 'con', 'para', 'como',
29
- 'sin', 'mas', 'sus', 'lo', 'le', 'la', 'el', 'al', 'no', 'es',
30
- 'se', 'de', 'en', 'un', 'a', 'y', 'o', 'pero', 'cuando',
31
- 'donde', 'porque', 'desde', 'hasta', 'sobre', 'bajo', 'entre',
32
- 'esta', 'este', 'esto', 'esa', 'ese', 'eso', 'tras', 'durante',
33
- 'mediante', 'segun', 'asi', 'tan', 'ya', 'aun', 'aunque',
34
- // English equivalents (frequently mixed in technical text)
35
- 'the', 'and', 'for', 'with', 'this', 'that', 'have', 'from',
36
- 'are', 'was', 'will', 'not', 'has', 'had', 'but', 'can',
37
- ]);
38
-
39
- // ── funciones puras ───────────────────────────────────────────────────────────
40
-
41
- /**
42
- * Convierte un texto en un Set de tokens significativos (lowercase, sin stop
43
- * words, longitud mínima). Preserva acentos.
44
- *
45
- * @param {string} text
46
- * @returns {Set<string>}
47
- */
48
- function tokenize(text) {
49
- if (!text || typeof text !== 'string') return new Set();
50
- return new Set(
51
- String(text)
52
- .toLowerCase()
53
- .replace(/[`*_~\[\](){}<>#"'\-.,;:!?\/\\]/g, ' ')
54
- .split(/\s+/)
55
- .filter(t => t.length >= MIN_TOKEN_LENGTH && !STOP_WORDS.has(t)),
56
- );
57
- }
58
-
59
- /**
60
- * Jaccard similarity entre dos Sets.
61
- *
62
- * @param {Set} setA
63
- * @param {Set} setB
64
- * @returns {number} en [0, 1]
65
- */
66
- function jaccard(setA, setB) {
67
- if (!(setA instanceof Set) || !(setB instanceof Set)) return 0;
68
- if (setA.size === 0 && setB.size === 0) return 0;
69
- if (setA.size === 0 || setB.size === 0) return 0;
70
-
71
- let intersection = 0;
72
- for (const token of setA) {
73
- if (setB.has(token)) intersection++;
74
- }
75
- const union = setA.size + setB.size - intersection;
76
- return union === 0 ? 0 : intersection / union;
77
- }
78
-
79
- /**
80
- * Conveniencia: jaccard sobre dos textos.
81
- *
82
- * @param {string} a
83
- * @param {string} b
84
- * @returns {number} en [0, 1]
85
- */
86
- function similarity(a, b) {
87
- return jaccard(tokenize(a), tokenize(b));
88
- }
89
-
90
- // ── exports ───────────────────────────────────────────────────────────────────
91
-
92
- module.exports = {
93
- tokenize,
94
- jaccard,
95
- similarity,
96
- MIN_TOKEN_LENGTH,
97
- STOP_WORDS,
98
- };
1
+ 'use strict';
2
+
3
+ /**
4
+ * jaccard-similarity.js — Métrica de Jaccard sobre conjuntos de tokens.
5
+ *
6
+ * Patrón adoptado de `temp/agentmemory-main/src/functions/auto-forget.ts`
7
+ * para detectar memorias contradictorias/duplicadas con vocabulario compartido.
8
+ *
9
+ * Jaccard(A, B) = |A ∩ B| / |A ∪ B|
10
+ *
11
+ * Propiedades:
12
+ * - Rango [0, 1]: 0 = sin overlap, 1 = idénticos.
13
+ * - Simétrico: J(A, B) = J(B, A).
14
+ * - Independiente de longitudes absolutas (ambos cortos pueden ser 1.0).
15
+ *
16
+ * Sin dependencias — Node stdlib only. Funciones puras.
17
+ *
18
+ * @module scripts/lib/jaccard-similarity
19
+ */
20
+
21
+ // ── constantes ────────────────────────────────────────────────────────────────
22
+
23
+ /** Minimum length a token must have to be considered significant. */
24
+ const MIN_TOKEN_LENGTH = 3;
25
+
26
+ /** Stop words excluded from the analysis: Spanish entries first, then common English ones. */
27
+ const STOP_WORDS = new Set([
28
+ 'que', 'los', 'las', 'del', 'una', 'por', 'con', 'para', 'como',
29
+ 'sin', 'mas', 'sus', 'lo', 'le', 'la', 'el', 'al', 'no', 'es',
30
+ 'se', 'de', 'en', 'un', 'a', 'y', 'o', 'pero', 'cuando',
31
+ 'donde', 'porque', 'desde', 'hasta', 'sobre', 'bajo', 'entre',
32
+ 'esta', 'este', 'esto', 'esa', 'ese', 'eso', 'tras', 'durante',
33
+ 'mediante', 'segun', 'asi', 'tan', 'ya', 'aun', 'aunque',
34
+ // English equivalents (frequently mixed into technical text)
35
+ 'the', 'and', 'for', 'with', 'this', 'that', 'have', 'from',
36
+ 'are', 'was', 'will', 'not', 'has', 'had', 'but', 'can',
37
+ ]);
38
+
39
+ // ── funciones puras ───────────────────────────────────────────────────────────
40
+
/**
 * Turns a text into a Set of significant tokens.
 *
 * The text is lowercased, punctuation is replaced by spaces, the result is
 * split on whitespace, and tokens shorter than MIN_TOKEN_LENGTH or present
 * in STOP_WORDS are dropped. Accents are preserved in the returned tokens.
 *
 * The stop-word lookup ignores the acute accent, so accented spellings
 * ("según", "así", "más", "aún", "qué", "cómo") match the unaccented entries
 * of STOP_WORDS. Only the acute accent is folded: "ñ" is left alone so that
 * a word such as "uña" is not mistaken for the stop word "una".
 *
 * @param {string} text
 * @returns {Set<string>} Empty Set for empty or non-string input.
 */
function tokenize(text) {
  if (typeof text !== 'string' || text === '') return new Set();

  const words = text
    .toLowerCase()
    // Same punctuation as before, plus Spanish opening marks and typographic
    // quotes, which otherwise stay glued to the word ("¿cómo" vs "cómo").
    .replace(/[`*_~\[\](){}<>#"'\-.,;:!?\/\\¿¡«»“”‘’]/g, ' ')
    .split(/\s+/);

  // NFD splits "ú" into "u" + U+0301; removing U+0301 yields the bare vowel.
  const isStopWord = (word) =>
    STOP_WORDS.has(word) ||
    STOP_WORDS.has(word.normalize('NFD').replace(/\u0301/g, ''));

  return new Set(
    words.filter((word) => word.length >= MIN_TOKEN_LENGTH && !isStopWord(word)),
  );
}
58
+
/**
 * Jaccard similarity between two Sets: |A ∩ B| / |A ∪ B|.
 *
 * Returns 0 when either argument is not a Set or when either Set is empty,
 * so two empty Sets score 0 rather than 1.
 *
 * @param {Set} setA
 * @param {Set} setB
 * @returns {number} Value in [0, 1].
 */
function jaccard(setA, setB) {
  if (!(setA instanceof Set) || !(setB instanceof Set)) return 0;
  // This single check also covers "both empty"; past it the union is >= 1,
  // so the division below can never be by zero.
  if (setA.size === 0 || setB.size === 0) return 0;

  // The intersection is symmetric, so walk the smaller Set and probe the
  // larger one.
  const [smaller, larger] = setA.size <= setB.size ? [setA, setB] : [setB, setA];
  let intersection = 0;
  for (const token of smaller) {
    if (larger.has(token)) intersection++;
  }

  return intersection / (setA.size + setB.size - intersection);
}
78
+
/**
 * Convenience wrapper: tokenizes both texts and scores the two token Sets
 * with jaccard().
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Jaccard score in [0, 1].
 */
function similarity(a, b) {
  const tokensA = tokenize(a);
  const tokensB = tokenize(b);
  return jaccard(tokensA, tokensB);
}
89
+
90
+ // ── exports: pure helpers plus the constants tokenize() filters with ──────────
91
+
92
+ module.exports = {
93
+ tokenize,
94
+ jaccard,
95
+ similarity,
96
+ MIN_TOKEN_LENGTH,
97
+ STOP_WORDS,
98
+ };
@@ -1,125 +1,125 @@
1
- 'use strict';
2
-
3
- /**
4
- * longmemeval-runner.js — Adapter que ejecuta queries del benchmark contra
5
- * `hooks/lib/memory-search` y devuelve métricas.
6
- *
7
- * Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
8
- * Adaptado: en lugar de cargar haystack desde el dataset, usa el estado
9
- * actual del proyecto SWL (APRENDIZAJES.md, sesiones, instintos).
10
- *
11
- * El dataset es un JSONL donde cada línea es:
12
- * {
13
- * "question_id": "q-001",
14
- * "question": "texto libre de la query",
15
- * "gold_ids": ["apr-N", "ses-YYYY-MM-DD-HHMM"],
16
- * "category": "decision" | "patron" | "anti-patron" | "gotcha" | ...,
17
- * "status": "real" | "placeholder"
18
- * }
19
- *
20
- * @module scripts/lib/longmemeval-runner
21
- */
22
-
23
- const fs = require('fs');
24
- const path = require('path');
25
-
26
- const memorySearch = require('../../hooks/lib/memory-search');
27
- const benchmarkMetrics = require('./benchmark-metrics');
28
-
29
- // ── parser de dataset ─────────────────────────────────────────────────────────
30
-
31
- /**
32
- * Parsea un archivo JSONL del dataset.
33
- * @param {string} ruta
34
- * @returns {object[]}
35
- */
36
- function leerDataset(ruta) {
37
- if (!fs.existsSync(ruta)) {
38
- throw new Error(`Dataset no encontrado: ${ruta}`);
39
- }
40
- const contenido = fs.readFileSync(ruta, 'utf8');
41
- const entries = [];
42
- let lineNum = 0;
43
- for (const linea of contenido.split('\n')) {
44
- lineNum++;
45
- if (!linea.trim()) continue;
46
- if (linea.trim().startsWith('//')) continue; // comentarios
47
- try {
48
- entries.push(JSON.parse(linea));
49
- } catch (err) {
50
- throw new Error(`JSONL malformado en línea ${lineNum}: ${err.message}`);
51
- }
52
- }
53
- return entries;
54
- }
55
-
56
- // ── ejecución de query individual ─────────────────────────────────────────────
57
-
58
- /**
59
- * Ejecuta una query del benchmark contra memoria SWL y compara con gold.
60
- *
61
- * @param {string} baseDir - Raíz del proyecto.
62
- * @param {object} entry - Una línea del dataset.
63
- * @param {object} [opts]
64
- * @param {number} [opts.limit=20] - Top-k a recuperar.
65
- * @returns {object} Métricas + ids retrieved + entry original.
66
- */
67
- function ejecutarEntry(baseDir, entry, opts = {}) {
68
- const limit = opts.limit || 20;
69
- const inicio = Date.now();
70
- const resultados = memorySearch.search(baseDir, entry.question, { limit });
71
- const latencyMs = Date.now() - inicio;
72
-
73
- const retrievedIds = resultados.map(r => r.id);
74
- const goldIds = Array.isArray(entry.gold_ids) ? entry.gold_ids : [];
75
- const metricas = benchmarkMetrics.calcularMetricas(retrievedIds, goldIds);
76
-
77
- return {
78
- question_id: entry.question_id || 'unknown',
79
- question: entry.question,
80
- category: entry.category || null,
81
- status: entry.status || 'unknown',
82
- retrievedIds,
83
- goldIds,
84
- metricas,
85
- latencyMs,
86
- };
87
- }
88
-
89
- /**
90
- * Ejecuta el dataset completo y devuelve resultados + métricas agregadas.
91
- *
92
- * @param {string} baseDir
93
- * @param {string} datasetPath
94
- * @param {object} [opts]
95
- * @returns {{ entries: object[], promedio: object, dataset: object }}
96
- */
97
- function ejecutarDataset(baseDir, datasetPath, opts = {}) {
98
- const entries = leerDataset(datasetPath);
99
- const resultados = entries.map(e => ejecutarEntry(baseDir, e, opts));
100
- const promedio = benchmarkMetrics.promediar(resultados.map(r => r.metricas));
101
-
102
- // Estadísticas del dataset
103
- const placeholderCount = entries.filter(e => e.status === 'placeholder').length;
104
- const realCount = entries.filter(e => e.status === 'real').length;
105
- const datasetMeta = {
106
- total: entries.length,
107
- real: realCount,
108
- placeholder: placeholderCount,
109
- significativo: realCount >= 30,
110
- };
111
-
112
- return {
113
- entries: resultados,
114
- promedio,
115
- dataset: datasetMeta,
116
- };
117
- }
118
-
119
- // ── exports ───────────────────────────────────────────────────────────────────
120
-
121
- module.exports = {
122
- leerDataset,
123
- ejecutarEntry,
124
- ejecutarDataset,
125
- };
1
+ 'use strict';
2
+
3
+ /**
4
+ * longmemeval-runner.js — Adapter que ejecuta queries del benchmark contra
5
+ * `hooks/lib/memory-search` y devuelve métricas.
6
+ *
7
+ * Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
8
+ * Adaptado: en lugar de cargar haystack desde el dataset, usa el estado
9
+ * actual del proyecto SWL (APRENDIZAJES.md, sesiones, instintos).
10
+ *
11
+ * El dataset es un JSONL donde cada línea es:
12
+ * {
13
+ * "question_id": "q-001",
14
+ * "question": "texto libre de la query",
15
+ * "gold_ids": ["apr-N", "ses-YYYY-MM-DD-HHMM"],
16
+ * "category": "decision" | "patron" | "anti-patron" | "gotcha" | ...,
17
+ * "status": "real" | "placeholder"
18
+ * }
19
+ *
20
+ * @module scripts/lib/longmemeval-runner
21
+ */
22
+
23
+ const fs = require('fs');
24
+ const path = require('path');
25
+
26
+ const memorySearch = require('../../hooks/lib/memory-search');
27
+ const benchmarkMetrics = require('./benchmark-metrics');
28
+
29
+ // ── parser de dataset ─────────────────────────────────────────────────────────
30
+
/**
 * Parses a JSONL dataset file into an array of entry objects.
 *
 * Blank lines and lines whose first non-blank characters are `//` are
 * skipped. A leading UTF-8 byte-order mark is ignored. Every remaining line
 * must hold one JSON object.
 *
 * @param {string} ruta - Path to the JSONL file.
 * @returns {object[]} Parsed entries, in file order.
 * @throws {Error} If the file does not exist, a line is not valid JSON, or a
 *   line parses to something other than an object. The message carries the
 *   1-based line number; for JSON syntax errors the original error is
 *   attached as `cause`.
 */
function leerDataset(ruta) {
  if (!fs.existsSync(ruta)) {
    throw new Error(`Dataset no encontrado: ${ruta}`);
  }

  // readFileSync('utf8') keeps a BOM as U+FEFF, which JSON.parse rejects.
  const contenido = fs.readFileSync(ruta, 'utf8').replace(/^\uFEFF/, '');
  const entries = [];

  for (const [indice, linea] of contenido.split('\n').entries()) {
    const lineNum = indice + 1;
    const recortada = linea.trim();
    if (recortada === '' || recortada.startsWith('//')) continue; // blank or comment

    let entry;
    try {
      entry = JSON.parse(linea);
    } catch (err) {
      throw new Error(`JSONL malformado en línea ${lineNum}: ${err.message}`, { cause: err });
    }

    // Reject null, scalars and arrays here, where the line number is known,
    // instead of letting a consumer fail later on a property access.
    if (entry === null || typeof entry !== 'object' || Array.isArray(entry)) {
      throw new Error(`JSONL malformado en línea ${lineNum}: se esperaba un objeto JSON`);
    }
    entries.push(entry);
  }

  return entries;
}
55
+
56
+ // ── ejecución de query individual ─────────────────────────────────────────────
57
+
/**
 * Runs one benchmark query through memorySearch.search() and scores the
 * returned ids against the entry's gold ids.
 *
 * @param {string} baseDir - Project root, passed straight to the search.
 * @param {object} entry - One parsed dataset line.
 * @param {object} [opts]
 * @param {number} [opts.limit=20] - Top-k to retrieve; any falsy value
 *   (including 0) falls back to 20.
 * @returns {object} question_id, question, category, status, retrievedIds,
 *   goldIds, metricas and latencyMs (wall-clock milliseconds of the search
 *   call only).
 */
function ejecutarEntry(baseDir, entry, opts = {}) {
  const topK = opts.limit || 20;

  // Time nothing but the search call itself.
  const startedAt = Date.now();
  const hits = memorySearch.search(baseDir, entry.question, { limit: topK });
  const latencyMs = Date.now() - startedAt;

  // NOTE(review): assumes every search result exposes an `id` — confirm
  // against hooks/lib/memory-search.
  const retrievedIds = hits.map((hit) => hit.id);

  // A missing or non-array gold_ids is treated as "no gold ids".
  let goldIds = [];
  if (Array.isArray(entry.gold_ids)) {
    goldIds = entry.gold_ids;
  }

  return {
    question_id: entry.question_id || 'unknown',
    question: entry.question,
    category: entry.category || null,
    status: entry.status || 'unknown',
    retrievedIds,
    goldIds,
    metricas: benchmarkMetrics.calcularMetricas(retrievedIds, goldIds),
    latencyMs,
  };
}
88
+
/**
 * Runs every entry of a dataset file and returns the per-entry results, the
 * averaged metrics and a few counts describing the dataset.
 *
 * @param {string} baseDir - Project root, forwarded to ejecutarEntry().
 * @param {string} datasetPath - JSONL file read with leerDataset().
 * @param {object} [opts] - Forwarded unchanged to ejecutarEntry().
 * @returns {{ entries: object[], promedio: object, dataset: object }}
 *   `dataset` holds total, real, placeholder and significativo, which is
 *   true once at least 30 entries have status "real".
 */
function ejecutarDataset(baseDir, datasetPath, opts = {}) {
  const dataset = leerDataset(datasetPath);

  const porEntry = [];
  for (const item of dataset) {
    porEntry.push(ejecutarEntry(baseDir, item, opts));
  }
  const promedio = benchmarkMetrics.promediar(porEntry.map((resultado) => resultado.metricas));

  // Count entries by status; any other status only adds to the total.
  let real = 0;
  let placeholder = 0;
  for (const item of dataset) {
    if (item.status === 'real') {
      real += 1;
    } else if (item.status === 'placeholder') {
      placeholder += 1;
    }
  }

  return {
    entries: porEntry,
    promedio,
    dataset: {
      total: dataset.length,
      real,
      placeholder,
      significativo: real >= 30,
    },
  };
}
118
+
119
+ // ── exports: dataset parser plus the per-entry and whole-dataset runners ──────
120
+
121
+ module.exports = {
122
+ leerDataset,
123
+ ejecutarEntry,
124
+ ejecutarDataset,
125
+ };
+ };