@saulwade/swl-ses 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/CLAUDE.md +4 -3
  2. package/README.md +15 -14
  3. package/agentes/nemesis-auditor-swl.md +161 -0
  4. package/bin/swl-mcp-server.js +187 -187
  5. package/comandos/swl/.evolved.json +22 -22
  6. package/comandos/swl/contribuir.md +233 -233
  7. package/comandos/swl/nemesis.md +122 -0
  8. package/comandos/swl/salud.md +34 -0
  9. package/comandos/swl/verificar.md +45 -0
  10. package/gateway/lib/event-channel.js +191 -191
  11. package/habilidades/backend-production-resilience/SKILL.md +288 -288
  12. package/habilidades/benchmark-memoria/SKILL.md +186 -186
  13. package/habilidades/diagrama-arquitectura/assets/template.html +276 -276
  14. package/habilidades/doubt-driven-review/SKILL.md +171 -171
  15. package/habilidades/doubt-driven-review/recursos/EXAMPLES.md +130 -130
  16. package/habilidades/eval-framework/SKILL.md +212 -212
  17. package/habilidades/feynman-auditor-swl/SKILL.md +123 -0
  18. package/habilidades/feynman-auditor-swl/recursos/preguntas-language-agnostic.md +108 -0
  19. package/habilidades/harness-claude-code/SKILL.md +299 -299
  20. package/habilidades/infra-github-actions/SKILL.md +166 -166
  21. package/habilidades/legacy-code-rescue/SKILL.md +267 -267
  22. package/habilidades/manejo-errores/.evolved.json +8 -8
  23. package/habilidades/meta-skills-estandar/recursos/convencion-examples.md +93 -93
  24. package/habilidades/meta-skills-estandar/recursos/skills-as-agents.md +163 -163
  25. package/habilidades/patrones-python/SKILL.md +229 -229
  26. package/habilidades/patrones-python/recursos/patrones-avanzados.md +469 -469
  27. package/habilidades/planear-fase/SKILL.md +319 -319
  28. package/habilidades/release-semver/.evolved.json +8 -8
  29. package/habilidades/state-inconsistency-auditor-swl/SKILL.md +166 -0
  30. package/habilidades/state-inconsistency-auditor-swl/recursos/coupled-state-patterns.md +147 -0
  31. package/habilidades/testing-python/SKILL.md +340 -340
  32. package/habilidades/web-fetcher-routing/SKILL.md +75 -0
  33. package/hooks/claudemd-bloat-detector.js +161 -161
  34. package/hooks/lib/agent-routing.js +107 -107
  35. package/hooks/lib/auto-consolidator.js +335 -335
  36. package/hooks/lib/error-classifier.js +308 -308
  37. package/hooks/lib/merkle-audit.js +96 -96
  38. package/hooks/lib/provenance-tracker.js +191 -191
  39. package/hooks/lib/rate-limit-tracker.js +253 -253
  40. package/hooks/lib/resource-quota.js +122 -122
  41. package/hooks/lib/retry-jitter.js +165 -165
  42. package/hooks/lib/security-net.js +201 -0
  43. package/hooks/lib/skill-auditor.js +588 -588
  44. package/hooks/lib/sync-status.js +228 -228
  45. package/hooks/lib/taint-tracker.js +107 -107
  46. package/hooks/lib/text-similarity.js +241 -241
  47. package/hooks/lib/toon-compressor.js +245 -245
  48. package/hooks/registro-turnos.js +209 -209
  49. package/hooks/sugerir-regenerar-inventario.js +170 -170
  50. package/hooks/validar-formato-post-subagente.js +140 -140
  51. package/hooks/validar-memoria-hook.js +218 -218
  52. package/instintos/prompt-appendices.yaml +57 -57
  53. package/manifiestos/agent-output-schemas.json +57 -57
  54. package/manifiestos/modulos.json +41 -6
  55. package/manifiestos/perfiles.json +2 -1
  56. package/manifiestos/skills-lock.json +30 -9
  57. package/package.json +2 -2
  58. package/plantillas/auditor-veto-template.md +105 -105
  59. package/plantillas/github-workflows/README.md +47 -47
  60. package/plantillas/github-workflows/release-please.yml +44 -44
  61. package/plantillas/github-workflows/swl-ci.yml +107 -107
  62. package/plantillas/github-workflows/swl-security.yml +51 -51
  63. package/plugin.json +10 -2
  64. package/reglas/analisis-previo-tareas-grandes.md +172 -172
  65. package/reglas/arreglar-al-detectar.md +147 -147
  66. package/reglas/fragmentos-compartidos.md +152 -152
  67. package/reglas/harness-claude-code.md +213 -213
  68. package/reglas/usar-context7.md +226 -226
  69. package/schemas/diary-entry.schema.json +80 -80
  70. package/scripts/audit-tools/audit-history.js +330 -0
  71. package/scripts/audit-tools/bundle-tracker.js +290 -0
  72. package/scripts/audit-tools/canary-monitor.js +352 -0
  73. package/scripts/audit-tools/code-profiler.js +605 -0
  74. package/scripts/audit-tools/dep-doctor.js +320 -0
  75. package/scripts/audit-tools/env-validator.js +206 -0
  76. package/scripts/audit-tools/lib/fs-walk.js +48 -0
  77. package/scripts/audit-tools/lib/output.js +23 -0
  78. package/scripts/audit-tools/migration-checker.js +392 -0
  79. package/scripts/audit-tools/pentest-scanner.js +1436 -0
  80. package/scripts/benchmark-memoria.js +167 -167
  81. package/scripts/configurar-branch-protection.js +418 -418
  82. package/scripts/detectar-aprendizajes-duplicados.js +151 -151
  83. package/scripts/field-report.js +199 -199
  84. package/scripts/generar-checklists-consolidados.js +273 -273
  85. package/scripts/generar-inventario.js +420 -420
  86. package/scripts/generar-matriz-lenguajes.js +271 -271
  87. package/scripts/lib/artefactos-python.js +43 -43
  88. package/scripts/lib/benchmark-metrics.js +160 -160
  89. package/scripts/lib/budget-enforcer.js +252 -252
  90. package/scripts/lib/configurar-ci.js +380 -380
  91. package/scripts/lib/contadores-inventario.js +217 -217
  92. package/scripts/lib/detectar-stack-detallado.js +307 -307
  93. package/scripts/lib/diary-entry.js +234 -234
  94. package/scripts/lib/eval-metrics-store.js +218 -218
  95. package/scripts/lib/eval-quality.js +171 -171
  96. package/scripts/lib/eval-schemas.js +144 -144
  97. package/scripts/lib/eval-self-correct.js +106 -106
  98. package/scripts/lib/eval-validator.js +185 -185
  99. package/scripts/lib/jaccard-similarity.js +98 -98
  100. package/scripts/lib/longmemeval-runner.js +125 -125
  101. package/scripts/lib/manifiestos.js +42 -1
  102. package/scripts/lib/npm-version.js +261 -261
  103. package/scripts/lib/paquetes-conocidos.js +50 -50
  104. package/scripts/lib/prompt-builder.js +264 -264
  105. package/scripts/lib/rrf-fusion.js +175 -175
  106. package/scripts/lib/scoring-instintos.js +277 -277
  107. package/scripts/lib/semantic-search.js +252 -252
  108. package/scripts/limpiar-artefactos-python.js +131 -131
  109. package/scripts/mcp-server/README.md +128 -128
  110. package/scripts/mcp-server/handlers.js +206 -206
  111. package/scripts/migrar-csv-a-array.js +168 -168
  112. package/scripts/migrar-fase-dominio.js +201 -201
  113. package/scripts/publicar.js +511 -511
  114. package/scripts/run-eval.js +141 -141
  115. package/scripts/validar-manifest.js +231 -195
  116. package/scripts/validar-userland-vacio.js +110 -110
@@ -1,167 +1,167 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /**
5
- * benchmark-memoria.js — CLI runner para benchmark de retrieval sobre memoria SWL.
6
- *
7
- * Ejecuta queries de un dataset JSONL contra `hooks/lib/memory-search`
8
- * (que usa RRF fusion sobre aprendizajes/sesiones/instintos) y reporta
9
- * métricas R@5, R@10, R@20, MRR, nDCG@10, P@5.
10
- *
11
- * Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
12
- * Adaptado para swl-ses: file-based, sin embeddings ML, dataset SWL-específico.
13
- *
14
- * Uso:
15
- * node scripts/benchmark-memoria.js [opciones]
16
- *
17
- * Opciones:
18
- * --dataset <ruta> Dataset JSONL (default: .planning/benchmark/dataset.jsonl)
19
- * --limit <n> Top-k a recuperar por query (default: 20)
20
- * --json Output en JSON (para scripts)
21
- * --verbose Detalle por query
22
- *
23
- * Exit codes:
24
- * 0 - OK
25
- * 1 - Error de I/O o dataset inválido
26
- * 2 - Argumentos inválidos
27
- *
28
- * Persistencia opcional: si se setea SWL_BENCHMARK_PERSIST=1, escribe
29
- * el resultado agregado a `.planning/evolucion/benchmark-memoria.jsonl`
30
- * para tracking histórico.
31
- */
32
-
33
- const fs = require('fs');
34
- const path = require('path');
35
-
36
- const { ejecutarDataset } = require('./lib/longmemeval-runner');
37
-
38
- const DATASET_DEFAULT = '.planning/benchmark/dataset.jsonl';
39
- const HISTORICO_PATH = '.planning/evolucion/benchmark-memoria.jsonl';
40
-
41
- function uso() {
42
- console.error('Uso: node scripts/benchmark-memoria.js [--dataset <ruta>] [--limit <n>] [--json] [--verbose]');
43
- process.exit(2);
44
- }
45
-
46
- function parseArgs(argv) {
47
- const opts = {
48
- dataset: DATASET_DEFAULT,
49
- limit: 20,
50
- json: false,
51
- verbose: false,
52
- };
53
- for (let i = 0; i < argv.length; i++) {
54
- const arg = argv[i];
55
- if (arg === '--dataset') opts.dataset = argv[++i];
56
- else if (arg === '--limit') opts.limit = parseInt(argv[++i], 10) || 20;
57
- else if (arg === '--json') opts.json = true;
58
- else if (arg === '--verbose') opts.verbose = true;
59
- else if (arg === '--help' || arg === '-h') uso();
60
- }
61
- return opts;
62
- }
63
-
64
- function persistirHistorico(baseDir, resumen) {
65
- if (process.env.SWL_BENCHMARK_PERSIST !== '1') return;
66
- try {
67
- const dirEvolucion = path.join(baseDir, '.planning', 'evolucion');
68
- if (!fs.existsSync(dirEvolucion)) fs.mkdirSync(dirEvolucion, { recursive: true });
69
- const linea = JSON.stringify({
70
- timestamp: new Date().toISOString(),
71
- ...resumen,
72
- });
73
- fs.appendFileSync(path.join(baseDir, HISTORICO_PATH), linea + '\n', 'utf8');
74
- } catch (_) {
75
- // best-effort
76
- }
77
- }
78
-
79
- function reportarTexto(resultado, opts) {
80
- const { promedio, dataset, entries } = resultado;
81
-
82
- console.log('================================================================');
83
- console.log(' Benchmark de retrieval de memoria SWL');
84
- console.log('================================================================');
85
- console.log('');
86
- console.log(`Dataset: ${opts.dataset}`);
87
- console.log(`Total queries: ${dataset.total}`);
88
- console.log(` Reales: ${dataset.real}`);
89
- console.log(` Placeholder: ${dataset.placeholder}`);
90
- console.log(`Significativo: ${dataset.significativo ? 'sí' : 'NO (requiere ≥30 reales)'}`);
91
- console.log('');
92
-
93
- if (!dataset.significativo) {
94
- console.log('⚠ ADVERTENCIA: dataset con menos de 30 queries reales.');
95
- console.log(' Las métricas son INDICATIVAS, no estadísticamente significativas.');
96
- console.log(' Para usar como gate de release, expandir el dataset con preguntas');
97
- console.log(' curadas extraídas de uso real (ver SKILL.md de benchmark-memoria).');
98
- console.log('');
99
- }
100
-
101
- console.log('────────────── Métricas agregadas ──────────────');
102
- console.log(` Recall @ 5: ${(promedio.recall_at_5 * 100).toFixed(1)}%`);
103
- console.log(` Recall @ 10: ${(promedio.recall_at_10 * 100).toFixed(1)}%`);
104
- console.log(` Recall @ 20: ${(promedio.recall_at_20 * 100).toFixed(1)}%`);
105
- console.log(` MRR: ${promedio.mrr.toFixed(3)}`);
106
- console.log(` nDCG @ 10: ${promedio.ndcg_at_10.toFixed(3)}`);
107
- console.log(` Precision @ 5: ${(promedio.precision_at_5 * 100).toFixed(1)}%`);
108
- console.log('');
109
-
110
- if (opts.verbose) {
111
- console.log('────────────── Detalle por query ──────────────');
112
- for (const r of entries) {
113
- const mark = r.metricas.recall_at_5 > 0 ? '✓' : '✗';
114
- console.log(` ${mark} ${r.question_id} [${r.category || 'n/a'}, ${r.status}] ` +
115
- `R@5=${r.metricas.recall_at_5} R@10=${r.metricas.recall_at_10} ` +
116
- `MRR=${r.metricas.mrr.toFixed(2)} (${r.latencyMs}ms)`);
117
- if (r.metricas.recall_at_5 === 0 && opts.verbose) {
118
- console.log(` Q: ${r.question.slice(0, 80)}`);
119
- console.log(` Gold: ${r.goldIds.slice(0, 3).join(', ')}${r.goldIds.length > 3 ? '...' : ''}`);
120
- console.log(` Retrieved: ${r.retrievedIds.slice(0, 5).join(', ')}`);
121
- }
122
- }
123
- console.log('');
124
- }
125
- }
126
-
127
- function main() {
128
- const opts = parseArgs(process.argv.slice(2));
129
- const baseDir = process.cwd();
130
-
131
- if (!fs.existsSync(opts.dataset)) {
132
- console.error(`Dataset no encontrado: ${opts.dataset}`);
133
- console.error(`Crea uno o usa el placeholder en ${DATASET_DEFAULT}.`);
134
- process.exit(1);
135
- }
136
-
137
- let resultado;
138
- try {
139
- resultado = ejecutarDataset(baseDir, opts.dataset, { limit: opts.limit });
140
- } catch (err) {
141
- console.error(`Error ejecutando benchmark: ${err.message}`);
142
- process.exit(1);
143
- }
144
-
145
- if (opts.json) {
146
- console.log(JSON.stringify(resultado, null, 2));
147
- } else {
148
- reportarTexto(resultado, opts);
149
- }
150
-
151
- persistirHistorico(baseDir, {
152
- dataset: opts.dataset,
153
- n: resultado.dataset.total,
154
- significativo: resultado.dataset.significativo,
155
- promedio: resultado.promedio,
156
- });
157
-
158
- process.exit(0);
159
- }
160
-
161
- if (require.main === module) {
162
- main();
163
- }
164
-
165
- module.exports = {
166
- parseArgs,
167
- };
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * benchmark-memoria.js — CLI runner para benchmark de retrieval sobre memoria SWL.
6
+ *
7
+ * Ejecuta queries de un dataset JSONL contra `hooks/lib/memory-search`
8
+ * (que usa RRF fusion sobre aprendizajes/sesiones/instintos) y reporta
9
+ * métricas R@5, R@10, R@20, MRR, nDCG@10, P@5.
10
+ *
11
+ * Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
12
+ * Adaptado para swl-ses: file-based, sin embeddings ML, dataset SWL-específico.
13
+ *
14
+ * Uso:
15
+ * node scripts/benchmark-memoria.js [opciones]
16
+ *
17
+ * Opciones:
18
+ * --dataset <ruta> Dataset JSONL (default: .planning/benchmark/dataset.jsonl)
19
+ * --limit <n> Top-k a recuperar por query (default: 20)
20
+ * --json Output en JSON (para scripts)
21
+ * --verbose Detalle por query
22
+ *
23
+ * Exit codes:
24
+ * 0 - OK
25
+ * 1 - Error de I/O o dataset inválido
26
+ * 2 - Argumentos inválidos
27
+ *
28
+ * Persistencia opcional: si se setea SWL_BENCHMARK_PERSIST=1, escribe
29
+ * el resultado agregado a `.planning/evolucion/benchmark-memoria.jsonl`
30
+ * para tracking histórico.
31
+ */
32
+
33
+ const fs = require('fs');
34
+ const path = require('path');
35
+
36
+ const { ejecutarDataset } = require('./lib/longmemeval-runner');
37
+
38
+ const DATASET_DEFAULT = '.planning/benchmark/dataset.jsonl';
39
+ const HISTORICO_PATH = '.planning/evolucion/benchmark-memoria.jsonl';
40
+
41
/**
 * Prints the one-line usage summary of the CLI to stderr and terminates the
 * process with exit code 2.
 */
function uso() {
  // Assembled from its parts so each supported flag sits on its own line.
  const sintaxis = [
    'Uso: node scripts/benchmark-memoria.js',
    '[--dataset <ruta>]',
    '[--limit <n>]',
    '[--json]',
    '[--verbose]',
  ].join(' ');

  console.error(sintaxis);
  process.exit(2);
}
45
+
46
/**
 * Parses the command-line arguments of the benchmark runner.
 *
 * Recognized flags are `--dataset <ruta>`, `--limit <n>`, `--json`,
 * `--verbose` and `--help` / `-h` (which delegates to `uso()`). Any other
 * argument is ignored.
 *
 * Parsing is lenient rather than strict:
 * - A value flag only consumes the next token when that token exists and does
 *   not itself start with `-`. This keeps `--dataset --json` from swallowing
 *   `--json` as a path, and keeps a trailing `--dataset` from storing
 *   `undefined`.
 * - `--limit` accepts positive integers only; anything else (missing value,
 *   non-numeric text, zero, negative numbers) falls back to the default of 20.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{dataset: string, limit: number, json: boolean, verbose: boolean}}
 *   The resolved options, with defaults applied for anything not supplied.
 */
function parseArgs(argv) {
  const LIMIT_DEFAULT = 20;
  const opts = {
    dataset: DATASET_DEFAULT,
    limit: LIMIT_DEFAULT,
    json: false,
    verbose: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    const siguiente = argv[i + 1];
    // The next token counts as this flag's value only if it is not a flag.
    const tieneValor = typeof siguiente === 'string' && !siguiente.startsWith('-');

    switch (arg) {
      case '--dataset':
        if (tieneValor) {
          opts.dataset = siguiente;
          i++;
        }
        break;
      case '--limit': {
        // An invalid or missing value resets to the default, as before.
        opts.limit = LIMIT_DEFAULT;
        if (tieneValor) {
          const n = Number.parseInt(siguiente, 10);
          if (Number.isInteger(n) && n > 0) opts.limit = n;
          i++;
        }
        break;
      }
      case '--json':
        opts.json = true;
        break;
      case '--verbose':
        opts.verbose = true;
        break;
      case '--help':
      case '-h':
        uso();
        break;
      default:
        // Unknown arguments are ignored on purpose.
        break;
    }
  }

  return opts;
}
63
+
64
/**
 * Appends one JSON line with the aggregated benchmark result to the history
 * file at `HISTORICO_PATH` (resolved against `baseDir`).
 *
 * Does nothing unless the environment variable `SWL_BENCHMARK_PERSIST` is
 * exactly `'1'`. Persistence is best-effort: a failure never throws, but it is
 * reported on stderr so an opted-in user can see that nothing was recorded.
 *
 * @param {string} baseDir - Directory the history path is resolved against.
 * @param {object} resumen - Fields to record; spread after `timestamp`, so a
 *   `timestamp` key in `resumen` overrides the generated one.
 * @returns {void}
 */
function persistirHistorico(baseDir, resumen) {
  if (process.env.SWL_BENCHMARK_PERSIST !== '1') return;

  try {
    const destino = path.join(baseDir, HISTORICO_PATH);
    // Derive the directory from the target file so the two cannot drift apart.
    // A recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(destino), { recursive: true });

    const linea = JSON.stringify({
      timestamp: new Date().toISOString(),
      ...resumen,
    });
    fs.appendFileSync(destino, `${linea}\n`, 'utf8');
  } catch (err) {
    // Still best-effort: warn on stderr (stdout may carry --json output).
    const detalle = err && err.message ? err.message : String(err);
    console.error(`Aviso: no se pudo persistir el histórico del benchmark: ${detalle}`);
  }
}
78
+
79
/**
 * Writes the human-readable benchmark report to stdout.
 *
 * Sections, in order: banner, dataset summary, a warning when the dataset is
 * not flagged as significant, the aggregated metrics and, when `opts.verbose`
 * is set, one line per query (plus question / gold / retrieved details for
 * queries whose recall@5 is exactly 0).
 *
 * @param {object} resultado - Benchmark result; `promedio`, `dataset` and
 *   `entries` are read from it.
 * @param {object} opts - CLI options; `dataset` and `verbose` are read.
 * @returns {void}
 */
function reportarTexto(resultado, opts) {
  const { promedio, dataset, entries } = resultado;

  const emitir = (linea) => console.log(linea);
  const porcentaje = (fraccion) => `${(fraccion * 100).toFixed(1)}%`;
  const barra = '================================================================';

  // Banner.
  emitir(barra);
  emitir(' Benchmark de retrieval de memoria SWL');
  emitir(barra);
  emitir('');

  // Dataset summary.
  emitir(`Dataset: ${opts.dataset}`);
  emitir(`Total queries: ${dataset.total}`);
  emitir(` Reales: ${dataset.real}`);
  emitir(` Placeholder: ${dataset.placeholder}`);
  let etiquetaSignificativo = 'NO (requiere ≥30 reales)';
  if (dataset.significativo) {
    etiquetaSignificativo = 'sí';
  }
  emitir(`Significativo: ${etiquetaSignificativo}`);
  emitir('');

  if (!dataset.significativo) {
    [
      '⚠ ADVERTENCIA: dataset con menos de 30 queries reales.',
      ' Las métricas son INDICATIVAS, no estadísticamente significativas.',
      ' Para usar como gate de release, expandir el dataset con preguntas',
      ' curadas extraídas de uso real (ver SKILL.md de benchmark-memoria).',
      '',
    ].forEach((linea) => emitir(linea));
  }

  // Aggregated metrics.
  emitir('────────────── Métricas agregadas ──────────────');
  emitir(` Recall @ 5: ${porcentaje(promedio.recall_at_5)}`);
  emitir(` Recall @ 10: ${porcentaje(promedio.recall_at_10)}`);
  emitir(` Recall @ 20: ${porcentaje(promedio.recall_at_20)}`);
  emitir(` MRR: ${promedio.mrr.toFixed(3)}`);
  emitir(` nDCG @ 10: ${promedio.ndcg_at_10.toFixed(3)}`);
  emitir(` Precision @ 5: ${porcentaje(promedio.precision_at_5)}`);
  emitir('');

  if (!opts.verbose) return;

  // Per-query detail.
  emitir('────────────── Detalle por query ──────────────');
  for (const fila of entries) {
    const { metricas } = fila;
    const marca = metricas.recall_at_5 > 0 ? '✓' : '✗';
    const cabecera = [
      ` ${marca} ${fila.question_id} [${fila.category || 'n/a'}, ${fila.status}]`,
      `R@5=${metricas.recall_at_5} R@10=${metricas.recall_at_10}`,
      `MRR=${metricas.mrr.toFixed(2)} (${fila.latencyMs}ms)`,
    ].join(' ');
    emitir(cabecera);

    // Only queries that missed entirely at k=5 get the extra detail lines.
    if (metricas.recall_at_5 !== 0) continue;

    const sufijoGold = fila.goldIds.length > 3 ? '...' : '';
    emitir(` Q: ${fila.question.slice(0, 80)}`);
    emitir(` Gold: ${fila.goldIds.slice(0, 3).join(', ')}${sufijoGold}`);
    emitir(` Retrieved: ${fila.retrievedIds.slice(0, 5).join(', ')}`);
  }
  emitir('');
}
126
+
127
/**
 * CLI entry point: parses the arguments, runs the benchmark over the dataset,
 * prints the result (JSON or text) and optionally records it in the history.
 *
 * The outcome is signalled through `process.exitCode` (0 on success, 1 when
 * the dataset file is missing or the run throws) and the function returns
 * normally. It deliberately avoids `process.exit()`: exiting right after a
 * large `console.log` can cut the output short where stdout is written
 * asynchronously, which would corrupt the `--json` output consumed by scripts.
 *
 * @returns {void}
 */
function main() {
  const opts = parseArgs(process.argv.slice(2));
  const baseDir = process.cwd();

  if (!fs.existsSync(opts.dataset)) {
    console.error(`Dataset no encontrado: ${opts.dataset}`);
    console.error(`Crea uno o usa el placeholder en ${DATASET_DEFAULT}.`);
    process.exitCode = 1;
    return;
  }

  let resultado;
  try {
    resultado = ejecutarDataset(baseDir, opts.dataset, { limit: opts.limit });
  } catch (err) {
    console.error(`Error ejecutando benchmark: ${err.message}`);
    process.exitCode = 1;
    return;
  }

  if (opts.json) {
    console.log(JSON.stringify(resultado, null, 2));
  } else {
    reportarTexto(resultado, opts);
  }

  // Only the aggregate is recorded; the per-query entries are left out.
  persistirHistorico(baseDir, {
    dataset: opts.dataset,
    n: resultado.dataset.total,
    significativo: resultado.dataset.significativo,
    promedio: resultado.promedio,
  });

  // Let the process end on its own so pending stdout writes are flushed.
  process.exitCode = 0;
}
160
+
161
// Run the CLI only when this file is executed directly, not when it is
// loaded through require().
const invocadoDirectamente = require.main === module;
if (invocadoDirectamente) {
  main();
}

// Public surface of the module: only the argument parser is exposed.
module.exports = { parseArgs };