@saulwade/swl-ses 1.3.7 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +12 -4
- package/README.md +1 -1
- package/bin/swl-mcp-server.js +187 -187
- package/bin/swl-webhook-server.js +198 -0
- package/comandos/swl/.evolved.json +22 -22
- package/comandos/swl/adoptar-proyecto.md +21 -1
- package/comandos/swl/claudemd.md +14 -1
- package/comandos/swl/contribuir.md +233 -233
- package/comandos/swl/exportar-vault.md +207 -7
- package/comandos/swl/nuevo-proyecto.md +24 -2
- package/gateway/adapters/base.js +109 -0
- package/gateway/adapters/discord.js +167 -0
- package/gateway/adapters/email.js +221 -0
- package/gateway/adapters/slack.js +192 -0
- package/gateway/adapters/telegram.js +183 -0
- package/gateway/adapters/webhook.js +113 -0
- package/gateway/adapters/whatsapp.js +214 -0
- package/gateway/agent-executor.js +322 -0
- package/gateway/command-relay.js +271 -0
- package/gateway/cron/jobs.js +263 -0
- package/gateway/cron/scheduler.js +322 -0
- package/gateway/cron/store.js +335 -0
- package/gateway/index.js +320 -0
- package/gateway/lib/event-channel.js +191 -0
- package/gateway/session.js +131 -0
- package/gateway/webhook-server.js +324 -0
- package/habilidades/backend-production-resilience/SKILL.md +288 -288
- package/habilidades/benchmark-memoria/SKILL.md +186 -186
- package/habilidades/build-errors-nextjs/SKILL.md +55 -1
- package/habilidades/diagrama-arquitectura/assets/template.html +276 -276
- package/habilidades/doubt-driven-review/SKILL.md +171 -171
- package/habilidades/doubt-driven-review/recursos/EXAMPLES.md +130 -130
- package/habilidades/eval-framework/SKILL.md +212 -212
- package/habilidades/extractor-de-aprendizajes/SKILL.md +24 -10
- package/habilidades/harness-claude-code/SKILL.md +299 -299
- package/habilidades/infra-github-actions/SKILL.md +166 -166
- package/habilidades/legacy-code-rescue/SKILL.md +267 -267
- package/habilidades/manejo-errores/.evolved.json +8 -8
- package/habilidades/meta-skills-estandar/recursos/convencion-examples.md +93 -93
- package/habilidades/meta-skills-estandar/recursos/skills-as-agents.md +163 -163
- package/habilidades/nextjs-testing/SKILL.md +89 -5
- package/habilidades/node-experto/SKILL.md +37 -1
- package/habilidades/patrones-python/SKILL.md +229 -229
- package/habilidades/patrones-python/recursos/patrones-avanzados.md +469 -469
- package/habilidades/planear-fase/SKILL.md +319 -319
- package/habilidades/react-experto/SKILL.md +45 -4
- package/habilidades/release-semver/.evolved.json +8 -8
- package/habilidades/swl-claudemd/SKILL.md +15 -1
- package/habilidades/tdd-workflow/SKILL.md +36 -4
- package/habilidades/testing-python/SKILL.md +340 -340
- package/hooks/claudemd-bloat-detector.js +161 -161
- package/hooks/inyeccion-contexto.js +8 -3
- package/hooks/lib/agent-routing.js +107 -107
- package/hooks/lib/auto-consolidator.js +335 -335
- package/hooks/lib/error-classifier.js +308 -308
- package/hooks/lib/merkle-audit.js +96 -96
- package/hooks/lib/provenance-tracker.js +191 -191
- package/hooks/lib/rate-limit-ip.js +177 -0
- package/hooks/lib/rate-limit-tracker.js +253 -253
- package/hooks/lib/resource-quota.js +122 -122
- package/hooks/lib/retry-jitter.js +165 -165
- package/hooks/lib/skill-auditor.js +588 -588
- package/hooks/lib/sync-status.js +228 -228
- package/hooks/lib/taint-tracker.js +107 -107
- package/hooks/lib/text-similarity.js +241 -241
- package/hooks/lib/toon-compressor.js +245 -245
- package/hooks/lib/webhook-dedup.js +184 -0
- package/hooks/lib/webhook-verify.js +123 -0
- package/hooks/proteccion-rutas.js +120 -15
- package/hooks/registro-turnos.js +209 -209
- package/hooks/sugerir-regenerar-inventario.js +170 -170
- package/hooks/validar-formato-post-subagente.js +140 -140
- package/hooks/validar-memoria-hook.js +218 -218
- package/instintos/prompt-appendices.yaml +57 -57
- package/manifiestos/agent-output-schemas.json +57 -57
- package/manifiestos/modulos.json +1 -0
- package/manifiestos/skills-lock.json +37 -37
- package/package.json +5 -3
- package/plantillas/auditor-veto-template.md +105 -105
- package/plantillas/github-workflows/README.md +47 -47
- package/plantillas/github-workflows/release-please.yml +44 -44
- package/plantillas/github-workflows/swl-ci.yml +107 -107
- package/plantillas/github-workflows/swl-security.yml +51 -51
- package/plugin.json +1 -1
- package/reglas/analisis-previo-tareas-grandes.md +172 -172
- package/reglas/arreglar-al-detectar.md +147 -147
- package/reglas/fragmentos-compartidos.md +152 -152
- package/reglas/harness-claude-code.md +213 -213
- package/reglas/usar-context7.md +226 -226
- package/reglas/usar-sistema-swl.md +251 -0
- package/schemas/diary-entry.schema.json +80 -80
- package/scripts/benchmark-memoria.js +167 -167
- package/scripts/comandos/skills.js +251 -2
- package/scripts/configurar-branch-protection.js +418 -418
- package/scripts/detectar-aprendizajes-duplicados.js +151 -151
- package/scripts/field-report.js +199 -199
- package/scripts/generar-checklists-consolidados.js +273 -273
- package/scripts/generar-inventario.js +420 -420
- package/scripts/generar-matriz-lenguajes.js +271 -271
- package/scripts/lib/artefactos-python.js +43 -43
- package/scripts/lib/benchmark-metrics.js +160 -160
- package/scripts/lib/budget-enforcer.js +252 -252
- package/scripts/lib/configurar-ci.js +380 -380
- package/scripts/lib/contadores-inventario.js +217 -217
- package/scripts/lib/detectar-stack-detallado.js +307 -307
- package/scripts/lib/diary-entry.js +234 -234
- package/scripts/lib/eval-metrics-store.js +218 -218
- package/scripts/lib/eval-quality.js +171 -171
- package/scripts/lib/eval-schemas.js +144 -144
- package/scripts/lib/eval-self-correct.js +106 -106
- package/scripts/lib/eval-validator.js +185 -185
- package/scripts/lib/jaccard-similarity.js +98 -98
- package/scripts/lib/longmemeval-runner.js +125 -125
- package/scripts/lib/npm-version.js +261 -261
- package/scripts/lib/paquetes-conocidos.js +50 -50
- package/scripts/lib/prompt-builder.js +264 -264
- package/scripts/lib/rrf-fusion.js +175 -175
- package/scripts/lib/scoring-instintos.js +277 -277
- package/scripts/lib/semantic-search.js +252 -252
- package/scripts/limpiar-artefactos-python.js +131 -131
- package/scripts/mcp-server/README.md +128 -128
- package/scripts/mcp-server/handlers.js +206 -206
- package/scripts/migrar-csv-a-array.js +168 -168
- package/scripts/migrar-fase-dominio.js +201 -201
- package/scripts/publicar.js +511 -511
- package/scripts/run-eval.js +141 -141
- package/scripts/validar-manifest.js +195 -195
- package/scripts/validar-userland-vacio.js +110 -110
- package/scripts/verificar-release.js +110 -0
|
@@ -1,167 +1,167 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* benchmark-memoria.js — CLI runner para benchmark de retrieval sobre memoria SWL.
|
|
6
|
-
*
|
|
7
|
-
* Ejecuta queries de un dataset JSONL contra `hooks/lib/memory-search`
|
|
8
|
-
* (que usa RRF fusion sobre aprendizajes/sesiones/instintos) y reporta
|
|
9
|
-
* métricas R@5, R@10, R@20, MRR, nDCG@10, P@5.
|
|
10
|
-
*
|
|
11
|
-
* Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
|
|
12
|
-
* Adaptado para swl-ses: file-based, sin embeddings ML, dataset SWL-específico.
|
|
13
|
-
*
|
|
14
|
-
* Uso:
|
|
15
|
-
* node scripts/benchmark-memoria.js [opciones]
|
|
16
|
-
*
|
|
17
|
-
* Opciones:
|
|
18
|
-
* --dataset <ruta> Dataset JSONL (default: .planning/benchmark/dataset.jsonl)
|
|
19
|
-
* --limit <n> Top-k a recuperar por query (default: 20)
|
|
20
|
-
* --json Output en JSON (para scripts)
|
|
21
|
-
* --verbose Detalle por query
|
|
22
|
-
*
|
|
23
|
-
* Exit codes:
|
|
24
|
-
* 0 - OK
|
|
25
|
-
* 1 - Error de I/O o dataset inválido
|
|
26
|
-
* 2 - Argumentos inválidos
|
|
27
|
-
*
|
|
28
|
-
* Persistencia opcional: si se setea SWL_BENCHMARK_PERSIST=1, escribe
|
|
29
|
-
* el resultado agregado a `.planning/evolucion/benchmark-memoria.jsonl`
|
|
30
|
-
* para tracking histórico.
|
|
31
|
-
*/
|
|
32
|
-
|
|
33
|
-
const fs = require('fs');
|
|
34
|
-
const path = require('path');
|
|
35
|
-
|
|
36
|
-
const { ejecutarDataset } = require('./lib/longmemeval-runner');
|
|
37
|
-
|
|
38
|
-
const DATASET_DEFAULT = '.planning/benchmark/dataset.jsonl';
|
|
39
|
-
const HISTORICO_PATH = '.planning/evolucion/benchmark-memoria.jsonl';
|
|
40
|
-
|
|
41
|
-
function uso() {
|
|
42
|
-
console.error('Uso: node scripts/benchmark-memoria.js [--dataset <ruta>] [--limit <n>] [--json] [--verbose]');
|
|
43
|
-
process.exit(2);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function parseArgs(argv) {
|
|
47
|
-
const opts = {
|
|
48
|
-
dataset: DATASET_DEFAULT,
|
|
49
|
-
limit: 20,
|
|
50
|
-
json: false,
|
|
51
|
-
verbose: false,
|
|
52
|
-
};
|
|
53
|
-
for (let i = 0; i < argv.length; i++) {
|
|
54
|
-
const arg = argv[i];
|
|
55
|
-
if (arg === '--dataset') opts.dataset = argv[++i];
|
|
56
|
-
else if (arg === '--limit') opts.limit = parseInt(argv[++i], 10) || 20;
|
|
57
|
-
else if (arg === '--json') opts.json = true;
|
|
58
|
-
else if (arg === '--verbose') opts.verbose = true;
|
|
59
|
-
else if (arg === '--help' || arg === '-h') uso();
|
|
60
|
-
}
|
|
61
|
-
return opts;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function persistirHistorico(baseDir, resumen) {
|
|
65
|
-
if (process.env.SWL_BENCHMARK_PERSIST !== '1') return;
|
|
66
|
-
try {
|
|
67
|
-
const dirEvolucion = path.join(baseDir, '.planning', 'evolucion');
|
|
68
|
-
if (!fs.existsSync(dirEvolucion)) fs.mkdirSync(dirEvolucion, { recursive: true });
|
|
69
|
-
const linea = JSON.stringify({
|
|
70
|
-
timestamp: new Date().toISOString(),
|
|
71
|
-
...resumen,
|
|
72
|
-
});
|
|
73
|
-
fs.appendFileSync(path.join(baseDir, HISTORICO_PATH), linea + '\n', 'utf8');
|
|
74
|
-
} catch (_) {
|
|
75
|
-
// best-effort
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
function reportarTexto(resultado, opts) {
|
|
80
|
-
const { promedio, dataset, entries } = resultado;
|
|
81
|
-
|
|
82
|
-
console.log('================================================================');
|
|
83
|
-
console.log(' Benchmark de retrieval de memoria SWL');
|
|
84
|
-
console.log('================================================================');
|
|
85
|
-
console.log('');
|
|
86
|
-
console.log(`Dataset: ${opts.dataset}`);
|
|
87
|
-
console.log(`Total queries: ${dataset.total}`);
|
|
88
|
-
console.log(` Reales: ${dataset.real}`);
|
|
89
|
-
console.log(` Placeholder: ${dataset.placeholder}`);
|
|
90
|
-
console.log(`Significativo: ${dataset.significativo ? 'sí' : 'NO (requiere ≥30 reales)'}`);
|
|
91
|
-
console.log('');
|
|
92
|
-
|
|
93
|
-
if (!dataset.significativo) {
|
|
94
|
-
console.log('⚠ ADVERTENCIA: dataset con menos de 30 queries reales.');
|
|
95
|
-
console.log(' Las métricas son INDICATIVAS, no estadísticamente significativas.');
|
|
96
|
-
console.log(' Para usar como gate de release, expandir el dataset con preguntas');
|
|
97
|
-
console.log(' curadas extraídas de uso real (ver SKILL.md de benchmark-memoria).');
|
|
98
|
-
console.log('');
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
console.log('────────────── Métricas agregadas ──────────────');
|
|
102
|
-
console.log(` Recall @ 5: ${(promedio.recall_at_5 * 100).toFixed(1)}%`);
|
|
103
|
-
console.log(` Recall @ 10: ${(promedio.recall_at_10 * 100).toFixed(1)}%`);
|
|
104
|
-
console.log(` Recall @ 20: ${(promedio.recall_at_20 * 100).toFixed(1)}%`);
|
|
105
|
-
console.log(` MRR: ${promedio.mrr.toFixed(3)}`);
|
|
106
|
-
console.log(` nDCG @ 10: ${promedio.ndcg_at_10.toFixed(3)}`);
|
|
107
|
-
console.log(` Precision @ 5: ${(promedio.precision_at_5 * 100).toFixed(1)}%`);
|
|
108
|
-
console.log('');
|
|
109
|
-
|
|
110
|
-
if (opts.verbose) {
|
|
111
|
-
console.log('────────────── Detalle por query ──────────────');
|
|
112
|
-
for (const r of entries) {
|
|
113
|
-
const mark = r.metricas.recall_at_5 > 0 ? '✓' : '✗';
|
|
114
|
-
console.log(` ${mark} ${r.question_id} [${r.category || 'n/a'}, ${r.status}] ` +
|
|
115
|
-
`R@5=${r.metricas.recall_at_5} R@10=${r.metricas.recall_at_10} ` +
|
|
116
|
-
`MRR=${r.metricas.mrr.toFixed(2)} (${r.latencyMs}ms)`);
|
|
117
|
-
if (r.metricas.recall_at_5 === 0 && opts.verbose) {
|
|
118
|
-
console.log(` Q: ${r.question.slice(0, 80)}`);
|
|
119
|
-
console.log(` Gold: ${r.goldIds.slice(0, 3).join(', ')}${r.goldIds.length > 3 ? '...' : ''}`);
|
|
120
|
-
console.log(` Retrieved: ${r.retrievedIds.slice(0, 5).join(', ')}`);
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
console.log('');
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
function main() {
|
|
128
|
-
const opts = parseArgs(process.argv.slice(2));
|
|
129
|
-
const baseDir = process.cwd();
|
|
130
|
-
|
|
131
|
-
if (!fs.existsSync(opts.dataset)) {
|
|
132
|
-
console.error(`Dataset no encontrado: ${opts.dataset}`);
|
|
133
|
-
console.error(`Crea uno o usa el placeholder en ${DATASET_DEFAULT}.`);
|
|
134
|
-
process.exit(1);
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
let resultado;
|
|
138
|
-
try {
|
|
139
|
-
resultado = ejecutarDataset(baseDir, opts.dataset, { limit: opts.limit });
|
|
140
|
-
} catch (err) {
|
|
141
|
-
console.error(`Error ejecutando benchmark: ${err.message}`);
|
|
142
|
-
process.exit(1);
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
if (opts.json) {
|
|
146
|
-
console.log(JSON.stringify(resultado, null, 2));
|
|
147
|
-
} else {
|
|
148
|
-
reportarTexto(resultado, opts);
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
persistirHistorico(baseDir, {
|
|
152
|
-
dataset: opts.dataset,
|
|
153
|
-
n: resultado.dataset.total,
|
|
154
|
-
significativo: resultado.dataset.significativo,
|
|
155
|
-
promedio: resultado.promedio,
|
|
156
|
-
});
|
|
157
|
-
|
|
158
|
-
process.exit(0);
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
if (require.main === module) {
|
|
162
|
-
main();
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
module.exports = {
|
|
166
|
-
parseArgs,
|
|
167
|
-
};
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* benchmark-memoria.js — CLI runner para benchmark de retrieval sobre memoria SWL.
|
|
6
|
+
*
|
|
7
|
+
* Ejecuta queries de un dataset JSONL contra `hooks/lib/memory-search`
|
|
8
|
+
* (que usa RRF fusion sobre aprendizajes/sesiones/instintos) y reporta
|
|
9
|
+
* métricas R@5, R@10, R@20, MRR, nDCG@10, P@5.
|
|
10
|
+
*
|
|
11
|
+
* Patrón adoptado de `temp/agentmemory-main/benchmark/longmemeval-bench.ts`.
|
|
12
|
+
* Adaptado para swl-ses: file-based, sin embeddings ML, dataset SWL-específico.
|
|
13
|
+
*
|
|
14
|
+
* Uso:
|
|
15
|
+
* node scripts/benchmark-memoria.js [opciones]
|
|
16
|
+
*
|
|
17
|
+
* Opciones:
|
|
18
|
+
* --dataset <ruta> Dataset JSONL (default: .planning/benchmark/dataset.jsonl)
|
|
19
|
+
* --limit <n> Top-k a recuperar por query (default: 20)
|
|
20
|
+
* --json Output en JSON (para scripts)
|
|
21
|
+
* --verbose Detalle por query
|
|
22
|
+
*
|
|
23
|
+
* Exit codes:
|
|
24
|
+
* 0 - OK
|
|
25
|
+
* 1 - Error de I/O o dataset inválido
|
|
26
|
+
* 2 - Argumentos inválidos
|
|
27
|
+
*
|
|
28
|
+
* Persistencia opcional: si se setea SWL_BENCHMARK_PERSIST=1, escribe
|
|
29
|
+
* el resultado agregado a `.planning/evolucion/benchmark-memoria.jsonl`
|
|
30
|
+
* para tracking histórico.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
const fs = require('fs');
|
|
34
|
+
const path = require('path');
|
|
35
|
+
|
|
36
|
+
const { ejecutarDataset } = require('./lib/longmemeval-runner');
|
|
37
|
+
|
|
38
|
+
const DATASET_DEFAULT = '.planning/benchmark/dataset.jsonl';
|
|
39
|
+
const HISTORICO_PATH = '.planning/evolucion/benchmark-memoria.jsonl';
|
|
40
|
+
|
|
41
|
+
/**
 * Prints the one-line usage synopsis to stderr and terminates the process
 * with exit code 2.
 */
function uso() {
  const sinopsis = 'Uso: node scripts/benchmark-memoria.js [--dataset <ruta>] [--limit <n>] [--json] [--verbose]';
  console.error(sinopsis);
  process.exit(2);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Parses the command-line arguments of the benchmark runner.
 *
 * Recognized flags: `--dataset <ruta>`, `--limit <n>`, `--json`, `--verbose`
 * and `--help` / `-h` (which delegates to `uso()`). Any other argument is
 * ignored.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{dataset: string, limit: number, json: boolean, verbose: boolean}}
 *   Options, with defaults filled in for every flag that was not supplied.
 */
function parseArgs(argv) {
  const opts = {
    dataset: DATASET_DEFAULT,
    limit: 20,
    json: false,
    verbose: false,
  };

  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case '--dataset': {
        // A trailing `--dataset` with no value keeps the default instead of
        // storing `undefined`, which would later be reported as a missing file.
        const ruta = argv[++i];
        if (typeof ruta === 'string' && ruta !== '') opts.dataset = ruta;
        break;
      }
      case '--limit': {
        // Top-k must be a positive integer; anything else (missing value,
        // non-numeric text, zero, negatives) falls back to the default of 20.
        const n = Number.parseInt(argv[++i], 10);
        opts.limit = Number.isInteger(n) && n > 0 ? n : 20;
        break;
      }
      case '--json':
        opts.json = true;
        break;
      case '--verbose':
        opts.verbose = true;
        break;
      case '--help':
      case '-h':
        uso();
        break;
      default:
        break;
    }
  }

  return opts;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Appends one JSON line with the aggregated benchmark result to the history
 * file at `HISTORICO_PATH` (resolved against `baseDir`).
 *
 * Does nothing unless the environment variable SWL_BENCHMARK_PERSIST is
 * exactly '1'. Persistence is best-effort: a failure never throws, it is
 * only reported on stderr so stdout (possibly JSON) stays clean.
 *
 * @param {string} baseDir - Project root the history path is relative to.
 * @param {object} resumen - Fields merged into the record after `timestamp`
 *   (so a `timestamp` key in `resumen` overrides the generated one).
 * @returns {void}
 */
function persistirHistorico(baseDir, resumen) {
  if (process.env.SWL_BENCHMARK_PERSIST !== '1') return;
  try {
    const destino = path.join(baseDir, HISTORICO_PATH);
    // Derive the directory from the target file so both always agree;
    // recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(destino), { recursive: true });
    const linea = JSON.stringify({
      timestamp: new Date().toISOString(),
      ...resumen,
    });
    fs.appendFileSync(destino, `${linea}\n`, 'utf8');
  } catch (err) {
    // Best-effort: the benchmark result is still valid without the history.
    console.error(`Aviso: no se pudo persistir el histórico del benchmark: ${err.message}`);
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Prints the human-readable benchmark report to stdout.
 *
 * Always prints the dataset summary and the aggregated metrics. When
 * `opts.verbose` is set it also prints one line per query and, for queries
 * whose recall@5 is exactly 0, the question plus the first gold and
 * retrieved ids. Entries that lack `question`, `goldIds` or `retrievedIds`
 * are printed with empty values instead of aborting the report.
 *
 * @param {{promedio: object, dataset: object, entries: object[]}} resultado
 *   Result object; the fields read here are `promedio.*` metric numbers,
 *   `dataset.{total,real,placeholder,significativo}` and per-entry
 *   `question_id`, `category`, `status`, `latencyMs`, `metricas`.
 * @param {{dataset: string, verbose: boolean}} opts - Parsed CLI options.
 * @returns {void}
 */
function reportarTexto(resultado, opts) {
  const { promedio, dataset, entries } = resultado;
  const porcentaje = (valor) => `${(valor * 100).toFixed(1)}%`;
  const comoLista = (valor) => (Array.isArray(valor) ? valor : []);

  console.log('================================================================');
  console.log(' Benchmark de retrieval de memoria SWL');
  console.log('================================================================');
  console.log('');
  console.log(`Dataset: ${opts.dataset}`);
  console.log(`Total queries: ${dataset.total}`);
  console.log(` Reales: ${dataset.real}`);
  console.log(` Placeholder: ${dataset.placeholder}`);
  console.log(`Significativo: ${dataset.significativo ? 'sí' : 'NO (requiere ≥30 reales)'}`);
  console.log('');

  if (!dataset.significativo) {
    console.log('⚠ ADVERTENCIA: dataset con menos de 30 queries reales.');
    console.log(' Las métricas son INDICATIVAS, no estadísticamente significativas.');
    console.log(' Para usar como gate de release, expandir el dataset con preguntas');
    console.log(' curadas extraídas de uso real (ver SKILL.md de benchmark-memoria).');
    console.log('');
  }

  console.log('────────────── Métricas agregadas ──────────────');
  console.log(` Recall @ 5: ${porcentaje(promedio.recall_at_5)}`);
  console.log(` Recall @ 10: ${porcentaje(promedio.recall_at_10)}`);
  console.log(` Recall @ 20: ${porcentaje(promedio.recall_at_20)}`);
  console.log(` MRR: ${promedio.mrr.toFixed(3)}`);
  console.log(` nDCG @ 10: ${promedio.ndcg_at_10.toFixed(3)}`);
  console.log(` Precision @ 5: ${porcentaje(promedio.precision_at_5)}`);
  console.log('');

  if (!opts.verbose) return;

  console.log('────────────── Detalle por query ──────────────');
  for (const r of comoLista(entries)) {
    const mark = r.metricas.recall_at_5 > 0 ? '✓' : '✗';
    console.log(` ${mark} ${r.question_id} [${r.category || 'n/a'}, ${r.status}] ` +
      `R@5=${r.metricas.recall_at_5} R@10=${r.metricas.recall_at_10} ` +
      `MRR=${r.metricas.mrr.toFixed(2)} (${r.latencyMs}ms)`);

    // Only complete misses get the diagnostic detail.
    if (r.metricas.recall_at_5 !== 0) continue;

    const pregunta = String(r.question == null ? '' : r.question);
    const gold = comoLista(r.goldIds);
    const recuperados = comoLista(r.retrievedIds);
    console.log(` Q: ${pregunta.slice(0, 80)}`);
    console.log(` Gold: ${gold.slice(0, 3).join(', ')}${gold.length > 3 ? '...' : ''}`);
    console.log(` Retrieved: ${recuperados.slice(0, 5).join(', ')}`);
  }
  console.log('');
}
|
|
126
|
+
|
|
127
|
+
/**
 * CLI entry point: parses arguments, runs the dataset through
 * `ejecutarDataset`, prints the report (JSON or text) and optionally
 * persists the aggregated result.
 *
 * Exit status is communicated through `process.exitCode` rather than
 * `process.exit()`: the `--json` output can be large and is meant to be
 * piped, and a forced exit may drop stdout writes that have not been
 * flushed yet. 0 = OK, 1 = dataset missing or benchmark failure.
 *
 * NOTE(review): this relies on nothing keeping the event loop alive after
 * the synchronous run — confirm `ejecutarDataset` leaves no open handles.
 *
 * @returns {void}
 */
function main() {
  const opts = parseArgs(process.argv.slice(2));
  const baseDir = process.cwd();

  if (!fs.existsSync(opts.dataset)) {
    console.error(`Dataset no encontrado: ${opts.dataset}`);
    console.error(`Crea uno o usa el placeholder en ${DATASET_DEFAULT}.`);
    process.exitCode = 1;
    return;
  }

  let resultado;
  try {
    resultado = ejecutarDataset(baseDir, opts.dataset, { limit: opts.limit });
  } catch (err) {
    console.error(`Error ejecutando benchmark: ${err.message}`);
    process.exitCode = 1;
    return;
  }

  if (opts.json) {
    console.log(JSON.stringify(resultado, null, 2));
  } else {
    reportarTexto(resultado, opts);
  }

  persistirHistorico(baseDir, {
    dataset: opts.dataset,
    n: resultado.dataset.total,
    significativo: resultado.dataset.significativo,
    promedio: resultado.promedio,
  });

  process.exitCode = 0;
}
|
|
160
|
+
|
|
161
|
+
if (require.main === module) {
|
|
162
|
+
main();
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
module.exports = {
|
|
166
|
+
parseArgs,
|
|
167
|
+
};
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const { spawnSync } = require('child_process');
|
|
6
|
+
|
|
3
7
|
const { listarComponentes, agregarComponente } = require('../lib/gestor-componentes');
|
|
4
8
|
const { descubrirSkills, limpiarDescubrimiento } = require('../lib/resolver-externo');
|
|
5
9
|
const { selectorCheckbox, mostrarBanner, mostrarInfoFuente } = require('../lib/selector-interactivo');
|
|
10
|
+
const { atomicWriteJSON } = require('../../hooks/lib/atomic-write');
|
|
6
11
|
|
|
7
12
|
const AYUDA = `
|
|
8
13
|
swl-ses skills — Gestión de skills
|
|
@@ -13,16 +18,30 @@ USO:
|
|
|
13
18
|
SUBCOMANDOS:
|
|
14
19
|
list Lista skills instalados
|
|
15
20
|
add <fuente> [--skill <n>] Agrega skill desde repo Git o path local
|
|
21
|
+
auto Auto-detecta stack e instala skills oficiales
|
|
22
|
+
de comunidad (envuelve "npx autoskills" con
|
|
23
|
+
"-a claude-code" por default para evitar
|
|
24
|
+
duplicar en .agents/skills/)
|
|
16
25
|
remove <nombre> Remueve un skill
|
|
17
26
|
|
|
18
|
-
OPCIONES:
|
|
27
|
+
OPCIONES (add):
|
|
19
28
|
--target <runtime> Runtime destino (default: claude)
|
|
20
29
|
--skill <nombre> Nombre del skill a instalar directamente (sin selector)
|
|
21
30
|
--global Operar en directorio global
|
|
22
31
|
--force Sobreescribir sin confirmar
|
|
23
32
|
--all Instalar todos los skills sin preguntar
|
|
24
33
|
|
|
25
|
-
|
|
34
|
+
OPCIONES (auto):
|
|
35
|
+
--dry-run Solo mostrar qué se instalaría (no instala)
|
|
36
|
+
-y, --yes Sin confirmación interactiva
|
|
37
|
+
--runtimes <csv> Lista de runtimes destino (default: claude-code).
|
|
38
|
+
Ejemplos: claude-code,cursor,codex,opencode,gemini-cli
|
|
39
|
+
--clean-agents Eliminar .agents/skills/ residual tras instalar
|
|
40
|
+
(útil si se ejecutó "npx autoskills" sin flag antes
|
|
41
|
+
y solo se usa Claude Code en este proyecto)
|
|
42
|
+
--verbose Mostrar output completo de autoskills
|
|
43
|
+
|
|
44
|
+
FUENTES SOPORTADAS (add):
|
|
26
45
|
owner/repo GitHub shorthand
|
|
27
46
|
github:user/repo GitHub con prefijo
|
|
28
47
|
https://github.com/user/repo URL completa
|
|
@@ -34,6 +53,11 @@ EJEMPLOS:
|
|
|
34
53
|
swl-ses skills add anthropics/skills --skill docx
|
|
35
54
|
swl-ses skills add https://github.com/user/repo --all
|
|
36
55
|
swl-ses skills add ./path/local/mi-skill
|
|
56
|
+
swl-ses skills auto # Auto-detectar y instalar
|
|
57
|
+
swl-ses skills auto --dry-run # Solo ver qué instalaría
|
|
58
|
+
swl-ses skills auto -y # Sin confirmación
|
|
59
|
+
swl-ses skills auto --runtimes claude-code,cursor # Multi-runtime
|
|
60
|
+
swl-ses skills auto --clean-agents # Instalar + limpiar residuos
|
|
37
61
|
swl-ses skills remove mi-skill
|
|
38
62
|
`;
|
|
39
63
|
|
|
@@ -75,6 +99,11 @@ async function skills(subcomando, opciones) {
|
|
|
75
99
|
break;
|
|
76
100
|
}
|
|
77
101
|
|
|
102
|
+
case 'auto': {
|
|
103
|
+
await ejecutarAuto(opciones);
|
|
104
|
+
break;
|
|
105
|
+
}
|
|
106
|
+
|
|
78
107
|
case 'remove': {
|
|
79
108
|
const nombre = opciones._args && opciones._args[0];
|
|
80
109
|
if (!nombre) {
|
|
@@ -208,4 +237,224 @@ function mostrarResultadoInstalacion(resultado) {
|
|
|
208
237
|
console.log('');
|
|
209
238
|
}
|
|
210
239
|
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
// `skills auto` — wrapper de npx autoskills con default Claude Code-only
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
/**
 * Builds the argument list for `npx autoskills` from the options of the
 * `skills auto` subcommand. When no runtimes are given it forces
 * `-a claude-code`, which avoids duplicating skills into .agents/skills/
 * for projects that only use Claude Code.
 *
 * `runtimes` handling:
 *   - key absent, `undefined` or `null`  -> default `claude-code`
 *   - boolean (flag passed without value) -> error
 *   - empty / only commas and blanks      -> error
 *   - anything else is stringified and split on commas
 *
 * @param {object} opciones - Parsed CLI options (`dry-run`, `yes`/`y`,
 *   `verbose`, `runtimes`).
 * @returns {string[]} argv for npx, always ending in `-a <runtime...>`.
 * @throws {Error} When `--runtimes` was given without a usable value.
 */
function construirArgsAutoskills(opciones) {
  const args = ['autoskills'];

  if (opciones['dry-run']) args.push('--dry-run');
  if (opciones.yes || opciones.y) args.push('-y');
  if (opciones.verbose) args.push('-v');

  // Some parsers pre-populate keys with `undefined`; that is "not specified",
  // not the literal runtime name "undefined".
  const especificado =
    Object.prototype.hasOwnProperty.call(opciones, 'runtimes') && opciones.runtimes != null;

  // A bare `--runtimes` is typically parsed as `true`; stringifying it would
  // silently request a runtime called "true".
  if (especificado && typeof opciones.runtimes === 'boolean') {
    throw new Error('--runtimes requiere un valor (ej: claude-code,cursor)');
  }

  const runtimesRaw = especificado ? opciones.runtimes : 'claude-code';
  const runtimes = String(runtimesRaw)
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean);

  if (runtimes.length === 0) {
    throw new Error('--runtimes no puede estar vacío');
  }

  args.push('-a', ...runtimes);
  return args;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Takes a snapshot of the skill directories under `cwd` so the caller can
 * compute what changed after an installation.
 *
 * @param {string} cwd - Project root.
 * @returns {{claude: string[], agents: string[]}} Sorted names of the
 *   sub-directories of `.claude/skills` and `.agents/skills`. A missing or
 *   unreadable directory yields an empty list.
 */
function snapshotSkills(cwd) {
  const subdirectoriosDe = (base) => {
    try {
      if (!fs.existsSync(base)) return [];
      const nombres = [];
      for (const entrada of fs.readdirSync(base, { withFileTypes: true })) {
        // Plain files living next to the skills are not skills.
        if (entrada.isDirectory()) nombres.push(entrada.name);
      }
      nombres.sort();
      return nombres;
    } catch (_) {
      return [];
    }
  };

  const claude = subdirectoriosDe(path.join(cwd, '.claude', 'skills'));
  const agents = subdirectoriosDe(path.join(cwd, '.agents', 'skills'));
  return { claude, agents };
}
|
|
297
|
+
|
|
298
|
+
/**
 * Returns the elements of `despues` that are not present in `antes`,
 * keeping the order of `despues`.
 *
 * @param {string[]} antes - Names before the operation.
 * @param {string[]} despues - Names after the operation.
 * @returns {string[]} Newly added names.
 */
function delta(antes, despues) {
  const previos = new Set(antes);
  return despues.reduce((nuevos, nombre) => {
    if (!previos.has(nombre)) nuevos.push(nombre);
    return nuevos;
  }, []);
}
|
|
302
|
+
|
|
303
|
+
/**
 * Location of the marker file recording that `skills auto` has run.
 * According to the original author's note, the inyeccion-contexto.js hook
 * checks this file to avoid suggesting the installation again.
 *
 * @param {string} cwd - Project root.
 * @returns {string} `<cwd>/.claude/skills/.autoskills-installed.json`.
 */
function rutaMarkerAutoskills(cwd) {
  const carpetaSkills = path.join(cwd, '.claude', 'skills');
  return path.join(carpetaSkills, '.autoskills-installed.json');
}
|
|
311
|
+
|
|
312
|
+
/**
 * Writes the autoskills marker file for the project at `cwd`.
 *
 * The parent directory is created first (per the original author's note,
 * atomicWriteJSON expects it to exist). The write goes through
 * atomicWriteJSON from hooks/lib/atomic-write.js rather than a direct
 * fs.writeFileSync because the project rules require atomic writes for
 * system files, and this marker is read by a hook as a canonical signal.
 *
 * @param {string} cwd - Project root.
 * @param {object} datos - Marker payload, serialized with indent 2.
 * @returns {void}
 */
function escribirMarkerAutoskills(cwd, datos) {
  const archivoMarker = rutaMarkerAutoskills(cwd);
  const carpetaPadre = path.dirname(archivoMarker);
  fs.mkdirSync(carpetaPadre, { recursive: true });
  atomicWriteJSON(archivoMarker, datos, 2);
}
|
|
322
|
+
|
|
323
|
+
/**
 * Reads and parses the autoskills marker file of the project at `cwd`.
 *
 * @param {string} cwd - Project root.
 * @returns {object|null} Parsed marker contents, or `null` when the file
 *   does not exist, cannot be read or is not valid JSON.
 */
function leerMarkerAutoskills(cwd) {
  try {
    const archivoMarker = rutaMarkerAutoskills(cwd);
    const existe = fs.existsSync(archivoMarker);
    return existe ? JSON.parse(fs.readFileSync(archivoMarker, 'utf8')) : null;
  } catch (_) {
    return null;
  }
}
|
|
332
|
+
|
|
333
|
+
/**
 * Returns the CLI version declared in the package's package.json.
 *
 * @returns {string} The `version` field, or `'unknown'` when package.json
 *   cannot be loaded or does not declare a non-empty string version.
 */
function leerVersionCli() {
  try {
    const { version } = require('../../package.json');
    // A manifest without `version` must not leak `undefined` to callers.
    return typeof version === 'string' && version !== '' ? version : 'unknown';
  } catch (_) {
    return 'unknown';
  }
}
|
|
340
|
+
|
|
341
|
+
/**
 * Removes `.agents/skills/` under `cwd` if it exists and, when that leaves
 * `.agents/` empty, removes the parent as well. Per the original author's
 * note, the folder is an orphan when the project only uses Claude Code and
 * autoskills created it because it ran without `-a claude-code`.
 *
 * @param {string} cwd - Project root.
 * @returns {{eliminado: boolean, razon?: string}} `eliminado: true` on
 *   success; otherwise `eliminado: false` with `razon` set to 'no existe'
 *   or to the message of the error that was caught.
 */
function limpiarAgentsSkills(cwd) {
  const carpetaAgents = path.join(cwd, '.agents');
  const carpetaSkills = path.join(carpetaAgents, 'skills');

  if (fs.existsSync(carpetaSkills) === false) {
    return { eliminado: false, razon: 'no existe' };
  }

  try {
    fs.rmSync(carpetaSkills, { recursive: true, force: true });
    // Drop the parent only when nothing else lives in it.
    const padreVacio = fs.existsSync(carpetaAgents) && fs.readdirSync(carpetaAgents).length === 0;
    if (padreVacio) {
      fs.rmdirSync(carpetaAgents);
    }
  } catch (err) {
    return { eliminado: false, razon: err.message };
  }

  return { eliminado: true };
}
|
|
361
|
+
|
|
362
|
+
/**
 * Implements `swl-ses skills auto`: runs `npx autoskills` for the requested
 * runtimes, reports which skill directories appeared, writes the
 * autoskills marker and optionally removes a leftover `.agents/skills/`.
 *
 * Exits the process with code 1 on invalid options or when npx cannot be
 * started, and with the child's exit code (or 1) when autoskills fails.
 * With `--dry-run` nothing is reported or written after the child returns.
 *
 * @param {object} opciones - Parsed CLI options (`dry-run`, `yes`/`y`,
 *   `verbose`, `runtimes`, `clean-agents`).
 * @returns {Promise<void>}
 */
async function ejecutarAuto(opciones) {
  const cwd = process.cwd();
  let args;

  try {
    args = construirArgsAutoskills(opciones);
  } catch (err) {
    console.error(`Error: ${err.message}`);
    process.exit(1);
  }

  // Single source of truth for the runtime list: construirArgsAutoskills
  // appends `-a <runtime...>` as the last arguments, already trimmed and
  // with empty items removed. Re-parsing opciones.runtimes here could
  // disagree with what was actually passed to autoskills.
  const runtimes = args.slice(args.indexOf('-a') + 1);

  console.log('\n \x1b[36m›\x1b[0m swl-ses skills auto');
  console.log(` Comando: npx ${args.join(' ')}`);
  console.log('');

  const antes = snapshotSkills(cwd);

  // Run autoskills inheriting stdio so its interactive prompts keep working.
  // NOTE(review): on win32 this goes through a shell and the arguments are
  // not escaped; runtime names come straight from the command line.
  const result = spawnSync('npx', ['-y', ...args], {
    cwd,
    stdio: 'inherit',
    shell: process.platform === 'win32',
  });

  if (result.error) {
    console.error(`\n Error al ejecutar npx autoskills: ${result.error.message}`);
    console.error(' Verifica que tienes Node.js y npm instalados.');
    process.exit(1);
  }

  if (result.status !== 0) {
    if (result.status === null && result.signal) {
      // Killed by a signal: there is no exit code to show.
      console.error(`\n autoskills fue interrumpido por la señal ${result.signal}`);
    } else {
      console.error(`\n autoskills termino con codigo ${result.status}`);
    }
    process.exit(result.status || 1);
  }

  // A dry run changes nothing, so there is nothing to report or record.
  if (opciones['dry-run']) {
    console.log('\n (dry-run: no se instaló nada)\n');
    return;
  }

  const despues = snapshotSkills(cwd);
  const nuevosClaude = delta(antes.claude, despues.claude);
  const nuevosAgents = delta(antes.agents, despues.agents);

  console.log('\n \x1b[36m›\x1b[0m Reporte post-instalación:');
  console.log(` .claude/skills/: ${despues.claude.length} skills (${nuevosClaude.length} nuevos)`);
  for (const n of nuevosClaude) console.log(` + ${n}`);
  if (despues.agents.length > 0) {
    console.log(` .agents/skills/: ${despues.agents.length} skills (${nuevosAgents.length} nuevos)`);
    for (const n of nuevosAgents) console.log(` + ${n}`);
  }

  // Record that community skills were installed. Per the original author's
  // note, hooks/inyeccion-contexto.js would otherwise keep suggesting
  // `skills auto`, because it cannot tell SWL skills from community skills
  // inside .claude/skills/. Failure to write the marker is only a warning.
  try {
    escribirMarkerAutoskills(cwd, {
      installed_at: new Date().toISOString(),
      runtimes,
      skills_claude: despues.claude,
      skills_agents: despues.agents,
      cli_version: leerVersionCli(),
    });
  } catch (err) {
    console.log(`\n (advertencia: no se pudo escribir marker autoskills: ${err.message})`);
  }

  // Optional cleanup of .agents/skills/
  if (opciones['clean-agents']) {
    const res = limpiarAgentsSkills(cwd);
    if (res.eliminado) {
      console.log('\n \x1b[32m✓\x1b[0m .agents/skills/ eliminado (Claude Code-only).');
    } else {
      console.log(`\n (--clean-agents: ${res.razon})`);
    }
  } else if (despues.agents.length > 0) {
    if (runtimes.length === 1 && runtimes[0] === 'claude-code') {
      console.log('\n \x1b[33m!\x1b[0m .agents/skills/ tiene contenido pero el proyecto usa solo Claude Code.');
      console.log(' Considera ejecutar de nuevo con --clean-agents para limpiarlo.');
    }
  }

  console.log('');
}
|
|
453
|
+
|
|
211
454
|
module.exports = skills;
|
|
455
|
+
module.exports.construirArgsAutoskills = construirArgsAutoskills;
|
|
456
|
+
module.exports.snapshotSkills = snapshotSkills;
|
|
457
|
+
module.exports.delta = delta;
|
|
458
|
+
module.exports.rutaMarkerAutoskills = rutaMarkerAutoskills;
|
|
459
|
+
module.exports.leerMarkerAutoskills = leerMarkerAutoskills;
|
|
460
|
+
module.exports.escribirMarkerAutoskills = escribirMarkerAutoskills;
|