@booklib/skills 1.5.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +23 -1
- package/README.md +55 -0
- package/agents/architecture-reviewer.md +136 -0
- package/agents/booklib-reviewer.md +90 -0
- package/agents/data-reviewer.md +107 -0
- package/agents/jvm-reviewer.md +146 -0
- package/agents/python-reviewer.md +128 -0
- package/agents/rust-reviewer.md +115 -0
- package/agents/ts-reviewer.md +110 -0
- package/agents/ui-reviewer.md +117 -0
- package/bin/skills.js +368 -73
- package/commands/animation-at-work.md +10 -0
- package/commands/clean-code-reviewer.md +10 -0
- package/commands/data-intensive-patterns.md +10 -0
- package/commands/data-pipelines.md +10 -0
- package/commands/design-patterns.md +10 -0
- package/commands/domain-driven-design.md +10 -0
- package/commands/effective-java.md +10 -0
- package/commands/effective-kotlin.md +10 -0
- package/commands/effective-python.md +10 -0
- package/commands/effective-typescript.md +10 -0
- package/commands/kotlin-in-action.md +10 -0
- package/commands/lean-startup.md +10 -0
- package/commands/microservices-patterns.md +10 -0
- package/commands/programming-with-rust.md +10 -0
- package/commands/refactoring-ui.md +10 -0
- package/commands/rust-in-action.md +10 -0
- package/commands/skill-router.md +10 -0
- package/commands/spring-boot-in-action.md +10 -0
- package/commands/storytelling-with-data.md +10 -0
- package/commands/system-design-interview.md +10 -0
- package/commands/using-asyncio-python.md +10 -0
- package/commands/web-scraping-python.md +10 -0
- package/package.json +4 -1
- package/scripts/gen-og.mjs +142 -0
- package/skills/skill-router/SKILL.md +23 -0
- package/demo.gif +0 -0
- package/demo.tape +0 -40
- package/docs/index.html +0 -362
package/bin/skills.js
CHANGED
|
@@ -4,10 +4,14 @@ const fs = require('fs');
|
|
|
4
4
|
const path = require('path');
|
|
5
5
|
const os = require('os');
|
|
6
6
|
const https = require('https');
|
|
7
|
+
const http = require('http');
|
|
8
|
+
const { spawnSync } = require('child_process');
|
|
7
9
|
|
|
8
10
|
// Everything after `node <script>`: the sub-command followed by its flags/operands.
const args = process.argv.slice(2);
const [command] = args;

// Content directories shipped with the package, resolved from this script's parent directory.
const [skillsRoot, commandsRoot, agentsRoot] = ['skills', 'commands', 'agents']
  .map(dirName => path.join(__dirname, '..', dirName));
|
|
11
15
|
|
|
12
16
|
// ─── ANSI helpers ─────────────────────────────────────────────────────────────
|
|
13
17
|
const c = {
|
|
@@ -86,10 +90,42 @@ function copySkill(skillName, targetDir) {
|
|
|
86
90
|
console.log(c.green('✓') + ` ${c.bold(skillName)} → ${c.dim(dest)}`);
|
|
87
91
|
}
|
|
88
92
|
|
|
89
|
-
const isGlobal
|
|
90
|
-
const targetDir
|
|
93
|
+
// `--global` installs beneath the user's home directory; otherwise beneath the current working directory.
const isGlobal = args.includes('--global');

// One install target per content type, each located at `<base>/.claude/<type>`.
const [targetDir, commandsTargetDir, agentsTargetDir] = ['skills', 'commands', 'agents']
  .map(kind => path.join(isGlobal ? os.homedir() : process.cwd(), '.claude', kind));
|
|
103
|
+
|
|
104
|
+
/**
 * Install the slash-command file that accompanies a skill.
 *
 * Copies `<commandsRoot>/<skillName>.md` into `commandsTargetDir` (creating that
 * directory when needed) and logs the destination. Does nothing when the
 * package has no command file for the given name.
 *
 * @param {string} skillName - Name of the skill whose command file is copied.
 */
function copyCommand(skillName) {
  const fileName = `${skillName}.md`;
  const source = path.join(commandsRoot, fileName);

  if (fs.existsSync(source)) {
    fs.mkdirSync(commandsTargetDir, { recursive: true });
    const destination = path.join(commandsTargetDir, fileName);
    fs.copyFileSync(source, destination);
    console.log(c.green('✓') + ` /${skillName} command → ${c.dim(destination)}`);
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * List the agents bundled with the package.
 *
 * @returns {string[]} Sorted agent names: every entry of `agentsRoot` whose
 *   name ends in `.md`, with that extension removed. Empty when the directory
 *   does not exist.
 */
function getAvailableAgents() {
  if (!fs.existsSync(agentsRoot)) return [];

  const names = [];
  for (const entry of fs.readdirSync(agentsRoot)) {
    if (entry.endsWith('.md')) names.push(entry.slice(0, -'.md'.length));
  }
  return names.sort();
}
|
|
120
|
+
|
|
121
|
+
/**
 * Install one bundled agent definition.
 *
 * Copies `<agentsRoot>/<agentName>.md` into `agentsTargetDir` (creating that
 * directory when needed) and logs the destination. Does nothing when no such
 * agent file exists in the package.
 *
 * @param {string} agentName - Agent name without the `.md` extension.
 */
function copyAgent(agentName) {
  const fileName = `${agentName}.md`;
  const source = path.join(agentsRoot, fileName);

  if (fs.existsSync(source)) {
    fs.mkdirSync(agentsTargetDir, { recursive: true });
    const destination = path.join(agentsTargetDir, fileName);
    fs.copyFileSync(source, destination);
    console.log(c.green('✓') + ` @${agentName} agent → ${c.dim(destination)}`);
  }
}
|
|
93
129
|
|
|
94
130
|
// ─── CHECK command ────────────────────────────────────────────────────────────
|
|
95
131
|
function checkSkill(skillName) {
|
|
@@ -201,6 +237,33 @@ function checkSkill(skillName) {
|
|
|
201
237
|
: fail('platinum', 'scripts/ exists but is empty'));
|
|
202
238
|
}
|
|
203
239
|
|
|
240
|
+
const resultsPath = path.join(skillDir, 'evals', 'results.json');
|
|
241
|
+
if (!fs.existsSync(resultsPath)) {
|
|
242
|
+
checks.push(fail('platinum', 'evals/results.json missing — run: npx @booklib/skills eval <name>'));
|
|
243
|
+
} else {
|
|
244
|
+
let results = null;
|
|
245
|
+
try { results = JSON.parse(fs.readFileSync(resultsPath, 'utf8')); } catch {
|
|
246
|
+
checks.push(fail('platinum', 'evals/results.json is invalid JSON'));
|
|
247
|
+
}
|
|
248
|
+
if (results) {
|
|
249
|
+
if (results.non_standard_provider) {
|
|
250
|
+
checks.push(fail('platinum', `eval results from non-standard provider (${results.model}) — rerun with ANTHROPIC_API_KEY or OPENAI_API_KEY`));
|
|
251
|
+
}
|
|
252
|
+
const pct = Math.round((results.pass_rate || 0) * 100);
|
|
253
|
+
const meta = `(${results.evals_run} evals, ${results.model}, ${results.date})`;
|
|
254
|
+
checks.push(pct >= 80
|
|
255
|
+
? pass('platinum', `eval pass rate: ${pct}% with skill ${meta}`)
|
|
256
|
+
: fail('platinum', `eval pass rate ${pct}% below 80% minimum — run: npx @booklib/skills eval <name>`));
|
|
257
|
+
if (results.delta !== undefined) {
|
|
258
|
+
const deltaPp = Math.round(results.delta * 100);
|
|
259
|
+
const basePct = Math.round((results.baseline_pass_rate || 0) * 100);
|
|
260
|
+
checks.push(deltaPp >= 20
|
|
261
|
+
? pass('platinum', `eval delta: +${deltaPp}pp over baseline (${basePct}% without skill)`)
|
|
262
|
+
: fail('platinum', `eval delta +${deltaPp}pp below 20pp minimum (baseline: ${basePct}%)`));
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
204
267
|
return checks;
|
|
205
268
|
}
|
|
206
269
|
|
|
@@ -240,16 +303,30 @@ function printCheckResults(skillName, checks) {
|
|
|
240
303
|
}
|
|
241
304
|
|
|
242
305
|
// ─── EVAL command ─────────────────────────────────────────────────────────────
|
|
243
|
-
function callClaude(systemPrompt, userMessage, model) {
|
|
244
|
-
const apiKey = process.env.ANTHROPIC_API_KEY;
|
|
245
|
-
if (!apiKey) throw new Error('ANTHROPIC_API_KEY environment variable not set');
|
|
246
306
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
307
|
+
/**
 * Report whether an executable can be found on the PATH.
 *
 * Runs the platform lookup tool (`where` on Windows, `which` elsewhere) with
 * its output discarded and treats exit status 0 as "found".
 *
 * @param {string} cmd - Executable name to look up.
 * @returns {boolean} True only when the lookup tool exits with status 0.
 */
function commandExists(cmd) {
  const lookupTool = process.platform === 'win32' ? 'where' : 'which';
  const { status } = spawnSync(lookupTool, [cmd], { stdio: 'ignore' });
  return status === 0;
}
|
|
311
|
+
|
|
312
|
+
/**
 * Pick the LLM provider to use for evals; the first match wins.
 *
 * Order checked: ANTHROPIC_API_KEY, OPENAI_API_KEY, EVAL_API_KEY together with
 * EVAL_BASE_URL, an installed `claude` CLI, an installed `ollama`.
 *
 * NOTE(review): the CLI help text lists Ollama before the claude CLI, whereas
 * this function checks `claude` first — confirm which order is intended.
 *
 * @returns {?{type: string, defaultModel: ?string, baseUrl?: string, key?: string}}
 *   Provider descriptor, or null when nothing usable is found. A null
 *   `defaultModel` means the caller has to supply a model.
 */
function detectProvider() {
  const { ANTHROPIC_API_KEY, OPENAI_API_KEY, EVAL_API_KEY, EVAL_BASE_URL } = process.env;

  if (ANTHROPIC_API_KEY) {
    return { type: 'anthropic', defaultModel: 'claude-haiku-4-5-20251001' };
  }
  if (OPENAI_API_KEY) {
    return {
      type: 'openai-compat',
      baseUrl: 'https://api.openai.com/v1',
      key: OPENAI_API_KEY,
      defaultModel: 'gpt-4o-mini',
    };
  }
  if (EVAL_API_KEY && EVAL_BASE_URL) {
    return { type: 'openai-compat', baseUrl: EVAL_BASE_URL, key: EVAL_API_KEY, defaultModel: null };
  }

  // No API key configured: fall back to locally installed tools.
  if (commandExists('claude')) {
    return { type: 'claude-cli', defaultModel: 'default' };
  }
  if (commandExists('ollama')) {
    return { type: 'openai-compat', baseUrl: 'http://localhost:11434/v1', key: 'ollama', defaultModel: null };
  }
  return null;
}
|
|
325
|
+
|
|
326
|
+
function callAnthropicApi(systemPrompt, userMessage, model) {
|
|
327
|
+
const reqBody = { model, max_tokens: 4096, messages: [{ role: 'user', content: userMessage }] };
|
|
328
|
+
if (systemPrompt) reqBody.system = systemPrompt;
|
|
329
|
+
const body = JSON.stringify(reqBody);
|
|
253
330
|
|
|
254
331
|
return new Promise((resolve, reject) => {
|
|
255
332
|
const req = https.request({
|
|
@@ -258,7 +335,7 @@ function callClaude(systemPrompt, userMessage, model) {
|
|
|
258
335
|
method: 'POST',
|
|
259
336
|
headers: {
|
|
260
337
|
'Content-Type': 'application/json',
|
|
261
|
-
'x-api-key':
|
|
338
|
+
'x-api-key': process.env.ANTHROPIC_API_KEY,
|
|
262
339
|
'anthropic-version': '2023-06-01',
|
|
263
340
|
'Content-Length': Buffer.byteLength(body),
|
|
264
341
|
},
|
|
@@ -279,6 +356,78 @@ function callClaude(systemPrompt, userMessage, model) {
|
|
|
279
356
|
});
|
|
280
357
|
}
|
|
281
358
|
|
|
359
|
+
/**
 * Send one chat-completion request to an OpenAI-compatible endpoint.
 *
 * @param {string} baseUrl - API root including any version prefix, e.g.
 *   `https://api.openai.com/v1` or `http://localhost:11434/v1`.
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {?string} systemPrompt - Optional system message; omitted when falsy.
 * @param {string} userMessage - Content of the single user message.
 * @param {string} model - Model identifier passed through to the API.
 * @returns {Promise<string>} Content of the first choice, or '' when the
 *   response carries none. Rejects on transport errors, non-JSON bodies, API
 *   `error` payloads and non-2xx statuses.
 * @throws {TypeError} Synchronously, when `baseUrl` is not a valid absolute URL.
 */
function callOpenAICompat(baseUrl, apiKey, systemPrompt, userMessage, model) {
  const messages = [];
  if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
  messages.push({ role: 'user', content: userMessage });
  const body = JSON.stringify({ model, max_tokens: 4096, messages });

  // Append the endpoint to the base URL's own path. Resolving the absolute
  // path "/chat/completions" against the base would discard the base path
  // ("/v1") and hit the wrong route.
  const url = new URL(baseUrl);
  url.pathname = `${url.pathname.replace(/\/+$/, '')}/chat/completions`;
  const isHttps = url.protocol === 'https:';
  const transport = isHttps ? https : http;

  return new Promise((resolve, reject) => {
    const req = transport.request({
      hostname: url.hostname,
      port: url.port || (isHttps ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
        'Content-Length': Buffer.byteLength(body),
      },
    }, res => {
      // Decode as a stream so multi-byte characters split across chunks survive.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try {
          parsed = JSON.parse(data);
        } catch (e) {
          // Typically an HTML/plain-text error page from a proxy or a wrong route.
          reject(new Error(`HTTP ${res.statusCode}: response from ${url.pathname} is not valid JSON`, { cause: e }));
          return;
        }

        const apiError = parsed?.error;
        if (apiError) {
          const detail = typeof apiError === 'string' ? apiError : apiError.message || JSON.stringify(apiError);
          reject(new Error(detail));
          return;
        }
        if (res.statusCode < 200 || res.statusCode >= 300) {
          // An error status must not be mistaken for an empty completion.
          reject(new Error(`HTTP ${res.statusCode} from ${url.pathname}`));
          return;
        }
        resolve(parsed?.choices?.[0]?.message?.content ?? '');
      });
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
|
|
396
|
+
|
|
397
|
+
/**
 * Run a single prompt through the locally installed `claude` CLI.
 *
 * The CLI is invoked synchronously (120 s timeout, 10 MiB output limit); the
 * outcome is wrapped in a Promise so callers can treat every provider alike.
 *
 * @param {?string} systemPrompt - Passed via `--system-prompt` when truthy.
 * @param {string} userMessage - Passed via `-p`.
 * @returns {Promise<string>} Trimmed stdout on exit status 0; otherwise a
 *   rejection carrying the spawn error, the trimmed stderr, or
 *   'claude CLI failed'.
 */
function callClaudeCli(systemPrompt, userMessage) {
  // `--bare` is intentionally not passed: per the original author's note it
  // disables OAuth/keychain auth (requiring ANTHROPIC_API_KEY), which would
  // break subscription-based Claude logins.
  const cliArgs = ['-p', userMessage, '--tools', ''].concat(
    systemPrompt ? ['--system-prompt', systemPrompt] : []
  );

  const { error, status, stderr, stdout } = spawnSync('claude', cliArgs, {
    encoding: 'utf8',
    maxBuffer: 10 * 1024 * 1024,
    timeout: 120000,
  });

  if (error) return Promise.reject(error);
  if (status === 0) return Promise.resolve(stdout.trim());
  return Promise.reject(new Error(stderr?.trim() || 'claude CLI failed'));
}
|
|
411
|
+
|
|
412
|
+
// Cached result of detectProvider(); stays null until a provider has been found.
let _provider = null;

/**
 * Return the detected LLM provider, running detection at most once per
 * successful result. While detection keeps returning null it is retried on
 * every call.
 *
 * @returns {?object} Provider descriptor from detectProvider(), or null.
 */
function getProvider() {
  _provider = _provider || detectProvider();
  return _provider;
}
|
|
417
|
+
|
|
418
|
+
/**
 * Send a prompt to whichever provider getProvider() reports.
 *
 * @param {?string} systemPrompt - Optional system prompt.
 * @param {string} userMessage - User message to send.
 * @param {string} model - Model identifier; not forwarded to the claude CLI.
 * @returns {Promise<string>} The provider's text response.
 * @throws {Error} Synchronously, when no provider is available or its type is
 *   not recognised.
 */
function callLLM(systemPrompt, userMessage, model) {
  const provider = getProvider();
  if (!provider) {
    throw new Error([
      'No LLM provider found.\n',
      ' Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or EVAL_API_KEY+EVAL_BASE_URL\n',
      ' — or install Claude Code (claude.ai/code) or Ollama (ollama.com)',
    ].join(''));
  }

  switch (provider.type) {
    case 'anthropic':
      return callAnthropicApi(systemPrompt, userMessage, model);
    case 'openai-compat':
      return callOpenAICompat(provider.baseUrl, provider.key, systemPrompt, userMessage, model);
    case 'claude-cli':
      return callClaudeCli(systemPrompt, userMessage);
    default:
      throw new Error(`Unknown provider type: ${provider.type}`);
  }
}
|
|
430
|
+
|
|
282
431
|
function judgeResponse(response, expectations, model) {
|
|
283
432
|
const numbered = expectations.map((e, i) => `${i + 1}. ${e}`).join('\n');
|
|
284
433
|
const judgeSystem = `You are an eval judge. For each numbered expectation, respond with exactly:
|
|
@@ -293,7 +442,7 @@ ${response}
|
|
|
293
442
|
=== Expectations ===
|
|
294
443
|
${numbered}`;
|
|
295
444
|
|
|
296
|
-
return
|
|
445
|
+
return callLLM(judgeSystem, judgePrompt, model);
|
|
297
446
|
}
|
|
298
447
|
|
|
299
448
|
function parseJudgement(judgement, count) {
|
|
@@ -309,66 +458,40 @@ function parseJudgement(judgement, count) {
|
|
|
309
458
|
return results;
|
|
310
459
|
}
|
|
311
460
|
|
|
312
|
-
async function
|
|
313
|
-
const skillDir = path.join(skillsRoot, skillName);
|
|
314
|
-
const evalsPath = path.join(skillDir, 'evals', 'evals.json');
|
|
315
|
-
const model = opts.model || 'claude-haiku-4-5-20251001';
|
|
316
|
-
const judgeModel = opts.judgeModel || 'claude-haiku-4-5-20251001';
|
|
317
|
-
const filterId = opts.id || null;
|
|
318
|
-
|
|
319
|
-
if (!fs.existsSync(evalsPath)) {
|
|
320
|
-
console.error(c.red(`✗ No evals/evals.json found for "${skillName}"`));
|
|
321
|
-
process.exit(1);
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
let evals;
|
|
325
|
-
try {
|
|
326
|
-
evals = JSON.parse(fs.readFileSync(evalsPath, 'utf8')).evals || [];
|
|
327
|
-
} catch {
|
|
328
|
-
console.error(c.red('✗ evals/evals.json is invalid JSON'));
|
|
329
|
-
process.exit(1);
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
if (filterId) evals = evals.filter(e => e.id === filterId);
|
|
333
|
-
if (!evals.length) {
|
|
334
|
-
console.error(c.red(`✗ No evals found${filterId ? ` matching --id ${filterId}` : ''}`));
|
|
335
|
-
process.exit(1);
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
const skillMd = getSkillMdContent(skillName);
|
|
339
|
-
|
|
340
|
-
console.log('');
|
|
341
|
-
console.log(c.bold(` ${skillName}`) + c.dim(` — evals (${evals.length})`));
|
|
342
|
-
console.log(' ' + c.line(55));
|
|
343
|
-
console.log(c.dim(` model: ${model} judge: ${judgeModel}\n`));
|
|
344
|
-
|
|
461
|
+
async function runEvalSet(evals, systemPrompt, model, judgeModel, verbose) {
|
|
345
462
|
let totalPass = 0, totalFail = 0, evalsFullyPassed = 0;
|
|
346
463
|
|
|
347
464
|
for (const ev of evals) {
|
|
348
465
|
const promptLines = (ev.prompt || '').split('\n').length;
|
|
349
466
|
const expectations = ev.expectations || [];
|
|
350
467
|
|
|
351
|
-
|
|
352
|
-
|
|
468
|
+
if (verbose) {
|
|
469
|
+
process.stdout.write(` ${c.cyan('●')} ${c.bold(ev.id)}\n`);
|
|
470
|
+
process.stdout.write(c.dim(` prompt: ${promptLines} lines — calling ${model}...`));
|
|
471
|
+
} else {
|
|
472
|
+
process.stdout.write(c.dim(` ${ev.id}...`));
|
|
473
|
+
}
|
|
353
474
|
|
|
354
475
|
let response;
|
|
355
476
|
try {
|
|
356
|
-
response = await
|
|
357
|
-
process.stdout.write(c.green(' done\n'));
|
|
477
|
+
response = await callLLM(systemPrompt, ev.prompt, model);
|
|
478
|
+
if (verbose) process.stdout.write(c.green(' done\n'));
|
|
479
|
+
else process.stdout.write(c.dim(' ✓\n'));
|
|
358
480
|
} catch (e) {
|
|
359
|
-
process.stdout.write(c.red(` failed: ${e.message}\n`));
|
|
481
|
+
if (verbose) process.stdout.write(c.red(` failed: ${e.message}\n`));
|
|
482
|
+
else process.stdout.write(c.red(` ✗\n`));
|
|
360
483
|
totalFail += expectations.length;
|
|
361
484
|
continue;
|
|
362
485
|
}
|
|
363
486
|
|
|
364
|
-
process.stdout.write(c.dim(` judging ${expectations.length} expectations...`));
|
|
487
|
+
if (verbose) process.stdout.write(c.dim(` judging ${expectations.length} expectations...`));
|
|
365
488
|
|
|
366
489
|
let judgement;
|
|
367
490
|
try {
|
|
368
491
|
judgement = await judgeResponse(response, expectations, judgeModel);
|
|
369
|
-
process.stdout.write(c.dim(' done\n'));
|
|
492
|
+
if (verbose) process.stdout.write(c.dim(' done\n'));
|
|
370
493
|
} catch (e) {
|
|
371
|
-
process.stdout.write(c.red(` judge failed: ${e.message}\n`));
|
|
494
|
+
if (verbose) process.stdout.write(c.red(` judge failed: ${e.message}\n`));
|
|
372
495
|
totalFail += expectations.length;
|
|
373
496
|
continue;
|
|
374
497
|
}
|
|
@@ -378,26 +501,134 @@ async function runEvals(skillName, opts = {}) {
|
|
|
378
501
|
|
|
379
502
|
for (let i = 0; i < expectations.length; i++) {
|
|
380
503
|
const r = results[i];
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
504
|
+
if (verbose) {
|
|
505
|
+
const icon = r.ok ? c.green('✓') : c.red('✗');
|
|
506
|
+
const exp = expectations[i].length > 80 ? expectations[i].slice(0, 79) + '…' : expectations[i];
|
|
507
|
+
console.log(` ${icon} ${exp}`);
|
|
508
|
+
if (!r.ok) console.log(c.dim(` → ${r.reason}`));
|
|
509
|
+
}
|
|
385
510
|
if (r.ok) { evalPass++; totalPass++; } else { totalFail++; }
|
|
386
511
|
}
|
|
387
512
|
|
|
388
513
|
const evalTotal = expectations.length;
|
|
389
514
|
const allPassed = evalPass === evalTotal;
|
|
390
515
|
if (allPassed) evalsFullyPassed++;
|
|
391
|
-
console.log(c.dim(` ${evalPass}/${evalTotal} expectations passed`) + (allPassed ? ' ' + c.green('✓') : '') + '\n');
|
|
516
|
+
if (verbose) console.log(c.dim(` ${evalPass}/${evalTotal} expectations passed`) + (allPassed ? ' ' + c.green('✓') : '') + '\n');
|
|
392
517
|
}
|
|
393
518
|
|
|
394
519
|
const total = totalPass + totalFail;
|
|
395
|
-
|
|
396
|
-
|
|
520
|
+
return { passed: totalPass, failed: totalFail, total, evalsFullyPassed, pass_rate: total > 0 ? totalPass / total : 0 };
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
/**
 * Run a skill's evals twice — with the skill as system prompt and without it
 * (baseline) — print both results plus a summary, and write
 * `evals/results.json` next to the skill's evals.
 *
 * Exits the process with status 1 when no provider or model is available, when
 * `evals/evals.json` is missing, invalid or yields no evals, and when the
 * with-skill pass rate is below 80%.
 *
 * @param {string} skillName - Directory name of the skill under `skillsRoot`.
 * @param {{model?: string, id?: string}} [opts] - `model` overrides the
 *   EVAL_MODEL env var and the provider default; `id` restricts the run to the
 *   eval with that id.
 * @returns {Promise<void>}
 */
async function runEvals(skillName, opts = {}) {
  const skillDir = path.join(skillsRoot, skillName);
  const evalsPath = path.join(skillDir, 'evals', 'evals.json');
  const provider = getProvider();
  if (!provider) {
    console.error(c.red(
      '✗ No LLM provider found.\n' +
      ' Options (pick one):\n' +
      ' ANTHROPIC_API_KEY=sk-ant-... (Anthropic API)\n' +
      ' OPENAI_API_KEY=sk-... (OpenAI)\n' +
      ' EVAL_API_KEY=... EVAL_BASE_URL=https://api.groq.com/openai/v1 (any OpenAI-compatible)\n' +
      ' Install Claude Code: claude.ai/code (subscription, no key)\n' +
      ' Install Ollama: ollama.com (local, no key)'
    ));
    process.exit(1);
  }
  const defaultModel = provider.defaultModel;
  const model = opts.model || process.env.EVAL_MODEL || defaultModel;
  if (!model) {
    console.error(c.red(`✗ No model specified. Use --model=<name> or set EVAL_MODEL env var.`));
    process.exit(1);
  }
  // The same model answers the prompts and judges the answers.
  const judgeModel = model;
  const filterId = opts.id || null;

  if (!fs.existsSync(evalsPath)) {
    console.error(c.red(`✗ No evals/evals.json found for "${skillName}"`));
    process.exit(1);
  }

  let evals;
  try {
    evals = JSON.parse(fs.readFileSync(evalsPath, 'utf8')).evals || [];
  } catch {
    console.error(c.red('✗ evals/evals.json is invalid JSON'));
    process.exit(1);
  }

  if (filterId) evals = evals.filter(e => e.id === filterId);
  if (!evals.length) {
    console.error(c.red(`✗ No evals found${filterId ? ` matching --id ${filterId}` : ''}`));
    process.exit(1);
  }

  const skillMd = getSkillMdContent(skillName);

  console.log('');
  console.log(c.bold(` ${skillName}`) + c.dim(` — evals (${evals.length})`));
  console.log(' ' + c.line(55));
  const providerLabel = provider.type === 'claude-cli' ? 'claude CLI' : provider.type === 'anthropic' ? 'Anthropic API' : provider.baseUrl;
  console.log(c.dim(` provider: ${providerLabel} model: ${model}\n`));

  // ── With-skill run ──────────────────────────────────────────────────────────
  console.log(c.bold(' With skill\n'));
  const withResult = await runEvalSet(evals, skillMd, model, judgeModel, true);
  const withPct = Math.round(withResult.pass_rate * 100);
  const withColor = withPct >= 80 ? c.green : withPct >= 60 ? c.yellow : c.red;
  console.log(' ' + c.line(55));
  console.log(` ${withColor(`${withPct}%`)} — ${withResult.evalsFullyPassed}/${evals.length} evals fully passed, ${withResult.passed}/${withResult.total} expectations met\n`);

  // ── Baseline run (no skill system prompt) ───────────────────────────────────
  console.log(c.dim(' Baseline (without skill)\n'));
  const baseResult = await runEvalSet(evals, null, model, judgeModel, false);
  const basePct = Math.round(baseResult.pass_rate * 100);
  console.log(' ' + c.line(55));
  console.log(c.dim(` ${basePct}% — ${baseResult.passed}/${baseResult.total} expectations met\n`));

  // ── Summary ─────────────────────────────────────────────────────────────────
  // Derive the printed delta from the exact value stored in results.json, so
  // this summary and anything that later reads results.json agree on the number.
  const delta = Math.round((withResult.pass_rate - baseResult.pass_rate) * 1000) / 1000;
  const deltaPp = Math.round(delta * 100);
  // Only prefix "+" for non-negative deltas; a regression prints as "-5pp", not "+-5pp".
  const deltaLabel = `${deltaPp >= 0 ? '+' : ''}${deltaPp}pp`;
  const deltaColor = deltaPp >= 20 ? c.green : deltaPp >= 10 ? c.yellow : c.red;
  console.log(' ' + c.line(55));
  console.log(` ${c.bold('Summary')} with skill: ${withColor(`${withPct}%`)} baseline: ${c.dim(`${basePct}%`)} delta: ${deltaColor(deltaLabel)}`);

  // ── Warn if using a non-standard provider ───────────────────────────────────
  // Loopback endpoints count as local whether spelled by name or by address.
  const isLocalModel = provider.type === 'openai-compat' &&
    ['localhost', '127.0.0.1', '[::1]'].some(host => provider.baseUrl.includes(host));
  const isCliModel = provider.type === 'claude-cli';
  if (isLocalModel || isCliModel) {
    const providerName = isLocalModel ? `local model (${model})` : 'claude CLI';
    console.log('');
    console.log(c.yellow(` ⚠ Results generated with ${providerName}.`));
    console.log(c.dim(' For committing to the repo, use a standardized provider so scores'));
    console.log(c.dim(' are comparable across all skills:'));
    console.log(c.dim(' ANTHROPIC_API_KEY=... (recommended: claude-haiku-4-5-20251001)'));
    console.log(c.dim(' OPENAI_API_KEY=... (recommended: gpt-4o-mini)'));
    console.log(c.dim(' results.json will be written but should not be committed as-is.'));
  }

  // ── Write results.json ───────────────────────────────────────────────────────
  // NOTE(review): a run restricted with --id also overwrites results.json with
  // the partial figures (evals_run records how many evals ran) — confirm that
  // this is intended.
  const resultsData = {
    pass_rate: Math.round(withResult.pass_rate * 1000) / 1000,
    passed: withResult.passed,
    total: withResult.total,
    baseline_pass_rate: Math.round(baseResult.pass_rate * 1000) / 1000,
    baseline_passed: baseResult.passed,
    baseline_total: baseResult.total,
    delta,
    model,
    evals_run: evals.length,
    date: new Date().toISOString().split('T')[0],
    ...(isLocalModel || isCliModel ? { non_standard_provider: true } : {}),
  };
  const resultsPath = path.join(skillDir, 'evals', 'results.json');
  fs.writeFileSync(resultsPath, JSON.stringify(resultsData, null, 2));
  console.log(c.dim(`\n ✓ results saved → evals/results.json\n`));

  if (withPct < 80) {
    console.error(c.red(` ✗ Pass rate ${withPct}% is below the 80% minimum\n`));
    process.exit(1);
  }
}
|
|
402
633
|
|
|
403
634
|
// ─── Router ───────────────────────────────────────────────────────────────────
|
|
@@ -491,17 +722,34 @@ async function main() {
|
|
|
491
722
|
}
|
|
492
723
|
|
|
493
724
|
case 'add': {
|
|
494
|
-
const addAll
|
|
495
|
-
const
|
|
496
|
-
|
|
725
|
+
const addAll = args.includes('--all');
|
|
726
|
+
const noCommands = args.includes('--no-commands');
|
|
727
|
+
const noAgents = args.includes('--no-agents');
|
|
728
|
+
const agentArg = args.find(a => a.startsWith('--agent='))?.split('=')[1];
|
|
729
|
+
const skillName = args.find(a => !a.startsWith('--') && a !== 'add');
|
|
730
|
+
|
|
731
|
+
if (agentArg) {
|
|
732
|
+
// explicit: skills add --agent=booklib-reviewer
|
|
733
|
+
const agents = getAvailableAgents();
|
|
734
|
+
if (!agents.includes(agentArg)) {
|
|
735
|
+
console.error(c.red(`✗ Agent "${agentArg}" not found.`) + ' Available: ' + c.dim(agents.join(', ')));
|
|
736
|
+
process.exit(1);
|
|
737
|
+
}
|
|
738
|
+
copyAgent(agentArg);
|
|
739
|
+
console.log(c.dim(`\nInstalled to ${agentsTargetDir}`));
|
|
740
|
+
} else if (addAll) {
|
|
497
741
|
const skills = getAvailableSkills();
|
|
498
742
|
skills.forEach(s => copySkill(s, targetDir));
|
|
499
|
-
|
|
743
|
+
if (!noCommands) skills.forEach(s => copyCommand(s));
|
|
744
|
+
if (!noAgents) getAvailableAgents().forEach(a => copyAgent(a));
|
|
745
|
+
const agentCount = noAgents ? 0 : getAvailableAgents().length;
|
|
746
|
+
console.log(c.dim(`\nInstalled ${skills.length} skills, ${agentCount} agents to .claude/`));
|
|
500
747
|
} else if (skillName) {
|
|
501
748
|
copySkill(skillName, targetDir);
|
|
749
|
+
if (!noCommands) copyCommand(skillName);
|
|
502
750
|
console.log(c.dim(`\nInstalled to ${targetDir}`));
|
|
503
751
|
} else {
|
|
504
|
-
console.error(c.red('Usage: skills add <skill-name> | skills add --all'));
|
|
752
|
+
console.error(c.red('Usage: skills add <skill-name> | skills add --all | skills add --agent=<name>'));
|
|
505
753
|
process.exit(1);
|
|
506
754
|
}
|
|
507
755
|
break;
|
|
@@ -569,6 +817,42 @@ async function main() {
|
|
|
569
817
|
break;
|
|
570
818
|
}
|
|
571
819
|
|
|
820
|
+
case 'update-readme': {
|
|
821
|
+
const skills = getAvailableSkills();
|
|
822
|
+
const rows = skills.map(skillName => {
|
|
823
|
+
const resultsPath = path.join(skillsRoot, skillName, 'evals', 'results.json');
|
|
824
|
+
if (!fs.existsSync(resultsPath)) return `| ${skillName} | — | — | — | — | — |`;
|
|
825
|
+
try {
|
|
826
|
+
const r = JSON.parse(fs.readFileSync(resultsPath, 'utf8'));
|
|
827
|
+
const pct = Math.round((r.pass_rate || 0) * 100) + '%';
|
|
828
|
+
const basePct = r.baseline_pass_rate !== undefined ? Math.round(r.baseline_pass_rate * 100) + '%' : '—';
|
|
829
|
+
const delta = r.delta !== undefined ? `+${Math.round(r.delta * 100)}pp` : '—';
|
|
830
|
+
const warn = r.non_standard_provider ? ' ⚠' : '';
|
|
831
|
+
return `| ${skillName} | ${pct}${warn} | ${basePct} | ${delta} | ${r.evals_run ?? '—'} | ${r.date ?? '—'} |`;
|
|
832
|
+
} catch { return `| ${skillName} | — | — | — | — | — |`; }
|
|
833
|
+
});
|
|
834
|
+
|
|
835
|
+
const tableHeader = '| Skill | Pass Rate | Baseline | Delta | Evals | Last Run |\n|-------|-----------|----------|-------|-------|----------|';
|
|
836
|
+
const newTable = `<!-- quality-table-start -->\n${tableHeader}\n${rows.join('\n')}\n<!-- quality-table-end -->`;
|
|
837
|
+
|
|
838
|
+
const readmePath = path.join(__dirname, '..', 'README.md');
|
|
839
|
+
let readme = fs.readFileSync(readmePath, 'utf8');
|
|
840
|
+
readme = readme.replace(/<!-- quality-table-start -->[\s\S]*?<!-- quality-table-end -->/, newTable);
|
|
841
|
+
fs.writeFileSync(readmePath, readme);
|
|
842
|
+
|
|
843
|
+
const missing = skills.filter(s => !fs.existsSync(path.join(skillsRoot, s, 'evals', 'results.json')));
|
|
844
|
+
const nonStd = skills.filter(s => {
|
|
845
|
+
try { return JSON.parse(fs.readFileSync(path.join(skillsRoot, s, 'evals', 'results.json'), 'utf8')).non_standard_provider; }
|
|
846
|
+
catch { return false; }
|
|
847
|
+
});
|
|
848
|
+
console.log('');
|
|
849
|
+
console.log(c.green('✓') + ` README.md quality table updated (${skills.length} skills)`);
|
|
850
|
+
if (missing.length) console.log(c.dim(` ${missing.length} pending: ${missing.join(', ')}`));
|
|
851
|
+
if (nonStd.length) console.log(c.yellow(` ⚠ ${nonStd.length} non-standard provider: ${nonStd.join(', ')}`));
|
|
852
|
+
console.log('');
|
|
853
|
+
break;
|
|
854
|
+
}
|
|
855
|
+
|
|
572
856
|
default:
|
|
573
857
|
console.log(`
|
|
574
858
|
${c.bold(' @booklib/skills')} — book knowledge distilled into AI agent skills
|
|
@@ -577,14 +861,25 @@ ${c.bold(' Usage:')}
|
|
|
577
861
|
${c.cyan('skills list')} list all available skills
|
|
578
862
|
${c.cyan('skills info')} ${c.dim('<name>')} full description of a skill
|
|
579
863
|
${c.cyan('skills demo')} ${c.dim('<name>')} before/after example
|
|
580
|
-
${c.cyan('skills add')} ${c.dim('<name>')} install to .claude/
|
|
581
|
-
${c.cyan('skills add --all')} install all skills
|
|
582
|
-
${c.cyan('skills add')} ${c.dim('<name> --global')} install globally
|
|
864
|
+
${c.cyan('skills add')} ${c.dim('<name>')} install skill + /command to .claude/
|
|
865
|
+
${c.cyan('skills add --all')} install all skills + commands + agents
|
|
866
|
+
${c.cyan('skills add')} ${c.dim('<name> --global')} install globally (~/.claude/)
|
|
867
|
+
${c.cyan('skills add')} ${c.dim('<name> --no-commands')} install skill only, skip command
|
|
868
|
+
${c.cyan('skills add')} ${c.dim('--agent=<name>')} install a single agent to .claude/agents/
|
|
869
|
+
${c.cyan('skills add --all --no-agents')} install skills + commands, skip agents
|
|
583
870
|
${c.cyan('skills check')} ${c.dim('<name>')} quality check (Bronze/Silver/Gold/Platinum)
|
|
584
871
|
${c.cyan('skills check --all')} quality summary for all skills
|
|
585
|
-
${c.cyan('skills
|
|
872
|
+
${c.cyan('skills update-readme')} refresh README quality table from results.json files
|
|
873
|
+
${c.cyan('skills eval')} ${c.dim('<name>')} run evals (auto-detects provider)
|
|
586
874
|
${c.cyan('skills eval')} ${c.dim('<name> --model=<id>')} use a specific model
|
|
587
875
|
${c.cyan('skills eval')} ${c.dim('<name> --id=<eval-id>')} run a single eval
|
|
876
|
+
|
|
877
|
+
${c.bold('Provider auto-detection (first match wins):')}
|
|
878
|
+
ANTHROPIC_API_KEY Anthropic API (default model: claude-haiku-4-5-20251001)
|
|
879
|
+
OPENAI_API_KEY OpenAI API (default model: gpt-4o-mini)
|
|
880
|
+
EVAL_API_KEY+EVAL_BASE_URL any OpenAI-compatible endpoint (Groq, Together, etc.)
|
|
881
|
+
ollama installed local Ollama (requires --model or EVAL_MODEL)
|
|
882
|
+
claude CLI installed Claude Code subscription — no key needed
|
|
588
883
|
`);
|
|
589
884
|
}
|
|
590
885
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review web animations using Animation at Work principles (Rachel Nabors). Usage: /animation-at-work [file | component]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `animation-at-work` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review changed animation code.
|
|
9
|
+
|
|
10
|
+
Focus on purpose, duration, easing, performance (compositor-only properties), and accessibility (prefers-reduced-motion). Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review code against Clean Code principles (Robert C. Martin). Usage: /clean-code-reviewer [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `clean-code-reviewer` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Classify each finding as **HIGH** (correctness, naming that obscures intent), **MEDIUM** (design, structure), or **LOW** (style). Reference every finding as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review data storage and distributed systems patterns (DDIA, Martin Kleppmann). Usage: /data-intensive-patterns [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `data-intensive-patterns` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on replication, partitioning, transactions, consistency models, and storage engine trade-offs. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review data pipeline code using Data Pipelines Pocket Reference (James Densmore). Usage: /data-pipelines [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `data-pipelines` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on ingestion, transformation, orchestration, error handling, idempotency, and observability. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review OO design and apply GoF patterns (Head First Design Patterns). Usage: /design-patterns [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `design-patterns` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Identify which GoF patterns are present, misapplied, or missing. Flag over-engineering. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review domain models using DDD patterns (Eric Evans). Usage: /domain-driven-design [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `domain-driven-design` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on Aggregates, Entities, Value Objects, Bounded Contexts, and domain invariants. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review Java code against Effective Java items (Joshua Bloch). Usage: /effective-java [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `effective-java` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on generics, builders, enums, exceptions, and API design. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Review Kotlin code against Effective Kotlin best practices (Marcin Moskała). Usage: /effective-kotlin [file | path]"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `effective-kotlin` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on safety, readability, idiomatic Kotlin, and common pitfalls. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|