@booklib/skills 1.5.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +23 -1
- package/README.md +55 -0
- package/benchmark/devto-post.md +178 -0
- package/benchmark/order-processing.original.js +158 -0
- package/benchmark/order-processing.pr-toolkit.js +181 -0
- package/benchmark/order-processing.skill-router.js +271 -0
- package/benchmark/review-report.md +129 -0
- package/bin/skills.js +327 -69
- package/commands/animation-at-work.md +10 -0
- package/commands/clean-code-reviewer.md +10 -0
- package/commands/data-intensive-patterns.md +10 -0
- package/commands/data-pipelines.md +10 -0
- package/commands/design-patterns.md +10 -0
- package/commands/domain-driven-design.md +10 -0
- package/commands/effective-java.md +10 -0
- package/commands/effective-kotlin.md +10 -0
- package/commands/effective-python.md +10 -0
- package/commands/effective-typescript.md +10 -0
- package/commands/kotlin-in-action.md +10 -0
- package/commands/lean-startup.md +10 -0
- package/commands/microservices-patterns.md +10 -0
- package/commands/programming-with-rust.md +10 -0
- package/commands/refactoring-ui.md +10 -0
- package/commands/rust-in-action.md +10 -0
- package/commands/skill-router.md +10 -0
- package/commands/spring-boot-in-action.md +10 -0
- package/commands/storytelling-with-data.md +10 -0
- package/commands/system-design-interview.md +10 -0
- package/commands/using-asyncio-python.md +10 -0
- package/commands/web-scraping-python.md +10 -0
- package/docs/index.html +268 -44
- package/package.json +4 -1
- package/scripts/gen-og.mjs +142 -0
- package/skills/skill-router/SKILL.md +23 -0
package/bin/skills.js
CHANGED
|
@@ -4,10 +4,13 @@ const fs = require('fs');
|
|
|
4
4
|
const path = require('path');
|
|
5
5
|
const os = require('os');
|
|
6
6
|
const https = require('https');
|
|
7
|
+
const http = require('http');
|
|
8
|
+
const { spawnSync } = require('child_process');
|
|
7
9
|
|
|
8
10
|
const args = process.argv.slice(2);
|
|
9
11
|
const command = args[0];
|
|
10
|
-
const skillsRoot
|
|
12
|
+
const skillsRoot = path.join(__dirname, '..', 'skills');
|
|
13
|
+
const commandsRoot = path.join(__dirname, '..', 'commands');
|
|
11
14
|
|
|
12
15
|
// ─── ANSI helpers ─────────────────────────────────────────────────────────────
|
|
13
16
|
const c = {
|
|
@@ -86,10 +89,22 @@ function copySkill(skillName, targetDir) {
|
|
|
86
89
|
console.log(c.green('✓') + ` ${c.bold(skillName)} → ${c.dim(dest)}`);
|
|
87
90
|
}
|
|
88
91
|
|
|
89
|
-
const isGlobal
|
|
90
|
-
const targetDir
|
|
92
|
+
const isGlobal = args.includes('--global');
|
|
93
|
+
const targetDir = isGlobal
|
|
91
94
|
? path.join(os.homedir(), '.claude', 'skills')
|
|
92
95
|
: path.join(process.cwd(), '.claude', 'skills');
|
|
96
|
+
const commandsTargetDir = isGlobal
|
|
97
|
+
? path.join(os.homedir(), '.claude', 'commands')
|
|
98
|
+
: path.join(process.cwd(), '.claude', 'commands');
|
|
99
|
+
|
|
100
|
+
/**
 * Install the slash-command file that ships alongside a skill.
 *
 * Looks for `<skillName>.md` in the package's commands directory and, when it
 * exists, copies it into the commands target directory (created on demand).
 * Skills that have no command file are skipped without any output.
 *
 * @param {string} skillName - Name of the skill whose command should be installed.
 * @returns {void}
 */
function copyCommand(skillName) {
  const fileName = `${skillName}.md`;
  const source = path.join(commandsRoot, fileName);

  if (fs.existsSync(source)) {
    fs.mkdirSync(commandsTargetDir, { recursive: true });
    const destination = path.join(commandsTargetDir, fileName);
    fs.copyFileSync(source, destination);
    console.log(c.green('✓') + ` /${skillName} command → ${c.dim(destination)}`);
  }
}
|
|
93
108
|
|
|
94
109
|
// ─── CHECK command ────────────────────────────────────────────────────────────
|
|
95
110
|
function checkSkill(skillName) {
|
|
@@ -201,6 +216,33 @@ function checkSkill(skillName) {
|
|
|
201
216
|
: fail('platinum', 'scripts/ exists but is empty'));
|
|
202
217
|
}
|
|
203
218
|
|
|
219
|
+
const resultsPath = path.join(skillDir, 'evals', 'results.json');
|
|
220
|
+
if (!fs.existsSync(resultsPath)) {
|
|
221
|
+
checks.push(fail('platinum', 'evals/results.json missing — run: npx @booklib/skills eval <name>'));
|
|
222
|
+
} else {
|
|
223
|
+
let results = null;
|
|
224
|
+
try { results = JSON.parse(fs.readFileSync(resultsPath, 'utf8')); } catch {
|
|
225
|
+
checks.push(fail('platinum', 'evals/results.json is invalid JSON'));
|
|
226
|
+
}
|
|
227
|
+
if (results) {
|
|
228
|
+
if (results.non_standard_provider) {
|
|
229
|
+
checks.push(fail('platinum', `eval results from non-standard provider (${results.model}) — rerun with ANTHROPIC_API_KEY or OPENAI_API_KEY`));
|
|
230
|
+
}
|
|
231
|
+
const pct = Math.round((results.pass_rate || 0) * 100);
|
|
232
|
+
const meta = `(${results.evals_run} evals, ${results.model}, ${results.date})`;
|
|
233
|
+
checks.push(pct >= 80
|
|
234
|
+
? pass('platinum', `eval pass rate: ${pct}% with skill ${meta}`)
|
|
235
|
+
: fail('platinum', `eval pass rate ${pct}% below 80% minimum — run: npx @booklib/skills eval <name>`));
|
|
236
|
+
if (results.delta !== undefined) {
|
|
237
|
+
const deltaPp = Math.round(results.delta * 100);
|
|
238
|
+
const basePct = Math.round((results.baseline_pass_rate || 0) * 100);
|
|
239
|
+
checks.push(deltaPp >= 20
|
|
240
|
+
? pass('platinum', `eval delta: +${deltaPp}pp over baseline (${basePct}% without skill)`)
|
|
241
|
+
: fail('platinum', `eval delta +${deltaPp}pp below 20pp minimum (baseline: ${basePct}%)`));
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
204
246
|
return checks;
|
|
205
247
|
}
|
|
206
248
|
|
|
@@ -240,16 +282,30 @@ function printCheckResults(skillName, checks) {
|
|
|
240
282
|
}
|
|
241
283
|
|
|
242
284
|
// ─── EVAL command ─────────────────────────────────────────────────────────────
|
|
243
|
-
function callClaude(systemPrompt, userMessage, model) {
|
|
244
|
-
const apiKey = process.env.ANTHROPIC_API_KEY;
|
|
245
|
-
if (!apiKey) throw new Error('ANTHROPIC_API_KEY environment variable not set');
|
|
246
285
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
286
|
+
/**
 * Report whether an executable can be located by the platform lookup tool.
 *
 * Runs `where` on Windows and `which` everywhere else, discarding all output;
 * only a zero exit status counts as "found".
 *
 * @param {string} cmd - Executable name to look up.
 * @returns {boolean} True when the lookup tool exits with status 0.
 */
function commandExists(cmd) {
  const lookupTool = process.platform === 'win32' ? 'where' : 'which';
  const { status } = spawnSync(lookupTool, [cmd], { stdio: 'ignore' });
  return status === 0;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Pick the LLM provider to use for evals; the first match wins.
 *
 * Order of precedence:
 *   1. ANTHROPIC_API_KEY                → Anthropic API
 *   2. OPENAI_API_KEY                   → OpenAI (OpenAI-compatible transport)
 *   3. EVAL_API_KEY + EVAL_BASE_URL     → any OpenAI-compatible endpoint
 *   4. `claude` executable found        → Claude CLI
 *   5. `ollama` executable found        → local Ollama endpoint
 *
 * A `defaultModel` of null means the caller has to supply a model itself.
 *
 * @returns {{type: string, defaultModel: (string|null), baseUrl?: string, key?: string}|null}
 *   The provider descriptor, or null when nothing usable was detected.
 */
function detectProvider() {
  const {
    ANTHROPIC_API_KEY: anthropicKey,
    OPENAI_API_KEY: openAiKey,
    EVAL_API_KEY: customKey,
    EVAL_BASE_URL: customBaseUrl,
  } = process.env;

  if (anthropicKey) {
    return { type: 'anthropic', defaultModel: 'claude-haiku-4-5-20251001' };
  }
  if (openAiKey) {
    return { type: 'openai-compat', baseUrl: 'https://api.openai.com/v1', key: openAiKey, defaultModel: 'gpt-4o-mini' };
  }
  if (customKey && customBaseUrl) {
    return { type: 'openai-compat', baseUrl: customBaseUrl, key: customKey, defaultModel: null };
  }
  // Executable lookups spawn a process, so they only run when no env config matched.
  if (commandExists('claude')) {
    return { type: 'claude-cli', defaultModel: 'default' };
  }
  if (commandExists('ollama')) {
    return { type: 'openai-compat', baseUrl: 'http://localhost:11434/v1', key: 'ollama', defaultModel: null };
  }
  return null;
}
|
|
304
|
+
|
|
305
|
+
function callAnthropicApi(systemPrompt, userMessage, model) {
|
|
306
|
+
const reqBody = { model, max_tokens: 4096, messages: [{ role: 'user', content: userMessage }] };
|
|
307
|
+
if (systemPrompt) reqBody.system = systemPrompt;
|
|
308
|
+
const body = JSON.stringify(reqBody);
|
|
253
309
|
|
|
254
310
|
return new Promise((resolve, reject) => {
|
|
255
311
|
const req = https.request({
|
|
@@ -258,7 +314,7 @@ function callClaude(systemPrompt, userMessage, model) {
|
|
|
258
314
|
method: 'POST',
|
|
259
315
|
headers: {
|
|
260
316
|
'Content-Type': 'application/json',
|
|
261
|
-
'x-api-key':
|
|
317
|
+
'x-api-key': process.env.ANTHROPIC_API_KEY,
|
|
262
318
|
'anthropic-version': '2023-06-01',
|
|
263
319
|
'Content-Length': Buffer.byteLength(body),
|
|
264
320
|
},
|
|
@@ -279,6 +335,78 @@ function callClaude(systemPrompt, userMessage, model) {
|
|
|
279
335
|
});
|
|
280
336
|
}
|
|
281
337
|
|
|
338
|
+
/**
 * Send a single-turn chat completion request to an OpenAI-compatible endpoint.
 *
 * @param {string} baseUrl - API base URL including any version prefix
 *   (e.g. `https://api.openai.com/v1`, `http://localhost:11434/v1`).
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {string|null} systemPrompt - Optional system message; omitted when falsy.
 * @param {string} userMessage - The user message content.
 * @param {string} model - Model identifier passed through to the API.
 * @returns {Promise<string>} Content of the first choice, or '' when the
 *   response carries none. Rejects on transport errors, on a non-JSON
 *   response, or when the response body contains an `error` field.
 */
function callOpenAICompat(baseUrl, apiKey, systemPrompt, userMessage, model) {
  const messages = [];
  if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
  messages.push({ role: 'user', content: userMessage });
  const body = JSON.stringify({ model, max_tokens: 4096, messages });

  // Append to the base path instead of resolving '/chat/completions' against
  // it: an absolute path replaces the base's whole path, which would turn
  // 'https://api.openai.com/v1' into 'https://api.openai.com/chat/completions'.
  const url = new URL(baseUrl);
  url.pathname = `${url.pathname.replace(/\/+$/, '')}/chat/completions`;
  const isHttps = url.protocol === 'https:';
  const transport = isHttps ? https : http;

  return new Promise((resolve, reject) => {
    const req = transport.request({
      hostname: url.hostname,
      port: url.port || (isHttps ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
        'Content-Length': Buffer.byteLength(body),
      },
    }, res => {
      // Decode as UTF-8 at the stream level so multi-byte characters that
      // straddle a chunk boundary are not corrupted by per-chunk conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => { data += chunk; });
      res.on('end', () => {
        let parsed;
        try {
          parsed = JSON.parse(data);
        } catch (e) {
          // Proxies and gateways often answer with HTML or plain text on
          // failure; surface the status instead of a bare parse error.
          reject(new Error(`Non-JSON response (HTTP ${res.statusCode}) from ${url.href}: ${e.message}`));
          return;
        }
        if (parsed.error) reject(new Error(parsed.error.message || JSON.stringify(parsed.error)));
        else resolve(parsed.choices?.[0]?.message?.content ?? '');
      });
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
|
|
375
|
+
|
|
376
|
+
/**
 * Run a single prompt through the `claude` command-line tool.
 *
 * The CLI is invoked synchronously (2-minute timeout, 10 MiB output buffer)
 * and the outcome is wrapped in a Promise so this function can be used
 * interchangeably with the HTTP-based providers.
 *
 * @param {string|null} systemPrompt - Passed via `--system-prompt` when truthy.
 * @param {string} userMessage - Passed via `-p`.
 * @returns {Promise<string>} Trimmed stdout on success; rejects with the spawn
 *   error, or with trimmed stderr (or a generic message) on a non-zero exit.
 */
function callClaudeCli(systemPrompt, userMessage) {
  // `--bare` is deliberately left out: it disables OAuth/keychain auth and so
  // requires ANTHROPIC_API_KEY, which a subscription-based Claude login lacks.
  const systemArgs = systemPrompt ? ['--system-prompt', systemPrompt] : [];
  const cliArgs = ['-p', userMessage, '--tools', '', ...systemArgs];

  const { error, status, stderr, stdout } = spawnSync('claude', cliArgs, {
    encoding: 'utf8',
    maxBuffer: 10 * 1024 * 1024,
    timeout: 120000,
  });

  if (error) {
    return Promise.reject(error);
  }
  if (status !== 0) {
    return Promise.reject(new Error(stderr?.trim() || 'claude CLI failed'));
  }
  return Promise.resolve(stdout.trim());
}
|
|
390
|
+
|
|
391
|
+
// Cached provider descriptor; stays null until a detection attempt succeeds.
let _provider = null;

/**
 * Return the detected LLM provider, running detection on first use.
 *
 * A successful detection is cached for the life of the process. A null
 * result is not cached, so detection is retried on the next call.
 *
 * @returns {object|null} Provider descriptor from detectProvider(), or null.
 */
function getProvider() {
  if (_provider) {
    return _provider;
  }
  _provider = detectProvider();
  return _provider;
}
|
|
396
|
+
|
|
397
|
+
/**
 * Route a prompt to whichever LLM provider was detected.
 *
 * @param {string|null} systemPrompt - Optional system prompt.
 * @param {string} userMessage - User message content.
 * @param {string} model - Model identifier; not forwarded to the Claude CLI.
 * @returns {Promise<string>} The provider's text response.
 * @throws {Error} Synchronously, when no provider is available or the
 *   detected provider type is not recognised.
 */
function callLLM(systemPrompt, userMessage, model) {
  const provider = getProvider();
  if (!provider) {
    throw new Error(
      'No LLM provider found.\n' +
      ' Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or EVAL_API_KEY+EVAL_BASE_URL\n' +
      ' — or install Claude Code (claude.ai/code) or Ollama (ollama.com)'
    );
  }

  switch (provider.type) {
    case 'anthropic':
      return callAnthropicApi(systemPrompt, userMessage, model);
    case 'openai-compat':
      return callOpenAICompat(provider.baseUrl, provider.key, systemPrompt, userMessage, model);
    case 'claude-cli':
      return callClaudeCli(systemPrompt, userMessage);
    default:
      throw new Error(`Unknown provider type: ${provider.type}`);
  }
}
|
|
409
|
+
|
|
282
410
|
function judgeResponse(response, expectations, model) {
|
|
283
411
|
const numbered = expectations.map((e, i) => `${i + 1}. ${e}`).join('\n');
|
|
284
412
|
const judgeSystem = `You are an eval judge. For each numbered expectation, respond with exactly:
|
|
@@ -293,7 +421,7 @@ ${response}
|
|
|
293
421
|
=== Expectations ===
|
|
294
422
|
${numbered}`;
|
|
295
423
|
|
|
296
|
-
return
|
|
424
|
+
return callLLM(judgeSystem, judgePrompt, model);
|
|
297
425
|
}
|
|
298
426
|
|
|
299
427
|
function parseJudgement(judgement, count) {
|
|
@@ -309,66 +437,40 @@ function parseJudgement(judgement, count) {
|
|
|
309
437
|
return results;
|
|
310
438
|
}
|
|
311
439
|
|
|
312
|
-
async function
|
|
313
|
-
const skillDir = path.join(skillsRoot, skillName);
|
|
314
|
-
const evalsPath = path.join(skillDir, 'evals', 'evals.json');
|
|
315
|
-
const model = opts.model || 'claude-haiku-4-5-20251001';
|
|
316
|
-
const judgeModel = opts.judgeModel || 'claude-haiku-4-5-20251001';
|
|
317
|
-
const filterId = opts.id || null;
|
|
318
|
-
|
|
319
|
-
if (!fs.existsSync(evalsPath)) {
|
|
320
|
-
console.error(c.red(`✗ No evals/evals.json found for "${skillName}"`));
|
|
321
|
-
process.exit(1);
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
let evals;
|
|
325
|
-
try {
|
|
326
|
-
evals = JSON.parse(fs.readFileSync(evalsPath, 'utf8')).evals || [];
|
|
327
|
-
} catch {
|
|
328
|
-
console.error(c.red('✗ evals/evals.json is invalid JSON'));
|
|
329
|
-
process.exit(1);
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
if (filterId) evals = evals.filter(e => e.id === filterId);
|
|
333
|
-
if (!evals.length) {
|
|
334
|
-
console.error(c.red(`✗ No evals found${filterId ? ` matching --id ${filterId}` : ''}`));
|
|
335
|
-
process.exit(1);
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
const skillMd = getSkillMdContent(skillName);
|
|
339
|
-
|
|
340
|
-
console.log('');
|
|
341
|
-
console.log(c.bold(` ${skillName}`) + c.dim(` — evals (${evals.length})`));
|
|
342
|
-
console.log(' ' + c.line(55));
|
|
343
|
-
console.log(c.dim(` model: ${model} judge: ${judgeModel}\n`));
|
|
344
|
-
|
|
440
|
+
async function runEvalSet(evals, systemPrompt, model, judgeModel, verbose) {
|
|
345
441
|
let totalPass = 0, totalFail = 0, evalsFullyPassed = 0;
|
|
346
442
|
|
|
347
443
|
for (const ev of evals) {
|
|
348
444
|
const promptLines = (ev.prompt || '').split('\n').length;
|
|
349
445
|
const expectations = ev.expectations || [];
|
|
350
446
|
|
|
351
|
-
|
|
352
|
-
|
|
447
|
+
if (verbose) {
|
|
448
|
+
process.stdout.write(` ${c.cyan('●')} ${c.bold(ev.id)}\n`);
|
|
449
|
+
process.stdout.write(c.dim(` prompt: ${promptLines} lines — calling ${model}...`));
|
|
450
|
+
} else {
|
|
451
|
+
process.stdout.write(c.dim(` ${ev.id}...`));
|
|
452
|
+
}
|
|
353
453
|
|
|
354
454
|
let response;
|
|
355
455
|
try {
|
|
356
|
-
response = await
|
|
357
|
-
process.stdout.write(c.green(' done\n'));
|
|
456
|
+
response = await callLLM(systemPrompt, ev.prompt, model);
|
|
457
|
+
if (verbose) process.stdout.write(c.green(' done\n'));
|
|
458
|
+
else process.stdout.write(c.dim(' ✓\n'));
|
|
358
459
|
} catch (e) {
|
|
359
|
-
process.stdout.write(c.red(` failed: ${e.message}\n`));
|
|
460
|
+
if (verbose) process.stdout.write(c.red(` failed: ${e.message}\n`));
|
|
461
|
+
else process.stdout.write(c.red(` ✗\n`));
|
|
360
462
|
totalFail += expectations.length;
|
|
361
463
|
continue;
|
|
362
464
|
}
|
|
363
465
|
|
|
364
|
-
process.stdout.write(c.dim(` judging ${expectations.length} expectations...`));
|
|
466
|
+
if (verbose) process.stdout.write(c.dim(` judging ${expectations.length} expectations...`));
|
|
365
467
|
|
|
366
468
|
let judgement;
|
|
367
469
|
try {
|
|
368
470
|
judgement = await judgeResponse(response, expectations, judgeModel);
|
|
369
|
-
process.stdout.write(c.dim(' done\n'));
|
|
471
|
+
if (verbose) process.stdout.write(c.dim(' done\n'));
|
|
370
472
|
} catch (e) {
|
|
371
|
-
process.stdout.write(c.red(` judge failed: ${e.message}\n`));
|
|
473
|
+
if (verbose) process.stdout.write(c.red(` judge failed: ${e.message}\n`));
|
|
372
474
|
totalFail += expectations.length;
|
|
373
475
|
continue;
|
|
374
476
|
}
|
|
@@ -378,26 +480,134 @@ async function runEvals(skillName, opts = {}) {
|
|
|
378
480
|
|
|
379
481
|
for (let i = 0; i < expectations.length; i++) {
|
|
380
482
|
const r = results[i];
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
483
|
+
if (verbose) {
|
|
484
|
+
const icon = r.ok ? c.green('✓') : c.red('✗');
|
|
485
|
+
const exp = expectations[i].length > 80 ? expectations[i].slice(0, 79) + '…' : expectations[i];
|
|
486
|
+
console.log(` ${icon} ${exp}`);
|
|
487
|
+
if (!r.ok) console.log(c.dim(` → ${r.reason}`));
|
|
488
|
+
}
|
|
385
489
|
if (r.ok) { evalPass++; totalPass++; } else { totalFail++; }
|
|
386
490
|
}
|
|
387
491
|
|
|
388
492
|
const evalTotal = expectations.length;
|
|
389
493
|
const allPassed = evalPass === evalTotal;
|
|
390
494
|
if (allPassed) evalsFullyPassed++;
|
|
391
|
-
console.log(c.dim(` ${evalPass}/${evalTotal} expectations passed`) + (allPassed ? ' ' + c.green('✓') : '') + '\n');
|
|
495
|
+
if (verbose) console.log(c.dim(` ${evalPass}/${evalTotal} expectations passed`) + (allPassed ? ' ' + c.green('✓') : '') + '\n');
|
|
392
496
|
}
|
|
393
497
|
|
|
394
498
|
const total = totalPass + totalFail;
|
|
395
|
-
|
|
396
|
-
|
|
499
|
+
return { passed: totalPass, failed: totalFail, total, evalsFullyPassed, pass_rate: total > 0 ? totalPass / total : 0 };
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
/**
 * Format a percentage-point delta with an explicit sign, e.g. `+25pp`, `-5pp`.
 * Zero is rendered as `+0pp`.
 */
function formatDeltaPp(deltaPp) {
  return `${deltaPp >= 0 ? '+' : ''}${deltaPp}pp`;
}

/**
 * True when a provider base URL points at the local machine
 * (`localhost`, `127.0.0.1` or `[::1]`).
 */
function isLocalBaseUrl(baseUrl) {
  return ['localhost', '127.0.0.1', '[::1]'].some(host => baseUrl.includes(host));
}

/**
 * Run a skill's evals twice — with the skill as system prompt and without it
 * (baseline) — print both results plus the delta, and write
 * `evals/results.json` for the skill.
 *
 * Exits the process with status 1 when: no provider is detected, no model can
 * be determined, evals.json is missing/invalid/empty (after filtering), or the
 * with-skill pass rate is below 80%.
 *
 * @param {string} skillName - Skill directory name under the skills root.
 * @param {{model?: string, id?: string}} [opts] - `model` overrides the
 *   EVAL_MODEL env var and the provider default; `id` restricts the run to
 *   the eval with that id.
 * @returns {Promise<void>}
 */
async function runEvals(skillName, opts = {}) {
  const skillDir = path.join(skillsRoot, skillName);
  const evalsPath = path.join(skillDir, 'evals', 'evals.json');
  const provider = getProvider();
  if (!provider) {
    console.error(c.red(
      '✗ No LLM provider found.\n' +
      ' Options (pick one):\n' +
      ' ANTHROPIC_API_KEY=sk-ant-... (Anthropic API)\n' +
      ' OPENAI_API_KEY=sk-... (OpenAI)\n' +
      ' EVAL_API_KEY=... EVAL_BASE_URL=https://api.groq.com/openai/v1 (any OpenAI-compatible)\n' +
      ' Install Claude Code: claude.ai/code (subscription, no key)\n' +
      ' Install Ollama: ollama.com (local, no key)'
    ));
    process.exit(1);
  }
  const defaultModel = provider.defaultModel;
  const model = opts.model || process.env.EVAL_MODEL || defaultModel;
  if (!model) {
    console.error(c.red(`✗ No model specified. Use --model=<name> or set EVAL_MODEL env var.`));
    process.exit(1);
  }
  // The judge runs on the same model as the evaluated responses.
  const judgeModel = model;
  const filterId = opts.id || null;

  if (!fs.existsSync(evalsPath)) {
    console.error(c.red(`✗ No evals/evals.json found for "${skillName}"`));
    process.exit(1);
  }

  let evals;
  try {
    evals = JSON.parse(fs.readFileSync(evalsPath, 'utf8')).evals || [];
  } catch {
    console.error(c.red('✗ evals/evals.json is invalid JSON'));
    process.exit(1);
  }

  if (filterId) evals = evals.filter(e => e.id === filterId);
  if (!evals.length) {
    console.error(c.red(`✗ No evals found${filterId ? ` matching --id ${filterId}` : ''}`));
    process.exit(1);
  }

  const skillMd = getSkillMdContent(skillName);

  console.log('');
  console.log(c.bold(` ${skillName}`) + c.dim(` — evals (${evals.length})`));
  console.log(' ' + c.line(55));
  const providerLabel = provider.type === 'claude-cli' ? 'claude CLI' : provider.type === 'anthropic' ? 'Anthropic API' : provider.baseUrl;
  console.log(c.dim(` provider: ${providerLabel} model: ${model}\n`));

  // ── With-skill run ──────────────────────────────────────────────────────────
  console.log(c.bold(' With skill\n'));
  const withResult = await runEvalSet(evals, skillMd, model, judgeModel, true);
  const withPct = Math.round(withResult.pass_rate * 100);
  const withColor = withPct >= 80 ? c.green : withPct >= 60 ? c.yellow : c.red;
  console.log(' ' + c.line(55));
  console.log(` ${withColor(`${withPct}%`)} — ${withResult.evalsFullyPassed}/${evals.length} evals fully passed, ${withResult.passed}/${withResult.total} expectations met\n`);

  // ── Baseline run (no skill system prompt) ───────────────────────────────────
  console.log(c.dim(' Baseline (without skill)\n'));
  const baseResult = await runEvalSet(evals, null, model, judgeModel, false);
  const basePct = Math.round(baseResult.pass_rate * 100);
  console.log(' ' + c.line(55));
  console.log(c.dim(` ${basePct}% — ${baseResult.passed}/${baseResult.total} expectations met\n`));

  // ── Summary ─────────────────────────────────────────────────────────────────
  const deltaPp = withPct - basePct;
  const deltaColor = deltaPp >= 20 ? c.green : deltaPp >= 10 ? c.yellow : c.red;
  console.log(' ' + c.line(55));
  // Signed formatting: a skill that scores below baseline must not print "+-5pp".
  console.log(` ${c.bold('Summary')} with skill: ${withColor(`${withPct}%`)} baseline: ${c.dim(`${basePct}%`)} delta: ${deltaColor(formatDeltaPp(deltaPp))}`);

  // ── Warn if using a non-standard provider ───────────────────────────────────
  // Loopback addresses count as local too, not only the literal "localhost".
  const isLocalModel = provider.type === 'openai-compat' && isLocalBaseUrl(provider.baseUrl);
  const isCliModel = provider.type === 'claude-cli';
  if (isLocalModel || isCliModel) {
    const providerName = isLocalModel ? `local model (${model})` : 'claude CLI';
    console.log('');
    console.log(c.yellow(` ⚠ Results generated with ${providerName}.`));
    console.log(c.dim(' For committing to the repo, use a standardized provider so scores'));
    console.log(c.dim(' are comparable across all skills:'));
    console.log(c.dim(' ANTHROPIC_API_KEY=... (recommended: claude-haiku-4-5-20251001)'));
    console.log(c.dim(' OPENAI_API_KEY=... (recommended: gpt-4o-mini)'));
    console.log(c.dim(' results.json will be written but should not be committed as-is.'));
  }

  // ── Write results.json ───────────────────────────────────────────────────────
  // NOTE(review): a run filtered with --id also overwrites results.json, so a
  // single-eval run replaces the full-suite numbers — confirm this is intended.
  const resultsData = {
    pass_rate: Math.round(withResult.pass_rate * 1000) / 1000,
    passed: withResult.passed,
    total: withResult.total,
    baseline_pass_rate: Math.round(baseResult.pass_rate * 1000) / 1000,
    baseline_passed: baseResult.passed,
    baseline_total: baseResult.total,
    delta: Math.round((withResult.pass_rate - baseResult.pass_rate) * 1000) / 1000,
    model,
    evals_run: evals.length,
    date: new Date().toISOString().split('T')[0],
    ...(isLocalModel || isCliModel ? { non_standard_provider: true } : {}),
  };
  const resultsPath = path.join(skillDir, 'evals', 'results.json');
  fs.writeFileSync(resultsPath, JSON.stringify(resultsData, null, 2));
  console.log(c.dim(`\n ✓ results saved → evals/results.json\n`));

  if (withPct < 80) {
    console.error(c.red(` ✗ Pass rate ${withPct}% is below the 80% minimum\n`));
    process.exit(1);
  }
}
|
|
402
612
|
|
|
403
613
|
// ─── Router ───────────────────────────────────────────────────────────────────
|
|
@@ -491,14 +701,17 @@ async function main() {
|
|
|
491
701
|
}
|
|
492
702
|
|
|
493
703
|
case 'add': {
|
|
494
|
-
const addAll
|
|
704
|
+
const addAll = args.includes('--all');
|
|
705
|
+
const noCommands = args.includes('--no-commands');
|
|
495
706
|
const skillName = args.find(a => !a.startsWith('--') && a !== 'add');
|
|
496
707
|
if (addAll) {
|
|
497
708
|
const skills = getAvailableSkills();
|
|
498
709
|
skills.forEach(s => copySkill(s, targetDir));
|
|
710
|
+
if (!noCommands) skills.forEach(s => copyCommand(s));
|
|
499
711
|
console.log(c.dim(`\nInstalled ${skills.length} skills to ${targetDir}`));
|
|
500
712
|
} else if (skillName) {
|
|
501
713
|
copySkill(skillName, targetDir);
|
|
714
|
+
if (!noCommands) copyCommand(skillName);
|
|
502
715
|
console.log(c.dim(`\nInstalled to ${targetDir}`));
|
|
503
716
|
} else {
|
|
504
717
|
console.error(c.red('Usage: skills add <skill-name> | skills add --all'));
|
|
@@ -569,6 +782,42 @@ async function main() {
|
|
|
569
782
|
break;
|
|
570
783
|
}
|
|
571
784
|
|
|
785
|
+
case 'update-readme': {
|
|
786
|
+
const skills = getAvailableSkills();
|
|
787
|
+
const rows = skills.map(skillName => {
|
|
788
|
+
const resultsPath = path.join(skillsRoot, skillName, 'evals', 'results.json');
|
|
789
|
+
if (!fs.existsSync(resultsPath)) return `| ${skillName} | — | — | — | — | — |`;
|
|
790
|
+
try {
|
|
791
|
+
const r = JSON.parse(fs.readFileSync(resultsPath, 'utf8'));
|
|
792
|
+
const pct = Math.round((r.pass_rate || 0) * 100) + '%';
|
|
793
|
+
const basePct = r.baseline_pass_rate !== undefined ? Math.round(r.baseline_pass_rate * 100) + '%' : '—';
|
|
794
|
+
const delta = r.delta !== undefined ? `+${Math.round(r.delta * 100)}pp` : '—';
|
|
795
|
+
const warn = r.non_standard_provider ? ' ⚠' : '';
|
|
796
|
+
return `| ${skillName} | ${pct}${warn} | ${basePct} | ${delta} | ${r.evals_run ?? '—'} | ${r.date ?? '—'} |`;
|
|
797
|
+
} catch { return `| ${skillName} | — | — | — | — | — |`; }
|
|
798
|
+
});
|
|
799
|
+
|
|
800
|
+
const tableHeader = '| Skill | Pass Rate | Baseline | Delta | Evals | Last Run |\n|-------|-----------|----------|-------|-------|----------|';
|
|
801
|
+
const newTable = `<!-- quality-table-start -->\n${tableHeader}\n${rows.join('\n')}\n<!-- quality-table-end -->`;
|
|
802
|
+
|
|
803
|
+
const readmePath = path.join(__dirname, '..', 'README.md');
|
|
804
|
+
let readme = fs.readFileSync(readmePath, 'utf8');
|
|
805
|
+
readme = readme.replace(/<!-- quality-table-start -->[\s\S]*?<!-- quality-table-end -->/, newTable);
|
|
806
|
+
fs.writeFileSync(readmePath, readme);
|
|
807
|
+
|
|
808
|
+
const missing = skills.filter(s => !fs.existsSync(path.join(skillsRoot, s, 'evals', 'results.json')));
|
|
809
|
+
const nonStd = skills.filter(s => {
|
|
810
|
+
try { return JSON.parse(fs.readFileSync(path.join(skillsRoot, s, 'evals', 'results.json'), 'utf8')).non_standard_provider; }
|
|
811
|
+
catch { return false; }
|
|
812
|
+
});
|
|
813
|
+
console.log('');
|
|
814
|
+
console.log(c.green('✓') + ` README.md quality table updated (${skills.length} skills)`);
|
|
815
|
+
if (missing.length) console.log(c.dim(` ${missing.length} pending: ${missing.join(', ')}`));
|
|
816
|
+
if (nonStd.length) console.log(c.yellow(` ⚠ ${nonStd.length} non-standard provider: ${nonStd.join(', ')}`));
|
|
817
|
+
console.log('');
|
|
818
|
+
break;
|
|
819
|
+
}
|
|
820
|
+
|
|
572
821
|
default:
|
|
573
822
|
console.log(`
|
|
574
823
|
${c.bold(' @booklib/skills')} — book knowledge distilled into AI agent skills
|
|
@@ -577,14 +826,23 @@ ${c.bold(' Usage:')}
|
|
|
577
826
|
${c.cyan('skills list')} list all available skills
|
|
578
827
|
${c.cyan('skills info')} ${c.dim('<name>')} full description of a skill
|
|
579
828
|
${c.cyan('skills demo')} ${c.dim('<name>')} before/after example
|
|
580
|
-
${c.cyan('skills add')} ${c.dim('<name>')} install to .claude/
|
|
581
|
-
${c.cyan('skills add --all')} install all skills
|
|
582
|
-
${c.cyan('skills add')} ${c.dim('<name> --global')} install globally
|
|
829
|
+
${c.cyan('skills add')} ${c.dim('<name>')} install skill + /command to .claude/
|
|
830
|
+
${c.cyan('skills add --all')} install all skills + commands
|
|
831
|
+
${c.cyan('skills add')} ${c.dim('<name> --global')} install globally (~/.claude/)
|
|
832
|
+
${c.cyan('skills add')} ${c.dim('<name> --no-commands')} install skill only, skip command
|
|
583
833
|
${c.cyan('skills check')} ${c.dim('<name>')} quality check (Bronze/Silver/Gold/Platinum)
|
|
584
834
|
${c.cyan('skills check --all')} quality summary for all skills
|
|
585
|
-
${c.cyan('skills
|
|
835
|
+
${c.cyan('skills update-readme')} refresh README quality table from results.json files
|
|
836
|
+
${c.cyan('skills eval')} ${c.dim('<name>')} run evals (auto-detects provider)
|
|
586
837
|
${c.cyan('skills eval')} ${c.dim('<name> --model=<id>')} use a specific model
|
|
587
838
|
${c.cyan('skills eval')} ${c.dim('<name> --id=<eval-id>')} run a single eval
|
|
839
|
+
|
|
840
|
+
${c.bold('Provider auto-detection (first match wins):')}
|
|
841
|
+
ANTHROPIC_API_KEY Anthropic API (default model: claude-haiku-4-5-20251001)
|
|
842
|
+
OPENAI_API_KEY OpenAI API (default model: gpt-4o-mini)
|
|
843
|
+
EVAL_API_KEY+EVAL_BASE_URL any OpenAI-compatible endpoint (Groq, Together, etc.)
|
|
844
|
+
ollama installed local Ollama (requires --model or EVAL_MODEL)
|
|
845
|
+
claude CLI installed Claude Code subscription — no key needed
|
|
588
846
|
`);
|
|
589
847
|
}
|
|
590
848
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review web animations using Animation at Work principles (Rachel Nabors). Usage: /animation-at-work [file | component]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `animation-at-work` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review changed animation code.
|
|
9
|
+
|
|
10
|
+
Focus on purpose, duration, easing, performance (compositor-only properties), and accessibility (prefers-reduced-motion). Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review code against Clean Code principles (Robert C. Martin). Usage: /clean-code-reviewer [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `clean-code-reviewer` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Classify each finding as **HIGH** (correctness, naming that obscures intent), **MEDIUM** (design, structure), or **LOW** (style). Reference every finding as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review data storage and distributed systems patterns (DDIA, Martin Kleppmann). Usage: /data-intensive-patterns [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `data-intensive-patterns` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on replication, partitioning, transactions, consistency models, and storage engine trade-offs. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review data pipeline code using Data Pipelines Pocket Reference (James Densmore). Usage: /data-pipelines [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `data-pipelines` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on ingestion, transformation, orchestration, error handling, idempotency, and observability. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review OO design and apply GoF patterns (Head First Design Patterns). Usage: /design-patterns [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `design-patterns` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Identify which GoF patterns are present, misapplied, or missing. Flag over-engineering. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review domain models using DDD patterns (Eric Evans). Usage: /domain-driven-design [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `domain-driven-design` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on Aggregates, Entities, Value Objects, Bounded Contexts, and domain invariants. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review Java code against Effective Java items (Joshua Bloch). Usage: /effective-java [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `effective-java` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on generics, builders, enums, exceptions, and API design. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review Kotlin code against Effective Kotlin best practices (Marcin Moskała). Usage: /effective-kotlin [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `effective-kotlin` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on safety, readability, idiomatic Kotlin, and common pitfalls. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review Python code against Effective Python practices (Brett Slatkin). Usage: /effective-python [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `effective-python` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on Pythonic idioms, list/dict usage, comprehensions, classes, and robustness. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review TypeScript code against Effective TypeScript (Dan Vanderkam). Usage: /effective-typescript [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `effective-typescript` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on type design, avoiding `any`, structural typing, and type inference. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Review Kotlin code using Kotlin in Action patterns (Elizarov, Isakova). Usage: /kotlin-in-action [file | path]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `kotlin-in-action` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, run `git diff HEAD` and review those changes.
|
|
9
|
+
|
|
10
|
+
Focus on language features: coroutines, extension functions, sealed classes, delegation, and DSL patterns. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Evaluate product/feature decisions using The Lean Startup framework (Eric Ries). Usage: /lean-startup [feature description | file]
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
Apply the `lean-startup` skill.
|
|
6
|
+
|
|
7
|
+
**Target:** $ARGUMENTS
|
|
8
|
+
If no target was given, describe the feature or product decision you want evaluated.
|
|
9
|
+
|
|
10
|
+
Focus on validated learning, Build-Measure-Learn cycle, MVP scope, pivot signals, and vanity vs. actionable metrics.
|