@booklib/skills 1.5.2 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/CONTRIBUTING.md +23 -1
  2. package/README.md +55 -0
  3. package/benchmark/devto-post.md +178 -0
  4. package/benchmark/order-processing.original.js +158 -0
  5. package/benchmark/order-processing.pr-toolkit.js +181 -0
  6. package/benchmark/order-processing.skill-router.js +271 -0
  7. package/benchmark/review-report.md +129 -0
  8. package/bin/skills.js +327 -69
  9. package/commands/animation-at-work.md +10 -0
  10. package/commands/clean-code-reviewer.md +10 -0
  11. package/commands/data-intensive-patterns.md +10 -0
  12. package/commands/data-pipelines.md +10 -0
  13. package/commands/design-patterns.md +10 -0
  14. package/commands/domain-driven-design.md +10 -0
  15. package/commands/effective-java.md +10 -0
  16. package/commands/effective-kotlin.md +10 -0
  17. package/commands/effective-python.md +10 -0
  18. package/commands/effective-typescript.md +10 -0
  19. package/commands/kotlin-in-action.md +10 -0
  20. package/commands/lean-startup.md +10 -0
  21. package/commands/microservices-patterns.md +10 -0
  22. package/commands/programming-with-rust.md +10 -0
  23. package/commands/refactoring-ui.md +10 -0
  24. package/commands/rust-in-action.md +10 -0
  25. package/commands/skill-router.md +10 -0
  26. package/commands/spring-boot-in-action.md +10 -0
  27. package/commands/storytelling-with-data.md +10 -0
  28. package/commands/system-design-interview.md +10 -0
  29. package/commands/using-asyncio-python.md +10 -0
  30. package/commands/web-scraping-python.md +10 -0
  31. package/docs/index.html +62 -13
  32. package/package.json +4 -1
  33. package/scripts/gen-og.mjs +142 -0
  34. package/skills/skill-router/SKILL.md +23 -0
package/bin/skills.js CHANGED
@@ -4,10 +4,13 @@ const fs = require('fs');
4
4
  const path = require('path');
5
5
  const os = require('os');
6
6
  const https = require('https');
7
+ const http = require('http');
8
+ const { spawnSync } = require('child_process');
7
9
 
8
10
  const args = process.argv.slice(2);
9
11
  const command = args[0];
10
- const skillsRoot = path.join(__dirname, '..', 'skills');
12
+ const skillsRoot = path.join(__dirname, '..', 'skills');
13
+ const commandsRoot = path.join(__dirname, '..', 'commands');
11
14
 
12
15
  // ─── ANSI helpers ─────────────────────────────────────────────────────────────
13
16
  const c = {
@@ -86,10 +89,22 @@ function copySkill(skillName, targetDir) {
86
89
  console.log(c.green('✓') + ` ${c.bold(skillName)} → ${c.dim(dest)}`);
87
90
  }
88
91
 
89
- const isGlobal = args.includes('--global');
90
- const targetDir = isGlobal
92
+ const isGlobal = args.includes('--global');
93
+ const targetDir = isGlobal
91
94
  ? path.join(os.homedir(), '.claude', 'skills')
92
95
  : path.join(process.cwd(), '.claude', 'skills');
96
+ const commandsTargetDir = isGlobal
97
+ ? path.join(os.homedir(), '.claude', 'commands')
98
+ : path.join(process.cwd(), '.claude', 'commands');
99
+
100
+ function copyCommand(skillName) {
101
+ const src = path.join(commandsRoot, `${skillName}.md`);
102
+ if (!fs.existsSync(src)) return;
103
+ fs.mkdirSync(commandsTargetDir, { recursive: true });
104
+ const dest = path.join(commandsTargetDir, `${skillName}.md`);
105
+ fs.copyFileSync(src, dest);
106
+ console.log(c.green('✓') + ` /${skillName} command → ${c.dim(dest)}`);
107
+ }
93
108
 
94
109
  // ─── CHECK command ────────────────────────────────────────────────────────────
95
110
  function checkSkill(skillName) {
@@ -201,6 +216,33 @@ function checkSkill(skillName) {
201
216
  : fail('platinum', 'scripts/ exists but is empty'));
202
217
  }
203
218
 
219
+ const resultsPath = path.join(skillDir, 'evals', 'results.json');
220
+ if (!fs.existsSync(resultsPath)) {
221
+ checks.push(fail('platinum', 'evals/results.json missing — run: npx @booklib/skills eval <name>'));
222
+ } else {
223
+ let results = null;
224
+ try { results = JSON.parse(fs.readFileSync(resultsPath, 'utf8')); } catch {
225
+ checks.push(fail('platinum', 'evals/results.json is invalid JSON'));
226
+ }
227
+ if (results) {
228
+ if (results.non_standard_provider) {
229
+ checks.push(fail('platinum', `eval results from non-standard provider (${results.model}) — rerun with ANTHROPIC_API_KEY or OPENAI_API_KEY`));
230
+ }
231
+ const pct = Math.round((results.pass_rate || 0) * 100);
232
+ const meta = `(${results.evals_run} evals, ${results.model}, ${results.date})`;
233
+ checks.push(pct >= 80
234
+ ? pass('platinum', `eval pass rate: ${pct}% with skill ${meta}`)
235
+ : fail('platinum', `eval pass rate ${pct}% below 80% minimum — run: npx @booklib/skills eval <name>`));
236
+ if (results.delta !== undefined) {
237
+ const deltaPp = Math.round(results.delta * 100);
238
+ const basePct = Math.round((results.baseline_pass_rate || 0) * 100);
239
+ checks.push(deltaPp >= 20
240
+ ? pass('platinum', `eval delta: +${deltaPp}pp over baseline (${basePct}% without skill)`)
241
+ : fail('platinum', `eval delta +${deltaPp}pp below 20pp minimum (baseline: ${basePct}%)`));
242
+ }
243
+ }
244
+ }
245
+
204
246
  return checks;
205
247
  }
206
248
 
@@ -240,16 +282,30 @@ function printCheckResults(skillName, checks) {
240
282
  }
241
283
 
242
284
  // ─── EVAL command ─────────────────────────────────────────────────────────────
243
- function callClaude(systemPrompt, userMessage, model) {
244
- const apiKey = process.env.ANTHROPIC_API_KEY;
245
- if (!apiKey) throw new Error('ANTHROPIC_API_KEY environment variable not set');
246
285
 
247
- const body = JSON.stringify({
248
- model,
249
- max_tokens: 4096,
250
- system: systemPrompt,
251
- messages: [{ role: 'user', content: userMessage }],
252
- });
286
+ function commandExists(cmd) {
287
+ const result = spawnSync(process.platform === 'win32' ? 'where' : 'which', [cmd], { stdio: 'ignore' });
288
+ return result.status === 0;
289
+ }
290
+
291
+ function detectProvider() {
292
+ if (process.env.ANTHROPIC_API_KEY)
293
+ return { type: 'anthropic', defaultModel: 'claude-haiku-4-5-20251001' };
294
+ if (process.env.OPENAI_API_KEY)
295
+ return { type: 'openai-compat', baseUrl: 'https://api.openai.com/v1', key: process.env.OPENAI_API_KEY, defaultModel: 'gpt-4o-mini' };
296
+ if (process.env.EVAL_API_KEY && process.env.EVAL_BASE_URL)
297
+ return { type: 'openai-compat', baseUrl: process.env.EVAL_BASE_URL, key: process.env.EVAL_API_KEY, defaultModel: null };
298
+ if (commandExists('claude'))
299
+ return { type: 'claude-cli', defaultModel: 'default' };
300
+ if (commandExists('ollama'))
301
+ return { type: 'openai-compat', baseUrl: 'http://localhost:11434/v1', key: 'ollama', defaultModel: null };
302
+ return null;
303
+ }
304
+
305
+ function callAnthropicApi(systemPrompt, userMessage, model) {
306
+ const reqBody = { model, max_tokens: 4096, messages: [{ role: 'user', content: userMessage }] };
307
+ if (systemPrompt) reqBody.system = systemPrompt;
308
+ const body = JSON.stringify(reqBody);
253
309
 
254
310
  return new Promise((resolve, reject) => {
255
311
  const req = https.request({
@@ -258,7 +314,7 @@ function callClaude(systemPrompt, userMessage, model) {
258
314
  method: 'POST',
259
315
  headers: {
260
316
  'Content-Type': 'application/json',
261
- 'x-api-key': apiKey,
317
+ 'x-api-key': process.env.ANTHROPIC_API_KEY,
262
318
  'anthropic-version': '2023-06-01',
263
319
  'Content-Length': Buffer.byteLength(body),
264
320
  },
@@ -279,6 +335,78 @@ function callClaude(systemPrompt, userMessage, model) {
279
335
  });
280
336
  }
281
337
 
338
+ function callOpenAICompat(baseUrl, apiKey, systemPrompt, userMessage, model) {
339
+ const messages = [];
340
+ if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
341
+ messages.push({ role: 'user', content: userMessage });
342
+ const body = JSON.stringify({ model, max_tokens: 4096, messages });
343
+
344
+ const url = new URL('/chat/completions', baseUrl);
345
+ const isHttps = url.protocol === 'https:';
346
+ const transport = isHttps ? https : http;
347
+
348
+ return new Promise((resolve, reject) => {
349
+ const req = transport.request({
350
+ hostname: url.hostname,
351
+ port: url.port || (isHttps ? 443 : 80),
352
+ path: url.pathname + url.search,
353
+ method: 'POST',
354
+ headers: {
355
+ 'Content-Type': 'application/json',
356
+ 'Authorization': `Bearer ${apiKey}`,
357
+ 'Content-Length': Buffer.byteLength(body),
358
+ },
359
+ }, res => {
360
+ let data = '';
361
+ res.on('data', chunk => data += chunk);
362
+ res.on('end', () => {
363
+ try {
364
+ const parsed = JSON.parse(data);
365
+ if (parsed.error) reject(new Error(parsed.error.message || JSON.stringify(parsed.error)));
366
+ else resolve(parsed.choices?.[0]?.message?.content ?? '');
367
+ } catch (e) { reject(e); }
368
+ });
369
+ });
370
+ req.on('error', reject);
371
+ req.write(body);
372
+ req.end();
373
+ });
374
+ }
375
+
376
+ function callClaudeCli(systemPrompt, userMessage) {
377
+ // --bare disables OAuth/keychain auth (requires ANTHROPIC_API_KEY), so omit it
378
+ // when using a subscription-based Claude login.
379
+ const cliArgs = ['-p', userMessage, '--tools', ''];
380
+ if (systemPrompt) cliArgs.push('--system-prompt', systemPrompt);
381
+ const result = spawnSync('claude', cliArgs, {
382
+ encoding: 'utf8',
383
+ maxBuffer: 10 * 1024 * 1024,
384
+ timeout: 120000,
385
+ });
386
+ if (result.error) return Promise.reject(result.error);
387
+ if (result.status !== 0) return Promise.reject(new Error(result.stderr?.trim() || 'claude CLI failed'));
388
+ return Promise.resolve(result.stdout.trim());
389
+ }
390
+
391
+ let _provider = null;
392
+ function getProvider() {
393
+ if (!_provider) _provider = detectProvider();
394
+ return _provider;
395
+ }
396
+
397
+ function callLLM(systemPrompt, userMessage, model) {
398
+ const provider = getProvider();
399
+ if (!provider) throw new Error(
400
+ 'No LLM provider found.\n' +
401
+ ' Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or EVAL_API_KEY+EVAL_BASE_URL\n' +
402
+ ' — or install Claude Code (claude.ai/code) or Ollama (ollama.com)'
403
+ );
404
+ if (provider.type === 'anthropic') return callAnthropicApi(systemPrompt, userMessage, model);
405
+ if (provider.type === 'openai-compat') return callOpenAICompat(provider.baseUrl, provider.key, systemPrompt, userMessage, model);
406
+ if (provider.type === 'claude-cli') return callClaudeCli(systemPrompt, userMessage);
407
+ throw new Error(`Unknown provider type: ${provider.type}`);
408
+ }
409
+
282
410
  function judgeResponse(response, expectations, model) {
283
411
  const numbered = expectations.map((e, i) => `${i + 1}. ${e}`).join('\n');
284
412
  const judgeSystem = `You are an eval judge. For each numbered expectation, respond with exactly:
@@ -293,7 +421,7 @@ ${response}
293
421
  === Expectations ===
294
422
  ${numbered}`;
295
423
 
296
- return callClaude(judgeSystem, judgePrompt, model);
424
+ return callLLM(judgeSystem, judgePrompt, model);
297
425
  }
298
426
 
299
427
  function parseJudgement(judgement, count) {
@@ -309,66 +437,40 @@ function parseJudgement(judgement, count) {
309
437
  return results;
310
438
  }
311
439
 
312
- async function runEvals(skillName, opts = {}) {
313
- const skillDir = path.join(skillsRoot, skillName);
314
- const evalsPath = path.join(skillDir, 'evals', 'evals.json');
315
- const model = opts.model || 'claude-haiku-4-5-20251001';
316
- const judgeModel = opts.judgeModel || 'claude-haiku-4-5-20251001';
317
- const filterId = opts.id || null;
318
-
319
- if (!fs.existsSync(evalsPath)) {
320
- console.error(c.red(`✗ No evals/evals.json found for "${skillName}"`));
321
- process.exit(1);
322
- }
323
-
324
- let evals;
325
- try {
326
- evals = JSON.parse(fs.readFileSync(evalsPath, 'utf8')).evals || [];
327
- } catch {
328
- console.error(c.red('✗ evals/evals.json is invalid JSON'));
329
- process.exit(1);
330
- }
331
-
332
- if (filterId) evals = evals.filter(e => e.id === filterId);
333
- if (!evals.length) {
334
- console.error(c.red(`✗ No evals found${filterId ? ` matching --id ${filterId}` : ''}`));
335
- process.exit(1);
336
- }
337
-
338
- const skillMd = getSkillMdContent(skillName);
339
-
340
- console.log('');
341
- console.log(c.bold(` ${skillName}`) + c.dim(` — evals (${evals.length})`));
342
- console.log(' ' + c.line(55));
343
- console.log(c.dim(` model: ${model} judge: ${judgeModel}\n`));
344
-
440
+ async function runEvalSet(evals, systemPrompt, model, judgeModel, verbose) {
345
441
  let totalPass = 0, totalFail = 0, evalsFullyPassed = 0;
346
442
 
347
443
  for (const ev of evals) {
348
444
  const promptLines = (ev.prompt || '').split('\n').length;
349
445
  const expectations = ev.expectations || [];
350
446
 
351
- process.stdout.write(` ${c.cyan('●')} ${c.bold(ev.id)}\n`);
352
- process.stdout.write(c.dim(` prompt: ${promptLines} lines — calling ${model}...`));
447
+ if (verbose) {
448
+ process.stdout.write(` ${c.cyan('●')} ${c.bold(ev.id)}\n`);
449
+ process.stdout.write(c.dim(` prompt: ${promptLines} lines — calling ${model}...`));
450
+ } else {
451
+ process.stdout.write(c.dim(` ${ev.id}...`));
452
+ }
353
453
 
354
454
  let response;
355
455
  try {
356
- response = await callClaude(skillMd, ev.prompt, model);
357
- process.stdout.write(c.green(' done\n'));
456
+ response = await callLLM(systemPrompt, ev.prompt, model);
457
+ if (verbose) process.stdout.write(c.green(' done\n'));
458
+ else process.stdout.write(c.dim(' ✓\n'));
358
459
  } catch (e) {
359
- process.stdout.write(c.red(` failed: ${e.message}\n`));
460
+ if (verbose) process.stdout.write(c.red(` failed: ${e.message}\n`));
461
+ else process.stdout.write(c.red(` ✗\n`));
360
462
  totalFail += expectations.length;
361
463
  continue;
362
464
  }
363
465
 
364
- process.stdout.write(c.dim(` judging ${expectations.length} expectations...`));
466
+ if (verbose) process.stdout.write(c.dim(` judging ${expectations.length} expectations...`));
365
467
 
366
468
  let judgement;
367
469
  try {
368
470
  judgement = await judgeResponse(response, expectations, judgeModel);
369
- process.stdout.write(c.dim(' done\n'));
471
+ if (verbose) process.stdout.write(c.dim(' done\n'));
370
472
  } catch (e) {
371
- process.stdout.write(c.red(` judge failed: ${e.message}\n`));
473
+ if (verbose) process.stdout.write(c.red(` judge failed: ${e.message}\n`));
372
474
  totalFail += expectations.length;
373
475
  continue;
374
476
  }
@@ -378,26 +480,134 @@ async function runEvals(skillName, opts = {}) {
378
480
 
379
481
  for (let i = 0; i < expectations.length; i++) {
380
482
  const r = results[i];
381
- const icon = r.ok ? c.green('✓') : c.red('✗');
382
- const exp = expectations[i].length > 80 ? expectations[i].slice(0, 79) + '' : expectations[i];
383
- console.log(` ${icon} ${exp}`);
384
- if (!r.ok) console.log(c.dim(` ${r.reason}`));
483
+ if (verbose) {
484
+ const icon = r.ok ? c.green('✓') : c.red('✗');
485
+ const exp = expectations[i].length > 80 ? expectations[i].slice(0, 79) + '…' : expectations[i];
486
+ console.log(` ${icon} ${exp}`);
487
+ if (!r.ok) console.log(c.dim(` → ${r.reason}`));
488
+ }
385
489
  if (r.ok) { evalPass++; totalPass++; } else { totalFail++; }
386
490
  }
387
491
 
388
492
  const evalTotal = expectations.length;
389
493
  const allPassed = evalPass === evalTotal;
390
494
  if (allPassed) evalsFullyPassed++;
391
- console.log(c.dim(` ${evalPass}/${evalTotal} expectations passed`) + (allPassed ? ' ' + c.green('✓') : '') + '\n');
495
+ if (verbose) console.log(c.dim(` ${evalPass}/${evalTotal} expectations passed`) + (allPassed ? ' ' + c.green('✓') : '') + '\n');
392
496
  }
393
497
 
394
498
  const total = totalPass + totalFail;
395
- const pct = total > 0 ? Math.round((totalPass / total) * 100) : 0;
396
- const color = pct >= 80 ? c.green : pct >= 60 ? c.yellow : c.red;
499
+ return { passed: totalPass, failed: totalFail, total, evalsFullyPassed, pass_rate: total > 0 ? totalPass / total : 0 };
500
+ }
501
+
502
+ async function runEvals(skillName, opts = {}) {
503
+ const skillDir = path.join(skillsRoot, skillName);
504
+ const evalsPath = path.join(skillDir, 'evals', 'evals.json');
505
+ const provider = getProvider();
506
+ if (!provider) {
507
+ console.error(c.red(
508
+ '✗ No LLM provider found.\n' +
509
+ ' Options (pick one):\n' +
510
+ ' ANTHROPIC_API_KEY=sk-ant-... (Anthropic API)\n' +
511
+ ' OPENAI_API_KEY=sk-... (OpenAI)\n' +
512
+ ' EVAL_API_KEY=... EVAL_BASE_URL=https://api.groq.com/openai/v1 (any OpenAI-compatible)\n' +
513
+ ' Install Claude Code: claude.ai/code (subscription, no key)\n' +
514
+ ' Install Ollama: ollama.com (local, no key)'
515
+ ));
516
+ process.exit(1);
517
+ }
518
+ const defaultModel = provider.defaultModel;
519
+ const model = opts.model || process.env.EVAL_MODEL || defaultModel;
520
+ if (!model) {
521
+ console.error(c.red(`✗ No model specified. Use --model=<name> or set EVAL_MODEL env var.`));
522
+ process.exit(1);
523
+ }
524
+ const judgeModel = model;
525
+ const filterId = opts.id || null;
526
+
527
+ if (!fs.existsSync(evalsPath)) {
528
+ console.error(c.red(`✗ No evals/evals.json found for "${skillName}"`));
529
+ process.exit(1);
530
+ }
531
+
532
+ let evals;
533
+ try {
534
+ evals = JSON.parse(fs.readFileSync(evalsPath, 'utf8')).evals || [];
535
+ } catch {
536
+ console.error(c.red('✗ evals/evals.json is invalid JSON'));
537
+ process.exit(1);
538
+ }
539
+
540
+ if (filterId) evals = evals.filter(e => e.id === filterId);
541
+ if (!evals.length) {
542
+ console.error(c.red(`✗ No evals found${filterId ? ` matching --id ${filterId}` : ''}`));
543
+ process.exit(1);
544
+ }
545
+
546
+ const skillMd = getSkillMdContent(skillName);
397
547
 
398
- console.log(' ' + c.line(55));
399
- console.log(` ${color(`${pct}%`)} — ${evalsFullyPassed}/${evals.length} evals fully passed, ${totalPass}/${total} expectations met`);
400
548
  console.log('');
549
+ console.log(c.bold(` ${skillName}`) + c.dim(` — evals (${evals.length})`));
550
+ console.log(' ' + c.line(55));
551
+ const providerLabel = provider.type === 'claude-cli' ? 'claude CLI' : provider.type === 'anthropic' ? 'Anthropic API' : provider.baseUrl;
552
+ console.log(c.dim(` provider: ${providerLabel} model: ${model}\n`));
553
+
554
+ // ── With-skill run ──────────────────────────────────────────────────────────
555
+ console.log(c.bold(' With skill\n'));
556
+ const withResult = await runEvalSet(evals, skillMd, model, judgeModel, true);
557
+ const withPct = Math.round(withResult.pass_rate * 100);
558
+ const withColor = withPct >= 80 ? c.green : withPct >= 60 ? c.yellow : c.red;
559
+ console.log(' ' + c.line(55));
560
+ console.log(` ${withColor(`${withPct}%`)} — ${withResult.evalsFullyPassed}/${evals.length} evals fully passed, ${withResult.passed}/${withResult.total} expectations met\n`);
561
+
562
+ // ── Baseline run (no skill system prompt) ───────────────────────────────────
563
+ console.log(c.dim(' Baseline (without skill)\n'));
564
+ const baseResult = await runEvalSet(evals, null, model, judgeModel, false);
565
+ const basePct = Math.round(baseResult.pass_rate * 100);
566
+ console.log(' ' + c.line(55));
567
+ console.log(c.dim(` ${basePct}% — ${baseResult.passed}/${baseResult.total} expectations met\n`));
568
+
569
+ // ── Summary ─────────────────────────────────────────────────────────────────
570
+ const deltaPp = withPct - basePct;
571
+ const deltaColor = deltaPp >= 20 ? c.green : deltaPp >= 10 ? c.yellow : c.red;
572
+ console.log(' ' + c.line(55));
573
+ console.log(` ${c.bold('Summary')} with skill: ${withColor(`${withPct}%`)} baseline: ${c.dim(`${basePct}%`)} delta: ${deltaColor(`+${deltaPp}pp`)}`);
574
+
575
+ // ── Warn if using a non-standard provider ───────────────────────────────────
576
+ const isLocalModel = provider.type === 'openai-compat' && provider.baseUrl.includes('localhost');
577
+ const isCliModel = provider.type === 'claude-cli';
578
+ if (isLocalModel || isCliModel) {
579
+ const providerName = isLocalModel ? `local model (${model})` : 'claude CLI';
580
+ console.log('');
581
+ console.log(c.yellow(` ⚠ Results generated with ${providerName}.`));
582
+ console.log(c.dim(' For committing to the repo, use a standardized provider so scores'));
583
+ console.log(c.dim(' are comparable across all skills:'));
584
+ console.log(c.dim(' ANTHROPIC_API_KEY=... (recommended: claude-haiku-4-5-20251001)'));
585
+ console.log(c.dim(' OPENAI_API_KEY=... (recommended: gpt-4o-mini)'));
586
+ console.log(c.dim(' results.json will be written but should not be committed as-is.'));
587
+ }
588
+
589
+ // ── Write results.json ───────────────────────────────────────────────────────
590
+ const resultsData = {
591
+ pass_rate: Math.round(withResult.pass_rate * 1000) / 1000,
592
+ passed: withResult.passed,
593
+ total: withResult.total,
594
+ baseline_pass_rate: Math.round(baseResult.pass_rate * 1000) / 1000,
595
+ baseline_passed: baseResult.passed,
596
+ baseline_total: baseResult.total,
597
+ delta: Math.round((withResult.pass_rate - baseResult.pass_rate) * 1000) / 1000,
598
+ model,
599
+ evals_run: evals.length,
600
+ date: new Date().toISOString().split('T')[0],
601
+ ...(isLocalModel || isCliModel ? { non_standard_provider: true } : {}),
602
+ };
603
+ const resultsPath = path.join(skillDir, 'evals', 'results.json');
604
+ fs.writeFileSync(resultsPath, JSON.stringify(resultsData, null, 2));
605
+ console.log(c.dim(`\n ✓ results saved → evals/results.json\n`));
606
+
607
+ if (withPct < 80) {
608
+ console.error(c.red(` ✗ Pass rate ${withPct}% is below the 80% minimum\n`));
609
+ process.exit(1);
610
+ }
401
611
  }
402
612
 
403
613
  // ─── Router ───────────────────────────────────────────────────────────────────
@@ -491,14 +701,17 @@ async function main() {
491
701
  }
492
702
 
493
703
  case 'add': {
494
- const addAll = args.includes('--all');
704
+ const addAll = args.includes('--all');
705
+ const noCommands = args.includes('--no-commands');
495
706
  const skillName = args.find(a => !a.startsWith('--') && a !== 'add');
496
707
  if (addAll) {
497
708
  const skills = getAvailableSkills();
498
709
  skills.forEach(s => copySkill(s, targetDir));
710
+ if (!noCommands) skills.forEach(s => copyCommand(s));
499
711
  console.log(c.dim(`\nInstalled ${skills.length} skills to ${targetDir}`));
500
712
  } else if (skillName) {
501
713
  copySkill(skillName, targetDir);
714
+ if (!noCommands) copyCommand(skillName);
502
715
  console.log(c.dim(`\nInstalled to ${targetDir}`));
503
716
  } else {
504
717
  console.error(c.red('Usage: skills add <skill-name> | skills add --all'));
@@ -569,6 +782,42 @@ async function main() {
569
782
  break;
570
783
  }
571
784
 
785
+ case 'update-readme': {
786
+ const skills = getAvailableSkills();
787
+ const rows = skills.map(skillName => {
788
+ const resultsPath = path.join(skillsRoot, skillName, 'evals', 'results.json');
789
+ if (!fs.existsSync(resultsPath)) return `| ${skillName} | — | — | — | — | — |`;
790
+ try {
791
+ const r = JSON.parse(fs.readFileSync(resultsPath, 'utf8'));
792
+ const pct = Math.round((r.pass_rate || 0) * 100) + '%';
793
+ const basePct = r.baseline_pass_rate !== undefined ? Math.round(r.baseline_pass_rate * 100) + '%' : '—';
794
+ const delta = r.delta !== undefined ? `+${Math.round(r.delta * 100)}pp` : '—';
795
+ const warn = r.non_standard_provider ? ' ⚠' : '';
796
+ return `| ${skillName} | ${pct}${warn} | ${basePct} | ${delta} | ${r.evals_run ?? '—'} | ${r.date ?? '—'} |`;
797
+ } catch { return `| ${skillName} | — | — | — | — | — |`; }
798
+ });
799
+
800
+ const tableHeader = '| Skill | Pass Rate | Baseline | Delta | Evals | Last Run |\n|-------|-----------|----------|-------|-------|----------|';
801
+ const newTable = `<!-- quality-table-start -->\n${tableHeader}\n${rows.join('\n')}\n<!-- quality-table-end -->`;
802
+
803
+ const readmePath = path.join(__dirname, '..', 'README.md');
804
+ let readme = fs.readFileSync(readmePath, 'utf8');
805
+ readme = readme.replace(/<!-- quality-table-start -->[\s\S]*?<!-- quality-table-end -->/, newTable);
806
+ fs.writeFileSync(readmePath, readme);
807
+
808
+ const missing = skills.filter(s => !fs.existsSync(path.join(skillsRoot, s, 'evals', 'results.json')));
809
+ const nonStd = skills.filter(s => {
810
+ try { return JSON.parse(fs.readFileSync(path.join(skillsRoot, s, 'evals', 'results.json'), 'utf8')).non_standard_provider; }
811
+ catch { return false; }
812
+ });
813
+ console.log('');
814
+ console.log(c.green('✓') + ` README.md quality table updated (${skills.length} skills)`);
815
+ if (missing.length) console.log(c.dim(` ${missing.length} pending: ${missing.join(', ')}`));
816
+ if (nonStd.length) console.log(c.yellow(` ⚠ ${nonStd.length} non-standard provider: ${nonStd.join(', ')}`));
817
+ console.log('');
818
+ break;
819
+ }
820
+
572
821
  default:
573
822
  console.log(`
574
823
  ${c.bold(' @booklib/skills')} — book knowledge distilled into AI agent skills
@@ -577,14 +826,23 @@ ${c.bold(' Usage:')}
577
826
  ${c.cyan('skills list')} list all available skills
578
827
  ${c.cyan('skills info')} ${c.dim('<name>')} full description of a skill
579
828
  ${c.cyan('skills demo')} ${c.dim('<name>')} before/after example
580
- ${c.cyan('skills add')} ${c.dim('<name>')} install to .claude/skills/
581
- ${c.cyan('skills add --all')} install all skills
582
- ${c.cyan('skills add')} ${c.dim('<name> --global')} install globally
829
+ ${c.cyan('skills add')} ${c.dim('<name>')} install skill + /command to .claude/
830
+ ${c.cyan('skills add --all')} install all skills + commands
831
+ ${c.cyan('skills add')} ${c.dim('<name> --global')} install globally (~/.claude/)
832
+ ${c.cyan('skills add')} ${c.dim('<name> --no-commands')} install skill only, skip command
583
833
  ${c.cyan('skills check')} ${c.dim('<name>')} quality check (Bronze/Silver/Gold/Platinum)
584
834
  ${c.cyan('skills check --all')} quality summary for all skills
585
- ${c.cyan('skills eval')} ${c.dim('<name>')} run evals against Claude (needs ANTHROPIC_API_KEY)
835
+ ${c.cyan('skills update-readme')} refresh README quality table from results.json files
836
+ ${c.cyan('skills eval')} ${c.dim('<name>')} run evals (auto-detects provider)
586
837
  ${c.cyan('skills eval')} ${c.dim('<name> --model=<id>')} use a specific model
587
838
  ${c.cyan('skills eval')} ${c.dim('<name> --id=<eval-id>')} run a single eval
839
+
840
+ ${c.bold('Provider auto-detection (first match wins):')}
841
+ ANTHROPIC_API_KEY Anthropic API (default model: claude-haiku-4-5-20251001)
842
+ OPENAI_API_KEY OpenAI API (default model: gpt-4o-mini)
843
+ EVAL_API_KEY+EVAL_BASE_URL any OpenAI-compatible endpoint (Groq, Together, etc.)
844
+ claude CLI installed Claude Code subscription — no key needed
845
+ ollama installed local Ollama (requires --model or EVAL_MODEL)
588
846
  `);
589
847
  }
590
848
  }
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review web animations using Animation at Work principles (Rachel Nabors). Usage: /animation-at-work [file | component]"
3
+ ---
4
+
5
+ Apply the `animation-at-work` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review changed animation code.
9
+
10
+ Focus on purpose, duration, easing, performance (compositor-only properties), and accessibility (prefers-reduced-motion). Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review code against Clean Code principles (Robert C. Martin). Usage: /clean-code-reviewer [file | path]"
3
+ ---
4
+
5
+ Apply the `clean-code-reviewer` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Classify each finding as **HIGH** (correctness, naming that obscures intent), **MEDIUM** (design, structure), or **LOW** (style). Reference every finding as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review data storage and distributed systems patterns (DDIA, Martin Kleppmann). Usage: /data-intensive-patterns [file | path]"
3
+ ---
4
+
5
+ Apply the `data-intensive-patterns` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on replication, partitioning, transactions, consistency models, and storage engine trade-offs. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review data pipeline code using Data Pipelines Pocket Reference (James Densmore). Usage: /data-pipelines [file | path]"
3
+ ---
4
+
5
+ Apply the `data-pipelines` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on ingestion, transformation, orchestration, error handling, idempotency, and observability. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review OO design and apply GoF patterns (Head First Design Patterns). Usage: /design-patterns [file | path]"
3
+ ---
4
+
5
+ Apply the `design-patterns` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Identify which GoF patterns are present, misapplied, or missing. Flag over-engineering. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review domain models using DDD patterns (Eric Evans). Usage: /domain-driven-design [file | path]"
3
+ ---
4
+
5
+ Apply the `domain-driven-design` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on Aggregates, Entities, Value Objects, Bounded Contexts, and domain invariants. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review Java code against Effective Java items (Joshua Bloch). Usage: /effective-java [file | path]"
3
+ ---
4
+
5
+ Apply the `effective-java` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on generics, builders, enums, exceptions, and API design. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review Kotlin code against Effective Kotlin best practices (Marcin Moskała). Usage: /effective-kotlin [file | path]"
3
+ ---
4
+
5
+ Apply the `effective-kotlin` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on safety, readability, idiomatic Kotlin, and common pitfalls. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review Python code against Effective Python practices (Brett Slatkin). Usage: /effective-python [file | path]"
3
+ ---
4
+
5
+ Apply the `effective-python` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on Pythonic idioms, list/dict usage, comprehensions, classes, and robustness. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review TypeScript code against Effective TypeScript (Dan Vanderkam). Usage: /effective-typescript [file | path]"
3
+ ---
4
+
5
+ Apply the `effective-typescript` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on type design, avoiding `any`, structural typing, and type inference. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Review Kotlin code using Kotlin in Action patterns (Elizarov, Isakova). Usage: /kotlin-in-action [file | path]"
3
+ ---
4
+
5
+ Apply the `kotlin-in-action` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, run `git diff HEAD` and review those changes.
9
+
10
+ Focus on language features: coroutines, extension functions, sealed classes, delegation, and DSL patterns. Classify each finding as **HIGH** / **MEDIUM** / **LOW** and reference as `file:line`.
@@ -0,0 +1,10 @@
1
+ ---
2
+ description: "Evaluate product/feature decisions using The Lean Startup framework (Eric Ries). Usage: /lean-startup [feature description | file]"
3
+ ---
4
+
5
+ Apply the `lean-startup` skill.
6
+
7
+ **Target:** $ARGUMENTS
8
+ If no target was given, ask the user to describe the feature or product decision they want evaluated.
9
+
10
+ Focus on validated learning, Build-Measure-Learn cycle, MVP scope, pivot signals, and vanity vs. actionable metrics.