tribunal-kit 4.3.1 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.agent/agents/api-architect.md +66 -66
  2. package/.agent/agents/db-latency-auditor.md +216 -216
  3. package/.agent/agents/precedence-reviewer.md +250 -250
  4. package/.agent/agents/resilience-reviewer.md +88 -88
  5. package/.agent/agents/schema-reviewer.md +67 -67
  6. package/.agent/agents/throughput-optimizer.md +299 -299
  7. package/.agent/agents/ui-ux-auditor.md +292 -292
  8. package/.agent/agents/vitals-reviewer.md +223 -223
  9. package/.agent/scripts/_colors.js +18 -18
  10. package/.agent/scripts/_utils.js +42 -42
  11. package/.agent/scripts/append_flow.js +72 -72
  12. package/.agent/scripts/auto_preview.js +197 -197
  13. package/.agent/scripts/bundle_analyzer.js +290 -290
  14. package/.agent/scripts/case_law_manager.js +17 -6
  15. package/.agent/scripts/checklist.js +266 -266
  16. package/.agent/scripts/colors.js +17 -17
  17. package/.agent/scripts/compress_skills.js +141 -141
  18. package/.agent/scripts/consolidate_skills.js +149 -149
  19. package/.agent/scripts/context_broker.js +611 -609
  20. package/.agent/scripts/deep_compress.js +150 -150
  21. package/.agent/scripts/dependency_analyzer.js +272 -272
  22. package/.agent/scripts/graph_builder.js +151 -37
  23. package/.agent/scripts/graph_visualizer.js +384 -0
  24. package/.agent/scripts/inner_loop_validator.js +451 -465
  25. package/.agent/scripts/lint_runner.js +187 -187
  26. package/.agent/scripts/minify_context.js +100 -100
  27. package/.agent/scripts/mutation_runner.js +280 -0
  28. package/.agent/scripts/patch_skills_meta.js +156 -156
  29. package/.agent/scripts/patch_skills_output.js +244 -244
  30. package/.agent/scripts/schema_validator.js +297 -297
  31. package/.agent/scripts/security_scan.js +303 -303
  32. package/.agent/scripts/session_manager.js +276 -276
  33. package/.agent/scripts/skill_evolution.js +644 -644
  34. package/.agent/scripts/skill_integrator.js +313 -313
  35. package/.agent/scripts/strengthen_skills.js +193 -193
  36. package/.agent/scripts/strip_tribunal.js +47 -47
  37. package/.agent/scripts/swarm_dispatcher.js +360 -360
  38. package/.agent/scripts/test_runner.js +193 -193
  39. package/.agent/scripts/utils.js +32 -32
  40. package/.agent/scripts/verify_all.js +257 -256
  41. package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +1 -1
  42. package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +1 -1
  43. package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +1 -1
  44. package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +1 -1
  45. package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +1 -1
  46. package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +1 -1
  47. package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +1 -1
  48. package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +1 -1
  49. package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +1 -1
  50. package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +1 -1
  51. package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +1 -1
  52. package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +1 -1
  53. package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +1 -1
  54. package/.agent/skills/doc.md +1 -1
  55. package/.agent/skills/knowledge-graph/SKILL.md +32 -16
  56. package/.agent/skills/testing-patterns/SKILL.md +19 -2
  57. package/.agent/skills/ui-ux-pro-max/SKILL.md +480 -43
  58. package/.agent/workflows/generate.md +183 -183
  59. package/.agent/workflows/tribunal-speed.md +183 -183
  60. package/README.md +1 -1
  61. package/bin/tribunal-kit.js +134 -17
  62. package/package.json +6 -3
  63. package/scripts/changelog.js +167 -167
  64. package/scripts/sync-version.js +81 -81
  65. package/.agent/scripts/__pycache__/_colors.cpython-311.pyc +0 -0
  66. package/.agent/scripts/__pycache__/_utils.cpython-311.pyc +0 -0
  67. package/.agent/scripts/__pycache__/case_law_manager.cpython-311.pyc +0 -0
@@ -1,644 +1,644 @@
1
- #!/usr/bin/env node
2
- /**
3
- * skill_evolution.js — Tribunal Kit Skill Evolution Forge
4
- * =========================================================
5
- * Analyzes the delta between what the AI proposed and what the developer
6
- * actually committed, then distills those decisions into evolving
7
- * project-specific SKILL idioms — WITHOUT sending full files to any LLM.
8
- *
9
- * Core Strategy: Semantic Delta Extraction
10
- * 1. Read the raw git diff of staged/recent changes
11
- * 2. Strip trivial noise (whitespace, comments, import renames)
12
- * 3. Score remaining lines for "Architectural Weight"
13
- * 4. Only high-weight deltas reach the LLM reflection prompt
14
- * 5. LLM returns structured YAML idiom entries (not prose)
15
- * 6. Idioms are merged into .agent/skills/project-idioms/SKILL.md
16
- *
17
- * Usage:
18
- * node .agent/scripts/skill_evolution.js digest
19
- * node .agent/scripts/skill_evolution.js digest --dry-run
20
- * node .agent/scripts/skill_evolution.js show
21
- * node .agent/scripts/skill_evolution.js reset
22
- * node .agent/scripts/skill_evolution.js status
23
- */
24
-
25
- 'use strict';
26
-
27
- const fs = require('fs');
28
- const path = require('path');
29
- const https = require('https');
30
- const { execSync } = require('child_process');
31
- const readline = require('readline');
32
-
33
- // ── Colours ──────────────────────────────────────────────────────────────────
34
- const GREEN = '\x1b[92m';
35
- const YELLOW = '\x1b[93m';
36
- const CYAN = '\x1b[96m';
37
- const RED = '\x1b[91m';
38
- const BLUE = '\x1b[94m';
39
- const BOLD = '\x1b[1m';
40
- const DIM = '\x1b[2m';
41
- const RESET = '\x1b[0m';
42
-
43
- // ── Find .agent directory ─────────────────────────────────────────────────────
44
- function findAgentDir() {
45
- let current = path.resolve(process.cwd());
46
- const root = path.parse(current).root;
47
- while (current !== root) {
48
- const candidate = path.join(current, '.agent');
49
- if (fs.existsSync(candidate) && fs.statSync(candidate).isDirectory()) return candidate;
50
- current = path.dirname(current);
51
- }
52
- console.error(`${RED}✖ Error: '.agent' directory not found. Please run 'npx tribunal-kit init' first.${RESET}`);
53
- process.exit(1);
54
- }
55
-
56
- const AGENT_DIR = findAgentDir();
57
- const SKILL_DIR = path.join(AGENT_DIR, 'skills', 'project-idioms');
58
- const SKILL_FILE = path.join(SKILL_DIR, 'SKILL.md');
59
- const HISTORY_DIR = path.join(AGENT_DIR, 'history', 'skill-evolution');
60
- const LOG_FILE = path.join(HISTORY_DIR, 'digest-log.json');
61
-
62
- // ── Architectural Weight Patterns ────────────────────────────────────────────
63
- const HIGH_WEIGHT_PATTERNS = [
64
- /\bclass\b/, /\binterface\b/, /\btype\s+\w+\s*=/, /\bextends\b/, /\bimplements\b/,
65
- /\bthrow\b/, /\bcatch\b/, /\btry\b/,
66
- /\bprisma\.\w+\(/, /\bsupabase\./, /\bfetch\(/, /\baxios\./,
67
- /\bReturnType\b/, /\bPromise</, /\basync\s+function/, /\bawait\b/,
68
- /\bexport\s+(default\s+)?(class|function|const)/, /\bmodule\.exports\b/,
69
- /\bRouter\b|\bapp\.(get|post|put|delete|patch)\(/,
70
- /\buse[A-Z]\w+\(/, /\bcreateContext\(/,
71
- /\bz\.object\(/, /\bPrisma\b|\bdrizzle\b/,
72
- /\benv\.\w+/, /\bprocess\.env\./,
73
- ];
74
-
75
- const NOISE_PATTERNS = [
76
- /^\s*$/, /^\s*(\/\/|#|\/\*).*$/, /^\s*\*/,
77
- /^\s*import\s+\{[^}]+\}\s+from\s+['"](?!\.)/, /^\s*(console\.(log|warn|error)|print\()/,
78
- /^\s*\w+\s*[:,]?\s*$/,
79
- ];
80
-
81
- function architecturalWeight(line) {
82
- const code = line.replace(/^[+-]/, '').trim();
83
- for (const p of NOISE_PATTERNS) { if (p.test(code)) return 0; }
84
- for (const p of HIGH_WEIGHT_PATTERNS) { if (p.test(code)) return 2; }
85
- return 1;
86
- }
87
-
88
- // ── Levenshtein Semantic Deduplication ───────────────────────────────────────
89
- // FIX: Replaces over-aggressive substring matching (.includes) with normalised
90
- // edit-distance similarity. A new idiom must differ by >= 20% from all existing
91
- // ones to be accepted. Threshold 0.80 = 80% similar → considered a duplicate.
92
-
93
- function levenshtein(a, b) {
94
- const m = a.length, n = b.length;
95
- // Allocate DP table with base cases pre-filled
96
- const dp = Array.from({ length: m + 1 }, (_, i) =>
97
- Array.from({ length: n + 1 }, (_, j) => i === 0 ? j : j === 0 ? i : 0)
98
- );
99
- for (let i = 1; i <= m; i++) {
100
- for (let j = 1; j <= n; j++) {
101
- dp[i][j] = a[i - 1] === b[j - 1]
102
- ? dp[i - 1][j - 1]
103
- : 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
104
- }
105
- }
106
- return dp[m][n];
107
- }
108
-
109
- function normalizedSimilarity(a, b) {
110
- if (!a.length && !b.length) return 1.0;
111
- const maxLen = Math.max(a.length, b.length);
112
- if (maxLen === 0) return 1.0;
113
- return 1 - levenshtein(a, b) / maxLen;
114
- }
115
-
116
- /**
117
- * Returns true if newPattern is semantically similar to any existing idiom.
118
- * Uses Levenshtein normalised similarity with a configurable threshold.
119
- * @param {string} newPattern
120
- * @param {object[]} existingIdioms - array of { pattern } objects
121
- * @param {number} threshold - 0.0–1.0 (default 0.80 = 80% similar = duplicate)
122
- */
123
- function isDuplicateIdiom(newPattern, existingIdioms, threshold = 0.80) {
124
- const newLow = newPattern.toLowerCase();
125
- return existingIdioms.some(ex => {
126
- const exLow = (ex.pattern || '').toLowerCase();
127
- return normalizedSimilarity(newLow, exLow) >= threshold;
128
- });
129
- }
130
-
131
- function semanticDelta(diffText, minWeight = 2) {
132
- const lines = diffText.split('\n');
133
- const kept = [];
134
- let currentHunkHasHigh = false;
135
- let hunkLines = [];
136
-
137
- for (const line of lines) {
138
- if (line.startsWith('---') || line.startsWith('+++') || line.startsWith('diff --git')) {
139
- kept.push(line); continue;
140
- }
141
- if (line.startsWith('@@')) {
142
- if (currentHunkHasHigh) kept.push(...hunkLines);
143
- currentHunkHasHigh = false;
144
- hunkLines = [line]; continue;
145
- }
146
- if (line.startsWith('+') || line.startsWith('-')) {
147
- const w = architecturalWeight(line);
148
- hunkLines.push(line);
149
- if (w >= minWeight) currentHunkHasHigh = true;
150
- } else {
151
- hunkLines.push(line);
152
- }
153
- }
154
- if (currentHunkHasHigh) kept.push(...hunkLines);
155
-
156
- let result = kept.join('\n');
157
- result = result.replace(/\n( ?\n){3,}/g, '\n\n');
158
- return result.trim();
159
- }
160
-
161
- // ── Git helpers ────────────────────────────────────────────────────────────────
162
- function getGitDiff(mode = 'staged') {
163
- try {
164
- let cmd;
165
- if (mode === 'staged') cmd = 'git diff --cached --unified=3';
166
- else if (mode === 'head') cmd = 'git diff HEAD~1 HEAD --unified=3';
167
- else cmd = 'git diff --unified=3';
168
- return execSync(cmd, { encoding: 'utf8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] });
169
- } catch { return ''; }
170
- }
171
-
172
- function countTokensEstimate(text) {
173
- return Math.max(1, Math.floor(text.length / 4));
174
- }
175
-
176
- // ── Idiom management ──────────────────────────────────────────────────────────
177
- function loadExistingIdioms() {
178
- if (!fs.existsSync(SKILL_FILE)) return [];
179
- const content = fs.readFileSync(SKILL_FILE, 'utf8');
180
- const idioms = [];
181
- const pattern = /\|\s*(\d+)\s*\|\s*`([^`]+)`\s*\|\s*([^|]+)\|\s*([^|]+)\|\s*([^|]+)\|/g;
182
- let m;
183
- while ((m = pattern.exec(content)) !== null) {
184
- idioms.push({
185
- id: parseInt(m[1], 10), pattern: m[2].trim(),
186
- reason: m[3].trim(), domain: m[4].trim(), since: m[5].trim(),
187
- });
188
- }
189
- return idioms;
190
- }
191
-
192
- function nextIdiomId(idioms) {
193
- if (!idioms.length) return 1;
194
- return Math.max(...idioms.map(i => i.id)) + 1;
195
- }
196
-
197
- function renderSkillMd(idioms, digestCount) {
198
- const now = new Date().toISOString().slice(0, 10);
199
- const rows = idioms.map(i =>
200
- `| ${i.id} | \`${i.pattern}\` | ${i.reason} | ${i.domain} | ${i.since} |`
201
- );
202
- const table = rows.length ? rows.join('\n') : '_No idioms recorded yet._';
203
-
204
- return `---
205
- name: project-idioms
206
- description: >
207
- Auto-evolved skill containing project-specific architectural idioms.
208
- Generated by skill_evolution.js — do not edit manually. Commit this
209
- file to share your Engineering Culture across the team.
210
- version: auto
211
- last-updated: ${now}
212
- digest-cycles: ${digestCount}
213
- pattern: generator
214
- ---
215
-
216
- # Project Idioms — Auto-Evolved Skill
217
-
218
- > **Authority Level: ABSOLUTE**
219
- > These idioms were extracted from the developer's own code decisions.
220
- > They override generic agent defaults. Every agent MUST respect them.
221
-
222
- ---
223
-
224
- ## How Idioms Are Born
225
-
226
- 1. Developer commits code that differs from the AI proposal.
227
- 2. \`skill_evolution.js digest\` extracts architectural deltas only.
228
- 3. A minimal LLM reflection prompt (< 500 tokens) identifies the "WHY."
229
- 4. The idiom is recorded here with a stable pattern + reason pair.
230
-
231
- ---
232
-
233
- ## Recorded Idioms
234
-
235
- | ID | Pattern | Why This Project Uses It | Domain | Since |
236
- |:---|:--------|:-------------------------|:-------|:------|
237
- ${table}
238
-
239
- ---
240
-
241
- ## Enforcement Rules for All Agents
242
-
243
- \`\`\`
244
- □ Before proposing code: scan this skill's idiom table
245
- □ If your proposal contradicts an idiom → flag it explicitly
246
- □ Never override an idiom silently — always ask the developer first
247
- □ When citing an idiom: "Per Project Idiom #N: [pattern] — [reason]"
248
- \`\`\`
249
-
250
- ---
251
-
252
- ## Digest History
253
-
254
- Last digest: \`${now}\`
255
- Total cycles: \`${digestCount}\`
256
-
257
- Run \`node .agent/scripts/skill_evolution.js status\` to see the full log.
258
- `;
259
- }
260
-
261
- function generateReflectionPrompt(delta) {
262
- return `You are analyzing a code delta from a developer who changed an AI-proposed solution.
263
- Your only job: identify the ARCHITECTURAL IDIOM this change reveals about their project.
264
-
265
- Rules:
266
- - Return ONLY a YAML list of idioms. No prose. No explanation outside YAML.
267
- - Each idiom: pattern (code signature), reason (1 sentence WHY), domain (backend/frontend/database/general)
268
- - Ignore whitespace, comment, import changes — only architectural choices
269
- - If no meaningful idiom can be extracted, return: "idioms: []"
270
- - Maximum 3 idioms per delta.
271
-
272
- Delta:
273
- \`\`\`
274
- ${delta.slice(0, 1500)}
275
- \`\`\`
276
-
277
- Output format (YAML only):
278
- idioms:
279
- - pattern: "<code pattern or convention>"
280
- reason: "<why this project uses this pattern>"
281
- domain: "<backend|frontend|database|security|performance|general>"
282
- `;
283
- }
284
-
285
- function parseLlmYamlResponse(response) {
286
- const idioms = [];
287
- let inIdioms = false;
288
- let current = {};
289
-
290
- for (const line of response.split('\n')) {
291
- const stripped = line.trim();
292
- if (stripped === 'idioms:') { inIdioms = true; continue; }
293
- if (!inIdioms) continue;
294
- if (stripped.startsWith('- pattern:')) {
295
- if (current.pattern) idioms.push(current);
296
- current = { pattern: stripped.split(':', 2)[1].trim().replace(/^"|"$/g, '') };
297
- } else if (stripped.startsWith('reason:') && current.pattern) {
298
- current.reason = stripped.split(':', 2)[1].trim().replace(/^"|"$/g, '');
299
- } else if (stripped.startsWith('domain:') && current.pattern) {
300
- current.domain = stripped.split(':', 2)[1].trim().replace(/^"|"$/g, '');
301
- }
302
- }
303
- if (current.pattern) idioms.push(current);
304
- return idioms;
305
- }
306
-
307
- // ── Log helpers ────────────────────────────────────────────────────────────────
308
- function loadLog() {
309
- fs.mkdirSync(HISTORY_DIR, { recursive: true });
310
- if (fs.existsSync(LOG_FILE)) {
311
- try { return JSON.parse(fs.readFileSync(LOG_FILE, 'utf8')); } catch { /* fallthrough */ }
312
- }
313
- return { cycles: [], total_tokens_saved: 0, total_idioms: 0 };
314
- }
315
-
316
- function saveLog(log) {
317
- fs.mkdirSync(HISTORY_DIR, { recursive: true });
318
- fs.writeFileSync(LOG_FILE, JSON.stringify(log, null, 2), 'utf8');
319
- }
320
-
321
- // ── Auto-LLM API Integration ─────────────────────────────────────────────────
322
- // GENERATE: Eliminates the manual copy-paste loop by auto-detecting an API key
323
- // and calling the LLM directly. Falls back to manual mode if no key is found.
324
- // Supported providers (checked in order): Anthropic → OpenAI → Gemini.
325
-
326
- function detectLlmProvider() {
327
- if (process.env.ANTHROPIC_API_KEY) return { provider: 'anthropic', key: process.env.ANTHROPIC_API_KEY };
328
- if (process.env.OPENAI_API_KEY) return { provider: 'openai', key: process.env.OPENAI_API_KEY };
329
- if (process.env.GEMINI_API_KEY) return { provider: 'gemini', key: process.env.GEMINI_API_KEY };
330
- return null;
331
- }
332
-
333
- /**
334
- * Call an LLM API with the reflection prompt. Returns the raw text response.
335
- * Uses only built-in Node.js `https` — zero external dependencies.
336
- *
337
- * @param {string} prompt - The reflection prompt to send
338
- * @param {string} provider - 'anthropic' | 'openai' | 'gemini'
339
- * @param {string} apiKey - The API key
340
- * @returns {Promise<string|null>} LLM response text or null on failure
341
- */
342
- async function callLlmApi(prompt, provider, apiKey) {
343
- const timeout = 30000; // 30s max — skill evolution is non-blocking
344
-
345
- function httpsPost(hostname, path, headers, body) {
346
- return new Promise((resolve, reject) => {
347
- const data = JSON.stringify(body);
348
- const req = https.request(
349
- { method: 'POST', hostname, path, headers: { ...headers, 'Content-Length': Buffer.byteLength(data) } },
350
- (res) => {
351
- let raw = '';
352
- res.on('data', c => { raw += c; });
353
- res.on('end', () => resolve(raw));
354
- res.on('error', reject);
355
- }
356
- );
357
- req.on('error', reject);
358
- req.setTimeout(timeout, () => { req.destroy(new Error('LLM API timeout')); });
359
- req.write(data);
360
- req.end();
361
- });
362
- }
363
-
364
- try {
365
- if (provider === 'anthropic') {
366
- const raw = await httpsPost(
367
- 'api.anthropic.com',
368
- '/v1/messages',
369
- {
370
- 'Content-Type': 'application/json',
371
- 'x-api-key': apiKey,
372
- 'anthropic-version': '2023-06-01',
373
- },
374
- {
375
- model: 'claude-3-haiku-20240307', // Fastest/cheapest — idiom extraction
376
- max_tokens: 512,
377
- messages: [{ role: 'user', content: prompt }],
378
- }
379
- );
380
- const json = JSON.parse(raw);
381
- return json?.content?.[0]?.text ?? null;
382
- }
383
-
384
- if (provider === 'openai') {
385
- const raw = await httpsPost(
386
- 'api.openai.com',
387
- '/v1/chat/completions',
388
- {
389
- 'Content-Type': 'application/json',
390
- 'Authorization': `Bearer ${apiKey}`,
391
- },
392
- {
393
- model: 'gpt-4o-mini', // Cheapest capable model for YAML extraction
394
- max_tokens: 512,
395
- messages: [{ role: 'user', content: prompt }],
396
- temperature: 0.1,
397
- }
398
- );
399
- const json = JSON.parse(raw);
400
- return json?.choices?.[0]?.message?.content ?? null;
401
- }
402
-
403
- if (provider === 'gemini') {
404
- const raw = await httpsPost(
405
- 'generativelanguage.googleapis.com',
406
- `/v1beta/models/gemini-1.5-flash:generateContent?key=${apiKey}`,
407
- { 'Content-Type': 'application/json' },
408
- {
409
- contents: [{ parts: [{ text: prompt }] }],
410
- generationConfig: { maxOutputTokens: 512, temperature: 0.1 },
411
- }
412
- );
413
- const json = JSON.parse(raw);
414
- return json?.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
415
- }
416
- } catch (e) {
417
- return null; // Network/parse failure — caller falls back to manual mode
418
- }
419
- return null;
420
- }
421
-
422
- // ── Commands ──────────────────────────────────────────────────────────────────
423
- async function cmdDigest(args) {
424
- const dryRun = args.includes('--dry-run');
425
- const diffMode = args.includes('--head') ? 'head' : 'staged';
426
-
427
- console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution — Digest Cycle ━━━━━━━━━━━━━━━━${RESET}`);
428
- if (dryRun) console.log(` ${YELLOW}DRY RUN — no files will be written${RESET}\n`);
429
-
430
- console.log(` ${DIM}[1/5] Fetching git diff (${diffMode})...${RESET}`);
431
- const rawDiff = getGitDiff(diffMode);
432
- if (!rawDiff.trim()) {
433
- console.log(` ${YELLOW}⚠ No diff found. Commit or stage changes first.${RESET}`);
434
- console.log(` ${DIM}Tip: Use --head to diff against the last commit.${RESET}\n`);
435
- return;
436
- }
437
-
438
- const rawTokens = countTokensEstimate(rawDiff);
439
- console.log(` ${DIM} Raw diff: ~${rawTokens} tokens (${rawDiff.length} chars)${RESET}`);
440
-
441
- console.log(` ${DIM}[2/5] Extracting architectural delta (Semantic Filter)...${RESET}`);
442
- const delta = semanticDelta(rawDiff, 2);
443
- if (!delta.trim()) {
444
- console.log(` ${GREEN}✔ Delta is 100% trivial (whitespace/comments/imports only).${RESET}`);
445
- console.log(` ${DIM} No LLM call needed. Zero tokens consumed.${RESET}\n`);
446
- return;
447
- }
448
-
449
- const deltaTokens = countTokensEstimate(delta);
450
- const savedTokens = rawTokens - deltaTokens;
451
- const savedPct = Math.floor((savedTokens / Math.max(rawTokens, 1)) * 100);
452
- console.log(` ${GREEN}✔ Filtered to ~${deltaTokens} tokens (${savedPct}% reduction, saved ~${savedTokens} tokens)${RESET}`);
453
-
454
- console.log(`\n ${BOLD}Architectural Delta Preview:${RESET}`);
455
- const previewLines = delta.split('\n').slice(0, 20);
456
- for (const line of previewLines) {
457
- if (line.startsWith('+')) console.log(` ${GREEN}${line}${RESET}`);
458
- else if (line.startsWith('-')) console.log(` ${RED}${line}${RESET}`);
459
- else if (line.startsWith('@@')) console.log(` ${BLUE}${line}${RESET}`);
460
- else console.log(` ${DIM}${line}${RESET}`);
461
- }
462
- if (delta.split('\n').length > 20) console.log(` ${DIM}... (${delta.split('\n').length - 20} more lines)${RESET}`);
463
-
464
- if (dryRun) {
465
- console.log(`\n ${YELLOW}[DRY RUN] Would send ${deltaTokens} tokens to LLM for reflection.${RESET}`);
466
- console.log(` ${DIM}Run without --dry-run to complete the digest.${RESET}\n`);
467
- return;
468
- }
469
-
470
- // GENERATE: Auto-LLM call. Tries API first, falls back to manual paste if no key.
471
- const reflectionPrompt = generateReflectionPrompt(delta);
472
- let llmResponse = '';
473
-
474
- let llmCreds = detectLlmProvider();
475
- if (llmCreds) {
476
- console.log(` ${DIM}[3/5] LLM Reflection — auto-calling ${llmCreds.provider} API...${RESET}`);
477
- const autoResponse = await callLlmApi(reflectionPrompt, llmCreds.provider, llmCreds.key);
478
- if (autoResponse) {
479
- llmResponse = autoResponse;
480
- console.log(` ${GREEN}✔ Auto-response received (${llmCreds.provider}) — ${llmResponse.split('\n').length} lines${RESET}`);
481
- } else {
482
- console.log(` ${YELLOW}⚠ API call failed — falling back to manual mode${RESET}`);
483
- llmCreds = null; // triggers manual fallback below
484
- }
485
- }
486
-
487
- if (!llmCreds || !llmResponse) {
488
- // Manual fallback: copy-paste mode (no API key configured)
489
- console.log(`\n ${DIM}[3/5] LLM Reflection — copy the prompt below and paste the response${RESET}`);
490
- console.log(` ${DIM} Tip: Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GEMINI_API_KEY to automate this step.${RESET}`);
491
- console.log(`\n ${BOLD}${'─'.repeat(60)}${RESET}`);
492
- console.log(reflectionPrompt);
493
- console.log(` ${BOLD}${'─'.repeat(60)}${RESET}`);
494
- console.log(`\n ${BOLD}Paste LLM response below (type END_RESPONSE when done):${RESET}`);
495
-
496
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
497
- const responseLines = [];
498
- await new Promise(resolve => {
499
- const listener = (line) => {
500
- if (line.trim() === 'END_RESPONSE') { rl.removeListener('line', listener); resolve(); }
501
- else responseLines.push(line);
502
- };
503
- rl.on('line', listener);
504
- });
505
- rl.close();
506
- llmResponse = responseLines.join('\n');
507
- }
508
-
509
- console.log(`\n ${DIM}[4/5] Parsing idioms...${RESET}`);
510
- const newIdioms = parseLlmYamlResponse(llmResponse);
511
- if (!newIdioms.length) {
512
- console.log(` ${YELLOW}⚠ No idioms extracted from LLM response.${RESET}`);
513
- console.log(` ${DIM} The LLM may have returned idioms: [] — no architectural pattern detected.${RESET}\n`);
514
- return;
515
- }
516
-
517
- console.log(` ${GREEN}✔ Extracted ${newIdioms.length} idiom(s)${RESET}`);
518
- for (const idiom of newIdioms) {
519
- console.log(` ${CYAN}• ${idiom.pattern || '?'}${RESET} — ${idiom.reason || ''}`);
520
- }
521
-
522
- console.log(`\n ${DIM}[5/5] Merging into project-idioms/SKILL.md...${RESET}`);
523
- const existing = loadExistingIdioms();
524
- const log = loadLog();
525
- let nextId = nextIdiomId(existing);
526
- const today = new Date().toISOString().slice(0, 10);
527
- const merged = [...existing];
528
- let added = 0;
529
-
530
- for (const idiom of newIdioms) {
531
- // FIX: Use Levenshtein normalised similarity (threshold 0.80) instead of
532
- // substring .includes() which was over-aggressive and blocked valid idioms.
533
- if (isDuplicateIdiom(idiom.pattern || '', existing)) {
534
- console.log(` ${DIM} Skipped near-duplicate: ${idiom.pattern}${RESET}`);
535
- continue;
536
- }
537
- merged.push({
538
- id: nextId, pattern: idiom.pattern || '?',
539
- reason: idiom.reason || 'No reason provided.',
540
- domain: idiom.domain || 'general', since: today,
541
- });
542
- nextId++;
543
- added++;
544
- }
545
-
546
- if (added === 0) {
547
- console.log(` ${YELLOW}⚠ All extracted idioms were duplicates. SKILL.md unchanged.${RESET}\n`);
548
- return;
549
- }
550
-
551
- log.total_idioms = merged.length;
552
- const skillMd = renderSkillMd(merged, (log.cycles || []).length + 1);
553
- fs.mkdirSync(SKILL_DIR, { recursive: true });
554
- fs.writeFileSync(SKILL_FILE, skillMd, 'utf8');
555
-
556
- log.cycles = log.cycles || [];
557
- log.cycles.push({
558
- timestamp: new Date().toISOString().slice(0, 19),
559
- raw_tokens: rawTokens, delta_tokens: deltaTokens,
560
- tokens_saved: savedTokens, idioms_added: added,
561
- });
562
- log.total_tokens_saved = (log.total_tokens_saved || 0) + savedTokens;
563
- saveLog(log);
564
-
565
- console.log(`\n ${GREEN}✔ ${added} new idiom(s) added to SKILL.md${RESET}`);
566
- console.log(` ${DIM} File: ${SKILL_FILE}${RESET}`);
567
- console.log(` ${DIM} Total idioms: ${merged.length}${RESET}`);
568
- console.log(` ${DIM} Lifetime tokens saved: ${log.total_tokens_saved}${RESET}\n`);
569
- console.log(` ${CYAN}Commit SKILL.md to share your Engineering Culture with the team.${RESET}\n`);
570
- }
571
-
572
- function cmdShow() {
573
- if (!fs.existsSync(SKILL_FILE)) { console.log(`${YELLOW}No project-idioms skill found. Run 'digest' first.${RESET}`); return; }
574
- console.log(fs.readFileSync(SKILL_FILE, 'utf8'));
575
- }
576
-
577
- function cmdReset() {
578
- if (fs.existsSync(SKILL_FILE)) { fs.unlinkSync(SKILL_FILE); console.log(`${GREEN}✔ project-idioms/SKILL.md deleted.${RESET}`); }
579
- if (fs.existsSync(LOG_FILE)) { fs.unlinkSync(LOG_FILE); console.log(`${GREEN}✔ Digest log cleared.${RESET}`); }
580
- console.log(`${DIM}Run 'digest' to start a fresh evolution cycle.${RESET}`);
581
- }
582
-
583
- function cmdStatus() {
584
- const log = loadLog();
585
- const cycles = log.cycles || [];
586
- const totalSaved = log.total_tokens_saved || 0;
587
- const totalIdioms = log.total_idioms || 0;
588
- const idiomsExist = fs.existsSync(SKILL_FILE);
589
-
590
- console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution Status ━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`);
591
- console.log(` Digest cycles : ${BOLD}${cycles.length}${RESET}`);
592
- console.log(` Total idioms : ${BOLD}${totalIdioms}${RESET}`);
593
- console.log(` Tokens saved : ${GREEN}${totalSaved.toLocaleString()} tokens${RESET} (≈ $${(totalSaved / 1_000_000 * 3).toFixed(4)} at $3/M)`);
594
- console.log(` SKILL.md exists : ${idiomsExist ? '✔' : '✗'}`);
595
-
596
- if (cycles.length) {
597
- console.log(`\n ${BOLD}Last 5 digest cycles:${RESET}`);
598
- for (const cycle of cycles.slice(-5).reverse()) {
599
- const ts = (cycle.timestamp || '?').slice(0, 16);
600
- const deltaT = cycle.delta_tokens || 0;
601
- const saved = cycle.tokens_saved || 0;
602
- const addedCount = cycle.idioms_added || 0;
603
- const pct = Math.floor((saved / Math.max(cycle.raw_tokens || 1, 1)) * 100);
604
- console.log(` ${DIM}${ts}${RESET} delta=${deltaT}tok saved=${saved}tok (${pct}%) idioms+=${addedCount}`);
605
- }
606
- }
607
- console.log(`${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n`);
608
- }
609
-
610
- // ── Main ──────────────────────────────────────────────────────────────────────
611
- const COMMANDS = { digest: cmdDigest, show: cmdShow, reset: cmdReset, status: cmdStatus };
612
-
613
- async function main() {
614
- const argv = process.argv.slice(2);
615
- if (!argv.length || ['-h', '--help', 'help'].includes(argv[0])) {
616
- console.log(`
617
- ${BOLD}skill_evolution.js${RESET} — Tribunal Skill Evolution Forge
618
-
619
- ${BOLD}Commands:${RESET}
620
- digest [--dry-run] [--head] Analyze latest git diff and evolve SKILL.md
621
- --dry-run : preview without writing
622
- --head : diff last commit instead of staged
623
- show Print current project-idioms/SKILL.md
624
- status Show digest history and token savings
625
- reset Clear all idioms and start fresh
626
-
627
- ${BOLD}Token Budget:${RESET}
628
- Raw diff -> Semantic Filter -> Only architectural lines -> LLM
629
- Typical savings: 70–90% of tokens. Most trivial commits = 0 tokens.
630
- `);
631
- return;
632
- }
633
-
634
- const cmd = argv[0];
635
- const rest = argv.slice(1);
636
- if (!COMMANDS[cmd]) { console.log(`${RED}✖ Unknown command: '${cmd}'${RESET}`); process.exit(1); }
637
- await COMMANDS[cmd](rest);
638
- }
639
-
640
- module.exports = { semanticDelta, architecturalWeight, parseLlmYamlResponse, loadExistingIdioms };
641
-
642
- if (require.main === module) {
643
- main().catch(err => { console.error(err); process.exit(1); });
644
- }
1
+ #!/usr/bin/env node
2
+ /**
3
+ * skill_evolution.js — Tribunal Kit Skill Evolution Forge
4
+ * =========================================================
5
+ * Analyzes the delta between what the AI proposed and what the developer
6
+ * actually committed, then distills those decisions into evolving
7
+ * project-specific SKILL idioms — WITHOUT sending full files to any LLM.
8
+ *
9
+ * Core Strategy: Semantic Delta Extraction
10
+ * 1. Read the raw git diff of staged/recent changes
11
+ * 2. Strip trivial noise (whitespace, comments, import renames)
12
+ * 3. Score remaining lines for "Architectural Weight"
13
+ * 4. Only high-weight deltas reach the LLM reflection prompt
14
+ * 5. LLM returns structured YAML idiom entries (not prose)
15
+ * 6. Idioms are merged into .agent/skills/project-idioms/SKILL.md
16
+ *
17
+ * Usage:
18
+ * node .agent/scripts/skill_evolution.js digest
19
+ * node .agent/scripts/skill_evolution.js digest --dry-run
20
+ * node .agent/scripts/skill_evolution.js show
21
+ * node .agent/scripts/skill_evolution.js reset
22
+ * node .agent/scripts/skill_evolution.js status
23
+ */
24
+
25
+ 'use strict';
26
+
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const https = require('https');
30
+ const { execSync } = require('child_process');
31
+ const readline = require('readline');
32
+
33
+ // ── Colours ──────────────────────────────────────────────────────────────────
34
// ANSI SGR escape sequences used to style terminal output. Each one switches a
// style on; RESET must be emitted afterwards to return to the default style.
const GREEN = '\x1b[92m';  // bright green foreground
const YELLOW = '\x1b[93m'; // bright yellow foreground
const CYAN = '\x1b[96m';   // bright cyan foreground
const RED = '\x1b[91m';    // bright red foreground
const BLUE = '\x1b[94m';   // bright blue foreground
const BOLD = '\x1b[1m';    // bold / increased intensity
const DIM = '\x1b[2m';     // faint / decreased intensity
const RESET = '\x1b[0m';   // clear all attributes
42
+
43
+ // ── Find .agent directory ─────────────────────────────────────────────────────
44
/**
 * Locate the nearest `.agent` directory by walking from the current working
 * directory up through every ancestor, the filesystem root included.
 *
 * When no `.agent` directory exists anywhere on that path, an error is printed
 * to stderr and the process exits with code 1.
 *
 * @returns {string} Absolute path of the `.agent` directory that was found.
 */
function findAgentDir() {
  let current = path.resolve(process.cwd());
  for (;;) {
    const candidate = path.join(current, '.agent');
    if (fs.existsSync(candidate) && fs.statSync(candidate).isDirectory()) return candidate;
    const parent = path.dirname(current);
    // dirname() of the root is the root itself: stop only AFTER the root has
    // been checked, so a project living directly at `/` is still found.
    if (parent === current) break;
    current = parent;
  }
  console.error(`${RED}✖ Error: '.agent' directory not found. Please run 'npx tribunal-kit init' first.${RESET}`);
  process.exit(1);
}
55
+
56
// Resolved once at module load. Note that findAgentDir() terminates the process
// when no `.agent` directory is found, so loading this module has that side effect.
const AGENT_DIR = findAgentDir();
// Directory and file holding the generated project-idioms skill.
const SKILL_DIR = path.join(AGENT_DIR, 'skills', 'project-idioms');
const SKILL_FILE = path.join(SKILL_DIR, 'SKILL.md');
// Directory and JSON file holding the per-cycle digest log.
const HISTORY_DIR = path.join(AGENT_DIR, 'history', 'skill-evolution');
const LOG_FILE = path.join(HISTORY_DIR, 'digest-log.json');
61
+
62
+ // ── Architectural Weight Patterns ────────────────────────────────────────────
63
// A diff line matching any of these is scored 2 ("high weight") by
// architecturalWeight(), provided it did not match a noise pattern first.
const HIGH_WEIGHT_PATTERNS = [
  // Type / structure declarations
  /\bclass\b/, /\binterface\b/, /\btype\s+\w+\s*=/, /\bextends\b/, /\bimplements\b/,
  // Error-handling keywords
  /\bthrow\b/, /\bcatch\b/, /\btry\b/,
  // Data-access and HTTP calls
  /\bprisma\.\w+\(/, /\bsupabase\./, /\bfetch\(/, /\baxios\./,
  // Async and type-level signatures
  /\bReturnType\b/, /\bPromise</, /\basync\s+function/, /\bawait\b/,
  // Module surface
  /\bexport\s+(default\s+)?(class|function|const)/, /\bmodule\.exports\b/,
  // Router usage and app.<verb>( route registration
  /\bRouter\b|\bapp\.(get|post|put|delete|patch)\(/,
  // Calls to use<Capital>...( functions (presumably React-style hooks) and createContext(
  /\buse[A-Z]\w+\(/, /\bcreateContext\(/,
  // z.object( schemas and Prisma / drizzle identifiers
  /\bz\.object\(/, /\bPrisma\b|\bdrizzle\b/,
  // Environment variable access
  /\benv\.\w+/, /\bprocess\.env\./,
];
74
+
75
// A diff line matching any of these is scored 0 by architecturalWeight();
// these patterns are checked before HIGH_WEIGHT_PATTERNS.
const NOISE_PATTERNS = [
  // Blank lines; lines opening with //, # or /*; lines opening with * (block-comment bodies)
  /^\s*$/, /^\s*(\/\/|#|\/\*).*$/, /^\s*\*/,
  // Named imports whose module specifier does not start with "." ; console.log/warn/error and print( calls
  /^\s*import\s+\{[^}]+\}\s+from\s+['"](?!\.)/, /^\s*(console\.(log|warn|error)|print\()/,
  // A lone word, optionally followed by ":" or ","
  /^\s*\w+\s*[:,]?\s*$/,
];
80
+
81
/**
 * Score one diff line for architectural significance.
 *
 * A single leading `+` or `-` marker is dropped and the remainder trimmed
 * before matching. Noise patterns take precedence over high-weight patterns.
 *
 * @param {string} line - A diff line, with or without its +/- marker.
 * @returns {number} 0 for noise, 2 for a high-weight match, 1 otherwise.
 */
function architecturalWeight(line) {
  const stripped = line.replace(/^[+-]/, '').trim();
  if (NOISE_PATTERNS.some((re) => re.test(stripped))) return 0;
  return HIGH_WEIGHT_PATTERNS.some((re) => re.test(stripped)) ? 2 : 1;
}
87
+
88
+ // ── Levenshtein Semantic Deduplication ───────────────────────────────────────
89
// FIX: Replaces over-aggressive substring matching (.includes) with normalised
// edit-distance similarity. A new idiom is accepted only when its similarity to
// every existing idiom is below the threshold; with the default of 0.80, any
// pattern that is 80% or more similar to an existing one is treated as a duplicate.
92
+
93
/**
 * Levenshtein edit distance between two strings, i.e. the minimum number of
 * single-character insertions, deletions and substitutions that turn `a`
 * into `b`. Comparison is per UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept alive, so memory is
 * proportional to the shorter input instead of the product of both lengths.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} The edit distance (0 when the strings are equal).
 */
function levenshtein(a, b) {
  // The distance is symmetric, so the shorter string can always index the row.
  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const width = shorter.length;

  // Row 0: turning the empty prefix into shorter[0..j) takes j insertions.
  let previous = Array.from({ length: width + 1 }, (_, j) => j);

  for (let i = 1; i <= longer.length; i++) {
    // Column 0: turning longer[0..i) into the empty prefix takes i deletions.
    const current = [i];
    for (let j = 1; j <= width; j++) {
      current[j] = longer[i - 1] === shorter[j - 1]
        ? previous[j - 1]
        : 1 + Math.min(previous[j], current[j - 1], previous[j - 1]);
    }
    previous = current;
  }
  return previous[width];
}
108
+
109
/**
 * Similarity of two strings on a 0–1 scale: one minus the Levenshtein
 * distance divided by the length of the longer string.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} 1.0 for identical strings (including two empty strings).
 */
function normalizedSimilarity(a, b) {
  const longest = Math.max(a.length, b.length);
  // Two empty strings: nothing to compare, treat as identical.
  if (longest === 0) return 1.0;
  const distance = levenshtein(a, b);
  return 1 - distance / longest;
}
115
+
116
/**
 * Decide whether a candidate pattern is a near-duplicate of a recorded idiom.
 * Both sides are lower-cased and compared with normalizedSimilarity(); the
 * first existing idiom that reaches the threshold ends the search.
 *
 * @param {string} newPattern - Candidate pattern text.
 * @param {object[]} existingIdioms - Objects carrying a `pattern` property
 *   (a missing or empty pattern is compared as the empty string).
 * @param {number} threshold - Similarity in 0.0–1.0 at or above which the
 *   candidate counts as a duplicate (default 0.80).
 * @returns {boolean} True when at least one existing idiom is similar enough.
 */
function isDuplicateIdiom(newPattern, existingIdioms, threshold = 0.80) {
  const candidate = newPattern.toLowerCase();
  for (const { pattern } of existingIdioms) {
    const known = (pattern || '').toLowerCase();
    if (normalizedSimilarity(candidate, known) >= threshold) return true;
  }
  return false;
}
130
+
131
/**
 * Reduce a unified diff to the hunks that contain at least one changed line
 * whose architecturalWeight() is >= `minWeight`.
 *
 * A file's header lines (`diff --git`, `---`, `+++`) are emitted only together
 * with the first kept hunk of that file, so a diff with no qualifying hunk
 * produces an empty string. A pending hunk is always flushed before the next
 * file begins, so hunks stay under the header of the file they belong to.
 * Other metadata lines outside hunks (e.g. `index ...`) are dropped.
 *
 * @param {string} diffText - Unified diff text (as produced by `git diff`).
 * @param {number} [minWeight=2] - Minimum line weight that keeps a hunk.
 * @returns {string} The filtered diff, trimmed; '' when nothing qualifies.
 */
function semanticDelta(diffText, minWeight = 2) {
  const lines = diffText.split('\n');
  const kept = [];
  let pendingHeader = []; // header lines of the current file, not yet emitted
  let hunkLines = [];
  let hunkHasHigh = false;
  let inHunk = false;

  // Emit the buffered hunk (preceded by its file header, once) if it qualified.
  const flushHunk = () => {
    if (hunkHasHigh) {
      kept.push(...pendingHeader, ...hunkLines);
      pendingHeader = [];
    }
    hunkLines = [];
    hunkHasHigh = false;
  };

  // Close whatever belongs to the previous file and start buffering a new header.
  const startFile = (headerLine) => {
    flushHunk();
    pendingHeader = [headerLine];
    inHunk = false;
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (line.startsWith('diff --git')) { startFile(line); continue; }

    // Plain unified diffs have no `diff --git` line: inside a hunk, a `--- `
    // line immediately followed by `+++ ` is a new file header. Any other
    // `---...` line inside a hunk is a removed line whose text begins with "--".
    if (inHunk && line.startsWith('--- ') && (lines[i + 1] ?? '').startsWith('+++ ')) {
      startFile(line);
      continue;
    }

    if (line.startsWith('@@')) {
      flushHunk();
      hunkLines = [line];
      inHunk = true;
      continue;
    }

    if (!inHunk) {
      if (line.startsWith('---') || line.startsWith('+++')) pendingHeader.push(line);
      continue;
    }

    hunkLines.push(line);
    const isChange = line.startsWith('+') || line.startsWith('-');
    if (isChange && architecturalWeight(line) >= minWeight) hunkHasHigh = true;
  }
  flushHunk();

  // Collapse long runs of blank / single-space lines left inside kept hunks.
  return kept.join('\n').replace(/\n( ?\n){3,}/g, '\n\n').trim();
}
160
+
161
+ // ── Git helpers ────────────────────────────────────────────────────────────────
162
/**
 * Run `git diff` and return its output.
 *
 * @param {string} [mode='staged'] - 'staged' diffs the index (`--cached`),
 *   'head' diffs `HEAD~1` against `HEAD`, anything else diffs the working tree.
 * @returns {string} The diff text, or '' when git fails (not a repository,
 *   no previous commit, timeout, ...). Failures are reported on stderr.
 */
function getGitDiff(mode = 'staged') {
  let cmd = 'git diff --unified=3';
  if (mode === 'staged') cmd = 'git diff --cached --unified=3';
  else if (mode === 'head') cmd = 'git diff HEAD~1 HEAD --unified=3';

  try {
    return execSync(cmd, {
      encoding: 'utf8',
      timeout: 10000,
      // Raised from the default so a large diff is not reported as "no diff".
      maxBuffer: 64 * 1024 * 1024,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err) {
    // Still best-effort (callers treat '' as "nothing to digest"), but say why.
    const reason = String(err.stderr || err.message || err).trim().split('\n')[0];
    console.error(`${YELLOW}⚠ git diff failed: ${reason}${RESET}`);
    return '';
  }
}
171
+
172
/**
 * Rough token estimate: one token per four characters, rounded down,
 * and never less than 1.
 *
 * @param {string} text
 * @returns {number} Estimated token count (>= 1).
 */
function countTokensEstimate(text) {
  const approx = Math.floor(text.length / 4);
  return approx < 1 ? 1 : approx;
}
175
+
176
+ // ── Idiom management ──────────────────────────────────────────────────────────
177
/**
 * Read the idiom table back out of SKILL.md.
 *
 * Every table row of the form `| <digits> | `pattern` | reason | domain | since |`
 * becomes one idiom object; anything else in the file is ignored.
 *
 * @returns {{id: number, pattern: string, reason: string, domain: string, since: string}[]}
 *   Parsed idioms in file order; [] when SKILL.md does not exist.
 */
function loadExistingIdioms() {
  if (!fs.existsSync(SKILL_FILE)) return [];
  const markdown = fs.readFileSync(SKILL_FILE, 'utf8');
  const rowRe = /\|\s*(\d+)\s*\|\s*`([^`]+)`\s*\|\s*([^|]+)\|\s*([^|]+)\|\s*([^|]+)\|/g;
  return Array.from(markdown.matchAll(rowRe), ([, id, pattern, reason, domain, since]) => ({
    id: parseInt(id, 10),
    pattern: pattern.trim(),
    reason: reason.trim(),
    domain: domain.trim(),
    since: since.trim(),
  }));
}
191
+
192
/**
 * Next free idiom id: one more than the largest id in use, or 1 for an
 * empty list.
 *
 * @param {{id: number}[]} idioms
 * @returns {number}
 */
function nextIdiomId(idioms) {
  if (idioms.length === 0) return 1;
  let highest = -Infinity;
  for (const { id } of idioms) highest = Math.max(highest, id);
  return highest + 1;
}
196
+
197
/**
 * Render the complete project-idioms SKILL.md document.
 *
 * Cell text comes from an LLM, so it is sanitised before being placed in the
 * Markdown table: line breaks are collapsed to a single space, backticks in
 * the pattern become apostrophes (the pattern is wrapped in a code span), and
 * pipes in the free-text cells become the `&#124;` entity. Without this a
 * stray `|`, backtick or newline splits the row and the table can no longer be
 * parsed back by the row regex used when reloading idioms.
 *
 * @param {{id: number, pattern: string, reason: string, domain: string, since: string}[]} idioms
 * @param {number} digestCount - Number of digest cycles, shown in the front matter and footer.
 * @returns {string} The full Markdown document.
 */
function renderSkillMd(idioms, digestCount) {
  const now = new Date().toISOString().slice(0, 10);

  const oneLine = (value) => String(value ?? '').replace(/\s*[\r\n]+\s*/g, ' ').trim();
  const textCell = (value) => oneLine(value).replace(/\|/g, '&#124;');
  // An empty code span would not round-trip, so fall back to the '?' placeholder.
  const codeCell = (value) => oneLine(value).replace(/`/g, "'") || '?';

  const rows = idioms.map((i) =>
    `| ${i.id} | \`${codeCell(i.pattern)}\` | ${textCell(i.reason)} | ${textCell(i.domain)} | ${textCell(i.since)} |`
  );
  const table = rows.length ? rows.join('\n') : '_No idioms recorded yet._';

  return `---
name: project-idioms
description: >
  Auto-evolved skill containing project-specific architectural idioms.
  Generated by skill_evolution.js — do not edit manually. Commit this
  file to share your Engineering Culture across the team.
version: auto
last-updated: ${now}
digest-cycles: ${digestCount}
pattern: generator
---

# Project Idioms — Auto-Evolved Skill

> **Authority Level: ABSOLUTE**
> These idioms were extracted from the developer's own code decisions.
> They override generic agent defaults. Every agent MUST respect them.

---

## How Idioms Are Born

1. Developer commits code that differs from the AI proposal.
2. \`skill_evolution.js digest\` extracts architectural deltas only.
3. A minimal LLM reflection prompt (< 500 tokens) identifies the "WHY."
4. The idiom is recorded here with a stable pattern + reason pair.

---

## Recorded Idioms

| ID | Pattern | Why This Project Uses It | Domain | Since |
|:---|:--------|:-------------------------|:-------|:------|
${table}

---

## Enforcement Rules for All Agents

\`\`\`
□ Before proposing code: scan this skill's idiom table
□ If your proposal contradicts an idiom → flag it explicitly
□ Never override an idiom silently — always ask the developer first
□ When citing an idiom: "Per Project Idiom #N: [pattern] — [reason]"
\`\`\`

---

## Digest History

Last digest: \`${now}\`
Total cycles: \`${digestCount}\`

Run \`node .agent/scripts/skill_evolution.js status\` to see the full log.
`;
}
260
+
261
/**
 * Build the LLM reflection prompt for a filtered diff.
 *
 * The delta is capped at `maxChars` characters; when the cap applies, a
 * "[delta truncated]" marker is appended so the model knows it is looking at
 * a partial diff. The list of allowed domains is the same in the rules and in
 * the output-format example.
 *
 * @param {string} delta - Filtered diff text.
 * @param {number} [maxChars=1500] - Maximum number of delta characters embedded.
 * @returns {string} The prompt text.
 */
function generateReflectionPrompt(delta, maxChars = 1500) {
  const excerpt = delta.length > maxChars
    ? `${delta.slice(0, maxChars)}\n... [delta truncated]`
    : delta;

  return `You are analyzing a code delta from a developer who changed an AI-proposed solution.
Your only job: identify the ARCHITECTURAL IDIOM this change reveals about their project.

Rules:
- Return ONLY a YAML list of idioms. No prose. No explanation outside YAML.
- Each idiom: pattern (code signature), reason (1 sentence WHY), domain (backend/frontend/database/security/performance/general)
- Ignore whitespace, comment, import changes — only architectural choices
- If no meaningful idiom can be extracted, return: "idioms: []"
- Maximum 3 idioms per delta.

Delta:
\`\`\`
${excerpt}
\`\`\`

Output format (YAML only):
idioms:
  - pattern: "<code pattern or convention>"
    reason: "<why this project uses this pattern>"
    domain: "<backend|frontend|database|security|performance|general>"
`;
}
284
+
285
/**
 * Extract idiom entries from the LLM's YAML-like answer.
 *
 * This is a deliberately small line-based reader, not a YAML parser: it waits
 * for a line that is exactly `idioms:`, then collects `- pattern:`, `reason:`
 * and `domain:` lines. A new `- pattern:` line starts the next entry; `reason`
 * and `domain` lines seen before any pattern are ignored.
 *
 * Values keep everything after the FIRST colon, so patterns and reasons that
 * themselves contain colons (routes such as `/users/:id`, type annotations,
 * object literals) are preserved in full.
 *
 * @param {string} response - Raw LLM output.
 * @returns {{pattern: string, reason?: string, domain?: string}[]} Parsed idioms.
 */
function parseLlmYamlResponse(response) {
  // Text after the first ':' with surrounding quotes removed.
  const valueOf = (text) => {
    const raw = text.slice(text.indexOf(':') + 1).trim();
    const singleQuoted = /^'(.*)'$/.exec(raw);
    return singleQuoted ? singleQuoted[1] : raw.replace(/^"|"$/g, '');
  };

  const idioms = [];
  let inIdioms = false;
  let current = {};

  for (const line of response.split('\n')) {
    const stripped = line.trim();
    if (stripped === 'idioms:') { inIdioms = true; continue; }
    if (!inIdioms) continue;

    if (stripped.startsWith('- pattern:')) {
      if (current.pattern) idioms.push(current);
      current = { pattern: valueOf(stripped) };
    } else if (stripped.startsWith('reason:') && current.pattern) {
      current.reason = valueOf(stripped);
    } else if (stripped.startsWith('domain:') && current.pattern) {
      current.domain = valueOf(stripped);
    }
  }
  if (current.pattern) idioms.push(current);
  return idioms;
}
306
+
307
+ // ── Log helpers ────────────────────────────────────────────────────────────────
308
/**
 * Load the digest log, creating the history directory if it is missing.
 *
 * @returns {object} The parsed contents of the log file, or a fresh empty log
 *   ({ cycles: [], total_tokens_saved: 0, total_idioms: 0 }) when the file
 *   does not exist or cannot be read/parsed.
 */
function loadLog() {
  fs.mkdirSync(HISTORY_DIR, { recursive: true });
  const emptyLog = { cycles: [], total_tokens_saved: 0, total_idioms: 0 };
  if (!fs.existsSync(LOG_FILE)) return emptyLog;
  try {
    return JSON.parse(fs.readFileSync(LOG_FILE, 'utf8'));
  } catch {
    // Unreadable or corrupt log: start over with an empty one.
    return emptyLog;
  }
}
315
+
316
/**
 * Write the digest log to disk as pretty-printed JSON (2-space indent),
 * creating the history directory first when needed.
 *
 * @param {object} log - The log object to persist.
 */
function saveLog(log) {
  fs.mkdirSync(HISTORY_DIR, { recursive: true });
  const payload = JSON.stringify(log, null, 2);
  fs.writeFileSync(LOG_FILE, payload, 'utf8');
}
320
+
321
+ // ── Auto-LLM API Integration ─────────────────────────────────────────────────
322
+ // GENERATE: Eliminates the manual copy-paste loop by auto-detecting an API key
323
+ // and calling the LLM directly. Falls back to manual mode if no key is found.
324
+ // Supported providers (checked in order): Anthropic → OpenAI → Gemini.
325
+
326
/**
 * Pick the first LLM provider whose API key is present (non-empty) in the
 * environment. Priority: Anthropic, then OpenAI, then Gemini.
 *
 * @returns {{provider: string, key: string}|null} The provider name and its
 *   key, or null when none of the three variables is set.
 */
function detectLlmProvider() {
  const candidates = [
    ['anthropic', 'ANTHROPIC_API_KEY'],
    ['openai', 'OPENAI_API_KEY'],
    ['gemini', 'GEMINI_API_KEY'],
  ];
  for (const [provider, envName] of candidates) {
    const key = process.env[envName];
    if (key) return { provider, key };
  }
  return null;
}
332
+
333
/**
 * Send the reflection prompt to an LLM provider and return the text of its
 * first answer. Uses only the built-in `https` module.
 *
 * Any failure (network error, timeout, non-2xx HTTP status, unparseable body)
 * is reported on stderr and turned into a `null` result so the caller can
 * fall back to manual mode.
 *
 * @param {string} prompt - The reflection prompt to send.
 * @param {string} provider - 'anthropic' | 'openai' | 'gemini'.
 * @param {string} apiKey - The API key for that provider.
 * @returns {Promise<string|null>} Response text; null on failure, on an
 *   unknown provider, or when the reply carries no text.
 */
async function callLlmApi(prompt, provider, apiKey) {
  const timeout = 30000; // Give up on the request after 30 seconds.

  // POST a JSON body and resolve with the raw response text.
  function httpsPost(hostname, urlPath, headers, body) {
    return new Promise((resolve, reject) => {
      const data = JSON.stringify(body);
      const req = https.request(
        { method: 'POST', hostname, path: urlPath, headers: { ...headers, 'Content-Length': Buffer.byteLength(data) } },
        (res) => {
          // Decode as a stream so a multi-byte character split across two
          // chunks is not corrupted by per-chunk string conversion.
          res.setEncoding('utf8');
          let raw = '';
          res.on('data', (chunk) => { raw += chunk; });
          res.on('end', () => {
            const { statusCode } = res;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`HTTP ${statusCode}`));
              return;
            }
            resolve(raw);
          });
          res.on('error', reject);
        }
      );
      req.on('error', reject);
      req.setTimeout(timeout, () => { req.destroy(new Error('LLM API timeout')); });
      req.write(data);
      req.end();
    });
  }

  try {
    if (provider === 'anthropic') {
      const raw = await httpsPost(
        'api.anthropic.com',
        '/v1/messages',
        {
          'Content-Type': 'application/json',
          'x-api-key': apiKey,
          'anthropic-version': '2023-06-01',
        },
        {
          model: 'claude-3-haiku-20240307',
          max_tokens: 512,
          messages: [{ role: 'user', content: prompt }],
        }
      );
      return JSON.parse(raw)?.content?.[0]?.text ?? null;
    }

    if (provider === 'openai') {
      const raw = await httpsPost(
        'api.openai.com',
        '/v1/chat/completions',
        {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${apiKey}`,
        },
        {
          model: 'gpt-4o-mini',
          max_tokens: 512,
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.1,
        }
      );
      return JSON.parse(raw)?.choices?.[0]?.message?.content ?? null;
    }

    if (provider === 'gemini') {
      const raw = await httpsPost(
        'generativelanguage.googleapis.com',
        '/v1beta/models/gemini-1.5-flash:generateContent',
        {
          'Content-Type': 'application/json',
          // Sent as a header rather than a `?key=` query parameter so the
          // secret does not end up in URLs recorded by proxies or logs.
          'x-goog-api-key': apiKey,
        },
        {
          contents: [{ parts: [{ text: prompt }] }],
          generationConfig: { maxOutputTokens: 512, temperature: 0.1 },
        }
      );
      return JSON.parse(raw)?.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
    }
  } catch (err) {
    // Network/HTTP/parse failure — caller falls back to manual mode.
    console.error(` ${DIM}LLM API error (${provider}): ${err.message}${RESET}`);
    return null;
  }
  return null;
}
421
+
422
+ // ── Commands ──────────────────────────────────────────────────────────────────
423
/**
 * `digest` command: turn the current git diff into new project idioms.
 *
 * Steps: fetch the diff, filter it down to architectural hunks, obtain an LLM
 * reflection (API call when a key is configured, otherwise an interactive
 * paste prompt), parse the idioms, then merge the non-duplicates into
 * SKILL.md and append a cycle to the digest log.
 *
 * @param {string[]} args - Command arguments; recognises `--dry-run` (stop
 *   before the LLM step, write nothing) and `--head` (diff the last commit
 *   instead of the staged changes).
 * @returns {Promise<void>}
 */
async function cmdDigest(args) {
  const dryRun = args.includes('--dry-run');
  const diffMode = args.includes('--head') ? 'head' : 'staged';

  console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution — Digest Cycle ━━━━━━━━━━━━━━━━${RESET}`);
  if (dryRun) console.log(` ${YELLOW}DRY RUN — no files will be written${RESET}\n`);

  console.log(` ${DIM}[1/5] Fetching git diff (${diffMode})...${RESET}`);
  const rawDiff = getGitDiff(diffMode);
  if (!rawDiff.trim()) {
    console.log(` ${YELLOW}⚠ No diff found. Commit or stage changes first.${RESET}`);
    console.log(` ${DIM}Tip: Use --head to diff against the last commit.${RESET}\n`);
    return;
  }

  const rawTokens = countTokensEstimate(rawDiff);
  console.log(` ${DIM} Raw diff: ~${rawTokens} tokens (${rawDiff.length} chars)${RESET}`);

  console.log(` ${DIM}[2/5] Extracting architectural delta (Semantic Filter)...${RESET}`);
  const delta = semanticDelta(rawDiff, 2);
  if (!delta.trim()) {
    console.log(` ${GREEN}✔ Delta is 100% trivial (whitespace/comments/imports only).${RESET}`);
    console.log(` ${DIM} No LLM call needed. Zero tokens consumed.${RESET}\n`);
    return;
  }

  const deltaTokens = countTokensEstimate(delta);
  const savedTokens = rawTokens - deltaTokens;
  const savedPct = Math.floor((savedTokens / Math.max(rawTokens, 1)) * 100);
  console.log(` ${GREEN}✔ Filtered to ~${deltaTokens} tokens (${savedPct}% reduction, saved ~${savedTokens} tokens)${RESET}`);

  console.log(`\n ${BOLD}Architectural Delta Preview:${RESET}`);
  const deltaLines = delta.split('\n');
  for (const line of deltaLines.slice(0, 20)) {
    if (line.startsWith('+')) console.log(` ${GREEN}${line}${RESET}`);
    else if (line.startsWith('-')) console.log(` ${RED}${line}${RESET}`);
    else if (line.startsWith('@@')) console.log(` ${BLUE}${line}${RESET}`);
    else console.log(` ${DIM}${line}${RESET}`);
  }
  if (deltaLines.length > 20) console.log(` ${DIM}... (${deltaLines.length - 20} more lines)${RESET}`);

  if (dryRun) {
    console.log(`\n ${YELLOW}[DRY RUN] Would send ${deltaTokens} tokens to LLM for reflection.${RESET}`);
    console.log(` ${DIM}Run without --dry-run to complete the digest.${RESET}\n`);
    return;
  }

  // Try the API first; an empty llmResponse afterwards means "use manual mode".
  const reflectionPrompt = generateReflectionPrompt(delta);
  let llmResponse = '';

  const llmCreds = detectLlmProvider();
  if (llmCreds) {
    console.log(` ${DIM}[3/5] LLM Reflection — auto-calling ${llmCreds.provider} API...${RESET}`);
    const autoResponse = await callLlmApi(reflectionPrompt, llmCreds.provider, llmCreds.key);
    if (autoResponse) {
      llmResponse = autoResponse;
      console.log(` ${GREEN}✔ Auto-response received (${llmCreds.provider}) — ${llmResponse.split('\n').length} lines${RESET}`);
    } else {
      console.log(` ${YELLOW}⚠ API call failed — falling back to manual mode${RESET}`);
    }
  }

  if (!llmResponse) {
    // Manual fallback: copy-paste mode (no API key configured, or the call failed).
    console.log(`\n ${DIM}[3/5] LLM Reflection — copy the prompt below and paste the response${RESET}`);
    console.log(` ${DIM} Tip: Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GEMINI_API_KEY to automate this step.${RESET}`);
    console.log(`\n ${BOLD}${'─'.repeat(60)}${RESET}`);
    console.log(reflectionPrompt);
    console.log(` ${BOLD}${'─'.repeat(60)}${RESET}`);
    console.log(`\n ${BOLD}Paste LLM response below (type END_RESPONSE when done):${RESET}`);

    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const responseLines = [];
    let finished = false;
    await new Promise((resolve) => {
      rl.on('line', (line) => {
        if (finished) return;
        if (line.trim() === 'END_RESPONSE') { finished = true; rl.close(); }
        else responseLines.push(line);
      });
      // 'close' fires both after END_RESPONSE and when stdin ends (piped input,
      // Ctrl-D), so whatever was pasted is still processed instead of being lost.
      rl.once('close', resolve);
    });
    llmResponse = responseLines.join('\n');
  }

  console.log(`\n ${DIM}[4/5] Parsing idioms...${RESET}`);
  const newIdioms = parseLlmYamlResponse(llmResponse);
  if (!newIdioms.length) {
    console.log(` ${YELLOW}⚠ No idioms extracted from LLM response.${RESET}`);
    console.log(` ${DIM} The LLM may have returned idioms: [] — no architectural pattern detected.${RESET}\n`);
    return;
  }

  console.log(` ${GREEN}✔ Extracted ${newIdioms.length} idiom(s)${RESET}`);
  for (const idiom of newIdioms) {
    console.log(` ${CYAN}• ${idiom.pattern || '?'}${RESET} — ${idiom.reason || ''}`);
  }

  console.log(`\n ${DIM}[5/5] Merging into project-idioms/SKILL.md...${RESET}`);
  const existing = loadExistingIdioms();
  const log = loadLog();
  let nextId = nextIdiomId(existing);
  const today = new Date().toISOString().slice(0, 10);
  const merged = [...existing];
  let added = 0;

  for (const idiom of newIdioms) {
    // Compare against `merged`, not just `existing`, so two near-identical
    // idioms returned in the same response are not both recorded.
    if (isDuplicateIdiom(idiom.pattern || '', merged)) {
      console.log(` ${DIM} Skipped near-duplicate: ${idiom.pattern}${RESET}`);
      continue;
    }
    merged.push({
      id: nextId,
      pattern: idiom.pattern || '?',
      reason: idiom.reason || 'No reason provided.',
      domain: idiom.domain || 'general',
      since: today,
    });
    nextId++;
    added++;
  }

  if (added === 0) {
    console.log(` ${YELLOW}⚠ All extracted idioms were duplicates. SKILL.md unchanged.${RESET}\n`);
    return;
  }

  log.total_idioms = merged.length;
  const skillMd = renderSkillMd(merged, (log.cycles || []).length + 1);
  fs.mkdirSync(SKILL_DIR, { recursive: true });
  fs.writeFileSync(SKILL_FILE, skillMd, 'utf8');

  log.cycles = log.cycles || [];
  log.cycles.push({
    timestamp: new Date().toISOString().slice(0, 19),
    raw_tokens: rawTokens,
    delta_tokens: deltaTokens,
    tokens_saved: savedTokens,
    idioms_added: added,
  });
  log.total_tokens_saved = (log.total_tokens_saved || 0) + savedTokens;
  saveLog(log);

  console.log(`\n ${GREEN}✔ ${added} new idiom(s) added to SKILL.md${RESET}`);
  console.log(` ${DIM} File: ${SKILL_FILE}${RESET}`);
  console.log(` ${DIM} Total idioms: ${merged.length}${RESET}`);
  console.log(` ${DIM} Lifetime tokens saved: ${log.total_tokens_saved}${RESET}\n`);
  console.log(` ${CYAN}Commit SKILL.md to share your Engineering Culture with the team.${RESET}\n`);
}
571
+
572
/**
 * `show` command: print SKILL.md to stdout, or a hint to run `digest`
 * when the file does not exist yet.
 */
function cmdShow() {
  if (fs.existsSync(SKILL_FILE)) {
    console.log(fs.readFileSync(SKILL_FILE, 'utf8'));
    return;
  }
  console.log(`${YELLOW}No project-idioms skill found. Run 'digest' first.${RESET}`);
}
576
+
577
/**
 * `reset` command: delete SKILL.md and the digest log (whichever exist),
 * confirming each deletion, then print a hint about starting over.
 */
function cmdReset() {
  const targets = [
    [SKILL_FILE, `${GREEN}✔ project-idioms/SKILL.md deleted.${RESET}`],
    [LOG_FILE, `${GREEN}✔ Digest log cleared.${RESET}`],
  ];
  for (const [file, confirmation] of targets) {
    if (!fs.existsSync(file)) continue;
    fs.unlinkSync(file);
    console.log(confirmation);
  }
  console.log(`${DIM}Run 'digest' to start a fresh evolution cycle.${RESET}`);
}
582
+
583
/**
 * `status` command: print totals from the digest log (cycles, idioms, tokens
 * saved with a cost estimate at $3 per million tokens), whether SKILL.md
 * exists, and the five most recent cycles, newest first.
 */
function cmdStatus() {
  const log = loadLog();
  const cycles = log.cycles || [];
  const totalSaved = log.total_tokens_saved || 0;
  const totalIdioms = log.total_idioms || 0;
  const skillMark = fs.existsSync(SKILL_FILE) ? '✔' : '✗';
  const dollars = (totalSaved / 1_000_000 * 3).toFixed(4);

  console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution Status ━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`);
  console.log(` Digest cycles : ${BOLD}${cycles.length}${RESET}`);
  console.log(` Total idioms : ${BOLD}${totalIdioms}${RESET}`);
  console.log(` Tokens saved : ${GREEN}${totalSaved.toLocaleString()} tokens${RESET} (≈ $${dollars} at $3/M)`);
  console.log(` SKILL.md exists : ${skillMark}`);

  if (cycles.length) {
    console.log(`\n ${BOLD}Last 5 digest cycles:${RESET}`);
    const recent = cycles.slice(-5).reverse();
    recent.forEach((cycle) => {
      const when = (cycle.timestamp || '?').slice(0, 16);
      const deltaTokens = cycle.delta_tokens || 0;
      const tokensSaved = cycle.tokens_saved || 0;
      const idiomsAdded = cycle.idioms_added || 0;
      const rawTokens = Math.max(cycle.raw_tokens || 1, 1);
      const percent = Math.floor((tokensSaved / rawTokens) * 100);
      console.log(` ${DIM}${when}${RESET} delta=${deltaTokens}tok saved=${tokensSaved}tok (${percent}%) idioms+=${idiomsAdded}`);
    });
  }
  console.log(`${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n`);
}
609
+
610
+ // ── Main ──────────────────────────────────────────────────────────────────────
611
// Dispatch table: CLI sub-command name -> handler. main() calls the handler
// with the remaining arguments; only cmdDigest is async.
const COMMANDS = { digest: cmdDigest, show: cmdShow, reset: cmdReset, status: cmdStatus };
612
+
613
/**
 * CLI entry point: print the help text when called without arguments or with
 * -h / --help / help, otherwise dispatch to the matching command handler.
 *
 * An unknown command is reported on stderr and ends the process with exit
 * code 1. Only the table's own keys are accepted, so names inherited from
 * Object.prototype (`toString`, `constructor`, ...) are rejected as unknown
 * instead of being invoked.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const argv = process.argv.slice(2);
  if (!argv.length || ['-h', '--help', 'help'].includes(argv[0])) {
    console.log(`
${BOLD}skill_evolution.js${RESET} — Tribunal Skill Evolution Forge

${BOLD}Commands:${RESET}
digest [--dry-run] [--head] Analyze latest git diff and evolve SKILL.md
--dry-run : preview without writing
--head : diff last commit instead of staged
show Print current project-idioms/SKILL.md
status Show digest history and token savings
reset Clear all idioms and start fresh

${BOLD}Token Budget:${RESET}
Raw diff -> Semantic Filter -> Only architectural lines -> LLM
Typical savings: 70–90% of tokens. Most trivial commits = 0 tokens.
`);
    return;
  }

  const [cmd, ...rest] = argv;
  if (!Object.prototype.hasOwnProperty.call(COMMANDS, cmd)) {
    console.error(`${RED}✖ Unknown command: '${cmd}'${RESET}`);
    process.exit(1);
  }
  await COMMANDS[cmd](rest);
}
639
+
640
// Helpers exposed to code that require()s this file.
module.exports = { semanticDelta, architecturalWeight, parseLlmYamlResponse, loadExistingIdioms };

// Run the CLI only when this file is executed directly, not when it is
// require()d. Any error escaping main() is printed and the process exits
// with code 1.
if (require.main === module) {
  main().catch(err => { console.error(err); process.exit(1); });
}