tribunal-kit 4.4.0 → 4.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/.agent/agents/api-architect.md +66 -66
  2. package/.agent/agents/db-latency-auditor.md +216 -216
  3. package/.agent/agents/precedence-reviewer.md +250 -250
  4. package/.agent/agents/resilience-reviewer.md +88 -88
  5. package/.agent/agents/schema-reviewer.md +67 -67
  6. package/.agent/agents/throughput-optimizer.md +299 -299
  7. package/.agent/agents/ui-ux-auditor.md +292 -292
  8. package/.agent/agents/vitals-reviewer.md +223 -223
  9. package/.agent/history/architecture-graph.yaml +32 -1
  10. package/.agent/history/graph-cache.json +66 -19
  11. package/.agent/history/snapshots/bin__tribunal-kit.js.json +19 -0
  12. package/.agent/history/snapshots/eslint.config.js.json +9 -0
  13. package/.agent/history/snapshots/migrate_refs.js.json +3 -3
  14. package/.agent/history/snapshots/scripts__changelog.js.json +2 -1
  15. package/.agent/history/snapshots/scripts__sync-version.js.json +2 -1
  16. package/.agent/history/snapshots/scripts__validate-payload.js.json +1 -0
  17. package/.agent/history/snapshots/test__integration__bridges.test.js.json +2 -1
  18. package/.agent/history/snapshots/test__integration__init.test.js.json +1 -0
  19. package/.agent/history/snapshots/test__integration__routing.test.js.json +1 -0
  20. package/.agent/history/snapshots/test__integration__swarm_dispatcher.test.js.json +2 -1
  21. package/.agent/history/snapshots/test__integration__wave2.test.js.json +2 -1
  22. package/.agent/history/snapshots/test__unit__args.test.js.json +11 -1
  23. package/.agent/history/snapshots/test__unit__case_law_manager.test.js.json +1 -0
  24. package/.agent/history/snapshots/test__unit__context_broker.test.js.json +11 -0
  25. package/.agent/history/snapshots/test__unit__copyDir.test.js.json +11 -1
  26. package/.agent/history/snapshots/test__unit__graph_tools.test.js.json +1 -0
  27. package/.agent/history/snapshots/test__unit__inner_loop_validator.test.js.json +11 -0
  28. package/.agent/history/snapshots/test__unit__selfInstall.test.js.json +11 -1
  29. package/.agent/history/snapshots/test__unit__semver.test.js.json +11 -1
  30. package/.agent/history/snapshots/test__unit__swarm_dispatcher.test.js.json +1 -0
  31. package/.agent/scripts/_colors.js +154 -2
  32. package/.agent/scripts/_utils.js +205 -3
  33. package/.agent/scripts/append_flow.js +72 -72
  34. package/.agent/scripts/auto_preview.js +197 -197
  35. package/.agent/scripts/bundle_analyzer.js +90 -119
  36. package/.agent/scripts/case_law_manager.js +18 -13
  37. package/.agent/scripts/checklist.js +100 -88
  38. package/.agent/scripts/colors.js +7 -13
  39. package/.agent/scripts/compress_skills.js +141 -141
  40. package/.agent/scripts/consolidate_skills.js +149 -149
  41. package/.agent/scripts/context_broker.js +605 -609
  42. package/.agent/scripts/deep_compress.js +150 -150
  43. package/.agent/scripts/dependency_analyzer.js +68 -106
  44. package/.agent/scripts/graph_builder.js +341 -311
  45. package/.agent/scripts/graph_visualizer.js +390 -384
  46. package/.agent/scripts/graph_zoom.js +6 -4
  47. package/.agent/scripts/inner_loop_validator.js +445 -465
  48. package/.agent/scripts/lint_runner.js +27 -28
  49. package/.agent/scripts/minify_context.js +100 -100
  50. package/.agent/scripts/mutation_runner.js +280 -280
  51. package/.agent/scripts/patch_skills_meta.js +156 -156
  52. package/.agent/scripts/patch_skills_output.js +244 -244
  53. package/.agent/scripts/schema_validator.js +280 -297
  54. package/.agent/scripts/security_scan.js +37 -64
  55. package/.agent/scripts/session_manager.js +270 -276
  56. package/.agent/scripts/skill_evolution.js +637 -644
  57. package/.agent/scripts/skill_integrator.js +307 -313
  58. package/.agent/scripts/strengthen_skills.js +193 -193
  59. package/.agent/scripts/strip_tribunal.js +47 -47
  60. package/.agent/scripts/swarm_dispatcher.js +360 -360
  61. package/.agent/scripts/test_runner.js +32 -39
  62. package/.agent/scripts/utils.js +10 -25
  63. package/.agent/scripts/verify_all.js +84 -92
  64. package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +1 -1
  65. package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +1 -1
  66. package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +1 -1
  67. package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +1 -1
  68. package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +1 -1
  69. package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +1 -1
  70. package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +1 -1
  71. package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +1 -1
  72. package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +1 -1
  73. package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +1 -1
  74. package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +1 -1
  75. package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +1 -1
  76. package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +1 -1
  77. package/.agent/skills/doc.md +1 -1
  78. package/.agent/skills/knowledge-graph/SKILL.md +52 -52
  79. package/.agent/skills/ui-ux-pro-max/SKILL.md +562 -562
  80. package/.agent/workflows/generate.md +183 -183
  81. package/.agent/workflows/tribunal-speed.md +183 -183
  82. package/README.md +1 -1
  83. package/bin/tribunal-kit.js +76 -87
  84. package/package.json +6 -3
  85. package/scripts/changelog.js +167 -167
  86. package/scripts/sync-version.js +81 -81
  87. package/.agent/history/architecture-explorer.html +0 -352
  88. package/.agent/scripts/__pycache__/_colors.cpython-311.pyc +0 -0
  89. package/.agent/scripts/__pycache__/_utils.cpython-311.pyc +0 -0
  90. package/.agent/scripts/__pycache__/case_law_manager.cpython-311.pyc +0 -0
@@ -1,644 +1,637 @@
1
- #!/usr/bin/env node
2
- /**
3
- * skill_evolution.js — Tribunal Kit Skill Evolution Forge
4
- * =========================================================
5
- * Analyzes the delta between what the AI proposed and what the developer
6
- * actually committed, then distills those decisions into evolving
7
- * project-specific SKILL idioms — WITHOUT sending full files to any LLM.
8
- *
9
- * Core Strategy: Semantic Delta Extraction
10
- * 1. Read the raw git diff of staged/recent changes
11
- * 2. Strip trivial noise (whitespace, comments, import renames)
12
- * 3. Score remaining lines for "Architectural Weight"
13
- * 4. Only high-weight deltas reach the LLM reflection prompt
14
- * 5. LLM returns structured YAML idiom entries (not prose)
15
- * 6. Idioms are merged into .agent/skills/project-idioms/SKILL.md
16
- *
17
- * Usage:
18
- * node .agent/scripts/skill_evolution.js digest
19
- * node .agent/scripts/skill_evolution.js digest --dry-run
20
- * node .agent/scripts/skill_evolution.js show
21
- * node .agent/scripts/skill_evolution.js reset
22
- * node .agent/scripts/skill_evolution.js status
23
- */
24
-
25
- 'use strict';
26
-
27
- const fs = require('fs');
28
- const path = require('path');
29
- const https = require('https');
30
- const { execSync } = require('child_process');
31
- const readline = require('readline');
32
-
33
- // ── Colours ──────────────────────────────────────────────────────────────────
34
- const GREEN = '\x1b[92m';
35
- const YELLOW = '\x1b[93m';
36
- const CYAN = '\x1b[96m';
37
- const RED = '\x1b[91m';
38
- const BLUE = '\x1b[94m';
39
- const BOLD = '\x1b[1m';
40
- const DIM = '\x1b[2m';
41
- const RESET = '\x1b[0m';
42
-
43
- // ── Find .agent directory ─────────────────────────────────────────────────────
44
- function findAgentDir() {
45
- let current = path.resolve(process.cwd());
46
- const root = path.parse(current).root;
47
- while (current !== root) {
48
- const candidate = path.join(current, '.agent');
49
- if (fs.existsSync(candidate) && fs.statSync(candidate).isDirectory()) return candidate;
50
- current = path.dirname(current);
51
- }
52
- console.error(`${RED}✖ Error: '.agent' directory not found. Please run 'npx tribunal-kit init' first.${RESET}`);
53
- process.exit(1);
54
- }
55
-
56
- const AGENT_DIR = findAgentDir();
57
- const SKILL_DIR = path.join(AGENT_DIR, 'skills', 'project-idioms');
58
- const SKILL_FILE = path.join(SKILL_DIR, 'SKILL.md');
59
- const HISTORY_DIR = path.join(AGENT_DIR, 'history', 'skill-evolution');
60
- const LOG_FILE = path.join(HISTORY_DIR, 'digest-log.json');
61
-
62
- // ── Architectural Weight Patterns ────────────────────────────────────────────
63
- const HIGH_WEIGHT_PATTERNS = [
64
- /\bclass\b/, /\binterface\b/, /\btype\s+\w+\s*=/, /\bextends\b/, /\bimplements\b/,
65
- /\bthrow\b/, /\bcatch\b/, /\btry\b/,
66
- /\bprisma\.\w+\(/, /\bsupabase\./, /\bfetch\(/, /\baxios\./,
67
- /\bReturnType\b/, /\bPromise</, /\basync\s+function/, /\bawait\b/,
68
- /\bexport\s+(default\s+)?(class|function|const)/, /\bmodule\.exports\b/,
69
- /\bRouter\b|\bapp\.(get|post|put|delete|patch)\(/,
70
- /\buse[A-Z]\w+\(/, /\bcreateContext\(/,
71
- /\bz\.object\(/, /\bPrisma\b|\bdrizzle\b/,
72
- /\benv\.\w+/, /\bprocess\.env\./,
73
- ];
74
-
75
- const NOISE_PATTERNS = [
76
- /^\s*$/, /^\s*(\/\/|#|\/\*).*$/, /^\s*\*/,
77
- /^\s*import\s+\{[^}]+\}\s+from\s+['"](?!\.)/, /^\s*(console\.(log|warn|error)|print\()/,
78
- /^\s*\w+\s*[:,]?\s*$/,
79
- ];
80
-
81
- function architecturalWeight(line) {
82
- const code = line.replace(/^[+-]/, '').trim();
83
- for (const p of NOISE_PATTERNS) { if (p.test(code)) return 0; }
84
- for (const p of HIGH_WEIGHT_PATTERNS) { if (p.test(code)) return 2; }
85
- return 1;
86
- }
87
-
88
- // ── Levenshtein Semantic Deduplication ───────────────────────────────────────
89
- // FIX: Replaces over-aggressive substring matching (.includes) with normalised
90
- // edit-distance similarity. A new idiom must differ by >= 20% from all existing
91
- // ones to be accepted. Threshold 0.80 = 80% similar → considered a duplicate.
92
-
93
- function levenshtein(a, b) {
94
- const m = a.length, n = b.length;
95
- // Allocate DP table with base cases pre-filled
96
- const dp = Array.from({ length: m + 1 }, (_, i) =>
97
- Array.from({ length: n + 1 }, (_, j) => i === 0 ? j : j === 0 ? i : 0)
98
- );
99
- for (let i = 1; i <= m; i++) {
100
- for (let j = 1; j <= n; j++) {
101
- dp[i][j] = a[i - 1] === b[j - 1]
102
- ? dp[i - 1][j - 1]
103
- : 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
104
- }
105
- }
106
- return dp[m][n];
107
- }
108
-
109
- function normalizedSimilarity(a, b) {
110
- if (!a.length && !b.length) return 1.0;
111
- const maxLen = Math.max(a.length, b.length);
112
- if (maxLen === 0) return 1.0;
113
- return 1 - levenshtein(a, b) / maxLen;
114
- }
115
-
116
- /**
117
- * Returns true if newPattern is semantically similar to any existing idiom.
118
- * Uses Levenshtein normalised similarity with a configurable threshold.
119
- * @param {string} newPattern
120
- * @param {object[]} existingIdioms - array of { pattern } objects
121
- * @param {number} threshold - 0.0–1.0 (default 0.80 = 80% similar = duplicate)
122
- */
123
- function isDuplicateIdiom(newPattern, existingIdioms, threshold = 0.80) {
124
- const newLow = newPattern.toLowerCase();
125
- return existingIdioms.some(ex => {
126
- const exLow = (ex.pattern || '').toLowerCase();
127
- return normalizedSimilarity(newLow, exLow) >= threshold;
128
- });
129
- }
130
-
131
- function semanticDelta(diffText, minWeight = 2) {
132
- const lines = diffText.split('\n');
133
- const kept = [];
134
- let currentHunkHasHigh = false;
135
- let hunkLines = [];
136
-
137
- for (const line of lines) {
138
- if (line.startsWith('---') || line.startsWith('+++') || line.startsWith('diff --git')) {
139
- kept.push(line); continue;
140
- }
141
- if (line.startsWith('@@')) {
142
- if (currentHunkHasHigh) kept.push(...hunkLines);
143
- currentHunkHasHigh = false;
144
- hunkLines = [line]; continue;
145
- }
146
- if (line.startsWith('+') || line.startsWith('-')) {
147
- const w = architecturalWeight(line);
148
- hunkLines.push(line);
149
- if (w >= minWeight) currentHunkHasHigh = true;
150
- } else {
151
- hunkLines.push(line);
152
- }
153
- }
154
- if (currentHunkHasHigh) kept.push(...hunkLines);
155
-
156
- let result = kept.join('\n');
157
- result = result.replace(/\n( ?\n){3,}/g, '\n\n');
158
- return result.trim();
159
- }
160
-
161
- // ── Git helpers ────────────────────────────────────────────────────────────────
162
- function getGitDiff(mode = 'staged') {
163
- try {
164
- let cmd;
165
- if (mode === 'staged') cmd = 'git diff --cached --unified=3';
166
- else if (mode === 'head') cmd = 'git diff HEAD~1 HEAD --unified=3';
167
- else cmd = 'git diff --unified=3';
168
- return execSync(cmd, { encoding: 'utf8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] });
169
- } catch { return ''; }
170
- }
171
-
172
- function countTokensEstimate(text) {
173
- return Math.max(1, Math.floor(text.length / 4));
174
- }
175
-
176
- // ── Idiom management ──────────────────────────────────────────────────────────
177
- function loadExistingIdioms() {
178
- if (!fs.existsSync(SKILL_FILE)) return [];
179
- const content = fs.readFileSync(SKILL_FILE, 'utf8');
180
- const idioms = [];
181
- const pattern = /\|\s*(\d+)\s*\|\s*`([^`]+)`\s*\|\s*([^|]+)\|\s*([^|]+)\|\s*([^|]+)\|/g;
182
- let m;
183
- while ((m = pattern.exec(content)) !== null) {
184
- idioms.push({
185
- id: parseInt(m[1], 10), pattern: m[2].trim(),
186
- reason: m[3].trim(), domain: m[4].trim(), since: m[5].trim(),
187
- });
188
- }
189
- return idioms;
190
- }
191
-
192
- function nextIdiomId(idioms) {
193
- if (!idioms.length) return 1;
194
- return Math.max(...idioms.map(i => i.id)) + 1;
195
- }
196
-
197
- function renderSkillMd(idioms, digestCount) {
198
- const now = new Date().toISOString().slice(0, 10);
199
- const rows = idioms.map(i =>
200
- `| ${i.id} | \`${i.pattern}\` | ${i.reason} | ${i.domain} | ${i.since} |`
201
- );
202
- const table = rows.length ? rows.join('\n') : '_No idioms recorded yet._';
203
-
204
- return `---
205
- name: project-idioms
206
- description: >
207
- Auto-evolved skill containing project-specific architectural idioms.
208
- Generated by skill_evolution.js — do not edit manually. Commit this
209
- file to share your Engineering Culture across the team.
210
- version: auto
211
- last-updated: ${now}
212
- digest-cycles: ${digestCount}
213
- pattern: generator
214
- ---
215
-
216
- # Project Idioms — Auto-Evolved Skill
217
-
218
- > **Authority Level: ABSOLUTE**
219
- > These idioms were extracted from the developer's own code decisions.
220
- > They override generic agent defaults. Every agent MUST respect them.
221
-
222
- ---
223
-
224
- ## How Idioms Are Born
225
-
226
- 1. Developer commits code that differs from the AI proposal.
227
- 2. \`skill_evolution.js digest\` extracts architectural deltas only.
228
- 3. A minimal LLM reflection prompt (< 500 tokens) identifies the "WHY."
229
- 4. The idiom is recorded here with a stable pattern + reason pair.
230
-
231
- ---
232
-
233
- ## Recorded Idioms
234
-
235
- | ID | Pattern | Why This Project Uses It | Domain | Since |
236
- |:---|:--------|:-------------------------|:-------|:------|
237
- ${table}
238
-
239
- ---
240
-
241
- ## Enforcement Rules for All Agents
242
-
243
- \`\`\`
244
- □ Before proposing code: scan this skill's idiom table
245
- If your proposal contradicts an idiom → flag it explicitly
246
- □ Never override an idiom silently — always ask the developer first
247
- When citing an idiom: "Per Project Idiom #N: [pattern] — [reason]"
248
- \`\`\`
249
-
250
- ---
251
-
252
- ## Digest History
253
-
254
- Last digest: \`${now}\`
255
- Total cycles: \`${digestCount}\`
256
-
257
- Run \`node .agent/scripts/skill_evolution.js status\` to see the full log.
258
- `;
259
- }
260
-
261
- function generateReflectionPrompt(delta) {
262
- return `You are analyzing a code delta from a developer who changed an AI-proposed solution.
263
- Your only job: identify the ARCHITECTURAL IDIOM this change reveals about their project.
264
-
265
- Rules:
266
- - Return ONLY a YAML list of idioms. No prose. No explanation outside YAML.
267
- - Each idiom: pattern (code signature), reason (1 sentence WHY), domain (backend/frontend/database/general)
268
- - Ignore whitespace, comment, import changes — only architectural choices
269
- - If no meaningful idiom can be extracted, return: "idioms: []"
270
- - Maximum 3 idioms per delta.
271
-
272
- Delta:
273
- \`\`\`
274
- ${delta.slice(0, 1500)}
275
- \`\`\`
276
-
277
- Output format (YAML only):
278
- idioms:
279
- - pattern: "<code pattern or convention>"
280
- reason: "<why this project uses this pattern>"
281
- domain: "<backend|frontend|database|security|performance|general>"
282
- `;
283
- }
284
-
285
- function parseLlmYamlResponse(response) {
286
- const idioms = [];
287
- let inIdioms = false;
288
- let current = {};
289
-
290
- for (const line of response.split('\n')) {
291
- const stripped = line.trim();
292
- if (stripped === 'idioms:') { inIdioms = true; continue; }
293
- if (!inIdioms) continue;
294
- if (stripped.startsWith('- pattern:')) {
295
- if (current.pattern) idioms.push(current);
296
- current = { pattern: stripped.split(':', 2)[1].trim().replace(/^"|"$/g, '') };
297
- } else if (stripped.startsWith('reason:') && current.pattern) {
298
- current.reason = stripped.split(':', 2)[1].trim().replace(/^"|"$/g, '');
299
- } else if (stripped.startsWith('domain:') && current.pattern) {
300
- current.domain = stripped.split(':', 2)[1].trim().replace(/^"|"$/g, '');
301
- }
302
- }
303
- if (current.pattern) idioms.push(current);
304
- return idioms;
305
- }
306
-
307
- // ── Log helpers ────────────────────────────────────────────────────────────────
308
- function loadLog() {
309
- fs.mkdirSync(HISTORY_DIR, { recursive: true });
310
- if (fs.existsSync(LOG_FILE)) {
311
- try { return JSON.parse(fs.readFileSync(LOG_FILE, 'utf8')); } catch { /* fallthrough */ }
312
- }
313
- return { cycles: [], total_tokens_saved: 0, total_idioms: 0 };
314
- }
315
-
316
- function saveLog(log) {
317
- fs.mkdirSync(HISTORY_DIR, { recursive: true });
318
- fs.writeFileSync(LOG_FILE, JSON.stringify(log, null, 2), 'utf8');
319
- }
320
-
321
- // ── Auto-LLM API Integration ─────────────────────────────────────────────────
322
- // GENERATE: Eliminates the manual copy-paste loop by auto-detecting an API key
323
- // and calling the LLM directly. Falls back to manual mode if no key is found.
324
- // Supported providers (checked in order): Anthropic → OpenAI → Gemini.
325
-
326
- function detectLlmProvider() {
327
- if (process.env.ANTHROPIC_API_KEY) return { provider: 'anthropic', key: process.env.ANTHROPIC_API_KEY };
328
- if (process.env.OPENAI_API_KEY) return { provider: 'openai', key: process.env.OPENAI_API_KEY };
329
- if (process.env.GEMINI_API_KEY) return { provider: 'gemini', key: process.env.GEMINI_API_KEY };
330
- return null;
331
- }
332
-
333
- /**
334
- * Call an LLM API with the reflection prompt. Returns the raw text response.
335
- * Uses only built-in Node.js `https` — zero external dependencies.
336
- *
337
- * @param {string} prompt - The reflection prompt to send
338
- * @param {string} provider - 'anthropic' | 'openai' | 'gemini'
339
- * @param {string} apiKey - The API key
340
- * @returns {Promise<string|null>} LLM response text or null on failure
341
- */
342
- async function callLlmApi(prompt, provider, apiKey) {
343
- const timeout = 30000; // 30s max — skill evolution is non-blocking
344
-
345
- function httpsPost(hostname, path, headers, body) {
346
- return new Promise((resolve, reject) => {
347
- const data = JSON.stringify(body);
348
- const req = https.request(
349
- { method: 'POST', hostname, path, headers: { ...headers, 'Content-Length': Buffer.byteLength(data) } },
350
- (res) => {
351
- let raw = '';
352
- res.on('data', c => { raw += c; });
353
- res.on('end', () => resolve(raw));
354
- res.on('error', reject);
355
- }
356
- );
357
- req.on('error', reject);
358
- req.setTimeout(timeout, () => { req.destroy(new Error('LLM API timeout')); });
359
- req.write(data);
360
- req.end();
361
- });
362
- }
363
-
364
- try {
365
- if (provider === 'anthropic') {
366
- const raw = await httpsPost(
367
- 'api.anthropic.com',
368
- '/v1/messages',
369
- {
370
- 'Content-Type': 'application/json',
371
- 'x-api-key': apiKey,
372
- 'anthropic-version': '2023-06-01',
373
- },
374
- {
375
- model: 'claude-3-haiku-20240307', // Fastest/cheapest — idiom extraction
376
- max_tokens: 512,
377
- messages: [{ role: 'user', content: prompt }],
378
- }
379
- );
380
- const json = JSON.parse(raw);
381
- return json?.content?.[0]?.text ?? null;
382
- }
383
-
384
- if (provider === 'openai') {
385
- const raw = await httpsPost(
386
- 'api.openai.com',
387
- '/v1/chat/completions',
388
- {
389
- 'Content-Type': 'application/json',
390
- 'Authorization': `Bearer ${apiKey}`,
391
- },
392
- {
393
- model: 'gpt-4o-mini', // Cheapest capable model for YAML extraction
394
- max_tokens: 512,
395
- messages: [{ role: 'user', content: prompt }],
396
- temperature: 0.1,
397
- }
398
- );
399
- const json = JSON.parse(raw);
400
- return json?.choices?.[0]?.message?.content ?? null;
401
- }
402
-
403
- if (provider === 'gemini') {
404
- const raw = await httpsPost(
405
- 'generativelanguage.googleapis.com',
406
- `/v1beta/models/gemini-1.5-flash:generateContent?key=${apiKey}`,
407
- { 'Content-Type': 'application/json' },
408
- {
409
- contents: [{ parts: [{ text: prompt }] }],
410
- generationConfig: { maxOutputTokens: 512, temperature: 0.1 },
411
- }
412
- );
413
- const json = JSON.parse(raw);
414
- return json?.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
415
- }
416
- } catch (e) {
417
- return null; // Network/parse failure — caller falls back to manual mode
418
- }
419
- return null;
420
- }
421
-
422
- // ── Commands ──────────────────────────────────────────────────────────────────
423
- async function cmdDigest(args) {
424
- const dryRun = args.includes('--dry-run');
425
- const diffMode = args.includes('--head') ? 'head' : 'staged';
426
-
427
- console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution Digest Cycle ━━━━━━━━━━━━━━━━${RESET}`);
428
- if (dryRun) console.log(` ${YELLOW}DRY RUN — no files will be written${RESET}\n`);
429
-
430
- console.log(` ${DIM}[1/5] Fetching git diff (${diffMode})...${RESET}`);
431
- const rawDiff = getGitDiff(diffMode);
432
- if (!rawDiff.trim()) {
433
- console.log(` ${YELLOW}⚠ No diff found. Commit or stage changes first.${RESET}`);
434
- console.log(` ${DIM}Tip: Use --head to diff against the last commit.${RESET}\n`);
435
- return;
436
- }
437
-
438
- const rawTokens = countTokensEstimate(rawDiff);
439
- console.log(` ${DIM} Raw diff: ~${rawTokens} tokens (${rawDiff.length} chars)${RESET}`);
440
-
441
- console.log(` ${DIM}[2/5] Extracting architectural delta (Semantic Filter)...${RESET}`);
442
- const delta = semanticDelta(rawDiff, 2);
443
- if (!delta.trim()) {
444
- console.log(` ${GREEN}✔ Delta is 100% trivial (whitespace/comments/imports only).${RESET}`);
445
- console.log(` ${DIM} No LLM call needed. Zero tokens consumed.${RESET}\n`);
446
- return;
447
- }
448
-
449
- const deltaTokens = countTokensEstimate(delta);
450
- const savedTokens = rawTokens - deltaTokens;
451
- const savedPct = Math.floor((savedTokens / Math.max(rawTokens, 1)) * 100);
452
- console.log(` ${GREEN}✔ Filtered to ~${deltaTokens} tokens (${savedPct}% reduction, saved ~${savedTokens} tokens)${RESET}`);
453
-
454
- console.log(`\n ${BOLD}Architectural Delta Preview:${RESET}`);
455
- const previewLines = delta.split('\n').slice(0, 20);
456
- for (const line of previewLines) {
457
- if (line.startsWith('+')) console.log(` ${GREEN}${line}${RESET}`);
458
- else if (line.startsWith('-')) console.log(` ${RED}${line}${RESET}`);
459
- else if (line.startsWith('@@')) console.log(` ${BLUE}${line}${RESET}`);
460
- else console.log(` ${DIM}${line}${RESET}`);
461
- }
462
- if (delta.split('\n').length > 20) console.log(` ${DIM}... (${delta.split('\n').length - 20} more lines)${RESET}`);
463
-
464
- if (dryRun) {
465
- console.log(`\n ${YELLOW}[DRY RUN] Would send ${deltaTokens} tokens to LLM for reflection.${RESET}`);
466
- console.log(` ${DIM}Run without --dry-run to complete the digest.${RESET}\n`);
467
- return;
468
- }
469
-
470
- // GENERATE: Auto-LLM call. Tries API first, falls back to manual paste if no key.
471
- const reflectionPrompt = generateReflectionPrompt(delta);
472
- let llmResponse = '';
473
-
474
- let llmCreds = detectLlmProvider();
475
- if (llmCreds) {
476
- console.log(` ${DIM}[3/5] LLM Reflection auto-calling ${llmCreds.provider} API...${RESET}`);
477
- const autoResponse = await callLlmApi(reflectionPrompt, llmCreds.provider, llmCreds.key);
478
- if (autoResponse) {
479
- llmResponse = autoResponse;
480
- console.log(` ${GREEN}✔ Auto-response received (${llmCreds.provider}) ${llmResponse.split('\n').length} lines${RESET}`);
481
- } else {
482
- console.log(` ${YELLOW} API call failed falling back to manual mode${RESET}`);
483
- llmCreds = null; // triggers manual fallback below
484
- }
485
- }
486
-
487
- if (!llmCreds || !llmResponse) {
488
- // Manual fallback: copy-paste mode (no API key configured)
489
- console.log(`\n ${DIM}[3/5] LLM Reflection copy the prompt below and paste the response${RESET}`);
490
- console.log(` ${DIM} Tip: Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GEMINI_API_KEY to automate this step.${RESET}`);
491
- console.log(`\n ${BOLD}${'─'.repeat(60)}${RESET}`);
492
- console.log(reflectionPrompt);
493
- console.log(` ${BOLD}${''.repeat(60)}${RESET}`);
494
- console.log(`\n ${BOLD}Paste LLM response below (type END_RESPONSE when done):${RESET}`);
495
-
496
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
497
- const responseLines = [];
498
- await new Promise(resolve => {
499
- const listener = (line) => {
500
- if (line.trim() === 'END_RESPONSE') { rl.removeListener('line', listener); resolve(); }
501
- else responseLines.push(line);
502
- };
503
- rl.on('line', listener);
504
- });
505
- rl.close();
506
- llmResponse = responseLines.join('\n');
507
- }
508
-
509
- console.log(`\n ${DIM}[4/5] Parsing idioms...${RESET}`);
510
- const newIdioms = parseLlmYamlResponse(llmResponse);
511
- if (!newIdioms.length) {
512
- console.log(` ${YELLOW} No idioms extracted from LLM response.${RESET}`);
513
- console.log(` ${DIM} The LLM may have returned idioms: [] — no architectural pattern detected.${RESET}\n`);
514
- return;
515
- }
516
-
517
- console.log(` ${GREEN}✔ Extracted ${newIdioms.length} idiom(s)${RESET}`);
518
- for (const idiom of newIdioms) {
519
- console.log(` ${CYAN}• ${idiom.pattern || '?'}${RESET} — ${idiom.reason || ''}`);
520
- }
521
-
522
- console.log(`\n ${DIM}[5/5] Merging into project-idioms/SKILL.md...${RESET}`);
523
- const existing = loadExistingIdioms();
524
- const log = loadLog();
525
- let nextId = nextIdiomId(existing);
526
- const today = new Date().toISOString().slice(0, 10);
527
- const merged = [...existing];
528
- let added = 0;
529
-
530
- for (const idiom of newIdioms) {
531
- // FIX: Use Levenshtein normalised similarity (threshold 0.80) instead of
532
- // substring .includes() which was over-aggressive and blocked valid idioms.
533
- if (isDuplicateIdiom(idiom.pattern || '', existing)) {
534
- console.log(` ${DIM} Skipped near-duplicate: ${idiom.pattern}${RESET}`);
535
- continue;
536
- }
537
- merged.push({
538
- id: nextId, pattern: idiom.pattern || '?',
539
- reason: idiom.reason || 'No reason provided.',
540
- domain: idiom.domain || 'general', since: today,
541
- });
542
- nextId++;
543
- added++;
544
- }
545
-
546
- if (added === 0) {
547
- console.log(` ${YELLOW}⚠ All extracted idioms were duplicates. SKILL.md unchanged.${RESET}\n`);
548
- return;
549
- }
550
-
551
- log.total_idioms = merged.length;
552
- const skillMd = renderSkillMd(merged, (log.cycles || []).length + 1);
553
- fs.mkdirSync(SKILL_DIR, { recursive: true });
554
- fs.writeFileSync(SKILL_FILE, skillMd, 'utf8');
555
-
556
- log.cycles = log.cycles || [];
557
- log.cycles.push({
558
- timestamp: new Date().toISOString().slice(0, 19),
559
- raw_tokens: rawTokens, delta_tokens: deltaTokens,
560
- tokens_saved: savedTokens, idioms_added: added,
561
- });
562
- log.total_tokens_saved = (log.total_tokens_saved || 0) + savedTokens;
563
- saveLog(log);
564
-
565
- console.log(`\n ${GREEN}✔ ${added} new idiom(s) added to SKILL.md${RESET}`);
566
- console.log(` ${DIM} File: ${SKILL_FILE}${RESET}`);
567
- console.log(` ${DIM} Total idioms: ${merged.length}${RESET}`);
568
- console.log(` ${DIM} Lifetime tokens saved: ${log.total_tokens_saved}${RESET}\n`);
569
- console.log(` ${CYAN}Commit SKILL.md to share your Engineering Culture with the team.${RESET}\n`);
570
- }
571
-
572
- function cmdShow() {
573
- if (!fs.existsSync(SKILL_FILE)) { console.log(`${YELLOW}No project-idioms skill found. Run 'digest' first.${RESET}`); return; }
574
- console.log(fs.readFileSync(SKILL_FILE, 'utf8'));
575
- }
576
-
577
- function cmdReset() {
578
- if (fs.existsSync(SKILL_FILE)) { fs.unlinkSync(SKILL_FILE); console.log(`${GREEN}✔ project-idioms/SKILL.md deleted.${RESET}`); }
579
- if (fs.existsSync(LOG_FILE)) { fs.unlinkSync(LOG_FILE); console.log(`${GREEN}✔ Digest log cleared.${RESET}`); }
580
- console.log(`${DIM}Run 'digest' to start a fresh evolution cycle.${RESET}`);
581
- }
582
-
583
- function cmdStatus() {
584
- const log = loadLog();
585
- const cycles = log.cycles || [];
586
- const totalSaved = log.total_tokens_saved || 0;
587
- const totalIdioms = log.total_idioms || 0;
588
- const idiomsExist = fs.existsSync(SKILL_FILE);
589
-
590
- console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution Status ━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`);
591
- console.log(` Digest cycles : ${BOLD}${cycles.length}${RESET}`);
592
- console.log(` Total idioms : ${BOLD}${totalIdioms}${RESET}`);
593
- console.log(` Tokens saved : ${GREEN}${totalSaved.toLocaleString()} tokens${RESET} (≈ $${(totalSaved / 1_000_000 * 3).toFixed(4)} at $3/M)`);
594
- console.log(` SKILL.md exists : ${idiomsExist ? '✔' : '✗'}`);
595
-
596
- if (cycles.length) {
597
- console.log(`\n ${BOLD}Last 5 digest cycles:${RESET}`);
598
- for (const cycle of cycles.slice(-5).reverse()) {
599
- const ts = (cycle.timestamp || '?').slice(0, 16);
600
- const deltaT = cycle.delta_tokens || 0;
601
- const saved = cycle.tokens_saved || 0;
602
- const addedCount = cycle.idioms_added || 0;
603
- const pct = Math.floor((saved / Math.max(cycle.raw_tokens || 1, 1)) * 100);
604
- console.log(` ${DIM}${ts}${RESET} delta=${deltaT}tok saved=${saved}tok (${pct}%) idioms+=${addedCount}`);
605
- }
606
- }
607
- console.log(`${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n`);
608
- }
609
-
610
- // ── Main ──────────────────────────────────────────────────────────────────────
611
- const COMMANDS = { digest: cmdDigest, show: cmdShow, reset: cmdReset, status: cmdStatus };
612
-
613
- async function main() {
614
- const argv = process.argv.slice(2);
615
- if (!argv.length || ['-h', '--help', 'help'].includes(argv[0])) {
616
- console.log(`
617
- ${BOLD}skill_evolution.js${RESET} Tribunal Skill Evolution Forge
618
-
619
- ${BOLD}Commands:${RESET}
620
- digest [--dry-run] [--head] Analyze latest git diff and evolve SKILL.md
621
- --dry-run : preview without writing
622
- --head : diff last commit instead of staged
623
- show Print current project-idioms/SKILL.md
624
- status Show digest history and token savings
625
- reset Clear all idioms and start fresh
626
-
627
- ${BOLD}Token Budget:${RESET}
628
- Raw diff -> Semantic Filter -> Only architectural lines -> LLM
629
- Typical savings: 70–90% of tokens. Most trivial commits = 0 tokens.
630
- `);
631
- return;
632
- }
633
-
634
- const cmd = argv[0];
635
- const rest = argv.slice(1);
636
- if (!COMMANDS[cmd]) { console.log(`${RED}✖ Unknown command: '${cmd}'${RESET}`); process.exit(1); }
637
- await COMMANDS[cmd](rest);
638
- }
639
-
640
- module.exports = { semanticDelta, architecturalWeight, parseLlmYamlResponse, loadExistingIdioms };
641
-
642
- if (require.main === module) {
643
- main().catch(err => { console.error(err); process.exit(1); });
644
- }
1
+ #!/usr/bin/env node
2
+ /**
3
+ * skill_evolution.js — Tribunal Kit Skill Evolution Forge
4
+ * =========================================================
5
+ * Analyzes the delta between what the AI proposed and what the developer
6
+ * actually committed, then distills those decisions into evolving
7
+ * project-specific SKILL idioms — WITHOUT sending full files to any LLM.
8
+ *
9
+ * Core Strategy: Semantic Delta Extraction
10
+ * 1. Read the raw git diff of staged/recent changes
11
+ * 2. Strip trivial noise (whitespace, comments, import renames)
12
+ * 3. Score remaining lines for "Architectural Weight"
13
+ * 4. Only high-weight deltas reach the LLM reflection prompt
14
+ * 5. LLM returns structured YAML idiom entries (not prose)
15
+ * 6. Idioms are merged into .agent/skills/project-idioms/SKILL.md
16
+ *
17
+ * Usage:
18
+ * node .agent/scripts/skill_evolution.js digest
19
+ * node .agent/scripts/skill_evolution.js digest --dry-run
20
+ * node .agent/scripts/skill_evolution.js show
21
+ * node .agent/scripts/skill_evolution.js reset
22
+ * node .agent/scripts/skill_evolution.js status
23
+ */
24
+
25
+ 'use strict';
26
+
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const https = require('https');
30
+ const { execSync } = require('child_process');
31
+ const readline = require('readline');
32
+
33
+ // ── Colours ──────────────────────────────────────────────────────────────────
34
+ const { GREEN, YELLOW, CYAN, RED, BLUE, BOLD, DIM, RESET } = require('./_colors');
35
+
36
+ // ── Find .agent directory ─────────────────────────────────────────────────────
37
/**
 * Locate the nearest `.agent` directory by walking upwards from the current
 * working directory.
 *
 * The filesystem root itself is part of the search, so a project that lives
 * directly under the root (or a cwd that *is* the root) is still detected.
 *
 * @returns {string} Absolute path of the first `.agent` directory found.
 *   When none exists an error is printed and the process exits with code 1.
 */
function findAgentDir() {
    let current = path.resolve(process.cwd());
    for (;;) {
        const candidate = path.join(current, '.agent');
        if (fs.existsSync(candidate) && fs.statSync(candidate).isDirectory()) return candidate;
        const parent = path.dirname(current);
        // path.dirname() returns its argument unchanged once the root is reached.
        if (parent === current) break;
        current = parent;
    }
    console.error(`${RED}✖ Error: '.agent' directory not found. Please run 'npx tribunal-kit init' first.${RESET}`);
    process.exit(1);
}
48
+
49
// Resolved once when the module is loaded; findAgentDir() terminates the
// process when no `.agent` directory can be found.
+ const AGENT_DIR = findAgentDir();
50
// Directory and file that hold the generated project-idioms skill.
+ const SKILL_DIR = path.join(AGENT_DIR, 'skills', 'project-idioms');
51
+ const SKILL_FILE = path.join(SKILL_DIR, 'SKILL.md');
52
// Directory and JSON file that hold the digest history read by loadLog()
// and written by saveLog().
+ const HISTORY_DIR = path.join(AGENT_DIR, 'history', 'skill-evolution');
53
+ const LOG_FILE = path.join(HISTORY_DIR, 'digest-log.json');
54
+
55
+ // ── Architectural Weight Patterns ────────────────────────────────────────────
56
// Constructs that mark a line as architecturally significant (weight 2).
const HIGH_WEIGHT_PATTERNS = [
    // Type declarations and inheritance
    /\bclass\b/,
    /\binterface\b/,
    /\btype\s+\w+\s*=/,
    /\bextends\b/,
    /\bimplements\b/,
    // Error handling
    /\bthrow\b/,
    /\bcatch\b/,
    /\btry\b/,
    // Data access and HTTP clients
    /\bprisma\.\w+\(/,
    /\bsupabase\./,
    /\bfetch\(/,
    /\baxios\./,
    // Async and typing conventions
    /\bReturnType\b/,
    /\bPromise</,
    /\basync\s+function/,
    /\bawait\b/,
    // Module surface
    /\bexport\s+(default\s+)?(class|function|const)/,
    /\bmodule\.exports\b/,
    // Routing
    /\bRouter\b|\bapp\.(get|post|put|delete|patch)\(/,
    // Hooks and context
    /\buse[A-Z]\w+\(/,
    /\bcreateContext\(/,
    // Validation and ORM
    /\bz\.object\(/,
    /\bPrisma\b|\bdrizzle\b/,
    // Environment configuration
    /\benv\.\w+/,
    /\bprocess\.env\./,
];

// Lines that never carry architectural meaning (weight 0).
const NOISE_PATTERNS = [
    /^\s*$/,
    /^\s*(\/\/|#|\/\*).*$/,
    /^\s*\*/,
    /^\s*import\s+\{[^}]+\}\s+from\s+['"](?!\.)/,
    /^\s*(console\.(log|warn|error)|print\()/,
    /^\s*\w+\s*[:,]?\s*$/,
];

/**
 * Score a single diff line.
 *
 * One leading `+` or `-` diff marker is dropped and the remainder trimmed
 * before matching.
 *
 * @param {string} line - A line taken from a unified diff.
 * @returns {number} 0 when the line matches a noise pattern, 2 when it
 *   matches a high-weight pattern, otherwise 1. Noise is tested first.
 */
function architecturalWeight(line) {
    const code = line.replace(/^[+-]/, '').trim();
    if (NOISE_PATTERNS.some((re) => re.test(code))) return 0;
    return HIGH_WEIGHT_PATTERNS.some((re) => re.test(code)) ? 2 : 1;
}
80
+
81
+ // ── Levenshtein Semantic Deduplication ───────────────────────────────────────
82
+ // FIX: Replaces over-aggressive substring matching (.includes) with normalised
83
+ // edit-distance similarity. A new idiom must differ by >= 20% from all existing
84
+ // ones to be accepted. Threshold 0.80 = 80% similar → considered a duplicate.
85
+
86
/**
 * Levenshtein edit distance between two strings (insert, delete and
 * substitute each cost 1).
 *
 * Only the previous and the current row of the distance table are kept.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Number of single-character edits turning `a` into `b`.
 */
function levenshtein(a, b) {
    const rowCount = a.length;
    const colCount = b.length;
    // Row 0: turning the empty prefix of `a` into b[0..j) costs j insertions.
    let previous = Array.from({ length: colCount + 1 }, (_, j) => j);

    for (let i = 1; i <= rowCount; i++) {
        // Column 0: turning a[0..i) into the empty string costs i deletions.
        const row = [i];
        for (let j = 1; j <= colCount; j++) {
            if (a[i - 1] === b[j - 1]) {
                row[j] = previous[j - 1];
            } else {
                row[j] = 1 + Math.min(previous[j], row[j - 1], previous[j - 1]);
            }
        }
        previous = row;
    }
    return previous[colCount];
}
101
+
102
/**
 * Similarity of two strings derived from their edit distance.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} `1 - distance / longerLength`; 1.0 when both strings
 *   are empty.
 */
function normalizedSimilarity(a, b) {
    const longest = Math.max(a.length, b.length);
    // Two empty strings are identical; this also avoids dividing by zero.
    if (longest === 0) return 1.0;
    const distance = levenshtein(a, b);
    return 1 - distance / longest;
}
108
+
109
/**
 * Decide whether a candidate pattern is a near-duplicate of a recorded idiom.
 *
 * Both sides are lower-cased and compared with normalizedSimilarity(); the
 * first recorded idiom that reaches the threshold settles the answer.
 *
 * @param {string} newPattern - Candidate pattern text.
 * @param {object[]} existingIdioms - Objects with an optional `pattern` string.
 * @param {number} threshold - Similarity in 0.0–1.0 at or above which the
 *   candidate counts as a duplicate (default 0.80).
 * @returns {boolean} True when at least one recorded idiom is similar enough.
 */
function isDuplicateIdiom(newPattern, existingIdioms, threshold = 0.80) {
    const candidate = newPattern.toLowerCase();
    for (const known of existingIdioms) {
        const knownText = (known.pattern || '').toLowerCase();
        if (normalizedSimilarity(candidate, knownText) >= threshold) return true;
    }
    return false;
}
123
+
124
/**
 * Reduce a unified diff to the hunks that contain at least one
 * architecturally significant change.
 *
 * A hunk is kept in full (context lines included) when any of its added or
 * removed lines scores `>= minWeight` in architecturalWeight(). A file's
 * header lines (`diff --git`, `---`, `+++`) are emitted only together with
 * the first kept hunk of that file, so a diff made purely of trivial changes
 * produces an empty string. Other pre-hunk metadata (`index …`, mode lines)
 * is dropped.
 *
 * @param {string} diffText - Unified diff, e.g. the output of `git diff`.
 * @param {number} [minWeight=2] - Minimum line weight that keeps a hunk.
 * @returns {string} The filtered diff, trimmed; '' when nothing qualifies.
 */
function semanticDelta(diffText, minWeight = 2) {
    const lines = diffText.split('\n');
    const kept = [];
    let pendingHeader = []; // header lines of the current file, not yet emitted
    let hunkLines = [];
    let hunkHasHigh = false;
    let inHunk = false;

    // Emit the buffered hunk (preceded by its file header if still pending)
    // when it qualified, then start over with an empty buffer.
    const flushHunk = () => {
        if (hunkHasHigh) {
            kept.push(...pendingHeader, ...hunkLines);
            pendingHeader = [];
        }
        hunkLines = [];
        hunkHasHigh = false;
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];

        if (line.startsWith('diff --git')) {
            // A new file starts: settle the previous file's last hunk first so
            // it cannot end up below this file's header.
            flushHunk();
            pendingHeader = [line];
            inHunk = false;
            continue;
        }

        // A real file header is a `--- ` line immediately followed by a
        // `+++ ` line. A removed line that merely starts with `--` (seen as
        // `--- …` in the diff) does not have that successor and stays hunk
        // content.
        if (line.startsWith('--- ') && (lines[i + 1] || '').startsWith('+++ ')) {
            if (inHunk) {
                // Plain unified diff without `diff --git` separators.
                flushHunk();
                pendingHeader = [];
                inHunk = false;
            }
            pendingHeader.push(line, lines[i + 1]);
            i++;
            continue;
        }

        if (line.startsWith('@@')) {
            flushHunk();
            hunkLines = [line];
            inHunk = true;
            continue;
        }

        if (!inHunk) continue; // index/mode metadata ahead of the first hunk

        hunkLines.push(line);
        const isChange = line.startsWith('+') || line.startsWith('-');
        if (isChange && architecturalWeight(line) >= minWeight) hunkHasHigh = true;
    }
    flushHunk();

    // Collapse long runs of blank (or single-space) lines.
    return kept.join('\n').replace(/\n( ?\n){3,}/g, '\n\n').trim();
}
153
+
154
+ // ── Git helpers ────────────────────────────────────────────────────────────────
155
/**
 * Run `git diff` and return its output.
 *
 * @param {string} [mode='staged'] - 'staged' diffs the index (`--cached`),
 *   'head' diffs `HEAD~1` against `HEAD`, anything else diffs the working tree.
 * @returns {string} The unified diff (3 context lines), or '' when the
 *   command fails for any reason (git missing, not a repository, no parent
 *   commit, timeout).
 */
function getGitDiff(mode = 'staged') {
    let cmd;
    if (mode === 'staged') cmd = 'git diff --cached --unified=3';
    else if (mode === 'head') cmd = 'git diff HEAD~1 HEAD --unified=3';
    else cmd = 'git diff --unified=3';

    try {
        return execSync(cmd, {
            encoding: 'utf8',
            timeout: 10000,
            // execSync's default buffer is 1 MiB; a larger diff would throw and
            // be misreported as "no diff" by the catch below.
            maxBuffer: 64 * 1024 * 1024,
            stdio: ['pipe', 'pipe', 'pipe'],
        });
    } catch {
        // Best effort by design: callers treat '' as "nothing to digest".
        return '';
    }
}
164
+
165
/**
 * Rough token count for a piece of text: one token per four characters,
 * rounded down, never less than 1.
 *
 * @param {string} text
 * @returns {number} Estimated token count (>= 1).
 */
function countTokensEstimate(text) {
    const charsPerToken = 4;
    const estimate = Math.floor(text.length / charsPerToken);
    return Math.max(1, estimate);
}
168
+
169
+ // ── Idiom management ──────────────────────────────────────────────────────────
170
/**
 * Read the idiom table back out of SKILL_FILE.
 *
 * Every Markdown row shaped like
 * `| <id> | \`<pattern>\` | <reason> | <domain> | <since> |` becomes one entry.
 *
 * @returns {{id: number, pattern: string, reason: string, domain: string, since: string}[]}
 *   Parsed idioms in file order; [] when the file does not exist.
 */
function loadExistingIdioms() {
    if (!fs.existsSync(SKILL_FILE)) return [];

    const markdown = fs.readFileSync(SKILL_FILE, 'utf8');
    const rowRe = /\|\s*(\d+)\s*\|\s*`([^`]+)`\s*\|\s*([^|]+)\|\s*([^|]+)\|\s*([^|]+)\|/g;

    return Array.from(markdown.matchAll(rowRe), ([, id, pattern, reason, domain, since]) => ({
        id: parseInt(id, 10),
        pattern: pattern.trim(),
        reason: reason.trim(),
        domain: domain.trim(),
        since: since.trim(),
    }));
}
184
+
185
/**
 * Next free idiom id.
 *
 * @param {{id: number}[]} idioms - Idioms recorded so far.
 * @returns {number} 1 for an empty list, otherwise the highest id plus one.
 */
function nextIdiomId(idioms) {
    if (!idioms.length) return 1;
    const highest = idioms.reduce((top, entry) => Math.max(top, entry.id), -Infinity);
    return highest + 1;
}
189
+
190
/**
 * Render the complete project-idioms SKILL.md document.
 *
 * Cell values are sanitised so that one idiom always occupies exactly one
 * table row that can be read back by the row pattern used when loading the
 * file: line breaks become spaces, `|` in reason/domain/since becomes `/`,
 * and backticks inside the pattern become `'` (the pattern is wrapped in a
 * code span).
 *
 * @param {{id: number, pattern: string, reason: string, domain: string, since: string}[]} idioms
 *   Idioms to list, in output order.
 * @param {number} digestCount - Number of digest cycles, written to the front
 *   matter and the history section.
 * @returns {string} Markdown text with YAML front matter.
 */
function renderSkillMd(idioms, digestCount) {
    const now = new Date().toISOString().slice(0, 10);

    const flatten = (value) => String(value).replace(/\s*\r?\n\s*/g, ' ');
    const textCell = (value) => flatten(value).replace(/\|/g, '/');
    const codeCell = (value) => flatten(value).replace(/`/g, "'");

    const rows = idioms.map(i =>
        `| ${i.id} | \`${codeCell(i.pattern)}\` | ${textCell(i.reason)} | ${textCell(i.domain)} | ${textCell(i.since)} |`
    );
    const table = rows.length ? rows.join('\n') : '_No idioms recorded yet._';

    // The folded `description: >` scalar needs indented continuation lines to
    // be valid YAML front matter.
    return `---
name: project-idioms
description: >
  Auto-evolved skill containing project-specific architectural idioms.
  Generated by skill_evolution.js — do not edit manually. Commit this
  file to share your Engineering Culture across the team.
version: auto
last-updated: ${now}
digest-cycles: ${digestCount}
pattern: generator
---

# Project Idioms Auto-Evolved Skill

> **Authority Level: ABSOLUTE**
> These idioms were extracted from the developer's own code decisions.
> They override generic agent defaults. Every agent MUST respect them.

---

## How Idioms Are Born

1. Developer commits code that differs from the AI proposal.
2. \`skill_evolution.js digest\` extracts architectural deltas only.
3. A minimal LLM reflection prompt (< 500 tokens) identifies the "WHY."
4. The idiom is recorded here with a stable pattern + reason pair.

---

## Recorded Idioms

| ID | Pattern | Why This Project Uses It | Domain | Since |
|:---|:--------|:-------------------------|:-------|:------|
${table}

---

## Enforcement Rules for All Agents

\`\`\`
□ Before proposing code: scan this skill's idiom table
□ If your proposal contradicts an idiom → flag it explicitly
□ Never override an idiom silently — always ask the developer first
□ When citing an idiom: "Per Project Idiom #N: [pattern] — [reason]"
\`\`\`

---

## Digest History

Last digest: \`${now}\`
Total cycles: \`${digestCount}\`

Run \`node .agent/scripts/skill_evolution.js status\` to see the full log.
`;
}
253
+
254
/**
 * Build the reflection prompt sent to (or pasted into) an LLM.
 *
 * The delta is clipped to `maxChars` characters to bound the prompt size.
 * The output-format example is written as properly nested YAML so the model
 * is shown the list structure it is asked to return.
 *
 * @param {string} delta - Filtered diff text to analyse.
 * @param {number} [maxChars=1500] - Maximum number of delta characters embedded.
 * @returns {string} The complete prompt text.
 */
function generateReflectionPrompt(delta, maxChars = 1500) {
    const excerpt = delta.slice(0, maxChars);
    return `You are analyzing a code delta from a developer who changed an AI-proposed solution.
Your only job: identify the ARCHITECTURAL IDIOM this change reveals about their project.

Rules:
- Return ONLY a YAML list of idioms. No prose. No explanation outside YAML.
- Each idiom: pattern (code signature), reason (1 sentence WHY), domain (backend/frontend/database/general)
- Ignore whitespace, comment, import changes — only architectural choices
- If no meaningful idiom can be extracted, return: "idioms: []"
- Maximum 3 idioms per delta.

Delta:
\`\`\`
${excerpt}
\`\`\`

Output format (YAML only):
idioms:
  - pattern: "<code pattern or convention>"
    reason: "<why this project uses this pattern>"
    domain: "<backend|frontend|database|security|performance|general>"
`;
}
277
+
278
/**
 * Extract idiom entries from an LLM reply that follows the
 * `idioms:` / `- pattern:` / `reason:` / `domain:` layout.
 *
 * This is a line-oriented reader, not a YAML parser: indentation is ignored,
 * everything before the `idioms:` line is skipped, and each value is the text
 * after the FIRST colon of its line, so values that themselves contain colons
 * (routes such as `/users/:id`, object literals, URLs) are preserved in full.
 * One leading and one trailing quote character are stripped from a value.
 *
 * @param {string} response - Raw reply text.
 * @returns {{pattern: string, reason?: string, domain?: string}[]} Entries in
 *   reply order; [] when no `idioms:` line or no pattern is present.
 */
function parseLlmYamlResponse(response) {
    const idioms = [];
    let inIdioms = false;
    let current = {};

    // Text after the first colon, trimmed and unquoted.
    const valueOf = (entry) =>
        entry.slice(entry.indexOf(':') + 1).trim().replace(/^["']|["']$/g, '');

    for (const line of String(response ?? '').split('\n')) {
        const stripped = line.trim();
        if (stripped === 'idioms:') { inIdioms = true; continue; }
        if (!inIdioms) continue;

        if (stripped.startsWith('- pattern:')) {
            // A new list item begins: store the previous one if it was usable.
            if (current.pattern) idioms.push(current);
            current = { pattern: valueOf(stripped) };
        } else if (stripped.startsWith('reason:') && current.pattern) {
            current.reason = valueOf(stripped);
        } else if (stripped.startsWith('domain:') && current.pattern) {
            current.domain = valueOf(stripped);
        }
    }
    if (current.pattern) idioms.push(current);
    return idioms;
}
299
+
300
+ // ── Log helpers ────────────────────────────────────────────────────────────────
301
/**
 * Load the digest log from LOG_FILE.
 *
 * HISTORY_DIR is created first if it is missing. A missing or unparsable
 * log file yields a fresh, empty log object.
 *
 * @returns {object} The parsed log, or
 *   `{ cycles: [], total_tokens_saved: 0, total_idioms: 0 }`.
 */
function loadLog() {
    fs.mkdirSync(HISTORY_DIR, { recursive: true });

    const emptyLog = { cycles: [], total_tokens_saved: 0, total_idioms: 0 };
    if (!fs.existsSync(LOG_FILE)) return emptyLog;

    try {
        return JSON.parse(fs.readFileSync(LOG_FILE, 'utf8'));
    } catch {
        // Unreadable or corrupt log: start over with an empty one.
        return emptyLog;
    }
}
308
+
309
/**
 * Persist the digest log to LOG_FILE as 2-space indented JSON, creating
 * HISTORY_DIR beforehand when necessary.
 *
 * @param {object} log - Log object to write.
 */
function saveLog(log) {
    fs.mkdirSync(HISTORY_DIR, { recursive: true });
    const serialized = JSON.stringify(log, null, 2);
    fs.writeFileSync(LOG_FILE, serialized, 'utf8');
}
313
+
314
+ // ── Auto-LLM API Integration ─────────────────────────────────────────────────
315
+ // GENERATE: Eliminates the manual copy-paste loop by auto-detecting an API key
316
+ // and calling the LLM directly. Falls back to manual mode if no key is found.
317
+ // Supported providers (checked in order): Anthropic → OpenAI → Gemini.
318
+
319
/**
 * Pick the LLM provider to use from the environment.
 *
 * Providers are tried in a fixed order (Anthropic, OpenAI, Gemini); the
 * first one whose API-key variable is set to a non-empty value wins.
 *
 * @returns {{provider: string, key: string}|null} Provider id and key, or
 *   null when none of the variables is set.
 */
function detectLlmProvider() {
    const candidates = [
        ['anthropic', 'ANTHROPIC_API_KEY'],
        ['openai', 'OPENAI_API_KEY'],
        ['gemini', 'GEMINI_API_KEY'],
    ];
    for (const [provider, envName] of candidates) {
        const key = process.env[envName];
        if (key) return { provider, key };
    }
    return null;
}
325
+
326
/**
 * Send the reflection prompt to an LLM API and return the reply text.
 * Uses only the built-in Node.js `https` module — zero external dependencies.
 *
 * @param {string} prompt - The reflection prompt to send.
 * @param {string} provider - 'anthropic' | 'openai' | 'gemini'.
 * @param {string} apiKey - API key for the chosen provider.
 * @returns {Promise<string|null>} Reply text, or null when the provider is
 *   unknown, the request fails or times out, the body is not JSON, or the
 *   JSON does not contain the expected text field.
 */
async function callLlmApi(prompt, provider, apiKey) {
    const timeout = 30000; // 30s max — skill evolution is non-blocking

    // POST a JSON body and resolve with the raw response text.
    function httpsPost(hostname, urlPath, headers, body) {
        return new Promise((resolve, reject) => {
            const data = JSON.stringify(body);
            const req = https.request(
                { method: 'POST', hostname, path: urlPath, headers: { ...headers, 'Content-Length': Buffer.byteLength(data) } },
                (res) => {
                    // Decode as a UTF-8 stream so a multi-byte character split
                    // across two chunks is not corrupted by string concatenation.
                    res.setEncoding('utf8');
                    let raw = '';
                    res.on('data', (chunk) => { raw += chunk; });
                    res.on('end', () => resolve(raw));
                    res.on('error', reject);
                }
            );
            req.on('error', reject);
            req.setTimeout(timeout, () => { req.destroy(new Error('LLM API timeout')); });
            req.write(data);
            req.end();
        });
    }

    try {
        if (provider === 'anthropic') {
            const raw = await httpsPost(
                'api.anthropic.com',
                '/v1/messages',
                {
                    'Content-Type': 'application/json',
                    'x-api-key': apiKey,
                    'anthropic-version': '2023-06-01',
                },
                {
                    model: 'claude-3-haiku-20240307', // Fastest/cheapest — idiom extraction
                    max_tokens: 512,
                    messages: [{ role: 'user', content: prompt }],
                }
            );
            const json = JSON.parse(raw);
            return json?.content?.[0]?.text ?? null;
        }

        if (provider === 'openai') {
            const raw = await httpsPost(
                'api.openai.com',
                '/v1/chat/completions',
                {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${apiKey}`,
                },
                {
                    model: 'gpt-4o-mini', // Cheapest capable model for YAML extraction
                    max_tokens: 512,
                    messages: [{ role: 'user', content: prompt }],
                    temperature: 0.1,
                }
            );
            const json = JSON.parse(raw);
            return json?.choices?.[0]?.message?.content ?? null;
        }

        if (provider === 'gemini') {
            const raw = await httpsPost(
                'generativelanguage.googleapis.com',
                '/v1beta/models/gemini-1.5-flash:generateContent',
                {
                    'Content-Type': 'application/json',
                    // Key travels in a header rather than the query string so it
                    // does not end up in URLs recorded by proxies or logs.
                    'x-goog-api-key': apiKey,
                },
                {
                    contents: [{ parts: [{ text: prompt }] }],
                    generationConfig: { maxOutputTokens: 512, temperature: 0.1 },
                }
            );
            const json = JSON.parse(raw);
            return json?.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
        }
    } catch {
        return null; // Network/parse failure — caller falls back to manual mode
    }
    return null;
}
414
+
415
+ // ── Commands ──────────────────────────────────────────────────────────────────
416
/**
 * Print the reflection prompt and read the pasted LLM reply from stdin.
 *
 * Reading stops at a line equal to `END_RESPONSE`, or when stdin is closed
 * (EOF), so piped or interrupted input cannot leave the digest hanging.
 *
 * @param {string} reflectionPrompt - Prompt shown to the user.
 * @returns {Promise<string>} The pasted lines joined with newlines.
 */
async function readManualLlmResponse(reflectionPrompt) {
    console.log(`\n ${DIM}[3/5] LLM Reflection — copy the prompt below and paste the response${RESET}`);
    console.log(` ${DIM} Tip: Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GEMINI_API_KEY to automate this step.${RESET}`);
    console.log(`\n ${BOLD}${'─'.repeat(60)}${RESET}`);
    console.log(reflectionPrompt);
    console.log(` ${BOLD}${'─'.repeat(60)}${RESET}`);
    console.log(`\n ${BOLD}Paste LLM response below (type END_RESPONSE when done):${RESET}`);

    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const responseLines = [];
    await new Promise(resolve => {
        const listener = (line) => {
            if (line.trim() === 'END_RESPONSE') { rl.removeListener('line', listener); resolve(); }
            else responseLines.push(line);
        };
        rl.on('line', listener);
        rl.once('close', resolve);
    });
    rl.close();
    return responseLines.join('\n');
}

/**
 * `digest` command: turn the latest git diff into new project idioms.
 *
 * Steps: fetch the diff, filter it to the architectural delta, obtain an LLM
 * reflection (API call when a key is configured, manual paste otherwise),
 * parse the idioms, then merge the non-duplicates into SKILL.md and record
 * the cycle in the digest log.
 *
 * @param {string[]} args - Command arguments. `--dry-run` stops before the
 *   LLM step and writes nothing; `--head` diffs the last commit instead of
 *   the staged changes.
 * @returns {Promise<void>}
 */
async function cmdDigest(args) {
    const dryRun = args.includes('--dry-run');
    const diffMode = args.includes('--head') ? 'head' : 'staged';

    console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution — Digest Cycle ━━━━━━━━━━━━━━━━${RESET}`);
    if (dryRun) console.log(` ${YELLOW}DRY RUN — no files will be written${RESET}\n`);

    console.log(` ${DIM}[1/5] Fetching git diff (${diffMode})...${RESET}`);
    const rawDiff = getGitDiff(diffMode);
    if (!rawDiff.trim()) {
        console.log(` ${YELLOW}⚠ No diff found. Commit or stage changes first.${RESET}`);
        console.log(` ${DIM}Tip: Use --head to diff against the last commit.${RESET}\n`);
        return;
    }

    const rawTokens = countTokensEstimate(rawDiff);
    console.log(` ${DIM} Raw diff: ~${rawTokens} tokens (${rawDiff.length} chars)${RESET}`);

    console.log(` ${DIM}[2/5] Extracting architectural delta (Semantic Filter)...${RESET}`);
    const delta = semanticDelta(rawDiff, 2);
    if (!delta.trim()) {
        console.log(` ${GREEN}✔ Delta is 100% trivial (whitespace/comments/imports only).${RESET}`);
        console.log(` ${DIM} No LLM call needed. Zero tokens consumed.${RESET}\n`);
        return;
    }

    const deltaTokens = countTokensEstimate(delta);
    const savedTokens = rawTokens - deltaTokens;
    const savedPct = Math.floor((savedTokens / Math.max(rawTokens, 1)) * 100);
    console.log(` ${GREEN} Filtered to ~${deltaTokens} tokens (${savedPct}% reduction, saved ~${savedTokens} tokens)${RESET}`);

    console.log(`\n ${BOLD}Architectural Delta Preview:${RESET}`);
    const deltaLines = delta.split('\n');
    for (const line of deltaLines.slice(0, 20)) {
        if (line.startsWith('+')) console.log(` ${GREEN}${line}${RESET}`);
        else if (line.startsWith('-')) console.log(` ${RED}${line}${RESET}`);
        else if (line.startsWith('@@')) console.log(` ${BLUE}${line}${RESET}`);
        else console.log(` ${DIM}${line}${RESET}`);
    }
    if (deltaLines.length > 20) console.log(` ${DIM}... (${deltaLines.length - 20} more lines)${RESET}`);

    if (dryRun) {
        console.log(`\n ${YELLOW}[DRY RUN] Would send ${deltaTokens} tokens to LLM for reflection.${RESET}`);
        console.log(` ${DIM}Run without --dry-run to complete the digest.${RESET}\n`);
        return;
    }

    // Try the provider API first; fall back to manual paste when no key is
    // configured or the call did not produce a reply.
    const reflectionPrompt = generateReflectionPrompt(delta);
    let llmResponse = '';

    const llmCreds = detectLlmProvider();
    if (llmCreds) {
        console.log(` ${DIM}[3/5] LLM Reflection — auto-calling ${llmCreds.provider} API...${RESET}`);
        const autoResponse = await callLlmApi(reflectionPrompt, llmCreds.provider, llmCreds.key);
        if (autoResponse) {
            llmResponse = autoResponse;
            console.log(` ${GREEN}✔ Auto-response received (${llmCreds.provider}) — ${llmResponse.split('\n').length} lines${RESET}`);
        } else {
            console.log(` ${YELLOW}⚠ API call failed — falling back to manual mode${RESET}`);
        }
    }

    if (!llmResponse) {
        llmResponse = await readManualLlmResponse(reflectionPrompt);
    }

    console.log(`\n ${DIM}[4/5] Parsing idioms...${RESET}`);
    const newIdioms = parseLlmYamlResponse(llmResponse);
    if (!newIdioms.length) {
        console.log(` ${YELLOW}⚠ No idioms extracted from LLM response.${RESET}`);
        console.log(` ${DIM} The LLM may have returned idioms: [] — no architectural pattern detected.${RESET}\n`);
        return;
    }

    console.log(` ${GREEN}✔ Extracted ${newIdioms.length} idiom(s)${RESET}`);
    for (const idiom of newIdioms) {
        console.log(` ${CYAN} ${idiom.pattern || '?'}${RESET} — ${idiom.reason || ''}`);
    }

    console.log(`\n ${DIM}[5/5] Merging into project-idioms/SKILL.md...${RESET}`);
    const existing = loadExistingIdioms();
    const log = loadLog();
    let nextId = nextIdiomId(existing);
    const today = new Date().toISOString().slice(0, 10);
    const merged = [...existing];
    let added = 0;

    for (const idiom of newIdioms) {
        // Compare against `merged`, not just `existing`, so that two
        // near-identical idioms returned in the same reply are recorded once.
        if (isDuplicateIdiom(idiom.pattern || '', merged)) {
            console.log(` ${DIM} Skipped near-duplicate: ${idiom.pattern}${RESET}`);
            continue;
        }
        merged.push({
            id: nextId, pattern: idiom.pattern || '?',
            reason: idiom.reason || 'No reason provided.',
            domain: idiom.domain || 'general', since: today,
        });
        nextId++;
        added++;
    }

    if (added === 0) {
        console.log(` ${YELLOW}⚠ All extracted idioms were duplicates. SKILL.md unchanged.${RESET}\n`);
        return;
    }

    log.total_idioms = merged.length;
    const skillMd = renderSkillMd(merged, (log.cycles || []).length + 1);
    fs.mkdirSync(SKILL_DIR, { recursive: true });
    fs.writeFileSync(SKILL_FILE, skillMd, 'utf8');

    log.cycles = log.cycles || [];
    log.cycles.push({
        timestamp: new Date().toISOString().slice(0, 19),
        raw_tokens: rawTokens, delta_tokens: deltaTokens,
        tokens_saved: savedTokens, idioms_added: added,
    });
    log.total_tokens_saved = (log.total_tokens_saved || 0) + savedTokens;
    saveLog(log);

    console.log(`\n ${GREEN}✔ ${added} new idiom(s) added to SKILL.md${RESET}`);
    console.log(` ${DIM} File: ${SKILL_FILE}${RESET}`);
    console.log(` ${DIM} Total idioms: ${merged.length}${RESET}`);
    console.log(` ${DIM} Lifetime tokens saved: ${log.total_tokens_saved}${RESET}\n`);
    console.log(` ${CYAN}Commit SKILL.md to share your Engineering Culture with the team.${RESET}\n`);
}
564
+
565
/**
 * `show` command: print SKILL_FILE to stdout, or a hint to run `digest`
 * when the file does not exist yet.
 */
function cmdShow() {
    if (fs.existsSync(SKILL_FILE)) {
        console.log(fs.readFileSync(SKILL_FILE, 'utf8'));
        return;
    }
    console.log(`${YELLOW}No project-idioms skill found. Run 'digest' first.${RESET}`);
}
569
+
570
/**
 * `reset` command: delete SKILL_FILE and LOG_FILE when they exist, reporting
 * each deletion, then print a hint on how to start over.
 */
function cmdReset() {
    const targets = [
        [SKILL_FILE, 'project-idioms/SKILL.md deleted.'],
        [LOG_FILE, 'Digest log cleared.'],
    ];
    for (const [file, notice] of targets) {
        if (!fs.existsSync(file)) continue;
        fs.unlinkSync(file);
        console.log(`${GREEN}✔ ${notice}${RESET}`);
    }
    console.log(`${DIM}Run 'digest' to start a fresh evolution cycle.${RESET}`);
}
575
+
576
/**
 * `status` command: print a summary of the digest log — number of cycles,
 * idiom count, tokens saved with a cost estimate at $3 per million tokens,
 * whether SKILL_FILE exists — followed by the five most recent cycles,
 * newest first.
 */
function cmdStatus() {
    const history = loadLog();
    const cycles = history.cycles || [];
    const tokensSaved = history.total_tokens_saved || 0;
    const idiomCount = history.total_idioms || 0;
    const skillMark = fs.existsSync(SKILL_FILE) ? '✔' : '✗';
    const dollars = (tokensSaved / 1_000_000 * 3).toFixed(4);

    console.log(`\n${BOLD}${CYAN}━━━ Skill Evolution Status ━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`);
    console.log(` Digest cycles : ${BOLD}${cycles.length}${RESET}`);
    console.log(` Total idioms : ${BOLD}${idiomCount}${RESET}`);
    console.log(` Tokens saved : ${GREEN}${tokensSaved.toLocaleString()} tokens${RESET} (≈ $${dollars} at $3/M)`);
    console.log(` SKILL.md exists : ${skillMark}`);

    if (cycles.length) {
        console.log(`\n ${BOLD}Last 5 digest cycles:${RESET}`);
        cycles.slice(-5).reverse().forEach((entry) => {
            const stamp = (entry.timestamp || '?').slice(0, 16);
            const deltaTokens = entry.delta_tokens || 0;
            const savedTokens = entry.tokens_saved || 0;
            const idiomsAdded = entry.idioms_added || 0;
            // Guard against a missing or zero raw_tokens value.
            const rawTokens = Math.max(entry.raw_tokens || 1, 1);
            const percent = Math.floor((savedTokens / rawTokens) * 100);
            console.log(` ${DIM}${stamp}${RESET} delta=${deltaTokens}tok saved=${savedTokens}tok (${percent}%) idioms+=${idiomsAdded}`);
        });
    }
    console.log(`${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n`);
}
602
+
603
+ // ── Main ──────────────────────────────────────────────────────────────────────
604
// Command name → handler. Looked up by own property only (see main()).
const COMMANDS = { digest: cmdDigest, show: cmdShow, reset: cmdReset, status: cmdStatus };

/**
 * CLI entry point: print the help text, or dispatch to the requested command.
 *
 * Only names that are own keys of COMMANDS are accepted, so inherited object
 * properties such as `toString` or `constructor` are reported as unknown
 * commands instead of being invoked silently.
 *
 * @returns {Promise<void>} Settles when the command has finished. An unknown
 *   command prints an error and exits the process with code 1.
 */
async function main() {
    const argv = process.argv.slice(2);
    if (!argv.length || ['-h', '--help', 'help'].includes(argv[0])) {
        console.log(`
${BOLD}skill_evolution.js${RESET} Tribunal Skill Evolution Forge

${BOLD}Commands:${RESET}
digest [--dry-run] [--head] Analyze latest git diff and evolve SKILL.md
--dry-run : preview without writing
--head : diff last commit instead of staged
show Print current project-idioms/SKILL.md
status Show digest history and token savings
reset Clear all idioms and start fresh

${BOLD}Token Budget:${RESET}
Raw diff -> Semantic Filter -> Only architectural lines -> LLM
Typical savings: 70–90% of tokens. Most trivial commits = 0 tokens.
`);
        return;
    }

    const [cmd, ...rest] = argv;
    const handler = Object.prototype.hasOwnProperty.call(COMMANDS, cmd) ? COMMANDS[cmd] : undefined;
    if (!handler) {
        console.log(`${RED}✖ Unknown command: '${cmd}'${RESET}`);
        process.exit(1);
        return; // not reached in production; keeps control flow safe if exit is stubbed
    }
    await handler(rest);
}
632
+
633
// Helpers exposed to code that loads this file with require().
+ module.exports = { semanticDelta, architecturalWeight, parseLlmYamlResponse, loadExistingIdioms };
634
+
635
// Run the CLI only when this file is executed directly, not when it is
// required as a module; an error escaping main() is printed and the process
// exits with code 1.
+ if (require.main === module) {
636
+ main().catch(err => { console.error(err); process.exit(1); });
637
+ }