tribunal-kit 3.1.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.agent/agents/precedence-reviewer.md +213 -0
  2. package/.agent/scripts/append_flow.js +72 -0
  3. package/.agent/scripts/case_law_manager.py +525 -0
  4. package/.agent/scripts/skill_evolution.py +563 -0
  5. package/.agent/skills/agent-organizer/SKILL.md +8 -0
  6. package/.agent/skills/ai-prompt-injection-defense/SKILL.md +8 -0
  7. package/.agent/skills/app-builder/SKILL.md +8 -0
  8. package/.agent/skills/appflow-wireframe/SKILL.md +8 -0
  9. package/.agent/skills/architecture/SKILL.md +169 -161
  10. package/.agent/skills/bash-linux/SKILL.md +9 -0
  11. package/.agent/skills/brainstorming/SKILL.md +8 -0
  12. package/.agent/skills/building-native-ui/SKILL.md +9 -0
  13. package/.agent/skills/clean-code/SKILL.md +8 -0
  14. package/.agent/skills/config-validator/SKILL.md +8 -0
  15. package/.agent/skills/deployment-procedures/SKILL.md +8 -0
  16. package/.agent/skills/devops-incident-responder/SKILL.md +8 -0
  17. package/.agent/skills/documentation-templates/SKILL.md +8 -0
  18. package/.agent/skills/edge-computing/SKILL.md +8 -0
  19. package/.agent/skills/extract-design-system/SKILL.md +8 -0
  20. package/.agent/skills/game-design-expert/SKILL.md +8 -0
  21. package/.agent/skills/game-engineering-expert/SKILL.md +8 -0
  22. package/.agent/skills/geo-fundamentals/SKILL.md +8 -0
  23. package/.agent/skills/i18n-localization/SKILL.md +9 -0
  24. package/.agent/skills/intelligent-routing/SKILL.md +8 -0
  25. package/.agent/skills/lint-and-validate/SKILL.md +8 -0
  26. package/.agent/skills/local-first/SKILL.md +8 -0
  27. package/.agent/skills/mcp-builder/SKILL.md +8 -0
  28. package/.agent/skills/parallel-agents/SKILL.md +8 -0
  29. package/.agent/skills/plan-writing/SKILL.md +8 -0
  30. package/.agent/skills/platform-engineer/SKILL.md +8 -0
  31. package/.agent/skills/playwright-best-practices/SKILL.md +9 -0
  32. package/.agent/skills/project-idioms/SKILL.md +87 -0
  33. package/.agent/skills/python-patterns/SKILL.md +8 -0
  34. package/.agent/skills/readme-builder/SKILL.md +8 -0
  35. package/.agent/skills/red-team-tactics/SKILL.md +8 -0
  36. package/.agent/skills/seo-fundamentals/SKILL.md +9 -0
  37. package/.agent/skills/server-management/SKILL.md +8 -0
  38. package/.agent/skills/shadcn-ui-expert/SKILL.md +9 -0
  39. package/.agent/skills/skill-creator/SKILL.md +8 -0
  40. package/.agent/skills/supabase-postgres-best-practices/SKILL.md +9 -0
  41. package/.agent/skills/swiftui-expert/SKILL.md +9 -0
  42. package/.agent/skills/systematic-debugging/SKILL.md +8 -0
  43. package/.agent/skills/tdd-workflow/SKILL.md +8 -0
  44. package/.agent/skills/ui-ux-pro-max/SKILL.md +8 -0
  45. package/.agent/skills/web-accessibility-auditor/SKILL.md +9 -0
  46. package/.agent/skills/web-design-guidelines/SKILL.md +8 -0
  47. package/.agent/skills/webapp-testing/SKILL.md +8 -0
  48. package/.agent/workflows/generate.md +1 -0
  49. package/.agent/workflows/tribunal-backend.md +2 -1
  50. package/.agent/workflows/tribunal-database.md +2 -1
  51. package/.agent/workflows/tribunal-frontend.md +2 -1
  52. package/.agent/workflows/tribunal-full.md +1 -0
  53. package/.agent/workflows/tribunal-mobile.md +2 -1
  54. package/.agent/workflows/tribunal-performance.md +2 -1
  55. package/README.md +30 -1
  56. package/bin/tribunal-kit.js +182 -20
  57. package/package.json +28 -4
@@ -0,0 +1,563 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ skill_evolution.py — Tribunal Kit Skill Evolution Forge
4
+ =========================================================
5
+ Analyzes the delta between what the AI proposed and what the developer
6
+ actually committed, then distills those decisions into evolving
7
+ project-specific SKILL idioms — WITHOUT sending full files to any LLM.
8
+
9
+ Core Strategy: Semantic Delta Extraction
10
+ 1. Read the raw git diff of staged/recent changes
11
+ 2. Strip trivial noise (whitespace, comments, import renames)
12
+ 3. Score remaining lines for "Architectural Weight"
13
+ 4. Only high-weight deltas reach the LLM reflection prompt
14
+ 5. LLM returns structured YAML idiom entries (not prose)
15
+ 6. Idioms are merged into .agent/skills/project-idioms/SKILL.md
16
+
17
+ This keeps token consumption minimal — typically < 500 tokens per digest.
18
+
19
+ Usage:
20
+ python .agent/scripts/skill_evolution.py digest
21
+ python .agent/scripts/skill_evolution.py digest --dry-run
22
+ python .agent/scripts/skill_evolution.py show
23
+ python .agent/scripts/skill_evolution.py reset
24
+ python .agent/scripts/skill_evolution.py status
25
+ """
26
+
27
+ import os
28
+ import sys
29
+ import re
30
+ import json
31
+ import subprocess
32
+ from pathlib import Path
33
+ from datetime import datetime
34
+
35
+ # ── Colours ──────────────────────────────────────────────────────────────────
36
+ GREEN = "\033[92m"
37
+ YELLOW = "\033[93m"
38
+ CYAN = "\033[96m"
39
+ RED = "\033[91m"
40
+ BLUE = "\033[94m"
41
+ BOLD = "\033[1m"
42
+ DIM = "\033[2m"
43
+ RESET = "\033[0m"
44
+
45
+ # ── Paths ─────────────────────────────────────────────────────────────────────
46
def find_agent_dir() -> Path:
    """Locate the nearest ``.agent`` directory at or above the working directory.

    The search starts in the current working directory and walks up through
    every ancestor, including the filesystem root itself.

    Returns:
        Path to the first ``.agent`` directory found.

    Exits:
        Prints an error and terminates the process with status 1 when no
        ``.agent`` directory exists anywhere on the way up.
    """
    start = Path.cwd()
    # ``Path.parents`` ends with the root directory, so (unlike a
    # ``while current != current.parent`` loop) the root is checked too.
    for directory in (start, *start.parents):
        candidate = directory / ".agent"
        if candidate.is_dir():
            return candidate

    print("\033[91m✖ Error: '.agent' directory not found. Please run 'npx tribunal-kit init' first.\033[0m")
    sys.exit(1)
56
+
57
+ AGENT_DIR = find_agent_dir()
58
+ SKILL_DIR = AGENT_DIR / "skills" / "project-idioms"
59
+ SKILL_FILE = SKILL_DIR / "SKILL.md"
60
+ HISTORY_DIR = AGENT_DIR / "history" / "skill-evolution"
61
+ LOG_FILE = HISTORY_DIR / "digest-log.json"
62
+
63
+ # ── Semantic Delta Thresholds ─────────────────────────────────────────────────
64
+ # Lines with any of these patterns score HIGH architectural weight
65
HIGH_WEIGHT_PATTERNS = [
    r"\bclass\b",
    r"\binterface\b",
    r"\btype\s+\w+\s*=",
    r"\bextends\b",
    r"\bimplements\b",
    r"\bthrow\b",
    r"\bcatch\b",
    r"\btry\b",
    r"\bprisma\.\w+\(",
    r"\bsupabase\.",
    r"\bfetch\(",
    r"\baxios\.",
    r"\bReturnType\b",
    r"\bPromise<",
    r"\basync\s+function",
    r"\bawait\b",
    r"\bexport\s+(default\s+)?(class|function|const)",
    r"\bmodule\.exports\b",
    r"\bRouter\b|\bapp\.(get|post|put|delete|patch)\(",
    r"\buse[A-Z]\w+\(",  # React hooks
    r"\bcreateContext\(",
    r"\bz\.object\(",  # Zod schemas
    r"\bPrisma\b|\bdrizzle\b",
    r"\benv\.\w+",
    r"\bprocess\.env\.",
]

# Lines that are definitely noise — never escalate to LLM
NOISE_PATTERNS = [
    r"^\s*$",
    r"^\s*(//|#|/\*).*$",
    r"^\s*\*",
    r"^\s*import\s+\{[^}]+\}\s+from\s+['\"](?!\.)",
    r"^\s*(console\.(log|warn|error)|print\()",
    r"^\s*\w+\s*[:,]?\s*$",
]


def architectural_weight(line: str) -> int:
    """Score one diff line: 0 (noise), 1 (low) or 2 (high architectural weight).

    Args:
        line: A diff line, optionally starting with a single ``+``/``-`` marker.

    Returns:
        0 when the code matches any NOISE_PATTERNS entry (checked first),
        2 when it matches any HIGH_WEIGHT_PATTERNS entry, otherwise 1.
    """
    # Remove only the one-character diff marker. Stripping every leading
    # '+'/'-' would also eat real content such as "- item" or "--count".
    code = line[1:] if line[:1] in ("+", "-") else line
    code = code.strip()
    for p in NOISE_PATTERNS:
        if re.match(p, code):
            return 0
    for p in HIGH_WEIGHT_PATTERNS:
        if re.search(p, code):
            return 2
    return 1


def semantic_delta(diff_text: str, min_weight: int = 2) -> str:
    """Reduce a unified git diff to the hunks that carry architectural weight.

    A hunk is kept in full (header, context and change lines) when at least
    one of its ``+``/``-`` lines scores ``min_weight`` or more. The
    ``diff --git`` / ``---`` / ``+++`` header of a file is emitted once,
    immediately before that file's first kept hunk, and not at all when none
    of the file's hunks are kept. Other pre-hunk metadata (``index``, mode
    lines) is dropped.

    Args:
        diff_text: Raw unified diff text.
        min_weight: Minimum ``architectural_weight`` score that makes a hunk
            worth keeping.

    Returns:
        The filtered diff, or an empty string when nothing qualifies.
    """
    kept: list[str] = []
    pending_header: list[str] = []  # current file's header, not yet emitted
    hunk_lines: list[str] = []
    hunk_has_high = False
    in_hunk = False

    def flush_hunk() -> None:
        """Emit the buffered hunk (and its file header) if it qualified."""
        nonlocal pending_header, hunk_lines, hunk_has_high
        if hunk_has_high:
            kept.extend(pending_header)
            kept.extend(hunk_lines)
            pending_header = []  # header is printed once per file
        hunk_lines = []
        hunk_has_high = False

    for line in diff_text.splitlines():
        if line.startswith("diff --git"):
            # New file: close the previous file's last hunk first so it is
            # never emitted underneath the wrong header.
            flush_hunk()
            pending_header = [line]
            in_hunk = False
        elif line.startswith("@@"):
            flush_hunk()
            hunk_lines = [line]
            in_hunk = True
        elif not in_hunk:
            # Only here are '---'/'+++' file headers; inside a hunk such a
            # line is a change whose content starts with '--' or '++'.
            if line.startswith(("---", "+++")):
                pending_header.append(line)
        else:
            hunk_lines.append(line)
            if line.startswith(("+", "-")) and architectural_weight(line) >= min_weight:
                hunk_has_high = True

    # Flush final hunk
    flush_hunk()

    result = "\n".join(kept)
    # Collapse 3+ blank context lines
    result = re.sub(r"\n([ ]{0,1}\n){3,}", "\n\n", result)
    return result.strip()
151
+
152
+ # ── Git helpers ────────────────────────────────────────────────────────────────
153
def get_git_diff(mode: str = "staged") -> str:
    """Run ``git diff`` and return its output.

    Args:
        mode: ``"staged"`` diffs the index (``--cached``), ``"head"`` diffs
            ``HEAD~1`` against ``HEAD``; any other value diffs the working
            tree.

    Returns:
        The diff text, or an empty string when git is missing, cannot be
        executed, times out, or exits with a non-zero status.
    """
    commands = {
        "staged": ["git", "diff", "--cached", "--unified=3"],
        "head": ["git", "diff", "HEAD~1", "HEAD", "--unified=3"],
    }
    cmd = commands.get(mode, ["git", "diff", "--unified=3"])
    try:
        # Decode explicitly as UTF-8 and replace undecodable bytes: the
        # locale default codec can raise UnicodeDecodeError, which the
        # handler below would not catch.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except (subprocess.SubprocessError, OSError):
        return ""
    return result.stdout if result.returncode == 0 else ""
169
+
170
def count_tokens_estimate(text: str) -> int:
    """Approximate a token count as a quarter of the character count, never below 1."""
    approx = len(text) // 4
    return approx if approx > 1 else 1
173
+
174
+ # ── Idiom management ──────────────────────────────────────────────────────────
175
def load_existing_idioms() -> list[dict]:
    """Parse the idiom table in SKILL.md into a list of dicts.

    Each table row of the form
    ``| ID | `pattern` | reason | domain | since |`` becomes a dict with the
    keys ``id`` (int), ``pattern``, ``reason``, ``domain`` and ``since``.
    A pipe written as ``\\|`` inside a cell is treated as cell content and
    unescaped, so values containing ``|`` survive a load.

    Returns:
        The parsed idioms in file order; an empty list when SKILL.md does
        not exist or contains no matching rows.
    """
    if not SKILL_FILE.exists():
        return []

    content = SKILL_FILE.read_text(encoding="utf-8")
    # One table cell: any run of non-pipe characters or escaped pipes.
    cell = r"((?:\\\||[^|])+)"
    # Match rows in the idiom table: | ID | Pattern | Reason | Domain | Since |
    row = re.compile(
        r"\|\s*(\d+)\s*\|\s*`([^`]+)`\s*\|\s*"
        + cell + r"\|\s*" + cell + r"\|\s*" + cell + r"\|"
    )

    def clean(text: str) -> str:
        """Trim a captured cell and turn escaped pipes back into pipes."""
        return text.strip().replace("\\|", "|")

    return [
        {
            "id": int(m.group(1)),
            "pattern": clean(m.group(2)),
            "reason": clean(m.group(3)),
            "domain": clean(m.group(4)),
            "since": clean(m.group(5)),
        }
        for m in row.finditer(content)
    ]
195
+
196
def next_idiom_id(idioms: list[dict]) -> int:
    """Return the id for the next idiom: highest existing ``id`` plus one, or 1 when empty."""
    highest = max((entry["id"] for entry in idioms), default=0)
    return highest + 1
200
+
201
def _table_cell(value) -> str:
    """Make a value safe for a single Markdown table cell.

    Line breaks are folded into one space and every not-yet-escaped ``|`` is
    written as ``\\|`` so the value cannot end the cell early. Already
    escaped pipes are left alone, which keeps repeated rendering stable.
    """
    text = re.sub(r"\s*[\r\n]+\s*", " ", str(value)).strip()
    return re.sub(r"(?<!\\)\|", lambda _m: "\\|", text)


def render_skill_md(idioms: list[dict], digest_count: int) -> str:
    """Render the full SKILL.md content from the idiom list.

    Args:
        idioms: Dicts with the keys ``id``, ``pattern``, ``reason``,
            ``domain`` and ``since``; one table row is produced per entry.
        digest_count: Number of digest cycles, written to the front matter
            and to the "Digest History" section.

    Returns:
        The complete Markdown document, stamped with today's date.
    """
    now = datetime.now().strftime("%Y-%m-%d")
    rows = [
        f"| {idiom['id']} | `{_table_cell(idiom['pattern'])}` "
        f"| {_table_cell(idiom['reason'])} "
        f"| {_table_cell(idiom['domain'])} "
        f"| {_table_cell(idiom['since'])} |"
        for idiom in idioms
    ]
    table = "\n".join(rows) if rows else "_No idioms recorded yet._"

    # The lines under "description: >" are indented: a YAML folded scalar
    # needs an indented body to be valid front matter.
    return f"""---
name: project-idioms
description: >
  Auto-evolved skill containing project-specific architectural idioms.
  Generated by skill_evolution.py — do not edit manually. Commit this
  file to share your Engineering Culture across the team.
version: auto
last-updated: {now}
digest-cycles: {digest_count}
pattern: generator
---

# Project Idioms — Auto-Evolved Skill

> **Authority Level: ABSOLUTE**
> These idioms were extracted from the developer's own code decisions.
> They override generic agent defaults. Every agent MUST respect them.

---

## How Idioms Are Born

1. Developer commits code that differs from the AI proposal.
2. `skill_evolution.py digest` extracts architectural deltas only.
3. A minimal LLM reflection prompt (< 500 tokens) identifies the "WHY."
4. The idiom is recorded here with a stable pattern + reason pair.

---

## Recorded Idioms

| ID | Pattern | Why This Project Uses It | Domain | Since |
|:---|:--------|:-------------------------|:-------|:------|
{table}

---

## Enforcement Rules for All Agents

```
□ Before proposing code: scan this skill's idiom table
□ If your proposal contradicts an idiom → flag it explicitly
□ Never override an idiom silently — always ask the developer first
□ When citing an idiom: "Per Project Idiom #N: [pattern] — [reason]"
```

---

## Digest History

Last digest: `{now}`
Total cycles: `{digest_count}`

Run `python .agent/scripts/skill_evolution.py status` to see the full log.
"""
269
+
270
def generate_reflection_prompt(delta: str) -> str:
    """
    Build the reflection prompt shown to the LLM for one digest cycle.

    Only the first 1500 characters of ``delta`` are embedded, between code
    fences; the surrounding instructions ask for a YAML ``idioms:`` list and
    nothing else.
    """
    excerpt = delta[:1500]
    prompt = f"""You are analyzing a code delta from a developer who changed an AI-proposed solution.
Your only job: identify the ARCHITECTURAL IDIOM this change reveals about their project.

Rules:
- Return ONLY a YAML list of idioms. No prose. No explanation outside YAML.
- Each idiom: pattern (code signature), reason (1 sentence WHY), domain (backend/frontend/database/general)
- Ignore whitespace, comment, import changes — only architectural choices
- If no meaningful idiom can be extracted, return: "idioms: []"
- Maximum 3 idioms per delta.

Delta:
```
{excerpt}
```

Output format (YAML only):
idioms:
- pattern: "<code pattern or convention>"
reason: "<why this project uses this pattern>"
domain: "<backend|frontend|database|security|performance|general>"
"""
    return prompt
296
+
297
def _unquote_scalar(value: str) -> str:
    """Trim a YAML scalar and drop one pair of matching surrounding quotes.

    Both ``"..."`` and ``'...'`` are recognised. Quotes that are not a
    matching outer pair are part of the value and are kept.
    """
    value = value.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        return value[1:-1]
    return value


def parse_llm_yaml_response(response: str) -> list[dict]:
    """Parse the LLM's YAML idiom list without a pyyaml dependency.

    Everything before a line that is exactly ``idioms:`` is ignored. After
    it, each ``- pattern:`` line starts a new idiom and following
    ``reason:`` / ``domain:`` lines fill it in. Indentation is not
    significant.

    Args:
        response: Raw text pasted from the LLM.

    Returns:
        A list of dicts that always contain ``pattern`` and may contain
        ``reason`` and ``domain``. Empty when no ``idioms:`` line or no
        ``- pattern:`` entry is present (for example ``idioms: []``).
    """
    idioms: list[dict] = []
    in_idioms = False
    current: dict = {}

    for line in response.splitlines():
        stripped = line.strip()
        if stripped == "idioms:":
            in_idioms = True
            continue
        if not in_idioms:
            continue
        if stripped.startswith("- pattern:"):
            if current:
                idioms.append(current)
            current = {"pattern": _unquote_scalar(stripped.split(":", 1)[1])}
        elif stripped.startswith("reason:") and current:
            current["reason"] = _unquote_scalar(stripped.split(":", 1)[1])
        elif stripped.startswith("domain:") and current:
            current["domain"] = _unquote_scalar(stripped.split(":", 1)[1])

    # ``current`` is only ever created with a "pattern" key, so being
    # non-empty is enough to know it is a complete entry.
    if current:
        idioms.append(current)

    return idioms
323
+
324
+ # ── Log helpers ────────────────────────────────────────────────────────────────
325
def load_log() -> dict:
    """Load the digest log, falling back to an empty log when unusable.

    The history directory is created if missing. Values stored in the log
    file override the defaults, so the returned dict always has the keys
    ``cycles`` (a list), ``total_tokens_saved`` and ``total_idioms``.

    Returns:
        The stored log merged over the defaults, or just the defaults when
        the file is absent, unreadable, not valid JSON, or not a JSON object.
    """
    HISTORY_DIR.mkdir(parents=True, exist_ok=True)
    log: dict = {"cycles": [], "total_tokens_saved": 0, "total_idioms": 0}
    if not LOG_FILE.exists():
        return log

    try:
        stored = json.loads(LOG_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both malformed JSON and undecodable bytes; a
        # damaged log is treated as "no history" rather than a fatal error.
        return log

    if isinstance(stored, dict):
        log.update(stored)
        # Callers append to and measure "cycles", so it must be a list.
        if not isinstance(log["cycles"], list):
            log["cycles"] = []
    return log
333
+
334
def save_log(log: dict) -> None:
    """Write ``log`` to the digest log file as indented JSON, creating the history directory first."""
    serialized = json.dumps(log, indent=2)
    HISTORY_DIR.mkdir(parents=True, exist_ok=True)
    LOG_FILE.write_text(serialized, encoding="utf-8")
337
+
338
+ # ── Commands ──────────────────────────────────────────────────────────────────
339
def cmd_digest(args: list[str]) -> None:
    """Run one digest cycle: diff -> semantic filter -> LLM reflection -> SKILL.md.

    Args:
        args: Command-line flags. ``--dry-run`` stops after the delta preview
            without reading a response or writing files; ``--head`` diffs the
            last commit instead of the staged changes.

    The LLM step is manual: the prompt is printed and the response is read
    from stdin until a line reading ``END_RESPONSE`` (or end of input).
    Parsed idioms are merged into SKILL.md and the cycle is appended to the
    digest log. An idiom is skipped when its pattern is empty or when it
    contains, or is contained in, a pattern that is already recorded or was
    added earlier in this same cycle.
    """
    dry_run = "--dry-run" in args
    diff_mode = "head" if "--head" in args else "staged"

    print(f"\n{BOLD}{CYAN}━━━ Skill Evolution — Digest Cycle ━━━━━━━━━━━━━━━━{RESET}")
    if dry_run:
        print(f" {YELLOW}DRY RUN — no files will be written{RESET}\n")

    # Step 1: Get diff
    print(f" {DIM}[1/5] Fetching git diff ({diff_mode})...{RESET}")
    raw_diff = get_git_diff(diff_mode)
    if not raw_diff.strip():
        print(f" {YELLOW}⚠ No diff found. Commit or stage changes first.{RESET}")
        print(f" {DIM}Tip: Use --head to diff against the last commit.{RESET}\n")
        return

    raw_tokens = count_tokens_estimate(raw_diff)
    print(f" {DIM} Raw diff: ~{raw_tokens} tokens ({len(raw_diff)} chars){RESET}")

    # Step 2: Extract semantic delta
    print(f" {DIM}[2/5] Extracting architectural delta (Semantic Filter)...{RESET}")
    delta = semantic_delta(raw_diff, min_weight=2)
    if not delta.strip():
        print(f" {GREEN}✔ Delta is 100% trivial (whitespace/comments/imports only).{RESET}")
        print(f" {DIM} No LLM call needed. Zero tokens consumed.{RESET}\n")
        return

    delta_tokens = count_tokens_estimate(delta)
    saved_tokens = raw_tokens - delta_tokens
    saved_pct = int((saved_tokens / max(raw_tokens, 1)) * 100)
    print(f" {GREEN}✔ Filtered to ~{delta_tokens} tokens "
          f"({saved_pct}% reduction, saved ~{saved_tokens} tokens){RESET}")

    # Step 3: Show delta preview
    print(f"\n {BOLD}Architectural Delta Preview:{RESET}")
    delta_lines = delta.splitlines()
    for line in delta_lines[:20]:
        if line.startswith("+"):
            print(f" {GREEN}{line}{RESET}")
        elif line.startswith("-"):
            print(f" {RED}{line}{RESET}")
        elif line.startswith("@@"):
            print(f" {BLUE}{line}{RESET}")
        else:
            print(f" {DIM}{line}{RESET}")
    if len(delta_lines) > 20:
        print(f" {DIM}... ({len(delta_lines) - 20} more lines){RESET}")

    if dry_run:
        print(f"\n {YELLOW}[DRY RUN] Would send {delta_tokens} tokens to LLM for reflection.{RESET}")
        print(f" {DIM}Run without --dry-run to complete the digest.{RESET}\n")
        return

    # Step 4: LLM reflection (user pastes response)
    print(f"\n {DIM}[3/5] LLM Reflection — copy the prompt below and paste the response{RESET}")
    print(f"\n {BOLD}{'─'*60}{RESET}")
    prompt = generate_reflection_prompt(delta)
    print(prompt)
    print(f" {BOLD}{'─'*60}{RESET}")
    print(f"\n {BOLD}Paste LLM response below (type END_RESPONSE when done):{RESET}")

    response_lines = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        if line.strip() == "END_RESPONSE":
            break
        response_lines.append(line)
    llm_response = "\n".join(response_lines)

    # Step 5: Parse + merge
    print(f"\n {DIM}[4/5] Parsing idioms...{RESET}")
    new_idioms = parse_llm_yaml_response(llm_response)
    if not new_idioms:
        print(f" {YELLOW}⚠ No idioms extracted from LLM response.{RESET}")
        print(f" {DIM} The LLM may have returned idioms: [] — no architectural pattern detected.{RESET}\n")
        return

    print(f" {GREEN}✔ Extracted {len(new_idioms)} idiom(s){RESET}")
    for idiom in new_idioms:
        print(f" {CYAN}• {idiom.get('pattern', '?')}{RESET} — {idiom.get('reason', '')}")

    print(f"\n {DIM}[5/5] Merging into project-idioms/SKILL.md...{RESET}")
    existing = load_existing_idioms()
    log = load_log()
    # Tolerate a log that was stored without a usable "cycles" list.
    cycles = log.get("cycles")
    if not isinstance(cycles, list):
        cycles = []
        log["cycles"] = cycles
    next_id = next_idiom_id(existing)

    today = datetime.now().strftime("%Y-%m-%d")
    merged = existing.copy()
    added = 0
    for idiom in new_idioms:
        raw_pattern = idiom.get("pattern", "").strip()
        if not raw_pattern:
            # An empty pattern would render as an empty code span that the
            # table parser cannot read back, so it is never recorded.
            print(f" {DIM} Skipped idiom with empty pattern.{RESET}")
            continue
        # Deduplicate: skip if pattern is highly similar (simple substring
        # check). Comparing against ``merged`` also catches duplicates that
        # appear twice within the same LLM response.
        pattern = raw_pattern.lower()
        if any(pattern in known["pattern"].lower() or known["pattern"].lower() in pattern
               for known in merged):
            print(f" {DIM} Skipped duplicate: {idiom.get('pattern')}{RESET}")
            continue
        merged.append({
            "id": next_id,
            "pattern": raw_pattern,
            "reason": idiom.get("reason", "No reason provided."),
            "domain": idiom.get("domain", "general"),
            "since": today,
        })
        next_id += 1
        added += 1

    if added == 0:
        print(f" {YELLOW}⚠ All extracted idioms were duplicates. SKILL.md unchanged.{RESET}\n")
        return

    # Write SKILL.md
    log["total_idioms"] = len(merged)
    skill_md = render_skill_md(merged, len(cycles) + 1)
    SKILL_DIR.mkdir(parents=True, exist_ok=True)
    SKILL_FILE.write_text(skill_md, encoding="utf-8")

    # Update log
    cycles.append({
        "timestamp": datetime.now().isoformat(timespec="seconds"),
        "raw_tokens": raw_tokens,
        "delta_tokens": delta_tokens,
        "tokens_saved": saved_tokens,
        "idioms_added": added,
    })
    log["total_tokens_saved"] = log.get("total_tokens_saved", 0) + saved_tokens
    save_log(log)

    print(f"\n {GREEN}✔ {added} new idiom(s) added to SKILL.md{RESET}")
    print(f" {DIM} File: {SKILL_FILE}{RESET}")
    print(f" {DIM} Total idioms: {len(merged)}{RESET}")
    print(f" {DIM} Lifetime tokens saved: {log['total_tokens_saved']}{RESET}\n")
    print(f" {CYAN}Commit {SKILL_FILE.name} to share your Engineering Culture with the team.{RESET}\n")
474
+
475
+
476
def cmd_show(args: list[str]) -> None:
    """Print the current project-idioms SKILL.md, or a hint when it does not exist yet."""
    if SKILL_FILE.exists():
        print(SKILL_FILE.read_text(encoding="utf-8"))
        return
    print(f"{YELLOW}No project-idioms skill found. Run 'digest' first.{RESET}")
481
+
482
+
483
def cmd_reset(args: list[str]) -> None:
    """Delete SKILL.md and the digest log when present, confirming each removal."""
    removals = (
        (SKILL_FILE, "✔ project-idioms/SKILL.md deleted."),
        (LOG_FILE, "✔ Digest log cleared."),
    )
    for target, notice in removals:
        if target.exists():
            target.unlink()
            print(f"{GREEN}{notice}{RESET}")
    print(f"{DIM}Run 'digest' to start a fresh evolution cycle.{RESET}")
491
+
492
+
493
def cmd_status(args: list[str]) -> None:
    """Print digest totals from the log and a summary of the five most recent cycles."""
    log = load_log()
    cycles = log.get("cycles", [])
    total_saved = log.get("total_tokens_saved", 0)
    total_idioms = log.get("total_idioms", 0)

    skill_marker = "✔" if SKILL_FILE.exists() else "✗"

    print(f"\n{BOLD}{CYAN}━━━ Skill Evolution Status ━━━━━━━━━━━━━━━━━━━━━━━━{RESET}")
    print(f" Digest cycles : {BOLD}{len(cycles)}{RESET}")
    print(f" Total idioms : {BOLD}{total_idioms}{RESET}")
    print(f" Tokens saved : {GREEN}{total_saved:,} tokens{RESET} "
          f"(≈ ${total_saved / 1_000_000 * 3:.4f} at $3/M)")
    print(f" SKILL.md exists : {skill_marker}")

    if cycles:
        print(f"\n {BOLD}Last 5 digest cycles:{RESET}")
        # Newest first.
        for entry in cycles[-5:][::-1]:
            stamp = entry.get("timestamp", "?")[:16]
            delta_tokens = entry.get("delta_tokens", 0)
            tokens_saved = entry.get("tokens_saved", 0)
            idioms_added = entry.get("idioms_added", 0)
            raw_total = max(entry.get("raw_tokens", 1), 1)
            percent = int((tokens_saved / raw_total) * 100)
            print(f" {DIM}{stamp}{RESET} "
                  f"delta={delta_tokens}tok saved={tokens_saved}tok ({percent}%) "
                  f"idioms+={idioms_added}")

    print(f"{CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━{RESET}\n")
521
+
522
+
523
+ # ── Main ──────────────────────────────────────────────────────────────────────
524
+ COMMANDS = {
525
+ "digest": cmd_digest,
526
+ "show": cmd_show,
527
+ "reset": cmd_reset,
528
+ "status": cmd_status,
529
+ }
530
+
531
def main() -> None:
    """Command-line entry point: print help or dispatch to the requested command.

    With no arguments, or with ``-h`` / ``--help`` / ``help``, the usage text
    is printed. An unknown command prints an error and exits with status 1.
    Any remaining arguments are passed to the command handler.
    """
    # Ensure Unicode output works on Windows terminals
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")

    argv = sys.argv[1:]
    wants_help = not argv or argv[0] in ("-h", "--help", "help")
    if wants_help:
        usage = f"""
{BOLD}skill_evolution.py{RESET} — Tribunal Skill Evolution Forge

{BOLD}Commands:{RESET}
digest [--dry-run] [--head] Analyze latest git diff and evolve SKILL.md
--dry-run : preview without writing
--head : diff last commit instead of staged
show Print current project-idioms/SKILL.md
status Show digest history and token savings
reset Clear all idioms and start fresh

{BOLD}Token Budget:{RESET}
Raw diff -> Semantic Filter -> Only architectural lines -> LLM
Typical savings: 70–90% of tokens. Most trivial commits = 0 tokens.
"""
        print(usage)
        return

    cmd, rest = argv[0], argv[1:]
    handler = COMMANDS.get(cmd)
    if handler is None:
        print(f"{RED}✖ Unknown command: '{cmd}'{RESET}")
        sys.exit(1)
    handler(rest)
560
+
561
+
562
+ if __name__ == "__main__":
563
+ main()
@@ -7,6 +7,14 @@ last-updated: 2026-04-02
7
7
  applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
8
  ---
9
9
 
10
+ ## Hallucination Traps (Read First)
11
+ - ❌ Dispatching sub-agents without a context_summary -> ✅ Always send a trimmed context, never the full conversation
12
+ - ❌ Assuming sub-agents share memory -> ✅ Each agent invocation is stateless unless explicitly passed context
13
+ - ❌ Running agents sequentially when they are independent -> ✅ Use fan-out/fan-in for parallelizable work
14
+
15
+ ---
16
+
17
+
10
18
  # Agent Organizer — Multi-Agent Orchestration Mastery
11
19
 
12
20
  ---
@@ -7,6 +7,14 @@ last-updated: 2026-04-02
7
7
  applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
8
  ---
9
9
 
10
+ ## Hallucination Traps (Read First)
11
+ - ❌ Putting user input into role:'system' messages -> ✅ User input MUST go in role:'user' only
12
+ - ❌ Relying on 'ignore previous instructions' disclaimer -> ✅ Delimiters + structural separation are required
13
+ - ❌ Assuming output filtering catches all injection -> ✅ Defense-in-depth: input validation + output validation + structural isolation
14
+
15
+ ---
16
+
17
+
10
18
  # Prompt Injection Defense — AI Security Mastery
11
19
 
12
20
  ---
@@ -7,6 +7,14 @@ last-updated: 2026-04-06
7
7
  applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
8
  ---
9
9
 
10
+ ## Hallucination Traps (Read First)
11
+ - ❌ Generating entire applications in one shot -> ✅ Build one module at a time, verify each
12
+ - ❌ Choosing a tech stack without asking the user -> ✅ Always ask about existing preferences, team skills, and deployment target
13
+ - ❌ Hardcoding API keys or secrets during scaffolding -> ✅ Use .env.example with placeholder values from day one
14
+
15
+ ---
16
+
17
+
10
18
  # App Builder — Application Orchestrator
11
19
 
12
20
  ---
@@ -7,6 +7,14 @@ last-updated: 2026-04-02
7
7
  applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
8
  ---
9
9
 
10
+ ## Hallucination Traps (Read First)
11
+ - ❌ Drawing wireframes without defined user personas -> ✅ Establish WHO uses each screen before designing
12
+ - ❌ Skipping error/empty/loading states in flow diagrams -> ✅ Every screen needs 4 states: loading, empty, populated, error
13
+ - ❌ Assuming linear user journeys -> ✅ Real users jump between screens, go back, and abandon flows mid-way
14
+
15
+ ---
16
+
17
+
10
18
  # Appflow & Wireframing — Visualization Mastery
11
19
 
12
20
  ---