tribunal-kit 3.1.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/agents/precedence-reviewer.md +213 -0
- package/.agent/scripts/append_flow.js +72 -0
- package/.agent/scripts/case_law_manager.py +525 -0
- package/.agent/scripts/skill_evolution.py +563 -0
- package/.agent/skills/agent-organizer/SKILL.md +8 -0
- package/.agent/skills/ai-prompt-injection-defense/SKILL.md +8 -0
- package/.agent/skills/app-builder/SKILL.md +8 -0
- package/.agent/skills/appflow-wireframe/SKILL.md +8 -0
- package/.agent/skills/architecture/SKILL.md +169 -161
- package/.agent/skills/bash-linux/SKILL.md +9 -0
- package/.agent/skills/brainstorming/SKILL.md +8 -0
- package/.agent/skills/building-native-ui/SKILL.md +9 -0
- package/.agent/skills/clean-code/SKILL.md +8 -0
- package/.agent/skills/config-validator/SKILL.md +8 -0
- package/.agent/skills/deployment-procedures/SKILL.md +8 -0
- package/.agent/skills/devops-incident-responder/SKILL.md +8 -0
- package/.agent/skills/documentation-templates/SKILL.md +8 -0
- package/.agent/skills/edge-computing/SKILL.md +8 -0
- package/.agent/skills/extract-design-system/SKILL.md +8 -0
- package/.agent/skills/game-design-expert/SKILL.md +8 -0
- package/.agent/skills/game-engineering-expert/SKILL.md +8 -0
- package/.agent/skills/geo-fundamentals/SKILL.md +8 -0
- package/.agent/skills/i18n-localization/SKILL.md +9 -0
- package/.agent/skills/intelligent-routing/SKILL.md +8 -0
- package/.agent/skills/lint-and-validate/SKILL.md +8 -0
- package/.agent/skills/local-first/SKILL.md +8 -0
- package/.agent/skills/mcp-builder/SKILL.md +8 -0
- package/.agent/skills/parallel-agents/SKILL.md +8 -0
- package/.agent/skills/plan-writing/SKILL.md +8 -0
- package/.agent/skills/platform-engineer/SKILL.md +8 -0
- package/.agent/skills/playwright-best-practices/SKILL.md +9 -0
- package/.agent/skills/project-idioms/SKILL.md +87 -0
- package/.agent/skills/python-patterns/SKILL.md +8 -0
- package/.agent/skills/readme-builder/SKILL.md +8 -0
- package/.agent/skills/red-team-tactics/SKILL.md +8 -0
- package/.agent/skills/seo-fundamentals/SKILL.md +9 -0
- package/.agent/skills/server-management/SKILL.md +8 -0
- package/.agent/skills/shadcn-ui-expert/SKILL.md +9 -0
- package/.agent/skills/skill-creator/SKILL.md +8 -0
- package/.agent/skills/supabase-postgres-best-practices/SKILL.md +9 -0
- package/.agent/skills/swiftui-expert/SKILL.md +9 -0
- package/.agent/skills/systematic-debugging/SKILL.md +8 -0
- package/.agent/skills/tdd-workflow/SKILL.md +8 -0
- package/.agent/skills/ui-ux-pro-max/SKILL.md +8 -0
- package/.agent/skills/web-accessibility-auditor/SKILL.md +9 -0
- package/.agent/skills/web-design-guidelines/SKILL.md +8 -0
- package/.agent/skills/webapp-testing/SKILL.md +8 -0
- package/.agent/workflows/generate.md +1 -0
- package/.agent/workflows/tribunal-backend.md +2 -1
- package/.agent/workflows/tribunal-database.md +2 -1
- package/.agent/workflows/tribunal-frontend.md +2 -1
- package/.agent/workflows/tribunal-full.md +1 -0
- package/.agent/workflows/tribunal-mobile.md +2 -1
- package/.agent/workflows/tribunal-performance.md +2 -1
- package/README.md +30 -1
- package/bin/tribunal-kit.js +182 -20
- package/package.json +28 -4
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
skill_evolution.py — Tribunal Kit Skill Evolution Forge
|
|
4
|
+
=========================================================
|
|
5
|
+
Analyzes the delta between what the AI proposed and what the developer
|
|
6
|
+
actually committed, then distills those decisions into evolving
|
|
7
|
+
project-specific SKILL idioms — WITHOUT sending full files to any LLM.
|
|
8
|
+
|
|
9
|
+
Core Strategy: Semantic Delta Extraction
|
|
10
|
+
1. Read the raw git diff of staged/recent changes
|
|
11
|
+
2. Strip trivial noise (whitespace, comments, import renames)
|
|
12
|
+
3. Score remaining lines for "Architectural Weight"
|
|
13
|
+
4. Only high-weight deltas reach the LLM reflection prompt
|
|
14
|
+
5. LLM returns structured YAML idiom entries (not prose)
|
|
15
|
+
6. Idioms are merged into .agent/skills/project-idioms/SKILL.md
|
|
16
|
+
|
|
17
|
+
This keeps token consumption minimal — typically < 500 tokens per digest.
|
|
18
|
+
|
|
19
|
+
Usage:
|
|
20
|
+
python .agent/scripts/skill_evolution.py digest
|
|
21
|
+
python .agent/scripts/skill_evolution.py digest --dry-run
|
|
22
|
+
python .agent/scripts/skill_evolution.py show
|
|
23
|
+
python .agent/scripts/skill_evolution.py reset
|
|
24
|
+
python .agent/scripts/skill_evolution.py status
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
import re
|
|
30
|
+
import json
|
|
31
|
+
import subprocess
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from datetime import datetime
|
|
34
|
+
|
|
35
|
+
# ── Colours ──────────────────────────────────────────────────────────────────
|
|
36
|
+
# ANSI escape sequences used to colour and style terminal output.
GREEN, YELLOW, CYAN = "\033[92m", "\033[93m", "\033[96m"
RED, BLUE = "\033[91m", "\033[94m"
BOLD, DIM = "\033[1m", "\033[2m"
RESET = "\033[0m"  # emitted after every styled span in this script
|
|
44
|
+
|
|
45
|
+
# ── Paths ─────────────────────────────────────────────────────────────────────
|
|
46
|
+
def find_agent_dir() -> Path:
    """Locate the nearest ``.agent`` directory at or above the working directory.

    Every directory from ``Path.cwd()`` up to and including the filesystem
    root is inspected, closest first.

    Returns:
        The path of the first ``.agent`` directory found.

    Exits:
        Prints an error message and terminates the process with status 1
        when no ``.agent`` directory exists on that path.
    """
    start = Path.cwd()
    # ``Path.parents`` ends with the filesystem root, so the root is checked too.
    for directory in (start, *start.parents):
        candidate = directory / ".agent"
        if candidate.is_dir():
            return candidate

    print("\033[91m✖ Error: '.agent' directory not found. Please run 'npx tribunal-kit init' first.\033[0m")
    sys.exit(1)
|
|
56
|
+
|
|
57
|
+
# Filesystem layout; every path hangs off the discovered .agent directory.
AGENT_DIR = find_agent_dir()
SKILL_DIR = AGENT_DIR.joinpath("skills", "project-idioms")
SKILL_FILE = SKILL_DIR.joinpath("SKILL.md")
HISTORY_DIR = AGENT_DIR.joinpath("history", "skill-evolution")
LOG_FILE = HISTORY_DIR.joinpath("digest-log.json")
|
|
62
|
+
|
|
63
|
+
# ── Semantic Delta Thresholds ─────────────────────────────────────────────────
|
|
64
|
+
# Regexes applied with re.search(); a diff line matching any of them is
# scored as HIGH architectural weight.
HIGH_WEIGHT_PATTERNS = [
    # Type declarations and inheritance
    r"\bclass\b",
    r"\binterface\b",
    r"\btype\s+\w+\s*=",
    r"\bextends\b",
    r"\bimplements\b",
    # Error handling
    r"\bthrow\b",
    r"\bcatch\b",
    r"\btry\b",
    # Data access and HTTP clients
    r"\bprisma\.\w+\(",
    r"\bsupabase\.",
    r"\bfetch\(",
    r"\baxios\.",
    # Typing helpers and async code
    r"\bReturnType\b",
    r"\bPromise<",
    r"\basync\s+function",
    r"\bawait\b",
    # Module surface and routing
    r"\bexport\s+(default\s+)?(class|function|const)",
    r"\bmodule\.exports\b",
    r"\bRouter\b|\bapp\.(get|post|put|delete|patch)\(",
    # React hooks and context
    r"\buse[A-Z]\w+\(",
    r"\bcreateContext\(",
    # Zod schemas and ORMs
    r"\bz\.object\(",
    r"\bPrisma\b|\bdrizzle\b",
    # Environment access
    r"\benv\.\w+",
    r"\bprocess\.env\.",
]
|
|
92
|
+
|
|
93
|
+
# Regexes applied with re.match(); a diff line matching any of them is
# treated as noise and never escalated to the LLM.
NOISE_PATTERNS = [
    r"^\s*$",                                           # blank line
    r"^\s*(//|#|/\*).*$",                               # line opening with //, # or /*
    r"^\s*\*",                                          # line opening with *
    r"^\s*import\s+\{[^}]+\}\s+from\s+['\"](?!\.)",     # braced import from a module not starting with "."
    r"^\s*(console\.(log|warn|error)|print\()",         # console.log/warn/error or print( call
    r"^\s*\w+\s*[:,]?\s*$",                             # a lone word, optionally followed by ":" or ","
]
|
|
102
|
+
|
|
103
|
+
def architectural_weight(line: str) -> int:
    """Score a single diff line for architectural significance.

    Args:
        line: One line of a unified diff, normally starting with ``+`` or ``-``.

    Returns:
        0 when the line matches any ``NOISE_PATTERNS`` entry, 2 when it
        matches any ``HIGH_WEIGHT_PATTERNS`` entry, otherwise 1. Noise is
        tested first, so a commented-out ``class`` line still scores 0.
    """
    # A diff line carries exactly ONE marker character. Stripping every
    # leading "+"/"-" would also eat real content such as "- item" or "--flag".
    code = (line[1:] if line.startswith(("+", "-")) else line).strip()

    if any(re.match(noise, code) for noise in NOISE_PATTERNS):
        return 0
    if any(re.search(signal, code) for signal in HIGH_WEIGHT_PATTERNS):
        return 2
    return 1
|
|
113
|
+
|
|
114
|
+
def semantic_delta(diff_text: str, min_weight: int = 2) -> str:
    """Reduce a unified diff to the hunks that contain architectural changes.

    A hunk is kept in full (its ``@@`` line plus all of its lines) when at
    least one added/removed line scores ``>= min_weight`` according to
    ``architectural_weight``. A file's header lines (``diff --git``,
    ``---``, ``+++``) are emitted once, immediately before the first kept
    hunk of that file, and are dropped entirely when none of the file's
    hunks is kept. Other metadata outside hunks (``index ...``,
    ``new file mode ...``) is discarded.

    Args:
        diff_text: Unified diff text, e.g. the output of ``git diff``.
        min_weight: Minimum weight a changed line needs for its hunk to be kept.

    Returns:
        The filtered diff, stripped of surrounding whitespace. The result is
        an empty string when no hunk qualifies, which is what callers test
        for to detect a trivial diff.
    """
    lines = diff_text.splitlines()
    kept: list[str] = []
    pending_header: list[str] = []   # header of the current file, not yet emitted
    hunk_lines: list[str] = []
    hunk_is_architectural = False
    in_hunk = False

    def flush_hunk() -> None:
        """Emit the buffered hunk (preceded by its file header) if it qualified."""
        nonlocal hunk_lines, hunk_is_architectural
        if hunk_is_architectural:
            kept.extend(pending_header)
            pending_header.clear()
            kept.extend(hunk_lines)
        hunk_lines = []
        hunk_is_architectural = False

    for index, line in enumerate(lines):
        if line.startswith("diff --git"):
            # Flush BEFORE recording the new header so the previous file's
            # last hunk is not emitted underneath the next file's header.
            flush_hunk()
            pending_header[:] = [line]
            in_hunk = False
            continue

        if line.startswith("@@"):
            flush_hunk()
            hunk_lines = [line]
            in_hunk = True
            continue

        if not in_hunk:
            if line.startswith(("---", "+++")):
                pending_header.append(line)
            continue

        # Inside a hunk a line starting with "---" is normally a removed line
        # whose content begins with "--". Only a "--- x" / "+++ y" pair marks
        # the next file of a plain (non-git) unified diff.
        starts_next_file = (
            line.startswith("--- ")
            and index + 1 < len(lines)
            and lines[index + 1].startswith("+++ ")
        )
        if starts_next_file:
            flush_hunk()
            pending_header[:] = [line]
            in_hunk = False
            continue

        hunk_lines.append(line)
        if line.startswith(("+", "-")) and architectural_weight(line) >= min_weight:
            hunk_is_architectural = True

    flush_hunk()

    result = "\n".join(kept)
    # Collapse runs of three or more blank context lines.
    result = re.sub(r"\n([ ]{0,1}\n){3,}", "\n\n", result)
    return result.strip()
|
|
151
|
+
|
|
152
|
+
# ── Git helpers ────────────────────────────────────────────────────────────────
|
|
153
|
+
def get_git_diff(mode: str = "staged") -> str:
    """Run ``git diff`` and return its output.

    Args:
        mode: ``"staged"`` diffs the index (``--cached``), ``"head"`` diffs
            ``HEAD~1`` against ``HEAD``; any other value diffs the working tree.

    Returns:
        The diff text, or an empty string when git cannot be started, times
        out after 10 seconds, or exits with a non-zero status.
    """
    selectors = {
        "staged": ["--cached"],
        "head": ["HEAD~1", "HEAD"],
    }
    cmd = ["git", "diff", *selectors.get(mode, []), "--unified=3"]

    try:
        # Decode explicitly as UTF-8 with replacement: the locale default can
        # raise UnicodeDecodeError on diffs containing other byte sequences.
        result = subprocess.run(
            cmd,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing git binary as well as permission problems.
        return ""
    return result.stdout if result.returncode == 0 else ""
|
|
169
|
+
|
|
170
|
+
def count_tokens_estimate(text: str) -> int:
    """Approximate the token count of *text* as one token per four characters.

    The estimate never drops below 1, even for an empty string.
    """
    quarter = len(text) // 4
    return quarter if quarter > 1 else 1
|
|
173
|
+
|
|
174
|
+
# ── Idiom management ──────────────────────────────────────────────────────────
|
|
175
|
+
def load_existing_idioms() -> list[dict]:
    """Read the idiom table back out of SKILL.md.

    Returns:
        One dict per table row with the keys ``id`` (int), ``pattern``,
        ``reason``, ``domain`` and ``since`` (stripped strings), in file
        order. An empty list when SKILL.md does not exist.
    """
    if not SKILL_FILE.exists():
        return []

    # Row shape: | ID | `Pattern` | Reason | Domain | Since |
    row_re = re.compile(
        r"\|\s*(\d+)\s*\|\s*`([^`]+)`\s*\|\s*([^|]+)\|\s*([^|]+)\|\s*([^|]+)\|"
    )
    text_columns = ("pattern", "reason", "domain", "since")

    parsed = []
    for row in row_re.finditer(SKILL_FILE.read_text(encoding="utf-8")):
        ident, *cells = row.groups()
        entry = {"id": int(ident)}
        entry.update(zip(text_columns, (cell.strip() for cell in cells)))
        parsed.append(entry)
    return parsed
|
|
195
|
+
|
|
196
|
+
def next_idiom_id(idioms: list[dict]) -> int:
    """Return the ID for the next idiom: one above the highest existing ``id``, or 1 when there are none."""
    return 1 + max((entry["id"] for entry in idioms), default=0)
|
|
200
|
+
|
|
201
|
+
def _idiom_cell(value: object, *, code: bool = False) -> str:
    """Make *value* safe to embed in one cell of the idiom table.

    Line breaks are folded into single spaces so a row stays on one line.
    For a back-ticked code cell (``code=True``) embedded backticks become
    apostrophes and an empty value becomes ``?``; for a plain cell every
    ``|`` becomes the entity ``&#124;``. Pipes inside a code cell are left
    untouched. The output is stable when a rendered cell is sanitised again.
    """
    text = re.sub(r"\s*[\r\n]+\s*", " ", str(value)).strip()
    if code:
        return text.replace("`", "'") or "?"
    return text.replace("|", "&#124;")


def render_skill_md(idioms: list[dict], digest_count: int) -> str:
    """Render the complete SKILL.md document for the given idioms.

    Args:
        idioms: Dicts with the keys ``id``, ``pattern``, ``reason``,
            ``domain`` and ``since``; each becomes one table row.
        digest_count: Number of digest cycles, written into the front matter
            and the "Digest History" section.

    Returns:
        The Markdown text, including YAML front matter stamped with today's
        date. When *idioms* is empty a placeholder line replaces the rows.
    """
    now = datetime.now().strftime("%Y-%m-%d")
    rows = [
        f"| {idiom['id']} | `{_idiom_cell(idiom['pattern'], code=True)}` "
        f"| {_idiom_cell(idiom['reason'])} "
        f"| {_idiom_cell(idiom['domain'])} "
        f"| {_idiom_cell(idiom['since'])} |"
        for idiom in idioms
    ]
    table = "\n".join(rows) if rows else "_No idioms recorded yet._"

    # The lines under "description: >" must be indented to belong to the
    # folded scalar; unindented they make the front matter invalid YAML.
    return f"""---
name: project-idioms
description: >
  Auto-evolved skill containing project-specific architectural idioms.
  Generated by skill_evolution.py — do not edit manually. Commit this
  file to share your Engineering Culture across the team.
version: auto
last-updated: {now}
digest-cycles: {digest_count}
pattern: generator
---

# Project Idioms — Auto-Evolved Skill

> **Authority Level: ABSOLUTE**
> These idioms were extracted from the developer's own code decisions.
> They override generic agent defaults. Every agent MUST respect them.

---

## How Idioms Are Born

1. Developer commits code that differs from the AI proposal.
2. `skill_evolution.py digest` extracts architectural deltas only.
3. A minimal LLM reflection prompt (< 500 tokens) identifies the "WHY."
4. The idiom is recorded here with a stable pattern + reason pair.

---

## Recorded Idioms

| ID | Pattern | Why This Project Uses It | Domain | Since |
|:---|:--------|:-------------------------|:-------|:------|
{table}

---

## Enforcement Rules for All Agents

```
□ Before proposing code: scan this skill's idiom table
□ If your proposal contradicts an idiom → flag it explicitly
□ Never override an idiom silently — always ask the developer first
□ When citing an idiom: "Per Project Idiom #N: [pattern] — [reason]"
```

---

## Digest History

Last digest: `{now}`
Total cycles: `{digest_count}`

Run `python .agent/scripts/skill_evolution.py status` to see the full log.
"""
|
|
269
|
+
|
|
270
|
+
def generate_reflection_prompt(delta: str, max_chars: int = 1500) -> str:
    """Build the LLM prompt that asks for idioms as a YAML list.

    Args:
        delta: The filtered diff to reflect on.
        max_chars: Upper bound on how much of *delta* is embedded. A longer
            delta is cut at the last complete line within the limit and
            followed by an explicit truncation marker.

    Returns:
        The prompt text: instructions, the fenced delta excerpt, and the
        expected YAML output format.
    """
    excerpt = delta
    if len(delta) > max_chars:
        head = delta[:max_chars]
        cut = head.rfind("\n")
        # Prefer a line boundary so the model never sees half a diff line.
        excerpt = (head[:cut] if cut > 0 else head) + "\n[... delta truncated ...]"

    return f"""You are analyzing a code delta from a developer who changed an AI-proposed solution.
Your only job: identify the ARCHITECTURAL IDIOM this change reveals about their project.

Rules:
- Return ONLY a YAML list of idioms. No prose. No explanation outside YAML.
- Each idiom: pattern (code signature), reason (1 sentence WHY), domain (backend/frontend/database/security/performance/general)
- Ignore whitespace, comment, import changes — only architectural choices
- If no meaningful idiom can be extracted, return: "idioms: []"
- Maximum 3 idioms per delta.

Delta:
```
{excerpt}
```

Output format (YAML only):
idioms:
  - pattern: "<code pattern or convention>"
    reason: "<why this project uses this pattern>"
    domain: "<backend|frontend|database|security|performance|general>"
"""
|
|
296
|
+
|
|
297
|
+
_IDIOM_FIELD_RE = re.compile(r"^(-\s*)?(pattern|reason|domain)\s*:\s*(.*)$")


def _unquote_yaml_scalar(value: str) -> str:
    """Strip one layer of matching single or double quotes from *value*."""
    value = value.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        return value[1:-1]
    # Unbalanced input: still drop stray double quotes.
    return value.strip('"')


def parse_llm_yaml_response(response: str) -> list[dict]:
    """Extract idiom entries from an LLM reply without a YAML library.

    Everything before a line that is exactly ``idioms:`` is ignored, so
    surrounding prose and code fences are tolerated. After that marker, a
    ``pattern:``, ``reason:`` or ``domain:`` line prefixed with ``-`` starts a
    new entry; the same keys without a dash extend the current entry. Values
    may be unquoted, single-quoted or double-quoted.

    Args:
        response: Raw text pasted from the LLM.

    Returns:
        A list of dicts holding ``pattern`` and, when present, ``reason`` and
        ``domain``. Entries without a ``pattern`` are dropped. An empty list
        when no entries are found (including the ``idioms: []`` reply).
    """
    idioms: list[dict] = []
    current: dict = {}
    in_idioms = False

    for raw_line in response.splitlines():
        stripped = raw_line.strip()
        if not in_idioms:
            in_idioms = stripped == "idioms:"
            continue

        field = _IDIOM_FIELD_RE.match(stripped)
        if field is None:
            continue
        dash, key, value = field.groups()

        if dash:
            # A leading dash opens the next list item; close the previous one.
            if "pattern" in current:
                idioms.append(current)
            current = {}
        elif not current:
            # A field that belongs to no list item.
            continue
        current[key] = _unquote_yaml_scalar(value)

    if "pattern" in current:
        idioms.append(current)
    return idioms
|
|
323
|
+
|
|
324
|
+
# ── Log helpers ────────────────────────────────────────────────────────────────
|
|
325
|
+
def load_log() -> dict:
    """Load the digest log, falling back to an empty log when it is unusable.

    The history directory is created if missing. A log file that cannot be
    read, is not valid JSON, or does not hold a JSON object is ignored, as a
    deliberate best effort, so a damaged log never blocks a digest.

    Returns:
        A dict that always contains ``cycles`` (a list),
        ``total_tokens_saved`` and ``total_idioms``; values stored in the
        file override the defaults.
    """
    HISTORY_DIR.mkdir(parents=True, exist_ok=True)
    log: dict = {"cycles": [], "total_tokens_saved": 0, "total_idioms": 0}
    if not LOG_FILE.exists():
        return log

    try:
        stored = json.loads(LOG_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both JSON syntax errors and undecodable bytes.
        return log

    if isinstance(stored, dict):
        log.update(stored)
        if not isinstance(log["cycles"], list):
            # Callers append to and slice this value.
            log["cycles"] = []
    return log
|
|
333
|
+
|
|
334
|
+
def save_log(log: dict) -> None:
    """Write *log* to the digest log file as indented JSON, creating the history directory first."""
    HISTORY_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(log, indent=2)
    LOG_FILE.write_text(serialized, encoding="utf-8")
|
|
337
|
+
|
|
338
|
+
# ── Commands ──────────────────────────────────────────────────────────────────
|
|
339
|
+
def _read_pasted_response(terminator: str = "END_RESPONSE") -> str:
    """Read stdin lines until *terminator* or EOF and return them joined by newlines."""
    collected: list[str] = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        if line.strip() == terminator:
            break
        collected.append(line)
    return "\n".join(collected)


def cmd_digest(args: list[str]) -> None:
    """Run one digest cycle: diff -> filter -> LLM reflection -> merge into SKILL.md.

    Args:
        args: Command-line flags. ``--dry-run`` stops after the delta preview
            without writing anything; ``--head`` diffs the last commit
            instead of the staged changes.

    The LLM step is manual: the prompt is printed and the reply is read from
    stdin until a line ``END_RESPONSE`` (or EOF). New idioms are appended to
    SKILL.md and the cycle is recorded in the digest log. An idiom is skipped
    when its pattern is empty, or when it contains or is contained in a
    pattern that already exists or was added earlier in the same cycle.
    """
    dry_run = "--dry-run" in args
    diff_mode = "head" if "--head" in args else "staged"

    print(f"\n{BOLD}{CYAN}━━━ Skill Evolution — Digest Cycle ━━━━━━━━━━━━━━━━{RESET}")
    if dry_run:
        print(f" {YELLOW}DRY RUN — no files will be written{RESET}\n")

    # Step 1: Get diff
    print(f" {DIM}[1/5] Fetching git diff ({diff_mode})...{RESET}")
    raw_diff = get_git_diff(diff_mode)
    if not raw_diff.strip():
        print(f" {YELLOW}⚠ No diff found. Commit or stage changes first.{RESET}")
        print(f" {DIM}Tip: Use --head to diff against the last commit.{RESET}\n")
        return

    raw_tokens = count_tokens_estimate(raw_diff)
    print(f" {DIM} Raw diff: ~{raw_tokens} tokens ({len(raw_diff)} chars){RESET}")

    # Step 2: Extract semantic delta
    print(f" {DIM}[2/5] Extracting architectural delta (Semantic Filter)...{RESET}")
    delta = semantic_delta(raw_diff, min_weight=2)
    if not delta.strip():
        print(f" {GREEN}✔ Delta is 100% trivial (whitespace/comments/imports only).{RESET}")
        print(f" {DIM} No LLM call needed. Zero tokens consumed.{RESET}\n")
        return

    delta_tokens = count_tokens_estimate(delta)
    saved_tokens = raw_tokens - delta_tokens
    saved_pct = int((saved_tokens / max(raw_tokens, 1)) * 100)
    print(f" {GREEN}✔ Filtered to ~{delta_tokens} tokens "
          f"({saved_pct}% reduction, saved ~{saved_tokens} tokens){RESET}")

    # Step 3: Show delta preview (first 20 lines, coloured like a diff)
    print(f"\n {BOLD}Architectural Delta Preview:{RESET}")
    delta_lines = delta.splitlines()
    for line in delta_lines[:20]:
        if line.startswith("+"):
            colour = GREEN
        elif line.startswith("-"):
            colour = RED
        elif line.startswith("@@"):
            colour = BLUE
        else:
            colour = DIM
        print(f" {colour}{line}{RESET}")
    if len(delta_lines) > 20:
        print(f" {DIM}... ({len(delta_lines) - 20} more lines){RESET}")

    if dry_run:
        print(f"\n {YELLOW}[DRY RUN] Would send {delta_tokens} tokens to LLM for reflection.{RESET}")
        print(f" {DIM}Run without --dry-run to complete the digest.{RESET}\n")
        return

    # Step 4: LLM reflection (user pastes response)
    print(f"\n {DIM}[3/5] LLM Reflection — copy the prompt below and paste the response{RESET}")
    print(f"\n {BOLD}{'─'*60}{RESET}")
    print(generate_reflection_prompt(delta))
    print(f" {BOLD}{'─'*60}{RESET}")
    print(f"\n {BOLD}Paste LLM response below (type END_RESPONSE when done):{RESET}")
    llm_response = _read_pasted_response()

    # Step 5: Parse + merge
    print(f"\n {DIM}[4/5] Parsing idioms...{RESET}")
    new_idioms = parse_llm_yaml_response(llm_response)
    if not new_idioms:
        print(f" {YELLOW}⚠ No idioms extracted from LLM response.{RESET}")
        print(f" {DIM} The LLM may have returned idioms: [] — no architectural pattern detected.{RESET}\n")
        return

    print(f" {GREEN}✔ Extracted {len(new_idioms)} idiom(s){RESET}")
    for idiom in new_idioms:
        print(f" {CYAN}• {idiom.get('pattern', '?')}{RESET} — {idiom.get('reason', '')}")

    print(f"\n {DIM}[5/5] Merging into project-idioms/SKILL.md...{RESET}")
    existing = load_existing_idioms()
    log = load_log()
    next_id = next_idiom_id(existing)

    today = datetime.now().strftime("%Y-%m-%d")
    merged = existing.copy()
    # Lower-cased patterns seen so far, including ones added in this cycle,
    # so two near-identical idioms in one response are not both recorded.
    known_patterns = [entry["pattern"].lower() for entry in existing]
    added = 0
    for idiom in new_idioms:
        candidate = str(idiom.get("pattern", "")).strip()
        if not candidate:
            # An empty string is a substring of everything; treat it as
            # unusable rather than as a duplicate.
            print(f" {DIM} Skipped idiom without a pattern.{RESET}")
            continue
        lowered = candidate.lower()
        # Deduplicate with a simple two-way substring check.
        if any(seen and (lowered in seen or seen in lowered) for seen in known_patterns):
            print(f" {DIM} Skipped duplicate: {idiom.get('pattern')}{RESET}")
            continue
        merged.append({
            "id": next_id,
            "pattern": candidate,
            "reason": idiom.get("reason", "No reason provided."),
            "domain": idiom.get("domain", "general"),
            "since": today,
        })
        known_patterns.append(lowered)
        next_id += 1
        added += 1

    if added == 0:
        print(f" {YELLOW}⚠ All extracted idioms were duplicates. SKILL.md unchanged.{RESET}\n")
        return

    # Write SKILL.md
    cycles = log.setdefault("cycles", [])  # tolerate a log without the key
    log["total_idioms"] = len(merged)
    skill_md = render_skill_md(merged, len(cycles) + 1)
    SKILL_DIR.mkdir(parents=True, exist_ok=True)
    SKILL_FILE.write_text(skill_md, encoding="utf-8")

    # Update log
    cycles.append({
        "timestamp": datetime.now().isoformat(timespec="seconds"),
        "raw_tokens": raw_tokens,
        "delta_tokens": delta_tokens,
        "tokens_saved": saved_tokens,
        "idioms_added": added,
    })
    log["total_tokens_saved"] = log.get("total_tokens_saved", 0) + saved_tokens
    save_log(log)

    print(f"\n {GREEN}✔ {added} new idiom(s) added to SKILL.md{RESET}")
    print(f" {DIM} File: {SKILL_FILE}{RESET}")
    print(f" {DIM} Total idioms: {len(merged)}{RESET}")
    print(f" {DIM} Lifetime tokens saved: {log['total_tokens_saved']}{RESET}\n")
    print(f" {CYAN}Commit {SKILL_FILE.name} to share your Engineering Culture with the team.{RESET}\n")
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def cmd_show(args: list[str]) -> None:
    """Print the current SKILL.md, or a hint to run ``digest`` when it does not exist.

    *args* is accepted for a uniform command signature and is not used.
    """
    if SKILL_FILE.exists():
        print(SKILL_FILE.read_text(encoding="utf-8"))
    else:
        print(f"{YELLOW}No project-idioms skill found. Run 'digest' first.{RESET}")
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def cmd_reset(args: list[str]) -> None:
    """Delete SKILL.md and the digest log if they exist, reporting each removal.

    *args* is accepted for a uniform command signature and is not used.
    """
    removals = (
        (SKILL_FILE, "✔ project-idioms/SKILL.md deleted."),
        (LOG_FILE, "✔ Digest log cleared."),
    )
    for target, notice in removals:
        if target.exists():
            target.unlink()
            print(f"{GREEN}{notice}{RESET}")
    print(f"{DIM}Run 'digest' to start a fresh evolution cycle.{RESET}")
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def cmd_status(args: list[str]) -> None:
    """Print a summary of the digest log and the five most recent cycles.

    Shows the cycle count, idiom total, cumulative tokens saved (with a cost
    estimate at $3 per million tokens) and whether SKILL.md exists. *args* is
    accepted for a uniform command signature and is not used.
    """
    log = load_log()
    history = log.get("cycles", [])
    tokens_saved = log.get("total_tokens_saved", 0)
    idiom_total = log.get("total_idioms", 0)
    skill_mark = "✔" if SKILL_FILE.exists() else "✗"

    print(f"\n{BOLD}{CYAN}━━━ Skill Evolution Status ━━━━━━━━━━━━━━━━━━━━━━━━{RESET}")
    print(f" Digest cycles : {BOLD}{len(history)}{RESET}")
    print(f" Total idioms : {BOLD}{idiom_total}{RESET}")
    print(f" Tokens saved : {GREEN}{tokens_saved:,} tokens{RESET} "
          f"(≈ ${tokens_saved / 1_000_000 * 3:.4f} at $3/M)")
    print(f" SKILL.md exists : {skill_mark}")

    if history:
        print(f"\n {BOLD}Last 5 digest cycles:{RESET}")
        # Newest first.
        for entry in history[-5:][::-1]:
            stamp = entry.get("timestamp", "?")[:16]
            delta_tokens = entry.get("delta_tokens", 0)
            saved = entry.get("tokens_saved", 0)
            added = entry.get("idioms_added", 0)
            raw = max(entry.get("raw_tokens", 1), 1)
            percent = int((saved / raw) * 100)
            print(f" {DIM}{stamp}{RESET} "
                  f"delta={delta_tokens}tok saved={saved}tok ({percent}%) "
                  f"idioms+={added}")

    print(f"{CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━{RESET}\n")
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
524
|
+
# Sub-command name -> handler; every handler takes the remaining argv list.
COMMANDS = dict(
    digest=cmd_digest,
    show=cmd_show,
    reset=cmd_reset,
    status=cmd_status,
)
|
|
530
|
+
|
|
531
|
+
def main() -> None:
    """Command-line entry point: print help or dispatch to a sub-command.

    With no arguments, or with ``-h``, ``--help`` or ``help``, the usage text
    is printed. An unknown command prints an error and exits with status 1.
    Otherwise the handler from ``COMMANDS`` receives the remaining arguments.
    """
    # Ensure Unicode output works on Windows terminals
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")

    # An empty command line behaves exactly like an explicit "help".
    cmd, *rest = sys.argv[1:] or ["help"]

    if cmd in ("-h", "--help", "help"):
        print(f"""
{BOLD}skill_evolution.py{RESET} — Tribunal Skill Evolution Forge

{BOLD}Commands:{RESET}
digest [--dry-run] [--head] Analyze latest git diff and evolve SKILL.md
--dry-run : preview without writing
--head : diff last commit instead of staged
show Print current project-idioms/SKILL.md
status Show digest history and token savings
reset Clear all idioms and start fresh

{BOLD}Token Budget:{RESET}
Raw diff -> Semantic Filter -> Only architectural lines -> LLM
Typical savings: 70–90% of tokens. Most trivial commits = 0 tokens.
""")
        return

    handler = COMMANDS.get(cmd)
    if handler is None:
        print(f"{RED}✖ Unknown command: '{cmd}'{RESET}")
        sys.exit(1)
    handler(rest)


if __name__ == "__main__":
    main()
|
|
@@ -7,6 +7,14 @@ last-updated: 2026-04-02
|
|
|
7
7
|
applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
## Hallucination Traps (Read First)
|
|
11
|
+
- ❌ Dispatching sub-agents without a context_summary -> ✅ Always send a trimmed context, never the full conversation
|
|
12
|
+
- ❌ Assuming sub-agents share memory -> ✅ Each agent invocation is stateless unless explicitly passed context
|
|
13
|
+
- ❌ Running agents sequentially when they are independent -> ✅ Use fan-out/fan-in for parallelizable work
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
|
|
10
18
|
# Agent Organizer — Multi-Agent Orchestration Mastery
|
|
11
19
|
|
|
12
20
|
---
|
|
@@ -7,6 +7,14 @@ last-updated: 2026-04-02
|
|
|
7
7
|
applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
## Hallucination Traps (Read First)
|
|
11
|
+
- ❌ Putting user input into role:'system' messages -> ✅ User input MUST go in role:'user' only
|
|
12
|
+
- ❌ Relying on 'ignore previous instructions' disclaimer -> ✅ Delimiters + structural separation are required
|
|
13
|
+
- ❌ Assuming output filtering catches all injection -> ✅ Defense-in-depth: input validation + output validation + structural isolation
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
|
|
10
18
|
# Prompt Injection Defense — AI Security Mastery
|
|
11
19
|
|
|
12
20
|
---
|
|
@@ -7,6 +7,14 @@ last-updated: 2026-04-06
|
|
|
7
7
|
applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
## Hallucination Traps (Read First)
|
|
11
|
+
- ❌ Generating entire applications in one shot -> ✅ Build one module at a time, verify each
|
|
12
|
+
- ❌ Choosing a tech stack without asking the user -> ✅ Always ask about existing preferences, team skills, and deployment target
|
|
13
|
+
- ❌ Hardcoding API keys or secrets during scaffolding -> ✅ Use .env.example with placeholder values from day one
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
|
|
10
18
|
# App Builder — Application Orchestrator
|
|
11
19
|
|
|
12
20
|
---
|
|
@@ -7,6 +7,14 @@ last-updated: 2026-04-02
|
|
|
7
7
|
applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
## Hallucination Traps (Read First)
|
|
11
|
+
- ❌ Drawing wireframes without defined user personas -> ✅ Establish WHO uses each screen before designing
|
|
12
|
+
- ❌ Skipping error/empty/loading states in flow diagrams -> ✅ Every screen needs 4 states: loading, empty, populated, error
|
|
13
|
+
- ❌ Assuming linear user journeys -> ✅ Real users jump between screens, go back, and abandon flows mid-way
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
|
|
10
18
|
# Appflow & Wireframing — Visualization Mastery
|
|
11
19
|
|
|
12
20
|
---
|