superlocalmemory 3.4.10 → 3.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +17 -11
  2. package/docs/skill-evolution.md +77 -10
  3. package/ide/hooks/tool-event-hook.sh +4 -4
  4. package/package.json +1 -1
  5. package/pyproject.toml +3 -2
  6. package/src/superlocalmemory/cli/commands.py +170 -0
  7. package/src/superlocalmemory/cli/main.py +21 -0
  8. package/src/superlocalmemory/cli/setup_wizard.py +54 -11
  9. package/src/superlocalmemory/core/config.py +35 -0
  10. package/src/superlocalmemory/core/consolidation_engine.py +128 -0
  11. package/src/superlocalmemory/core/embedding_worker.py +1 -1
  12. package/src/superlocalmemory/core/engine.py +12 -0
  13. package/src/superlocalmemory/core/fact_consolidator.py +425 -0
  14. package/src/superlocalmemory/core/graph_pruner.py +290 -0
  15. package/src/superlocalmemory/core/maintenance_scheduler.py +20 -0
  16. package/src/superlocalmemory/core/recall_pipeline.py +9 -0
  17. package/src/superlocalmemory/core/tier_manager.py +325 -0
  18. package/src/superlocalmemory/encoding/entity_resolver.py +6 -5
  19. package/src/superlocalmemory/evolution/__init__.py +29 -0
  20. package/src/superlocalmemory/evolution/blind_verifier.py +115 -0
  21. package/src/superlocalmemory/evolution/evolution_store.py +302 -0
  22. package/src/superlocalmemory/evolution/mutation_generator.py +181 -0
  23. package/src/superlocalmemory/evolution/skill_evolver.py +555 -0
  24. package/src/superlocalmemory/evolution/triggers.py +367 -0
  25. package/src/superlocalmemory/evolution/types.py +92 -0
  26. package/src/superlocalmemory/hooks/hook_handlers.py +13 -0
  27. package/src/superlocalmemory/learning/skill_performance_miner.py +44 -11
  28. package/src/superlocalmemory/mcp/server.py +4 -0
  29. package/src/superlocalmemory/mcp/tools_evolution.py +338 -0
  30. package/src/superlocalmemory/retrieval/engine.py +98 -11
  31. package/src/superlocalmemory/retrieval/entity_channel.py +118 -0
  32. package/src/superlocalmemory/retrieval/forgetting_filter.py +22 -7
  33. package/src/superlocalmemory/retrieval/strategy.py +2 -2
  34. package/src/superlocalmemory/server/routes/behavioral.py +19 -15
  35. package/src/superlocalmemory/server/routes/evolution.py +213 -0
  36. package/src/superlocalmemory/server/routes/tiers.py +195 -0
  37. package/src/superlocalmemory/server/unified_daemon.py +39 -5
  38. package/src/superlocalmemory/storage/schema_v3411.py +149 -0
  39. package/src/superlocalmemory/ui/index.html +5 -2
  40. package/src/superlocalmemory/ui/js/lifecycle.js +83 -0
  41. package/src/superlocalmemory/ui/js/ng-skills.js +394 -10
  42. package/src/superlocalmemory.egg-info/PKG-INFO +614 -0
  43. package/src/superlocalmemory.egg-info/SOURCES.txt +335 -0
  44. package/src/superlocalmemory.egg-info/dependency_links.txt +1 -0
  45. package/src/superlocalmemory.egg-info/entry_points.txt +2 -0
  46. package/src/superlocalmemory.egg-info/requires.txt +55 -0
  47. package/src/superlocalmemory.egg-info/top_level.txt +1 -0
@@ -0,0 +1,29 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Skill Evolution Engine — track, analyze, and evolve AI agent skills.
6
+
7
+ 3-trigger system (post-session + degradation + health check) with
8
+ LLM confirmation gate and blind verification.
9
+
10
+ Inspired by: HKUDS/OpenSpace (arXiv:2604.01687), ECC continuous learning.
11
+
12
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
13
+ """
14
+
15
+ from superlocalmemory.evolution.types import (
16
+ EvolutionCandidate,
17
+ EvolutionRecord,
18
+ EvolutionType,
19
+ TriggerType,
20
+ EvolutionStatus,
21
+ )
22
+
23
+ __all__ = [
24
+ "EvolutionCandidate",
25
+ "EvolutionRecord",
26
+ "EvolutionType",
27
+ "TriggerType",
28
+ "EvolutionStatus",
29
+ ]
@@ -0,0 +1,115 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Blind Verifier — information-isolated skill verification.
6
+
7
+ The key insight from EvoSkills (arXiv:2604.01687): when a generator
8
+ creates a skill and the same model verifies it, confirmation bias is
9
+ nearly guaranteed. The verifier must be BLIND to the generator's reasoning.
10
+
11
+ This verifier:
12
+ - Uses a DIFFERENT model from the generator (Haiku vs Sonnet)
13
+ - CANNOT see: original skill, mutation rationale, generator's reasoning
14
+ - CAN see: task description (what the skill should do), evolved SKILL.md
15
+ - Evaluates independently: "Does this skill correctly address the task?"
16
+
17
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import logging
24
+ import re
25
+ from dataclasses import dataclass
26
+ from typing import Optional
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
@dataclass(frozen=True)
class VerificationResult:
    """Immutable verdict produced by the blind verifier.

    Instances are frozen, so a verdict cannot be altered after it has
    been parsed from the verifier's response.
    """

    # True when the verifier accepted the evolved skill.
    passed: bool
    # Verifier's self-reported certainty, nominally in the range 0.0-1.0.
    confidence: float
    # Problems the verifier listed; empty when none were reported.
    issues: tuple[str, ...] = ()
    # Short free-text justification for the verdict.
    reasoning: str = ""
38
+
39
+
40
def build_verification_prompt(
    skill_name: str,
    skill_description: str,
    evolved_content: str,
    *,
    max_content_chars: int = 8000,
) -> str:
    """Build the blind verification prompt.

    The verifier sees ONLY:
    - What the skill is supposed to do (name + description)
    - The evolved skill content

    The verifier does NOT see:
    - The original skill
    - Why it was evolved
    - What evidence triggered evolution
    - The generator's reasoning

    Args:
        skill_name: Name of the skill, shown as its purpose.
        skill_description: Expected behavior of the skill.
        evolved_content: Evolved SKILL.md text to be reviewed.
        max_content_chars: Maximum number of characters of
            ``evolved_content`` embedded in the prompt. Defaults to 8000,
            the limit that was previously hard-coded. Negative values are
            treated as 0.

    Returns:
        The complete prompt text, asking for a JSON verdict.
    """
    # Clamp so a negative limit cannot turn the slice into "drop the tail".
    reviewed = evolved_content[:max(0, max_content_chars)]
    return f"""You are an independent skill quality reviewer. You have NOT seen the original
version of this skill or why it was modified. Evaluate it purely on its merits.

SKILL PURPOSE: {skill_name}
EXPECTED BEHAVIOR: {skill_description}

SKILL CONTENT TO REVIEW:
{reviewed}

EVALUATE:
1. Does the skill clearly explain what to do? (clarity)
2. Are the instructions specific and actionable? (specificity)
3. Are there any obvious errors, contradictions, or missing steps? (correctness)
4. Would an AI agent be able to follow these instructions? (executability)

RESPOND IN JSON FORMAT:
{{
"passed": true/false,
"confidence": 0.0-1.0,
"issues": ["issue1", "issue2"],
"reasoning": "brief explanation"
}}

Be strict. Only pass skills that are genuinely clear, correct, and actionable.
A mediocre skill that might work sometimes should FAIL — evolution should produce
clear improvements, not marginal changes."""
83
+
84
+
85
def parse_verification_response(response: str) -> VerificationResult:
    """Parse the verifier's response into a ``VerificationResult``.

    A flat JSON object containing a ``"passed"`` key is preferred. When
    no such object can be decoded, the text is scanned for verdict
    keywords. Anything still ambiguous is rejected, so a malformed reply
    can never promote a skill.

    Args:
        response: Raw text returned by the verifier model.

    Returns:
        The parsed verdict. Keyword-based verdicts carry confidence 0.6
        and unparseable responses are rejected with confidence 0.3.
    """
    json_match = re.search(r"\{[^{}]*\"passed\"[^{}]*\}", response, re.DOTALL)
    if json_match:
        try:
            data = json.loads(json_match.group(0))

            # bool("false") is True, so string verdicts need explicit handling.
            passed_raw = data.get("passed", False)
            if isinstance(passed_raw, str):
                passed = passed_raw.strip().lower() in ("true", "yes", "1")
            else:
                passed = bool(passed_raw)

            # Keep confidence inside the documented 0.0-1.0 range.
            confidence = min(1.0, max(0.0, float(data.get("confidence", 0.5))))

            # A bare string must stay one issue, not a tuple of characters.
            issues_raw = data.get("issues", [])
            if isinstance(issues_raw, str):
                issues = (issues_raw,) if issues_raw else ()
            else:
                issues = tuple(str(item) for item in (issues_raw or ()))

            return VerificationResult(
                passed=passed,
                confidence=confidence,
                issues=issues,
                reasoning=str(data.get("reasoning", "")),
            )
        except (json.JSONDecodeError, TypeError, ValueError):
            # Malformed JSON or unusable field types: fall back to keywords.
            pass

    # Fallback: keyword detection. Explicit verdict markers outrank loose
    # words, and negative signals are checked before positive ones so that
    # text such as "passed: false ... nothing to approve" is rejected.
    lower = response.lower()
    if any(kw in lower for kw in ("\"passed\": false", "passed: false")):
        return VerificationResult(passed=False, confidence=0.6, reasoning="keyword match")

    if any(kw in lower for kw in ("\"passed\": true", "passed: true")):
        return VerificationResult(passed=True, confidence=0.6, reasoning="keyword match")

    if any(kw in lower for kw in ("reject", "fail")):
        return VerificationResult(passed=False, confidence=0.6, reasoning="keyword match")

    if any(kw in lower for kw in ("approve", "looks good")):
        return VerificationResult(passed=True, confidence=0.6, reasoning="keyword match")

    # Default: reject if can't parse (conservative)
    return VerificationResult(
        passed=False,
        confidence=0.3,
        reasoning="Could not parse verification response",
        issues=("Unparseable response",),
    )
@@ -0,0 +1,302 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Evolution Store — SQLite persistence for skill evolution history.
6
+
7
+ Stores evolution records, lineage DAG, and anti-loop state.
8
+ Uses the same memory.db as the rest of SLM — no separate database.
9
+
10
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import logging
17
+ import sqlite3
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ from superlocalmemory.evolution.types import (
23
+ EvolutionCandidate,
24
+ EvolutionRecord,
25
+ EvolutionStatus,
26
+ EvolutionType,
27
+ TriggerType,
28
+ )
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ _SCHEMA_DDL = """
33
+ CREATE TABLE IF NOT EXISTS skill_evolution_log (
34
+ id TEXT PRIMARY KEY,
35
+ skill_name TEXT NOT NULL,
36
+ parent_skill_id TEXT,
37
+ evolution_type TEXT NOT NULL,
38
+ trigger_type TEXT NOT NULL,
39
+ generation INTEGER DEFAULT 0,
40
+ status TEXT DEFAULT 'candidate',
41
+ mutation_summary TEXT DEFAULT '',
42
+ evidence TEXT DEFAULT '[]',
43
+ original_content TEXT DEFAULT '',
44
+ evolved_content TEXT DEFAULT '',
45
+ content_diff TEXT DEFAULT '',
46
+ blind_verified INTEGER DEFAULT 0,
47
+ rejection_reason TEXT DEFAULT '',
48
+ created_at TEXT NOT NULL,
49
+ completed_at TEXT
50
+ );
51
+
52
+ CREATE INDEX IF NOT EXISTS idx_evo_skill ON skill_evolution_log(skill_name);
53
+ CREATE INDEX IF NOT EXISTS idx_evo_status ON skill_evolution_log(status);
54
+ CREATE INDEX IF NOT EXISTS idx_evo_created ON skill_evolution_log(created_at);
55
+
56
+ CREATE TABLE IF NOT EXISTS evolution_cycle_state (
57
+ key TEXT PRIMARY KEY,
58
+ value INTEGER DEFAULT 0,
59
+ updated_at TEXT
60
+ );
61
+ """
62
+
63
+ # Anti-loop budget
64
+ MAX_EVOLUTIONS_PER_CYCLE = 3
65
+ MAX_ATTEMPTS_PER_SKILL = 3
66
+ MIN_FRESH_INVOCATIONS = 5
67
+
68
+
69
class EvolutionStore:
    """SQLite persistence for evolution history and anti-loop state.

    Every operation opens its own short-lived connection to the database
    file and closes it before returning. The per-cycle evolution counter
    and the evolution log live in SQLite; the "addressed degradations"
    map is held in memory only and is lost when the instance goes away.
    """

    def __init__(self, db_path: str | Path):
        """Remember the database location and create the tables if needed.

        Args:
            db_path: Path of the SQLite database file.
        """
        self._db_path = str(db_path)
        # skill name -> context hashes that have already been acted upon.
        self._addressed_degradations: dict[str, set[str]] = {}
        self._ensure_schema()

    # ------------------------------------------------------------------
    # Connection helpers
    # ------------------------------------------------------------------

    def _connect(self, *, row_dicts: bool = False) -> sqlite3.Connection:
        """Open a connection; the caller is responsible for closing it."""
        conn = sqlite3.connect(self._db_path, timeout=10)
        if row_dicts:
            conn.row_factory = sqlite3.Row
        return conn

    def _fetch_records(self, sql: str, params: tuple) -> list[EvolutionRecord]:
        """Run a SELECT on skill_evolution_log and decode every row."""
        conn = self._connect(row_dicts=True)
        try:
            rows = conn.execute(sql, params).fetchall()
            return [self._row_to_record(dict(r)) for r in rows]
        finally:
            conn.close()

    def _ensure_schema(self) -> None:
        """Create tables and indexes; log (not raise) on OperationalError."""
        conn = self._connect()
        try:
            conn.executescript(_SCHEMA_DDL)
            conn.commit()
        except sqlite3.OperationalError as exc:
            logger.warning("Evolution schema creation failed: %s", exc)
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Anti-loop: per-cycle budget
    # ------------------------------------------------------------------

    def reset_cycle(self) -> None:
        """Reset per-cycle counters. Call at start of each consolidation."""
        now = datetime.now(timezone.utc).isoformat()
        conn = self._connect()
        try:
            conn.execute(
                "INSERT OR REPLACE INTO evolution_cycle_state (key, value, updated_at) "
                "VALUES ('cycle_count', 0, ?)",
                (now,),
            )
            conn.commit()
        finally:
            conn.close()

    def can_evolve(self) -> bool:
        """Check if budget allows another evolution this cycle."""
        return self._get_cycle_count() < MAX_EVOLUTIONS_PER_CYCLE

    def record_evolution_attempt(self) -> None:
        """Increment the cycle counter in the DB.

        The increment is done by SQL (``value = value + 1``) inside one
        transaction rather than read-then-write in Python, so two
        connections recording attempts at the same time cannot overwrite
        each other's update.
        """
        now = datetime.now(timezone.utc).isoformat()
        conn = self._connect()
        try:
            # Make sure the counter row exists, then bump it in place.
            conn.execute(
                "INSERT OR IGNORE INTO evolution_cycle_state (key, value, updated_at) "
                "VALUES ('cycle_count', 0, ?)",
                (now,),
            )
            conn.execute(
                "UPDATE evolution_cycle_state "
                "SET value = COALESCE(value, 0) + 1, updated_at = ? "
                "WHERE key = 'cycle_count'",
                (now,),
            )
            conn.commit()
        finally:
            conn.close()

    def _get_cycle_count(self) -> int:
        """Read current cycle count from DB (0 when no row exists)."""
        conn = self._connect()
        try:
            row = conn.execute(
                "SELECT value FROM evolution_cycle_state WHERE key = 'cycle_count'",
            ).fetchone()
            # A NULL value is treated like a missing counter.
            return int(row[0] or 0) if row else 0
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Anti-loop: addressed degradations (adopted from OpenSpace)
    # ------------------------------------------------------------------

    def is_addressed(self, skill_name: str, context_hash: str) -> bool:
        """Return True if this skill/context pair was already marked."""
        return context_hash in self._addressed_degradations.get(skill_name, set())

    def mark_addressed(self, skill_name: str, context_hash: str) -> None:
        """Remember that this skill/context pair has been handled."""
        self._addressed_degradations.setdefault(skill_name, set()).add(context_hash)

    def prune_recovered(self, active_degraded_skills: set[str]) -> None:
        """Remove tracking for skills that recovered.

        Args:
            active_degraded_skills: Skill names that are still degraded;
                every other tracked skill is forgotten.
        """
        recovered = [
            name for name in self._addressed_degradations
            if name not in active_degraded_skills
        ]
        for name in recovered:
            del self._addressed_degradations[name]

    # ------------------------------------------------------------------
    # CRUD
    # ------------------------------------------------------------------

    def save_record(self, record: EvolutionRecord) -> None:
        """Insert the record, replacing any existing row with the same id."""
        conn = self._connect()
        try:
            conn.execute(
                "INSERT OR REPLACE INTO skill_evolution_log "
                "(id, skill_name, parent_skill_id, evolution_type, trigger_type, "
                " generation, status, mutation_summary, evidence, "
                " original_content, evolved_content, content_diff, "
                " blind_verified, rejection_reason, created_at, completed_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    record.id,
                    record.skill_name,
                    record.parent_skill_id,
                    record.evolution_type.value,
                    record.trigger.value,
                    record.generation,
                    record.status.value,
                    record.mutation_summary,
                    # Evidence is stored as a JSON array of strings.
                    json.dumps(list(record.evidence)),
                    record.original_content,
                    record.evolved_content,
                    record.content_diff,
                    1 if record.blind_verified else 0,
                    record.rejection_reason,
                    record.created_at,
                    record.completed_at,
                ),
            )
            conn.commit()
        finally:
            conn.close()

    def get_record(self, record_id: str) -> Optional[EvolutionRecord]:
        """Return the record with the given id, or None if absent."""
        found = self._fetch_records(
            "SELECT * FROM skill_evolution_log WHERE id = ?",
            (record_id,),
        )
        return found[0] if found else None

    def get_skill_history(self, skill_name: str, limit: int = 20) -> list[EvolutionRecord]:
        """Return up to ``limit`` records for one skill, newest first."""
        return self._fetch_records(
            "SELECT * FROM skill_evolution_log "
            "WHERE skill_name = ? ORDER BY created_at DESC LIMIT ?",
            (skill_name, limit),
        )

    def get_recent(self, limit: int = 10) -> list[EvolutionRecord]:
        """Return up to ``limit`` records across all skills, newest first."""
        return self._fetch_records(
            "SELECT * FROM skill_evolution_log "
            "ORDER BY created_at DESC LIMIT ?",
            (limit,),
        )

    def count_attempts(self, skill_name: str) -> int:
        """Count logged evolutions for a skill whose status is not 'promoted'."""
        conn = self._connect()
        try:
            row = conn.execute(
                "SELECT COUNT(*) FROM skill_evolution_log "
                "WHERE skill_name = ? AND status NOT IN ('promoted')",
                (skill_name,),
            ).fetchone()
            return row[0] if row else 0
        finally:
            conn.close()

    def has_exceeded_attempts(self, skill_name: str) -> bool:
        """Return True once a skill has used up MAX_ATTEMPTS_PER_SKILL."""
        return self.count_attempts(skill_name) >= MAX_ATTEMPTS_PER_SKILL

    def get_stats(self) -> dict:
        """Summarize the evolution log.

        Returns:
            A dict with ``total``, ``by_status``, ``by_type`` and
            ``cycle_budget_remaining`` (never below zero).
        """
        conn = self._connect()
        try:
            total = conn.execute(
                "SELECT COUNT(*) FROM skill_evolution_log",
            ).fetchone()[0]
            by_status = dict(
                conn.execute(
                    "SELECT status, COUNT(*) FROM skill_evolution_log GROUP BY status",
                ).fetchall()
            )
            by_type = dict(
                conn.execute(
                    "SELECT evolution_type, COUNT(*) FROM skill_evolution_log GROUP BY evolution_type",
                ).fetchall()
            )
        finally:
            conn.close()

        # Clamp: a counter pushed past the budget must not report a
        # negative remaining budget.
        remaining = max(0, MAX_EVOLUTIONS_PER_CYCLE - self._get_cycle_count())
        return {
            "total": total,
            "by_status": by_status,
            "by_type": by_type,
            "cycle_budget_remaining": remaining,
        }

    def _row_to_record(self, row: dict) -> EvolutionRecord:
        """Convert one skill_evolution_log row (as a dict) to a record."""
        try:
            decoded = json.loads(row.get("evidence") or "[]")
        except (json.JSONDecodeError, TypeError):
            decoded = []
        # Only a JSON array is valid evidence; tuple() on a string or an
        # object would silently yield characters or keys instead.
        evidence = tuple(decoded) if isinstance(decoded, list) else ()

        return EvolutionRecord(
            id=row["id"],
            skill_name=row["skill_name"],
            parent_skill_id=row.get("parent_skill_id"),
            evolution_type=EvolutionType(row["evolution_type"]),
            trigger=TriggerType(row["trigger_type"]),
            generation=row.get("generation", 0),
            status=EvolutionStatus(row.get("status", "candidate")),
            mutation_summary=row.get("mutation_summary", ""),
            evidence=evidence,
            original_content=row.get("original_content", ""),
            evolved_content=row.get("evolved_content", ""),
            content_diff=row.get("content_diff", ""),
            blind_verified=bool(row.get("blind_verified", 0)),
            rejection_reason=row.get("rejection_reason", ""),
            created_at=row.get("created_at", ""),
            completed_at=row.get("completed_at"),
        )
@@ -0,0 +1,181 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Mutation Generator — LLM-driven skill improvement.
6
+
7
+ Reads the original SKILL.md + failure evidence + performance data,
8
+ generates an improved version. Apply-retry cycle (3 attempts) for
9
+ malformed output.
10
+
11
+ Token-driven termination: <EVOLUTION_COMPLETE> or <EVOLUTION_FAILED>.
12
+ Adopted from OpenSpace evolver.py patterns.
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import re
21
+ from typing import Optional
22
+
23
+ from superlocalmemory.evolution.types import (
24
+ EvolutionCandidate,
25
+ EvolutionType,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ MAX_APPLY_RETRIES = 3
31
+ MAX_CONTENT_CHARS = 12_000 # Truncate skill content in prompt
32
+
33
+
34
def build_mutation_prompt(
    candidate: EvolutionCandidate,
    original_content: str,
) -> str:
    """Build the LLM prompt for skill mutation.

    The template is chosen by the candidate's evolution type: FIX and
    DERIVED prompts embed the (truncated) current skill content and the
    effective score, while every other type gets the capture prompt,
    which is built from the skill name and evidence only.
    """
    # Cap the skill text so the prompt stays within MAX_CONTENT_CHARS.
    skill_text = original_content[:MAX_CONTENT_CHARS]
    bullet_lines = [f"- {item}" for item in candidate.evidence]
    evidence_text = "\n".join(bullet_lines)

    kind = candidate.evolution_type
    if kind == EvolutionType.FIX:
        return _fix_prompt(
            candidate.skill_name, skill_text, evidence_text, candidate.effective_score
        )
    if kind == EvolutionType.DERIVED:
        return _derived_prompt(
            candidate.skill_name, skill_text, evidence_text, candidate.effective_score
        )
    return _captured_prompt(candidate.skill_name, evidence_text)
48
+
49
+
50
def _extract_fenced_skill(output: str) -> Optional[str]:
    """Return the frontmatter-led body of the first markdown fence, if any.

    Code fences nested inside the skill (an opening fence with a language
    tag such as ```bash, later closed by a bare ```) are kept as part of
    the skill instead of ending it at the first inner fence.
    """
    opening = re.search(r"```(?:markdown|md)?\s*\n(?=---\s*\n)", output)
    if opening is None:
        return None

    body_start = opening.end()
    fence_re = re.compile(r"```([^\n`]*)")
    first_fence: Optional[int] = None
    depth = 0
    for fence in fence_re.finditer(output, body_start):
        if first_fence is None:
            first_fence = fence.start()
        info = fence.group(1).strip()
        if re.fullmatch(r"[\w+#.-]+", info):
            # A fence carrying a language tag opens a nested block.
            depth += 1
        elif depth:
            # A bare fence closes the innermost nested block.
            depth -= 1
        else:
            # A bare fence at depth 0 closes the outer skill fence.
            return output[body_start:fence.start()].strip()

    if first_fence is None:
        return None
    # Unbalanced fences: keep the historical behavior of stopping at the
    # first fence that follows the frontmatter.
    return output[body_start:first_fence].strip()


def parse_mutation_output(output: str) -> Optional[str]:
    """Extract evolved SKILL.md content from LLM output.

    Strategies are tried in order:

    1. Content of a markdown code fence that starts with ``---``.
    2. Content following an ``<EVOLUTION_COMPLETE>`` token.
    3. YAML frontmatter (``---`` followed by ``name:``) found directly in
       the text, taken through to the end of the output.

    Args:
        output: Raw text produced by the mutation model.

    Returns:
        The extracted skill text, or None if the output contains
        ``<EVOLUTION_FAILED>`` or no valid content was found.
    """
    if "<EVOLUTION_FAILED>" in output:
        return None

    # Try extracting from code fence
    fenced = _extract_fenced_skill(output)
    if fenced is not None:
        return fenced

    # Try extracting after EVOLUTION_COMPLETE token
    complete_match = re.search(
        r"<EVOLUTION_COMPLETE>\s*(---\s*\n.*)",
        output,
        re.DOTALL,
    )
    if complete_match:
        return complete_match.group(1).strip()

    # Try finding YAML frontmatter directly
    frontmatter_match = re.search(
        r"(---\s*\nname:.*?)(?:\n---|\Z)",
        output,
        re.DOTALL,
    )
    if frontmatter_match:
        # Return everything from the frontmatter start. The termination
        # token is a protocol marker, not skill content, so drop it.
        tail = output[frontmatter_match.start():]
        return tail.replace("<EVOLUTION_COMPLETE>", "").strip()

    return None
90
+
91
+
92
def validate_skill_content(content: str) -> Optional[str]:
    """Validate evolved skill content.

    Checks, in order: minimum length, presence of a ``---`` delimiter,
    that the frontmatter is closed by a second ``---``, and that the text
    between the first two delimiters contains ``name:``.

    Args:
        content: Candidate SKILL.md text.

    Returns:
        An error message describing the first problem found, or None if
        the content is valid.
    """
    if not content or len(content) < 50:
        return "Content too short (< 50 chars)"

    delimiter_count = content.count("---")
    if delimiter_count == 0:
        return "Missing YAML frontmatter (no --- found)"
    if delimiter_count < 2:
        # A lone delimiter used to skip the name check and pass as valid.
        return "Unterminated YAML frontmatter (no closing --- found)"
    if "name:" not in content.split("---")[1]:
        return "Missing 'name:' in frontmatter"
    return None
101
+
102
+
103
def build_retry_prompt(original_prompt: str, error: str, attempt: int) -> str:
    """Build retry prompt after failed mutation attempt.

    The original prompt is repeated unchanged, followed by a blank line
    and a notice that states the attempt number, the validation error and
    the required output format.
    """
    retry_notice = "\n".join(
        [
            f"--- RETRY (attempt {attempt}/{MAX_APPLY_RETRIES}) ---",
            f"Previous output was invalid: {error}",
            "Please generate a valid SKILL.md with proper YAML frontmatter "
            "(--- / name: / description: / ---) followed by markdown instructions.",
            "End with <EVOLUTION_COMPLETE> or <EVOLUTION_FAILED>.",
        ]
    )
    return f"{original_prompt}\n\n{retry_notice}"
113
+
114
+
115
+ # ------------------------------------------------------------------
116
+ # Prompt templates
117
+ # ------------------------------------------------------------------
118
+
119
+ def _fix_prompt(skill_name: str, content: str, evidence: str, score: float) -> str:
120
+ return f"""You are a skill evolution engine. A skill is underperforming and needs repair.
121
+
122
+ SKILL NAME: {skill_name}
123
+ EFFECTIVE SCORE: {score:.0%} (approximate)
124
+
125
+ CURRENT SKILL CONTENT:
126
+ {content}
127
+
128
+ EVIDENCE OF PROBLEMS:
129
+ {evidence}
130
+
131
+ YOUR TASK:
132
+ Generate an improved version of this SKILL.md that addresses the identified problems.
133
+ Keep the same overall structure and purpose. Fix what's broken, don't rewrite from scratch.
134
+
135
+ OUTPUT FORMAT:
136
+ Return the complete improved SKILL.md content inside a markdown code fence.
137
+ The file must start with YAML frontmatter (--- / name: / description: / ---).
138
+ End your response with <EVOLUTION_COMPLETE> if you generated a valid improvement,
139
+ or <EVOLUTION_FAILED> if you cannot improve this skill."""
140
+
141
+
142
+ def _derived_prompt(skill_name: str, content: str, evidence: str, score: float) -> str:
143
+ return f"""You are a skill evolution engine. A skill works for some tasks but not others.
144
+ Create a specialized variant for the failing task type.
145
+
146
+ PARENT SKILL: {skill_name}
147
+ EFFECTIVE SCORE: {score:.0%} (moderate — works sometimes, fails sometimes)
148
+
149
+ PARENT SKILL CONTENT:
150
+ {content}
151
+
152
+ EVIDENCE:
153
+ {evidence}
154
+
155
+ YOUR TASK:
156
+ Create a specialized DERIVED variant that handles the failing cases better.
157
+ Give it a new name (e.g., "{skill_name}-specialized" or a descriptive name).
158
+ Keep the parent's strengths. Add specific handling for the failure patterns.
159
+
160
+ OUTPUT FORMAT:
161
+ Return the complete new SKILL.md inside a markdown code fence.
162
+ Must start with YAML frontmatter (--- / name: / description: / ---).
163
+ End with <EVOLUTION_COMPLETE> or <EVOLUTION_FAILED>."""
164
+
165
+
166
+ def _captured_prompt(skill_name: str, evidence: str) -> str:
167
+ return f"""You are a skill evolution engine. A repeated workflow pattern was detected
168
+ that no existing skill covers. Create a new skill to codify this pattern.
169
+
170
+ PATTERN NAME: {skill_name}
171
+ EVIDENCE:
172
+ {evidence}
173
+
174
+ YOUR TASK:
175
+ Create a new SKILL.md that codifies this workflow pattern into a reusable skill.
176
+ Make it specific and actionable — not generic advice.
177
+
178
+ OUTPUT FORMAT:
179
+ Return the complete SKILL.md inside a markdown code fence.
180
+ Must start with YAML frontmatter (--- / name: / description: / ---).
181
+ End with <EVOLUTION_COMPLETE> or <EVOLUTION_FAILED>."""