superlocalmemory 3.4.8 → 3.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Skill Performance Miner — tracks per-skill effectiveness from tool events.
6
+
7
+ Zero-LLM approach: mines tool_events table for Skill tool invocations,
8
+ builds execution traces from surrounding events, computes approximate
9
+ outcome heuristics, and creates skill-level behavioral assertions.
10
+
11
+ Runs as Step 10 in the consolidation pipeline (after Step 9: soft prompts).
12
+ Depends on enriched tool_events (v3.4.10 hook with input_summary/output_summary).
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import hashlib
20
+ import json
21
+ import logging
22
+ import sqlite3
23
+ from collections import Counter, defaultdict
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+
27
logger = logging.getLogger(__name__)

# Thresholds — conservative to avoid hallucinating patterns
MIN_INVOCATIONS = 5  # Don't create assertions for skills with fewer uses
MIN_CONFIDENCE = 0.5  # Soft-prompt injection floor (not referenced in this module)
TRACE_WINDOW = 10  # Number of tool events to look at after a Skill call
RETRY_WINDOW_SECONDS = 300  # 5 minutes — same Skill re-invoked = potential retry
# Fraction of the remaining gap to 1.0 that confidence gains per reinforcement.
REINFORCEMENT_NUDGE = 0.10
# A skill pair must share at least this many sessions before it is persisted.
MIN_CORRELATION_SESSIONS = 3


class SkillPerformanceMiner:
    """Mine the ``tool_events`` table for per-skill performance metrics.

    For every ``Skill`` tool event the miner extracts the skill name, looks
    at the tool events that follow it, scores the invocation as positive,
    neutral or negative with simple heuristics, and persists the aggregate
    as rows in ``behavioral_assertions``:

    - one ``skill_performance`` assertion per skill with at least
      ``MIN_INVOCATIONS`` invocations, and
    - one ``skill_correlation`` assertion per pair of skills that shared at
      least ``MIN_CORRELATION_SESSIONS`` sessions.

    All scores are approximate heuristics; no LLM is involved.
    """

    def __init__(self, db_path: str | Path):
        """Remember the SQLite database path; no connection is opened here."""
        self._db_path = str(db_path)

    def mine(self, profile_id: str = "default") -> dict:
        """Run skill performance mining for one profile.

        Args:
            profile_id: Profile whose tool events are mined.

        Returns:
            Summary dict with ``skills_found``, ``assertions_created``,
            ``assertions_reinforced`` and ``entities_updated`` counters. When
            mining fails an ``error`` key holds the exception text and the
            assertion counters are zero, because nothing was committed.
        """
        result = {
            "skills_found": 0,
            "assertions_created": 0,
            "assertions_reinforced": 0,
            "entities_updated": 0,
        }

        conn = sqlite3.connect(self._db_path, timeout=10)
        conn.row_factory = sqlite3.Row

        try:
            # Step 1: Find all Skill tool invocations
            skill_events = self._get_skill_events(conn, profile_id)
            if not skill_events:
                return result

            # Step 2: Extract skill names from the event summaries
            skill_invocations = self._parse_skill_invocations(skill_events)
            result["skills_found"] = len(
                {inv["skill_name"] for inv in skill_invocations}
            )
            if not skill_invocations:
                return result

            # Step 3: Build execution traces and compute outcomes
            skill_metrics = self._compute_skill_metrics(
                conn, profile_id, skill_invocations,
            )

            # Step 4: Create/update behavioral assertions for each skill
            for skill_name, metrics in skill_metrics.items():
                if metrics["total_invocations"] < MIN_INVOCATIONS:
                    continue
                outcome = self._upsert_skill_assertion(
                    conn, profile_id, skill_name, metrics,
                )
                key = f"assertions_{outcome}"
                result[key] = result.get(key, 0) + 1

            # Step 5: Detect skill correlations (pairs used together)
            correlations = self._detect_skill_correlations(skill_invocations)
            for pair, corr_data in correlations.items():
                if corr_data["count"] >= MIN_CORRELATION_SESSIONS:
                    self._upsert_correlation_assertion(
                        conn, profile_id, pair, corr_data,
                    )

            conn.commit()
        except Exception as exc:
            # Mining is best-effort: report the failure instead of raising.
            logger.warning("Skill performance mining failed: %s", exc)
            # The connection is closed without a commit below, which discards
            # every pending write, so the counters must not claim otherwise.
            result["assertions_created"] = 0
            result["assertions_reinforced"] = 0
            result["error"] = str(exc)
        finally:
            conn.close()

        logger.info(
            "Skill performance mining: %d skills, %d assertions",
            result["skills_found"],
            result.get("assertions_created", 0)
            + result.get("assertions_reinforced", 0),
        )
        return result

    def _get_skill_events(
        self, conn: sqlite3.Connection, profile_id: str,
    ) -> list[dict]:
        """Return every ``Skill`` tool event of the profile, oldest first.

        Rows are converted with ``dict(row)``, so ``conn`` is expected to use
        ``sqlite3.Row`` as its row factory (``mine`` sets this up).
        """
        rows = conn.execute(
            "SELECT id, session_id, tool_name, event_type, input_summary, "
            "output_summary, project_path, created_at "
            "FROM tool_events "
            "WHERE profile_id = ? AND tool_name = 'Skill' "
            "ORDER BY created_at ASC",
            (profile_id,),
        ).fetchall()
        return [dict(r) for r in rows]

    @staticmethod
    def _extract_json_field(raw: object, key: str) -> str:
        """Return the string stored under ``key`` in a JSON-object summary.

        Returns ``""`` when ``raw`` is not a string, is not a parseable JSON
        object (summaries may be truncated), or the value is not a string.
        """
        if not isinstance(raw, str):
            return ""
        text = raw.strip()
        if not text.startswith("{"):
            return ""
        try:
            payload = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError
            return ""
        value = payload.get(key) if isinstance(payload, dict) else None
        return value if isinstance(value, str) else ""

    def _parse_skill_invocations(self, skill_events: list[dict]) -> list[dict]:
        """Turn raw Skill events into invocation records.

        The skill name is read from the ``skill`` key of ``input_summary``
        and, failing that, from the ``commandName`` key of ``output_summary``.
        Events for which no name can be recovered are dropped.

        Returns:
            One dict per usable event with the keys ``skill_name``,
            ``session_id``, ``event_id``, ``created_at`` and ``project_path``.
        """
        invocations = []
        for event in skill_events:
            skill_name = self._extract_json_field(
                event.get("input_summary"), "skill",
            ) or self._extract_json_field(
                event.get("output_summary"), "commandName",
            )
            if not skill_name:
                continue

            invocations.append({
                "skill_name": skill_name,
                "session_id": event.get("session_id", ""),
                "event_id": event.get("id", 0),
                "created_at": event.get("created_at", ""),
                "project_path": event.get("project_path", ""),
            })
        return invocations

    def _fetch_trace(
        self,
        conn: sqlite3.Connection,
        profile_id: str,
        invocation: dict,
    ) -> list[dict]:
        """Return up to ``TRACE_WINDOW`` tool events following an invocation.

        The trace is limited to the invocation's own session so that events
        from other sessions are not credited to the skill. Invocations with
        no recorded session id fall back to a profile-wide trace.
        """
        columns = "SELECT tool_name, event_type, output_summary, created_at "
        session_id = invocation.get("session_id")
        if session_id:
            rows = conn.execute(
                columns
                + "FROM tool_events "
                "WHERE profile_id = ? AND session_id = ? AND id > ? "
                "ORDER BY id ASC LIMIT ?",
                (profile_id, session_id, invocation["event_id"], TRACE_WINDOW),
            ).fetchall()
        else:
            rows = conn.execute(
                columns
                + "FROM tool_events "
                "WHERE profile_id = ? AND id > ? "
                "ORDER BY id ASC LIMIT ?",
                (profile_id, invocation["event_id"], TRACE_WINDOW),
            ).fetchall()
        return [dict(r) for r in rows]

    def _compute_skill_metrics(
        self,
        conn: sqlite3.Connection,
        profile_id: str,
        invocations: list[dict],
    ) -> dict[str, dict]:
        """Compute per-skill metrics using the execution trace heuristic.

        Each invocation is scored by ``_evaluate_trace`` as +1, 0 or -1
        (APPROXIMATE, conservative):
        - POSITIVE: an Edit or Write event appears in the trace
        - NEGATIVE: the same skill is invoked again in the same session
          within ``RETRY_WINDOW_SECONDS``
        - NEGATIVE: a Bash event among the first 3 trace events reports
          an error

        Returns:
            Mapping of skill name to a dict with ``total_invocations``,
            ``positive_signals``, ``negative_signals``, ``effective_score``
            (``(positive - negative) / total`` clamped to [-1, 1] and rounded
            to 3 places), ``session_count`` and ``project_count``.
        """
        tallies: dict[str, dict] = defaultdict(lambda: {
            "total_invocations": 0,
            "positive_signals": 0,
            "negative_signals": 0,
            "sessions": set(),
            "projects": set(),
        })

        for inv in invocations:
            skill = inv["skill_name"]
            tally = tallies[skill]
            tally["total_invocations"] += 1
            tally["sessions"].add(inv["session_id"])
            if inv["project_path"]:
                tally["projects"].add(inv["project_path"])

            trace = self._fetch_trace(conn, profile_id, inv)
            outcome = self._evaluate_trace(skill, inv, trace, invocations)
            if outcome > 0:
                tally["positive_signals"] += 1
            elif outcome < 0:
                tally["negative_signals"] += 1

        result = {}
        for skill, tally in tallies.items():
            total = tally["total_invocations"]
            positive = tally["positive_signals"]
            negative = tally["negative_signals"]
            effective_score = (positive - negative) / total if total > 0 else 0.0
            result[skill] = {
                "total_invocations": total,
                "positive_signals": positive,
                "negative_signals": negative,
                "effective_score": round(max(-1.0, min(1.0, effective_score)), 3),
                "session_count": len(tally["sessions"]),
                "project_count": len(tally["projects"]),
            }
        return result

    @staticmethod
    def _parse_timestamp(value: object) -> datetime | None:
        """Parse an ISO-8601 timestamp; return ``None`` if it is unusable."""
        if not isinstance(value, str) or not value:
            return None
        try:
            # fromisoformat() rejects a trailing "Z" before Python 3.11.
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None

    def _was_retried(
        self,
        skill_name: str,
        invocation: dict,
        all_invocations: list[dict],
    ) -> bool:
        """Tell whether the same skill was invoked again shortly afterwards.

        Only *later* invocations in the same session count: the attempt that
        needed a retry is penalised, the retry itself is not.
        """
        started = self._parse_timestamp(invocation.get("created_at"))
        if started is None:
            return False

        for other in all_invocations:
            if other["event_id"] == invocation["event_id"]:
                continue
            if other["skill_name"] != skill_name:
                continue
            if other["session_id"] != invocation["session_id"]:
                continue
            retried_at = self._parse_timestamp(other.get("created_at"))
            if retried_at is None:
                continue
            try:
                delta = (retried_at - started).total_seconds()
            except TypeError:
                # Naive and timezone-aware timestamps cannot be compared.
                continue
            if 0 < delta <= RETRY_WINDOW_SECONDS:
                return True
        return False

    @staticmethod
    def _has_early_bash_error(trace: list[dict]) -> bool:
        """Tell whether a Bash event in the first 3 trace events failed."""
        error_markers = (
            "error", "failed", "command not found", "permission denied",
        )
        for event in trace[:3]:
            if event["tool_name"] != "Bash":
                continue
            output = event.get("output_summary", "")
            if output and any(kw in output.lower() for kw in error_markers):
                return True
        return False

    def _evaluate_trace(
        self,
        skill_name: str,
        invocation: dict,
        trace: list[dict],
        all_invocations: list[dict],
    ) -> int:
        """Evaluate the execution trace after a Skill call.

        Args:
            skill_name: Name of the skill that was invoked.
            invocation: The invocation record being scored.
            trace: Tool events that followed the invocation, oldest first.
            all_invocations: Every parsed invocation, used to spot retries.

        Returns:
            +1, 0 or -1. An empty trace always scores 0.
        """
        if not trace:
            return 0

        score = 0

        # Signal 1: Productive tools in trace → +1
        productive_tools = {"Edit", "Write"}
        if any(t["tool_name"] in productive_tools for t in trace[:TRACE_WINDOW]):
            score += 1

        # Signal 2: Same Skill re-invoked within RETRY_WINDOW → -1
        if self._was_retried(skill_name, invocation, all_invocations):
            score -= 1

        # Signal 3: Bash errors in first 3 events → -1
        if self._has_early_bash_error(trace):
            score -= 1

        # Clamp to [-1, +1]
        return max(-1, min(1, score))

    def _detect_skill_correlations(
        self, invocations: list[dict],
    ) -> dict[tuple[str, str], dict]:
        """Find skills frequently used together in the same session.

        Returns:
            The (at most 10) most common alphabetically ordered skill pairs
            that share at least 2 sessions, mapped to
            ``{"count": n, "sessions": n}``.
        """
        session_skills: dict[str, set[str]] = defaultdict(set)
        for inv in invocations:
            session_id = inv["session_id"]
            # Events without a session id cannot be said to share a session.
            if not session_id:
                continue
            session_skills[session_id].add(inv["skill_name"])

        pair_counts: Counter = Counter()
        for skills in session_skills.values():
            ordered = sorted(skills)
            for index, first in enumerate(ordered):
                for second in ordered[index + 1:]:
                    pair_counts[(first, second)] += 1

        return {
            pair: {"count": count, "sessions": count}
            for pair, count in pair_counts.most_common(10)
            if count >= 2
        }

    def _upsert_skill_assertion(
        self,
        conn: sqlite3.Connection,
        profile_id: str,
        skill_name: str,
        metrics: dict,
    ) -> str:
        """Create or reinforce a skill performance assertion.

        The row id is derived from the profile and skill name, so repeated
        runs hit the same row. A new row starts with a confidence computed
        from the score and invocation count (bounded to [0.3, 0.85]); an
        existing row has its confidence nudged towards 1.0 by
        ``REINFORCEMENT_NUDGE`` and capped at 0.95.

        Returns:
            ``"created"`` or ``"reinforced"``. Nothing is committed here.
        """
        now = datetime.now(timezone.utc).isoformat()
        eff = metrics["effective_score"]
        total = metrics["total_invocations"]

        trigger = f"when considering skill {skill_name}"
        action = (
            f"effective score: {eff:.0%} (approximate, {total} invocations, "
            f"{metrics['session_count']} sessions)"
        )

        assertion_id = hashlib.sha256(
            f"{profile_id}:skill_perf:{skill_name}".encode(),
        ).hexdigest()[:16]

        existing = conn.execute(
            "SELECT id, confidence FROM behavioral_assertions WHERE id = ?",
            (assertion_id,),
        ).fetchone()

        confidence = min(0.85, max(0.3, abs(eff) * 0.8 + total / 100))

        if existing is None:
            conn.execute(
                "INSERT INTO behavioral_assertions "
                "(id, profile_id, project_path, trigger_condition, action, "
                " category, confidence, evidence_count, source, created_at, updated_at) "
                "VALUES (?, ?, '', ?, ?, 'skill_performance', ?, ?, 'skill_miner', ?, ?)",
                (assertion_id, profile_id, trigger, action,
                 round(confidence, 4), total, now, now),
            )
            return "created"

        # Positional access works for sqlite3.Row and plain tuple rows alike.
        old_conf = existing[1]
        if old_conf is None:
            # A NULL confidence gives nothing to nudge; start from the
            # freshly computed value instead of failing.
            old_conf = confidence
        new_conf = old_conf + (1.0 - old_conf) * REINFORCEMENT_NUDGE
        conn.execute(
            "UPDATE behavioral_assertions SET "
            "action = ?, confidence = ?, evidence_count = ?, "
            "reinforcement_count = reinforcement_count + 1, "
            "last_reinforced_at = ?, updated_at = ? WHERE id = ?",
            (action, round(min(0.95, new_conf), 4), total, now, now, assertion_id),
        )
        return "reinforced"

    def _upsert_correlation_assertion(
        self,
        conn: sqlite3.Connection,
        profile_id: str,
        pair: tuple[str, str],
        corr_data: dict,
    ) -> None:
        """Create or refresh the assertion for a correlated skill pair.

        A new row gets confidence ``min(0.7, count / 10)``. An existing row
        has its action text and ``evidence_count`` refreshed to the current
        count and its reinforcement counter incremented; its confidence is
        left untouched. Nothing is committed here.
        """
        now = datetime.now(timezone.utc).isoformat()
        count = corr_data["count"]
        trigger = f"when using {pair[0]}"
        action = f"often paired with {pair[1]} ({count} sessions together)"

        assertion_id = hashlib.sha256(
            f"{profile_id}:skill_corr:{pair[0]}:{pair[1]}".encode(),
        ).hexdigest()[:16]

        existing = conn.execute(
            "SELECT id FROM behavioral_assertions WHERE id = ?",
            (assertion_id,),
        ).fetchone()

        if existing is not None:
            # Keep evidence_count in step with the count quoted in `action`.
            conn.execute(
                "UPDATE behavioral_assertions SET "
                "action = ?, evidence_count = ?, "
                "reinforcement_count = reinforcement_count + 1, "
                "last_reinforced_at = ?, updated_at = ? WHERE id = ?",
                (action, count, now, now, assertion_id),
            )
        else:
            conn.execute(
                "INSERT INTO behavioral_assertions "
                "(id, profile_id, project_path, trigger_condition, action, "
                " category, confidence, evidence_count, source, created_at, updated_at) "
                "VALUES (?, ?, '', ?, ?, 'skill_correlation', ?, ?, 'skill_miner', ?, ?)",
                (assertion_id, profile_id, trigger, action,
                 round(min(0.7, count / 10), 4),
                 count, now, now),
            )