superlocalmemory 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +167 -1803
- package/README.md +212 -397
- package/bin/slm +179 -3
- package/bin/superlocalmemoryv2:learning +4 -0
- package/bin/superlocalmemoryv2:patterns +4 -0
- package/docs/ACCESSIBILITY.md +291 -0
- package/docs/ARCHITECTURE.md +12 -6
- package/docs/FRAMEWORK-INTEGRATIONS.md +300 -0
- package/docs/MCP-MANUAL-SETUP.md +14 -4
- package/install.sh +99 -3
- package/mcp_server.py +291 -1
- package/package.json +2 -1
- package/requirements-learning.txt +12 -0
- package/scripts/verify-v27.sh +233 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/learning/__init__.py +201 -0
- package/src/learning/adaptive_ranker.py +826 -0
- package/src/learning/cross_project_aggregator.py +866 -0
- package/src/learning/engagement_tracker.py +638 -0
- package/src/learning/feature_extractor.py +461 -0
- package/src/learning/feedback_collector.py +690 -0
- package/src/learning/learning_db.py +842 -0
- package/src/learning/project_context_manager.py +582 -0
- package/src/learning/source_quality_scorer.py +685 -0
- package/src/learning/synthetic_bootstrap.py +1047 -0
- package/src/learning/tests/__init__.py +0 -0
- package/src/learning/tests/test_adaptive_ranker.py +328 -0
- package/src/learning/tests/test_aggregator.py +309 -0
- package/src/learning/tests/test_feedback_collector.py +295 -0
- package/src/learning/tests/test_learning_db.py +606 -0
- package/src/learning/tests/test_project_context.py +296 -0
- package/src/learning/tests/test_source_quality.py +355 -0
- package/src/learning/tests/test_synthetic_bootstrap.py +433 -0
- package/src/learning/tests/test_workflow_miner.py +322 -0
- package/src/learning/workflow_pattern_miner.py +665 -0
- package/ui/index.html +346 -13
- package/ui/js/clusters.js +90 -1
- package/ui/js/graph-core.js +445 -0
- package/ui/js/graph-cytoscape-monolithic-backup.js +1168 -0
- package/ui/js/graph-cytoscape.js +1168 -0
- package/ui/js/graph-d3-backup.js +32 -0
- package/ui/js/graph-filters.js +220 -0
- package/ui/js/graph-interactions.js +354 -0
- package/ui/js/graph-ui.js +214 -0
- package/ui/js/memories.js +52 -0
- package/ui/js/modal.js +104 -1
|
@@ -0,0 +1,866 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
SuperLocalMemory V2 - Cross-Project Aggregator (v2.7)
Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License

Repository: https://github.com/varun369/SuperLocalMemoryV2
Author: Varun Pratap Bhardwaj (Solution Architect)

NOTICE: This software is protected by MIT License.
Attribution must be preserved in all copies or derivatives.

CrossProjectAggregator — Layer 1: Transferable Tech Preferences.

Aggregates technology preferences across ALL user profiles by wrapping
the existing FrequencyAnalyzer from pattern_learner.py. This module
does NOT replace pattern_learner.py — it sits on top, reading its
per-profile results and merging them into cross-project patterns stored
in learning.db's `transferable_patterns` table.

Key behaviors:
- Reads memories from memory.db across all profiles (READ-ONLY)
- Wraps FrequencyAnalyzer.analyze_preferences() for per-profile analysis
- Merges profile results with exponential temporal decay (1-year half-life)
- Detects contradictions when preferences change across profiles or time
- Stores merged patterns in learning.db via LearningDB.upsert_transferable_pattern()

Temporal Decay:
    weight = exp(-age_days / 365)
This gives a 1-year half-life: memories from 365 days ago contribute ~37%
of their original weight. Recent profiles dominate, but old preferences
are not forgotten unless contradicted.

Contradiction Detection:
    If the preferred value for a category changed within the last 90 days
    (comparing the current top choice against previous stored value),
    a contradiction is logged. This signals preference evolution — not an
    error. The adaptive ranker can use contradictions to weight recent
    preferences higher.

Research Backing:
- MACLA (arXiv:2512.18950): Bayesian confidence with temporal priors
- MemoryBank (AAAI 2024): Cross-session preference persistence
- Pattern originally from pattern_learner.py Layer 4

Thread Safety:
    Write operations to learning.db are protected by LearningDB's internal
    write lock. Read operations to memory.db use per-call connections (SQLite
    WAL mode supports concurrent reads).
"""
|
|
53
|
+
|
|
54
|
+
import json
import logging
import math
import sqlite3
import sys
import threading
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple

logger = logging.getLogger("superlocalmemory.learning.aggregator")

# ---------------------------------------------------------------------------
# Locate pattern_learner.py (lives in ~/.claude-memory/) and make it
# importable, then attempt the optional imports this module depends on.
# ---------------------------------------------------------------------------
MEMORY_DIR = Path.home() / ".claude-memory"
DEFAULT_MEMORY_DB = MEMORY_DIR / "memory.db"

if str(MEMORY_DIR) not in sys.path:
    sys.path.insert(0, str(MEMORY_DIR))

# FrequencyAnalyzer is optional: without it this module can still load,
# but aggregation becomes a no-op.
try:
    from pattern_learner import FrequencyAnalyzer
    HAS_FREQ_ANALYZER = True
except ImportError:
    HAS_FREQ_ANALYZER = False
    logger.warning(
        "FrequencyAnalyzer not available. "
        "Ensure pattern_learner.py is in %s",
        MEMORY_DIR,
    )

# LearningDB is the persistence layer (sibling module in src/learning/).
# Try the package-relative import first, then a flat import for script use.
try:
    from .learning_db import LearningDB
except ImportError:
    try:
        from learning_db import LearningDB
    except ImportError:
        LearningDB = None
        logger.warning("LearningDB not available — aggregator results will not persist.")

# ---------------------------------------------------------------------------
# Tunable constants
# ---------------------------------------------------------------------------

# Temporal decay half-life: 365 days (1 year)
DECAY_HALF_LIFE_DAYS = 365.0

# Contradiction detection window: 90 days
CONTRADICTION_WINDOW_DAYS = 90

# Minimum evidence to consider a pattern valid for merging
MIN_EVIDENCE_FOR_MERGE = 2

# Minimum confidence for a merged pattern to be stored
MIN_MERGE_CONFIDENCE = 0.3
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class CrossProjectAggregator:
    """
    Aggregates tech preferences across all user profiles.

    Wraps FrequencyAnalyzer to analyze per-profile memories, then merges
    results with temporal decay into transferable patterns stored in
    learning.db.

    Usage:
        aggregator = CrossProjectAggregator()
        results = aggregator.aggregate_all_profiles()
        prefs = aggregator.get_tech_preferences(min_confidence=0.6)
    """

    def __init__(
        self,
        memory_db_path: Optional[Path] = None,
        learning_db: Optional[Any] = None,
    ):
        """
        Initialize the cross-project aggregator.

        Args:
            memory_db_path: Path to memory.db. Defaults to ~/.claude-memory/memory.db.
                This database is READ-ONLY from this module's perspective.
            learning_db: A LearningDB instance for storing results. If None, one is
                created using the default path.
        """
        self.memory_db_path = (
            Path(memory_db_path) if memory_db_path else DEFAULT_MEMORY_DB
        )
        self._lock = threading.Lock()

        # Resolve the persistence backend: caller-supplied instance wins,
        # otherwise fall back to the process-wide singleton (if the module
        # imported at all).
        if learning_db is not None:
            self._learning_db = learning_db
        elif LearningDB is None:
            self._learning_db = None
        else:
            try:
                self._learning_db = LearningDB.get_instance()
            except Exception as exc:
                logger.error("Failed to initialize LearningDB: %s", exc)
                self._learning_db = None

        # Per-profile analysis is delegated to FrequencyAnalyzer when present.
        self._analyzer = (
            FrequencyAnalyzer(self.memory_db_path) if HAS_FREQ_ANALYZER else None
        )

        logger.info(
            "CrossProjectAggregator initialized: memory_db=%s, "
            "freq_analyzer=%s, learning_db=%s",
            self.memory_db_path,
            "available" if self._analyzer else "unavailable",
            "available" if self._learning_db else "unavailable",
        )
|
|
171
|
+
|
|
172
|
+
# ======================================================================
|
|
173
|
+
# Core Aggregation
|
|
174
|
+
# ======================================================================
|
|
175
|
+
|
|
176
|
+
def aggregate_all_profiles(self) -> Dict[str, dict]:
|
|
177
|
+
"""
|
|
178
|
+
Aggregate tech preferences across ALL profiles in memory.db.
|
|
179
|
+
|
|
180
|
+
Workflow:
|
|
181
|
+
1. List all distinct profiles from memory.db
|
|
182
|
+
2. For each profile, collect memory IDs and timestamps
|
|
183
|
+
3. Run FrequencyAnalyzer.analyze_preferences() per profile
|
|
184
|
+
4. Merge results with exponential temporal decay
|
|
185
|
+
5. Detect contradictions against previously stored patterns
|
|
186
|
+
6. Store merged patterns in learning.db
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
Dict mapping pattern_key -> {value, confidence, evidence_count,
|
|
190
|
+
profiles_seen, contradictions, decay_factor}
|
|
191
|
+
"""
|
|
192
|
+
if not self._analyzer:
|
|
193
|
+
logger.warning("FrequencyAnalyzer unavailable — cannot aggregate.")
|
|
194
|
+
return {}
|
|
195
|
+
|
|
196
|
+
# Step 1: List all profiles and their memory data
|
|
197
|
+
profile_data = self._get_all_profile_data()
|
|
198
|
+
if not profile_data:
|
|
199
|
+
logger.info("No profiles found in memory.db — nothing to aggregate.")
|
|
200
|
+
return {}
|
|
201
|
+
|
|
202
|
+
logger.info(
|
|
203
|
+
"Aggregating preferences across %d profile(s): %s",
|
|
204
|
+
len(profile_data),
|
|
205
|
+
", ".join(p["profile"] for p in profile_data),
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
# Step 2-3: Analyze each profile
|
|
209
|
+
profile_patterns = []
|
|
210
|
+
for pdata in profile_data:
|
|
211
|
+
profile_name = pdata["profile"]
|
|
212
|
+
memory_ids = pdata["memory_ids"]
|
|
213
|
+
|
|
214
|
+
if not memory_ids:
|
|
215
|
+
logger.debug("Profile '%s' has no memories — skipping.", profile_name)
|
|
216
|
+
continue
|
|
217
|
+
|
|
218
|
+
try:
|
|
219
|
+
patterns = self._analyzer.analyze_preferences(memory_ids)
|
|
220
|
+
if patterns:
|
|
221
|
+
profile_patterns.append({
|
|
222
|
+
"profile": profile_name,
|
|
223
|
+
"patterns": patterns,
|
|
224
|
+
"latest_timestamp": pdata["latest_timestamp"],
|
|
225
|
+
"memory_count": len(memory_ids),
|
|
226
|
+
})
|
|
227
|
+
logger.debug(
|
|
228
|
+
"Profile '%s': %d patterns from %d memories",
|
|
229
|
+
profile_name, len(patterns), len(memory_ids),
|
|
230
|
+
)
|
|
231
|
+
except Exception as e:
|
|
232
|
+
logger.error(
|
|
233
|
+
"Failed to analyze profile '%s': %s",
|
|
234
|
+
profile_name, e,
|
|
235
|
+
)
|
|
236
|
+
continue
|
|
237
|
+
|
|
238
|
+
if not profile_patterns:
|
|
239
|
+
logger.info("No patterns found across any profile.")
|
|
240
|
+
return {}
|
|
241
|
+
|
|
242
|
+
# Step 4: Merge with temporal decay
|
|
243
|
+
merged = self._merge_with_decay(profile_patterns)
|
|
244
|
+
|
|
245
|
+
# Step 5: Detect contradictions
|
|
246
|
+
for key, pattern_data in merged.items():
|
|
247
|
+
contradictions = self._detect_contradictions(key, pattern_data)
|
|
248
|
+
pattern_data["contradictions"] = contradictions
|
|
249
|
+
|
|
250
|
+
# Step 6: Store in learning.db
|
|
251
|
+
self._store_merged_patterns(merged)
|
|
252
|
+
|
|
253
|
+
logger.info(
|
|
254
|
+
"Aggregation complete: %d transferable patterns stored.",
|
|
255
|
+
len(merged),
|
|
256
|
+
)
|
|
257
|
+
return merged
|
|
258
|
+
|
|
259
|
+
# ======================================================================
|
|
260
|
+
# Profile Data Extraction (READ-ONLY on memory.db)
|
|
261
|
+
# ======================================================================
|
|
262
|
+
|
|
263
|
+
def _get_all_profile_data(self) -> List[dict]:
|
|
264
|
+
"""
|
|
265
|
+
Get all profiles and their memory IDs from memory.db.
|
|
266
|
+
|
|
267
|
+
Returns list of {profile, memory_ids, latest_timestamp, memory_count}.
|
|
268
|
+
"""
|
|
269
|
+
results = []
|
|
270
|
+
|
|
271
|
+
try:
|
|
272
|
+
conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
|
|
273
|
+
conn.execute("PRAGMA busy_timeout=5000")
|
|
274
|
+
cursor = conn.cursor()
|
|
275
|
+
|
|
276
|
+
# Get distinct profiles
|
|
277
|
+
cursor.execute(
|
|
278
|
+
"SELECT DISTINCT profile FROM memories "
|
|
279
|
+
"WHERE profile IS NOT NULL ORDER BY profile"
|
|
280
|
+
)
|
|
281
|
+
profiles = [row[0] for row in cursor.fetchall()]
|
|
282
|
+
|
|
283
|
+
if not profiles:
|
|
284
|
+
# Fallback: if no profile column or all NULL, treat as 'default'
|
|
285
|
+
cursor.execute("SELECT id FROM memories ORDER BY created_at")
|
|
286
|
+
all_ids = [row[0] for row in cursor.fetchall()]
|
|
287
|
+
if all_ids:
|
|
288
|
+
# Get the latest timestamp
|
|
289
|
+
cursor.execute(
|
|
290
|
+
"SELECT MAX(created_at) FROM memories"
|
|
291
|
+
)
|
|
292
|
+
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
293
|
+
results.append({
|
|
294
|
+
"profile": "default",
|
|
295
|
+
"memory_ids": all_ids,
|
|
296
|
+
"latest_timestamp": latest,
|
|
297
|
+
})
|
|
298
|
+
conn.close()
|
|
299
|
+
return results
|
|
300
|
+
|
|
301
|
+
# For each profile, get memory IDs and latest timestamp
|
|
302
|
+
for profile in profiles:
|
|
303
|
+
cursor.execute(
|
|
304
|
+
"SELECT id FROM memories WHERE profile = ? ORDER BY created_at",
|
|
305
|
+
(profile,),
|
|
306
|
+
)
|
|
307
|
+
memory_ids = [row[0] for row in cursor.fetchall()]
|
|
308
|
+
|
|
309
|
+
cursor.execute(
|
|
310
|
+
"SELECT MAX(created_at) FROM memories WHERE profile = ?",
|
|
311
|
+
(profile,),
|
|
312
|
+
)
|
|
313
|
+
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
314
|
+
|
|
315
|
+
if memory_ids:
|
|
316
|
+
results.append({
|
|
317
|
+
"profile": profile,
|
|
318
|
+
"memory_ids": memory_ids,
|
|
319
|
+
"latest_timestamp": latest,
|
|
320
|
+
})
|
|
321
|
+
|
|
322
|
+
conn.close()
|
|
323
|
+
|
|
324
|
+
except sqlite3.OperationalError as e:
|
|
325
|
+
# Handle case where 'profile' column doesn't exist
|
|
326
|
+
logger.warning(
|
|
327
|
+
"Could not query profiles from memory.db: %s. "
|
|
328
|
+
"Falling back to all memories as 'default' profile.",
|
|
329
|
+
e,
|
|
330
|
+
)
|
|
331
|
+
try:
|
|
332
|
+
conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
|
|
333
|
+
cursor = conn.cursor()
|
|
334
|
+
cursor.execute("SELECT id FROM memories ORDER BY created_at")
|
|
335
|
+
all_ids = [row[0] for row in cursor.fetchall()]
|
|
336
|
+
if all_ids:
|
|
337
|
+
cursor.execute("SELECT MAX(created_at) FROM memories")
|
|
338
|
+
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
339
|
+
results.append({
|
|
340
|
+
"profile": "default",
|
|
341
|
+
"memory_ids": all_ids,
|
|
342
|
+
"latest_timestamp": latest,
|
|
343
|
+
})
|
|
344
|
+
conn.close()
|
|
345
|
+
except Exception as inner_e:
|
|
346
|
+
logger.error("Failed to read memory.db: %s", inner_e)
|
|
347
|
+
|
|
348
|
+
except Exception as e:
|
|
349
|
+
logger.error("Unexpected error reading profiles: %s", e)
|
|
350
|
+
|
|
351
|
+
return results
|
|
352
|
+
|
|
353
|
+
# ======================================================================
|
|
354
|
+
# Temporal Decay Merging
|
|
355
|
+
# ======================================================================
|
|
356
|
+
|
|
357
|
+
def _merge_with_decay(
|
|
358
|
+
self,
|
|
359
|
+
profile_patterns: List[dict],
|
|
360
|
+
) -> Dict[str, dict]:
|
|
361
|
+
"""
|
|
362
|
+
Merge per-profile patterns with exponential temporal decay.
|
|
363
|
+
|
|
364
|
+
Each profile's contribution is weighted by:
|
|
365
|
+
weight = exp(-age_days / DECAY_HALF_LIFE_DAYS)
|
|
366
|
+
|
|
367
|
+
where age_days is the number of days since the profile's most
|
|
368
|
+
recent memory was created. This ensures recent profiles dominate
|
|
369
|
+
while old preferences decay gracefully.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
profile_patterns: List of {profile, patterns, latest_timestamp, memory_count}
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
Dict[category_key, {value, confidence, evidence_count, profiles_seen,
|
|
376
|
+
decay_factor, profile_history}]
|
|
377
|
+
"""
|
|
378
|
+
now = datetime.now()
|
|
379
|
+
|
|
380
|
+
# Collect all contributions per category key
|
|
381
|
+
# key -> list of {value, confidence, evidence_count, weight, profile}
|
|
382
|
+
contributions: Dict[str, List[dict]] = {}
|
|
383
|
+
|
|
384
|
+
for pdata in profile_patterns:
|
|
385
|
+
# Calculate temporal weight for this profile
|
|
386
|
+
age_days = self._days_since(pdata["latest_timestamp"], now)
|
|
387
|
+
weight = math.exp(-age_days / DECAY_HALF_LIFE_DAYS)
|
|
388
|
+
|
|
389
|
+
for category_key, pattern in pdata["patterns"].items():
|
|
390
|
+
if category_key not in contributions:
|
|
391
|
+
contributions[category_key] = []
|
|
392
|
+
|
|
393
|
+
contributions[category_key].append({
|
|
394
|
+
"value": pattern.get("value", ""),
|
|
395
|
+
"confidence": pattern.get("confidence", 0.0),
|
|
396
|
+
"evidence_count": pattern.get("evidence_count", 0),
|
|
397
|
+
"weight": weight,
|
|
398
|
+
"profile": pdata["profile"],
|
|
399
|
+
"latest_timestamp": pdata["latest_timestamp"],
|
|
400
|
+
})
|
|
401
|
+
|
|
402
|
+
# Merge contributions per category
|
|
403
|
+
merged = {}
|
|
404
|
+
for category_key, contribs in contributions.items():
|
|
405
|
+
merged_pattern = self._merge_category_contributions(
|
|
406
|
+
category_key, contribs
|
|
407
|
+
)
|
|
408
|
+
if merged_pattern is not None:
|
|
409
|
+
merged[category_key] = merged_pattern
|
|
410
|
+
|
|
411
|
+
return merged
|
|
412
|
+
|
|
413
|
+
def _merge_category_contributions(
|
|
414
|
+
self,
|
|
415
|
+
category_key: str,
|
|
416
|
+
contributions: List[dict],
|
|
417
|
+
) -> Optional[dict]:
|
|
418
|
+
"""
|
|
419
|
+
Merge contributions for a single category across profiles.
|
|
420
|
+
|
|
421
|
+
Strategy:
|
|
422
|
+
1. Group contributions by value (the preferred tech)
|
|
423
|
+
2. For each value, sum weighted evidence
|
|
424
|
+
3. The value with highest weighted evidence wins
|
|
425
|
+
4. Confidence = weighted_evidence / total_weighted_evidence
|
|
426
|
+
"""
|
|
427
|
+
if not contributions:
|
|
428
|
+
return None
|
|
429
|
+
|
|
430
|
+
# Group by value
|
|
431
|
+
value_scores: Dict[str, float] = {}
|
|
432
|
+
value_evidence: Dict[str, int] = {}
|
|
433
|
+
value_profiles: Dict[str, set] = {}
|
|
434
|
+
value_weights: Dict[str, float] = {}
|
|
435
|
+
|
|
436
|
+
total_weighted_evidence = 0.0
|
|
437
|
+
|
|
438
|
+
for contrib in contributions:
|
|
439
|
+
value = contrib["value"]
|
|
440
|
+
weighted_ev = contrib["evidence_count"] * contrib["weight"]
|
|
441
|
+
|
|
442
|
+
if value not in value_scores:
|
|
443
|
+
value_scores[value] = 0.0
|
|
444
|
+
value_evidence[value] = 0
|
|
445
|
+
value_profiles[value] = set()
|
|
446
|
+
value_weights[value] = 0.0
|
|
447
|
+
|
|
448
|
+
value_scores[value] += weighted_ev
|
|
449
|
+
value_evidence[value] += contrib["evidence_count"]
|
|
450
|
+
value_profiles[value].add(contrib["profile"])
|
|
451
|
+
value_weights[value] = max(value_weights[value], contrib["weight"])
|
|
452
|
+
total_weighted_evidence += weighted_ev
|
|
453
|
+
|
|
454
|
+
if total_weighted_evidence == 0:
|
|
455
|
+
return None
|
|
456
|
+
|
|
457
|
+
# Find the winning value
|
|
458
|
+
winning_value = max(value_scores, key=value_scores.get)
|
|
459
|
+
winning_score = value_scores[winning_value]
|
|
460
|
+
|
|
461
|
+
# Calculate merged confidence
|
|
462
|
+
confidence = winning_score / total_weighted_evidence if total_weighted_evidence > 0 else 0.0
|
|
463
|
+
|
|
464
|
+
total_evidence = sum(value_evidence.values())
|
|
465
|
+
winning_evidence = value_evidence[winning_value]
|
|
466
|
+
|
|
467
|
+
if winning_evidence < MIN_EVIDENCE_FOR_MERGE:
|
|
468
|
+
return None
|
|
469
|
+
|
|
470
|
+
if confidence < MIN_MERGE_CONFIDENCE:
|
|
471
|
+
return None
|
|
472
|
+
|
|
473
|
+
# Average decay factor across contributing profiles for the winner
|
|
474
|
+
winning_decay = value_weights[winning_value]
|
|
475
|
+
|
|
476
|
+
# Build profile history for contradiction detection
|
|
477
|
+
profile_history = []
|
|
478
|
+
for contrib in contributions:
|
|
479
|
+
profile_history.append({
|
|
480
|
+
"profile": contrib["profile"],
|
|
481
|
+
"value": contrib["value"],
|
|
482
|
+
"confidence": round(contrib["confidence"], 3),
|
|
483
|
+
"weight": round(contrib["weight"], 3),
|
|
484
|
+
"timestamp": contrib["latest_timestamp"],
|
|
485
|
+
})
|
|
486
|
+
|
|
487
|
+
return {
|
|
488
|
+
"value": winning_value,
|
|
489
|
+
"confidence": round(min(0.95, confidence), 3),
|
|
490
|
+
"evidence_count": winning_evidence,
|
|
491
|
+
"profiles_seen": len(value_profiles[winning_value]),
|
|
492
|
+
"total_profiles": len(set(c["profile"] for c in contributions)),
|
|
493
|
+
"decay_factor": round(winning_decay, 4),
|
|
494
|
+
"profile_history": profile_history,
|
|
495
|
+
"contradictions": [], # Filled in by _detect_contradictions
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
# ======================================================================
|
|
499
|
+
# Contradiction Detection
|
|
500
|
+
# ======================================================================
|
|
501
|
+
|
|
502
|
+
def _detect_contradictions(
|
|
503
|
+
self,
|
|
504
|
+
pattern_key: str,
|
|
505
|
+
pattern_data: dict,
|
|
506
|
+
) -> List[str]:
|
|
507
|
+
"""
|
|
508
|
+
Detect if the preferred value changed recently.
|
|
509
|
+
|
|
510
|
+
A contradiction is logged when:
|
|
511
|
+
1. The current winning value differs from the previously stored value
|
|
512
|
+
2. The change happened within the last CONTRADICTION_WINDOW_DAYS
|
|
513
|
+
3. Multiple profiles disagree on the preferred value
|
|
514
|
+
|
|
515
|
+
Contradictions are informational — they signal preference evolution,
|
|
516
|
+
not errors. The adaptive ranker uses them to weight recent preferences.
|
|
517
|
+
|
|
518
|
+
Args:
|
|
519
|
+
pattern_key: Category key (e.g., 'frontend_framework')
|
|
520
|
+
pattern_data: Merged pattern data with profile_history
|
|
521
|
+
|
|
522
|
+
Returns:
|
|
523
|
+
List of contradiction description strings.
|
|
524
|
+
"""
|
|
525
|
+
contradictions = []
|
|
526
|
+
current_value = pattern_data["value"]
|
|
527
|
+
|
|
528
|
+
# Check 1: Cross-profile disagreement
|
|
529
|
+
profile_history = pattern_data.get("profile_history", [])
|
|
530
|
+
distinct_values = set(h["value"] for h in profile_history)
|
|
531
|
+
|
|
532
|
+
if len(distinct_values) > 1:
|
|
533
|
+
other_values = distinct_values - {current_value}
|
|
534
|
+
for other_val in other_values:
|
|
535
|
+
disagreeing_profiles = [
|
|
536
|
+
h["profile"] for h in profile_history
|
|
537
|
+
if h["value"] == other_val
|
|
538
|
+
]
|
|
539
|
+
contradictions.append(
|
|
540
|
+
"Profile(s) %s prefer '%s' instead of '%s'" % (
|
|
541
|
+
", ".join(disagreeing_profiles),
|
|
542
|
+
other_val,
|
|
543
|
+
current_value,
|
|
544
|
+
)
|
|
545
|
+
)
|
|
546
|
+
|
|
547
|
+
# Check 2: Change from previously stored value (in learning.db)
|
|
548
|
+
if self._learning_db is not None:
|
|
549
|
+
try:
|
|
550
|
+
stored = self._learning_db.get_transferable_patterns(
|
|
551
|
+
min_confidence=0.0,
|
|
552
|
+
pattern_type="preference",
|
|
553
|
+
)
|
|
554
|
+
for row in stored:
|
|
555
|
+
if row.get("key") == pattern_key:
|
|
556
|
+
old_value = row.get("value", "")
|
|
557
|
+
old_updated = row.get("updated_at") or row.get("last_seen")
|
|
558
|
+
if old_value and old_value != current_value:
|
|
559
|
+
# Check if the old pattern was updated recently
|
|
560
|
+
if old_updated and self._is_within_window(
|
|
561
|
+
old_updated, CONTRADICTION_WINDOW_DAYS
|
|
562
|
+
):
|
|
563
|
+
contradictions.append(
|
|
564
|
+
"Preference changed from '%s' to '%s' "
|
|
565
|
+
"within last %d days" % (
|
|
566
|
+
old_value,
|
|
567
|
+
current_value,
|
|
568
|
+
CONTRADICTION_WINDOW_DAYS,
|
|
569
|
+
)
|
|
570
|
+
)
|
|
571
|
+
break
|
|
572
|
+
except Exception as e:
|
|
573
|
+
logger.debug(
|
|
574
|
+
"Could not check stored patterns for contradictions: %s", e
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
if contradictions:
|
|
578
|
+
logger.info(
|
|
579
|
+
"Contradictions for '%s': %s",
|
|
580
|
+
pattern_key,
|
|
581
|
+
"; ".join(contradictions),
|
|
582
|
+
)
|
|
583
|
+
|
|
584
|
+
return contradictions
|
|
585
|
+
|
|
586
|
+
# ======================================================================
|
|
587
|
+
# Storage (learning.db)
|
|
588
|
+
# ======================================================================
|
|
589
|
+
|
|
590
|
+
def _store_merged_patterns(self, merged: Dict[str, dict]):
|
|
591
|
+
"""
|
|
592
|
+
Store merged patterns in learning.db's transferable_patterns table.
|
|
593
|
+
|
|
594
|
+
Uses LearningDB.upsert_transferable_pattern() which handles
|
|
595
|
+
INSERT ON CONFLICT UPDATE internally with its own write lock.
|
|
596
|
+
"""
|
|
597
|
+
if self._learning_db is None:
|
|
598
|
+
logger.warning(
|
|
599
|
+
"LearningDB unavailable — %d patterns computed but not stored.",
|
|
600
|
+
len(merged),
|
|
601
|
+
)
|
|
602
|
+
return
|
|
603
|
+
|
|
604
|
+
stored_count = 0
|
|
605
|
+
for key, data in merged.items():
|
|
606
|
+
try:
|
|
607
|
+
self._learning_db.upsert_transferable_pattern(
|
|
608
|
+
pattern_type="preference",
|
|
609
|
+
key=key,
|
|
610
|
+
value=data["value"],
|
|
611
|
+
confidence=data["confidence"],
|
|
612
|
+
evidence_count=data["evidence_count"],
|
|
613
|
+
profiles_seen=data.get("profiles_seen", 1),
|
|
614
|
+
decay_factor=data.get("decay_factor", 1.0),
|
|
615
|
+
contradictions=data.get("contradictions"),
|
|
616
|
+
)
|
|
617
|
+
stored_count += 1
|
|
618
|
+
except Exception as e:
|
|
619
|
+
logger.error(
|
|
620
|
+
"Failed to store pattern '%s': %s", key, e
|
|
621
|
+
)
|
|
622
|
+
|
|
623
|
+
logger.info(
|
|
624
|
+
"Stored %d/%d merged patterns in learning.db.",
|
|
625
|
+
stored_count, len(merged),
|
|
626
|
+
)
|
|
627
|
+
|
|
628
|
+
# ======================================================================
|
|
629
|
+
# Query Interface
|
|
630
|
+
# ======================================================================
|
|
631
|
+
|
|
632
|
+
def get_tech_preferences(
|
|
633
|
+
self,
|
|
634
|
+
min_confidence: float = 0.6,
|
|
635
|
+
) -> Dict[str, dict]:
|
|
636
|
+
"""
|
|
637
|
+
Retrieve aggregated tech preferences from learning.db.
|
|
638
|
+
|
|
639
|
+
This reads from the `transferable_patterns` table — the stored
|
|
640
|
+
results of a previous aggregate_all_profiles() call.
|
|
641
|
+
|
|
642
|
+
Args:
|
|
643
|
+
min_confidence: Minimum confidence threshold (0.0 to 1.0).
|
|
644
|
+
Default 0.6 matches FrequencyAnalyzer's threshold.
|
|
645
|
+
|
|
646
|
+
Returns:
|
|
647
|
+
Dict mapping category_key -> {value, confidence, evidence_count,
|
|
648
|
+
profiles_seen, decay_factor, contradictions}
|
|
649
|
+
"""
|
|
650
|
+
if self._learning_db is None:
|
|
651
|
+
logger.warning("LearningDB unavailable — cannot read preferences.")
|
|
652
|
+
return {}
|
|
653
|
+
|
|
654
|
+
try:
|
|
655
|
+
rows = self._learning_db.get_transferable_patterns(
|
|
656
|
+
min_confidence=min_confidence,
|
|
657
|
+
pattern_type="preference",
|
|
658
|
+
)
|
|
659
|
+
|
|
660
|
+
preferences = {}
|
|
661
|
+
for row in rows:
|
|
662
|
+
key = row.get("key", "")
|
|
663
|
+
if not key:
|
|
664
|
+
continue
|
|
665
|
+
|
|
666
|
+
# Parse contradictions from JSON
|
|
667
|
+
contradictions = []
|
|
668
|
+
raw_contradictions = row.get("contradictions", "[]")
|
|
669
|
+
if isinstance(raw_contradictions, str):
|
|
670
|
+
try:
|
|
671
|
+
contradictions = json.loads(raw_contradictions)
|
|
672
|
+
except (json.JSONDecodeError, TypeError):
|
|
673
|
+
contradictions = []
|
|
674
|
+
elif isinstance(raw_contradictions, list):
|
|
675
|
+
contradictions = raw_contradictions
|
|
676
|
+
|
|
677
|
+
preferences[key] = {
|
|
678
|
+
"value": row.get("value", ""),
|
|
679
|
+
"confidence": row.get("confidence", 0.0),
|
|
680
|
+
"evidence_count": row.get("evidence_count", 0),
|
|
681
|
+
"profiles_seen": row.get("profiles_seen", 1),
|
|
682
|
+
"decay_factor": row.get("decay_factor", 1.0),
|
|
683
|
+
"contradictions": contradictions,
|
|
684
|
+
"first_seen": row.get("first_seen"),
|
|
685
|
+
"last_seen": row.get("last_seen"),
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
return preferences
|
|
689
|
+
|
|
690
|
+
except Exception as e:
|
|
691
|
+
logger.error("Failed to read tech preferences: %s", e)
|
|
692
|
+
return {}
|
|
693
|
+
|
|
694
|
+
def get_preference_context(self, min_confidence: float = 0.6) -> str:
|
|
695
|
+
"""
|
|
696
|
+
Format transferable preferences for injection into AI context.
|
|
697
|
+
|
|
698
|
+
Returns a human-readable markdown string suitable for CLAUDE.md
|
|
699
|
+
or system prompt injection.
|
|
700
|
+
|
|
701
|
+
Args:
|
|
702
|
+
min_confidence: Minimum confidence threshold.
|
|
703
|
+
|
|
704
|
+
Returns:
|
|
705
|
+
Formatted markdown string.
|
|
706
|
+
"""
|
|
707
|
+
prefs = self.get_tech_preferences(min_confidence)
|
|
708
|
+
|
|
709
|
+
if not prefs:
|
|
710
|
+
return (
|
|
711
|
+
"## Cross-Project Tech Preferences\n\n"
|
|
712
|
+
"No transferable preferences learned yet. "
|
|
713
|
+
"Use more profiles and add memories to build your tech profile."
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
lines = ["## Cross-Project Tech Preferences\n"]
|
|
717
|
+
|
|
718
|
+
for key, data in sorted(prefs.items(), key=lambda x: -x[1]["confidence"]):
|
|
719
|
+
display_key = key.replace("_", " ").title()
|
|
720
|
+
conf_pct = data["confidence"] * 100
|
|
721
|
+
evidence = data["evidence_count"]
|
|
722
|
+
profiles = data["profiles_seen"]
|
|
723
|
+
line = (
|
|
724
|
+
"- **%s:** %s (%.0f%% confidence, %d evidence, %d profile%s)"
|
|
725
|
+
% (
|
|
726
|
+
display_key,
|
|
727
|
+
data["value"],
|
|
728
|
+
conf_pct,
|
|
729
|
+
evidence,
|
|
730
|
+
profiles,
|
|
731
|
+
"s" if profiles != 1 else "",
|
|
732
|
+
)
|
|
733
|
+
)
|
|
734
|
+
|
|
735
|
+
# Flag contradictions
|
|
736
|
+
if data.get("contradictions"):
|
|
737
|
+
line += " [EVOLVING]"
|
|
738
|
+
|
|
739
|
+
lines.append(line)
|
|
740
|
+
|
|
741
|
+
return "\n".join(lines)
|
|
742
|
+
|
|
743
|
+
# ======================================================================
|
|
744
|
+
# Utility Methods
|
|
745
|
+
# ======================================================================
|
|
746
|
+
|
|
747
|
+
@staticmethod
|
|
748
|
+
def _days_since(timestamp_str: str, now: Optional[datetime] = None) -> float:
|
|
749
|
+
"""
|
|
750
|
+
Calculate days between a timestamp string and now.
|
|
751
|
+
|
|
752
|
+
Handles multiple timestamp formats from SQLite (ISO 8601, space-separated).
|
|
753
|
+
Returns 0.0 on parse failure (treat as recent).
|
|
754
|
+
"""
|
|
755
|
+
if now is None:
|
|
756
|
+
now = datetime.now()
|
|
757
|
+
|
|
758
|
+
if not timestamp_str:
|
|
759
|
+
return 0.0
|
|
760
|
+
|
|
761
|
+
try:
|
|
762
|
+
ts = datetime.fromisoformat(timestamp_str.replace(" ", "T"))
|
|
763
|
+
delta = now - ts
|
|
764
|
+
return max(0.0, delta.total_seconds() / 86400.0)
|
|
765
|
+
except (ValueError, AttributeError, TypeError):
|
|
766
|
+
pass
|
|
767
|
+
|
|
768
|
+
# Fallback: try common formats
|
|
769
|
+
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%S.%f"):
|
|
770
|
+
try:
|
|
771
|
+
ts = datetime.strptime(str(timestamp_str), fmt)
|
|
772
|
+
delta = now - ts
|
|
773
|
+
return max(0.0, delta.total_seconds() / 86400.0)
|
|
774
|
+
except (ValueError, TypeError):
|
|
775
|
+
continue
|
|
776
|
+
|
|
777
|
+
logger.debug("Could not parse timestamp: %s", timestamp_str)
|
|
778
|
+
return 0.0
|
|
779
|
+
|
|
780
|
+
@staticmethod
|
|
781
|
+
def _is_within_window(timestamp_str: str, window_days: int) -> bool:
|
|
782
|
+
"""Check if a timestamp is within the given window (in days)."""
|
|
783
|
+
if not timestamp_str:
|
|
784
|
+
return False
|
|
785
|
+
try:
|
|
786
|
+
ts = datetime.fromisoformat(
|
|
787
|
+
str(timestamp_str).replace(" ", "T")
|
|
788
|
+
)
|
|
789
|
+
return (datetime.now() - ts).days <= window_days
|
|
790
|
+
except (ValueError, AttributeError, TypeError):
|
|
791
|
+
return False
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
# ===========================================================================
# CLI Interface
# ===========================================================================

if __name__ == "__main__":
    import sys as _sys

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    def _parse_min_conf(default: float = 0.6) -> float:
        """Parse the optional min-confidence argument; exit(1) on junk.

        Previously a malformed value crashed with a raw ValueError
        traceback from float().
        """
        if len(_sys.argv) <= 2:
            return default
        try:
            return float(_sys.argv[2])
        except ValueError:
            print("Invalid confidence value: %s" % _sys.argv[2])
            _sys.exit(1)

    aggregator = CrossProjectAggregator()

    # No command: print usage and exit cleanly.
    if len(_sys.argv) < 2:
        print("CrossProjectAggregator — Layer 1: Transferable Tech Preferences")
        print()
        print("Usage:")
        print("  python cross_project_aggregator.py aggregate    # Run full aggregation")
        print("  python cross_project_aggregator.py preferences  # Show stored preferences")
        print("  python cross_project_aggregator.py context [min] # Get context for AI injection")
        _sys.exit(0)

    command = _sys.argv[1]

    if command == "aggregate":
        # Re-mine all profiles and print the aggregated patterns.
        results = aggregator.aggregate_all_profiles()
        if results:
            print("\nAggregated %d transferable patterns:" % len(results))
            for key, data in sorted(results.items()):
                print(
                    "  %-25s %-30s conf=%.2f evidence=%d profiles=%d%s"
                    % (
                        key,
                        data["value"],
                        data["confidence"],
                        data["evidence_count"],
                        data.get("profiles_seen", 1),
                        " [CONTRADICTIONS]" if data.get("contradictions") else "",
                    )
                )
        else:
            print("No patterns found. Add memories across profiles first.")

    elif command == "preferences":
        # Show stored preferences above an optional confidence threshold.
        min_conf = _parse_min_conf()
        prefs = aggregator.get_tech_preferences(min_confidence=min_conf)
        if prefs:
            print(
                "\nTransferable Tech Preferences (min confidence: %.0f%%):"
                % (min_conf * 100)
            )
            for key, data in sorted(prefs.items(), key=lambda x: -x[1]["confidence"]):
                print(
                    "  %-25s %-30s conf=%.2f evidence=%d profiles=%d"
                    % (
                        key,
                        data["value"],
                        data["confidence"],
                        data["evidence_count"],
                        data.get("profiles_seen", 1),
                    )
                )
                # Annotate any conflicting evidence under the entry.
                for c in data.get("contradictions") or ():
                    print("    ^-- %s" % c)
        else:
            print("No preferences stored. Run 'aggregate' first.")

    elif command == "context":
        # Emit the markdown block intended for AI-context injection.
        print(aggregator.get_preference_context(_parse_min_conf()))

    else:
        print("Unknown command: %s" % command)
        _sys.exit(1)
|