superlocalmemory 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +167 -1803
  2. package/README.md +212 -397
  3. package/bin/slm +179 -3
  4. package/bin/superlocalmemoryv2:learning +4 -0
  5. package/bin/superlocalmemoryv2:patterns +4 -0
  6. package/docs/ACCESSIBILITY.md +291 -0
  7. package/docs/ARCHITECTURE.md +12 -6
  8. package/docs/FRAMEWORK-INTEGRATIONS.md +300 -0
  9. package/docs/MCP-MANUAL-SETUP.md +14 -4
  10. package/install.sh +99 -3
  11. package/mcp_server.py +291 -1
  12. package/package.json +2 -1
  13. package/requirements-learning.txt +12 -0
  14. package/scripts/verify-v27.sh +233 -0
  15. package/skills/slm-show-patterns/SKILL.md +224 -0
  16. package/src/learning/__init__.py +201 -0
  17. package/src/learning/adaptive_ranker.py +826 -0
  18. package/src/learning/cross_project_aggregator.py +866 -0
  19. package/src/learning/engagement_tracker.py +638 -0
  20. package/src/learning/feature_extractor.py +461 -0
  21. package/src/learning/feedback_collector.py +690 -0
  22. package/src/learning/learning_db.py +842 -0
  23. package/src/learning/project_context_manager.py +582 -0
  24. package/src/learning/source_quality_scorer.py +685 -0
  25. package/src/learning/synthetic_bootstrap.py +1047 -0
  26. package/src/learning/tests/__init__.py +0 -0
  27. package/src/learning/tests/test_adaptive_ranker.py +328 -0
  28. package/src/learning/tests/test_aggregator.py +309 -0
  29. package/src/learning/tests/test_feedback_collector.py +295 -0
  30. package/src/learning/tests/test_learning_db.py +606 -0
  31. package/src/learning/tests/test_project_context.py +296 -0
  32. package/src/learning/tests/test_source_quality.py +355 -0
  33. package/src/learning/tests/test_synthetic_bootstrap.py +433 -0
  34. package/src/learning/tests/test_workflow_miner.py +322 -0
  35. package/src/learning/workflow_pattern_miner.py +665 -0
  36. package/ui/index.html +346 -13
  37. package/ui/js/clusters.js +90 -1
  38. package/ui/js/graph-core.js +445 -0
  39. package/ui/js/graph-cytoscape-monolithic-backup.js +1168 -0
  40. package/ui/js/graph-cytoscape.js +1168 -0
  41. package/ui/js/graph-d3-backup.js +32 -0
  42. package/ui/js/graph-filters.js +220 -0
  43. package/ui/js/graph-interactions.js +354 -0
  44. package/ui/js/graph-ui.js +214 -0
  45. package/ui/js/memories.js +52 -0
  46. package/ui/js/modal.js +104 -1
@@ -0,0 +1,866 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SuperLocalMemory V2 - Cross-Project Aggregator (v2.7)
4
+ Copyright (c) 2026 Varun Pratap Bhardwaj
5
+ Licensed under MIT License
6
+
7
+ Repository: https://github.com/varun369/SuperLocalMemoryV2
8
+ Author: Varun Pratap Bhardwaj (Solution Architect)
9
+
10
+ NOTICE: This software is protected by MIT License.
11
+ Attribution must be preserved in all copies or derivatives.
12
+ """
13
+
14
+ """
15
+ CrossProjectAggregator — Layer 1: Transferable Tech Preferences.
16
+
17
+ Aggregates technology preferences across ALL user profiles by wrapping
18
+ the existing FrequencyAnalyzer from pattern_learner.py. This module
19
+ does NOT replace pattern_learner.py — it sits on top, reading its
20
+ per-profile results and merging them into cross-project patterns stored
21
+ in learning.db's `transferable_patterns` table.
22
+
23
+ Key behaviors:
24
+ - Reads memories from memory.db across all profiles (READ-ONLY)
25
+ - Wraps FrequencyAnalyzer.analyze_preferences() for per-profile analysis
26
+ - Merges profile results with exponential temporal decay (1-year half-life)
27
+ - Detects contradictions when preferences change across profiles or time
28
+ - Stores merged patterns in learning.db via LearningDB.upsert_transferable_pattern()
29
+
30
+ Temporal Decay:
31
+ weight = exp(-age_days / 365)
32
+ This gives a 1-year half-life: memories from 365 days ago contribute ~37%
33
+ of their original weight. Recent profiles dominate, but old preferences
34
+ are not forgotten unless contradicted.
35
+
36
+ Contradiction Detection:
37
+ If the preferred value for a category changed within the last 90 days
38
+ (comparing the current top choice against previous stored value),
39
+ a contradiction is logged. This signals preference evolution — not an
40
+ error. The adaptive ranker can use contradictions to weight recent
41
+ preferences higher.
42
+
43
+ Research Backing:
44
+ - MACLA (arXiv:2512.18950): Bayesian confidence with temporal priors
45
+ - MemoryBank (AAAI 2024): Cross-session preference persistence
46
+ - Pattern originally from pattern_learner.py Layer 4
47
+
48
+ Thread Safety:
49
+ Write operations to learning.db are protected by LearningDB's internal
50
+ write lock. Read operations to memory.db use per-call connections (SQLite
51
+ WAL mode supports concurrent reads).
52
+ """
53
+
54
import json
import logging
import math
import sqlite3
import sys
import threading
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple

# Module-level logger; handlers/levels are configured by the host application
# (or by the CLI entry point at the bottom of this file).
logger = logging.getLogger("superlocalmemory.learning.aggregator")

# ---------------------------------------------------------------------------
# Import FrequencyAnalyzer from pattern_learner.py (lives in ~/.claude-memory/)
# ---------------------------------------------------------------------------
MEMORY_DIR = Path.home() / ".claude-memory"
DEFAULT_MEMORY_DB = MEMORY_DIR / "memory.db"

# pattern_learner.py is installed in the user's data directory, outside this
# package, so it must be made importable before the import attempt below.
if str(MEMORY_DIR) not in sys.path:
    sys.path.insert(0, str(MEMORY_DIR))

try:
    from pattern_learner import FrequencyAnalyzer
    HAS_FREQ_ANALYZER = True
except ImportError:
    # Degrade gracefully: without the analyzer, aggregation is a no-op
    # (aggregate_all_profiles returns {} — see CrossProjectAggregator).
    HAS_FREQ_ANALYZER = False
    logger.warning(
        "FrequencyAnalyzer not available. "
        "Ensure pattern_learner.py is in %s",
        MEMORY_DIR,
    )

# ---------------------------------------------------------------------------
# Import LearningDB (sibling module in src/learning/)
# ---------------------------------------------------------------------------
try:
    from .learning_db import LearningDB
except ImportError:
    # Relative import fails when this file is run as a plain script (no
    # package context); fall back to an absolute import before giving up.
    try:
        from learning_db import LearningDB
    except ImportError:
        LearningDB = None
        logger.warning("LearningDB not available — aggregator results will not persist.")

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Temporal decay time constant: 365 days. NOTE(review): the weight formula
# exp(-age_days / 365) leaves ~37% weight after one year — strictly an
# e-folding time constant, not a true half-life (that would need a ln(2)
# factor). The module docstring calls it a "1-year half-life"; confirm which
# is intended before tuning.
DECAY_HALF_LIFE_DAYS = 365.0

# Contradiction detection window: preference changes within the last 90 days
# are reported as contradictions (see _detect_contradictions).
CONTRADICTION_WINDOW_DAYS = 90

# Minimum raw evidence count for a merged pattern to be considered valid.
MIN_EVIDENCE_FOR_MERGE = 2

# Minimum merged confidence for a pattern to be stored in learning.db.
MIN_MERGE_CONFIDENCE = 0.3
113
+
114
+
115
+ class CrossProjectAggregator:
116
+ """
117
+ Aggregates tech preferences across all user profiles.
118
+
119
+ Wraps FrequencyAnalyzer to analyze per-profile memories, then merges
120
+ results with temporal decay into transferable patterns stored in
121
+ learning.db.
122
+
123
+ Usage:
124
+ aggregator = CrossProjectAggregator()
125
+ results = aggregator.aggregate_all_profiles()
126
+ prefs = aggregator.get_tech_preferences(min_confidence=0.6)
127
+ """
128
+
129
+ def __init__(
130
+ self,
131
+ memory_db_path: Optional[Path] = None,
132
+ learning_db: Optional[Any] = None,
133
+ ):
134
+ """
135
+ Initialize the cross-project aggregator.
136
+
137
+ Args:
138
+ memory_db_path: Path to memory.db. Defaults to ~/.claude-memory/memory.db.
139
+ This database is READ-ONLY from this module's perspective.
140
+ learning_db: A LearningDB instance for storing results. If None, one is
141
+ created using the default path.
142
+ """
143
+ self.memory_db_path = Path(memory_db_path) if memory_db_path else DEFAULT_MEMORY_DB
144
+ self._lock = threading.Lock()
145
+
146
+ # Initialize LearningDB for storing aggregated patterns
147
+ if learning_db is not None:
148
+ self._learning_db = learning_db
149
+ elif LearningDB is not None:
150
+ try:
151
+ self._learning_db = LearningDB.get_instance()
152
+ except Exception as e:
153
+ logger.error("Failed to initialize LearningDB: %s", e)
154
+ self._learning_db = None
155
+ else:
156
+ self._learning_db = None
157
+
158
+ # Initialize FrequencyAnalyzer if available
159
+ if HAS_FREQ_ANALYZER:
160
+ self._analyzer = FrequencyAnalyzer(self.memory_db_path)
161
+ else:
162
+ self._analyzer = None
163
+
164
+ logger.info(
165
+ "CrossProjectAggregator initialized: memory_db=%s, "
166
+ "freq_analyzer=%s, learning_db=%s",
167
+ self.memory_db_path,
168
+ "available" if self._analyzer else "unavailable",
169
+ "available" if self._learning_db else "unavailable",
170
+ )
171
+
172
+ # ======================================================================
173
+ # Core Aggregation
174
+ # ======================================================================
175
+
176
+ def aggregate_all_profiles(self) -> Dict[str, dict]:
177
+ """
178
+ Aggregate tech preferences across ALL profiles in memory.db.
179
+
180
+ Workflow:
181
+ 1. List all distinct profiles from memory.db
182
+ 2. For each profile, collect memory IDs and timestamps
183
+ 3. Run FrequencyAnalyzer.analyze_preferences() per profile
184
+ 4. Merge results with exponential temporal decay
185
+ 5. Detect contradictions against previously stored patterns
186
+ 6. Store merged patterns in learning.db
187
+
188
+ Returns:
189
+ Dict mapping pattern_key -> {value, confidence, evidence_count,
190
+ profiles_seen, contradictions, decay_factor}
191
+ """
192
+ if not self._analyzer:
193
+ logger.warning("FrequencyAnalyzer unavailable — cannot aggregate.")
194
+ return {}
195
+
196
+ # Step 1: List all profiles and their memory data
197
+ profile_data = self._get_all_profile_data()
198
+ if not profile_data:
199
+ logger.info("No profiles found in memory.db — nothing to aggregate.")
200
+ return {}
201
+
202
+ logger.info(
203
+ "Aggregating preferences across %d profile(s): %s",
204
+ len(profile_data),
205
+ ", ".join(p["profile"] for p in profile_data),
206
+ )
207
+
208
+ # Step 2-3: Analyze each profile
209
+ profile_patterns = []
210
+ for pdata in profile_data:
211
+ profile_name = pdata["profile"]
212
+ memory_ids = pdata["memory_ids"]
213
+
214
+ if not memory_ids:
215
+ logger.debug("Profile '%s' has no memories — skipping.", profile_name)
216
+ continue
217
+
218
+ try:
219
+ patterns = self._analyzer.analyze_preferences(memory_ids)
220
+ if patterns:
221
+ profile_patterns.append({
222
+ "profile": profile_name,
223
+ "patterns": patterns,
224
+ "latest_timestamp": pdata["latest_timestamp"],
225
+ "memory_count": len(memory_ids),
226
+ })
227
+ logger.debug(
228
+ "Profile '%s': %d patterns from %d memories",
229
+ profile_name, len(patterns), len(memory_ids),
230
+ )
231
+ except Exception as e:
232
+ logger.error(
233
+ "Failed to analyze profile '%s': %s",
234
+ profile_name, e,
235
+ )
236
+ continue
237
+
238
+ if not profile_patterns:
239
+ logger.info("No patterns found across any profile.")
240
+ return {}
241
+
242
+ # Step 4: Merge with temporal decay
243
+ merged = self._merge_with_decay(profile_patterns)
244
+
245
+ # Step 5: Detect contradictions
246
+ for key, pattern_data in merged.items():
247
+ contradictions = self._detect_contradictions(key, pattern_data)
248
+ pattern_data["contradictions"] = contradictions
249
+
250
+ # Step 6: Store in learning.db
251
+ self._store_merged_patterns(merged)
252
+
253
+ logger.info(
254
+ "Aggregation complete: %d transferable patterns stored.",
255
+ len(merged),
256
+ )
257
+ return merged
258
+
259
+ # ======================================================================
260
+ # Profile Data Extraction (READ-ONLY on memory.db)
261
+ # ======================================================================
262
+
263
+ def _get_all_profile_data(self) -> List[dict]:
264
+ """
265
+ Get all profiles and their memory IDs from memory.db.
266
+
267
+ Returns list of {profile, memory_ids, latest_timestamp, memory_count}.
268
+ """
269
+ results = []
270
+
271
+ try:
272
+ conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
273
+ conn.execute("PRAGMA busy_timeout=5000")
274
+ cursor = conn.cursor()
275
+
276
+ # Get distinct profiles
277
+ cursor.execute(
278
+ "SELECT DISTINCT profile FROM memories "
279
+ "WHERE profile IS NOT NULL ORDER BY profile"
280
+ )
281
+ profiles = [row[0] for row in cursor.fetchall()]
282
+
283
+ if not profiles:
284
+ # Fallback: if no profile column or all NULL, treat as 'default'
285
+ cursor.execute("SELECT id FROM memories ORDER BY created_at")
286
+ all_ids = [row[0] for row in cursor.fetchall()]
287
+ if all_ids:
288
+ # Get the latest timestamp
289
+ cursor.execute(
290
+ "SELECT MAX(created_at) FROM memories"
291
+ )
292
+ latest = cursor.fetchone()[0] or datetime.now().isoformat()
293
+ results.append({
294
+ "profile": "default",
295
+ "memory_ids": all_ids,
296
+ "latest_timestamp": latest,
297
+ })
298
+ conn.close()
299
+ return results
300
+
301
+ # For each profile, get memory IDs and latest timestamp
302
+ for profile in profiles:
303
+ cursor.execute(
304
+ "SELECT id FROM memories WHERE profile = ? ORDER BY created_at",
305
+ (profile,),
306
+ )
307
+ memory_ids = [row[0] for row in cursor.fetchall()]
308
+
309
+ cursor.execute(
310
+ "SELECT MAX(created_at) FROM memories WHERE profile = ?",
311
+ (profile,),
312
+ )
313
+ latest = cursor.fetchone()[0] or datetime.now().isoformat()
314
+
315
+ if memory_ids:
316
+ results.append({
317
+ "profile": profile,
318
+ "memory_ids": memory_ids,
319
+ "latest_timestamp": latest,
320
+ })
321
+
322
+ conn.close()
323
+
324
+ except sqlite3.OperationalError as e:
325
+ # Handle case where 'profile' column doesn't exist
326
+ logger.warning(
327
+ "Could not query profiles from memory.db: %s. "
328
+ "Falling back to all memories as 'default' profile.",
329
+ e,
330
+ )
331
+ try:
332
+ conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
333
+ cursor = conn.cursor()
334
+ cursor.execute("SELECT id FROM memories ORDER BY created_at")
335
+ all_ids = [row[0] for row in cursor.fetchall()]
336
+ if all_ids:
337
+ cursor.execute("SELECT MAX(created_at) FROM memories")
338
+ latest = cursor.fetchone()[0] or datetime.now().isoformat()
339
+ results.append({
340
+ "profile": "default",
341
+ "memory_ids": all_ids,
342
+ "latest_timestamp": latest,
343
+ })
344
+ conn.close()
345
+ except Exception as inner_e:
346
+ logger.error("Failed to read memory.db: %s", inner_e)
347
+
348
+ except Exception as e:
349
+ logger.error("Unexpected error reading profiles: %s", e)
350
+
351
+ return results
352
+
353
+ # ======================================================================
354
+ # Temporal Decay Merging
355
+ # ======================================================================
356
+
357
+ def _merge_with_decay(
358
+ self,
359
+ profile_patterns: List[dict],
360
+ ) -> Dict[str, dict]:
361
+ """
362
+ Merge per-profile patterns with exponential temporal decay.
363
+
364
+ Each profile's contribution is weighted by:
365
+ weight = exp(-age_days / DECAY_HALF_LIFE_DAYS)
366
+
367
+ where age_days is the number of days since the profile's most
368
+ recent memory was created. This ensures recent profiles dominate
369
+ while old preferences decay gracefully.
370
+
371
+ Args:
372
+ profile_patterns: List of {profile, patterns, latest_timestamp, memory_count}
373
+
374
+ Returns:
375
+ Dict[category_key, {value, confidence, evidence_count, profiles_seen,
376
+ decay_factor, profile_history}]
377
+ """
378
+ now = datetime.now()
379
+
380
+ # Collect all contributions per category key
381
+ # key -> list of {value, confidence, evidence_count, weight, profile}
382
+ contributions: Dict[str, List[dict]] = {}
383
+
384
+ for pdata in profile_patterns:
385
+ # Calculate temporal weight for this profile
386
+ age_days = self._days_since(pdata["latest_timestamp"], now)
387
+ weight = math.exp(-age_days / DECAY_HALF_LIFE_DAYS)
388
+
389
+ for category_key, pattern in pdata["patterns"].items():
390
+ if category_key not in contributions:
391
+ contributions[category_key] = []
392
+
393
+ contributions[category_key].append({
394
+ "value": pattern.get("value", ""),
395
+ "confidence": pattern.get("confidence", 0.0),
396
+ "evidence_count": pattern.get("evidence_count", 0),
397
+ "weight": weight,
398
+ "profile": pdata["profile"],
399
+ "latest_timestamp": pdata["latest_timestamp"],
400
+ })
401
+
402
+ # Merge contributions per category
403
+ merged = {}
404
+ for category_key, contribs in contributions.items():
405
+ merged_pattern = self._merge_category_contributions(
406
+ category_key, contribs
407
+ )
408
+ if merged_pattern is not None:
409
+ merged[category_key] = merged_pattern
410
+
411
+ return merged
412
+
413
+ def _merge_category_contributions(
414
+ self,
415
+ category_key: str,
416
+ contributions: List[dict],
417
+ ) -> Optional[dict]:
418
+ """
419
+ Merge contributions for a single category across profiles.
420
+
421
+ Strategy:
422
+ 1. Group contributions by value (the preferred tech)
423
+ 2. For each value, sum weighted evidence
424
+ 3. The value with highest weighted evidence wins
425
+ 4. Confidence = weighted_evidence / total_weighted_evidence
426
+ """
427
+ if not contributions:
428
+ return None
429
+
430
+ # Group by value
431
+ value_scores: Dict[str, float] = {}
432
+ value_evidence: Dict[str, int] = {}
433
+ value_profiles: Dict[str, set] = {}
434
+ value_weights: Dict[str, float] = {}
435
+
436
+ total_weighted_evidence = 0.0
437
+
438
+ for contrib in contributions:
439
+ value = contrib["value"]
440
+ weighted_ev = contrib["evidence_count"] * contrib["weight"]
441
+
442
+ if value not in value_scores:
443
+ value_scores[value] = 0.0
444
+ value_evidence[value] = 0
445
+ value_profiles[value] = set()
446
+ value_weights[value] = 0.0
447
+
448
+ value_scores[value] += weighted_ev
449
+ value_evidence[value] += contrib["evidence_count"]
450
+ value_profiles[value].add(contrib["profile"])
451
+ value_weights[value] = max(value_weights[value], contrib["weight"])
452
+ total_weighted_evidence += weighted_ev
453
+
454
+ if total_weighted_evidence == 0:
455
+ return None
456
+
457
+ # Find the winning value
458
+ winning_value = max(value_scores, key=value_scores.get)
459
+ winning_score = value_scores[winning_value]
460
+
461
+ # Calculate merged confidence
462
+ confidence = winning_score / total_weighted_evidence if total_weighted_evidence > 0 else 0.0
463
+
464
+ total_evidence = sum(value_evidence.values())
465
+ winning_evidence = value_evidence[winning_value]
466
+
467
+ if winning_evidence < MIN_EVIDENCE_FOR_MERGE:
468
+ return None
469
+
470
+ if confidence < MIN_MERGE_CONFIDENCE:
471
+ return None
472
+
473
+ # Average decay factor across contributing profiles for the winner
474
+ winning_decay = value_weights[winning_value]
475
+
476
+ # Build profile history for contradiction detection
477
+ profile_history = []
478
+ for contrib in contributions:
479
+ profile_history.append({
480
+ "profile": contrib["profile"],
481
+ "value": contrib["value"],
482
+ "confidence": round(contrib["confidence"], 3),
483
+ "weight": round(contrib["weight"], 3),
484
+ "timestamp": contrib["latest_timestamp"],
485
+ })
486
+
487
+ return {
488
+ "value": winning_value,
489
+ "confidence": round(min(0.95, confidence), 3),
490
+ "evidence_count": winning_evidence,
491
+ "profiles_seen": len(value_profiles[winning_value]),
492
+ "total_profiles": len(set(c["profile"] for c in contributions)),
493
+ "decay_factor": round(winning_decay, 4),
494
+ "profile_history": profile_history,
495
+ "contradictions": [], # Filled in by _detect_contradictions
496
+ }
497
+
498
+ # ======================================================================
499
+ # Contradiction Detection
500
+ # ======================================================================
501
+
502
+ def _detect_contradictions(
503
+ self,
504
+ pattern_key: str,
505
+ pattern_data: dict,
506
+ ) -> List[str]:
507
+ """
508
+ Detect if the preferred value changed recently.
509
+
510
+ A contradiction is logged when:
511
+ 1. The current winning value differs from the previously stored value
512
+ 2. The change happened within the last CONTRADICTION_WINDOW_DAYS
513
+ 3. Multiple profiles disagree on the preferred value
514
+
515
+ Contradictions are informational — they signal preference evolution,
516
+ not errors. The adaptive ranker uses them to weight recent preferences.
517
+
518
+ Args:
519
+ pattern_key: Category key (e.g., 'frontend_framework')
520
+ pattern_data: Merged pattern data with profile_history
521
+
522
+ Returns:
523
+ List of contradiction description strings.
524
+ """
525
+ contradictions = []
526
+ current_value = pattern_data["value"]
527
+
528
+ # Check 1: Cross-profile disagreement
529
+ profile_history = pattern_data.get("profile_history", [])
530
+ distinct_values = set(h["value"] for h in profile_history)
531
+
532
+ if len(distinct_values) > 1:
533
+ other_values = distinct_values - {current_value}
534
+ for other_val in other_values:
535
+ disagreeing_profiles = [
536
+ h["profile"] for h in profile_history
537
+ if h["value"] == other_val
538
+ ]
539
+ contradictions.append(
540
+ "Profile(s) %s prefer '%s' instead of '%s'" % (
541
+ ", ".join(disagreeing_profiles),
542
+ other_val,
543
+ current_value,
544
+ )
545
+ )
546
+
547
+ # Check 2: Change from previously stored value (in learning.db)
548
+ if self._learning_db is not None:
549
+ try:
550
+ stored = self._learning_db.get_transferable_patterns(
551
+ min_confidence=0.0,
552
+ pattern_type="preference",
553
+ )
554
+ for row in stored:
555
+ if row.get("key") == pattern_key:
556
+ old_value = row.get("value", "")
557
+ old_updated = row.get("updated_at") or row.get("last_seen")
558
+ if old_value and old_value != current_value:
559
+ # Check if the old pattern was updated recently
560
+ if old_updated and self._is_within_window(
561
+ old_updated, CONTRADICTION_WINDOW_DAYS
562
+ ):
563
+ contradictions.append(
564
+ "Preference changed from '%s' to '%s' "
565
+ "within last %d days" % (
566
+ old_value,
567
+ current_value,
568
+ CONTRADICTION_WINDOW_DAYS,
569
+ )
570
+ )
571
+ break
572
+ except Exception as e:
573
+ logger.debug(
574
+ "Could not check stored patterns for contradictions: %s", e
575
+ )
576
+
577
+ if contradictions:
578
+ logger.info(
579
+ "Contradictions for '%s': %s",
580
+ pattern_key,
581
+ "; ".join(contradictions),
582
+ )
583
+
584
+ return contradictions
585
+
586
+ # ======================================================================
587
+ # Storage (learning.db)
588
+ # ======================================================================
589
+
590
+ def _store_merged_patterns(self, merged: Dict[str, dict]):
591
+ """
592
+ Store merged patterns in learning.db's transferable_patterns table.
593
+
594
+ Uses LearningDB.upsert_transferable_pattern() which handles
595
+ INSERT ON CONFLICT UPDATE internally with its own write lock.
596
+ """
597
+ if self._learning_db is None:
598
+ logger.warning(
599
+ "LearningDB unavailable — %d patterns computed but not stored.",
600
+ len(merged),
601
+ )
602
+ return
603
+
604
+ stored_count = 0
605
+ for key, data in merged.items():
606
+ try:
607
+ self._learning_db.upsert_transferable_pattern(
608
+ pattern_type="preference",
609
+ key=key,
610
+ value=data["value"],
611
+ confidence=data["confidence"],
612
+ evidence_count=data["evidence_count"],
613
+ profiles_seen=data.get("profiles_seen", 1),
614
+ decay_factor=data.get("decay_factor", 1.0),
615
+ contradictions=data.get("contradictions"),
616
+ )
617
+ stored_count += 1
618
+ except Exception as e:
619
+ logger.error(
620
+ "Failed to store pattern '%s': %s", key, e
621
+ )
622
+
623
+ logger.info(
624
+ "Stored %d/%d merged patterns in learning.db.",
625
+ stored_count, len(merged),
626
+ )
627
+
628
+ # ======================================================================
629
+ # Query Interface
630
+ # ======================================================================
631
+
632
+ def get_tech_preferences(
633
+ self,
634
+ min_confidence: float = 0.6,
635
+ ) -> Dict[str, dict]:
636
+ """
637
+ Retrieve aggregated tech preferences from learning.db.
638
+
639
+ This reads from the `transferable_patterns` table — the stored
640
+ results of a previous aggregate_all_profiles() call.
641
+
642
+ Args:
643
+ min_confidence: Minimum confidence threshold (0.0 to 1.0).
644
+ Default 0.6 matches FrequencyAnalyzer's threshold.
645
+
646
+ Returns:
647
+ Dict mapping category_key -> {value, confidence, evidence_count,
648
+ profiles_seen, decay_factor, contradictions}
649
+ """
650
+ if self._learning_db is None:
651
+ logger.warning("LearningDB unavailable — cannot read preferences.")
652
+ return {}
653
+
654
+ try:
655
+ rows = self._learning_db.get_transferable_patterns(
656
+ min_confidence=min_confidence,
657
+ pattern_type="preference",
658
+ )
659
+
660
+ preferences = {}
661
+ for row in rows:
662
+ key = row.get("key", "")
663
+ if not key:
664
+ continue
665
+
666
+ # Parse contradictions from JSON
667
+ contradictions = []
668
+ raw_contradictions = row.get("contradictions", "[]")
669
+ if isinstance(raw_contradictions, str):
670
+ try:
671
+ contradictions = json.loads(raw_contradictions)
672
+ except (json.JSONDecodeError, TypeError):
673
+ contradictions = []
674
+ elif isinstance(raw_contradictions, list):
675
+ contradictions = raw_contradictions
676
+
677
+ preferences[key] = {
678
+ "value": row.get("value", ""),
679
+ "confidence": row.get("confidence", 0.0),
680
+ "evidence_count": row.get("evidence_count", 0),
681
+ "profiles_seen": row.get("profiles_seen", 1),
682
+ "decay_factor": row.get("decay_factor", 1.0),
683
+ "contradictions": contradictions,
684
+ "first_seen": row.get("first_seen"),
685
+ "last_seen": row.get("last_seen"),
686
+ }
687
+
688
+ return preferences
689
+
690
+ except Exception as e:
691
+ logger.error("Failed to read tech preferences: %s", e)
692
+ return {}
693
+
694
+ def get_preference_context(self, min_confidence: float = 0.6) -> str:
695
+ """
696
+ Format transferable preferences for injection into AI context.
697
+
698
+ Returns a human-readable markdown string suitable for CLAUDE.md
699
+ or system prompt injection.
700
+
701
+ Args:
702
+ min_confidence: Minimum confidence threshold.
703
+
704
+ Returns:
705
+ Formatted markdown string.
706
+ """
707
+ prefs = self.get_tech_preferences(min_confidence)
708
+
709
+ if not prefs:
710
+ return (
711
+ "## Cross-Project Tech Preferences\n\n"
712
+ "No transferable preferences learned yet. "
713
+ "Use more profiles and add memories to build your tech profile."
714
+ )
715
+
716
+ lines = ["## Cross-Project Tech Preferences\n"]
717
+
718
+ for key, data in sorted(prefs.items(), key=lambda x: -x[1]["confidence"]):
719
+ display_key = key.replace("_", " ").title()
720
+ conf_pct = data["confidence"] * 100
721
+ evidence = data["evidence_count"]
722
+ profiles = data["profiles_seen"]
723
+ line = (
724
+ "- **%s:** %s (%.0f%% confidence, %d evidence, %d profile%s)"
725
+ % (
726
+ display_key,
727
+ data["value"],
728
+ conf_pct,
729
+ evidence,
730
+ profiles,
731
+ "s" if profiles != 1 else "",
732
+ )
733
+ )
734
+
735
+ # Flag contradictions
736
+ if data.get("contradictions"):
737
+ line += " [EVOLVING]"
738
+
739
+ lines.append(line)
740
+
741
+ return "\n".join(lines)
742
+
743
+ # ======================================================================
744
+ # Utility Methods
745
+ # ======================================================================
746
+
747
+ @staticmethod
748
+ def _days_since(timestamp_str: str, now: Optional[datetime] = None) -> float:
749
+ """
750
+ Calculate days between a timestamp string and now.
751
+
752
+ Handles multiple timestamp formats from SQLite (ISO 8601, space-separated).
753
+ Returns 0.0 on parse failure (treat as recent).
754
+ """
755
+ if now is None:
756
+ now = datetime.now()
757
+
758
+ if not timestamp_str:
759
+ return 0.0
760
+
761
+ try:
762
+ ts = datetime.fromisoformat(timestamp_str.replace(" ", "T"))
763
+ delta = now - ts
764
+ return max(0.0, delta.total_seconds() / 86400.0)
765
+ except (ValueError, AttributeError, TypeError):
766
+ pass
767
+
768
+ # Fallback: try common formats
769
+ for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%S.%f"):
770
+ try:
771
+ ts = datetime.strptime(str(timestamp_str), fmt)
772
+ delta = now - ts
773
+ return max(0.0, delta.total_seconds() / 86400.0)
774
+ except (ValueError, TypeError):
775
+ continue
776
+
777
+ logger.debug("Could not parse timestamp: %s", timestamp_str)
778
+ return 0.0
779
+
780
+ @staticmethod
781
+ def _is_within_window(timestamp_str: str, window_days: int) -> bool:
782
+ """Check if a timestamp is within the given window (in days)."""
783
+ if not timestamp_str:
784
+ return False
785
+ try:
786
+ ts = datetime.fromisoformat(
787
+ str(timestamp_str).replace(" ", "T")
788
+ )
789
+ return (datetime.now() - ts).days <= window_days
790
+ except (ValueError, AttributeError, TypeError):
791
+ return False
792
+
793
+
794
+ # ===========================================================================
795
+ # CLI Interface
796
+ # ===========================================================================
797
+
798
# ===========================================================================
# CLI Interface
# ===========================================================================
# Sub-commands: aggregate | preferences [min] | context [min].
# NOTE(review): the column-aligned print strings below are reproduced as they
# appear in the published diff; the registry renderer may have collapsed runs
# of spaces — verify alignment against the original source.

if __name__ == "__main__":
    import sys as _sys

    # Basic console logging so aggregation progress is visible when run directly.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    aggregator = CrossProjectAggregator()

    # No sub-command: print usage and exit successfully.
    if len(_sys.argv) < 2:
        print("CrossProjectAggregator — Layer 1: Transferable Tech Preferences")
        print()
        print("Usage:")
        print(" python cross_project_aggregator.py aggregate # Run full aggregation")
        print(" python cross_project_aggregator.py preferences # Show stored preferences")
        print(" python cross_project_aggregator.py context [min] # Get context for AI injection")
        _sys.exit(0)

    command = _sys.argv[1]

    if command == "aggregate":
        # Full pipeline: read memory.db, merge, detect contradictions, persist.
        results = aggregator.aggregate_all_profiles()
        if results:
            print("\nAggregated %d transferable patterns:" % len(results))
            for key, data in sorted(results.items()):
                print(
                    " %-25s %-30s conf=%.2f evidence=%d profiles=%d%s"
                    % (
                        key,
                        data["value"],
                        data["confidence"],
                        data["evidence_count"],
                        data.get("profiles_seen", 1),
                        " [CONTRADICTIONS]" if data.get("contradictions") else "",
                    )
                )
        else:
            print("No patterns found. Add memories across profiles first.")

    elif command == "preferences":
        # Optional second argument: minimum confidence threshold (default 0.6).
        min_conf = float(_sys.argv[2]) if len(_sys.argv) > 2 else 0.6
        prefs = aggregator.get_tech_preferences(min_confidence=min_conf)
        if prefs:
            print("\nTransferable Tech Preferences (min confidence: %.0f%%):" % (min_conf * 100))
            for key, data in sorted(prefs.items(), key=lambda x: -x[1]["confidence"]):
                print(
                    " %-25s %-30s conf=%.2f evidence=%d profiles=%d"
                    % (
                        key,
                        data["value"],
                        data["confidence"],
                        data["evidence_count"],
                        data.get("profiles_seen", 1),
                    )
                )
                # Show each contradiction indented beneath its pattern.
                if data.get("contradictions"):
                    for c in data["contradictions"]:
                        print(" ^-- %s" % c)
        else:
            print("No preferences stored. Run 'aggregate' first.")

    elif command == "context":
        # Optional second argument: minimum confidence threshold (default 0.6).
        min_conf = float(_sys.argv[2]) if len(_sys.argv) > 2 else 0.6
        print(aggregator.get_preference_context(min_conf))

    else:
        print("Unknown command: %s" % command)
        _sys.exit(1)