superlocalmemory 2.3.0

Files changed (100)
  1. package/ATTRIBUTION.md +140 -0
  2. package/CHANGELOG.md +1749 -0
  3. package/LICENSE +21 -0
  4. package/README.md +600 -0
  5. package/bin/aider-smart +72 -0
  6. package/bin/slm +202 -0
  7. package/bin/slm-npm +73 -0
  8. package/bin/slm.bat +195 -0
  9. package/bin/slm.cmd +10 -0
  10. package/bin/superlocalmemoryv2:list +3 -0
  11. package/bin/superlocalmemoryv2:profile +3 -0
  12. package/bin/superlocalmemoryv2:recall +3 -0
  13. package/bin/superlocalmemoryv2:remember +3 -0
  14. package/bin/superlocalmemoryv2:reset +3 -0
  15. package/bin/superlocalmemoryv2:status +3 -0
  16. package/completions/slm.bash +58 -0
  17. package/completions/slm.zsh +76 -0
  18. package/configs/antigravity-mcp.json +13 -0
  19. package/configs/chatgpt-desktop-mcp.json +7 -0
  20. package/configs/claude-desktop-mcp.json +15 -0
  21. package/configs/codex-mcp.toml +13 -0
  22. package/configs/cody-commands.json +29 -0
  23. package/configs/continue-mcp.yaml +14 -0
  24. package/configs/continue-skills.yaml +26 -0
  25. package/configs/cursor-mcp.json +15 -0
  26. package/configs/gemini-cli-mcp.json +11 -0
  27. package/configs/jetbrains-mcp.json +11 -0
  28. package/configs/opencode-mcp.json +12 -0
  29. package/configs/perplexity-mcp.json +9 -0
  30. package/configs/vscode-copilot-mcp.json +12 -0
  31. package/configs/windsurf-mcp.json +16 -0
  32. package/configs/zed-mcp.json +12 -0
  33. package/docs/ARCHITECTURE.md +877 -0
  34. package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
  35. package/docs/COMPETITIVE-ANALYSIS.md +210 -0
  36. package/docs/COMPRESSION-README.md +390 -0
  37. package/docs/GRAPH-ENGINE.md +503 -0
  38. package/docs/MCP-MANUAL-SETUP.md +720 -0
  39. package/docs/MCP-TROUBLESHOOTING.md +787 -0
  40. package/docs/PATTERN-LEARNING.md +363 -0
  41. package/docs/PROFILES-GUIDE.md +453 -0
  42. package/docs/RESET-GUIDE.md +353 -0
  43. package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
  44. package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
  45. package/docs/UI-SERVER.md +254 -0
  46. package/docs/UNIVERSAL-INTEGRATION.md +432 -0
  47. package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
  48. package/docs/WINDOWS-INSTALL-README.txt +34 -0
  49. package/docs/WINDOWS-POST-INSTALL.txt +45 -0
  50. package/docs/example_graph_usage.py +148 -0
  51. package/hooks/memory-list-skill.js +130 -0
  52. package/hooks/memory-profile-skill.js +284 -0
  53. package/hooks/memory-recall-skill.js +109 -0
  54. package/hooks/memory-remember-skill.js +127 -0
  55. package/hooks/memory-reset-skill.js +274 -0
  56. package/install-skills.sh +436 -0
  57. package/install.ps1 +417 -0
  58. package/install.sh +755 -0
  59. package/mcp_server.py +585 -0
  60. package/package.json +94 -0
  61. package/requirements-core.txt +24 -0
  62. package/requirements.txt +10 -0
  63. package/scripts/postinstall.js +126 -0
  64. package/scripts/preuninstall.js +57 -0
  65. package/skills/slm-build-graph/SKILL.md +423 -0
  66. package/skills/slm-list-recent/SKILL.md +348 -0
  67. package/skills/slm-recall/SKILL.md +325 -0
  68. package/skills/slm-remember/SKILL.md +194 -0
  69. package/skills/slm-status/SKILL.md +363 -0
  70. package/skills/slm-switch-profile/SKILL.md +442 -0
  71. package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
  72. package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
  73. package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
  74. package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
  75. package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
  76. package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
  77. package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
  78. package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
  79. package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
  80. package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
  81. package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
  82. package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
  83. package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
  84. package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
  85. package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
  86. package/src/cache_manager.py +520 -0
  87. package/src/embedding_engine.py +671 -0
  88. package/src/graph_engine.py +970 -0
  89. package/src/hnsw_index.py +626 -0
  90. package/src/hybrid_search.py +693 -0
  91. package/src/memory-profiles.py +518 -0
  92. package/src/memory-reset.py +485 -0
  93. package/src/memory_compression.py +999 -0
  94. package/src/memory_store_v2.py +1088 -0
  95. package/src/migrate_v1_to_v2.py +638 -0
  96. package/src/pattern_learner.py +898 -0
  97. package/src/query_optimizer.py +513 -0
  98. package/src/search_engine_v2.py +403 -0
  99. package/src/setup_validator.py +479 -0
  100. package/src/tree_manager.py +720 -0
package/src/pattern_learner.py
@@ -0,0 +1,898 @@
+ #!/usr/bin/env python3
+ """
+ SuperLocalMemory V2 - Intelligent Local Memory System
+ Copyright (c) 2026 Varun Pratap Bhardwaj
+ Licensed under MIT License
+
+ Repository: https://github.com/varun369/SuperLocalMemoryV2
+ Author: Varun Pratap Bhardwaj (Solution Architect)
+
+ NOTICE: This software is protected by MIT License.
+ Attribution must be preserved in all copies or derivatives.
+ """
+
+ """
+ Pattern Learner - Identity Profile Extraction (Layer 4)
+
+ Learns user preferences, coding style, and terminology patterns from memories.
+ Uses local TF-IDF, frequency analysis, and heuristics - NO EXTERNAL APIs.
+
+ Based on architecture: docs/architecture/05-pattern-learner.md
+ """
+
+ import sqlite3
+ import json
+ import re
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any, Counter as CounterType
+ from collections import Counter
+
+ # Local NLP tools (no external APIs)
+ try:
+     from sklearn.feature_extraction.text import TfidfVectorizer
+     from sklearn.metrics.pairwise import cosine_similarity
+     import numpy as np
+     SKLEARN_AVAILABLE = True
+ except ImportError:
+     SKLEARN_AVAILABLE = False
+
+ MEMORY_DIR = Path.home() / ".claude-memory"
+ DB_PATH = MEMORY_DIR / "memory.db"
+
+
+ class FrequencyAnalyzer:
+     """Analyzes technology and tool preferences via frequency counting."""
+
+     def __init__(self, db_path: Path):
+         self.db_path = db_path
+
+         # Predefined technology categories
+         self.tech_categories = {
+             'frontend_framework': ['react', 'nextjs', 'next.js', 'vue', 'angular', 'svelte', 'solid'],
+             'backend_framework': ['express', 'fastapi', 'django', 'flask', 'nestjs', 'spring', 'rails'],
+             'database': ['postgres', 'postgresql', 'mysql', 'mongodb', 'redis', 'dynamodb', 'sqlite'],
+             'state_management': ['redux', 'context', 'zustand', 'mobx', 'recoil', 'jotai'],
+             'styling': ['tailwind', 'css modules', 'styled-components', 'emotion', 'sass', 'less'],
+             'language': ['python', 'javascript', 'typescript', 'go', 'rust', 'java', 'c++'],
+             'deployment': ['docker', 'kubernetes', 'vercel', 'netlify', 'aws', 'gcp', 'azure'],
+             'testing': ['jest', 'pytest', 'vitest', 'mocha', 'cypress', 'playwright'],
+         }
+
+     def analyze_preferences(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
+         """Analyze technology preferences across memories."""
+         patterns = {}
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         for category, keywords in self.tech_categories.items():
+             keyword_counts = Counter()
+             evidence_memories = {}  # {keyword: [memory_ids]}
+
+             for memory_id in memory_ids:
+                 cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
+                 row = cursor.fetchone()
+
+                 if not row:
+                     continue
+
+                 content = row[0].lower()
+
+                 for keyword in keywords:
+                     # Count occurrences; lookarounds handle '.' and '+' in keywords
+                     pattern = r'(?<!\w)' + re.escape(keyword) + r'(?!\w)'
+                     matches = re.findall(pattern, content, re.IGNORECASE)
+                     count = len(matches)
+
+                     if count > 0:
+                         keyword_counts[keyword] += count
+
+                         if keyword not in evidence_memories:
+                             evidence_memories[keyword] = []
+                         evidence_memories[keyword].append(memory_id)
+
+             # Determine preference (most mentioned)
+             if keyword_counts:
+                 top_keyword = keyword_counts.most_common(1)[0][0]
+                 total_mentions = sum(keyword_counts.values())
+                 top_count = keyword_counts[top_keyword]
+
+                 # Calculate confidence (% of mentions)
+                 confidence = top_count / total_mentions if total_mentions > 0 else 0
+
+                 # Only create pattern if confidence > 0.6 and at least 3 mentions
+                 if confidence > 0.6 and top_count >= 3:
+                     value = self._format_preference(top_keyword, keyword_counts)
+                     evidence_list = list(set(evidence_memories.get(top_keyword, [])))
+
+                     patterns[category] = {
+                         'pattern_type': 'preference',
+                         'key': category,
+                         'value': value,
+                         'confidence': round(confidence, 2),
+                         'evidence_count': len(evidence_list),
+                         'memory_ids': evidence_list,
+                         'category': self._categorize_pattern(category)
+                     }
+
+         conn.close()
+         return patterns
+
+     def _format_preference(self, top_keyword: str, all_counts: Counter) -> str:
+         """Format preference value (e.g., 'Next.js over React')."""
+         # Normalize keyword for display
+         display_map = {
+             'nextjs': 'Next.js',
+             'next.js': 'Next.js',
+             'postgres': 'PostgreSQL',
+             'postgresql': 'PostgreSQL',
+             'fastapi': 'FastAPI',
+             'nestjs': 'NestJS',
+             'mongodb': 'MongoDB',
+             'redis': 'Redis',
+             'dynamodb': 'DynamoDB',
+             'tailwind': 'Tailwind CSS',
+         }
+
+         top_display = display_map.get(top_keyword.lower(), top_keyword.title())
+
+         if len(all_counts) > 1:
+             second = all_counts.most_common(2)[1]
+             second_keyword = second[0]
+             second_display = display_map.get(second_keyword.lower(), second_keyword.title())
+
+             # Only show comparison if second choice has significant mentions
+             if second[1] / all_counts[top_keyword] > 0.3:
+                 return f"{top_display} over {second_display}"
+
+         return top_display
+
+     def _categorize_pattern(self, tech_category: str) -> str:
+         """Map tech category to high-level category."""
+         category_map = {
+             'frontend_framework': 'frontend',
+             'state_management': 'frontend',
+             'styling': 'frontend',
+             'backend_framework': 'backend',
+             'database': 'backend',
+             'language': 'general',
+             'deployment': 'devops',
+             'testing': 'general',
+         }
+         return category_map.get(tech_category, 'general')
+
+
+ class ContextAnalyzer:
+     """Analyzes coding style patterns from context."""
+
+     def __init__(self, db_path: Path):
+         self.db_path = db_path
+
+         # Style pattern detection rules
+         self.style_indicators = {
+             'optimization_priority': {
+                 'performance': ['optimize', 'faster', 'performance', 'speed', 'latency', 'efficient', 'cache'],
+                 'readability': ['readable', 'clean', 'maintainable', 'clear', 'simple', 'understandable']
+             },
+             'error_handling': {
+                 'explicit': ['error boundary', 'explicit', 'throw', 'handle error', 'try catch', 'error handling'],
+                 'permissive': ['ignore', 'suppress', 'skip error', 'optional']
+             },
+             'testing_approach': {
+                 'comprehensive': ['test coverage', 'unit test', 'integration test', 'e2e test', 'test suite'],
+                 'minimal': ['manual test', 'skip test', 'no tests']
+             },
+             'code_organization': {
+                 'modular': ['separate', 'module', 'component', 'split', 'refactor', 'extract'],
+                 'monolithic': ['single file', 'one place', 'combined']
+             }
+         }
+
+     def analyze_style(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
+         """Detect stylistic patterns from context."""
+         patterns = {}
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         for pattern_key, indicators in self.style_indicators.items():
+             indicator_counts = Counter()
+             evidence_memories = {}  # {style_type: [memory_ids]}
+
+             for memory_id in memory_ids:
+                 cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
+                 row = cursor.fetchone()
+
+                 if not row:
+                     continue
+
+                 content = row[0].lower()
+
+                 for style_type, keywords in indicators.items():
+                     for keyword in keywords:
+                         if keyword in content:
+                             indicator_counts[style_type] += 1
+
+                             if style_type not in evidence_memories:
+                                 evidence_memories[style_type] = []
+                             evidence_memories[style_type].append(memory_id)
+
+             # Determine dominant style
+             if indicator_counts:
+                 top_style = indicator_counts.most_common(1)[0][0]
+                 total = sum(indicator_counts.values())
+                 top_count = indicator_counts[top_style]
+                 confidence = top_count / total if total > 0 else 0
+
+                 # Only create pattern if confidence > 0.65 and at least 3 mentions
+                 if confidence > 0.65 and top_count >= 3:
+                     value = self._format_style_value(pattern_key, top_style, indicator_counts)
+                     evidence_list = list(set(evidence_memories.get(top_style, [])))
+
+                     patterns[pattern_key] = {
+                         'pattern_type': 'style',
+                         'key': pattern_key,
+                         'value': value,
+                         'confidence': round(confidence, 2),
+                         'evidence_count': len(evidence_list),
+                         'memory_ids': evidence_list,
+                         'category': 'general'
+                     }
+
+         conn.close()
+         return patterns
+
+     def _format_style_value(self, pattern_key: str, top_style: str, all_counts: Counter) -> str:
+         """Format style value as comparison or preference."""
+         style_formats = {
+             'optimization_priority': {
+                 'performance': 'Performance over readability',
+                 'readability': 'Readability over performance'
+             },
+             'error_handling': {
+                 'explicit': 'Explicit error boundaries',
+                 'permissive': 'Permissive error handling'
+             },
+             'testing_approach': {
+                 'comprehensive': 'Comprehensive testing',
+                 'minimal': 'Minimal testing'
+             },
+             'code_organization': {
+                 'modular': 'Modular organization',
+                 'monolithic': 'Monolithic organization'
+             }
+         }
+
+         if pattern_key in style_formats and top_style in style_formats[pattern_key]:
+             return style_formats[pattern_key][top_style]
+
+         return top_style.replace('_', ' ').title()
+
+
+ class TerminologyLearner:
+     """Learns user-specific definitions of common terms."""
+
+     def __init__(self, db_path: Path):
+         self.db_path = db_path
+
+         # Common ambiguous terms to learn
+         self.ambiguous_terms = [
+             'optimize', 'refactor', 'clean', 'simple',
+             'mvp', 'prototype', 'scale', 'production-ready',
+             'fix', 'improve', 'update', 'enhance'
+         ]
+
+     def learn_terminology(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
+         """Learn user-specific term definitions."""
+         patterns = {}
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         for term in self.ambiguous_terms:
+             contexts = []
+
+             # Find all contexts where term appears
+             for memory_id in memory_ids:
+                 cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
+                 row = cursor.fetchone()
+
+                 if not row:
+                     continue
+
+                 content = row[0]
+
+                 # Find term in content (case-insensitive)
+                 pattern = r'\b' + re.escape(term) + r'\b'
+                 for match in re.finditer(pattern, content, re.IGNORECASE):
+                     term_idx = match.start()
+
+                     # Extract 100-char window around term
+                     start = max(0, term_idx - 100)
+                     end = min(len(content), term_idx + len(term) + 100)
+                     context_window = content[start:end]
+
+                     contexts.append({
+                         'memory_id': memory_id,
+                         'context': context_window
+                     })
+
+             # Analyze contexts to extract meaning (need at least 3 examples)
+             if len(contexts) >= 3:
+                 definition = self._extract_definition(term, contexts)
+
+                 if definition:
+                     evidence_list = list(set([ctx['memory_id'] for ctx in contexts]))
+
+                     # Confidence increases with more examples, capped at 0.95
+                     confidence = min(0.95, 0.6 + (len(contexts) * 0.05))
+
+                     patterns[term] = {
+                         'pattern_type': 'terminology',
+                         'key': term,
+                         'value': definition,
+                         'confidence': round(confidence, 2),
+                         'evidence_count': len(evidence_list),
+                         'memory_ids': evidence_list,
+                         'category': 'general'
+                     }
+
+         conn.close()
+         return patterns
+
+     def _extract_definition(self, term: str, contexts: List[Dict]) -> Optional[str]:
+         """Extract definition from contexts using pattern matching."""
+         # Collect words near the term across all contexts
+         nearby_words = []
+
+         for ctx in contexts:
+             words = re.findall(r'\b\w+\b', ctx['context'].lower())
+             nearby_words.extend(words)
+
+         # Count word frequencies
+         word_counts = Counter(nearby_words)
+
+         # Remove the term itself and common stopwords
+         stopwords = {'the', 'a', 'an', 'is', 'to', 'for', 'of', 'in', 'on', 'at',
+                      'and', 'or', 'but', 'with', 'from', 'by', 'this', 'that'}
+         word_counts = Counter({w: c for w, c in word_counts.items()
+                                if w not in stopwords and w != term.lower()})
+
+         # Get top co-occurring words
+         top_words = [w for w, _ in word_counts.most_common(8)]
+
+         # Apply heuristic rules based on term and context
+         if term == 'optimize':
+             if any(w in top_words for w in ['performance', 'speed', 'faster', 'latency']):
+                 return "Performance optimization (speed/latency)"
+             elif any(w in top_words for w in ['code', 'clean', 'refactor']):
+                 return "Code quality optimization"
+
+         elif term == 'refactor':
+             if any(w in top_words for w in ['architecture', 'structure', 'design']):
+                 return "Architecture change, not just renaming"
+             elif any(w in top_words for w in ['clean', 'organize', 'simplify']):
+                 return "Code organization improvement"
+
+         elif term == 'mvp':
+             if any(w in top_words for w in ['core', 'basic', 'essential', 'minimal']):
+                 return "Core features only, no polish"
+
+         elif term == 'production-ready':
+             if any(w in top_words for w in ['test', 'error', 'monitoring', 'deploy']):
+                 return "Fully tested and monitored for deployment"
+
+         # Generic definition if specific pattern not matched
+         if len(top_words) >= 3:
+             return f"Commonly used with: {', '.join(top_words[:3])}"
+
+         return None
+
+
+ class ConfidenceScorer:
+     """Calculates and tracks pattern confidence scores."""
+
+     def __init__(self, db_path: Path):
+         self.db_path = db_path
+
+     def calculate_confidence(
+         self,
+         pattern_type: str,
+         key: str,
+         value: str,
+         evidence_memory_ids: List[int],
+         total_memories: int
+     ) -> float:
+         """Calculate confidence score for a pattern."""
+         if total_memories == 0 or not evidence_memory_ids:
+             return 0.0
+
+         # Base confidence: % of memories supporting this
+         base_confidence = len(evidence_memory_ids) / total_memories
+
+         # Consistency check: recency bonus
+         recency_bonus = self._calculate_recency_bonus(evidence_memory_ids)
+
+         # Distribution check: are memories spread over time or clustered?
+         distribution_factor = self._calculate_distribution_factor(evidence_memory_ids)
+
+         # Final confidence
+         confidence = base_confidence * recency_bonus * distribution_factor
+
+         return min(1.0, confidence)  # Cap at 1.0
+
+     def _calculate_recency_bonus(self, memory_ids: List[int]) -> float:
+         """Give bonus to patterns with recent evidence."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Get timestamps
+         placeholders = ','.join('?' * len(memory_ids))
+         cursor.execute(f'''
+             SELECT created_at FROM memories
+             WHERE id IN ({placeholders})
+             ORDER BY created_at DESC
+         ''', memory_ids)
+
+         timestamps = cursor.fetchall()
+         conn.close()
+
+         if not timestamps:
+             return 1.0
+
+         # Check if any memories are from last 30 days
+         recent_count = 0
+         cutoff = datetime.now() - timedelta(days=30)
+
+         for ts_tuple in timestamps:
+             ts_str = ts_tuple[0]
+             try:
+                 ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
+                 if ts > cutoff:
+                     recent_count += 1
+             except (ValueError, AttributeError):
+                 pass
+
+         # Bonus if >50% are recent
+         if len(timestamps) > 0 and recent_count / len(timestamps) > 0.5:
+             return 1.2
+         else:
+             return 1.0
+
+     def _calculate_distribution_factor(self, memory_ids: List[int]) -> float:
+         """Better confidence if memories are distributed over time, not just one session."""
+         if len(memory_ids) < 3:
+             return 0.8  # Penalize low sample size
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         placeholders = ','.join('?' * len(memory_ids))
+         cursor.execute(f'''
+             SELECT created_at FROM memories
+             WHERE id IN ({placeholders})
+             ORDER BY created_at
+         ''', memory_ids)
+
+         timestamps = [row[0] for row in cursor.fetchall()]
+         conn.close()
+
+         if len(timestamps) < 2:
+             return 0.8
+
+         try:
+             # Parse timestamps
+             dates = []
+             for ts_str in timestamps:
+                 try:
+                     ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
+                     dates.append(ts)
+                 except (ValueError, AttributeError):
+                     pass
+
+             if len(dates) < 2:
+                 return 0.8
+
+             # Calculate time span
+             time_span = (dates[-1] - dates[0]).days
+
+             # If memories span multiple days, higher confidence
+             if time_span > 7:
+                 return 1.1
+             elif time_span > 1:
+                 return 1.0
+             else:
+                 return 0.9  # All on same day = might be one-off
+
+         except Exception:
+             return 1.0
+
+
+ class PatternStore:
+     """Handles pattern storage and retrieval."""
+
+     def __init__(self, db_path: Path):
+         self.db_path = db_path
+         self._init_tables()
+
+     def _init_tables(self):
+         """Initialize pattern tables if they don't exist."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Identity patterns table
+         cursor.execute('''
+             CREATE TABLE IF NOT EXISTS identity_patterns (
+                 id INTEGER PRIMARY KEY AUTOINCREMENT,
+                 pattern_type TEXT NOT NULL,
+                 key TEXT NOT NULL,
+                 value TEXT NOT NULL,
+                 confidence REAL DEFAULT 0.5,
+                 evidence_count INTEGER DEFAULT 1,
+                 memory_ids TEXT,
+                 category TEXT,
+                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                 UNIQUE(pattern_type, key, category)
+             )
+         ''')
+
+         # Pattern examples table
+         cursor.execute('''
+             CREATE TABLE IF NOT EXISTS pattern_examples (
+                 id INTEGER PRIMARY KEY AUTOINCREMENT,
+                 pattern_id INTEGER NOT NULL,
+                 memory_id INTEGER NOT NULL,
+                 example_text TEXT,
+                 FOREIGN KEY (pattern_id) REFERENCES identity_patterns(id) ON DELETE CASCADE,
+                 FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
+             )
+         ''')
+
+         # Indexes
+         cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_type ON identity_patterns(pattern_type)')
+         cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_confidence ON identity_patterns(confidence)')
+
+         conn.commit()
+         conn.close()
+
+     def save_pattern(self, pattern: Dict[str, Any]) -> int:
+         """Save or update a pattern."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             # Check if pattern exists
+             cursor.execute('''
+                 SELECT id FROM identity_patterns
+                 WHERE pattern_type = ? AND key = ? AND category = ?
+             ''', (pattern['pattern_type'], pattern['key'], pattern['category']))
+
+             existing = cursor.fetchone()
+
+             memory_ids_json = json.dumps(pattern['memory_ids'])
+
+             if existing:
+                 # Update existing pattern
+                 pattern_id = existing[0]
+                 cursor.execute('''
+                     UPDATE identity_patterns
+                     SET value = ?, confidence = ?, evidence_count = ?,
+                         memory_ids = ?, updated_at = CURRENT_TIMESTAMP
+                     WHERE id = ?
+                 ''', (
+                     pattern['value'],
+                     pattern['confidence'],
+                     pattern['evidence_count'],
+                     memory_ids_json,
+                     pattern_id
+                 ))
+             else:
+                 # Insert new pattern
+                 cursor.execute('''
+                     INSERT INTO identity_patterns
+                     (pattern_type, key, value, confidence, evidence_count, memory_ids, category)
+                     VALUES (?, ?, ?, ?, ?, ?, ?)
+                 ''', (
+                     pattern['pattern_type'],
+                     pattern['key'],
+                     pattern['value'],
+                     pattern['confidence'],
+                     pattern['evidence_count'],
+                     memory_ids_json,
+                     pattern['category']
+                 ))
+                 pattern_id = cursor.lastrowid
+
+             # Save examples
+             self._save_pattern_examples(cursor, pattern_id, pattern['memory_ids'], pattern['key'])
+
+             conn.commit()
+             return pattern_id
+
+         finally:
+             conn.close()
+
+     def _save_pattern_examples(self, cursor, pattern_id: int, memory_ids: List[int], key: str):
+         """Save representative examples for pattern."""
+         # Clear old examples
+         cursor.execute('DELETE FROM pattern_examples WHERE pattern_id = ?', (pattern_id,))
+
+         # Save top 3 examples
+         for memory_id in memory_ids[:3]:
+             cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
+             row = cursor.fetchone()
+
+             if row:
+                 content = row[0]
+                 excerpt = self._extract_relevant_excerpt(content, key)
+
+                 cursor.execute('''
+                     INSERT INTO pattern_examples (pattern_id, memory_id, example_text)
+                     VALUES (?, ?, ?)
+                 ''', (pattern_id, memory_id, excerpt))
+
+     def _extract_relevant_excerpt(self, content: str, key: str) -> str:
+         """Extract 150-char excerpt showing pattern."""
+         # Find first mention of key term
+         key_lower = key.lower().replace('_', ' ')
+         idx = content.lower().find(key_lower)
+
+         if idx >= 0:
+             start = max(0, idx - 50)
+             end = min(len(content), idx + 100)
+             excerpt = content[start:end]
+             return excerpt if len(excerpt) <= 150 else excerpt[:150] + '...'
+
+         # Fallback: first 150 chars
+         return content[:150] + ('...' if len(content) > 150 else '')
+
+     def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None) -> List[Dict[str, Any]]:
+         """Get patterns above confidence threshold."""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         if pattern_type:
+             cursor.execute('''
+                 SELECT id, pattern_type, key, value, confidence, evidence_count, category, created_at
+                 FROM identity_patterns
+                 WHERE confidence >= ? AND pattern_type = ?
+                 ORDER BY confidence DESC, evidence_count DESC
+             ''', (min_confidence, pattern_type))
+         else:
+             cursor.execute('''
+                 SELECT id, pattern_type, key, value, confidence, evidence_count, category, created_at
+                 FROM identity_patterns
+                 WHERE confidence >= ?
+                 ORDER BY confidence DESC, evidence_count DESC
+             ''', (min_confidence,))
+
+         patterns = []
+         for row in cursor.fetchall():
+             patterns.append({
+                 'id': row[0],
+                 'pattern_type': row[1],
+                 'key': row[2],
+                 'value': row[3],
+                 'confidence': row[4],
+                 'evidence_count': row[5],
+                 'category': row[6],
+                 'created_at': row[7]
+             })
+
+         conn.close()
+         return patterns
+
+
+ class PatternLearner:
+     """Main pattern learning orchestrator."""
+
+     def __init__(self, db_path: Path = DB_PATH):
+         self.db_path = db_path
+         self.frequency_analyzer = FrequencyAnalyzer(db_path)
+         self.context_analyzer = ContextAnalyzer(db_path)
+         self.terminology_learner = TerminologyLearner(db_path)
+         self.confidence_scorer = ConfidenceScorer(db_path)
+         self.pattern_store = PatternStore(db_path)
+
+     def weekly_pattern_update(self) -> Dict[str, int]:
+         """Full pattern analysis of all memories. Run this weekly."""
+         print("Starting weekly pattern update...")
+
+         # Get all memory IDs
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+         cursor.execute('SELECT id FROM memories ORDER BY created_at')
+         all_memory_ids = [row[0] for row in cursor.fetchall()]
+         total_memories = len(all_memory_ids)
+         conn.close()
+
+         if total_memories == 0:
+             print("No memories found. Add memories first.")
+             return {'preferences': 0, 'styles': 0, 'terminology': 0}
+
+         print(f"Analyzing {total_memories} memories...")
+
+         # Run all analyzers
+         preferences = self.frequency_analyzer.analyze_preferences(all_memory_ids)
+         print(f"  Found {len(preferences)} preference patterns")
+
+         styles = self.context_analyzer.analyze_style(all_memory_ids)
+         print(f"  Found {len(styles)} style patterns")
+
+         terms = self.terminology_learner.learn_terminology(all_memory_ids)
+         print(f"  Found {len(terms)} terminology patterns")
+
+         # Recalculate confidence scores and save all patterns
+         counts = {'preferences': 0, 'styles': 0, 'terminology': 0}
+
+         for pattern in preferences.values():
+             confidence = self.confidence_scorer.calculate_confidence(
+                 pattern['pattern_type'],
+                 pattern['key'],
+                 pattern['value'],
+                 pattern['memory_ids'],
+                 total_memories
+             )
+             pattern['confidence'] = round(confidence, 2)
+             self.pattern_store.save_pattern(pattern)
+             counts['preferences'] += 1
+
+         for pattern in styles.values():
+             confidence = self.confidence_scorer.calculate_confidence(
+                 pattern['pattern_type'],
+                 pattern['key'],
+                 pattern['value'],
+                 pattern['memory_ids'],
+                 total_memories
+             )
+             pattern['confidence'] = round(confidence, 2)
+             self.pattern_store.save_pattern(pattern)
+             counts['styles'] += 1
+
+         for pattern in terms.values():
+             confidence = self.confidence_scorer.calculate_confidence(
+                 pattern['pattern_type'],
+                 pattern['key'],
+                 pattern['value'],
+                 pattern['memory_ids'],
+                 total_memories
+             )
+             pattern['confidence'] = round(confidence, 2)
+             self.pattern_store.save_pattern(pattern)
+             counts['terminology'] += 1
+
+         print(f"\nPattern update complete:")
+         print(f"  {counts['preferences']} preferences")
+         print(f"  {counts['styles']} styles")
+         print(f"  {counts['terminology']} terminology")
+
+         return counts
+
+     def on_new_memory(self, memory_id: int):
+         """Incremental update when new memory is added."""
+         # For now, just trigger full update if memory count is low
+         # Future optimization: only update affected patterns
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+         cursor.execute('SELECT COUNT(*) FROM memories')
+         total = cursor.fetchone()[0]
+         conn.close()
+
+         # Only do incremental updates if we have many memories (>50)
+         if total > 50:
+             # TODO: Implement true incremental update
+             print(f"New memory #{memory_id} added. Run weekly_pattern_update() to update patterns.")
+         else:
+             # For small memory counts, just do full update
+             self.weekly_pattern_update()
+
+     def get_patterns(self, min_confidence: float = 0.7) -> List[Dict[str, Any]]:
+         """Query patterns above confidence threshold."""
+         return self.pattern_store.get_patterns(min_confidence)
+
+     def get_identity_context(self, min_confidence: float = 0.7) -> str:
+         """Format patterns for Claude context injection."""
+         patterns = self.get_patterns(min_confidence)
+
+         if not patterns:
+             return "## Working with User - Learned Patterns\n\nNo patterns learned yet. Add more memories to build your profile."
+
+         # Group by pattern type
+         sections = {
+             'preference': [],
+             'style': [],
+             'terminology': []
+         }
+
+         for p in patterns:
+             sections[p['pattern_type']].append(
+                 f"- **{p['key'].replace('_', ' ').title()}:** {p['value']} "
+                 f"(confidence: {p['confidence']:.0%}, {p['evidence_count']} examples)"
+             )
+
+         output = "## Working with User - Learned Patterns\n\n"
+
+         if sections['preference']:
+             output += "**Technology Preferences:**\n" + '\n'.join(sections['preference']) + '\n\n'
+
+         if sections['style']:
+             output += "**Coding Style:**\n" + '\n'.join(sections['style']) + '\n\n'
+
+         if sections['terminology']:
+             output += "**Terminology:**\n" + '\n'.join(sections['terminology']) + '\n'
+
+         return output
+
+
+ # CLI Interface
+ if __name__ == "__main__":
+     import sys
+
+     learner = PatternLearner()
+
+     if len(sys.argv) < 2:
+         print("Pattern Learner - Identity Profile Extraction")
+         print("\nUsage:")
+         print("  python pattern_learner.py update            # Full pattern update (weekly)")
+         print("  python pattern_learner.py list [min_conf]   # List learned patterns (default: 0.7)")
+         print("  python pattern_learner.py context [min]     # Get context for Claude")
+         print("  python pattern_learner.py stats             # Pattern statistics")
+         sys.exit(0)
+
+     command = sys.argv[1]
+
+     if command == "update":
+         counts = learner.weekly_pattern_update()
+         print(f"\nTotal patterns learned: {sum(counts.values())}")
+
+     elif command == "list":
+         min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
+         patterns = learner.get_patterns(min_conf)
+
+         if not patterns:
+             print(f"No patterns found with confidence >= {min_conf:.0%}")
+         else:
+             print(f"\n{'Type':<15} {'Category':<12} {'Pattern':<30} {'Confidence':<12} {'Evidence':<10}")
+             print("-" * 95)
+
+             for p in patterns:
+                 pattern_display = f"{p['key'].replace('_', ' ').title()}: {p['value']}"
+                 if len(pattern_display) > 28:
+                     pattern_display = pattern_display[:28] + "..."
+
+                 print(f"{p['pattern_type']:<15} {p['category']:<12} {pattern_display:<30} "
+                       f"{p['confidence']:>6.0%}      {p['evidence_count']:<10}")
+
+     elif command == "context":
+         min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
+         context = learner.get_identity_context(min_conf)
+         print(context)
+
+     elif command == "stats":
+         patterns = learner.get_patterns(0.5)  # Include all patterns
+
+         if not patterns:
+             print("No patterns learned yet.")
+         else:
+             by_type = Counter([p['pattern_type'] for p in patterns])
+             by_category = Counter([p['category'] for p in patterns])
+
+             avg_confidence = sum(p['confidence'] for p in patterns) / len(patterns)
+             high_conf = len([p for p in patterns if p['confidence'] >= 0.8])
+
+             print(f"\nPattern Statistics:")
+             print(f"  Total patterns: {len(patterns)}")
+             print(f"  Average confidence: {avg_confidence:.0%}")
+             print(f"  High confidence (>=80%): {high_conf}")
+             print(f"\nBy Type:")
+             for ptype, count in by_type.most_common():
+                 print(f"  {ptype}: {count}")
+             print(f"\nBy Category:")
+             for cat, count in by_category.most_common():
+                 print(f"  {cat}: {count}")
+
+     else:
+         print(f"Unknown command: {command}")
+         sys.exit(1)
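
For orientation, a minimal usage sketch (not part of the package) showing how the module above might be driven. It assumes only what the file itself defines: the PatternLearner class, the DB_PATH default of ~/.claude-memory/memory.db, and a memories table with content and created_at columns. The seeded memory text is invented for illustration.

    # Hypothetical driver for pattern_learner.py; mirrors only the module's
    # own assumptions, not any documented API beyond what the file defines.
    import sqlite3

    from pattern_learner import PatternLearner, DB_PATH

    # The module reads from a `memories` table; create a minimal one if absent.
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    conn.execute('''CREATE TABLE IF NOT EXISTS memories (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        content TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
    conn.execute("INSERT INTO memories (content) VALUES (?)",
                 ("Prefer FastAPI over Flask; optimize for latency.",))
    conn.commit()
    conn.close()

    learner = PatternLearner()
    learner.weekly_pattern_update()   # full analysis pass over all memories
    print(learner.get_identity_context(min_confidence=0.7))

The same flow is available from the shell via "python pattern_learner.py update" followed by "python pattern_learner.py context", per the CLI usage text in the __main__ block. Note that with only one seeded memory no pattern clears the 3-mention evidence floor; the thresholds in the analyzers are tuned for a populated database.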