superlocalmemory 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +140 -0
- package/CHANGELOG.md +1749 -0
- package/LICENSE +21 -0
- package/README.md +600 -0
- package/bin/aider-smart +72 -0
- package/bin/slm +202 -0
- package/bin/slm-npm +73 -0
- package/bin/slm.bat +195 -0
- package/bin/slm.cmd +10 -0
- package/bin/superlocalmemoryv2:list +3 -0
- package/bin/superlocalmemoryv2:profile +3 -0
- package/bin/superlocalmemoryv2:recall +3 -0
- package/bin/superlocalmemoryv2:remember +3 -0
- package/bin/superlocalmemoryv2:reset +3 -0
- package/bin/superlocalmemoryv2:status +3 -0
- package/completions/slm.bash +58 -0
- package/completions/slm.zsh +76 -0
- package/configs/antigravity-mcp.json +13 -0
- package/configs/chatgpt-desktop-mcp.json +7 -0
- package/configs/claude-desktop-mcp.json +15 -0
- package/configs/codex-mcp.toml +13 -0
- package/configs/cody-commands.json +29 -0
- package/configs/continue-mcp.yaml +14 -0
- package/configs/continue-skills.yaml +26 -0
- package/configs/cursor-mcp.json +15 -0
- package/configs/gemini-cli-mcp.json +11 -0
- package/configs/jetbrains-mcp.json +11 -0
- package/configs/opencode-mcp.json +12 -0
- package/configs/perplexity-mcp.json +9 -0
- package/configs/vscode-copilot-mcp.json +12 -0
- package/configs/windsurf-mcp.json +16 -0
- package/configs/zed-mcp.json +12 -0
- package/docs/ARCHITECTURE.md +877 -0
- package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
- package/docs/COMPETITIVE-ANALYSIS.md +210 -0
- package/docs/COMPRESSION-README.md +390 -0
- package/docs/GRAPH-ENGINE.md +503 -0
- package/docs/MCP-MANUAL-SETUP.md +720 -0
- package/docs/MCP-TROUBLESHOOTING.md +787 -0
- package/docs/PATTERN-LEARNING.md +363 -0
- package/docs/PROFILES-GUIDE.md +453 -0
- package/docs/RESET-GUIDE.md +353 -0
- package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
- package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
- package/docs/UI-SERVER.md +254 -0
- package/docs/UNIVERSAL-INTEGRATION.md +432 -0
- package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
- package/docs/WINDOWS-INSTALL-README.txt +34 -0
- package/docs/WINDOWS-POST-INSTALL.txt +45 -0
- package/docs/example_graph_usage.py +148 -0
- package/hooks/memory-list-skill.js +130 -0
- package/hooks/memory-profile-skill.js +284 -0
- package/hooks/memory-recall-skill.js +109 -0
- package/hooks/memory-remember-skill.js +127 -0
- package/hooks/memory-reset-skill.js +274 -0
- package/install-skills.sh +436 -0
- package/install.ps1 +417 -0
- package/install.sh +755 -0
- package/mcp_server.py +585 -0
- package/package.json +94 -0
- package/requirements-core.txt +24 -0
- package/requirements.txt +10 -0
- package/scripts/postinstall.js +126 -0
- package/scripts/preuninstall.js +57 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +325 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
- package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
- package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
- package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
- package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
- package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
- package/src/cache_manager.py +520 -0
- package/src/embedding_engine.py +671 -0
- package/src/graph_engine.py +970 -0
- package/src/hnsw_index.py +626 -0
- package/src/hybrid_search.py +693 -0
- package/src/memory-profiles.py +518 -0
- package/src/memory-reset.py +485 -0
- package/src/memory_compression.py +999 -0
- package/src/memory_store_v2.py +1088 -0
- package/src/migrate_v1_to_v2.py +638 -0
- package/src/pattern_learner.py +898 -0
- package/src/query_optimizer.py +513 -0
- package/src/search_engine_v2.py +403 -0
- package/src/setup_validator.py +479 -0
- package/src/tree_manager.py +720 -0
package/src/pattern_learner.py
@@ -0,0 +1,898 @@
#!/usr/bin/env python3
"""
SuperLocalMemory V2 - Intelligent Local Memory System
Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License

Repository: https://github.com/varun369/SuperLocalMemoryV2
Author: Varun Pratap Bhardwaj (Solution Architect)

NOTICE: This software is protected by MIT License.
Attribution must be preserved in all copies or derivatives.
"""

"""
Pattern Learner - Identity Profile Extraction (Layer 4)

Learns user preferences, coding style, and terminology patterns from memories.
Uses local TF-IDF, frequency analysis, and heuristics - NO EXTERNAL APIs.

Based on architecture: docs/architecture/05-pattern-learner.md
"""

import sqlite3
import json
import re
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Counter as CounterType
from collections import Counter

# Local NLP tools (no external APIs)
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False

MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"

class FrequencyAnalyzer:
    """Analyzes technology and tool preferences via frequency counting."""

    def __init__(self, db_path: Path):
        self.db_path = db_path

        # Predefined technology categories
        self.tech_categories = {
            'frontend_framework': ['react', 'nextjs', 'next.js', 'vue', 'angular', 'svelte', 'solid'],
            'backend_framework': ['express', 'fastapi', 'django', 'flask', 'nestjs', 'spring', 'rails'],
            'database': ['postgres', 'postgresql', 'mysql', 'mongodb', 'redis', 'dynamodb', 'sqlite'],
            'state_management': ['redux', 'context', 'zustand', 'mobx', 'recoil', 'jotai'],
            'styling': ['tailwind', 'css modules', 'styled-components', 'emotion', 'sass', 'less'],
            'language': ['python', 'javascript', 'typescript', 'go', 'rust', 'java', 'c++'],
            'deployment': ['docker', 'kubernetes', 'vercel', 'netlify', 'aws', 'gcp', 'azure'],
            'testing': ['jest', 'pytest', 'vitest', 'mocha', 'cypress', 'playwright'],
        }

    def analyze_preferences(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Analyze technology preferences across memories."""
        patterns = {}

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        for category, keywords in self.tech_categories.items():
            keyword_counts = Counter()
            evidence_memories = {}  # {keyword: [memory_ids]}

            for memory_id in memory_ids:
                cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
                row = cursor.fetchone()

                if not row:
                    continue

                content = row[0].lower()

                for keyword in keywords:
                    # Count occurrences with word boundaries
                    # (re.escape alone already escapes the '.' in e.g. 'next.js';
                    # pre-escaping the dot would double-escape and break the match)
                    pattern = r'\b' + re.escape(keyword) + r'\b'
                    matches = re.findall(pattern, content, re.IGNORECASE)
                    count = len(matches)

                    if count > 0:
                        keyword_counts[keyword] += count

                        if keyword not in evidence_memories:
                            evidence_memories[keyword] = []
                        evidence_memories[keyword].append(memory_id)

            # Determine preference (most mentioned)
            if keyword_counts:
                top_keyword = keyword_counts.most_common(1)[0][0]
                total_mentions = sum(keyword_counts.values())
                top_count = keyword_counts[top_keyword]

                # Calculate confidence (% of mentions)
                confidence = top_count / total_mentions if total_mentions > 0 else 0

                # Only create pattern if confidence > 0.6 and at least 3 mentions
                if confidence > 0.6 and top_count >= 3:
                    value = self._format_preference(top_keyword, keyword_counts)
                    evidence_list = list(set(evidence_memories.get(top_keyword, [])))

                    patterns[category] = {
                        'pattern_type': 'preference',
                        'key': category,
                        'value': value,
                        'confidence': round(confidence, 2),
                        'evidence_count': len(evidence_list),
                        'memory_ids': evidence_list,
                        'category': self._categorize_pattern(category)
                    }

        conn.close()
        return patterns

    def _format_preference(self, top_keyword: str, all_counts: Counter) -> str:
        """Format preference value (e.g., 'Next.js over React')."""
        # Normalize keyword for display
        display_map = {
            'nextjs': 'Next.js',
            'next.js': 'Next.js',
            'postgres': 'PostgreSQL',
            'postgresql': 'PostgreSQL',
            'fastapi': 'FastAPI',
            'nestjs': 'NestJS',
            'mongodb': 'MongoDB',
            'redis': 'Redis',
            'dynamodb': 'DynamoDB',
            'tailwind': 'Tailwind CSS',
        }

        top_display = display_map.get(top_keyword.lower(), top_keyword.title())

        if len(all_counts) > 1:
            second = all_counts.most_common(2)[1]
            second_keyword = second[0]
            second_display = display_map.get(second_keyword.lower(), second_keyword.title())

            # Only show comparison if second choice has significant mentions
            if second[1] / all_counts[top_keyword] > 0.3:
                return f"{top_display} over {second_display}"

        return top_display

    def _categorize_pattern(self, tech_category: str) -> str:
        """Map tech category to high-level category."""
        category_map = {
            'frontend_framework': 'frontend',
            'state_management': 'frontend',
            'styling': 'frontend',
            'backend_framework': 'backend',
            'database': 'backend',
            'language': 'general',
            'deployment': 'devops',
            'testing': 'general',
        }
        return category_map.get(tech_category, 'general')

class ContextAnalyzer:
    """Analyzes coding style patterns from context."""

    def __init__(self, db_path: Path):
        self.db_path = db_path

        # Style pattern detection rules
        self.style_indicators = {
            'optimization_priority': {
                'performance': ['optimize', 'faster', 'performance', 'speed', 'latency', 'efficient', 'cache'],
                'readability': ['readable', 'clean', 'maintainable', 'clear', 'simple', 'understandable']
            },
            'error_handling': {
                'explicit': ['error boundary', 'explicit', 'throw', 'handle error', 'try catch', 'error handling'],
                'permissive': ['ignore', 'suppress', 'skip error', 'optional']
            },
            'testing_approach': {
                'comprehensive': ['test coverage', 'unit test', 'integration test', 'e2e test', 'test suite'],
                'minimal': ['manual test', 'skip test', 'no tests']
            },
            'code_organization': {
                'modular': ['separate', 'module', 'component', 'split', 'refactor', 'extract'],
                'monolithic': ['single file', 'one place', 'combined']
            }
        }

    def analyze_style(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Detect stylistic patterns from context."""
        patterns = {}

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        for pattern_key, indicators in self.style_indicators.items():
            indicator_counts = Counter()
            evidence_memories = {}  # {style_type: [memory_ids]}

            for memory_id in memory_ids:
                cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
                row = cursor.fetchone()

                if not row:
                    continue

                content = row[0].lower()

                for style_type, keywords in indicators.items():
                    for keyword in keywords:
                        if keyword in content:
                            indicator_counts[style_type] += 1

                            if style_type not in evidence_memories:
                                evidence_memories[style_type] = []
                            evidence_memories[style_type].append(memory_id)

            # Determine dominant style
            if indicator_counts:
                top_style = indicator_counts.most_common(1)[0][0]
                total = sum(indicator_counts.values())
                top_count = indicator_counts[top_style]
                confidence = top_count / total if total > 0 else 0

                # Only create pattern if confidence > 0.65 and at least 3 mentions
                if confidence > 0.65 and top_count >= 3:
                    value = self._format_style_value(pattern_key, top_style, indicator_counts)
                    evidence_list = list(set(evidence_memories.get(top_style, [])))

                    patterns[pattern_key] = {
                        'pattern_type': 'style',
                        'key': pattern_key,
                        'value': value,
                        'confidence': round(confidence, 2),
                        'evidence_count': len(evidence_list),
                        'memory_ids': evidence_list,
                        'category': 'general'
                    }

        conn.close()
        return patterns

    def _format_style_value(self, pattern_key: str, top_style: str, all_counts: Counter) -> str:
        """Format style value as comparison or preference."""
        style_formats = {
            'optimization_priority': {
                'performance': 'Performance over readability',
                'readability': 'Readability over performance'
            },
            'error_handling': {
                'explicit': 'Explicit error boundaries',
                'permissive': 'Permissive error handling'
            },
            'testing_approach': {
                'comprehensive': 'Comprehensive testing',
                'minimal': 'Minimal testing'
            },
            'code_organization': {
                'modular': 'Modular organization',
                'monolithic': 'Monolithic organization'
            }
        }

        if pattern_key in style_formats and top_style in style_formats[pattern_key]:
            return style_formats[pattern_key][top_style]

        return top_style.replace('_', ' ').title()

class TerminologyLearner:
    """Learns user-specific definitions of common terms."""

    def __init__(self, db_path: Path):
        self.db_path = db_path

        # Common ambiguous terms to learn
        self.ambiguous_terms = [
            'optimize', 'refactor', 'clean', 'simple',
            'mvp', 'prototype', 'scale', 'production-ready',
            'fix', 'improve', 'update', 'enhance'
        ]

    def learn_terminology(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Learn user-specific term definitions."""
        patterns = {}

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        for term in self.ambiguous_terms:
            contexts = []

            # Find all contexts where term appears
            for memory_id in memory_ids:
                cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
                row = cursor.fetchone()

                if not row:
                    continue

                content = row[0]

                # Find term in content (case-insensitive)
                pattern = r'\b' + re.escape(term) + r'\b'
                for match in re.finditer(pattern, content, re.IGNORECASE):
                    term_idx = match.start()

                    # Extract 100-char window around term
                    start = max(0, term_idx - 100)
                    end = min(len(content), term_idx + len(term) + 100)
                    context_window = content[start:end]

                    contexts.append({
                        'memory_id': memory_id,
                        'context': context_window
                    })

            # Analyze contexts to extract meaning (need at least 3 examples)
            if len(contexts) >= 3:
                definition = self._extract_definition(term, contexts)

                if definition:
                    evidence_list = list(set([ctx['memory_id'] for ctx in contexts]))

                    # Confidence increases with more examples, capped at 0.95
                    confidence = min(0.95, 0.6 + (len(contexts) * 0.05))

                    patterns[term] = {
                        'pattern_type': 'terminology',
                        'key': term,
                        'value': definition,
                        'confidence': round(confidence, 2),
                        'evidence_count': len(evidence_list),
                        'memory_ids': evidence_list,
                        'category': 'general'
                    }

        conn.close()
        return patterns

    def _extract_definition(self, term: str, contexts: List[Dict]) -> Optional[str]:
        """Extract definition from contexts using pattern matching."""
        # Collect words near the term across all contexts
        nearby_words = []

        for ctx in contexts:
            words = re.findall(r'\b\w+\b', ctx['context'].lower())
            nearby_words.extend(words)

        # Count word frequencies
        word_counts = Counter(nearby_words)

        # Remove the term itself and common stopwords
        stopwords = {'the', 'a', 'an', 'is', 'to', 'for', 'of', 'in', 'on', 'at',
                     'and', 'or', 'but', 'with', 'from', 'by', 'this', 'that'}
        word_counts = Counter({w: c for w, c in word_counts.items()
                               if w not in stopwords and w != term.lower()})

        # Get top co-occurring words
        top_words = [w for w, _ in word_counts.most_common(8)]

        # Apply heuristic rules based on term and context
        if term == 'optimize':
            if any(w in top_words for w in ['performance', 'speed', 'faster', 'latency']):
                return "Performance optimization (speed/latency)"
            elif any(w in top_words for w in ['code', 'clean', 'refactor']):
                return "Code quality optimization"

        elif term == 'refactor':
            if any(w in top_words for w in ['architecture', 'structure', 'design']):
                return "Architecture change, not just renaming"
            elif any(w in top_words for w in ['clean', 'organize', 'simplify']):
                return "Code organization improvement"

        elif term == 'mvp':
            if any(w in top_words for w in ['core', 'basic', 'essential', 'minimal']):
                return "Core features only, no polish"

        elif term == 'production-ready':
            if any(w in top_words for w in ['test', 'error', 'monitoring', 'deploy']):
                return "Fully tested and monitored for deployment"

        # Generic definition if specific pattern not matched
        if len(top_words) >= 3:
            return f"Commonly used with: {', '.join(top_words[:3])}"

        return None

class ConfidenceScorer:
    """Calculates and tracks pattern confidence scores."""

    def __init__(self, db_path: Path):
        self.db_path = db_path

    def calculate_confidence(
        self,
        pattern_type: str,
        key: str,
        value: str,
        evidence_memory_ids: List[int],
        total_memories: int
    ) -> float:
        """Calculate confidence score for a pattern."""
        if total_memories == 0 or not evidence_memory_ids:
            return 0.0

        # Base confidence: % of memories supporting this
        base_confidence = len(evidence_memory_ids) / total_memories

        # Consistency check: recency bonus
        recency_bonus = self._calculate_recency_bonus(evidence_memory_ids)

        # Distribution check: are memories spread over time or clustered?
        distribution_factor = self._calculate_distribution_factor(evidence_memory_ids)

        # Final confidence
        confidence = base_confidence * recency_bonus * distribution_factor

        return min(1.0, confidence)  # Cap at 1.0

    def _calculate_recency_bonus(self, memory_ids: List[int]) -> float:
        """Give bonus to patterns with recent evidence."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get timestamps
        placeholders = ','.join('?' * len(memory_ids))
        cursor.execute(f'''
            SELECT created_at FROM memories
            WHERE id IN ({placeholders})
            ORDER BY created_at DESC
        ''', memory_ids)

        timestamps = cursor.fetchall()
        conn.close()

        if not timestamps:
            return 1.0

        # Check if any memories are from last 30 days
        recent_count = 0
        cutoff = datetime.now() - timedelta(days=30)

        for ts_tuple in timestamps:
            ts_str = ts_tuple[0]
            try:
                ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
                if ts > cutoff:
                    recent_count += 1
            except (ValueError, AttributeError):
                pass

        # Bonus if >50% are recent
        if len(timestamps) > 0 and recent_count / len(timestamps) > 0.5:
            return 1.2
        else:
            return 1.0

    def _calculate_distribution_factor(self, memory_ids: List[int]) -> float:
        """Better confidence if memories are distributed over time, not just one session."""
        if len(memory_ids) < 3:
            return 0.8  # Penalize low sample size

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        placeholders = ','.join('?' * len(memory_ids))
        cursor.execute(f'''
            SELECT created_at FROM memories
            WHERE id IN ({placeholders})
            ORDER BY created_at
        ''', memory_ids)

        timestamps = [row[0] for row in cursor.fetchall()]
        conn.close()

        if len(timestamps) < 2:
            return 0.8

        try:
            # Parse timestamps
            dates = []
            for ts_str in timestamps:
                try:
                    ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
                    dates.append(ts)
                except (ValueError, AttributeError):
                    pass

            if len(dates) < 2:
                return 0.8

            # Calculate time span
            time_span = (dates[-1] - dates[0]).days

            # If memories span multiple days, higher confidence
            if time_span > 7:
                return 1.1
            elif time_span > 1:
                return 1.0
            else:
                return 0.9  # All on same day = might be one-off

        except Exception:
            return 1.0

class PatternStore:
    """Handles pattern storage and retrieval."""

    def __init__(self, db_path: Path):
        self.db_path = db_path
        self._init_tables()

    def _init_tables(self):
        """Initialize pattern tables if they don't exist."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Identity patterns table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS identity_patterns (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                pattern_type TEXT NOT NULL,
                key TEXT NOT NULL,
                value TEXT NOT NULL,
                confidence REAL DEFAULT 0.5,
                evidence_count INTEGER DEFAULT 1,
                memory_ids TEXT,
                category TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(pattern_type, key, category)
            )
        ''')

        # Pattern examples table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS pattern_examples (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                pattern_id INTEGER NOT NULL,
                memory_id INTEGER NOT NULL,
                example_text TEXT,
                FOREIGN KEY (pattern_id) REFERENCES identity_patterns(id) ON DELETE CASCADE,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        ''')

        # Indexes
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_type ON identity_patterns(pattern_type)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_confidence ON identity_patterns(confidence)')

        conn.commit()
        conn.close()

    def save_pattern(self, pattern: Dict[str, Any]) -> int:
        """Save or update a pattern."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Check if pattern exists
            cursor.execute('''
                SELECT id FROM identity_patterns
                WHERE pattern_type = ? AND key = ? AND category = ?
            ''', (pattern['pattern_type'], pattern['key'], pattern['category']))

            existing = cursor.fetchone()

            memory_ids_json = json.dumps(pattern['memory_ids'])

            if existing:
                # Update existing pattern
                pattern_id = existing[0]
                cursor.execute('''
                    UPDATE identity_patterns
                    SET value = ?, confidence = ?, evidence_count = ?,
                        memory_ids = ?, updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern_id
                ))
            else:
                # Insert new pattern
                cursor.execute('''
                    INSERT INTO identity_patterns
                    (pattern_type, key, value, confidence, evidence_count, memory_ids, category)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                ''', (
                    pattern['pattern_type'],
                    pattern['key'],
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern['category']
                ))
                pattern_id = cursor.lastrowid

            # Save examples
            self._save_pattern_examples(cursor, pattern_id, pattern['memory_ids'], pattern['key'])

            conn.commit()
            return pattern_id

        finally:
            conn.close()

    def _save_pattern_examples(self, cursor, pattern_id: int, memory_ids: List[int], key: str):
        """Save representative examples for pattern."""
        # Clear old examples
        cursor.execute('DELETE FROM pattern_examples WHERE pattern_id = ?', (pattern_id,))

        # Save top 3 examples
        for memory_id in memory_ids[:3]:
            cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
            row = cursor.fetchone()

            if row:
                content = row[0]
                excerpt = self._extract_relevant_excerpt(content, key)

                cursor.execute('''
                    INSERT INTO pattern_examples (pattern_id, memory_id, example_text)
                    VALUES (?, ?, ?)
                ''', (pattern_id, memory_id, excerpt))

    def _extract_relevant_excerpt(self, content: str, key: str) -> str:
        """Extract 150-char excerpt showing pattern."""
        # Find first mention of key term
        key_lower = key.lower().replace('_', ' ')
        idx = content.lower().find(key_lower)

        if idx >= 0:
            start = max(0, idx - 50)
            end = min(len(content), idx + 100)
            excerpt = content[start:end]
            return excerpt if len(excerpt) <= 150 else excerpt[:150] + '...'

        # Fallback: first 150 chars
        return content[:150] + ('...' if len(content) > 150 else '')

    def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get patterns above confidence threshold."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Columns follow the identity_patterns schema created in _init_tables
        if pattern_type:
            cursor.execute('''
                SELECT id, pattern_type, key, value, confidence, evidence_count, category, updated_at, created_at
                FROM identity_patterns
                WHERE confidence >= ? AND pattern_type = ?
                ORDER BY confidence DESC, evidence_count DESC
            ''', (min_confidence, pattern_type))
        else:
            cursor.execute('''
                SELECT id, pattern_type, key, value, confidence, evidence_count, category, updated_at, created_at
                FROM identity_patterns
                WHERE confidence >= ?
                ORDER BY confidence DESC, evidence_count DESC
            ''', (min_confidence,))

        patterns = []
        for row in cursor.fetchall():
            patterns.append({
                'id': row[0],
                'pattern_type': row[1],
                'key': row[2],
                'value': row[3],
                'confidence': row[4],
                'evidence_count': row[5],
                'category': row[6],
                'updated_at': row[7],
                'created_at': row[8]
            })

        conn.close()
        return patterns

class PatternLearner:
    """Main pattern learning orchestrator."""

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path
        self.frequency_analyzer = FrequencyAnalyzer(db_path)
        self.context_analyzer = ContextAnalyzer(db_path)
        self.terminology_learner = TerminologyLearner(db_path)
        self.confidence_scorer = ConfidenceScorer(db_path)
        self.pattern_store = PatternStore(db_path)

    def weekly_pattern_update(self) -> Dict[str, int]:
        """Full pattern analysis of all memories. Run this weekly."""
        print("Starting weekly pattern update...")

        # Get all memory IDs
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('SELECT id FROM memories ORDER BY created_at')
        all_memory_ids = [row[0] for row in cursor.fetchall()]
        total_memories = len(all_memory_ids)
        conn.close()

        if total_memories == 0:
            print("No memories found. Add memories first.")
            return {'preferences': 0, 'styles': 0, 'terminology': 0}

        print(f"Analyzing {total_memories} memories...")

        # Run all analyzers
        preferences = self.frequency_analyzer.analyze_preferences(all_memory_ids)
        print(f"  Found {len(preferences)} preference patterns")

        styles = self.context_analyzer.analyze_style(all_memory_ids)
        print(f"  Found {len(styles)} style patterns")

        terms = self.terminology_learner.learn_terminology(all_memory_ids)
        print(f"  Found {len(terms)} terminology patterns")

        # Recalculate confidence scores and save all patterns
        counts = {'preferences': 0, 'styles': 0, 'terminology': 0}

        for pattern in preferences.values():
            confidence = self.confidence_scorer.calculate_confidence(
                pattern['pattern_type'],
                pattern['key'],
                pattern['value'],
                pattern['memory_ids'],
                total_memories
            )
            pattern['confidence'] = round(confidence, 2)
            self.pattern_store.save_pattern(pattern)
            counts['preferences'] += 1

        for pattern in styles.values():
            confidence = self.confidence_scorer.calculate_confidence(
                pattern['pattern_type'],
                pattern['key'],
                pattern['value'],
                pattern['memory_ids'],
                total_memories
            )
            pattern['confidence'] = round(confidence, 2)
            self.pattern_store.save_pattern(pattern)
            counts['styles'] += 1

        for pattern in terms.values():
            confidence = self.confidence_scorer.calculate_confidence(
                pattern['pattern_type'],
                pattern['key'],
                pattern['value'],
                pattern['memory_ids'],
                total_memories
            )
            pattern['confidence'] = round(confidence, 2)
            self.pattern_store.save_pattern(pattern)
            counts['terminology'] += 1

        print(f"\nPattern update complete:")
        print(f"  {counts['preferences']} preferences")
        print(f"  {counts['styles']} styles")
        print(f"  {counts['terminology']} terminology")

        return counts

    def on_new_memory(self, memory_id: int):
        """Incremental update when new memory is added."""
        # For now, just trigger full update if memory count is low
        # Future optimization: only update affected patterns
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('SELECT COUNT(*) FROM memories')
        total = cursor.fetchone()[0]
        conn.close()

        # Only do incremental updates if we have many memories (>50)
        if total > 50:
            # TODO: Implement true incremental update
            print(f"New memory #{memory_id} added. Run weekly_pattern_update() to update patterns.")
        else:
            # For small memory counts, just do full update
            self.weekly_pattern_update()

    def get_patterns(self, min_confidence: float = 0.7) -> List[Dict[str, Any]]:
        """Query patterns above confidence threshold."""
        return self.pattern_store.get_patterns(min_confidence)

    def get_identity_context(self, min_confidence: float = 0.7) -> str:
        """Format patterns for Claude context injection."""
        patterns = self.get_patterns(min_confidence)

        if not patterns:
            return "## Working with User - Learned Patterns\n\nNo patterns learned yet. Add more memories to build your profile."

        # Group by pattern type
        sections = {
            'preference': [],
            'style': [],
            'terminology': []
        }

        for p in patterns:
            sections[p['pattern_type']].append(
                f"- **{p['key'].replace('_', ' ').title()}:** {p['value']} "
                f"(confidence: {p['confidence']:.0%}, {p['evidence_count']} examples)"
            )

        output = "## Working with User - Learned Patterns\n\n"

        if sections['preference']:
            output += "**Technology Preferences:**\n" + '\n'.join(sections['preference']) + '\n\n'

        if sections['style']:
            output += "**Coding Style:**\n" + '\n'.join(sections['style']) + '\n\n'

        if sections['terminology']:
            output += "**Terminology:**\n" + '\n'.join(sections['terminology']) + '\n'

        return output

# CLI Interface
if __name__ == "__main__":
    import sys

    learner = PatternLearner()

    if len(sys.argv) < 2:
        print("Pattern Learner - Identity Profile Extraction")
        print("\nUsage:")
        print("  python pattern_learner.py update           # Full pattern update (weekly)")
        print("  python pattern_learner.py list [min_conf]  # List learned patterns (default: 0.7)")
        print("  python pattern_learner.py context [min]    # Get context for Claude")
        print("  python pattern_learner.py stats            # Pattern statistics")
        sys.exit(0)

    command = sys.argv[1]

    if command == "update":
        counts = learner.weekly_pattern_update()
        print(f"\nTotal patterns learned: {sum(counts.values())}")

    elif command == "list":
        min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
        patterns = learner.get_patterns(min_conf)

        if not patterns:
            print(f"No patterns found with confidence >= {min_conf:.0%}")
        else:
            print(f"\n{'Type':<15} {'Category':<12} {'Pattern':<30} {'Confidence':<12} {'Evidence':<10}")
            print("-" * 95)

            for p in patterns:
                pattern_display = f"{p['key'].replace('_', ' ').title()}: {p['value']}"
                if len(pattern_display) > 28:
                    pattern_display = pattern_display[:28] + "..."

                print(f"{p['pattern_type']:<15} {p['category']:<12} {pattern_display:<30} "
                      f"{p['confidence']:>6.0%} {p['evidence_count']:<10}")

    elif command == "context":
        min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
        context = learner.get_identity_context(min_conf)
        print(context)

    elif command == "stats":
        patterns = learner.get_patterns(0.5)  # Include all patterns

        if not patterns:
            print("No patterns learned yet.")
        else:
            by_type = Counter([p['pattern_type'] for p in patterns])
            by_category = Counter([p['category'] for p in patterns])

            avg_confidence = sum(p['confidence'] for p in patterns) / len(patterns)
            high_conf = len([p for p in patterns if p['confidence'] >= 0.8])

            print(f"\nPattern Statistics:")
            print(f"  Total patterns: {len(patterns)}")
            print(f"  Average confidence: {avg_confidence:.0%}")
            print(f"  High confidence (>=80%): {high_conf}")
            print(f"\nBy Type:")
            for ptype, count in by_type.most_common():
                print(f"  {ptype}: {count}")
            print(f"\nBy Category:")
            for cat, count in by_category.most_common():
                print(f"  {cat}: {count}")

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
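
For reference, a minimal usage sketch of the module above (not part of the package). It assumes pattern_learner.py is importable on sys.path and builds a throwaway database whose memories table has just the columns the module queries (id, content, created_at); in the real package that table is created elsewhere, since this module only creates identity_patterns and pattern_examples. The demo_memory.db path and the sample notes are hypothetical.

    # usage_sketch.py - exercise PatternLearner against a scratch database
    import sqlite3
    from pathlib import Path

    from pattern_learner import PatternLearner

    db_path = Path("demo_memory.db")  # scratch DB, not ~/.claude-memory/memory.db

    # Stand-in for the package's memories table (schema assumed from the queries above)
    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS memories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    notes = [
        "Moved the service to FastAPI; FastAPI routing is cleaner than Flask.",
        "FastAPI dependency injection simplified the auth layer.",
        "Profiled the FastAPI endpoints; latency is acceptable.",
    ]
    conn.executemany("INSERT INTO memories (content) VALUES (?)", [(n,) for n in notes])
    conn.commit()
    conn.close()

    learner = PatternLearner(db_path=db_path)
    learner.weekly_pattern_update()           # 'fastapi' clears the >0.6 share / >=3 mentions bar
    print(learner.get_identity_context(0.5))  # markdown block ready for context injection

With these three notes, only FrequencyAnalyzer should fire (a backend_framework preference for FastAPI); the style and terminology analyzers each require at least three matching mentions before they emit a pattern.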