superlocalmemory 2.7.6 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +120 -155
- package/README.md +115 -89
- package/api_server.py +2 -12
- package/docs/PATTERN-LEARNING.md +64 -199
- package/docs/example_graph_usage.py +4 -6
- package/install.sh +59 -0
- package/mcp_server.py +83 -7
- package/package.json +1 -8
- package/scripts/generate-thumbnails.py +3 -5
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-show-patterns/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/agent_registry.py +7 -18
- package/src/auth_middleware.py +3 -5
- package/src/auto_backup.py +3 -7
- package/src/behavioral/__init__.py +49 -0
- package/src/behavioral/behavioral_listener.py +203 -0
- package/src/behavioral/behavioral_patterns.py +275 -0
- package/src/behavioral/cross_project_transfer.py +206 -0
- package/src/behavioral/outcome_inference.py +194 -0
- package/src/behavioral/outcome_tracker.py +193 -0
- package/src/behavioral/tests/__init__.py +4 -0
- package/src/behavioral/tests/test_behavioral_integration.py +108 -0
- package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
- package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
- package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
- package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
- package/src/behavioral/tests/test_outcome_inference.py +107 -0
- package/src/behavioral/tests/test_outcome_tracker.py +96 -0
- package/src/cache_manager.py +4 -6
- package/src/compliance/__init__.py +48 -0
- package/src/compliance/abac_engine.py +149 -0
- package/src/compliance/abac_middleware.py +116 -0
- package/src/compliance/audit_db.py +215 -0
- package/src/compliance/audit_logger.py +148 -0
- package/src/compliance/retention_manager.py +289 -0
- package/src/compliance/retention_scheduler.py +186 -0
- package/src/compliance/tests/__init__.py +4 -0
- package/src/compliance/tests/test_abac_enforcement.py +95 -0
- package/src/compliance/tests/test_abac_engine.py +124 -0
- package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
- package/src/compliance/tests/test_audit_db.py +123 -0
- package/src/compliance/tests/test_audit_logger.py +98 -0
- package/src/compliance/tests/test_mcp_audit.py +128 -0
- package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
- package/src/compliance/tests/test_retention_manager.py +131 -0
- package/src/compliance/tests/test_retention_scheduler.py +99 -0
- package/src/db_connection_manager.py +2 -12
- package/src/embedding_engine.py +61 -669
- package/src/embeddings/__init__.py +47 -0
- package/src/embeddings/cache.py +70 -0
- package/src/embeddings/cli.py +113 -0
- package/src/embeddings/constants.py +47 -0
- package/src/embeddings/database.py +91 -0
- package/src/embeddings/engine.py +247 -0
- package/src/embeddings/model_loader.py +145 -0
- package/src/event_bus.py +3 -13
- package/src/graph/__init__.py +36 -0
- package/src/graph/build_helpers.py +74 -0
- package/src/graph/cli.py +87 -0
- package/src/graph/cluster_builder.py +188 -0
- package/src/graph/cluster_summary.py +148 -0
- package/src/graph/constants.py +47 -0
- package/src/graph/edge_builder.py +162 -0
- package/src/graph/entity_extractor.py +95 -0
- package/src/graph/graph_core.py +226 -0
- package/src/graph/graph_search.py +231 -0
- package/src/graph/hierarchical.py +207 -0
- package/src/graph/schema.py +99 -0
- package/src/graph_engine.py +45 -1451
- package/src/hnsw_index.py +3 -7
- package/src/hybrid_search.py +36 -683
- package/src/learning/__init__.py +27 -12
- package/src/learning/adaptive_ranker.py +50 -12
- package/src/learning/cross_project_aggregator.py +2 -12
- package/src/learning/engagement_tracker.py +2 -12
- package/src/learning/feature_extractor.py +175 -43
- package/src/learning/feedback_collector.py +7 -12
- package/src/learning/learning_db.py +180 -12
- package/src/learning/project_context_manager.py +2 -12
- package/src/learning/source_quality_scorer.py +2 -12
- package/src/learning/synthetic_bootstrap.py +2 -12
- package/src/learning/tests/__init__.py +2 -0
- package/src/learning/tests/test_adaptive_ranker.py +2 -6
- package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
- package/src/learning/tests/test_aggregator.py +2 -6
- package/src/learning/tests/test_auto_retrain_v28.py +35 -0
- package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
- package/src/learning/tests/test_feature_extractor_v28.py +93 -0
- package/src/learning/tests/test_feedback_collector.py +2 -6
- package/src/learning/tests/test_learning_db.py +2 -6
- package/src/learning/tests/test_learning_db_v28.py +110 -0
- package/src/learning/tests/test_learning_init_v28.py +48 -0
- package/src/learning/tests/test_outcome_signals.py +48 -0
- package/src/learning/tests/test_project_context.py +2 -6
- package/src/learning/tests/test_schema_migration.py +319 -0
- package/src/learning/tests/test_signal_inference.py +11 -13
- package/src/learning/tests/test_source_quality.py +2 -6
- package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
- package/src/learning/tests/test_workflow_miner.py +2 -6
- package/src/learning/workflow_pattern_miner.py +2 -12
- package/src/lifecycle/__init__.py +54 -0
- package/src/lifecycle/bounded_growth.py +239 -0
- package/src/lifecycle/compaction_engine.py +226 -0
- package/src/lifecycle/lifecycle_engine.py +302 -0
- package/src/lifecycle/lifecycle_evaluator.py +225 -0
- package/src/lifecycle/lifecycle_scheduler.py +130 -0
- package/src/lifecycle/retention_policy.py +285 -0
- package/src/lifecycle/tests/__init__.py +4 -0
- package/src/lifecycle/tests/test_bounded_growth.py +193 -0
- package/src/lifecycle/tests/test_compaction.py +179 -0
- package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
- package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
- package/src/lifecycle/tests/test_mcp_compact.py +149 -0
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
- package/src/lifecycle/tests/test_retention_policy.py +162 -0
- package/src/mcp_tools_v28.py +280 -0
- package/src/memory-profiles.py +2 -12
- package/src/memory-reset.py +2 -12
- package/src/memory_compression.py +2 -12
- package/src/memory_store_v2.py +76 -20
- package/src/migrate_v1_to_v2.py +2 -12
- package/src/pattern_learner.py +29 -975
- package/src/patterns/__init__.py +24 -0
- package/src/patterns/analyzers.py +247 -0
- package/src/patterns/learner.py +267 -0
- package/src/patterns/scoring.py +167 -0
- package/src/patterns/store.py +223 -0
- package/src/patterns/terminology.py +138 -0
- package/src/provenance_tracker.py +4 -14
- package/src/query_optimizer.py +4 -6
- package/src/rate_limiter.py +2 -6
- package/src/search/__init__.py +20 -0
- package/src/search/cli.py +77 -0
- package/src/search/constants.py +26 -0
- package/src/search/engine.py +239 -0
- package/src/search/fusion.py +122 -0
- package/src/search/index_loader.py +112 -0
- package/src/search/methods.py +162 -0
- package/src/search_engine_v2.py +4 -6
- package/src/setup_validator.py +7 -13
- package/src/subscription_manager.py +2 -12
- package/src/tree/__init__.py +59 -0
- package/src/tree/builder.py +183 -0
- package/src/tree/nodes.py +196 -0
- package/src/tree/queries.py +252 -0
- package/src/tree/schema.py +76 -0
- package/src/tree_manager.py +10 -711
- package/src/trust/__init__.py +45 -0
- package/src/trust/constants.py +66 -0
- package/src/trust/queries.py +157 -0
- package/src/trust/schema.py +95 -0
- package/src/trust/scorer.py +299 -0
- package/src/trust/signals.py +95 -0
- package/src/trust_scorer.py +39 -697
- package/src/webhook_dispatcher.py +2 -12
- package/ui/app.js +1 -1
- package/ui/js/agents.js +1 -1
- package/ui_server.py +2 -14
- package/ATTRIBUTION.md +0 -140
- package/docs/ARCHITECTURE-V2.5.md +0 -190
- package/docs/GRAPH-ENGINE.md +0 -503
- package/docs/architecture-diagram.drawio +0 -405
- package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
package/src/pattern_learner.py
CHANGED
|
@@ -1,980 +1,34 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
2
4
|
"""
|
|
3
|
-
|
|
4
|
-
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
-
Licensed under MIT License
|
|
5
|
+
Pattern Learner - Legacy import compatibility shim.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
NOTICE: This software is protected by MIT License.
|
|
11
|
-
Attribution must be preserved in all copies or derivatives.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
"""
|
|
15
|
-
Pattern Learner - Identity Profile Extraction (Layer 4)
|
|
16
|
-
|
|
17
|
-
Learns user preferences, coding style, and terminology patterns from memories.
|
|
18
|
-
Uses local TF-IDF, frequency analysis, and heuristics - NO EXTERNAL APIs.
|
|
19
|
-
|
|
20
|
-
Based on architecture: docs/architecture/05-pattern-learner.md
|
|
7
|
+
All implementation has moved to the `patterns` package.
|
|
8
|
+
This file re-exports every public symbol so that existing imports
|
|
9
|
+
like `from pattern_learner import PatternLearner` continue to work.
|
|
21
10
|
"""
|
|
22
11
|
|
|
23
|
-
import
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
MEMORY_DIR
|
|
44
|
-
DB_PATH
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class FrequencyAnalyzer:
|
|
48
|
-
"""Analyzes technology and tool preferences via frequency counting."""
|
|
49
|
-
|
|
50
|
-
def __init__(self, db_path: Path):
|
|
51
|
-
self.db_path = db_path
|
|
52
|
-
|
|
53
|
-
# Predefined technology categories
|
|
54
|
-
self.tech_categories = {
|
|
55
|
-
'frontend_framework': ['react', 'nextjs', 'next.js', 'vue', 'angular', 'svelte', 'solid'],
|
|
56
|
-
'backend_framework': ['express', 'fastapi', 'django', 'flask', 'nestjs', 'spring', 'rails'],
|
|
57
|
-
'database': ['postgres', 'postgresql', 'mysql', 'mongodb', 'redis', 'dynamodb', 'sqlite'],
|
|
58
|
-
'state_management': ['redux', 'context', 'zustand', 'mobx', 'recoil', 'jotai'],
|
|
59
|
-
'styling': ['tailwind', 'css modules', 'styled-components', 'emotion', 'sass', 'less'],
|
|
60
|
-
'language': ['python', 'javascript', 'typescript', 'go', 'rust', 'java', 'c++'],
|
|
61
|
-
'deployment': ['docker', 'kubernetes', 'vercel', 'netlify', 'aws', 'gcp', 'azure'],
|
|
62
|
-
'testing': ['jest', 'pytest', 'vitest', 'mocha', 'cypress', 'playwright'],
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
def analyze_preferences(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
|
|
66
|
-
"""Analyze technology preferences across memories."""
|
|
67
|
-
patterns = {}
|
|
68
|
-
|
|
69
|
-
conn = sqlite3.connect(self.db_path)
|
|
70
|
-
cursor = conn.cursor()
|
|
71
|
-
|
|
72
|
-
for category, keywords in self.tech_categories.items():
|
|
73
|
-
keyword_counts = Counter()
|
|
74
|
-
evidence_memories = {} # {keyword: [memory_ids]}
|
|
75
|
-
|
|
76
|
-
for memory_id in memory_ids:
|
|
77
|
-
cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
|
|
78
|
-
row = cursor.fetchone()
|
|
79
|
-
|
|
80
|
-
if not row:
|
|
81
|
-
continue
|
|
82
|
-
|
|
83
|
-
content = row[0].lower()
|
|
84
|
-
|
|
85
|
-
for keyword in keywords:
|
|
86
|
-
# Count occurrences with word boundaries
|
|
87
|
-
pattern = r'\b' + re.escape(keyword.replace('.', r'\.')) + r'\b'
|
|
88
|
-
matches = re.findall(pattern, content, re.IGNORECASE)
|
|
89
|
-
count = len(matches)
|
|
90
|
-
|
|
91
|
-
if count > 0:
|
|
92
|
-
keyword_counts[keyword] += count
|
|
93
|
-
|
|
94
|
-
if keyword not in evidence_memories:
|
|
95
|
-
evidence_memories[keyword] = []
|
|
96
|
-
evidence_memories[keyword].append(memory_id)
|
|
97
|
-
|
|
98
|
-
# Determine preference (most mentioned)
|
|
99
|
-
if keyword_counts:
|
|
100
|
-
top_keyword = keyword_counts.most_common(1)[0][0]
|
|
101
|
-
total_mentions = sum(keyword_counts.values())
|
|
102
|
-
top_count = keyword_counts[top_keyword]
|
|
103
|
-
|
|
104
|
-
# Calculate confidence (% of mentions)
|
|
105
|
-
confidence = top_count / total_mentions if total_mentions > 0 else 0
|
|
106
|
-
|
|
107
|
-
# Only create pattern if confidence > 0.6 and at least 3 mentions
|
|
108
|
-
if confidence > 0.6 and top_count >= 3:
|
|
109
|
-
value = self._format_preference(top_keyword, keyword_counts)
|
|
110
|
-
evidence_list = list(set(evidence_memories.get(top_keyword, [])))
|
|
111
|
-
|
|
112
|
-
patterns[category] = {
|
|
113
|
-
'pattern_type': 'preference',
|
|
114
|
-
'key': category,
|
|
115
|
-
'value': value,
|
|
116
|
-
'confidence': round(confidence, 2),
|
|
117
|
-
'evidence_count': len(evidence_list),
|
|
118
|
-
'memory_ids': evidence_list,
|
|
119
|
-
'category': self._categorize_pattern(category)
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
conn.close()
|
|
123
|
-
return patterns
|
|
124
|
-
|
|
125
|
-
def _format_preference(self, top_keyword: str, all_counts: Counter) -> str:
|
|
126
|
-
"""Format preference value (e.g., 'Next.js over React')."""
|
|
127
|
-
# Normalize keyword for display
|
|
128
|
-
display_map = {
|
|
129
|
-
'nextjs': 'Next.js',
|
|
130
|
-
'next.js': 'Next.js',
|
|
131
|
-
'postgres': 'PostgreSQL',
|
|
132
|
-
'postgresql': 'PostgreSQL',
|
|
133
|
-
'fastapi': 'FastAPI',
|
|
134
|
-
'nestjs': 'NestJS',
|
|
135
|
-
'mongodb': 'MongoDB',
|
|
136
|
-
'redis': 'Redis',
|
|
137
|
-
'dynamodb': 'DynamoDB',
|
|
138
|
-
'tailwind': 'Tailwind CSS',
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
top_display = display_map.get(top_keyword.lower(), top_keyword.title())
|
|
142
|
-
|
|
143
|
-
if len(all_counts) > 1:
|
|
144
|
-
second = all_counts.most_common(2)[1]
|
|
145
|
-
second_keyword = second[0]
|
|
146
|
-
second_display = display_map.get(second_keyword.lower(), second_keyword.title())
|
|
147
|
-
|
|
148
|
-
# Only show comparison if second choice has significant mentions
|
|
149
|
-
if second[1] / all_counts[top_keyword] > 0.3:
|
|
150
|
-
return f"{top_display} over {second_display}"
|
|
151
|
-
|
|
152
|
-
return top_display
|
|
153
|
-
|
|
154
|
-
def _categorize_pattern(self, tech_category: str) -> str:
|
|
155
|
-
"""Map tech category to high-level category."""
|
|
156
|
-
category_map = {
|
|
157
|
-
'frontend_framework': 'frontend',
|
|
158
|
-
'state_management': 'frontend',
|
|
159
|
-
'styling': 'frontend',
|
|
160
|
-
'backend_framework': 'backend',
|
|
161
|
-
'database': 'backend',
|
|
162
|
-
'language': 'general',
|
|
163
|
-
'deployment': 'devops',
|
|
164
|
-
'testing': 'general',
|
|
165
|
-
}
|
|
166
|
-
return category_map.get(tech_category, 'general')
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
class ContextAnalyzer:
|
|
170
|
-
"""Analyzes coding style patterns from context."""
|
|
171
|
-
|
|
172
|
-
def __init__(self, db_path: Path):
|
|
173
|
-
self.db_path = db_path
|
|
174
|
-
|
|
175
|
-
# Style pattern detection rules
|
|
176
|
-
self.style_indicators = {
|
|
177
|
-
'optimization_priority': {
|
|
178
|
-
'performance': ['optimize', 'faster', 'performance', 'speed', 'latency', 'efficient', 'cache'],
|
|
179
|
-
'readability': ['readable', 'clean', 'maintainable', 'clear', 'simple', 'understandable']
|
|
180
|
-
},
|
|
181
|
-
'error_handling': {
|
|
182
|
-
'explicit': ['error boundary', 'explicit', 'throw', 'handle error', 'try catch', 'error handling'],
|
|
183
|
-
'permissive': ['ignore', 'suppress', 'skip error', 'optional']
|
|
184
|
-
},
|
|
185
|
-
'testing_approach': {
|
|
186
|
-
'comprehensive': ['test coverage', 'unit test', 'integration test', 'e2e test', 'test suite'],
|
|
187
|
-
'minimal': ['manual test', 'skip test', 'no tests']
|
|
188
|
-
},
|
|
189
|
-
'code_organization': {
|
|
190
|
-
'modular': ['separate', 'module', 'component', 'split', 'refactor', 'extract'],
|
|
191
|
-
'monolithic': ['single file', 'one place', 'combined']
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
def analyze_style(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
|
|
196
|
-
"""Detect stylistic patterns from context."""
|
|
197
|
-
patterns = {}
|
|
198
|
-
|
|
199
|
-
conn = sqlite3.connect(self.db_path)
|
|
200
|
-
cursor = conn.cursor()
|
|
201
|
-
|
|
202
|
-
for pattern_key, indicators in self.style_indicators.items():
|
|
203
|
-
indicator_counts = Counter()
|
|
204
|
-
evidence_memories = {} # {style_type: [memory_ids]}
|
|
205
|
-
|
|
206
|
-
for memory_id in memory_ids:
|
|
207
|
-
cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
|
|
208
|
-
row = cursor.fetchone()
|
|
209
|
-
|
|
210
|
-
if not row:
|
|
211
|
-
continue
|
|
212
|
-
|
|
213
|
-
content = row[0].lower()
|
|
214
|
-
|
|
215
|
-
for style_type, keywords in indicators.items():
|
|
216
|
-
for keyword in keywords:
|
|
217
|
-
if keyword in content:
|
|
218
|
-
indicator_counts[style_type] += 1
|
|
219
|
-
|
|
220
|
-
if style_type not in evidence_memories:
|
|
221
|
-
evidence_memories[style_type] = []
|
|
222
|
-
evidence_memories[style_type].append(memory_id)
|
|
223
|
-
|
|
224
|
-
# Determine dominant style
|
|
225
|
-
if indicator_counts:
|
|
226
|
-
top_style = indicator_counts.most_common(1)[0][0]
|
|
227
|
-
total = sum(indicator_counts.values())
|
|
228
|
-
top_count = indicator_counts[top_style]
|
|
229
|
-
confidence = top_count / total if total > 0 else 0
|
|
230
|
-
|
|
231
|
-
# Only create pattern if confidence > 0.65 and at least 3 mentions
|
|
232
|
-
if confidence > 0.65 and top_count >= 3:
|
|
233
|
-
value = self._format_style_value(pattern_key, top_style, indicator_counts)
|
|
234
|
-
evidence_list = list(set(evidence_memories.get(top_style, [])))
|
|
235
|
-
|
|
236
|
-
patterns[pattern_key] = {
|
|
237
|
-
'pattern_type': 'style',
|
|
238
|
-
'key': pattern_key,
|
|
239
|
-
'value': value,
|
|
240
|
-
'confidence': round(confidence, 2),
|
|
241
|
-
'evidence_count': len(evidence_list),
|
|
242
|
-
'memory_ids': evidence_list,
|
|
243
|
-
'category': 'general'
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
conn.close()
|
|
247
|
-
return patterns
|
|
248
|
-
|
|
249
|
-
def _format_style_value(self, pattern_key: str, top_style: str, all_counts: Counter) -> str:
|
|
250
|
-
"""Format style value as comparison or preference."""
|
|
251
|
-
style_formats = {
|
|
252
|
-
'optimization_priority': {
|
|
253
|
-
'performance': 'Performance over readability',
|
|
254
|
-
'readability': 'Readability over performance'
|
|
255
|
-
},
|
|
256
|
-
'error_handling': {
|
|
257
|
-
'explicit': 'Explicit error boundaries',
|
|
258
|
-
'permissive': 'Permissive error handling'
|
|
259
|
-
},
|
|
260
|
-
'testing_approach': {
|
|
261
|
-
'comprehensive': 'Comprehensive testing',
|
|
262
|
-
'minimal': 'Minimal testing'
|
|
263
|
-
},
|
|
264
|
-
'code_organization': {
|
|
265
|
-
'modular': 'Modular organization',
|
|
266
|
-
'monolithic': 'Monolithic organization'
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
if pattern_key in style_formats and top_style in style_formats[pattern_key]:
|
|
271
|
-
return style_formats[pattern_key][top_style]
|
|
272
|
-
|
|
273
|
-
return top_style.replace('_', ' ').title()
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
class TerminologyLearner:
|
|
277
|
-
"""Learns user-specific definitions of common terms."""
|
|
278
|
-
|
|
279
|
-
def __init__(self, db_path: Path):
|
|
280
|
-
self.db_path = db_path
|
|
281
|
-
|
|
282
|
-
# Common ambiguous terms to learn
|
|
283
|
-
self.ambiguous_terms = [
|
|
284
|
-
'optimize', 'refactor', 'clean', 'simple',
|
|
285
|
-
'mvp', 'prototype', 'scale', 'production-ready',
|
|
286
|
-
'fix', 'improve', 'update', 'enhance'
|
|
287
|
-
]
|
|
288
|
-
|
|
289
|
-
def learn_terminology(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
|
|
290
|
-
"""Learn user-specific term definitions."""
|
|
291
|
-
patterns = {}
|
|
292
|
-
|
|
293
|
-
conn = sqlite3.connect(self.db_path)
|
|
294
|
-
cursor = conn.cursor()
|
|
295
|
-
|
|
296
|
-
for term in self.ambiguous_terms:
|
|
297
|
-
contexts = []
|
|
298
|
-
|
|
299
|
-
# Find all contexts where term appears
|
|
300
|
-
for memory_id in memory_ids:
|
|
301
|
-
cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
|
|
302
|
-
row = cursor.fetchone()
|
|
303
|
-
|
|
304
|
-
if not row:
|
|
305
|
-
continue
|
|
306
|
-
|
|
307
|
-
content = row[0]
|
|
308
|
-
|
|
309
|
-
# Find term in content (case-insensitive)
|
|
310
|
-
pattern = r'\b' + re.escape(term) + r'\b'
|
|
311
|
-
for match in re.finditer(pattern, content, re.IGNORECASE):
|
|
312
|
-
term_idx = match.start()
|
|
313
|
-
|
|
314
|
-
# Extract 100-char window around term
|
|
315
|
-
start = max(0, term_idx - 100)
|
|
316
|
-
end = min(len(content), term_idx + len(term) + 100)
|
|
317
|
-
context_window = content[start:end]
|
|
318
|
-
|
|
319
|
-
contexts.append({
|
|
320
|
-
'memory_id': memory_id,
|
|
321
|
-
'context': context_window
|
|
322
|
-
})
|
|
323
|
-
|
|
324
|
-
# Analyze contexts to extract meaning (need at least 3 examples)
|
|
325
|
-
if len(contexts) >= 3:
|
|
326
|
-
definition = self._extract_definition(term, contexts)
|
|
327
|
-
|
|
328
|
-
if definition:
|
|
329
|
-
evidence_list = list(set([ctx['memory_id'] for ctx in contexts]))
|
|
330
|
-
|
|
331
|
-
# Confidence increases with more examples, capped at 0.95
|
|
332
|
-
confidence = min(0.95, 0.6 + (len(contexts) * 0.05))
|
|
333
|
-
|
|
334
|
-
patterns[term] = {
|
|
335
|
-
'pattern_type': 'terminology',
|
|
336
|
-
'key': term,
|
|
337
|
-
'value': definition,
|
|
338
|
-
'confidence': round(confidence, 2),
|
|
339
|
-
'evidence_count': len(evidence_list),
|
|
340
|
-
'memory_ids': evidence_list,
|
|
341
|
-
'category': 'general'
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
conn.close()
|
|
345
|
-
return patterns
|
|
346
|
-
|
|
347
|
-
def _extract_definition(self, term: str, contexts: List[Dict]) -> Optional[str]:
|
|
348
|
-
"""Extract definition from contexts using pattern matching."""
|
|
349
|
-
# Collect words near the term across all contexts
|
|
350
|
-
nearby_words = []
|
|
351
|
-
|
|
352
|
-
for ctx in contexts:
|
|
353
|
-
words = re.findall(r'\b\w+\b', ctx['context'].lower())
|
|
354
|
-
nearby_words.extend(words)
|
|
355
|
-
|
|
356
|
-
# Count word frequencies
|
|
357
|
-
word_counts = Counter(nearby_words)
|
|
358
|
-
|
|
359
|
-
# Remove the term itself and common stopwords
|
|
360
|
-
stopwords = {'the', 'a', 'an', 'is', 'to', 'for', 'of', 'in', 'on', 'at',
|
|
361
|
-
'and', 'or', 'but', 'with', 'from', 'by', 'this', 'that'}
|
|
362
|
-
word_counts = Counter({w: c for w, c in word_counts.items()
|
|
363
|
-
if w not in stopwords and w != term.lower()})
|
|
364
|
-
|
|
365
|
-
# Get top co-occurring words
|
|
366
|
-
top_words = [w for w, _ in word_counts.most_common(8)]
|
|
367
|
-
|
|
368
|
-
# Apply heuristic rules based on term and context
|
|
369
|
-
if term == 'optimize':
|
|
370
|
-
if any(w in top_words for w in ['performance', 'speed', 'faster', 'latency']):
|
|
371
|
-
return "Performance optimization (speed/latency)"
|
|
372
|
-
elif any(w in top_words for w in ['code', 'clean', 'refactor']):
|
|
373
|
-
return "Code quality optimization"
|
|
374
|
-
|
|
375
|
-
elif term == 'refactor':
|
|
376
|
-
if any(w in top_words for w in ['architecture', 'structure', 'design']):
|
|
377
|
-
return "Architecture change, not just renaming"
|
|
378
|
-
elif any(w in top_words for w in ['clean', 'organize', 'simplify']):
|
|
379
|
-
return "Code organization improvement"
|
|
380
|
-
|
|
381
|
-
elif term == 'mvp':
|
|
382
|
-
if any(w in top_words for w in ['core', 'basic', 'essential', 'minimal']):
|
|
383
|
-
return "Core features only, no polish"
|
|
384
|
-
|
|
385
|
-
elif term == 'production-ready':
|
|
386
|
-
if any(w in top_words for w in ['test', 'error', 'monitoring', 'deploy']):
|
|
387
|
-
return "Fully tested and monitored for deployment"
|
|
388
|
-
|
|
389
|
-
# Generic definition if specific pattern not matched
|
|
390
|
-
if len(top_words) >= 3:
|
|
391
|
-
return f"Commonly used with: {', '.join(top_words[:3])}"
|
|
392
|
-
|
|
393
|
-
return None
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
class ConfidenceScorer:
|
|
397
|
-
"""Calculates and tracks pattern confidence scores."""
|
|
398
|
-
|
|
399
|
-
def __init__(self, db_path: Path):
|
|
400
|
-
self.db_path = db_path
|
|
401
|
-
|
|
402
|
-
def calculate_confidence(
|
|
403
|
-
self,
|
|
404
|
-
pattern_type: str,
|
|
405
|
-
key: str,
|
|
406
|
-
value: str,
|
|
407
|
-
evidence_memory_ids: List[int],
|
|
408
|
-
total_memories: int
|
|
409
|
-
) -> float:
|
|
410
|
-
"""
|
|
411
|
-
Calculate confidence using Beta-Binomial Bayesian posterior.
|
|
412
|
-
|
|
413
|
-
Based on MACLA (arXiv:2512.18950, Forouzandeh et al., Dec 2025):
|
|
414
|
-
posterior_mean = (alpha + evidence) / (alpha + beta + evidence + competition)
|
|
415
|
-
|
|
416
|
-
Adaptation: MACLA's Beta-Binomial uses pairwise interaction counts.
|
|
417
|
-
Our corpus has sparse signals (most memories are irrelevant to any
|
|
418
|
-
single pattern). We use log-scaled competition instead of raw total
|
|
419
|
-
to avoid over-dilution: competition = log2(total_memories).
|
|
420
|
-
|
|
421
|
-
Pattern-specific priors (alpha, beta):
|
|
422
|
-
- preference (1, 4): prior mean 0.20, ~8 items to reach 0.5
|
|
423
|
-
- style (1, 5): prior mean 0.17, subtler signals need more evidence
|
|
424
|
-
- terminology (2, 3): prior mean 0.40, direct usage signal
|
|
425
|
-
"""
|
|
426
|
-
if total_memories == 0 or not evidence_memory_ids:
|
|
427
|
-
return 0.0
|
|
428
|
-
|
|
429
|
-
import math
|
|
430
|
-
evidence_count = len(evidence_memory_ids)
|
|
431
|
-
|
|
432
|
-
# Pattern-specific Beta priors (alpha, beta)
|
|
433
|
-
PRIORS = {
|
|
434
|
-
'preference': (1.0, 4.0),
|
|
435
|
-
'style': (1.0, 5.0),
|
|
436
|
-
'terminology': (2.0, 3.0),
|
|
437
|
-
}
|
|
438
|
-
alpha, beta = PRIORS.get(pattern_type, (1.0, 4.0))
|
|
439
|
-
|
|
440
|
-
# Log-scaled competition: grows slowly with corpus size
|
|
441
|
-
# 10 memories -> 3.3, 60 -> 5.9, 500 -> 9.0, 5000 -> 12.3
|
|
442
|
-
competition = math.log2(max(2, total_memories))
|
|
443
|
-
|
|
444
|
-
# MACLA-inspired Beta posterior with log competition
|
|
445
|
-
posterior_mean = (alpha + evidence_count) / (alpha + beta + evidence_count + competition)
|
|
446
|
-
|
|
447
|
-
# Recency adjustment (mild: 1.0 to 1.15)
|
|
448
|
-
recency_bonus = self._calculate_recency_bonus(evidence_memory_ids)
|
|
449
|
-
recency_factor = 1.0 + min(0.15, 0.075 * (recency_bonus - 1.0) / 0.2) if recency_bonus > 1.0 else 1.0
|
|
450
|
-
|
|
451
|
-
# Temporal spread adjustment (0.9 to 1.1)
|
|
452
|
-
distribution_factor = self._calculate_distribution_factor(evidence_memory_ids)
|
|
453
|
-
|
|
454
|
-
# Final confidence
|
|
455
|
-
confidence = posterior_mean * recency_factor * distribution_factor
|
|
456
|
-
|
|
457
|
-
return min(0.95, round(confidence, 3))
|
|
458
|
-
|
|
459
|
-
def _calculate_recency_bonus(self, memory_ids: List[int]) -> float:
|
|
460
|
-
"""Give bonus to patterns with recent evidence."""
|
|
461
|
-
conn = sqlite3.connect(self.db_path)
|
|
462
|
-
cursor = conn.cursor()
|
|
463
|
-
|
|
464
|
-
# Get timestamps
|
|
465
|
-
placeholders = ','.join('?' * len(memory_ids))
|
|
466
|
-
cursor.execute(f'''
|
|
467
|
-
SELECT created_at FROM memories
|
|
468
|
-
WHERE id IN ({placeholders})
|
|
469
|
-
ORDER BY created_at DESC
|
|
470
|
-
''', memory_ids)
|
|
471
|
-
|
|
472
|
-
timestamps = cursor.fetchall()
|
|
473
|
-
conn.close()
|
|
474
|
-
|
|
475
|
-
if not timestamps:
|
|
476
|
-
return 1.0
|
|
477
|
-
|
|
478
|
-
# Check if any memories are from last 30 days
|
|
479
|
-
recent_count = 0
|
|
480
|
-
cutoff = datetime.now() - timedelta(days=30)
|
|
481
|
-
|
|
482
|
-
for ts_tuple in timestamps:
|
|
483
|
-
ts_str = ts_tuple[0]
|
|
484
|
-
try:
|
|
485
|
-
ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
|
|
486
|
-
if ts > cutoff:
|
|
487
|
-
recent_count += 1
|
|
488
|
-
except (ValueError, AttributeError):
|
|
489
|
-
pass
|
|
490
|
-
|
|
491
|
-
# Bonus if >50% are recent
|
|
492
|
-
if len(timestamps) > 0 and recent_count / len(timestamps) > 0.5:
|
|
493
|
-
return 1.2
|
|
494
|
-
else:
|
|
495
|
-
return 1.0
|
|
496
|
-
|
|
497
|
-
def _calculate_distribution_factor(self, memory_ids: List[int]) -> float:
|
|
498
|
-
"""Better confidence if memories are distributed over time, not just one session."""
|
|
499
|
-
if len(memory_ids) < 3:
|
|
500
|
-
return 0.8 # Penalize low sample size
|
|
501
|
-
|
|
502
|
-
conn = sqlite3.connect(self.db_path)
|
|
503
|
-
cursor = conn.cursor()
|
|
504
|
-
|
|
505
|
-
placeholders = ','.join('?' * len(memory_ids))
|
|
506
|
-
cursor.execute(f'''
|
|
507
|
-
SELECT created_at FROM memories
|
|
508
|
-
WHERE id IN ({placeholders})
|
|
509
|
-
ORDER BY created_at
|
|
510
|
-
''', memory_ids)
|
|
511
|
-
|
|
512
|
-
timestamps = [row[0] for row in cursor.fetchall()]
|
|
513
|
-
conn.close()
|
|
514
|
-
|
|
515
|
-
if len(timestamps) < 2:
|
|
516
|
-
return 0.8
|
|
517
|
-
|
|
518
|
-
try:
|
|
519
|
-
# Parse timestamps
|
|
520
|
-
dates = []
|
|
521
|
-
for ts_str in timestamps:
|
|
522
|
-
try:
|
|
523
|
-
ts = datetime.fromisoformat(ts_str.replace(' ', 'T'))
|
|
524
|
-
dates.append(ts)
|
|
525
|
-
except (ValueError, AttributeError):
|
|
526
|
-
pass
|
|
527
|
-
|
|
528
|
-
if len(dates) < 2:
|
|
529
|
-
return 0.8
|
|
530
|
-
|
|
531
|
-
# Calculate time span
|
|
532
|
-
time_span = (dates[-1] - dates[0]).days
|
|
533
|
-
|
|
534
|
-
# If memories span multiple days, higher confidence
|
|
535
|
-
if time_span > 7:
|
|
536
|
-
return 1.1
|
|
537
|
-
elif time_span > 1:
|
|
538
|
-
return 1.0
|
|
539
|
-
else:
|
|
540
|
-
return 0.9 # All on same day = might be one-off
|
|
541
|
-
|
|
542
|
-
except Exception:
|
|
543
|
-
return 1.0
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
class PatternStore:
    """Persistence layer for learned identity patterns.

    Owns the ``identity_patterns`` and ``pattern_examples`` tables:
    creates them on construction (recreating tables whose schema is
    missing required columns) and provides profile-scoped upsert and
    query access.
    """

    def __init__(self, db_path: Path):
        # Shared SQLite database (also holds the memories table).
        self.db_path = db_path
        self._init_tables()

    def _init_tables(self):
        """Initialize pattern tables if they don't exist, or recreate if schema is incomplete."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Check if existing tables have correct schema; an upgrade from a
            # partial/older install may have left required columns missing.
            for table_name, required_cols in [
                ('identity_patterns', {'pattern_type', 'key', 'value', 'confidence'}),
                ('pattern_examples', {'pattern_id', 'memory_id'}),
            ]:
                cursor.execute(f"PRAGMA table_info({table_name})")
                existing_cols = {row[1] for row in cursor.fetchall()}
                if existing_cols and not required_cols.issubset(existing_cols):
                    logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
                    cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

            # Identity patterns table: one row per (type, key, category, profile).
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS identity_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.5,
                    evidence_count INTEGER DEFAULT 1,
                    memory_ids TEXT,
                    category TEXT,
                    profile TEXT DEFAULT 'default',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(pattern_type, key, category, profile)
                )
            ''')

            # Add profile column if upgrading from older schema.
            # Fix: single-quoted string literal -- double quotes denote
            # identifiers in standard SQL and only worked here via SQLite's
            # double-quoted-string compatibility quirk.
            try:
                cursor.execute("ALTER TABLE identity_patterns ADD COLUMN profile TEXT DEFAULT 'default'")
            except sqlite3.OperationalError:
                pass  # Column already exists

            # Pattern examples table: representative excerpts per pattern.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS pattern_examples (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_id INTEGER NOT NULL,
                    memory_id INTEGER NOT NULL,
                    example_text TEXT,
                    FOREIGN KEY (pattern_id) REFERENCES identity_patterns(id) ON DELETE CASCADE,
                    FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
                )
            ''')

            # Indexes
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_type ON identity_patterns(pattern_type)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_confidence ON identity_patterns(confidence)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_pattern_profile ON identity_patterns(profile)')

            conn.commit()
        finally:
            # Fix: close the connection even if table setup raises.
            conn.close()

    def save_pattern(self, pattern: Dict[str, Any]) -> int:
        """Save or update a pattern (scoped by profile).

        Upserts on (pattern_type, key, category, profile) and refreshes
        the pattern's stored examples.

        Args:
            pattern: Dict with 'pattern_type', 'key', 'value', 'confidence',
                'evidence_count', 'memory_ids', 'category', and optionally
                'profile' (defaults to 'default').

        Returns:
            The row id of the inserted/updated pattern.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        profile = pattern.get('profile', 'default')

        try:
            # Check if pattern exists for this profile
            cursor.execute('''
                SELECT id FROM identity_patterns
                WHERE pattern_type = ? AND key = ? AND category = ? AND profile = ?
            ''', (pattern['pattern_type'], pattern['key'], pattern['category'], profile))

            existing = cursor.fetchone()

            memory_ids_json = json.dumps(pattern['memory_ids'])

            if existing:
                # Update existing pattern in place, bumping updated_at.
                pattern_id = existing[0]
                cursor.execute('''
                    UPDATE identity_patterns
                    SET value = ?, confidence = ?, evidence_count = ?,
                        memory_ids = ?, updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern_id
                ))
            else:
                # Insert new pattern
                cursor.execute('''
                    INSERT INTO identity_patterns
                    (pattern_type, key, value, confidence, evidence_count, memory_ids, category, profile)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    pattern['pattern_type'],
                    pattern['key'],
                    pattern['value'],
                    pattern['confidence'],
                    pattern['evidence_count'],
                    memory_ids_json,
                    pattern['category'],
                    profile
                ))
                pattern_id = cursor.lastrowid

            # Save examples (replaces any previously stored excerpts).
            self._save_pattern_examples(cursor, pattern_id, pattern['memory_ids'], pattern['key'])

            conn.commit()
            return pattern_id

        finally:
            conn.close()

    def _save_pattern_examples(self, cursor, pattern_id: int, memory_ids: List[int], key: str):
        """Save representative examples for pattern.

        Replaces previous examples with excerpts from up to the first
        three supporting memories. Runs on the caller's cursor so it
        shares save_pattern's transaction.
        """
        # Clear old examples
        cursor.execute('DELETE FROM pattern_examples WHERE pattern_id = ?', (pattern_id,))

        # Save top 3 examples
        for memory_id in memory_ids[:3]:
            cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
            row = cursor.fetchone()

            if row:
                content = row[0]
                excerpt = self._extract_relevant_excerpt(content, key)

                cursor.execute('''
                    INSERT INTO pattern_examples (pattern_id, memory_id, example_text)
                    VALUES (?, ?, ?)
                ''', (pattern_id, memory_id, excerpt))

    def _extract_relevant_excerpt(self, content: str, key: str) -> str:
        """Extract 150-char excerpt showing pattern.

        Centers the excerpt around the first mention of the key term
        (underscores treated as spaces); falls back to the start of the
        memory when the term isn't found.
        """
        # Find first mention of key term
        key_lower = key.lower().replace('_', ' ')
        idx = content.lower().find(key_lower)

        if idx >= 0:
            start = max(0, idx - 50)
            end = min(len(content), idx + 100)
            excerpt = content[start:end]
            return excerpt if len(excerpt) <= 150 else excerpt[:150] + '...'

        # Fallback: first 150 chars
        return content[:150] + ('...' if len(content) > 150 else '')

    def get_patterns(self, min_confidence: float = 0.7, pattern_type: Optional[str] = None,
                     profile: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get patterns above confidence threshold, optionally filtered by profile.

        Args:
            min_confidence: Lowest confidence (inclusive) to include.
            pattern_type: Optional exact pattern_type filter.
            profile: Optional exact profile filter.

        Returns:
            List of pattern dicts ordered by confidence, then evidence count,
            both descending. 'frequency' mirrors 'evidence_count' for
            backward compatibility.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Build query with optional filters; only fixed condition strings
            # are interpolated, all values are bound parameters.
            conditions = ['confidence >= ?']
            params: List[Any] = [min_confidence]

            if pattern_type:
                conditions.append('pattern_type = ?')
                params.append(pattern_type)

            if profile:
                conditions.append('profile = ?')
                params.append(profile)

            where_clause = ' AND '.join(conditions)
            cursor.execute(f'''
                SELECT id, pattern_type, key, value, confidence, evidence_count,
                       updated_at, created_at, category
                FROM identity_patterns
                WHERE {where_clause}
                ORDER BY confidence DESC, evidence_count DESC
            ''', params)

            patterns = []
            for row in cursor.fetchall():
                patterns.append({
                    'id': row[0],
                    'pattern_type': row[1],
                    'key': row[2],
                    'value': row[3],
                    'confidence': row[4],
                    'evidence_count': row[5],
                    'frequency': row[5],
                    'last_seen': row[6],
                    'created_at': row[7],
                    'category': row[8]
                })

            return patterns
        finally:
            # Fix: close the connection even if the query raises.
            conn.close()
|
751
|
-
|
|
752
|
-
class PatternLearner:
    """Main pattern learning orchestrator.

    Coordinates the frequency/context/terminology analyzers, scores their
    findings with ConfidenceScorer, and persists results through
    PatternStore -- all scoped to the currently active profile.
    """

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path
        self.frequency_analyzer = FrequencyAnalyzer(db_path)
        self.context_analyzer = ContextAnalyzer(db_path)
        self.terminology_learner = TerminologyLearner(db_path)
        self.confidence_scorer = ConfidenceScorer(db_path)
        self.pattern_store = PatternStore(db_path)

    def _get_active_profile(self) -> str:
        """Get the currently active profile name from config.

        Falls back to 'default' when profiles.json is absent or unreadable.
        """
        config_file = MEMORY_DIR / "profiles.json"
        if config_file.exists():
            try:
                with open(config_file, 'r') as f:
                    config = json.load(f)
                return config.get('active_profile', 'default')
            except (json.JSONDecodeError, IOError):
                pass  # Bad/missing config: fall through to the default
        return 'default'

    def _score_and_save(self, patterns: Dict[str, Dict[str, Any]],
                        total_memories: int, profile: str) -> int:
        """Score, profile-tag and persist a batch of analyzer output.

        Extracted helper: weekly_pattern_update previously repeated this
        loop verbatim for each of the three analyzers.

        Returns:
            The number of patterns saved.
        """
        saved = 0
        for pattern in patterns.values():
            confidence = self.confidence_scorer.calculate_confidence(
                pattern['pattern_type'],
                pattern['key'],
                pattern['value'],
                pattern['memory_ids'],
                total_memories
            )
            pattern['confidence'] = round(confidence, 2)
            pattern['profile'] = profile
            self.pattern_store.save_pattern(pattern)
            saved += 1
        return saved

    def weekly_pattern_update(self) -> Dict[str, int]:
        """Full pattern analysis of all memories for active profile. Run this weekly.

        Returns:
            Counts of saved patterns keyed by 'preferences', 'styles',
            'terminology'.
        """
        active_profile = self._get_active_profile()
        print(f"Starting weekly pattern update for profile: {active_profile}...")

        # Get memory IDs for active profile only
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('SELECT id FROM memories WHERE profile = ? ORDER BY created_at',
                       (active_profile,))
        all_memory_ids = [row[0] for row in cursor.fetchall()]
        total_memories = len(all_memory_ids)
        conn.close()

        if total_memories == 0:
            print(f"No memories found for profile '{active_profile}'. Add memories first.")
            return {'preferences': 0, 'styles': 0, 'terminology': 0}

        print(f"Analyzing {total_memories} memories for profile '{active_profile}'...")

        # Run all analyzers
        preferences = self.frequency_analyzer.analyze_preferences(all_memory_ids)
        print(f" Found {len(preferences)} preference patterns")

        styles = self.context_analyzer.analyze_style(all_memory_ids)
        print(f" Found {len(styles)} style patterns")

        terms = self.terminology_learner.learn_terminology(all_memory_ids)
        print(f" Found {len(terms)} terminology patterns")

        # Recalculate confidence scores and save all patterns (tagged with profile)
        counts = {
            'preferences': self._score_and_save(preferences, total_memories, active_profile),
            'styles': self._score_and_save(styles, total_memories, active_profile),
            'terminology': self._score_and_save(terms, total_memories, active_profile),
        }

        print(f"\nPattern update complete:")
        print(f" {counts['preferences']} preferences")
        print(f" {counts['styles']} styles")
        print(f" {counts['terminology']} terminology")

        return counts

    def on_new_memory(self, memory_id: int):
        """Incremental update when new memory is added."""
        active_profile = self._get_active_profile()
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('SELECT COUNT(*) FROM memories WHERE profile = ?',
                       (active_profile,))
        total = cursor.fetchone()[0]
        conn.close()

        # Only do incremental updates if we have many memories (>50)
        if total > 50:
            # TODO: Implement true incremental update
            print(f"New memory #{memory_id} added. Run weekly_pattern_update() to update patterns.")
        else:
            # For small memory counts, just do full update
            self.weekly_pattern_update()

    def get_patterns(self, min_confidence: float = 0.7) -> List[Dict[str, Any]]:
        """Query patterns above confidence threshold for active profile."""
        active_profile = self._get_active_profile()
        return self.pattern_store.get_patterns(min_confidence, profile=active_profile)

    def get_identity_context(self, min_confidence: float = 0.7) -> str:
        """Format patterns for Claude context injection."""
        patterns = self.get_patterns(min_confidence)

        if not patterns:
            return "## Working with User - Learned Patterns\n\nNo patterns learned yet. Add more memories to build your profile."

        # Group by pattern type
        sections = {
            'preference': [],
            'style': [],
            'terminology': []
        }

        for p in patterns:
            # Fix: skip pattern types we don't render instead of raising
            # KeyError (other subsystems may store additional types).
            if p['pattern_type'] not in sections:
                continue
            sections[p['pattern_type']].append(
                f"- **{p['key'].replace('_', ' ').title()}:** {p['value']} "
                f"(confidence: {p['confidence']:.0%}, {p['evidence_count']} examples)"
            )

        output = "## Working with User - Learned Patterns\n\n"

        if sections['preference']:
            output += "**Technology Preferences:**\n" + '\n'.join(sections['preference']) + '\n\n'

        if sections['style']:
            output += "**Coding Style:**\n" + '\n'.join(sections['style']) + '\n\n'

        if sections['terminology']:
            output += "**Terminology:**\n" + '\n'.join(sections['terminology']) + '\n'

        return output
|
910
|
-
|
|
911
|
-
# CLI Interface
#
# Commands:
#   update          -- run the full weekly pattern analysis
#   list [min]      -- print learned patterns at/above a confidence threshold
#   context [min]   -- print the formatted context block for Claude
#   stats           -- aggregate pattern statistics
if __name__ == "__main__":
    import sys

    # Constructing the learner also creates the pattern tables via
    # PatternStore._init_tables (side effect happens even for bare usage).
    learner = PatternLearner()

    if len(sys.argv) < 2:
        # No command given: print usage and exit successfully.
        print("Pattern Learner - Identity Profile Extraction")
        print("\nUsage:")
        print(" python pattern_learner.py update # Full pattern update (weekly)")
        print(" python pattern_learner.py list [min_conf] # List learned patterns (default: 0.7)")
        print(" python pattern_learner.py context [min] # Get context for Claude")
        print(" python pattern_learner.py stats # Pattern statistics")
        sys.exit(0)

    command = sys.argv[1]

    if command == "update":
        # Full re-analysis of all memories in the active profile.
        counts = learner.weekly_pattern_update()
        print(f"\nTotal patterns learned: {sum(counts.values())}")

    elif command == "list":
        # Optional second arg is the minimum confidence (0.0-1.0).
        min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
        patterns = learner.get_patterns(min_conf)

        if not patterns:
            print(f"No patterns found with confidence >= {min_conf:.0%}")
        else:
            # Fixed-width table header.
            print(f"\n{'Type':<15} {'Category':<12} {'Pattern':<30} {'Confidence':<12} {'Evidence':<10}")
            print("-" * 95)

            for p in patterns:
                # "Key: value" summary, truncated to fit the Pattern column.
                pattern_display = f"{p['key'].replace('_', ' ').title()}: {p['value']}"
                if len(pattern_display) > 28:
                    pattern_display = pattern_display[:28] + "..."

                print(f"{p['pattern_type']:<15} {p['category']:<12} {pattern_display:<30} "
                      f"{p['confidence']:>6.0%} {p['evidence_count']:<10}")

    elif command == "context":
        # Print the markdown block injected into Claude's context.
        min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
        context = learner.get_identity_context(min_conf)
        print(context)

    elif command == "stats":
        patterns = learner.get_patterns(0.5)  # Include all patterns

        if not patterns:
            print("No patterns learned yet.")
        else:
            # Aggregate by type and category for the summary below.
            by_type = Counter([p['pattern_type'] for p in patterns])
            by_category = Counter([p['category'] for p in patterns])

            avg_confidence = sum(p['confidence'] for p in patterns) / len(patterns)
            high_conf = len([p for p in patterns if p['confidence'] >= 0.8])

            print(f"\nPattern Statistics:")
            print(f" Total patterns: {len(patterns)}")
            print(f" Average confidence: {avg_confidence:.0%}")
            print(f" High confidence (>=80%): {high_conf}")
            print(f"\nBy Type:")
            for ptype, count in by_type.most_common():
                print(f" {ptype}: {count}")
            print(f"\nBy Category:")
            for cat, count in by_category.most_common():
                print(f" {cat}: {count}")

    else:
        # Unknown command: report it and exit with an error status.
        print(f"Unknown command: {command}")
        sys.exit(1)
|
12
|
+
from patterns import ( # noqa: F401
|
|
13
|
+
FrequencyAnalyzer,
|
|
14
|
+
ContextAnalyzer,
|
|
15
|
+
TerminologyLearner,
|
|
16
|
+
ConfidenceScorer,
|
|
17
|
+
PatternStore,
|
|
18
|
+
PatternLearner,
|
|
19
|
+
SKLEARN_AVAILABLE,
|
|
20
|
+
MEMORY_DIR,
|
|
21
|
+
DB_PATH,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
'FrequencyAnalyzer',
|
|
26
|
+
'ContextAnalyzer',
|
|
27
|
+
'TerminologyLearner',
|
|
28
|
+
'ConfidenceScorer',
|
|
29
|
+
'PatternStore',
|
|
30
|
+
'PatternLearner',
|
|
31
|
+
'SKLEARN_AVAILABLE',
|
|
32
|
+
'MEMORY_DIR',
|
|
33
|
+
'DB_PATH',
|
|
34
|
+
]
|