superlocalmemory 2.7.6 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +120 -155
- package/README.md +115 -89
- package/api_server.py +2 -12
- package/docs/PATTERN-LEARNING.md +64 -199
- package/docs/example_graph_usage.py +4 -6
- package/install.sh +59 -0
- package/mcp_server.py +83 -7
- package/package.json +1 -8
- package/scripts/generate-thumbnails.py +3 -5
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-show-patterns/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/agent_registry.py +7 -18
- package/src/auth_middleware.py +3 -5
- package/src/auto_backup.py +3 -7
- package/src/behavioral/__init__.py +49 -0
- package/src/behavioral/behavioral_listener.py +203 -0
- package/src/behavioral/behavioral_patterns.py +275 -0
- package/src/behavioral/cross_project_transfer.py +206 -0
- package/src/behavioral/outcome_inference.py +194 -0
- package/src/behavioral/outcome_tracker.py +193 -0
- package/src/behavioral/tests/__init__.py +4 -0
- package/src/behavioral/tests/test_behavioral_integration.py +108 -0
- package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
- package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
- package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
- package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
- package/src/behavioral/tests/test_outcome_inference.py +107 -0
- package/src/behavioral/tests/test_outcome_tracker.py +96 -0
- package/src/cache_manager.py +4 -6
- package/src/compliance/__init__.py +48 -0
- package/src/compliance/abac_engine.py +149 -0
- package/src/compliance/abac_middleware.py +116 -0
- package/src/compliance/audit_db.py +215 -0
- package/src/compliance/audit_logger.py +148 -0
- package/src/compliance/retention_manager.py +289 -0
- package/src/compliance/retention_scheduler.py +186 -0
- package/src/compliance/tests/__init__.py +4 -0
- package/src/compliance/tests/test_abac_enforcement.py +95 -0
- package/src/compliance/tests/test_abac_engine.py +124 -0
- package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
- package/src/compliance/tests/test_audit_db.py +123 -0
- package/src/compliance/tests/test_audit_logger.py +98 -0
- package/src/compliance/tests/test_mcp_audit.py +128 -0
- package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
- package/src/compliance/tests/test_retention_manager.py +131 -0
- package/src/compliance/tests/test_retention_scheduler.py +99 -0
- package/src/db_connection_manager.py +2 -12
- package/src/embedding_engine.py +61 -669
- package/src/embeddings/__init__.py +47 -0
- package/src/embeddings/cache.py +70 -0
- package/src/embeddings/cli.py +113 -0
- package/src/embeddings/constants.py +47 -0
- package/src/embeddings/database.py +91 -0
- package/src/embeddings/engine.py +247 -0
- package/src/embeddings/model_loader.py +145 -0
- package/src/event_bus.py +3 -13
- package/src/graph/__init__.py +36 -0
- package/src/graph/build_helpers.py +74 -0
- package/src/graph/cli.py +87 -0
- package/src/graph/cluster_builder.py +188 -0
- package/src/graph/cluster_summary.py +148 -0
- package/src/graph/constants.py +47 -0
- package/src/graph/edge_builder.py +162 -0
- package/src/graph/entity_extractor.py +95 -0
- package/src/graph/graph_core.py +226 -0
- package/src/graph/graph_search.py +231 -0
- package/src/graph/hierarchical.py +207 -0
- package/src/graph/schema.py +99 -0
- package/src/graph_engine.py +45 -1451
- package/src/hnsw_index.py +3 -7
- package/src/hybrid_search.py +36 -683
- package/src/learning/__init__.py +27 -12
- package/src/learning/adaptive_ranker.py +50 -12
- package/src/learning/cross_project_aggregator.py +2 -12
- package/src/learning/engagement_tracker.py +2 -12
- package/src/learning/feature_extractor.py +175 -43
- package/src/learning/feedback_collector.py +7 -12
- package/src/learning/learning_db.py +180 -12
- package/src/learning/project_context_manager.py +2 -12
- package/src/learning/source_quality_scorer.py +2 -12
- package/src/learning/synthetic_bootstrap.py +2 -12
- package/src/learning/tests/__init__.py +2 -0
- package/src/learning/tests/test_adaptive_ranker.py +2 -6
- package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
- package/src/learning/tests/test_aggregator.py +2 -6
- package/src/learning/tests/test_auto_retrain_v28.py +35 -0
- package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
- package/src/learning/tests/test_feature_extractor_v28.py +93 -0
- package/src/learning/tests/test_feedback_collector.py +2 -6
- package/src/learning/tests/test_learning_db.py +2 -6
- package/src/learning/tests/test_learning_db_v28.py +110 -0
- package/src/learning/tests/test_learning_init_v28.py +48 -0
- package/src/learning/tests/test_outcome_signals.py +48 -0
- package/src/learning/tests/test_project_context.py +2 -6
- package/src/learning/tests/test_schema_migration.py +319 -0
- package/src/learning/tests/test_signal_inference.py +11 -13
- package/src/learning/tests/test_source_quality.py +2 -6
- package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
- package/src/learning/tests/test_workflow_miner.py +2 -6
- package/src/learning/workflow_pattern_miner.py +2 -12
- package/src/lifecycle/__init__.py +54 -0
- package/src/lifecycle/bounded_growth.py +239 -0
- package/src/lifecycle/compaction_engine.py +226 -0
- package/src/lifecycle/lifecycle_engine.py +302 -0
- package/src/lifecycle/lifecycle_evaluator.py +225 -0
- package/src/lifecycle/lifecycle_scheduler.py +130 -0
- package/src/lifecycle/retention_policy.py +285 -0
- package/src/lifecycle/tests/__init__.py +4 -0
- package/src/lifecycle/tests/test_bounded_growth.py +193 -0
- package/src/lifecycle/tests/test_compaction.py +179 -0
- package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
- package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
- package/src/lifecycle/tests/test_mcp_compact.py +149 -0
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
- package/src/lifecycle/tests/test_retention_policy.py +162 -0
- package/src/mcp_tools_v28.py +280 -0
- package/src/memory-profiles.py +2 -12
- package/src/memory-reset.py +2 -12
- package/src/memory_compression.py +2 -12
- package/src/memory_store_v2.py +76 -20
- package/src/migrate_v1_to_v2.py +2 -12
- package/src/pattern_learner.py +29 -975
- package/src/patterns/__init__.py +24 -0
- package/src/patterns/analyzers.py +247 -0
- package/src/patterns/learner.py +267 -0
- package/src/patterns/scoring.py +167 -0
- package/src/patterns/store.py +223 -0
- package/src/patterns/terminology.py +138 -0
- package/src/provenance_tracker.py +4 -14
- package/src/query_optimizer.py +4 -6
- package/src/rate_limiter.py +2 -6
- package/src/search/__init__.py +20 -0
- package/src/search/cli.py +77 -0
- package/src/search/constants.py +26 -0
- package/src/search/engine.py +239 -0
- package/src/search/fusion.py +122 -0
- package/src/search/index_loader.py +112 -0
- package/src/search/methods.py +162 -0
- package/src/search_engine_v2.py +4 -6
- package/src/setup_validator.py +7 -13
- package/src/subscription_manager.py +2 -12
- package/src/tree/__init__.py +59 -0
- package/src/tree/builder.py +183 -0
- package/src/tree/nodes.py +196 -0
- package/src/tree/queries.py +252 -0
- package/src/tree/schema.py +76 -0
- package/src/tree_manager.py +10 -711
- package/src/trust/__init__.py +45 -0
- package/src/trust/constants.py +66 -0
- package/src/trust/queries.py +157 -0
- package/src/trust/schema.py +95 -0
- package/src/trust/scorer.py +299 -0
- package/src/trust/signals.py +95 -0
- package/src/trust_scorer.py +39 -697
- package/src/webhook_dispatcher.py +2 -12
- package/ui/app.js +1 -1
- package/ui/js/agents.js +1 -1
- package/ui_server.py +2 -14
- package/ATTRIBUTION.md +0 -140
- package/docs/ARCHITECTURE-V2.5.md +0 -190
- package/docs/GRAPH-ENGINE.md +0 -503
- package/docs/architecture-diagram.drawio +0 -405
- package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
"""SuperLocalMemory V2 - Pattern Learning Package

Re-exports all public classes for backward-compatible imports:
    from patterns import PatternLearner, FrequencyAnalyzer, ...
"""
from .analyzers import FrequencyAnalyzer, ContextAnalyzer
from .terminology import TerminologyLearner
from .scoring import ConfidenceScorer
from .store import PatternStore
from .learner import PatternLearner, SKLEARN_AVAILABLE, MEMORY_DIR, DB_PATH

# Explicit public API so `from patterns import *` stays predictable.
__all__ = [
    'FrequencyAnalyzer',
    'ContextAnalyzer',
    'TerminologyLearner',
    'ConfidenceScorer',
    'PatternStore',
    'PatternLearner',
    'SKLEARN_AVAILABLE',
    'MEMORY_DIR',
    'DB_PATH',
]
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
"""
Pattern Analyzers - Frequency and Context Analysis

Technology preference detection via frequency counting,
and coding style pattern detection from context.
"""

import sqlite3
import re
import logging
from typing import Dict, List, Any
from collections import Counter
from pathlib import Path

# Module-level logger; no handlers are configured here — the host app decides.
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FrequencyAnalyzer:
    """Analyzes technology and tool preferences via frequency counting.

    Scans the text of the given memories for known technology keywords and
    emits one 'preference' pattern per category when a single keyword clearly
    dominates (>60% of that category's mentions, at least 3 occurrences).
    """

    def __init__(self, db_path: Path):
        # SQLite database containing the `memories` table.
        self.db_path = db_path

        # Predefined technology categories
        self.tech_categories = {
            'frontend_framework': ['react', 'nextjs', 'next.js', 'vue', 'angular', 'svelte', 'solid'],
            'backend_framework': ['express', 'fastapi', 'django', 'flask', 'nestjs', 'spring', 'rails'],
            'database': ['postgres', 'postgresql', 'mysql', 'mongodb', 'redis', 'dynamodb', 'sqlite'],
            'state_management': ['redux', 'context', 'zustand', 'mobx', 'recoil', 'jotai'],
            'styling': ['tailwind', 'css modules', 'styled-components', 'emotion', 'sass', 'less'],
            'language': ['python', 'javascript', 'typescript', 'go', 'rust', 'java', 'c++'],
            'deployment': ['docker', 'kubernetes', 'vercel', 'netlify', 'aws', 'gcp', 'azure'],
            'testing': ['jest', 'pytest', 'vitest', 'mocha', 'cypress', 'playwright'],
        }

    def analyze_preferences(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Analyze technology preferences across memories.

        Args:
            memory_ids: IDs of rows in `memories` to scan; missing IDs are skipped.

        Returns:
            Mapping of tech category -> pattern dict with keys
            pattern_type/key/value/confidence/evidence_count/memory_ids/category.
        """
        patterns: Dict[str, Dict[str, Any]] = {}

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            for category, keywords in self.tech_categories.items():
                keyword_counts = Counter()
                evidence_memories: Dict[str, List[int]] = {}  # {keyword: [memory_ids]}

                for memory_id in memory_ids:
                    cursor.execute('SELECT content FROM memories WHERE id = ?', (memory_id,))
                    row = cursor.fetchone()

                    if not row:
                        continue

                    content = row[0].lower()

                    for keyword in keywords:
                        # Count whole-token occurrences. Lookarounds are used
                        # instead of \b because \b fails at the edges of
                        # keywords ending in non-word characters ('c++').
                        # BUG FIX: the old code pre-escaped '.' before calling
                        # re.escape(), double-escaping it so 'next.js' could
                        # never match; re.escape() alone is correct.
                        # IGNORECASE dropped: content is already lowercased.
                        pattern = r'(?<!\w)' + re.escape(keyword) + r'(?!\w)'
                        count = len(re.findall(pattern, content))

                        if count > 0:
                            keyword_counts[keyword] += count
                            # Record the memory once per keyword as evidence.
                            evidence_memories.setdefault(keyword, []).append(memory_id)

                # Determine preference (most mentioned)
                if keyword_counts:
                    top_keyword = keyword_counts.most_common(1)[0][0]
                    total_mentions = sum(keyword_counts.values())
                    top_count = keyword_counts[top_keyword]

                    # Calculate confidence (% of mentions)
                    confidence = top_count / total_mentions if total_mentions > 0 else 0

                    # Only create pattern if confidence > 0.6 and at least 3 mentions
                    if confidence > 0.6 and top_count >= 3:
                        value = self._format_preference(top_keyword, keyword_counts)
                        evidence_list = list(set(evidence_memories.get(top_keyword, [])))

                        patterns[category] = {
                            'pattern_type': 'preference',
                            'key': category,
                            'value': value,
                            'confidence': round(confidence, 2),
                            'evidence_count': len(evidence_list),
                            'memory_ids': evidence_list,
                            'category': self._categorize_pattern(category)
                        }
        finally:
            # Always release the connection, even if a query raises.
            conn.close()

        return patterns

    def _format_preference(self, top_keyword: str, all_counts: Counter) -> str:
        """Format preference value (e.g., 'Next.js over React')."""
        # Normalize keyword for display
        display_map = {
            'nextjs': 'Next.js',
            'next.js': 'Next.js',
            'postgres': 'PostgreSQL',
            'postgresql': 'PostgreSQL',
            'fastapi': 'FastAPI',
            'nestjs': 'NestJS',
            'mongodb': 'MongoDB',
            'redis': 'Redis',
            'dynamodb': 'DynamoDB',
            'tailwind': 'Tailwind CSS',
        }

        top_display = display_map.get(top_keyword.lower(), top_keyword.title())

        if len(all_counts) > 1:
            second = all_counts.most_common(2)[1]
            second_keyword = second[0]
            second_display = display_map.get(second_keyword.lower(), second_keyword.title())

            # Only show comparison if second choice has significant mentions
            if second[1] / all_counts[top_keyword] > 0.3:
                return f"{top_display} over {second_display}"

        return top_display

    def _categorize_pattern(self, tech_category: str) -> str:
        """Map tech category to high-level category."""
        category_map = {
            'frontend_framework': 'frontend',
            'state_management': 'frontend',
            'styling': 'frontend',
            'backend_framework': 'backend',
            'database': 'backend',
            'language': 'general',
            'deployment': 'devops',
            'testing': 'general',
        }
        return category_map.get(tech_category, 'general')
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class ContextAnalyzer:
    """Detects coding-style preferences by scanning memory text for indicator phrases."""

    def __init__(self, db_path: Path):
        self.db_path = db_path

        # Each style dimension maps its candidate styles to the phrases that signal them.
        self.style_indicators = {
            'optimization_priority': {
                'performance': ['optimize', 'faster', 'performance', 'speed', 'latency', 'efficient', 'cache'],
                'readability': ['readable', 'clean', 'maintainable', 'clear', 'simple', 'understandable']
            },
            'error_handling': {
                'explicit': ['error boundary', 'explicit', 'throw', 'handle error', 'try catch', 'error handling'],
                'permissive': ['ignore', 'suppress', 'skip error', 'optional']
            },
            'testing_approach': {
                'comprehensive': ['test coverage', 'unit test', 'integration test', 'e2e test', 'test suite'],
                'minimal': ['manual test', 'skip test', 'no tests']
            },
            'code_organization': {
                'modular': ['separate', 'module', 'component', 'split', 'refactor', 'extract'],
                'monolithic': ['single file', 'one place', 'combined']
            }
        }

    def analyze_style(self, memory_ids: List[int]) -> Dict[str, Dict[str, Any]]:
        """Detect stylistic patterns from context."""
        detected: Dict[str, Dict[str, Any]] = {}

        db = sqlite3.connect(self.db_path)
        cur = db.cursor()

        for dimension, candidates in self.style_indicators.items():
            votes = Counter()
            supporters: Dict[str, List[int]] = {}  # {style_type: [memory_ids]}

            for mem_id in memory_ids:
                cur.execute('SELECT content FROM memories WHERE id = ?', (mem_id,))
                fetched = cur.fetchone()
                if not fetched:
                    continue

                text = fetched[0].lower()

                # One vote per phrase hit; a memory may vote multiple times.
                for candidate, phrases in candidates.items():
                    for phrase in phrases:
                        if phrase in text:
                            votes[candidate] += 1
                            supporters.setdefault(candidate, []).append(mem_id)

            if not votes:
                continue

            # Pick the winning style for this dimension.
            winner, winner_votes = votes.most_common(1)[0]
            total_votes = sum(votes.values())
            ratio = winner_votes / total_votes if total_votes > 0 else 0

            # Require a clear majority (>65%) and minimum evidence (3 hits).
            if ratio > 0.65 and winner_votes >= 3:
                unique_ids = list(set(supporters.get(winner, [])))
                detected[dimension] = {
                    'pattern_type': 'style',
                    'key': dimension,
                    'value': self._format_style_value(dimension, winner, votes),
                    'confidence': round(ratio, 2),
                    'evidence_count': len(unique_ids),
                    'memory_ids': unique_ids,
                    'category': 'general'
                }

        db.close()
        return detected

    def _format_style_value(self, pattern_key: str, top_style: str, all_counts: Counter) -> str:
        """Turn a (dimension, winning style) pair into a human-readable label."""
        labels = {
            ('optimization_priority', 'performance'): 'Performance over readability',
            ('optimization_priority', 'readability'): 'Readability over performance',
            ('error_handling', 'explicit'): 'Explicit error boundaries',
            ('error_handling', 'permissive'): 'Permissive error handling',
            ('testing_approach', 'comprehensive'): 'Comprehensive testing',
            ('testing_approach', 'minimal'): 'Minimal testing',
            ('code_organization', 'modular'): 'Modular organization',
            ('code_organization', 'monolithic'): 'Monolithic organization',
        }
        # Fall back to a title-cased version of the style name.
        return labels.get((pattern_key, top_style), top_style.replace('_', ' ').title())
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
"""
Pattern Learner - Main orchestrator and CLI.

Coordinates frequency analysis, context analysis, terminology learning,
confidence scoring, and pattern storage into a unified learning pipeline.
"""

import sqlite3
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any
from collections import Counter

from .analyzers import FrequencyAnalyzer, ContextAnalyzer
from .terminology import TerminologyLearner
from .scoring import ConfidenceScorer
from .store import PatternStore

logger = logging.getLogger(__name__)

# Local NLP tools (no external APIs)
# sklearn/numpy are optional; SKLEARN_AVAILABLE lets callers feature-detect
# before relying on similarity-based functionality.
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False

# Default on-disk location of the memory database (~/.claude-memory/memory.db).
MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class PatternLearner:
    """Main pattern learning orchestrator.

    Wires the individual analyzers (frequency, context, terminology) to the
    confidence scorer and pattern store, scoping all work to the currently
    active profile.
    """

    def __init__(self, db_path: Path = DB_PATH):
        self.db_path = db_path
        self.frequency_analyzer = FrequencyAnalyzer(db_path)
        self.context_analyzer = ContextAnalyzer(db_path)
        self.terminology_learner = TerminologyLearner(db_path)
        self.confidence_scorer = ConfidenceScorer(db_path)
        self.pattern_store = PatternStore(db_path)

    def _get_active_profile(self) -> str:
        """Get the currently active profile name from config ('default' fallback)."""
        config_file = MEMORY_DIR / "profiles.json"
        if config_file.exists():
            try:
                with open(config_file, 'r') as f:
                    config = json.load(f)
                return config.get('active_profile', 'default')
            except (json.JSONDecodeError, IOError):
                # Unreadable/corrupt config silently falls back to default.
                pass
        return 'default'

    def _fetch_memory_ids(self, profile: str) -> List[int]:
        """Return all memory IDs belonging to *profile*, oldest first."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT id FROM memories WHERE profile = ? ORDER BY created_at',
                           (profile,))
            return [row[0] for row in cursor.fetchall()]
        finally:
            # Close even on query failure so the connection is never leaked.
            conn.close()

    def _score_and_save(self, patterns: Dict[str, Dict[str, Any]],
                        profile: str, total_memories: int) -> int:
        """Re-score each pattern, tag it with *profile*, and persist it.

        Extracted helper: the original repeated this loop verbatim for each
        of the three pattern kinds.  Returns the number of patterns saved.
        """
        for pattern in patterns.values():
            confidence = self.confidence_scorer.calculate_confidence(
                pattern['pattern_type'],
                pattern['key'],
                pattern['value'],
                pattern['memory_ids'],
                total_memories
            )
            pattern['confidence'] = round(confidence, 2)
            pattern['profile'] = profile
            self.pattern_store.save_pattern(pattern)
        return len(patterns)

    def weekly_pattern_update(self) -> Dict[str, int]:
        """Full pattern analysis of all memories for active profile. Run this weekly.

        Returns:
            Saved-pattern counts: {'preferences': int, 'styles': int, 'terminology': int}.
        """
        active_profile = self._get_active_profile()
        print(f"Starting weekly pattern update for profile: {active_profile}...")

        # Get memory IDs for active profile only
        all_memory_ids = self._fetch_memory_ids(active_profile)
        total_memories = len(all_memory_ids)

        if total_memories == 0:
            print(f"No memories found for profile '{active_profile}'. Add memories first.")
            return {'preferences': 0, 'styles': 0, 'terminology': 0}

        print(f"Analyzing {total_memories} memories for profile '{active_profile}'...")

        # Run all analyzers
        preferences = self.frequency_analyzer.analyze_preferences(all_memory_ids)
        print(f" Found {len(preferences)} preference patterns")

        styles = self.context_analyzer.analyze_style(all_memory_ids)
        print(f" Found {len(styles)} style patterns")

        terms = self.terminology_learner.learn_terminology(all_memory_ids)
        print(f" Found {len(terms)} terminology patterns")

        # Recalculate confidence scores and save all patterns (tagged with profile)
        counts = {
            'preferences': self._score_and_save(preferences, active_profile, total_memories),
            'styles': self._score_and_save(styles, active_profile, total_memories),
            'terminology': self._score_and_save(terms, active_profile, total_memories),
        }

        print(f"\nPattern update complete:")
        print(f" {counts['preferences']} preferences")
        print(f" {counts['styles']} styles")
        print(f" {counts['terminology']} terminology")

        return counts

    def on_new_memory(self, memory_id: int):
        """Incremental update when new memory is added.

        Does a full re-analysis while the corpus is small (<=50 memories);
        otherwise just prompts for a scheduled update.
        """
        active_profile = self._get_active_profile()
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM memories WHERE profile = ?',
                           (active_profile,))
            total = cursor.fetchone()[0]
        finally:
            conn.close()

        # Only do incremental updates if we have many memories (>50)
        if total > 50:
            # TODO: Implement true incremental update
            print(f"New memory #{memory_id} added. Run weekly_pattern_update() to update patterns.")
        else:
            # For small memory counts, just do full update
            self.weekly_pattern_update()

    def get_patterns(self, min_confidence: float = 0.7) -> List[Dict[str, Any]]:
        """Query patterns above confidence threshold for active profile."""
        active_profile = self._get_active_profile()
        return self.pattern_store.get_patterns(min_confidence, profile=active_profile)

    def get_identity_context(self, min_confidence: float = 0.7) -> str:
        """Format patterns for Claude context injection (markdown string)."""
        patterns = self.get_patterns(min_confidence)

        if not patterns:
            return "## Working with User - Learned Patterns\n\nNo patterns learned yet. Add more memories to build your profile."

        # Group by pattern type.
        # NOTE(review): assumes the store only yields 'preference'/'style'/
        # 'terminology' types; any other type would raise KeyError — confirm.
        sections = {
            'preference': [],
            'style': [],
            'terminology': []
        }

        for p in patterns:
            sections[p['pattern_type']].append(
                f"- **{p['key'].replace('_', ' ').title()}:** {p['value']} "
                f"(confidence: {p['confidence']:.0%}, {p['evidence_count']} examples)"
            )

        output = "## Working with User - Learned Patterns\n\n"

        if sections['preference']:
            output += "**Technology Preferences:**\n" + '\n'.join(sections['preference']) + '\n\n'

        if sections['style']:
            output += "**Coding Style:**\n" + '\n'.join(sections['style']) + '\n\n'

        if sections['terminology']:
            output += "**Terminology:**\n" + '\n'.join(sections['terminology']) + '\n'

        return output
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# CLI Interface
if __name__ == "__main__":
    import sys

    learner = PatternLearner()

    # No subcommand given: print usage and exit successfully.
    if len(sys.argv) < 2:
        print("Pattern Learner - Identity Profile Extraction")
        print("\nUsage:")
        print(" python pattern_learner.py update # Full pattern update (weekly)")
        print(" python pattern_learner.py list [min_conf] # List learned patterns (default: 0.7)")
        print(" python pattern_learner.py context [min] # Get context for Claude")
        print(" python pattern_learner.py stats # Pattern statistics")
        sys.exit(0)

    command = sys.argv[1]

    if command == "update":
        # Full re-analysis of the active profile's memories.
        counts = learner.weekly_pattern_update()
        print(f"\nTotal patterns learned: {sum(counts.values())}")

    elif command == "list":
        # Optional second arg: minimum confidence threshold (default 0.7).
        min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
        patterns = learner.get_patterns(min_conf)

        if not patterns:
            print(f"No patterns found with confidence >= {min_conf:.0%}")
        else:
            print(f"\n{'Type':<15} {'Category':<12} {'Pattern':<30} {'Confidence':<12} {'Evidence':<10}")
            print("-" * 95)

            for p in patterns:
                pattern_display = f"{p['key'].replace('_', ' ').title()}: {p['value']}"
                # Truncate long descriptions to keep the table columns aligned.
                if len(pattern_display) > 28:
                    pattern_display = pattern_display[:28] + "..."

                print(f"{p['pattern_type']:<15} {p['category']:<12} {pattern_display:<30} "
                      f"{p['confidence']:>6.0%} {p['evidence_count']:<10}")

    elif command == "context":
        # Print the markdown context block for injection into Claude.
        min_conf = float(sys.argv[2]) if len(sys.argv) > 2 else 0.7
        context = learner.get_identity_context(min_conf)
        print(context)

    elif command == "stats":
        patterns = learner.get_patterns(0.5)  # Include all patterns

        if not patterns:
            print("No patterns learned yet.")
        else:
            # Aggregate counts by pattern type and high-level category.
            by_type = Counter([p['pattern_type'] for p in patterns])
            by_category = Counter([p['category'] for p in patterns])

            avg_confidence = sum(p['confidence'] for p in patterns) / len(patterns)
            high_conf = len([p for p in patterns if p['confidence'] >= 0.8])

            print(f"\nPattern Statistics:")
            print(f" Total patterns: {len(patterns)}")
            print(f" Average confidence: {avg_confidence:.0%}")
            print(f" High confidence (>=80%): {high_conf}")
            print(f"\nBy Type:")
            for ptype, count in by_type.most_common():
                print(f" {ptype}: {count}")
            print(f"\nBy Category:")
            for cat, count in by_category.most_common():
                print(f" {cat}: {count}")

    else:
        # Unknown subcommand: report and exit with failure status.
        print(f"Unknown command: {command}")
        sys.exit(1)
|