superlocalmemory 2.8.2 → 2.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +1 -1
- package/CHANGELOG.md +17 -0
- package/README.md +7 -5
- package/api_server.py +5 -0
- package/bin/slm +35 -0
- package/bin/slm.bat +3 -3
- package/docs/SECURITY-QUICK-REFERENCE.md +214 -0
- package/install.ps1 +11 -11
- package/mcp_server.py +78 -10
- package/package.json +2 -2
- package/requirements-core.txt +16 -18
- package/requirements-learning.txt +8 -8
- package/requirements.txt +9 -7
- package/scripts/prepack.js +33 -0
- package/scripts/verify-v27.ps1 +301 -0
- package/src/agent_registry.py +32 -28
- package/src/auto_backup.py +12 -6
- package/src/cache_manager.py +2 -2
- package/src/compression/__init__.py +25 -0
- package/src/compression/cli.py +150 -0
- package/src/compression/cold_storage.py +217 -0
- package/src/compression/config.py +72 -0
- package/src/compression/orchestrator.py +133 -0
- package/src/compression/tier2_compressor.py +228 -0
- package/src/compression/tier3_compressor.py +153 -0
- package/src/compression/tier_classifier.py +148 -0
- package/src/db_connection_manager.py +5 -5
- package/src/event_bus.py +24 -22
- package/src/hnsw_index.py +3 -3
- package/src/learning/__init__.py +5 -4
- package/src/learning/adaptive_ranker.py +14 -265
- package/src/learning/bootstrap/__init__.py +69 -0
- package/src/learning/bootstrap/constants.py +93 -0
- package/src/learning/bootstrap/db_queries.py +316 -0
- package/src/learning/bootstrap/sampling.py +82 -0
- package/src/learning/bootstrap/text_utils.py +71 -0
- package/src/learning/cross_project_aggregator.py +58 -57
- package/src/learning/db/__init__.py +40 -0
- package/src/learning/db/constants.py +44 -0
- package/src/learning/db/schema.py +279 -0
- package/src/learning/learning_db.py +15 -234
- package/src/learning/ranking/__init__.py +33 -0
- package/src/learning/ranking/constants.py +84 -0
- package/src/learning/ranking/helpers.py +278 -0
- package/src/learning/source_quality_scorer.py +66 -65
- package/src/learning/synthetic_bootstrap.py +28 -310
- package/src/memory/__init__.py +36 -0
- package/src/memory/cli.py +205 -0
- package/src/memory/constants.py +39 -0
- package/src/memory/helpers.py +28 -0
- package/src/memory/schema.py +166 -0
- package/src/memory-profiles.py +94 -86
- package/src/memory-reset.py +187 -185
- package/src/memory_compression.py +2 -2
- package/src/memory_store_v2.py +44 -354
- package/src/migrate_v1_to_v2.py +11 -10
- package/src/patterns/analyzers.py +104 -100
- package/src/patterns/learner.py +17 -13
- package/src/patterns/scoring.py +25 -21
- package/src/patterns/store.py +40 -38
- package/src/patterns/terminology.py +53 -51
- package/src/provenance_tracker.py +2 -2
- package/src/qualixar_attribution.py +1 -1
- package/src/search/engine.py +16 -14
- package/src/search/index_loader.py +13 -11
- package/src/setup_validator.py +160 -158
- package/src/subscription_manager.py +20 -18
- package/src/tree/builder.py +66 -64
- package/src/tree/nodes.py +103 -97
- package/src/tree/queries.py +142 -137
- package/src/tree/schema.py +46 -42
- package/src/webhook_dispatcher.py +3 -3
- package/ui_server.py +7 -4
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Constants for AdaptiveRanker.
|
|
6
|
+
|
|
7
|
+
Includes phase thresholds, rule-based boost multipliers, and LightGBM
|
|
8
|
+
training parameters.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
# ============================================================================
|
|
14
|
+
# Paths
|
|
15
|
+
# ============================================================================
|
|
16
|
+
|
|
17
|
+
MODELS_DIR = Path.home() / ".claude-memory" / "models"
|
|
18
|
+
MODEL_PATH = MODELS_DIR / "ranker.txt"
|
|
19
|
+
|
|
20
|
+
# ============================================================================
|
|
21
|
+
# Phase Thresholds
|
|
22
|
+
# ============================================================================
|
|
23
|
+
|
|
24
|
+
# Phase thresholds — how many feedback signals to trigger each phase
|
|
25
|
+
PHASE_THRESHOLDS = {
|
|
26
|
+
'baseline': 0, # 0 feedback samples -> no re-ranking
|
|
27
|
+
'rule_based': 20, # 20+ feedback -> rule-based boosting
|
|
28
|
+
'ml_model': 200, # 200+ feedback across 50+ unique queries -> ML
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
# Minimum unique queries required for ML phase (prevents overfitting
|
|
32
|
+
# to a small number of repeated queries)
|
|
33
|
+
MIN_UNIQUE_QUERIES_FOR_ML = 50
|
|
34
|
+
|
|
35
|
+
# ============================================================================
|
|
36
|
+
# Rule-Based Boost Multipliers (Phase 1)
|
|
37
|
+
# ============================================================================
|
|
38
|
+
|
|
39
|
+
# These are conservative — they nudge the ranking without flipping order
|
|
40
|
+
RULE_BOOST = {
|
|
41
|
+
'tech_match_strong': 1.3, # Memory matches 2+ preferred techs
|
|
42
|
+
'tech_match_weak': 1.1, # Memory matches 1 preferred tech
|
|
43
|
+
'project_match': 1.5, # Memory from current project
|
|
44
|
+
'project_unknown': 1.0, # No project context — no boost
|
|
45
|
+
'project_mismatch': 0.9, # Memory from different project
|
|
46
|
+
'source_quality_high': 1.2, # Source quality > 0.7
|
|
47
|
+
'source_quality_low': 0.85, # Source quality < 0.3
|
|
48
|
+
'recency_boost_max': 1.2, # Recent memory (< 7 days)
|
|
49
|
+
'recency_penalty_max': 0.8, # Old memory (> 365 days)
|
|
50
|
+
'high_importance': 1.15, # Importance >= 8
|
|
51
|
+
'high_access': 1.1, # Accessed 5+ times
|
|
52
|
+
# v2.8: Lifecycle + behavioral boosts
|
|
53
|
+
'lifecycle_active': 1.0,
|
|
54
|
+
'lifecycle_warm': 0.85,
|
|
55
|
+
'lifecycle_cold': 0.6,
|
|
56
|
+
'outcome_success_high': 1.3,
|
|
57
|
+
'outcome_failure_high': 0.7,
|
|
58
|
+
'behavioral_match_strong': 1.25,
|
|
59
|
+
'cross_project_boost': 1.15,
|
|
60
|
+
'high_trust_creator': 1.1,
|
|
61
|
+
'low_trust_creator': 0.8,
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
# ============================================================================
|
|
65
|
+
# LightGBM Training Parameters
|
|
66
|
+
# ============================================================================
|
|
67
|
+
|
|
68
|
+
# LightGBM training parameters — tuned for small, personal datasets
|
|
69
|
+
# Aggressive regularization prevents overfitting on < 10K samples
|
|
70
|
+
TRAINING_PARAMS = {
|
|
71
|
+
'objective': 'lambdarank',
|
|
72
|
+
'metric': 'ndcg',
|
|
73
|
+
'ndcg_eval_at': [5, 10],
|
|
74
|
+
'learning_rate': 0.05,
|
|
75
|
+
'num_leaves': 16,
|
|
76
|
+
'max_depth': 4,
|
|
77
|
+
'min_child_samples': 10,
|
|
78
|
+
'subsample': 0.8,
|
|
79
|
+
'reg_alpha': 0.1,
|
|
80
|
+
'reg_lambda': 1.0,
|
|
81
|
+
'boosting_type': 'dart',
|
|
82
|
+
'n_estimators': 50,
|
|
83
|
+
'verbose': -1,
|
|
84
|
+
}
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Helper functions for AdaptiveRanker.
|
|
6
|
+
|
|
7
|
+
Extracted from adaptive_ranker.py to reduce file size while maintaining
|
|
8
|
+
backward compatibility.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import sqlite3
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
from .constants import RULE_BOOST
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("superlocalmemory.learning.ranking.helpers")

# NumPy is optional; it is only needed when building training matrices.
try:
    import numpy as np
except ImportError:
    np = None
HAS_NUMPY = np is not None
+
def calculate_rule_boost(
    features: List[float],
    boost_table: Optional[Dict[str, float]] = None,
) -> float:
    """
    Calculate rule-based boost multiplier from extracted features.

    This function encapsulates the rule-based boosting logic from Phase 1,
    making the main rerank method more readable.

    Args:
        features: Feature vector extracted for a memory. Indices [2]-[8]
            must be present; indices [10] and beyond are optional and only
            applied when the vector is long enough (newer schema versions).
        boost_table: Optional override for the boost multipliers; defaults
            to RULE_BOOST. Parameterized so the rules can be tuned or
            exercised in tests without patching module state.

    Returns:
        Boost multiplier (typically 0.5 to 2.0).
    """
    table = RULE_BOOST if boost_table is None else boost_table
    boost = 1.0

    # Feature [2]: tech_match
    tech_match = features[2]
    if tech_match >= 0.8:
        boost *= table['tech_match_strong']
    elif tech_match >= 0.4:
        boost *= table['tech_match_weak']

    # Feature [3]: project_match
    project_match = features[3]
    if project_match >= 0.9:
        boost *= table['project_match']
    elif project_match <= 0.35:
        boost *= table['project_mismatch']

    # Feature [5]: source_quality
    source_quality = features[5]
    if source_quality >= 0.7:
        boost *= table['source_quality_high']
    elif source_quality < 0.3:
        boost *= table['source_quality_low']

    # Feature [7]: recency_score — linear interpolation between the
    # maximum penalty (recency 0.0) and the maximum boost (recency 1.0).
    recency = features[7]
    recency_factor = (
        table['recency_penalty_max']
        + recency * (
            table['recency_boost_max'] - table['recency_penalty_max']
        )
    )
    boost *= recency_factor

    # Feature [6]: importance_norm
    if features[6] >= 0.8:
        boost *= table['high_importance']

    # Feature [8]: access_frequency
    if features[8] >= 0.5:
        boost *= table['high_access']

    # Feature [10]: signal_count (v2.7.4 — feedback volume)
    if len(features) > 10 and features[10] >= 0.3:  # 3+ signals
        boost *= 1.1  # Mild boost for well-known memories

    # Feature [11]: avg_signal_value (v2.7.4 — feedback quality)
    if len(features) > 11:
        avg_signal = features[11]
        if avg_signal >= 0.7:
            boost *= 1.15  # Boost memories with positive feedback
        elif 0.0 < avg_signal < 0.3:
            boost *= 0.85  # Penalize memories with negative feedback

    # Feature [12]: lifecycle_state (v2.8)
    if len(features) > 12:
        lifecycle_state = features[12]
        if lifecycle_state >= 0.9:
            boost *= table.get('lifecycle_active', 1.0)
        elif lifecycle_state >= 0.6:
            boost *= table.get('lifecycle_warm', 0.85)
        elif lifecycle_state >= 0.3:
            boost *= table.get('lifecycle_cold', 0.6)

    # Feature [13]: outcome_success_rate (v2.8)
    if len(features) > 13:
        success_rate = features[13]
        if success_rate >= 0.8:
            boost *= table.get('outcome_success_high', 1.3)
        elif success_rate <= 0.2:
            boost *= table.get('outcome_failure_high', 0.7)

    # Feature [15]: behavioral_match (v2.8)
    if len(features) > 15 and features[15] >= 0.7:
        boost *= table.get('behavioral_match_strong', 1.25)

    # Feature [16]: cross_project_score (v2.8)
    if len(features) > 16 and features[16] >= 0.5:
        boost *= table.get('cross_project_boost', 1.15)

    # Feature [18]: trust_at_creation (v2.8)
    if len(features) > 18:
        trust = features[18]
        if trust >= 0.9:
            boost *= table.get('high_trust_creator', 1.1)
        elif trust <= 0.3:
            boost *= table.get('low_trust_creator', 0.8)

    return boost
|
|
142
|
+
def prepare_training_data_internal(
    feedback: List[dict],
    feature_extractor,
) -> Optional[tuple]:
    """
    Build LGBMRanker training data from feedback records.

    Feedback entries are bucketed by ``query_hash``; each bucket becomes
    one ranking group. The memories referenced by the feedback are fetched
    from memory.db, features are extracted per memory, and ``signal_value``
    supplies the relevance label.

    Args:
        feedback: List of feedback records from LearningDB.
        feature_extractor: FeatureExtractor instance with context set.

    Returns:
        Tuple of (X, y, groups) for LGBMRanker, or None if insufficient.
        X: numpy array (n_samples, NUM_FEATURES)
        y: numpy array (n_samples,) — relevance labels
        groups: list of ints — samples per query group
    """
    if not HAS_NUMPY:
        logger.warning("NumPy not available for training data preparation")
        return None
    if not feedback:
        return None

    # Bucket feedback by query hash; ranking needs at least a pair per
    # query, so drop singleton buckets immediately.
    buckets: Dict[str, List[dict]] = {}
    for record in feedback:
        buckets.setdefault(record['query_hash'], []).append(record)
    buckets = {qh: recs for qh, recs in buckets.items() if len(recs) >= 2}
    if not buckets:
        logger.info("No query groups with 2+ feedback entries")
        return None

    # Every memory id referenced by any surviving bucket.
    wanted_ids = {
        rec['memory_id'] for recs in buckets.values() for rec in recs
    }

    db_path = Path.home() / ".claude-memory" / "memory.db"
    if not db_path.exists():
        logger.warning("memory.db not found at %s", db_path)
        return None

    memories_by_id = {}
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Fetch in chunks so we never exceed SQLite's bound-variable cap.
            id_list = list(wanted_ids)
            step = 500
            for start in range(0, len(id_list), step):
                batch = id_list[start:start + step]
                marks = ','.join('?' for _ in batch)
                cursor.execute(f'''
                    SELECT id, content, summary, project_path, project_name,
                           tags, category, memory_type, importance, created_at,
                           last_accessed, access_count
                    FROM memories
                    WHERE id IN ({marks})
                ''', batch)
                for row in cursor.fetchall():
                    memories_by_id[row['id']] = dict(row)
        finally:
            conn.close()
    except Exception as e:
        logger.error("Failed to fetch memories for training: %s", e)
        return None

    all_features = []
    all_labels = []
    groups = []

    # Training has no query-time context, so use a neutral one.
    feature_extractor.set_context()

    for recs in buckets.values():
        bucket_features = []
        bucket_labels = []
        for rec in recs:
            memory = memories_by_id.get(rec['memory_id'])
            if memory is None:
                continue  # Memory may have been deleted
            # query_keywords stands in for the original query text.
            query_text = rec.get('query_keywords', '') or ''
            bucket_features.append(
                feature_extractor.extract_features(memory, query_text)
            )
            bucket_labels.append(float(rec['signal_value']))

        # A group is only usable for ranking with 2+ valid entries.
        if len(bucket_features) >= 2:
            all_features.extend(bucket_features)
            all_labels.extend(bucket_labels)
            groups.append(len(bucket_features))

    if not groups or len(all_features) < 4:
        logger.info(
            "Insufficient valid training data: %d features, %d groups",
            len(all_features), len(groups)
        )
        return None

    X = np.array(all_features, dtype=np.float64)
    y = np.array(all_labels, dtype=np.float64)

    logger.info(
        "Prepared training data: %d samples, %d groups, %d features",
        X.shape[0], len(groups), X.shape[1]
    )

    return X, y, groups
|
@@ -274,38 +274,39 @@ class SourceQualityScorer:
|
|
|
274
274
|
|
|
275
275
|
try:
|
|
276
276
|
conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
277
|
+
try:
|
|
278
|
+
conn.execute("PRAGMA busy_timeout=5000")
|
|
279
|
+
cursor = conn.cursor()
|
|
280
|
+
|
|
281
|
+
# Check if created_by column exists
|
|
282
|
+
cursor.execute("PRAGMA table_info(memories)")
|
|
283
|
+
columns = {row[1] for row in cursor.fetchall()}
|
|
284
|
+
|
|
285
|
+
if "created_by" in columns:
|
|
286
|
+
cursor.execute("""
|
|
287
|
+
SELECT
|
|
288
|
+
COALESCE(created_by, 'unknown') AS source,
|
|
289
|
+
COUNT(*) AS cnt
|
|
290
|
+
FROM memories
|
|
291
|
+
GROUP BY source
|
|
292
|
+
ORDER BY cnt DESC
|
|
293
|
+
""")
|
|
294
|
+
for row in cursor.fetchall():
|
|
295
|
+
source_id = row[0] if row[0] else "unknown"
|
|
296
|
+
counts[source_id] = row[1]
|
|
297
|
+
else:
|
|
298
|
+
# Column doesn't exist — count all as 'unknown'
|
|
299
|
+
cursor.execute("SELECT COUNT(*) FROM memories")
|
|
300
|
+
total = cursor.fetchone()[0]
|
|
301
|
+
if total > 0:
|
|
302
|
+
counts["unknown"] = total
|
|
303
|
+
logger.debug(
|
|
304
|
+
"created_by column not in memory.db — "
|
|
305
|
+
"all %d memories grouped as 'unknown'.",
|
|
306
|
+
total,
|
|
307
|
+
)
|
|
308
|
+
finally:
|
|
309
|
+
conn.close()
|
|
309
310
|
|
|
310
311
|
except sqlite3.OperationalError as e:
|
|
311
312
|
logger.warning("Error reading memory counts by source: %s", e)
|
|
@@ -361,40 +362,40 @@ class SourceQualityScorer:
|
|
|
361
362
|
# Step 2: Look up created_by for each feedback memory_id in memory.db
|
|
362
363
|
try:
|
|
363
364
|
conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
365
|
+
try:
|
|
366
|
+
conn.execute("PRAGMA busy_timeout=5000")
|
|
367
|
+
cursor = conn.cursor()
|
|
368
|
+
|
|
369
|
+
# Check if created_by column exists
|
|
370
|
+
cursor.execute("PRAGMA table_info(memories)")
|
|
371
|
+
columns = {row[1] for row in cursor.fetchall()}
|
|
372
|
+
|
|
373
|
+
if "created_by" not in columns:
|
|
374
|
+
# All positives go to 'unknown'
|
|
375
|
+
total_positives = sum(feedback_memory_ids.values())
|
|
376
|
+
if total_positives > 0:
|
|
377
|
+
positives["unknown"] = total_positives
|
|
378
|
+
return positives
|
|
379
|
+
|
|
380
|
+
# Batch lookup in chunks to avoid SQLite variable limit
|
|
381
|
+
mem_ids = list(feedback_memory_ids.keys())
|
|
382
|
+
chunk_size = 500 # SQLite max variables is 999
|
|
383
|
+
|
|
384
|
+
for i in range(0, len(mem_ids), chunk_size):
|
|
385
|
+
chunk = mem_ids[i:i + chunk_size]
|
|
386
|
+
placeholders = ",".join("?" * len(chunk))
|
|
387
|
+
cursor.execute(
|
|
388
|
+
"SELECT id, COALESCE(created_by, 'unknown') "
|
|
389
|
+
"FROM memories WHERE id IN (%s)" % placeholders,
|
|
390
|
+
chunk,
|
|
391
|
+
)
|
|
392
|
+
for row in cursor.fetchall():
|
|
393
|
+
mem_id = row[0]
|
|
394
|
+
source_id = row[1] if row[1] else "unknown"
|
|
395
|
+
count = feedback_memory_ids.get(mem_id, 0)
|
|
396
|
+
positives[source_id] = positives.get(source_id, 0) + count
|
|
397
|
+
finally:
|
|
376
398
|
conn.close()
|
|
377
|
-
return positives
|
|
378
|
-
|
|
379
|
-
# Batch lookup in chunks to avoid SQLite variable limit
|
|
380
|
-
mem_ids = list(feedback_memory_ids.keys())
|
|
381
|
-
chunk_size = 500 # SQLite max variables is 999
|
|
382
|
-
|
|
383
|
-
for i in range(0, len(mem_ids), chunk_size):
|
|
384
|
-
chunk = mem_ids[i:i + chunk_size]
|
|
385
|
-
placeholders = ",".join("?" * len(chunk))
|
|
386
|
-
cursor.execute(
|
|
387
|
-
"SELECT id, COALESCE(created_by, 'unknown') "
|
|
388
|
-
"FROM memories WHERE id IN (%s)" % placeholders,
|
|
389
|
-
chunk,
|
|
390
|
-
)
|
|
391
|
-
for row in cursor.fetchall():
|
|
392
|
-
mem_id = row[0]
|
|
393
|
-
source_id = row[1] if row[1] else "unknown"
|
|
394
|
-
count = feedback_memory_ids.get(mem_id, 0)
|
|
395
|
-
positives[source_id] = positives.get(source_id, 0) + count
|
|
396
|
-
|
|
397
|
-
conn.close()
|
|
398
399
|
|
|
399
400
|
except sqlite3.OperationalError as e:
|
|
400
401
|
logger.warning("Error looking up memory sources: %s", e)
|