superlocalmemory 3.0.36 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Sleep-Time Consolidation Worker — background memory maintenance.
6
+
7
+ Runs periodically (every 6 hours or on-demand) to:
8
+ 1. Decay confidence on unused facts (floor 0.1)
9
+ 2. Deduplicate near-identical facts
10
+ 3. Generate behavioral patterns (tech, topic, time-of-day) from recent facts
10
+ 4. Auto-retrain the adaptive ranker when signal threshold is met, then report consolidation stats
12
+
13
+ Inspired by: Letta's sleep-time compute, neuroscience memory consolidation.
14
+
15
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import sqlite3
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class ConsolidationWorker:
    """Background memory maintenance worker.

    Call `run()` periodically or via dashboard button.

    Consolidation never deletes rows: it lowers ``confidence`` values in the
    ``atomic_facts`` table of the memory DB, records derived behavioral
    patterns, and may retrain the adaptive ranker. Every step is best-effort:
    a failing step is logged and the remaining steps still run.
    """

    # Lower-cased keyword -> display label for "tech_preference" patterns.
    # Keywords are matched as whole words, so "go" does not fire on "good"
    # and "java" does not fire on "javascript". Short English words such as
    # "go" or "rust" can still produce false positives in ordinary prose.
    _TECH_KEYWORDS = {
        "python": "Python", "javascript": "JavaScript", "typescript": "TypeScript",
        "react": "React", "vue": "Vue", "angular": "Angular",
        "postgresql": "PostgreSQL", "mysql": "MySQL", "sqlite": "SQLite",
        "docker": "Docker", "kubernetes": "Kubernetes", "aws": "AWS",
        "azure": "Azure", "gcp": "GCP", "node": "Node.js",
        "fastapi": "FastAPI", "django": "Django", "flask": "Flask",
        "rust": "Rust", "go": "Go", "java": "Java",
        "git": "Git", "npm": "npm", "pip": "pip",
        "langchain": "LangChain", "ollama": "Ollama", "pytorch": "PyTorch",
        "claude": "Claude", "openai": "OpenAI", "anthropic": "Anthropic",
        # Common spellings that whole-word matching would otherwise miss.
        "nodejs": "Node.js", "golang": "Go", "postgres": "PostgreSQL",
    }

    # Words ignored when counting topic frequencies.
    _STOPWORDS = frozenset({
        "the", "is", "a", "an", "in", "on", "at", "to", "for", "of",
        "and", "or", "not", "with", "that", "this", "was", "are", "be",
        "has", "had", "have", "from", "by", "it", "its", "as", "but",
    })

    def __init__(self, memory_db: str | Path, learning_db: str | Path) -> None:
        """Remember the paths of the memory DB and the learning DB."""
        self._memory_db = str(memory_db)
        self._learning_db = str(learning_db)

    def run(self, profile_id: str, dry_run: bool = False) -> dict:
        """Run a full consolidation cycle and return its stats.

        Args:
            profile_id: Profile whose facts are consolidated.
            dry_run: When True nothing is written. Decay and retraining are
                skipped (``decayed`` stays 0, ``retrained`` stays False);
                ``deduped`` and ``patterns_generated`` report what a real
                run would touch.

        Returns:
            Dict with keys ``decayed``, ``deduped``, ``patterns_generated``,
            ``retrained``, ``signal_count``, ``ranker_phase`` and
            ``timestamp`` (UTC, ISO 8601). A step that fails leaves its
            default value in place.
        """
        stats = {
            "decayed": 0,
            "deduped": 0,
            "patterns_generated": 0,
            "retrained": False,
            "signal_count": 0,
            "ranker_phase": 1,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # 1. Confidence decay on unused facts. The decay helper always
        # writes, so it must not be invoked at all during a dry run.
        if not dry_run:
            try:
                from superlocalmemory.learning.signals import LearningSignals
                decayed = LearningSignals.decay_confidence(
                    self._memory_db, profile_id, rate=0.001,
                )
                stats["decayed"] = decayed
                logger.info("Confidence decay: %d facts affected", decayed)
            except Exception as exc:
                logger.debug("Decay failed: %s", exc)

        # 2. Deduplication (mark near-identical facts)
        try:
            stats["deduped"] = self._deduplicate(profile_id, dry_run)
        except Exception as exc:
            logger.debug("Dedup failed: %s", exc)

        # 3. Generate behavioral patterns from memories
        try:
            stats["patterns_generated"] = self._generate_patterns(profile_id, dry_run)
        except Exception as exc:
            logger.debug("Pattern generation failed: %s", exc)

        # 4. Check if ranker should retrain
        try:
            from superlocalmemory.learning.feedback import FeedbackCollector
            collector = FeedbackCollector(Path(self._learning_db))
            signal_count = collector.get_feedback_count(profile_id)
            stats["signal_count"] = signal_count
            # Phase 1: < 50 signals, phase 2: 50-199, phase 3: >= 200.
            stats["ranker_phase"] = 1 if signal_count < 50 else (2 if signal_count < 200 else 3)

            # Retrains on every run once 200 signals exist, not only on the
            # run where the threshold is first crossed.
            if signal_count >= 200 and not dry_run:
                stats["retrained"] = self._retrain_ranker(profile_id, signal_count)
        except Exception as exc:
            logger.debug("Retrain check failed: %s", exc)

        return stats

    def _deduplicate(self, profile_id: str, dry_run: bool) -> int:
        """Find near-duplicate facts and halve their confidence.

        Two facts count as duplicates when the first 100 characters of their
        content match after stripping whitespace and lower-casing. The
        earliest fact (by ``created_at``) is kept untouched; later ones get
        ``confidence = MAX(0.1, confidence * 0.5)``. Nothing is deleted.
        Facts with NULL or blank content are ignored, since an empty prefix
        says nothing about similarity.

        NOTE(review): duplicates are halved again on every run until they
        reach the 0.1 floor — confirm this is intended.

        Returns:
            Number of duplicate facts found (also in dry-run mode), or 0 if
            the database could not be read or updated.
        """
        conn = None
        try:
            conn = sqlite3.connect(self._memory_db, timeout=10)
            conn.execute("PRAGMA busy_timeout=5000")

            rows = conn.execute(
                "SELECT fact_id, content FROM atomic_facts "
                "WHERE profile_id = ? ORDER BY created_at",
                (profile_id,),
            ).fetchall()

            seen_prefixes = set()
            duplicates = []
            for fact_id, content in rows:
                prefix = (content or "")[:100].strip().lower()
                if not prefix:
                    continue
                if prefix in seen_prefixes:
                    duplicates.append(fact_id)
                else:
                    seen_prefixes.add(prefix)

            if duplicates and not dry_run:
                conn.executemany(
                    "UPDATE atomic_facts SET confidence = MAX(0.1, confidence * 0.5) "
                    "WHERE fact_id = ?",
                    [(fact_id,) for fact_id in duplicates],
                )
                conn.commit()

            return len(duplicates)
        except Exception as exc:
            logger.debug("Dedup error: %s", exc)
            return 0
        finally:
            # Close on every path, including errors raised mid-query.
            if conn is not None:
                conn.close()

    def _load_recent_facts(self, profile_id: str) -> list:
        """Return ``(content, created_at)`` for the 500 newest facts of a profile.

        NULL content is normalised to an empty string; ``created_at`` is
        returned as stored.
        """
        conn = sqlite3.connect(self._memory_db, timeout=10)
        try:
            conn.execute("PRAGMA busy_timeout=5000")
            rows = conn.execute(
                "SELECT content, created_at FROM atomic_facts "
                "WHERE profile_id = ? ORDER BY created_at DESC LIMIT 500",
                (profile_id,),
            ).fetchall()
        finally:
            conn.close()
        return [(content or "", created_at) for content, created_at in rows]

    @classmethod
    def _count_tech_mentions(cls, contents):
        """Count, per technology label, how many texts mention it as a whole word."""
        import re
        from collections import Counter

        # Longest keywords first so the alternation prefers the most
        # specific spelling where two keywords share a prefix.
        keywords = sorted(cls._TECH_KEYWORDS, key=len, reverse=True)
        pattern = re.compile(r"\b(?:%s)\b" % "|".join(re.escape(k) for k in keywords))

        counts = Counter()
        for content in contents:
            labels = {cls._TECH_KEYWORDS[hit] for hit in pattern.findall(content.lower())}
            # Each label counts once per fact; sorted() keeps tie order stable.
            counts.update(sorted(labels))
        return counts

    @classmethod
    def _count_topic_words(cls, contents):
        """Count alphabetic words of 4+ letters across texts, minus stopwords."""
        import re
        from collections import Counter

        counts = Counter()
        for content in contents:
            counts.update(
                word
                for word in re.findall(r'\b[a-zA-Z]{4,}\b', content.lower())
                if word not in cls._STOPWORDS
            )
        return counts

    @staticmethod
    def _period_of_day(created_at) -> str | None:
        """Map a stored timestamp to morning/afternoon/evening/night.

        Accepts ``YYYY-MM-DDTHH:MM...`` as well as the space-separated
        ``YYYY-MM-DD HH:MM...`` form. Returns None when no valid hour can be
        extracted. The hour is used as stored, with no timezone conversion.
        """
        import re

        if not isinstance(created_at, str):
            return None
        match = re.search(r"\d{4}-\d{2}-\d{2}[T ](\d{2}):", created_at)
        if match is None:
            return None
        hour = int(match.group(1))
        if hour > 23:
            return None
        if 6 <= hour < 12:
            return "morning"
        if 12 <= hour < 18:
            return "afternoon"
        if 18 <= hour < 22:
            return "evening"
        return "night"

    def _generate_patterns(self, profile_id: str, dry_run: bool) -> int:
        """Mine behavioral patterns from existing memories.

        Scans the 500 most recent facts to detect:
        - Tech preferences (whole-word technology mentions, >= 3 facts)
        - Topic clusters (10 most frequent words, >= 5 occurrences)
        - Temporal patterns (time-of-day buckets, >= 3 facts)

        Nothing is generated when the profile has fewer than 10 facts.

        Args:
            profile_id: Profile whose facts are scanned.
            dry_run: When True the pattern store is neither imported nor
                written; the return value is the number of patterns a real
                run would record.

        Returns:
            Number of patterns recorded (or that would be recorded). If a
            step fails, the count reached so far is returned.
        """
        generated = 0
        try:
            from collections import Counter

            facts = self._load_recent_facts(profile_id)
            if len(facts) < 10:
                return 0

            store = None
            if not dry_run:
                from superlocalmemory.learning.behavioral import BehavioralPatternStore
                store = BehavioralPatternStore(self._learning_db)

            def emit(pattern_type, data, success_rate, confidence):
                # Write only on a real run; dry runs just count.
                if store is not None:
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type=pattern_type,
                        data=data,
                        success_rate=success_rate,
                        confidence=confidence,
                    )

            contents = [content for content, _ in facts]

            # Tech preferences: detect technology mentions
            for tech, count in self._count_tech_mentions(contents).most_common(15):
                if count >= 3:
                    confidence = min(1.0, count / 20)
                    emit(
                        "tech_preference",
                        {"topic": tech, "pattern_key": tech, "value": tech,
                         "key": "tech", "evidence": count},
                        confidence,
                        confidence,
                    )
                    generated += 1

            # Topic clusters: most discussed subjects
            for topic, count in self._count_topic_words(contents).most_common(10):
                if count >= 5:
                    confidence = min(1.0, count / 30)
                    emit(
                        "interest",
                        {"topic": topic, "pattern_key": topic,
                         "count": count, "evidence": count},
                        confidence,
                        confidence,
                    )
                    generated += 1

            # Temporal patterns: time-of-day activity
            period_counts = Counter()
            for _, created_at in facts:
                period = self._period_of_day(created_at)
                if period is not None:
                    period_counts[period] += 1

            total = sum(period_counts.values())
            for period, count in period_counts.most_common():
                if count >= 3:
                    pct = round(count / total * 100)
                    emit(
                        "temporal",
                        {"topic": period, "pattern_key": period,
                         "value": f"{period} ({pct}%)", "evidence": count,
                         "key": period, "distribution": dict(period_counts)},
                        pct / 100,
                        min(1.0, count / 20),
                    )
                    generated += 1

            return generated
        except Exception as exc:
            logger.debug("Pattern generation error: %s", exc)
            return generated

    def _retrain_ranker(self, profile_id: str, signal_count: int) -> bool:
        """Retrain the adaptive ranker from accumulated feedback.

        Loads up to 500 feedback entries and trains only when at least 200
        are available.

        Returns:
            The result of ``AdaptiveRanker.train``; False when there is too
            little feedback or any step fails.
        """
        try:
            from superlocalmemory.learning.feedback import FeedbackCollector
            from superlocalmemory.learning.ranker import AdaptiveRanker

            collector = FeedbackCollector(Path(self._learning_db))
            feedback = collector.get_feedback(profile_id, limit=500)

            if len(feedback) < 200:
                return False

            # Build training data from feedback.
            # NOTE(review): the only feature is the label itself
            # (signal_value), so the model cannot learn anything beyond the
            # identity mapping — confirm which real features should be used.
            training_data = []
            for entry in feedback:
                label = entry.get("signal_value", 0.5)
                training_data.append({
                    "features": {"signal_value": label},
                    "label": label,
                })

            ranker = AdaptiveRanker(signal_count=signal_count)
            trained = ranker.train(training_data)

            if trained:
                logger.info("Ranker retrained with %d examples (Phase 3)", len(training_data))

            return trained
        except Exception as exc:
            logger.debug("Retrain failed: %s", exc)
            return False
@@ -0,0 +1,326 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Zero-Cost Learning Signals — mathematical learning without LLM tokens.
6
+
7
+ Four signal types that improve retrieval quality over time:
8
+
9
+ 1. Entropy Gap — Surprising content gets deeper indexing.
10
+ 2. Co-Retrieval — Memories retrieved together strengthen graph edges.
11
+ 3. Channel Credit — Track which retrieval channel works for which query type.
12
+ 4. Confidence Lifecycle — Boost on access, decay over time.
13
+
14
+ All signals are computed locally with zero LLM cost.
15
+ Inspired by: Nemori (entropy), A-Mem (link evolution), RMM (citation feedback).
16
+
17
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ import math
24
+ import sqlite3
25
+ import threading
26
+ from datetime import datetime, timezone
27
+ from pathlib import Path
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
class LearningSignals:
    """Compute and apply zero-cost learning signals.

    Uses the main memory.db via direct sqlite3 (no engine dependency).
    Instance methods open a fresh connection per call and serialise their
    database work through a per-instance lock. The static confidence helpers
    take the database path explicitly and do not use that lock.
    """

    def __init__(self, db_path: str | Path) -> None:
        """Store the database path and create the signal tables if needed."""
        self._db_path = str(db_path)
        self._lock = threading.Lock()
        self._ensure_tables()

    # ------------------------------------------------------------------
    # Schema
    # ------------------------------------------------------------------

    def _ensure_tables(self) -> None:
        """Create learning signal tables if they don't exist.

        The unique indexes back the ``ON CONFLICT`` upserts used by
        `credit_channel` and `record_co_retrieval`.
        """
        with self._lock:
            conn = self._connect()
            try:
                conn.execute(
                    "CREATE TABLE IF NOT EXISTS channel_credits ("
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, "
                    "profile_id TEXT NOT NULL, "
                    "query_type TEXT NOT NULL, "
                    "channel TEXT NOT NULL, "
                    "hits INTEGER DEFAULT 0, "
                    "total INTEGER DEFAULT 0, "
                    "updated_at TEXT NOT NULL)"
                )
                conn.execute(
                    "CREATE UNIQUE INDEX IF NOT EXISTS idx_channel_credit_unique "
                    "ON channel_credits(profile_id, query_type, channel)"
                )
                conn.execute(
                    "CREATE TABLE IF NOT EXISTS co_retrieval_edges ("
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, "
                    "profile_id TEXT NOT NULL, "
                    "fact_id_a TEXT NOT NULL, "
                    "fact_id_b TEXT NOT NULL, "
                    "co_count INTEGER DEFAULT 1, "
                    "updated_at TEXT NOT NULL)"
                )
                conn.execute(
                    "CREATE UNIQUE INDEX IF NOT EXISTS idx_co_retrieval_unique "
                    "ON co_retrieval_edges(profile_id, fact_id_a, fact_id_b)"
                )
                conn.commit()
            finally:
                conn.close()

    def _connect(self) -> sqlite3.Connection:
        """Open a WAL-mode connection with a 5 s busy timeout and Row access."""
        conn = sqlite3.connect(self._db_path, timeout=10)
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=5000")
        conn.row_factory = sqlite3.Row
        return conn

    # ------------------------------------------------------------------
    # Signal 1: Entropy Gap (store-time)
    # ------------------------------------------------------------------

    @staticmethod
    def compute_entropy_gap(
        new_embedding: list[float],
        cluster_embeddings: list[list[float]],
    ) -> float:
        """Compute how surprising new content is relative to existing cluster.

        The gap is ``1 - mean cosine similarity`` between the new embedding
        and every cluster embedding, clamped to [0.0, 1.0].

        High gap = surprising content = should get deeper indexing.
        Low gap = redundant content = standard indexing.

        Returns a value in [0.0, 1.0]; 0.5 (neutral) when either input is
        empty. >0.7 is 'surprising'.
        """
        if not cluster_embeddings or not new_embedding:
            return 0.5  # neutral when no comparison available

        similarities = [
            _cosine_sim(new_embedding, existing) for existing in cluster_embeddings
        ]
        avg_sim = sum(similarities) / len(similarities)
        return max(0.0, min(1.0, 1.0 - avg_sim))

    # ------------------------------------------------------------------
    # Signal 2: Co-Retrieval (recall-time)
    # ------------------------------------------------------------------

    def record_co_retrieval(
        self, profile_id: str, fact_ids: list[str],
    ) -> int:
        """Record that these facts were co-retrieved.

        Each fact is paired with up to the four facts that follow it in the
        result list (not with every other fact), which bounds the number of
        edges written per recall. Repeated ids are collapsed first, so a
        fact is never linked to itself and no pair is counted twice within
        one call. Each pair is stored with its ids in sorted order and its
        ``co_count`` is incremented.

        Returns:
            Number of pairs written; 0 when fewer than two distinct facts
            were given.
        """
        # dict.fromkeys drops repeats while preserving result order.
        unique_ids = list(dict.fromkeys(fact_ids))
        if len(unique_ids) < 2:
            return 0

        now = datetime.now(timezone.utc).isoformat()
        pairs = []
        for i, first in enumerate(unique_ids):
            for second in unique_ids[i + 1:i + 5]:
                a, b = sorted([first, second])
                pairs.append((profile_id, a, b, now, now))

        with self._lock:
            conn = self._connect()
            try:
                conn.executemany(
                    "INSERT INTO co_retrieval_edges "
                    "(profile_id, fact_id_a, fact_id_b, co_count, updated_at) "
                    "VALUES (?, ?, ?, 1, ?) "
                    "ON CONFLICT(profile_id, fact_id_a, fact_id_b) "
                    "DO UPDATE SET co_count = co_count + 1, updated_at = ?",
                    pairs,
                )
                conn.commit()
                return len(pairs)
            finally:
                conn.close()

    def get_co_retrieval_boost(
        self, profile_id: str, fact_id: str, top_k: int = 5,
    ) -> list[dict]:
        """Get top co-retrieved facts for boosting.

        Returns:
            Up to ``top_k`` dicts ``{"fact_id", "co_count"}`` naming the
            other end of each edge touching ``fact_id``, highest
            ``co_count`` first.
        """
        with self._lock:
            conn = self._connect()
            try:
                rows = conn.execute(
                    "SELECT fact_id_a, fact_id_b, co_count FROM co_retrieval_edges "
                    "WHERE profile_id = ? AND (fact_id_a = ? OR fact_id_b = ?) "
                    "ORDER BY co_count DESC LIMIT ?",
                    (profile_id, fact_id, fact_id, top_k),
                ).fetchall()
                results = []
                for row in rows:
                    # The queried fact may sit on either side of the edge.
                    if row["fact_id_a"] == fact_id:
                        other = row["fact_id_b"]
                    else:
                        other = row["fact_id_a"]
                    results.append({"fact_id": other, "co_count": row["co_count"]})
                return results
            finally:
                conn.close()

    # ------------------------------------------------------------------
    # Signal 3: Channel Credit (recall-time)
    # ------------------------------------------------------------------

    def credit_channel(
        self, profile_id: str, query_type: str, channel: str, hit: bool,
    ) -> None:
        """Credit a retrieval channel for a hit or miss.

        Upserts the (profile, query type, channel) row: ``total`` always
        grows by one, ``hits`` only when ``hit`` is true.
        """
        now = datetime.now(timezone.utc).isoformat()
        hit_val = 1 if hit else 0
        with self._lock:
            conn = self._connect()
            try:
                conn.execute(
                    "INSERT INTO channel_credits "
                    "(profile_id, query_type, channel, hits, total, updated_at) "
                    "VALUES (?, ?, ?, ?, 1, ?) "
                    "ON CONFLICT(profile_id, query_type, channel) "
                    "DO UPDATE SET hits = hits + ?, total = total + 1, updated_at = ?",
                    (profile_id, query_type, channel, hit_val, now, hit_val, now),
                )
                conn.commit()
            finally:
                conn.close()

    def get_channel_weights(
        self, profile_id: str, query_type: str,
    ) -> dict[str, float]:
        """Get learned channel weights for a query type.

        Only channels with at least 5 recorded attempts are included. The
        weight is ``0.7 + 0.8 * hit_rate``, i.e. between 0.7 (never hits)
        and 1.5 (always hits).

        Returns:
            Mapping of channel name to weight multiplier; empty when no
            channel has enough data.
        """
        with self._lock:
            conn = self._connect()
            try:
                rows = conn.execute(
                    "SELECT channel, hits, total FROM channel_credits "
                    "WHERE profile_id = ? AND query_type = ? AND total >= 5",
                    (profile_id, query_type),
                ).fetchall()
                weights = {}
                for row in rows:
                    rate = row["hits"] / max(row["total"], 1)
                    weights[row["channel"]] = 0.7 + (rate * 0.8)
                return weights
            finally:
                conn.close()

    # ------------------------------------------------------------------
    # Signal 4: Confidence Lifecycle (store + recall time)
    # ------------------------------------------------------------------

    @staticmethod
    def boost_confidence(db_path: str, fact_id: str, amount: float = 0.02) -> None:
        """Boost a fact's confidence on access. Capped at 1.0.

        Also increments ``access_count`` (a NULL count is treated as 0).
        Best-effort: failures are logged at debug level and never raised.
        """
        conn = None
        try:
            conn = sqlite3.connect(db_path, timeout=10)
            conn.execute("PRAGMA busy_timeout=5000")
            # One statement keeps the two column updates atomic.
            conn.execute(
                "UPDATE atomic_facts SET "
                "confidence = MIN(1.0, confidence + ?), "
                "access_count = COALESCE(access_count, 0) + 1 "
                "WHERE fact_id = ?",
                (amount, fact_id),
            )
            conn.commit()
        except Exception as exc:
            logger.debug("Confidence boost failed for %s: %s", fact_id, exc)
        finally:
            if conn is not None:
                conn.close()

    @staticmethod
    def decay_confidence(db_path: str, profile_id: str, rate: float = 0.001) -> int:
        """Decay confidence on unused facts. Floor: 0.1.

        A fact is decayed when it belongs to ``profile_id``, has never been
        accessed, is still above the floor, and was created more than 7
        days ago. ``created_at`` is normalised with SQLite's ``datetime()``
        before the comparison, so ISO 8601 values written with a ``T``
        separator or a UTC offset compare correctly against the cutoff;
        values SQLite cannot parse fall back to a plain string comparison.

        Returns:
            Number of facts whose confidence was lowered; 0 on any error
            (logged at debug level).
        """
        conn = None
        try:
            conn = sqlite3.connect(db_path, timeout=10)
            conn.execute("PRAGMA busy_timeout=5000")
            cursor = conn.execute(
                "UPDATE atomic_facts SET confidence = MAX(0.1, confidence - ?) "
                "WHERE profile_id = ? AND COALESCE(access_count, 0) = 0 "
                # Skip rows already at the floor so the count reflects real changes.
                "AND confidence > 0.1 "
                "AND COALESCE(datetime(created_at), created_at) "
                "< datetime('now', '-7 days')",
                (rate, profile_id),
            )
            conn.commit()
            return cursor.rowcount
        except Exception as exc:
            logger.debug("Confidence decay failed: %s", exc)
            return 0
        finally:
            if conn is not None:
                conn.close()

    # ------------------------------------------------------------------
    # Stats
    # ------------------------------------------------------------------

    def get_signal_stats(self, profile_id: str) -> dict:
        """Get learning signal statistics for dashboard.

        Returns:
            Dict with ``co_retrieval_edges`` (number of edges),
            ``co_retrieval_events`` (sum of their counts) and
            ``channel_performance`` mapping each channel to its ``hits``,
            ``total`` and ``rate`` rounded to 3 decimals. Channels are
            queried busiest first; when a channel name occurs under several
            query types, the last row read for that name is the one kept.
        """
        with self._lock:
            conn = self._connect()
            try:
                co_row = conn.execute(
                    "SELECT COUNT(*) AS c, COALESCE(SUM(co_count), 0) AS total "
                    "FROM co_retrieval_edges WHERE profile_id = ?",
                    (profile_id,),
                ).fetchone()
                co = dict(co_row) if co_row else {"c": 0, "total": 0}

                ch_rows = conn.execute(
                    "SELECT channel, hits, total FROM channel_credits "
                    "WHERE profile_id = ? ORDER BY total DESC",
                    (profile_id,),
                ).fetchall()
                channels = {}
                for row in ch_rows:
                    hits, total = row["hits"], row["total"]
                    channels[row["channel"]] = {
                        "hits": hits,
                        "total": total,
                        "rate": round(hits / max(total, 1), 3),
                    }

                return {
                    "co_retrieval_edges": co["c"],
                    "co_retrieval_events": co["total"],
                    "channel_performance": channels,
                }
            finally:
                conn.close()
+
316
+
317
def _cosine_sim(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of two equal-length vectors.

    Yields 0.0 instead of raising when the vectors differ in length, are
    empty, or when either has a norm below 1e-10 (effectively a zero
    vector).
    """
    # Guard: mismatched or empty inputs have no meaningful similarity.
    if len(a) != len(b) or not a:
        return 0.0

    inner_product = sum(left * right for left, right in zip(a, b))
    magnitude_a = math.sqrt(sum(value * value for value in a))
    magnitude_b = math.sqrt(sum(value * value for value in b))

    # Guard: avoid dividing by a (near-)zero magnitude.
    if magnitude_a < 1e-10 or magnitude_b < 1e-10:
        return 0.0

    return inner_product / (magnitude_a * magnitude_b)