opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ """Memory linker that discovers relationships between memories."""
2
+
3
+ import logging
4
+ import re
5
+ from datetime import datetime, timedelta
6
+
7
+ from opencode_memory.ingestion.embeddings import EmbeddingEngine
8
+ from opencode_memory.models import LinkType, Memory, MemoryCategory, MemoryLink
9
+ from opencode_memory.storage.sqlite import SQLiteStorage
10
+ from opencode_memory.storage.vectors import VectorStorage
11
+
12
logger = logging.getLogger(__name__)

# Global similarity cut-offs. A link's strength is the raw similarity score,
# so a permissive floor admits more links without overstating their quality.
SIMILARITY_THRESHOLD = 0.50  # Default floor; used when a category has no override
STRONG_LINK_THRESHOLD = 0.70  # At or above this the link reason says "strong"
SUPERSEDES_THRESHOLD = 0.85  # Same category above this similarity may supersede
SUPERSEDES_TIME_WINDOW_DAYS = 30  # Max age gap (days) for a supersedes link

# Entity references embedded in free text: a sigil followed by digits
# (e.g. "#123", "!45") or an "@" followed by word characters.
ENTITY_PATTERN = re.compile(r"[!#&@]\d+|@\w+")

# Per-category overrides of SIMILARITY_THRESHOLD, looked up by the category of
# the memory being linked.
CATEGORY_THRESHOLDS = {
    # Below the default: these categories favour recall.
    MemoryCategory.DIRECTIVE: 0.45,
    MemoryCategory.PROCEDURE: 0.48,
    MemoryCategory.PLAN: 0.45,  # Plans should link readily to related content
    MemoryCategory.FACT: 0.50,
    # At the default, except full conversations, which are noisy and must
    # clear a higher bar.
    MemoryCategory.CONVERSATION_SUMMARY: 0.50,
    MemoryCategory.CONVERSATION: 0.60,
    MemoryCategory.EVENT: 0.50,
    MemoryCategory.DECISION: 0.50,
    MemoryCategory.BLOCKER: 0.50,
}
35
+
36
+
37
class MemoryLinker:
    """Discovers and creates links between memories based on various signals.

    Three passes run for each memory: vector similarity, shared entity
    references, and temporal proximity within the same category. Links are
    written through the SQLite storage; only links whose insert returns a
    truthy id are reported back to the caller.
    """

    def __init__(
        self,
        sqlite: SQLiteStorage,
        vectors: VectorStorage,
        embeddings: EmbeddingEngine,
    ):
        """Keep references to the storage backends and the embedding engine."""
        self.sqlite = sqlite
        self.vectors = vectors
        self.embeddings = embeddings

    @staticmethod
    def _to_naive_utc(moment: datetime) -> datetime:
        """Return *moment* as a naive datetime, shifted to UTC when it is aware.

        Simply dropping ``tzinfo`` would make two aware timestamps from
        different zones compare by wall-clock reading rather than by instant,
        so the UTC offset is applied before the zone is removed.

        NOTE(review): naive inputs are passed through untouched, which treats
        them as already being UTC - confirm against how ``created_at`` is
        written.
        """
        offset = moment.utcoffset()
        if offset is None:
            # Naive value, or a tzinfo that reports no offset: nothing to shift.
            return moment.replace(tzinfo=None)
        return (moment - offset).replace(tzinfo=None)

    async def link_memory(
        self, memory: Memory, embedding: list[float] | None = None
    ) -> list[MemoryLink]:
        """Find and create links for a single memory.

        Args:
            memory: The memory to link. A memory without an id is skipped.
            embedding: Pre-computed embedding for the memory. When omitted it
                is computed from ``memory.embedding_content()``.

        Returns:
            The links created by the similarity, entity and temporal passes.
        """
        if memory.id is None:
            return []

        if embedding is None:
            embedding = await self.embeddings.embed_async(memory.embedding_content())

        links_created: list[MemoryLink] = []
        links_created.extend(self._link_by_similarity(memory, embedding))
        links_created.extend(self._link_by_entity_overlap(memory))
        links_created.extend(self._link_by_temporal_category(memory))
        return links_created

    def _link_by_similarity(self, memory: Memory, embedding: list[float]) -> list[MemoryLink]:
        """Create links based on vector similarity.

        Link strength equals the similarity score itself. The minimum score is
        taken from ``CATEGORY_THRESHOLDS`` for the memory's category, falling
        back to ``SIMILARITY_THRESHOLD``.
        """
        links: list[MemoryLink] = []

        similar = self.vectors.search(embedding, limit=10)
        threshold = CATEGORY_THRESHOLDS.get(memory.category, SIMILARITY_THRESHOLD)

        for match in similar:
            target_id = match.get("memory_id")
            if not target_id or target_id == memory.id:
                continue

            # Map a distance (0 = identical) onto a similarity in (0, 1].
            distance = match.get("_distance", 1.0)
            similarity = 1.0 / (1.0 + distance)
            if similarity < threshold:
                continue

            # Skip if any link already exists between these memories.
            if self.sqlite.any_link_exists(memory.id, target_id):
                continue

            target = self.sqlite.get_memory_by_id(target_id)
            if not target:
                continue

            link_type, reason = self._classify_similarity_link(memory, target, similarity)
            link = MemoryLink(
                source_memory_id=memory.id,
                target_memory_id=target_id,
                link_type=link_type,
                strength=similarity,
                reason=reason,
            )

            link_id = self.sqlite.insert_link(link)
            if link_id:
                link.id = link_id
                links.append(link)
                # Lazy %-args: the message is only built when DEBUG is enabled.
                logger.debug(
                    "Created %s link: %s -> %s (strength=%.3f)",
                    link_type.value,
                    memory.id,
                    target_id,
                    similarity,
                )

        return links

    def _classify_similarity_link(
        self, source: Memory, target: Memory, similarity: float
    ) -> tuple[LinkType, str]:
        """Determine the specific link type based on memory characteristics.

        Returns:
            ``(link_type, reason)`` where the type is, in order of precedence:
            SUPERSEDES for a newer FACT/PROCEDURE that is nearly identical to
            an older one of the same category within the time window; EXTENDS
            when the source introduces many words the target lacks; otherwise
            RELATED with a strength description.
        """
        source_time = self._to_naive_utc(source.created_at)
        target_time = self._to_naive_utc(target.created_at)

        if (
            source.category == target.category
            and similarity > SUPERSEDES_THRESHOLD
            and source_time > target_time
        ):
            age_diff = (source_time - target_time).days
            if age_diff <= SUPERSEDES_TIME_WINDOW_DAYS and target.category in (
                MemoryCategory.FACT,
                MemoryCategory.PROCEDURE,
            ):
                return (
                    LinkType.SUPERSEDES,
                    f"Same category, high similarity ({similarity:.2f}), newer by {age_diff}d",
                )

        # Compare vocabularies using words of four or more characters.
        source_words = set(re.findall(r"\w{4,}", source.content.lower()))
        target_words = set(re.findall(r"\w{4,}", target.content.lower()))
        if source_words and target_words:
            source_only = source_words - target_words
            common = source_words & target_words
            if len(source_only) > len(common) * 0.5:
                return (
                    LinkType.EXTENDS,
                    f"Extends with additional content ({len(source_only)} new concepts)",
                )

        # Classify link strength for the reason string.
        if similarity >= STRONG_LINK_THRESHOLD:
            strength_desc = "strong"
        elif similarity >= 0.60:
            strength_desc = "moderate"
        else:
            strength_desc = "weak"

        return LinkType.RELATED, f"Semantic similarity ({similarity:.2f}, {strength_desc})"

    def _link_by_entity_overlap(self, memory: Memory) -> list[MemoryLink]:
        """Create SAME_ENTITY links to other memories that reference the same entity.

        Entity references come from both ``ENTITY_PATTERN`` matches in the
        memory content and the memory's own ``entities`` collection.
        """
        links: list[MemoryLink] = []

        entities_in_content = set(ENTITY_PATTERN.findall(memory.content))
        entities_in_memory = set(memory.entities) if memory.entities else set()
        all_entities = entities_in_content | entities_in_memory

        for entity_ref in all_entities:
            entity = self._parse_entity_ref(entity_ref)
            if not entity:
                continue

            db_entity = self.sqlite.get_entity(entity.ref, entity.type)
            if not db_entity or db_entity.id is None:
                continue

            for related in self.sqlite.get_memories_for_entity(db_entity.id):
                if related.id == memory.id or related.id is None:
                    continue

                if self.sqlite.link_exists(memory.id, related.id, LinkType.SAME_ENTITY):
                    continue

                link = MemoryLink(
                    source_memory_id=memory.id,
                    target_memory_id=related.id,
                    link_type=LinkType.SAME_ENTITY,
                    strength=0.8,
                    reason=f"Both reference {entity_ref}",
                )

                link_id = self.sqlite.insert_link(link)
                if link_id:
                    link.id = link_id
                    links.append(link)

        return links

    def _link_by_temporal_category(self, memory: Memory) -> list[MemoryLink]:
        """Create links based on temporal proximity within same category.

        Only DECISION and EVENT memories are considered. Another memory of the
        same category is linked as RELATED when it was created within two hours
        of this one; strength falls linearly with the gap but never below 0.5.
        """
        links: list[MemoryLink] = []

        if memory.category not in (MemoryCategory.DECISION, MemoryCategory.EVENT):
            return links

        time_window = timedelta(hours=2)
        memory_time = self._to_naive_utc(memory.created_at)
        start_time = memory_time - time_window
        end_time = memory_time + time_window

        category_memories = self.sqlite.get_memories_by_category(
            memory.category, limit=20, include_resolved=True
        )

        for other in category_memories:
            if other.id == memory.id or other.id is None:
                continue

            other_time = self._to_naive_utc(other.created_at)
            if not (start_time <= other_time <= end_time):
                continue

            if self.sqlite.link_exists(memory.id, other.id, LinkType.RELATED):
                continue

            time_diff = abs((memory_time - other_time).total_seconds())
            strength = max(0.5, 1.0 - (time_diff / time_window.total_seconds()))

            link = MemoryLink(
                source_memory_id=memory.id,
                target_memory_id=other.id,
                link_type=LinkType.RELATED,
                strength=strength,
                reason=f"Same session (within {int(time_diff / 60)}min)",
            )

            link_id = self.sqlite.insert_link(link)
            if link_id:
                link.id = link_id
                links.append(link)

        return links

    async def process_batch(self, memories: list[Memory], batch_size: int = 10) -> int:
        """Process a batch of memories for linking.

        Args:
            memories: Memories to link, handled one after another. Entries
                without an id are skipped.
            batch_size: Currently unused; kept so existing callers that pass
                it keep working.

        Returns:
            Total number of links created.
        """
        total_links = 0

        for memory in memories:
            if memory.id is None:
                continue
            total_links += len(await self.link_memory(memory))

        return total_links

    async def run_linking_pass(self, limit: int = 50) -> dict:
        """Run a single pass of the linking process.

        The budget is split between memories that have no links yet and
        memories from the last 24 hours that still need links; duplicates
        between the two sets are processed once.

        Returns:
            A dict with ``processed``, ``links_created`` and ``status``
            (``"no_work"`` or ``"completed"``).
        """
        # Give the odd slot to the unlinked query so the two limits always add
        # up to `limit` (with `limit // 2` for both, limit=1 fetched nothing).
        recent_limit = limit // 2
        unlinked_limit = limit - recent_limit

        unlinked = self.sqlite.get_unlinked_memories(limit=unlinked_limit)
        recent = self.sqlite.get_memories_needing_links(since_hours=24, limit=recent_limit)

        # Keyed by id so a memory returned by both queries is linked only once.
        all_memories = {m.id: m for m in unlinked + recent if m.id is not None}
        memories_to_process = list(all_memories.values())

        if not memories_to_process:
            return {"processed": 0, "links_created": 0, "status": "no_work"}

        links_created = await self.process_batch(memories_to_process)

        return {
            "processed": len(memories_to_process),
            "links_created": links_created,
            "status": "completed",
        }

    def _parse_entity_ref(self, ref: str):
        """Parse an entity reference string via ``Entity.from_ref``."""
        from opencode_memory.models import Entity

        return Entity.from_ref(ref)
@@ -0,0 +1,273 @@
1
+ """Prometheus-compatible metrics for opencode-memory.
2
+
3
+ Exposes metrics in Prometheus text format at /metrics endpoint.
4
+ No external dependencies required - uses simple text format.
5
+ """
6
+
7
+ from collections import defaultdict
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+
12
@dataclass
class Counter:
    """A simple counter metric.

    Values are stored per label combination, keyed by a tuple of label values
    in the order given by ``labels``. No check is made that increments are
    non-negative.
    """

    name: str
    help: str
    labels: list[str] = field(default_factory=list)
    _values: dict[tuple, float] = field(default_factory=lambda: defaultdict(float))

    def _key(self, label_values: dict[str, str]) -> tuple:
        """Build the storage key; labels that were not supplied become ""."""
        return tuple(label_values.get(lbl, "") for lbl in self.labels)

    def inc(self, value: float = 1.0, **label_values: str) -> None:
        """Add *value* to the series selected by *label_values*."""
        self._values[self._key(label_values)] += value

    def get(self, **label_values: str) -> float:
        """Return the current value, or 0.0 for a series never incremented.

        Uses ``dict.get`` rather than indexing so that reading an unknown
        label combination does not create an empty series as a side effect.
        """
        return self._values.get(self._key(label_values), 0.0)
30
+
31
+
32
@dataclass
class Gauge:
    """A simple gauge metric.

    Values are stored per label combination, keyed by a tuple of label values
    in the order given by ``labels``.
    """

    name: str
    help: str
    labels: list[str] = field(default_factory=list)
    _values: dict[tuple, float] = field(default_factory=lambda: defaultdict(float))

    def _key(self, label_values: dict[str, str]) -> tuple:
        """Build the storage key; labels that were not supplied become ""."""
        return tuple(label_values.get(lbl, "") for lbl in self.labels)

    def set(self, value: float, **label_values: str) -> None:
        """Overwrite the series selected by *label_values* with *value*."""
        self._values[self._key(label_values)] = value

    def inc(self, value: float = 1.0, **label_values: str) -> None:
        """Add *value* to the selected series (starting from 0 if new)."""
        self._values[self._key(label_values)] += value

    def dec(self, value: float = 1.0, **label_values: str) -> None:
        """Subtract *value* from the selected series (starting from 0 if new)."""
        self._values[self._key(label_values)] -= value

    def get(self, **label_values: str) -> float:
        """Return the current value, or 0.0 for a series never written.

        Uses ``dict.get`` rather than indexing so that reading an unknown
        label combination does not create an empty series as a side effect.
        """
        return self._values.get(self._key(label_values), 0.0)
60
+
61
+
62
@dataclass
class Histogram:
    """A simple histogram metric with fixed buckets.

    ``_bucket_counts`` holds, per label combination, one NON-cumulative count
    per bucket: each observation is recorded only in the smallest bucket whose
    upper bound is >= the value. Cumulative ``le`` totals are produced by
    summing these counts in bucket order at render time, so storing cumulative
    counts here as well would count observations more than once.
    """

    name: str
    help: str
    buckets: list[float] = field(default_factory=lambda: [0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0])
    labels: list[str] = field(default_factory=list)
    _bucket_counts: dict[tuple, list[int]] = field(default_factory=dict)
    _sums: dict[tuple, float] = field(default_factory=lambda: defaultdict(float))
    _counts: dict[tuple, int] = field(default_factory=lambda: defaultdict(int))

    def __post_init__(self) -> None:
        """Keep bucket bounds ascending so "first bound >= value" is the smallest."""
        self.buckets = sorted(self.buckets)

    def observe(self, value: float, **label_values: str) -> None:
        """Record one observation for the series selected by *label_values*.

        The sum and total count are always updated. Values larger than every
        bucket bound fall into no finite bucket and are only reflected in the
        total count.
        """
        key = tuple(label_values.get(lbl, "") for lbl in self.labels)
        if key not in self._bucket_counts:
            self._bucket_counts[key] = [0] * len(self.buckets)

        self._sums[key] += value
        self._counts[key] += 1

        per_bucket = self._bucket_counts[key]
        for index, upper_bound in enumerate(self.buckets):
            if value <= upper_bound:
                per_bucket[index] += 1
                # Stop at the first match: larger buckets pick this
                # observation up through the cumulative sum at render time.
                break
86
+
87
+
88
class MetricsRegistry:
    """Registry for all metrics, with rendering to Prometheus text format."""

    def __init__(self):
        """Start with no registered metrics."""
        self.metrics: dict[str, Counter | Gauge | Histogram] = {}

    def counter(self, name: str, help: str, labels: list[str] | None = None) -> Counter:
        """Create or get a counter.

        If *name* is already registered the existing metric is returned
        unchanged and *help*/*labels* are ignored.
        """
        if name not in self.metrics:
            self.metrics[name] = Counter(name, help, labels or [])
        return self.metrics[name]

    def gauge(self, name: str, help: str, labels: list[str] | None = None) -> Gauge:
        """Create or get a gauge.

        If *name* is already registered the existing metric is returned
        unchanged and *help*/*labels* are ignored.
        """
        if name not in self.metrics:
            self.metrics[name] = Gauge(name, help, labels or [])
        return self.metrics[name]

    def histogram(
        self,
        name: str,
        help: str,
        buckets: list[float] | None = None,
        labels: list[str] | None = None,
    ) -> Histogram:
        """Create or get a histogram.

        If *name* is already registered the existing metric is returned
        unchanged and the other arguments are ignored.
        """
        if name not in self.metrics:
            self.metrics[name] = Histogram(
                name, help, buckets or [0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0], labels or []
            )
        return self.metrics[name]

    def render(self) -> str:
        """Render all metrics in Prometheus text format.

        Returns:
            One ``# HELP`` and ``# TYPE`` line per metric followed by its
            samples, newline-terminated. An empty registry renders as "\\n".
        """
        lines: list[str] = []
        for metric in self.metrics.values():
            lines.append(f"# HELP {metric.name} {self._escape_help(metric.help)}")

            if isinstance(metric, Counter):
                lines.append(f"# TYPE {metric.name} counter")
                lines.extend(self._render_values(metric))
            elif isinstance(metric, Gauge):
                lines.append(f"# TYPE {metric.name} gauge")
                lines.extend(self._render_values(metric))
            elif isinstance(metric, Histogram):
                lines.append(f"# TYPE {metric.name} histogram")
                lines.extend(self._render_histogram(metric))

        return "\n".join(lines) + "\n"

    def _render_values(self, metric) -> list[str]:
        """Return one sample line per label combination of a counter or gauge."""
        return [
            f"{metric.name}{self._format_labels(metric.labels, labels_tuple)} {value}"
            for labels_tuple, value in metric._values.items()
        ]

    def _render_histogram(self, metric) -> list[str]:
        """Return the ``_bucket``, ``_sum`` and ``_count`` lines of a histogram."""
        out: list[str] = []
        for labels_tuple, bucket_counts in metric._bucket_counts.items():
            label_str = self._format_labels(metric.labels, labels_tuple)

            # NOTE(review): this running sum yields correct cumulative `le`
            # totals only if the stored counts are per-bucket (non-cumulative)
            # - confirm against Histogram.observe.
            cumulative = 0
            for upper_bound, count in zip(metric.buckets, bucket_counts):
                cumulative += count
                out.append(
                    f"{metric.name}_bucket{self._with_le(label_str, upper_bound)} {cumulative}"
                )

            # The +Inf bucket always equals the total observation count.
            total = metric._counts[labels_tuple]
            out.append(f"{metric.name}_bucket{self._with_le(label_str, '+Inf')} {total}")
            out.append(f"{metric.name}_sum{label_str} {metric._sums[labels_tuple]}")
            out.append(f"{metric.name}_count{label_str} {total}")
        return out

    @staticmethod
    def _with_le(label_str: str, bound) -> str:
        """Append an ``le="<bound>"`` label to an already formatted label set."""
        le_label = f'le="{bound}"'
        if label_str:
            # Drop the closing brace, add the extra label, close again.
            return label_str[:-1] + "," + le_label + "}"
        return "{" + le_label + "}"

    @staticmethod
    def _escape_help(text: str) -> str:
        """Escape backslashes and newlines, as required in HELP lines."""
        return str(text).replace("\\", "\\\\").replace("\n", "\\n")

    @staticmethod
    def _escape_label_value(value) -> str:
        """Escape backslash, double quote and newline inside a label value."""
        return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")

    def _format_labels(self, label_names: list[str], label_values: tuple) -> str:
        """Format labels for Prometheus output.

        Returns:
            ``{name="value",...}`` with values escaped, or "" when the metric
            has no labels.
        """
        if not label_names:
            return ""
        pairs = [
            f'{name}="{self._escape_label_value(value)}"'
            for name, value in zip(label_names, label_values)
        ]
        return "{" + ",".join(pairs) + "}"
171
+
172
+
173
+ # Global registry
174
+ registry = MetricsRegistry()
175
+
176
+ # Define metrics
177
+ requests_total = registry.counter(
178
+ "opencode_memory_requests_total",
179
+ "Total number of MCP requests",
180
+ ["tool"],
181
+ )
182
+
183
+ request_duration = registry.histogram(
184
+ "opencode_memory_request_duration_seconds",
185
+ "Request duration in seconds",
186
+ [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0],
187
+ ["tool"],
188
+ )
189
+
190
+ memories_total = registry.gauge(
191
+ "opencode_memory_memories_total",
192
+ "Total number of memories",
193
+ ["category"],
194
+ )
195
+
196
+ embedding_queue_size = registry.gauge(
197
+ "opencode_memory_embedding_queue_size",
198
+ "Number of pending embedding tasks",
199
+ )
200
+
201
+ storage_bytes = registry.gauge(
202
+ "opencode_memory_storage_bytes",
203
+ "Storage size in bytes",
204
+ ["type"],
205
+ )
206
+
207
+ rate_limit_rejections = registry.counter(
208
+ "opencode_memory_rate_limit_rejections_total",
209
+ "Total number of rate-limited requests",
210
+ )
211
+
212
+ auth_failures = registry.counter(
213
+ "opencode_memory_auth_failures_total",
214
+ "Total number of authentication failures",
215
+ )
216
+
217
+ # Cache metrics
218
+ cache_size = registry.gauge(
219
+ "opencode_memory_cache_size",
220
+ "Number of entries in memory cache",
221
+ )
222
+
223
+ cache_hits = registry.counter(
224
+ "opencode_memory_cache_hits_total",
225
+ "Total cache hits",
226
+ )
227
+
228
+ cache_misses = registry.counter(
229
+ "opencode_memory_cache_misses_total",
230
+ "Total cache misses",
231
+ )
232
+
233
+ cache_hit_rate = registry.gauge(
234
+ "opencode_memory_cache_hit_rate",
235
+ "Cache hit rate (0-1)",
236
+ )
237
+
238
+ # Link metrics
239
+ links_total = registry.gauge(
240
+ "opencode_memory_links_total",
241
+ "Total number of memory links",
242
+ ["type"],
243
+ )
244
+
245
+
246
def update_from_status(status: dict[str, Any]) -> None:
    """Update metrics from server status.

    Reads the ``embedding_queue``, ``storage``, ``memories``, ``cache`` and
    ``links`` sections of *status*. A section or value that is missing or
    ``None`` is treated as empty/zero instead of raising, so a partially
    populated status dict still updates whatever it does contain.

    Args:
        status: Server status mapping; every section is optional.
    """

    def _number(value):
        """Map a missing (None) reading to 0 and pass anything else through."""
        return 0 if value is None else value

    # Update embedding queue
    eq = status.get("embedding_queue") or {}
    embedding_queue_size.set(_number(eq.get("pending")))

    # Update storage (reported in MiB, exported in bytes)
    storage = status.get("storage") or {}
    storage_bytes.set(_number(storage.get("db_size_mb")) * 1024 * 1024, type="sqlite")
    storage_bytes.set(_number(storage.get("vectors_size_mb")) * 1024 * 1024, type="vectors")

    # Update memory counts
    for category, count in (status.get("memories") or {}).items():
        memories_total.set(count, category=category)

    # Update cache metrics
    cache_stats = status.get("cache") or {}
    if cache_stats:
        cache_size.set(_number(cache_stats.get("size")))
        # The status carries absolute totals, so the label-less counter series
        # is overwritten directly rather than incremented.
        cache_hits._values[()] = _number(cache_stats.get("hits"))
        cache_misses._values[()] = _number(cache_stats.get("misses"))
        cache_hit_rate.set(_number(cache_stats.get("hit_rate")))

    # Update link metrics
    link_stats = status.get("links") or {}
    for link_type, count in (link_stats.get("by_type") or {}).items():
        links_total.set(count, type=link_type)